From 7fb53c62dca6b2a8ef23b56a4287e62ac0326de5 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sun, 30 Oct 2022 15:42:57 -0400
Subject: [PATCH] Implement rseq-based RCU

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 README.md    |  3 +++
 src/Makefile |  2 +-
 src/rcu.c    | 17 +++++++++++++----
 src/rcu.h    | 41 +++++++++++++++++++++++++++++++++--------
 4 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index f1a3515..82d9e17 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,5 @@
 # side
 Static Instrumentation Dynamically Enabled
+
+# dependencies
+librseq: https://github.com/compudj/librseq
diff --git a/src/Makefile b/src/Makefile
index a4b38aa..b199075 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -21,7 +21,7 @@ test.o: test.c $(HEADERS)
 	gcc $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
 
 test: tracer.o test.o side.o rcu.o smp.o
-	gcc $(CFLAGS) -o $@ $^
+	gcc $(CFLAGS) -o $@ $^ -lrseq
 
 .PHONY: clean
 
diff --git a/src/rcu.c b/src/rcu.c
index bb3bfed..45136ad 100644
--- a/src/rcu.c
+++ b/src/rcu.c
@@ -24,10 +24,14 @@ void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_reade
 	for (i = 0; i < gp_state->nr_cpus; i++) {
 		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
 
-		if (active_readers[0])
+		if (active_readers[0]) {
 			sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
-		if (active_readers[1])
+			sum[0] -= __atomic_load_n(&cpu_state->count[0].rseq_end, __ATOMIC_RELAXED);
+		}
+		if (active_readers[1]) {
 			sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
+			sum[1] -= __atomic_load_n(&cpu_state->count[1].rseq_end, __ATOMIC_RELAXED);
+		}
 	}
 
 	/*
@@ -45,10 +49,14 @@ void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_reade
 	for (i = 0; i < gp_state->nr_cpus; i++) {
 		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
 
-		if (active_readers[0])
+		if (active_readers[0]) {
 			sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
-		if (active_readers[1])
+			sum[0] += __atomic_load_n(&cpu_state->count[0].rseq_begin, __ATOMIC_RELAXED);
+		}
+		if (active_readers[1]) {
 			sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
+			sum[1] += __atomic_load_n(&cpu_state->count[1].rseq_begin, __ATOMIC_RELAXED);
+		}
 	}
 	if (active_readers[0])
 		active_readers[0] = sum[0];
@@ -167,6 +175,7 @@ void side_rcu_gp_init(struct side_rcu_gp_state *rcu_gp)
 
 void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp)
 {
+	rseq_prepare_unload();
 	pthread_mutex_destroy(&rcu_gp->gp_lock);
 	free(rcu_gp->percpu_state);
 }
diff --git a/src/rcu.h b/src/rcu.h
index f25aa87..25655ca 100644
--- a/src/rcu.h
+++ b/src/rcu.h
@@ -11,12 +11,16 @@
 #include <pthread.h>
 #include <stdbool.h>
 #include <poll.h>
+#include <rseq/rseq.h>
+#include <side/macros.h>
 
 #define SIDE_CACHE_LINE_SIZE 256
 
 struct side_rcu_percpu_count {
 	uintptr_t begin;
+	uintptr_t rseq_begin;
 	uintptr_t end;
+	uintptr_t rseq_end;
 } __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
 
 struct side_rcu_cpu_gp_state {
@@ -30,17 +34,27 @@ struct side_rcu_gp_state {
 	pthread_mutex_t gp_lock;
 };
 
-//TODO: replace atomics by rseq (when available)
 //TODO: replace acquire/release by membarrier+compiler barrier (when available)
 //TODO: implement wait/wakeup for grace period using sys_futex
 static inline
 unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
 {
-	int cpu = sched_getcpu();
 	unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
+	struct side_rcu_cpu_gp_state *cpu_gp_state;
+	int cpu;
 
-	if (cpu < 0)
+	if (side_likely(rseq_offset > 0)) {
+		cpu = rseq_cpu_start();
+		cpu_gp_state = &gp_state->percpu_state[cpu];
+		if (!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_begin, 1, cpu))
+			goto fence;
+	}
+	cpu = sched_getcpu();
+	if (side_unlikely(cpu < 0))
 		cpu = 0;
+	cpu_gp_state = &gp_state->percpu_state[cpu];
+	(void) __atomic_add_fetch(&cpu_gp_state->count[period].begin, 1, __ATOMIC_RELAXED);
+fence:
 	/*
 	 * This memory barrier (A) ensures that the contents of the
 	 * read-side critical section does not leak before the "begin"
@@ -51,17 +65,16 @@ unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
 	 * barrier (C). It is redundant with memory barrier (B) for that
 	 * purpose.
 	 */
-	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_SEQ_CST);
+	__atomic_thread_fence(__ATOMIC_SEQ_CST);
 	return period;
 }
 
 static inline
 void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
 {
-	int cpu = sched_getcpu();
+	struct side_rcu_cpu_gp_state *cpu_gp_state;
+	int cpu;
 
-	if (cpu < 0)
-		cpu = 0;
 	/*
 	 * This memory barrier (B) ensures that the contents of the
 	 * read-side critical section does not leak after the "end"
@@ -72,7 +85,19 @@ void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
 	 * barrier (C). It is redundant with memory barrier (A) for that
 	 * purpose.
 	 */
-	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_SEQ_CST);
+	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+	if (side_likely(rseq_offset > 0)) {
+		cpu = rseq_cpu_start();
+		cpu_gp_state = &gp_state->percpu_state[cpu];
+		if (!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_end, 1, cpu))
+			return;
+	}
+	cpu = sched_getcpu();
+	if (side_unlikely(cpu < 0))
+		cpu = 0;
+	cpu_gp_state = &gp_state->percpu_state[cpu];
+	(void) __atomic_add_fetch(&cpu_gp_state->count[period].end, 1, __ATOMIC_RELAXED);
 }
 
 #define side_rcu_dereference(p) \
-- 
2.34.1
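Usage note (not part of the patch): a minimal reader-side sketch of the API touched above. Only struct side_rcu_gp_state, side_rcu_gp_init(), side_rcu_read_begin(), side_rcu_dereference() and side_rcu_read_end() come from rcu.h as modified here; struct config, current_config and read_verbose_flag() are hypothetical names, and side_rcu_dereference() is assumed to behave like liburcu's rcu_dereference() (its full definition is outside the hunks shown).

#include "rcu.h"

struct config {
	int verbose;
};

static struct side_rcu_gp_state gp_state;	/* set up once with side_rcu_gp_init(&gp_state) */
static struct config *current_config;		/* pointer published by an updater thread */

static int read_verbose_flag(void)
{
	struct config *cfg;
	unsigned int period;
	int ret = 0;

	/* Enter the read-side critical section; remember the period it returns. */
	period = side_rcu_read_begin(&gp_state);
	/* Load the RCU-protected pointer through the side_rcu_dereference() macro. */
	cfg = side_rcu_dereference(current_config);
	if (cfg)
		ret = cfg->verbose;
	/* Exit the critical section, passing back the same period value. */
	side_rcu_read_end(&gp_state, period);
	return ret;
}

With this patch applied, side_rcu_read_begin()/side_rcu_read_end() increment the per-CPU rseq_begin/rseq_end counters through rseq_addv() when rseq is usable, and fall back to the atomic begin/end counters otherwise; check_active_readers() in rcu.c sums both counter flavours when checking for quiescence.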