* Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/
+#ifndef _SIDE_RCU_H
+#define _SIDE_RCU_H
+
#include <sched.h>
#include <stdint.h>
#include <pthread.h>
#include <stdbool.h>
#include <poll.h>
+#include <rseq/rseq.h>
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <side/macros.h>
/*
 * Assumed CPU cache line size. Per-CPU grace period state is aligned
 * to this size to avoid false sharing between CPUs.
 */
#define SIDE_CACHE_LINE_SIZE	256

/*
 * Per-CPU, per-period reader counters. A reader increments a "begin"
 * counter when entering a read-side critical section and the matching
 * "end" counter when leaving it. The rseq_begin/rseq_end fields are
 * updated with rseq per-cpu operations on the fast path; begin/end are
 * updated with SEQ_CST atomics on the fallback path.
 */
struct side_rcu_percpu_count {
	uintptr_t begin;
	uintptr_t rseq_begin;
	uintptr_t end;
	uintptr_t rseq_end;
};
/*
 * Per-CPU grace period state: one reader count per grace period parity
 * (period 0 and period 1). Cache-line aligned so that concurrent
 * updates from different CPUs do not false-share.
 */
struct side_rcu_cpu_gp_state {
	struct side_rcu_percpu_count count[2];
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
/*
 * Global RCU grace period state.
 */
struct side_rcu_gp_state {
	struct side_rcu_cpu_gp_state *percpu_state;	/* Array of nr_cpus per-cpu states. */
	int nr_cpus;					/* Number of entries in percpu_state. */
	int32_t futex;					/* Grace-period waiter parking word (-1: waiter parked). */
	unsigned int period;				/* Current grace period parity (0 or 1). */
	pthread_mutex_t gp_lock;			/* Serializes grace period waiters. */
};
/*
 * Read-side state recorded by side_rcu_read_begin() and consumed by
 * side_rcu_read_end(): the per-cpu/per-period counter structure whose
 * "begin" count was incremented on entry, and the CPU it was
 * incremented on.
 */
struct side_rcu_read_state {
	struct side_rcu_percpu_count *percpu_count;
	int cpu;
};
+extern unsigned int side_rcu_rseq_membarrier_available __attribute__((visibility("hidden")));
/*
 * Minimal futex(2) wrapper: invoke the system call directly since the
 * C library does not provide one.
 *
 * Returns the raw syscall result: >= 0 on success (meaning depends on
 * "op"), -1 with errno set on error.
 */
static inline
int futex(int32_t *uaddr, int op, int32_t val,
		const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
{
	return syscall(__NR_futex, uaddr, op, val, timeout, uaddr2, val3);
}
-#define side_rcu_dereference(p) \
- __extension__ \
- ({ \
- (__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
- (_____side_v); \
- })
-
-#define side_rcu_assign_pointer(p, v) __atomic_store_n(&(p), v, __ATOMIC_RELEASE); \
-
+/*
+ * Wake-up side_rcu_wait_grace_period. Called concurrently from many
+ * threads.
+ */
static inline
-void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
+void side_rcu_wake_up_gp(struct side_rcu_gp_state *gp_state)
{
- uintptr_t sum[2] = { 0, 0 }; /* begin - end */
- int i;
-
- for (i = 0; i < gp_state->nr_cpus; i++) {
- struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
-
- sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
- sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
- }
-
- /*
- * Read end counts before begin counts. Reading end
- * before begin count ensures we never see an end
- * without having seen its associated begin, in case of
- * a thread migration during the traversal over each
- * cpu.
- */
- __atomic_thread_fence(__ATOMIC_SEQ_CST);
-
- for (i = 0; i < gp_state->nr_cpus; i++) {
- struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
-
- sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
- sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
+ if (side_unlikely(__atomic_load_n(&gp_state->futex, __ATOMIC_RELAXED) == -1)) {
+ __atomic_store_n(&gp_state->futex, 0, __ATOMIC_RELAXED);
+ /* TODO: handle futex return values. */
+ (void) futex(&gp_state->futex, FUTEX_WAKE, 1, NULL, NULL, 0);
}
- active_readers[0] = sum[0];
- active_readers[1] = sum[1];
}
static inline
-void wait_for_prev_period_readers(struct side_rcu_gp_state *gp_state)
+void side_rcu_read_begin(struct side_rcu_gp_state *gp_state, struct side_rcu_read_state *read_state)
{
- unsigned int prev_period = gp_state->period ^ 1;
- bool active_readers[2];
-
- /*
- * Wait for the sum of CPU begin/end counts to match for the
- * previous period.
- */
- for (;;) {
- check_active_readers(gp_state, active_readers);
- if (!active_readers[prev_period])
- break;
- /* Retry after 10ms. */
- poll(NULL, 0, 10);
+ struct side_rcu_percpu_count *begin_cpu_count;
+ struct side_rcu_cpu_gp_state *cpu_gp_state;
+ unsigned int period;
+ int cpu;
+
+ cpu = rseq_cpu_start();
+ period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ read_state->percpu_count = begin_cpu_count = &cpu_gp_state->count[period];
+ read_state->cpu = cpu;
+ if (side_likely(side_rcu_rseq_membarrier_available &&
+ !rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU_CPU_ID,
+ (intptr_t *)&begin_cpu_count->rseq_begin, 1, cpu))) {
+ /*
+ * This compiler barrier (A) is paired with membarrier() at (C),
+ * (D), (E). It effectively upgrades this compiler barrier to a
+ * SEQ_CST fence with respect to the paired barriers.
+ *
+ * This barrier (A) ensures that the contents of the read-side
+ * critical section does not leak before the "begin" counter
+ * increment. It pairs with memory barriers (D) and (E).
+ *
+ * This barrier (A) also ensures that the "begin" increment is
+ * before the "end" increment. It pairs with memory barrier (C).
+ * It is redundant with barrier (B) for that purpose.
+ */
+ rseq_barrier();
+ return;
}
+ /* Fallback to atomic increment and SEQ_CST. */
+ cpu = sched_getcpu();
+ if (side_unlikely(cpu < 0))
+ cpu = 0;
+ read_state->cpu = cpu;
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ read_state->percpu_count = begin_cpu_count = &cpu_gp_state->count[period];
+ (void) __atomic_add_fetch(&begin_cpu_count->begin, 1, __ATOMIC_SEQ_CST);
}
static inline
-void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
+void side_rcu_read_end(struct side_rcu_gp_state *gp_state, struct side_rcu_read_state *read_state)
{
- bool active_readers[2];
+ struct side_rcu_percpu_count *begin_cpu_count = read_state->percpu_count;
+ int cpu = read_state->cpu;
/*
- * This fence pairs with the __atomic_add_fetch __ATOMIC_SEQ_CST in
- * side_rcu_read_end().
+ * This compiler barrier (B) is paired with membarrier() at (C),
+ * (D), (E). It effectively upgrades this compiler barrier to a
+ * SEQ_CST fence with respect to the paired barriers.
+ *
+ * This barrier (B) ensures that the contents of the read-side
+ * critical section does not leak after the "end" counter
+ * increment. It pairs with memory barriers (D) and (E).
+ *
+ * This barrier (B) also ensures that the "begin" increment is
+ * before the "end" increment. It pairs with memory barrier (C).
+ * It is redundant with barrier (A) for that purpose.
*/
- __atomic_thread_fence(__ATOMIC_SEQ_CST);
-
+ rseq_barrier();
+ if (side_likely(side_rcu_rseq_membarrier_available &&
+ !rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU_CPU_ID,
+ (intptr_t *)&begin_cpu_count->rseq_end, 1, cpu))) {
+ /*
+ * This barrier (F) is paired with membarrier()
+ * at (G). It orders increment of the begin/end
+ * counters before load/store to the futex.
+ */
+ rseq_barrier();
+ goto end;
+ }
+ /* Fallback to atomic increment and SEQ_CST. */
+ (void) __atomic_add_fetch(&begin_cpu_count->end, 1, __ATOMIC_SEQ_CST);
/*
- * First scan through all cpus, for both period. If no readers
- * are accounted for, we have observed quiescence and can
- * complete the grace period immediately.
+ * This barrier (F) implied by SEQ_CST is paired with SEQ_CST
+ * barrier or membarrier() at (G). It orders increment of the
+ * begin/end counters before load/store to the futex.
*/
- check_active_readers(gp_state, active_readers);
- goto end;
- if (!active_readers[0] && !active_readers[1])
- goto end;
-
- pthread_mutex_lock(&gp_state->gp_lock);
-
- wait_for_prev_period_readers(gp_state);
+end:
+ side_rcu_wake_up_gp(gp_state);
+}
/*
 * Load an RCU-protected pointer with CONSUME memory ordering, so that
 * the load of the pointer is ordered before dependent loads from the
 * pointed-to object. Pairs with side_rcu_assign_pointer().
 */
#define side_rcu_dereference(p) \
	__extension__ \
	({ \
		__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
		(_____side_v); \
	})
/*
 * Publish a pointer for RCU readers: store "v" into "p" with RELEASE
 * semantics, so prior initialization of the pointed-to object is
 * visible to readers which load it with side_rcu_dereference().
 *
 * No trailing semicolon in the expansion: the original definition
 * ended in "; \", which both double-terminated the statement at call
 * sites (breaking use in an unbraced if/else) and spliced the
 * following source line into the macro via the stray line
 * continuation.
 */
#define side_rcu_assign_pointer(p, v)	__atomic_store_n(&(p), v, __ATOMIC_RELEASE)
+void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state) __attribute__((visibility("hidden")));
+void side_rcu_gp_init(struct side_rcu_gp_state *rcu_gp) __attribute__((visibility("hidden")));
+void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp) __attribute__((visibility("hidden")));
-end:
- /*
- * This fence pairs with the __atomic_add_fetch __ATOMIC_SEQ_CST in
- * side_rcu_read_begin().
- */
- __atomic_thread_fence(__ATOMIC_SEQ_CST);
-}
+#endif /* _SIDE_RCU_H */