#include <poll.h>
#include <stdlib.h>
#include <unistd.h>
+#include <stdio.h>
#include <sys/syscall.h>
#include <linux/membarrier.h>
#include "rcu.h"
#include "smp.h"
+/*
+ * If both rseq (with glibc support) and membarrier system calls are
+ * available, use them to replace barriers and atomics on the fast-path.
+ */
+unsigned int side_rcu_rseq_membarrier_available;
+
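glibc provides no wrapper for membarrier(2), so the call has to be issued through syscall(2). A minimal sketch of such a wrapper, assuming <unistd.h>, <sys/syscall.h> and <linux/membarrier.h> are included (membarrier_raw is an illustrative name; the file's own wrapper is defined just below):

	static int
	membarrier_raw(int cmd, unsigned int flags, int cpu_id)
	{
		return syscall(__NR_membarrier, cmd, flags, cpu_id);
	}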
static int
membarrier(int cmd, unsigned int flags, int cpu_id)
{
* incremented before "end", as guaranteed by memory barriers
* (A) or (B).
*/
- if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0))
- abort();
+ if (side_rcu_rseq_membarrier_available) {
+ if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
+ perror("membarrier");
+ abort();
+ }
+ } else {
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+ }
for (i = 0; i < gp_state->nr_cpus; i++) {
struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
* exist after the grace period completes are ordered after
* loads and stores performed before the grace period.
*/
- if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0))
- abort();
+ if (side_rcu_rseq_membarrier_available) {
+ if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
+ perror("membarrier");
+ abort();
+ }
+ } else {
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+ }
/*
* First scan through all cpus, for both periods. If no readers
* are ordered before loads and stores performed after the grace
* period.
*/
- if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0))
- abort();
+ if (side_rcu_rseq_membarrier_available) {
+ if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
+ perror("membarrier");
+ abort();
+ }
+ } else {
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+ }
}
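For context, a hedged sketch of the update side: publish a new version of a shared object, wait for the grace period implemented above, then reclaim the old copy. The shared_foo pointer, the make_new_foo() helper and the side_rcu_wait_grace_period() name for the function above are assumptions for illustration only:

	struct foo *new_foo = make_new_foo();
	struct foo *old_foo;

	/* Publish the new version; concurrent readers see either copy. */
	old_foo = __atomic_exchange_n(&shared_foo, new_foo, __ATOMIC_SEQ_CST);
	/* Wait until all readers which may still hold old_foo have completed. */
	side_rcu_wait_grace_period(&rcu_gp);
	free(old_foo);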
void side_rcu_gp_init(struct side_rcu_gp_state *rcu_gp)
{
+ bool has_membarrier = false, has_rseq = false;
+
memset(rcu_gp, 0, sizeof(*rcu_gp));
rcu_gp->nr_cpus = get_possible_cpus_array_len();
if (!rcu_gp->nr_cpus)
rcu_gp->percpu_state = calloc(rcu_gp->nr_cpus, sizeof(struct side_rcu_cpu_gp_state));
if (!rcu_gp->percpu_state)
abort();
- if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0))
- abort();
+ if (!membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0))
+ has_membarrier = true;
+ if (rseq_available(RSEQ_AVAILABLE_QUERY_LIBC))
+ has_rseq = true;
+ if (has_membarrier && has_rseq)
+ side_rcu_rseq_membarrier_available = 1;
}
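Note that a successful MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED doubles as the availability probe here. An explicit probe, sketched below only as an illustration, could instead query the supported-command mask first; registration would still be required before issuing the expedited command:

	int mask = membarrier(MEMBARRIER_CMD_QUERY, 0, 0);

	if (mask >= 0 && (mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED))
		has_membarrier = true;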
void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp)
pthread_mutex_t gp_lock;
};
+extern unsigned int side_rcu_rseq_membarrier_available __attribute__((visibility("hidden")));
+
//TODO: implement wait/wakeup for grace period using sys_futex
static inline
unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
struct side_rcu_cpu_gp_state *cpu_gp_state;
int cpu;
- if (side_likely(rseq_offset > 0)) {
+ if (side_likely(side_rcu_rseq_membarrier_available)) {
cpu = rseq_cpu_start();
cpu_gp_state = &gp_state->percpu_state[cpu];
- if (!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_begin, 1, cpu))
- goto fence;
+ if (side_likely(!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_begin, 1, cpu))) {
+ /*
+ * This compiler barrier (A) is paired with membarrier() at (C),
+ * (D), (E). It effectively upgrades this compiler barrier to a
+ * SEQ_CST fence with respect to the paired barriers.
+ *
+ * This barrier (A) ensures that the contents of the read-side
+ * critical section do not leak before the "begin" counter
+ * increment. It pairs with memory barriers (D) and (E).
+ *
+ * This barrier (A) also ensures that the "begin" increment is
+ * before the "end" increment. It pairs with memory barrier (C).
+ * It is redundant with barrier (B) for that purpose.
+ */
+ rseq_barrier();
+ return period;
+ }
}
+ /* Fall back to atomic increment with SEQ_CST ordering. */
cpu = sched_getcpu();
if (side_unlikely(cpu < 0))
cpu = 0;
cpu_gp_state = &gp_state->percpu_state[cpu];
- (void) __atomic_add_fetch(&cpu_gp_state->count[period].begin, 1, __ATOMIC_RELAXED);
-fence:
- /*
- * This compiler barrier (A) is paired with membarrier() at (C),
- * (D), (E). It effectively upgrades this compiler barrier to a
- * SEQ_CST fence with respect to the paired barriers.
- *
- * This barrier (A) ensures that the contents of the read-side
- * critical section does not leak before the "begin" counter
- * increment. It pairs with memory barriers (D) and (E).
- *
- * This barrier (A) also ensures that the "begin" increment is
- * before the "end" increment. It pairs with memory barrier (C).
- * It is redundant with barrier (B) for that purpose.
- */
- rseq_barrier();
+ (void) __atomic_add_fetch(&cpu_gp_state->count[period].begin, 1, __ATOMIC_SEQ_CST);
return period;
}
struct side_rcu_cpu_gp_state *cpu_gp_state;
int cpu;
- /*
- * This compiler barrier (B) is paired with membarrier() at (C),
- * (D), (E). It effectively upgrades this compiler barrier to a
- * SEQ_CST fence with respect to the paired barriers.
- *
- * This barrier (B) ensures that the contents of the read-side
- * critical section does not leak after the "end" counter
- * increment. It pairs with memory barriers (D) and (E).
- *
- * This barrier (B) also ensures that the "begin" increment is
- * before the "end" increment. It pairs with memory barrier (C).
- * It is redundant with barrier (A) for that purpose.
- */
- rseq_barrier();
-
- if (side_likely(rseq_offset > 0)) {
+ if (side_likely(side_rcu_rseq_membarrier_available)) {
+ /*
+ * This compiler barrier (B) is paired with membarrier() at (C),
+ * (D), (E). It effectively upgrades this compiler barrier to a
+ * SEQ_CST fence with respect to the paired barriers.
+ *
+ * This barrier (B) ensures that the contents of the read-side
+ * critical section do not leak after the "end" counter
+ * increment. It pairs with memory barriers (D) and (E).
+ *
+ * This barrier (B) also ensures that the "begin" increment is
+ * before the "end" increment. It pairs with memory barrier (C).
+ * It is redundant with barrier (A) for that purpose.
+ */
+ rseq_barrier();
cpu = rseq_cpu_start();
cpu_gp_state = &gp_state->percpu_state[cpu];
- if (!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_end, 1, cpu))
+ if (side_likely(!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_end, 1, cpu)))
return;
}
+ /* Fall back to atomic increment with SEQ_CST ordering. */
cpu = sched_getcpu();
if (side_unlikely(cpu < 0))
cpu = 0;
cpu_gp_state = &gp_state->percpu_state[cpu];
- (void) __atomic_add_fetch(&cpu_gp_state->count[period].end, 1, __ATOMIC_RELAXED);
+ (void) __atomic_add_fetch(&cpu_gp_state->count[period].end, 1, __ATOMIC_SEQ_CST);
}
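A reader-side usage sketch: the period returned by side_rcu_read_begin() is handed back to side_rcu_read_end(), and pointers published by updaters are loaded through side_rcu_dereference(). The shared_foo pointer and the use() helper are illustrative, not part of this API:

	unsigned int period;
	struct foo *p;

	period = side_rcu_read_begin(gp_state);
	p = side_rcu_dereference(shared_foo);
	if (p)
		use(p);
	side_rcu_read_end(gp_state, period);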
#define side_rcu_dereference(p) \