#include <sched.h>
#include <stdint.h>
#include <pthread.h>
+#include <stdbool.h>
#include <poll.h>
#define SIDE_CACHE_LINE_SIZE 256
-#define SIDE_RCU_PERCPU_ARRAY_SIZE 2
struct side_rcu_percpu_count {
uintptr_t begin;
uintptr_t end;
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
struct side_rcu_cpu_gp_state {
- struct side_rcu_percpu_count count[SIDE_RCU_PERCPU_ARRAY_SIZE];
+ struct side_rcu_percpu_count count[2];
};
struct side_rcu_gp_state {
struct side_rcu_cpu_gp_state *percpu_state;
int nr_cpus;
unsigned int period;
pthread_mutex_t gp_lock;
};
#define side_rcu_assign_pointer(p, v) __atomic_store_n(&(p), v, __ATOMIC_RELEASE)
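/*
 * Reader-side sketch (not shown in this excerpt): the fence-pairing
 * comments in side_rcu_wait_grace_period() below refer to an
 * __atomic_add_fetch __ATOMIC_SEQ_CST in side_rcu_read_begin() and
 * side_rcu_read_end(). A minimal reconstruction along those lines could
 * look as follows, assuming sched_getcpu() (hence the <sched.h>
 * include) selects the per-cpu counter and the period observed at
 * read_begin is passed back to read_end.
 */
static inline
unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
{
unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
int cpu = sched_getcpu();

if (cpu < 0)
cpu = 0;
/* Pairs with the SEQ_CST fences in side_rcu_wait_grace_period(). */
(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_SEQ_CST);
return period;
}
static inline
void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
{
int cpu = sched_getcpu();

if (cpu < 0)
cpu = 0;
/* Pairs with the SEQ_CST fences in side_rcu_wait_grace_period(). */
(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_SEQ_CST);
}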
static inline
-void wait_for_cpus(struct side_rcu_gp_state *gp_state)
+void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
+{
+ uintptr_t sum[2] = { 0, 0 }; /* begin - end */
+ int i;
+
+ for (i = 0; i < gp_state->nr_cpus; i++) {
+ struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
+
+ sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
+ sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
+ }
+
+ /*
+ * Read end counts before begin counts. Reading end
+ * before begin counts ensures we never see an end
+ * without having seen its associated begin, in case of
+ * a thread migration during the traversal over each
+ * cpu. The sum can therefore only over-estimate the
+ * number of active readers; it never spuriously reports
+ * quiescence.
+ */
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+ for (i = 0; i < gp_state->nr_cpus; i++) {
+ struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
+
+ sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
+ sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
+ }
+ active_readers[0] = sum[0];
+ active_readers[1] = sum[1];
+}
+
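+/*
+ * Wait until all read-side critical sections which started in the
+ * previous period have completed, i.e. until the begin and end counts
+ * for that period sum to the same value across all cpus.
+ */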
+static inline
+void wait_for_prev_period_readers(struct side_rcu_gp_state *gp_state)
{
unsigned int prev_period = gp_state->period ^ 1;
+ bool active_readers[2];
/*
* Wait for the sum of CPU begin/end counts to match for the
* previous period.
*/
for (;;) {
- uintptr_t sum = 0; /* begin - end */
- int i;
-
- for (i = 0; i < gp_state->nr_cpus; i++) {
- struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
-
- sum -= __atomic_load_n(&cpu_state->count[prev_period].end, __ATOMIC_RELAXED);
- }
-
- /*
- * Read end counts before begin counts. Reading end
- * before begin count ensures we never see an end
- * without having seen its associated begin, in case of
- * a thread migration during the traversal over each
- * cpu.
- */
- __atomic_thread_fence(__ATOMIC_SEQ_CST);
-
- for (i = 0; i < gp_state->nr_cpus; i++) {
- struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
-
- sum += __atomic_load_n(&cpu_state->count[prev_period].begin, __ATOMIC_RELAXED);
- }
- if (!sum) {
+ check_active_readers(gp_state, active_readers);
+ if (!active_readers[prev_period])
break;
- } else {
- /* Retry after 10ms. */
- poll(NULL, 0, 10);
- }
+ /* Retry after 10ms. */
+ poll(NULL, 0, 10);
}
}
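/*
 * Hypothetical usage sketch (not part of the original header), assuming
 * a pointer foo_ptr protected by an RCU instance rcu_gp; the names
 * foo_ptr, new_foo, use() and rcu_gp are illustrative only:
 *
 * Reader:
 * unsigned int period = side_rcu_read_begin(&rcu_gp);
 * struct foo *p = __atomic_load_n(&foo_ptr, __ATOMIC_CONSUME);
 * if (p)
 * use(p);
 * side_rcu_read_end(&rcu_gp, period);
 *
 * Updater:
 * struct foo *old = foo_ptr;
 * side_rcu_assign_pointer(foo_ptr, new_foo);
 * side_rcu_wait_grace_period(&rcu_gp);
 * free(old);
 */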
static inline
void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
{
+ bool active_readers[2];
+
/*
* This fence pairs with the __atomic_add_fetch __ATOMIC_SEQ_CST in
* side_rcu_read_end().
*/
__atomic_thread_fence(__ATOMIC_SEQ_CST);
+ /*
+ * First scan through all cpus, for both periods. If no readers
+ * are accounted for, we have observed quiescence and can
+ * complete the grace period immediately.
+ */
+ check_active_readers(gp_state, active_readers);
+ if (!active_readers[0] && !active_readers[1])
+ goto end;
+
pthread_mutex_lock(&gp_state->gp_lock);
- wait_for_cpus(gp_state);
+ wait_for_prev_period_readers(gp_state);
/* Flip period: 0 -> 1, 1 -> 0. */
- (void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_SEQ_CST);
+ (void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_RELAXED);
- wait_for_cpus(gp_state);
+ wait_for_prev_period_readers(gp_state);
pthread_mutex_unlock(&gp_state->gp_lock);
+end:
/*
* This fence pairs with the __atomic_add_fetch __ATOMIC_SEQ_CST in
* side_rcu_read_begin().