src/rcu.h

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
   4  */
   5
   6 #include <sched.h>
   7 #include <stdint.h>
   8 #include <pthread.h>
   9 #include <poll.h>
  10
/*
 * Alignment, in bytes, applied to struct side_rcu_percpu_count.
 * NOTE(review): presumably chosen so that counters of distinct CPUs/periods
 * never share a cache line -- confirm the rationale for 256.
 */
#define SIDE_CACHE_LINE_SIZE            256
/*
 * Number of counter slots kept per CPU. Slots are indexed by the grace-period
 * "period" value, which toggles between 0 and 1.
 */
#define SIDE_RCU_PERCPU_ARRAY_SIZE      2
  13
  14 struct side_rcu_percpu_count {
  15         uintptr_t begin;
  16         uintptr_t end;
  17 }  __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
  18
/*
 * Grace-period bookkeeping for a single CPU: one begin/end counter pair per
 * period slot.
 */
struct side_rcu_cpu_gp_state {
        /* Indexed by period (0 or 1). */
        struct side_rcu_percpu_count count[SIDE_RCU_PERCPU_ARRAY_SIZE];
};
  22
/*
 * Global RCU grace-period state shared by readers and updaters.
 * Allocation and initialization are not part of this header.
 */
struct side_rcu_gp_state {
        /*
         * Array of per-CPU counters, indexed by CPU number.
         * NOTE(review): readers index this array with sched_getcpu() without
         * checking against nr_cpus; presumably it covers every possible CPU
         * -- confirm at the allocation site.
         */
        struct side_rcu_cpu_gp_state *percpu_state;
        /* Number of percpu_state entries scanned by wait_for_cpus(). */
        int nr_cpus;
        /*
         * Current period (0 or 1). Sampled by side_rcu_read_begin() and
         * flipped by side_rcu_wait_grace_period().
         */
        unsigned int period;
        /* Held by side_rcu_wait_grace_period() across its wait/flip/wait sequence. */
        pthread_mutex_t gp_lock;
};
  29
  30 //TODO: replace atomics by rseq (when available)
  31 //TODO: replace acquire/release by membarrier+compiler barrier (when available)
  32 //TODO: implement wait/wakeup for grace period using sys_futex
  33 static inline
  34 unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
  35 {
  36         int cpu = sched_getcpu();
  37         unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
  38
  39         if (cpu < 0)
  40                 cpu = 0;
  41         /*
  42          * This acquire MO pairs with the release fence at the end of
  43          * side_rcu_wait_grace_period().
  44          */
  45         (void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_SEQ_CST);
  46         return period;
  47 }
  48
  49 static inline
  50 void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
  51 {
  52         int cpu = sched_getcpu();
  53
  54         if (cpu < 0)
  55                 cpu = 0;
  56         /*
  57          * This release MO pairs with the acquire fence at the beginning
  58          * of side_rcu_wait_grace_period().
  59          */
  60         (void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_SEQ_CST);
  61 }
  62
  63 #define side_rcu_dereference(p) \
  64         __extension__ \
  65         ({ \
  66                 (__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
  67                 (_____side_v); \
  68         })
  69
  70 #define side_rcu_assign_pointer(p, v)   __atomic_store_n(&(p), v, __ATOMIC_RELEASE); \
  71
  72 static inline
  73 void wait_for_cpus(struct side_rcu_gp_state *gp_state)
  74 {
  75         unsigned int prev_period = gp_state->period ^ 1;
  76
  77         /*
  78          * Wait for the sum of CPU begin/end counts to match for the
  79          * previous period.
  80          */
  81         for (;;) {
  82                 uintptr_t sum = 0;      /* begin - end */
  83                 int i;
  84
  85                 for (i = 0; i < gp_state->nr_cpus; i++) {
  86                         struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
  87
  88                         sum -= __atomic_load_n(&cpu_state->count[prev_period].end, __ATOMIC_RELAXED);
  89                 }
  90
  91                 /*
  92                  * Read end counts before begin counts. Reading end
  93                  * before begin count ensures we never see an end
  94                  * without having seen its associated begin, in case of
  95                  * a thread migration during the traversal over each
  96                  * cpu.
  97                  */
  98                 __atomic_thread_fence(__ATOMIC_SEQ_CST);
  99
 100                 for (i = 0; i < gp_state->nr_cpus; i++) {
 101                         struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
 102
 103                         sum += __atomic_load_n(&cpu_state->count[prev_period].begin, __ATOMIC_RELAXED);
 104                 }
 105                 if (!sum) {
 106                         break;
 107                 } else {
 108                         /* Retry after 10ms. */
 109                         poll(NULL, 0, 10);
 110                 }
 111         }
 112 }
 113
 114 static inline
 115 void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
 116 {
 117         /*
 118          * This fence pairs with the __atomic_add_fetch __ATOMIC_SEQ_CST in
 119          * side_rcu_read_end().
 120          */
 121         __atomic_thread_fence(__ATOMIC_SEQ_CST);
 122
 123         pthread_mutex_lock(&gp_state->gp_lock);
 124
 125         wait_for_cpus(gp_state);
 126
 127         /* Flip period: 0 -> 1, 1 -> 0. */
 128         (void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_SEQ_CST);
 129
 130         wait_for_cpus(gp_state);
 131
 132         pthread_mutex_unlock(&gp_state->gp_lock);
 133
 134         /*
 135          * This fence pairs with the __atomic_add_fetch __ATOMIC_SEQ_CST in
 136          * side_rcu_read_begin().
 137          */
 138         __atomic_thread_fence(__ATOMIC_SEQ_CST);
 139 }