Update barriers to SEQ_CST in read begin/end
[libside.git] / src / rcu.h
1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 */
5
#include <sched.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <poll.h>
10
/* Alignment, in bytes, applied to each struct side_rcu_percpu_count. */
#define SIDE_CACHE_LINE_SIZE		256
/* Number of begin/end counter pairs kept per CPU; indexed by the period. */
#define SIDE_RCU_PERCPU_ARRAY_SIZE	2
13
/*
 * One pair of reader counters for a given CPU and period.
 *
 * side_rcu_read_begin() increments @begin and side_rcu_read_end()
 * increments @end; wait_for_cpus() compares the sums of both across
 * CPUs. Each instance is aligned on SIDE_CACHE_LINE_SIZE bytes
 * (presumably to keep counters of different CPUs/periods from sharing
 * a cache line).
 */
struct side_rcu_percpu_count {
	uintptr_t begin;	/* Count of read-side sections entered. */
	uintptr_t end;		/* Count of read-side sections exited. */
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
18
19 struct side_rcu_cpu_gp_state {
20 struct side_rcu_percpu_count count[SIDE_RCU_PERCPU_ARRAY_SIZE];
21 };
22
/*
 * Grace period state shared by readers and by the grace period waiter.
 */
struct side_rcu_gp_state {
	/* Array of per-CPU counters; wait_for_cpus() walks nr_cpus entries. */
	struct side_rcu_cpu_gp_state *percpu_state;
	/* Number of entries iterated over in percpu_state. */
	int nr_cpus;
	/*
	 * Current period, used by readers as index into the per-CPU count
	 * array. Flipped (xor 1) by side_rcu_wait_grace_period().
	 */
	unsigned int period;
	/* Held by side_rcu_wait_grace_period() around the wait/flip/wait sequence. */
	pthread_mutex_t gp_lock;
};
29
30 //TODO: replace atomics by rseq (when available)
31 //TODO: replace acquire/release by membarrier+compiler barrier (when available)
32 //TODO: implement wait/wakeup for grace period using sys_futex
33 static inline
34 unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
35 {
36 int cpu = sched_getcpu();
37 unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
38
39 if (cpu < 0)
40 cpu = 0;
41 /*
42 * This acquire MO pairs with the release fence at the end of
43 * side_rcu_wait_grace_period().
44 */
45 (void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_SEQ_CST);
46 return period;
47 }
48
49 static inline
50 void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
51 {
52 int cpu = sched_getcpu();
53
54 if (cpu < 0)
55 cpu = 0;
56 /*
57 * This release MO pairs with the acquire fence at the beginning
58 * of side_rcu_wait_grace_period().
59 */
60 (void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_SEQ_CST);
61 }
62
/*
 * side_rcu_dereference(p)
 *
 * Atomically load the lvalue @p with consume memory ordering and
 * evaluate to the loaded value (same type as @p). @p is evaluated
 * exactly once for the load; __typeof__ does not evaluate it.
 *
 * Relies on the GNU statement-expression extension.
 */
#define side_rcu_dereference(p) \
	__extension__ \
	({ \
		__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
		(_____side_v); \
	})
69
/*
 * side_rcu_assign_pointer(p, v)
 *
 * Atomically store @v into the lvalue @p with release memory ordering.
 * Expands to a single expression without a trailing semicolon, so it
 * can be used as the body of an unbraced if/else.
 */
#define side_rcu_assign_pointer(p, v)	__atomic_store_n(&(p), (v), __ATOMIC_RELEASE)

72 static inline
73 void wait_for_cpus(struct side_rcu_gp_state *gp_state)
74 {
75 unsigned int prev_period = gp_state->period ^ 1;
76
77 /*
78 * Wait for the sum of CPU begin/end counts to match for the
79 * previous period.
80 */
81 for (;;) {
82 uintptr_t sum = 0; /* begin - end */
83 int i;
84
85 for (i = 0; i < gp_state->nr_cpus; i++) {
86 struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
87
88 sum -= __atomic_load_n(&cpu_state->count[prev_period].end, __ATOMIC_RELAXED);
89 }
90
91 /*
92 * Read end counts before begin counts. Reading end
93 * before begin count ensures we never see an end
94 * without having seen its associated begin, in case of
95 * a thread migration during the traversal over each
96 * cpu.
97 */
98 __atomic_thread_fence(__ATOMIC_SEQ_CST);
99
100 for (i = 0; i < gp_state->nr_cpus; i++) {
101 struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
102
103 sum += __atomic_load_n(&cpu_state->count[prev_period].begin, __ATOMIC_RELAXED);
104 }
105 if (!sum) {
106 break;
107 } else {
108 /* Retry after 10ms. */
109 poll(NULL, 0, 10);
110 }
111 }
112 }
113
114 static inline
115 void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
116 {
117 /*
118 * This fence pairs with the __atomic_add_fetch __ATOMIC_SEQ_CST in
119 * side_rcu_read_end().
120 */
121 __atomic_thread_fence(__ATOMIC_SEQ_CST);
122
123 pthread_mutex_lock(&gp_state->gp_lock);
124
125 wait_for_cpus(gp_state);
126
127 /* Flip period: 0 -> 1, 1 -> 0. */
128 (void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_SEQ_CST);
129
130 wait_for_cpus(gp_state);
131
132 pthread_mutex_unlock(&gp_state->gp_lock);
133
134 /*
135 * This fence pairs with the __atomic_add_fetch __ATOMIC_SEQ_CST in
136 * side_rcu_read_begin().
137 */
138 __atomic_thread_fence(__ATOMIC_SEQ_CST);
139 }
This page took 0.032139 seconds and 5 git commands to generate.