// SPDX-License-Identifier: MIT
/*
 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 */

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/membarrier.h>
#include <linux/futex.h>
#include <rseq/rseq.h>

/* Project headers: RCU state types, futex() helper, CPU topology. */
#include "rcu.h"
#include "smp.h"

/*
 * If both rseq (with glibc support) and membarrier system calls are
 * available, use them to replace barriers and atomics on the fast-path.
 */
unsigned int side_rcu_rseq_membarrier_available;
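
/*
 * glibc provides no wrapper for membarrier(2), so invoke it directly
 * through syscall(2).
 */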
static int
membarrier(int cmd, unsigned int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
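
/*
 * Note on the command used throughout this file:
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED issues a memory barrier on every
 * running thread of the calling process. This lets the slow path
 * (grace period) pair with inexpensive compiler barriers on the
 * read-side fast path instead of full SEQ_CST fences.
 */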

/*
 * Wait/wakeup scheme with single waiter/many wakers.
 */
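
/*
 * Protocol sketch (as inferred from the code below): the single waiter
 * stores -1 to the futex before scanning the reader counters
 * (wait_gp_prepare), then either resets it to 0 once quiescence is
 * observed (wait_gp_end) or sleeps in FUTEX_WAIT until woken
 * (wait_gp). Wakers only need to issue a wake-up when they observe the
 * futex value -1.
 */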

static
void wait_gp_prepare(struct side_rcu_gp_state *gp_state)
{
	__atomic_store_n(&gp_state->futex, -1, __ATOMIC_RELAXED);
	/*
	 * This memory barrier (H) pairs with memory barrier (F). It
	 * orders the store to the futex before the load of the RCU
	 * reader's counter state, thus ensuring that the load of the
	 * RCU reader's counters does not leak outside of futex
	 * state=-1.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
}

static
void wait_gp_end(struct side_rcu_gp_state *gp_state)
{
	/*
	 * This memory barrier (G) pairs with memory barrier (F). It
	 * orders the load of the RCU reader's counter state before the
	 * store to the futex value, thus ensuring that the load of the
	 * RCU reader's counters does not leak outside of futex
	 * state=-1.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
	__atomic_store_n(&gp_state->futex, 0, __ATOMIC_RELAXED);
}

static
void wait_gp(struct side_rcu_gp_state *gp_state)
{
	/*
	 * This memory barrier (G) pairs with memory barrier (F). It
	 * orders the load of the RCU reader's counter state before the
	 * load of the futex value.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
	while (__atomic_load_n(&gp_state->futex, __ATOMIC_RELAXED) == -1) {
		if (!futex(&gp_state->futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
			/*
			 * May be awakened by either a spurious wake up
			 * or because the state is now as expected.
			 */
			continue;
		}
		switch (errno) {
		case EWOULDBLOCK:
			/* Value already changed. */
			return;
		case EINTR:
			/* Retry if interrupted by signal. */
			break;	/* Get out of switch. */
		default:
			/* Unexpected error. */
			abort();
		}
	}
}

/* active_readers is an input/output parameter. */
static
void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	uintptr_t sum[2] = { 0, 0 };	/* begin - end */
	int i;

	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		if (active_readers[0]) {
			sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
			sum[0] -= __atomic_load_n(&cpu_state->count[0].rseq_end, __ATOMIC_RELAXED);
		}
		if (active_readers[1]) {
			sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
			sum[1] -= __atomic_load_n(&cpu_state->count[1].rseq_end, __ATOMIC_RELAXED);
		}
	}

	/*
	 * This memory barrier (C) pairs with either of memory barriers
	 * (A) or (B) (one is sufficient).
	 *
	 * Read end counts before begin counts. Reading "end" before
	 * "begin" counts ensures we never see an "end" without having
	 * seen its associated "begin", because "begin" is always
	 * incremented before "end", as guaranteed by memory barriers
	 * (A) or (B).
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}

	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		if (active_readers[0]) {
			sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
			sum[0] += __atomic_load_n(&cpu_state->count[0].rseq_begin, __ATOMIC_RELAXED);
		}
		if (active_readers[1]) {
			sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
			sum[1] += __atomic_load_n(&cpu_state->count[1].rseq_begin, __ATOMIC_RELAXED);
		}
	}
	if (active_readers[0])
		active_readers[0] = sum[0];
	if (active_readers[1])
		active_readers[1] = sum[1];
}
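
/*
 * Worked example of the counting scheme above (illustrative): a reader
 * increments its CPU's begin count when entering a read-side critical
 * section for a period, and the corresponding end count when leaving
 * it. If a reader enters on CPU 0 and exits on CPU 1, the scan sees
 * begin = 1 on CPU 0 and end = 1 on CPU 1: the begin - end sum across
 * all CPUs is 0, so no reader is still active even though no single
 * CPU's counts match. A nonzero sum means at least one reader entered
 * and has not yet exited.
 */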

/*
 * Wait for the previous period to have no active readers.
 *
 * active_readers is an input/output parameter.
 */
static
void wait_for_prev_period_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	unsigned int prev_period = gp_state->period ^ 1;

	/*
	 * If a prior active readers scan already observed that no
	 * readers are present for the previous period, there is no need
	 * to scan again.
	 */
	if (!active_readers[prev_period])
		return;
	/*
	 * Wait for the sum of CPU begin/end counts to match for the
	 * previous period.
	 */
	for (;;) {
		wait_gp_prepare(gp_state);
		check_active_readers(gp_state, active_readers);
		if (!active_readers[prev_period]) {
			wait_gp_end(gp_state);
			break;
		}
		wait_gp(gp_state);
	}
}
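
/*
 * Why two periods: readers may keep entering the current period while
 * it is being scanned, so waiting on it directly might never finish.
 * Flipping the period makes new readers register under the other
 * index, so the previous period can only drain; waiting for its
 * begin - end sum to reach zero therefore terminates once the
 * pre-existing readers exit.
 */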

/*
 * The grace period completes when it observes that there are no active
 * readers within each of the periods.
 *
 * The active_readers state is initially true for each period, until the
 * grace period observes that no readers are present for each given
 * period, at which point the active_readers state becomes false.
 */
void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
{
	bool active_readers[2] = { true, true };

	/*
	 * This memory barrier (D) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders prior loads and stores before the "end"/"begin"
	 * reader state loads. In other words, it orders prior loads and
	 * stores before observation of active readers quiescence,
	 * effectively ensuring that read-side critical sections which
	 * exist after the grace period completes are ordered after
	 * loads and stores performed before the grace period.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}

	/*
	 * First scan through all cpus, for both periods. If no readers
	 * are accounted for, we have observed quiescence and can
	 * complete the grace period immediately.
	 */
	check_active_readers(gp_state, active_readers);
	if (!active_readers[0] && !active_readers[1])
		goto end;

	pthread_mutex_lock(&gp_state->gp_lock);

	wait_for_prev_period_readers(gp_state, active_readers);
	/*
	 * If the reader scan detected that there are no readers in the
	 * current period as well, we can complete the grace period
	 * immediately.
	 */
	if (!active_readers[gp_state->period])
		goto unlock;

	/* Flip period: 0 -> 1, 1 -> 0. */
	(void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_RELAXED);

	wait_for_prev_period_readers(gp_state, active_readers);
unlock:
	pthread_mutex_unlock(&gp_state->gp_lock);
end:
	/*
	 * This memory barrier (E) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders the "end"/"begin" reader state loads before
	 * following loads and stores. In other words, it orders
	 * observation of active readers quiescence before following
	 * loads and stores, effectively ensuring that read-side
	 * critical sections which existed prior to the grace period
	 * are ordered before loads and stores performed after the grace
	 * period.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
}
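
/*
 * Minimal writer-side usage sketch (illustrative only; shared_ptr,
 * newp and the reclaim step are hypothetical, and the read-side
 * helpers are assumed to be declared in rcu.h):
 *
 *	struct foo *oldp;
 *
 *	oldp = __atomic_exchange_n(&shared_ptr, newp, __ATOMIC_SEQ_CST);
 *	side_rcu_wait_grace_period(&gp_state);
 *	free(oldp);	// No pre-existing reader still references oldp.
 */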

void side_rcu_gp_init(struct side_rcu_gp_state *rcu_gp)
{
	bool has_membarrier = false, has_rseq = false;

	memset(rcu_gp, 0, sizeof(*rcu_gp));
	rcu_gp->nr_cpus = get_possible_cpus_array_len();
	if (!rcu_gp->nr_cpus)
		abort();
	pthread_mutex_init(&rcu_gp->gp_lock, NULL);
	rcu_gp->percpu_state = (struct side_rcu_cpu_gp_state *)
		calloc(rcu_gp->nr_cpus, sizeof(struct side_rcu_cpu_gp_state));
	if (!rcu_gp->percpu_state)
		abort();
	/*
	 * MEMBARRIER_CMD_PRIVATE_EXPEDITED requires prior registration
	 * with MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED.
	 */
	if (!membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0))
		has_membarrier = true;
	if (rseq_available(RSEQ_AVAILABLE_QUERY_LIBC))
		has_rseq = true;
	if (has_membarrier && has_rseq)
		side_rcu_rseq_membarrier_available = 1;
}

void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp)
{
	rseq_prepare_unload();
	pthread_mutex_destroy(&rcu_gp->gp_lock);
	free(rcu_gp->percpu_state);
}
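
/*
 * Lifecycle sketch (illustrative):
 *
 *	static struct side_rcu_gp_state gp;
 *
 *	side_rcu_gp_init(&gp);			// Once, before any reader runs.
 *	...					// Readers and grace periods.
 *	side_rcu_wait_grace_period(&gp);
 *	side_rcu_gp_exit(&gp);			// Once, after all readers quiesce.
 */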