percpu pool: Move robust flag to attribute
[librseq.git] / tests / param_test.c
1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6 #include <assert.h>
7 #include <linux/version.h>
8 #include <linux/membarrier.h>
9 #include <pthread.h>
10 #include <sched.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <syscall.h>
16 #include <unistd.h>
17 #include <poll.h>
18 #include <sys/types.h>
19 #include <signal.h>
20 #include <errno.h>
21 #include <stddef.h>
22 #include <stdbool.h>
23 #include <rseq/percpu-alloc.h>
24
25 #define PERCPU_POOL_LEN (1024*1024) /* 1MB */
26
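/*
 * Fallback definitions for the rseq membarrier commands, which were only
 * added to the membarrier UAPI header in Linux 5.10.
 */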
27 #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
28 enum {
29 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
30 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
31 };
32
33 enum {
34 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
35 };
36 #endif
37
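/*
 * Delay-injection loop counts, indexed by injection point and set with the
 * -1 .. -9 command-line options. Non-zero counts insert busy loops at the
 * corresponding points of the rseq critical sections, widening the window
 * in which preemption, migration or signal delivery can cause an abort.
 */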
38 #define NR_INJECT 9
39 static int loop_cnt[NR_INJECT + 1];
40
41 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
42 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
43 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
44 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
45 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
46 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
47
48 static int opt_modulo, verbose;
49
50 static int opt_yield, opt_signal, opt_sleep,
51 opt_disable_rseq, opt_threads = 200,
52 opt_disable_mod = 0, opt_test = 's';
53
54 static long long opt_reps = 5000;
55
56 static __thread __attribute__((tls_model("initial-exec")))
57 unsigned int signals_delivered;
58
59 static inline pid_t rseq_gettid(void)
60 {
61 return syscall(__NR_gettid);
62 }
63
64 #ifndef BENCHMARK
65
66 static __thread __attribute__((tls_model("initial-exec"), unused))
67 int yield_mod_cnt, nr_abort;
68
69 #define printf_verbose(fmt, ...) \
70 do { \
71 if (verbose) \
72 printf(fmt, ## __VA_ARGS__); \
73 } while (0)
74
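/*
 * Per-architecture RSEQ_INJECT_ASM(n): load loop counter n and, when it is
 * non-zero, spin for that many iterations directly inside the inline
 * assembly of the rseq critical section.
 */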
75 #ifdef __i386__
76
77 #define INJECT_ASM_REG "eax"
78
79 #define RSEQ_INJECT_CLOBBER \
80 , INJECT_ASM_REG
81
82 /*
83 * Use ip-relative addressing to get the loop counter.
84 */
85 #define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
86 "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
87 "leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
88 "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
89 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
90 "jz 333f\n\t" \
91 "222:\n\t" \
92 "dec %%" INJECT_ASM_REG "\n\t" \
93 "jnz 222b\n\t" \
94 "333:\n\t"
95
96 #define RSEQ_INJECT_ASM(n) \
97 __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
98
99 #elif defined(__x86_64__)
100
101 #define INJECT_ASM_REG_P "rax"
102 #define INJECT_ASM_REG "eax"
103
104 #define RSEQ_INJECT_CLOBBER \
105 , INJECT_ASM_REG_P \
106 , INJECT_ASM_REG
107
108 #define RSEQ_INJECT_ASM(n) \
109 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
110 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
111 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
112 "jz 333f\n\t" \
113 "222:\n\t" \
114 "dec %%" INJECT_ASM_REG "\n\t" \
115 "jnz 222b\n\t" \
116 "333:\n\t"
117
118 #elif defined(__s390__)
119
120 #define RSEQ_INJECT_INPUT \
121 , [loop_cnt_1]"m"(loop_cnt[1]) \
122 , [loop_cnt_2]"m"(loop_cnt[2]) \
123 , [loop_cnt_3]"m"(loop_cnt[3]) \
124 , [loop_cnt_4]"m"(loop_cnt[4]) \
125 , [loop_cnt_5]"m"(loop_cnt[5]) \
126 , [loop_cnt_6]"m"(loop_cnt[6])
127
128 #define INJECT_ASM_REG "r12"
129
130 #define RSEQ_INJECT_CLOBBER \
131 , INJECT_ASM_REG
132
133 #define RSEQ_INJECT_ASM(n) \
134 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
135 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
136 "je 333f\n\t" \
137 "222:\n\t" \
138 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
139 "jnz 222b\n\t" \
140 "333:\n\t"
141
142 #elif defined(__ARMEL__)
143
144 #define RSEQ_INJECT_INPUT \
145 , [loop_cnt_1]"m"(loop_cnt[1]) \
146 , [loop_cnt_2]"m"(loop_cnt[2]) \
147 , [loop_cnt_3]"m"(loop_cnt[3]) \
148 , [loop_cnt_4]"m"(loop_cnt[4]) \
149 , [loop_cnt_5]"m"(loop_cnt[5]) \
150 , [loop_cnt_6]"m"(loop_cnt[6])
151
152 #define INJECT_ASM_REG "r4"
153
154 #define RSEQ_INJECT_CLOBBER \
155 , INJECT_ASM_REG
156
157 #define RSEQ_INJECT_ASM(n) \
158 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
159 "cmp " INJECT_ASM_REG ", #0\n\t" \
160 "beq 333f\n\t" \
161 "222:\n\t" \
162 "subs " INJECT_ASM_REG ", #1\n\t" \
163 "bne 222b\n\t" \
164 "333:\n\t"
165
166 #elif defined(__AARCH64EL__)
167
168 #define RSEQ_INJECT_INPUT \
169 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
170 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
171 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
172 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
173 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
174 , [loop_cnt_6] "Qo" (loop_cnt[6])
175
176 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
177
178 #define RSEQ_INJECT_ASM(n) \
179 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
180 " cbz " INJECT_ASM_REG ", 333f\n" \
181 "222:\n" \
182 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
183 " cbnz " INJECT_ASM_REG ", 222b\n" \
184 "333:\n"
185
186 #elif defined(__PPC__)
187
188 #define RSEQ_INJECT_INPUT \
189 , [loop_cnt_1]"m"(loop_cnt[1]) \
190 , [loop_cnt_2]"m"(loop_cnt[2]) \
191 , [loop_cnt_3]"m"(loop_cnt[3]) \
192 , [loop_cnt_4]"m"(loop_cnt[4]) \
193 , [loop_cnt_5]"m"(loop_cnt[5]) \
194 , [loop_cnt_6]"m"(loop_cnt[6])
195
196 #define INJECT_ASM_REG "r18"
197
198 #define RSEQ_INJECT_CLOBBER \
199 , INJECT_ASM_REG
200
201 #define RSEQ_INJECT_ASM(n) \
202 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
203 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
204 "beq 333f\n\t" \
205 "222:\n\t" \
206 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
207 "bne 222b\n\t" \
208 "333:\n\t"
209
210 #elif defined(__mips__)
211
212 #define RSEQ_INJECT_INPUT \
213 , [loop_cnt_1]"m"(loop_cnt[1]) \
214 , [loop_cnt_2]"m"(loop_cnt[2]) \
215 , [loop_cnt_3]"m"(loop_cnt[3]) \
216 , [loop_cnt_4]"m"(loop_cnt[4]) \
217 , [loop_cnt_5]"m"(loop_cnt[5]) \
218 , [loop_cnt_6]"m"(loop_cnt[6])
219
220 #define INJECT_ASM_REG "$5"
221
222 #define RSEQ_INJECT_CLOBBER \
223 , INJECT_ASM_REG
224
225 #define RSEQ_INJECT_ASM(n) \
226 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
227 "beqz " INJECT_ASM_REG ", 333f\n\t" \
228 "222:\n\t" \
229 "addiu " INJECT_ASM_REG ", -1\n\t" \
230 "bnez " INJECT_ASM_REG ", 222b\n\t" \
231 "333:\n\t"
232
233 #elif defined(__riscv)
234
235 #define RSEQ_INJECT_INPUT \
236 , [loop_cnt_1]"m"(loop_cnt[1]) \
237 , [loop_cnt_2]"m"(loop_cnt[2]) \
238 , [loop_cnt_3]"m"(loop_cnt[3]) \
239 , [loop_cnt_4]"m"(loop_cnt[4]) \
240 , [loop_cnt_5]"m"(loop_cnt[5]) \
241 , [loop_cnt_6]"m"(loop_cnt[6])
242
243 #define INJECT_ASM_REG "t1"
244
245 #define RSEQ_INJECT_CLOBBER \
246 , INJECT_ASM_REG
247
248 #define RSEQ_INJECT_ASM(n) \
249 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
250 "beqz " INJECT_ASM_REG ", 333f\n\t" \
251 "222:\n\t" \
252 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
253 "bnez " INJECT_ASM_REG ", 222b\n\t" \
254 "333:\n\t"
255
256 #else
257 #error unsupported target
258 #endif
259
260 #define RSEQ_INJECT_FAILED \
261 nr_abort++;
262
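/*
 * C-level injection: busy-wait for loop_cnt[n] iterations and, when the
 * count is -1, periodically sleep, yield or raise SIGUSR1 (every
 * opt_modulo iterations) to perturb the surrounding critical section.
 */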
263 #define RSEQ_INJECT_C(n) \
264 { \
265 int loc_i, loc_nr_loops = loop_cnt[n]; \
266 \
267 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
268 rseq_barrier(); \
269 } \
270 if (loc_nr_loops == -1 && opt_modulo) { \
271 if (yield_mod_cnt == opt_modulo - 1) { \
272 if (opt_sleep > 0) \
273 poll(NULL, 0, opt_sleep); \
274 if (opt_yield) \
275 sched_yield(); \
276 if (opt_signal) \
277 raise(SIGUSR1); \
278 yield_mod_cnt = 0; \
279 } else { \
280 yield_mod_cnt++; \
281 } \
282 } \
283 }
284
285 #else
286
287 #define printf_verbose(fmt, ...)
288
289 #endif /* BENCHMARK */
290
291 #include <rseq/rseq.h>
292
293 static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
294
295 static int sys_membarrier(int cmd, int flags, int cpu_id)
296 {
297 return syscall(__NR_membarrier, cmd, flags, cpu_id);
298 }
299
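/*
 * The membarrier test exercises rseq_load_cbne_load_add_load_add_store__ptr,
 * which is only implemented on some architectures; it is skipped elsewhere.
 */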
300 #ifdef rseq_arch_has_load_cbne_load_add_load_add_store
301 #define TEST_MEMBARRIER
302 #endif
303
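/*
 * Per-cpu data is indexed either by the concurrency id (mm_cid) or by the
 * raw cpu number, depending on the BUILDOPT_RSEQ_PERCPU_MM_CID build
 * option. The helpers below hide that difference from the tests.
 */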
304 #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
305 # define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
306 static
307 int get_current_cpu_id(void)
308 {
309 return rseq_current_mm_cid();
310 }
311 static
312 bool rseq_validate_cpu_id(void)
313 {
314 return rseq_mm_cid_available();
315 }
316 static
317 bool rseq_use_cpu_index(void)
318 {
319 return false; /* Use mm_cid */
320 }
321 # ifdef TEST_MEMBARRIER
322 /*
323 * Membarrier does not currently support targeting a mm_cid, so
324 * issue the barrier on all cpus.
325 */
326 static
327 int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
328 {
329 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
330 0, 0);
331 }
332 # endif /* TEST_MEMBARRIER */
333 #else
334 # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
335 static
336 int get_current_cpu_id(void)
337 {
338 return rseq_cpu_start();
339 }
340 static
341 bool rseq_validate_cpu_id(void)
342 {
343 return rseq_current_cpu_raw() >= 0;
344 }
345 static
346 bool rseq_use_cpu_index(void)
347 {
348 return true; /* Use cpu_id as index. */
349 }
350 # ifdef TEST_MEMBARRIER
351 static
352 int rseq_membarrier_expedited(int cpu)
353 {
354 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
355 MEMBARRIER_CMD_FLAG_CPU, cpu);
356 }
357 # endif /* TEST_MEMBARRIER */
358 #endif
359
360 struct percpu_lock {
361 intptr_t v;
362 };
363
364 struct spinlock_test_data {
365 struct percpu_lock lock;
366 intptr_t count;
367 };
368
369 struct spinlock_thread_test_data {
370 struct spinlock_test_data __rseq_percpu *data;
371 long long reps;
372 int reg;
373 };
374
375 struct inc_test_data {
376 intptr_t count;
377 };
378
379 struct inc_thread_test_data {
380 struct inc_test_data __rseq_percpu *data;
381 long long reps;
382 int reg;
383 };
384
385 struct percpu_list_node {
386 intptr_t data;
387 struct percpu_list_node *next;
388 };
389
390 struct percpu_list {
391 struct percpu_list_node *head;
392 };
393
394 #define BUFFER_ITEM_PER_CPU 100
395
396 struct percpu_buffer_node {
397 intptr_t data;
398 };
399
400 struct percpu_buffer {
401 intptr_t offset;
402 intptr_t buflen;
403 struct percpu_buffer_node **array;
404 };
405
406 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
407
408 struct percpu_memcpy_buffer_node {
409 intptr_t data1;
410 uint64_t data2;
411 };
412
413 struct percpu_memcpy_buffer {
414 intptr_t offset;
415 intptr_t buflen;
416 struct percpu_memcpy_buffer_node *array;
417 };
418
419 /* A simple percpu spinlock. Grabs lock on current cpu. */
420 static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
421 {
422 int cpu;
423
424 for (;;) {
425 int ret;
426
427 cpu = get_current_cpu_id();
428 if (cpu < 0) {
429 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
430 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
431 abort();
432 }
433 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
434 &rseq_percpu_ptr(lock, cpu)->v,
435 0, 1, cpu);
436 if (rseq_likely(!ret))
437 break;
438 /* Retry if comparison fails or rseq aborts. */
439 }
440 /*
441 * Acquire semantic when taking lock after control dependency.
442 * Matches rseq_smp_store_release().
443 */
444 rseq_smp_acquire__after_ctrl_dep();
445 return cpu;
446 }
447
448 static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
449 {
450 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
451 /*
452 * Release lock, with release semantic. Matches
453 * rseq_smp_acquire__after_ctrl_dep().
454 */
455 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
456 }
457
458 static void *test_percpu_spinlock_thread(void *arg)
459 {
460 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
461 struct spinlock_test_data __rseq_percpu *data = thread_data->data;
462 long long i, reps;
463
464 if (!opt_disable_rseq && thread_data->reg &&
465 rseq_register_current_thread())
466 abort();
467 reps = thread_data->reps;
468 for (i = 0; i < reps; i++) {
469 int cpu = rseq_this_cpu_lock(&data->lock);
470 rseq_percpu_ptr(data, cpu)->count++;
471 rseq_percpu_unlock(&data->lock, cpu);
472 #ifndef BENCHMARK
473 if (i != 0 && !(i % (reps / 10)))
474 printf_verbose("tid %d: count %lld\n",
475 (int) rseq_gettid(), i);
476 #endif
477 }
478 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
479 (int) rseq_gettid(), nr_abort, signals_delivered);
480 if (!opt_disable_rseq && thread_data->reg &&
481 rseq_unregister_current_thread())
482 abort();
483 return NULL;
484 }
485
486 /*
487 * A simple test which implements a sharded counter using a per-cpu
488 * lock. Obviously real applications might prefer to simply use a
489 * per-cpu increment; however, this is reasonable for a test and the
490 * lock can be extended to synchronize more complicated operations.
491 */
492 static void test_percpu_spinlock(void)
493 {
494 const int num_threads = opt_threads;
495 int i, ret;
496 uint64_t sum;
497 pthread_t test_threads[num_threads];
498 struct spinlock_test_data __rseq_percpu *data;
499 struct spinlock_thread_test_data thread_data[num_threads];
500 struct rseq_percpu_pool *mempool;
501
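/*
 * Pool lifecycle used by every test: rseq_percpu_pool_create() sets up a
 * PERCPU_POOL_LEN pool covering up to CPU_SETSIZE cpus,
 * rseq_percpu_zmalloc() returns one zero-initialized per-cpu item from it,
 * and rseq_percpu_ptr() resolves that item for a given cpu index. The item
 * is released with rseq_percpu_free() before the pool is destroyed.
 */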
502 mempool = rseq_percpu_pool_create("spinlock_test_data",
503 sizeof(struct spinlock_test_data),
504 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
505 if (!mempool) {
506 perror("rseq_percpu_pool_create");
507 abort();
508 }
509 data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
510 if (!data) {
511 perror("rseq_percpu_zmalloc");
512 abort();
513 }
514
515 for (i = 0; i < num_threads; i++) {
516 thread_data[i].reps = opt_reps;
517 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
518 thread_data[i].reg = 1;
519 else
520 thread_data[i].reg = 0;
521 thread_data[i].data = data;
522 ret = pthread_create(&test_threads[i], NULL,
523 test_percpu_spinlock_thread,
524 &thread_data[i]);
525 if (ret) {
526 errno = ret;
527 perror("pthread_create");
528 abort();
529 }
530 }
531
532 for (i = 0; i < num_threads; i++) {
533 ret = pthread_join(test_threads[i], NULL);
534 if (ret) {
535 errno = ret;
536 perror("pthread_join");
537 abort();
538 }
539 }
540
541 sum = 0;
542 for (i = 0; i < CPU_SETSIZE; i++)
543 sum += rseq_percpu_ptr(data, i)->count;
544
545 assert(sum == (uint64_t)opt_reps * num_threads);
546 rseq_percpu_free(data);
547 ret = rseq_percpu_pool_destroy(mempool);
548 if (ret) {
549 perror("rseq_percpu_pool_destroy");
550 abort();
551 }
552 }
553
554 static void *test_percpu_inc_thread(void *arg)
555 {
556 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
557 struct inc_test_data __rseq_percpu *data = thread_data->data;
558 long long i, reps;
559
560 if (!opt_disable_rseq && thread_data->reg &&
561 rseq_register_current_thread())
562 abort();
563 reps = thread_data->reps;
564 for (i = 0; i < reps; i++) {
565 int ret;
566
567 do {
568 int cpu;
569
570 cpu = get_current_cpu_id();
571 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
572 &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
573 } while (rseq_unlikely(ret));
574 #ifndef BENCHMARK
575 if (i != 0 && !(i % (reps / 10)))
576 printf_verbose("tid %d: count %lld\n",
577 (int) rseq_gettid(), i);
578 #endif
579 }
580 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
581 (int) rseq_gettid(), nr_abort, signals_delivered);
582 if (!opt_disable_rseq && thread_data->reg &&
583 rseq_unregister_current_thread())
584 abort();
585 return NULL;
586 }
587
588 static void test_percpu_inc(void)
589 {
590 const int num_threads = opt_threads;
591 int i, ret;
592 uint64_t sum;
593 pthread_t test_threads[num_threads];
594 struct inc_test_data __rseq_percpu *data;
595 struct inc_thread_test_data thread_data[num_threads];
596 struct rseq_percpu_pool *mempool;
597
598 mempool = rseq_percpu_pool_create("inc_test_data",
599 sizeof(struct inc_test_data),
600 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
601 if (!mempool) {
602 perror("rseq_percpu_pool_create");
603 abort();
604 }
605 data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
606 if (!data) {
607 perror("rseq_percpu_zmalloc");
608 abort();
609 }
610
611 for (i = 0; i < num_threads; i++) {
612 thread_data[i].reps = opt_reps;
613 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
614 thread_data[i].reg = 1;
615 else
616 thread_data[i].reg = 0;
617 thread_data[i].data = data;
618 ret = pthread_create(&test_threads[i], NULL,
619 test_percpu_inc_thread,
620 &thread_data[i]);
621 if (ret) {
622 errno = ret;
623 perror("pthread_create");
624 abort();
625 }
626 }
627
628 for (i = 0; i < num_threads; i++) {
629 ret = pthread_join(test_threads[i], NULL);
630 if (ret) {
631 errno = ret;
632 perror("pthread_join");
633 abort();
634 }
635 }
636
637 sum = 0;
638 for (i = 0; i < CPU_SETSIZE; i++)
639 sum += rseq_percpu_ptr(data, i)->count;
640
641 assert(sum == (uint64_t)opt_reps * num_threads);
642 rseq_percpu_free(data);
643 ret = rseq_percpu_pool_destroy(mempool);
644 if (ret) {
645 perror("rseq_percpu_pool_destroy");
646 abort();
647 }
648 }
649
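/*
 * Push a node onto the list of the current cpu (or mm_cid): publish the new
 * head with a load-compare-branch-not-equal-store rseq operation, retrying
 * whenever the head changed concurrently or the sequence aborted.
 */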
650 static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
651 struct percpu_list_node *node,
652 int *_cpu)
653 {
654 int cpu;
655
656 for (;;) {
657 intptr_t *targetptr, newval, expect;
658 struct percpu_list *cpulist;
659 int ret;
660
661 cpu = get_current_cpu_id();
662 cpulist = rseq_percpu_ptr(list, cpu);
663 /* Load list->c[cpu].head with single-copy atomicity. */
664 expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
665 newval = (intptr_t)node;
666 targetptr = (intptr_t *)&cpulist->head;
667 node->next = (struct percpu_list_node *)expect;
668 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
669 targetptr, expect, newval, cpu);
670 if (rseq_likely(!ret))
671 break;
672 /* Retry if comparison fails or rseq aborts. */
673 }
674 if (_cpu)
675 *_cpu = cpu;
676 }
677
678 /*
679 * Unlike a traditional lock-less linked list, the availability of an
680 * rseq primitive allows us to implement pop without concerns over
681 * ABA-type races.
682 */
683 static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
684 int *_cpu)
685 {
686 struct percpu_list_node *node = NULL;
687 int cpu;
688
689 for (;;) {
690 struct percpu_list_node *head;
691 intptr_t *targetptr, expectnot, *load;
692 struct percpu_list *cpulist;
693 long offset;
694 int ret;
695
696 cpu = get_current_cpu_id();
697 cpulist = rseq_percpu_ptr(list, cpu);
698 targetptr = (intptr_t *)&cpulist->head;
699 expectnot = (intptr_t)NULL;
700 offset = offsetof(struct percpu_list_node, next);
701 load = (intptr_t *)&head;
702 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
703 targetptr, expectnot,
704 offset, load, cpu);
705 if (rseq_likely(!ret)) {
706 node = head;
707 break;
708 }
709 if (ret > 0)
710 break;
711 /* Retry if rseq aborts. */
712 }
713 if (_cpu)
714 *_cpu = cpu;
715 return node;
716 }
717
718 /*
719 * __percpu_list_pop is not safe against concurrent accesses. Should
720 * only be used on lists that are not concurrently modified.
721 */
722 static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
723 {
724 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
725 struct percpu_list_node *node;
726
727 node = cpulist->head;
728 if (!node)
729 return NULL;
730 cpulist->head = node->next;
731 return node;
732 }
733
734 static void *test_percpu_list_thread(void *arg)
735 {
736 long long i, reps;
737 struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
738
739 if (!opt_disable_rseq && rseq_register_current_thread())
740 abort();
741
742 reps = opt_reps;
743 for (i = 0; i < reps; i++) {
744 struct percpu_list_node *node;
745
746 node = this_cpu_list_pop(list, NULL);
747 if (opt_yield)
748 sched_yield(); /* encourage shuffling */
749 if (node)
750 this_cpu_list_push(list, node, NULL);
751 }
752
753 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
754 (int) rseq_gettid(), nr_abort, signals_delivered);
755 if (!opt_disable_rseq && rseq_unregister_current_thread())
756 abort();
757
758 return NULL;
759 }
760
761 /* Simultaneous modification to a per-cpu linked list from many threads. */
762 static void test_percpu_list(void)
763 {
764 const int num_threads = opt_threads;
765 int i, j, ret;
766 uint64_t sum = 0, expected_sum = 0;
767 struct percpu_list __rseq_percpu *list;
768 pthread_t test_threads[num_threads];
769 cpu_set_t allowed_cpus;
770 struct rseq_percpu_pool *mempool;
771
772 mempool = rseq_percpu_pool_create("percpu_list", sizeof(struct percpu_list),
773 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
774 if (!mempool) {
775 perror("rseq_percpu_pool_create");
776 abort();
777 }
778 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
779 if (!list) {
780 perror("rseq_percpu_zmalloc");
781 abort();
782 }
783
784 /* Generate list entries for every usable cpu. */
785 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
786 for (i = 0; i < CPU_SETSIZE; i++) {
787 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
788 continue;
789 for (j = 1; j <= 100; j++) {
790 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
791 struct percpu_list_node *node;
792
793 expected_sum += j;
794
795 node = (struct percpu_list_node *) malloc(sizeof(*node));
796 assert(node);
797 node->data = j;
798 node->next = cpulist->head;
799 cpulist->head = node;
800 }
801 }
802
803 for (i = 0; i < num_threads; i++) {
804 ret = pthread_create(&test_threads[i], NULL,
805 test_percpu_list_thread, list);
806 if (ret) {
807 errno = ret;
808 perror("pthread_create");
809 abort();
810 }
811 }
812
813 for (i = 0; i < num_threads; i++) {
814 ret = pthread_join(test_threads[i], NULL);
815 if (ret) {
816 errno = ret;
817 perror("pthread_join");
818 abort();
819 }
820 }
821
822 for (i = 0; i < CPU_SETSIZE; i++) {
823 struct percpu_list_node *node;
824
825 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
826 continue;
827
828 while ((node = __percpu_list_pop(list, i))) {
829 sum += node->data;
830 free(node);
831 }
832 }
833
834 /*
835 * All entries should now be accounted for (unless some external
836 * actor is interfering with our allowed affinity while this
837 * test is running).
838 */
839 assert(sum == expected_sum);
840 rseq_percpu_free(list);
841 ret = rseq_percpu_pool_destroy(mempool);
842 if (ret) {
843 perror("rseq_percpu_pool_destroy");
844 abort();
845 }
846 }
847
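/*
 * LIFO per-cpu pointer buffer push: speculatively store the node pointer at
 * array[offset], then publish offset + 1 as the final store of the rseq
 * operation. Returns false when the buffer is full.
 */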
848 static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
849 struct percpu_buffer_node *node,
850 int *_cpu)
851 {
852 bool result = false;
853 int cpu;
854
855 for (;;) {
856 struct percpu_buffer *cpubuffer;
857 intptr_t *targetptr_spec, newval_spec;
858 intptr_t *targetptr_final, newval_final;
859 intptr_t offset;
860 int ret;
861
862 cpu = get_current_cpu_id();
863 cpubuffer = rseq_percpu_ptr(buffer, cpu);
864 offset = RSEQ_READ_ONCE(cpubuffer->offset);
865 if (offset == cpubuffer->buflen)
866 break;
867 newval_spec = (intptr_t)node;
868 targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
869 newval_final = offset + 1;
870 targetptr_final = &cpubuffer->offset;
871 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
872 targetptr_final, offset, targetptr_spec,
873 newval_spec, newval_final, cpu);
874 if (rseq_likely(!ret)) {
875 result = true;
876 break;
877 }
878 /* Retry if comparison fails or rseq aborts. */
879 }
880 if (_cpu)
881 *_cpu = cpu;
882 return result;
883 }
884
885 static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
886 int *_cpu)
887 {
888 struct percpu_buffer_node *head;
889 int cpu;
890
891 for (;;) {
892 struct percpu_buffer *cpubuffer;
893 intptr_t *targetptr, newval;
894 intptr_t offset;
895 int ret;
896
897 cpu = get_current_cpu_id();
898 cpubuffer = rseq_percpu_ptr(buffer, cpu);
899 /* Load offset with single-copy atomicity. */
900 offset = RSEQ_READ_ONCE(cpubuffer->offset);
901 if (offset == 0) {
902 head = NULL;
903 break;
904 }
905 head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
906 newval = offset - 1;
907 targetptr = (intptr_t *)&cpubuffer->offset;
908 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
909 targetptr, offset,
910 (intptr_t *)&cpubuffer->array[offset - 1],
911 (intptr_t)head, newval, cpu);
912 if (rseq_likely(!ret))
913 break;
914 /* Retry if comparison fails or rseq aborts. */
915 }
916 if (_cpu)
917 *_cpu = cpu;
918 return head;
919 }
920
921 /*
922 * __percpu_buffer_pop is not safe against concurrent accesses. Should
923 * only be used on buffers that are not concurrently modified.
924 */
925 static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
926 int cpu)
927 {
928 struct percpu_buffer *cpubuffer;
929 struct percpu_buffer_node *head;
930 intptr_t offset;
931
932 cpubuffer = rseq_percpu_ptr(buffer, cpu);
933 offset = cpubuffer->offset;
934 if (offset == 0)
935 return NULL;
936 head = cpubuffer->array[offset - 1];
937 cpubuffer->offset = offset - 1;
938 return head;
939 }
940
941 static void *test_percpu_buffer_thread(void *arg)
942 {
943 long long i, reps;
944 struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;
945
946 if (!opt_disable_rseq && rseq_register_current_thread())
947 abort();
948
949 reps = opt_reps;
950 for (i = 0; i < reps; i++) {
951 struct percpu_buffer_node *node;
952
953 node = this_cpu_buffer_pop(buffer, NULL);
954 if (opt_yield)
955 sched_yield(); /* encourage shuffling */
956 if (node) {
957 if (!this_cpu_buffer_push(buffer, node, NULL)) {
958 /* Should increase buffer size. */
959 abort();
960 }
961 }
962 }
963
964 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
965 (int) rseq_gettid(), nr_abort, signals_delivered);
966 if (!opt_disable_rseq && rseq_unregister_current_thread())
967 abort();
968
969 return NULL;
970 }
971
972 /* Simultaneous modification to a per-cpu buffer from many threads. */
973 static void test_percpu_buffer(void)
974 {
975 const int num_threads = opt_threads;
976 int i, j, ret;
977 uint64_t sum = 0, expected_sum = 0;
978 struct percpu_buffer __rseq_percpu *buffer;
979 pthread_t test_threads[num_threads];
980 cpu_set_t allowed_cpus;
981 struct rseq_percpu_pool *mempool;
982
983 mempool = rseq_percpu_pool_create("percpu_buffer", sizeof(struct percpu_buffer),
984 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
985 if (!mempool) {
986 perror("rseq_percpu_pool_create");
987 abort();
988 }
989 buffer = (struct percpu_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
990 if (!buffer) {
991 perror("rseq_percpu_zmalloc");
992 abort();
993 }
994
995 /* Generate list entries for every usable cpu. */
996 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
997 for (i = 0; i < CPU_SETSIZE; i++) {
998 struct percpu_buffer *cpubuffer;
999
1000 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1001 continue;
1002 cpubuffer = rseq_percpu_ptr(buffer, i);
1003 /* Worst-case is every item in the same CPU. */
1004 cpubuffer->array =
1005 (struct percpu_buffer_node **)
1006 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
1007 BUFFER_ITEM_PER_CPU);
1008 assert(cpubuffer->array);
1009 cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
1010 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
1011 struct percpu_buffer_node *node;
1012
1013 expected_sum += j;
1014
1015 /*
1016 * We could theoretically put the word-sized
1017 * "data" directly in the buffer. However, we
1018 * want to model objects that would not fit
1019 * within a single word, so allocate an object
1020 * for each node.
1021 */
1022 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
1023 assert(node);
1024 node->data = j;
1025 cpubuffer->array[j - 1] = node;
1026 cpubuffer->offset++;
1027 }
1028 }
1029
1030 for (i = 0; i < num_threads; i++) {
1031 ret = pthread_create(&test_threads[i], NULL,
1032 test_percpu_buffer_thread, buffer);
1033 if (ret) {
1034 errno = ret;
1035 perror("pthread_create");
1036 abort();
1037 }
1038 }
1039
1040 for (i = 0; i < num_threads; i++) {
1041 ret = pthread_join(test_threads[i], NULL);
1042 if (ret) {
1043 errno = ret;
1044 perror("pthread_join");
1045 abort();
1046 }
1047 }
1048
1049 for (i = 0; i < CPU_SETSIZE; i++) {
1050 struct percpu_buffer *cpubuffer;
1051 struct percpu_buffer_node *node;
1052
1053 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1054 continue;
1055
1056 cpubuffer = rseq_percpu_ptr(buffer, i);
1057 while ((node = __percpu_buffer_pop(buffer, i))) {
1058 sum += node->data;
1059 free(node);
1060 }
1061 free(cpubuffer->array);
1062 }
1063
1064 /*
1065 * All entries should now be accounted for (unless some external
1066 * actor is interfering with our allowed affinity while this
1067 * test is running).
1068 */
1069 assert(sum == expected_sum);
1070 rseq_percpu_free(buffer);
1071 ret = rseq_percpu_pool_destroy(mempool);
1072 if (ret) {
1073 perror("rseq_percpu_pool_destroy");
1074 abort();
1075 }
1076 }
1077
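/*
 * Same idea as the pointer buffer above, except the whole item is copied
 * into the buffer by a memcpy performed within the rseq critical section,
 * before the final store that publishes the new offset.
 */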
1078 static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1079 struct percpu_memcpy_buffer_node item,
1080 int *_cpu)
1081 {
1082 bool result = false;
1083 int cpu;
1084
1085 for (;;) {
1086 struct percpu_memcpy_buffer *cpubuffer;
1087 intptr_t *targetptr_final, newval_final, offset;
1088 char *destptr, *srcptr;
1089 size_t copylen;
1090 int ret;
1091
1092 cpu = get_current_cpu_id();
1093 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1094 /* Load offset with single-copy atomicity. */
1095 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1096 if (offset == cpubuffer->buflen)
1097 break;
1098 destptr = (char *)&cpubuffer->array[offset];
1099 srcptr = (char *)&item;
1100 /* copylen must be <= 4kB. */
1101 copylen = sizeof(item);
1102 newval_final = offset + 1;
1103 targetptr_final = &cpubuffer->offset;
1104 ret = rseq_load_cbne_memcpy_store__ptr(
1105 opt_mo, RSEQ_PERCPU,
1106 targetptr_final, offset,
1107 destptr, srcptr, copylen,
1108 newval_final, cpu);
1109 if (rseq_likely(!ret)) {
1110 result = true;
1111 break;
1112 }
1113 /* Retry if comparison fails or rseq aborts. */
1114 }
1115 if (_cpu)
1116 *_cpu = cpu;
1117 return result;
1118 }
1119
1120 static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1121 struct percpu_memcpy_buffer_node *item,
1122 int *_cpu)
1123 {
1124 bool result = false;
1125 int cpu;
1126
1127 for (;;) {
1128 struct percpu_memcpy_buffer *cpubuffer;
1129 intptr_t *targetptr_final, newval_final, offset;
1130 char *destptr, *srcptr;
1131 size_t copylen;
1132 int ret;
1133
1134 cpu = get_current_cpu_id();
1135 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1136 /* Load offset with single-copy atomicity. */
1137 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1138 if (offset == 0)
1139 break;
1140 destptr = (char *)item;
1141 srcptr = (char *)&cpubuffer->array[offset - 1];
1142 /* copylen must be <= 4kB. */
1143 copylen = sizeof(*item);
1144 newval_final = offset - 1;
1145 targetptr_final = &cpubuffer->offset;
1146 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1147 targetptr_final, offset, destptr, srcptr, copylen,
1148 newval_final, cpu);
1149 if (rseq_likely(!ret)) {
1150 result = true;
1151 break;
1152 }
1153 /* Retry if comparison fails or rseq aborts. */
1154 }
1155 if (_cpu)
1156 *_cpu = cpu;
1157 return result;
1158 }
1159
1160 /*
1161 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1162 * only be used on buffers that are not concurrently modified.
1163 */
1164 static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1165 struct percpu_memcpy_buffer_node *item,
1166 int cpu)
1167 {
1168 struct percpu_memcpy_buffer *cpubuffer;
1169 intptr_t offset;
1170
1171 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1172 offset = cpubuffer->offset;
1173 if (offset == 0)
1174 return false;
1175 memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
1176 cpubuffer->offset = offset - 1;
1177 return true;
1178 }
1179
1180 static void *test_percpu_memcpy_buffer_thread(void *arg)
1181 {
1182 long long i, reps;
1183 struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;
1184
1185 if (!opt_disable_rseq && rseq_register_current_thread())
1186 abort();
1187
1188 reps = opt_reps;
1189 for (i = 0; i < reps; i++) {
1190 struct percpu_memcpy_buffer_node item;
1191 bool result;
1192
1193 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1194 if (opt_yield)
1195 sched_yield(); /* encourage shuffling */
1196 if (result) {
1197 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1198 /* Should increase buffer size. */
1199 abort();
1200 }
1201 }
1202 }
1203
1204 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1205 (int) rseq_gettid(), nr_abort, signals_delivered);
1206 if (!opt_disable_rseq && rseq_unregister_current_thread())
1207 abort();
1208
1209 return NULL;
1210 }
1211
1212 /* Simultaneous modification to a per-cpu buffer from many threads. */
1213 static void test_percpu_memcpy_buffer(void)
1214 {
1215 const int num_threads = opt_threads;
1216 int i, j, ret;
1217 uint64_t sum = 0, expected_sum = 0;
1218 struct percpu_memcpy_buffer __rseq_percpu *buffer;
1219 pthread_t test_threads[num_threads];
1220 cpu_set_t allowed_cpus;
1221 struct rseq_percpu_pool *mempool;
1222
1223 mempool = rseq_percpu_pool_create("percpu_memcpy_buffer",
1224 sizeof(struct percpu_memcpy_buffer),
1225 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
1226 if (!mempool) {
1227 perror("rseq_percpu_pool_create");
1228 abort();
1229 }
1230 buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
1231 if (!buffer) {
1232 perror("rseq_percpu_zmalloc");
1233 abort();
1234 }
1235
1236 /* Generate list entries for every usable cpu. */
1237 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1238 for (i = 0; i < CPU_SETSIZE; i++) {
1239 struct percpu_memcpy_buffer *cpubuffer;
1240
1241 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1242 continue;
1243 cpubuffer = rseq_percpu_ptr(buffer, i);
1244 /* Worst-case is every item in the same CPU. */
1245 cpubuffer->array =
1246 (struct percpu_memcpy_buffer_node *)
1247 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
1248 MEMCPY_BUFFER_ITEM_PER_CPU);
1249 assert(cpubuffer->array);
1250 cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1251 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1252 expected_sum += 2 * j + 1;
1253
1254 /*
1255 * We could theoretically put the word-sized
1256 * "data" directly in the buffer. However, we
1257 * want to model objects that would not fit
1258 * within a single word, so allocate an object
1259 * for each node.
1260 */
1261 cpubuffer->array[j - 1].data1 = j;
1262 cpubuffer->array[j - 1].data2 = j + 1;
1263 cpubuffer->offset++;
1264 }
1265 }
1266
1267 for (i = 0; i < num_threads; i++) {
1268 ret = pthread_create(&test_threads[i], NULL,
1269 test_percpu_memcpy_buffer_thread,
1270 buffer);
1271 if (ret) {
1272 errno = ret;
1273 perror("pthread_create");
1274 abort();
1275 }
1276 }
1277
1278 for (i = 0; i < num_threads; i++) {
1279 ret = pthread_join(test_threads[i], NULL);
1280 if (ret) {
1281 errno = ret;
1282 perror("pthread_join");
1283 abort();
1284 }
1285 }
1286
1287 for (i = 0; i < CPU_SETSIZE; i++) {
1288 struct percpu_memcpy_buffer_node item;
1289 struct percpu_memcpy_buffer *cpubuffer;
1290
1291 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1292 continue;
1293
1294 cpubuffer = rseq_percpu_ptr(buffer, i);
1295 while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
1296 sum += item.data1;
1297 sum += item.data2;
1298 }
1299 free(cpubuffer->array);
1300 }
1301
1302 /*
1303 * All entries should now be accounted for (unless some external
1304 * actor is interfering with our allowed affinity while this
1305 * test is running).
1306 */
1307 assert(sum == expected_sum);
1308 rseq_percpu_free(buffer);
1309 ret = rseq_percpu_pool_destroy(mempool);
1310 if (ret) {
1311 perror("rseq_percpu_pool_destroy");
1312 abort();
1313 }
1314 }
1315
1316 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1317 {
1318 signals_delivered++;
1319 }
1320
1321 static int set_signal_handler(void)
1322 {
1323 int ret = 0;
1324 struct sigaction sa;
1325 sigset_t sigset;
1326
1327 ret = sigemptyset(&sigset);
1328 if (ret < 0) {
1329 perror("sigemptyset");
1330 return ret;
1331 }
1332
1333 sa.sa_handler = test_signal_interrupt_handler;
1334 sa.sa_mask = sigset;
1335 sa.sa_flags = 0;
1336 ret = sigaction(SIGUSR1, &sa, NULL);
1337 if (ret < 0) {
1338 perror("sigaction");
1339 return ret;
1340 }
1341
1342 printf_verbose("Signal handler set for SIGUSR1\n");
1343
1344 return ret;
1345 }
1346
1347 static
1348 bool membarrier_private_expedited_rseq_available(void)
1349 {
1350 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1351
1352 if (status < 0) {
1353 perror("membarrier");
1354 return false;
1355 }
1356 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1357 return false;
1358 return true;
1359 }
1360
1361 /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
1362 #ifdef TEST_MEMBARRIER
1363 struct test_membarrier_thread_args {
1364 struct rseq_percpu_pool *mempool;
1365 struct percpu_list __rseq_percpu *percpu_list_ptr;
1366 int stop;
1367 };
1368
1369 /* Worker threads modify data in their "active" percpu lists. */
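/*
 * The rseq operation below re-checks that percpu_list_ptr still points to
 * the list it loaded before incrementing the data field of that list's
 * per-cpu head node, so an increment can never land in a list the manager
 * has already swapped out.
 */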
1370 static
1371 void *test_membarrier_worker_thread(void *arg)
1372 {
1373 struct test_membarrier_thread_args *args =
1374 (struct test_membarrier_thread_args *)arg;
1375 const long long iters = opt_reps;
1376 long long i;
1377
1378 if (rseq_register_current_thread()) {
1379 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1380 errno, strerror(errno));
1381 abort();
1382 }
1383
1384 /* Wait for initialization. */
1385 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1386
1387 for (i = 0; i < iters; ++i) {
1388 int ret;
1389
1390 do {
1391 int cpu = get_current_cpu_id();
1392 struct percpu_list __rseq_percpu *list = RSEQ_READ_ONCE(args->percpu_list_ptr);
1393 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
1394
1395 ret = rseq_load_cbne_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1396 (intptr_t *) &args->percpu_list_ptr,
1397 (intptr_t) list, (intptr_t *) &cpulist->head, 0, 1, cpu);
1398 } while (rseq_unlikely(ret));
1399 }
1400
1401 if (rseq_unregister_current_thread()) {
1402 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1403 errno, strerror(errno));
1404 abort();
1405 }
1406 return NULL;
1407 }
1408
1409 static
1410 struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_percpu_pool *mempool)
1411 {
1412 struct percpu_list __rseq_percpu *list;
1413 int i;
1414
1415 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
1416 if (!list) {
1417 perror("rseq_percpu_zmalloc");
1418 return NULL;
1419 }
1420 for (i = 0; i < CPU_SETSIZE; i++) {
1421 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
1422 struct percpu_list_node *node;
1423
1424 node = (struct percpu_list_node *) malloc(sizeof(*node));
1425 assert(node);
1426 node->data = 0;
1427 node->next = NULL;
1428 cpulist->head = node;
1429 }
1430 return list;
1431 }
1432
1433 static
1434 void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
1435 {
1436 int i;
1437
1438 for (i = 0; i < CPU_SETSIZE; i++)
1439 free(rseq_percpu_ptr(list, i)->head);
1440 rseq_percpu_free(list);
1441 }
1442
1443 static
1444 long long test_membarrier_count_percpu_list(struct percpu_list __rseq_percpu *list)
1445 {
1446 long long total_count = 0;
1447 int i;
1448
1449 for (i = 0; i < CPU_SETSIZE; i++)
1450 total_count += rseq_percpu_ptr(list, i)->head->data;
1451 return total_count;
1452 }
1453
1454 /*
1455 * The manager thread swaps per-cpu lists that worker threads see,
1456 * and validates that there are no unexpected modifications.
1457 */
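/*
 * After publishing a new active list pointer, the manager issues an
 * expedited rseq membarrier targeting the cpu it is about to inspect,
 * which aborts any rseq critical section still running against the old
 * pointer there. Reads of the now-inactive list therefore cannot race
 * with a stale worker increment.
 */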
1458 static
1459 void *test_membarrier_manager_thread(void *arg)
1460 {
1461 struct test_membarrier_thread_args *args =
1462 (struct test_membarrier_thread_args *)arg;
1463 struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
1464 intptr_t expect_a = 0, expect_b = 0;
1465 int cpu_a = 0, cpu_b = 0;
1466 struct rseq_percpu_pool *mempool;
1467 int ret;
1468 long long total_count = 0;
1469
1470 mempool = rseq_percpu_pool_create("percpu_list", sizeof(struct percpu_list),
1471 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
1472 if (!mempool) {
1473 perror("rseq_percpu_pool_create");
1474 abort();
1475 }
1476 args->mempool = mempool;
1477
1478 if (rseq_register_current_thread()) {
1479 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1480 errno, strerror(errno));
1481 abort();
1482 }
1483
1484 /* Init lists. */
1485 list_a = test_membarrier_alloc_percpu_list(mempool);
1486 assert(list_a);
1487 list_b = test_membarrier_alloc_percpu_list(mempool);
1488 assert(list_b);
1489
1490 /* Initialize lists before publishing them. */
1491 rseq_smp_wmb();
1492
1493 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1494
1495 while (!RSEQ_READ_ONCE(args->stop)) {
1496 /* list_a is "active". */
1497 cpu_a = rand() % CPU_SETSIZE;
1498 /*
1499 * As list_b is "inactive", we should never see changes
1500 * to list_b.
1501 */
1502 if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
1503 fprintf(stderr, "Membarrier test failed\n");
1504 abort();
1505 }
1506
1507 /* Make list_b "active". */
1508 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
1509 if (rseq_membarrier_expedited(cpu_a) &&
1510 errno != ENXIO /* missing CPU */) {
1511 perror("sys_membarrier");
1512 abort();
1513 }
1514 /*
1515 * CPU A should now only modify list_b, so the values
1516 * in list_a should be stable.
1517 */
1518 expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
1519
1520 cpu_b = rand() % CPU_SETSIZE;
1521 /*
1522 * As list_a is "inactive", we should never see changes
1523 * to list_a.
1524 */
1525 if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
1526 fprintf(stderr, "Membarrier test failed\n");
1527 abort();
1528 }
1529
1530 /* Make list_a "active". */
1531 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1532 if (rseq_membarrier_expedited(cpu_b) &&
1533 errno != ENXIO /* missing CPU */) {
1534 perror("sys_membarrier");
1535 abort();
1536 }
1537 /* Remember a value from list_b. */
1538 expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
1539 }
1540
1541 total_count += test_membarrier_count_percpu_list(list_a);
1542 total_count += test_membarrier_count_percpu_list(list_b);
1543
1544 /* Validate that we observe the right number of increments. */
1545 if (total_count != opt_threads * opt_reps) {
1546 fprintf(stderr, "Error: Observed %lld increments, expected %lld\n",
1547 total_count, opt_threads * opt_reps);
1548 abort();
1549 }
1550 test_membarrier_free_percpu_list(list_a);
1551 test_membarrier_free_percpu_list(list_b);
1552
1553 if (rseq_unregister_current_thread()) {
1554 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1555 errno, strerror(errno));
1556 abort();
1557 }
1558 ret = rseq_percpu_pool_destroy(mempool);
1559 if (ret) {
1560 perror("rseq_percpu_pool_destroy");
1561 abort();
1562 }
1563
1564 return NULL;
1565 }
1566
1567 static
1568 void test_membarrier(void)
1569 {
1570 const int num_threads = opt_threads;
1571 struct test_membarrier_thread_args thread_args;
1572 pthread_t worker_threads[num_threads];
1573 pthread_t manager_thread;
1574 int i, ret;
1575
1576 if (!membarrier_private_expedited_rseq_available()) {
1577 fprintf(stderr, "Membarrier private expedited rseq not available. "
1578 "Skipping membarrier test.\n");
1579 return;
1580 }
1581 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1582 perror("sys_membarrier");
1583 abort();
1584 }
1585
1586 thread_args.percpu_list_ptr = NULL;
1587 thread_args.stop = 0;
1588 ret = pthread_create(&manager_thread, NULL,
1589 test_membarrier_manager_thread, &thread_args);
1590 if (ret) {
1591 errno = ret;
1592 perror("pthread_create");
1593 abort();
1594 }
1595
1596 for (i = 0; i < num_threads; i++) {
1597 ret = pthread_create(&worker_threads[i], NULL,
1598 test_membarrier_worker_thread, &thread_args);
1599 if (ret) {
1600 errno = ret;
1601 perror("pthread_create");
1602 abort();
1603 }
1604 }
1605
1606
1607 for (i = 0; i < num_threads; i++) {
1608 ret = pthread_join(worker_threads[i], NULL);
1609 if (ret) {
1610 errno = ret;
1611 perror("pthread_join");
1612 abort();
1613 }
1614 }
1615
1616 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1617 ret = pthread_join(manager_thread, NULL);
1618 if (ret) {
1619 errno = ret;
1620 perror("pthread_join");
1621 abort();
1622 }
1623 }
1624 #else /* TEST_MEMBARRIER */
1625 static
1626 void test_membarrier(void)
1627 {
1628 if (!membarrier_private_expedited_rseq_available()) {
1629 fprintf(stderr, "Membarrier private expedited rseq not available. "
1630 "Skipping membarrier test.\n");
1631 return;
1632 }
1633 fprintf(stderr, "rseq_load_cbne_load_add_load_add_store__ptr is not implemented on this architecture. "
1634 "Skipping membarrier test.\n");
1635 }
1636 #endif
1637
1638 static void show_usage(char **argv)
1639 {
1640 printf("Usage : %s <OPTIONS>\n",
1641 argv[0]);
1642 printf("OPTIONS:\n");
1643 printf(" [-1 loops] Number of loops for delay injection 1\n");
1644 printf(" [-2 loops] Number of loops for delay injection 2\n");
1645 printf(" [-3 loops] Number of loops for delay injection 3\n");
1646 printf(" [-4 loops] Number of loops for delay injection 4\n");
1647 printf(" [-5 loops] Number of loops for delay injection 5\n");
1648 printf(" [-6 loops] Number of loops for delay injection 6\n");
1649 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1650 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1651 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1652 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1653 printf(" [-y] Yield\n");
1654 printf(" [-k] Kill thread with signal\n");
1655 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1656 printf(" [-t N] Number of threads (default 200)\n");
1657 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1658 printf(" [-d] Disable rseq system call (no initialization)\n");
1659 printf(" [-D M] Disable rseq for each M threads\n");
1660 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1661 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1662 printf(" [-c] Check if the rseq syscall is available.\n");
1663 printf(" [-v] Verbose output.\n");
1664 printf(" [-h] Show this help.\n");
1665 printf("\n");
1666 }
1667
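/*
 * Example invocations (illustrative; the binary name depends on how this
 * file is built):
 *
 *   param_test -T s -t 16 -r 100000   spinlock test, 16 threads
 *   param_test -T l -y                list test with yield injection
 *   param_test -T r -t 4 -v           membarrier test, 4 workers, verbose
 */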
1668 int main(int argc, char **argv)
1669 {
1670 int i;
1671
1672 for (i = 1; i < argc; i++) {
1673 if (argv[i][0] != '-')
1674 continue;
1675 switch (argv[i][1]) {
1676 case '1':
1677 case '2':
1678 case '3':
1679 case '4':
1680 case '5':
1681 case '6':
1682 case '7':
1683 case '8':
1684 case '9':
1685 if (argc < i + 2) {
1686 show_usage(argv);
1687 goto error;
1688 }
1689 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1690 i++;
1691 break;
1692 case 'm':
1693 if (argc < i + 2) {
1694 show_usage(argv);
1695 goto error;
1696 }
1697 opt_modulo = atol(argv[i + 1]);
1698 if (opt_modulo < 0) {
1699 show_usage(argv);
1700 goto error;
1701 }
1702 i++;
1703 break;
1704 case 's':
1705 if (argc < i + 2) {
1706 show_usage(argv);
1707 goto error;
1708 }
1709 opt_sleep = atol(argv[i + 1]);
1710 if (opt_sleep < 0) {
1711 show_usage(argv);
1712 goto error;
1713 }
1714 i++;
1715 break;
1716 case 'y':
1717 opt_yield = 1;
1718 break;
1719 case 'k':
1720 opt_signal = 1;
1721 break;
1722 case 'd':
1723 opt_disable_rseq = 1;
1724 break;
1725 case 'D':
1726 if (argc < i + 2) {
1727 show_usage(argv);
1728 goto error;
1729 }
1730 opt_disable_mod = atol(argv[i + 1]);
1731 if (opt_disable_mod < 0) {
1732 show_usage(argv);
1733 goto error;
1734 }
1735 i++;
1736 break;
1737 case 't':
1738 if (argc < i + 2) {
1739 show_usage(argv);
1740 goto error;
1741 }
1742 opt_threads = atol(argv[i + 1]);
1743 if (opt_threads < 0) {
1744 show_usage(argv);
1745 goto error;
1746 }
1747 i++;
1748 break;
1749 case 'r':
1750 if (argc < i + 2) {
1751 show_usage(argv);
1752 goto error;
1753 }
1754 opt_reps = atoll(argv[i + 1]);
1755 if (opt_reps < 0) {
1756 show_usage(argv);
1757 goto error;
1758 }
1759 i++;
1760 break;
1761 case 'h':
1762 show_usage(argv);
1763 goto end;
1764 case 'T':
1765 if (argc < i + 2) {
1766 show_usage(argv);
1767 goto error;
1768 }
1769 opt_test = *argv[i + 1];
1770 switch (opt_test) {
1771 case 's':
1772 case 'l':
1773 case 'i':
1774 case 'b':
1775 case 'm':
1776 case 'r':
1777 break;
1778 default:
1779 show_usage(argv);
1780 goto error;
1781 }
1782 i++;
1783 break;
1784 case 'v':
1785 verbose = 1;
1786 break;
1787 case 'M':
1788 opt_mo = RSEQ_MO_RELEASE;
1789 break;
1790 case 'c':
1791 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1792 printf_verbose("The rseq syscall is available.\n");
1793 goto end;
1794 } else {
1795 printf_verbose("The rseq syscall is unavailable.\n");
1796 goto no_rseq;
1797 }
1798 default:
1799 show_usage(argv);
1800 goto error;
1801 }
1802 }
1803
1804 loop_cnt_1 = loop_cnt[1];
1805 loop_cnt_2 = loop_cnt[2];
1806 loop_cnt_3 = loop_cnt[3];
1807 loop_cnt_4 = loop_cnt[4];
1808 loop_cnt_5 = loop_cnt[5];
1809 loop_cnt_6 = loop_cnt[6];
1810
1811 if (set_signal_handler())
1812 goto error;
1813
1814 if (!opt_disable_rseq && rseq_register_current_thread())
1815 goto error;
1816 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1817 printf_verbose("The rseq cpu id getter is unavailable\n");
1818 goto no_rseq;
1819 }
1820 switch (opt_test) {
1821 case 's':
1822 printf_verbose("spinlock\n");
1823 test_percpu_spinlock();
1824 break;
1825 case 'l':
1826 printf_verbose("linked list\n");
1827 test_percpu_list();
1828 break;
1829 case 'b':
1830 printf_verbose("buffer\n");
1831 test_percpu_buffer();
1832 break;
1833 case 'm':
1834 printf_verbose("memcpy buffer\n");
1835 test_percpu_memcpy_buffer();
1836 break;
1837 case 'i':
1838 printf_verbose("counter increment\n");
1839 test_percpu_inc();
1840 break;
1841 case 'r':
1842 printf_verbose("membarrier\n");
1843 test_membarrier();
1844 break;
1845 }
1846 if (!opt_disable_rseq && rseq_unregister_current_thread())
1847 abort();
1848 end:
1849 return 0;
1850
1851 error:
1852 return -1;
1853
1854 no_rseq:
1855 return 2;
1856 }