mempool: namespacing, add global alloc/free
[librseq.git] / tests / param_test.c
1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6 #include <assert.h>
7 #include <linux/version.h>
8 #include <linux/membarrier.h>
9 #include <pthread.h>
10 #include <sched.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <syscall.h>
16 #include <unistd.h>
17 #include <poll.h>
18 #include <sys/types.h>
19 #include <signal.h>
20 #include <errno.h>
21 #include <stddef.h>
22 #include <stdbool.h>
23 #include <rseq/mempool.h>
24
25 #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
26 enum {
27 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
28 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
29 };
30
31 enum {
32 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
33 };
34 #endif
35
36 #define NR_INJECT 9
37 static int loop_cnt[NR_INJECT + 1];
38
39 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
40 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
41 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
42 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
43 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
44 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
45
46 static int opt_modulo, verbose;
47
48 static int opt_yield, opt_signal, opt_sleep,
49 opt_disable_rseq, opt_threads = 200,
50 opt_disable_mod = 0, opt_test = 's';
51
52 static long long opt_reps = 5000;
53
54 static __thread __attribute__((tls_model("initial-exec")))
55 unsigned int signals_delivered;
56
57 static inline pid_t rseq_gettid(void)
58 {
59 return syscall(__NR_gettid);
60 }
61
62 #ifndef BENCHMARK
63
64 static __thread __attribute__((tls_model("initial-exec"), unused))
65 int yield_mod_cnt, nr_abort;
66
67 #define printf_verbose(fmt, ...) \
68 do { \
69 if (verbose) \
70 printf(fmt, ## __VA_ARGS__); \
71 } while (0)
72
73 #ifdef __i386__
74
75 #define INJECT_ASM_REG "eax"
76
77 #define RSEQ_INJECT_CLOBBER \
78 , INJECT_ASM_REG
79
80 /*
81 * Use ip-relative addressing to get the loop counter.
82 */
83 #define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
84 "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
85 "leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
86 "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
87 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
88 "jz 333f\n\t" \
89 "222:\n\t" \
90 "dec %%" INJECT_ASM_REG "\n\t" \
91 "jnz 222b\n\t" \
92 "333:\n\t"
93
94 #define RSEQ_INJECT_ASM(n) \
95 __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
96
97 #elif defined(__x86_64__)
98
99 #define INJECT_ASM_REG_P "rax"
100 #define INJECT_ASM_REG "eax"
101
102 #define RSEQ_INJECT_CLOBBER \
103 , INJECT_ASM_REG_P \
104 , INJECT_ASM_REG
105
106 #define RSEQ_INJECT_ASM(n) \
107 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
108 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
109 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
110 "jz 333f\n\t" \
111 "222:\n\t" \
112 "dec %%" INJECT_ASM_REG "\n\t" \
113 "jnz 222b\n\t" \
114 "333:\n\t"
115
116 #elif defined(__s390__)
117
118 #define RSEQ_INJECT_INPUT \
119 , [loop_cnt_1]"m"(loop_cnt[1]) \
120 , [loop_cnt_2]"m"(loop_cnt[2]) \
121 , [loop_cnt_3]"m"(loop_cnt[3]) \
122 , [loop_cnt_4]"m"(loop_cnt[4]) \
123 , [loop_cnt_5]"m"(loop_cnt[5]) \
124 , [loop_cnt_6]"m"(loop_cnt[6])
125
126 #define INJECT_ASM_REG "r12"
127
128 #define RSEQ_INJECT_CLOBBER \
129 , INJECT_ASM_REG
130
131 #define RSEQ_INJECT_ASM(n) \
132 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
133 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
134 "je 333f\n\t" \
135 "222:\n\t" \
136 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
137 "jnz 222b\n\t" \
138 "333:\n\t"
139
140 #elif defined(__ARMEL__)
141
142 #define RSEQ_INJECT_INPUT \
143 , [loop_cnt_1]"m"(loop_cnt[1]) \
144 , [loop_cnt_2]"m"(loop_cnt[2]) \
145 , [loop_cnt_3]"m"(loop_cnt[3]) \
146 , [loop_cnt_4]"m"(loop_cnt[4]) \
147 , [loop_cnt_5]"m"(loop_cnt[5]) \
148 , [loop_cnt_6]"m"(loop_cnt[6])
149
150 #define INJECT_ASM_REG "r4"
151
152 #define RSEQ_INJECT_CLOBBER \
153 , INJECT_ASM_REG
154
155 #define RSEQ_INJECT_ASM(n) \
156 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
157 "cmp " INJECT_ASM_REG ", #0\n\t" \
158 "beq 333f\n\t" \
159 "222:\n\t" \
160 "subs " INJECT_ASM_REG ", #1\n\t" \
161 "bne 222b\n\t" \
162 "333:\n\t"
163
164 #elif defined(__AARCH64EL__)
165
166 #define RSEQ_INJECT_INPUT \
167 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
168 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
169 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
170 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
171 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
172 , [loop_cnt_6] "Qo" (loop_cnt[6])
173
174 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
175
176 #define RSEQ_INJECT_ASM(n) \
177 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
178 " cbz " INJECT_ASM_REG ", 333f\n" \
179 "222:\n" \
180 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
181 " cbnz " INJECT_ASM_REG ", 222b\n" \
182 "333:\n"
183
184 #elif defined(__PPC__)
185
186 #define RSEQ_INJECT_INPUT \
187 , [loop_cnt_1]"m"(loop_cnt[1]) \
188 , [loop_cnt_2]"m"(loop_cnt[2]) \
189 , [loop_cnt_3]"m"(loop_cnt[3]) \
190 , [loop_cnt_4]"m"(loop_cnt[4]) \
191 , [loop_cnt_5]"m"(loop_cnt[5]) \
192 , [loop_cnt_6]"m"(loop_cnt[6])
193
194 #define INJECT_ASM_REG "r18"
195
196 #define RSEQ_INJECT_CLOBBER \
197 , INJECT_ASM_REG
198
199 #define RSEQ_INJECT_ASM(n) \
200 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
201 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
202 "beq 333f\n\t" \
203 "222:\n\t" \
204 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
205 "bne 222b\n\t" \
206 "333:\n\t"
207
208 #elif defined(__mips__)
209
210 #define RSEQ_INJECT_INPUT \
211 , [loop_cnt_1]"m"(loop_cnt[1]) \
212 , [loop_cnt_2]"m"(loop_cnt[2]) \
213 , [loop_cnt_3]"m"(loop_cnt[3]) \
214 , [loop_cnt_4]"m"(loop_cnt[4]) \
215 , [loop_cnt_5]"m"(loop_cnt[5]) \
216 , [loop_cnt_6]"m"(loop_cnt[6])
217
218 #define INJECT_ASM_REG "$5"
219
220 #define RSEQ_INJECT_CLOBBER \
221 , INJECT_ASM_REG
222
223 #define RSEQ_INJECT_ASM(n) \
224 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
225 "beqz " INJECT_ASM_REG ", 333f\n\t" \
226 "222:\n\t" \
227 "addiu " INJECT_ASM_REG ", -1\n\t" \
228 "bnez " INJECT_ASM_REG ", 222b\n\t" \
229 "333:\n\t"
230
231 #elif defined(__riscv)
232
233 #define RSEQ_INJECT_INPUT \
234 , [loop_cnt_1]"m"(loop_cnt[1]) \
235 , [loop_cnt_2]"m"(loop_cnt[2]) \
236 , [loop_cnt_3]"m"(loop_cnt[3]) \
237 , [loop_cnt_4]"m"(loop_cnt[4]) \
238 , [loop_cnt_5]"m"(loop_cnt[5]) \
239 , [loop_cnt_6]"m"(loop_cnt[6])
240
241 #define INJECT_ASM_REG "t1"
242
243 #define RSEQ_INJECT_CLOBBER \
244 , INJECT_ASM_REG
245
246 #define RSEQ_INJECT_ASM(n) \
247 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
248 "beqz " INJECT_ASM_REG ", 333f\n\t" \
249 "222:\n\t" \
250 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
251 "bnez " INJECT_ASM_REG ", 222b\n\t" \
252 "333:\n\t"
253
254 #else
255 #error unsupported target
256 #endif
257
258 #define RSEQ_INJECT_FAILED \
259 nr_abort++;
260
261 #define RSEQ_INJECT_C(n) \
262 { \
263 int loc_i, loc_nr_loops = loop_cnt[n]; \
264 \
265 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
266 rseq_barrier(); \
267 } \
268 if (loc_nr_loops == -1 && opt_modulo) { \
269 if (yield_mod_cnt == opt_modulo - 1) { \
270 if (opt_sleep > 0) \
271 poll(NULL, 0, opt_sleep); \
272 if (opt_yield) \
273 sched_yield(); \
274 if (opt_signal) \
275 raise(SIGUSR1); \
276 yield_mod_cnt = 0; \
277 } else { \
278 yield_mod_cnt++; \
279 } \
280 } \
281 }
282
283 #else
284
285 #define printf_verbose(fmt, ...)
286
287 #endif /* BENCHMARK */
288
289 #include <rseq/rseq.h>
290
291 static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
292
293 static int sys_membarrier(int cmd, int flags, int cpu_id)
294 {
295 return syscall(__NR_membarrier, cmd, flags, cpu_id);
296 }
297
298 #ifdef rseq_arch_has_load_cbne_load_add_load_add_store
299 #define TEST_MEMBARRIER
300 #endif
301
302 #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
303 # define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
304 static
305 int get_current_cpu_id(void)
306 {
307 return rseq_current_mm_cid();
308 }
309 static
310 bool rseq_validate_cpu_id(void)
311 {
312 return rseq_mm_cid_available();
313 }
314 static
315 bool rseq_use_cpu_index(void)
316 {
317 return false; /* Use mm_cid */
318 }
319 # ifdef TEST_MEMBARRIER
320 /*
321 * Membarrier does not currently support targeting a mm_cid, so
322 * issue the barrier on all cpus.
323 */
324 static
325 int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
326 {
327 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
328 0, 0);
329 }
330 # endif /* TEST_MEMBARRIER */
331 #else
332 # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
333 static
334 int get_current_cpu_id(void)
335 {
336 return rseq_cpu_start();
337 }
338 static
339 bool rseq_validate_cpu_id(void)
340 {
341 return rseq_current_cpu_raw() >= 0;
342 }
343 static
344 bool rseq_use_cpu_index(void)
345 {
346 return true; /* Use cpu_id as index. */
347 }
348 # ifdef TEST_MEMBARRIER
349 static
350 int rseq_membarrier_expedited(int cpu)
351 {
352 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
353 MEMBARRIER_CMD_FLAG_CPU, cpu);
354 }
355 # endif /* TEST_MEMBARRIER */
356 #endif
357
358 struct percpu_lock {
359 intptr_t v;
360 };
361
362 struct spinlock_test_data {
363 struct percpu_lock lock;
364 intptr_t count;
365 };
366
367 struct spinlock_thread_test_data {
368 struct spinlock_test_data __rseq_percpu *data;
369 long long reps;
370 int reg;
371 };
372
373 struct inc_test_data {
374 intptr_t count;
375 };
376
377 struct inc_thread_test_data {
378 struct inc_test_data __rseq_percpu *data;
379 long long reps;
380 int reg;
381 };
382
383 struct percpu_list_node {
384 intptr_t data;
385 struct percpu_list_node *next;
386 };
387
388 struct percpu_list {
389 struct percpu_list_node *head;
390 };
391
392 #define BUFFER_ITEM_PER_CPU 100
393
394 struct percpu_buffer_node {
395 intptr_t data;
396 };
397
398 struct percpu_buffer {
399 intptr_t offset;
400 intptr_t buflen;
401 struct percpu_buffer_node **array;
402 };
403
404 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
405
406 struct percpu_memcpy_buffer_node {
407 intptr_t data1;
408 uint64_t data2;
409 };
410
411 struct percpu_memcpy_buffer {
412 intptr_t offset;
413 intptr_t buflen;
414 struct percpu_memcpy_buffer_node *array;
415 };
416
417 /* A simple percpu spinlock. Grabs lock on current cpu. */
418 static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
419 {
420 int cpu;
421
422 for (;;) {
423 int ret;
424
425 cpu = get_current_cpu_id();
426 if (cpu < 0) {
427 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
428 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
429 abort();
430 }
431 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
432 &rseq_percpu_ptr(lock, cpu)->v,
433 0, 1, cpu);
434 if (rseq_likely(!ret))
435 break;
436 /* Retry if comparison fails or rseq aborts. */
437 }
438 /*
439 * Acquire semantic when taking lock after control dependency.
440 * Matches rseq_smp_store_release().
441 */
442 rseq_smp_acquire__after_ctrl_dep();
443 return cpu;
444 }
445
446 static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
447 {
448 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
449 /*
450 * Release lock, with release semantic. Matches
451 * rseq_smp_acquire__after_ctrl_dep().
452 */
453 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
454 }
455
456 static void *test_percpu_spinlock_thread(void *arg)
457 {
458 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
459 struct spinlock_test_data __rseq_percpu *data = thread_data->data;
460 long long i, reps;
461
462 if (!opt_disable_rseq && thread_data->reg &&
463 rseq_register_current_thread())
464 abort();
465 reps = thread_data->reps;
466 for (i = 0; i < reps; i++) {
467 int cpu = rseq_this_cpu_lock(&data->lock);
468 rseq_percpu_ptr(data, cpu)->count++;
469 rseq_percpu_unlock(&data->lock, cpu);
470 #ifndef BENCHMARK
471 if (i != 0 && !(i % (reps / 10)))
472 printf_verbose("tid %d: count %lld\n",
473 (int) rseq_gettid(), i);
474 #endif
475 }
476 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
477 (int) rseq_gettid(), nr_abort, signals_delivered);
478 if (!opt_disable_rseq && thread_data->reg &&
479 rseq_unregister_current_thread())
480 abort();
481 return NULL;
482 }
483
484 /*
485 * A simple test which implements a sharded counter using a per-cpu
486 * lock. Obviously real applications might prefer to simply use a
487 * per-cpu increment; however, this is reasonable for a test and the
488 * lock can be extended to synchronize more complicated operations.
489 */
490 static void test_percpu_spinlock(void)
491 {
492 const int num_threads = opt_threads;
493 int i, ret;
494 uint64_t sum;
495 pthread_t test_threads[num_threads];
496 struct spinlock_test_data __rseq_percpu *data;
497 struct spinlock_thread_test_data thread_data[num_threads];
498 struct rseq_mempool *mempool;
499
500 mempool = rseq_mempool_create("spinlock_test_data",
501 sizeof(struct spinlock_test_data),
502 0, CPU_SETSIZE, NULL);
503 if (!mempool) {
504 perror("rseq_mempool_create");
505 abort();
506 }
507 data = (struct spinlock_test_data __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
508 if (!data) {
509 perror("rseq_mempool_percpu_zmalloc");
510 abort();
511 }
512
513 for (i = 0; i < num_threads; i++) {
514 thread_data[i].reps = opt_reps;
515 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
516 thread_data[i].reg = 1;
517 else
518 thread_data[i].reg = 0;
519 thread_data[i].data = data;
520 ret = pthread_create(&test_threads[i], NULL,
521 test_percpu_spinlock_thread,
522 &thread_data[i]);
523 if (ret) {
524 errno = ret;
525 perror("pthread_create");
526 abort();
527 }
528 }
529
530 for (i = 0; i < num_threads; i++) {
531 ret = pthread_join(test_threads[i], NULL);
532 if (ret) {
533 errno = ret;
534 perror("pthread_join");
535 abort();
536 }
537 }
538
539 sum = 0;
540 for (i = 0; i < CPU_SETSIZE; i++)
541 sum += rseq_percpu_ptr(data, i)->count;
542
543 assert(sum == (uint64_t)opt_reps * num_threads);
544 rseq_mempool_percpu_free(data);
545 ret = rseq_mempool_destroy(mempool);
546 if (ret) {
547 perror("rseq_mempool_destroy");
548 abort();
549 }
550 }
551
552 static void *test_percpu_inc_thread(void *arg)
553 {
554 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
555 struct inc_test_data __rseq_percpu *data = thread_data->data;
556 long long i, reps;
557
558 if (!opt_disable_rseq && thread_data->reg &&
559 rseq_register_current_thread())
560 abort();
561 reps = thread_data->reps;
562 for (i = 0; i < reps; i++) {
563 int ret;
564
565 do {
566 int cpu;
567
568 cpu = get_current_cpu_id();
569 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
570 &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
571 } while (rseq_unlikely(ret));
572 #ifndef BENCHMARK
573 if (i != 0 && !(i % (reps / 10)))
574 printf_verbose("tid %d: count %lld\n",
575 (int) rseq_gettid(), i);
576 #endif
577 }
578 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
579 (int) rseq_gettid(), nr_abort, signals_delivered);
580 if (!opt_disable_rseq && thread_data->reg &&
581 rseq_unregister_current_thread())
582 abort();
583 return NULL;
584 }
585
586 static void test_percpu_inc(void)
587 {
588 const int num_threads = opt_threads;
589 int i, ret;
590 uint64_t sum;
591 pthread_t test_threads[num_threads];
592 struct inc_test_data __rseq_percpu *data;
593 struct inc_thread_test_data thread_data[num_threads];
594 struct rseq_mempool *mempool;
595
596 mempool = rseq_mempool_create("inc_test_data",
597 sizeof(struct inc_test_data),
598 0, CPU_SETSIZE, NULL);
599 if (!mempool) {
600 perror("rseq_mempool_create");
601 abort();
602 }
603 data = (struct inc_test_data __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
604 if (!data) {
605 perror("rseq_mempool_percpu_zmalloc");
606 abort();
607 }
608
609 for (i = 0; i < num_threads; i++) {
610 thread_data[i].reps = opt_reps;
611 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
612 thread_data[i].reg = 1;
613 else
614 thread_data[i].reg = 0;
615 thread_data[i].data = data;
616 ret = pthread_create(&test_threads[i], NULL,
617 test_percpu_inc_thread,
618 &thread_data[i]);
619 if (ret) {
620 errno = ret;
621 perror("pthread_create");
622 abort();
623 }
624 }
625
626 for (i = 0; i < num_threads; i++) {
627 ret = pthread_join(test_threads[i], NULL);
628 if (ret) {
629 errno = ret;
630 perror("pthread_join");
631 abort();
632 }
633 }
634
635 sum = 0;
636 for (i = 0; i < CPU_SETSIZE; i++)
637 sum += rseq_percpu_ptr(data, i)->count;
638
639 assert(sum == (uint64_t)opt_reps * num_threads);
640 rseq_mempool_percpu_free(data);
641 ret = rseq_mempool_destroy(mempool);
642 if (ret) {
643 perror("rseq_mempool_destroy");
644 abort();
645 }
646 }
647
648 static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
649 struct percpu_list_node *node,
650 int *_cpu)
651 {
652 int cpu;
653
654 for (;;) {
655 intptr_t *targetptr, newval, expect;
656 struct percpu_list *cpulist;
657 int ret;
658
659 cpu = get_current_cpu_id();
660 cpulist = rseq_percpu_ptr(list, cpu);
661 /* Load list->c[cpu].head with single-copy atomicity. */
662 expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
663 newval = (intptr_t)node;
664 targetptr = (intptr_t *)&cpulist->head;
665 node->next = (struct percpu_list_node *)expect;
666 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
667 targetptr, expect, newval, cpu);
668 if (rseq_likely(!ret))
669 break;
670 /* Retry if comparison fails or rseq aborts. */
671 }
672 if (_cpu)
673 *_cpu = cpu;
674 }
675
676 /*
677 * Unlike a traditional lock-less linked list; the availability of a
678 * rseq primitive allows us to implement pop without concerns over
679 * ABA-type races.
680 */
681 static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
682 int *_cpu)
683 {
684 struct percpu_list_node *node = NULL;
685 int cpu;
686
687 for (;;) {
688 struct percpu_list_node *head;
689 intptr_t *targetptr, expectnot, *load;
690 struct percpu_list *cpulist;
691 long offset;
692 int ret;
693
694 cpu = get_current_cpu_id();
695 cpulist = rseq_percpu_ptr(list, cpu);
696 targetptr = (intptr_t *)&cpulist->head;
697 expectnot = (intptr_t)NULL;
698 offset = offsetof(struct percpu_list_node, next);
699 load = (intptr_t *)&head;
700 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
701 targetptr, expectnot,
702 offset, load, cpu);
703 if (rseq_likely(!ret)) {
704 node = head;
705 break;
706 }
707 if (ret > 0)
708 break;
709 /* Retry if rseq aborts. */
710 }
711 if (_cpu)
712 *_cpu = cpu;
713 return node;
714 }
715
716 /*
717 * __percpu_list_pop is not safe against concurrent accesses. Should
718 * only be used on lists that are not concurrently modified.
719 */
720 static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
721 {
722 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
723 struct percpu_list_node *node;
724
725 node = cpulist->head;
726 if (!node)
727 return NULL;
728 cpulist->head = node->next;
729 return node;
730 }
731
732 static void *test_percpu_list_thread(void *arg)
733 {
734 long long i, reps;
735 struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
736
737 if (!opt_disable_rseq && rseq_register_current_thread())
738 abort();
739
740 reps = opt_reps;
741 for (i = 0; i < reps; i++) {
742 struct percpu_list_node *node;
743
744 node = this_cpu_list_pop(list, NULL);
745 if (opt_yield)
746 sched_yield(); /* encourage shuffling */
747 if (node)
748 this_cpu_list_push(list, node, NULL);
749 }
750
751 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
752 (int) rseq_gettid(), nr_abort, signals_delivered);
753 if (!opt_disable_rseq && rseq_unregister_current_thread())
754 abort();
755
756 return NULL;
757 }
758
759 /* Simultaneous modification to a per-cpu linked list from many threads. */
760 static void test_percpu_list(void)
761 {
762 const int num_threads = opt_threads;
763 int i, j, ret;
764 uint64_t sum = 0, expected_sum = 0;
765 struct percpu_list __rseq_percpu *list;
766 pthread_t test_threads[num_threads];
767 cpu_set_t allowed_cpus;
768 struct rseq_mempool *mempool;
769
770 mempool = rseq_mempool_create("percpu_list", sizeof(struct percpu_list),
771 0, CPU_SETSIZE, NULL);
772 if (!mempool) {
773 perror("rseq_mempool_create");
774 abort();
775 }
776 list = (struct percpu_list __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
777 if (!list) {
778 perror("rseq_mempool_percpu_zmalloc");
779 abort();
780 }
781
782 /* Generate list entries for every usable cpu. */
783 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
784 for (i = 0; i < CPU_SETSIZE; i++) {
785 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
786 continue;
787 for (j = 1; j <= 100; j++) {
788 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
789 struct percpu_list_node *node;
790
791 expected_sum += j;
792
793 node = (struct percpu_list_node *) malloc(sizeof(*node));
794 assert(node);
795 node->data = j;
796 node->next = cpulist->head;
797 cpulist->head = node;
798 }
799 }
800
801 for (i = 0; i < num_threads; i++) {
802 ret = pthread_create(&test_threads[i], NULL,
803 test_percpu_list_thread, list);
804 if (ret) {
805 errno = ret;
806 perror("pthread_create");
807 abort();
808 }
809 }
810
811 for (i = 0; i < num_threads; i++) {
812 ret = pthread_join(test_threads[i], NULL);
813 if (ret) {
814 errno = ret;
815 perror("pthread_join");
816 abort();
817 }
818 }
819
820 for (i = 0; i < CPU_SETSIZE; i++) {
821 struct percpu_list_node *node;
822
823 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
824 continue;
825
826 while ((node = __percpu_list_pop(list, i))) {
827 sum += node->data;
828 free(node);
829 }
830 }
831
832 /*
833 * All entries should now be accounted for (unless some external
834 * actor is interfering with our allowed affinity while this
835 * test is running).
836 */
837 assert(sum == expected_sum);
838 rseq_mempool_percpu_free(list);
839 ret = rseq_mempool_destroy(mempool);
840 if (ret) {
841 perror("rseq_mempool_destroy");
842 abort();
843 }
844 }
845
846 static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
847 struct percpu_buffer_node *node,
848 int *_cpu)
849 {
850 bool result = false;
851 int cpu;
852
853 for (;;) {
854 struct percpu_buffer *cpubuffer;
855 intptr_t *targetptr_spec, newval_spec;
856 intptr_t *targetptr_final, newval_final;
857 intptr_t offset;
858 int ret;
859
860 cpu = get_current_cpu_id();
861 cpubuffer = rseq_percpu_ptr(buffer, cpu);
862 offset = RSEQ_READ_ONCE(cpubuffer->offset);
863 if (offset == cpubuffer->buflen)
864 break;
865 newval_spec = (intptr_t)node;
866 targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
867 newval_final = offset + 1;
868 targetptr_final = &cpubuffer->offset;
869 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
870 targetptr_final, offset, targetptr_spec,
871 newval_spec, newval_final, cpu);
872 if (rseq_likely(!ret)) {
873 result = true;
874 break;
875 }
876 /* Retry if comparison fails or rseq aborts. */
877 }
878 if (_cpu)
879 *_cpu = cpu;
880 return result;
881 }
882
883 static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
884 int *_cpu)
885 {
886 struct percpu_buffer_node *head;
887 int cpu;
888
889 for (;;) {
890 struct percpu_buffer *cpubuffer;
891 intptr_t *targetptr, newval;
892 intptr_t offset;
893 int ret;
894
895 cpu = get_current_cpu_id();
896 cpubuffer = rseq_percpu_ptr(buffer, cpu);
897 /* Load offset with single-copy atomicity. */
898 offset = RSEQ_READ_ONCE(cpubuffer->offset);
899 if (offset == 0) {
900 head = NULL;
901 break;
902 }
903 head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
904 newval = offset - 1;
905 targetptr = (intptr_t *)&cpubuffer->offset;
906 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
907 targetptr, offset,
908 (intptr_t *)&cpubuffer->array[offset - 1],
909 (intptr_t)head, newval, cpu);
910 if (rseq_likely(!ret))
911 break;
912 /* Retry if comparison fails or rseq aborts. */
913 }
914 if (_cpu)
915 *_cpu = cpu;
916 return head;
917 }
918
919 /*
920 * __percpu_buffer_pop is not safe against concurrent accesses. Should
921 * only be used on buffers that are not concurrently modified.
922 */
923 static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
924 int cpu)
925 {
926 struct percpu_buffer *cpubuffer;
927 struct percpu_buffer_node *head;
928 intptr_t offset;
929
930 cpubuffer = rseq_percpu_ptr(buffer, cpu);
931 offset = cpubuffer->offset;
932 if (offset == 0)
933 return NULL;
934 head = cpubuffer->array[offset - 1];
935 cpubuffer->offset = offset - 1;
936 return head;
937 }
938
939 static void *test_percpu_buffer_thread(void *arg)
940 {
941 long long i, reps;
942 struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;
943
944 if (!opt_disable_rseq && rseq_register_current_thread())
945 abort();
946
947 reps = opt_reps;
948 for (i = 0; i < reps; i++) {
949 struct percpu_buffer_node *node;
950
951 node = this_cpu_buffer_pop(buffer, NULL);
952 if (opt_yield)
953 sched_yield(); /* encourage shuffling */
954 if (node) {
955 if (!this_cpu_buffer_push(buffer, node, NULL)) {
956 /* Should increase buffer size. */
957 abort();
958 }
959 }
960 }
961
962 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
963 (int) rseq_gettid(), nr_abort, signals_delivered);
964 if (!opt_disable_rseq && rseq_unregister_current_thread())
965 abort();
966
967 return NULL;
968 }
969
970 /* Simultaneous modification to a per-cpu buffer from many threads. */
971 static void test_percpu_buffer(void)
972 {
973 const int num_threads = opt_threads;
974 int i, j, ret;
975 uint64_t sum = 0, expected_sum = 0;
976 struct percpu_buffer __rseq_percpu *buffer;
977 pthread_t test_threads[num_threads];
978 cpu_set_t allowed_cpus;
979 struct rseq_mempool *mempool;
980
981 mempool = rseq_mempool_create("percpu_buffer", sizeof(struct percpu_buffer),
982 0, CPU_SETSIZE, NULL);
983 if (!mempool) {
984 perror("rseq_mempool_create");
985 abort();
986 }
987 buffer = (struct percpu_buffer __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
988 if (!buffer) {
989 perror("rseq_mempool_percpu_zmalloc");
990 abort();
991 }
992
993 /* Generate list entries for every usable cpu. */
994 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
995 for (i = 0; i < CPU_SETSIZE; i++) {
996 struct percpu_buffer *cpubuffer;
997
998 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
999 continue;
1000 cpubuffer = rseq_percpu_ptr(buffer, i);
1001 /* Worse-case is every item in same CPU. */
1002 cpubuffer->array =
1003 (struct percpu_buffer_node **)
1004 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
1005 BUFFER_ITEM_PER_CPU);
1006 assert(cpubuffer->array);
1007 cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
1008 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
1009 struct percpu_buffer_node *node;
1010
1011 expected_sum += j;
1012
1013 /*
1014 * We could theoretically put the word-sized
1015 * "data" directly in the buffer. However, we
1016 * want to model objects that would not fit
1017 * within a single word, so allocate an object
1018 * for each node.
1019 */
1020 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
1021 assert(node);
1022 node->data = j;
1023 cpubuffer->array[j - 1] = node;
1024 cpubuffer->offset++;
1025 }
1026 }
1027
1028 for (i = 0; i < num_threads; i++) {
1029 ret = pthread_create(&test_threads[i], NULL,
1030 test_percpu_buffer_thread, buffer);
1031 if (ret) {
1032 errno = ret;
1033 perror("pthread_create");
1034 abort();
1035 }
1036 }
1037
1038 for (i = 0; i < num_threads; i++) {
1039 ret = pthread_join(test_threads[i], NULL);
1040 if (ret) {
1041 errno = ret;
1042 perror("pthread_join");
1043 abort();
1044 }
1045 }
1046
1047 for (i = 0; i < CPU_SETSIZE; i++) {
1048 struct percpu_buffer *cpubuffer;
1049 struct percpu_buffer_node *node;
1050
1051 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1052 continue;
1053
1054 cpubuffer = rseq_percpu_ptr(buffer, i);
1055 while ((node = __percpu_buffer_pop(buffer, i))) {
1056 sum += node->data;
1057 free(node);
1058 }
1059 free(cpubuffer->array);
1060 }
1061
1062 /*
1063 * All entries should now be accounted for (unless some external
1064 * actor is interfering with our allowed affinity while this
1065 * test is running).
1066 */
1067 assert(sum == expected_sum);
1068 rseq_mempool_percpu_free(buffer);
1069 ret = rseq_mempool_destroy(mempool);
1070 if (ret) {
1071 perror("rseq_mempool_destroy");
1072 abort();
1073 }
1074 }
1075
1076 static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1077 struct percpu_memcpy_buffer_node item,
1078 int *_cpu)
1079 {
1080 bool result = false;
1081 int cpu;
1082
1083 for (;;) {
1084 struct percpu_memcpy_buffer *cpubuffer;
1085 intptr_t *targetptr_final, newval_final, offset;
1086 char *destptr, *srcptr;
1087 size_t copylen;
1088 int ret;
1089
1090 cpu = get_current_cpu_id();
1091 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1092 /* Load offset with single-copy atomicity. */
1093 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1094 if (offset == cpubuffer->buflen)
1095 break;
1096 destptr = (char *)&cpubuffer->array[offset];
1097 srcptr = (char *)&item;
1098 /* copylen must be <= 4kB. */
1099 copylen = sizeof(item);
1100 newval_final = offset + 1;
1101 targetptr_final = &cpubuffer->offset;
1102 ret = rseq_load_cbne_memcpy_store__ptr(
1103 opt_mo, RSEQ_PERCPU,
1104 targetptr_final, offset,
1105 destptr, srcptr, copylen,
1106 newval_final, cpu);
1107 if (rseq_likely(!ret)) {
1108 result = true;
1109 break;
1110 }
1111 /* Retry if comparison fails or rseq aborts. */
1112 }
1113 if (_cpu)
1114 *_cpu = cpu;
1115 return result;
1116 }
1117
1118 static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1119 struct percpu_memcpy_buffer_node *item,
1120 int *_cpu)
1121 {
1122 bool result = false;
1123 int cpu;
1124
1125 for (;;) {
1126 struct percpu_memcpy_buffer *cpubuffer;
1127 intptr_t *targetptr_final, newval_final, offset;
1128 char *destptr, *srcptr;
1129 size_t copylen;
1130 int ret;
1131
1132 cpu = get_current_cpu_id();
1133 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1134 /* Load offset with single-copy atomicity. */
1135 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1136 if (offset == 0)
1137 break;
1138 destptr = (char *)item;
1139 srcptr = (char *)&cpubuffer->array[offset - 1];
1140 /* copylen must be <= 4kB. */
1141 copylen = sizeof(*item);
1142 newval_final = offset - 1;
1143 targetptr_final = &cpubuffer->offset;
1144 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1145 targetptr_final, offset, destptr, srcptr, copylen,
1146 newval_final, cpu);
1147 if (rseq_likely(!ret)) {
1148 result = true;
1149 break;
1150 }
1151 /* Retry if comparison fails or rseq aborts. */
1152 }
1153 if (_cpu)
1154 *_cpu = cpu;
1155 return result;
1156 }
1157
1158 /*
1159 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1160 * only be used on buffers that are not concurrently modified.
1161 */
1162 static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1163 struct percpu_memcpy_buffer_node *item,
1164 int cpu)
1165 {
1166 struct percpu_memcpy_buffer *cpubuffer;
1167 intptr_t offset;
1168
1169 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1170 offset = cpubuffer->offset;
1171 if (offset == 0)
1172 return false;
1173 memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
1174 cpubuffer->offset = offset - 1;
1175 return true;
1176 }
1177
1178 static void *test_percpu_memcpy_buffer_thread(void *arg)
1179 {
1180 long long i, reps;
1181 struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;
1182
1183 if (!opt_disable_rseq && rseq_register_current_thread())
1184 abort();
1185
1186 reps = opt_reps;
1187 for (i = 0; i < reps; i++) {
1188 struct percpu_memcpy_buffer_node item;
1189 bool result;
1190
1191 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1192 if (opt_yield)
1193 sched_yield(); /* encourage shuffling */
1194 if (result) {
1195 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1196 /* Should increase buffer size. */
1197 abort();
1198 }
1199 }
1200 }
1201
1202 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1203 (int) rseq_gettid(), nr_abort, signals_delivered);
1204 if (!opt_disable_rseq && rseq_unregister_current_thread())
1205 abort();
1206
1207 return NULL;
1208 }
1209
1210 /* Simultaneous modification to a per-cpu buffer from many threads. */
1211 static void test_percpu_memcpy_buffer(void)
1212 {
1213 const int num_threads = opt_threads;
1214 int i, j, ret;
1215 uint64_t sum = 0, expected_sum = 0;
1216 struct percpu_memcpy_buffer *buffer;
1217 pthread_t test_threads[num_threads];
1218 cpu_set_t allowed_cpus;
1219 struct rseq_mempool *mempool;
1220
1221 mempool = rseq_mempool_create("percpu_memcpy_buffer",
1222 sizeof(struct percpu_memcpy_buffer),
1223 0, CPU_SETSIZE, NULL);
1224 if (!mempool) {
1225 perror("rseq_mempool_create");
1226 abort();
1227 }
1228 buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
1229 if (!buffer) {
1230 perror("rseq_mempool_percpu_zmalloc");
1231 abort();
1232 }
1233
1234 /* Generate list entries for every usable cpu. */
1235 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1236 for (i = 0; i < CPU_SETSIZE; i++) {
1237 struct percpu_memcpy_buffer *cpubuffer;
1238
1239 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1240 continue;
1241 cpubuffer = rseq_percpu_ptr(buffer, i);
1242 /* Worse-case is every item in same CPU. */
1243 cpubuffer->array =
1244 (struct percpu_memcpy_buffer_node *)
1245 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
1246 MEMCPY_BUFFER_ITEM_PER_CPU);
1247 assert(cpubuffer->array);
1248 cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1249 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1250 expected_sum += 2 * j + 1;
1251
1252 /*
1253 * We could theoretically put the word-sized
1254 * "data" directly in the buffer. However, we
1255 * want to model objects that would not fit
1256 * within a single word, so allocate an object
1257 * for each node.
1258 */
1259 cpubuffer->array[j - 1].data1 = j;
1260 cpubuffer->array[j - 1].data2 = j + 1;
1261 cpubuffer->offset++;
1262 }
1263 }
1264
1265 for (i = 0; i < num_threads; i++) {
1266 ret = pthread_create(&test_threads[i], NULL,
1267 test_percpu_memcpy_buffer_thread,
1268 buffer);
1269 if (ret) {
1270 errno = ret;
1271 perror("pthread_create");
1272 abort();
1273 }
1274 }
1275
1276 for (i = 0; i < num_threads; i++) {
1277 ret = pthread_join(test_threads[i], NULL);
1278 if (ret) {
1279 errno = ret;
1280 perror("pthread_join");
1281 abort();
1282 }
1283 }
1284
1285 for (i = 0; i < CPU_SETSIZE; i++) {
1286 struct percpu_memcpy_buffer_node item;
1287 struct percpu_memcpy_buffer *cpubuffer;
1288
1289 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1290 continue;
1291
1292 cpubuffer = rseq_percpu_ptr(buffer, i);
1293 while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
1294 sum += item.data1;
1295 sum += item.data2;
1296 }
1297 free(cpubuffer->array);
1298 }
1299
1300 /*
1301 * All entries should now be accounted for (unless some external
1302 * actor is interfering with our allowed affinity while this
1303 * test is running).
1304 */
1305 assert(sum == expected_sum);
1306 rseq_mempool_percpu_free(buffer);
1307 ret = rseq_mempool_destroy(mempool);
1308 if (ret) {
1309 perror("rseq_mempool_destroy");
1310 abort();
1311 }
1312 }
1313
1314 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1315 {
1316 signals_delivered++;
1317 }
1318
1319 static int set_signal_handler(void)
1320 {
1321 int ret = 0;
1322 struct sigaction sa;
1323 sigset_t sigset;
1324
1325 ret = sigemptyset(&sigset);
1326 if (ret < 0) {
1327 perror("sigemptyset");
1328 return ret;
1329 }
1330
1331 sa.sa_handler = test_signal_interrupt_handler;
1332 sa.sa_mask = sigset;
1333 sa.sa_flags = 0;
1334 ret = sigaction(SIGUSR1, &sa, NULL);
1335 if (ret < 0) {
1336 perror("sigaction");
1337 return ret;
1338 }
1339
1340 printf_verbose("Signal handler set for SIGUSR1\n");
1341
1342 return ret;
1343 }
1344
1345 static
1346 bool membarrier_private_expedited_rseq_available(void)
1347 {
1348 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1349
1350 if (status < 0) {
1351 perror("membarrier");
1352 return false;
1353 }
1354 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1355 return false;
1356 return true;
1357 }
1358
1359 /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
1360 #ifdef TEST_MEMBARRIER
1361 struct test_membarrier_thread_args {
1362 struct rseq_mempool *mempool;
1363 struct percpu_list __rseq_percpu *percpu_list_ptr;
1364 int stop;
1365 };
1366
1367 /* Worker threads modify data in their "active" percpu lists. */
1368 static
1369 void *test_membarrier_worker_thread(void *arg)
1370 {
1371 struct test_membarrier_thread_args *args =
1372 (struct test_membarrier_thread_args *)arg;
1373 const long long iters = opt_reps;
1374 long long i;
1375
1376 if (rseq_register_current_thread()) {
1377 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1378 errno, strerror(errno));
1379 abort();
1380 }
1381
1382 /* Wait for initialization. */
1383 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1384
1385 for (i = 0; i < iters; ++i) {
1386 int ret;
1387
1388 do {
1389 int cpu = get_current_cpu_id();
1390 struct percpu_list __rseq_percpu *list = RSEQ_READ_ONCE(args->percpu_list_ptr);
1391 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
1392
1393 ret = rseq_load_cbne_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1394 (intptr_t *) &args->percpu_list_ptr,
1395 (intptr_t) list, (intptr_t *) &cpulist->head, 0, 1, cpu);
1396 } while (rseq_unlikely(ret));
1397 }
1398
1399 if (rseq_unregister_current_thread()) {
1400 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1401 errno, strerror(errno));
1402 abort();
1403 }
1404 return NULL;
1405 }
1406
1407 static
1408 struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_mempool *mempool)
1409 {
1410 struct percpu_list __rseq_percpu *list;
1411 int i;
1412
1413 list = (struct percpu_list __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
1414 if (!list) {
1415 perror("rseq_mempool_percpu_zmalloc");
1416 return NULL;
1417 }
1418 for (i = 0; i < CPU_SETSIZE; i++) {
1419 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
1420 struct percpu_list_node *node;
1421
1422 node = (struct percpu_list_node *) malloc(sizeof(*node));
1423 assert(node);
1424 node->data = 0;
1425 node->next = NULL;
1426 cpulist->head = node;
1427 }
1428 return list;
1429 }
1430
1431 static
1432 void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
1433 {
1434 int i;
1435
1436 for (i = 0; i < CPU_SETSIZE; i++)
1437 free(rseq_percpu_ptr(list, i)->head);
1438 rseq_mempool_percpu_free(list);
1439 }
1440
1441 static
1442 long long test_membarrier_count_percpu_list(struct percpu_list __rseq_percpu *list)
1443 {
1444 long long total_count = 0;
1445 int i;
1446
1447 for (i = 0; i < CPU_SETSIZE; i++)
1448 total_count += rseq_percpu_ptr(list, i)->head->data;
1449 return total_count;
1450 }
1451
1452 /*
1453 * The manager thread swaps per-cpu lists that worker threads see,
1454 * and validates that there are no unexpected modifications.
1455 */
1456 static
1457 void *test_membarrier_manager_thread(void *arg)
1458 {
1459 struct test_membarrier_thread_args *args =
1460 (struct test_membarrier_thread_args *)arg;
1461 struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
1462 intptr_t expect_a = 0, expect_b = 0;
1463 int cpu_a = 0, cpu_b = 0;
1464 struct rseq_mempool *mempool;
1465 int ret;
1466 long long total_count = 0;
1467
1468 mempool = rseq_mempool_create("percpu_list", sizeof(struct percpu_list),
1469 0, CPU_SETSIZE, NULL);
1470 if (!mempool) {
1471 perror("rseq_mempool_create");
1472 abort();
1473 }
1474 args->mempool = mempool;
1475
1476 if (rseq_register_current_thread()) {
1477 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1478 errno, strerror(errno));
1479 abort();
1480 }
1481
1482 /* Init lists. */
1483 list_a = test_membarrier_alloc_percpu_list(mempool);
1484 assert(list_a);
1485 list_b = test_membarrier_alloc_percpu_list(mempool);
1486 assert(list_b);
1487
1488 /* Initialize lists before publishing them. */
1489 rseq_smp_wmb();
1490
1491 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1492
1493 while (!RSEQ_READ_ONCE(args->stop)) {
1494 /* list_a is "active". */
1495 cpu_a = rand() % CPU_SETSIZE;
1496 /*
1497 * As list_b is "inactive", we should never see changes
1498 * to list_b.
1499 */
1500 if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
1501 fprintf(stderr, "Membarrier test failed\n");
1502 abort();
1503 }
1504
1505 /* Make list_b "active". */
1506 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
1507 if (rseq_membarrier_expedited(cpu_a) &&
1508 errno != ENXIO /* missing CPU */) {
1509 perror("sys_membarrier");
1510 abort();
1511 }
1512 /*
1513 * Cpu A should now only modify list_b, so the values
1514 * in list_a should be stable.
1515 */
1516 expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
1517
1518 cpu_b = rand() % CPU_SETSIZE;
1519 /*
1520 * As list_a is "inactive", we should never see changes
1521 * to list_a.
1522 */
1523 if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
1524 fprintf(stderr, "Membarrier test failed\n");
1525 abort();
1526 }
1527
1528 /* Make list_a "active". */
1529 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1530 if (rseq_membarrier_expedited(cpu_b) &&
1531 errno != ENXIO /* missing CPU */) {
1532 perror("sys_membarrier");
1533 abort();
1534 }
1535 /* Remember a value from list_b. */
1536 expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
1537 }
1538
1539 total_count += test_membarrier_count_percpu_list(list_a);
1540 total_count += test_membarrier_count_percpu_list(list_b);
1541
1542 /* Validate that we observe the right number of increments. */
1543 if (total_count != opt_threads * opt_reps) {
1544 fprintf(stderr, "Error: Observed %lld increments, expected %lld\n",
1545 total_count, opt_threads * opt_reps);
1546 abort();
1547 }
1548 test_membarrier_free_percpu_list(list_a);
1549 test_membarrier_free_percpu_list(list_b);
1550
1551 if (rseq_unregister_current_thread()) {
1552 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1553 errno, strerror(errno));
1554 abort();
1555 }
1556 ret = rseq_mempool_destroy(mempool);
1557 if (ret) {
1558 perror("rseq_mempool_destroy");
1559 abort();
1560 }
1561
1562 return NULL;
1563 }
1564
1565 static
1566 void test_membarrier(void)
1567 {
1568 const int num_threads = opt_threads;
1569 struct test_membarrier_thread_args thread_args;
1570 pthread_t worker_threads[num_threads];
1571 pthread_t manager_thread;
1572 int i, ret;
1573
1574 if (!membarrier_private_expedited_rseq_available()) {
1575 fprintf(stderr, "Membarrier private expedited rseq not available. "
1576 "Skipping membarrier test.\n");
1577 return;
1578 }
1579 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1580 perror("sys_membarrier");
1581 abort();
1582 }
1583
1584 thread_args.percpu_list_ptr = NULL;
1585 thread_args.stop = 0;
1586 ret = pthread_create(&manager_thread, NULL,
1587 test_membarrier_manager_thread, &thread_args);
1588 if (ret) {
1589 errno = ret;
1590 perror("pthread_create");
1591 abort();
1592 }
1593
1594 for (i = 0; i < num_threads; i++) {
1595 ret = pthread_create(&worker_threads[i], NULL,
1596 test_membarrier_worker_thread, &thread_args);
1597 if (ret) {
1598 errno = ret;
1599 perror("pthread_create");
1600 abort();
1601 }
1602 }
1603
1604
1605 for (i = 0; i < num_threads; i++) {
1606 ret = pthread_join(worker_threads[i], NULL);
1607 if (ret) {
1608 errno = ret;
1609 perror("pthread_join");
1610 abort();
1611 }
1612 }
1613
1614 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1615 ret = pthread_join(manager_thread, NULL);
1616 if (ret) {
1617 errno = ret;
1618 perror("pthread_join");
1619 abort();
1620 }
1621 }
1622 #else /* TEST_MEMBARRIER */
1623 static
1624 void test_membarrier(void)
1625 {
1626 if (!membarrier_private_expedited_rseq_available()) {
1627 fprintf(stderr, "Membarrier private expedited rseq not available. "
1628 "Skipping membarrier test.\n");
1629 return;
1630 }
1631 fprintf(stderr, "rseq_load_cbne_load_add_load_add_store__ptr is not implemented on this architecture. "
1632 "Skipping membarrier test.\n");
1633 }
1634 #endif
1635
1636 static void show_usage(char **argv)
1637 {
1638 printf("Usage : %s <OPTIONS>\n",
1639 argv[0]);
1640 printf("OPTIONS:\n");
1641 printf(" [-1 loops] Number of loops for delay injection 1\n");
1642 printf(" [-2 loops] Number of loops for delay injection 2\n");
1643 printf(" [-3 loops] Number of loops for delay injection 3\n");
1644 printf(" [-4 loops] Number of loops for delay injection 4\n");
1645 printf(" [-5 loops] Number of loops for delay injection 5\n");
1646 printf(" [-6 loops] Number of loops for delay injection 6\n");
1647 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1648 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1649 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1650 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1651 printf(" [-y] Yield\n");
1652 printf(" [-k] Kill thread with signal\n");
1653 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1654 printf(" [-t N] Number of threads (default 200)\n");
1655 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1656 printf(" [-d] Disable rseq system call (no initialization)\n");
1657 printf(" [-D M] Disable rseq for each M threads\n");
1658 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1659 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1660 printf(" [-c] Check if the rseq syscall is available.\n");
1661 printf(" [-v] Verbose output.\n");
1662 printf(" [-h] Show this help.\n");
1663 printf("\n");
1664 }
1665
1666 int main(int argc, char **argv)
1667 {
1668 int i;
1669
1670 for (i = 1; i < argc; i++) {
1671 if (argv[i][0] != '-')
1672 continue;
1673 switch (argv[i][1]) {
1674 case '1':
1675 case '2':
1676 case '3':
1677 case '4':
1678 case '5':
1679 case '6':
1680 case '7':
1681 case '8':
1682 case '9':
1683 if (argc < i + 2) {
1684 show_usage(argv);
1685 goto error;
1686 }
1687 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1688 i++;
1689 break;
1690 case 'm':
1691 if (argc < i + 2) {
1692 show_usage(argv);
1693 goto error;
1694 }
1695 opt_modulo = atol(argv[i + 1]);
1696 if (opt_modulo < 0) {
1697 show_usage(argv);
1698 goto error;
1699 }
1700 i++;
1701 break;
1702 case 's':
1703 if (argc < i + 2) {
1704 show_usage(argv);
1705 goto error;
1706 }
1707 opt_sleep = atol(argv[i + 1]);
1708 if (opt_sleep < 0) {
1709 show_usage(argv);
1710 goto error;
1711 }
1712 i++;
1713 break;
1714 case 'y':
1715 opt_yield = 1;
1716 break;
1717 case 'k':
1718 opt_signal = 1;
1719 break;
1720 case 'd':
1721 opt_disable_rseq = 1;
1722 break;
1723 case 'D':
1724 if (argc < i + 2) {
1725 show_usage(argv);
1726 goto error;
1727 }
1728 opt_disable_mod = atol(argv[i + 1]);
1729 if (opt_disable_mod < 0) {
1730 show_usage(argv);
1731 goto error;
1732 }
1733 i++;
1734 break;
1735 case 't':
1736 if (argc < i + 2) {
1737 show_usage(argv);
1738 goto error;
1739 }
1740 opt_threads = atol(argv[i + 1]);
1741 if (opt_threads < 0) {
1742 show_usage(argv);
1743 goto error;
1744 }
1745 i++;
1746 break;
1747 case 'r':
1748 if (argc < i + 2) {
1749 show_usage(argv);
1750 goto error;
1751 }
1752 opt_reps = atoll(argv[i + 1]);
1753 if (opt_reps < 0) {
1754 show_usage(argv);
1755 goto error;
1756 }
1757 i++;
1758 break;
1759 case 'h':
1760 show_usage(argv);
1761 goto end;
1762 case 'T':
1763 if (argc < i + 2) {
1764 show_usage(argv);
1765 goto error;
1766 }
1767 opt_test = *argv[i + 1];
1768 switch (opt_test) {
1769 case 's':
1770 case 'l':
1771 case 'i':
1772 case 'b':
1773 case 'm':
1774 case 'r':
1775 break;
1776 default:
1777 show_usage(argv);
1778 goto error;
1779 }
1780 i++;
1781 break;
1782 case 'v':
1783 verbose = 1;
1784 break;
1785 case 'M':
1786 opt_mo = RSEQ_MO_RELEASE;
1787 break;
1788 case 'c':
1789 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1790 printf_verbose("The rseq syscall is available.\n");
1791 goto end;
1792 } else {
1793 printf_verbose("The rseq syscall is unavailable.\n");
1794 goto no_rseq;
1795 }
1796 default:
1797 show_usage(argv);
1798 goto error;
1799 }
1800 }
1801
1802 loop_cnt_1 = loop_cnt[1];
1803 loop_cnt_2 = loop_cnt[2];
1804 loop_cnt_3 = loop_cnt[3];
1805 loop_cnt_4 = loop_cnt[4];
1806 loop_cnt_5 = loop_cnt[5];
1807 loop_cnt_6 = loop_cnt[6];
1808
1809 if (set_signal_handler())
1810 goto error;
1811
1812 if (!opt_disable_rseq && rseq_register_current_thread())
1813 goto error;
1814 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1815 printf_verbose("The rseq cpu id getter is unavailable\n");
1816 goto no_rseq;
1817 }
1818 switch (opt_test) {
1819 case 's':
1820 printf_verbose("spinlock\n");
1821 test_percpu_spinlock();
1822 break;
1823 case 'l':
1824 printf_verbose("linked list\n");
1825 test_percpu_list();
1826 break;
1827 case 'b':
1828 printf_verbose("buffer\n");
1829 test_percpu_buffer();
1830 break;
1831 case 'm':
1832 printf_verbose("memcpy buffer\n");
1833 test_percpu_memcpy_buffer();
1834 break;
1835 case 'i':
1836 printf_verbose("counter increment\n");
1837 test_percpu_inc();
1838 break;
1839 case 'r':
1840 printf_verbose("membarrier\n");
1841 test_membarrier();
1842 break;
1843 }
1844 if (!opt_disable_rseq && rseq_unregister_current_thread())
1845 abort();
1846 end:
1847 return 0;
1848
1849 error:
1850 return -1;
1851
1852 no_rseq:
1853 return 2;
1854 }
This page took 0.068978 seconds and 4 git commands to generate.