// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <assert.h>
#include <linux/version.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>

#include <rseq/percpu-alloc.h>

#define PERCPU_POOL_LEN	(1024*1024)	/* 1MB */

#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
enum {
	MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ			= (1 << 7),
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ		= (1 << 8),
};

enum {
	MEMBARRIER_CMD_FLAG_CPU		= (1 << 0),
};
#endif

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)		\
	do {					\
		if (verbose)			\
			printf(fmt, ## __VA_ARGS__); \
	} while (0)

#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

/*
 * Use ip-relative addressing to get the loop counter.
 */
#define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
	"movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
	"leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
	"movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#define RSEQ_INJECT_ASM(n) \
	__RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else \
			yield_mod_cnt++; \
	} \
}

#else /* BENCHMARK */

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

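/*
 * Editor's sketch (not part of the original test, compiled out): the
 * C-level equivalent of what each RSEQ_INJECT_ASM(n) expansion does
 * inside an rseq critical section, assuming loop_cnt[n] holds the
 * requested delay. The asm busy-waits to widen the race window so
 * preemption and aborts are more likely to land inside the section.
 */
#if 0
static inline void example_inject_delay(int n)
{
	int remaining = loop_cnt[n];	/* loaded once, like the asm does */

	while (remaining > 0)		/* pure busy-wait, no side effects */
		remaining--;
}
#endif
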
#include <rseq/rseq.h>

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}

#ifdef rseq_arch_has_load_cbne_load_add_load_add_store
#define TEST_MEMBARRIER
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
static
bool rseq_use_cpu_index(void)
{
	return false;	/* Use mm_cid */
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
static
bool rseq_use_cpu_index(void)
{
	return true;	/* Use cpu_id as index. */
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif

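/*
 * Editor's sketch (not part of the original test, compiled out): how
 * the helpers selected above combine with the per-cpu allocator. The
 * index returned by get_current_cpu_id() (a cpu number or an mm_cid,
 * depending on the build) selects one per-cpu copy within a pool
 * allocation.
 */
#if 0
static intptr_t example_read_my_slot(intptr_t __rseq_percpu *p)
{
	int idx = get_current_cpu_id();	/* cpu_id or mm_cid */

	return *rseq_percpu_ptr(p, idx);
}
#endif
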
struct percpu_lock {
	intptr_t v;
};

struct spinlock_test_data {
	struct percpu_lock lock;
	intptr_t count;
};

struct spinlock_thread_test_data {
	struct spinlock_test_data __rseq_percpu *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	intptr_t count;
};

struct inc_thread_test_data {
	struct inc_test_data __rseq_percpu *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list {
	struct percpu_list_node *head;
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
				getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						&rseq_percpu_ptr(lock, cpu)->v,
						0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
{
	assert(rseq_percpu_ptr(lock, cpu)->v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
}

static void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
	struct spinlock_test_data __rseq_percpu *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		rseq_percpu_ptr(data, cpu)->count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
static void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data __rseq_percpu *data;
	struct spinlock_thread_test_data thread_data[num_threads];
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create("spinlock_test_data",
			sizeof(struct spinlock_test_data),
			PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
	if (!data) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += rseq_percpu_ptr(data, i)->count;

	assert(sum == (uint64_t)opt_reps * num_threads);
	rseq_percpu_free(data);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}

static void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
	struct inc_test_data __rseq_percpu *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&rseq_percpu_ptr(data, cpu)->count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

static void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data __rseq_percpu *data;
	struct inc_thread_test_data thread_data[num_threads];
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create("inc_test_data",
			sizeof(struct inc_test_data),
			PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
	if (!data) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += rseq_percpu_ptr(data, i)->count;

	assert(sum == (uint64_t)opt_reps * num_threads);
	rseq_percpu_free(data);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}

static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
			       struct percpu_list_node *node,
			       int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		struct percpu_list *cpulist;
		int ret;

		cpu = get_current_cpu_id();
		cpulist = rseq_percpu_ptr(list, cpu);
		/* Load cpulist->head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&cpulist->head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

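/*
 * Editor's sketch (not part of the original test, compiled out): the
 * sequential equivalent of the push above. The rseq primitive makes
 * the head comparison and the head store one per-cpu atomic step, so
 * no cmpxchg retry loop against other threads on the same cpu is
 * needed.
 */
#if 0
static void example_push_logic(struct percpu_list *cpulist,
			       struct percpu_list_node *node)
{
	node->next = cpulist->head;	/* link node in front of current head */
	cpulist->head = node;		/* publish the new head */
}
#endif
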
/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
						  int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		struct percpu_list *cpulist;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		cpulist = rseq_percpu_ptr(list, cpu);
		targetptr = (intptr_t *)&cpulist->head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
							       targetptr, expectnot,
							       offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

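/*
 * Editor's sketch (not part of the original test, compiled out): the
 * sequential equivalent of the pop above. The load of head, the load
 * of head->next and the store back to head all commit within a single
 * rseq critical section, which is why the comment above can rule out
 * ABA-type races without hazard pointers or deferred reclamation.
 */
#if 0
static struct percpu_list_node *example_pop_logic(struct percpu_list *cpulist)
{
	struct percpu_list_node *head = cpulist->head;

	if (!head)			/* empty list */
		return NULL;
	cpulist->head = head->next;	/* unlink in the same critical section */
	return head;
}
#endif
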
/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
{
	struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
	struct percpu_list_node *node;

	node = cpulist->head;
	if (!node)
		return NULL;
	cpulist->head = node->next;
	return node;
}

static void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
static void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list __rseq_percpu *list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create("percpu_list", sizeof(struct percpu_list),
			PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
	if (!list) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
			struct percpu_list_node *node;

			expected_sum += j;

			node = (struct percpu_list_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = cpulist->head;
			cpulist->head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
	rseq_percpu_free(list);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}

static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
				 struct percpu_buffer_node *node,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		struct percpu_buffer *cpubuffer;
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		cpubuffer = rseq_percpu_ptr(buffer, cpu);
		offset = RSEQ_READ_ONCE(cpubuffer->offset);
		if (offset == cpubuffer->buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
		newval_final = offset + 1;
		targetptr_final = &cpubuffer->offset;
		ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
						      int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		struct percpu_buffer *cpubuffer;
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		cpubuffer = rseq_percpu_ptr(buffer, cpu);
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(cpubuffer->offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&cpubuffer->offset;
		ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&cpubuffer->array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
						      int cpu)
{
	struct percpu_buffer *cpubuffer;
	struct percpu_buffer_node *head;
	intptr_t offset;

	cpubuffer = rseq_percpu_ptr(buffer, cpu);
	offset = cpubuffer->offset;
	if (offset == 0)
		return NULL;
	head = cpubuffer->array[offset - 1];
	cpubuffer->offset = offset - 1;
	return head;
}

static void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
static void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer __rseq_percpu *buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create("percpu_buffer", sizeof(struct percpu_buffer),
			PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	buffer = (struct percpu_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
	if (!buffer) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer *cpubuffer;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		cpubuffer = rseq_percpu_ptr(buffer, i);
		/* Worst-case is every item in same CPU. */
		cpubuffer->array =
			(struct percpu_buffer_node **)
			malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(cpubuffer->array);
		cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = (struct percpu_buffer_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			cpubuffer->array[j - 1] = node;
			cpubuffer->offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer *cpubuffer;
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		cpubuffer = rseq_percpu_ptr(buffer, i);
		while ((node = __percpu_buffer_pop(buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(cpubuffer->array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
	rseq_percpu_free(buffer);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}

static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
					struct percpu_memcpy_buffer_node item,
					int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		struct percpu_memcpy_buffer *cpubuffer;
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		cpubuffer = rseq_percpu_ptr(buffer, cpu);
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(cpubuffer->offset);
		if (offset == cpubuffer->buflen)
			break;
		destptr = (char *)&cpubuffer->array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &cpubuffer->offset;
		ret = rseq_load_cbne_memcpy_store__ptr(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
				       struct percpu_memcpy_buffer_node *item,
				       int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		struct percpu_memcpy_buffer *cpubuffer;
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		cpubuffer = rseq_percpu_ptr(buffer, cpu);
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(cpubuffer->offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&cpubuffer->array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &cpubuffer->offset;
		ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr_final, offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
				       struct percpu_memcpy_buffer_node *item,
				       int cpu)
{
	struct percpu_memcpy_buffer *cpubuffer;
	intptr_t offset;

	cpubuffer = rseq_percpu_ptr(buffer, cpu);
	offset = cpubuffer->offset;
	if (offset == 0)
		return false;
	memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
	cpubuffer->offset = offset - 1;
	return true;
}

static void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
static void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer __rseq_percpu *buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create("percpu_memcpy_buffer",
			sizeof(struct percpu_memcpy_buffer),
			PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
	if (!buffer) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer *cpubuffer;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		cpubuffer = rseq_percpu_ptr(buffer, i);
		/* Worst-case is every item in same CPU. */
		cpubuffer->array =
			(struct percpu_memcpy_buffer_node *)
			malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(cpubuffer->array);
		cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so each node carries
			 * two fields that are copied with memcpy.
			 */
			cpubuffer->array[j - 1].data1 = j;
			cpubuffer->array[j - 1].data2 = j + 1;
			cpubuffer->offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;
		struct percpu_memcpy_buffer *cpubuffer;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		cpubuffer = rseq_percpu_ptr(buffer, i);
		while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(cpubuffer->array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
	rseq_percpu_free(buffer);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}

static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

static
bool membarrier_private_expedited_rseq_available(void)
{
	int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);

	if (status < 0) {
		perror("membarrier");
		return false;
	}
	if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
		return false;
	return true;
}

/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
	struct rseq_percpu_pool *mempool;
	struct percpu_list __rseq_percpu *percpu_list_ptr;
	intptr_t stop;
};

/* Worker threads modify data in their "active" percpu lists. */
static
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const long long iters = opt_reps;
	long long i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = get_current_cpu_id();
			struct percpu_list __rseq_percpu *list = RSEQ_READ_ONCE(args->percpu_list_ptr);
			struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);

			ret = rseq_load_cbne_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				(intptr_t *) &args->percpu_list_ptr,
				(intptr_t) list, (intptr_t *) &cpulist->head, 0, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

/* Returns a per-cpu list with one zero-initialized node per cpu index. */
static
struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_percpu_pool *mempool)
{
	struct percpu_list __rseq_percpu *list;
	int i;

	list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
	if (!list) {
		perror("rseq_percpu_zmalloc");
		return NULL;
	}
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
		struct percpu_list_node *node;

		node = (struct percpu_list_node *) malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		cpulist->head = node;
	}
	return list;
}

static
void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(rseq_percpu_ptr(list, i)->head);
	rseq_percpu_free(list);
}

static
long long test_membarrier_count_percpu_list(struct percpu_list __rseq_percpu *list)
{
	long long total_count = 0;
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		total_count += rseq_percpu_ptr(list, i)->head->data;
	return total_count;
}

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
static
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;
	struct rseq_percpu_pool *mempool;
	int ret;
	long long total_count = 0;

	mempool = rseq_percpu_pool_create("percpu_list", sizeof(struct percpu_list),
			PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	args->mempool = mempool;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	list_a = test_membarrier_alloc_percpu_list(mempool);
	assert(list_a);
	list_b = test_membarrier_alloc_percpu_list(mempool);
	assert(list_b);

	/* Initialize lists before publishing them. */
	rseq_smp_wmb();

	RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);

	while (!RSEQ_READ_ONCE(args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
		if (rseq_membarrier_expedited(cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
		if (rseq_membarrier_expedited(cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
	}

	total_count += test_membarrier_count_percpu_list(list_a);
	total_count += test_membarrier_count_percpu_list(list_b);

	/* Validate that we observe the right number of increments. */
	if (total_count != opt_threads * opt_reps) {
		fprintf(stderr, "Error: Observed %lld increments, expected %lld\n",
			total_count, opt_threads * opt_reps);
		abort();
	}
	test_membarrier_free_percpu_list(list_a);
	test_membarrier_free_percpu_list(list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}

	return NULL;
}

static
void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (!membarrier_private_expedited_rseq_available()) {
		fprintf(stderr, "Membarrier private expedited rseq not available. "
				"Skipping membarrier test.\n");
		return;
	}
	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.percpu_list_ptr = NULL;
	thread_args.stop = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	RSEQ_WRITE_ONCE(thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* TEST_MEMBARRIER */
static
void test_membarrier(void)
{
	if (!membarrier_private_expedited_rseq_available()) {
		fprintf(stderr, "Membarrier private expedited rseq not available. "
				"Skipping membarrier test.\n");
		return;
	}
	fprintf(stderr, "rseq_load_cbne_load_add_load_add_store__ptr is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif /* TEST_MEMBARRIER */

static void show_usage(char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-c] Check if the rseq syscall is available.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1': case '2': case '3': case '4': case '5':
		case '6': case '7': case '8': case '9':
			if (argc < i + 2) { show_usage(argv); goto error; }
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) { show_usage(argv); goto error; }
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) { show_usage(argv); goto error; }
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) { show_usage(argv); goto error; }
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) { show_usage(argv); goto error; }
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) { show_usage(argv); goto error; }
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argv);
			goto end;
		case 'T':
			if (argc < i + 2) { show_usage(argv); goto error; }
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's': case 'l': case 'i': case 'b':
			case 'm': case 'r':
				break;
			default:
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mo = RSEQ_MO_RELEASE;
			break;
		case 'c':
			if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
				printf_verbose("The rseq syscall is available.\n");
				goto end;
			} else {
				printf_verbose("The rseq syscall is unavailable.\n");
				goto no_rseq;
			}
		default:
			show_usage(argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
		printf_verbose("The rseq cpu id getter is unavailable\n");
		goto no_rseq;
	}
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

no_rseq:
	return 2;

error:
	return -1;
}
