1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6 #include <assert.h>
7 #include <linux/version.h>
8 #include <linux/membarrier.h>
9 #include <pthread.h>
10 #include <sched.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <syscall.h>
16 #include <unistd.h>
17 #include <poll.h>
18 #include <sys/types.h>
19 #include <signal.h>
20 #include <errno.h>
21 #include <stddef.h>
22 #include <stdbool.h>
23 #include <rseq/mempool.h>
24
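/*
 * Fallback definitions of the rseq-related membarrier commands and flags,
 * for builds against kernel headers older than 5.10 which do not provide
 * them.
 */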
25 #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
26 enum {
27 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
28 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
29 };
30
31 enum {
32 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
33 };
34 #endif
35
36 #define NR_INJECT 9
37 static int loop_cnt[NR_INJECT + 1];
38
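/*
 * Copies of the loop counters exposed under fixed assembler symbol names
 * (asm_loop_cnt_N) so the x86 delay-injection inline assembly below can
 * address them directly; other architectures reference loop_cnt[] through
 * asm input operands instead.
 */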
39 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
40 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
41 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
42 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
43 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
44 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
45
46 static int opt_modulo, verbose;
47
48 static int opt_yield, opt_signal, opt_sleep,
49 opt_disable_rseq, opt_threads = 200,
50 opt_disable_mod = 0, opt_test = 's';
51
52 static long long opt_reps = 5000;
53
54 static __thread __attribute__((tls_model("initial-exec")))
55 unsigned int signals_delivered;
56
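/* gettid(2) wrapper: call the syscall directly for portability across libc versions. */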
57 static inline pid_t rseq_gettid(void)
58 {
59 return syscall(__NR_gettid);
60 }
61
62 #ifndef BENCHMARK
63
64 static __thread __attribute__((tls_model("initial-exec"), unused))
65 int yield_mod_cnt, nr_abort;
66
67 #define printf_verbose(fmt, ...) \
68 do { \
69 if (verbose) \
70 printf(fmt, ## __VA_ARGS__); \
71 } while (0)
72
73 #ifdef __i386__
74
75 #define INJECT_ASM_REG "eax"
76
77 #define RSEQ_INJECT_CLOBBER \
78 , INJECT_ASM_REG
79
80 /*
81 * Use ip-relative addressing to get the loop counter.
82 */
83 #define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
84 "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
85 "leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
86 "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
87 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
88 "jz 333f\n\t" \
89 "222:\n\t" \
90 "dec %%" INJECT_ASM_REG "\n\t" \
91 "jnz 222b\n\t" \
92 "333:\n\t"
93
94 #define RSEQ_INJECT_ASM(n) \
95 __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
96
97 #elif defined(__x86_64__)
98
99 #define INJECT_ASM_REG_P "rax"
100 #define INJECT_ASM_REG "eax"
101
102 #define RSEQ_INJECT_CLOBBER \
103 , INJECT_ASM_REG_P \
104 , INJECT_ASM_REG
105
106 #define RSEQ_INJECT_ASM(n) \
107 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
108 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
109 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
110 "jz 333f\n\t" \
111 "222:\n\t" \
112 "dec %%" INJECT_ASM_REG "\n\t" \
113 "jnz 222b\n\t" \
114 "333:\n\t"
115
116 #elif defined(__s390__)
117
118 #define RSEQ_INJECT_INPUT \
119 , [loop_cnt_1]"m"(loop_cnt[1]) \
120 , [loop_cnt_2]"m"(loop_cnt[2]) \
121 , [loop_cnt_3]"m"(loop_cnt[3]) \
122 , [loop_cnt_4]"m"(loop_cnt[4]) \
123 , [loop_cnt_5]"m"(loop_cnt[5]) \
124 , [loop_cnt_6]"m"(loop_cnt[6])
125
126 #define INJECT_ASM_REG "r12"
127
128 #define RSEQ_INJECT_CLOBBER \
129 , INJECT_ASM_REG
130
131 #define RSEQ_INJECT_ASM(n) \
132 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
133 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
134 "je 333f\n\t" \
135 "222:\n\t" \
136 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
137 "jnz 222b\n\t" \
138 "333:\n\t"
139
140 #elif defined(__ARMEL__)
141
142 #define RSEQ_INJECT_INPUT \
143 , [loop_cnt_1]"m"(loop_cnt[1]) \
144 , [loop_cnt_2]"m"(loop_cnt[2]) \
145 , [loop_cnt_3]"m"(loop_cnt[3]) \
146 , [loop_cnt_4]"m"(loop_cnt[4]) \
147 , [loop_cnt_5]"m"(loop_cnt[5]) \
148 , [loop_cnt_6]"m"(loop_cnt[6])
149
150 #define INJECT_ASM_REG "r4"
151
152 #define RSEQ_INJECT_CLOBBER \
153 , INJECT_ASM_REG
154
155 #define RSEQ_INJECT_ASM(n) \
156 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
157 "cmp " INJECT_ASM_REG ", #0\n\t" \
158 "beq 333f\n\t" \
159 "222:\n\t" \
160 "subs " INJECT_ASM_REG ", #1\n\t" \
161 "bne 222b\n\t" \
162 "333:\n\t"
163
164 #elif defined(__AARCH64EL__)
165
166 #define RSEQ_INJECT_INPUT \
167 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
168 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
169 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
170 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
171 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
172 , [loop_cnt_6] "Qo" (loop_cnt[6])
173
174 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
175
176 #define RSEQ_INJECT_ASM(n) \
177 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
178 " cbz " INJECT_ASM_REG ", 333f\n" \
179 "222:\n" \
180 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
181 " cbnz " INJECT_ASM_REG ", 222b\n" \
182 "333:\n"
183
184 #elif defined(__PPC__)
185
186 #define RSEQ_INJECT_INPUT \
187 , [loop_cnt_1]"m"(loop_cnt[1]) \
188 , [loop_cnt_2]"m"(loop_cnt[2]) \
189 , [loop_cnt_3]"m"(loop_cnt[3]) \
190 , [loop_cnt_4]"m"(loop_cnt[4]) \
191 , [loop_cnt_5]"m"(loop_cnt[5]) \
192 , [loop_cnt_6]"m"(loop_cnt[6])
193
194 #define INJECT_ASM_REG "r18"
195
196 #define RSEQ_INJECT_CLOBBER \
197 , INJECT_ASM_REG
198
199 #define RSEQ_INJECT_ASM(n) \
200 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
201 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
202 "beq 333f\n\t" \
203 "222:\n\t" \
204 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
205 "bne 222b\n\t" \
206 "333:\n\t"
207
208 #elif defined(__mips__)
209
210 #define RSEQ_INJECT_INPUT \
211 , [loop_cnt_1]"m"(loop_cnt[1]) \
212 , [loop_cnt_2]"m"(loop_cnt[2]) \
213 , [loop_cnt_3]"m"(loop_cnt[3]) \
214 , [loop_cnt_4]"m"(loop_cnt[4]) \
215 , [loop_cnt_5]"m"(loop_cnt[5]) \
216 , [loop_cnt_6]"m"(loop_cnt[6])
217
218 #define INJECT_ASM_REG "$5"
219
220 #define RSEQ_INJECT_CLOBBER \
221 , INJECT_ASM_REG
222
223 #define RSEQ_INJECT_ASM(n) \
224 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
225 "beqz " INJECT_ASM_REG ", 333f\n\t" \
226 "222:\n\t" \
227 "addiu " INJECT_ASM_REG ", -1\n\t" \
228 "bnez " INJECT_ASM_REG ", 222b\n\t" \
229 "333:\n\t"
230
231 #elif defined(__riscv)
232
233 #define RSEQ_INJECT_INPUT \
234 , [loop_cnt_1]"m"(loop_cnt[1]) \
235 , [loop_cnt_2]"m"(loop_cnt[2]) \
236 , [loop_cnt_3]"m"(loop_cnt[3]) \
237 , [loop_cnt_4]"m"(loop_cnt[4]) \
238 , [loop_cnt_5]"m"(loop_cnt[5]) \
239 , [loop_cnt_6]"m"(loop_cnt[6])
240
241 #define INJECT_ASM_REG "t1"
242
243 #define RSEQ_INJECT_CLOBBER \
244 , INJECT_ASM_REG
245
246 #define RSEQ_INJECT_ASM(n) \
247 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
248 "beqz " INJECT_ASM_REG ", 333f\n\t" \
249 "222:\n\t" \
250 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
251 "bnez " INJECT_ASM_REG ", 222b\n\t" \
252 "333:\n\t"
253
254 #else
255 #error unsupported target
256 #endif
257
258 #define RSEQ_INJECT_FAILED \
259 nr_abort++;
260
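/*
 * C-level delay injection: spin for loop_cnt[n] iterations. A counter value
 * of -1, combined with the -m option, instead triggers a yield, sleep or
 * signal every opt_modulo passes through this site.
 */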
261 #define RSEQ_INJECT_C(n) \
262 { \
263 int loc_i, loc_nr_loops = loop_cnt[n]; \
264 \
265 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
266 rseq_barrier(); \
267 } \
268 if (loc_nr_loops == -1 && opt_modulo) { \
269 if (yield_mod_cnt == opt_modulo - 1) { \
270 if (opt_sleep > 0) \
271 poll(NULL, 0, opt_sleep); \
272 if (opt_yield) \
273 sched_yield(); \
274 if (opt_signal) \
275 raise(SIGUSR1); \
276 yield_mod_cnt = 0; \
277 } else { \
278 yield_mod_cnt++; \
279 } \
280 } \
281 }
282
283 #else
284
285 #define printf_verbose(fmt, ...)
286
287 #endif /* BENCHMARK */
288
289 #include <rseq/rseq.h>
290
291 static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
292
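/* Thin wrapper around the membarrier(2) system call. */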
293 static int sys_membarrier(int cmd, int flags, int cpu_id)
294 {
295 return syscall(__NR_membarrier, cmd, flags, cpu_id);
296 }
297
298 #ifdef rseq_arch_has_load_add_load_load_add_store
299 #define TEST_MEMBARRIER
300 #endif
301
302 #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
303 # define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
304 static
305 int get_current_cpu_id(void)
306 {
307 return rseq_current_mm_cid();
308 }
309 static
310 bool rseq_validate_cpu_id(void)
311 {
312 return rseq_mm_cid_available();
313 }
314 static
315 bool rseq_use_cpu_index(void)
316 {
317 return false; /* Use mm_cid */
318 }
319 # ifdef TEST_MEMBARRIER
320 /*
321 * Membarrier does not currently support targeting a mm_cid, so
322 * issue the barrier on all cpus.
323 */
324 static
325 int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
326 {
327 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
328 0, 0);
329 }
330 # endif /* TEST_MEMBARRIER */
331 #else
332 # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
333 static
334 int get_current_cpu_id(void)
335 {
336 return rseq_cpu_start();
337 }
338 static
339 bool rseq_validate_cpu_id(void)
340 {
341 return rseq_current_cpu_raw() >= 0;
342 }
343 static
344 bool rseq_use_cpu_index(void)
345 {
346 return true; /* Use cpu_id as index. */
347 }
348 # ifdef TEST_MEMBARRIER
349 static
350 int rseq_membarrier_expedited(int cpu)
351 {
352 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
353 MEMBARRIER_CMD_FLAG_CPU, cpu);
354 }
355 # endif /* TEST_MEMBARRIER */
356 #endif
357
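/*
 * Data structures used by the tests below. The *_test_data structures are
 * allocated from rseq per-cpu mempools (one instance per possible CPU, or
 * per concurrency ID when mm_cid indexing is used); the *_thread_test_data
 * structures carry per-thread parameters.
 */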
358 struct percpu_lock {
359 intptr_t v;
360 };
361
362 struct spinlock_test_data {
363 struct percpu_lock lock;
364 intptr_t count;
365 };
366
367 struct spinlock_thread_test_data {
368 struct spinlock_test_data __rseq_percpu *data;
369 long long reps;
370 int reg;
371 };
372
373 struct inc_test_data {
374 intptr_t count;
375 };
376
377 struct inc_thread_test_data {
378 struct inc_test_data __rseq_percpu *data;
379 long long reps;
380 int reg;
381 };
382
383 struct percpu_list_node {
384 intptr_t data;
385 struct percpu_list_node *next;
386 };
387
388 struct percpu_list {
389 struct percpu_list_node *head;
390 };
391
392 #define BUFFER_ITEM_PER_CPU 100
393
394 struct percpu_buffer_node {
395 intptr_t data;
396 };
397
398 struct percpu_buffer {
399 intptr_t offset;
400 intptr_t buflen;
401 struct percpu_buffer_node **array;
402 };
403
404 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
405
406 struct percpu_memcpy_buffer_node {
407 intptr_t data1;
408 uint64_t data2;
409 };
410
411 struct percpu_memcpy_buffer {
412 intptr_t offset;
413 intptr_t buflen;
414 struct percpu_memcpy_buffer_node *array;
415 };
416
417 /* A simple percpu spinlock. Grabs the lock on the current cpu. */
418 static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
419 {
420 int cpu;
421
422 for (;;) {
423 int ret;
424
425 cpu = get_current_cpu_id();
426 if (cpu < 0) {
427 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
428 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
429 abort();
430 }
431 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
432 &rseq_percpu_ptr(lock, cpu)->v,
433 0, 1, cpu);
434 if (rseq_likely(!ret))
435 break;
436 /* Retry if comparison fails or rseq aborts. */
437 }
438 /*
439 * Acquire semantic when taking lock after control dependency.
440 * Matches rseq_smp_store_release().
441 */
442 rseq_smp_acquire__after_ctrl_dep();
443 return cpu;
444 }
445
446 static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
447 {
448 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
449 /*
450 * Release lock, with release semantic. Matches
451 * rseq_smp_acquire__after_ctrl_dep().
452 */
453 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
454 }
455
456 static void *test_percpu_spinlock_thread(void *arg)
457 {
458 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
459 struct spinlock_test_data __rseq_percpu *data = thread_data->data;
460 long long i, reps;
461
462 if (!opt_disable_rseq && thread_data->reg &&
463 rseq_register_current_thread())
464 abort();
465 reps = thread_data->reps;
466 for (i = 0; i < reps; i++) {
467 int cpu = rseq_this_cpu_lock(&data->lock);
468 rseq_percpu_ptr(data, cpu)->count++;
469 rseq_percpu_unlock(&data->lock, cpu);
470 #ifndef BENCHMARK
471 if (i != 0 && !(i % (reps / 10)))
472 printf_verbose("tid %d: count %lld\n",
473 (int) rseq_gettid(), i);
474 #endif
475 }
476 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
477 (int) rseq_gettid(), nr_abort, signals_delivered);
478 if (!opt_disable_rseq && thread_data->reg &&
479 rseq_unregister_current_thread())
480 abort();
481 return NULL;
482 }
483
484 /*
485 * A simple test which implements a sharded counter using a per-cpu
486 * lock. Obviously real applications might prefer to simply use a
487 * per-cpu increment; however, this is reasonable for a test and the
488 * lock can be extended to synchronize more complicated operations.
489 */
490 static void test_percpu_spinlock(void)
491 {
492 const int num_threads = opt_threads;
493 int i, ret;
494 uint64_t sum;
495 pthread_t test_threads[num_threads];
496 struct spinlock_test_data __rseq_percpu *data;
497 struct spinlock_thread_test_data thread_data[num_threads];
498 struct rseq_mempool *mempool;
499 struct rseq_mempool_attr *attr;
500
501 attr = rseq_mempool_attr_create();
502 if (!attr) {
503 perror("rseq_mempool_attr_create");
504 abort();
505 }
506 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, CPU_SETSIZE);
507 if (ret) {
508 perror("rseq_mempool_attr_set_percpu");
509 abort();
510 }
511 mempool = rseq_mempool_create("spinlock_test_data",
512 sizeof(struct spinlock_test_data), attr);
513 if (!mempool) {
514 perror("rseq_mempool_create");
515 abort();
516 }
517 rseq_mempool_attr_destroy(attr);
518 data = (struct spinlock_test_data __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
519 if (!data) {
520 perror("rseq_mempool_percpu_zmalloc");
521 abort();
522 }
523
524 for (i = 0; i < num_threads; i++) {
525 thread_data[i].reps = opt_reps;
526 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
527 thread_data[i].reg = 1;
528 else
529 thread_data[i].reg = 0;
530 thread_data[i].data = data;
531 ret = pthread_create(&test_threads[i], NULL,
532 test_percpu_spinlock_thread,
533 &thread_data[i]);
534 if (ret) {
535 errno = ret;
536 perror("pthread_create");
537 abort();
538 }
539 }
540
541 for (i = 0; i < num_threads; i++) {
542 ret = pthread_join(test_threads[i], NULL);
543 if (ret) {
544 errno = ret;
545 perror("pthread_join");
546 abort();
547 }
548 }
549
550 sum = 0;
551 for (i = 0; i < CPU_SETSIZE; i++)
552 sum += rseq_percpu_ptr(data, i)->count;
553
554 assert(sum == (uint64_t)opt_reps * num_threads);
555 rseq_mempool_percpu_free(data);
556 ret = rseq_mempool_destroy(mempool);
557 if (ret) {
558 perror("rseq_mempool_destroy");
559 abort();
560 }
561 }
562
563 static void *test_percpu_inc_thread(void *arg)
564 {
565 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
566 struct inc_test_data __rseq_percpu *data = thread_data->data;
567 long long i, reps;
568
569 if (!opt_disable_rseq && thread_data->reg &&
570 rseq_register_current_thread())
571 abort();
572 reps = thread_data->reps;
573 for (i = 0; i < reps; i++) {
574 int ret;
575
576 do {
577 int cpu;
578
579 cpu = get_current_cpu_id();
580 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
581 &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
582 } while (rseq_unlikely(ret));
583 #ifndef BENCHMARK
584 if (i != 0 && !(i % (reps / 10)))
585 printf_verbose("tid %d: count %lld\n",
586 (int) rseq_gettid(), i);
587 #endif
588 }
589 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
590 (int) rseq_gettid(), nr_abort, signals_delivered);
591 if (!opt_disable_rseq && thread_data->reg &&
592 rseq_unregister_current_thread())
593 abort();
594 return NULL;
595 }
596
597 static void test_percpu_inc(void)
598 {
599 const int num_threads = opt_threads;
600 int i, ret;
601 uint64_t sum;
602 pthread_t test_threads[num_threads];
603 struct inc_test_data __rseq_percpu *data;
604 struct inc_thread_test_data thread_data[num_threads];
605 struct rseq_mempool *mempool;
606 struct rseq_mempool_attr *attr;
607
608 attr = rseq_mempool_attr_create();
609 if (!attr) {
610 perror("rseq_mempool_attr_create");
611 abort();
612 }
613 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, CPU_SETSIZE);
614 if (ret) {
615 perror("rseq_mempool_attr_set_percpu");
616 abort();
617 }
618 mempool = rseq_mempool_create("inc_test_data",
619 sizeof(struct inc_test_data), attr);
620 if (!mempool) {
621 perror("rseq_mempool_create");
622 abort();
623 }
624 rseq_mempool_attr_destroy(attr);
625 data = (struct inc_test_data __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
626 if (!data) {
627 perror("rseq_mempool_percpu_zmalloc");
628 abort();
629 }
630
631 for (i = 0; i < num_threads; i++) {
632 thread_data[i].reps = opt_reps;
633 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
634 thread_data[i].reg = 1;
635 else
636 thread_data[i].reg = 0;
637 thread_data[i].data = data;
638 ret = pthread_create(&test_threads[i], NULL,
639 test_percpu_inc_thread,
640 &thread_data[i]);
641 if (ret) {
642 errno = ret;
643 perror("pthread_create");
644 abort();
645 }
646 }
647
648 for (i = 0; i < num_threads; i++) {
649 ret = pthread_join(test_threads[i], NULL);
650 if (ret) {
651 errno = ret;
652 perror("pthread_join");
653 abort();
654 }
655 }
656
657 sum = 0;
658 for (i = 0; i < CPU_SETSIZE; i++)
659 sum += rseq_percpu_ptr(data, i)->count;
660
661 assert(sum == (uint64_t)opt_reps * num_threads);
662 rseq_mempool_percpu_free(data);
663 ret = rseq_mempool_destroy(mempool);
664 if (ret) {
665 perror("rseq_mempool_destroy");
666 abort();
667 }
668 }
669
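/*
 * Push a node onto the current cpu's list, retrying if the rseq operation
 * aborts or the head comparison fails.
 */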
670 static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
671 struct percpu_list_node *node,
672 int *_cpu)
673 {
674 int cpu;
675
676 for (;;) {
677 intptr_t *targetptr, newval, expect;
678 struct percpu_list *cpulist;
679 int ret;
680
681 cpu = get_current_cpu_id();
682 cpulist = rseq_percpu_ptr(list, cpu);
683 /* Load list->c[cpu].head with single-copy atomicity. */
684 expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
685 newval = (intptr_t)node;
686 targetptr = (intptr_t *)&cpulist->head;
687 node->next = (struct percpu_list_node *)expect;
688 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
689 targetptr, expect, newval, cpu);
690 if (rseq_likely(!ret))
691 break;
692 /* Retry if comparison fails or rseq aborts. */
693 }
694 if (_cpu)
695 *_cpu = cpu;
696 }
697
698 /*
699 * Unlike a traditional lock-less linked list, the availability of a
700 * rseq primitive allows us to implement pop without concerns over
701 * ABA-type races.
702 */
703 static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
704 int *_cpu)
705 {
706 struct percpu_list_node *node = NULL;
707 int cpu;
708
709 for (;;) {
710 struct percpu_list_node *head;
711 intptr_t *targetptr, expectnot, *load;
712 struct percpu_list *cpulist;
713 long offset;
714 int ret;
715
716 cpu = get_current_cpu_id();
717 cpulist = rseq_percpu_ptr(list, cpu);
718 targetptr = (intptr_t *)&cpulist->head;
719 expectnot = (intptr_t)NULL;
720 offset = offsetof(struct percpu_list_node, next);
721 load = (intptr_t *)&head;
722 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
723 targetptr, expectnot,
724 offset, load, cpu);
725 if (rseq_likely(!ret)) {
726 node = head;
727 break;
728 }
729 if (ret > 0)
730 break;
731 /* Retry if rseq aborts. */
732 }
733 if (_cpu)
734 *_cpu = cpu;
735 return node;
736 }
737
738 /*
739 * __percpu_list_pop is not safe against concurrent accesses. Should
740 * only be used on lists that are not concurrently modified.
741 */
742 static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
743 {
744 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
745 struct percpu_list_node *node;
746
747 node = cpulist->head;
748 if (!node)
749 return NULL;
750 cpulist->head = node->next;
751 return node;
752 }
753
754 static void *test_percpu_list_thread(void *arg)
755 {
756 long long i, reps;
757 struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
758
759 if (!opt_disable_rseq && rseq_register_current_thread())
760 abort();
761
762 reps = opt_reps;
763 for (i = 0; i < reps; i++) {
764 struct percpu_list_node *node;
765
766 node = this_cpu_list_pop(list, NULL);
767 if (opt_yield)
768 sched_yield(); /* encourage shuffling */
769 if (node)
770 this_cpu_list_push(list, node, NULL);
771 }
772
773 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
774 (int) rseq_gettid(), nr_abort, signals_delivered);
775 if (!opt_disable_rseq && rseq_unregister_current_thread())
776 abort();
777
778 return NULL;
779 }
780
781 /* Simultaneous modification to a per-cpu linked list from many threads. */
782 static void test_percpu_list(void)
783 {
784 const int num_threads = opt_threads;
785 int i, j, ret;
786 uint64_t sum = 0, expected_sum = 0;
787 struct percpu_list __rseq_percpu *list;
788 pthread_t test_threads[num_threads];
789 cpu_set_t allowed_cpus;
790 struct rseq_mempool *mempool;
791 struct rseq_mempool_attr *attr;
792
793 attr = rseq_mempool_attr_create();
794 if (!attr) {
795 perror("rseq_mempool_attr_create");
796 abort();
797 }
798 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, CPU_SETSIZE);
799 if (ret) {
800 perror("rseq_mempool_attr_set_percpu");
801 abort();
802 }
803 mempool = rseq_mempool_create("percpu_list",
804 sizeof(struct percpu_list), attr);
805 if (!mempool) {
806 perror("rseq_mempool_create");
807 abort();
808 }
809 rseq_mempool_attr_destroy(attr);
810 list = (struct percpu_list __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
811 if (!list) {
812 perror("rseq_mempool_percpu_zmalloc");
813 abort();
814 }
815
816 /* Generate list entries for every usable cpu. */
817 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
818 for (i = 0; i < CPU_SETSIZE; i++) {
819 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
820 continue;
821 for (j = 1; j <= 100; j++) {
822 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
823 struct percpu_list_node *node;
824
825 expected_sum += j;
826
827 node = (struct percpu_list_node *) malloc(sizeof(*node));
828 assert(node);
829 node->data = j;
830 node->next = cpulist->head;
831 cpulist->head = node;
832 }
833 }
834
835 for (i = 0; i < num_threads; i++) {
836 ret = pthread_create(&test_threads[i], NULL,
837 test_percpu_list_thread, list);
838 if (ret) {
839 errno = ret;
840 perror("pthread_create");
841 abort();
842 }
843 }
844
845 for (i = 0; i < num_threads; i++) {
846 ret = pthread_join(test_threads[i], NULL);
847 if (ret) {
848 errno = ret;
849 perror("pthread_join");
850 abort();
851 }
852 }
853
854 for (i = 0; i < CPU_SETSIZE; i++) {
855 struct percpu_list_node *node;
856
857 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
858 continue;
859
860 while ((node = __percpu_list_pop(list, i))) {
861 sum += node->data;
862 free(node);
863 }
864 }
865
866 /*
867 * All entries should now be accounted for (unless some external
868 * actor is interfering with our allowed affinity while this
869 * test is running).
870 */
871 assert(sum == expected_sum);
872 rseq_mempool_percpu_free(list);
873 ret = rseq_mempool_destroy(mempool);
874 if (ret) {
875 perror("rseq_mempool_destroy");
876 abort();
877 }
878 }
879
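/*
 * Push a node onto the current cpu's buffer. Returns false if the buffer
 * is full.
 */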
880 static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
881 struct percpu_buffer_node *node,
882 int *_cpu)
883 {
884 bool result = false;
885 int cpu;
886
887 for (;;) {
888 struct percpu_buffer *cpubuffer;
889 intptr_t *targetptr_spec, newval_spec;
890 intptr_t *targetptr_final, newval_final;
891 intptr_t offset;
892 int ret;
893
894 cpu = get_current_cpu_id();
895 cpubuffer = rseq_percpu_ptr(buffer, cpu);
896 offset = RSEQ_READ_ONCE(cpubuffer->offset);
897 if (offset == cpubuffer->buflen)
898 break;
899 newval_spec = (intptr_t)node;
900 targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
901 newval_final = offset + 1;
902 targetptr_final = &cpubuffer->offset;
903 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
904 targetptr_final, offset, targetptr_spec,
905 newval_spec, newval_final, cpu);
906 if (rseq_likely(!ret)) {
907 result = true;
908 break;
909 }
910 /* Retry if comparison fails or rseq aborts. */
911 }
912 if (_cpu)
913 *_cpu = cpu;
914 return result;
915 }
916
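/*
 * Pop the most recently pushed node from the current cpu's buffer, or
 * return NULL if the buffer is empty.
 */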
917 static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
918 int *_cpu)
919 {
920 struct percpu_buffer_node *head;
921 int cpu;
922
923 for (;;) {
924 struct percpu_buffer *cpubuffer;
925 intptr_t *targetptr, newval;
926 intptr_t offset;
927 int ret;
928
929 cpu = get_current_cpu_id();
930 cpubuffer = rseq_percpu_ptr(buffer, cpu);
931 /* Load offset with single-copy atomicity. */
932 offset = RSEQ_READ_ONCE(cpubuffer->offset);
933 if (offset == 0) {
934 head = NULL;
935 break;
936 }
937 head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
938 newval = offset - 1;
939 targetptr = (intptr_t *)&cpubuffer->offset;
940 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
941 targetptr, offset,
942 (intptr_t *)&cpubuffer->array[offset - 1],
943 (intptr_t)head, newval, cpu);
944 if (rseq_likely(!ret))
945 break;
946 /* Retry if comparison fails or rseq aborts. */
947 }
948 if (_cpu)
949 *_cpu = cpu;
950 return head;
951 }
952
953 /*
954 * __percpu_buffer_pop is not safe against concurrent accesses. Should
955 * only be used on buffers that are not concurrently modified.
956 */
957 static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
958 int cpu)
959 {
960 struct percpu_buffer *cpubuffer;
961 struct percpu_buffer_node *head;
962 intptr_t offset;
963
964 cpubuffer = rseq_percpu_ptr(buffer, cpu);
965 offset = cpubuffer->offset;
966 if (offset == 0)
967 return NULL;
968 head = cpubuffer->array[offset - 1];
969 cpubuffer->offset = offset - 1;
970 return head;
971 }
972
973 static void *test_percpu_buffer_thread(void *arg)
974 {
975 long long i, reps;
976 struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;
977
978 if (!opt_disable_rseq && rseq_register_current_thread())
979 abort();
980
981 reps = opt_reps;
982 for (i = 0; i < reps; i++) {
983 struct percpu_buffer_node *node;
984
985 node = this_cpu_buffer_pop(buffer, NULL);
986 if (opt_yield)
987 sched_yield(); /* encourage shuffling */
988 if (node) {
989 if (!this_cpu_buffer_push(buffer, node, NULL)) {
990 /* Should increase buffer size. */
991 abort();
992 }
993 }
994 }
995
996 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
997 (int) rseq_gettid(), nr_abort, signals_delivered);
998 if (!opt_disable_rseq && rseq_unregister_current_thread())
999 abort();
1000
1001 return NULL;
1002 }
1003
1004 /* Simultaneous modification to a per-cpu buffer from many threads. */
1005 static void test_percpu_buffer(void)
1006 {
1007 const int num_threads = opt_threads;
1008 int i, j, ret;
1009 uint64_t sum = 0, expected_sum = 0;
1010 struct percpu_buffer __rseq_percpu *buffer;
1011 pthread_t test_threads[num_threads];
1012 cpu_set_t allowed_cpus;
1013 struct rseq_mempool *mempool;
1014 struct rseq_mempool_attr *attr;
1015
1016 attr = rseq_mempool_attr_create();
1017 if (!attr) {
1018 perror("rseq_mempool_attr_create");
1019 abort();
1020 }
1021 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, CPU_SETSIZE);
1022 if (ret) {
1023 perror("rseq_mempool_attr_set_percpu");
1024 abort();
1025 }
1026 mempool = rseq_mempool_create("percpu_buffer",
1027 sizeof(struct percpu_buffer), attr);
1028 if (!mempool) {
1029 perror("rseq_mempool_create");
1030 abort();
1031 }
1032 rseq_mempool_attr_destroy(attr);
1033 buffer = (struct percpu_buffer __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
1034 if (!buffer) {
1035 perror("rseq_mempool_percpu_zmalloc");
1036 abort();
1037 }
1038
1039 /* Generate list entries for every usable cpu. */
1040 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1041 for (i = 0; i < CPU_SETSIZE; i++) {
1042 struct percpu_buffer *cpubuffer;
1043
1044 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1045 continue;
1046 cpubuffer = rseq_percpu_ptr(buffer, i);
1047 /* Worst case is every item in the same CPU. */
1048 cpubuffer->array =
1049 (struct percpu_buffer_node **)
1050 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
1051 BUFFER_ITEM_PER_CPU);
1052 assert(cpubuffer->array);
1053 cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
1054 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
1055 struct percpu_buffer_node *node;
1056
1057 expected_sum += j;
1058
1059 /*
1060 * We could theoretically put the word-sized
1061 * "data" directly in the buffer. However, we
1062 * want to model objects that would not fit
1063 * within a single word, so allocate an object
1064 * for each node.
1065 */
1066 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
1067 assert(node);
1068 node->data = j;
1069 cpubuffer->array[j - 1] = node;
1070 cpubuffer->offset++;
1071 }
1072 }
1073
1074 for (i = 0; i < num_threads; i++) {
1075 ret = pthread_create(&test_threads[i], NULL,
1076 test_percpu_buffer_thread, buffer);
1077 if (ret) {
1078 errno = ret;
1079 perror("pthread_create");
1080 abort();
1081 }
1082 }
1083
1084 for (i = 0; i < num_threads; i++) {
1085 ret = pthread_join(test_threads[i], NULL);
1086 if (ret) {
1087 errno = ret;
1088 perror("pthread_join");
1089 abort();
1090 }
1091 }
1092
1093 for (i = 0; i < CPU_SETSIZE; i++) {
1094 struct percpu_buffer *cpubuffer;
1095 struct percpu_buffer_node *node;
1096
1097 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1098 continue;
1099
1100 cpubuffer = rseq_percpu_ptr(buffer, i);
1101 while ((node = __percpu_buffer_pop(buffer, i))) {
1102 sum += node->data;
1103 free(node);
1104 }
1105 free(cpubuffer->array);
1106 }
1107
1108 /*
1109 * All entries should now be accounted for (unless some external
1110 * actor is interfering with our allowed affinity while this
1111 * test is running).
1112 */
1113 assert(sum == expected_sum);
1114 rseq_mempool_percpu_free(buffer);
1115 ret = rseq_mempool_destroy(mempool);
1116 if (ret) {
1117 perror("rseq_mempool_destroy");
1118 abort();
1119 }
1120 }
1121
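/*
 * Copy an item into the current cpu's memcpy buffer with the rseq
 * load-compare-memcpy-store operation. Returns false if the buffer is full.
 */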
1122 static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1123 struct percpu_memcpy_buffer_node item,
1124 int *_cpu)
1125 {
1126 bool result = false;
1127 int cpu;
1128
1129 for (;;) {
1130 struct percpu_memcpy_buffer *cpubuffer;
1131 intptr_t *targetptr_final, newval_final, offset;
1132 char *destptr, *srcptr;
1133 size_t copylen;
1134 int ret;
1135
1136 cpu = get_current_cpu_id();
1137 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1138 /* Load offset with single-copy atomicity. */
1139 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1140 if (offset == cpubuffer->buflen)
1141 break;
1142 destptr = (char *)&cpubuffer->array[offset];
1143 srcptr = (char *)&item;
1144 /* copylen must be <= 4kB. */
1145 copylen = sizeof(item);
1146 newval_final = offset + 1;
1147 targetptr_final = &cpubuffer->offset;
1148 ret = rseq_load_cbne_memcpy_store__ptr(
1149 opt_mo, RSEQ_PERCPU,
1150 targetptr_final, offset,
1151 destptr, srcptr, copylen,
1152 newval_final, cpu);
1153 if (rseq_likely(!ret)) {
1154 result = true;
1155 break;
1156 }
1157 /* Retry if comparison fails or rseq aborts. */
1158 }
1159 if (_cpu)
1160 *_cpu = cpu;
1161 return result;
1162 }
1163
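/*
 * Copy the most recently pushed item out of the current cpu's memcpy
 * buffer. Returns false if the buffer is empty.
 */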
1164 static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1165 struct percpu_memcpy_buffer_node *item,
1166 int *_cpu)
1167 {
1168 bool result = false;
1169 int cpu;
1170
1171 for (;;) {
1172 struct percpu_memcpy_buffer *cpubuffer;
1173 intptr_t *targetptr_final, newval_final, offset;
1174 char *destptr, *srcptr;
1175 size_t copylen;
1176 int ret;
1177
1178 cpu = get_current_cpu_id();
1179 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1180 /* Load offset with single-copy atomicity. */
1181 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1182 if (offset == 0)
1183 break;
1184 destptr = (char *)item;
1185 srcptr = (char *)&cpubuffer->array[offset - 1];
1186 /* copylen must be <= 4kB. */
1187 copylen = sizeof(*item);
1188 newval_final = offset - 1;
1189 targetptr_final = &cpubuffer->offset;
1190 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1191 targetptr_final, offset, destptr, srcptr, copylen,
1192 newval_final, cpu);
1193 if (rseq_likely(!ret)) {
1194 result = true;
1195 break;
1196 }
1197 /* Retry if comparison fails or rseq aborts. */
1198 }
1199 if (_cpu)
1200 *_cpu = cpu;
1201 return result;
1202 }
1203
1204 /*
1205 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1206 * only be used on buffers that are not concurrently modified.
1207 */
1208 static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1209 struct percpu_memcpy_buffer_node *item,
1210 int cpu)
1211 {
1212 struct percpu_memcpy_buffer *cpubuffer;
1213 intptr_t offset;
1214
1215 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1216 offset = cpubuffer->offset;
1217 if (offset == 0)
1218 return false;
1219 memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
1220 cpubuffer->offset = offset - 1;
1221 return true;
1222 }
1223
1224 static void *test_percpu_memcpy_buffer_thread(void *arg)
1225 {
1226 long long i, reps;
1227 struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;
1228
1229 if (!opt_disable_rseq && rseq_register_current_thread())
1230 abort();
1231
1232 reps = opt_reps;
1233 for (i = 0; i < reps; i++) {
1234 struct percpu_memcpy_buffer_node item;
1235 bool result;
1236
1237 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1238 if (opt_yield)
1239 sched_yield(); /* encourage shuffling */
1240 if (result) {
1241 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1242 /* Should increase buffer size. */
1243 abort();
1244 }
1245 }
1246 }
1247
1248 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1249 (int) rseq_gettid(), nr_abort, signals_delivered);
1250 if (!opt_disable_rseq && rseq_unregister_current_thread())
1251 abort();
1252
1253 return NULL;
1254 }
1255
1256 /* Simultaneous modification to a per-cpu buffer from many threads. */
1257 static void test_percpu_memcpy_buffer(void)
1258 {
1259 const int num_threads = opt_threads;
1260 int i, j, ret;
1261 uint64_t sum = 0, expected_sum = 0;
1262 	struct percpu_memcpy_buffer __rseq_percpu *buffer;
1263 pthread_t test_threads[num_threads];
1264 cpu_set_t allowed_cpus;
1265 struct rseq_mempool *mempool;
1266 struct rseq_mempool_attr *attr;
1267
1268 attr = rseq_mempool_attr_create();
1269 if (!attr) {
1270 perror("rseq_mempool_attr_create");
1271 abort();
1272 }
1273 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, CPU_SETSIZE);
1274 if (ret) {
1275 perror("rseq_mempool_attr_set_percpu");
1276 abort();
1277 }
1278 mempool = rseq_mempool_create("percpu_memcpy_buffer",
1279 sizeof(struct percpu_memcpy_buffer), attr);
1280 if (!mempool) {
1281 perror("rseq_mempool_create");
1282 abort();
1283 }
1284 rseq_mempool_attr_destroy(attr);
1285 buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
1286 if (!buffer) {
1287 perror("rseq_mempool_percpu_zmalloc");
1288 abort();
1289 }
1290
1291 /* Generate list entries for every usable cpu. */
1292 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1293 for (i = 0; i < CPU_SETSIZE; i++) {
1294 struct percpu_memcpy_buffer *cpubuffer;
1295
1296 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1297 continue;
1298 cpubuffer = rseq_percpu_ptr(buffer, i);
1299 /* Worst case is every item in the same CPU. */
1300 cpubuffer->array =
1301 (struct percpu_memcpy_buffer_node *)
1302 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
1303 MEMCPY_BUFFER_ITEM_PER_CPU);
1304 assert(cpubuffer->array);
1305 cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1306 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1307 expected_sum += 2 * j + 1;
1308
1309 /*
1310 * We could theoretically put the word-sized
1311 * "data" directly in the buffer. However, we
1312 * want to model objects that would not fit
1313 * within a single word, so each node carries two
1314 * fields which are copied together by memcpy.
1315 */
1316 cpubuffer->array[j - 1].data1 = j;
1317 cpubuffer->array[j - 1].data2 = j + 1;
1318 cpubuffer->offset++;
1319 }
1320 }
1321
1322 for (i = 0; i < num_threads; i++) {
1323 ret = pthread_create(&test_threads[i], NULL,
1324 test_percpu_memcpy_buffer_thread,
1325 buffer);
1326 if (ret) {
1327 errno = ret;
1328 perror("pthread_create");
1329 abort();
1330 }
1331 }
1332
1333 for (i = 0; i < num_threads; i++) {
1334 ret = pthread_join(test_threads[i], NULL);
1335 if (ret) {
1336 errno = ret;
1337 perror("pthread_join");
1338 abort();
1339 }
1340 }
1341
1342 for (i = 0; i < CPU_SETSIZE; i++) {
1343 struct percpu_memcpy_buffer_node item;
1344 struct percpu_memcpy_buffer *cpubuffer;
1345
1346 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1347 continue;
1348
1349 cpubuffer = rseq_percpu_ptr(buffer, i);
1350 while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
1351 sum += item.data1;
1352 sum += item.data2;
1353 }
1354 free(cpubuffer->array);
1355 }
1356
1357 /*
1358 * All entries should now be accounted for (unless some external
1359 * actor is interfering with our allowed affinity while this
1360 * test is running).
1361 */
1362 assert(sum == expected_sum);
1363 rseq_mempool_percpu_free(buffer);
1364 ret = rseq_mempool_destroy(mempool);
1365 if (ret) {
1366 perror("rseq_mempool_destroy");
1367 abort();
1368 }
1369 }
1370
1371 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1372 {
1373 signals_delivered++;
1374 }
1375
1376 static int set_signal_handler(void)
1377 {
1378 int ret = 0;
1379 struct sigaction sa;
1380 sigset_t sigset;
1381
1382 ret = sigemptyset(&sigset);
1383 if (ret < 0) {
1384 perror("sigemptyset");
1385 return ret;
1386 }
1387
1388 sa.sa_handler = test_signal_interrupt_handler;
1389 sa.sa_mask = sigset;
1390 sa.sa_flags = 0;
1391 ret = sigaction(SIGUSR1, &sa, NULL);
1392 if (ret < 0) {
1393 perror("sigaction");
1394 return ret;
1395 }
1396
1397 printf_verbose("Signal handler set for SIGUSR1\n");
1398
1399 return ret;
1400 }
1401
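/* Query whether the kernel supports MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ. */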
1402 static
1403 bool membarrier_private_expedited_rseq_available(void)
1404 {
1405 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1406
1407 if (status < 0) {
1408 perror("membarrier");
1409 return false;
1410 }
1411 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1412 return false;
1413 return true;
1414 }
1415
1416 /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1417 #ifdef TEST_MEMBARRIER
1418 struct test_membarrier_thread_args {
1419 struct rseq_mempool *mempool;
1420 struct percpu_list __rseq_percpu *percpu_list_ptr;
1421 int stop;
1422 };
1423
1424 /* Worker threads modify data in their "active" percpu lists. */
1425 static
1426 void *test_membarrier_worker_thread(void *arg)
1427 {
1428 struct test_membarrier_thread_args *args =
1429 (struct test_membarrier_thread_args *)arg;
1430 const long long iters = opt_reps;
1431 long long i;
1432
1433 if (rseq_register_current_thread()) {
1434 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1435 errno, strerror(errno));
1436 abort();
1437 }
1438
1439 /* Wait for initialization. */
1440 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1441
1442 for (i = 0; i < iters; ++i) {
1443 int ret;
1444
1445 do {
1446 int cpu = get_current_cpu_id();
1447
1448 ret = rseq_load_add_load_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1449 (intptr_t *) &args->percpu_list_ptr,
1450 (RSEQ_MEMPOOL_STRIDE * cpu) + offsetof(struct percpu_list, head),
1451 1, cpu);
1452 } while (rseq_unlikely(ret));
1453 }
1454
1455 if (rseq_unregister_current_thread()) {
1456 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1457 errno, strerror(errno));
1458 abort();
1459 }
1460 return NULL;
1461 }
1462
1463 static
1464 struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_mempool *mempool)
1465 {
1466 struct percpu_list __rseq_percpu *list;
1467 int i;
1468
1469 list = (struct percpu_list __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
1470 if (!list) {
1471 perror("rseq_mempool_percpu_zmalloc");
1472 return NULL;
1473 }
1474 for (i = 0; i < CPU_SETSIZE; i++) {
1475 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
1476 struct percpu_list_node *node;
1477
1478 node = (struct percpu_list_node *) malloc(sizeof(*node));
1479 assert(node);
1480 node->data = 0;
1481 node->next = NULL;
1482 cpulist->head = node;
1483 }
1484 return list;
1485 }
1486
1487 static
1488 void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
1489 {
1490 int i;
1491
1492 for (i = 0; i < CPU_SETSIZE; i++)
1493 free(rseq_percpu_ptr(list, i)->head);
1494 rseq_mempool_percpu_free(list);
1495 }
1496
1497 static
1498 long long test_membarrier_count_percpu_list(struct percpu_list __rseq_percpu *list)
1499 {
1500 long long total_count = 0;
1501 int i;
1502
1503 for (i = 0; i < CPU_SETSIZE; i++)
1504 total_count += rseq_percpu_ptr(list, i)->head->data;
1505 return total_count;
1506 }
1507
1508 /*
1509 * The manager thread swaps per-cpu lists that worker threads see,
1510 * and validates that there are no unexpected modifications.
1511 */
1512 static
1513 void *test_membarrier_manager_thread(void *arg)
1514 {
1515 struct test_membarrier_thread_args *args =
1516 (struct test_membarrier_thread_args *)arg;
1517 struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
1518 intptr_t expect_a = 0, expect_b = 0;
1519 int cpu_a = 0, cpu_b = 0;
1520 struct rseq_mempool *mempool;
1521 int ret;
1522 long long total_count = 0;
1523 struct rseq_mempool_attr *attr;
1524
1525 attr = rseq_mempool_attr_create();
1526 if (!attr) {
1527 perror("rseq_mempool_attr_create");
1528 abort();
1529 }
1530 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, CPU_SETSIZE);
1531 if (ret) {
1532 perror("rseq_mempool_attr_set_percpu");
1533 abort();
1534 }
1535 mempool = rseq_mempool_create("percpu_list",
1536 sizeof(struct percpu_list), attr);
1537 if (!mempool) {
1538 perror("rseq_mempool_create");
1539 abort();
1540 }
1541 rseq_mempool_attr_destroy(attr);
1542 args->mempool = mempool;
1543
1544 if (rseq_register_current_thread()) {
1545 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1546 errno, strerror(errno));
1547 abort();
1548 }
1549
1550 /* Init lists. */
1551 list_a = test_membarrier_alloc_percpu_list(mempool);
1552 assert(list_a);
1553 list_b = test_membarrier_alloc_percpu_list(mempool);
1554 assert(list_b);
1555
1556 /* Initialize lists before publishing them. */
1557 rseq_smp_wmb();
1558
1559 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1560
1561 while (!RSEQ_READ_ONCE(args->stop)) {
1562 /* list_a is "active". */
1563 cpu_a = rand() % CPU_SETSIZE;
1564 /*
1565 * As list_b is "inactive", we should never see changes
1566 * to list_b.
1567 */
1568 if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
1569 fprintf(stderr, "Membarrier test failed\n");
1570 abort();
1571 }
1572
1573 /* Make list_b "active". */
1574 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
1575 if (rseq_membarrier_expedited(cpu_a) &&
1576 errno != ENXIO /* missing CPU */) {
1577 perror("sys_membarrier");
1578 abort();
1579 }
1580 /*
1581 * Cpu A should now only modify list_b, so the values
1582 * in list_a should be stable.
1583 */
1584 expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
1585
1586 cpu_b = rand() % CPU_SETSIZE;
1587 /*
1588 * As list_a is "inactive", we should never see changes
1589 * to list_a.
1590 */
1591 if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
1592 fprintf(stderr, "Membarrier test failed\n");
1593 abort();
1594 }
1595
1596 /* Make list_a "active". */
1597 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1598 if (rseq_membarrier_expedited(cpu_b) &&
1599 errno != ENXIO /* missing CPU */) {
1600 perror("sys_membarrier");
1601 abort();
1602 }
1603 /* Remember a value from list_b. */
1604 expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
1605 }
1606
1607 total_count += test_membarrier_count_percpu_list(list_a);
1608 total_count += test_membarrier_count_percpu_list(list_b);
1609
1610 /* Validate that we observe the right number of increments. */
1611 if (total_count != opt_threads * opt_reps) {
1612 fprintf(stderr, "Error: Observed %lld increments, expected %lld\n",
1613 total_count, opt_threads * opt_reps);
1614 abort();
1615 }
1616 test_membarrier_free_percpu_list(list_a);
1617 test_membarrier_free_percpu_list(list_b);
1618
1619 if (rseq_unregister_current_thread()) {
1620 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1621 errno, strerror(errno));
1622 abort();
1623 }
1624 ret = rseq_mempool_destroy(mempool);
1625 if (ret) {
1626 perror("rseq_mempool_destroy");
1627 abort();
1628 }
1629
1630 return NULL;
1631 }
1632
1633 static
1634 void test_membarrier(void)
1635 {
1636 const int num_threads = opt_threads;
1637 struct test_membarrier_thread_args thread_args;
1638 pthread_t worker_threads[num_threads];
1639 pthread_t manager_thread;
1640 int i, ret;
1641
1642 if (!membarrier_private_expedited_rseq_available()) {
1643 fprintf(stderr, "Membarrier private expedited rseq not available. "
1644 "Skipping membarrier test.\n");
1645 return;
1646 }
1647 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1648 perror("sys_membarrier");
1649 abort();
1650 }
1651
1652 thread_args.percpu_list_ptr = NULL;
1653 thread_args.stop = 0;
1654 ret = pthread_create(&manager_thread, NULL,
1655 test_membarrier_manager_thread, &thread_args);
1656 if (ret) {
1657 errno = ret;
1658 perror("pthread_create");
1659 abort();
1660 }
1661
1662 for (i = 0; i < num_threads; i++) {
1663 ret = pthread_create(&worker_threads[i], NULL,
1664 test_membarrier_worker_thread, &thread_args);
1665 if (ret) {
1666 errno = ret;
1667 perror("pthread_create");
1668 abort();
1669 }
1670 }
1671
1672
1673 for (i = 0; i < num_threads; i++) {
1674 ret = pthread_join(worker_threads[i], NULL);
1675 if (ret) {
1676 errno = ret;
1677 perror("pthread_join");
1678 abort();
1679 }
1680 }
1681
1682 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1683 ret = pthread_join(manager_thread, NULL);
1684 if (ret) {
1685 errno = ret;
1686 perror("pthread_join");
1687 abort();
1688 }
1689 }
1690 #else /* TEST_MEMBARRIER */
1691 static
1692 void test_membarrier(void)
1693 {
1694 if (!membarrier_private_expedited_rseq_available()) {
1695 fprintf(stderr, "Membarrier private expedited rseq not available. "
1696 "Skipping membarrier test.\n");
1697 return;
1698 }
1699 fprintf(stderr, "rseq_load_add_load_load_add_store__ptr is not implemented on this architecture. "
1700 "Skipping membarrier test.\n");
1701 }
1702 #endif
1703
1704 static void show_usage(char **argv)
1705 {
1706 printf("Usage : %s <OPTIONS>\n",
1707 argv[0]);
1708 printf("OPTIONS:\n");
1709 printf(" [-1 loops] Number of loops for delay injection 1\n");
1710 printf(" [-2 loops] Number of loops for delay injection 2\n");
1711 printf(" [-3 loops] Number of loops for delay injection 3\n");
1712 printf(" [-4 loops] Number of loops for delay injection 4\n");
1713 printf(" [-5 loops] Number of loops for delay injection 5\n");
1714 printf(" [-6 loops] Number of loops for delay injection 6\n");
1715 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1716 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1717 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1718 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1719 printf(" [-y] Yield\n");
1720 printf(" [-k] Kill thread with signal\n");
1721 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1722 printf(" [-t N] Number of threads (default 200)\n");
1723 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1724 printf(" [-d] Disable rseq system call (no initialization)\n");
1725 printf(" [-D M] Disable rseq for each M threads\n");
1726 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1727 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1728 printf(" [-c] Check if the rseq syscall is available.\n");
1729 printf(" [-v] Verbose output.\n");
1730 printf(" [-h] Show this help.\n");
1731 printf("\n");
1732 }
1733
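/*
 * Example invocations (hypothetical parameter values, see show_usage()):
 *
 *   param_test -T s -t 16 -r 100000     # spinlock test, 16 threads, 100000 reps
 *   param_test -T m -M                  # memcpy buffer test with release ordering
 *   param_test -T l -y -7 -1 -m 10      # list test, yield every 10 passes at injection point 7
 */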
1734 int main(int argc, char **argv)
1735 {
1736 int i;
1737
1738 for (i = 1; i < argc; i++) {
1739 if (argv[i][0] != '-')
1740 continue;
1741 switch (argv[i][1]) {
1742 case '1':
1743 case '2':
1744 case '3':
1745 case '4':
1746 case '5':
1747 case '6':
1748 case '7':
1749 case '8':
1750 case '9':
1751 if (argc < i + 2) {
1752 show_usage(argv);
1753 goto error;
1754 }
1755 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1756 i++;
1757 break;
1758 case 'm':
1759 if (argc < i + 2) {
1760 show_usage(argv);
1761 goto error;
1762 }
1763 opt_modulo = atol(argv[i + 1]);
1764 if (opt_modulo < 0) {
1765 show_usage(argv);
1766 goto error;
1767 }
1768 i++;
1769 break;
1770 case 's':
1771 if (argc < i + 2) {
1772 show_usage(argv);
1773 goto error;
1774 }
1775 opt_sleep = atol(argv[i + 1]);
1776 if (opt_sleep < 0) {
1777 show_usage(argv);
1778 goto error;
1779 }
1780 i++;
1781 break;
1782 case 'y':
1783 opt_yield = 1;
1784 break;
1785 case 'k':
1786 opt_signal = 1;
1787 break;
1788 case 'd':
1789 opt_disable_rseq = 1;
1790 break;
1791 case 'D':
1792 if (argc < i + 2) {
1793 show_usage(argv);
1794 goto error;
1795 }
1796 opt_disable_mod = atol(argv[i + 1]);
1797 if (opt_disable_mod < 0) {
1798 show_usage(argv);
1799 goto error;
1800 }
1801 i++;
1802 break;
1803 case 't':
1804 if (argc < i + 2) {
1805 show_usage(argv);
1806 goto error;
1807 }
1808 opt_threads = atol(argv[i + 1]);
1809 if (opt_threads < 0) {
1810 show_usage(argv);
1811 goto error;
1812 }
1813 i++;
1814 break;
1815 case 'r':
1816 if (argc < i + 2) {
1817 show_usage(argv);
1818 goto error;
1819 }
1820 opt_reps = atoll(argv[i + 1]);
1821 if (opt_reps < 0) {
1822 show_usage(argv);
1823 goto error;
1824 }
1825 i++;
1826 break;
1827 case 'h':
1828 show_usage(argv);
1829 goto end;
1830 case 'T':
1831 if (argc < i + 2) {
1832 show_usage(argv);
1833 goto error;
1834 }
1835 opt_test = *argv[i + 1];
1836 switch (opt_test) {
1837 case 's':
1838 case 'l':
1839 case 'i':
1840 case 'b':
1841 case 'm':
1842 case 'r':
1843 break;
1844 default:
1845 show_usage(argv);
1846 goto error;
1847 }
1848 i++;
1849 break;
1850 case 'v':
1851 verbose = 1;
1852 break;
1853 case 'M':
1854 opt_mo = RSEQ_MO_RELEASE;
1855 break;
1856 case 'c':
1857 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1858 printf_verbose("The rseq syscall is available.\n");
1859 goto end;
1860 } else {
1861 printf_verbose("The rseq syscall is unavailable.\n");
1862 goto no_rseq;
1863 }
1864 default:
1865 show_usage(argv);
1866 goto error;
1867 }
1868 }
1869
1870 loop_cnt_1 = loop_cnt[1];
1871 loop_cnt_2 = loop_cnt[2];
1872 loop_cnt_3 = loop_cnt[3];
1873 loop_cnt_4 = loop_cnt[4];
1874 loop_cnt_5 = loop_cnt[5];
1875 loop_cnt_6 = loop_cnt[6];
1876
1877 if (set_signal_handler())
1878 goto error;
1879
1880 if (!opt_disable_rseq && rseq_register_current_thread())
1881 goto error;
1882 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1883 printf_verbose("The rseq cpu id getter is unavailable\n");
1884 goto no_rseq;
1885 }
1886 switch (opt_test) {
1887 case 's':
1888 printf_verbose("spinlock\n");
1889 test_percpu_spinlock();
1890 break;
1891 case 'l':
1892 printf_verbose("linked list\n");
1893 test_percpu_list();
1894 break;
1895 case 'b':
1896 printf_verbose("buffer\n");
1897 test_percpu_buffer();
1898 break;
1899 case 'm':
1900 printf_verbose("memcpy buffer\n");
1901 test_percpu_memcpy_buffer();
1902 break;
1903 case 'i':
1904 printf_verbose("counter increment\n");
1905 test_percpu_inc();
1906 break;
1907 case 'r':
1908 printf_verbose("membarrier\n");
1909 test_membarrier();
1910 break;
1911 }
1912 if (!opt_disable_rseq && rseq_unregister_current_thread())
1913 abort();
1914 end:
1915 return 0;
1916
1917 error:
1918 return -1;
1919
1920 no_rseq:
1921 return 2;
1922 }