[librseq.git] / tests / param_test.c
1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6 #include <assert.h>
7 #include <linux/version.h>
8 #include <linux/membarrier.h>
9 #include <pthread.h>
10 #include <sched.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <syscall.h>
16 #include <unistd.h>
17 #include <poll.h>
18 #include <sys/types.h>
19 #include <signal.h>
20 #include <errno.h>
21 #include <stddef.h>
22 #include <stdbool.h>
23 #include <rseq/mempool.h>
24
25 #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
26 enum {
27 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
28 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
29 };
30
31 enum {
32 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
33 };
34 #endif
35
36 #define NR_INJECT 9
37 static int loop_cnt[NR_INJECT + 1];
38
39 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
40 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
41 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
42 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
43 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
44 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
45
46 static int opt_modulo, verbose;
47
48 static int opt_yield, opt_signal, opt_sleep,
49 opt_disable_rseq, opt_threads = 200,
50 opt_disable_mod = 0, opt_test = 's';
51
52 static long long opt_reps = 5000;
53
54 static __thread __attribute__((tls_model("initial-exec")))
55 unsigned int signals_delivered;
56
57 static inline pid_t rseq_gettid(void)
58 {
59 return syscall(__NR_gettid);
60 }
61
62 #ifndef BENCHMARK
63
64 static __thread __attribute__((tls_model("initial-exec"), unused))
65 int yield_mod_cnt, nr_abort;
66
67 #define printf_verbose(fmt, ...) \
68 do { \
69 if (verbose) \
70 printf(fmt, ## __VA_ARGS__); \
71 } while (0)
72
73 #ifdef __i386__
74
75 #define INJECT_ASM_REG "eax"
76
77 #define RSEQ_INJECT_CLOBBER \
78 , INJECT_ASM_REG
79
80 /*
81 * Use ip-relative addressing to get the loop counter.
82 */
83 #define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
84 "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
85 "leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
86 "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
87 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
88 "jz 333f\n\t" \
89 "222:\n\t" \
90 "dec %%" INJECT_ASM_REG "\n\t" \
91 "jnz 222b\n\t" \
92 "333:\n\t"
93
94 #define RSEQ_INJECT_ASM(n) \
95 __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
96
97 #elif defined(__x86_64__)
98
99 #define INJECT_ASM_REG_P "rax"
100 #define INJECT_ASM_REG "eax"
101
102 #define RSEQ_INJECT_CLOBBER \
103 , INJECT_ASM_REG_P \
104 , INJECT_ASM_REG
105
106 #define RSEQ_INJECT_ASM(n) \
107 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
108 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
109 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
110 "jz 333f\n\t" \
111 "222:\n\t" \
112 "dec %%" INJECT_ASM_REG "\n\t" \
113 "jnz 222b\n\t" \
114 "333:\n\t"
115
116 #elif defined(__s390__)
117
118 #define RSEQ_INJECT_INPUT \
119 , [loop_cnt_1]"m"(loop_cnt[1]) \
120 , [loop_cnt_2]"m"(loop_cnt[2]) \
121 , [loop_cnt_3]"m"(loop_cnt[3]) \
122 , [loop_cnt_4]"m"(loop_cnt[4]) \
123 , [loop_cnt_5]"m"(loop_cnt[5]) \
124 , [loop_cnt_6]"m"(loop_cnt[6])
125
126 #define INJECT_ASM_REG "r12"
127
128 #define RSEQ_INJECT_CLOBBER \
129 , INJECT_ASM_REG
130
131 #define RSEQ_INJECT_ASM(n) \
132 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
133 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
134 "je 333f\n\t" \
135 "222:\n\t" \
136 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
137 "jnz 222b\n\t" \
138 "333:\n\t"
139
140 #elif defined(__ARMEL__)
141
142 #define RSEQ_INJECT_INPUT \
143 , [loop_cnt_1]"m"(loop_cnt[1]) \
144 , [loop_cnt_2]"m"(loop_cnt[2]) \
145 , [loop_cnt_3]"m"(loop_cnt[3]) \
146 , [loop_cnt_4]"m"(loop_cnt[4]) \
147 , [loop_cnt_5]"m"(loop_cnt[5]) \
148 , [loop_cnt_6]"m"(loop_cnt[6])
149
150 #define INJECT_ASM_REG "r4"
151
152 #define RSEQ_INJECT_CLOBBER \
153 , INJECT_ASM_REG
154
155 #define RSEQ_INJECT_ASM(n) \
156 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
157 "cmp " INJECT_ASM_REG ", #0\n\t" \
158 "beq 333f\n\t" \
159 "222:\n\t" \
160 "subs " INJECT_ASM_REG ", #1\n\t" \
161 "bne 222b\n\t" \
162 "333:\n\t"
163
164 #elif defined(__AARCH64EL__)
165
166 #define RSEQ_INJECT_INPUT \
167 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
168 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
169 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
170 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
171 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
172 , [loop_cnt_6] "Qo" (loop_cnt[6])
173
174 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
175
176 #define RSEQ_INJECT_ASM(n) \
177 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
178 " cbz " INJECT_ASM_REG ", 333f\n" \
179 "222:\n" \
180 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
181 " cbnz " INJECT_ASM_REG ", 222b\n" \
182 "333:\n"
183
184 #elif defined(__PPC__)
185
186 #define RSEQ_INJECT_INPUT \
187 , [loop_cnt_1]"m"(loop_cnt[1]) \
188 , [loop_cnt_2]"m"(loop_cnt[2]) \
189 , [loop_cnt_3]"m"(loop_cnt[3]) \
190 , [loop_cnt_4]"m"(loop_cnt[4]) \
191 , [loop_cnt_5]"m"(loop_cnt[5]) \
192 , [loop_cnt_6]"m"(loop_cnt[6])
193
194 #define INJECT_ASM_REG "r18"
195
196 #define RSEQ_INJECT_CLOBBER \
197 , INJECT_ASM_REG
198
199 #define RSEQ_INJECT_ASM(n) \
200 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
201 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
202 "beq 333f\n\t" \
203 "222:\n\t" \
204 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
205 "bne 222b\n\t" \
206 "333:\n\t"
207
208 #elif defined(__mips__)
209
210 #define RSEQ_INJECT_INPUT \
211 , [loop_cnt_1]"m"(loop_cnt[1]) \
212 , [loop_cnt_2]"m"(loop_cnt[2]) \
213 , [loop_cnt_3]"m"(loop_cnt[3]) \
214 , [loop_cnt_4]"m"(loop_cnt[4]) \
215 , [loop_cnt_5]"m"(loop_cnt[5]) \
216 , [loop_cnt_6]"m"(loop_cnt[6])
217
218 #define INJECT_ASM_REG "$5"
219
220 #define RSEQ_INJECT_CLOBBER \
221 , INJECT_ASM_REG
222
223 #define RSEQ_INJECT_ASM(n) \
224 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
225 "beqz " INJECT_ASM_REG ", 333f\n\t" \
226 "222:\n\t" \
227 "addiu " INJECT_ASM_REG ", -1\n\t" \
228 "bnez " INJECT_ASM_REG ", 222b\n\t" \
229 "333:\n\t"
230
231 #elif defined(__riscv)
232
233 #define RSEQ_INJECT_INPUT \
234 , [loop_cnt_1]"m"(loop_cnt[1]) \
235 , [loop_cnt_2]"m"(loop_cnt[2]) \
236 , [loop_cnt_3]"m"(loop_cnt[3]) \
237 , [loop_cnt_4]"m"(loop_cnt[4]) \
238 , [loop_cnt_5]"m"(loop_cnt[5]) \
239 , [loop_cnt_6]"m"(loop_cnt[6])
240
241 #define INJECT_ASM_REG "t1"
242
243 #define RSEQ_INJECT_CLOBBER \
244 , INJECT_ASM_REG
245
246 #define RSEQ_INJECT_ASM(n) \
247 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
248 "beqz " INJECT_ASM_REG ", 333f\n\t" \
249 "222:\n\t" \
250 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
251 "bnez " INJECT_ASM_REG ", 222b\n\t" \
252 "333:\n\t"
253
254 #else
255 #error unsupported target
256 #endif
257
258 #define RSEQ_INJECT_FAILED \
259 nr_abort++;
260
261 #define RSEQ_INJECT_C(n) \
262 { \
263 int loc_i, loc_nr_loops = loop_cnt[n]; \
264 \
265 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
266 rseq_barrier(); \
267 } \
268 if (loc_nr_loops == -1 && opt_modulo) { \
269 if (yield_mod_cnt == opt_modulo - 1) { \
270 if (opt_sleep > 0) \
271 poll(NULL, 0, opt_sleep); \
272 if (opt_yield) \
273 sched_yield(); \
274 if (opt_signal) \
275 raise(SIGUSR1); \
276 yield_mod_cnt = 0; \
277 } else { \
278 yield_mod_cnt++; \
279 } \
280 } \
281 }
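/*
 * Usage sketch (illustrative): the loop_cnt[] slots are filled from the
 * -1 .. -9 command line options (see show_usage() below). A positive count
 * adds a busy-wait delay at that injection point; passing -1 for injection
 * points 7, 8 or 9 instead enables the modulo-driven behaviour above, which
 * sleeps (-s), yields (-y) or raises SIGUSR1 (-k) every -m N passes.
 * For example, assuming the binary is named param_test:
 *
 *	./param_test -T s -7 -1 -m 10 -y	# yield every 10th pass at point 7
 */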
282
283 #else
284
285 #define printf_verbose(fmt, ...)
286
287 #endif /* BENCHMARK */
288
289 #include <rseq/rseq.h>
290
291 static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
292
293 static int sys_membarrier(int cmd, int flags, int cpu_id)
294 {
295 return syscall(__NR_membarrier, cmd, flags, cpu_id);
296 }
297
298 #ifdef rseq_arch_has_load_add_load_load_add_store
299 #define TEST_MEMBARRIER
300 #endif
301
302 #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
303 # define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
304 static
305 int get_current_cpu_id(void)
306 {
307 return rseq_current_mm_cid();
308 }
309 static
310 bool rseq_validate_cpu_id(void)
311 {
312 return rseq_mm_cid_available();
313 }
314 static
315 bool rseq_use_cpu_index(void)
316 {
317 return false; /* Use mm_cid */
318 }
319 # ifdef TEST_MEMBARRIER
320 /*
321 * Membarrier does not currently support targeting a mm_cid, so
322 * issue the barrier on all cpus.
323 */
324 static
325 int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
326 {
327 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
328 0, 0);
329 }
330 # endif /* TEST_MEMBARRIER */
331 #else
332 # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
333 static
334 int get_current_cpu_id(void)
335 {
336 return rseq_cpu_start();
337 }
338 static
339 bool rseq_validate_cpu_id(void)
340 {
341 return rseq_current_cpu_raw() >= 0;
342 }
343 static
344 bool rseq_use_cpu_index(void)
345 {
346 return true; /* Use cpu_id as index. */
347 }
348 # ifdef TEST_MEMBARRIER
349 static
350 int rseq_membarrier_expedited(int cpu)
351 {
352 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
353 MEMBARRIER_CMD_FLAG_CPU, cpu);
354 }
355 # endif /* TEST_MEMBARRIER */
356 #endif
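/*
 * Summary (informal): when built with BUILDOPT_RSEQ_PERCPU_MM_CID, per-cpu
 * data is indexed by the memory-map concurrency ID (mm_cid), a compact
 * per-process index, and the expedited membarrier is issued to all CPUs;
 * otherwise the raw CPU number is used. rseq_use_cpu_index() tells the
 * setup/teardown code whether the index can be matched against the CPU
 * affinity mask.
 */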
357
358 struct percpu_lock {
359 intptr_t v;
360 };
361
362 struct spinlock_test_data {
363 struct percpu_lock lock;
364 intptr_t count;
365 };
366
367 struct spinlock_thread_test_data {
368 struct spinlock_test_data __rseq_percpu *data;
369 long long reps;
370 int reg;
371 };
372
373 struct inc_test_data {
374 intptr_t count;
375 };
376
377 struct inc_thread_test_data {
378 struct inc_test_data __rseq_percpu *data;
379 long long reps;
380 int reg;
381 };
382
383 struct percpu_list_node {
384 intptr_t data;
385 struct percpu_list_node *next;
386 };
387
388 struct percpu_list {
389 struct percpu_list_node *head;
390 };
391
392 #define BUFFER_ITEM_PER_CPU 100
393
394 struct percpu_buffer_node {
395 intptr_t data;
396 };
397
398 struct percpu_buffer {
399 intptr_t offset;
400 intptr_t buflen;
401 struct percpu_buffer_node **array;
402 };
403
404 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
405
406 struct percpu_memcpy_buffer_node {
407 intptr_t data1;
408 uint64_t data2;
409 };
410
411 struct percpu_memcpy_buffer {
412 intptr_t offset;
413 intptr_t buflen;
414 struct percpu_memcpy_buffer_node *array;
415 };
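/*
 * Layout sketch (illustrative, not part of the test logic): each structure
 * above is allocated once from a rseq per-cpu mempool, which reserves one
 * instance per possible CPU at a fixed stride. rseq_percpu_ptr(p, cpu) then
 * yields the CPU-local instance, e.g.:
 *
 *	struct spinlock_test_data __rseq_percpu *data =
 *		(struct spinlock_test_data __rseq_percpu *)
 *		rseq_mempool_percpu_zmalloc(mempool);
 *	rseq_percpu_ptr(data, cpu)->count++;	// this CPU's instance
 */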
416
417 /* A simple percpu spinlock. Grabs lock on current cpu. */
418 static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
419 {
420 int cpu;
421
422 for (;;) {
423 int ret;
424
425 cpu = get_current_cpu_id();
426 if (cpu < 0) {
427 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
428 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
429 abort();
430 }
431 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
432 &rseq_percpu_ptr(lock, cpu)->v,
433 0, 1, cpu);
434 if (rseq_likely(!ret))
435 break;
436 /* Retry if comparison fails or rseq aborts. */
437 }
438 /*
439 * Acquire semantic when taking lock after control dependency.
440 * Matches rseq_smp_store_release().
441 */
442 rseq_smp_acquire__after_ctrl_dep();
443 return cpu;
444 }
445
446 static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
447 {
448 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
449 /*
450 * Release lock, with release semantic. Matches
451 * rseq_smp_acquire__after_ctrl_dep().
452 */
453 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
454 }
455
456 static void *test_percpu_spinlock_thread(void *arg)
457 {
458 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
459 struct spinlock_test_data __rseq_percpu *data = thread_data->data;
460 long long i, reps;
461
462 if (!opt_disable_rseq && thread_data->reg &&
463 rseq_register_current_thread())
464 abort();
465 reps = thread_data->reps;
466 for (i = 0; i < reps; i++) {
467 int cpu = rseq_this_cpu_lock(&data->lock);
468 rseq_percpu_ptr(data, cpu)->count++;
469 rseq_percpu_unlock(&data->lock, cpu);
470 #ifndef BENCHMARK
471 if (i != 0 && !(i % (reps / 10)))
472 printf_verbose("tid %d: count %lld\n",
473 (int) rseq_gettid(), i);
474 #endif
475 }
476 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
477 (int) rseq_gettid(), nr_abort, signals_delivered);
478 if (!opt_disable_rseq && thread_data->reg &&
479 rseq_unregister_current_thread())
480 abort();
481 return NULL;
482 }
483
484 /*
485 * A simple test which implements a sharded counter using a per-cpu
486 * lock. Obviously real applications might prefer to simply use a
487 * per-cpu increment; however, this is reasonable for a test and the
488 * lock can be extended to synchronize more complicated operations.
489 */
490 static void test_percpu_spinlock(void)
491 {
492 const int num_threads = opt_threads;
493 int i, ret, max_nr_cpus;
494 uint64_t sum;
495 pthread_t test_threads[num_threads];
496 struct spinlock_test_data __rseq_percpu *data;
497 struct spinlock_thread_test_data thread_data[num_threads];
498 struct rseq_mempool *mempool;
499 struct rseq_mempool_attr *attr;
500
501 attr = rseq_mempool_attr_create();
502 if (!attr) {
503 perror("rseq_mempool_attr_create");
504 abort();
505 }
506 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
507 if (ret) {
508 perror("rseq_mempool_attr_set_percpu");
509 abort();
510 }
511 mempool = rseq_mempool_create("spinlock_test_data",
512 sizeof(struct spinlock_test_data), attr);
513 if (!mempool) {
514 perror("rseq_mempool_create");
515 abort();
516 }
517 rseq_mempool_attr_destroy(attr);
518 max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
519 data = (struct spinlock_test_data __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
520 if (!data) {
521 perror("rseq_mempool_percpu_zmalloc");
522 abort();
523 }
524
525 for (i = 0; i < num_threads; i++) {
526 thread_data[i].reps = opt_reps;
527 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
528 thread_data[i].reg = 1;
529 else
530 thread_data[i].reg = 0;
531 thread_data[i].data = data;
532 ret = pthread_create(&test_threads[i], NULL,
533 test_percpu_spinlock_thread,
534 &thread_data[i]);
535 if (ret) {
536 errno = ret;
537 perror("pthread_create");
538 abort();
539 }
540 }
541
542 for (i = 0; i < num_threads; i++) {
543 ret = pthread_join(test_threads[i], NULL);
544 if (ret) {
545 errno = ret;
546 perror("pthread_join");
547 abort();
548 }
549 }
550
551 sum = 0;
552 for (i = 0; i < max_nr_cpus; i++)
553 sum += rseq_percpu_ptr(data, i)->count;
554
555 assert(sum == (uint64_t)opt_reps * num_threads);
556 rseq_mempool_percpu_free(data);
557 ret = rseq_mempool_destroy(mempool);
558 if (ret) {
559 perror("rseq_mempool_destroy");
560 abort();
561 }
562 }
563
564 static void *test_percpu_inc_thread(void *arg)
565 {
566 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
567 struct inc_test_data __rseq_percpu *data = thread_data->data;
568 long long i, reps;
569
570 if (!opt_disable_rseq && thread_data->reg &&
571 rseq_register_current_thread())
572 abort();
573 reps = thread_data->reps;
574 for (i = 0; i < reps; i++) {
575 int ret;
576
577 do {
578 int cpu;
579
580 cpu = get_current_cpu_id();
581 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
582 &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
583 } while (rseq_unlikely(ret));
584 #ifndef BENCHMARK
585 if (i != 0 && !(i % (reps / 10)))
586 printf_verbose("tid %d: count %lld\n",
587 (int) rseq_gettid(), i);
588 #endif
589 }
590 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
591 (int) rseq_gettid(), nr_abort, signals_delivered);
592 if (!opt_disable_rseq && thread_data->reg &&
593 rseq_unregister_current_thread())
594 abort();
595 return NULL;
596 }
597
598 static void test_percpu_inc(void)
599 {
600 const int num_threads = opt_threads;
601 int i, ret, max_nr_cpus;
602 uint64_t sum;
603 pthread_t test_threads[num_threads];
604 struct inc_test_data __rseq_percpu *data;
605 struct inc_thread_test_data thread_data[num_threads];
606 struct rseq_mempool *mempool;
607 struct rseq_mempool_attr *attr;
608
609 attr = rseq_mempool_attr_create();
610 if (!attr) {
611 perror("rseq_mempool_attr_create");
612 abort();
613 }
614 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
615 if (ret) {
616 perror("rseq_mempool_attr_set_percpu");
617 abort();
618 }
619 mempool = rseq_mempool_create("inc_test_data",
620 sizeof(struct inc_test_data), attr);
621 if (!mempool) {
622 perror("rseq_mempool_create");
623 abort();
624 }
625 rseq_mempool_attr_destroy(attr);
626 max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
627 data = (struct inc_test_data __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
628 if (!data) {
629 perror("rseq_mempool_percpu_zmalloc");
630 abort();
631 }
632
633 for (i = 0; i < num_threads; i++) {
634 thread_data[i].reps = opt_reps;
635 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
636 thread_data[i].reg = 1;
637 else
638 thread_data[i].reg = 0;
639 thread_data[i].data = data;
640 ret = pthread_create(&test_threads[i], NULL,
641 test_percpu_inc_thread,
642 &thread_data[i]);
643 if (ret) {
644 errno = ret;
645 perror("pthread_create");
646 abort();
647 }
648 }
649
650 for (i = 0; i < num_threads; i++) {
651 ret = pthread_join(test_threads[i], NULL);
652 if (ret) {
653 errno = ret;
654 perror("pthread_join");
655 abort();
656 }
657 }
658
659 sum = 0;
660 for (i = 0; i < max_nr_cpus; i++)
661 sum += rseq_percpu_ptr(data, i)->count;
662
663 assert(sum == (uint64_t)opt_reps * num_threads);
664 rseq_mempool_percpu_free(data);
665 ret = rseq_mempool_destroy(mempool);
666 if (ret) {
667 perror("rseq_mempool_destroy");
668 abort();
669 }
670 }
671
672 static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
673 struct percpu_list_node *node,
674 int *_cpu)
675 {
676 int cpu;
677
678 for (;;) {
679 intptr_t *targetptr, newval, expect;
680 struct percpu_list *cpulist;
681 int ret;
682
683 cpu = get_current_cpu_id();
684 cpulist = rseq_percpu_ptr(list, cpu);
685 /* Load list->c[cpu].head with single-copy atomicity. */
686 expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
687 newval = (intptr_t)node;
688 targetptr = (intptr_t *)&cpulist->head;
689 node->next = (struct percpu_list_node *)expect;
690 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
691 targetptr, expect, newval, cpu);
692 if (rseq_likely(!ret))
693 break;
694 /* Retry if comparison fails or rseq aborts. */
695 }
696 if (_cpu)
697 *_cpu = cpu;
698 }
699
700 /*
701 * Unlike a traditional lock-less linked list, the availability of a
702 * rseq primitive allows us to implement pop without concerns over
703 * ABA-type races.
704 */
705 static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
706 int *_cpu)
707 {
708 struct percpu_list_node *node = NULL;
709 int cpu;
710
711 for (;;) {
712 struct percpu_list_node *head;
713 intptr_t *targetptr, expectnot, *load;
714 struct percpu_list *cpulist;
715 long offset;
716 int ret;
717
718 cpu = get_current_cpu_id();
719 cpulist = rseq_percpu_ptr(list, cpu);
720 targetptr = (intptr_t *)&cpulist->head;
721 expectnot = (intptr_t)NULL;
722 offset = offsetof(struct percpu_list_node, next);
723 load = (intptr_t *)&head;
724 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
725 targetptr, expectnot,
726 offset, load, cpu);
727 if (rseq_likely(!ret)) {
728 node = head;
729 break;
730 }
731 if (ret > 0)
732 break;
733 /* Retry if rseq aborts. */
734 }
735 if (_cpu)
736 *_cpu = cpu;
737 return node;
738 }
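/*
 * Note (informal reading of the primitive used above):
 * rseq_load_cbeq_store_add_load_store__ptr() appears to perform, within a
 * single restartable sequence: load the per-cpu head, bail out (ret > 0) if
 * it equals expectnot (NULL, i.e. empty list), store the loaded head through
 * "load", then fetch head->next via the offsetof(next) displacement and
 * commit it as the new head. Because the whole read-modify-write happens in
 * one critical section pinned to this CPU, the classic ABA hazard of a
 * lock-less pop cannot occur.
 */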
739
740 /*
741 * __percpu_list_pop is not safe against concurrent accesses. Should
742 * only be used on lists that are not concurrently modified.
743 */
744 static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
745 {
746 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
747 struct percpu_list_node *node;
748
749 node = cpulist->head;
750 if (!node)
751 return NULL;
752 cpulist->head = node->next;
753 return node;
754 }
755
756 static void *test_percpu_list_thread(void *arg)
757 {
758 long long i, reps;
759 struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
760
761 if (!opt_disable_rseq && rseq_register_current_thread())
762 abort();
763
764 reps = opt_reps;
765 for (i = 0; i < reps; i++) {
766 struct percpu_list_node *node;
767
768 node = this_cpu_list_pop(list, NULL);
769 if (opt_yield)
770 sched_yield(); /* encourage shuffling */
771 if (node)
772 this_cpu_list_push(list, node, NULL);
773 }
774
775 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
776 (int) rseq_gettid(), nr_abort, signals_delivered);
777 if (!opt_disable_rseq && rseq_unregister_current_thread())
778 abort();
779
780 return NULL;
781 }
782
783 /* Simultaneous modification to a per-cpu linked list from many threads. */
784 static void test_percpu_list(void)
785 {
786 const int num_threads = opt_threads;
787 int i, j, ret, max_nr_cpus;
788 uint64_t sum = 0, expected_sum = 0;
789 struct percpu_list __rseq_percpu *list;
790 pthread_t test_threads[num_threads];
791 cpu_set_t allowed_cpus;
792 struct rseq_mempool *mempool;
793 struct rseq_mempool_attr *attr;
794
795 attr = rseq_mempool_attr_create();
796 if (!attr) {
797 perror("rseq_mempool_attr_create");
798 abort();
799 }
800 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
801 if (ret) {
802 perror("rseq_mempool_attr_set_percpu");
803 abort();
804 }
805 mempool = rseq_mempool_create("percpu_list",
806 sizeof(struct percpu_list), attr);
807 if (!mempool) {
808 perror("rseq_mempool_create");
809 abort();
810 }
811 rseq_mempool_attr_destroy(attr);
812 max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
813 list = (struct percpu_list __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
814 if (!list) {
815 perror("rseq_mempool_percpu_zmalloc");
816 abort();
817 }
818
819 /* Generate list entries for every usable cpu. */
820 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
821 for (i = 0; i < max_nr_cpus; i++) {
822 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
823 continue;
824 for (j = 1; j <= 100; j++) {
825 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
826 struct percpu_list_node *node;
827
828 expected_sum += j;
829
830 node = (struct percpu_list_node *) malloc(sizeof(*node));
831 assert(node);
832 node->data = j;
833 node->next = cpulist->head;
834 cpulist->head = node;
835 }
836 }
837
838 for (i = 0; i < num_threads; i++) {
839 ret = pthread_create(&test_threads[i], NULL,
840 test_percpu_list_thread, list);
841 if (ret) {
842 errno = ret;
843 perror("pthread_create");
844 abort();
845 }
846 }
847
848 for (i = 0; i < num_threads; i++) {
849 ret = pthread_join(test_threads[i], NULL);
850 if (ret) {
851 errno = ret;
852 perror("pthread_join");
853 abort();
854 }
855 }
856
857 for (i = 0; i < max_nr_cpus; i++) {
858 struct percpu_list_node *node;
859
860 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
861 continue;
862
863 while ((node = __percpu_list_pop(list, i))) {
864 sum += node->data;
865 free(node);
866 }
867 }
868
869 /*
870 * All entries should now be accounted for (unless some external
871 * actor is interfering with our allowed affinity while this
872 * test is running).
873 */
874 assert(sum == expected_sum);
875 rseq_mempool_percpu_free(list);
876 ret = rseq_mempool_destroy(mempool);
877 if (ret) {
878 perror("rseq_mempool_destroy");
879 abort();
880 }
881 }
882
883 static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
884 struct percpu_buffer_node *node,
885 int *_cpu)
886 {
887 bool result = false;
888 int cpu;
889
890 for (;;) {
891 struct percpu_buffer *cpubuffer;
892 intptr_t *targetptr_spec, newval_spec;
893 intptr_t *targetptr_final, newval_final;
894 intptr_t offset;
895 int ret;
896
897 cpu = get_current_cpu_id();
898 cpubuffer = rseq_percpu_ptr(buffer, cpu);
899 offset = RSEQ_READ_ONCE(cpubuffer->offset);
900 if (offset == cpubuffer->buflen)
901 break;
902 newval_spec = (intptr_t)node;
903 targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
904 newval_final = offset + 1;
905 targetptr_final = &cpubuffer->offset;
906 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
907 targetptr_final, offset, targetptr_spec,
908 newval_spec, newval_final, cpu);
909 if (rseq_likely(!ret)) {
910 result = true;
911 break;
912 }
913 /* Retry if comparison fails or rseq aborts. */
914 }
915 if (_cpu)
916 *_cpu = cpu;
917 return result;
918 }
919
920 static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
921 int *_cpu)
922 {
923 struct percpu_buffer_node *head;
924 int cpu;
925
926 for (;;) {
927 struct percpu_buffer *cpubuffer;
928 intptr_t *targetptr, newval;
929 intptr_t offset;
930 int ret;
931
932 cpu = get_current_cpu_id();
933 cpubuffer = rseq_percpu_ptr(buffer, cpu);
934 /* Load offset with single-copy atomicity. */
935 offset = RSEQ_READ_ONCE(cpubuffer->offset);
936 if (offset == 0) {
937 head = NULL;
938 break;
939 }
940 head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
941 newval = offset - 1;
942 targetptr = (intptr_t *)&cpubuffer->offset;
943 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
944 targetptr, offset,
945 (intptr_t *)&cpubuffer->array[offset - 1],
946 (intptr_t)head, newval, cpu);
947 if (rseq_likely(!ret))
948 break;
949 /* Retry if comparison fails or rseq aborts. */
950 }
951 if (_cpu)
952 *_cpu = cpu;
953 return head;
954 }
955
956 /*
957 * __percpu_buffer_pop is not safe against concurrent accesses. Should
958 * only be used on buffers that are not concurrently modified.
959 */
960 static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
961 int cpu)
962 {
963 struct percpu_buffer *cpubuffer;
964 struct percpu_buffer_node *head;
965 intptr_t offset;
966
967 cpubuffer = rseq_percpu_ptr(buffer, cpu);
968 offset = cpubuffer->offset;
969 if (offset == 0)
970 return NULL;
971 head = cpubuffer->array[offset - 1];
972 cpubuffer->offset = offset - 1;
973 return head;
974 }
975
976 static void *test_percpu_buffer_thread(void *arg)
977 {
978 long long i, reps;
979 struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;
980
981 if (!opt_disable_rseq && rseq_register_current_thread())
982 abort();
983
984 reps = opt_reps;
985 for (i = 0; i < reps; i++) {
986 struct percpu_buffer_node *node;
987
988 node = this_cpu_buffer_pop(buffer, NULL);
989 if (opt_yield)
990 sched_yield(); /* encourage shuffling */
991 if (node) {
992 if (!this_cpu_buffer_push(buffer, node, NULL)) {
993 /* Should increase buffer size. */
994 abort();
995 }
996 }
997 }
998
999 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1000 (int) rseq_gettid(), nr_abort, signals_delivered);
1001 if (!opt_disable_rseq && rseq_unregister_current_thread())
1002 abort();
1003
1004 return NULL;
1005 }
1006
1007 /* Simultaneous modification to a per-cpu buffer from many threads. */
1008 static void test_percpu_buffer(void)
1009 {
1010 const int num_threads = opt_threads;
1011 int i, j, ret, max_nr_cpus;
1012 uint64_t sum = 0, expected_sum = 0;
1013 struct percpu_buffer __rseq_percpu *buffer;
1014 pthread_t test_threads[num_threads];
1015 cpu_set_t allowed_cpus;
1016 struct rseq_mempool *mempool;
1017 struct rseq_mempool_attr *attr;
1018
1019 attr = rseq_mempool_attr_create();
1020 if (!attr) {
1021 perror("rseq_mempool_attr_create");
1022 abort();
1023 }
1024 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
1025 if (ret) {
1026 perror("rseq_mempool_attr_set_percpu");
1027 abort();
1028 }
1029 mempool = rseq_mempool_create("percpu_buffer",
1030 sizeof(struct percpu_buffer), attr);
1031 if (!mempool) {
1032 perror("rseq_mempool_create");
1033 abort();
1034 }
1035 rseq_mempool_attr_destroy(attr);
1036 max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
1037 buffer = (struct percpu_buffer __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
1038 if (!buffer) {
1039 perror("rseq_mempool_percpu_zmalloc");
1040 abort();
1041 }
1042
1043 /* Generate list entries for every usable cpu. */
1044 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1045 for (i = 0; i < max_nr_cpus; i++) {
1046 struct percpu_buffer *cpubuffer;
1047
1048 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1049 continue;
1050 cpubuffer = rseq_percpu_ptr(buffer, i);
1051 /* Worst case: every item ends up in the same CPU's buffer. */
1052 cpubuffer->array =
1053 (struct percpu_buffer_node **)
1054 malloc(sizeof(*cpubuffer->array) * max_nr_cpus *
1055 BUFFER_ITEM_PER_CPU);
1056 assert(cpubuffer->array);
1057 cpubuffer->buflen = max_nr_cpus * BUFFER_ITEM_PER_CPU;
1058 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
1059 struct percpu_buffer_node *node;
1060
1061 expected_sum += j;
1062
1063 /*
1064 * We could theoretically put the word-sized
1065 * "data" directly in the buffer. However, we
1066 * want to model objects that would not fit
1067 * within a single word, so allocate an object
1068 * for each node.
1069 */
1070 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
1071 assert(node);
1072 node->data = j;
1073 cpubuffer->array[j - 1] = node;
1074 cpubuffer->offset++;
1075 }
1076 }
1077
1078 for (i = 0; i < num_threads; i++) {
1079 ret = pthread_create(&test_threads[i], NULL,
1080 test_percpu_buffer_thread, buffer);
1081 if (ret) {
1082 errno = ret;
1083 perror("pthread_create");
1084 abort();
1085 }
1086 }
1087
1088 for (i = 0; i < num_threads; i++) {
1089 ret = pthread_join(test_threads[i], NULL);
1090 if (ret) {
1091 errno = ret;
1092 perror("pthread_join");
1093 abort();
1094 }
1095 }
1096
1097 for (i = 0; i < max_nr_cpus; i++) {
1098 struct percpu_buffer *cpubuffer;
1099 struct percpu_buffer_node *node;
1100
1101 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1102 continue;
1103
1104 cpubuffer = rseq_percpu_ptr(buffer, i);
1105 while ((node = __percpu_buffer_pop(buffer, i))) {
1106 sum += node->data;
1107 free(node);
1108 }
1109 free(cpubuffer->array);
1110 }
1111
1112 /*
1113 * All entries should now be accounted for (unless some external
1114 * actor is interfering with our allowed affinity while this
1115 * test is running).
1116 */
1117 assert(sum == expected_sum);
1118 rseq_mempool_percpu_free(buffer);
1119 ret = rseq_mempool_destroy(mempool);
1120 if (ret) {
1121 perror("rseq_mempool_destroy");
1122 abort();
1123 }
1124 }
1125
1126 static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1127 struct percpu_memcpy_buffer_node item,
1128 int *_cpu)
1129 {
1130 bool result = false;
1131 int cpu;
1132
1133 for (;;) {
1134 struct percpu_memcpy_buffer *cpubuffer;
1135 intptr_t *targetptr_final, newval_final, offset;
1136 char *destptr, *srcptr;
1137 size_t copylen;
1138 int ret;
1139
1140 cpu = get_current_cpu_id();
1141 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1142 /* Load offset with single-copy atomicity. */
1143 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1144 if (offset == cpubuffer->buflen)
1145 break;
1146 destptr = (char *)&cpubuffer->array[offset];
1147 srcptr = (char *)&item;
1148 /* copylen must be <= 4kB. */
1149 copylen = sizeof(item);
1150 newval_final = offset + 1;
1151 targetptr_final = &cpubuffer->offset;
1152 ret = rseq_load_cbne_memcpy_store__ptr(
1153 opt_mo, RSEQ_PERCPU,
1154 targetptr_final, offset,
1155 destptr, srcptr, copylen,
1156 newval_final, cpu);
1157 if (rseq_likely(!ret)) {
1158 result = true;
1159 break;
1160 }
1161 /* Retry if comparison fails or rseq aborts. */
1162 }
1163 if (_cpu)
1164 *_cpu = cpu;
1165 return result;
1166 }
1167
1168 static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1169 struct percpu_memcpy_buffer_node *item,
1170 int *_cpu)
1171 {
1172 bool result = false;
1173 int cpu;
1174
1175 for (;;) {
1176 struct percpu_memcpy_buffer *cpubuffer;
1177 intptr_t *targetptr_final, newval_final, offset;
1178 char *destptr, *srcptr;
1179 size_t copylen;
1180 int ret;
1181
1182 cpu = get_current_cpu_id();
1183 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1184 /* Load offset with single-copy atomicity. */
1185 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1186 if (offset == 0)
1187 break;
1188 destptr = (char *)item;
1189 srcptr = (char *)&cpubuffer->array[offset - 1];
1190 /* copylen must be <= 4kB. */
1191 copylen = sizeof(*item);
1192 newval_final = offset - 1;
1193 targetptr_final = &cpubuffer->offset;
1194 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1195 targetptr_final, offset, destptr, srcptr, copylen,
1196 newval_final, cpu);
1197 if (rseq_likely(!ret)) {
1198 result = true;
1199 break;
1200 }
1201 /* Retry if comparison fails or rseq aborts. */
1202 }
1203 if (_cpu)
1204 *_cpu = cpu;
1205 return result;
1206 }
1207
1208 /*
1209 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1210 * only be used on buffers that are not concurrently modified.
1211 */
1212 static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1213 struct percpu_memcpy_buffer_node *item,
1214 int cpu)
1215 {
1216 struct percpu_memcpy_buffer *cpubuffer;
1217 intptr_t offset;
1218
1219 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1220 offset = cpubuffer->offset;
1221 if (offset == 0)
1222 return false;
1223 memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
1224 cpubuffer->offset = offset - 1;
1225 return true;
1226 }
1227
1228 static void *test_percpu_memcpy_buffer_thread(void *arg)
1229 {
1230 long long i, reps;
1231 struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;
1232
1233 if (!opt_disable_rseq && rseq_register_current_thread())
1234 abort();
1235
1236 reps = opt_reps;
1237 for (i = 0; i < reps; i++) {
1238 struct percpu_memcpy_buffer_node item;
1239 bool result;
1240
1241 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1242 if (opt_yield)
1243 sched_yield(); /* encourage shuffling */
1244 if (result) {
1245 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1246 /* Should increase buffer size. */
1247 abort();
1248 }
1249 }
1250 }
1251
1252 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1253 (int) rseq_gettid(), nr_abort, signals_delivered);
1254 if (!opt_disable_rseq && rseq_unregister_current_thread())
1255 abort();
1256
1257 return NULL;
1258 }
1259
1260 /* Simultaneous modification to a per-cpu buffer from many threads. */
1261 static void test_percpu_memcpy_buffer(void)
1262 {
1263 const int num_threads = opt_threads;
1264 int i, j, ret, max_nr_cpus;
1265 uint64_t sum = 0, expected_sum = 0;
1266 struct percpu_memcpy_buffer __rseq_percpu *buffer;
1267 pthread_t test_threads[num_threads];
1268 cpu_set_t allowed_cpus;
1269 struct rseq_mempool *mempool;
1270 struct rseq_mempool_attr *attr;
1271
1272 attr = rseq_mempool_attr_create();
1273 if (!attr) {
1274 perror("rseq_mempool_attr_create");
1275 abort();
1276 }
1277 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
1278 if (ret) {
1279 perror("rseq_mempool_attr_set_percpu");
1280 abort();
1281 }
1282 mempool = rseq_mempool_create("percpu_memcpy_buffer",
1283 sizeof(struct percpu_memcpy_buffer), attr);
1284 if (!mempool) {
1285 perror("rseq_mempool_create");
1286 abort();
1287 }
1288 rseq_mempool_attr_destroy(attr);
1289 max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
1290 buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
1291 if (!buffer) {
1292 perror("rseq_mempool_percpu_zmalloc");
1293 abort();
1294 }
1295
1296 /* Generate list entries for every usable cpu. */
1297 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1298 for (i = 0; i < max_nr_cpus; i++) {
1299 struct percpu_memcpy_buffer *cpubuffer;
1300
1301 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1302 continue;
1303 cpubuffer = rseq_percpu_ptr(buffer, i);
1304 /* Worst case: every item ends up in the same CPU's buffer. */
1305 cpubuffer->array =
1306 (struct percpu_memcpy_buffer_node *)
1307 malloc(sizeof(*cpubuffer->array) * max_nr_cpus *
1308 MEMCPY_BUFFER_ITEM_PER_CPU);
1309 assert(cpubuffer->array);
1310 cpubuffer->buflen = max_nr_cpus * MEMCPY_BUFFER_ITEM_PER_CPU;
1311 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1312 expected_sum += 2 * j + 1;
1313
1314 /*
1315 * We could theoretically put the word-sized
1316 * "data" directly in the buffer. However, we
1317 * want to model objects that would not fit
1318 * within a single word, so each node carries
1319 * two fields and is copied in and out by value.
1320 */
1321 cpubuffer->array[j - 1].data1 = j;
1322 cpubuffer->array[j - 1].data2 = j + 1;
1323 cpubuffer->offset++;
1324 }
1325 }
1326
1327 for (i = 0; i < num_threads; i++) {
1328 ret = pthread_create(&test_threads[i], NULL,
1329 test_percpu_memcpy_buffer_thread,
1330 buffer);
1331 if (ret) {
1332 errno = ret;
1333 perror("pthread_create");
1334 abort();
1335 }
1336 }
1337
1338 for (i = 0; i < num_threads; i++) {
1339 ret = pthread_join(test_threads[i], NULL);
1340 if (ret) {
1341 errno = ret;
1342 perror("pthread_join");
1343 abort();
1344 }
1345 }
1346
1347 for (i = 0; i < max_nr_cpus; i++) {
1348 struct percpu_memcpy_buffer_node item;
1349 struct percpu_memcpy_buffer *cpubuffer;
1350
1351 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1352 continue;
1353
1354 cpubuffer = rseq_percpu_ptr(buffer, i);
1355 while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
1356 sum += item.data1;
1357 sum += item.data2;
1358 }
1359 free(cpubuffer->array);
1360 }
1361
1362 /*
1363 * All entries should now be accounted for (unless some external
1364 * actor is interfering with our allowed affinity while this
1365 * test is running).
1366 */
1367 assert(sum == expected_sum);
1368 rseq_mempool_percpu_free(buffer);
1369 ret = rseq_mempool_destroy(mempool);
1370 if (ret) {
1371 perror("rseq_mempool_destroy");
1372 abort();
1373 }
1374 }
1375
1376 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1377 {
1378 signals_delivered++;
1379 }
1380
1381 static int set_signal_handler(void)
1382 {
1383 int ret = 0;
1384 struct sigaction sa;
1385 sigset_t sigset;
1386
1387 ret = sigemptyset(&sigset);
1388 if (ret < 0) {
1389 perror("sigemptyset");
1390 return ret;
1391 }
1392
1393 sa.sa_handler = test_signal_interrupt_handler;
1394 sa.sa_mask = sigset;
1395 sa.sa_flags = 0;
1396 ret = sigaction(SIGUSR1, &sa, NULL);
1397 if (ret < 0) {
1398 perror("sigaction");
1399 return ret;
1400 }
1401
1402 printf_verbose("Signal handler set for SIGUSR1\n");
1403
1404 return ret;
1405 }
1406
1407 static
1408 bool membarrier_private_expedited_rseq_available(void)
1409 {
1410 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1411
1412 if (status < 0) {
1413 perror("membarrier");
1414 return false;
1415 }
1416 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1417 return false;
1418 return true;
1419 }
1420
1421 /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1422 #ifdef TEST_MEMBARRIER
1423 struct test_membarrier_thread_args {
1424 struct rseq_mempool *mempool;
1425 struct percpu_list __rseq_percpu *percpu_list_ptr;
1426 int stop;
1427 int max_nr_cpus;
1428 };
1429
1430 /* Worker threads modify data in their "active" percpu lists. */
1431 static
1432 void *test_membarrier_worker_thread(void *arg)
1433 {
1434 struct test_membarrier_thread_args *args =
1435 (struct test_membarrier_thread_args *)arg;
1436 const long long iters = opt_reps;
1437 long long i;
1438
1439 if (rseq_register_current_thread()) {
1440 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1441 errno, strerror(errno));
1442 abort();
1443 }
1444
1445 /* Wait for initialization. */
1446 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1447
1448 for (i = 0; i < iters; ++i) {
1449 int ret;
1450
1451 do {
1452 int cpu = get_current_cpu_id();
1453
1454 ret = rseq_load_add_load_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1455 (intptr_t *) &args->percpu_list_ptr,
1456 (RSEQ_MEMPOOL_STRIDE * cpu) + offsetof(struct percpu_list, head),
1457 1, cpu);
1458 } while (rseq_unlikely(ret));
1459 }
1460
1461 if (rseq_unregister_current_thread()) {
1462 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1463 errno, strerror(errno));
1464 abort();
1465 }
1466 return NULL;
1467 }
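/*
 * Note (informal reading of the worker loop above): within one restartable
 * sequence, rseq_load_add_load_load_add_store__ptr() appears to load the
 * currently "active" list pointer from args->percpu_list_ptr, add this CPU's
 * stride offset plus offsetof(head) to reach the per-cpu head pointer,
 * dereference it, and add 1 to head->data. Keeping the pointer load and the
 * increment in a single rseq critical section is what allows the manager
 * thread to swap percpu_list_ptr: the swap followed by an expedited rseq
 * membarrier ensures no worker commits an increment against the list that
 * just became inactive.
 */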
1468
1469 static
1470 struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_mempool *mempool)
1471 {
1472 struct percpu_list __rseq_percpu *list;
1473 int i, max_nr_cpus;
1474
1475 max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
1476 list = (struct percpu_list __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
1477 if (!list) {
1478 perror("rseq_mempool_percpu_zmalloc");
1479 return NULL;
1480 }
1481 for (i = 0; i < max_nr_cpus; i++) {
1482 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
1483 struct percpu_list_node *node;
1484
1485 node = (struct percpu_list_node *) malloc(sizeof(*node));
1486 assert(node);
1487 node->data = 0;
1488 node->next = NULL;
1489 cpulist->head = node;
1490 }
1491 return list;
1492 }
1493
1494 static
1495 void test_membarrier_free_percpu_list(struct test_membarrier_thread_args *args,
1496 struct percpu_list __rseq_percpu *list)
1497 {
1498 int i;
1499
1500 for (i = 0; i < args->max_nr_cpus; i++)
1501 free(rseq_percpu_ptr(list, i)->head);
1502 rseq_mempool_percpu_free(list);
1503 }
1504
1505 static
1506 long long test_membarrier_count_percpu_list(struct test_membarrier_thread_args *args,
1507 struct percpu_list __rseq_percpu *list)
1508 {
1509 long long total_count = 0;
1510 int i;
1511
1512 for (i = 0; i < args->max_nr_cpus; i++)
1513 total_count += rseq_percpu_ptr(list, i)->head->data;
1514 return total_count;
1515 }
1516
1517 /*
1518 * The manager thread swaps per-cpu lists that worker threads see,
1519 * and validates that there are no unexpected modifications.
1520 */
1521 static
1522 void *test_membarrier_manager_thread(void *arg)
1523 {
1524 struct test_membarrier_thread_args *args =
1525 (struct test_membarrier_thread_args *)arg;
1526 struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
1527 intptr_t expect_a = 0, expect_b = 0;
1528 int cpu_a = 0, cpu_b = 0;
1529 struct rseq_mempool *mempool;
1530 int ret;
1531 long long total_count = 0;
1532 struct rseq_mempool_attr *attr;
1533
1534 attr = rseq_mempool_attr_create();
1535 if (!attr) {
1536 perror("rseq_mempool_attr_create");
1537 abort();
1538 }
1539 ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
1540 if (ret) {
1541 perror("rseq_mempool_attr_set_percpu");
1542 abort();
1543 }
1544 mempool = rseq_mempool_create("percpu_list",
1545 sizeof(struct percpu_list), attr);
1546 if (!mempool) {
1547 perror("rseq_mempool_create");
1548 abort();
1549 }
1550 rseq_mempool_attr_destroy(attr);
1551 args->max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
1552 args->mempool = mempool;
1553
1554 if (rseq_register_current_thread()) {
1555 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1556 errno, strerror(errno));
1557 abort();
1558 }
1559
1560 /* Init lists. */
1561 list_a = test_membarrier_alloc_percpu_list(mempool);
1562 assert(list_a);
1563 list_b = test_membarrier_alloc_percpu_list(mempool);
1564 assert(list_b);
1565
1566 /* Initialize lists before publishing them. */
1567 rseq_smp_wmb();
1568
1569 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1570
1571 while (!RSEQ_READ_ONCE(args->stop)) {
1572 /* list_a is "active". */
1573 cpu_a = rand() % args->max_nr_cpus;
1574 /*
1575 * As list_b is "inactive", we should never see changes
1576 * to list_b.
1577 */
1578 if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
1579 fprintf(stderr, "Membarrier test failed\n");
1580 abort();
1581 }
1582
1583 /* Make list_b "active". */
1584 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
1585 if (rseq_membarrier_expedited(cpu_a) &&
1586 errno != ENXIO /* missing CPU */) {
1587 perror("sys_membarrier");
1588 abort();
1589 }
1590 /*
1591 * CPU A should now only modify list_b, so the values
1592 * in list_a should be stable.
1593 */
1594 expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
1595
1596 cpu_b = rand() % args->max_nr_cpus;
1597 /*
1598 * As list_a is "inactive", we should never see changes
1599 * to list_a.
1600 */
1601 if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
1602 fprintf(stderr, "Membarrier test failed\n");
1603 abort();
1604 }
1605
1606 /* Make list_a "active". */
1607 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1608 if (rseq_membarrier_expedited(cpu_b) &&
1609 errno != ENXIO /* missing CPU */) {
1610 perror("sys_membarrier");
1611 abort();
1612 }
1613 /* Remember a value from list_b. */
1614 expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
1615 }
1616
1617 total_count += test_membarrier_count_percpu_list(args, list_a);
1618 total_count += test_membarrier_count_percpu_list(args, list_b);
1619
1620 /* Validate that we observe the right number of increments. */
1621 if (total_count != opt_threads * opt_reps) {
1622 fprintf(stderr, "Error: Observed %lld increments, expected %lld\n",
1623 total_count, opt_threads * opt_reps);
1624 abort();
1625 }
1626 test_membarrier_free_percpu_list(args, list_a);
1627 test_membarrier_free_percpu_list(args, list_b);
1628
1629 if (rseq_unregister_current_thread()) {
1630 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1631 errno, strerror(errno));
1632 abort();
1633 }
1634 ret = rseq_mempool_destroy(mempool);
1635 if (ret) {
1636 perror("rseq_mempool_destroy");
1637 abort();
1638 }
1639
1640 return NULL;
1641 }
1642
1643 static
1644 void test_membarrier(void)
1645 {
1646 const int num_threads = opt_threads;
1647 struct test_membarrier_thread_args thread_args;
1648 pthread_t worker_threads[num_threads];
1649 pthread_t manager_thread;
1650 int i, ret;
1651
1652 if (!membarrier_private_expedited_rseq_available()) {
1653 fprintf(stderr, "Membarrier private expedited rseq not available. "
1654 "Skipping membarrier test.\n");
1655 return;
1656 }
1657 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1658 perror("sys_membarrier");
1659 abort();
1660 }
1661
1662 thread_args.percpu_list_ptr = NULL;
1663 thread_args.stop = 0;
1664 ret = pthread_create(&manager_thread, NULL,
1665 test_membarrier_manager_thread, &thread_args);
1666 if (ret) {
1667 errno = ret;
1668 perror("pthread_create");
1669 abort();
1670 }
1671
1672 for (i = 0; i < num_threads; i++) {
1673 ret = pthread_create(&worker_threads[i], NULL,
1674 test_membarrier_worker_thread, &thread_args);
1675 if (ret) {
1676 errno = ret;
1677 perror("pthread_create");
1678 abort();
1679 }
1680 }
1681
1682
1683 for (i = 0; i < num_threads; i++) {
1684 ret = pthread_join(worker_threads[i], NULL);
1685 if (ret) {
1686 errno = ret;
1687 perror("pthread_join");
1688 abort();
1689 }
1690 }
1691
1692 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1693 ret = pthread_join(manager_thread, NULL);
1694 if (ret) {
1695 errno = ret;
1696 perror("pthread_join");
1697 abort();
1698 }
1699 }
1700 #else /* TEST_MEMBARRIER */
1701 static
1702 void test_membarrier(void)
1703 {
1704 if (!membarrier_private_expedited_rseq_available()) {
1705 fprintf(stderr, "Membarrier private expedited rseq not available. "
1706 "Skipping membarrier test.\n");
1707 return;
1708 }
1709 fprintf(stderr, "rseq_load_add_load_load_add_store__ptr is not implemented on this architecture. "
1710 "Skipping membarrier test.\n");
1711 }
1712 #endif
1713
1714 static void show_usage(char **argv)
1715 {
1716 printf("Usage : %s <OPTIONS>\n",
1717 argv[0]);
1718 printf("OPTIONS:\n");
1719 printf(" [-1 loops] Number of loops for delay injection 1\n");
1720 printf(" [-2 loops] Number of loops for delay injection 2\n");
1721 printf(" [-3 loops] Number of loops for delay injection 3\n");
1722 printf(" [-4 loops] Number of loops for delay injection 4\n");
1723 printf(" [-5 loops] Number of loops for delay injection 5\n");
1724 printf(" [-6 loops] Number of loops for delay injection 6\n");
1725 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1726 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1727 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1728 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1729 printf(" [-y] Yield\n");
1730 printf(" [-k] Kill thread with signal\n");
1731 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1732 printf(" [-t N] Number of threads (default 200)\n");
1733 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1734 printf(" [-d] Disable rseq system call (no initialization)\n");
1735 printf(" [-D M] Disable rseq for each M threads\n");
1736 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1737 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1738 printf(" [-c] Check if the rseq syscall is available.\n");
1739 printf(" [-v] Verbose output.\n");
1740 printf(" [-h] Show this help.\n");
1741 printf("\n");
1742 }
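/*
 * Example invocations (illustrative; binary name assumed to be param_test):
 *
 *	./param_test -c				# check rseq syscall availability
 *	./param_test -T s -t 16 -r 100000 -v	# spinlock test, 16 threads, verbose
 *	./param_test -T l -y			# per-cpu list test with yield injection
 *	./param_test -T r -v			# membarrier list-swap test
 */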
1743
1744 int main(int argc, char **argv)
1745 {
1746 int i;
1747
1748 for (i = 1; i < argc; i++) {
1749 if (argv[i][0] != '-')
1750 continue;
1751 switch (argv[i][1]) {
1752 case '1':
1753 case '2':
1754 case '3':
1755 case '4':
1756 case '5':
1757 case '6':
1758 case '7':
1759 case '8':
1760 case '9':
1761 if (argc < i + 2) {
1762 show_usage(argv);
1763 goto error;
1764 }
1765 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1766 i++;
1767 break;
1768 case 'm':
1769 if (argc < i + 2) {
1770 show_usage(argv);
1771 goto error;
1772 }
1773 opt_modulo = atol(argv[i + 1]);
1774 if (opt_modulo < 0) {
1775 show_usage(argv);
1776 goto error;
1777 }
1778 i++;
1779 break;
1780 case 's':
1781 if (argc < i + 2) {
1782 show_usage(argv);
1783 goto error;
1784 }
1785 opt_sleep = atol(argv[i + 1]);
1786 if (opt_sleep < 0) {
1787 show_usage(argv);
1788 goto error;
1789 }
1790 i++;
1791 break;
1792 case 'y':
1793 opt_yield = 1;
1794 break;
1795 case 'k':
1796 opt_signal = 1;
1797 break;
1798 case 'd':
1799 opt_disable_rseq = 1;
1800 break;
1801 case 'D':
1802 if (argc < i + 2) {
1803 show_usage(argv);
1804 goto error;
1805 }
1806 opt_disable_mod = atol(argv[i + 1]);
1807 if (opt_disable_mod < 0) {
1808 show_usage(argv);
1809 goto error;
1810 }
1811 i++;
1812 break;
1813 case 't':
1814 if (argc < i + 2) {
1815 show_usage(argv);
1816 goto error;
1817 }
1818 opt_threads = atol(argv[i + 1]);
1819 if (opt_threads < 0) {
1820 show_usage(argv);
1821 goto error;
1822 }
1823 i++;
1824 break;
1825 case 'r':
1826 if (argc < i + 2) {
1827 show_usage(argv);
1828 goto error;
1829 }
1830 opt_reps = atoll(argv[i + 1]);
1831 if (opt_reps < 0) {
1832 show_usage(argv);
1833 goto error;
1834 }
1835 i++;
1836 break;
1837 case 'h':
1838 show_usage(argv);
1839 goto end;
1840 case 'T':
1841 if (argc < i + 2) {
1842 show_usage(argv);
1843 goto error;
1844 }
1845 opt_test = *argv[i + 1];
1846 switch (opt_test) {
1847 case 's':
1848 case 'l':
1849 case 'i':
1850 case 'b':
1851 case 'm':
1852 case 'r':
1853 break;
1854 default:
1855 show_usage(argv);
1856 goto error;
1857 }
1858 i++;
1859 break;
1860 case 'v':
1861 verbose = 1;
1862 break;
1863 case 'M':
1864 opt_mo = RSEQ_MO_RELEASE;
1865 break;
1866 case 'c':
1867 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1868 printf_verbose("The rseq syscall is available.\n");
1869 goto end;
1870 } else {
1871 printf_verbose("The rseq syscall is unavailable.\n");
1872 goto no_rseq;
1873 }
1874 default:
1875 show_usage(argv);
1876 goto error;
1877 }
1878 }
1879
1880 loop_cnt_1 = loop_cnt[1];
1881 loop_cnt_2 = loop_cnt[2];
1882 loop_cnt_3 = loop_cnt[3];
1883 loop_cnt_4 = loop_cnt[4];
1884 loop_cnt_5 = loop_cnt[5];
1885 loop_cnt_6 = loop_cnt[6];
1886
1887 if (set_signal_handler())
1888 goto error;
1889
1890 if (!opt_disable_rseq && rseq_register_current_thread())
1891 goto error;
1892 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1893 printf_verbose("The rseq cpu id getter is unavailable\n");
1894 goto no_rseq;
1895 }
1896 switch (opt_test) {
1897 case 's':
1898 printf_verbose("spinlock\n");
1899 test_percpu_spinlock();
1900 break;
1901 case 'l':
1902 printf_verbose("linked list\n");
1903 test_percpu_list();
1904 break;
1905 case 'b':
1906 printf_verbose("buffer\n");
1907 test_percpu_buffer();
1908 break;
1909 case 'm':
1910 printf_verbose("memcpy buffer\n");
1911 test_percpu_memcpy_buffer();
1912 break;
1913 case 'i':
1914 printf_verbose("counter increment\n");
1915 test_percpu_inc();
1916 break;
1917 case 'r':
1918 printf_verbose("membarrier\n");
1919 test_membarrier();
1920 break;
1921 }
1922 if (!opt_disable_rseq && rseq_unregister_current_thread())
1923 abort();
1924 end:
1925 return 0;
1926
1927 error:
1928 return -1;
1929
1930 no_rseq:
1931 return 2;
1932 }