percpu alloc: introduce rseq mmap attributes
[librseq.git] / tests / param_test.c
90702366 1// SPDX-License-Identifier: MIT
f2d7b530 2// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6#include <assert.h>
cb900b45 7#include <linux/version.h>
5368dcb4 8#include <linux/membarrier.h>
9#include <pthread.h>
10#include <sched.h>
11#include <stdint.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <syscall.h>
16#include <unistd.h>
17#include <poll.h>
18#include <sys/types.h>
19#include <signal.h>
20#include <errno.h>
21#include <stddef.h>
369688a5 22#include <stdbool.h>
23#include <rseq/percpu-alloc.h>
24
25#define PERCPU_POOL_LEN (1024*1024) /* 1MB */
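/*
 * PERCPU_POOL_LEN above is handed to rseq_percpu_pool_create() by every
 * test below, together with CPU_SETSIZE as the maximum number of CPUs.
 * It is presumably the per-CPU length (in bytes) of the pool mapping;
 * see <rseq/percpu-alloc.h> for the exact parameter semantics.
 */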
31b44ba2 26
27#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
28enum {
29 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
30 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
31};
32
33enum {
34 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
35};
36#endif
37
38#define NR_INJECT 9
39static int loop_cnt[NR_INJECT + 1];
40
41static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
42static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
43static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
44static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
45static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
46static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
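/*
 * Copies of loop_cnt[1..6] (set from the -1 .. -6 command line options
 * in main()) exposed under fixed asm symbol names, so that the x86
 * RSEQ_INJECT_ASM() delay loops below can reach them directly from
 * inline assembly. Other architectures pass loop_cnt[n] as "m" operands
 * instead.
 */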
47
48static int opt_modulo, verbose;
49
50static int opt_yield, opt_signal, opt_sleep,
51 opt_disable_rseq, opt_threads = 200,
369688a5 52 opt_disable_mod = 0, opt_test = 's';
31b44ba2 53
31b44ba2 54static long long opt_reps = 5000;
55
56static __thread __attribute__((tls_model("initial-exec")))
57unsigned int signals_delivered;
58
59static inline pid_t rseq_gettid(void)
60{
61 return syscall(__NR_gettid);
62}
63
64#ifndef BENCHMARK
65
66static __thread __attribute__((tls_model("initial-exec"), unused))
67int yield_mod_cnt, nr_abort;
68
69#define printf_verbose(fmt, ...) \
70 do { \
71 if (verbose) \
72 printf(fmt, ## __VA_ARGS__); \
73 } while (0)
74
75#ifdef __i386__
76
77#define INJECT_ASM_REG "eax"
78
79#define RSEQ_INJECT_CLOBBER \
80 , INJECT_ASM_REG
81
9b6b5311 82/*
b658cf73 83 * Use ip-relative addressing to get the loop counter.
9b6b5311 84 */
85#define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
86 "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
87 "leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
88 "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
89 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
90 "jz 333f\n\t" \
91 "222:\n\t" \
92 "dec %%" INJECT_ASM_REG "\n\t" \
93 "jnz 222b\n\t" \
94 "333:\n\t"
95
96#define RSEQ_INJECT_ASM(n) \
97 __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
98
99#elif defined(__x86_64__)
100
101#define INJECT_ASM_REG_P "rax"
102#define INJECT_ASM_REG "eax"
103
104#define RSEQ_INJECT_CLOBBER \
105 , INJECT_ASM_REG_P \
106 , INJECT_ASM_REG
107
108#define RSEQ_INJECT_ASM(n) \
109 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
110 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
111 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
112 "jz 333f\n\t" \
113 "222:\n\t" \
114 "dec %%" INJECT_ASM_REG "\n\t" \
115 "jnz 222b\n\t" \
116 "333:\n\t"
117
118#elif defined(__s390__)
119
120#define RSEQ_INJECT_INPUT \
121 , [loop_cnt_1]"m"(loop_cnt[1]) \
122 , [loop_cnt_2]"m"(loop_cnt[2]) \
123 , [loop_cnt_3]"m"(loop_cnt[3]) \
124 , [loop_cnt_4]"m"(loop_cnt[4]) \
125 , [loop_cnt_5]"m"(loop_cnt[5]) \
126 , [loop_cnt_6]"m"(loop_cnt[6])
127
128#define INJECT_ASM_REG "r12"
129
130#define RSEQ_INJECT_CLOBBER \
131 , INJECT_ASM_REG
132
133#define RSEQ_INJECT_ASM(n) \
134 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
135 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
136 "je 333f\n\t" \
137 "222:\n\t" \
138 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
139 "jnz 222b\n\t" \
140 "333:\n\t"
141
142#elif defined(__ARMEL__)
143
144#define RSEQ_INJECT_INPUT \
145 , [loop_cnt_1]"m"(loop_cnt[1]) \
146 , [loop_cnt_2]"m"(loop_cnt[2]) \
147 , [loop_cnt_3]"m"(loop_cnt[3]) \
148 , [loop_cnt_4]"m"(loop_cnt[4]) \
149 , [loop_cnt_5]"m"(loop_cnt[5]) \
150 , [loop_cnt_6]"m"(loop_cnt[6])
151
152#define INJECT_ASM_REG "r4"
153
154#define RSEQ_INJECT_CLOBBER \
155 , INJECT_ASM_REG
156
157#define RSEQ_INJECT_ASM(n) \
158 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
159 "cmp " INJECT_ASM_REG ", #0\n\t" \
160 "beq 333f\n\t" \
161 "222:\n\t" \
162 "subs " INJECT_ASM_REG ", #1\n\t" \
163 "bne 222b\n\t" \
164 "333:\n\t"
165
166#elif defined(__AARCH64EL__)
167
168#define RSEQ_INJECT_INPUT \
169 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
170 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
171 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
172 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
173 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
174 , [loop_cnt_6] "Qo" (loop_cnt[6])
175
176#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
177
178#define RSEQ_INJECT_ASM(n) \
179 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
180 " cbz " INJECT_ASM_REG ", 333f\n" \
181 "222:\n" \
182 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
183 " cbnz " INJECT_ASM_REG ", 222b\n" \
184 "333:\n"
185
f1c6b55b 186#elif defined(__PPC__)
187
188#define RSEQ_INJECT_INPUT \
189 , [loop_cnt_1]"m"(loop_cnt[1]) \
190 , [loop_cnt_2]"m"(loop_cnt[2]) \
191 , [loop_cnt_3]"m"(loop_cnt[3]) \
192 , [loop_cnt_4]"m"(loop_cnt[4]) \
193 , [loop_cnt_5]"m"(loop_cnt[5]) \
194 , [loop_cnt_6]"m"(loop_cnt[6])
195
196#define INJECT_ASM_REG "r18"
197
198#define RSEQ_INJECT_CLOBBER \
199 , INJECT_ASM_REG
200
201#define RSEQ_INJECT_ASM(n) \
202 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
203 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
204 "beq 333f\n\t" \
205 "222:\n\t" \
206 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
207 "bne 222b\n\t" \
208 "333:\n\t"
209
210#elif defined(__mips__)
211
212#define RSEQ_INJECT_INPUT \
213 , [loop_cnt_1]"m"(loop_cnt[1]) \
214 , [loop_cnt_2]"m"(loop_cnt[2]) \
215 , [loop_cnt_3]"m"(loop_cnt[3]) \
216 , [loop_cnt_4]"m"(loop_cnt[4]) \
217 , [loop_cnt_5]"m"(loop_cnt[5]) \
218 , [loop_cnt_6]"m"(loop_cnt[6])
219
220#define INJECT_ASM_REG "$5"
221
222#define RSEQ_INJECT_CLOBBER \
223 , INJECT_ASM_REG
224
225#define RSEQ_INJECT_ASM(n) \
226 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
227 "beqz " INJECT_ASM_REG ", 333f\n\t" \
228 "222:\n\t" \
229 "addiu " INJECT_ASM_REG ", -1\n\t" \
230 "bnez " INJECT_ASM_REG ", 222b\n\t" \
231 "333:\n\t"
232
233#elif defined(__riscv)
234
235#define RSEQ_INJECT_INPUT \
236 , [loop_cnt_1]"m"(loop_cnt[1]) \
237 , [loop_cnt_2]"m"(loop_cnt[2]) \
238 , [loop_cnt_3]"m"(loop_cnt[3]) \
239 , [loop_cnt_4]"m"(loop_cnt[4]) \
240 , [loop_cnt_5]"m"(loop_cnt[5]) \
241 , [loop_cnt_6]"m"(loop_cnt[6])
242
243#define INJECT_ASM_REG "t1"
244
245#define RSEQ_INJECT_CLOBBER \
246 , INJECT_ASM_REG
247
248#define RSEQ_INJECT_ASM(n) \
249 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
250 "beqz " INJECT_ASM_REG ", 333f\n\t" \
251 "222:\n\t" \
252 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
253 "bnez " INJECT_ASM_REG ", 222b\n\t" \
254 "333:\n\t"
255
256#else
257#error unsupported target
258#endif
259
260#define RSEQ_INJECT_FAILED \
261 nr_abort++;
262
263#define RSEQ_INJECT_C(n) \
264{ \
265 int loc_i, loc_nr_loops = loop_cnt[n]; \
266 \
267 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
268 rseq_barrier(); \
269 } \
270 if (loc_nr_loops == -1 && opt_modulo) { \
271 if (yield_mod_cnt == opt_modulo - 1) { \
272 if (opt_sleep > 0) \
273 poll(NULL, 0, opt_sleep); \
274 if (opt_yield) \
275 sched_yield(); \
276 if (opt_signal) \
277 raise(SIGUSR1); \
278 yield_mod_cnt = 0; \
279 } else { \
280 yield_mod_cnt++; \
281 } \
282 } \
283}
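/*
 * RSEQ_INJECT_C() is the C-level counterpart of RSEQ_INJECT_ASM(); both
 * hooks are picked up by the rseq headers included below and embedded
 * in their critical sections. It spins loop_cnt[n] times at injection
 * point n, and when loop_cnt[n] is -1 (see options -7/-8/-9) combined
 * with -m it periodically sleeps (-s), yields (-y) or raises SIGUSR1
 * (-k) to provoke aborts of the critical sections under test.
 */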
284
285#else
286
287#define printf_verbose(fmt, ...)
288
289#endif /* BENCHMARK */
290
291#include <rseq/rseq.h>
292
293static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
294
295static int sys_membarrier(int cmd, int flags, int cpu_id)
296{
297 return syscall(__NR_membarrier, cmd, flags, cpu_id);
298}
299
f6ddbd31 300#ifdef rseq_arch_has_load_add_load_load_add_store
301#define TEST_MEMBARRIER
302#endif
303
304#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
305# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
306static
307int get_current_cpu_id(void)
308{
309 return rseq_current_mm_cid();
310}
311static
312bool rseq_validate_cpu_id(void)
313{
314 return rseq_mm_cid_available();
315}
316static
317bool rseq_use_cpu_index(void)
318{
319 return false; /* Use mm_cid */
320}
321# ifdef TEST_MEMBARRIER
322/*
323 * Membarrier does not currently support targeting a mm_cid, so
324 * issue the barrier on all cpus.
325 */
326static
327int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
328{
329 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
330 0, 0);
331}
332# endif /* TEST_MEMBARRIER */
333#else
334# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
335static
336int get_current_cpu_id(void)
337{
338 return rseq_cpu_start();
339}
340static
341bool rseq_validate_cpu_id(void)
342{
343 return rseq_current_cpu_raw() >= 0;
344}
345static
346bool rseq_use_cpu_index(void)
347{
348 return true; /* Use cpu_id as index. */
349}
350# ifdef TEST_MEMBARRIER
351static
352int rseq_membarrier_expedited(int cpu)
353{
354 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
355 MEMBARRIER_CMD_FLAG_CPU, cpu);
356}
357# endif /* TEST_MEMBARRIER */
358#endif
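/*
 * Summary of the two indexing modes selected above: with
 * BUILDOPT_RSEQ_PERCPU_MM_CID, per-CPU data is indexed by the
 * concurrency id (mm_cid) and membarrier has to target all CPUs;
 * without it, data is indexed by cpu_id and the membarrier can be
 * directed at a single CPU with MEMBARRIER_CMD_FLAG_CPU.
 */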
359
31b44ba2 360struct percpu_lock {
324633af 361 intptr_t v;
362};
363
364struct spinlock_test_data {
365 struct percpu_lock lock;
324633af 366 intptr_t count;
367};
368
369struct spinlock_thread_test_data {
fe7f954a 370 struct spinlock_test_data __rseq_percpu *data;
371 long long reps;
372 int reg;
373};
374
375struct inc_test_data {
c8278da8 376 intptr_t count;
377};
378
379struct inc_thread_test_data {
c8278da8 380 struct inc_test_data __rseq_percpu *data;
381 long long reps;
382 int reg;
383};
384
385struct percpu_list_node {
386 intptr_t data;
387 struct percpu_list_node *next;
388};
389
31b44ba2 390struct percpu_list {
b08be829 391 struct percpu_list_node *head;
392};
393
394#define BUFFER_ITEM_PER_CPU 100
395
396struct percpu_buffer_node {
397 intptr_t data;
398};
399
bac8cd24 400struct percpu_buffer {
401 intptr_t offset;
402 intptr_t buflen;
403 struct percpu_buffer_node **array;
404};
405
406#define MEMCPY_BUFFER_ITEM_PER_CPU 100
407
408struct percpu_memcpy_buffer_node {
409 intptr_t data1;
410 uint64_t data2;
411};
412
78adbd91 413struct percpu_memcpy_buffer {
414 intptr_t offset;
415 intptr_t buflen;
416 struct percpu_memcpy_buffer_node *array;
417};
418
419/* A simple percpu spinlock. Grabs lock on current cpu. */
fe7f954a 420static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
421{
422 int cpu;
423
424 for (;;) {
425 int ret;
426
369688a5 427 cpu = get_current_cpu_id();
428 if (cpu < 0) {
429 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
430 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
431 abort();
432 }
41149e28 433 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
324633af 434 &rseq_percpu_ptr(lock, cpu)->v,
435 0, 1, cpu);
436 if (rseq_likely(!ret))
437 break;
438 /* Retry if comparison fails or rseq aborts. */
439 }
440 /*
441 * Acquire semantic when taking lock after control dependency.
442 * Matches rseq_smp_store_release().
443 */
444 rseq_smp_acquire__after_ctrl_dep();
445 return cpu;
446}
447
fe7f954a 448static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
31b44ba2 449{
324633af 450 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
451 /*
452 * Release lock, with release semantic. Matches
453 * rseq_smp_acquire__after_ctrl_dep().
454 */
324633af 455 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
456}
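/*
 * Typical lock/unlock pattern, as used by test_percpu_spinlock_thread()
 * below:
 *
 *	int cpu = rseq_this_cpu_lock(&data->lock);
 *	rseq_percpu_ptr(data, cpu)->count++;
 *	rseq_percpu_unlock(&data->lock, cpu);
 */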
457
6e284b80 458static void *test_percpu_spinlock_thread(void *arg)
31b44ba2 459{
d268885a 460 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
fe7f954a 461 struct spinlock_test_data __rseq_percpu *data = thread_data->data;
462 long long i, reps;
463
464 if (!opt_disable_rseq && thread_data->reg &&
465 rseq_register_current_thread())
466 abort();
467 reps = thread_data->reps;
468 for (i = 0; i < reps; i++) {
af895f04 469 int cpu = rseq_this_cpu_lock(&data->lock);
324633af 470 rseq_percpu_ptr(data, cpu)->count++;
471 rseq_percpu_unlock(&data->lock, cpu);
472#ifndef BENCHMARK
473 if (i != 0 && !(i % (reps / 10)))
474 printf_verbose("tid %d: count %lld\n",
475 (int) rseq_gettid(), i);
476#endif
477 }
478 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
479 (int) rseq_gettid(), nr_abort, signals_delivered);
480 if (!opt_disable_rseq && thread_data->reg &&
481 rseq_unregister_current_thread())
482 abort();
483 return NULL;
484}
485
486/*
487 * A simple test which implements a sharded counter using a per-cpu
488 * lock. Obviously real applications might prefer to simply use a
489 * per-cpu increment; however, this is reasonable for a test and the
490 * lock can be extended to synchronize more complicated operations.
491 */
6e284b80 492static void test_percpu_spinlock(void)
493{
494 const int num_threads = opt_threads;
495 int i, ret;
496 uint64_t sum;
497 pthread_t test_threads[num_threads];
fe7f954a 498 struct spinlock_test_data __rseq_percpu *data;
31b44ba2 499 struct spinlock_thread_test_data thread_data[num_threads];
500 struct rseq_percpu_pool *mempool;
501
502 mempool = rseq_percpu_pool_create(sizeof(struct spinlock_test_data),
9bd07c29 503 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
504 if (!mempool) {
505 perror("rseq_percpu_pool_create");
506 abort();
507 }
fe7f954a 508 data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
509 if (!data) {
510 perror("rseq_percpu_zmalloc");
511 abort();
512 }
31b44ba2 513
514 for (i = 0; i < num_threads; i++) {
515 thread_data[i].reps = opt_reps;
516 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
517 thread_data[i].reg = 1;
518 else
519 thread_data[i].reg = 0;
324633af 520 thread_data[i].data = data;
521 ret = pthread_create(&test_threads[i], NULL,
522 test_percpu_spinlock_thread,
523 &thread_data[i]);
524 if (ret) {
525 errno = ret;
526 perror("pthread_create");
527 abort();
528 }
529 }
530
531 for (i = 0; i < num_threads; i++) {
532 ret = pthread_join(test_threads[i], NULL);
533 if (ret) {
534 errno = ret;
535 perror("pthread_join");
536 abort();
537 }
538 }
539
540 sum = 0;
541 for (i = 0; i < CPU_SETSIZE; i++)
324633af 542 sum += rseq_percpu_ptr(data, i)->count;
543
544 assert(sum == (uint64_t)opt_reps * num_threads);
545 rseq_percpu_free(data);
546 ret = rseq_percpu_pool_destroy(mempool);
547 if (ret) {
548 perror("rseq_percpu_pool_destroy");
549 abort();
550 }
551}
552
6e284b80 553static void *test_percpu_inc_thread(void *arg)
31b44ba2 554{
d268885a 555 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
c8278da8 556 struct inc_test_data __rseq_percpu *data = thread_data->data;
557 long long i, reps;
558
559 if (!opt_disable_rseq && thread_data->reg &&
560 rseq_register_current_thread())
561 abort();
562 reps = thread_data->reps;
563 for (i = 0; i < reps; i++) {
564 int ret;
565
566 do {
567 int cpu;
568
369688a5 569 cpu = get_current_cpu_id();
41149e28 570 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
c8278da8 571 &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
572 } while (rseq_unlikely(ret));
573#ifndef BENCHMARK
574 if (i != 0 && !(i % (reps / 10)))
575 printf_verbose("tid %d: count %lld\n",
576 (int) rseq_gettid(), i);
577#endif
578 }
579 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
580 (int) rseq_gettid(), nr_abort, signals_delivered);
581 if (!opt_disable_rseq && thread_data->reg &&
582 rseq_unregister_current_thread())
583 abort();
584 return NULL;
585}
586
6e284b80 587static void test_percpu_inc(void)
588{
589 const int num_threads = opt_threads;
590 int i, ret;
591 uint64_t sum;
592 pthread_t test_threads[num_threads];
c8278da8 593 struct inc_test_data __rseq_percpu *data;
31b44ba2 594 struct inc_thread_test_data thread_data[num_threads];
595 struct rseq_percpu_pool *mempool;
596
597 mempool = rseq_percpu_pool_create(sizeof(struct inc_test_data),
9bd07c29 598 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
599 if (!mempool) {
600 perror("rseq_percpu_pool_create");
601 abort();
602 }
603 data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
604 if (!data) {
605 perror("rseq_percpu_zmalloc");
606 abort();
607 }
31b44ba2 608
609 for (i = 0; i < num_threads; i++) {
610 thread_data[i].reps = opt_reps;
611 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
612 thread_data[i].reg = 1;
613 else
614 thread_data[i].reg = 0;
c8278da8 615 thread_data[i].data = data;
616 ret = pthread_create(&test_threads[i], NULL,
617 test_percpu_inc_thread,
618 &thread_data[i]);
619 if (ret) {
620 errno = ret;
621 perror("pthread_create");
622 abort();
623 }
624 }
625
626 for (i = 0; i < num_threads; i++) {
627 ret = pthread_join(test_threads[i], NULL);
628 if (ret) {
629 errno = ret;
630 perror("pthread_join");
631 abort();
632 }
633 }
634
635 sum = 0;
636 for (i = 0; i < CPU_SETSIZE; i++)
c8278da8 637 sum += rseq_percpu_ptr(data, i)->count;
638
639 assert(sum == (uint64_t)opt_reps * num_threads);
640 rseq_percpu_free(data);
641 ret = rseq_percpu_pool_destroy(mempool);
642 if (ret) {
643 perror("rseq_percpu_pool_destroy");
644 abort();
645 }
646}
647
b08be829 648static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
649 struct percpu_list_node *node,
650 int *_cpu)
651{
652 int cpu;
653
654 for (;;) {
655 intptr_t *targetptr, newval, expect;
b08be829 656 struct percpu_list *cpulist;
657 int ret;
658
369688a5 659 cpu = get_current_cpu_id();
b08be829 660 cpulist = rseq_percpu_ptr(list, cpu);
31b44ba2 661 /* Load list->c[cpu].head with single-copy atomicity. */
b08be829 662 expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
31b44ba2 663 newval = (intptr_t)node;
b08be829 664 targetptr = (intptr_t *)&cpulist->head;
31b44ba2 665 node->next = (struct percpu_list_node *)expect;
41149e28 666 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 667 targetptr, expect, newval, cpu);
668 if (rseq_likely(!ret))
669 break;
670 /* Retry if comparison fails or rseq aborts. */
671 }
672 if (_cpu)
673 *_cpu = cpu;
674}
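/*
 * The push above links the new node to the current head, then publishes
 * it with rseq_load_cbne_store__ptr(): the store to the head pointer
 * only commits if the head is still the value read earlier and the
 * thread has not migrated off "cpu"; otherwise the loop retries on the
 * (possibly new) current CPU.
 */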
675
676/*
 677 * Unlike a traditional lock-less linked list, the availability of a
678 * rseq primitive allows us to implement pop without concerns over
679 * ABA-type races.
680 */
b08be829 681static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
682 int *_cpu)
683{
684 struct percpu_list_node *node = NULL;
685 int cpu;
686
687 for (;;) {
688 struct percpu_list_node *head;
689 intptr_t *targetptr, expectnot, *load;
b08be829 690 struct percpu_list *cpulist;
691 long offset;
692 int ret;
31b44ba2 693
369688a5 694 cpu = get_current_cpu_id();
695 cpulist = rseq_percpu_ptr(list, cpu);
696 targetptr = (intptr_t *)&cpulist->head;
697 expectnot = (intptr_t)NULL;
698 offset = offsetof(struct percpu_list_node, next);
699 load = (intptr_t *)&head;
41149e28 700 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
701 targetptr, expectnot,
702 offset, load, cpu);
703 if (rseq_likely(!ret)) {
704 node = head;
705 break;
706 }
707 if (ret > 0)
708 break;
709 /* Retry if rseq aborts. */
710 }
711 if (_cpu)
712 *_cpu = cpu;
713 return node;
714}
715
716/*
717 * __percpu_list_pop is not safe against concurrent accesses. Should
718 * only be used on lists that are not concurrently modified.
719 */
b08be829 720static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
31b44ba2 721{
b08be829 722 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
723 struct percpu_list_node *node;
724
b08be829 725 node = cpulist->head;
726 if (!node)
727 return NULL;
b08be829 728 cpulist->head = node->next;
729 return node;
730}
731
6e284b80 732static void *test_percpu_list_thread(void *arg)
733{
734 long long i, reps;
b08be829 735 struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
736
737 if (!opt_disable_rseq && rseq_register_current_thread())
738 abort();
739
740 reps = opt_reps;
741 for (i = 0; i < reps; i++) {
742 struct percpu_list_node *node;
743
744 node = this_cpu_list_pop(list, NULL);
745 if (opt_yield)
746 sched_yield(); /* encourage shuffling */
747 if (node)
748 this_cpu_list_push(list, node, NULL);
749 }
750
751 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
752 (int) rseq_gettid(), nr_abort, signals_delivered);
753 if (!opt_disable_rseq && rseq_unregister_current_thread())
754 abort();
755
756 return NULL;
757}
758
759/* Simultaneous modification to a per-cpu linked list from many threads. */
6e284b80 760static void test_percpu_list(void)
761{
762 const int num_threads = opt_threads;
763 int i, j, ret;
764 uint64_t sum = 0, expected_sum = 0;
b08be829 765 struct percpu_list __rseq_percpu *list;
766 pthread_t test_threads[num_threads];
767 cpu_set_t allowed_cpus;
b08be829 768 struct rseq_percpu_pool *mempool;
31b44ba2 769
b08be829 770 mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
9bd07c29 771 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
772 if (!mempool) {
773 perror("rseq_percpu_pool_create");
774 abort();
775 }
776 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
777 if (!list) {
778 perror("rseq_percpu_zmalloc");
779 abort();
780 }
781
782 /* Generate list entries for every usable cpu. */
783 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
784 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 785 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
786 continue;
787 for (j = 1; j <= 100; j++) {
b08be829 788 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
789 struct percpu_list_node *node;
790
791 expected_sum += j;
792
d268885a 793 node = (struct percpu_list_node *) malloc(sizeof(*node));
794 assert(node);
795 node->data = j;
796 node->next = cpulist->head;
797 cpulist->head = node;
798 }
799 }
800
801 for (i = 0; i < num_threads; i++) {
802 ret = pthread_create(&test_threads[i], NULL,
b08be829 803 test_percpu_list_thread, list);
804 if (ret) {
805 errno = ret;
806 perror("pthread_create");
807 abort();
808 }
809 }
810
811 for (i = 0; i < num_threads; i++) {
812 ret = pthread_join(test_threads[i], NULL);
813 if (ret) {
814 errno = ret;
815 perror("pthread_join");
816 abort();
817 }
818 }
819
820 for (i = 0; i < CPU_SETSIZE; i++) {
821 struct percpu_list_node *node;
822
40797ae3 823 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
824 continue;
825
b08be829 826 while ((node = __percpu_list_pop(list, i))) {
827 sum += node->data;
828 free(node);
829 }
830 }
831
832 /*
833 * All entries should now be accounted for (unless some external
834 * actor is interfering with our allowed affinity while this
835 * test is running).
836 */
837 assert(sum == expected_sum);
838 rseq_percpu_free(list);
839 ret = rseq_percpu_pool_destroy(mempool);
840 if (ret) {
841 perror("rseq_percpu_pool_destroy");
842 abort();
843 }
844}
845
bac8cd24 846static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
847 struct percpu_buffer_node *node,
848 int *_cpu)
849{
850 bool result = false;
851 int cpu;
852
853 for (;;) {
bac8cd24 854 struct percpu_buffer *cpubuffer;
855 intptr_t *targetptr_spec, newval_spec;
856 intptr_t *targetptr_final, newval_final;
857 intptr_t offset;
858 int ret;
859
369688a5 860 cpu = get_current_cpu_id();
861 cpubuffer = rseq_percpu_ptr(buffer, cpu);
862 offset = RSEQ_READ_ONCE(cpubuffer->offset);
863 if (offset == cpubuffer->buflen)
864 break;
865 newval_spec = (intptr_t)node;
bac8cd24 866 targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
31b44ba2 867 newval_final = offset + 1;
bac8cd24 868 targetptr_final = &cpubuffer->offset;
41149e28 869 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
870 targetptr_final, offset, targetptr_spec,
871 newval_spec, newval_final, cpu);
872 if (rseq_likely(!ret)) {
873 result = true;
874 break;
875 }
876 /* Retry if comparison fails or rseq aborts. */
877 }
878 if (_cpu)
879 *_cpu = cpu;
880 return result;
881}
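/*
 * The push above uses a two-store commit: the element pointer is stored
 * speculatively into array[offset], and the increment of "offset" is
 * the single committing store, performed only if the offset is
 * unchanged and the thread is still on "cpu".
 */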
882
bac8cd24 883static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
884 int *_cpu)
885{
886 struct percpu_buffer_node *head;
887 int cpu;
888
889 for (;;) {
bac8cd24 890 struct percpu_buffer *cpubuffer;
891 intptr_t *targetptr, newval;
892 intptr_t offset;
893 int ret;
894
369688a5 895 cpu = get_current_cpu_id();
bac8cd24 896 cpubuffer = rseq_percpu_ptr(buffer, cpu);
31b44ba2 897 /* Load offset with single-copy atomicity. */
bac8cd24 898 offset = RSEQ_READ_ONCE(cpubuffer->offset);
899 if (offset == 0) {
900 head = NULL;
901 break;
902 }
bac8cd24 903 head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
31b44ba2 904 newval = offset - 1;
bac8cd24 905 targetptr = (intptr_t *)&cpubuffer->offset;
41149e28 906 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 907 targetptr, offset,
bac8cd24 908 (intptr_t *)&cpubuffer->array[offset - 1],
909 (intptr_t)head, newval, cpu);
910 if (rseq_likely(!ret))
911 break;
912 /* Retry if comparison fails or rseq aborts. */
913 }
914 if (_cpu)
915 *_cpu = cpu;
916 return head;
917}
918
919/*
920 * __percpu_buffer_pop is not safe against concurrent accesses. Should
921 * only be used on buffers that are not concurrently modified.
922 */
bac8cd24 923static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
924 int cpu)
925{
bac8cd24 926 struct percpu_buffer *cpubuffer;
927 struct percpu_buffer_node *head;
928 intptr_t offset;
929
930 cpubuffer = rseq_percpu_ptr(buffer, cpu);
931 offset = cpubuffer->offset;
932 if (offset == 0)
933 return NULL;
934 head = cpubuffer->array[offset - 1];
935 cpubuffer->offset = offset - 1;
936 return head;
937}
938
6e284b80 939static void *test_percpu_buffer_thread(void *arg)
940{
941 long long i, reps;
bac8cd24 942 struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;
943
944 if (!opt_disable_rseq && rseq_register_current_thread())
945 abort();
946
947 reps = opt_reps;
948 for (i = 0; i < reps; i++) {
949 struct percpu_buffer_node *node;
950
951 node = this_cpu_buffer_pop(buffer, NULL);
952 if (opt_yield)
953 sched_yield(); /* encourage shuffling */
954 if (node) {
955 if (!this_cpu_buffer_push(buffer, node, NULL)) {
956 /* Should increase buffer size. */
957 abort();
958 }
959 }
960 }
961
962 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
963 (int) rseq_gettid(), nr_abort, signals_delivered);
964 if (!opt_disable_rseq && rseq_unregister_current_thread())
965 abort();
966
967 return NULL;
968}
969
970/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 971static void test_percpu_buffer(void)
972{
973 const int num_threads = opt_threads;
974 int i, j, ret;
975 uint64_t sum = 0, expected_sum = 0;
bac8cd24 976 struct percpu_buffer __rseq_percpu *buffer;
977 pthread_t test_threads[num_threads];
978 cpu_set_t allowed_cpus;
bac8cd24 979 struct rseq_percpu_pool *mempool;
31b44ba2 980
bac8cd24 981 mempool = rseq_percpu_pool_create(sizeof(struct percpu_buffer),
9bd07c29 982 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
983 if (!mempool) {
984 perror("rseq_percpu_pool_create");
985 abort();
986 }
987 buffer = (struct percpu_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
988 if (!buffer) {
989 perror("rseq_percpu_zmalloc");
990 abort();
991 }
992
 993 /* Generate buffer entries for every usable cpu. */
994 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
995 for (i = 0; i < CPU_SETSIZE; i++) {
996 struct percpu_buffer *cpubuffer;
997
40797ae3 998 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2 999 continue;
bac8cd24 1000 cpubuffer = rseq_percpu_ptr(buffer, i);
31b44ba2 1001 /* Worst-case is every item in the same CPU. */
bac8cd24 1002 cpubuffer->array =
d268885a 1003 (struct percpu_buffer_node **)
bac8cd24 1004 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
31b44ba2 1005 BUFFER_ITEM_PER_CPU);
1006 assert(cpubuffer->array);
1007 cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
1008 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
1009 struct percpu_buffer_node *node;
1010
1011 expected_sum += j;
1012
1013 /*
1014 * We could theoretically put the word-sized
1015 * "data" directly in the buffer. However, we
1016 * want to model objects that would not fit
1017 * within a single word, so allocate an object
1018 * for each node.
1019 */
d268885a 1020 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
1021 assert(node);
1022 node->data = j;
1023 cpubuffer->array[j - 1] = node;
1024 cpubuffer->offset++;
1025 }
1026 }
1027
1028 for (i = 0; i < num_threads; i++) {
1029 ret = pthread_create(&test_threads[i], NULL,
bac8cd24 1030 test_percpu_buffer_thread, buffer);
1031 if (ret) {
1032 errno = ret;
1033 perror("pthread_create");
1034 abort();
1035 }
1036 }
1037
1038 for (i = 0; i < num_threads; i++) {
1039 ret = pthread_join(test_threads[i], NULL);
1040 if (ret) {
1041 errno = ret;
1042 perror("pthread_join");
1043 abort();
1044 }
1045 }
1046
1047 for (i = 0; i < CPU_SETSIZE; i++) {
bac8cd24 1048 struct percpu_buffer *cpubuffer;
1049 struct percpu_buffer_node *node;
1050
40797ae3 1051 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1052 continue;
1053
1054 cpubuffer = rseq_percpu_ptr(buffer, i);
1055 while ((node = __percpu_buffer_pop(buffer, i))) {
1056 sum += node->data;
1057 free(node);
1058 }
bac8cd24 1059 free(cpubuffer->array);
1060 }
1061
1062 /*
1063 * All entries should now be accounted for (unless some external
1064 * actor is interfering with our allowed affinity while this
1065 * test is running).
1066 */
1067 assert(sum == expected_sum);
1068 rseq_percpu_free(buffer);
1069 ret = rseq_percpu_pool_destroy(mempool);
1070 if (ret) {
1071 perror("rseq_percpu_pool_destroy");
1072 abort();
1073 }
1074}
1075
78adbd91 1076static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1077 struct percpu_memcpy_buffer_node item,
1078 int *_cpu)
1079{
1080 bool result = false;
1081 int cpu;
1082
1083 for (;;) {
78adbd91 1084 struct percpu_memcpy_buffer *cpubuffer;
1085 intptr_t *targetptr_final, newval_final, offset;
1086 char *destptr, *srcptr;
1087 size_t copylen;
1088 int ret;
1089
369688a5 1090 cpu = get_current_cpu_id();
78adbd91 1091 cpubuffer = rseq_percpu_ptr(buffer, cpu);
31b44ba2 1092 /* Load offset with single-copy atomicity. */
1093 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1094 if (offset == cpubuffer->buflen)
31b44ba2 1095 break;
78adbd91 1096 destptr = (char *)&cpubuffer->array[offset];
1097 srcptr = (char *)&item;
1098 /* copylen must be <= 4kB. */
1099 copylen = sizeof(item);
1100 newval_final = offset + 1;
78adbd91 1101 targetptr_final = &cpubuffer->offset;
41149e28 1102 ret = rseq_load_cbne_memcpy_store__ptr(
1103 opt_mo, RSEQ_PERCPU,
1104 targetptr_final, offset,
1105 destptr, srcptr, copylen,
1106 newval_final, cpu);
1107 if (rseq_likely(!ret)) {
1108 result = true;
1109 break;
1110 }
1111 /* Retry if comparison fails or rseq aborts. */
1112 }
1113 if (_cpu)
1114 *_cpu = cpu;
1115 return result;
1116}
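/*
 * Same commit scheme as the pointer-based buffer above, except that the
 * whole struct percpu_memcpy_buffer_node is copied into the array slot
 * with rseq_load_cbne_memcpy_store__ptr() before the final store to
 * "offset" commits the push.
 */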
1117
78adbd91 1118static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1119 struct percpu_memcpy_buffer_node *item,
1120 int *_cpu)
1121{
1122 bool result = false;
1123 int cpu;
1124
1125 for (;;) {
78adbd91 1126 struct percpu_memcpy_buffer *cpubuffer;
1127 intptr_t *targetptr_final, newval_final, offset;
1128 char *destptr, *srcptr;
1129 size_t copylen;
1130 int ret;
1131
369688a5 1132 cpu = get_current_cpu_id();
78adbd91 1133 cpubuffer = rseq_percpu_ptr(buffer, cpu);
31b44ba2 1134 /* Load offset with single-copy atomicity. */
78adbd91 1135 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1136 if (offset == 0)
1137 break;
1138 destptr = (char *)item;
78adbd91 1139 srcptr = (char *)&cpubuffer->array[offset - 1];
1140 /* copylen must be <= 4kB. */
1141 copylen = sizeof(*item);
1142 newval_final = offset - 1;
78adbd91 1143 targetptr_final = &cpubuffer->offset;
41149e28 1144 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 1145 targetptr_final, offset, destptr, srcptr, copylen,
1146 newval_final, cpu);
1147 if (rseq_likely(!ret)) {
1148 result = true;
1149 break;
1150 }
1151 /* Retry if comparison fails or rseq aborts. */
1152 }
1153 if (_cpu)
1154 *_cpu = cpu;
1155 return result;
1156}
1157
1158/*
1159 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1160 * only be used on buffers that are not concurrently modified.
1161 */
78adbd91 1162static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1163 struct percpu_memcpy_buffer_node *item,
1164 int cpu)
1165{
78adbd91 1166 struct percpu_memcpy_buffer *cpubuffer;
1167 intptr_t offset;
1168
1169 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1170 offset = cpubuffer->offset;
1171 if (offset == 0)
1172 return false;
1173 memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
1174 cpubuffer->offset = offset - 1;
1175 return true;
1176}
1177
6e284b80 1178static void *test_percpu_memcpy_buffer_thread(void *arg)
1179{
1180 long long i, reps;
78adbd91 1181 struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;
1182
1183 if (!opt_disable_rseq && rseq_register_current_thread())
1184 abort();
1185
1186 reps = opt_reps;
1187 for (i = 0; i < reps; i++) {
1188 struct percpu_memcpy_buffer_node item;
1189 bool result;
1190
1191 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1192 if (opt_yield)
1193 sched_yield(); /* encourage shuffling */
1194 if (result) {
1195 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1196 /* Should increase buffer size. */
1197 abort();
1198 }
1199 }
1200 }
1201
1202 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1203 (int) rseq_gettid(), nr_abort, signals_delivered);
1204 if (!opt_disable_rseq && rseq_unregister_current_thread())
1205 abort();
1206
1207 return NULL;
1208}
1209
1210/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 1211static void test_percpu_memcpy_buffer(void)
1212{
1213 const int num_threads = opt_threads;
1214 int i, j, ret;
1215 uint64_t sum = 0, expected_sum = 0;
78adbd91 1216 struct percpu_memcpy_buffer __rseq_percpu *buffer;
1217 pthread_t test_threads[num_threads];
1218 cpu_set_t allowed_cpus;
78adbd91 1219 struct rseq_percpu_pool *mempool;
31b44ba2 1220
78adbd91 1221 mempool = rseq_percpu_pool_create(sizeof(struct percpu_memcpy_buffer),
9bd07c29 1222 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
1223 if (!mempool) {
1224 perror("rseq_percpu_pool_create");
1225 abort();
1226 }
1227 buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
1228 if (!buffer) {
1229 perror("rseq_percpu_zmalloc");
1230 abort();
1231 }
1232
 1233 /* Generate buffer entries for every usable cpu. */
1234 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1235 for (i = 0; i < CPU_SETSIZE; i++) {
1236 struct percpu_memcpy_buffer *cpubuffer;
1237
40797ae3 1238 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2 1239 continue;
78adbd91 1240 cpubuffer = rseq_percpu_ptr(buffer, i);
31b44ba2 1241 /* Worst-case is every item in the same CPU. */
78adbd91 1242 cpubuffer->array =
d268885a 1243 (struct percpu_memcpy_buffer_node *)
78adbd91 1244 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
31b44ba2 1245 MEMCPY_BUFFER_ITEM_PER_CPU);
1246 assert(cpubuffer->array);
1247 cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1248 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1249 expected_sum += 2 * j + 1;
1250
1251 /*
1252 * We could theoretically put the word-sized
1253 * "data" directly in the buffer. However, we
1254 * want to model objects that would not fit
1255 * within a single word, so allocate an object
1256 * for each node.
1257 */
1258 cpubuffer->array[j - 1].data1 = j;
1259 cpubuffer->array[j - 1].data2 = j + 1;
1260 cpubuffer->offset++;
1261 }
1262 }
1263
1264 for (i = 0; i < num_threads; i++) {
1265 ret = pthread_create(&test_threads[i], NULL,
1266 test_percpu_memcpy_buffer_thread,
78adbd91 1267 buffer);
1268 if (ret) {
1269 errno = ret;
1270 perror("pthread_create");
1271 abort();
1272 }
1273 }
1274
1275 for (i = 0; i < num_threads; i++) {
1276 ret = pthread_join(test_threads[i], NULL);
1277 if (ret) {
1278 errno = ret;
1279 perror("pthread_join");
1280 abort();
1281 }
1282 }
1283
1284 for (i = 0; i < CPU_SETSIZE; i++) {
1285 struct percpu_memcpy_buffer_node item;
78adbd91 1286 struct percpu_memcpy_buffer *cpubuffer;
31b44ba2 1287
40797ae3 1288 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1289 continue;
1290
1291 cpubuffer = rseq_percpu_ptr(buffer, i);
1292 while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
1293 sum += item.data1;
1294 sum += item.data2;
1295 }
78adbd91 1296 free(cpubuffer->array);
1297 }
1298
1299 /*
1300 * All entries should now be accounted for (unless some external
1301 * actor is interfering with our allowed affinity while this
1302 * test is running).
1303 */
1304 assert(sum == expected_sum);
1305 rseq_percpu_free(buffer);
1306 ret = rseq_percpu_pool_destroy(mempool);
1307 if (ret) {
1308 perror("rseq_percpu_pool_destroy");
1309 abort();
1310 }
1311}
1312
544cdc88 1313static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1314{
1315 signals_delivered++;
1316}
1317
1318static int set_signal_handler(void)
1319{
1320 int ret = 0;
1321 struct sigaction sa;
1322 sigset_t sigset;
1323
1324 ret = sigemptyset(&sigset);
1325 if (ret < 0) {
1326 perror("sigemptyset");
1327 return ret;
1328 }
1329
1330 sa.sa_handler = test_signal_interrupt_handler;
1331 sa.sa_mask = sigset;
1332 sa.sa_flags = 0;
1333 ret = sigaction(SIGUSR1, &sa, NULL);
1334 if (ret < 0) {
1335 perror("sigaction");
1336 return ret;
1337 }
1338
1339 printf_verbose("Signal handler set for SIGUSR1\n");
1340
1341 return ret;
1342}
1343
1344static
1345bool membarrier_private_expedited_rseq_available(void)
1346{
1347 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1348
1349 if (status < 0) {
1350 perror("membarrier");
1351 return false;
1352 }
1353 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1354 return false;
1355 return true;
1356}
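/*
 * MEMBARRIER_CMD_QUERY returns a bitmask of the membarrier commands
 * supported by the running kernel, so the helper above only reports the
 * rseq-fenced private expedited command when the kernel advertises it.
 */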
1357
5368dcb4 1358/* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
369688a5 1359#ifdef TEST_MEMBARRIER
5368dcb4 1360struct test_membarrier_thread_args {
83aa48fa 1361 struct rseq_percpu_pool *mempool;
b08be829 1362 struct percpu_list __rseq_percpu *percpu_list_ptr;
5368dcb4 1363 int stop;
1364};
1365
1366/* Worker threads modify data in their "active" percpu lists. */
1367static
1368void *test_membarrier_worker_thread(void *arg)
1369{
1370 struct test_membarrier_thread_args *args =
1371 (struct test_membarrier_thread_args *)arg;
1372 const int iters = opt_reps;
1373 int i;
1374
1375 if (rseq_register_current_thread()) {
1376 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1377 errno, strerror(errno));
1378 abort();
1379 }
1380
1381 /* Wait for initialization. */
1382 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1383
1384 for (i = 0; i < iters; ++i) {
1385 int ret;
1386
1387 do {
369688a5 1388 int cpu = get_current_cpu_id();
83aa48fa 1389 ptrdiff_t mempool_offset = rseq_percpu_pool_ptr_offset(args->mempool, cpu);
5368dcb4 1390
1391 ret = rseq_load_add_load_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1392 (intptr_t *) &args->percpu_list_ptr,
1393 mempool_offset + offsetof(struct percpu_list, head),
1394 1, cpu);
1395 } while (rseq_unlikely(ret));
1396 }
1397
1398 if (rseq_unregister_current_thread()) {
1399 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1400 errno, strerror(errno));
1401 abort();
1402 }
1403 return NULL;
1404}
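/*
 * The operation above presumably chains: load the shared
 * percpu_list_ptr, add the pool offset of the current CPU plus the
 * offset of the "head" field, load the head node, and increment its
 * "data" field, all within one rseq critical section. The manager
 * thread below relies on the rseq-fenced membarrier to ensure that,
 * once it swaps the active list pointer, no worker is still committing
 * an increment against the old list.
 */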
1405
1406static
b08be829 1407struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_percpu_pool *mempool)
5368dcb4 1408{
b08be829 1409 struct percpu_list __rseq_percpu *list;
1410 int i;
1411
1412 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
1413 if (!list) {
1414 perror("rseq_percpu_zmalloc");
1415 return NULL;
1416 }
5368dcb4 1417 for (i = 0; i < CPU_SETSIZE; i++) {
b08be829 1418 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
1419 struct percpu_list_node *node;
1420
1421 node = (struct percpu_list_node *) malloc(sizeof(*node));
1422 assert(node);
1423 node->data = 0;
1424 node->next = NULL;
b08be829 1425 cpulist->head = node;
5368dcb4 1426 }
b08be829 1427 return list;
1428}
1429
1430static
b08be829 1431void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
5368dcb4
MD
1432{
1433 int i;
1434
1435 for (i = 0; i < CPU_SETSIZE; i++)
1436 free(rseq_percpu_ptr(list, i)->head);
1437 rseq_percpu_free(list);
1438}
1439
1440/*
1441 * The manager thread swaps per-cpu lists that worker threads see,
1442 * and validates that there are no unexpected modifications.
1443 */
1444static
1445void *test_membarrier_manager_thread(void *arg)
1446{
1447 struct test_membarrier_thread_args *args =
1448 (struct test_membarrier_thread_args *)arg;
b08be829 1449 struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
1450 intptr_t expect_a = 0, expect_b = 0;
1451 int cpu_a = 0, cpu_b = 0;
1452 struct rseq_percpu_pool *mempool;
1453 int ret;
1454
1455 mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
9bd07c29 1456 PERCPU_POOL_LEN, CPU_SETSIZE, NULL);
1457 if (!mempool) {
1458 perror("rseq_percpu_pool_create");
1459 abort();
1460 }
83aa48fa 1461 args->mempool = mempool;
1462
1463 if (rseq_register_current_thread()) {
1464 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1465 errno, strerror(errno));
1466 abort();
1467 }
1468
1469 /* Init lists. */
1470 list_a = test_membarrier_alloc_percpu_list(mempool);
1471 assert(list_a);
1472 list_b = test_membarrier_alloc_percpu_list(mempool);
1473 assert(list_b);
1474
1475 /* Initialize lists before publishing them. */
1476 rseq_smp_wmb();
1477
b08be829 1478 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1479
1480 while (!RSEQ_READ_ONCE(args->stop)) {
1481 /* list_a is "active". */
1482 cpu_a = rand() % CPU_SETSIZE;
1483 /*
1484 * As list_b is "inactive", we should never see changes
1485 * to list_b.
1486 */
b08be829 1487 if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
1488 fprintf(stderr, "Membarrier test failed\n");
1489 abort();
1490 }
1491
1492 /* Make list_b "active". */
b08be829 1493 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
369688a5 1494 if (rseq_membarrier_expedited(cpu_a) &&
1495 errno != ENXIO /* missing CPU */) {
1496 perror("sys_membarrier");
1497 abort();
1498 }
1499 /*
1500 * Cpu A should now only modify list_b, so the values
1501 * in list_a should be stable.
1502 */
b08be829 1503 expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
1504
1505 cpu_b = rand() % CPU_SETSIZE;
1506 /*
1507 * As list_a is "inactive", we should never see changes
1508 * to list_a.
1509 */
b08be829 1510 if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
1511 fprintf(stderr, "Membarrier test failed\n");
1512 abort();
1513 }
1514
1515 /* Make list_a "active". */
b08be829 1516 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
369688a5 1517 if (rseq_membarrier_expedited(cpu_b) &&
1518 errno != ENXIO /* missing CPU */) {
1519 perror("sys_membarrier");
1520 abort();
1521 }
1522 /* Remember a value from list_b. */
b08be829 1523 expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
1524 }
1525
1526 test_membarrier_free_percpu_list(list_a);
1527 test_membarrier_free_percpu_list(list_b);
1528
1529 if (rseq_unregister_current_thread()) {
1530 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1531 errno, strerror(errno));
1532 abort();
1533 }
1534 ret = rseq_percpu_pool_destroy(mempool);
1535 if (ret) {
1536 perror("rseq_percpu_pool_destroy");
1537 abort();
1538 }
1539
1540 return NULL;
1541}
1542
1543static
1544void test_membarrier(void)
1545{
1546 const int num_threads = opt_threads;
1547 struct test_membarrier_thread_args thread_args;
1548 pthread_t worker_threads[num_threads];
1549 pthread_t manager_thread;
1550 int i, ret;
1551
1552 if (!membarrier_private_expedited_rseq_available()) {
1553 fprintf(stderr, "Membarrier private expedited rseq not available. "
1554 "Skipping membarrier test.\n");
1555 return;
1556 }
1557 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1558 perror("sys_membarrier");
1559 abort();
1560 }
1561
b08be829 1562 thread_args.percpu_list_ptr = NULL;
5368dcb4 1563 thread_args.stop = 0;
1564 ret = pthread_create(&manager_thread, NULL,
1565 test_membarrier_manager_thread, &thread_args);
1566 if (ret) {
1567 errno = ret;
1568 perror("pthread_create");
1569 abort();
1570 }
1571
1572 for (i = 0; i < num_threads; i++) {
1573 ret = pthread_create(&worker_threads[i], NULL,
1574 test_membarrier_worker_thread, &thread_args);
1575 if (ret) {
1576 errno = ret;
1577 perror("pthread_create");
1578 abort();
1579 }
1580 }
1581
1582
1583 for (i = 0; i < num_threads; i++) {
1584 ret = pthread_join(worker_threads[i], NULL);
1585 if (ret) {
1586 errno = ret;
1587 perror("pthread_join");
1588 abort();
1589 }
1590 }
1591
1592 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1593 ret = pthread_join(manager_thread, NULL);
1594 if (ret) {
1595 errno = ret;
1596 perror("pthread_join");
1597 abort();
1598 }
1599}
369688a5 1600#else /* TEST_MEMBARRIER */
1601static
1602void test_membarrier(void)
1603{
1604 if (!membarrier_private_expedited_rseq_available()) {
1605 fprintf(stderr, "Membarrier private expedited rseq not available. "
1606 "Skipping membarrier test.\n");
1607 return;
1608 }
3cde2ee2 1609 fprintf(stderr, "rseq_load_add_load_load_add_store__ptr is not implemented on this architecture. "
1610 "Skipping membarrier test.\n");
1611}
1612#endif
1613
544cdc88 1614static void show_usage(char **argv)
1615{
1616 printf("Usage : %s <OPTIONS>\n",
1617 argv[0]);
1618 printf("OPTIONS:\n");
1619 printf(" [-1 loops] Number of loops for delay injection 1\n");
1620 printf(" [-2 loops] Number of loops for delay injection 2\n");
1621 printf(" [-3 loops] Number of loops for delay injection 3\n");
1622 printf(" [-4 loops] Number of loops for delay injection 4\n");
1623 printf(" [-5 loops] Number of loops for delay injection 5\n");
1624 printf(" [-6 loops] Number of loops for delay injection 6\n");
1625 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1626 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1627 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1628 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1629 printf(" [-y] Yield\n");
1630 printf(" [-k] Kill thread with signal\n");
1631 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1632 printf(" [-t N] Number of threads (default 200)\n");
1633 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1634 printf(" [-d] Disable rseq system call (no initialization)\n");
1635 printf(" [-D M] Disable rseq for each M threads\n");
5368dcb4 1636 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
31b44ba2 1637 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
d1cdec98 1638 printf(" [-c] Check if the rseq syscall is available.\n");
1639 printf(" [-v] Verbose output.\n");
1640 printf(" [-h] Show this help.\n");
1641 printf("\n");
1642}
1643
1644int main(int argc, char **argv)
1645{
1646 int i;
1647
1648 for (i = 1; i < argc; i++) {
1649 if (argv[i][0] != '-')
1650 continue;
1651 switch (argv[i][1]) {
1652 case '1':
1653 case '2':
1654 case '3':
1655 case '4':
1656 case '5':
1657 case '6':
1658 case '7':
1659 case '8':
1660 case '9':
1661 if (argc < i + 2) {
544cdc88 1662 show_usage(argv);
1663 goto error;
1664 }
1665 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1666 i++;
1667 break;
1668 case 'm':
1669 if (argc < i + 2) {
544cdc88 1670 show_usage(argv);
1671 goto error;
1672 }
1673 opt_modulo = atol(argv[i + 1]);
1674 if (opt_modulo < 0) {
544cdc88 1675 show_usage(argv);
1676 goto error;
1677 }
1678 i++;
1679 break;
1680 case 's':
1681 if (argc < i + 2) {
544cdc88 1682 show_usage(argv);
1683 goto error;
1684 }
1685 opt_sleep = atol(argv[i + 1]);
1686 if (opt_sleep < 0) {
544cdc88 1687 show_usage(argv);
31b44ba2
MD
1688 goto error;
1689 }
1690 i++;
1691 break;
1692 case 'y':
1693 opt_yield = 1;
1694 break;
1695 case 'k':
1696 opt_signal = 1;
1697 break;
1698 case 'd':
1699 opt_disable_rseq = 1;
1700 break;
1701 case 'D':
1702 if (argc < i + 2) {
544cdc88 1703 show_usage(argv);
1704 goto error;
1705 }
1706 opt_disable_mod = atol(argv[i + 1]);
1707 if (opt_disable_mod < 0) {
544cdc88 1708 show_usage(argv);
1709 goto error;
1710 }
1711 i++;
1712 break;
1713 case 't':
1714 if (argc < i + 2) {
544cdc88 1715 show_usage(argv);
1716 goto error;
1717 }
1718 opt_threads = atol(argv[i + 1]);
1719 if (opt_threads < 0) {
544cdc88 1720 show_usage(argv);
31b44ba2
MD
1721 goto error;
1722 }
1723 i++;
1724 break;
1725 case 'r':
1726 if (argc < i + 2) {
544cdc88 1727 show_usage(argv);
1728 goto error;
1729 }
1730 opt_reps = atoll(argv[i + 1]);
1731 if (opt_reps < 0) {
544cdc88 1732 show_usage(argv);
1733 goto error;
1734 }
1735 i++;
1736 break;
1737 case 'h':
544cdc88 1738 show_usage(argv);
1739 goto end;
1740 case 'T':
1741 if (argc < i + 2) {
544cdc88 1742 show_usage(argv);
1743 goto error;
1744 }
1745 opt_test = *argv[i + 1];
1746 switch (opt_test) {
1747 case 's':
1748 case 'l':
1749 case 'i':
1750 case 'b':
1751 case 'm':
5368dcb4 1752 case 'r':
1753 break;
1754 default:
544cdc88 1755 show_usage(argv);
1756 goto error;
1757 }
1758 i++;
1759 break;
1760 case 'v':
1761 verbose = 1;
1762 break;
1763 case 'M':
369688a5 1764 opt_mo = RSEQ_MO_RELEASE;
31b44ba2 1765 break;
d1cdec98 1766 case 'c':
8b34114a 1767 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1768 printf_verbose("The rseq syscall is available.\n");
1769 goto end;
1770 } else {
1771 printf_verbose("The rseq syscall is unavailable.\n");
1772 goto no_rseq;
1773 }
31b44ba2 1774 default:
544cdc88 1775 show_usage(argv);
1776 goto error;
1777 }
1778 }
1779
1780 loop_cnt_1 = loop_cnt[1];
1781 loop_cnt_2 = loop_cnt[2];
1782 loop_cnt_3 = loop_cnt[3];
1783 loop_cnt_4 = loop_cnt[4];
1784 loop_cnt_5 = loop_cnt[5];
1785 loop_cnt_6 = loop_cnt[6];
1786
1787 if (set_signal_handler())
1788 goto error;
1789
1790 if (!opt_disable_rseq && rseq_register_current_thread())
1791 goto error;
369688a5 1792 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1793 printf_verbose("The rseq cpu id getter is unavailable\n");
1794 goto no_rseq;
369688a5 1795 }
1796 switch (opt_test) {
1797 case 's':
1798 printf_verbose("spinlock\n");
1799 test_percpu_spinlock();
1800 break;
1801 case 'l':
1802 printf_verbose("linked list\n");
1803 test_percpu_list();
1804 break;
1805 case 'b':
1806 printf_verbose("buffer\n");
1807 test_percpu_buffer();
1808 break;
1809 case 'm':
1810 printf_verbose("memcpy buffer\n");
1811 test_percpu_memcpy_buffer();
1812 break;
1813 case 'i':
1814 printf_verbose("counter increment\n");
1815 test_percpu_inc();
1816 break;
1817 case 'r':
1818 printf_verbose("membarrier\n");
1819 test_membarrier();
1820 break;
1821 }
1822 if (!opt_disable_rseq && rseq_unregister_current_thread())
1823 abort();
1824end:
1825 return 0;
1826
1827error:
1828 return -1;
1829
1830no_rseq:
1831 return 2;
31b44ba2 1832}