Restartable sequences: self-tests
tools/testing/selftests/rseq/param_test.c
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>

static inline pid_t gettid(void)
{
        return syscall(__NR_gettid);
}

#define NR_INJECT 9
static int loop_cnt[NR_INJECT + 1];

static int opt_modulo;

static int opt_yield, opt_signal, opt_sleep, opt_fallback_cnt = 3,
        opt_disable_rseq, opt_threads = 200,
        opt_reps = 5000, opt_disable_mod = 0, opt_test = 's';

static __thread unsigned int signals_delivered;

static struct rseq_lock rseq_lock;

#ifndef BENCHMARK

static __thread unsigned int yield_mod_cnt, nr_retry;

#define printf_nobench(fmt, ...) printf(fmt, ## __VA_ARGS__)

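
/*
 * Delay/perturbation injection, wired into the rseq critical sections by
 * rseq.h: RSEQ_INJECT_ASM() expands to a small architecture-specific busy
 * loop driven by loop_cnt[1..5], while RSEQ_INJECT_C() (points 6-9 in the
 * usage text) adds a C-level delay loop that, when its loop count is -1
 * and -m is set, can additionally sleep, yield or raise SIGUSR1 every
 * opt_modulo-th pass. The counts come from the -1..-9 command-line options.
 */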
#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5])

#if defined(__x86_64__) || defined(__i386__)

#define INJECT_ASM_REG "eax"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "mov %[loop_cnt_" #n "], %%" INJECT_ASM_REG "\n\t" \
        "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
        "jz 333f\n\t" \
        "222:\n\t" \
        "dec %%" INJECT_ASM_REG "\n\t" \
        "jnz 222b\n\t" \
        "333:\n\t"

#elif defined(__ARMEL__)

#define INJECT_ASM_REG "r4"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "cmp " INJECT_ASM_REG ", #0\n\t" \
        "beq 333f\n\t" \
        "222:\n\t" \
        "subs " INJECT_ASM_REG ", #1\n\t" \
        "bne 222b\n\t" \
        "333:\n\t"

#elif __PPC__
#define INJECT_ASM_REG "r18"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
        "beq 333f\n\t" \
        "222:\n\t" \
        "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
        "bne 222b\n\t" \
        "333:\n\t"
#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
        nr_retry++;

#define RSEQ_INJECT_C(n) \
{ \
        int loc_i, loc_nr_loops = loop_cnt[n]; \
\
        for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
                barrier(); \
        } \
        if (loc_nr_loops == -1 && opt_modulo) { \
                if (yield_mod_cnt == opt_modulo - 1) { \
                        if (opt_sleep > 0) \
                                poll(NULL, 0, opt_sleep); \
                        if (opt_yield) \
                                sched_yield(); \
                        if (opt_signal) \
                                raise(SIGUSR1); \
                        yield_mod_cnt = 0; \
                } else { \
                        yield_mod_cnt++; \
                } \
        } \
}

#define RSEQ_FALLBACK_CNT \
        opt_fallback_cnt

#else

#define printf_nobench(fmt, ...)

#endif /* BENCHMARK */

#include <rseq.h>

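
/*
 * Everything below is built on the do_rseq() helpers from rseq.h: the
 * supplied block runs on the cpu reported in 'cpu' and either sets
 * 'result' to false to bail out without committing, or chooses 'newval'
 * and 'targetptr'; the store of newval to *targetptr is then only
 * performed if the thread was not preempted, migrated or signalled in
 * the meantime, and is retried (or, after RSEQ_FALLBACK_CNT failures,
 * done under rseq_lock) otherwise. do_rseq2() adds a speculative store
 * before the final one, and do_rseq_memcpy() a speculative memcpy.
 *
 * Per-cpu elements are aligned on 128 bytes so that each cpu's data sits
 * on its own cache lines, avoiding false sharing between cpus.
 */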
struct percpu_lock_entry {
        intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
        struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
        intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
        struct percpu_lock lock;
        struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
        struct spinlock_test_data *data;
        int reps;
        int reg;
};

struct inc_test_data {
        struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
        struct inc_test_data *data;
        int reps;
        int reg;
};

struct percpu_list_node {
        intptr_t data;
        struct percpu_list_node *next;
};

struct percpu_list_entry {
        struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
        struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU 100

struct percpu_buffer_node {
        intptr_t data;
};

struct percpu_buffer_entry {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
        struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU 100

struct percpu_memcpy_buffer_node {
        intptr_t data1;
        uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
        struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Returns the cpu on which the lock was acquired. */
static int rseq_percpu_lock(struct percpu_lock *lock)
{
        struct rseq_state rseq_state;
        intptr_t *targetptr, newval;
        int cpu;
        bool result;

        for (;;) {
                do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
                        {
                                if (unlikely(lock->c[cpu].v)) {
                                        result = false;
                                } else {
                                        newval = 1;
                                        targetptr = (intptr_t *)&lock->c[cpu].v;
                                }
                        });
                if (likely(result))
                        break;
        }
        /*
         * Acquire semantic when taking lock after control dependency.
         * Matches smp_store_release().
         */
        smp_acquire__after_ctrl_dep();
        return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
        assert(lock->c[cpu].v == 1);
        /*
         * Release lock, with release semantic. Matches
         * smp_acquire__after_ctrl_dep().
         */
        smp_store_release(&lock->c[cpu].v, 0);
}
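
/*
 * Typical usage, as in test_percpu_spinlock_thread() below:
 *
 *      cpu = rseq_percpu_lock(&data->lock);
 *      ... update data->c[cpu] ...
 *      rseq_percpu_unlock(&data->lock, cpu);
 *
 * The lock is taken on whichever cpu the thread is running on at lock
 * time, so it only serializes against other threads touching that same
 * cpu's share of the data.
 */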

void *test_percpu_spinlock_thread(void *arg)
{
        struct spinlock_thread_test_data *thread_data = arg;
        struct spinlock_test_data *data = thread_data->data;
        int i, cpu;

        if (!opt_disable_rseq && thread_data->reg
                        && rseq_register_current_thread())
                abort();
        for (i = 0; i < thread_data->reps; i++) {
                cpu = rseq_percpu_lock(&data->lock);
                data->c[cpu].count++;
                rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
                if (i != 0 && !(i % (thread_data->reps / 10)))
                        printf("tid %d: count %d\n", (int) gettid(), i);
#endif
        }
        printf_nobench("tid %d: number of retries: %d, signals delivered: %u, nr_fallback %u, nr_fallback_wait %u\n",
                        (int) gettid(), nr_retry, signals_delivered,
                        rseq_get_fallback_cnt(),
                        rseq_get_fallback_wait_cnt());
        if (rseq_unregister_current_thread())
                abort();
        return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
        const int num_threads = opt_threads;
        int i, ret;
        uint64_t sum;
        pthread_t test_threads[num_threads];
        struct spinlock_test_data data;
        struct spinlock_thread_test_data thread_data[num_threads];

        memset(&data, 0, sizeof(data));
        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
                thread_data[i].data = &data;
                ret = pthread_create(&test_threads[i], NULL,
                                test_percpu_spinlock_thread, &thread_data[i]);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        sum = 0;
        for (i = 0; i < CPU_SETSIZE; i++)
                sum += data.c[i].count;

        assert(sum == (uint64_t)opt_reps * num_threads);
}

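/*
 * Lock-free per-cpu counter: each iteration loads the current cpu's
 * count and lets do_rseq() commit count + 1 directly, with no per-cpu
 * lock needed (contrast with the spinlock-based variant above).
 */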
void *test_percpu_inc_thread(void *arg)
{
        struct inc_thread_test_data *thread_data = arg;
        struct inc_test_data *data = thread_data->data;
        int i;

        if (!opt_disable_rseq && thread_data->reg
                        && rseq_register_current_thread())
                abort();
        for (i = 0; i < thread_data->reps; i++) {
                struct rseq_state rseq_state;
                intptr_t *targetptr, newval;
                int cpu;
                bool result;

                do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
                        {
                                newval = (intptr_t)data->c[cpu].count + 1;
                                targetptr = (intptr_t *)&data->c[cpu].count;
                        });

#ifndef BENCHMARK
                if (i != 0 && !(i % (thread_data->reps / 10)))
                        printf("tid %d: count %d\n", (int) gettid(), i);
#endif
        }
        printf_nobench("tid %d: number of retries: %d, signals delivered: %u, nr_fallback %u, nr_fallback_wait %u\n",
                        (int) gettid(), nr_retry, signals_delivered,
                        rseq_get_fallback_cnt(),
                        rseq_get_fallback_wait_cnt());
        if (rseq_unregister_current_thread())
                abort();
        return NULL;
}

void test_percpu_inc(void)
{
        const int num_threads = opt_threads;
        int i, ret;
        uint64_t sum;
        pthread_t test_threads[num_threads];
        struct inc_test_data data;
        struct inc_thread_test_data thread_data[num_threads];

        memset(&data, 0, sizeof(data));
        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
                thread_data[i].data = &data;
                ret = pthread_create(&test_threads[i], NULL,
                                test_percpu_inc_thread, &thread_data[i]);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        sum = 0;
        for (i = 0; i < CPU_SETSIZE; i++)
                sum += data.c[i].count;

        assert(sum == (uint64_t)opt_reps * num_threads);
}

int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node)
{
        struct rseq_state rseq_state;
        intptr_t *targetptr, newval;
        int cpu;
        bool result;

        do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
                {
                        newval = (intptr_t)node;
                        targetptr = (intptr_t *)&list->c[cpu].head;
                        node->next = list->c[cpu].head;
                });

        return cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *percpu_list_pop(struct percpu_list *list)
{
        struct percpu_list_node *head, *next;
        struct rseq_state rseq_state;
        intptr_t *targetptr, newval;
        int cpu;
        bool result;

        do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
                {
                        head = list->c[cpu].head;
                        if (!head) {
                                result = false;
                        } else {
                                next = head->next;
                                newval = (intptr_t) next;
                                targetptr = (intptr_t *) &list->c[cpu].head;
                        }
                });

        return head;
}
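
/*
 * Illustrative sketch (not part of the original test): push one node and
 * immediately try to pop one back. Both operations act on the list of
 * whichever cpu the thread is currently running on, so a migration
 * between the two calls may make the pop hit a different per-cpu list
 * and return NULL or some other node; this is why
 * test_percpu_list_thread() below checks for NULL before re-pushing.
 */
static inline struct percpu_list_node *
percpu_list_push_pop_example(struct percpu_list *list,
                struct percpu_list_node *node)
{
        percpu_list_push(list, node);
        return percpu_list_pop(list);   /* may be NULL after a migration */
}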

void *test_percpu_list_thread(void *arg)
{
        int i;
        struct percpu_list *list = (struct percpu_list *)arg;

        if (rseq_register_current_thread())
                abort();

        for (i = 0; i < opt_reps; i++) {
                struct percpu_list_node *node = percpu_list_pop(list);

                if (opt_yield)
                        sched_yield();  /* encourage shuffling */
                if (node)
                        percpu_list_push(list, node);
        }

        if (rseq_unregister_current_thread())
                abort();

        return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_list list;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;

        memset(&list, 0, sizeof(list));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                for (j = 1; j <= 100; j++) {
                        struct percpu_list_node *node;

                        expected_sum += j;

                        node = malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
                        node->next = list.c[i].head;
                        list.c[i].head = node;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                test_percpu_list_thread, &list);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < CPU_SETSIZE; i++) {
                cpu_set_t pin_mask;
                struct percpu_list_node *node;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                CPU_ZERO(&pin_mask);
                CPU_SET(i, &pin_mask);
                sched_setaffinity(0, sizeof(pin_mask), &pin_mask);

                while ((node = percpu_list_pop(&list))) {
                        sum += node->data;
                        free(node);
                }
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}

bool percpu_buffer_push(struct percpu_buffer *buffer,
                struct percpu_buffer_node *node)
{
        struct rseq_state rseq_state;
        intptr_t *targetptr_spec, newval_spec;
        intptr_t *targetptr_final, newval_final;
        int cpu;
        bool result;

        do_rseq2(&rseq_lock, rseq_state, cpu, result,
                targetptr_spec, newval_spec, targetptr_final, newval_final,
                {
                        intptr_t offset = buffer->c[cpu].offset;

                        if (offset == buffer->c[cpu].buflen) {
                                result = false;
                        } else {
                                newval_spec = (intptr_t)node;
                                targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
                                newval_final = offset + 1;
                                targetptr_final = &buffer->c[cpu].offset;
                        }
                });

        return result;
}

struct percpu_buffer_node *percpu_buffer_pop(struct percpu_buffer *buffer)
{
        struct percpu_buffer_node *head;
        struct rseq_state rseq_state;
        intptr_t *targetptr, newval;
        int cpu;
        bool result;

        do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
                {
                        intptr_t offset = buffer->c[cpu].offset;

                        if (offset == 0) {
                                result = false;
                        } else {
                                head = buffer->c[cpu].array[offset - 1];
                                newval = offset - 1;
                                targetptr = (intptr_t *)&buffer->c[cpu].offset;
                        }
                });

        if (result)
                return head;
        else
                return NULL;
}
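
/*
 * The per-cpu buffer behaves as a per-cpu LIFO stack of pointers: push
 * speculatively stores the node at array[offset] and commits offset + 1
 * (do_rseq2), pop reads array[offset - 1] and commits offset - 1, so an
 * aborted attempt never leaves a half-updated buffer behind.
 */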

void *test_percpu_buffer_thread(void *arg)
{
        int i;
        struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

        if (rseq_register_current_thread())
                abort();

        for (i = 0; i < opt_reps; i++) {
                struct percpu_buffer_node *node = percpu_buffer_pop(buffer);

                if (opt_yield)
                        sched_yield();  /* encourage shuffling */
                if (node) {
                        if (!percpu_buffer_push(buffer, node)) {
                                /* Should increase buffer size. */
                                abort();
                        }
                }
        }

        if (rseq_unregister_current_thread())
                abort();

        return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_buffer buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;

        memset(&buffer, 0, sizeof(buffer));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                /* Worst case: every item ends up on the same CPU. */
                buffer.c[i].array =
                        malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE
                                * BUFFER_ITEM_PER_CPU);
                assert(buffer.c[i].array);
                buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
                        struct percpu_buffer_node *node;

                        expected_sum += j;

                        /*
                         * We could theoretically put the word-sized
                         * "data" directly in the buffer. However, we
                         * want to model objects that would not fit
                         * within a single word, so allocate an object
                         * for each node.
                         */
                        node = malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
                        buffer.c[i].array[j - 1] = node;
                        buffer.c[i].offset++;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                test_percpu_buffer_thread, &buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < CPU_SETSIZE; i++) {
                cpu_set_t pin_mask;
                struct percpu_buffer_node *node;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                CPU_ZERO(&pin_mask);
                CPU_SET(i, &pin_mask);
                sched_setaffinity(0, sizeof(pin_mask), &pin_mask);

                while ((node = percpu_buffer_pop(&buffer))) {
                        sum += node->data;
                        free(node);
                }
                free(buffer.c[i].array);
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}

bool percpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
                struct percpu_memcpy_buffer_node item)
{
        struct rseq_state rseq_state;
        char *destptr, *srcptr;
        size_t copylen;
        intptr_t *targetptr_final, newval_final;
        int cpu;
        bool result;

        do_rseq_memcpy(&rseq_lock, rseq_state, cpu, result,
                destptr, srcptr, copylen, targetptr_final, newval_final,
                {
                        intptr_t offset = buffer->c[cpu].offset;

                        if (offset == buffer->c[cpu].buflen) {
                                result = false;
                        } else {
                                destptr = (char *)&buffer->c[cpu].array[offset];
                                srcptr = (char *)&item;
                                copylen = sizeof(item);
                                newval_final = offset + 1;
                                targetptr_final = &buffer->c[cpu].offset;
                        }
                });

        return result;
}

bool percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
                struct percpu_memcpy_buffer_node *item)
{
        struct rseq_state rseq_state;
        char *destptr, *srcptr;
        size_t copylen;
        intptr_t *targetptr_final, newval_final;
        int cpu;
        bool result;

        do_rseq_memcpy(&rseq_lock, rseq_state, cpu, result,
                destptr, srcptr, copylen, targetptr_final, newval_final,
                {
                        intptr_t offset = buffer->c[cpu].offset;

                        if (offset == 0) {
                                result = false;
                        } else {
                                destptr = (char *)item;
                                srcptr = (char *)&buffer->c[cpu].array[offset - 1];
                                copylen = sizeof(*item);
                                newval_final = offset - 1;
                                targetptr_final = &buffer->c[cpu].offset;
                        }
                });

        return result;
}
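
/*
 * Same LIFO scheme as the pointer buffer above, but elements are stored
 * by value: do_rseq_memcpy() speculatively copies the whole node into or
 * out of the array before the final commit of the new offset.
 */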

void *test_percpu_memcpy_buffer_thread(void *arg)
{
        int i;
        struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

        if (rseq_register_current_thread())
                abort();

        for (i = 0; i < opt_reps; i++) {
                struct percpu_memcpy_buffer_node item;
                bool result;

                result = percpu_memcpy_buffer_pop(buffer, &item);
                if (opt_yield)
                        sched_yield();  /* encourage shuffling */
                if (result) {
                        if (!percpu_memcpy_buffer_push(buffer, item)) {
                                /* Should increase buffer size. */
                                abort();
                        }
                }
        }

        if (rseq_unregister_current_thread())
                abort();

        return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_memcpy_buffer buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;

        memset(&buffer, 0, sizeof(buffer));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                /* Worst case: every item ends up on the same CPU. */
                buffer.c[i].array =
                        malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE
                                * MEMCPY_BUFFER_ITEM_PER_CPU);
                assert(buffer.c[i].array);
                buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
                        expected_sum += 2 * j + 1;

                        /*
                         * We could theoretically put the word-sized
                         * "data" directly in the buffer. However, we
                         * want to model objects that would not fit
                         * within a single word, so allocate an object
                         * for each node.
                         */
                        buffer.c[i].array[j - 1].data1 = j;
                        buffer.c[i].array[j - 1].data2 = j + 1;
                        buffer.c[i].offset++;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                test_percpu_memcpy_buffer_thread, &buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < CPU_SETSIZE; i++) {
                cpu_set_t pin_mask;
                struct percpu_memcpy_buffer_node item;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                CPU_ZERO(&pin_mask);
                CPU_SET(i, &pin_mask);
                sched_setaffinity(0, sizeof(pin_mask), &pin_mask);

                while (percpu_memcpy_buffer_pop(&buffer, &item)) {
                        sum += item.data1;
                        sum += item.data2;
                }
                free(buffer.c[i].array);
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
        signals_delivered++;
}

static int set_signal_handler(void)
{
        int ret = 0;
        struct sigaction sa;
        sigset_t sigset;

        ret = sigemptyset(&sigset);
        if (ret < 0) {
                perror("sigemptyset");
                return ret;
        }

        sa.sa_handler = test_signal_interrupt_handler;
        sa.sa_mask = sigset;
        sa.sa_flags = 0;
        ret = sigaction(SIGUSR1, &sa, NULL);
        if (ret < 0) {
                perror("sigaction");
                return ret;
        }

        printf_nobench("Signal handler set for SIGUSR1\n");

        return ret;
}

static void show_usage(int argc, char **argv)
{
        printf("Usage: %s <OPTIONS>\n",
                argv[0]);
        printf("OPTIONS:\n");
        printf(" [-1 loops] Number of loops for delay injection 1\n");
        printf(" [-2 loops] Number of loops for delay injection 2\n");
        printf(" [-3 loops] Number of loops for delay injection 3\n");
        printf(" [-4 loops] Number of loops for delay injection 4\n");
        printf(" [-5 loops] Number of loops for delay injection 5\n");
        printf(" [-6 loops] Number of loops for delay injection 6 (-1 to enable -m)\n");
        printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
        printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
        printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
        printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
        printf(" [-y] Yield\n");
        printf(" [-k] Kill thread with signal\n");
        printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
        printf(" [-f N] Use fallback every N failures (>= 1)\n");
        printf(" [-t N] Number of threads (default 200)\n");
        printf(" [-r N] Number of repetitions per thread (default 5000)\n");
        printf(" [-d] Disable rseq system call (no initialization)\n");
        printf(" [-D M] Disable rseq for each M threads\n");
        printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
        printf(" [-h] Show this help.\n");
        printf("\n");
}
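
/*
 * Example invocations (illustrative; option semantics are parsed in
 * main() below, and the binary name is assumed to be param_test):
 *
 *      ./param_test -T s                     per-cpu spinlock test, defaults
 *      ./param_test -T l -t 16 -r 100000 -y  list test, 16 threads,
 *                                            100000 reps, yield on each pop
 *      ./param_test -T s -6 -1 -m 10 -k      spinlock test, raise SIGUSR1 on
 *                                            every 10th pass of injection
 *                                            point 6
 */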

int main(int argc, char **argv)
{
        int i;

        if (rseq_init_lock(&rseq_lock)) {
                perror("rseq_init_lock");
                return -1;
        }
        if (set_signal_handler())
                goto error;
        for (i = 1; i < argc; i++) {
                if (argv[i][0] != '-')
                        continue;
                switch (argv[i][1]) {
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
                        i++;
                        break;
                case 'm':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_modulo = atol(argv[i + 1]);
                        if (opt_modulo < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 's':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_sleep = atol(argv[i + 1]);
                        if (opt_sleep < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'y':
                        opt_yield = 1;
                        break;
                case 'k':
                        opt_signal = 1;
                        break;
                case 'd':
                        opt_disable_rseq = 1;
                        break;
                case 'D':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_disable_mod = atol(argv[i + 1]);
                        if (opt_disable_mod < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'f':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_fallback_cnt = atol(argv[i + 1]);
                        if (opt_fallback_cnt < 1) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 't':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_threads = atol(argv[i + 1]);
                        if (opt_threads < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'r':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_reps = atol(argv[i + 1]);
                        if (opt_reps < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'h':
                        show_usage(argc, argv);
                        goto end;
                case 'T':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_test = *argv[i + 1];
                        switch (opt_test) {
                        case 's':
                        case 'l':
                        case 'i':
                        case 'b':
                        case 'm':
                                break;
                        default:
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                default:
                        show_usage(argc, argv);
                        goto error;
                }
        }

        if (!opt_disable_rseq && rseq_register_current_thread())
                goto error;
        switch (opt_test) {
        case 's':
                printf_nobench("spinlock\n");
                test_percpu_spinlock();
                break;
        case 'l':
                printf_nobench("linked list\n");
                test_percpu_list();
                break;
        case 'b':
                printf_nobench("buffer\n");
                test_percpu_buffer();
                break;
        case 'm':
                printf_nobench("memcpy buffer\n");
                test_percpu_memcpy_buffer();
                break;
        case 'i':
                printf_nobench("counter increment\n");
                test_percpu_inc();
                break;
        }
        if (rseq_unregister_current_thread())
                abort();
end:
        return 0;

error:
        if (rseq_destroy_lock(&rseq_lock))
                perror("rseq_destroy_lock");
        return -1;
}