Restartable sequences: self-tests
deliverable/linux.git: tools/testing/selftests/rseq/basic_percpu_ops_test.c
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <rseq.h>

static struct rseq_lock rseq_lock;

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/*
 * A simple per-cpu spinlock.  Returns the cpu on which the lock was
 * acquired.  (A contrasting non-rseq sketch follows rseq_percpu_unlock()
 * below.)
 */
int rseq_percpu_lock(struct percpu_lock *lock)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	for (;;) {
		do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
			{
				if (unlikely(lock->c[cpu].v)) {
					result = false;
				} else {
					newval = 1;
					targetptr = (intptr_t *)&lock->c[cpu].v;
				}
			});
		if (likely(result))
			break;
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches smp_store_release().
	 */
	smp_acquire__after_ctrl_dep();
	return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * smp_acquire__after_ctrl_dep().
	 */
	smp_store_release(&lock->c[cpu].v, 0);
}
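
/*
 * Illustrative contrast, not part of the original selftest: a sketch of
 * taking a per-cpu lock without rseq (the function name is made up for
 * illustration; only sched_getcpu() and the GCC __atomic builtins are
 * real APIs).  sched_getcpu() only reports where the thread happened to
 * be at the time of the call; the thread may migrate before the atomic
 * exchange, so the lock must still be a real atomic operation and a
 * thread can end up spinning on another cpu's lock.  The rseq-based
 * rseq_percpu_lock() above instead commits its store only if the thread
 * stayed on the same cpu for the whole critical section.
 */
static int plain_percpu_lock(struct percpu_lock *lock)
{
	for (;;) {
		int cpu = sched_getcpu();

		/* Previous value 0 means we acquired this cpu's lock. */
		if (!__atomic_exchange_n(&lock->c[cpu].v, 1, __ATOMIC_ACQUIRE))
			return cpu;
	}
}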

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = arg;
	int i, cpu;

	if (rseq_register_current_thread())
		abort();
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_percpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread())
		abort();

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock.  Real applications might prefer a plain per-cpu increment (see
 * the sketch after this function); however, the lock-based form is
 * reasonable for a test and the lock can be extended to synchronize
 * more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)data.reps * num_threads);
}
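
/*
 * Illustrative sketch, not part of the original selftest: the plain
 * per-cpu increment mentioned in the comment above, written with the
 * same do_rseq() pattern this file already uses (the helper name is
 * made up, and the exact do_rseq() semantics are assumed to match the
 * surrounding code).  The critical section reads this cpu's counter and
 * commits count + 1; if the thread migrates or is preempted before the
 * commit, the sequence is retried.
 */
static void percpu_count_inc(struct spinlock_test_data *data)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			newval = data->c[cpu].count + 1;
			targetptr = (intptr_t *)&data->c[cpu].count;
		});
}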

int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			newval = (intptr_t)node;
			targetptr = (intptr_t *)&list->c[cpu].head;
			node->next = list->c[cpu].head;
		});

	return cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concern for
 * ABA-type races.  (A contrasting compare-and-swap sketch follows this
 * function.)
 */
struct percpu_list_node *percpu_list_pop(struct percpu_list *list)
{
	struct percpu_list_node *head, *next;
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			head = list->c[cpu].head;
			if (!head) {
				result = false;
			} else {
				next = head->next;
				newval = (intptr_t)next;
				targetptr = (intptr_t *)&list->c[cpu].head;
			}
		});

	return head;
}
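
/*
 * Illustrative contrast, not part of the original selftest: a
 * conventional compare-and-swap based pop on a shared list head (the
 * function name is made up; the GCC __atomic builtins are real).
 * Between loading "head" and performing the CAS, another thread can pop
 * "head", pop "head->next", and then push "head" back; the CAS still
 * succeeds because the head pointer holds its old value again, but it
 * installs a stale (possibly freed) "next" pointer.  The rseq-based
 * percpu_list_pop() above avoids this because the load of the head and
 * the committing store execute on the same cpu within one restartable
 * critical section.
 */
static struct percpu_list_node *cas_list_pop(struct percpu_list_node **headp)
{
	struct percpu_list_node *head, *next;

	do {
		head = __atomic_load_n(headp, __ATOMIC_ACQUIRE);
		if (!head)
			return NULL;
		next = head->next;	/* window for the ABA race */
	} while (!__atomic_compare_exchange_n(headp, &head, next, false,
					      __ATOMIC_ACQ_REL,
					      __ATOMIC_ACQUIRE));
	return head;
}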

void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread())
		abort();

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node = percpu_list_pop(list);

		sched_yield(); /* encourage shuffling */
		if (node)
			percpu_list_push(list, node);
	}

	if (rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification of a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		assert(pthread_create(&test_threads[i], NULL,
				      test_percpu_list_thread, &list) == 0);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		cpu_set_t pin_mask;
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		CPU_ZERO(&pin_mask);
		CPU_SET(i, &pin_mask);
		sched_setaffinity(0, sizeof(pin_mask), &pin_mask);

		while ((node = percpu_list_pop(&list))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

int main(int argc, char **argv)
{
	if (rseq_init_lock(&rseq_lock)) {
		perror("rseq_init_lock");
		return -1;
	}
	if (rseq_register_current_thread())
		goto error;
	printf("spinlock\n");
	test_percpu_spinlock();
	printf("percpu_list\n");
	test_percpu_list();
	if (rseq_unregister_current_thread())
		goto error;
	if (rseq_destroy_lock(&rseq_lock)) {
		perror("rseq_destroy_lock");
		return -1;
	}
	return 0;

error:
	if (rseq_destroy_lock(&rseq_lock))
		perror("rseq_destroy_lock");
	return -1;
}