/*
 * Basic per-cpu operations test for restartable sequences (rseq).
 * Extracted from commit be4663d4 (table-rendered view normalized back
 * to plain C source).
 */
1 | #define _GNU_SOURCE |
2 | #include <assert.h> | |
3 | #include <pthread.h> | |
4 | #include <sched.h> | |
5 | #include <stdint.h> | |
6 | #include <stdio.h> | |
7 | #include <stdlib.h> | |
8 | #include <string.h> | |
9 | ||
10 | #include "rseq.h" | |
11 | ||
/*
 * Lock used by the do_rseq() slow path (see rseq.h) to serialize
 * restartable-sequence critical sections across threads.
 */
static struct rseq_lock rseq_lock;

/*
 * One lock word per CPU. The 128-byte alignment pads each entry onto
 * its own cache-line pair so that CPUs spinning on different entries
 * do not false-share.
 */
struct percpu_lock_entry {
	intptr_t v;	/* 0 = unlocked, 1 = locked */
} __attribute__((aligned(128)));

/* A per-cpu spinlock: one independently-cached lock word per possible CPU. */
struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

/* Per-cpu counter slot, padded like the lock entries to avoid false sharing. */
struct test_data_entry {
	int count;
} __attribute__((aligned(128)));

/* Shared state for the spinlock test: lock, per-cpu counters, rep count. */
struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;	/* iterations each thread performs */
};

/* Singly-linked list node for the per-cpu list test. */
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

/* Per-cpu list head, padded to its own cache lines. */
struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

/* A per-cpu linked list: one independent list head per possible CPU. */
struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};
44 | ||
/* A simple percpu spinlock. Returns the cpu lock was acquired on. */
int rseq_percpu_lock(struct percpu_lock *lock)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	/*
	 * Retry until we observe the current CPU's lock word as 0 and
	 * the rseq commit of the store succeeds. do_rseq() (rseq.h)
	 * binds cpu to the CPU the sequence ran on and, presumably,
	 * restarts the block on preemption/migration — see rseq.h for
	 * the exact contract.
	 */
	for (;;) {
		do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
			{
				if (unlikely(lock->c[cpu].v)) {
					/* Lock held: report failure, spin again. */
					result = false;
				} else {
					/* Commit 1 into this CPU's lock word. */
					newval = 1;
					targetptr = (intptr_t *)&lock->c[cpu].v;
				}
			});
		if (likely(result))
			break;
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches smp_store_release().
	 */
	smp_acquire__after_ctrl_dep();
	return cpu;
}
73 | ||
/*
 * Release the per-cpu lock previously acquired on @cpu by
 * rseq_percpu_lock(). Must be called on the same @cpu value that
 * rseq_percpu_lock() returned.
 */
void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	/* The lock word must show as held by us. */
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * smp_acquire__after_ctrl_dep().
	 */
	smp_store_release(&lock->c[cpu].v, 0);
}
83 | ||
84 | void *test_percpu_spinlock_thread(void *arg) | |
85 | { | |
86 | struct spinlock_test_data *data = arg; | |
87 | int i, cpu; | |
88 | ||
89 | if (rseq_init_current_thread()) | |
90 | abort(); | |
91 | for (i = 0; i < data->reps; i++) { | |
92 | cpu = rseq_percpu_lock(&data->lock); | |
93 | data->c[cpu].count++; | |
94 | rseq_percpu_unlock(&data->lock, cpu); | |
95 | } | |
96 | ||
97 | return NULL; | |
98 | } | |
99 | ||
100 | /* | |
101 | * A simple test which implements a sharded counter using a per-cpu | |
102 | * lock. Obviously real applications might prefer to simply use a | |
103 | * per-cpu increment; however, this is reasonable for a test and the | |
104 | * lock can be extended to synchronize more complicated operations. | |
105 | */ | |
106 | void test_percpu_spinlock(void) | |
107 | { | |
108 | const int num_threads = 200; | |
109 | int i, sum; | |
110 | pthread_t test_threads[num_threads]; | |
111 | struct spinlock_test_data data; | |
112 | ||
113 | memset(&data, 0, sizeof(data)); | |
114 | data.reps = 5000; | |
115 | ||
116 | for (i = 0; i < num_threads; i++) | |
117 | pthread_create(&test_threads[i], NULL, | |
118 | test_percpu_spinlock_thread, &data); | |
119 | ||
120 | for (i = 0; i < num_threads; i++) | |
121 | pthread_join(test_threads[i], NULL); | |
122 | ||
123 | sum = 0; | |
124 | for (i = 0; i < CPU_SETSIZE; i++) | |
125 | sum += data.c[i].count; | |
126 | ||
127 | assert(sum == data.reps * num_threads); | |
128 | } | |
129 | ||
/*
 * Push @node onto the current CPU's list. Returns the cpu the push
 * happened on.
 *
 * node->next is written inside the do_rseq() critical section, so on
 * a restart (preemption/migration — see rseq.h for the macro's exact
 * contract) the whole link-and-publish is presumably redone against
 * the new CPU's head; no partially-linked node is ever committed.
 */
int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			/* Publish node as the new head via the commit store. */
			newval = (intptr_t)node;
			targetptr = (intptr_t *)&list->c[cpu].head;
			node->next = list->c[cpu].head;
		});

	return cpu;
}
146 | ||
/*
 * Unlike a traditional lock-less linked list; the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 *
 * Returns the popped node, or NULL if the current CPU's list was
 * empty (head is set to the observed NULL head in that case, and
 * result = false tells do_rseq() not to commit).
 */
struct percpu_list_node *percpu_list_pop(struct percpu_list *list)
{
	struct percpu_list_node *head, *next;
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			head = list->c[cpu].head;
			if (!head) {
				/* Empty list: nothing to commit. */
				result = false;
			} else {
				/*
				 * Reading head->next here is safe from ABA
				 * because a restart re-reads head itself.
				 */
				next = head->next;
				newval = (intptr_t) next;
				targetptr = (intptr_t *)&list->c[cpu].head;
			}
		});

	return head;
}
174 | ||
/*
 * Worker for test_percpu_list(): registers with rseq, then repeatedly
 * pops a node from the current CPU's list and pushes it back, yielding
 * in between to encourage migration between CPUs.
 */
void *test_percpu_list_thread(void *arg)
{
	struct percpu_list *list = arg;
	int iter;

	if (rseq_init_current_thread())
		abort();

	for (iter = 0; iter < 100000; iter++) {
		struct percpu_list_node *popped;

		popped = percpu_list_pop(list);
		sched_yield(); /* encourage shuffling */
		if (popped)
			percpu_list_push(list, popped);
	}

	return NULL;
}
193 | ||
194 | /* Simultaneous modification to a per-cpu linked list from many threads. */ | |
195 | void test_percpu_list(void) | |
196 | { | |
197 | int i, j; | |
198 | long sum = 0, expected_sum = 0; | |
199 | struct percpu_list list; | |
200 | pthread_t test_threads[200]; | |
201 | cpu_set_t allowed_cpus; | |
202 | ||
203 | memset(&list, 0, sizeof(list)); | |
204 | ||
205 | /* Generate list entries for every usable cpu. */ | |
206 | sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); | |
207 | for (i = 0; i < CPU_SETSIZE; i++) { | |
208 | if (!CPU_ISSET(i, &allowed_cpus)) | |
209 | continue; | |
210 | for (j = 1; j <= 100; j++) { | |
211 | struct percpu_list_node *node; | |
212 | ||
213 | expected_sum += j; | |
214 | ||
215 | node = malloc(sizeof(*node)); | |
216 | assert(node); | |
217 | node->data = j; | |
218 | node->next = list.c[i].head; | |
219 | list.c[i].head = node; | |
220 | } | |
221 | } | |
222 | ||
223 | for (i = 0; i < 200; i++) | |
224 | assert(pthread_create(&test_threads[i], NULL, | |
225 | test_percpu_list_thread, &list) == 0); | |
226 | ||
227 | for (i = 0; i < 200; i++) | |
228 | pthread_join(test_threads[i], NULL); | |
229 | ||
230 | for (i = 0; i < CPU_SETSIZE; i++) { | |
231 | cpu_set_t pin_mask; | |
232 | struct percpu_list_node *node; | |
233 | ||
234 | if (!CPU_ISSET(i, &allowed_cpus)) | |
235 | continue; | |
236 | ||
237 | CPU_ZERO(&pin_mask); | |
238 | CPU_SET(i, &pin_mask); | |
239 | sched_setaffinity(0, sizeof(pin_mask), &pin_mask); | |
240 | ||
241 | while ((node = percpu_list_pop(&list))) { | |
242 | sum += node->data; | |
243 | free(node); | |
244 | } | |
245 | } | |
246 | ||
247 | /* | |
248 | * All entries should now be accounted for (unless some external | |
249 | * actor is interfering with our allowed affinity while this | |
250 | * test is running). | |
251 | */ | |
252 | assert(sum == expected_sum); | |
253 | } | |
254 | ||
255 | int main(int argc, char **argv) | |
256 | { | |
257 | if (rseq_init_lock(&rseq_lock)) { | |
258 | perror("rseq_init_lock"); | |
259 | return -1; | |
260 | } | |
261 | if (rseq_init_current_thread()) | |
262 | goto error; | |
263 | printf("spinlock\n"); | |
264 | test_percpu_spinlock(); | |
265 | printf("percpu_list\n"); | |
266 | test_percpu_list(); | |
267 | ||
268 | if (rseq_destroy_lock(&rseq_lock)) { | |
269 | perror("rseq_destroy_lock"); | |
270 | return -1; | |
271 | } | |
272 | return 0; | |
273 | ||
274 | error: | |
275 | if (rseq_destroy_lock(&rseq_lock)) | |
276 | perror("rseq_destroy_lock"); | |
277 | return -1; | |
278 | } | |
279 |