/*
 * Headers needed to build this test. The rseq helpers used below
 * (do_rseq, rseq_register_current_thread, smp_store_release, ...) are
 * assumed to come from the accompanying "rseq.h".
 */
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "rseq.h"

static struct rseq_lock rseq_lock;
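/*
 * Per-cpu data structures. Each per-cpu entry is aligned to 128 bytes
 * so that entries belonging to different cpus fall on distinct cache
 * lines, avoiding false sharing; the arrays are sized CPU_SETSIZE and
 * indexed by cpu number.
 */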
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};
struct test_data_entry {
	int count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};
struct percpu_list_node {
	intptr_t data;	/* payload summed by the list test */
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};
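/*
 * The operations below share one pattern: do_rseq() runs the supplied
 * block with "cpu" set to the current cpu, then attempts to commit
 * "*targetptr = newval" as a restartable sequence; the block may set
 * "result" to false to skip the commit (e.g. when the per-cpu lock is
 * already held). This summary is inferred from how do_rseq() is used
 * in this file, not from its definition.
 */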
/* A simple percpu spinlock. Returns the cpu the lock was acquired on. */
int rseq_percpu_lock(struct percpu_lock *lock)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	for (;;) {
		do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
			{
				if (unlikely(lock->c[cpu].v)) {
					/* Lock already held on this cpu. */
					result = false;
				} else {
					/* Lock is free: commit a store of 1. */
					newval = 1;
					targetptr = (intptr_t *)&lock->c[cpu].v;
				}
			});
		if (likely(result))
			break;
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches smp_store_release().
	 */
	smp_acquire__after_ctrl_dep();
	return cpu;
}
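/* Release a lock previously taken by rseq_percpu_lock() on "cpu". */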
void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * smp_acquire__after_ctrl_dep().
	 */
	smp_store_release(&lock->c[cpu].v, 0);
}
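/*
 * Worker thread for the spinlock test: take the per-cpu lock, bump the
 * counter for the cpu the lock was acquired on, release, and repeat
 * data->reps times.
 */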
void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = arg;
	int i, cpu;

	if (rseq_register_current_thread())
		abort();
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_percpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread())
		abort();

	return NULL;
}
/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	memset(&data, 0, sizeof(data));
	data.reps = 5000;	/* iterations per thread; exact value assumed */

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)data.reps * num_threads);
}
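/* Push a node onto the current cpu's list. Returns the cpu pushed to. */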
int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			/* Link the node in front of the current per-cpu head. */
			newval = (intptr_t)node;
			targetptr = (intptr_t *)&list->c[cpu].head;
			node->next = list->c[cpu].head;
		});

	return cpu;
}
/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *percpu_list_pop(struct percpu_list *list)
{
	struct percpu_list_node *head, *next;
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			head = list->c[cpu].head;
			if (!head) {
				/* Empty list on this cpu: nothing to commit. */
				result = false;
			} else {
				next = head->next;
				newval = (intptr_t)next;
				targetptr = (intptr_t *)&list->c[cpu].head;
			}
		});

	return head;
}
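/*
 * Worker thread for the list test: repeatedly pop a node and push it
 * back, yielding in between so threads migrate across cpus.
 */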
void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread())
		abort();

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node = percpu_list_pop(list);

		sched_yield(); /* encourage shuffling */
		if (node)
			percpu_list_push(list, node);
	}

	if (rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		assert(pthread_create(&test_threads[i], NULL,
				      test_percpu_list_thread, &list) == 0);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		cpu_set_t pin_mask;
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		CPU_ZERO(&pin_mask);
		CPU_SET(i, &pin_mask);
		sched_setaffinity(0, sizeof(pin_mask), &pin_mask);

		/* Drain this cpu's list, summing the node payloads. */
		while ((node = percpu_list_pop(&list))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
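/* Register the main thread with rseq, run both tests, then tear down. */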
int main(int argc, char **argv)
{
	if (rseq_init_lock(&rseq_lock)) {
		perror("rseq_init_lock");
		return -1;
	}
	if (rseq_register_current_thread())
		goto error;
	printf("spinlock\n");
	test_percpu_spinlock();
	printf("percpu_list\n");
	test_percpu_list();
	if (rseq_unregister_current_thread())
		goto error;
	if (rseq_destroy_lock(&rseq_lock)) {
		perror("rseq_destroy_lock");
		return -1;
	}
	return 0;

error:
	if (rseq_destroy_lock(&rseq_lock))
		perror("rseq_destroy_lock");
	return -1;
}