/*
 * Basic per-cpu operations test for restartable sequences (rseq).
 * Extracted from commit be4663d4 (table-rendered view normalized back
 * to plain C source).
 */
1 | #define _GNU_SOURCE |
2 | #include <assert.h> | |
3 | #include <pthread.h> | |
4 | #include <sched.h> | |
5 | #include <stdint.h> | |
6 | #include <stdio.h> | |
7 | #include <stdlib.h> | |
8 | #include <string.h> | |
9 | ||
10 | #include "rseq.h" | |
11 | ||
/*
 * Lock used by the do_rseq() slow path (see rseq.h) to serialize
 * restartable-sequence critical sections across threads.
 */
static struct rseq_lock rseq_lock;

/*
 * One lock word per CPU. The 128-byte alignment pads each entry onto
 * its own cache-line pair so that CPUs spinning on different entries
 * do not false-share.
 */
struct percpu_lock_entry {
	intptr_t v;	/* 0 = unlocked, 1 = locked */
} __attribute__((aligned(128)));

/* A per-cpu spinlock: one independently-cached lock word per possible CPU. */
struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

/* Per-cpu counter slot, padded like the lock entries to avoid false sharing. */
struct test_data_entry {
	int count;
} __attribute__((aligned(128)));

/* Shared state for the spinlock test: lock, per-cpu counters, rep count. */
struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;	/* iterations each thread performs */
};

/* Singly-linked list node for the per-cpu list test. */
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

/* Per-cpu list head, padded to its own cache lines. */
struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

/* A per-cpu linked list: one independent list head per possible CPU. */
struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};
44 | ||
/* A simple percpu spinlock. Returns the cpu lock was acquired on. */
int rseq_percpu_lock(struct percpu_lock *lock)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	/*
	 * Retry until we observe the current CPU's lock word as 0 and
	 * the rseq commit of the store succeeds. do_rseq() (rseq.h)
	 * binds cpu to the CPU the sequence ran on and, presumably,
	 * restarts the block on preemption/migration — see rseq.h for
	 * the exact contract.
	 */
	for (;;) {
		do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
			{
				if (unlikely(lock->c[cpu].v)) {
					/* Lock held: report failure, spin again. */
					result = false;
				} else {
					/* Commit 1 into this CPU's lock word. */
					newval = 1;
					targetptr = (intptr_t *)&lock->c[cpu].v;
				}
			});
		if (likely(result))
			break;
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches smp_store_release().
	 */
	smp_acquire__after_ctrl_dep();
	return cpu;
}
73 | ||
/*
 * Release the per-cpu lock previously acquired on @cpu by
 * rseq_percpu_lock(). Must be called on the same @cpu value that
 * rseq_percpu_lock() returned.
 */
void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	/* The lock word must show as held by us. */
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * smp_acquire__after_ctrl_dep().
	 */
	smp_store_release(&lock->c[cpu].v, 0);
}
83 | ||
84 | void *test_percpu_spinlock_thread(void *arg) | |
85 | { | |
86 | struct spinlock_test_data *data = arg; | |
87 | int i, cpu; | |
88 | ||
89 | if (rseq_init_current_thread()) | |
90 | abort(); | |
91 | for (i = 0; i < data->reps; i++) { | |
92 | cpu = rseq_percpu_lock(&data->lock); | |
93 | data->c[cpu].count++; | |
94 | rseq_percpu_unlock(&data->lock, cpu); | |
95 | } | |
96 | ||
97 | return NULL; | |
98 | } | |
99 | ||
100 | /* | |
101 | * A simple test which implements a sharded counter using a per-cpu | |
102 | * lock. Obviously real applications might prefer to simply use a | |
103 | * per-cpu increment; however, this is reasonable for a test and the | |
104 | * lock can be extended to synchronize more complicated operations. | |
105 | */ | |
106 | void test_percpu_spinlock(void) | |
107 | { | |
108 | const int num_threads = 200; | |
109 | int i, sum; | |
110 | pthread_t test_threads[num_threads]; | |
111 | struct spinlock_test_data data; | |
112 | ||
113 | memset(&data, 0, sizeof(data)); | |
114 | data.reps = 5000; | |
115 | ||
116 | for (i = 0; i < num_threads; i++) | |
117 | pthread_create(&test_threads[i], NULL, | |
118 | test_percpu_spinlock_thread, &data); | |
119 | ||
120 | for (i = 0; i < num_threads; i++) | |
121 | pthread_join(test_threads[i], NULL); | |
122 | ||
123 | sum = 0; | |
124 | for (i = 0; i < CPU_SETSIZE; i++) | |
125 | sum += data.c[i].count; | |
126 | ||
127 | assert(sum == data.reps * num_threads); | |
128 | } | |
129 | ||
/*
 * Push @node onto the current CPU's list. Returns the cpu the push
 * happened on.
 *
 * node->next is written inside the do_rseq() critical section, so on
 * a restart (preemption/migration — see rseq.h for the macro's exact
 * contract) the whole link-and-publish is presumably redone against
 * the new CPU's head; no partially-linked node is ever committed.
 */
int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			/* Publish node as the new head via the commit store. */
			newval = (intptr_t)node;
			targetptr = (intptr_t *)&list->c[cpu].head;
			node->next = list->c[cpu].head;
		});

	return cpu;
}
146 | ||
/*
 * Unlike a traditional lock-less linked list; the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 *
 * Returns the popped node, or NULL if the current CPU's list was
 * empty (head is set to the observed NULL head in that case, and
 * result = false tells do_rseq() not to commit).
 */
struct percpu_list_node *percpu_list_pop(struct percpu_list *list)
{
	struct percpu_list_node *head, *next;
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			head = list->c[cpu].head;
			if (!head) {
				/* Empty list: nothing to commit. */
				result = false;
			} else {
				/*
				 * Reading head->next here is safe from ABA
				 * because a restart re-reads head itself.
				 */
				next = head->next;
				newval = (intptr_t) next;
				targetptr = (intptr_t *)&list->c[cpu].head;
			}
		});

	return head;
}
174 | ||
/*
 * Worker for test_percpu_list(): registers with rseq, then repeatedly
 * pops a node from the current CPU's list and pushes it back, yielding
 * in between to encourage migration between CPUs.
 */
void *test_percpu_list_thread(void *arg)
{
	struct percpu_list *list = arg;
	int iter;

	if (rseq_init_current_thread())
		abort();

	for (iter = 0; iter < 100000; iter++) {
		struct percpu_list_node *popped;

		popped = percpu_list_pop(list);
		sched_yield(); /* encourage shuffling */
		if (popped)
			percpu_list_push(list, popped);
	}

	return NULL;
}
193 | ||
194 | /* Simultaneous modification to a per-cpu linked list from many threads. */ | |
195 | void test_percpu_list(void) | |
196 | { | |
197 | int i, j; | |
198 | long sum = 0, expected_sum = 0; | |
199 | struct percpu_list list; | |
200 | pthread_t test_threads[200]; | |
201 | cpu_set_t allowed_cpus; | |
202 | ||
203 | memset(&list, 0, sizeof(list)); | |
204 | ||
205 | /* Generate list entries for every usable cpu. */ | |
206 | sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); | |
207 | for (i = 0; i < CPU_SETSIZE; i++) { | |
208 | if (!CPU_ISSET(i, &allowed_cpus)) | |
209 | continue; | |
210 | for (j = 1; j <= 100; j++) { | |
211 | struct percpu_list_node *node; | |
212 | ||
213 | expected_sum += j; | |
214 | ||
215 | node = malloc(sizeof(*node)); | |
216 | assert(node); | |
217 | node->data = j; | |
218 | node->next = list.c[i].head; | |
219 | list.c[i].head = node; | |
220 | } | |
221 | } | |
222 | ||
223 | for (i = 0; i < 200; i++) | |
224 | assert(pthread_create(&test_threads[i], NULL, | |
225 | test_percpu_list_thread, &list) == 0); | |
226 | ||
227 | for (i = 0; i < 200; i++) | |
228 | pthread_join(test_threads[i], NULL); | |
229 | ||
230 | for (i = 0; i < CPU_SETSIZE; i++) { | |
231 | cpu_set_t pin_mask; | |
232 | struct percpu_list_node *node; | |
233 | ||
234 | if (!CPU_ISSET(i, &allowed_cpus)) | |
235 | continue; | |
236 | ||
237 | CPU_ZERO(&pin_mask); | |
238 | CPU_SET(i, &pin_mask); | |
239 | sched_setaffinity(0, sizeof(pin_mask), &pin_mask); | |
240 | ||
241 | while ((node = percpu_list_pop(&list))) { | |
242 | sum += node->data; | |
243 | free(node); | |
244 | } | |
245 | } | |
246 | ||
247 | /* | |
248 | * All entries should now be accounted for (unless some external | |
249 | * actor is interfering with our allowed affinity while this | |
250 | * test is running). | |
251 | */ | |
252 | assert(sum == expected_sum); | |
253 | } | |
254 | ||
255 | int main(int argc, char **argv) | |
256 | { | |
257 | if (rseq_init_lock(&rseq_lock)) { | |
258 | perror("rseq_init_lock"); | |
259 | return -1; | |
260 | } | |
261 | if (rseq_init_current_thread()) | |
262 | goto error; | |
263 | printf("spinlock\n"); | |
264 | test_percpu_spinlock(); | |
265 | printf("percpu_list\n"); | |
266 | test_percpu_list(); | |
267 | ||
268 | if (rseq_destroy_lock(&rseq_lock)) { | |
269 | perror("rseq_destroy_lock"); | |
270 | return -1; | |
271 | } | |
272 | return 0; | |
273 | ||
274 | error: | |
275 | if (rseq_destroy_lock(&rseq_lock)) | |
276 | perror("rseq_destroy_lock"); | |
277 | return -1; | |
278 | } | |
279 |