Implement side per-cpu RCU
[libside.git] / src / rcu.h
// SPDX-License-Identifier: MIT
/*
 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 */

#include <sched.h>
#include <stdint.h>
#include <pthread.h>
#include <poll.h>

#define SIDE_CACHE_LINE_SIZE 256
#define SIDE_RCU_PERCPU_ARRAY_SIZE 2

/*
 * Per-CPU reader counters for one period slot, cache-line aligned to
 * prevent false sharing between CPUs.
 */
struct side_rcu_percpu_count {
	uintptr_t begin;
	uintptr_t end;
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));

/* Reader counters of a single CPU, one entry per period slot. */
struct side_rcu_cpu_gp_state {
	struct side_rcu_percpu_count count[SIDE_RCU_PERCPU_ARRAY_SIZE];
};

/* Global grace period state, with one side_rcu_cpu_gp_state per CPU. */
struct side_rcu_gp_state {
	struct side_rcu_cpu_gp_state *percpu_state;
	int nr_cpus;
	unsigned int period;
	pthread_mutex_t gp_lock;
};
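
/*
 * Illustrative sketch (not part of this header): one way the grace
 * period state could be initialized, assuming the caller provides a
 * zero-initialized array of nr_cpus per-CPU entries. The function name
 * is hypothetical and only meant to show how the fields fit together.
 */
static inline
void side_rcu_gp_state_init_sketch(struct side_rcu_gp_state *gp_state,
		struct side_rcu_cpu_gp_state *percpu_state, int nr_cpus)
{
	gp_state->percpu_state = percpu_state;	/* nr_cpus zeroed entries. */
	gp_state->nr_cpus = nr_cpus;
	gp_state->period = 0;			/* Start in period slot 0. */
	pthread_mutex_init(&gp_state->gp_lock, NULL);
}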

//TODO: replace atomics by rseq (when available)
//TODO: replace acquire/release by membarrier+compiler barrier (when available)
//TODO: implement wait/wakeup for grace period using sys_futex

/*
 * Enter a read-side critical section: increment the "begin" counter of
 * the current period for the current CPU. Returns the period, which
 * must be passed to the matching side_rcu_read_end().
 */
static inline
unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
{
	int cpu = sched_getcpu();
	unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);

	if (cpu < 0)
		cpu = 0;
	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_ACQUIRE);
	return period;
}

/*
 * Exit a read-side critical section: increment the "end" counter of the
 * period returned by the matching side_rcu_read_begin().
 */
static inline
void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
{
	int cpu = sched_getcpu();

	if (cpu < 0)
		cpu = 0;
	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_RELEASE);
}
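
/*
 * Illustrative sketch (not part of this header): a reader-side critical
 * section bracketing access to an RCU-protected pointer. The function
 * name, the int payload and the use of __ATOMIC_CONSUME to load the
 * protected pointer are assumptions made for illustration only.
 */
static inline
int side_rcu_reader_sketch(struct side_rcu_gp_state *gp_state, int **data_p)
{
	unsigned int period;
	int *data, value = 0;

	period = side_rcu_read_begin(gp_state);
	/* Load the pointer published by the updater. */
	data = __atomic_load_n(data_p, __ATOMIC_CONSUME);
	if (data)
		value = *data;	/* Dereference only within the read-side section. */
	side_rcu_read_end(gp_state, period);
	return value;
}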

/*
 * Wait (polling every 10ms) until the previous period has no reader
 * left, i.e. the sums of per-CPU begin and end counts for that period
 * are equal.
 */
static inline
void wait_for_cpus(struct side_rcu_gp_state *gp_state)
{
	unsigned int prev_period = 1 - gp_state->period;

	/*
	 * Wait for the sum of CPU begin/end counts to match for the
	 * previous period.
	 */
	for (;;) {
		uintptr_t sum = 0; /* begin - end */
		int i;

		for (i = 0; i < gp_state->nr_cpus; i++) {
			struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

			sum -= __atomic_load_n(&cpu_state->count[prev_period].end, __ATOMIC_RELAXED);
		}

		/*
		 * Read end counts before begin counts. Reading the end
		 * count before its associated begin count ensures we
		 * never see an end without having seen its associated
		 * begin, in case of a thread migration during the
		 * traversal over each cpu.
		 */
		__atomic_thread_fence(__ATOMIC_ACQ_REL);

		for (i = 0; i < gp_state->nr_cpus; i++) {
			struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

			sum += __atomic_load_n(&cpu_state->count[prev_period].begin, __ATOMIC_RELAXED);
		}
		if (!sum) {
			break;
		} else {
			/* Retry after 10ms. */
			poll(NULL, 0, 10);
		}
	}
}

/*
 * Wait for a grace period: upon return, every reader which was within a
 * read-side critical section when this function was called is
 * guaranteed to have completed it. Readers increment the counters of
 * whichever period value they observed, so both period slots can have
 * readers in flight: the updater waits for the previous period, flips
 * the current period, then waits for the period which was current
 * before the flip.
 */
static inline
void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
{
	/*
	 * This release fence pairs with the acquire MO __atomic_add_fetch
	 * in side_rcu_read_begin().
	 */
	__atomic_thread_fence(__ATOMIC_RELEASE);

	pthread_mutex_lock(&gp_state->gp_lock);

	wait_for_cpus(gp_state);

	/* Flip period: 0 -> 1, 1 -> 0. */
	(void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_RELAXED);

	wait_for_cpus(gp_state);

	pthread_mutex_unlock(&gp_state->gp_lock);

	/*
	 * This acquire fence pairs with the release MO __atomic_add_fetch
	 * in side_rcu_read_end().
	 */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);
}
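
/*
 * Illustrative sketch (not part of this header): an updater publishing
 * a new version of the RCU-protected data and waiting for a grace
 * period before the old version may be reclaimed. The function name and
 * the int payload are assumptions made for illustration only.
 */
static inline
int *side_rcu_updater_sketch(struct side_rcu_gp_state *gp_state,
		int **data_p, int *new_data)
{
	int *old_data;

	/* Publish the new pointer. */
	old_data = __atomic_exchange_n(data_p, new_data, __ATOMIC_SEQ_CST);
	/* Wait until no pre-existing reader can still hold old_data. */
	side_rcu_wait_grace_period(gp_state);
	/* old_data can now safely be reclaimed by the caller. */
	return old_data;
}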