Commit | Line | Data |
---|---|---|
ef6695f1 MD |
1 | /* SPDX-License-Identifier: MIT */ |
2 | /* SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> */ | |
3 | ||
34337fec MD |
4 | #ifndef _RSEQ_MEMPOOL_H |
5 | #define _RSEQ_MEMPOOL_H | |
ef6695f1 | 6 | |
f2981623 | 7 | #include <rseq/compiler.h> |
ef6695f1 MD |
8 | #include <stddef.h> |
9 | #include <sys/types.h> | |
e229a2dd | 10 | #include <sys/mman.h> |
ef6695f1 MD |
11 | |
12 | /* | |
34337fec | 13 | * rseq/mempool.h: rseq CPU-Local Storage (CLS) memory allocator. |
8aa1462d MD |
14 | * |
15 | * The rseq per-CPU memory allocator allows the application to request | |
16 | * memory pools of CPU-Local memory, each containing objects of a | |
17 | * given size (rounded to next power of 2), reserving a given virtual | |
18 | * address size per CPU, for a given maximum number of CPUs. | |
19 | * | |
20 | * The per-CPU memory allocator is analogous to TLS (Thread-Local | |
21 | * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU | |
22 | * memory allocator provides CPU-Local Storage. | |
ef6695f1 MD |
23 | */ |
24 | ||
c7ec94e0 MD |
25 | #ifdef __cplusplus |
26 | extern "C" { | |
27 | #endif | |
28 | ||
bef24483 MD |
29 | /* |
30 | * The percpu offset stride can be overridden by the user code. | |
31 | * The stride *must* match for all objects belonging to a given pool | |
32 | * between arguments to: | |
33 | * | |
34 | * - rseq_percpu_pool_create(), | |
35 | * - __rseq_percpu_free(), | |
36 | * - __rseq_percpu_ptr(). | |
37 | */ | |
f2981623 MD |
38 | #if RSEQ_BITS_PER_LONG == 64 |
39 | # define RSEQ_PERCPU_STRIDE (1U << 24) /* 64-bit stride: 16MB */ | |
40 | #else | |
41 | # define RSEQ_PERCPU_STRIDE (1U << 16) /* 32-bit stride: 64kB */ | |
42 | #endif | |
43 | ||
d24ee051 MD |
44 | /* |
45 | * Tag pointers returned by: | |
46 | * - rseq_percpu_malloc(), | |
47 | * - rseq_percpu_zmalloc(), | |
48 | * - rseq_percpu_pool_set_malloc(), | |
49 | * - rseq_percpu_pool_set_zmalloc(). | |
50 | * | |
8aa1462d MD |
51 | * and passed as parameter to: |
52 | * - rseq_percpu_ptr(), | |
53 | * - rseq_percpu_free(). | |
54 | * | |
d24ee051 MD |
55 | * with __rseq_percpu for use by static analyzers. |
56 | */ | |
57 | #define __rseq_percpu | |
58 | ||
a82006d0 | 59 | struct rseq_pool_attr; |
ef6695f1 MD |
60 | struct rseq_percpu_pool; |
61 | ||
8aa1462d MD |
62 | /* |
63 | * rseq_percpu_pool_create: Create a per-cpu memory pool. | |
64 | * | |
65 | * Create a per-cpu memory pool for items of size @item_len (rounded to | |
bef24483 MD |
66 | * next power of two). The reserved allocation size is @percpu_stride, and |
67 | * the maximum CPU value expected is (@max_nr_cpus - 1). A | |
68 | * @percpu_stride of 0 uses the default RSEQ_PERCPU_STRIDE. | |
8aa1462d | 69 | * |
d6acc8aa MD |
70 | * The @attr pointer is used to specify the pool attributes. If NULL, |
71 | * default attribute values are used. The @attr can be destroyed immediately | |
72 | * after rseq_percpu_pool_create() returns. The caller keeps ownership | |
73 | * of @attr. | |
8aa1462d | 74 | * |
ca452fee MD |
75 | * The argument @pool_name can be used to give a name to the pool for |
76 | * debugging purposes. It can be NULL if no name is given. | |
77 | * | |
8aa1462d MD |
78 | * Returns a pointer to the created percpu pool. Return NULL on error, |
79 | * with errno set accordingly: | |
80 | * EINVAL: Invalid argument. | |
81 | * ENOMEM: Not enough resources (memory or pool indexes) available to | |
82 | * allocate pool. | |
83 | * | |
a82006d0 MD |
84 | * In addition, if the attr mmap callback fails, NULL is returned and |
85 | * errno is propagated from the callback. The default callback can | |
9bd07c29 | 86 | * return errno=ENOMEM. |
8aa1462d MD |
87 | * |
88 | * This API is MT-safe. | |
89 | */ | |
ca452fee | 90 | struct rseq_percpu_pool *rseq_percpu_pool_create(const char *pool_name, |
f2981623 | 91 | size_t item_len, size_t percpu_stride, int max_nr_cpus, |
d6acc8aa | 92 | const struct rseq_pool_attr *attr); |
8aa1462d MD |
93 | |
94 | /* | |
95 | * rseq_percpu_pool_destroy: Destroy a per-cpu memory pool. | |
96 | * | |
97 | * Destroy a per-cpu memory pool, unmapping its memory and removing the | |
98 | * pool entry from the global index. No pointers allocated from the | |
99 | * pool should be used when it is destroyed. This includes rseq_percpu_ptr(). | |
100 | * | |
101 | * Argument @pool is a pointer to the per-cpu pool to destroy. | |
102 | * | |
103 | * Return values: 0 on success, -1 on error, with errno set accordingly: | |
104 | * ENOENT: Trying to free a pool which was not allocated. | |
105 | * | |
9bd07c29 MD |
106 | * If the munmap_func callback fails, -1 is returned and errno is |
107 | * propagated from the callback. The default callback can return | |
108 | * errno=EINVAL. | |
8aa1462d MD |
109 | * |
110 | * This API is MT-safe. | |
111 | */ | |
ef6695f1 MD |
112 | int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool); |
113 | ||
8aa1462d MD |
114 | /* |
115 | * rseq_percpu_malloc: Allocate memory from a per-cpu pool. | |
116 | * | |
117 | * Allocate an item from a per-cpu @pool. The allocation will reserve | |
118 | * an item of the size specified by @item_len (rounded to next power of | |
119 | * two) at pool creation. This effectively reserves space for this item | |
120 | * on all CPUs. | |
121 | * | |
122 | * On success, return a "__rseq_percpu" encoded pointer to the pool | |
123 | * item. This encoded pointer is meant to be passed to rseq_percpu_ptr() | |
124 | * to be decoded to a valid address before being accessed. | |
125 | * | |
126 | * Return NULL (errno=ENOMEM) if there is not enough space left in the | |
127 | * pool to allocate an item. | |
128 | * | |
129 | * This API is MT-safe. | |
130 | */ | |
d24ee051 | 131 | void __rseq_percpu *rseq_percpu_malloc(struct rseq_percpu_pool *pool); |
8aa1462d MD |
132 | |
133 | /* | |
134 | * rseq_percpu_zmalloc: Allocate zero-initialized memory from a per-cpu pool. | |
135 | * | |
136 | * Allocate memory for an item within the pool, and zero-initialize its | |
137 | * memory on all CPUs. See rseq_percpu_malloc for details. | |
138 | * | |
139 | * This API is MT-safe. | |
140 | */ | |
d24ee051 | 141 | void __rseq_percpu *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool); |
8aa1462d MD |
142 | |
143 | /* | |
144 | * rseq_percpu_free: Free memory from a per-cpu pool. | |
145 | * | |
146 | * Free an item pointed to by @ptr from its per-cpu pool. | |
147 | * | |
148 | * The @ptr argument is a __rseq_percpu encoded pointer returned by | |
149 | * either: | |
150 | * | |
151 | * - rseq_percpu_malloc(), | |
152 | * - rseq_percpu_zmalloc(), | |
153 | * - rseq_percpu_pool_set_malloc(), | |
154 | * - rseq_percpu_pool_set_zmalloc(). | |
155 | * | |
4aa3220c MD |
156 | * The @percpu_stride argument to __rseq_percpu_free() is a configurable |
157 | * stride, which must match the stride used at pool creation. | |
158 | * rseq_percpu_free() uses the default RSEQ_PERCPU_STRIDE stride. | |
159 | * | |
8aa1462d MD |
160 | * This API is MT-safe. |
161 | */ | |
f2981623 MD |
162 | void __rseq_percpu_free(void __rseq_percpu *ptr, size_t percpu_stride); |
163 | ||
164 | #define rseq_percpu_free(ptr) __rseq_percpu_free(ptr, RSEQ_PERCPU_STRIDE) | |
ef6695f1 | 165 | |
8aa1462d | 166 | /* |
4aa3220c | 167 | * rseq_percpu_ptr: Offset a per-cpu pointer for a given CPU. |
8aa1462d | 168 | * |
4aa3220c MD |
169 | * Offset a per-cpu pointer @ptr to get the associated pointer for the |
170 | * given @cpu. The @ptr argument is a __rseq_percpu pointer returned by | |
171 | * either: | |
8aa1462d MD |
172 | * |
173 | * - rseq_percpu_malloc(), | |
174 | * - rseq_percpu_zmalloc(), | |
175 | * - rseq_percpu_pool_set_malloc(), | |
176 | * - rseq_percpu_pool_set_zmalloc(). | |
177 | * | |
4aa3220c MD |
178 | * The macros rseq_percpu_ptr() and __rseq_percpu_ptr() preserve the |
179 | * type of the @ptr parameter for the returned pointer, but remove the | |
180 | * __rseq_percpu annotation. | |
8aa1462d | 181 | * |
4aa3220c MD |
182 | * The macro __rseq_percpu_ptr() takes a configurable @stride argument, |
183 | * whereas rseq_percpu_ptr() uses the RSEQ_PERCPU_STRIDE default stride. | |
184 | * This must match the stride used for pool creation. | |
8aa1462d MD |
185 | * |
186 | * This API is MT-safe. | |
187 | */ | |
4aa3220c MD |
188 | #define __rseq_percpu_ptr(ptr, cpu, stride) \ |
189 | ((__typeof__(*(ptr)) *) ((uintptr_t) (ptr) + ((unsigned int) (cpu) * (uintptr_t) (stride)))) | |
190 | ||
191 | #define rseq_percpu_ptr(ptr, cpu) __rseq_percpu_ptr(ptr, cpu, RSEQ_PERCPU_STRIDE) | |
ef6695f1 | 192 | |
8aa1462d MD |
193 | /* |
194 | * rseq_percpu_pool_set_create: Create a pool set. | |
195 | * | |
196 | * Create a set of pools. Its purpose is to offer a memory allocator API | |
197 | * for variable-length items (e.g. variable length strings). When | |
198 | * created, the pool set has no pool. Pools can be created and added to | |
199 | * the set. One common approach would be to create pools for each | |
200 | * relevant power of two allocation size useful for the application. | |
201 | * Only one pool can be added to the pool set for each power of two | |
202 | * allocation size. | |
203 | * | |
204 | * Returns a pool set pointer on success, else returns NULL with | |
205 | * errno=ENOMEM (out of memory). | |
206 | * | |
207 | * This API is MT-safe. | |
208 | */ | |
ef6695f1 | 209 | struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void); |
8aa1462d MD |
210 | |
211 | /* | |
212 | * rseq_percpu_pool_set_destroy: Destroy a pool set. | |
213 | * | |
214 | * Destroy a pool set and its associated resources. The pools that were | |
215 | * added to the pool set are destroyed as well. | |
216 | * | |
217 | * Returns 0 on success, -1 on failure (or partial failure), with errno | |
218 | * set by rseq_percpu_pool_destroy(). Using a pool set after destroy | |
219 | * failure is undefined. | |
220 | * | |
221 | * This API is MT-safe. | |
222 | */ | |
ef6695f1 | 223 | int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set); |
8aa1462d MD |
224 | |
225 | /* | |
226 | * rseq_percpu_pool_set_add_pool: Add a pool to a pool set. | |
227 | * | |
228 | * Add a @pool to the @pool_set. On success, its ownership is handed | |
229 | * over to the pool set, so the caller should not destroy it explicitly. | |
230 | * Only one pool can be added to the pool set for each power of two | |
231 | * allocation size. | |
232 | * | |
233 | * Returns 0 on success, -1 on error with the following errno: | |
234 | * - EBUSY: A pool already exists in the pool set for this power of two | |
235 | * allocation size. | |
236 | * | |
237 | * This API is MT-safe. | |
238 | */ | |
ef6695f1 MD |
239 | int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set, |
240 | struct rseq_percpu_pool *pool); | |
241 | ||
8aa1462d MD |
242 | /* |
243 | * rseq_percpu_pool_set_malloc: Allocate memory from a per-cpu pool set. | |
244 | * | |
245 | * Allocate an item from a per-cpu @pool_set. The allocation will reserve | |
246 | * an item of the size specified by @len (rounded to next power of | |
247 | * two). This effectively reserves space for this item on all CPUs. | |
248 | * | |
249 | * The space reservation will search for the smallest pool within | |
250 | * @pool_set which respects the following conditions: | |
251 | * | |
252 | * - it has an item size large enough to fit @len, | |
253 | * - it has space available. | |
254 | * | |
255 | * On success, return a "__rseq_percpu" encoded pointer to the pool | |
256 | * item. This encoded pointer is meant to be passed to rseq_percpu_ptr() | |
257 | * to be decoded to a valid address before being accessed. | |
258 | * | |
259 | * Return NULL (errno=ENOMEM) if there is not enough space left in the | |
260 | * pool to allocate an item. | |
261 | * | |
262 | * This API is MT-safe. | |
263 | */ | |
d24ee051 | 264 | void __rseq_percpu *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len); |
8aa1462d MD |
265 | |
266 | /* | |
267 | * rseq_percpu_pool_set_zmalloc: Allocate zero-initialized memory from a per-cpu pool set. | |
268 | * | |
269 | * Allocate memory for an item within the pool, and zero-initialize its | |
270 | * memory on all CPUs. See rseq_percpu_pool_set_malloc for details. | |
271 | * | |
272 | * This API is MT-safe. | |
273 | */ | |
d24ee051 | 274 | void __rseq_percpu *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len); |
ef6695f1 | 275 | |
9bd07c29 MD |
276 | /* |
277 | * rseq_percpu_pool_init_numa: Move pages to the NUMA node associated to their CPU topology. | |
278 | * | |
279 | * For pages allocated within @pool, invoke move_pages(2) with the given | |
280 | * @numa_flags to move the pages to the NUMA node associated to their | |
281 | * CPU topology. | |
282 | * | |
283 | * The @numa_flags argument is passed to move_pages(2). The expected flags are: | |
284 | * MPOL_MF_MOVE: move process-private pages to cpu-specific numa nodes. | |
285 | * MPOL_MF_MOVE_ALL: move shared pages to cpu-specific numa nodes | |
286 | * (requires CAP_SYS_NICE). | |
287 | * | |
288 | * Returns 0 on success, else return -1 with errno set by move_pages(2). | |
289 | */ | |
290 | int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool, int numa_flags); | |
291 | ||
292 | /* | |
a82006d0 MD |
293 | * rseq_pool_attr_create: Create a pool attribute structure. |
294 | */ | |
295 | struct rseq_pool_attr *rseq_pool_attr_create(void); | |
296 | ||
297 | /* | |
298 | * rseq_pool_attr_destroy: Destroy a pool attribute structure. | |
299 | */ | |
300 | void rseq_pool_attr_destroy(struct rseq_pool_attr *attr); | |
301 | ||
302 | /* | |
303 | * rseq_pool_attr_set_mmap: Set pool attribute structure mmap functions. | |
9bd07c29 MD |
304 | * |
305 | * The @mmap_func callback is used to map the memory for the pool. | |
306 | * | |
307 | * The @munmap_func callback is used to unmap the memory when the pool | |
308 | * is destroyed. | |
309 | * | |
310 | * The @mmap_priv argument is a private data pointer passed to both | |
311 | * @mmap_func and @munmap_func callbacks. | |
8118247e MD |
312 | * |
313 | * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid. | |
9bd07c29 | 314 | */ |
8118247e | 315 | int rseq_pool_attr_set_mmap(struct rseq_pool_attr *attr, |
a82006d0 | 316 | void *(*mmap_func)(void *priv, size_t len), |
9bd07c29 MD |
317 | int (*munmap_func)(void *priv, void *ptr, size_t len), |
318 | void *mmap_priv); | |
319 | ||
d6acc8aa MD |
320 | /* |
321 | * rseq_pool_attr_set_robust: Set pool robust attribute. | |
322 | * | |
323 | * The robust pool attribute enables runtime validation of the pool: | |
324 | * | |
325 | * - Check for double-free of pointers. | |
326 | * | |
327 | * - Detect memory leaks on pool destruction. | |
328 | * | |
329 | * - Detect free-list corruption on pool destruction. | |
330 | * | |
331 | * There is a marginal runtime overhead on malloc/free operations. | |
332 | * | |
333 | * The memory overhead is (pool->percpu_len / pool->item_len) / CHAR_BIT | |
334 | * bytes, over the lifetime of the pool. | |
335 | * | |
336 | * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid. | |
337 | */ | |
338 | int rseq_pool_attr_set_robust(struct rseq_pool_attr *attr); | |
339 | ||
c7ec94e0 MD |
340 | #ifdef __cplusplus |
341 | } | |
342 | #endif | |
343 | ||
34337fec | 344 | #endif /* _RSEQ_MEMPOOL_H */ |