Commit | Line | Data |
---|---|---|
ef6695f1 MD |
1 | /* SPDX-License-Identifier: MIT */ |
2 | /* SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> */ | |
3 | ||
34337fec MD |
4 | #ifndef _RSEQ_MEMPOOL_H |
5 | #define _RSEQ_MEMPOOL_H | |
ef6695f1 | 6 | |
f2981623 | 7 | #include <rseq/compiler.h> |
ef6695f1 MD |
8 | #include <stddef.h> |
9 | #include <sys/types.h> | |
e229a2dd | 10 | #include <sys/mman.h> |
ef6695f1 MD |
11 | |
12 | /* | |
34337fec | 13 | * rseq/mempool.h: rseq CPU-Local Storage (CLS) memory allocator. |
8aa1462d MD |
14 | * |
15 | * The rseq per-CPU memory allocator allows the application to request | |
16 | * memory pools of CPU-Local memory each containing objects of a | |
17 | * given size (rounded to next power of 2), reserving a given virtual | |
18 | * address size per CPU, for a given maximum number of CPUs. | |
19 | * | |
20 | * The per-CPU memory allocator is analogous to TLS (Thread-Local | |
21 | * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU | |
22 | * memory allocator provides CPU-Local Storage. | |
ef6695f1 MD |
23 | */ |
24 | ||
c7ec94e0 MD |
25 | #ifdef __cplusplus |
26 | extern "C" { | |
27 | #endif | |
28 | ||
bef24483 MD |
29 | /* |
30 | * The percpu offset stride can be overridden by the user code. | |
31 | * The stride *must* match for all objects belonging to a given pool | |
32 | * between arguments to: | |
33 | * | |
06e0b1c0 MD |
34 | * - rseq_mempool_create(), |
35 | * - rseq_percpu_ptr(), | |
36 | * - rseq_percpu_free(). | |
bef24483 | 37 | */ |
f2981623 MD |
38 | #if RSEQ_BITS_PER_LONG == 64 |
39 | # define RSEQ_PERCPU_STRIDE (1U << 24) /* 64-bit stride: 16MB */ | |
40 | #else | |
41 | # define RSEQ_PERCPU_STRIDE (1U << 16) /* 32-bit stride: 64kB */ | |
42 | #endif | |
43 | ||
d24ee051 MD |
44 | /* |
45 | * Tag pointers returned by: | |
46 | * - rseq_percpu_malloc(), | |
47 | * - rseq_percpu_zmalloc(), | |
48 | * - rseq_percpu_pool_set_malloc(), | |
49 | * - rseq_percpu_pool_set_zmalloc(). | |
50 | * | |
8aa1462d MD |
51 | * and passed as parameter to: |
52 | * - rseq_percpu_ptr(), | |
53 | * - rseq_percpu_free(). | |
54 | * | |
d24ee051 MD |
55 | * with __rseq_percpu for use by static analyzers. |
56 | */ | |
57 | #define __rseq_percpu | |
58 | ||
0ba2a93e MD |
59 | struct rseq_mempool_attr; |
60 | struct rseq_mempool; | |
ef6695f1 | 61 | |
8aa1462d MD |
62 | /* |
63 | * rseq_mempool_create: Create a per-cpu memory pool. | |
64 | * | |
65 | * Create a per-cpu memory pool for items of size @item_len (rounded to | |
bef24483 MD |
66 | * next power of two). The reserved allocation size is @percpu_stride, and |
67 | * the maximum CPU value expected is (@max_nr_cpus - 1). A | |
68 | * @percpu_stride of 0 uses the default RSEQ_PERCPU_STRIDE. | |
8aa1462d | 69 | * |
d6acc8aa MD |
70 | * The @attr pointer is used to specify the pool attributes. If NULL, use |
71 | * default attribute values. The @attr can be destroyed immediately | |
72 | * after rseq_mempool_create() returns. The caller keeps ownership | |
73 | * of @attr. | |
8aa1462d | 74 | * |
ca452fee MD |
75 | * The argument @pool_name can be used to give a name to the pool for |
76 | * debugging purposes. It can be NULL if no name is given. | |
77 | * | |
8aa1462d MD |
78 | * Returns a pointer to the created percpu pool. Return NULL on error, |
79 | * with errno set accordingly: | |
80 | * EINVAL: Invalid argument. | |
81 | * ENOMEM: Not enough resources (memory or pool indexes) available to | |
82 | * allocate pool. | |
83 | * | |
a82006d0 MD |
84 | * In addition, if the attr mmap callback fails, NULL is returned and |
85 | * errno is propagated from the callback. The default callback can | |
9bd07c29 | 86 | * return errno=ENOMEM. |
8aa1462d MD |
87 | * |
88 | * This API is MT-safe. | |
89 | */ | |
0ba2a93e | 90 | struct rseq_mempool *rseq_mempool_create(const char *pool_name, |
f2981623 | 91 | size_t item_len, size_t percpu_stride, int max_nr_cpus, |
0ba2a93e | 92 | const struct rseq_mempool_attr *attr); |
8aa1462d MD |
93 | |
94 | /* | |
0ba2a93e | 95 | * rseq_mempool_destroy: Destroy a per-cpu memory pool. |
8aa1462d MD |
96 | * |
97 | * Destroy a per-cpu memory pool, unmapping its memory and removing the | |
98 | * pool entry from the global index. No pointers allocated from the | |
99 | * pool should be used when it is destroyed. This includes rseq_percpu_ptr(). | |
100 | * | |
101 | * Argument @pool is a pointer to the per-cpu pool to destroy. | |
102 | * | |
103 | * Return values: 0 on success, -1 on error, with errno set accordingly: | |
104 | * ENOENT: Trying to free a pool which was not allocated. | |
105 | * | |
9bd07c29 MD |
106 | * If the munmap_func callback fails, -1 is returned and errno is |
107 | * propagated from the callback. The default callback can return | |
108 | * errno=EINVAL. | |
8aa1462d MD |
109 | * |
110 | * This API is MT-safe. | |
111 | */ | |
0ba2a93e | 112 | int rseq_mempool_destroy(struct rseq_mempool *pool); |
ef6695f1 | 113 | |
8aa1462d MD |
114 | /* |
115 | * rseq_percpu_malloc: Allocate memory from a per-cpu pool. | |
116 | * | |
117 | * Allocate an item from a per-cpu @pool. The allocation will reserve | |
118 | * an item of the size specified by @item_len (rounded to next power of | |
119 | * two) at pool creation. This effectively reserves space for this item | |
120 | * on all CPUs. | |
121 | * | |
122 | * On success, return a "__rseq_percpu" encoded pointer to the pool | |
123 | * item. This encoded pointer is meant to be passed to rseq_percpu_ptr() | |
124 | * to be decoded to a valid address before being accessed. | |
125 | * | |
126 | * Return NULL (errno=ENOMEM) if there is not enough space left in the | |
127 | * pool to allocate an item. | |
128 | * | |
129 | * This API is MT-safe. | |
130 | */ | |
0ba2a93e | 131 | void __rseq_percpu *rseq_percpu_malloc(struct rseq_mempool *pool); |
8aa1462d MD |
132 | |
133 | /* | |
134 | * rseq_percpu_zmalloc: Allocate zero-initialized memory from a per-cpu pool. | |
135 | * | |
136 | * Allocate memory for an item within the pool, and zero-initialize its | |
137 | * memory on all CPUs. See rseq_percpu_malloc for details. | |
138 | * | |
139 | * This API is MT-safe. | |
140 | */ | |
0ba2a93e | 141 | void __rseq_percpu *rseq_percpu_zmalloc(struct rseq_mempool *pool); |
8aa1462d MD |
142 | |
143 | /* | |
144 | * rseq_percpu_free: Free memory from a per-cpu pool. | |
145 | * | |
146 | * Free an item pointed to by @ptr from its per-cpu pool. | |
147 | * | |
148 | * The @ptr argument is a __rseq_percpu encoded pointer returned by | |
149 | * either: | |
150 | * | |
151 | * - rseq_percpu_malloc(), | |
152 | * - rseq_percpu_zmalloc(), | |
153 | * - rseq_percpu_pool_set_malloc(), | |
154 | * - rseq_percpu_pool_set_zmalloc(). | |
155 | * | |
06e0b1c0 | 156 | * The @stride optional argument to rseq_percpu_free() is a configurable |
4aa3220c | 157 | * stride, which must match the stride received by pool creation. |
06e0b1c0 | 158 | * If the argument is not present, use the default RSEQ_PERCPU_STRIDE. |
4aa3220c | 159 | * |
8aa1462d MD |
160 | * This API is MT-safe. |
161 | */ | |
06e0b1c0 | 162 | void librseq_percpu_free(void __rseq_percpu *ptr, size_t percpu_stride); |
f2981623 | 163 | |
06e0b1c0 MD |
164 | #define rseq_percpu_free(_ptr, _stride...) \ |
165 | librseq_percpu_free(_ptr, RSEQ_PARAM_SELECT_ARG1(_, ##_stride, RSEQ_PERCPU_STRIDE)) | |
ef6695f1 | 166 | |
8aa1462d | 167 | /* |
4aa3220c | 168 | * rseq_percpu_ptr: Offset a per-cpu pointer for a given CPU. |
8aa1462d | 169 | * |
4aa3220c MD |
170 | * Offset a per-cpu pointer @ptr to get the associated pointer for the |
171 | * given @cpu. The @ptr argument is a __rseq_percpu pointer returned by | |
172 | * either: | |
8aa1462d MD |
173 | * |
174 | * - rseq_percpu_malloc(), | |
175 | * - rseq_percpu_zmalloc(), | |
176 | * - rseq_percpu_pool_set_malloc(), | |
177 | * - rseq_percpu_pool_set_zmalloc(). | |
178 | * | |
06e0b1c0 MD |
179 | * The macro rseq_percpu_ptr() preserves the type of the @ptr parameter |
180 | * for the returned pointer, but removes the __rseq_percpu annotation. | |
8aa1462d | 181 | * |
06e0b1c0 MD |
182 | * The macro rseq_percpu_ptr() takes an optional @stride argument. If |
183 | * the argument is not present, use the default RSEQ_PERCPU_STRIDE. | |
4aa3220c | 184 | * This must match the stride used for pool creation. |
8aa1462d MD |
185 | * |
186 | * This API is MT-safe. | |
187 | */ | |
06e0b1c0 MD |
188 | #define rseq_percpu_ptr(_ptr, _cpu, _stride...) \ |
189 | ((__typeof__(*(_ptr)) *) ((uintptr_t) (_ptr) + \ | |
190 | ((unsigned int) (_cpu) * \ | |
191 | (uintptr_t) RSEQ_PARAM_SELECT_ARG1(_, ##_stride, RSEQ_PERCPU_STRIDE)))) | |
ef6695f1 | 192 | |
8aa1462d | 193 | /* |
0ba2a93e | 194 | * rseq_mempool_set_create: Create a pool set. |
8aa1462d MD |
195 | * |
196 | * Create a set of pools. Its purpose is to offer a memory allocator API | |
197 | * for variable-length items (e.g. variable length strings). When | |
198 | * created, the pool set has no pool. Pools can be created and added to | |
199 | * the set. One common approach would be to create pools for each | |
200 | * relevant power of two allocation size useful for the application. | |
201 | * Only one pool can be added to the pool set for each power of two | |
202 | * allocation size. | |
203 | * | |
204 | * Returns a pool set pointer on success, else returns NULL with | |
205 | * errno=ENOMEM (out of memory). | |
206 | * | |
207 | * This API is MT-safe. | |
208 | */ | |
0ba2a93e | 209 | struct rseq_mempool_set *rseq_mempool_set_create(void); |
8aa1462d MD |
210 | |
211 | /* | |
0ba2a93e | 212 | * rseq_mempool_set_destroy: Destroy a pool set. |
8aa1462d MD |
213 | * |
214 | * Destroy a pool set and its associated resources. The pools that were | |
215 | * added to the pool set are destroyed as well. | |
216 | * | |
217 | * Returns 0 on success, -1 on failure (or partial failure), with errno | |
218 | * set by rseq_mempool_destroy(). Using a pool set after destroy | |
219 | * failure is undefined. | |
220 | * | |
221 | * This API is MT-safe. | |
222 | */ | |
0ba2a93e | 223 | int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set); |
8aa1462d MD |
224 | |
225 | /* | |
0ba2a93e | 226 | * rseq_mempool_set_add_pool: Add a pool to a pool set. |
8aa1462d MD |
227 | * |
228 | * Add a @pool to the @pool_set. On success, its ownership is handed | |
229 | * over to the pool set, so the caller should not destroy it explicitly. | |
230 | * Only one pool can be added to the pool set for each power of two | |
231 | * allocation size. | |
232 | * | |
233 | * Returns 0 on success, -1 on error with the following errno: | |
234 | * - EBUSY: A pool already exists in the pool set for this power of two | |
235 | * allocation size. | |
236 | * | |
237 | * This API is MT-safe. | |
238 | */ | |
0ba2a93e MD |
239 | int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, |
240 | struct rseq_mempool *pool); | |
ef6695f1 | 241 | |
8aa1462d | 242 | /* |
0ba2a93e | 243 | * rseq_percpu_mempool_set_malloc: Allocate memory from a per-cpu pool set. |
8aa1462d MD |
244 | * |
245 | * Allocate an item from a per-cpu @pool. The allocation will reserve | |
246 | * an item of the size specified by @len (rounded to next power of | |
247 | * two). This effectively reserves space for this item on all CPUs. | |
248 | * | |
249 | * The space reservation will search for the smallest pool within | |
250 | * @pool_set which respects the following conditions: | |
251 | * | |
252 | * - it has an item size large enough to fit @len, | |
253 | * - it has space available. | |
254 | * | |
255 | * On success, return a "__rseq_percpu" encoded pointer to the pool | |
256 | * item. This encoded pointer is meant to be passed to rseq_percpu_ptr() | |
257 | * to be decoded to a valid address before being accessed. | |
258 | * | |
259 | * Return NULL (errno=ENOMEM) if there is not enough space left in the | |
260 | * pool to allocate an item. | |
261 | * | |
262 | * This API is MT-safe. | |
263 | */ | |
0ba2a93e | 264 | void __rseq_percpu *rseq_percpu_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len); |
8aa1462d MD |
265 | |
266 | /* | |
0ba2a93e | 267 | * rseq_percpu_mempool_set_zmalloc: Allocate zero-initialized memory from a per-cpu pool set. |
8aa1462d MD |
268 | * |
269 | * Allocate memory for an item within the pool, and zero-initialize its | |
0ba2a93e | 270 | * memory on all CPUs. See rseq_percpu_mempool_set_malloc for details. |
8aa1462d MD |
271 | * |
272 | * This API is MT-safe. | |
273 | */ | |
0ba2a93e | 274 | void __rseq_percpu *rseq_percpu_mempool_set_zmalloc(struct rseq_mempool_set *pool_set, size_t len); |
ef6695f1 | 275 | |
9bd07c29 | 276 | /* |
0ba2a93e | 277 | * rseq_mempool_init_numa: Move pages to the NUMA node associated to their CPU topology. |
9bd07c29 MD |
278 | * |
279 | * For pages allocated within @pool, invoke move_pages(2) with the given | |
280 | * @numa_flags to move the pages to the NUMA node associated to their | |
281 | * CPU topology. | |
282 | * | |
283 | * Argument @numa_flags are passed to move_pages(2). The expected flags are: | |
284 | * MPOL_MF_MOVE: move process-private pages to cpu-specific numa nodes. | |
285 | * MPOL_MF_MOVE_ALL: move shared pages to cpu-specific numa nodes | |
286 | * (requires CAP_SYS_NICE). | |
287 | * | |
288 | * Returns 0 on success, else return -1 with errno set by move_pages(2). | |
289 | */ | |
0ba2a93e | 290 | int rseq_mempool_init_numa(struct rseq_mempool *pool, int numa_flags); |
9bd07c29 MD |
291 | |
292 | /* | |
0ba2a93e | 293 | * rseq_mempool_attr_create: Create a pool attribute structure. |
a82006d0 | 294 | */ |
0ba2a93e | 295 | struct rseq_mempool_attr *rseq_mempool_attr_create(void); |
a82006d0 MD |
296 | |
297 | /* | |
0ba2a93e | 298 | * rseq_mempool_attr_destroy: Destroy a pool attribute structure. |
a82006d0 | 299 | */ |
0ba2a93e | 300 | void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr); |
a82006d0 MD |
301 | |
302 | /* | |
0ba2a93e | 303 | * rseq_mempool_attr_set_mmap: Set pool attribute structure mmap functions. |
9bd07c29 MD |
304 | * |
305 | * The @mmap_func callback is used to map the memory for the pool. | |
306 | * | |
307 | * The @munmap_func callback is used to unmap the memory when the pool | |
308 | * is destroyed. | |
309 | * | |
310 | * The @mmap_priv argument is a private data pointer passed to both | |
311 | * @mmap_func and @munmap_func callbacks. | |
8118247e MD |
312 | * |
313 | * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid. | |
9bd07c29 | 314 | */ |
0ba2a93e | 315 | int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr, |
a82006d0 | 316 | void *(*mmap_func)(void *priv, size_t len), |
9bd07c29 MD |
317 | int (*munmap_func)(void *priv, void *ptr, size_t len), |
318 | void *mmap_priv); | |
319 | ||
d6acc8aa | 320 | /* |
0ba2a93e | 321 | * rseq_mempool_attr_set_robust: Set pool robust attribute. |
d6acc8aa MD |
322 | * |
323 | * The robust pool attribute enables runtime validation of the pool: | |
324 | * | |
325 | * - Check for double-free of pointers. | |
326 | * | |
327 | * - Detect memory leaks on pool destruction. | |
328 | * | |
329 | * - Detect free-list corruption on pool destruction. | |
330 | * | |
331 | * There is a marginal runtime overhead on malloc/free operations. | |
332 | * | |
333 | * The memory overhead is (pool->percpu_len / pool->item_len) / CHAR_BIT | |
334 | * bytes, over the lifetime of the pool. | |
335 | * | |
336 | * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid. | |
337 | */ | |
0ba2a93e | 338 | int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr); |
d6acc8aa | 339 | |
c7ec94e0 MD |
340 | #ifdef __cplusplus |
341 | } | |
342 | #endif | |
343 | ||
34337fec | 344 | #endif /* _RSEQ_MEMPOOL_H */ |