mempool: Create memfd only for range creation
[librseq.git] / src / rseq-mempool.c
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <fcntl.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
 * memory allocator provides CPU-Local Storage.
 */

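/*
 * Example usage (illustrative sketch only: "struct my_data" and the
 * attribute setup are hypothetical, error handling is omitted, and the
 * complete public API lives in <rseq/mempool.h>):
 *
 *	struct rseq_mempool_attr *attr;
 *	struct rseq_mempool *pool;
 *	struct my_data __rseq_percpu *p;
 *
 *	attr = rseq_mempool_attr_create();
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	pool = rseq_mempool_create("my-pool", sizeof(struct my_data), attr);
 *	rseq_mempool_attr_destroy(attr);
 *
 *	p = (struct my_data __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
 *	// ... access each CPU's copy of *p through the per-cpu pointer accessors ...
 *	librseq_mempool_percpu_free(p, RSEQ_MEMPOOL_STRIDE);
 *	rseq_mempool_destroy(pool);
 */
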
#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

#if RSEQ_BITS_PER_LONG == 64
# define DEFAULT_PRIVATE_POISON_VALUE	0x5555555555555555ULL
#else
# define DEFAULT_PRIVATE_POISON_VALUE	0x55555555UL
#endif

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;

	unsigned long max_nr_ranges;

	bool poison_set;
	uintptr_t poison;

	enum rseq_mempool_populate_policy populate_policy;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */

	/*
	 * Memory layout of a mempool range:
	 * - Header page (contains struct rseq_mempool_range at the very end),
	 * - Base of the per-cpu data, starting with CPU 0.
	 *   Aliases with free-list for non-robust populate-all pools.
	 * - CPU 1,
	 * ...
	 * - CPU max_nr_cpus - 1
	 * - init values (unpopulated for RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL).
	 *   Aliases with free-list for non-robust populate-none pools.
	 * - free list (for robust pools).
	 *
	 * The free list aliases the CPU 0 memory area for non-robust
	 * populate-all pools. It aliases with init values for
	 * non-robust populate-none pools. It is located immediately
	 * after the init values for robust pools.
	 */
	void *header;
	void *base;
	/*
	 * The init values area contains malloc_init/zmalloc values.
	 * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL.
	 */
	void *init;
	size_t next_unused;

	/* Pool range mmap/munmap */
	void *mmap_addr;
	size_t mmap_len;

	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}

static
void *__rseq_pool_range_init_ptr(const struct rseq_mempool_range *range,
		uintptr_t item_offset)
{
	if (!range->init)
		return NULL;
	return range->init + item_offset;
}

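/*
 * Convert between free-list node pointers and per-cpu item pointers.
 * Depending on the robust and populate-policy attributes, the free list
 * lives in the CPU 0 area, in the init values area, or right after the
 * init values (see the range layout comment above).
 */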
static
void __rseq_percpu *__rseq_free_list_to_percpu_ptr(const struct rseq_mempool *pool,
		struct free_list_node *node)
{
	void __rseq_percpu *p = (void __rseq_percpu *) node;

	if (pool->attr.robust_set) {
		/* Skip cpus. */
		p -= pool->attr.max_nr_cpus * pool->attr.stride;
		/* Skip init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
			p -= pool->attr.stride;

	} else {
		/* Populate none free list is in init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
			p -= pool->attr.max_nr_cpus * pool->attr.stride;
	}
	return p;
}

static
struct free_list_node *__rseq_percpu_to_free_list_ptr(const struct rseq_mempool *pool,
		void __rseq_percpu *p)
{
	if (pool->attr.robust_set) {
		/* Skip cpus. */
		p += pool->attr.max_nr_cpus * pool->attr.stride;
		/* Skip init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
			p += pool->attr.stride;

	} else {
		/* Populate none free list is in init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
			p += pool->attr.max_nr_cpus * pool->attr.stride;
	}
	return (struct free_list_node *) p;
}

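/*
 * Compare each word of an item against @cmp_value. Return 0 when all
 * words match, a non-zero difference otherwise, optionally reporting
 * the first unexpected word through @unexpected_value.
 */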
static
intptr_t rseq_cmp_item(void *p, size_t item_len, intptr_t cmp_value, intptr_t *unexpected_value)
{
	size_t offset;
	intptr_t res = 0;

	for (offset = 0; offset < item_len; offset += sizeof(uintptr_t)) {
		intptr_t v = *((intptr_t *) (p + offset));

		if ((res = v - cmp_value) != 0) {
			if (unexpected_value)
				*unexpected_value = v;
			break;
		}
	}
	return res;
}

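/*
 * Zero the init values (if present) and each CPU's copy of an item,
 * skipping copies which are already zero to avoid needless COW.
 */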
static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	char *init_p = NULL;
	int i;

	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		bzero(init_p, pool->item_len);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);

		/*
		 * If the item is already zeroed, either because the
		 * init range update has propagated or because the
		 * content is already zeroed (e.g. zero page), don't
		 * write to the page. This eliminates useless COW over
		 * the zero page just for overwriting it with zeroes.
		 *
		 * This means zmalloc() in a populate-all policy pool
		 * does not trigger COW for CPUs which are not actively
		 * writing to the pool. This is however not the case for
		 * malloc_init() in populate-all pools if it populates
		 * non-zero content.
		 */
		if (!rseq_cmp_item(p, pool->item_len, 0, NULL))
			continue;
		bzero(p, pool->item_len);
	}
}

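/*
 * Copy @init_ptr content into the init values (if present) and into
 * each CPU's copy of an item, skipping copies which already match.
 */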
static
void rseq_percpu_init_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset,
		void *init_ptr, size_t init_len)
{
	char *init_p = NULL;
	int i;

	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		memcpy(init_p, init_ptr, init_len);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);

		/*
		 * If the update propagated through a shared mapping,
		 * or the item already has the correct content, skip
		 * writing it into the cpu item to eliminate useless
		 * COW of the page.
		 */
		if (!memcmp(init_ptr, p, init_len))
			continue;
		memcpy(p, init_ptr, init_len);
	}
}

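/*
 * Poisoning helpers: fill freed items with the pool poison value and,
 * for robust pools, validate that the poison is still intact when an
 * item is reused or the pool is destroyed.
 */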
static
void rseq_poison_item(void *p, size_t item_len, uintptr_t poison)
{
	size_t offset;

	for (offset = 0; offset < item_len; offset += sizeof(uintptr_t))
		*((uintptr_t *) (p + offset)) = poison;
}

static
void rseq_percpu_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	char *init_p = NULL;
	int i;

	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		rseq_poison_item(init_p, pool->item_len, poison);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);

		/*
		 * If the update propagated through a shared mapping,
		 * or the item already has the correct content, skip
		 * writing it into the cpu item to eliminate useless
		 * COW of the page.
		 *
		 * It is recommended to use zero as poison value for
		 * populate-all pools to eliminate COW due to writing
		 * poison to unused CPU memory.
		 */
		if (rseq_cmp_item(p, pool->item_len, poison, NULL) == 0)
			continue;
		rseq_poison_item(p, pool->item_len, poison);
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_check_poison_item(const struct rseq_mempool *pool, uintptr_t item_offset,
		void *p, size_t item_len, uintptr_t poison)
{
	intptr_t unexpected_value;

	if (rseq_cmp_item(p, item_len, poison, &unexpected_value) == 0)
		return;

	fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
		__func__, (unsigned long) unexpected_value, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
	abort();
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	char *init_p;
	int i;

	if (!pool->attr.robust_set)
		return;
	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		rseq_check_poison_item(pool, item_offset, init_p, pool->item_len, poison);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		rseq_check_poison_item(pool, item_offset, p, pool->item_len, poison);
	}
}

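/*
 * Move the pages of a range to the NUMA node associated with @cpu,
 * batching move_pages(2) calls by MOVE_PAGES_BATCH_SIZE pages.
 */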
#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif

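/* Allocate the allocation-tracking bitmap used by robust pools. */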
static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

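/* Return true if @_addr falls within the used area of any range of @pool. */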
static
bool percpu_addr_in_pool(const struct rseq_mempool *pool, void __rseq_percpu *_addr)
{
	struct rseq_mempool_range *range;
	void *addr = (void *) _addr;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

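/*
 * Robust pools: validate that the free list is consistent with the
 * number of allocated and freed items, detecting loops and
 * out-of-range nodes.
 */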
/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
			node;
			prev = node,
			node = node->next) {

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!percpu_addr_in_pool(pool, __rseq_free_list_to_percpu_ptr(pool, node))) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_range_poison(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range)
{
	size_t item_offset;

	for (item_offset = 0; item_offset < range->next_unused;
			item_offset += pool->item_len)
		rseq_percpu_check_poison_item(pool, range, item_offset);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_pool_poison(const struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;
	for (range = pool->range_list; range; range = range->next)
		check_range_poison(pool, range);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
	range->alloc_bitmap = NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);

	/* range is a header located one page before the aligned mapping. */
	return munmap(range->mmap_addr, range->mmap_len);
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = mmap(NULL, page_count << page_order, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (ptr == MAP_FAILED) {
		ptr = NULL;
		goto alloc_error;
	}

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (munmap(ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (munmap(extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t)ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}

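/*
 * Create the memfd backing the init values, sized to @init_len.
 * Return the file descriptor, or -1 on error.
 */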
static
int rseq_memfd_create_init(size_t init_len)
{
	int fd;

	fd = memfd_create("mempool", MFD_CLOEXEC);
	if (fd < 0) {
		perror("memfd_create");
		goto end;
	}
	if (ftruncate(fd, (off_t) init_len)) {
		/* Don't leak the memfd on ftruncate failure. */
		if (close(fd))
			perror("close");
		fd = -1;
		goto end;
	}
end:
	return fd;
}

static
void rseq_memfd_close(int fd)
{
	if (close(fd))
		perror("close");
}

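/*
 * Create a new pool range: reserve the per-cpu mappings (plus init
 * values and free-list space when needed). For populate policies other
 * than RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL, the init values are backed by
 * a memfd and each CPU area is mapped as a private copy-on-write
 * mapping of it. The optional per-range init callback is then invoked.
 */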
static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;
	size_t range_len;	/* Range len excludes header. */

	if (pool->attr.max_nr_ranges &&
			pool->nr_ranges >= pool->attr.max_nr_ranges) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	range_len = pool->attr.stride * pool->attr.max_nr_cpus;
	if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
		range_len += pool->attr.stride;	/* init values */
	if (pool->attr.robust_set)
		range_len += pool->attr.stride;	/* free list */
	base = aligned_mmap_anonymous(page_size, range_len,
			pool->attr.stride, &header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->header = header;
	range->base = base;
	range->mmap_addr = header;
	range->mmap_len = page_size + range_len;

	if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) {
		int memfd;

		range->init = base + (pool->attr.stride * pool->attr.max_nr_cpus);
		/* Populate init values pages from memfd */
		memfd = rseq_memfd_create_init(pool->attr.stride);
		if (memfd < 0)
			goto error_alloc;
		if (mmap(range->init, pool->attr.stride, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_FIXED, memfd, 0) != (void *) range->init) {
			goto error_alloc;
		}
		assert(pool->attr.type == MEMPOOL_TYPE_PERCPU);
		/*
		 * Map per-cpu memory as private COW mappings of init values.
		 */
		{
			int cpu;

			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				void *p = base + (pool->attr.stride * cpu);
				size_t len = pool->attr.stride;

				if (mmap(p, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
						memfd, 0) != (void *) p) {
					goto error_alloc;
				}
			}
		}
		rseq_memfd_close(memfd);
	}

	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;
			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	check_pool_poison(pool);
	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		ret = rseq_mempool_range_destroy(pool, range);
		if (ret)
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	free(pool);
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Override populate policy for global type. */
		if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE)
			attr.populate_policy = RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL;
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (attr.robust_set && !attr.poison_set) {
		attr.poison_set = true;
		attr.poison = DEFAULT_PRIVATE_POISON_VALUE;
	}
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

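/*
 * Allocate an item from @pool: reuse the head of the free list when
 * available, otherwise carve a new item out of the most recent range,
 * creating a new range if the current one is full. The item is
 * optionally zeroed or initialized from @init_ptr.
 */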
static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool,
		bool zeroed, void *init_ptr, size_t init_len)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	if (init_len > pool->item_len) {
		errno = EINVAL;
		return NULL;
	}
	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		void *range_base, *ptr;

		ptr = __rseq_free_list_to_percpu_ptr(pool, node);
		range_base = (void *) ((uintptr_t) ptr & (~(pool->attr.stride - 1)));
		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) (ptr - range_base);
		rseq_percpu_check_poison_item(pool, range, item_offset);
		addr = __rseq_free_list_to_percpu_ptr(pool, node);
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (addr) {
		if (zeroed)
			rseq_percpu_zero_item(pool, range, item_offset);
		else if (init_ptr) {
			rseq_percpu_init_item(pool, range, item_offset,
					init_ptr, init_len);
		}
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool,
		void *init_ptr, size_t len)
{
	return __rseq_percpu_malloc(pool, false, init_ptr, len);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

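/*
 * Free a per-cpu pointer back to its pool: the owning range is derived
 * from the pointer and @stride, the item is optionally poisoned, and it
 * is pushed onto the pool free list.
 */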
void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	if (pool->attr.poison_set)
		rseq_percpu_poison_item(pool, range, item_offset);
	item = __rseq_percpu_to_free_list_ptr(pool, _ptr);
	/*
	 * Setting the next pointer will overwrite the first uintptr_t
	 * poison for either CPU 0 (populate all) or init data (populate
	 * none).
	 */
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed, init_ptr, len);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, true);
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, init_ptr, len, false);
}

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}

int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
		unsigned long max_nr_ranges)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->max_nr_ranges = max_nr_ranges;
	return 0;
}

int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
		uintptr_t poison)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->poison_set = true;
	attr->poison = poison;
	return 0;
}

int rseq_mempool_attr_set_populate_policy(struct rseq_mempool_attr *attr,
		enum rseq_mempool_populate_policy policy)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->populate_policy = policy;
	return 0;
}

int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}