// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <fcntl.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
 * memory allocator provides CPU-Local Storage.
 */

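/*
 * Illustrative usage sketch (not compiled here): allocate a per-CPU
 * counter from a pool and access one CPU's instance. The create,
 * allocation and destroy entry points are the ones defined in this
 * file; rseq_percpu_ptr() and rseq_mempool_percpu_free() are assumed
 * to be provided by the public <rseq/mempool.h> header.
 *
 *	struct counter {
 *		intptr_t count;
 *	};
 *
 *	struct rseq_mempool_attr *attr;
 *	struct rseq_mempool *pool;
 *	struct counter __rseq_percpu *c;
 *
 *	attr = rseq_mempool_attr_create();
 *	// Default stride, auto-detect the number of possible CPUs.
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	pool = rseq_mempool_create("counter-pool", sizeof(struct counter), attr);
 *	rseq_mempool_attr_destroy(attr);
 *
 *	c = rseq_mempool_percpu_zmalloc(pool);
 *	rseq_percpu_ptr(c, 2)->count++;		// access the CPU 2 instance
 *	rseq_mempool_percpu_free(c);
 *	rseq_mempool_destroy(pool);
 */
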
#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

#if RSEQ_BITS_PER_LONG == 64
# define DEFAULT_POISON_VALUE	0x5555555555555555ULL
#else
# define DEFAULT_POISON_VALUE	0x55555555UL
#endif

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;

	unsigned long max_nr_ranges;

	bool poison_set;
	uintptr_t poison;

	enum rseq_mempool_populate_policy populate_policy;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */

	/*
	 * Memory layout of a mempool range:
	 * - Header page (contains struct rseq_mempool_range at the very end),
	 * - Base of the per-cpu data, starting with CPU 0.
	 *   Aliases with free-list for non-robust populate-all pools.
	 * - CPU 1,
	 * ...
	 * - CPU max_nr_cpus - 1
	 * - init values (unpopulated for RSEQ_MEMPOOL_POPULATE_ALL).
	 *   Aliases with free-list for non-robust populate-none pools.
	 * - free list (for robust pools).
	 *
	 * The free list aliases the CPU 0 memory area for non-robust
	 * populate-all pools. It aliases with init values for
	 * non-robust populate-none pools. It is located immediately
	 * after the init values for robust pools.
	 */
	void *header;
	void *base;
	/*
	 * The init values contain the malloc_init/zmalloc initial values.
	 * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_ALL.
	 */
	void *init;
	size_t next_unused;

	/* Pool range mmap/munmap */
	void *mmap_addr;
	size_t mmap_len;

	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};
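
/*
 * Worked example of the range layout above (illustrative numbers only,
 * not necessarily the library defaults): with a hypothetical stride of
 * 1 MiB and max_nr_cpus = 4, a populate-none robust pool lays out a
 * range as:
 *
 *	[base - page_size, base)	header page (struct rseq_mempool_range
 *					sits at its very end)
 *	[base + 0 MiB, base + 1 MiB)	CPU 0 items
 *	[base + 1 MiB, base + 2 MiB)	CPU 1 items
 *	[base + 2 MiB, base + 3 MiB)	CPU 2 items
 *	[base + 3 MiB, base + 4 MiB)	CPU 3 items
 *	[base + 4 MiB, base + 5 MiB)	init values
 *	[base + 5 MiB, base + 6 MiB)	free list (robust pools only)
 *
 * An item at item_offset within CPU c lives at
 * base + c * stride + item_offset.
 */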

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};

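/*
 * Illustrative pool set usage sketch (not compiled here): pools of
 * several item sizes are registered in a set, and an allocation is
 * served by the smallest registered pool in which the requested length
 * fits. Pool attributes are omitted for brevity; a per-CPU pool would
 * be configured with rseq_mempool_attr_set_percpu() as in the example
 * near the top of this file. rseq_mempool_percpu_free() is assumed to
 * be provided by the public <rseq/mempool.h> header.
 *
 *	struct rseq_mempool_set *set = rseq_mempool_set_create();
 *	void __rseq_percpu *p;
 *
 *	// Register pools with 16-byte and 64-byte items.
 *	rseq_mempool_set_add_pool(set,
 *		rseq_mempool_create("items-16", 16, NULL));
 *	rseq_mempool_set_add_pool(set,
 *		rseq_mempool_create("items-64", 64, NULL));
 *
 *	// A 24-byte request is served by the 64-byte pool: it is the
 *	// smallest registered pool whose item_len fits the request.
 *	p = rseq_mempool_set_percpu_zmalloc(set, 24);
 *	rseq_mempool_percpu_free(p);
 *	rseq_mempool_set_destroy(set);
 */
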
/*
 * This memfd is used to implement the user COW behavior for the page
 * protection scheme. memfd is a sparse virtual file. Its layout (in
 * offset from beginning of file) matches the process address space
 * (pointers directly converted to file offsets).
 */
struct rseq_memfd {
	pthread_mutex_t lock;
	size_t reserved_size;
	unsigned int refcount;
	int fd;
};

static struct rseq_memfd memfd = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.reserved_size = 0,
	.refcount = 0,
	.fd = -1,
};

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}

static
void *__rseq_pool_range_init_ptr(const struct rseq_mempool_range *range,
		uintptr_t item_offset)
{
	if (!range->init)
		return NULL;
	return range->init + item_offset;
}

static
void __rseq_percpu *__rseq_free_list_to_percpu_ptr(const struct rseq_mempool *pool,
		struct free_list_node *node)
{
	void __rseq_percpu *p = (void __rseq_percpu *) node;

	if (pool->attr.robust_set) {
		/* Skip cpus. */
		p -= pool->attr.max_nr_cpus * pool->attr.stride;
		/* Skip init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
			p -= pool->attr.stride;

	} else {
		/* Populate none free list is in init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
			p -= pool->attr.max_nr_cpus * pool->attr.stride;
	}
	return p;
}

static
struct free_list_node *__rseq_percpu_to_free_list_ptr(const struct rseq_mempool *pool,
		void __rseq_percpu *p)
{
	if (pool->attr.robust_set) {
		/* Skip cpus. */
		p += pool->attr.max_nr_cpus * pool->attr.stride;
		/* Skip init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
			p += pool->attr.stride;

	} else {
		/* Populate none free list is in init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
			p += pool->attr.max_nr_cpus * pool->attr.stride;
	}
	return (struct free_list_node *) p;
}

static
off_t ptr_to_off_t(void *p)
{
	return (off_t) (uintptr_t) p;
}

static
int memcmpbyte(const char *s, int c, size_t n)
{
	int res = 0;

	while (n-- > 0)
		if ((res = *(s++) - c) != 0)
			break;
	return res;
}

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	char *init_p = NULL;
	int i;

	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		memset(init_p, 0, pool->item_len);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);

		/*
		 * If the item is already zeroed, either because the
		 * init range update has propagated or because the
		 * content is already zeroed (e.g. zero page), don't
		 * write to the page. This eliminates useless COW over
		 * the zero page just for overwriting it with zeroes.
		 *
		 * This means zmalloc() in a populate-all policy pool
		 * does not trigger COW for CPUs which are not actively
		 * writing to the pool. This is however not the case for
		 * malloc_init() in populate-all pools if it populates
		 * non-zero content.
		 */
		if (!memcmpbyte(p, 0, pool->item_len))
			continue;
		memset(p, 0, pool->item_len);
	}
}

static
void rseq_percpu_init_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset,
		void *init_ptr, size_t init_len)
{
	char *init_p = NULL;
	int i;

	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		memcpy(init_p, init_ptr, init_len);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);

		/*
		 * If the update propagated through a shared mapping,
		 * or the item already has the correct content, skip
		 * writing it into the cpu item to eliminate useless
		 * COW of the page.
		 */
		if (!memcmp(init_ptr, p, init_len))
			continue;
		memcpy(p, init_ptr, init_len);
	}
}

static
void rseq_poison_item(void *p, size_t item_len, uintptr_t poison)
{
	size_t offset;

	for (offset = 0; offset < item_len; offset += sizeof(uintptr_t))
		*((uintptr_t *) (p + offset)) = poison;
}

static
intptr_t rseq_cmp_poison_item(void *p, size_t item_len, uintptr_t poison, intptr_t *unexpected_value)
{
	size_t offset;
	intptr_t res = 0;

	for (offset = 0; offset < item_len; offset += sizeof(uintptr_t)) {
		intptr_t v = *((intptr_t *) (p + offset));

		if ((res = v - (intptr_t) poison) != 0) {
			if (unexpected_value)
				*unexpected_value = v;
			break;
		}
	}
	return res;
}

static
void rseq_percpu_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	char *init_p = NULL;
	int i;

	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		rseq_poison_item(init_p, pool->item_len, poison);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);

		/*
		 * If the update propagated through a shared mapping,
		 * or the item already has the correct content, skip
		 * writing it into the cpu item to eliminate useless
		 * COW of the page.
		 *
		 * It is recommended to use zero as poison value for
		 * populate-all pools to eliminate COW due to writing
		 * poison to unused CPU memory.
		 */
		if (rseq_cmp_poison_item(p, pool->item_len, poison, NULL) == 0)
			continue;
		rseq_poison_item(p, pool->item_len, poison);
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_check_poison_item(const struct rseq_mempool *pool, uintptr_t item_offset,
		void *p, size_t item_len, uintptr_t poison)
{
	intptr_t unexpected_value;

	if (rseq_cmp_poison_item(p, item_len, poison, &unexpected_value) == 0)
		return;

	fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
		__func__, (unsigned long) unexpected_value, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
	abort();
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	char *init_p;
	int i;

	if (!pool->attr.robust_set)
		return;
	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		rseq_check_poison_item(pool, item_offset, init_p, pool->item_len, poison);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		rseq_check_poison_item(pool, item_offset, p, pool->item_len, poison);
	}
}

#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif

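/*
 * Illustrative sketch (not compiled here): rseq_mempool_range_init_numa()
 * is typically wired into pool creation through the init callback, so
 * that each per-CPU range is migrated to the NUMA node of its CPU.
 * MPOL_MF_MOVE comes from <numaif.h>; the callback signature matches
 * rseq_mempool_attr_set_init() below, and cpu is -1 for global pools.
 *
 *	static int init_numa(void *priv __attribute__((unused)),
 *			void *addr, size_t len, int cpu)
 *	{
 *		if (cpu < 0)
 *			return 0;
 *		return rseq_mempool_range_init_numa(addr, len, cpu, MPOL_MF_MOVE);
 *	}
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *
 *	rseq_mempool_attr_set_init(attr, init_numa, NULL);
 *	pool = rseq_mempool_create("numa-pool", item_len, attr);
 *	rseq_mempool_attr_destroy(attr);
 */
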
static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
bool percpu_addr_in_pool(const struct rseq_mempool *pool, void __rseq_percpu *_addr)
{
	struct rseq_mempool_range *range;
	void *addr = (void *) _addr;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
			node;
			prev = node,
			node = node->next) {

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!percpu_addr_in_pool(pool, __rseq_free_list_to_percpu_ptr(pool, node))) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_range_poison(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range)
{
	size_t item_offset;

	for (item_offset = 0; item_offset < range->next_unused;
			item_offset += pool->item_len)
		rseq_percpu_check_poison_item(pool, range, item_offset);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_pool_poison(const struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;
	for (range = pool->range_list; range; range = range->next)
		check_range_poison(pool, range);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
	range->alloc_bitmap = NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	int ret = 0;

	destroy_alloc_bitmap(pool, range);

	/*
	 * Punch a hole into memfd where the init values used to be.
	 */
	if (range->init) {
		ret = fallocate(memfd.fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				ptr_to_off_t(range->init), pool->attr.stride);
		if (ret)
			return ret;
		range->init = NULL;
	}

	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->mmap_addr, range->mmap_len);
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}

static
int rseq_memfd_reserve_init(void *init, size_t init_len)
{
	int ret = 0;
	size_t reserve_len;

	pthread_mutex_lock(&memfd.lock);
	reserve_len = (size_t) ptr_to_off_t(init) + init_len;
	if (reserve_len > memfd.reserved_size) {
		if (ftruncate(memfd.fd, (off_t) reserve_len)) {
			ret = -1;
			goto unlock;
		}
		memfd.reserved_size = reserve_len;
	}
unlock:
	pthread_mutex_unlock(&memfd.lock);
	return ret;
}

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;
	size_t range_len;	/* Range len excludes header. */

	if (pool->attr.max_nr_ranges &&
			pool->nr_ranges >= pool->attr.max_nr_ranges) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	range_len = pool->attr.stride * pool->attr.max_nr_cpus;
	if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
		range_len += pool->attr.stride;	/* init values */
	if (pool->attr.robust_set)
		range_len += pool->attr.stride;	/* free list */
	base = aligned_mmap_anonymous(pool, page_size,
			range_len,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->header = header;
	range->base = base;
	range->mmap_addr = header;
	range->mmap_len = page_size + range_len;

	if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL) {
		range->init = base + (pool->attr.stride * pool->attr.max_nr_cpus);
		/* Populate init values pages from memfd */
		if (rseq_memfd_reserve_init(range->init, pool->attr.stride))
			goto error_alloc;
		if (mmap(range->init, pool->attr.stride, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_FIXED, memfd.fd,
				ptr_to_off_t(range->init)) != (void *) range->init) {
			goto error_alloc;
		}
		assert(pool->attr.type == MEMPOOL_TYPE_PERCPU);
		/*
		 * Map per-cpu memory as private COW mappings of init values.
		 */
		{
			int cpu;

			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				void *p = base + (pool->attr.stride * cpu);
				size_t len = pool->attr.stride;

				if (mmap(p, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
						memfd.fd, ptr_to_off_t(range->init)) != (void *) p) {
					goto error_alloc;
				}
			}
		}
	}

	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;
			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

static
int rseq_mempool_memfd_ref(struct rseq_mempool *pool)
{
	int ret = 0;

	if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_ALL)
		return 0;

	pthread_mutex_lock(&memfd.lock);
	if (memfd.refcount == 0) {
		memfd.fd = memfd_create("mempool", MFD_CLOEXEC);
		if (memfd.fd < 0) {
			perror("memfd_create");
			ret = -1;
			goto unlock;
		}
	}
	memfd.refcount++;
unlock:
	pthread_mutex_unlock(&memfd.lock);
	return ret;
}

static
void rseq_mempool_memfd_unref(struct rseq_mempool *pool)
{
	if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_ALL)
		return;

	pthread_mutex_lock(&memfd.lock);
	if (memfd.refcount == 1) {
		if (close(memfd.fd)) {
			perror("close");
			abort();
		}
		memfd.fd = -1;
		memfd.reserved_size = 0;
	}
	memfd.refcount--;
	pthread_mutex_unlock(&memfd.lock);
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	check_pool_poison(pool);
	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		if (rseq_mempool_range_destroy(pool, range))
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	rseq_mempool_memfd_unref(pool);
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	free(pool);
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Override populate policy for global type. */
		attr.populate_policy = RSEQ_MEMPOOL_POPULATE_ALL;
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (attr.robust_set && !attr.poison_set) {
		attr.poison_set = true;
		attr.poison = DEFAULT_POISON_VALUE;
	}
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	if (rseq_mempool_memfd_ref(pool))
		goto error_alloc;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool,
		bool zeroed, void *init_ptr, size_t init_len)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	if (init_len > pool->item_len) {
		errno = EINVAL;
		return NULL;
	}
	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		void *range_base, *ptr;

		ptr = __rseq_free_list_to_percpu_ptr(pool, node);
		range_base = (void *) ((uintptr_t) ptr & (~(pool->attr.stride - 1)));
		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) (ptr - range_base);
		rseq_percpu_check_poison_item(pool, range, item_offset);
		addr = __rseq_free_list_to_percpu_ptr(pool, node);
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (addr) {
		if (zeroed)
			rseq_percpu_zero_item(pool, range, item_offset);
		else if (init_ptr) {
			rseq_percpu_init_item(pool, range, item_offset,
					init_ptr, init_len);
		}
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool,
		void *init_ptr, size_t len)
{
	return __rseq_percpu_malloc(pool, false, init_ptr, len);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	if (pool->attr.poison_set)
		rseq_percpu_poison_item(pool, range, item_offset);
	item = __rseq_percpu_to_free_list_ptr(pool, _ptr);
	/*
	 * Setting the next pointer will overwrite the first uintptr_t
	 * poison for either CPU 0 (populate all) or init data (populate
	 * none).
	 */
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed, init_ptr, len);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, true);
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len)
{
	/*
	 * Pass zeroed=false so that init_ptr is honored:
	 * __rseq_percpu_malloc() ignores init_ptr when zeroed is true.
	 */
	return __rseq_mempool_set_malloc(pool_set, init_ptr, len, false);
}

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}

int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
		unsigned long max_nr_ranges)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->max_nr_ranges = max_nr_ranges;
	return 0;
}

int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
		uintptr_t poison)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->poison_set = true;
	attr->poison = poison;
	return 0;
}

int rseq_mempool_attr_set_populate_policy(struct rseq_mempool_attr *attr,
		enum rseq_mempool_populate_policy policy)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->populate_policy = policy;
	return 0;
}
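
/*
 * Illustrative sketch (not compiled here) of combining the attribute
 * setters above: a robust per-CPU pool with an explicit stride, CPU
 * count, poison value and populate policy. The stride, item size and
 * policy are arbitrary example values; RSEQ_MEMPOOL_POPULATE_ALL is the
 * enumerator referenced throughout this file, and zero poison follows
 * this file's recommendation for populate-all pools.
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	struct rseq_mempool *pool;
 *
 *	rseq_mempool_attr_set_percpu(attr, 1UL << 20, 0);	// 1 MiB stride, auto-detect CPUs
 *	rseq_mempool_attr_set_robust(attr);
 *	rseq_mempool_attr_set_poison(attr, 0);
 *	rseq_mempool_attr_set_populate_policy(attr, RSEQ_MEMPOOL_POPULATE_ALL);
 *	pool = rseq_mempool_create("robust-pool", 64, attr);
 *	rseq_mempool_attr_destroy(attr);
 */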

int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}