mempool: check poison on destroy
librseq.git: src/rseq-mempool.c
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
 * memory allocator provides CPU-Local Storage.
 */

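/*
 * Illustrative usage sketch (not part of this file's code): allocate a
 * per-CPU object from a pool and touch its CPU-local copies. This
 * assumes the public wrappers declared in <rseq/mempool.h> and
 * <rseq/rseq.h> (rseq_percpu_ptr(), rseq_mempool_percpu_free(),
 * rseq_current_cpu_raw()); "struct my_data" and its "counter" field
 * are hypothetical user-defined names.
 *
 *	struct rseq_mempool *pool;
 *	struct my_data __rseq_percpu *p;
 *	int cpu;
 *
 *	pool = rseq_mempool_create("my_pool", sizeof(struct my_data), NULL);
 *	p = rseq_mempool_percpu_zmalloc(pool);
 *	cpu = rseq_current_cpu_raw();
 *	rseq_percpu_ptr(p, cpu)->counter++;
 *	rseq_mempool_percpu_free(p);
 *	rseq_mempool_destroy(pool);
 */
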
#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;

	unsigned long max_nr_ranges;

	bool poison_set;
	uintptr_t poison;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */
	void *header;
	void *base;
	size_t next_unused;
	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};

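/*
 * Illustrative sketch (not part of this file's code): a pool set lets
 * callers mix allocation sizes by registering one pool per power-of-2
 * item size, then allocating by length. All functions used below are
 * defined later in this file; error handling is omitted for brevity.
 *
 *	struct rseq_mempool_set *set;
 *	void __rseq_percpu *p;
 *
 *	set = rseq_mempool_set_create();
 *	rseq_mempool_set_add_pool(set, rseq_mempool_create("set-32", 32, NULL));
 *	rseq_mempool_set_add_pool(set, rseq_mempool_create("set-128", 128, NULL));
 *	p = rseq_mempool_set_percpu_zmalloc(set, 100);	// served by the 128-byte pool
 *	rseq_mempool_set_destroy(set);
 */
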
static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

static
void rseq_percpu_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t))
			*((uintptr_t *) (p + offset)) = poison;
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	if (!pool->attr.robust_set || !pool->attr.poison_set)
		return;
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t)) {
			uintptr_t v;

			/* Skip poison check for free-list pointer. */
			if (i == 0 && offset == 0)
				continue;
			v = *((uintptr_t *) (p + offset));
			if (v != poison) {
				fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
					__func__, (unsigned long) v, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
				abort();
			}
		}
	}
}

#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif

static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
	     node;
	     prev = node,
	     node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_range_poison(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range)
{
	size_t item_offset;

	for (item_offset = 0; item_offset < range->next_unused;
			item_offset += pool->item_len)
		rseq_percpu_check_poison_item(pool, range, item_offset);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_pool_poison(const struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set || !pool->attr.poison_set)
		return;
	for (range = pool->range_list; range; range = range->next)
		check_range_poison(pool, range);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
			(pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}

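/*
 * Worked example for aligned_mmap_anonymous() (illustrative numbers,
 * not taken from a real run): with page_size = 4 KiB, pre_header_len =
 * 4 KiB, len = 256 KiB and alignment = 64 KiB, minimum_page_count is
 * 65 pages and page_count is (4K + 256K + 64K - 4K) / 4K = 80 pages.
 * If mmap() happens to return 0x...81000, then ptr + pre_header_len =
 * 0x...82000 sits 8 KiB past a 64 KiB boundary, so 56 KiB are unmapped
 * before the mapping (ptr becomes 0x...8f000) and the single surplus
 * page is unmapped after it, leaving exactly pre_header_len + len =
 * 260 KiB mapped with the post-header area aligned on 64 KiB.
 */
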
static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	if (pool->attr.max_nr_ranges &&
			pool->nr_ranges >= pool->attr.max_nr_ranges) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;

			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	check_pool_poison(pool);
	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		if (rseq_mempool_range_destroy(pool, range))
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		uintptr_t ptr = (uintptr_t) node;
		void *range_base = (void *) (ptr & (~(pool->attr.stride - 1)));

		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - range_base);
		rseq_percpu_check_poison_item(pool, range, item_offset);
		addr = (void __rseq_percpu *) node;
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (zeroed && addr)
		rseq_percpu_zero_item(pool, range, item_offset);
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	if (pool->attr.poison_set)
		rseq_percpu_poison_item(pool, range, item_offset);
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	/*
	 * Setting the next pointer will overwrite the first uintptr_t
	 * poison for CPU 0.
	 */
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, true);
}

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}

int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
		unsigned long max_nr_ranges)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->max_nr_ranges = max_nr_ranges;
	return 0;
}

int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
		uintptr_t poison)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->poison_set = true;
	attr->poison = poison;
	return 0;
}

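/*
 * Illustrative sketch (not part of this file's code): creating a
 * robust, poisoned per-CPU pool through the attribute API defined
 * above. The stride and poison values are arbitrary example choices;
 * error handling is omitted for brevity.
 *
 *	struct rseq_mempool_attr *attr;
 *	struct rseq_mempool *pool;
 *
 *	attr = rseq_mempool_attr_create();
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);	// 0: auto-detect CPUs
 *	rseq_mempool_attr_set_robust(attr);
 *	rseq_mempool_attr_set_poison(attr, 0x5a5a5a5a5a5a5a5aULL);
 *	pool = rseq_mempool_create("robust_pool", sizeof(uint64_t), attr);
 *	rseq_mempool_attr_destroy(attr);
 *	...
 *	rseq_mempool_destroy(pool);	// robust pool: leaks or poison corruption abort here
 */
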
int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}