mempool: Detect poison corruption on alloc
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a given
 * size (rounded up to the next power of 2), reserving a given amount of
 * virtual address space per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: whereas TLS provides Thread-Local Storage, the
 * per-CPU memory allocator provides CPU-Local Storage.
 */
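
/*
 * Illustrative usage sketch (not part of this translation unit): create
 * a per-CPU pool of counters, allocate one item, and sum its per-CPU
 * copies.  This assumes the rseq_percpu_ptr() accessor and the
 * rseq_mempool_percpu_free() helper declared in <rseq/mempool.h>; the
 * pool name is arbitrary and error handling is omitted.
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	struct rseq_mempool *pool;
 *	intptr_t __rseq_percpu *counter;
 *	intptr_t sum = 0;
 *	int cpu;
 *
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	pool = rseq_mempool_create("example-pool", sizeof(intptr_t), attr);
 *	rseq_mempool_attr_destroy(attr);
 *
 *	counter = (intptr_t __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
 *	for (cpu = 0; cpu < rseq_mempool_get_max_nr_cpus(pool); cpu++)
 *		sum += *rseq_percpu_ptr(counter, cpu);
 *	rseq_mempool_percpu_free(counter);
 *	rseq_mempool_destroy(pool);
 */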

#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;

	unsigned long max_nr_ranges;

	bool poison_set;
	uintptr_t poison;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */
	void *header;
	void *base;
	size_t next_unused;
	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};
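
/*
 * Worked example of the indexing above (illustration only): a request
 * of len = 24 bytes has rseq_get_count_order_ulong(24) == 5, so it is
 * served by entries[5] (item_len == 32) when that entry is populated,
 * or by the next larger non-NULL entry otherwise.
 */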

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
void *__rseq_pool_range_percpu_ptr(struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

static
void rseq_percpu_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t))
			*((uintptr_t *) (p + offset)) = poison;
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_percpu_check_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	if (!pool->attr.robust_set || !pool->attr.poison_set)
		return;
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t)) {
			uintptr_t v;

			/* Skip poison check for free-list pointer. */
			if (i == 0 && offset == 0)
				continue;
			v = *((uintptr_t *) (p + offset));
			if (v != poison) {
				fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
					__func__, (unsigned long) v, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
				abort();
			}
		}
	}
}
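
/*
 * Sketch of how a caller arms the check above (a minimal example using
 * the attribute setters defined later in this file; the pool name,
 * poison value and struct item type are placeholders): both the robust
 * and poison attributes must be set.  On free, every per-CPU copy of
 * the item is filled with the poison value, except the first word of
 * the CPU 0 copy, which is reused as the free-list next pointer (hence
 * the i == 0 && offset == 0 skip above).  Any other word that no longer
 * matches the poison value when the item is re-allocated aborts the
 * process.
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	struct rseq_mempool *pool;
 *
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	rseq_mempool_attr_set_robust(attr);
 *	rseq_mempool_attr_set_poison(attr, (uintptr_t) 0xdeadbeefULL);
 *	pool = rseq_mempool_create("poisoned-pool", sizeof(struct item), attr);
 *	rseq_mempool_attr_destroy(attr);
 */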

#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif
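
/*
 * A possible way to wire the helper above into a pool (a sketch under
 * the assumption that libnuma is available; init_numa is a hypothetical
 * callback name): register it as the init callback so that each per-CPU
 * range is moved to the NUMA node of its CPU right after the range is
 * created.  The callback returns early for cpu == -1, which is used for
 * global (non per-CPU) ranges, and MPOL_MF_MOVE is the usual
 * move_pages(2) flag; error handling is left out.
 *
 *	static int init_numa(void *priv __attribute__((unused)),
 *			void *addr, size_t len, int cpu)
 *	{
 *		if (cpu < 0)
 *			return 0;
 *		return rseq_mempool_range_init_numa(addr, len, cpu, MPOL_MF_MOVE);
 *	}
 *
 *	rseq_mempool_attr_set_init(attr, init_numa, NULL);
 */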

static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
	     node;
	     prev = node,
	     node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
			(pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}
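
/*
 * Worked example for aligned_mmap_anonymous() (illustrative numbers and
 * addresses only): with page_size = 4 KiB, pre_header_len = 4 KiB,
 * len = 4 MiB and alignment = 4 MiB, minimum_page_count = 1025 and
 * page_count = 2048.  If mmap() returns 0x7f0000003000, the first
 * address whose pre-header makes the mapping aligned is 0x7f00003ff000,
 * so extra = 0x3fc000 bytes are unmapped before it ("extra before"),
 * leaving 1028 pages; the 3 trailing pages beyond minimum_page_count
 * are then unmapped ("extra after"), so exactly len + pre_header_len
 * bytes remain mapped, with the pre-header at 0x7f00003ff000 and the
 * aligned range starting at 0x7f0000400000.
 */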

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	if (pool->attr.max_nr_ranges &&
			pool->nr_ranges >= pool->attr.max_nr_ranges) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;
			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		ret = rseq_mempool_range_destroy(pool, range);
		if (ret)
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		uintptr_t ptr = (uintptr_t) node;
		void *range_base = (void *) (ptr & (~(pool->attr.stride - 1)));

		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - range_base);
		rseq_percpu_check_poison_item(pool, range, item_offset);
		addr = (void __rseq_percpu *) node;
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (zeroed && addr)
		rseq_percpu_zero_item(pool, range, item_offset);
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	if (pool->attr.poison_set)
		rseq_percpu_poison_item(pool, range, item_offset);
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	/*
	 * Setting the next pointer will overwrite the first uintptr_t
	 * poison for CPU 0.
	 */
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, true);
}
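
/*
 * Pool set usage sketch (assumptions: the pools are created with an
 * attribute object prepared as shown earlier, and error handling is
 * omitted; pool names and sizes are arbitrary):
 *
 *	struct rseq_mempool_set *set = rseq_mempool_set_create();
 *	void __rseq_percpu *p;
 *
 *	rseq_mempool_set_add_pool(set,
 *		rseq_mempool_create("pool-16", 16, attr));
 *	rseq_mempool_set_add_pool(set,
 *		rseq_mempool_create("pool-128", 128, attr));
 *
 *	p = rseq_mempool_set_percpu_zmalloc(set, 24);
 *
 * Here p is served by "pool-128": the 24-byte request rounds up to
 * order 5, so the 16-byte pool (entry 4) is never considered and
 * entries 5 and 6 are NULL.  A later rseq_mempool_set_destroy(set)
 * destroys both pools, since ownership was handed over by
 * rseq_mempool_set_add_pool().
 */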

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}

int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
		unsigned long max_nr_ranges)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->max_nr_ranges = max_nr_ranges;
	return 0;
}

int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
		uintptr_t poison)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->poison_set = true;
	attr->poison = poison;
	return 0;
}

int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}