mempool: Use default poison value when robust is set
src/rseq-mempool.c
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded up to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
 * memory allocator provides CPU-Local Storage.
 */
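
/*
 * Example usage (illustrative sketch only, not part of the allocator):
 * allocate per-CPU counters from a robust per-CPU pool. The calls below
 * are the entry points defined in this file; rseq_mempool_percpu_free()
 * and rseq_percpu_ptr() are assumed to come from <rseq/mempool.h>, and
 * error handling is omitted for brevity.
 *
 *	struct counter { intptr_t count; };
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	rseq_mempool_attr_set_robust(attr);
 *	struct rseq_mempool *pool = rseq_mempool_create("counters",
 *			sizeof(struct counter), attr);
 *	rseq_mempool_attr_destroy(attr);
 *
 *	struct counter __rseq_percpu *c = rseq_mempool_percpu_zmalloc(pool);
 *	// ... access each CPU's copy, e.g. through rseq_percpu_ptr(c, cpu) ...
 *	rseq_mempool_percpu_free(c);
 *	rseq_mempool_destroy(pool);
 */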

#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

#if RSEQ_BITS_PER_LONG == 64
# define DEFAULT_POISON_VALUE	0x5555555555555555ULL
#else
# define DEFAULT_POISON_VALUE	0x55555555UL
#endif

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;

	unsigned long max_nr_ranges;

	bool poison_set;
	uintptr_t poison;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */
	void *header;
	void *base;
	size_t next_unused;
	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}
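
/*
 * Illustration (example numbers, not normative): with a stride of
 * 0x100000 bytes, the CPU 3 copy of an item at item_offset 0x40 lives
 * at range->base + 3 * 0x100000 + 0x40. Each CPU therefore finds the
 * same item_offset within its own stride-sized slice of the range.
 */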

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

static
void rseq_percpu_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t))
			*((uintptr_t *) (p + offset)) = poison;
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	if (!pool->attr.robust_set)
		return;
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t)) {
			uintptr_t v;

			/* Skip poison check for free-list pointer. */
			if (i == 0 && offset == 0)
				continue;
			v = *((uintptr_t *) (p + offset));
			if (v != poison) {
				fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
					__func__, (unsigned long) v, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
				abort();
			}
		}
	}
}

#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %u\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif
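
/*
 * Sketch (assumption, not glue provided by this file): the NUMA helper
 * above can be combined with pool creation through the init callback
 * installed by rseq_mempool_attr_set_init(), so each per-CPU slice is
 * migrated to the NUMA node of its CPU as its range is created.
 * MPOL_MF_MOVE is a typical numa_flags value for move_pages().
 *
 *	static int init_numa(void *priv __attribute__((unused)),
 *			void *addr, size_t len, int cpu)
 *	{
 *		if (cpu < 0)
 *			return 0;	// Global pool: nothing to bind.
 *		return rseq_mempool_range_init_numa(addr, len, cpu, MPOL_MF_MOVE);
 *	}
 *
 *	// ... rseq_mempool_attr_set_init(attr, init_numa, NULL); ...
 */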

static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
			node;
			prev = node,
			node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_range_poison(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range)
{
	size_t item_offset;

	for (item_offset = 0; item_offset < range->next_unused;
			item_offset += pool->item_len)
		rseq_percpu_check_poison_item(pool, range, item_offset);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_pool_poison(const struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;
	for (range = pool->range_list; range; range = range->next)
		check_range_poison(pool, range);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
			(pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}
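
/*
 * Worked example (illustrative numbers): with page_size = 4 KiB,
 * pre_header_len = 4 KiB, len = 1 MiB and alignment = 1 MiB, the
 * function maps 512 pages (2 MiB), picks the first position where
 * ptr + pre_header_len is 1 MiB aligned, then unmaps the leading
 * "extra" pages and the trailing surplus, leaving exactly one header
 * page followed by the aligned 1 MiB area (257 pages total).
 */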

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	if (pool->attr.max_nr_ranges &&
			pool->nr_ranges >= pool->attr.max_nr_ranges) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;
			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	check_pool_poison(pool);
	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		ret = rseq_mempool_range_destroy(pool, range);
		if (ret)
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
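	/*
	 * A robust pool without an explicit poison value falls back to
	 * DEFAULT_POISON_VALUE, so free-list validation (see
	 * rseq_percpu_check_poison_item()) always has a pattern to compare
	 * against.
	 */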
	if (attr.robust_set && !attr.poison_set) {
		attr.poison_set = true;
		attr.poison = DEFAULT_POISON_VALUE;
	}
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		uintptr_t ptr = (uintptr_t) node;
		void *range_base = (void *) (ptr & (~(pool->attr.stride - 1)));

		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - range_base);
		rseq_percpu_check_poison_item(pool, range, item_offset);
		addr = (void __rseq_percpu *) node;
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (zeroed && addr)
		rseq_percpu_zero_item(pool, range, item_offset);
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	if (pool->attr.poison_set)
		rseq_percpu_poison_item(pool, range, item_offset);
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	/*
	 * Setting the next pointer will overwrite the first uintptr_t
	 * poison for CPU 0.
	 */
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, true);
}
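
/*
 * Pool set usage sketch (illustrative only, error handling omitted):
 * a set serves variable-sized requests from a collection of fixed-size
 * pools. pool16 and pool64 stand for 16- and 64-byte item pools created
 * as in the usage sketch near the top of this file.
 *
 *	struct rseq_mempool_set *set = rseq_mempool_set_create();
 *	rseq_mempool_set_add_pool(set, pool16);
 *	rseq_mempool_set_add_pool(set, pool64);
 *
 *	void __rseq_percpu *p = rseq_mempool_set_percpu_zmalloc(set, 24);
 *	// 24 bytes rounds up past the 16-byte pool; the 64-byte pool serves it.
 *	rseq_mempool_percpu_free(p);
 *	rseq_mempool_set_destroy(set);	// Destroys the pools it owns.
 */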

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}

int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
		unsigned long max_nr_ranges)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->max_nr_ranges = max_nr_ranges;
	return 0;
}

int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
		uintptr_t poison)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->poison_set = true;
	attr->poison = poison;
	return 0;
}

int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}