[librseq.git] / src / rseq-mempool.c
1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3
4 #include <rseq/mempool.h>
5 #include <sys/mman.h>
6 #include <assert.h>
7 #include <string.h>
8 #include <pthread.h>
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <rseq/compiler.h>
12 #include <errno.h>
13 #include <stdint.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16
17 #ifdef HAVE_LIBNUMA
18 # include <numa.h>
19 # include <numaif.h>
20 #endif
21
22 #include "rseq-utils.h"
23 #include <rseq/rseq.h>
24
25 /*
26 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
27 *
28 * The rseq per-CPU memory allocator allows the application to request
29 * memory pools of CPU-Local memory, each pool containing objects of a
30 * given size (rounded to the next power of 2), reserving a given
31 * virtual address size per CPU, for a given maximum number of CPUs.
32 *
33 * The per-CPU memory allocator is analogous to TLS (Thread-Local
34 * Storage) memory: where TLS provides per-thread storage, the per-CPU
35 * memory allocator provides CPU-Local Storage.
36 */
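/*
 * Illustrative usage sketch, kept as a comment rather than compiled
 * code. It only relies on entry points defined in this file, plus the
 * rseq_percpu_ptr() and rseq_mempool_percpu_free() accessors assumed
 * to be declared in <rseq/mempool.h>; struct my_counter is a
 * hypothetical item type.
 *
 *	struct my_counter {
 *		intptr_t count;
 *	};
 *
 *	static int64_t example_sum(void)
 *	{
 *		struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *		struct rseq_mempool *pool;
 *		struct my_counter __rseq_percpu *c;
 *		int64_t sum = 0;
 *		int cpu;
 *
 *		// Per-CPU pool: default stride, auto-detected CPU count.
 *		if (!attr || rseq_mempool_attr_set_percpu(attr,
 *				RSEQ_MEMPOOL_STRIDE, 0))
 *			abort();
 *		pool = rseq_mempool_create("my-counters",
 *				sizeof(struct my_counter), attr);
 *		rseq_mempool_attr_destroy(attr);	// Pool keeps a copy.
 *		if (!pool)
 *			abort();
 *		// One zero-initialized item, replicated on each CPU.
 *		c = (struct my_counter __rseq_percpu *)
 *				rseq_mempool_percpu_zmalloc(pool);
 *		if (!c)
 *			abort();
 *		for (cpu = 0; cpu < rseq_mempool_get_max_nr_cpus(pool); cpu++)
 *			sum += rseq_percpu_ptr(c, cpu)->count;
 *		rseq_mempool_percpu_free(c);
 *		(void) rseq_mempool_destroy(pool);
 *		return sum;
 *	}
 */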
37
38 #define POOL_SET_NR_ENTRIES RSEQ_BITS_PER_LONG
39
40 /*
41 * Smallest allocation should hold enough space for a free list pointer.
42 */
43 #if RSEQ_BITS_PER_LONG == 64
44 # define POOL_SET_MIN_ENTRY 3 /* Smallest item_len=8 */
45 #else
46 # define POOL_SET_MIN_ENTRY 2 /* Smallest item_len=4 */
47 #endif
48
49 #define BIT_PER_ULONG (8 * sizeof(unsigned long))
50
51 #define MOVE_PAGES_BATCH_SIZE 4096
52
53 #define RANGE_HEADER_OFFSET sizeof(struct rseq_mempool_range)
54
55 struct free_list_node;
56
57 struct free_list_node {
58 struct free_list_node *next;
59 };
60
61 enum mempool_type {
62 MEMPOOL_TYPE_GLOBAL = 0, /* Default */
63 MEMPOOL_TYPE_PERCPU = 1,
64 };
65
66 struct rseq_mempool_attr {
67 bool mmap_set;
68 void *(*mmap_func)(void *priv, size_t len);
69 int (*munmap_func)(void *priv, void *ptr, size_t len);
70 void *mmap_priv;
71
72 bool init_set;
73 int (*init_func)(void *priv, void *addr, size_t len, int cpu);
74 void *init_priv;
75
76 bool robust_set;
77
78 enum mempool_type type;
79 size_t stride;
80 int max_nr_cpus;
81
82 unsigned long max_nr_ranges;
83
84 bool poison_set;
85 uintptr_t poison;
86 };
87
88 struct rseq_mempool_range;
89
90 struct rseq_mempool_range {
91 struct rseq_mempool_range *next; /* Linked list of ranges. */
92 struct rseq_mempool *pool; /* Backward reference to container pool. */
93 void *header;
94 void *base;
95 size_t next_unused;
96 /* Track alloc/free. */
97 unsigned long *alloc_bitmap;
98 };
99
100 struct rseq_mempool {
101 /* Head of ranges linked-list. */
102 struct rseq_mempool_range *range_list;
103 unsigned long nr_ranges;
104
105 size_t item_len;
106 int item_order;
107
108 /*
109 * The free list chains freed items on the CPU 0 address range.
110 * We should rethink this decision if false sharing between
111 * malloc/free from other CPUs and data accesses from CPU 0
112 * becomes an issue. This is a NULL-terminated singly-linked
113 * list.
114 */
115 struct free_list_node *free_list_head;
116
117 /* This lock protects allocation/free within the pool. */
118 pthread_mutex_t lock;
119
120 struct rseq_mempool_attr attr;
121 char *name;
122 };
123
124 /*
125 * Pool set entries are indexed by item_len rounded to the next power of
126 * 2. A pool set can contain NULL pool entries, in which case the next
127 * large enough entry will be used for allocation.
128 */
129 struct rseq_mempool_set {
130 /* This lock protects add vs malloc/zmalloc within the pool set. */
131 pthread_mutex_t lock;
132 struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
133 };
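/*
 * Illustrative pool-set usage sketch (comment only). Pool names and
 * sizes are arbitrary; rseq_mempool_percpu_free() is assumed to be the
 * default-stride wrapper declared in <rseq/mempool.h>. A request is
 * served by the smallest pool whose item_len fits the requested @len,
 * falling back to a larger entry when a pool is absent or exhausted.
 *
 *	struct rseq_mempool_set *set = rseq_mempool_set_create();
 *	struct rseq_mempool *pool32, *pool128;
 *	void __rseq_percpu *a, *b;
 *
 *	pool32 = rseq_mempool_create("set-32", 32, NULL);
 *	pool128 = rseq_mempool_create("set-128", 128, NULL);
 *	if (!set || !pool32 || !pool128)
 *		abort();
 *	// Ownership of each pool is handed over to the set on success.
 *	if (rseq_mempool_set_add_pool(set, pool32) ||
 *			rseq_mempool_set_add_pool(set, pool128))
 *		abort();
 *	a = rseq_mempool_set_percpu_zmalloc(set, 20);	// Served by "set-32".
 *	b = rseq_mempool_set_percpu_zmalloc(set, 100);	// Served by "set-128".
 *	if (!a || !b)
 *		abort();
 *	rseq_mempool_percpu_free(a);
 *	rseq_mempool_percpu_free(b);
 *	(void) rseq_mempool_set_destroy(set);
 */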
134
135 static
136 void *__rseq_pool_range_percpu_ptr(struct rseq_mempool_range *range, int cpu,
137 uintptr_t item_offset, size_t stride)
138 {
139 return range->base + (stride * cpu) + item_offset;
140 }
141
142 static
143 void rseq_percpu_zero_item(struct rseq_mempool *pool,
144 struct rseq_mempool_range *range, uintptr_t item_offset)
145 {
146 int i;
147
148 for (i = 0; i < pool->attr.max_nr_cpus; i++) {
149 char *p = __rseq_pool_range_percpu_ptr(range, i,
150 item_offset, pool->attr.stride);
151 memset(p, 0, pool->item_len);
152 }
153 }
154
155 static
156 void rseq_percpu_poison_item(struct rseq_mempool *pool,
157 struct rseq_mempool_range *range, uintptr_t item_offset)
158 {
159 uintptr_t poison = pool->attr.poison;
160 int i;
161
162 for (i = 0; i < pool->attr.max_nr_cpus; i++) {
163 char *p = __rseq_pool_range_percpu_ptr(range, i,
164 item_offset, pool->attr.stride);
165 size_t offset;
166
167 for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t))
168 *((uintptr_t *) (p + offset)) = poison;
169 }
170 }
171
172 #ifdef HAVE_LIBNUMA
173 int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
174 {
175 unsigned long nr_pages, page_len;
176 int status[MOVE_PAGES_BATCH_SIZE];
177 int nodes[MOVE_PAGES_BATCH_SIZE];
178 void *pages[MOVE_PAGES_BATCH_SIZE];
179 long ret;
180
181 if (!numa_flags) {
182 errno = EINVAL;
183 return -1;
184 }
185 page_len = rseq_get_page_len();
186 nr_pages = len >> rseq_get_count_order_ulong(page_len);
187
188 nodes[0] = numa_node_of_cpu(cpu);
189 if (nodes[0] < 0)
190 return -1;
191
192 for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
193 nodes[k] = nodes[0];
194 }
195
196 for (unsigned long page = 0; page < nr_pages;) {
197
198 size_t max_k = RSEQ_ARRAY_SIZE(pages);
199 size_t left = nr_pages - page;
200
201 if (left < max_k) {
202 max_k = left;
203 }
204
205 for (size_t k = 0; k < max_k; ++k, ++page) {
206 pages[k] = addr + (page * page_len);
207 status[k] = -EPERM;
208 }
209
210 ret = move_pages(0, max_k, pages, nodes, status, numa_flags);
211
212 if (ret < 0)
213 return ret;
214
215 if (ret > 0) {
216 fprintf(stderr, "%ld pages were not migrated\n", ret);
217 for (size_t k = 0; k < max_k; ++k) {
218 if (status[k] < 0)
219 fprintf(stderr,
220 "Error while moving page %p to numa node %d: %d\n",
221 pages[k], nodes[k], -status[k]);
222 }
223 }
224 }
225 return 0;
226 }
227 #else
228 int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
229 size_t len __attribute__((unused)),
230 int cpu __attribute__((unused)),
231 int numa_flags __attribute__((unused)))
232 {
233 errno = ENOSYS;
234 return -1;
235 }
236 #endif
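/*
 * Illustrative sketch (comment only): binding each CPU's range of a
 * per-CPU pool to its NUMA node by calling
 * rseq_mempool_range_init_numa() from the init callback attribute
 * (see rseq_mempool_attr_set_init() further below). The wrapper below
 * is an example, not part of the API; the numa_flags value (e.g.
 * MPOL_MF_MOVE from <numaif.h>) is passed through the private pointer.
 *
 *	static int init_numa_cb(void *priv, void *addr, size_t len, int cpu)
 *	{
 *		int numa_flags = (int) (intptr_t) priv;
 *
 *		return rseq_mempool_range_init_numa(addr, len, cpu, numa_flags);
 *	}
 *
 *	// When building the attributes of a per-CPU pool:
 *	// rseq_mempool_attr_set_init(attr, init_numa_cb,
 *	//		(void *) (intptr_t) MPOL_MF_MOVE);
 */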
237
238 static
239 void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
240 {
241 void *base;
242
243 base = mmap(NULL, len, PROT_READ | PROT_WRITE,
244 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
245 if (base == MAP_FAILED)
246 return NULL;
247 return base;
248 }
249
250 static
251 int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
252 {
253 return munmap(ptr, len);
254 }
255
256 static
257 int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
258 {
259 size_t count;
260
261 count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;
262
263 /*
264 * Not being able to create the validation bitmap is an error
265 * that needs to be reported.
266 */
267 range->alloc_bitmap = calloc(count, sizeof(unsigned long));
268 if (!range->alloc_bitmap)
269 return -1;
270 return 0;
271 }
272
273 static
274 const char *get_pool_name(const struct rseq_mempool *pool)
275 {
276 return pool->name ? : "<anonymous>";
277 }
278
279 static
280 bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
281 {
282 struct rseq_mempool_range *range;
283
284 for (range = pool->range_list; range; range = range->next) {
285 if (addr >= range->base && addr < range->base + range->next_unused)
286 return true;
287 }
288 return false;
289 }
290
291 /* Always inline for __builtin_return_address(0). */
292 static inline __attribute__((always_inline))
293 void check_free_list(const struct rseq_mempool *pool)
294 {
295 size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
296 max_list_traversal = 0, traversal_iteration = 0;
297 struct rseq_mempool_range *range;
298
299 if (!pool->attr.robust_set)
300 return;
301
302 for (range = pool->range_list; range; range = range->next) {
303 total_item += pool->attr.stride >> pool->item_order;
304 total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
305 }
306 max_list_traversal = total_item - total_never_allocated;
307
308 for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
309 node;
310 prev = node,
311 node = node->next) {
312
313 void *node_addr = node;
314
315 if (traversal_iteration >= max_list_traversal) {
316 fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
317 __func__, get_pool_name(pool), pool, __builtin_return_address(0));
318 abort();
319 }
320
321 /* Node is out of range. */
322 if (!addr_in_pool(pool, node_addr)) {
323 if (prev)
324 fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
325 __func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
326 else
327 fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
328 __func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
329 abort();
330 }
331
332 traversal_iteration++;
333 total_freed++;
334 }
335
336 if (total_never_allocated + total_freed != total_item) {
337 fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
338 __func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
339 abort();
340 }
341 }
342
343 /* Always inline for __builtin_return_address(0). */
344 static inline __attribute__((always_inline))
345 void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
346 {
347 unsigned long *bitmap = range->alloc_bitmap;
348 size_t count, total_leaks = 0;
349
350 if (!bitmap)
351 return;
352
353 count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;
354
355 /* Assert that all items in the pool were freed. */
356 for (size_t k = 0; k < count; ++k)
357 total_leaks += rseq_hweight_ulong(bitmap[k]);
358 if (total_leaks) {
359 fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
360 __func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
361 abort();
362 }
363
364 free(bitmap);
365 }
366
367 /* Always inline for __builtin_return_address(0). */
368 static inline __attribute__((always_inline))
369 int rseq_mempool_range_destroy(struct rseq_mempool *pool,
370 struct rseq_mempool_range *range)
371 {
372 destroy_alloc_bitmap(pool, range);
373 /* range->header points one page before the aligned per-CPU mapping. */
374 return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
375 (pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
376 }
377
378 /*
379 * Allocate a memory mapping aligned on @alignment, with an optional
380 * @pre_header before the mapping.
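 *
 * For instance (illustrative numbers): with page_size = 4 KiB,
 * len = 4 MiB, alignment = 4 MiB and pre_header_len = 4 KiB,
 * page_count = (4 KiB + 4 MiB + 4 MiB - 4 KiB) / 4 KiB = 2048 pages
 * are initially reserved. The front of the mapping is trimmed so that
 * (ptr + pre_header_len) falls on a 4 MiB boundary, then the pages
 * beyond minimum_page_count = 1025 are unmapped from the tail,
 * leaving exactly pre_header_len + len bytes mapped.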
381 */
382 static
383 void *aligned_mmap_anonymous(struct rseq_mempool *pool,
384 size_t page_size, size_t len, size_t alignment,
385 void **pre_header, size_t pre_header_len)
386 {
387 size_t minimum_page_count, page_count, extra, total_allocate = 0;
388 int page_order;
389 void *ptr;
390
391 if (len < page_size || alignment < page_size ||
392 !is_pow2(alignment) || (len & (alignment - 1))) {
393 errno = EINVAL;
394 return NULL;
395 }
396 page_order = rseq_get_count_order_ulong(page_size);
397 if (page_order < 0) {
398 errno = EINVAL;
399 return NULL;
400 }
401 if (pre_header_len && (pre_header_len & (page_size - 1))) {
402 errno = EINVAL;
403 return NULL;
404 }
405
406 minimum_page_count = (pre_header_len + len) >> page_order;
407 page_count = (pre_header_len + len + alignment - page_size) >> page_order;
408
409 assert(page_count >= minimum_page_count);
410
411 ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
412 if (!ptr)
413 goto alloc_error;
414
415 total_allocate = page_count << page_order;
416
417 if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
418 /* Pointer is already aligned. ptr points to pre_header. */
419 goto out;
420 }
421
422 /* Unmap extra before. */
423 extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
424 assert(!(extra & (page_size - 1)));
425 if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
426 perror("munmap");
427 abort();
428 }
429 total_allocate -= extra;
430 ptr += extra; /* ptr points to pre_header */
431 page_count -= extra >> page_order;
432 out:
433 assert(page_count >= minimum_page_count);
434
435 if (page_count > minimum_page_count) {
436 void *extra_ptr;
437
438 /* Unmap extra after. */
439 extra_ptr = ptr + (minimum_page_count << page_order);
440 extra = (page_count - minimum_page_count) << page_order;
441 if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
442 perror("munmap");
443 abort();
444 }
445 total_allocate -= extra;
446 }
447
448 assert(!(((uintptr_t)ptr + pre_header_len) & (alignment - 1)));
449 assert(total_allocate == len + pre_header_len);
450
451 alloc_error:
452 if (ptr) {
453 if (pre_header)
454 *pre_header = ptr;
455 ptr += pre_header_len;
456 }
457 return ptr;
458 }
459
460 static
461 struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
462 {
463 struct rseq_mempool_range *range;
464 unsigned long page_size;
465 void *header;
466 void *base;
467
468 if (pool->attr.max_nr_ranges &&
469 pool->nr_ranges >= pool->attr.max_nr_ranges) {
470 errno = ENOMEM;
471 return NULL;
472 }
473 page_size = rseq_get_page_len();
474
475 base = aligned_mmap_anonymous(pool, page_size,
476 pool->attr.stride * pool->attr.max_nr_cpus,
477 pool->attr.stride,
478 &header, page_size);
479 if (!base)
480 return NULL;
481 range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
482 range->pool = pool;
483 range->base = base;
484 range->header = header;
485 if (pool->attr.robust_set) {
486 if (create_alloc_bitmap(pool, range))
487 goto error_alloc;
488 }
489 if (pool->attr.init_set) {
490 switch (pool->attr.type) {
491 case MEMPOOL_TYPE_GLOBAL:
492 if (pool->attr.init_func(pool->attr.init_priv,
493 base, pool->attr.stride, -1)) {
494 goto error_alloc;
495 }
496 break;
497 case MEMPOOL_TYPE_PERCPU:
498 {
499 int cpu;
500 for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
501 if (pool->attr.init_func(pool->attr.init_priv,
502 base + (pool->attr.stride * cpu),
503 pool->attr.stride, cpu)) {
504 goto error_alloc;
505 }
506 }
507 break;
508 }
509 default:
510 abort();
511 }
512 }
513 pool->nr_ranges++;
514 return range;
515
516 error_alloc:
517 (void) rseq_mempool_range_destroy(pool, range);
518 return NULL;
519 }
520
521 int rseq_mempool_destroy(struct rseq_mempool *pool)
522 {
523 struct rseq_mempool_range *range, *next_range;
524 int ret = 0;
525
526 if (!pool)
527 return 0;
528 check_free_list(pool);
529 /* Iteration safe against removal. */
530 for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
531 ret = rseq_mempool_range_destroy(pool, range);
532 if (ret) goto end;
533 /* Update list head to keep list coherent in case of partial failure. */
534 pool->range_list = next_range;
535 }
536 pthread_mutex_destroy(&pool->lock);
537 free(pool->name);
538 free(pool);
539 end:
540 return ret;
541 }
542
543 struct rseq_mempool *rseq_mempool_create(const char *pool_name,
544 size_t item_len, const struct rseq_mempool_attr *_attr)
545 {
546 struct rseq_mempool *pool;
547 struct rseq_mempool_attr attr = {};
548 int order;
549
550 /* Make sure each item is large enough to contain free list pointers. */
551 if (item_len < sizeof(void *))
552 item_len = sizeof(void *);
553
554 /* Align item_len on next power of two. */
555 order = rseq_get_count_order_ulong(item_len);
556 if (order < 0) {
557 errno = EINVAL;
558 return NULL;
559 }
560 item_len = 1UL << order;
561
562 if (_attr)
563 memcpy(&attr, _attr, sizeof(attr));
564 if (!attr.mmap_set) {
565 attr.mmap_func = default_mmap_func;
566 attr.munmap_func = default_munmap_func;
567 attr.mmap_priv = NULL;
568 }
569
570 switch (attr.type) {
571 case MEMPOOL_TYPE_PERCPU:
572 if (attr.max_nr_cpus < 0) {
573 errno = EINVAL;
574 return NULL;
575 }
576 if (attr.max_nr_cpus == 0) {
577 /* Auto-detect */
578 attr.max_nr_cpus = rseq_get_max_nr_cpus();
579 if (attr.max_nr_cpus == 0) {
580 errno = EINVAL;
581 return NULL;
582 }
583 }
584 break;
585 case MEMPOOL_TYPE_GLOBAL:
586 /* Use a 1-cpu pool for global mempool type. */
587 attr.max_nr_cpus = 1;
588 break;
589 }
590 if (!attr.stride)
591 attr.stride = RSEQ_MEMPOOL_STRIDE; /* Use default */
592 if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
593 !is_pow2(attr.stride)) {
594 errno = EINVAL;
595 return NULL;
596 }
597
598 pool = calloc(1, sizeof(struct rseq_mempool));
599 if (!pool)
600 return NULL;
601
602 memcpy(&pool->attr, &attr, sizeof(attr));
603 pthread_mutex_init(&pool->lock, NULL);
604 pool->item_len = item_len;
605 pool->item_order = order;
606
607 pool->range_list = rseq_mempool_range_create(pool);
608 if (!pool->range_list)
609 goto error_alloc;
610
611 if (pool_name) {
612 pool->name = strdup(pool_name);
613 if (!pool->name)
614 goto error_alloc;
615 }
616 return pool;
617
618 error_alloc:
619 rseq_mempool_destroy(pool);
620 errno = ENOMEM;
621 return NULL;
622 }
623
624 /* Always inline for __builtin_return_address(0). */
625 static inline __attribute__((always_inline))
626 void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
627 {
628 unsigned long *bitmap = range->alloc_bitmap;
629 size_t item_index = item_offset >> pool->item_order;
630 unsigned long mask;
631 size_t k;
632
633 if (!bitmap)
634 return;
635
636 k = item_index / BIT_PER_ULONG;
637 mask = 1ULL << (item_index % BIT_PER_ULONG);
638
639 /* Print error if bit is already set. */
640 if (bitmap[k] & mask) {
641 fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
642 __func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
643 abort();
644 }
645 bitmap[k] |= mask;
646 }
647
648 static
649 void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
650 {
651 struct rseq_mempool_range *range;
652 struct free_list_node *node;
653 uintptr_t item_offset;
654 void __rseq_percpu *addr;
655
656 pthread_mutex_lock(&pool->lock);
657 /* Get first entry from free list. */
658 node = pool->free_list_head;
659 if (node != NULL) {
660 uintptr_t ptr = (uintptr_t) node;
661 void *range_base = (void *) (ptr & (~(pool->attr.stride - 1)));
662
663 range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
664 /* Remove node from free list (update head). */
665 pool->free_list_head = node->next;
666 item_offset = (uintptr_t) ((void *) node - range_base);
667 addr = (void __rseq_percpu *) node;
668 goto end;
669 }
670 /*
671 * If the most recent range (first in list) does not have any
672 * room left, create a new range and prepend it to the list
673 * head.
674 */
675 range = pool->range_list;
676 if (range->next_unused + pool->item_len > pool->attr.stride) {
677 range = rseq_mempool_range_create(pool);
678 if (!range) {
679 errno = ENOMEM;
680 addr = NULL;
681 goto end;
682 }
683 /* Add range to head of list. */
684 range->next = pool->range_list;
685 pool->range_list = range;
686 }
687 /* First range in list has room left. */
688 item_offset = range->next_unused;
689 addr = (void __rseq_percpu *) (range->base + item_offset);
690 range->next_unused += pool->item_len;
691 end:
692 if (addr)
693 set_alloc_slot(pool, range, item_offset);
694 pthread_mutex_unlock(&pool->lock);
695 if (zeroed && addr)
696 rseq_percpu_zero_item(pool, range, item_offset);
697 return addr;
698 }
699
700 void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
701 {
702 return __rseq_percpu_malloc(pool, false);
703 }
704
705 void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
706 {
707 return __rseq_percpu_malloc(pool, true);
708 }
709
710 /* Always inline for __builtin_return_address(0). */
711 static inline __attribute__((always_inline))
712 void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
713 {
714 unsigned long *bitmap = range->alloc_bitmap;
715 size_t item_index = item_offset >> pool->item_order;
716 unsigned long mask;
717 size_t k;
718
719 if (!bitmap)
720 return;
721
722 k = item_index / BIT_PER_ULONG;
723 mask = 1ULL << (item_index % BIT_PER_ULONG);
724
725 /* Print error if bit is not set. */
726 if (!(bitmap[k] & mask)) {
727 fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
728 __func__, get_pool_name(pool), pool, item_offset,
729 (void *) __builtin_return_address(0));
730 abort();
731 }
732 bitmap[k] &= ~mask;
733 }
734
735 void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
736 {
737 uintptr_t ptr = (uintptr_t) _ptr;
738 void *range_base = (void *) (ptr & (~(stride - 1)));
739 struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
740 struct rseq_mempool *pool = range->pool;
741 uintptr_t item_offset = ptr & (stride - 1);
742 struct free_list_node *head, *item;
743
744 pthread_mutex_lock(&pool->lock);
745 clear_alloc_slot(pool, range, item_offset);
746 /* Add ptr to head of free list */
747 head = pool->free_list_head;
748 if (pool->attr.poison_set)
749 rseq_percpu_poison_item(pool, range, item_offset);
750 /* Free-list is in CPU 0 range. */
751 item = (struct free_list_node *) ptr;
752 /*
753 * Setting the next pointer will overwrite the first uintptr_t
754 * poison for CPU 0.
755 */
756 item->next = head;
757 pool->free_list_head = item;
758 pthread_mutex_unlock(&pool->lock);
759 }
760
761 struct rseq_mempool_set *rseq_mempool_set_create(void)
762 {
763 struct rseq_mempool_set *pool_set;
764
765 pool_set = calloc(1, sizeof(struct rseq_mempool_set));
766 if (!pool_set)
767 return NULL;
768 pthread_mutex_init(&pool_set->lock, NULL);
769 return pool_set;
770 }
771
772 int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
773 {
774 int order, ret;
775
776 for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
777 struct rseq_mempool *pool = pool_set->entries[order];
778
779 if (!pool)
780 continue;
781 ret = rseq_mempool_destroy(pool);
782 if (ret)
783 return ret;
784 pool_set->entries[order] = NULL;
785 }
786 pthread_mutex_destroy(&pool_set->lock);
787 free(pool_set);
788 return 0;
789 }
790
791 /* Ownership of pool is handed over to pool set on success. */
792 int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
793 {
794 size_t item_order = pool->item_order;
795 int ret = 0;
796
797 pthread_mutex_lock(&pool_set->lock);
798 if (pool_set->entries[item_order]) {
799 errno = EBUSY;
800 ret = -1;
801 goto end;
802 }
803 pool_set->entries[item_order] = pool;
804 end:
805 pthread_mutex_unlock(&pool_set->lock);
806 return ret;
807 }
808
809 static
810 void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
811 {
812 int order, min_order = POOL_SET_MIN_ENTRY;
813 struct rseq_mempool *pool;
814 void __rseq_percpu *addr;
815
816 order = rseq_get_count_order_ulong(len);
817 if (order > POOL_SET_MIN_ENTRY)
818 min_order = order;
819 again:
820 pthread_mutex_lock(&pool_set->lock);
821 /* Pick the smallest present pool where @len fits. */
822 for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
823 pool = pool_set->entries[order];
824
825 if (!pool)
826 continue;
827 if (pool->item_len >= len)
828 goto found;
829 }
830 pool = NULL;
831 found:
832 pthread_mutex_unlock(&pool_set->lock);
833 if (pool) {
834 addr = __rseq_percpu_malloc(pool, zeroed);
835 if (addr == NULL && errno == ENOMEM) {
836 /*
837 * If the allocation failed, try again with a
838 * larger pool.
839 */
840 min_order = order + 1;
841 goto again;
842 }
843 } else {
844 /* Not found. */
845 errno = ENOMEM;
846 addr = NULL;
847 }
848 return addr;
849 }
850
851 void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
852 {
853 return __rseq_mempool_set_malloc(pool_set, len, false);
854 }
855
856 void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
857 {
858 return __rseq_mempool_set_malloc(pool_set, len, true);
859 }
860
861 struct rseq_mempool_attr *rseq_mempool_attr_create(void)
862 {
863 return calloc(1, sizeof(struct rseq_mempool_attr));
864 }
865
866 void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
867 {
868 free(attr);
869 }
870
871 int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
872 void *(*mmap_func)(void *priv, size_t len),
873 int (*munmap_func)(void *priv, void *ptr, size_t len),
874 void *mmap_priv)
875 {
876 if (!attr) {
877 errno = EINVAL;
878 return -1;
879 }
880 attr->mmap_set = true;
881 attr->mmap_func = mmap_func;
882 attr->munmap_func = munmap_func;
883 attr->mmap_priv = mmap_priv;
884 return 0;
885 }
886
887 int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
888 int (*init_func)(void *priv, void *addr, size_t len, int cpu),
889 void *init_priv)
890 {
891 if (!attr) {
892 errno = EINVAL;
893 return -1;
894 }
895 attr->init_set = true;
896 attr->init_func = init_func;
897 attr->init_priv = init_priv;
898 return 0;
899 }
900
901 int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
902 {
903 if (!attr) {
904 errno = EINVAL;
905 return -1;
906 }
907 attr->robust_set = true;
908 return 0;
909 }
910
911 int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
912 size_t stride, int max_nr_cpus)
913 {
914 if (!attr) {
915 errno = EINVAL;
916 return -1;
917 }
918 attr->type = MEMPOOL_TYPE_PERCPU;
919 attr->stride = stride;
920 attr->max_nr_cpus = max_nr_cpus;
921 return 0;
922 }
923
924 int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
925 size_t stride)
926 {
927 if (!attr) {
928 errno = EINVAL;
929 return -1;
930 }
931 attr->type = MEMPOOL_TYPE_GLOBAL;
932 attr->stride = stride;
933 attr->max_nr_cpus = 0;
934 return 0;
935 }
936
937 int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
938 unsigned long max_nr_ranges)
939 {
940 if (!attr) {
941 errno = EINVAL;
942 return -1;
943 }
944 attr->max_nr_ranges = max_nr_ranges;
945 return 0;
946 }
947
948 int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
949 uintptr_t poison)
950 {
951 if (!attr) {
952 errno = EINVAL;
953 return -1;
954 }
955 attr->poison_set = true;
956 attr->poison = poison;
957 return 0;
958 }
959
960 int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
961 {
962 if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
963 errno = EINVAL;
964 return -1;
965 }
966 return mempool->attr.max_nr_cpus;
967 }
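/*
 * Illustrative debugging configuration sketch (comment only): a global
 * pool with the robust, poison and max-nr-ranges attributes set. The
 * values are examples, not recommendations; struct my_item is a
 * hypothetical item type.
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	struct rseq_mempool *pool;
 *
 *	if (!attr ||
 *			rseq_mempool_attr_set_global(attr, RSEQ_MEMPOOL_STRIDE) ||
 *			rseq_mempool_attr_set_robust(attr) ||
 *			rseq_mempool_attr_set_poison(attr, 0xdeadbeefUL) ||
 *			rseq_mempool_attr_set_max_nr_ranges(attr, 4))
 *		abort();
 *	pool = rseq_mempool_create("debug-pool", sizeof(struct my_item), attr);
 *	rseq_mempool_attr_destroy(attr);	// The pool keeps its own copy.
 *	if (!pool)
 *		abort();
 *	// Robust accounting aborts on double-free and on leaked items at
 *	// rseq_mempool_destroy() time; freed items are filled with the
 *	// poison value.
 *	(void) rseq_mempool_destroy(pool);
 */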