// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded up to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS provides Thread-Local Storage, whereas the
 * per-CPU memory allocator provides CPU-Local Storage.
 */
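
/*
 * Usage sketch (illustrative only, not compiled as part of this file).
 * It assumes the accessors rseq_percpu_ptr() and
 * rseq_mempool_percpu_free() exposed by <rseq/mempool.h>; adjust the
 * names to the installed header if they differ.
 *
 *	#include <rseq/mempool.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	struct percpu_counter {
 *		intptr_t count;
 *	};
 *
 *	static int percpu_counter_demo(void)
 *	{
 *		struct rseq_mempool_attr *attr;
 *		struct rseq_mempool *pool;
 *		struct percpu_counter __rseq_percpu *c;
 *		intptr_t sum = 0;
 *		int nr_cpus, cpu;
 *
 *		attr = rseq_mempool_attr_create();
 *		if (!attr)
 *			return -1;
 *		// Per-CPU pool: default stride, auto-detected CPU count.
 *		if (rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0)) {
 *			rseq_mempool_attr_destroy(attr);
 *			return -1;
 *		}
 *		pool = rseq_mempool_create("counters",
 *				sizeof(struct percpu_counter), attr);
 *		rseq_mempool_attr_destroy(attr);
 *		if (!pool)
 *			return -1;
 *
 *		// One allocation hands back a CPU-local copy per possible
 *		// CPU, all zero-initialized by zmalloc.
 *		c = (struct percpu_counter __rseq_percpu *)
 *			rseq_mempool_percpu_zmalloc(pool);
 *		if (!c) {
 *			(void) rseq_mempool_destroy(pool);
 *			return -1;
 *		}
 *		nr_cpus = rseq_mempool_get_max_nr_cpus(pool);
 *		for (cpu = 0; cpu < nr_cpus; cpu++)
 *			sum += rseq_percpu_ptr(c, cpu)->count;
 *		printf("sum: %ld\n", (long) sum);
 *
 *		rseq_mempool_percpu_free(c);
 *		return rseq_mempool_destroy(pool);
 *	}
 */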

#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

#if RSEQ_BITS_PER_LONG == 64
# define DEFAULT_POISON_VALUE	0x5555555555555555ULL
#else
# define DEFAULT_POISON_VALUE	0x55555555UL
#endif

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;

	unsigned long max_nr_ranges;

	bool poison_set;
	uintptr_t poison;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */
	void *header;
	void *base;
	size_t next_unused;

	/* Pool range mmap/munmap */
	void *mmap_addr;
	size_t mmap_len;

	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};
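
/*
 * Pool set usage sketch (illustrative only): pools with power-of-2 item
 * sizes are registered in a set, and a set allocation picks the
 * smallest registered pool whose item_len fits the requested length.
 * rseq_mempool_percpu_free() is assumed from <rseq/mempool.h>; pass a
 * per-CPU attr (as in the sketch near the top of this file) instead of
 * NULL to get per-CPU pools.
 *
 *	static struct rseq_mempool_set *make_set(void)
 *	{
 *		struct rseq_mempool_set *set;
 *		size_t sz;
 *
 *		set = rseq_mempool_set_create();
 *		if (!set)
 *			return NULL;
 *		for (sz = 32; sz <= 1024; sz <<= 1) {
 *			struct rseq_mempool *pool;
 *
 *			pool = rseq_mempool_create(NULL, sz, NULL);
 *			if (!pool)
 *				goto err;
 *			if (rseq_mempool_set_add_pool(set, pool)) {
 *				(void) rseq_mempool_destroy(pool);
 *				goto err;
 *			}
 *		}
 *		return set;
 *	err:
 *		(void) rseq_mempool_set_destroy(set);
 *		return NULL;
 *	}
 *
 *	// A 100-byte request is served from the 128-byte pool:
 *	// void __rseq_percpu *p = rseq_mempool_set_percpu_zmalloc(set, 100);
 *	// ... rseq_mempool_percpu_free(p);
 */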

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}
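
/*
 * Address layout illustration (hypothetical numbers): with a stride of
 * 0x100000 bytes and item_offset 0x40, the CPU 0 copy of an item lives
 * at base + 0x40, the CPU 1 copy at base + 0x100040, the CPU 2 copy at
 * base + 0x200040, and so on: one stride-sized address range per CPU,
 * with identical offsets within each range.
 */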

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

static
void rseq_percpu_init_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset,
		void *init_ptr, size_t init_len)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memcpy(p, init_ptr, init_len);
	}
}

static
void rseq_percpu_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t))
			*((uintptr_t *) (p + offset)) = poison;
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	if (!pool->attr.robust_set)
		return;
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t)) {
			uintptr_t v;

			/* Skip poison check for free-list pointer. */
			if (i == 0 && offset == 0)
				continue;
			v = *((uintptr_t *) (p + offset));
			if (v != poison) {
				fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
					__func__, (unsigned long) v, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
				abort();
			}
		}
	}
}
#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif
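
/*
 * Sketch (illustrative only): binding each CPU's range to its NUMA node
 * through the pool init callback. MPOL_MF_MOVE comes from <numaif.h>
 * and is passed straight through to move_pages(2).
 *
 *	static int init_numa_cb(void *priv __attribute__((unused)),
 *			void *addr, size_t len, int cpu)
 *	{
 *		if (cpu < 0)	// Global range: nothing to bind.
 *			return 0;
 *		return rseq_mempool_range_init_numa(addr, len, cpu, MPOL_MF_MOVE);
 *	}
 *
 *	// attr = rseq_mempool_attr_create();
 *	// rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	// rseq_mempool_attr_set_init(attr, init_numa_cb, NULL);
 *	// pool = rseq_mempool_create("numa-pool", item_len, attr);
 */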

static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
	     node;
	     prev = node,
	     node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_range_poison(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range)
{
	size_t item_offset;

	for (item_offset = 0; item_offset < range->next_unused;
			item_offset += pool->item_len)
		rseq_percpu_check_poison_item(pool, range, item_offset);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_pool_poison(const struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;
	for (range = pool->range_list; range; range = range->next)
		check_range_poison(pool, range);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->mmap_addr, range->mmap_len);
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}
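
/*
 * Worked example of the trimming above (hypothetical numbers):
 * page_size = 4 kB, len = 1 MB, alignment = 1 MB, pre_header_len = 4 kB.
 * page_count covers 4 kB + 1 MB + 1 MB - 4 kB = 2 MB of initial mapping,
 * while minimum_page_count covers 1 MB + 4 kB. The slack in front of the
 * first 1 MB boundary past ptr + pre_header_len is unmapped first, then
 * the slack past ptr + pre_header_len + len, leaving exactly
 * pre_header_len + len bytes whose post-pre-header start is 1 MB aligned.
 */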

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	if (pool->attr.max_nr_ranges &&
			pool->nr_ranges >= pool->attr.max_nr_ranges) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	range->mmap_addr = header;
	range->mmap_len = page_size + (pool->attr.stride * pool->attr.max_nr_cpus);
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;
			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	check_pool_poison(pool);
	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		ret = rseq_mempool_range_destroy(pool, range);
		if (ret)
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	free(pool);
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (attr.robust_set && !attr.poison_set) {
		attr.poison_set = true;
		attr.poison = DEFAULT_POISON_VALUE;
	}
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool,
		bool zeroed, void *init_ptr, size_t init_len)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	if (init_len > pool->item_len) {
		errno = EINVAL;
		return NULL;
	}
	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		uintptr_t ptr = (uintptr_t) node;
		void *range_base = (void *) (ptr & (~(pool->attr.stride - 1)));

		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - range_base);
		rseq_percpu_check_poison_item(pool, range, item_offset);
		addr = (void __rseq_percpu *) node;
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (addr) {
		if (zeroed)
			rseq_percpu_zero_item(pool, range, item_offset);
		else if (init_ptr) {
			rseq_percpu_init_item(pool, range, item_offset,
					init_ptr, init_len);
		}
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool,
		void *init_ptr, size_t len)
{
	return __rseq_percpu_malloc(pool, false, init_ptr, len);
}
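
/*
 * Sketch (illustrative only): allocating an item whose per-CPU copies
 * all start from the same template rather than from zeroes.
 * rseq_mempool_percpu_free() from <rseq/mempool.h> is assumed for the
 * matching free.
 *
 *	struct ring_state {
 *		uint32_t head;
 *		uint32_t tail;
 *		uint32_t capacity;
 *	};
 *
 *	static struct ring_state __rseq_percpu *ring_alloc(struct rseq_mempool *pool,
 *			uint32_t capacity)
 *	{
 *		struct ring_state init = {
 *			.head = 0,
 *			.tail = 0,
 *			.capacity = capacity,
 *		};
 *
 *		// Every CPU's copy is memcpy-initialized from &init;
 *		// init_len must not exceed the pool's item_len.
 *		return (struct ring_state __rseq_percpu *)
 *			rseq_mempool_percpu_malloc_init(pool, &init, sizeof(init));
 *	}
 */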

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	if (pool->attr.poison_set)
		rseq_percpu_poison_item(pool, range, item_offset);
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	/*
	 * Setting the next pointer will overwrite the first uintptr_t
	 * poison for CPU 0.
	 */
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed, init_ptr, len);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, true);
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, init_ptr, len, false);
}

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}
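
/*
 * Sketch (illustrative only): plugging custom mapping callbacks into a
 * pool, here prefaulting the backing pages with MAP_POPULATE so the
 * first access to each CPU's range does not take a minor fault.
 *
 *	static void *prefault_mmap(void *priv __attribute__((unused)), size_t len)
 *	{
 *		void *p;
 *
 *		p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
 *		return p == MAP_FAILED ? NULL : p;
 *	}
 *
 *	static int prefault_munmap(void *priv __attribute__((unused)),
 *			void *ptr, size_t len)
 *	{
 *		return munmap(ptr, len);
 *	}
 *
 *	// rseq_mempool_attr_set_mmap(attr, prefault_mmap, prefault_munmap, NULL);
 */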

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}
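
/*
 * Sketch (illustrative only): a "global" pool keeps the same allocation
 * interface but backs each object with a single copy (max_nr_cpus is
 * forced to 1 at pool creation), which suits data shared by all CPUs
 * that still benefits from the pool's robust/poison checking. The cast
 * below drops the __rseq_percpu annotation for plain access.
 *
 *	// attr = rseq_mempool_attr_create();
 *	// rseq_mempool_attr_set_global(attr, RSEQ_MEMPOOL_STRIDE);
 *	// rseq_mempool_attr_set_robust(attr);
 *	// pool = rseq_mempool_create("shared-objects", sizeof(struct obj), attr);
 *	// struct obj *o = (struct obj *) rseq_mempool_percpu_zmalloc(pool);
 */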

int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
		unsigned long max_nr_ranges)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->max_nr_ranges = max_nr_ranges;
	return 0;
}

int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
		uintptr_t poison)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->poison_set = true;
	attr->poison = poison;
	return 0;
}

int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}