mempool: Use default poison value when robust is set
[librseq.git] / src / rseq-mempool.c
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each of them containing objects of
 * a given size (rounded to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: whereas TLS provides Thread-Local Storage, the
 * per-CPU memory allocator provides CPU-Local Storage.
 */
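
/*
 * Usage sketch (illustrative, not from this file): typical lifetime of
 * a per-CPU pool through the public API declared in <rseq/mempool.h>.
 * rseq_percpu_ptr() and rseq_mempool_percpu_free() are assumed to be
 * wrappers provided by that header; everything else is defined in this
 * file. Error handling is omitted.
 *
 *	struct counter { uint64_t count; };
 *	struct counter __rseq_percpu *c;
 *	struct rseq_mempool_attr *attr;
 *	struct rseq_mempool *pool;
 *
 *	attr = rseq_mempool_attr_create();
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);	// 0: auto-detect max_nr_cpus
 *	rseq_mempool_attr_set_robust(attr);
 *	pool = rseq_mempool_create("counters", sizeof(struct counter), attr);
 *	rseq_mempool_attr_destroy(attr);
 *
 *	c = rseq_mempool_percpu_zmalloc(pool);	// one zeroed object per CPU
 *	rseq_percpu_ptr(c, 3)->count++;		// access the CPU 3 instance
 *	rseq_mempool_percpu_free(c);		// assumed wrapper over librseq_mempool_percpu_free()
 *	rseq_mempool_destroy(pool);
 */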

#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

#if RSEQ_BITS_PER_LONG == 64
# define DEFAULT_POISON_VALUE	0x5555555555555555ULL
#else
# define DEFAULT_POISON_VALUE	0x55555555UL
#endif

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;

	unsigned long max_nr_ranges;

	bool poison_set;
	uintptr_t poison;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */
	void *header;
	void *base;
	size_t next_unused;
	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};
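
/*
 * Usage sketch (illustrative): a pool set groups pools of several item
 * sizes so a variable-size allocation is served by the smallest pool
 * whose item_len fits the request. All functions below are defined in
 * this file; error handling is omitted.
 *
 *	struct rseq_mempool_set *set = rseq_mempool_set_create();
 *
 *	// Ownership of each pool is handed over to the set on success.
 *	rseq_mempool_set_add_pool(set, rseq_mempool_create("pool-32", 32, NULL));
 *	rseq_mempool_set_add_pool(set, rseq_mempool_create("pool-128", 128, NULL));
 *
 *	// Served by the 128-byte pool: smallest entry with item_len >= 100.
 *	void __rseq_percpu *p = rseq_mempool_set_percpu_zmalloc(set, 100);
 */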

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

static
void rseq_percpu_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t))
			*((uintptr_t *) (p + offset)) = poison;
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	if (!pool->attr.robust_set)
		return;
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t)) {
			uintptr_t v;

			/* Skip poison check for free-list pointer. */
			if (i == 0 && offset == 0)
				continue;
			v = *((uintptr_t *) (p + offset));
			if (v != poison) {
				fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
					__func__, (unsigned long) v, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
				abort();
			}
		}
	}
}

#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif
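
/*
 * Usage sketch (illustrative): rseq_mempool_range_init_numa() can be
 * wired in as a range init callback so each per-CPU range has its pages
 * moved to the NUMA node of the matching CPU. The callback signature
 * matches rseq_mempool_attr_set_init() below; passing MPOL_MF_MOVE as
 * the move_pages(2) flags is an assumption, not a requirement of this
 * file.
 *
 *	static int init_numa(void *priv __attribute__((unused)),
 *			void *addr, size_t len, int cpu)
 *	{
 *		return rseq_mempool_range_init_numa(addr, len, cpu, MPOL_MF_MOVE);
 *	}
 *
 *	// Later: rseq_mempool_attr_set_init(attr, init_numa, NULL);
 */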

static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
			node;
			prev = node,
			node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_range_poison(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range)
{
	size_t item_offset;

	for (item_offset = 0; item_offset < range->next_unused;
			item_offset += pool->item_len)
		rseq_percpu_check_poison_item(pool, range, item_offset);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_pool_poison(const struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;
	for (range = pool->range_list; range; range = range->next)
		check_range_poison(pool, range);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
			(pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}
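
/*
 * Layout note: for one pool range created by rseq_mempool_range_create()
 * below, aligned_mmap_anonymous() reserves a one-page pre-header in
 * front of a stride-aligned mapping of (stride * max_nr_cpus) bytes.
 * The struct rseq_mempool_range bookkeeping occupies the last
 * RANGE_HEADER_OFFSET bytes of that pre-header page:
 *
 *	header                            base (stride-aligned)
 *	v                                 v
 *	+---------------+-----------------+--------------+--------------+--
 *	| (unused)      | struct range    | CPU 0 stride | CPU 1 stride | ...
 *	+---------------+-----------------+--------------+--------------+--
 *	                ^ base - RANGE_HEADER_OFFSET
 *
 * Pointers returned to users fall within the CPU 0 stride; the replica
 * for a given CPU is reached by adding (stride * cpu), as done by
 * __rseq_pool_range_percpu_ptr().
 */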

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	if (pool->attr.max_nr_ranges &&
			pool->nr_ranges >= pool->attr.max_nr_ranges) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;
			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	check_pool_poison(pool);
	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		if (rseq_mempool_range_destroy(pool, range))
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (attr.robust_set && !attr.poison_set) {
		attr.poison_set = true;
		attr.poison = DEFAULT_POISON_VALUE;
	}
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		uintptr_t ptr = (uintptr_t) node;
		void *range_base = (void *) (ptr & (~(pool->attr.stride - 1)));

		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - range_base);
		rseq_percpu_check_poison_item(pool, range, item_offset);
		addr = (void __rseq_percpu *) node;
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (zeroed && addr)
		rseq_percpu_zero_item(pool, range, item_offset);
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	if (pool->attr.poison_set)
		rseq_percpu_poison_item(pool, range, item_offset);
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	/*
	 * Setting the next pointer will overwrite the first uintptr_t
	 * poison for CPU 0.
	 */
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, true);
}

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}
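
/*
 * Note: enabling robust mode without an explicit poison value makes
 * rseq_mempool_create() fall back to DEFAULT_POISON_VALUE, so robust
 * pools always poison freed items and check them on reallocation and
 * on pool destroy. Minimal sketch (illustrative, error handling
 * omitted):
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	struct rseq_mempool *pool;
 *
 *	rseq_mempool_attr_set_robust(attr);
 *	// No rseq_mempool_attr_set_poison() call: freed items in this
 *	// pool are filled with DEFAULT_POISON_VALUE (0x5555...).
 *	pool = rseq_mempool_create("robust-pool", 64, attr);
 */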

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}

int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
		unsigned long max_nr_ranges)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->max_nr_ranges = max_nr_ranges;
	return 0;
}

int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
		uintptr_t poison)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->poison_set = true;
	attr->poison = poison;
	return 0;
}

int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}