mempool: Introduce init_func attribute
src/rseq-mempool.c

// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include "smp.h"

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded up to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS provides Thread-Local Storage, whereas the
 * per-CPU memory allocator provides CPU-Local Storage.
 */

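/*
 * Minimal usage sketch (illustrative only; error handling is omitted and
 * the "counters" pool name and struct counter type are made up for the
 * example). It relies solely on functions defined in this file, passing
 * the pool stride explicitly to librseq_mempool_percpu_free():
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	struct rseq_mempool *pool;
 *	struct counter { long v; } __rseq_percpu *c;
 *
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	pool = rseq_mempool_create("counters", sizeof(struct counter), attr);
 *	rseq_mempool_attr_destroy(attr);
 *
 *	c = (struct counter __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
 *	...
 *	librseq_mempool_percpu_free(c, RSEQ_MEMPOOL_STRIDE);
 *	rseq_mempool_destroy(pool);
 */
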
#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

/*
 * Skip pool index 0 to ensure allocated entries at index 0 do not match
 * a NULL pointer.
 */
#define FIRST_POOL		1

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	void (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;
	struct rseq_mempool *pool;	/* Backward ref. to container pool. */
	void *header;
	void *base;
	size_t next_unused;
	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Linked-list of ranges. */
	struct rseq_mempool_range *ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};

static
void *__rseq_pool_range_percpu_ptr(struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

#ifdef HAVE_LIBNUMA
static
int rseq_mempool_range_init_numa(struct rseq_mempool *pool, struct rseq_mempool_range *range, int numa_flags)
{
	unsigned long nr_pages, page_len;
	long ret;
	int cpu;

	if (!numa_flags)
		return 0;
	page_len = rseq_get_page_len();
	nr_pages = pool->attr.stride >> rseq_get_count_order_ulong(page_len);
	for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {

		int status[MOVE_PAGES_BATCH_SIZE];
		int nodes[MOVE_PAGES_BATCH_SIZE];
		void *pages[MOVE_PAGES_BATCH_SIZE];

		nodes[0] = numa_node_of_cpu(cpu);
		if (nodes[0] < 0)
			continue;
		for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
			nodes[k] = nodes[0];
		}

		for (unsigned long page = 0; page < nr_pages;) {

			size_t max_k = RSEQ_ARRAY_SIZE(pages);
			size_t left = nr_pages - page;

			if (left < max_k) {
				max_k = left;
			}

			for (size_t k = 0; k < max_k; ++k, ++page) {
				pages[k] = __rseq_pool_range_percpu_ptr(range, cpu,
						page * page_len, pool->attr.stride);
				status[k] = -EPERM;
			}

			ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

			if (ret < 0)
				return ret;

			if (ret > 0) {
				fprintf(stderr, "%ld pages were not migrated\n", ret);
				for (size_t k = 0; k < max_k; ++k) {
					if (status[k] < 0)
						fprintf(stderr,
							"Error while moving page %p to numa node %d: %d\n",
							pages[k], nodes[k], -status[k]);
				}
			}
		}
	}
	return 0;
}

int rseq_mempool_init_numa(struct rseq_mempool *pool, int numa_flags)
{
	struct rseq_mempool_range *range;
	int ret;

	if (!numa_flags)
		return 0;
	for (range = pool->ranges; range; range = range->next) {
		ret = rseq_mempool_range_init_numa(pool, range, numa_flags);
		if (ret)
			return ret;
	}
	return 0;
}
#else
int rseq_mempool_init_numa(struct rseq_mempool *pool __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	return 0;
}
#endif

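/*
 * NUMA usage sketch (illustrative): numa_flags is handed straight to
 * move_pages(2), so MPOL_MF_MOVE is a typical value, while 0 turns the
 * call into a no-op. Only effective when built with HAVE_LIBNUMA.
 *
 *	if (rseq_mempool_init_numa(pool, MPOL_MF_MOVE))
 *		fprintf(stderr, "Unable to move pool pages to NUMA nodes\n");
 */
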
static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->ranges; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->ranges; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
	     node;
	     prev = node,
	     node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
			(pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(len) || !is_pow2(alignment)) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}

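/*
 * Worked example of the trimming above (illustrative numbers, not derived
 * from runtime state): with page_size = 4 KiB, pre_header_len = 4 KiB and
 * len = alignment = 4 MiB, minimum_page_count = (4 KiB + 4 MiB) / 4 KiB =
 * 1025 pages and page_count = (4 KiB + 4 MiB + 4 MiB - 4 KiB) / 4 KiB =
 * 2048 pages are initially reserved. The over-allocation guarantees that
 * some address (ptr + pre_header_len) within the mapping is 4 MiB-aligned;
 * the pages before that point and the pages beyond the 1025 needed ones
 * are then unmapped, leaving exactly pre_header_len + len bytes mapped.
 */
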
static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		int cpu;

		for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
			pool->attr.init_func(pool->attr.init_priv,
				base + (pool->attr.stride * cpu),
				pool->attr.stride, cpu);
		}
	}
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	/* Iteration safe against removal. */
	for (range = pool->ranges; range && (next_range = range->next, 1); range = next_range) {
		ret = rseq_mempool_range_destroy(pool, range);
		if (ret)
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->ranges = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = get_possible_cpus_array_len();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	//TODO: implement multi-range support.
	pool->ranges = rseq_mempool_range_create(pool);
	if (!pool->ranges)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->ranges->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
{
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - pool->ranges->base);
		addr = (void __rseq_percpu *) (pool->ranges->base + item_offset);
		goto end;
	}
	if (pool->ranges->next_unused + pool->item_len > pool->attr.stride) {
		errno = ENOMEM;
		addr = NULL;
		goto end;
	}
	item_offset = pool->ranges->next_unused;
	addr = (void __rseq_percpu *) (pool->ranges->base + item_offset);
	pool->ranges->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (zeroed && addr)
		rseq_percpu_zero_item(pool, pool->ranges, item_offset);
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->ranges->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, true);
}
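
/*
 * Pool set usage sketch (illustrative; the pool names and item sizes are
 * made up for this example and error checking is omitted). Pools of
 * different item sizes are handed over to the set, and an allocation is
 * then served by the smallest present pool whose item_len fits @len:
 *
 *	struct rseq_mempool_set *set = rseq_mempool_set_create();
 *	void __rseq_percpu *p, *q;
 *
 *	rseq_mempool_set_add_pool(set, rseq_mempool_create("small", 16, NULL));
 *	rseq_mempool_set_add_pool(set, rseq_mempool_create("large", 128, NULL));
 *
 *	p = rseq_mempool_set_percpu_zmalloc(set, 16);	// served by "small"
 *	q = rseq_mempool_set_percpu_zmalloc(set, 100);	// served by "large"
 *	...
 *	rseq_mempool_set_destroy(set);
 */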

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}
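
/*
 * Custom mapping hook sketch (illustrative; the function names are made up
 * and MAP_POPULATE is a Linux-specific mmap flag). Callbacks registered
 * through rseq_mempool_attr_set_mmap() replace default_mmap_func and
 * default_munmap_func, e.g. to prefault the pool's pages at creation time:
 *
 *	static void *prefault_mmap(void *priv __attribute__((unused)), size_t len)
 *	{
 *		void *base;
 *
 *		base = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *				MAP_ANONYMOUS | MAP_PRIVATE | MAP_POPULATE, -1, 0);
 *		return base == MAP_FAILED ? NULL : base;
 *	}
 *
 *	static int prefault_munmap(void *priv __attribute__((unused)), void *ptr, size_t len)
 *	{
 *		return munmap(ptr, len);
 *	}
 *
 *	rseq_mempool_attr_set_mmap(attr, prefault_mmap, prefault_munmap, NULL);
 */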

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		void (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}
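
/*
 * init_func usage sketch (illustrative; the callback name is made up for
 * this example). When an init callback is registered, it is invoked once
 * per CPU at range creation, receiving the base address and length
 * (stride) of that CPU's slice of the range:
 *
 *	static void init_cpu_slice(void *priv __attribute__((unused)),
 *			void *addr, size_t len, int cpu)
 *	{
 *		printf("Initializing %zu bytes for cpu %d at %p\n", len, cpu, addr);
 *	}
 *
 *	rseq_mempool_attr_set_init(attr, init_cpu_slice, NULL);
 */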

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}