mempool: malloc: handle empty range list
[librseq.git] / src / rseq-mempool.c
1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
cabbbc8e 3// SPDX-FileCopyrightText: 2024 Olivier Dion <odion@efficios.com>
ef6695f1 4
34337fec 5#include <rseq/mempool.h>
ef6695f1
MD
6#include <sys/mman.h>
7#include <assert.h>
8#include <string.h>
9#include <pthread.h>
10#include <unistd.h>
11#include <stdlib.h>
12#include <rseq/compiler.h>
13#include <errno.h>
14#include <stdint.h>
15#include <stdbool.h>
367e559c 16#include <stdio.h>
a5694a4d 17#include <fcntl.h>
367e559c
MD
18
19#ifdef HAVE_LIBNUMA
20# include <numa.h>
21# include <numaif.h>
22#endif
ef6695f1 23
34337fec 24#include "rseq-utils.h"
252f9411 25#include "list.h"
47c725dd 26#include <rseq/rseq.h>
19be9217 27
ef6695f1 28/*
b73b0c25 29 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
ef6695f1 30 *
8ab16a24
MD
31 * The rseq per-CPU memory allocator allows the application to request
32 * memory pools of CPU-Local memory, each containing objects of a
33 * given size (rounded to the next power of 2), reserving a given virtual
34 * address space size per CPU, for a given maximum number of CPUs.
8ab16a24
MD
35 *
36 * The per-CPU memory allocator is analogous to TLS (Thread-Local
37 * Storage) memory: where TLS provides per-thread storage, the per-CPU
38 * memory allocator provides CPU-Local Storage.
ef6695f1
MD
39 */
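/*
 * A minimal usage sketch of the public API implemented below, assuming
 * that <rseq/mempool.h> also provides rseq_percpu_ptr() to reach a
 * given CPU's copy and rseq_mempool_percpu_free() as the default-stride
 * wrapper around librseq_mempool_percpu_free(); everything else is
 * defined in this file. Error handling is omitted.
 *
 *	struct rseq_mempool *pool;
 *	intptr_t __rseq_percpu *counter;
 *	int cpu;
 *
 *	pool = rseq_mempool_create("example", sizeof(intptr_t), NULL);
 *	counter = (intptr_t __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
 *	for (cpu = 0; cpu < rseq_mempool_get_max_nr_cpus(pool); cpu++)
 *		printf("cpu %d: %ld\n", cpu, (long) *rseq_percpu_ptr(counter, cpu));
 *	rseq_mempool_percpu_free(counter);
 *	rseq_mempool_destroy(pool);
 */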
40
3236da62 41#define POOL_SET_NR_ENTRIES RSEQ_BITS_PER_LONG
ef6695f1 42
1a426b47
MD
43#define POOL_HEADER_NR_PAGES 2
44
72b100a1
MD
45/*
46 * Smallest allocation should hold enough space for a free list pointer.
47 */
ef6695f1
MD
48#if RSEQ_BITS_PER_LONG == 64
49# define POOL_SET_MIN_ENTRY 3 /* Smallest item_len=8 */
50#else
51# define POOL_SET_MIN_ENTRY 2 /* Smallest item_len=4 */
52#endif
53
0fdf7a4c
OD
54#define BIT_PER_ULONG (8 * sizeof(unsigned long))
55
57d8b586
OD
56#define MOVE_PAGES_BATCH_SIZE 4096
57
0ba2a93e 58#define RANGE_HEADER_OFFSET sizeof(struct rseq_mempool_range)
4aa3220c 59
3975084e 60#if RSEQ_BITS_PER_LONG == 64
6ea98a7b 61# define DEFAULT_COW_INIT_POISON_VALUE 0x5555555555555555ULL
3975084e 62#else
6ea98a7b 63# define DEFAULT_COW_INIT_POISON_VALUE 0x55555555UL
3975084e
MD
64#endif
65
805d0043
MD
66/*
67 * Define the default COW_ZERO poison value as zero to prevent useless
68 * COW page allocation when poison values are written as items are freed.
69 */
6ea98a7b
MD
70#define DEFAULT_COW_ZERO_POISON_VALUE 0x0
71
ef6695f1
MD
72struct free_list_node;
73
74struct free_list_node {
75 struct free_list_node *next;
76};
77
cb475906 78enum mempool_type {
fffc02aa
MD
79 MEMPOOL_TYPE_PERCPU = 0, /* Default */
80 MEMPOOL_TYPE_GLOBAL = 1,
cb475906
MD
81};
82
0ba2a93e 83struct rseq_mempool_attr {
135811f2 84 bool init_set;
6e329183 85 int (*init_func)(void *priv, void *addr, size_t len, int cpu);
135811f2
MD
86 void *init_priv;
87
d6acc8aa 88 bool robust_set;
cb475906
MD
89
90 enum mempool_type type;
91 size_t stride;
92 int max_nr_cpus;
e11a02d7
MD
93
94 unsigned long max_nr_ranges;
455e090e
MD
95
96 bool poison_set;
97 uintptr_t poison;
a5694a4d
MD
98
99 enum rseq_mempool_populate_policy populate_policy;
9bd07c29
MD
100};
101
0ba2a93e 102struct rseq_mempool_range;
b73b0c25 103
0ba2a93e 104struct rseq_mempool_range {
252f9411 105 struct list_head node; /* Linked list of ranges. */
9d986353 106 struct rseq_mempool *pool; /* Backward reference to container pool. */
a5694a4d
MD
107
108 /*
109 * Memory layout of a mempool range:
805d0043
MD
110 * - Canary header page (for detection of destroy-after-fork of
111 * COW_INIT pool),
1a426b47
MD
112 * - Header page (contains struct rseq_mempool_range at the
113 * very end),
c0de0012 114 * - Base of the per-cpu data, starting with CPU 0.
805d0043 115 * Aliases with free-list for non-robust COW_ZERO pool.
a5694a4d
MD
116 * - CPU 1,
117 * ...
118 * - CPU max_nr_cpus - 1
805d0043
MD
119 * - init values (only allocated for COW_INIT pool).
120 * Aliases with free-list for non-robust COW_INIT pool.
c0de0012
MD
121 * - free list (for robust pool).
122 *
123 * The free list aliases the CPU 0 memory area for non-robust
805d0043
MD
124 * COW_ZERO pools. It aliases with init values for non-robust
125 * COW_INIT pools. It is located immediately after the init
126 * values for robust pools.
a5694a4d 127 */
4aa3220c 128 void *header;
ef6695f1 129 void *base;
a5694a4d
MD
130 /*
131 * The init values contain malloc_init/zmalloc values.
805d0043 132 * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_COW_ZERO.
a5694a4d
MD
133 */
134 void *init;
b73b0c25 135 size_t next_unused;
fa6a0fb3
MD
136
137 /* Pool range mmap/munmap */
138 void *mmap_addr;
139 size_t mmap_len;
140
ffea0dea
MD
141 size_t allocated_items;
142
b73b0c25
MD
143 /* Track alloc/free. */
144 unsigned long *alloc_bitmap;
145};
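/*
 * Illustration of the layout described above: for a range with stride S
 * and a pool with max_nr_cpus N, the copy of an item allocated at
 * item_offset for a given cpu lives at
 *
 *	range->base + cpu * S + item_offset		(0 <= cpu < N)
 *
 * which is what __rseq_pool_range_percpu_ptr() computes below, and, for
 * COW_INIT pools, its initial value lives at range->init + item_offset.
 */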
146
0ba2a93e 147struct rseq_mempool {
252f9411 148 struct list_head range_list; /* Head of ranges linked-list. */
9d986353 149 unsigned long nr_ranges;
b73b0c25 150
ef6695f1 151 size_t item_len;
ef6695f1 152 int item_order;
ef6695f1
MD
153
154 /*
805d0043
MD
155 * COW_INIT non-robust pools:
156 * The free list chains freed items on the init
157 * values address range.
158 *
159 * COW_ZERO non-robust pools:
160 * The free list chains freed items on the CPU 0
161 * address range. We should rethink this
162 * decision if false sharing between malloc/free
163 * from other CPUs and data accesses from CPU 0
164 * becomes an issue.
165 *
166 * Robust pools: The free list chains freed items in the
167 * address range dedicated for the free list.
168 *
169 * This is a NULL-terminated singly-linked list.
ef6695f1
MD
170 */
171 struct free_list_node *free_list_head;
b73b0c25 172
ef6695f1
MD
173 /* This lock protects allocation/free within the pool. */
174 pthread_mutex_t lock;
9bd07c29 175
0ba2a93e 176 struct rseq_mempool_attr attr;
ca452fee 177 char *name;
ef6695f1
MD
178};
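/*
 * Illustration of the free-list placement described above, matching
 * __rseq_percpu_to_free_list_ptr() below for a freed per-CPU pointer p,
 * stride S and max_nr_cpus N:
 *
 *	robust, COW_ZERO:	free-list slot at p + N * S
 *	robust, COW_INIT:	free-list slot at p + N * S + S
 *	non-robust, COW_INIT:	free-list slot at p + N * S	(init values)
 *	non-robust, COW_ZERO:	free-list slot at p		(CPU 0 copy)
 *
 * The first word of that slot holds the 'next' pointer of the
 * NULL-terminated singly-linked free list.
 */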
179
ef6695f1
MD
180/*
181 * Pool set entries are indexed by item_len rounded to the next power of
182 * 2. A pool set can contain NULL pool entries, in which case the next
183 * large enough entry will be used for allocation.
184 */
0ba2a93e 185struct rseq_mempool_set {
ef6695f1
MD
186 /* This lock protects add vs malloc/zmalloc within the pool set. */
187 pthread_mutex_t lock;
0ba2a93e 188 struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
ef6695f1
MD
189};
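/*
 * Worked example of the indexing described above: a 24-byte allocation
 * request is rounded up to 32 bytes (order 5) and served from
 * entries[5] if a pool was added at that index, otherwise from the next
 * larger non-NULL entry (entries[6], entries[7], ...).
 */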
190
86617384
MD
191static
192const char *get_pool_name(const struct rseq_mempool *pool)
193{
194 return pool->name ? : "<anonymous>";
195}
196
367e559c 197static
6fbf1fb6 198void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
f2981623 199 uintptr_t item_offset, size_t stride)
367e559c 200{
15b63c9f 201 return range->base + (stride * cpu) + item_offset;
367e559c
MD
202}
203
a5694a4d
MD
204static
205void *__rseq_pool_range_init_ptr(const struct rseq_mempool_range *range,
206 uintptr_t item_offset)
207{
208 if (!range->init)
209 return NULL;
210 return range->init + item_offset;
211}
212
213static
214void __rseq_percpu *__rseq_free_list_to_percpu_ptr(const struct rseq_mempool *pool,
215 struct free_list_node *node)
216{
217 void __rseq_percpu *p = (void __rseq_percpu *) node;
218
c0de0012
MD
219 if (pool->attr.robust_set) {
220 /* Skip cpus. */
a5694a4d 221 p -= pool->attr.max_nr_cpus * pool->attr.stride;
c0de0012 222 /* Skip init values */
805d0043 223 if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
c0de0012
MD
224 p -= pool->attr.stride;
225
226 } else {
805d0043
MD
227 /* COW_INIT free list is in init values */
228 if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
c0de0012
MD
229 p -= pool->attr.max_nr_cpus * pool->attr.stride;
230 }
a5694a4d
MD
231 return p;
232}
233
234static
235struct free_list_node *__rseq_percpu_to_free_list_ptr(const struct rseq_mempool *pool,
236 void __rseq_percpu *p)
237{
c0de0012
MD
238 if (pool->attr.robust_set) {
239 /* Skip cpus. */
a5694a4d 240 p += pool->attr.max_nr_cpus * pool->attr.stride;
c0de0012 241 /* Skip init values */
805d0043 242 if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
c0de0012
MD
243 p += pool->attr.stride;
244
245 } else {
805d0043
MD
246 /* COW_INIT free list is in init values */
247 if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
c0de0012
MD
248 p += pool->attr.max_nr_cpus * pool->attr.stride;
249 }
a5694a4d
MD
250 return (struct free_list_node *) p;
251}
252
253static
14af0aa2 254intptr_t rseq_cmp_item(void *p, size_t item_len, intptr_t cmp_value, intptr_t *unexpected_value)
a5694a4d 255{
14af0aa2
MD
256 size_t offset;
257 intptr_t res = 0;
a5694a4d 258
14af0aa2
MD
259 for (offset = 0; offset < item_len; offset += sizeof(uintptr_t)) {
260 intptr_t v = *((intptr_t *) (p + offset));
261
262 if ((res = v - cmp_value) != 0) {
263 if (unexpected_value)
264 *unexpected_value = v;
a5694a4d 265 break;
14af0aa2
MD
266 }
267 }
a5694a4d
MD
268 return res;
269}
270
367e559c 271static
15b63c9f
MD
272void rseq_percpu_zero_item(struct rseq_mempool *pool,
273 struct rseq_mempool_range *range, uintptr_t item_offset)
367e559c 274{
a5694a4d 275 char *init_p = NULL;
367e559c
MD
276 int i;
277
a5694a4d
MD
278 init_p = __rseq_pool_range_init_ptr(range, item_offset);
279 if (init_p)
644298bb 280 bzero(init_p, pool->item_len);
cb475906 281 for (i = 0; i < pool->attr.max_nr_cpus; i++) {
15b63c9f 282 char *p = __rseq_pool_range_percpu_ptr(range, i,
cb475906 283 item_offset, pool->attr.stride);
a5694a4d 284
1b658191
MD
285 /*
286 * If item is already zeroed, either because the
287 * init range update has propagated or because the
288 * content is already zeroed (e.g. zero page), don't
289 * write to the page. This eliminates useless COW over
290 * the zero page just for overwriting it with zeroes.
291 *
805d0043 292 * This means zmalloc() in a COW_ZERO policy pool does
1b658191
MD
293 * not trigger COW for CPUs which are not actively
294 * writing to the pool. This is however not the case for
295 * malloc_init() in populate-all pools if it populates
296 * non-zero content.
297 */
14af0aa2 298 if (!rseq_cmp_item(p, pool->item_len, 0, NULL))
a5694a4d 299 continue;
644298bb 300 bzero(p, pool->item_len);
367e559c
MD
301 }
302}
303
6ff43d9a
MD
304static
305void rseq_percpu_init_item(struct rseq_mempool *pool,
306 struct rseq_mempool_range *range, uintptr_t item_offset,
307 void *init_ptr, size_t init_len)
308{
a5694a4d 309 char *init_p = NULL;
6ff43d9a
MD
310 int i;
311
a5694a4d
MD
312 init_p = __rseq_pool_range_init_ptr(range, item_offset);
313 if (init_p)
314 memcpy(init_p, init_ptr, init_len);
6ff43d9a
MD
315 for (i = 0; i < pool->attr.max_nr_cpus; i++) {
316 char *p = __rseq_pool_range_percpu_ptr(range, i,
317 item_offset, pool->attr.stride);
a5694a4d 318
1b658191
MD
319 /*
320 * If the update propagated through a shared mapping,
321 * or the item already has the correct content, skip
322 * writing it into the cpu item to eliminate useless
323 * COW of the page.
324 */
325 if (!memcmp(init_ptr, p, init_len))
a5694a4d 326 continue;
6ff43d9a
MD
327 memcpy(p, init_ptr, init_len);
328 }
329}
330
a5694a4d
MD
331static
332void rseq_poison_item(void *p, size_t item_len, uintptr_t poison)
333{
334 size_t offset;
335
336 for (offset = 0; offset < item_len; offset += sizeof(uintptr_t))
337 *((uintptr_t *) (p + offset)) = poison;
338}
339
455e090e
MD
340static
341void rseq_percpu_poison_item(struct rseq_mempool *pool,
342 struct rseq_mempool_range *range, uintptr_t item_offset)
343{
344 uintptr_t poison = pool->attr.poison;
a5694a4d 345 char *init_p = NULL;
455e090e
MD
346 int i;
347
a5694a4d
MD
348 init_p = __rseq_pool_range_init_ptr(range, item_offset);
349 if (init_p)
350 rseq_poison_item(init_p, pool->item_len, poison);
455e090e
MD
351 for (i = 0; i < pool->attr.max_nr_cpus; i++) {
352 char *p = __rseq_pool_range_percpu_ptr(range, i,
353 item_offset, pool->attr.stride);
455e090e 354
1b658191
MD
355 /*
356 * If the update propagated through a shared mapping,
357 * or the item already has the correct content, skip
358 * writing it into the cpu item to eliminate useless
359 * COW of the page.
360 *
361 * It is recommended to use zero as poison value for
805d0043
MD
362 * COW_ZERO pools to eliminate COW due to writing
363 * poison to CPU memory still backed by the zero page.
1b658191 364 */
14af0aa2 365 if (rseq_cmp_item(p, pool->item_len, poison, NULL) == 0)
a5694a4d
MD
366 continue;
367 rseq_poison_item(p, pool->item_len, poison);
368 }
369}
370
371/* Always inline for __builtin_return_address(0). */
372static inline __attribute__((always_inline))
373void rseq_check_poison_item(const struct rseq_mempool *pool, uintptr_t item_offset,
c0de0012 374 void *p, size_t item_len, uintptr_t poison)
a5694a4d 375{
1b658191 376 intptr_t unexpected_value;
a5694a4d 377
14af0aa2 378 if (rseq_cmp_item(p, item_len, poison, &unexpected_value) == 0)
1b658191 379 return;
a5694a4d 380
1b658191
MD
381 fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
382 __func__, (unsigned long) unexpected_value, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
383 abort();
86617384
MD
384}
385
386/* Always inline for __builtin_return_address(0). */
387static inline __attribute__((always_inline))
6fbf1fb6
MD
388void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
389 const struct rseq_mempool_range *range, uintptr_t item_offset)
86617384
MD
390{
391 uintptr_t poison = pool->attr.poison;
a5694a4d 392 char *init_p;
86617384
MD
393 int i;
394
3975084e 395 if (!pool->attr.robust_set)
86617384 396 return;
a5694a4d
MD
397 init_p = __rseq_pool_range_init_ptr(range, item_offset);
398 if (init_p)
c0de0012 399 rseq_check_poison_item(pool, item_offset, init_p, pool->item_len, poison);
86617384
MD
400 for (i = 0; i < pool->attr.max_nr_cpus; i++) {
401 char *p = __rseq_pool_range_percpu_ptr(range, i,
402 item_offset, pool->attr.stride);
c0de0012 403 rseq_check_poison_item(pool, item_offset, p, pool->item_len, poison);
455e090e
MD
404 }
405}
406
15b63c9f 407#ifdef HAVE_LIBNUMA
c6fd3981 408int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
367e559c 409{
f2981623 410 unsigned long nr_pages, page_len;
c6fd3981
MD
411 int status[MOVE_PAGES_BATCH_SIZE];
412 int nodes[MOVE_PAGES_BATCH_SIZE];
413 void *pages[MOVE_PAGES_BATCH_SIZE];
f2981623 414 long ret;
367e559c 415
c6fd3981
MD
416 if (!numa_flags) {
417 errno = EINVAL;
418 return -1;
419 }
367e559c 420 page_len = rseq_get_page_len();
c6fd3981 421 nr_pages = len >> rseq_get_count_order_ulong(page_len);
57d8b586 422
c6fd3981
MD
423 nodes[0] = numa_node_of_cpu(cpu);
424 if (nodes[0] < 0)
425 return -1;
57d8b586 426
c6fd3981
MD
427 for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
428 nodes[k] = nodes[0];
429 }
57d8b586 430
c6fd3981 431 for (unsigned long page = 0; page < nr_pages;) {
57d8b586 432
c6fd3981
MD
433 size_t max_k = RSEQ_ARRAY_SIZE(pages);
434 size_t left = nr_pages - page;
57d8b586 435
c6fd3981
MD
436 if (left < max_k) {
437 max_k = left;
438 }
57d8b586 439
c6fd3981
MD
440 for (size_t k = 0; k < max_k; ++k, ++page) {
441 pages[k] = addr + (page * page_len);
442 status[k] = -EPERM;
367e559c 443 }
b73b0c25 444
c6fd3981
MD
445 ret = move_pages(0, max_k, pages, nodes, status, numa_flags);
446
447 if (ret < 0)
b73b0c25 448 return ret;
c6fd3981
MD
449
450 if (ret > 0) {
451 fprintf(stderr, "%ld pages were not migrated\n", ret);
452 for (size_t k = 0; k < max_k; ++k) {
453 if (status[k] < 0)
454 fprintf(stderr,
455 "Error while moving page %p to numa node %d: %u\n",
456 pages[k], nodes[k], -status[k]);
457 }
458 }
b73b0c25
MD
459 }
460 return 0;
461}
367e559c 462#else
c6fd3981
MD
463int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
464 size_t len __attribute__((unused)),
465 int cpu __attribute__((unused)),
367e559c
MD
466 int numa_flags __attribute__((unused)))
467{
c6fd3981
MD
468 errno = ENOSYS;
469 return -1;
367e559c
MD
470}
471#endif
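/*
 * Sketch of one way to use rseq_mempool_range_init_numa(): from the
 * per-range init callback installed with rseq_mempool_attr_set_init()
 * (defined later in this file). The callback name and the MPOL_MF_MOVE
 * flag from <numaif.h> are illustrative choices, not part of this file;
 * cpu < 0 denotes a global (non per-CPU) range.
 *
 *	static int init_range_numa(void *priv __attribute__((unused)),
 *			void *addr, size_t len, int cpu)
 *	{
 *		if (cpu < 0)
 *			return 0;
 *		return rseq_mempool_range_init_numa(addr, len, cpu, MPOL_MF_MOVE);
 *	}
 *
 *	rseq_mempool_attr_set_init(attr, init_range_numa, NULL);
 */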
472
0fdf7a4c 473static
0ba2a93e 474int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
0fdf7a4c
OD
475{
476 size_t count;
477
cb475906 478 count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;
0fdf7a4c
OD
479
480 /*
9649c7ee
MD
481 * Not being able to create the validation bitmap is an error
482 * that needs to be reported.
0fdf7a4c 483 */
b73b0c25
MD
484 range->alloc_bitmap = calloc(count, sizeof(unsigned long));
485 if (!range->alloc_bitmap)
9649c7ee
MD
486 return -1;
487 return 0;
0fdf7a4c
OD
488}
489
b73b0c25 490static
a5694a4d 491bool percpu_addr_in_pool(const struct rseq_mempool *pool, void __rseq_percpu *_addr)
b73b0c25 492{
0ba2a93e 493 struct rseq_mempool_range *range;
a5694a4d 494 void *addr = (void *) _addr;
b73b0c25 495
252f9411 496 list_for_each_entry(range, &pool->range_list, node) {
b73b0c25
MD
497 if (addr >= range->base && addr < range->base + range->next_unused)
498 return true;
499 }
500 return false;
501}
502
a9ec6111
OD
503/* Always inline for __builtin_return_address(0). */
504static inline __attribute__((always_inline))
1a426b47 505void check_free_list(const struct rseq_mempool *pool, bool mapping_accessible)
a9ec6111 506{
b73b0c25
MD
507 size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
508 max_list_traversal = 0, traversal_iteration = 0;
0ba2a93e 509 struct rseq_mempool_range *range;
b73b0c25 510
1a426b47 511 if (!pool->attr.robust_set || !mapping_accessible)
b73b0c25
MD
512 return;
513
252f9411 514 list_for_each_entry(range, &pool->range_list, node) {
cb475906
MD
515 total_item += pool->attr.stride >> pool->item_order;
516 total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
b73b0c25
MD
517 }
518 max_list_traversal = total_item - total_never_allocated;
a9ec6111
OD
519
520 for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
521 node;
522 prev = node,
523 node = node->next) {
524
a9ec6111 525 if (traversal_iteration >= max_list_traversal) {
ca452fee
MD
526 fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
527 __func__, get_pool_name(pool), pool, __builtin_return_address(0));
a9ec6111
OD
528 abort();
529 }
530
531 /* Node is out of range. */
a5694a4d 532 if (!percpu_addr_in_pool(pool, __rseq_free_list_to_percpu_ptr(pool, node))) {
a9ec6111 533 if (prev)
ca452fee
MD
534 fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
535 __func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
a9ec6111 536 else
ca452fee
MD
537 fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
538 __func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
a9ec6111
OD
539 abort();
540 }
541
b73b0c25
MD
542 traversal_iteration++;
543 total_freed++;
a9ec6111
OD
544 }
545
546 if (total_never_allocated + total_freed != total_item) {
ca452fee
MD
547 fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
548 __func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
a9ec6111
OD
549 abort();
550 }
a9ec6111
OD
551}
552
6fbf1fb6
MD
553/* Always inline for __builtin_return_address(0). */
554static inline __attribute__((always_inline))
555void check_range_poison(const struct rseq_mempool *pool,
556 const struct rseq_mempool_range *range)
557{
558 size_t item_offset;
559
560 for (item_offset = 0; item_offset < range->next_unused;
561 item_offset += pool->item_len)
562 rseq_percpu_check_poison_item(pool, range, item_offset);
563}
564
565/* Always inline for __builtin_return_address(0). */
566static inline __attribute__((always_inline))
1a426b47 567void check_pool_poison(const struct rseq_mempool *pool, bool mapping_accessible)
6fbf1fb6
MD
568{
569 struct rseq_mempool_range *range;
570
1a426b47 571 if (!pool->attr.robust_set || !mapping_accessible)
6fbf1fb6 572 return;
252f9411 573 list_for_each_entry(range, &pool->range_list, node)
6fbf1fb6
MD
574 check_range_poison(pool, range);
575}
576
e7cbbc10
MD
577/* Always inline for __builtin_return_address(0). */
578static inline __attribute__((always_inline))
0ba2a93e 579void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
0fdf7a4c 580{
b73b0c25 581 unsigned long *bitmap = range->alloc_bitmap;
9649c7ee 582 size_t count, total_leaks = 0;
0fdf7a4c 583
9649c7ee 584 if (!bitmap)
0fdf7a4c 585 return;
0fdf7a4c 586
cb475906 587 count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;
0fdf7a4c
OD
588
589 /* Assert that all items in the pool were freed. */
9649c7ee
MD
590 for (size_t k = 0; k < count; ++k)
591 total_leaks += rseq_hweight_ulong(bitmap[k]);
592 if (total_leaks) {
ca452fee
MD
593 fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
594 __func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
9649c7ee 595 abort();
0fdf7a4c
OD
596 }
597
598 free(bitmap);
a5694a4d 599 range->alloc_bitmap = NULL;
0fdf7a4c
OD
600}
601
b73b0c25
MD
602/* Always inline for __builtin_return_address(0). */
603static inline __attribute__((always_inline))
0ba2a93e 604int rseq_mempool_range_destroy(struct rseq_mempool *pool,
1a426b47
MD
605 struct rseq_mempool_range *range,
606 bool mapping_accessible)
b73b0c25
MD
607{
608 destroy_alloc_bitmap(pool, range);
1a426b47
MD
609 if (!mapping_accessible) {
610 /*
611 * Only the header pages are populated in the child
612 * process.
613 */
614 return munmap(range->header, POOL_HEADER_NR_PAGES * rseq_get_page_len());
615 }
5cd72fc7 616 return munmap(range->mmap_addr, range->mmap_len);
5c99f3d6
MD
617}
618
619/*
620 * Allocate a memory mapping aligned on @alignment, with an optional
621 * @pre_header before the mapping.
622 */
623static
5cd72fc7 624void *aligned_mmap_anonymous(size_t page_size, size_t len, size_t alignment,
5c99f3d6
MD
625 void **pre_header, size_t pre_header_len)
626{
627 size_t minimum_page_count, page_count, extra, total_allocate = 0;
628 int page_order;
629 void *ptr;
630
631 if (len < page_size || alignment < page_size ||
b72b2d9e 632 !is_pow2(alignment) || (len & (alignment - 1))) {
5c99f3d6
MD
633 errno = EINVAL;
634 return NULL;
635 }
636 page_order = rseq_get_count_order_ulong(page_size);
637 if (page_order < 0) {
638 errno = EINVAL;
639 return NULL;
640 }
641 if (pre_header_len && (pre_header_len & (page_size - 1))) {
642 errno = EINVAL;
643 return NULL;
644 }
645
646 minimum_page_count = (pre_header_len + len) >> page_order;
647 page_count = (pre_header_len + len + alignment - page_size) >> page_order;
648
649 assert(page_count >= minimum_page_count);
650
5cd72fc7
MD
651 ptr = mmap(NULL, page_count << page_order, PROT_READ | PROT_WRITE,
652 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
653 if (ptr == MAP_FAILED) {
654 ptr = NULL;
5c99f3d6 655 goto alloc_error;
5cd72fc7 656 }
5c99f3d6
MD
657
658 total_allocate = page_count << page_order;
659
660 if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
661 /* Pointer is already aligned. ptr points to pre_header. */
662 goto out;
663 }
664
665 /* Unmap extra before. */
666 extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
667 assert(!(extra & (page_size - 1)));
5cd72fc7 668 if (munmap(ptr, extra)) {
5c99f3d6
MD
669 perror("munmap");
670 abort();
671 }
672 total_allocate -= extra;
673 ptr += extra; /* ptr points to pre_header */
674 page_count -= extra >> page_order;
675out:
676 assert(page_count >= minimum_page_count);
677
678 if (page_count > minimum_page_count) {
679 void *extra_ptr;
680
681 /* Unmap extra after. */
682 extra_ptr = ptr + (minimum_page_count << page_order);
683 extra = (page_count - minimum_page_count) << page_order;
5cd72fc7 684 if (munmap(extra_ptr, extra)) {
5c99f3d6
MD
685 perror("munmap");
686 abort();
687 }
688 total_allocate -= extra;
689 }
690
691 assert(!(((uintptr_t)ptr + pre_header_len) & (alignment - 1)));
692 assert(total_allocate == len + pre_header_len);
693
694alloc_error:
695 if (ptr) {
696 if (pre_header)
697 *pre_header = ptr;
698 ptr += pre_header_len;
699 }
700 return ptr;
b73b0c25
MD
701}
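/*
 * Worked example for the trimming scheme above: with page_size = 4 KiB,
 * len = alignment = 2 MiB and pre_header_len = 8 KiB, page_count covers
 * 8 KiB + 2 MiB + (2 MiB - 4 KiB). The unaligned head and the unused
 * tail are then unmapped so that exactly pre_header_len + len bytes
 * remain mapped, with the returned pointer (just past the pre-header)
 * aligned on 2 MiB.
 */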
702
a5694a4d 703static
cc0413ab 704int rseq_memfd_create_init(const char *poolname, size_t init_len)
a5694a4d 705{
a10c1c93 706 int fd;
cc0413ab
OD
707 char buf[249]; /* Limit is 249 bytes. */
708 const char *name;
a10c1c93 709
cc0413ab
OD
710 if (poolname) {
711 snprintf(buf, sizeof(buf), "%s:rseq-mempool", poolname);
712 name = buf;
713 } else {
714 name = "<anonymous>:rseq-mempool";
715 }
716
717 fd = memfd_create(name, MFD_CLOEXEC);
a10c1c93
MD
718 if (fd < 0) {
719 perror("memfd_create");
720 goto end;
a5694a4d 721 }
a10c1c93 722 if (ftruncate(fd, (off_t) init_len)) {
025165ad
MD
723 if (close(fd))
724 perror("close");
a10c1c93
MD
725 fd = -1;
726 goto end;
727 }
728end:
729 return fd;
730}
731
732static
733void rseq_memfd_close(int fd)
734{
025165ad
MD
735 if (fd < 0)
736 return;
a10c1c93
MD
737 if (close(fd))
738 perror("close");
a5694a4d
MD
739}
740
b73b0c25 741static
0ba2a93e 742struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
b73b0c25 743{
0ba2a93e 744 struct rseq_mempool_range *range;
5c99f3d6 745 unsigned long page_size;
4aa3220c 746 void *header;
b73b0c25 747 void *base;
a5694a4d 748 size_t range_len; /* Range len excludes header. */
1a426b47 749 size_t header_len;
025165ad 750 int memfd = -1;
b73b0c25 751
e11a02d7
MD
752 if (pool->attr.max_nr_ranges &&
753 pool->nr_ranges >= pool->attr.max_nr_ranges) {
9d986353
MD
754 errno = ENOMEM;
755 return NULL;
756 }
5c99f3d6 757 page_size = rseq_get_page_len();
b73b0c25 758
1a426b47 759 header_len = POOL_HEADER_NR_PAGES * page_size;
a5694a4d 760 range_len = pool->attr.stride * pool->attr.max_nr_cpus;
805d0043 761 if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
a5694a4d 762 range_len += pool->attr.stride; /* init values */
c0de0012 763 if (pool->attr.robust_set)
805d0043 764 range_len += pool->attr.stride; /* dedicated free list */
5cd72fc7 765 base = aligned_mmap_anonymous(page_size, range_len,
1a426b47 766 pool->attr.stride, &header, header_len);
b73b0c25 767 if (!base)
5c99f3d6 768 return NULL;
0ba2a93e 769 range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
5c99f3d6 770 range->pool = pool;
4aa3220c 771 range->header = header;
a5694a4d 772 range->base = base;
fa6a0fb3 773 range->mmap_addr = header;
1a426b47 774 range->mmap_len = header_len + range_len;
a5694a4d 775
805d0043 776 if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) {
a5694a4d
MD
777 range->init = base + (pool->attr.stride * pool->attr.max_nr_cpus);
778 /* Populate init values pages from memfd */
cc0413ab 779 memfd = rseq_memfd_create_init(pool->name, pool->attr.stride);
a10c1c93 780 if (memfd < 0)
a5694a4d
MD
781 goto error_alloc;
782 if (mmap(range->init, pool->attr.stride, PROT_READ | PROT_WRITE,
1a426b47 783 MAP_SHARED | MAP_FIXED, memfd, 0) != (void *) range->init)
a5694a4d 784 goto error_alloc;
a5694a4d
MD
785 assert(pool->attr.type == MEMPOOL_TYPE_PERCPU);
786 /*
787 * Map per-cpu memory as private COW mappings of init values.
788 */
789 {
790 int cpu;
791
792 for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
793 void *p = base + (pool->attr.stride * cpu);
794 size_t len = pool->attr.stride;
795
796 if (mmap(p, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
1a426b47 797 memfd, 0) != (void *) p)
a5694a4d 798 goto error_alloc;
a5694a4d
MD
799 }
800 }
bf7b01a3
MD
801 /*
802 * The init values shared mapping should not be shared
803 * with the children processes across fork. Prevent the
804 * whole mapping from being used across fork.
805 */
1a426b47
MD
806 if (madvise(base, range_len, MADV_DONTFORK))
807 goto error_alloc;
808
809 /*
810 * Write 0x1 in first byte of header first page, which
811 * will be WIPEONFORK (and thus cleared) in children
812 * processes. Used to find out if pool destroy is called
813 * from a child process after fork.
814 */
815 *((char *) header) = 0x1;
816 if (madvise(header, page_size, MADV_WIPEONFORK))
bf7b01a3 817 goto error_alloc;
1a426b47
MD
818
819 /*
820 * The second header page contains the struct
821 * rseq_mempool_range, which is needed by pool destroy.
822 * Leave this anonymous page populated (COW) in child
823 * processes.
824 */
a10c1c93 825 rseq_memfd_close(memfd);
025165ad 826 memfd = -1;
a5694a4d
MD
827 }
828
b73b0c25
MD
829 if (pool->attr.robust_set) {
830 if (create_alloc_bitmap(pool, range))
831 goto error_alloc;
832 }
135811f2 833 if (pool->attr.init_set) {
374c2773
MD
834 switch (pool->attr.type) {
835 case MEMPOOL_TYPE_GLOBAL:
6e329183 836 if (pool->attr.init_func(pool->attr.init_priv,
374c2773 837 base, pool->attr.stride, -1)) {
6e329183
MD
838 goto error_alloc;
839 }
374c2773
MD
840 break;
841 case MEMPOOL_TYPE_PERCPU:
842 {
843 int cpu;
844 for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
845 if (pool->attr.init_func(pool->attr.init_priv,
846 base + (pool->attr.stride * cpu),
847 pool->attr.stride, cpu)) {
848 goto error_alloc;
849 }
850 }
851 break;
852 }
853 default:
854 abort();
135811f2
MD
855 }
856 }
9d986353 857 pool->nr_ranges++;
b73b0c25
MD
858 return range;
859
860error_alloc:
025165ad 861 rseq_memfd_close(memfd);
1a426b47 862 (void) rseq_mempool_range_destroy(pool, range, true);
b73b0c25
MD
863 return NULL;
864}
865
1a426b47
MD
866static
867bool pool_mappings_accessible(struct rseq_mempool *pool)
868{
869 struct rseq_mempool_range *range;
870 size_t page_size;
871 char *addr;
872
805d0043 873 if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_COW_INIT)
1a426b47 874 return true;
252f9411 875 if (list_empty(&pool->range_list))
1a426b47 876 return true;
252f9411 877 range = list_first_entry(&pool->range_list, struct rseq_mempool_range, node);
1a426b47
MD
878 page_size = rseq_get_page_len();
879 /*
880 * Header first page is one page before the page containing the
881 * range structure.
882 */
883 addr = (char *) ((uintptr_t) range & ~(page_size - 1)) - page_size;
884 /*
885 * Look for 0x1 first byte marker in header first page.
886 */
887 if (*addr != 0x1)
888 return false;
889 return true;
890}
891
0ba2a93e 892int rseq_mempool_destroy(struct rseq_mempool *pool)
9649c7ee 893{
252f9411 894 struct rseq_mempool_range *range, *tmp_range;
1a426b47 895 bool mapping_accessible;
b73b0c25 896 int ret = 0;
9649c7ee 897
f510ddc5
MD
898 if (!pool)
899 return 0;
1a426b47
MD
900
901 /*
902 * Validate that the pool mappings are accessible before doing
903 * free list/poison validation and unmapping ranges. This allows
805d0043
MD
904 * calling pool destroy in child process after a fork for COW_INIT
905 * pools to free pool resources.
1a426b47
MD
906 */
907 mapping_accessible = pool_mappings_accessible(pool);
908
909 check_free_list(pool, mapping_accessible);
910 check_pool_poison(pool, mapping_accessible);
911
b73b0c25 912 /* Iteration safe against removal. */
252f9411
MD
913 list_for_each_entry_safe(range, tmp_range, &pool->range_list, node) {
914 list_del(&range->node);
915 if (rseq_mempool_range_destroy(pool, range, mapping_accessible)) {
916 /* Keep list coherent in case of partial failure. */
917 list_add(&range->node, &pool->range_list);
b73b0c25 918 goto end;
252f9411 919 }
b73b0c25 920 }
9649c7ee 921 pthread_mutex_destroy(&pool->lock);
ca452fee 922 free(pool->name);
eb8db04d 923 free(pool);
9649c7ee 924end:
b73b0c25 925 return ret;
9649c7ee
MD
926}
927
0ba2a93e 928struct rseq_mempool *rseq_mempool_create(const char *pool_name,
cb475906 929 size_t item_len, const struct rseq_mempool_attr *_attr)
ef6695f1 930{
0ba2a93e 931 struct rseq_mempool_attr attr = {};
252f9411
MD
932 struct rseq_mempool_range *range;
933 struct rseq_mempool *pool;
ef6695f1 934 int order;
ef6695f1
MD
935
936 /* Make sure each item is large enough to contain free list pointers. */
937 if (item_len < sizeof(void *))
938 item_len = sizeof(void *);
939
940 /* Align item_len on next power of two. */
19be9217 941 order = rseq_get_count_order_ulong(item_len);
ef6695f1
MD
942 if (order < 0) {
943 errno = EINVAL;
944 return NULL;
945 }
946 item_len = 1UL << order;
947
a82006d0
MD
948 if (_attr)
949 memcpy(&attr, _attr, sizeof(attr));
a82006d0 950
805d0043
MD
951 /*
952 * Validate that the pool populate policy requested is known.
953 */
954 switch (attr.populate_policy) {
955 case RSEQ_MEMPOOL_POPULATE_COW_INIT:
956 break;
957 case RSEQ_MEMPOOL_POPULATE_COW_ZERO:
958 break;
959 default:
960 errno = EINVAL;
961 return NULL;
962 }
963
cb475906
MD
964 switch (attr.type) {
965 case MEMPOOL_TYPE_PERCPU:
966 if (attr.max_nr_cpus < 0) {
967 errno = EINVAL;
968 return NULL;
969 }
970 if (attr.max_nr_cpus == 0) {
971 /* Auto-detect */
47c725dd 972 attr.max_nr_cpus = rseq_get_max_nr_cpus();
cb475906
MD
973 if (attr.max_nr_cpus == 0) {
974 errno = EINVAL;
975 return NULL;
976 }
977 }
978 break;
979 case MEMPOOL_TYPE_GLOBAL:
a5694a4d 980 /* Override populate policy for global type. */
805d0043
MD
981 if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
982 attr.populate_policy = RSEQ_MEMPOOL_POPULATE_COW_ZERO;
89b7e681
MD
983 /* Use a 1-cpu pool for global mempool type. */
984 attr.max_nr_cpus = 1;
cb475906
MD
985 break;
986 }
987 if (!attr.stride)
988 attr.stride = RSEQ_MEMPOOL_STRIDE; /* Use default */
3975084e
MD
989 if (attr.robust_set && !attr.poison_set) {
990 attr.poison_set = true;
805d0043 991 if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
6ea98a7b
MD
992 attr.poison = DEFAULT_COW_INIT_POISON_VALUE;
993 else
994 attr.poison = DEFAULT_COW_ZERO_POISON_VALUE;
3975084e 995 }
cb475906
MD
996 if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
997 !is_pow2(attr.stride)) {
998 errno = EINVAL;
999 return NULL;
1000 }
1001
0ba2a93e 1002 pool = calloc(1, sizeof(struct rseq_mempool));
bc510b60
MD
1003 if (!pool)
1004 return NULL;
ef6695f1 1005
b73b0c25 1006 memcpy(&pool->attr, &attr, sizeof(attr));
ef6695f1 1007 pthread_mutex_init(&pool->lock, NULL);
ef6695f1
MD
1008 pool->item_len = item_len;
1009 pool->item_order = order;
252f9411 1010 INIT_LIST_HEAD(&pool->range_list);
b73b0c25 1011
252f9411
MD
1012 range = rseq_mempool_range_create(pool);
1013 if (!range)
b73b0c25 1014 goto error_alloc;
252f9411 1015 list_add(&range->node, &pool->range_list);
0fdf7a4c 1016
ca452fee
MD
1017 if (pool_name) {
1018 pool->name = strdup(pool_name);
1019 if (!pool->name)
1020 goto error_alloc;
1021 }
ef6695f1 1022 return pool;
ef6695f1 1023
9649c7ee 1024error_alloc:
0ba2a93e 1025 rseq_mempool_destroy(pool);
9649c7ee
MD
1026 errno = ENOMEM;
1027 return NULL;
ef6695f1
MD
1028}
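/*
 * Sketch of attribute-based pool creation using only functions defined
 * in this file. "struct my_item" is a placeholder, error checking is
 * omitted, max_nr_cpus = 0 requests auto-detection, and the attributes
 * are copied into the pool, so the attr object can be destroyed right
 * after rseq_mempool_create():
 *
 *	struct rseq_mempool_attr *attr;
 *	struct rseq_mempool *pool;
 *
 *	attr = rseq_mempool_attr_create();
 *	rseq_mempool_attr_set_robust(attr);
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	pool = rseq_mempool_create("my-pool", sizeof(struct my_item), attr);
 *	rseq_mempool_attr_destroy(attr);
 *	...
 *	rseq_mempool_destroy(pool);
 */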
1029
e7cbbc10
MD
1030/* Always inline for __builtin_return_address(0). */
1031static inline __attribute__((always_inline))
9d986353 1032void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
0fdf7a4c 1033{
9d986353 1034 unsigned long *bitmap = range->alloc_bitmap;
9649c7ee 1035 size_t item_index = item_offset >> pool->item_order;
0fdf7a4c
OD
1036 unsigned long mask;
1037 size_t k;
1038
9649c7ee 1039 if (!bitmap)
0fdf7a4c 1040 return;
0fdf7a4c 1041
9649c7ee 1042 k = item_index / BIT_PER_ULONG;
0fdf7a4c
OD
1043 mask = 1ULL << (item_index % BIT_PER_ULONG);
1044
9649c7ee
MD
1045 /* Print error if bit is already set. */
1046 if (bitmap[k] & mask) {
ca452fee
MD
1047 fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
1048 __func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
9649c7ee
MD
1049 abort();
1050 }
0fdf7a4c
OD
1051 bitmap[k] |= mask;
1052}
1053
ef6695f1 1054static
6ff43d9a
MD
1055void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool,
1056 bool zeroed, void *init_ptr, size_t init_len)
ef6695f1 1057{
9d986353 1058 struct rseq_mempool_range *range;
ef6695f1
MD
1059 struct free_list_node *node;
1060 uintptr_t item_offset;
d24ee051 1061 void __rseq_percpu *addr;
ef6695f1 1062
6ff43d9a
MD
1063 if (init_len > pool->item_len) {
1064 errno = EINVAL;
1065 return NULL;
1066 }
ef6695f1
MD
1067 pthread_mutex_lock(&pool->lock);
1068 /* Get first entry from free list. */
1069 node = pool->free_list_head;
1070 if (node != NULL) {
a5694a4d 1071 void *range_base, *ptr;
9d986353 1072
a5694a4d
MD
1073 ptr = __rseq_free_list_to_percpu_ptr(pool, node);
1074 range_base = (void *) ((uintptr_t) ptr & (~(pool->attr.stride - 1)));
9d986353 1075 range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
ef6695f1
MD
1076 /* Remove node from free list (update head). */
1077 pool->free_list_head = node->next;
a5694a4d 1078 item_offset = (uintptr_t) (ptr - range_base);
86617384 1079 rseq_percpu_check_poison_item(pool, range, item_offset);
a5694a4d 1080 addr = __rseq_free_list_to_percpu_ptr(pool, node);
ef6695f1
MD
1081 goto end;
1082 }
9d986353 1083 /*
f06c65e7
MD
1084 * If there are no ranges, or if the most recent range (first in
1085 * list) does not have any room left, create a new range and
1086 * prepend it to the list head.
9d986353 1087 */
f06c65e7
MD
1088 if (list_empty(&pool->range_list))
1089 goto create_range;
252f9411 1090 range = list_first_entry(&pool->range_list, struct rseq_mempool_range, node);
f06c65e7
MD
1091 if (range->next_unused + pool->item_len > pool->attr.stride)
1092 goto create_range;
1093 else
1094 goto room_left;
1095create_range:
1096 range = rseq_mempool_range_create(pool);
1097 if (!range) {
1098 errno = ENOMEM;
1099 addr = NULL;
1100 goto end;
ef6695f1 1101 }
f06c65e7
MD
1102 /* Add range to head of list. */
1103 list_add(&range->node, &pool->range_list);
1104room_left:
9d986353
MD
1105 /* First range in list has room left. */
1106 item_offset = range->next_unused;
1107 addr = (void __rseq_percpu *) (range->base + item_offset);
1108 range->next_unused += pool->item_len;
ef6695f1 1109end:
ffea0dea
MD
1110 if (addr) {
1111 range->allocated_items++;
9d986353 1112 set_alloc_slot(pool, range, item_offset);
ffea0dea 1113 }
ef6695f1 1114 pthread_mutex_unlock(&pool->lock);
6ff43d9a
MD
1115 if (addr) {
1116 if (zeroed)
1117 rseq_percpu_zero_item(pool, range, item_offset);
1118 else if (init_ptr) {
1119 rseq_percpu_init_item(pool, range, item_offset,
1120 init_ptr, init_len);
1121 }
1122 }
ef6695f1
MD
1123 return addr;
1124}
1125
15da5c27 1126void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
ef6695f1 1127{
6ff43d9a 1128 return __rseq_percpu_malloc(pool, false, NULL, 0);
ef6695f1
MD
1129}
1130
15da5c27 1131void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
ef6695f1 1132{
6ff43d9a
MD
1133 return __rseq_percpu_malloc(pool, true, NULL, 0);
1134}
1135
1136void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool,
1137 void *init_ptr, size_t len)
1138{
1139 return __rseq_percpu_malloc(pool, false, init_ptr, len);
ef6695f1
MD
1140}
1141
e7cbbc10
MD
1142/* Always inline for __builtin_return_address(0). */
1143static inline __attribute__((always_inline))
9d986353 1144void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
0fdf7a4c 1145{
9d986353 1146 unsigned long *bitmap = range->alloc_bitmap;
9649c7ee 1147 size_t item_index = item_offset >> pool->item_order;
0fdf7a4c
OD
1148 unsigned long mask;
1149 size_t k;
1150
9649c7ee 1151 if (!bitmap)
0fdf7a4c 1152 return;
0fdf7a4c 1153
9649c7ee
MD
1154 k = item_index / BIT_PER_ULONG;
1155 mask = 1ULL << (item_index % BIT_PER_ULONG);
0fdf7a4c 1156
9649c7ee
MD
1157 /* Print error if bit is not set. */
1158 if (!(bitmap[k] & mask)) {
ca452fee
MD
1159 fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
1160 __func__, get_pool_name(pool), pool, item_offset,
1161 (void *) __builtin_return_address(0));
9649c7ee
MD
1162 abort();
1163 }
0fdf7a4c
OD
1164 bitmap[k] &= ~mask;
1165}
1166
cb475906 1167void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
ef6695f1
MD
1168{
1169 uintptr_t ptr = (uintptr_t) _ptr;
cb475906 1170 void *range_base = (void *) (ptr & (~(stride - 1)));
0ba2a93e
MD
1171 struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
1172 struct rseq_mempool *pool = range->pool;
cb475906 1173 uintptr_t item_offset = ptr & (stride - 1);
ef6695f1
MD
1174 struct free_list_node *head, *item;
1175
1176 pthread_mutex_lock(&pool->lock);
9d986353 1177 clear_alloc_slot(pool, range, item_offset);
ffea0dea
MD
1178 if (!range->allocated_items) {
1179 fprintf(stderr, "%s: Trying to free an item from an empty pool range within pool \"%s\" (%p), item offset: %zu, caller: %p.\n",
1180 __func__, get_pool_name(pool), pool, item_offset,
1181 (void *) __builtin_return_address(0));
1182 abort();
1183 }
1184 range->allocated_items--;
ef6695f1
MD
1185 /* Add ptr to head of free list */
1186 head = pool->free_list_head;
455e090e
MD
1187 if (pool->attr.poison_set)
1188 rseq_percpu_poison_item(pool, range, item_offset);
a5694a4d 1189 item = __rseq_percpu_to_free_list_ptr(pool, _ptr);
455e090e
MD
1190 /*
1191 * Setting the next pointer will overwrite the first uintptr_t
805d0043
MD
1192 * poison for either CPU 0 (COW_ZERO, non-robust), or init data
1193 * (COW_INIT, non-robust).
455e090e 1194 */
ef6695f1
MD
1195 item->next = head;
1196 pool->free_list_head = item;
1197 pthread_mutex_unlock(&pool->lock);
1198}
1199
0ba2a93e 1200struct rseq_mempool_set *rseq_mempool_set_create(void)
ef6695f1 1201{
0ba2a93e 1202 struct rseq_mempool_set *pool_set;
ef6695f1 1203
0ba2a93e 1204 pool_set = calloc(1, sizeof(struct rseq_mempool_set));
ef6695f1
MD
1205 if (!pool_set)
1206 return NULL;
1207 pthread_mutex_init(&pool_set->lock, NULL);
1208 return pool_set;
1209}
1210
0ba2a93e 1211int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
ef6695f1
MD
1212{
1213 int order, ret;
1214
1215 for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
0ba2a93e 1216 struct rseq_mempool *pool = pool_set->entries[order];
ef6695f1
MD
1217
1218 if (!pool)
1219 continue;
0ba2a93e 1220 ret = rseq_mempool_destroy(pool);
ef6695f1
MD
1221 if (ret)
1222 return ret;
1223 pool_set->entries[order] = NULL;
1224 }
1225 pthread_mutex_destroy(&pool_set->lock);
1226 free(pool_set);
1227 return 0;
1228}
1229
1230/* Ownership of pool is handed over to pool set on success. */
0ba2a93e 1231int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
ef6695f1
MD
1232{
1233 size_t item_order = pool->item_order;
1234 int ret = 0;
1235
1236 pthread_mutex_lock(&pool_set->lock);
1237 if (pool_set->entries[item_order]) {
1238 errno = EBUSY;
1239 ret = -1;
1240 goto end;
1241 }
1242 pool_set->entries[pool->item_order] = pool;
1243end:
1244 pthread_mutex_unlock(&pool_set->lock);
1245 return ret;
1246}
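/*
 * Sketch of pool set usage with the functions defined in this file:
 * create one pool per object size of interest, hand ownership over to
 * the set, then let the set route each allocation to the smallest
 * suitable pool. rseq_mempool_set_destroy() also destroys the pools the
 * set owns. Error checking is omitted.
 *
 *	struct rseq_mempool_set *set;
 *	void __rseq_percpu *p;
 *	size_t sz;
 *
 *	set = rseq_mempool_set_create();
 *	for (sz = 8; sz <= 128; sz <<= 1)
 *		rseq_mempool_set_add_pool(set,
 *			rseq_mempool_create("set-pool", sz, NULL));
 *	p = rseq_mempool_set_percpu_zmalloc(set, 24);
 *	...
 *	rseq_mempool_set_destroy(set);
 */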
1247
1248static
6ff43d9a
MD
1249void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set,
1250 void *init_ptr, size_t len, bool zeroed)
ef6695f1
MD
1251{
1252 int order, min_order = POOL_SET_MIN_ENTRY;
0ba2a93e 1253 struct rseq_mempool *pool;
d24ee051 1254 void __rseq_percpu *addr;
ef6695f1 1255
d06f5cf5
MD
1256 order = rseq_get_count_order_ulong(len);
1257 if (order > POOL_SET_MIN_ENTRY)
1258 min_order = order;
ef6695f1
MD
1259again:
1260 pthread_mutex_lock(&pool_set->lock);
1261 /* First smallest present pool where @len fits. */
1262 for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
1263 pool = pool_set->entries[order];
1264
1265 if (!pool)
1266 continue;
1267 if (pool->item_len >= len)
1268 goto found;
1269 }
1270 pool = NULL;
1271found:
1272 pthread_mutex_unlock(&pool_set->lock);
1273 if (pool) {
6ff43d9a 1274 addr = __rseq_percpu_malloc(pool, zeroed, init_ptr, len);
ef6695f1
MD
1275 if (addr == NULL && errno == ENOMEM) {
1276 /*
1277 * If the allocation failed, try again with a
1278 * larger pool.
1279 */
1280 min_order = order + 1;
1281 goto again;
1282 }
1283 } else {
1284 /* Not found. */
1285 errno = ENOMEM;
1286 addr = NULL;
1287 }
1288 return addr;
1289}
1290
15da5c27 1291void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
ef6695f1 1292{
6ff43d9a 1293 return __rseq_mempool_set_malloc(pool_set, NULL, len, false);
ef6695f1
MD
1294}
1295
15da5c27 1296void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
ef6695f1 1297{
6ff43d9a
MD
1298 return __rseq_mempool_set_malloc(pool_set, NULL, len, true);
1299}
1300
1301void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set,
1302 void *init_ptr, size_t len)
1303{
1304 return __rseq_mempool_set_malloc(pool_set, init_ptr, len, false);
ef6695f1 1305}
9bd07c29 1306
0ba2a93e 1307struct rseq_mempool_attr *rseq_mempool_attr_create(void)
a82006d0 1308{
0ba2a93e 1309 return calloc(1, sizeof(struct rseq_mempool_attr));
a82006d0
MD
1310}
1311
0ba2a93e 1312void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
a82006d0
MD
1313{
1314 free(attr);
1315}
1316
135811f2 1317int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
6e329183 1318 int (*init_func)(void *priv, void *addr, size_t len, int cpu),
135811f2
MD
1319 void *init_priv)
1320{
1321 if (!attr) {
1322 errno = EINVAL;
1323 return -1;
1324 }
1325 attr->init_set = true;
1326 attr->init_func = init_func;
1327 attr->init_priv = init_priv;
805d0043 1328 attr->populate_policy = RSEQ_MEMPOOL_POPULATE_COW_INIT;
135811f2
MD
1329 return 0;
1330}
1331
0ba2a93e 1332int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
d6acc8aa
MD
1333{
1334 if (!attr) {
1335 errno = EINVAL;
1336 return -1;
1337 }
1338 attr->robust_set = true;
1339 return 0;
1340}
cb475906
MD
1341
1342int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
1343 size_t stride, int max_nr_cpus)
1344{
1345 if (!attr) {
1346 errno = EINVAL;
1347 return -1;
1348 }
1349 attr->type = MEMPOOL_TYPE_PERCPU;
1350 attr->stride = stride;
1351 attr->max_nr_cpus = max_nr_cpus;
1352 return 0;
1353}
1354
1355int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
1356 size_t stride)
1357{
1358 if (!attr) {
1359 errno = EINVAL;
1360 return -1;
1361 }
1362 attr->type = MEMPOOL_TYPE_GLOBAL;
1363 attr->stride = stride;
89b7e681 1364 attr->max_nr_cpus = 0;
cb475906
MD
1365 return 0;
1366}
6037d364 1367
e11a02d7
MD
1368int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
1369 unsigned long max_nr_ranges)
1370{
1371 if (!attr) {
1372 errno = EINVAL;
1373 return -1;
1374 }
1375 attr->max_nr_ranges = max_nr_ranges;
1376 return 0;
1377}
1378
455e090e
MD
1379int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
1380 uintptr_t poison)
1381{
1382 if (!attr) {
1383 errno = EINVAL;
1384 return -1;
1385 }
1386 attr->poison_set = true;
1387 attr->poison = poison;
1388 return 0;
1389}
1390
a5694a4d
MD
1391int rseq_mempool_attr_set_populate_policy(struct rseq_mempool_attr *attr,
1392 enum rseq_mempool_populate_policy policy)
1393{
1394 if (!attr) {
1395 errno = EINVAL;
1396 return -1;
1397 }
1398 attr->populate_policy = policy;
1399 return 0;
1400}
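/*
 * Note on the two policies accepted above, as implemented by
 * rseq_mempool_range_create(): COW_ZERO leaves untouched per-CPU pages
 * backed by the zero page, while COW_INIT backs them with the shared
 * "init values" mapping. Explicit selection looks like:
 *
 *	rseq_mempool_attr_set_populate_policy(attr,
 *		RSEQ_MEMPOOL_POPULATE_COW_ZERO);
 */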
1401
6037d364
MD
1402int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
1403{
1404 if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
1405 errno = EINVAL;
1406 return -1;
1407 }
1408 return mempool->attr.max_nr_cpus;
1409}