rseq percpu: print caller ip on double-free/alloc corruption and leak
[librseq.git] / src / rseq-percpu-alloc.c
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/percpu-alloc.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-alloc-utils.h"

/*
 * rseq-percpu-alloc.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded up to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
 * memory allocator provides CPU-Local Storage.
 */
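
/*
 * Example usage (sketch): create a pool of per-CPU objects, allocate an
 * item, access the copy belonging to a given CPU through the
 * rseq_percpu_ptr() accessor declared in <rseq/percpu-alloc.h>, then
 * release it. PERCPU_POOL_LEN, struct my_counter, max_nr_cpus and cpu
 * are placeholders.
 *
 *	struct rseq_percpu_pool *pool;
 *	struct my_counter __rseq_percpu *c;
 *	struct my_counter *cpu_c;
 *
 *	pool = rseq_percpu_pool_create(sizeof(struct my_counter),
 *			PERCPU_POOL_LEN, max_nr_cpus, NULL, RSEQ_POOL_ROBUST);
 *	c = (struct my_counter __rseq_percpu *) rseq_percpu_zmalloc(pool);
 *	cpu_c = rseq_percpu_ptr(c, cpu);	// CPU-local copy for @cpu
 *	...
 *	rseq_percpu_free(c);
 *	rseq_percpu_pool_destroy(pool);
 */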

/*
 * Use the high bits of per-CPU addresses to index the pool.
 * This leaves the low bits available to the application for pointer
 * tagging (based on next power of 2 alignment of the allocations).
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_INDEX_BITS	16
#else
# define POOL_INDEX_BITS	8
#endif
#define MAX_NR_POOLS		(1UL << POOL_INDEX_BITS)
#define POOL_INDEX_SHIFT	(RSEQ_BITS_PER_LONG - POOL_INDEX_BITS)
#define MAX_POOL_LEN		(1UL << POOL_INDEX_SHIFT)
#define MAX_POOL_LEN_MASK	(MAX_POOL_LEN - 1)
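
/*
 * For instance, on 64-bit (POOL_INDEX_BITS = 16, POOL_INDEX_SHIFT = 48),
 * the encoded per-CPU pointer 0x0002000000000040 designates pool index
 * 0x2 (high 16 bits) and item offset 0x40 (low 48 bits) within each
 * CPU's range of that pool.
 */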

#define POOL_SET_NR_ENTRIES	POOL_INDEX_SHIFT

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

/*
 * Skip pool index 0 so that an allocated item at offset 0 never encodes
 * to a NULL pointer.
 */
#define FIRST_POOL		1

#define RSEQ_POOL_FLAGS		(RSEQ_POOL_ROBUST)

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

/* This lock protects pool create/destroy. */
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

struct rseq_mmap_attr {
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;
};

struct rseq_percpu_pool {
	void *base;
	unsigned int index;
	size_t item_len;
	size_t percpu_len;
	int item_order;
	int max_nr_cpus;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;
	size_t next_unused;
	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mmap_attr mmap_attr;

	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

//TODO: the array of pools should grow dynamically on create.
static struct rseq_percpu_pool rseq_percpu_pool[MAX_NR_POOLS];

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_percpu_pool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_percpu_pool *entries[POOL_SET_NR_ENTRIES];
};
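
/*
 * Example usage (sketch, error handling omitted; PERCPU_POOL_LEN and
 * max_nr_cpus are placeholders): a pool set serves each request from
 * the smallest registered pool large enough for the requested length,
 * e.g. the 100-byte request below is served by the 1024-byte pool.
 *
 *	struct rseq_percpu_pool_set *set;
 *	void __rseq_percpu *p;
 *
 *	set = rseq_percpu_pool_set_create();
 *	rseq_percpu_pool_set_add_pool(set,
 *		rseq_percpu_pool_create(32, PERCPU_POOL_LEN, max_nr_cpus, NULL, 0));
 *	rseq_percpu_pool_set_add_pool(set,
 *		rseq_percpu_pool_create(1024, PERCPU_POOL_LEN, max_nr_cpus, NULL, 0));
 *	p = rseq_percpu_pool_set_zmalloc(set, 100);
 */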

static
void *__rseq_pool_percpu_ptr(struct rseq_percpu_pool *pool, int cpu, uintptr_t item_offset)
{
	return pool->base + (pool->percpu_len * cpu) + item_offset;
}

void *__rseq_percpu_ptr(void __rseq_percpu *_ptr, int cpu)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	uintptr_t item_offset = ptr & MAX_POOL_LEN_MASK;
	uintptr_t pool_index = ptr >> POOL_INDEX_SHIFT;
	struct rseq_percpu_pool *pool = &rseq_percpu_pool[pool_index];

	assert(cpu >= 0);
	return __rseq_pool_percpu_ptr(pool, cpu, item_offset);
}

static
void rseq_percpu_zero_item(struct rseq_percpu_pool *pool, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->max_nr_cpus; i++) {
		char *p = __rseq_pool_percpu_ptr(pool, i, item_offset);
		memset(p, 0, pool->item_len);
	}
}

#ifdef HAVE_LIBNUMA
int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool, int numa_flags)
{
	unsigned long nr_pages, page;
	long ret, page_len;
	int cpu;

	if (!numa_flags)
		return 0;
	page_len = rseq_get_page_len();
	nr_pages = pool->percpu_len >> rseq_get_count_order_ulong(page_len);
	for (cpu = 0; cpu < pool->max_nr_cpus; cpu++) {
		int node = numa_node_of_cpu(cpu);

		/* TODO: batch move_pages() call with an array of pages. */
		for (page = 0; page < nr_pages; page++) {
			void *pageptr = __rseq_pool_percpu_ptr(pool, cpu, page * page_len);
			int status = -EPERM;

			ret = move_pages(0, 1, &pageptr, &node, &status, numa_flags);
			if (ret)
				return ret;
		}
	}
	return 0;
}
#else
int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	return 0;
}
#endif
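
/*
 * For instance (sketch), pages of an existing pool can be migrated to
 * the NUMA node backing each CPU by passing MPOL_MF_MOVE as numa_flags,
 * which is forwarded to move_pages(2):
 *
 *	ret = rseq_percpu_pool_init_numa(pool, MPOL_MF_MOVE);
 */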

static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_percpu_pool *pool)
{
	size_t count;

	count = ((pool->percpu_len >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	pool->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!pool->alloc_bitmap)
		return -1;
	return 0;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_percpu_pool *pool)
{
	unsigned long *bitmap = pool->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->percpu_len >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool has %zu leaked items on destroy, caller: %p.\n",
			__func__, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int __rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool)
{
	int ret;

	if (!pool->base) {
		errno = ENOENT;
		ret = -1;
		goto end;
	}
	ret = pool->mmap_attr.munmap_func(pool->mmap_attr.mmap_priv, pool->base,
			pool->percpu_len * pool->max_nr_cpus);
	if (ret)
		goto end;
	pthread_mutex_destroy(&pool->lock);
	destroy_alloc_bitmap(pool);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool)
{
	int ret;

	pthread_mutex_lock(&pool_lock);
	ret = __rseq_percpu_pool_destroy(pool);
	pthread_mutex_unlock(&pool_lock);
	return ret;
}

struct rseq_percpu_pool *rseq_percpu_pool_create(size_t item_len,
		size_t percpu_len, int max_nr_cpus,
		const struct rseq_mmap_attr *mmap_attr,
		int flags)
{
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;
	struct rseq_percpu_pool *pool;
	void *base;
	unsigned int i;
	int order;

	if (flags & ~RSEQ_POOL_FLAGS) {
		errno = EINVAL;
		return NULL;
	}

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	/* Align percpu_len on page size. */
	percpu_len = rseq_align(percpu_len, rseq_get_page_len());

	if (max_nr_cpus < 0 || item_len > percpu_len ||
			percpu_len > (UINTPTR_MAX >> POOL_INDEX_BITS)) {
		errno = EINVAL;
		return NULL;
	}

	if (mmap_attr) {
		mmap_func = mmap_attr->mmap_func;
		munmap_func = mmap_attr->munmap_func;
		mmap_priv = mmap_attr->mmap_priv;
	} else {
		mmap_func = default_mmap_func;
		munmap_func = default_munmap_func;
		mmap_priv = NULL;
	}
	pthread_mutex_lock(&pool_lock);
	/* Linear scan in array of pools to find empty spot. */
	for (i = FIRST_POOL; i < MAX_NR_POOLS; i++) {
		pool = &rseq_percpu_pool[i];
		if (!pool->base)
			goto found_empty;
	}
	errno = ENOMEM;
	pool = NULL;
	goto end;

found_empty:
	base = mmap_func(mmap_priv, percpu_len * max_nr_cpus);
	if (!base)
		goto error_alloc;
	pthread_mutex_init(&pool->lock, NULL);
	pool->base = base;
	pool->percpu_len = percpu_len;
	pool->max_nr_cpus = max_nr_cpus;
	pool->index = i;
	pool->item_len = item_len;
	pool->item_order = order;
	pool->mmap_attr.mmap_func = mmap_func;
	pool->mmap_attr.munmap_func = munmap_func;
	pool->mmap_attr.mmap_priv = mmap_priv;

	if (RSEQ_POOL_ROBUST & flags) {
		if (create_alloc_bitmap(pool))
			goto error_alloc;
	}
end:
	pthread_mutex_unlock(&pool_lock);
	return pool;

error_alloc:
	__rseq_percpu_pool_destroy(pool);
	pthread_mutex_unlock(&pool_lock);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: %p, item offset: %zu, caller: %p.\n",
			__func__, pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_percpu_pool *pool, bool zeroed)
{
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - pool->base);
		addr = (void *) (((uintptr_t) pool->index << POOL_INDEX_SHIFT) | item_offset);
		goto end;
	}
	if (pool->next_unused + pool->item_len > pool->percpu_len) {
		errno = ENOMEM;
		addr = NULL;
		goto end;
	}
	item_offset = pool->next_unused;
	addr = (void *) (((uintptr_t) pool->index << POOL_INDEX_SHIFT) | item_offset);
	pool->next_unused += pool->item_len;
end:
	/* Mark the slot allocated for both free-list reuse and newly carved items. */
	if (addr)
		set_alloc_slot(pool, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (zeroed && addr)
		rseq_percpu_zero_item(pool, item_offset);
	return addr;
}

void __rseq_percpu *rseq_percpu_malloc(struct rseq_percpu_pool *pool)
{
	return __rseq_percpu_malloc(pool, false);
}

void __rseq_percpu *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool)
{
	return __rseq_percpu_malloc(pool, true);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: %p, item offset: %zu, caller: %p.\n",
			__func__, pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void rseq_percpu_free(void __rseq_percpu *_ptr)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	uintptr_t item_offset = ptr & MAX_POOL_LEN_MASK;
	uintptr_t pool_index = ptr >> POOL_INDEX_SHIFT;
	struct rseq_percpu_pool *pool = &rseq_percpu_pool[pool_index];
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, item_offset);
	/* Add ptr to head of free list. */
	head = pool->free_list_head;
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *)__rseq_pool_percpu_ptr(pool, 0, item_offset);
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void)
{
	struct rseq_percpu_pool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_percpu_pool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_percpu_pool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_percpu_pool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set, struct rseq_percpu_pool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_percpu_pool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* Find the first (smallest) present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len)
{
	return __rseq_percpu_pool_set_malloc(pool_set, len, false);
}

void __rseq_percpu *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len)
{
	return __rseq_percpu_pool_set_malloc(pool_set, len, true);
}

struct rseq_mmap_attr *rseq_mmap_attr_create(void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	struct rseq_mmap_attr *attr = calloc(1, sizeof(struct rseq_mmap_attr));

	if (!attr)
		return NULL;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return attr;
}

void rseq_mmap_attr_destroy(struct rseq_mmap_attr *attr)
{
	free(attr);
}
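
/*
 * Example (sketch; my_mmap and my_munmap are hypothetical callbacks):
 * plug in a custom mapping backend, e.g. to back pools with huge pages.
 * The pool copies the callbacks, so the attribute object can be
 * destroyed right after pool creation.
 *
 *	static void *my_mmap(void *priv, size_t len)
 *	{
 *		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *				MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB, -1, 0);
 *		return p == MAP_FAILED ? NULL : p;
 *	}
 *
 *	static int my_munmap(void *priv, void *ptr, size_t len)
 *	{
 *		return munmap(ptr, len);
 *	}
 *
 *	attr = rseq_mmap_attr_create(my_mmap, my_munmap, NULL);
 *	pool = rseq_percpu_pool_create(item_len, percpu_len, max_nr_cpus,
 *			attr, RSEQ_POOL_ROBUST);
 *	rseq_mmap_attr_destroy(attr);
 */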