Commit | Line | Data |
---|---|---|
ef6695f1 MD |
1 | // SPDX-License-Identifier: MIT |
2 | // SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | |
3 | ||
4 | #include <rseq/percpu-alloc.h> | |
5 | #include <sys/mman.h> | |
6 | #include <assert.h> | |
7 | #include <string.h> | |
8 | #include <pthread.h> | |
9 | #include <unistd.h> | |
10 | #include <stdlib.h> | |
11 | #include <rseq/compiler.h> | |
12 | #include <errno.h> | |
13 | #include <stdint.h> | |
14 | #include <stdbool.h> | |
367e559c MD |
15 | #include <stdio.h> |
16 | ||
17 | #ifdef HAVE_LIBNUMA | |
18 | # include <numa.h> | |
19 | # include <numaif.h> | |
20 | #endif | |
ef6695f1 | 21 | |
19be9217 MD |
22 | #include "rseq-alloc-utils.h" |
23 | ||
ef6695f1 | 24 | /* |
8ab16a24 | 25 | * rseq-percpu-alloc.c: rseq CPU-Local Storage (CLS) memory allocator. |
ef6695f1 | 26 | * |
8ab16a24 MD |
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory each containing objects of a
8aa1462d MD |
29 | * given size (rounded to next power of 2), reserving a given virtual |
30 | * address size per CPU, for a given maximum number of CPUs. | |
8ab16a24 MD |
31 | * |
32 | * The per-CPU memory allocator is analogous to TLS (Thread-Local | |
33 | * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU | |
34 | * memory allocator provides CPU-Local Storage. | |
ef6695f1 MD |
35 | */ |
36 | ||
72b100a1 | 37 | /* |
8ab16a24 | 38 | * Use high bits of per-CPU addresses to index the pool. |
72b100a1 MD |
 * This leaves the low bits available to the application for pointer
40 | * tagging (based on next power of 2 alignment of the allocations). | |
41 | */ | |
ef6695f1 | 42 | #if RSEQ_BITS_PER_LONG == 64 |
72b100a1 | 43 | # define POOL_INDEX_BITS 16 |
ef6695f1 | 44 | #else |
72b100a1 | 45 | # define POOL_INDEX_BITS 8 |
ef6695f1 | 46 | #endif |
72b100a1 MD |
47 | #define MAX_NR_POOLS (1UL << POOL_INDEX_BITS) |
48 | #define POOL_INDEX_SHIFT (RSEQ_BITS_PER_LONG - POOL_INDEX_BITS) | |
49 | #define MAX_POOL_LEN (1UL << POOL_INDEX_SHIFT) | |
50 | #define MAX_POOL_LEN_MASK (MAX_POOL_LEN - 1) | |
ef6695f1 | 51 | |
72b100a1 | 52 | #define POOL_SET_NR_ENTRIES POOL_INDEX_SHIFT |
ef6695f1 | 53 | |
72b100a1 MD |
54 | /* |
55 | * Smallest allocation should hold enough space for a free list pointer. | |
56 | */ | |
ef6695f1 MD |
57 | #if RSEQ_BITS_PER_LONG == 64 |
58 | # define POOL_SET_MIN_ENTRY 3 /* Smallest item_len=8 */ | |
59 | #else | |
60 | # define POOL_SET_MIN_ENTRY 2 /* Smallest item_len=4 */ | |
61 | #endif | |
62 | ||
bb1552e2 MD |
63 | /* |
64 | * Skip pool index 0 to ensure allocated entries at index 0 do not match | |
65 | * a NULL pointer. | |
66 | */ | |
67 | #define FIRST_POOL 1 | |
68 | ||
0fdf7a4c OD |
69 | #define BIT_PER_ULONG (8 * sizeof(unsigned long)) |
70 | ||
57d8b586 OD |
71 | #define MOVE_PAGES_BATCH_SIZE 4096 |
72 | ||
ef6695f1 MD |
struct free_list_node;

/*
 * Freed items are chained through a free_list_node embedded at the
 * start of the freed item's CPU 0 replica (see rseq_percpu_free()),
 * hence the minimum item size of sizeof(void *).
 */
struct free_list_node {
	struct free_list_node *next;	/* NULL-terminated singly-linked list. */
};

/* This lock protects pool create/destroy. */
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
81 | ||
a82006d0 MD |
/*
 * Pool creation attributes. Copied into the pool at create time;
 * see rseq_pool_attr_set_mmap() and rseq_pool_attr_set_robust().
 */
struct rseq_pool_attr {
	bool mmap_set;		/* true when custom mmap/munmap hooks were set. */
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;	/* Opaque argument passed to the hooks. */

	bool robust_set;	/* Enable alloc/free tracking bitmap. */
};
90 | ||
ef6695f1 MD |
struct rseq_percpu_pool {
	void *base;		/* Mapping of percpu_len * max_nr_cpus bytes; NULL when slot unused. */
	unsigned int index;	/* Index of this pool in rseq_percpu_pool[], encoded in returned pointers. */
	size_t item_len;	/* Item size, power of 2 (1UL << item_order). */
	size_t percpu_len;	/* Per-CPU range length, page aligned. */
	int item_order;		/* log2(item_len). */
	int max_nr_cpus;	/* Number of per-CPU ranges mapped in base. */

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;
	size_t next_unused;	/* Offset of the next never-allocated item. */
	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_pool_attr attr;	/* Copy of creation attributes. */

	char *name;		/* strdup() of pool name, or NULL for anonymous pools. */
	/* Track alloc/free. Only allocated when attr.robust_set. */
	unsigned long *alloc_bitmap;
};
117 | ||
//TODO: the array of pools should grow dynamically on create.
/* Index 0 is kept unused (FIRST_POOL) so encoded addresses are never NULL. */
static struct rseq_percpu_pool rseq_percpu_pool[MAX_NR_POOLS];

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_percpu_pool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_percpu_pool *entries[POOL_SET_NR_ENTRIES];
};
131 | ||
367e559c MD |
132 | static |
133 | void *__rseq_pool_percpu_ptr(struct rseq_percpu_pool *pool, int cpu, uintptr_t item_offset) | |
134 | { | |
135 | return pool->base + (pool->percpu_len * cpu) + item_offset; | |
136 | } | |
137 | ||
d24ee051 | 138 | void *__rseq_percpu_ptr(void __rseq_percpu *_ptr, int cpu) |
367e559c MD |
139 | { |
140 | uintptr_t ptr = (uintptr_t) _ptr; | |
72b100a1 MD |
141 | uintptr_t item_offset = ptr & MAX_POOL_LEN_MASK; |
142 | uintptr_t pool_index = ptr >> POOL_INDEX_SHIFT; | |
367e559c MD |
143 | struct rseq_percpu_pool *pool = &rseq_percpu_pool[pool_index]; |
144 | ||
145 | assert(cpu >= 0); | |
146 | return __rseq_pool_percpu_ptr(pool, cpu, item_offset); | |
147 | } | |
148 | ||
149 | static | |
150 | void rseq_percpu_zero_item(struct rseq_percpu_pool *pool, uintptr_t item_offset) | |
151 | { | |
152 | int i; | |
153 | ||
154 | for (i = 0; i < pool->max_nr_cpus; i++) { | |
155 | char *p = __rseq_pool_percpu_ptr(pool, i, item_offset); | |
156 | memset(p, 0, pool->item_len); | |
157 | } | |
158 | } | |
159 | ||
160 | #ifdef HAVE_LIBNUMA | |
9bd07c29 | 161 | int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool, int numa_flags) |
367e559c | 162 | { |
57d8b586 | 163 | unsigned long nr_pages; |
367e559c MD |
164 | long ret, page_len; |
165 | int cpu; | |
166 | ||
167 | if (!numa_flags) | |
9bd07c29 | 168 | return 0; |
367e559c | 169 | page_len = rseq_get_page_len(); |
19be9217 | 170 | nr_pages = pool->percpu_len >> rseq_get_count_order_ulong(page_len); |
367e559c | 171 | for (cpu = 0; cpu < pool->max_nr_cpus; cpu++) { |
367e559c | 172 | |
57d8b586 OD |
173 | int status[MOVE_PAGES_BATCH_SIZE]; |
174 | int nodes[MOVE_PAGES_BATCH_SIZE]; | |
175 | void *pages[MOVE_PAGES_BATCH_SIZE]; | |
176 | ||
177 | nodes[0] = numa_node_of_cpu(cpu); | |
178 | for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) { | |
179 | nodes[k] = nodes[0]; | |
180 | } | |
181 | ||
182 | for (unsigned long page = 0; page < nr_pages;) { | |
183 | ||
184 | size_t max_k = RSEQ_ARRAY_SIZE(pages); | |
185 | size_t left = nr_pages - page; | |
367e559c | 186 | |
57d8b586 OD |
187 | if (left < max_k) { |
188 | max_k = left; | |
189 | } | |
190 | ||
191 | for (size_t k = 0; k < max_k; ++k, ++page) { | |
192 | pages[k] = __rseq_pool_percpu_ptr(pool, cpu, page * page_len); | |
193 | status[k] = -EPERM; | |
194 | } | |
195 | ||
196 | ret = move_pages(0, max_k, pages, nodes, status, numa_flags); | |
197 | ||
198 | if (ret < 0) | |
9bd07c29 | 199 | return ret; |
57d8b586 OD |
200 | |
201 | if (ret > 0) { | |
202 | fprintf(stderr, "%lu pages were not migrated\n", ret); | |
203 | for (size_t k = 0; k < max_k; ++k) { | |
204 | if (status[k] < 0) | |
205 | fprintf(stderr, | |
206 | "Error while moving page %p to numa node %d: %u\n", | |
207 | pages[k], nodes[k], -status[k]); | |
208 | } | |
209 | } | |
367e559c MD |
210 | } |
211 | } | |
9bd07c29 | 212 | return 0; |
367e559c MD |
213 | } |
214 | #else | |
367e559c MD |
/*
 * Stub used when built without libnuma support: nothing to migrate.
 *
 * Fix: return type changed from void to int — the function contains
 * "return 0;" and must match the prototype of the HAVE_LIBNUMA variant,
 * otherwise callers checking the return value break (and a void
 * function returning a value is a constraint violation).
 */
int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	return 0;
}
220 | #endif | |
221 | ||
9bd07c29 MD |
222 | static |
223 | void *default_mmap_func(void *priv __attribute__((unused)), size_t len) | |
224 | { | |
225 | void *base; | |
226 | ||
227 | base = mmap(NULL, len, PROT_READ | PROT_WRITE, | |
228 | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); | |
229 | if (base == MAP_FAILED) | |
230 | return NULL; | |
231 | return base; | |
232 | } | |
233 | ||
/* Default unmapping hook matching the rseq_pool_attr munmap signature. */
static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}
239 | ||
0fdf7a4c | 240 | static |
9649c7ee | 241 | int create_alloc_bitmap(struct rseq_percpu_pool *pool) |
0fdf7a4c OD |
242 | { |
243 | size_t count; | |
244 | ||
9649c7ee | 245 | count = ((pool->percpu_len >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG; |
0fdf7a4c OD |
246 | |
247 | /* | |
9649c7ee MD |
248 | * Not being able to create the validation bitmap is an error |
249 | * that needs to be reported. | |
0fdf7a4c | 250 | */ |
9649c7ee MD |
251 | pool->alloc_bitmap = calloc(count, sizeof(unsigned long)); |
252 | if (!pool->alloc_bitmap) | |
253 | return -1; | |
254 | return 0; | |
0fdf7a4c OD |
255 | } |
256 | ||
ca452fee MD |
257 | static |
258 | const char *get_pool_name(const struct rseq_percpu_pool *pool) | |
259 | { | |
260 | return pool->name ? : "<anonymous>"; | |
261 | } | |
262 | ||
a9ec6111 OD |
/*
 * Validate the pool free-list on destroy: every node must lie within
 * the already-used part of the pool ([base, base + next_unused)), the
 * list must not loop, and free items + never-allocated items must add
 * up to the pool capacity. Aborts on any corruption.
 * Always inline for __builtin_return_address(0).
 */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_percpu_pool *pool)
{
	size_t total_item = pool->percpu_len >> pool->item_order;
	size_t total_never_allocated = (pool->percpu_len - pool->next_unused) >> pool->item_order;
	size_t total_freed = 0;
	/* A valid list can hold at most every item ever allocated. */
	size_t max_list_traversal = total_item - total_never_allocated;
	size_t traversal_iteration = 0;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
	     node;
	     prev = node,
	     node = node->next) {

		void *node_addr = node;

		/* More nodes than allocatable items implies a cycle. */
		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if ((node_addr < pool->base) ||
		    (node_addr >= pool->base + pool->next_unused)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration += 1;
		total_freed += 1;
	}

	/* Conservation check: nothing may be both free and allocated. */
	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}

}
309 | ||
e7cbbc10 MD |
/*
 * On pool destroy, verify that no allocated item leaked (all bitmap
 * bits clear), run the free-list consistency check, then release the
 * bitmap. No-op for non-robust pools (no bitmap).
 * Always inline for __builtin_return_address(0).
 */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_percpu_pool *pool)
{
	unsigned long *bitmap = pool->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->percpu_len >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	check_free_list(pool);

	free(bitmap);
}
335 | ||
e7cbbc10 MD |
336 | /* Always inline for __builtin_return_address(0). */ |
337 | static inline __attribute__((always_inline)) | |
9649c7ee MD |
338 | int __rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool) |
339 | { | |
340 | int ret; | |
341 | ||
342 | if (!pool->base) { | |
343 | errno = ENOENT; | |
344 | ret = -1; | |
345 | goto end; | |
346 | } | |
a9ec6111 OD |
347 | /* |
348 | * This must be done before releasing pool->base for checking the | |
349 | * free-list. | |
350 | */ | |
351 | destroy_alloc_bitmap(pool); | |
a82006d0 | 352 | ret = pool->attr.munmap_func(pool->attr.mmap_priv, pool->base, |
9649c7ee MD |
353 | pool->percpu_len * pool->max_nr_cpus); |
354 | if (ret) | |
355 | goto end; | |
356 | pthread_mutex_destroy(&pool->lock); | |
ca452fee | 357 | free(pool->name); |
9649c7ee MD |
358 | memset(pool, 0, sizeof(*pool)); |
359 | end: | |
360 | return 0; | |
361 | } | |
362 | ||
/*
 * Public pool destroy entry point: serializes against concurrent
 * create/destroy via pool_lock.
 * Returns the result of __rseq_percpu_pool_destroy().
 */
int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool)
{
	int ret;

	pthread_mutex_lock(&pool_lock);
	ret = __rseq_percpu_pool_destroy(pool);
	pthread_mutex_unlock(&pool_lock);
	return ret;
}
372 | ||
ca452fee MD |
/*
 * Create a per-CPU memory pool.
 *
 * @pool_name: optional pool name (duplicated internally; NULL for an
 *             anonymous pool).
 * @item_len: size of each item; raised to at least sizeof(void *) so a
 *            freed item can hold a free-list pointer, then rounded up
 *            to the next power of two.
 * @percpu_len: virtual address space reserved per CPU; rounded up to
 *            the page size. Must fit within the low address bits left
 *            after pool-index encoding.
 * @max_nr_cpus: number of per-CPU ranges to map.
 * @_attr: optional attributes (custom mmap hooks, robust checking);
 *            copied, so the caller keeps ownership of *_attr.
 *
 * Returns the new pool, or NULL with errno set: EINVAL on invalid
 * arguments, ENOMEM when no pool slot or memory is available.
 */
struct rseq_percpu_pool *rseq_percpu_pool_create(const char *pool_name,
		size_t item_len, size_t percpu_len, int max_nr_cpus,
		const struct rseq_pool_attr *_attr)
{
	struct rseq_percpu_pool *pool;
	struct rseq_pool_attr attr = {};
	void *base;
	unsigned int i;
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	/* Align percpu_len on page size. */
	percpu_len = rseq_align(percpu_len, rseq_get_page_len());

	if (max_nr_cpus < 0 || item_len > percpu_len ||
			percpu_len > (UINTPTR_MAX >> POOL_INDEX_BITS)) {
		errno = EINVAL;
		return NULL;
	}

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	/* Fall back to the default anonymous mmap/munmap hooks. */
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	pthread_mutex_lock(&pool_lock);
	/* Linear scan in array of pools to find empty spot. */
	for (i = FIRST_POOL; i < MAX_NR_POOLS; i++) {
		pool = &rseq_percpu_pool[i];
		if (!pool->base)
			goto found_empty;
	}
	errno = ENOMEM;
	pool = NULL;
	goto end;

found_empty:
	base = attr.mmap_func(attr.mmap_priv, percpu_len * max_nr_cpus);
	if (!base)
		goto error_alloc;
	pthread_mutex_init(&pool->lock, NULL);
	pool->base = base;
	pool->percpu_len = percpu_len;
	pool->max_nr_cpus = max_nr_cpus;
	pool->index = i;
	pool->item_len = item_len;
	pool->item_order = order;
	memcpy(&pool->attr, &attr, sizeof(attr));

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}

	/* Robust pools track alloc/free in a bitmap. */
	if (attr.robust_set) {
		if (create_alloc_bitmap(pool))
			goto error_alloc;
	}
end:
	pthread_mutex_unlock(&pool_lock);
	return pool;

error_alloc:
	/* Tears down whatever was set up so far; pool slot is recycled. */
	__rseq_percpu_pool_destroy(pool);
	pthread_mutex_unlock(&pool_lock);
	errno = ENOMEM;
	return NULL;
}
456 | ||
e7cbbc10 MD |
/*
 * Mark the item at @item_offset as allocated in the robust-pool bitmap.
 * Aborts if the slot is already marked (allocator corruption). No-op
 * when the pool is not robust (no bitmap).
 * Caller must hold pool->lock.
 * Always inline for __builtin_return_address(0).
 */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}
480 | ||
ef6695f1 | 481 | static |
d24ee051 | 482 | void __rseq_percpu *__rseq_percpu_malloc(struct rseq_percpu_pool *pool, bool zeroed) |
ef6695f1 MD |
483 | { |
484 | struct free_list_node *node; | |
485 | uintptr_t item_offset; | |
d24ee051 | 486 | void __rseq_percpu *addr; |
ef6695f1 MD |
487 | |
488 | pthread_mutex_lock(&pool->lock); | |
489 | /* Get first entry from free list. */ | |
490 | node = pool->free_list_head; | |
491 | if (node != NULL) { | |
492 | /* Remove node from free list (update head). */ | |
493 | pool->free_list_head = node->next; | |
494 | item_offset = (uintptr_t) ((void *) node - pool->base); | |
72b100a1 | 495 | addr = (void *) (((uintptr_t) pool->index << POOL_INDEX_SHIFT) | item_offset); |
ef6695f1 MD |
496 | goto end; |
497 | } | |
498 | if (pool->next_unused + pool->item_len > pool->percpu_len) { | |
ea1a3ada | 499 | errno = ENOMEM; |
ef6695f1 MD |
500 | addr = NULL; |
501 | goto end; | |
502 | } | |
503 | item_offset = pool->next_unused; | |
72b100a1 | 504 | addr = (void *) (((uintptr_t) pool->index << POOL_INDEX_SHIFT) | item_offset); |
ef6695f1 | 505 | pool->next_unused += pool->item_len; |
9649c7ee | 506 | set_alloc_slot(pool, item_offset); |
ef6695f1 MD |
507 | end: |
508 | pthread_mutex_unlock(&pool->lock); | |
509 | if (zeroed && addr) | |
510 | rseq_percpu_zero_item(pool, item_offset); | |
511 | return addr; | |
512 | } | |
513 | ||
d24ee051 | 514 | void __rseq_percpu *rseq_percpu_malloc(struct rseq_percpu_pool *pool) |
ef6695f1 MD |
515 | { |
516 | return __rseq_percpu_malloc(pool, false); | |
517 | } | |
518 | ||
d24ee051 | 519 | void __rseq_percpu *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool) |
ef6695f1 MD |
520 | { |
521 | return __rseq_percpu_malloc(pool, true); | |
522 | } | |
523 | ||
e7cbbc10 MD |
/*
 * Mark the item at @item_offset as free in the robust-pool bitmap.
 * Aborts if the slot is not currently marked allocated (double free).
 * No-op when the pool is not robust (no bitmap).
 * Caller must hold pool->lock.
 * Always inline for __builtin_return_address(0).
 */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}
548 | ||
d24ee051 | 549 | void rseq_percpu_free(void __rseq_percpu *_ptr) |
ef6695f1 MD |
550 | { |
551 | uintptr_t ptr = (uintptr_t) _ptr; | |
72b100a1 MD |
552 | uintptr_t item_offset = ptr & MAX_POOL_LEN_MASK; |
553 | uintptr_t pool_index = ptr >> POOL_INDEX_SHIFT; | |
ef6695f1 MD |
554 | struct rseq_percpu_pool *pool = &rseq_percpu_pool[pool_index]; |
555 | struct free_list_node *head, *item; | |
556 | ||
557 | pthread_mutex_lock(&pool->lock); | |
9649c7ee | 558 | clear_alloc_slot(pool, item_offset); |
ef6695f1 MD |
559 | /* Add ptr to head of free list */ |
560 | head = pool->free_list_head; | |
8ab16a24 | 561 | /* Free-list is in CPU 0 range. */ |
ef6695f1 MD |
562 | item = (struct free_list_node *)__rseq_pool_percpu_ptr(pool, 0, item_offset); |
563 | item->next = head; | |
564 | pool->free_list_head = item; | |
565 | pthread_mutex_unlock(&pool->lock); | |
566 | } | |
567 | ||
568 | struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void) | |
569 | { | |
570 | struct rseq_percpu_pool_set *pool_set; | |
571 | ||
572 | pool_set = calloc(1, sizeof(struct rseq_percpu_pool_set)); | |
573 | if (!pool_set) | |
574 | return NULL; | |
575 | pthread_mutex_init(&pool_set->lock, NULL); | |
576 | return pool_set; | |
577 | } | |
578 | ||
579 | int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set) | |
580 | { | |
581 | int order, ret; | |
582 | ||
583 | for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) { | |
584 | struct rseq_percpu_pool *pool = pool_set->entries[order]; | |
585 | ||
586 | if (!pool) | |
587 | continue; | |
588 | ret = rseq_percpu_pool_destroy(pool); | |
589 | if (ret) | |
590 | return ret; | |
591 | pool_set->entries[order] = NULL; | |
592 | } | |
593 | pthread_mutex_destroy(&pool_set->lock); | |
594 | free(pool_set); | |
595 | return 0; | |
596 | } | |
597 | ||
598 | /* Ownership of pool is handed over to pool set on success. */ | |
599 | int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set, struct rseq_percpu_pool *pool) | |
600 | { | |
601 | size_t item_order = pool->item_order; | |
602 | int ret = 0; | |
603 | ||
604 | pthread_mutex_lock(&pool_set->lock); | |
605 | if (pool_set->entries[item_order]) { | |
606 | errno = EBUSY; | |
607 | ret = -1; | |
608 | goto end; | |
609 | } | |
610 | pool_set->entries[pool->item_order] = pool; | |
611 | end: | |
612 | pthread_mutex_unlock(&pool_set->lock); | |
613 | return ret; | |
614 | } | |
615 | ||
/*
 * Allocate @len bytes from the smallest suitable pool in the set.
 * Starts at the entry whose item size matches the next power of two of
 * @len and retries with larger pools when a pool is exhausted (ENOMEM).
 * Returns NULL with errno = ENOMEM when no pool can satisfy the
 * request.
 */
static
void __rseq_percpu *__rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_percpu_pool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		/* Allocation is done outside the set lock; pool has its own lock. */
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}
657 | ||
d24ee051 | 658 | void __rseq_percpu *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len) |
ef6695f1 MD |
659 | { |
660 | return __rseq_percpu_pool_set_malloc(pool_set, len, false); | |
661 | } | |
662 | ||
d24ee051 | 663 | void __rseq_percpu *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len) |
ef6695f1 MD |
664 | { |
665 | return __rseq_percpu_pool_set_malloc(pool_set, len, true); | |
666 | } | |
9bd07c29 | 667 | |
a82006d0 MD |
/*
 * Create a zero-initialized attribute object: no custom mmap hooks,
 * robust checking disabled. Returns NULL on allocation failure.
 */
struct rseq_pool_attr *rseq_pool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_pool_attr));
}
672 | ||
/* Free an attribute object created by rseq_pool_attr_create(). NULL is a no-op. */
void rseq_pool_attr_destroy(struct rseq_pool_attr *attr)
{
	free(attr);
}
677 | ||
8118247e | 678 | int rseq_pool_attr_set_mmap(struct rseq_pool_attr *attr, |
a82006d0 | 679 | void *(*mmap_func)(void *priv, size_t len), |
9bd07c29 MD |
680 | int (*munmap_func)(void *priv, void *ptr, size_t len), |
681 | void *mmap_priv) | |
682 | { | |
8118247e MD |
683 | if (!attr) { |
684 | errno = EINVAL; | |
685 | return -1; | |
686 | } | |
a82006d0 | 687 | attr->mmap_set = true; |
9bd07c29 MD |
688 | attr->mmap_func = mmap_func; |
689 | attr->munmap_func = munmap_func; | |
690 | attr->mmap_priv = mmap_priv; | |
8118247e | 691 | return 0; |
9bd07c29 | 692 | } |
d6acc8aa MD |
693 | |
694 | int rseq_pool_attr_set_robust(struct rseq_pool_attr *attr) | |
695 | { | |
696 | if (!attr) { | |
697 | errno = EINVAL; | |
698 | return -1; | |
699 | } | |
700 | attr->robust_set = true; | |
701 | return 0; | |
702 | } |