rseq/arch.h \
rseq/compiler.h \
rseq/inject.h \
+ rseq/mempool.h \
rseq/pseudocode.h \
- rseq/percpu-alloc.h \
rseq/rseq.h \
rseq/thread-pointer.h \
rseq/utils.h
--- /dev/null
+/* SPDX-License-Identifier: MIT */
+/* SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> */
+
+#ifndef _RSEQ_MEMPOOL_H
+#define _RSEQ_MEMPOOL_H
+
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+/*
+ * rseq/mempool.h: rseq CPU-Local Storage (CLS) memory allocator.
+ *
+ * The rseq per-CPU memory allocator allows the application to request
+ * memory pools of CPU-Local memory, each containing objects of a
+ * given size (rounded to next power of 2), reserving a given virtual
+ * address size per CPU, for a given maximum number of CPUs.
+ *
+ * The per-CPU memory allocator is analogous to TLS (Thread-Local
+ * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
+ * memory allocator provides CPU-Local Storage.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Tag pointers returned by:
+ * - rseq_percpu_malloc(),
+ * - rseq_percpu_zmalloc(),
+ * - rseq_percpu_pool_set_malloc(),
+ * - rseq_percpu_pool_set_zmalloc().
+ *
+ * and passed as parameter to:
+ * - rseq_percpu_ptr(),
+ * - rseq_percpu_free().
+ *
+ * with __rseq_percpu for use by static analyzers.
+ */
+#define __rseq_percpu
+
+struct rseq_pool_attr;
+struct rseq_percpu_pool;
+
+/*
+ * rseq_percpu_pool_create: Create a per-cpu memory pool.
+ *
+ * Create a per-cpu memory pool for items of size @item_len (rounded to
+ * next power of two). The reserved allocation size is @percpu_len, and
+ * the maximum CPU value expected is (@max_nr_cpus - 1).
+ *
+ * The @attr pointer is used to specify the pool attributes. If NULL,
+ * default attribute values are used. The @attr can be destroyed
+ * immediately after rseq_percpu_pool_create() returns. The caller keeps
+ * ownership of @attr.
+ *
+ * The argument @pool_name can be used to give a name to the pool for
+ * debugging purposes. It can be NULL if no name is given.
+ *
+ * Returns a pointer to the created percpu pool. Return NULL on error,
+ * with errno set accordingly:
+ * EINVAL: Invalid argument.
+ * ENOMEM: Not enough resources (memory or pool indexes) available to
+ * allocate pool.
+ *
+ * In addition, if the attr mmap callback fails, NULL is returned and
+ * errno is propagated from the callback. The default callback can
+ * return errno=ENOMEM.
+ *
+ * This API is MT-safe.
+ */
+struct rseq_percpu_pool *rseq_percpu_pool_create(const char *pool_name,
+ size_t item_len, size_t percpu_len, int max_nr_cpus,
+ const struct rseq_pool_attr *attr);
+
+/*
+ * rseq_percpu_pool_destroy: Destroy a per-cpu memory pool.
+ *
+ * Destroy a per-cpu memory pool, unmapping its memory and removing the
+ * pool entry from the global index. No pointers allocated from the
+ * pool should be used when it is destroyed. This includes rseq_percpu_ptr().
+ *
+ * Argument @pool is a pointer to the per-cpu pool to destroy.
+ *
+ * Return values: 0 on success, -1 on error, with errno set accordingly:
+ * ENOENT: Trying to free a pool which was not allocated.
+ *
+ * If the munmap_func callback fails, -1 is returned and errno is
+ * propagated from the callback. The default callback can return
+ * errno=EINVAL.
+ *
+ * This API is MT-safe.
+ */
+int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool);
+
+/*
+ * rseq_percpu_malloc: Allocate memory from a per-cpu pool.
+ *
+ * Allocate an item from a per-cpu @pool. The allocation will reserve
+ * an item of the size specified by @item_len (rounded to next power of
+ * two) at pool creation. This effectively reserves space for this item
+ * on all CPUs.
+ *
+ * On success, return a "__rseq_percpu" encoded pointer to the pool
+ * item. This encoded pointer is meant to be passed to rseq_percpu_ptr()
+ * to be decoded to a valid address before being accessed.
+ *
+ * Return NULL (errno=ENOMEM) if there is not enough space left in the
+ * pool to allocate an item.
+ *
+ * This API is MT-safe.
+ */
+void __rseq_percpu *rseq_percpu_malloc(struct rseq_percpu_pool *pool);
+
+/*
+ * rseq_percpu_zmalloc: Allocate zero-initialized memory from a per-cpu pool.
+ *
+ * Allocate memory for an item within the pool, and zero-initialize its
+ * memory on all CPUs. See rseq_percpu_malloc for details.
+ *
+ * This API is MT-safe.
+ */
+void __rseq_percpu *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool);
+
+/*
+ * rseq_percpu_free: Free memory from a per-cpu pool.
+ *
+ * Free an item pointed to by @ptr from its per-cpu pool.
+ *
+ * The @ptr argument is a __rseq_percpu encoded pointer returned by
+ * either:
+ *
+ * - rseq_percpu_malloc(),
+ * - rseq_percpu_zmalloc(),
+ * - rseq_percpu_pool_set_malloc(),
+ * - rseq_percpu_pool_set_zmalloc().
+ *
+ * This API is MT-safe.
+ */
+void rseq_percpu_free(void __rseq_percpu *ptr);
+
+/*
+ * rseq_percpu_ptr: Decode a per-cpu pointer.
+ *
+ * Decode a per-cpu pointer @ptr to get the associated pointer for the
+ * given @cpu. The @ptr argument is a __rseq_percpu encoded pointer
+ * returned by either:
+ *
+ * - rseq_percpu_malloc(),
+ * - rseq_percpu_zmalloc(),
+ * - rseq_percpu_pool_set_malloc(),
+ * - rseq_percpu_pool_set_zmalloc().
+ *
+ * The __rseq_percpu pointer can be decoded with rseq_percpu_ptr() even
+ * after it has been freed, as long as its associated pool has not been
+ * destroyed. However, memory pointed to by the decoded pointer should
+ * not be accessed after the __rseq_percpu pointer has been freed.
+ *
+ * The macro rseq_percpu_ptr() preserves the type of the @ptr parameter
+ * for the returned pointer, but removes the __rseq_percpu annotation.
+ *
+ * This API is MT-safe.
+ */
+void *__rseq_percpu_ptr(void __rseq_percpu *ptr, int cpu);
+#define rseq_percpu_ptr(ptr, cpu) ((__typeof__(*(ptr)) *) __rseq_percpu_ptr(ptr, cpu))
+
+/*
+ * rseq_percpu_pool_set_create: Create a pool set.
+ *
+ * Create a set of pools. Its purpose is to offer a memory allocator API
+ * for variable-length items (e.g. variable length strings). When
+ * created, the pool set has no pool. Pools can be created and added to
+ * the set. One common approach would be to create pools for each
+ * relevant power of two allocation size useful for the application.
+ * Only one pool can be added to the pool set for each power of two
+ * allocation size.
+ *
+ * Returns a pool set pointer on success, else returns NULL with
+ * errno=ENOMEM (out of memory).
+ *
+ * This API is MT-safe.
+ */
+struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void);
+
+/*
+ * rseq_percpu_pool_set_destroy: Destroy a pool set.
+ *
+ * Destroy a pool set and its associated resources. The pools that were
+ * added to the pool set are destroyed as well.
+ *
+ * Returns 0 on success, -1 on failure (or partial failure), with errno
+ * set by rseq_percpu_pool_destroy(). Using a pool set after destroy
+ * failure is undefined.
+ *
+ * This API is MT-safe.
+ */
+int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set);
+
+/*
+ * rseq_percpu_pool_set_add_pool: Add a pool to a pool set.
+ *
+ * Add a @pool to the @pool_set. On success, its ownership is handed
+ * over to the pool set, so the caller should not destroy it explicitly.
+ * Only one pool can be added to the pool set for each power of two
+ * allocation size.
+ *
+ * Returns 0 on success, -1 on error with the following errno:
+ * - EBUSY: A pool already exists in the pool set for this power of two
+ * allocation size.
+ *
+ * This API is MT-safe.
+ */
+int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set,
+ struct rseq_percpu_pool *pool);
+
+/*
+ * rseq_percpu_pool_set_malloc: Allocate memory from a per-cpu pool set.
+ *
+ * Allocate an item from a per-cpu pool within @pool_set. The allocation
+ * will reserve an item of the size specified by @len (rounded to next
+ * power of two). This effectively reserves space for this item on all
+ * CPUs.
+ *
+ * The space reservation will search for the smallest pool within
+ * @pool_set which respects the following conditions:
+ *
+ * - it has an item size large enough to fit @len,
+ * - it has space available.
+ *
+ * On success, return a "__rseq_percpu" encoded pointer to the pool
+ * item. This encoded pointer is meant to be passed to rseq_percpu_ptr()
+ * to be decoded to a valid address before being accessed.
+ *
+ * Return NULL (errno=ENOMEM) if there is not enough space left in the
+ * pool to allocate an item.
+ *
+ * This API is MT-safe.
+ */
+void __rseq_percpu *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len);
+
+/*
+ * rseq_percpu_pool_set_zmalloc: Allocate zero-initialized memory from a per-cpu pool set.
+ *
+ * Allocate memory for an item within the pool, and zero-initialize its
+ * memory on all CPUs. See rseq_percpu_pool_set_malloc for details.
+ *
+ * This API is MT-safe.
+ */
+void __rseq_percpu *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len);
+
+/*
+ * rseq_percpu_pool_init_numa: Move pages to the NUMA node associated to their CPU topology.
+ *
+ * For pages allocated within @pool, invoke move_pages(2) with the given
+ * @numa_flags to move the pages to the NUMA node associated to their
+ * CPU topology.
+ *
+ * Argument @numa_flags are passed to move_pages(2). The expected flags are:
+ * MPOL_MF_MOVE: move process-private pages to cpu-specific numa nodes.
+ * MPOL_MF_MOVE_ALL: move shared pages to cpu-specific numa nodes
+ * (requires CAP_SYS_NICE).
+ *
+ * Returns 0 on success, else return -1 with errno set by move_pages(2).
+ */
+int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool, int numa_flags);
+
+/*
+ * rseq_pool_attr_create: Create a pool attribute structure.
+ */
+struct rseq_pool_attr *rseq_pool_attr_create(void);
+
+/*
+ * rseq_pool_attr_destroy: Destroy a pool attribute structure.
+ */
+void rseq_pool_attr_destroy(struct rseq_pool_attr *attr);
+
+/*
+ * rseq_pool_attr_set_mmap: Set pool attribute structure mmap functions.
+ *
+ * The @mmap_func callback is used to map the memory for the pool.
+ *
+ * The @munmap_func callback is used to unmap the memory when the pool
+ * is destroyed.
+ *
+ * The @mmap_priv argument is a private data pointer passed to both
+ * @mmap_func and @munmap_func callbacks.
+ *
+ * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
+ */
+int rseq_pool_attr_set_mmap(struct rseq_pool_attr *attr,
+ void *(*mmap_func)(void *priv, size_t len),
+ int (*munmap_func)(void *priv, void *ptr, size_t len),
+ void *mmap_priv);
+
+/*
+ * rseq_pool_attr_set_robust: Set pool robust attribute.
+ *
+ * The robust pool attribute enables runtime validation of the pool:
+ *
+ * - Check for double-free of pointers.
+ *
+ * - Detect memory leaks on pool destruction.
+ *
+ * - Detect free-list corruption on pool destruction.
+ *
+ * There is a marginal runtime overhead on malloc/free operations.
+ *
+ * The memory overhead is (pool->percpu_len / pool->item_len) / CHAR_BIT
+ * bytes, over the lifetime of the pool.
+ *
+ * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
+ */
+int rseq_pool_attr_set_robust(struct rseq_pool_attr *attr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RSEQ_MEMPOOL_H */
+++ /dev/null
-/* SPDX-License-Identifier: MIT */
-/* SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> */
-
-#ifndef _RSEQ_PERCPU_ALLOC_H
-#define _RSEQ_PERCPU_ALLOC_H
-
-#include <stddef.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-
-/*
- * rseq/percpu-alloc.h: rseq CPU-Local Storage (CLS) memory allocator.
- *
- * The rseq per-CPU memory allocator allows the application the request
- * memory pools of CPU-Local memory each of containing objects of a
- * given size (rounded to next power of 2), reserving a given virtual
- * address size per CPU, for a given maximum number of CPUs.
- *
- * The per-CPU memory allocator is analogous to TLS (Thread-Local
- * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
- * memory allocator provides CPU-Local Storage.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Tag pointers returned by:
- * - rseq_percpu_malloc(),
- * - rseq_percpu_zmalloc(),
- * - rseq_percpu_pool_set_malloc(),
- * - rseq_percpu_pool_set_zmalloc().
- *
- * and passed as parameter to:
- * - rseq_percpu_ptr(),
- * - rseq_percpu_free().
- *
- * with __rseq_percpu for use by static analyzers.
- */
-#define __rseq_percpu
-
-struct rseq_pool_attr;
-struct rseq_percpu_pool;
-
-/*
- * rseq_percpu_pool_create: Create a per-cpu memory pool.
- *
- * Create a per-cpu memory pool for items of size @item_len (rounded to
- * next power of two). The reserved allocation size is @percpu_len, and
- * the maximum CPU value expected is (@max_nr_cpus - 1).
- *
- * The @attr pointer used to specify the pool attributes. If NULL, use a
- * default attribute values. The @attr can be destroyed immediately
- * after rseq_percpu_pool_create() returns. The caller keeps ownership
- * of @attr.
- *
- * The argument @pool_name can be used to given a name to the pool for
- * debugging purposes. It can be NULL if no name is given.
- *
- * Returns a pointer to the created percpu pool. Return NULL on error,
- * with errno set accordingly:
- * EINVAL: Invalid argument.
- * ENOMEM: Not enough resources (memory or pool indexes) available to
- * allocate pool.
- *
- * In addition, if the attr mmap callback fails, NULL is returned and
- * errno is propagated from the callback. The default callback can
- * return errno=ENOMEM.
- *
- * This API is MT-safe.
- */
-struct rseq_percpu_pool *rseq_percpu_pool_create(const char *pool_name,
- size_t item_len, size_t percpu_len, int max_nr_cpus,
- const struct rseq_pool_attr *attr);
-
-/*
- * rseq_percpu_pool_destroy: Destroy a per-cpu memory pool.
- *
- * Destroy a per-cpu memory pool, unmapping its memory and removing the
- * pool entry from the global index. No pointers allocated from the
- * pool should be used when it is destroyed. This includes rseq_percpu_ptr().
- *
- * Argument @pool is a pointer to the per-cpu pool to destroy.
- *
- * Return values: 0 on success, -1 on error, with errno set accordingly:
- * ENOENT: Trying to free a pool which was not allocated.
- *
- * If the munmap_func callback fails, -1 is returned and errno is
- * propagated from the callback. The default callback can return
- * errno=EINVAL.
- *
- * This API is MT-safe.
- */
-int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool);
-
-/*
- * rseq_percpu_malloc: Allocate memory from a per-cpu pool.
- *
- * Allocate an item from a per-cpu @pool. The allocation will reserve
- * an item of the size specified by @item_len (rounded to next power of
- * two) at pool creation. This effectively reserves space for this item
- * on all CPUs.
- *
- * On success, return a "__rseq_percpu" encoded pointer to the pool
- * item. This encoded pointer is meant to be passed to rseq_percpu_ptr()
- * to be decoded to a valid address before being accessed.
- *
- * Return NULL (errno=ENOMEM) if there is not enough space left in the
- * pool to allocate an item.
- *
- * This API is MT-safe.
- */
-void __rseq_percpu *rseq_percpu_malloc(struct rseq_percpu_pool *pool);
-
-/*
- * rseq_percpu_zmalloc: Allocated zero-initialized memory from a per-cpu pool.
- *
- * Allocate memory for an item within the pool, and zero-initialize its
- * memory on all CPUs. See rseq_percpu_malloc for details.
- *
- * This API is MT-safe.
- */
-void __rseq_percpu *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool);
-
-/*
- * rseq_percpu_free: Free memory from a per-cpu pool.
- *
- * Free an item pointed to by @ptr from its per-cpu pool.
- *
- * The @ptr argument is a __rseq_percpu encoded pointer returned by
- * either:
- *
- * - rseq_percpu_malloc(),
- * - rseq_percpu_zmalloc(),
- * - rseq_percpu_pool_set_malloc(),
- * - rseq_percpu_pool_set_zmalloc().
- *
- * This API is MT-safe.
- */
-void rseq_percpu_free(void __rseq_percpu *ptr);
-
-/*
- * rseq_percpu_ptr: Decode a per-cpu pointer.
- *
- * Decode a per-cpu pointer @ptr to get the associated pointer for the
- * given @cpu. The @ptr argument is a __rseq_percpu encoded pointer
- * returned by either:
- *
- * - rseq_percpu_malloc(),
- * - rseq_percpu_zmalloc(),
- * - rseq_percpu_pool_set_malloc(),
- * - rseq_percpu_pool_set_zmalloc().
- *
- * The __rseq_percpu pointer can be decoded with rseq_percpu_ptr() even
- * after it has been freed, as long as its associated pool has not been
- * destroyed. However, memory pointed to by the decoded pointer should
- * not be accessed after the __rseq_percpu pointer has been freed.
- *
- * The macro rseq_percpu_ptr() preserves the type of the @ptr parameter
- * for the returned pointer, but removes the __rseq_percpu annotation.
- *
- * This API is MT-safe.
- */
-void *__rseq_percpu_ptr(void __rseq_percpu *ptr, int cpu);
-#define rseq_percpu_ptr(ptr, cpu) ((__typeof__(*(ptr)) *) __rseq_percpu_ptr(ptr, cpu))
-
-/*
- * rseq_percpu_pool_set_create: Create a pool set.
- *
- * Create a set of pools. Its purpose is to offer a memory allocator API
- * for variable-length items (e.g. variable length strings). When
- * created, the pool set has no pool. Pools can be created and added to
- * the set. One common approach would be to create pools for each
- * relevant power of two allocation size useful for the application.
- * Only one pool can be added to the pool set for each power of two
- * allocation size.
- *
- * Returns a pool set pointer on success, else returns NULL with
- * errno=ENOMEM (out of memory).
- *
- * This API is MT-safe.
- */
-struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void);
-
-/*
- * rseq_percpu_pool_set_destroy: Destroy a pool set.
- *
- * Destroy a pool set and its associated resources. The pools that were
- * added to the pool set are destroyed as well.
- *
- * Returns 0 on success, -1 on failure (or partial failure), with errno
- * set by rseq_percpu_pool_destroy(). Using a pool set after destroy
- * failure is undefined.
- *
- * This API is MT-safe.
- */
-int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set);
-
-/*
- * rseq_percpu_pool_set_add_pool: Add a pool to a pool set.
- *
- * Add a @pool to the @pool_set. On success, its ownership is handed
- * over to the pool set, so the caller should not destroy it explicitly.
- * Only one pool can be added to the pool set for each power of two
- * allocation size.
- *
- * Returns 0 on success, -1 on error with the following errno:
- * - EBUSY: A pool already exists in the pool set for this power of two
- * allocation size.
- *
- * This API is MT-safe.
- */
-int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set,
- struct rseq_percpu_pool *pool);
-
-/*
- * rseq_percpu_pool_set_malloc: Allocate memory from a per-cpu pool set.
- *
- * Allocate an item from a per-cpu @pool. The allocation will reserve
- * an item of the size specified by @len (rounded to next power of
- * two). This effectively reserves space for this item on all CPUs.
- *
- * The space reservation will search for the smallest pool within
- * @pool_set which respects the following conditions:
- *
- * - it has an item size large enough to fit @len,
- * - it has space available.
- *
- * On success, return a "__rseq_percpu" encoded pointer to the pool
- * item. This encoded pointer is meant to be passed to rseq_percpu_ptr()
- * to be decoded to a valid address before being accessed.
- *
- * Return NULL (errno=ENOMEM) if there is not enough space left in the
- * pool to allocate an item.
- *
- * This API is MT-safe.
- */
-void __rseq_percpu *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len);
-
-/*
- * rseq_percpu_pool_set_zmalloc: Allocated zero-initialized memory from a per-cpu pool set.
- *
- * Allocate memory for an item within the pool, and zero-initialize its
- * memory on all CPUs. See rseq_percpu_pool_set_malloc for details.
- *
- * This API is MT-safe.
- */
-void __rseq_percpu *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len);
-
-/*
- * rseq_percpu_pool_init_numa: Move pages to the NUMA node associated to their CPU topology.
- *
- * For pages allocated within @pool, invoke move_pages(2) with the given
- * @numa_flags to move the pages to the NUMA node associated to their
- * CPU topology.
- *
- * Argument @numa_flags are passed to move_pages(2). The expected flags are:
- * MPOL_MF_MOVE: move process-private pages to cpu-specific numa nodes.
- * MPOL_MF_MOVE_ALL: move shared pages to cpu-specific numa nodes
- * (requires CAP_SYS_NICE).
- *
- * Returns 0 on success, else return -1 with errno set by move_pages(2).
- */
-int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool, int numa_flags);
-
-/*
- * rseq_pool_attr_create: Create a pool attribute structure.
- */
-struct rseq_pool_attr *rseq_pool_attr_create(void);
-
-/*
- * rseq_pool_attr_destroy: Destroy a pool attribute structure.
- */
-void rseq_pool_attr_destroy(struct rseq_pool_attr *attr);
-
-/*
- * rseq_pool_attr_set_mmap: Set pool attribute structure mmap functions.
- *
- * The @mmap_func callback used to map the memory for the pool.
- *
- * The @munmap_func callback used to unmap the memory when the pool
- * is destroyed.
- *
- * The @mmap_priv argument is a private data pointer passed to both
- * @mmap_func and @munmap_func callbacks.
- *
- * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
- */
-int rseq_pool_attr_set_mmap(struct rseq_pool_attr *attr,
- void *(*mmap_func)(void *priv, size_t len),
- int (*munmap_func)(void *priv, void *ptr, size_t len),
- void *mmap_priv);
-
-/*
- * rseq_pool_attr_set_robust: Set pool robust attribute.
- *
- * The robust pool attribute enables runtime validation of the pool:
- *
- * - Check for double-free of pointers.
- *
- * - Detect memory leaks on pool destruction.
- *
- * - Detect free-list corruption on pool destruction.
- *
- * There is a marginal runtime overhead on malloc/free operations.
- *
- * The memory overhead is (pool->percpu_len / pool->item_len) / CHAR_BIT
- * bytes, over the lifetime of the pool.
- *
- * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
- */
-int rseq_pool_attr_set_robust(struct rseq_pool_attr *attr);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RSEQ_PERCPU_ALLOC_H */
lib_LTLIBRARIES = librseq.la
librseq_la_SOURCES = \
- rseq.c rseq-percpu-alloc.c rseq-alloc-utils.h
+ rseq.c rseq-mempool.c rseq-utils.h
librseq_la_LDFLAGS = -no-undefined -version-info $(RSEQ_LIBRARY_VERSION)
librseq_la_LIBADD = $(DL_LIBS)
+++ /dev/null
-// SPDX-License-Identifier: MIT
-// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
-
-#ifndef _RSEQ_ALLOC_UTILS_H
-#define _RSEQ_ALLOC_UTILS_H
-
-#define RSEQ_ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
-#define __rseq_align_mask(v, mask) (((v) + (mask)) & ~(mask))
-#define rseq_align(v, align) __rseq_align_mask(v, (__typeof__(v)) (align) - 1)
-
-static inline
-unsigned int rseq_fls_u64(uint64_t x)
-{
- unsigned int r = 64;
-
- if (!x)
- return 0;
-
- if (!(x & 0xFFFFFFFF00000000ULL)) {
- x <<= 32;
- r -= 32;
- }
- if (!(x & 0xFFFF000000000000ULL)) {
- x <<= 16;
- r -= 16;
- }
- if (!(x & 0xFF00000000000000ULL)) {
- x <<= 8;
- r -= 8;
- }
- if (!(x & 0xF000000000000000ULL)) {
- x <<= 4;
- r -= 4;
- }
- if (!(x & 0xC000000000000000ULL)) {
- x <<= 2;
- r -= 2;
- }
- if (!(x & 0x8000000000000000ULL)) {
- x <<= 1;
- r -= 1;
- }
- return r;
-}
-
-static inline
-unsigned int rseq_fls_u32(uint32_t x)
-{
- unsigned int r = 32;
-
- if (!x)
- return 0;
- if (!(x & 0xFFFF0000U)) {
- x <<= 16;
- r -= 16;
- }
- if (!(x & 0xFF000000U)) {
- x <<= 8;
- r -= 8;
- }
- if (!(x & 0xF0000000U)) {
- x <<= 4;
- r -= 4;
- }
- if (!(x & 0xC0000000U)) {
- x <<= 2;
- r -= 2;
- }
- if (!(x & 0x80000000U)) {
- x <<= 1;
- r -= 1;
- }
- return r;
-}
-
-static inline
-unsigned int rseq_fls_ulong(unsigned long x)
-{
-#if RSEQ_BITS_PER_LONG == 32
- return rseq_fls_u32(x);
-#else
- return rseq_fls_u64(x);
-#endif
-}
-
-/*
- * Return the minimum order for which x <= (1UL << order).
- * Return -1 if x is 0.
- */
-static inline
-int rseq_get_count_order_ulong(unsigned long x)
-{
- if (!x)
- return -1;
-
- return rseq_fls_ulong(x - 1);
-}
-
-#define RSEQ_DEFAULT_PAGE_SIZE 4096
-
-static inline
-long rseq_get_page_len(void)
-{
- long page_len = sysconf(_SC_PAGE_SIZE);
-
- if (page_len < 0)
- page_len = RSEQ_DEFAULT_PAGE_SIZE;
- return page_len;
-}
-
-static inline
-int rseq_hweight_ulong(unsigned long v)
-{
- return __builtin_popcountl(v);
-}
-
-#endif /* _RSEQ_ALLOC_UTILS_H */
--- /dev/null
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
+#include <rseq/mempool.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <string.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <rseq/compiler.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#ifdef HAVE_LIBNUMA
+# include <numa.h>
+# include <numaif.h>
+#endif
+
+#include "rseq-utils.h"
+
+/*
+ * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
+ *
+ * The rseq per-CPU memory allocator allows the application to request
+ * memory pools of CPU-Local memory, each containing objects of a
+ * given size (rounded to next power of 2), reserving a given virtual
+ * address size per CPU, for a given maximum number of CPUs.
+ *
+ * The per-CPU memory allocator is analogous to TLS (Thread-Local
+ * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
+ * memory allocator provides CPU-Local Storage.
+ */
+
+/*
+ * Use the high bits of per-CPU addresses to index the pool.
+ * This leaves the low bits available to the application for pointer
+ * tagging (based on next power of 2 alignment of the allocations).
+ */
+#if RSEQ_BITS_PER_LONG == 64
+# define POOL_INDEX_BITS 16
+#else
+# define POOL_INDEX_BITS 8
+#endif
+#define MAX_NR_POOLS (1UL << POOL_INDEX_BITS)
+#define POOL_INDEX_SHIFT (RSEQ_BITS_PER_LONG - POOL_INDEX_BITS)
+#define MAX_POOL_LEN (1UL << POOL_INDEX_SHIFT)
+#define MAX_POOL_LEN_MASK (MAX_POOL_LEN - 1)
+
+#define POOL_SET_NR_ENTRIES POOL_INDEX_SHIFT
+
+/*
+ * Smallest allocation should hold enough space for a free list pointer.
+ */
+#if RSEQ_BITS_PER_LONG == 64
+# define POOL_SET_MIN_ENTRY 3 /* Smallest item_len=8 */
+#else
+# define POOL_SET_MIN_ENTRY 2 /* Smallest item_len=4 */
+#endif
+
+/*
+ * Skip pool index 0 to ensure allocated entries at index 0 do not match
+ * a NULL pointer.
+ */
+#define FIRST_POOL 1
+
+#define BIT_PER_ULONG (8 * sizeof(unsigned long))
+
+#define MOVE_PAGES_BATCH_SIZE 4096
+
+struct free_list_node;
+
+/*
+ * Node of a NULL-terminated singly-linked list: the only member is the
+ * pointer to the next node.
+ */
+struct free_list_node {
+	struct free_list_node *next;
+};
+
+/* This lock protects pool create/destroy. */
+static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Pool attributes. Holds a pair of mmap/munmap callbacks with their
+ * private data pointer, and the "robust" flag.
+ */
+struct rseq_pool_attr {
+	/* NOTE(review): presumably true once the mmap callbacks below were set explicitly - confirm against the setter. */
+	bool mmap_set;
+	/* Callback mapping @len bytes; receives the private data pointer as @priv. */
+	void *(*mmap_func)(void *priv, size_t len);
+	/* Callback unmapping @len bytes at @ptr; receives the private data pointer as @priv. */
+	int (*munmap_func)(void *priv, void *ptr, size_t len);
+	/* Private data pointer handed to both callbacks. */
+	void *mmap_priv;
+
+	/* NOTE(review): presumably true when runtime validation (robust pool) was requested - confirm against the setter. */
+	bool robust_set;
+};
+
+/*
+ * Descriptor of a per-CPU memory pool. __rseq_percpu_ptr() looks pools
+ * up by index in the static rseq_percpu_pool[] array.
+ */
+struct rseq_percpu_pool {
+	/* Start of the pool memory: the range of CPU n starts at base + n * percpu_len. */
+	void *base;
+	/* NOTE(review): presumably the index of this pool in rseq_percpu_pool[] - confirm. */
+	unsigned int index;
+	/* Length of one item in bytes (the length zeroed by rseq_percpu_zero_item()). */
+	size_t item_len;
+	/* Length in bytes of the range reserved for each CPU. */
+	size_t percpu_len;
+	/* Shift such that (percpu_len >> item_order) is the number of items per CPU range. */
+	int item_order;
+	/* Number of per-CPU ranges: valid CPU numbers are 0 to max_nr_cpus - 1. */
+	int max_nr_cpus;
+
+	/*
+	 * The free list chains freed items on the CPU 0 address range.
+	 * We should rethink this decision if false sharing between
+	 * malloc/free from other CPUs and data accesses from CPU 0
+	 * becomes an issue. This is a NULL-terminated singly-linked
+	 * list.
+	 */
+	struct free_list_node *free_list_head;
+	/*
+	 * Offset within the per-CPU range: check_free_list() accepts
+	 * free-list nodes only within [base, base + next_unused), and
+	 * counts (percpu_len - next_unused) >> item_order items as
+	 * never allocated.
+	 */
+	size_t next_unused;
+	/* This lock protects allocation/free within the pool. */
+	pthread_mutex_t lock;
+
+	/* Attributes of this pool (see struct rseq_pool_attr). */
+	struct rseq_pool_attr attr;
+
+	/* Pool name used in diagnostics; NULL is reported as "<anonymous>". */
+	char *name;
+	/*
+	 * Track alloc/free. Bitmap sized at one bit per item by
+	 * create_alloc_bitmap(); may be NULL, in which case
+	 * destroy_alloc_bitmap() does nothing. Bits still set when
+	 * destroy_alloc_bitmap() runs are reported as leaked items.
+	 */
+	unsigned long *alloc_bitmap;
+};
+
+//TODO: the array of pools should grow dynamically on create.
+static struct rseq_percpu_pool rseq_percpu_pool[MAX_NR_POOLS];
+
+/*
+ * Pool set entries are indexed by item_len rounded to the next power of
+ * 2. A pool set can contain NULL pool entries, in which case the next
+ * large enough entry will be used for allocation.
+ *
+ * The entries array holds POOL_SET_NR_ENTRIES pool pointers.
+ */
+struct rseq_percpu_pool_set {
+	/* This lock protects add vs malloc/zmalloc within the pool set. */
+	pthread_mutex_t lock;
+	/* One slot per power-of-two item size; NULL when no pool was added for that size. */
+	struct rseq_percpu_pool *entries[POOL_SET_NR_ENTRIES];
+};
+
+/*
+ * Return the address located @item_offset bytes into the range which
+ * @pool reserves for @cpu. The range of a CPU starts at
+ * pool->base + cpu * pool->percpu_len.
+ */
+static
+void *__rseq_pool_percpu_ptr(struct rseq_percpu_pool *pool, int cpu, uintptr_t item_offset)
+{
+	void *cpu_range = pool->base + (pool->percpu_len * cpu);
+
+	return cpu_range + item_offset;
+}
+
+/*
+ * Decode the __rseq_percpu encoded pointer @_ptr for @cpu.
+ *
+ * The bits above POOL_INDEX_SHIFT select the pool within
+ * rseq_percpu_pool[], and the bits covered by MAX_POOL_LEN_MASK are the
+ * item offset within the per-CPU range of that pool. @cpu must not be
+ * negative (asserted).
+ */
+void *__rseq_percpu_ptr(void __rseq_percpu *_ptr, int cpu)
+{
+	const uintptr_t encoded = (uintptr_t) _ptr;
+	struct rseq_percpu_pool *pool;
+
+	assert(cpu >= 0);
+	pool = &rseq_percpu_pool[encoded >> POOL_INDEX_SHIFT];
+	return __rseq_pool_percpu_ptr(pool, cpu, encoded & MAX_POOL_LEN_MASK);
+}
+
+/*
+ * Zero the item located at @item_offset in the range of every CPU of
+ * @pool (CPUs 0 to pool->max_nr_cpus - 1), pool->item_len bytes each.
+ */
+static
+void rseq_percpu_zero_item(struct rseq_percpu_pool *pool, uintptr_t item_offset)
+{
+	int cpu = 0;
+
+	while (cpu < pool->max_nr_cpus) {
+		memset(__rseq_pool_percpu_ptr(pool, cpu, item_offset), 0, pool->item_len);
+		cpu++;
+	}
+}
+
+#ifdef HAVE_LIBNUMA
+/*
+ * Move the pages of each per-CPU range of @pool to the NUMA node which
+ * numa_node_of_cpu() reports for that CPU, invoking move_pages(2) with
+ * @numa_flags on batches of at most MOVE_PAGES_BATCH_SIZE pages.
+ *
+ * Returns 0 right away when @numa_flags is 0. Returns the negative
+ * move_pages(2) result as soon as a call fails. Pages which move_pages(2)
+ * reports as not migrated are printed to stderr and are not treated as
+ * a failure.
+ */
+int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool, int numa_flags)
+{
+	unsigned long nr_pages;
+	long ret, page_len;
+	int cpu;
+
+	if (!numa_flags)
+		return 0;
+	page_len = rseq_get_page_len();
+	nr_pages = pool->percpu_len >> rseq_get_count_order_ulong(page_len);
+	for (cpu = 0; cpu < pool->max_nr_cpus; cpu++) {
+
+		int status[MOVE_PAGES_BATCH_SIZE];
+		int nodes[MOVE_PAGES_BATCH_SIZE];
+		void *pages[MOVE_PAGES_BATCH_SIZE];
+
+		/* Every page of this CPU's range targets the same node. */
+		nodes[0] = numa_node_of_cpu(cpu);
+		for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
+			nodes[k] = nodes[0];
+		}
+
+		for (unsigned long page = 0; page < nr_pages;) {
+
+			size_t max_k = RSEQ_ARRAY_SIZE(pages);
+			size_t left = nr_pages - page;
+
+			/* The last batch may be shorter than the arrays. */
+			if (left < max_k) {
+				max_k = left;
+			}
+
+			/* Fill the batch; each status is pre-set to an error value. */
+			for (size_t k = 0; k < max_k; ++k, ++page) {
+				pages[k] = __rseq_pool_percpu_ptr(pool, cpu, page * page_len);
+				status[k] = -EPERM;
+			}
+
+			ret = move_pages(0, max_k, pages, nodes, status, numa_flags);
+
+			if (ret < 0)
+				return ret;
+
+			if (ret > 0) {
+				/* ret is a long: print it with %ld. */
+				fprintf(stderr, "%ld pages were not migrated\n", ret);
+				for (size_t k = 0; k < max_k; ++k) {
+					/* -status[k] is an int: print it with %d. */
+					if (status[k] < 0)
+						fprintf(stderr,
+							"Error while moving page %p to numa node %d: %d\n",
+							pages[k], nodes[k], -status[k]);
+				}
+			}
+		}
+	}
+	return 0;
+}
+#else
+/*
+ * Without libnuma support there is nothing to move: always returns 0.
+ * The return type is int, matching the prototype in rseq/mempool.h.
+ */
+int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool __attribute__((unused)),
+		int numa_flags __attribute__((unused)))
+{
+	return 0;
+}
+#endif
+
+/*
+ * Default mapping callback: create a private anonymous read/write
+ * mapping of @len bytes. Returns the mapping address, or NULL if mmap()
+ * fails. @priv is unused.
+ */
+static
+void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
+{
+	void *mapping = mmap(NULL, len, PROT_READ | PROT_WRITE,
+			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+	return (mapping == MAP_FAILED) ? NULL : mapping;
+}
+
+/*
+ * Default unmapping callback: unmap the @len bytes at @ptr and return
+ * the munmap() result. @priv is unused.
+ */
+static
+int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
+{
+	int ret = munmap(ptr, len);
+
+	return ret;
+}
+
+/*
+ * Allocate the zero-initialized allocation bitmap of @pool: one bit per
+ * item, rounded up to a whole number of unsigned long words.
+ *
+ * Returns 0 on success, -1 if the bitmap cannot be allocated. Not being
+ * able to create the validation bitmap is an error that needs to be
+ * reported.
+ */
+static
+int create_alloc_bitmap(struct rseq_percpu_pool *pool)
+{
+	const size_t nr_items = pool->percpu_len >> pool->item_order;
+	const size_t nr_words = (nr_items + BIT_PER_ULONG - 1) / BIT_PER_ULONG;
+
+	pool->alloc_bitmap = calloc(nr_words, sizeof(unsigned long));
+	return pool->alloc_bitmap ? 0 : -1;
+}
+
+/*
+ * Return the name of @pool for diagnostics, or "<anonymous>" when the
+ * pool has no name.
+ */
+static
+const char *get_pool_name(const struct rseq_percpu_pool *pool)
+{
+	if (pool->name)
+		return pool->name;
+	return "<anonymous>";
+}
+
+/*
+ * Validate the free list of @pool, printing a diagnostic to stderr and
+ * aborting when:
+ *
+ * - the list holds more nodes than the number of items handed out so
+ *   far (possibly a loop),
+ * - a node lies outside of [pool->base, pool->base + pool->next_unused),
+ * - the freed items plus the never allocated items do not add up to
+ *   the total number of items.
+ *
+ * Always inline for __builtin_return_address(0).
+ */
+static inline __attribute__((always_inline))
+void check_free_list(const struct rseq_percpu_pool *pool)
+{
+	const size_t nr_items = pool->percpu_len >> pool->item_order;
+	const size_t nr_never_allocated = (pool->percpu_len - pool->next_unused) >> pool->item_order;
+	/* A sane list cannot be longer than the number of items handed out. */
+	const size_t walk_limit = nr_items - nr_never_allocated;
+	struct free_list_node *cur = pool->free_list_head, *pred = NULL;
+	size_t nr_freed = 0;
+
+	while (cur) {
+		void *cur_addr = cur;
+		bool in_range;
+
+		if (nr_freed >= walk_limit) {
+			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
+				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
+			abort();
+		}
+
+		in_range = (cur_addr >= pool->base) &&
+			(cur_addr < pool->base + pool->next_unused);
+		if (!in_range) {
+			if (pred)
+				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
+					__func__, pred, cur, get_pool_name(pool), pool, __builtin_return_address(0));
+			else
+				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
+					__func__, cur, get_pool_name(pool), pool, __builtin_return_address(0));
+			abort();
+		}
+
+		nr_freed++;
+		pred = cur;
+		cur = cur->next;
+	}
+
+	if (nr_never_allocated + nr_freed != nr_items) {
+		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
+			__func__, get_pool_name(pool), pool, nr_items, nr_never_allocated, nr_freed, __builtin_return_address(0));
+		abort();
+	}
+}
+
+/*
+ * Validate and release the allocation-tracking bitmap of @pool.
+ *
+ * Does nothing when the pool has no bitmap. Otherwise abort() with a
+ * diagnostic if any bit is still set (leaked item), run the free-list
+ * consistency check, then free the bitmap and clear the pool's pointer
+ * to it.
+ */
+/* Always inline for __builtin_return_address(0). */
+static inline __attribute__((always_inline))
+void destroy_alloc_bitmap(struct rseq_percpu_pool *pool)
+{
+	unsigned long *bitmap = pool->alloc_bitmap;
+	size_t count, total_leaks = 0;
+
+	if (!bitmap)
+		return;
+
+	count = ((pool->percpu_len >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;
+
+	/* Assert that all items in the pool were freed. */
+	for (size_t k = 0; k < count; ++k)
+		total_leaks += rseq_hweight_ulong(bitmap[k]);
+	if (total_leaks) {
+		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
+			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
+		abort();
+	}
+
+	check_free_list(pool);
+
+	free(bitmap);
+	/*
+	 * Clear the pointer: if a later teardown step fails, the pool
+	 * structure stays live and must not reference freed memory
+	 * (which would cause a use-after-free and double free on a
+	 * retried destroy).
+	 */
+	pool->alloc_bitmap = NULL;
+}
+
+/*
+ * Tear down @pool without taking the pool create/destroy lock.
+ *
+ * Returns 0 on success. Returns -1 with errno set to ENOENT if the pool
+ * slot is not in use (NULL base). If the unmap callback fails, its
+ * return value is propagated and the pool structure is left in place
+ * (not zeroed).
+ */
+/* Always inline for __builtin_return_address(0). */
+static inline __attribute__((always_inline))
+int __rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool)
+{
+	int ret;
+
+	if (!pool->base) {
+		errno = ENOENT;
+		ret = -1;
+		goto end;
+	}
+	/*
+	 * This must be done before releasing pool->base for checking the
+	 * free-list.
+	 */
+	destroy_alloc_bitmap(pool);
+	ret = pool->attr.munmap_func(pool->attr.mmap_priv, pool->base,
+			pool->percpu_len * pool->max_nr_cpus);
+	if (ret)
+		goto end;
+	pthread_mutex_destroy(&pool->lock);
+	free(pool->name);
+	/* Zeroing the slot (base == NULL) marks it as available again. */
+	memset(pool, 0, sizeof(*pool));
+end:
+	/* Report the actual outcome rather than unconditional success. */
+	return ret;
+}
+
+/*
+ * Public pool destruction entry point: holds pool_lock around
+ * __rseq_percpu_pool_destroy() and returns that function's result.
+ */
+int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool)
+{
+ int ret;
+
+ pthread_mutex_lock(&pool_lock);
+ ret = __rseq_percpu_pool_destroy(pool);
+ pthread_mutex_unlock(&pool_lock);
+ return ret;
+}
+
+/*
+ * Create a per-CPU pool named @pool_name (may be NULL) for items of
+ * @item_len bytes, reserving @percpu_len bytes per CPU for @max_nr_cpus
+ * CPUs, using the attributes in @_attr (may be NULL for defaults).
+ *
+ * @item_len is raised to at least sizeof(void *) and rounded up to a
+ * power of two; @percpu_len is rounded up to the page size.
+ *
+ * Returns the pool on success. Returns NULL with errno set to EINVAL on
+ * invalid arguments, or ENOMEM when no pool slot is free or when
+ * mapping/allocation fails.
+ */
+struct rseq_percpu_pool *rseq_percpu_pool_create(const char *pool_name,
+ size_t item_len, size_t percpu_len, int max_nr_cpus,
+ const struct rseq_pool_attr *_attr)
+{
+ struct rseq_percpu_pool *pool;
+ struct rseq_pool_attr attr = {};
+ void *base;
+ unsigned int i;
+ int order;
+
+ /* Make sure each item is large enough to contain free list pointers. */
+ if (item_len < sizeof(void *))
+ item_len = sizeof(void *);
+
+ /* Align item_len on next power of two. */
+ order = rseq_get_count_order_ulong(item_len);
+ if (order < 0) {
+ errno = EINVAL;
+ return NULL;
+ }
+ item_len = 1UL << order;
+
+ /* Align percpu_len on page size. */
+ percpu_len = rseq_align(percpu_len, rseq_get_page_len());
+
+ /* The per-CPU offset must fit in the pointer bits left below the pool index. */
+ if (max_nr_cpus < 0 || item_len > percpu_len ||
+ percpu_len > (UINTPTR_MAX >> POOL_INDEX_BITS)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /* Work on a private copy of the attributes; fall back to the default mmap callbacks. */
+ if (_attr)
+ memcpy(&attr, _attr, sizeof(attr));
+ if (!attr.mmap_set) {
+ attr.mmap_func = default_mmap_func;
+ attr.munmap_func = default_munmap_func;
+ attr.mmap_priv = NULL;
+ }
+
+ pthread_mutex_lock(&pool_lock);
+ /* Linear scan in array of pools to find empty spot. */
+ for (i = FIRST_POOL; i < MAX_NR_POOLS; i++) {
+ pool = &rseq_percpu_pool[i];
+ if (!pool->base)
+ goto found_empty;
+ }
+ errno = ENOMEM;
+ pool = NULL;
+ goto end;
+
+found_empty:
+ /* NOTE(review): percpu_len * max_nr_cpus is not checked for overflow here. */
+ base = attr.mmap_func(attr.mmap_priv, percpu_len * max_nr_cpus);
+ if (!base)
+ goto error_alloc;
+ pthread_mutex_init(&pool->lock, NULL);
+ pool->base = base;
+ pool->percpu_len = percpu_len;
+ pool->max_nr_cpus = max_nr_cpus;
+ pool->index = i;
+ pool->item_len = item_len;
+ pool->item_order = order;
+ memcpy(&pool->attr, &attr, sizeof(attr));
+
+ if (pool_name) {
+ pool->name = strdup(pool_name);
+ if (!pool->name)
+ goto error_alloc;
+ }
+
+ /* The allocation-tracking bitmap is only created for robust pools. */
+ if (attr.robust_set) {
+ if (create_alloc_bitmap(pool))
+ goto error_alloc;
+ }
+end:
+ pthread_mutex_unlock(&pool_lock);
+ return pool;
+
+error_alloc:
+ /* Undo partial setup; errno is forced to ENOMEM after the teardown. */
+ __rseq_percpu_pool_destroy(pool);
+ pthread_mutex_unlock(&pool_lock);
+ errno = ENOMEM;
+ return NULL;
+}
+
+/*
+ * Mark the item at @item_offset as allocated in the tracking bitmap of
+ * @pool. No-op when the pool has no bitmap. abort() with a diagnostic
+ * if the item is already marked allocated.
+ */
+/* Always inline for __builtin_return_address(0). */
+static inline __attribute__((always_inline))
+void set_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset)
+{
+	unsigned long *words = pool->alloc_bitmap;
+	size_t slot, word_idx;
+	unsigned long bit;
+
+	if (!words)
+		return;
+
+	slot = item_offset >> pool->item_order;
+	word_idx = slot / BIT_PER_ULONG;
+	bit = 1ULL << (slot % BIT_PER_ULONG);
+
+	/* An already-set bit means the slot was handed out twice. */
+	if (words[word_idx] & bit) {
+		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
+			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
+		abort();
+	}
+	words[word_idx] |= bit;
+}
+
+/*
+ * Allocate one item from @pool, optionally zeroing it on every CPU.
+ *
+ * A previously freed item is reused first (head of the free list);
+ * otherwise a new slot is carved at the next_unused offset. The
+ * returned value encodes the pool index in the high bits and the item
+ * offset in the low bits. Returns NULL with errno set to ENOMEM when
+ * the per-CPU range is exhausted.
+ */
+static
+void __rseq_percpu *__rseq_percpu_malloc(struct rseq_percpu_pool *pool, bool zeroed)
+{
+	struct free_list_node *node;
+	uintptr_t item_offset;
+	void __rseq_percpu *addr;
+
+	pthread_mutex_lock(&pool->lock);
+	/* Get first entry from free list. */
+	node = pool->free_list_head;
+	if (node != NULL) {
+		/* Remove node from free list (update head). */
+		pool->free_list_head = node->next;
+		item_offset = (uintptr_t) ((void *) node - pool->base);
+		addr = (void *) (((uintptr_t) pool->index << POOL_INDEX_SHIFT) | item_offset);
+		goto end;
+	}
+	if (pool->next_unused + pool->item_len > pool->percpu_len) {
+		errno = ENOMEM;
+		addr = NULL;
+		goto end;
+	}
+	item_offset = pool->next_unused;
+	addr = (void *) (((uintptr_t) pool->index << POOL_INDEX_SHIFT) | item_offset);
+	pool->next_unused += pool->item_len;
+end:
+	/*
+	 * Record the allocation for both the fresh-slot and the
+	 * free-list reuse paths: rseq_percpu_free() clears the bit, so a
+	 * recycled item left unmarked would be reported as a double-free
+	 * on its next free and missed by leak detection.
+	 */
+	if (addr)
+		set_alloc_slot(pool, item_offset);
+	pthread_mutex_unlock(&pool->lock);
+	if (zeroed && addr)
+		rseq_percpu_zero_item(pool, item_offset);
+	return addr;
+}
+
+/*
+ * Allocate one item from @pool without zeroing it. Returns the result
+ * of __rseq_percpu_malloc(), i.e. NULL with errno set to ENOMEM when
+ * the pool is exhausted.
+ */
+void __rseq_percpu *rseq_percpu_malloc(struct rseq_percpu_pool *pool)
+{
+ return __rseq_percpu_malloc(pool, false);
+}
+
+/*
+ * Allocate one item from @pool and request that it be zeroed. Returns
+ * the result of __rseq_percpu_malloc(), i.e. NULL with errno set to
+ * ENOMEM when the pool is exhausted.
+ */
+void __rseq_percpu *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool)
+{
+ return __rseq_percpu_malloc(pool, true);
+}
+
+/*
+ * Mark the item at @item_offset as free in the tracking bitmap of
+ * @pool. No-op when the pool has no bitmap. abort() with a diagnostic
+ * if the item is not currently marked allocated.
+ */
+/* Always inline for __builtin_return_address(0). */
+static inline __attribute__((always_inline))
+void clear_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset)
+{
+	unsigned long *words = pool->alloc_bitmap;
+	size_t slot, word_idx;
+	unsigned long bit;
+
+	if (!words)
+		return;
+
+	slot = item_offset >> pool->item_order;
+	word_idx = slot / BIT_PER_ULONG;
+	bit = 1ULL << (slot % BIT_PER_ULONG);
+
+	/* A clear bit means the slot is not allocated: report it. */
+	if ((words[word_idx] & bit) == 0) {
+		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
+			__func__, get_pool_name(pool), pool, item_offset,
+			(void *) __builtin_return_address(0));
+		abort();
+	}
+	words[word_idx] &= ~bit;
+}
+
+/*
+ * Return the item designated by @_ptr to its pool. The pool index is
+ * decoded from the high bits of the pointer and the item offset from
+ * the low bits; the item is then pushed at the head of the pool's free
+ * list under the pool lock.
+ */
+void rseq_percpu_free(void __rseq_percpu *_ptr)
+{
+	const uintptr_t encoded = (uintptr_t) _ptr;
+	const uintptr_t offset = encoded & MAX_POOL_LEN_MASK;
+	struct rseq_percpu_pool *pool = &rseq_percpu_pool[encoded >> POOL_INDEX_SHIFT];
+	struct free_list_node *freed;
+
+	pthread_mutex_lock(&pool->lock);
+	clear_alloc_slot(pool, offset);
+	/* Free-list nodes are stored in the CPU 0 range of the item. */
+	freed = (struct free_list_node *)__rseq_pool_percpu_ptr(pool, 0, offset);
+	/* Push at the head of the free list. */
+	freed->next = pool->free_list_head;
+	pool->free_list_head = freed;
+	pthread_mutex_unlock(&pool->lock);
+}
+
+/*
+ * Allocate an empty (zero-filled) pool set and initialize its lock.
+ * Returns NULL if the allocation fails.
+ */
+struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void)
+{
+	struct rseq_percpu_pool_set *set = calloc(1, sizeof(*set));
+
+	if (set)
+		pthread_mutex_init(&set->lock, NULL);
+	return set;
+}
+
+/*
+ * Destroy every pool held by @pool_set, then the set itself.
+ *
+ * Stops at the first pool whose destruction fails and returns that
+ * error; the set and its remaining pools are then left in place.
+ * Returns 0 once everything has been released.
+ */
+int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set)
+{
+	for (int idx = POOL_SET_MIN_ENTRY; idx < POOL_SET_NR_ENTRIES; idx++) {
+		struct rseq_percpu_pool **slot = &pool_set->entries[idx];
+		int err;
+
+		if (*slot == NULL)
+			continue;
+		err = rseq_percpu_pool_destroy(*slot);
+		if (err != 0)
+			return err;
+		*slot = NULL;
+	}
+	pthread_mutex_destroy(&pool_set->lock);
+	free(pool_set);
+	return 0;
+}
+
+/*
+ * Register @pool in @pool_set at the entry matching its item order.
+ * Ownership of pool is handed over to pool set on success.
+ *
+ * Returns 0 on success, or -1 with errno set to EBUSY when the set
+ * already holds a pool for that item order.
+ */
+int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set, struct rseq_percpu_pool *pool)
+{
+	size_t entry = pool->item_order;
+	int status = 0;
+
+	pthread_mutex_lock(&pool_set->lock);
+	if (pool_set->entries[entry] == NULL) {
+		pool_set->entries[entry] = pool;
+	} else {
+		errno = EBUSY;
+		status = -1;
+	}
+	pthread_mutex_unlock(&pool_set->lock);
+	return status;
+}
+
+/*
+ * Allocate an item of at least @len bytes from @pool_set, optionally
+ * zeroed.
+ *
+ * The smallest present pool whose items can hold @len is tried first;
+ * whenever a pool reports ENOMEM, the search resumes from the next
+ * larger order. Returns NULL with errno set to ENOMEM when no suitable
+ * pool is found.
+ */
+static
+void __rseq_percpu *__rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len, bool zeroed)
+{
+	int order, min_order = POOL_SET_MIN_ENTRY;
+
+	order = rseq_get_count_order_ulong(len);
+	if (order > POOL_SET_MIN_ENTRY)
+		min_order = order;
+
+	for (;;) {
+		struct rseq_percpu_pool *pool = NULL;
+		void __rseq_percpu *addr;
+
+		pthread_mutex_lock(&pool_set->lock);
+		/* First smallest present pool where @len fits. */
+		for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
+			struct rseq_percpu_pool *candidate = pool_set->entries[order];
+
+			if (candidate && candidate->item_len >= len) {
+				pool = candidate;
+				break;
+			}
+		}
+		pthread_mutex_unlock(&pool_set->lock);
+
+		if (!pool) {
+			/* Not found. */
+			errno = ENOMEM;
+			return NULL;
+		}
+
+		addr = __rseq_percpu_malloc(pool, zeroed);
+		if (addr != NULL || errno != ENOMEM)
+			return addr;
+		/* This pool is exhausted: retry with a larger one. */
+		min_order = order + 1;
+	}
+}
+
+/*
+ * Allocate an item of at least @len bytes from @pool_set without
+ * zeroing it. Forwards to __rseq_percpu_pool_set_malloc().
+ */
+void __rseq_percpu *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len)
+{
+ return __rseq_percpu_pool_set_malloc(pool_set, len, false);
+}
+
+/*
+ * Allocate an item of at least @len bytes from @pool_set and request
+ * that it be zeroed. Forwards to __rseq_percpu_pool_set_malloc().
+ */
+void __rseq_percpu *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len)
+{
+ return __rseq_percpu_pool_set_malloc(pool_set, len, true);
+}
+
+/*
+ * Allocate a zero-filled pool attribute object. Returns NULL if the
+ * allocation fails.
+ */
+struct rseq_pool_attr *rseq_pool_attr_create(void)
+{
+ return calloc(1, sizeof(struct rseq_pool_attr));
+}
+
+/* Release a pool attribute object by passing @attr to free(). */
+void rseq_pool_attr_destroy(struct rseq_pool_attr *attr)
+{
+ free(attr);
+}
+
+/*
+ * Install custom mapping callbacks and their private data in @attr and
+ * flag them as set. Returns 0 on success, or -1 with errno set to
+ * EINVAL when @attr is NULL.
+ */
+int rseq_pool_attr_set_mmap(struct rseq_pool_attr *attr,
+		void *(*mmap_func)(void *priv, size_t len),
+		int (*munmap_func)(void *priv, void *ptr, size_t len),
+		void *mmap_priv)
+{
+	if (attr == NULL) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	attr->mmap_func = mmap_func;
+	attr->munmap_func = munmap_func;
+	attr->mmap_priv = mmap_priv;
+	attr->mmap_set = true;
+	return 0;
+}
+
+/*
+ * Set the robust flag in @attr. Returns 0 on success, or -1 with errno
+ * set to EINVAL when @attr is NULL.
+ */
+int rseq_pool_attr_set_robust(struct rseq_pool_attr *attr)
+{
+	if (attr != NULL) {
+		attr->robust_set = true;
+		return 0;
+	}
+	errno = EINVAL;
+	return -1;
+}
+++ /dev/null
-// SPDX-License-Identifier: MIT
-// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
-
-#include <rseq/percpu-alloc.h>
-#include <sys/mman.h>
-#include <assert.h>
-#include <string.h>
-#include <pthread.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <rseq/compiler.h>
-#include <errno.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdio.h>
-
-#ifdef HAVE_LIBNUMA
-# include <numa.h>
-# include <numaif.h>
-#endif
-
-#include "rseq-alloc-utils.h"
-
-/*
- * rseq-percpu-alloc.c: rseq CPU-Local Storage (CLS) memory allocator.
- *
- * The rseq per-CPU memory allocator allows the application the request
- * memory pools of CPU-Local memory each of containing objects of a
- * given size (rounded to next power of 2), reserving a given virtual
- * address size per CPU, for a given maximum number of CPUs.
- *
- * The per-CPU memory allocator is analogous to TLS (Thread-Local
- * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
- * memory allocator provides CPU-Local Storage.
- */
-
-/*
- * Use high bits of per-CPU addresses to index the pool.
- * This leaves the low bits of available to the application for pointer
- * tagging (based on next power of 2 alignment of the allocations).
- */
-#if RSEQ_BITS_PER_LONG == 64
-# define POOL_INDEX_BITS 16
-#else
-# define POOL_INDEX_BITS 8
-#endif
-#define MAX_NR_POOLS (1UL << POOL_INDEX_BITS)
-#define POOL_INDEX_SHIFT (RSEQ_BITS_PER_LONG - POOL_INDEX_BITS)
-#define MAX_POOL_LEN (1UL << POOL_INDEX_SHIFT)
-#define MAX_POOL_LEN_MASK (MAX_POOL_LEN - 1)
-
-#define POOL_SET_NR_ENTRIES POOL_INDEX_SHIFT
-
-/*
- * Smallest allocation should hold enough space for a free list pointer.
- */
-#if RSEQ_BITS_PER_LONG == 64
-# define POOL_SET_MIN_ENTRY 3 /* Smallest item_len=8 */
-#else
-# define POOL_SET_MIN_ENTRY 2 /* Smallest item_len=4 */
-#endif
-
-/*
- * Skip pool index 0 to ensure allocated entries at index 0 do not match
- * a NULL pointer.
- */
-#define FIRST_POOL 1
-
-#define BIT_PER_ULONG (8 * sizeof(unsigned long))
-
-#define MOVE_PAGES_BATCH_SIZE 4096
-
-struct free_list_node;
-
-struct free_list_node {
- struct free_list_node *next;
-};
-
-/* This lock protects pool create/destroy. */
-static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
-
-struct rseq_pool_attr {
- bool mmap_set;
- void *(*mmap_func)(void *priv, size_t len);
- int (*munmap_func)(void *priv, void *ptr, size_t len);
- void *mmap_priv;
-
- bool robust_set;
-};
-
-struct rseq_percpu_pool {
- void *base;
- unsigned int index;
- size_t item_len;
- size_t percpu_len;
- int item_order;
- int max_nr_cpus;
-
- /*
- * The free list chains freed items on the CPU 0 address range.
- * We should rethink this decision if false sharing between
- * malloc/free from other CPUs and data accesses from CPU 0
- * becomes an issue. This is a NULL-terminated singly-linked
- * list.
- */
- struct free_list_node *free_list_head;
- size_t next_unused;
- /* This lock protects allocation/free within the pool. */
- pthread_mutex_t lock;
-
- struct rseq_pool_attr attr;
-
- char *name;
- /* Track alloc/free. */
- unsigned long *alloc_bitmap;
-};
-
-//TODO: the array of pools should grow dynamically on create.
-static struct rseq_percpu_pool rseq_percpu_pool[MAX_NR_POOLS];
-
-/*
- * Pool set entries are indexed by item_len rounded to the next power of
- * 2. A pool set can contain NULL pool entries, in which case the next
- * large enough entry will be used for allocation.
- */
-struct rseq_percpu_pool_set {
- /* This lock protects add vs malloc/zmalloc within the pool set. */
- pthread_mutex_t lock;
- struct rseq_percpu_pool *entries[POOL_SET_NR_ENTRIES];
-};
-
-static
-void *__rseq_pool_percpu_ptr(struct rseq_percpu_pool *pool, int cpu, uintptr_t item_offset)
-{
- return pool->base + (pool->percpu_len * cpu) + item_offset;
-}
-
-void *__rseq_percpu_ptr(void __rseq_percpu *_ptr, int cpu)
-{
- uintptr_t ptr = (uintptr_t) _ptr;
- uintptr_t item_offset = ptr & MAX_POOL_LEN_MASK;
- uintptr_t pool_index = ptr >> POOL_INDEX_SHIFT;
- struct rseq_percpu_pool *pool = &rseq_percpu_pool[pool_index];
-
- assert(cpu >= 0);
- return __rseq_pool_percpu_ptr(pool, cpu, item_offset);
-}
-
-static
-void rseq_percpu_zero_item(struct rseq_percpu_pool *pool, uintptr_t item_offset)
-{
- int i;
-
- for (i = 0; i < pool->max_nr_cpus; i++) {
- char *p = __rseq_pool_percpu_ptr(pool, i, item_offset);
- memset(p, 0, pool->item_len);
- }
-}
-
-#ifdef HAVE_LIBNUMA
-int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool, int numa_flags)
-{
- unsigned long nr_pages;
- long ret, page_len;
- int cpu;
-
- if (!numa_flags)
- return 0;
- page_len = rseq_get_page_len();
- nr_pages = pool->percpu_len >> rseq_get_count_order_ulong(page_len);
- for (cpu = 0; cpu < pool->max_nr_cpus; cpu++) {
-
- int status[MOVE_PAGES_BATCH_SIZE];
- int nodes[MOVE_PAGES_BATCH_SIZE];
- void *pages[MOVE_PAGES_BATCH_SIZE];
-
- nodes[0] = numa_node_of_cpu(cpu);
- for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
- nodes[k] = nodes[0];
- }
-
- for (unsigned long page = 0; page < nr_pages;) {
-
- size_t max_k = RSEQ_ARRAY_SIZE(pages);
- size_t left = nr_pages - page;
-
- if (left < max_k) {
- max_k = left;
- }
-
- for (size_t k = 0; k < max_k; ++k, ++page) {
- pages[k] = __rseq_pool_percpu_ptr(pool, cpu, page * page_len);
- status[k] = -EPERM;
- }
-
- ret = move_pages(0, max_k, pages, nodes, status, numa_flags);
-
- if (ret < 0)
- return ret;
-
- if (ret > 0) {
- fprintf(stderr, "%lu pages were not migrated\n", ret);
- for (size_t k = 0; k < max_k; ++k) {
- if (status[k] < 0)
- fprintf(stderr,
- "Error while moving page %p to numa node %d: %u\n",
- pages[k], nodes[k], -status[k]);
- }
- }
- }
- }
- return 0;
-}
-#else
-void rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool __attribute__((unused)),
- int numa_flags __attribute__((unused)))
-{
- return 0;
-}
-#endif
-
-static
-void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
-{
- void *base;
-
- base = mmap(NULL, len, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (base == MAP_FAILED)
- return NULL;
- return base;
-}
-
-static
-int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
-{
- return munmap(ptr, len);
-}
-
-static
-int create_alloc_bitmap(struct rseq_percpu_pool *pool)
-{
- size_t count;
-
- count = ((pool->percpu_len >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;
-
- /*
- * Not being able to create the validation bitmap is an error
- * that needs to be reported.
- */
- pool->alloc_bitmap = calloc(count, sizeof(unsigned long));
- if (!pool->alloc_bitmap)
- return -1;
- return 0;
-}
-
-static
-const char *get_pool_name(const struct rseq_percpu_pool *pool)
-{
- return pool->name ? : "<anonymous>";
-}
-
-/* Always inline for __builtin_return_address(0). */
-static inline __attribute__((always_inline))
-void check_free_list(const struct rseq_percpu_pool *pool)
-{
- size_t total_item = pool->percpu_len >> pool->item_order;
- size_t total_never_allocated = (pool->percpu_len - pool->next_unused) >> pool->item_order;
- size_t total_freed = 0;
- size_t max_list_traversal = total_item - total_never_allocated;
- size_t traversal_iteration = 0;
-
- for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
- node;
- prev = node,
- node = node->next) {
-
- void *node_addr = node;
-
- if (traversal_iteration >= max_list_traversal) {
- fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
- __func__, get_pool_name(pool), pool, __builtin_return_address(0));
- abort();
- }
-
- /* Node is out of range. */
- if ((node_addr < pool->base) ||
- (node_addr >= pool->base + pool->next_unused)) {
- if (prev)
- fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
- __func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
- else
- fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
- __func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
- abort();
- }
-
- traversal_iteration += 1;
- total_freed += 1;
- }
-
- if (total_never_allocated + total_freed != total_item) {
- fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
- __func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
- abort();
- }
-
-}
-
-/* Always inline for __builtin_return_address(0). */
-static inline __attribute__((always_inline))
-void destroy_alloc_bitmap(struct rseq_percpu_pool *pool)
-{
- unsigned long *bitmap = pool->alloc_bitmap;
- size_t count, total_leaks = 0;
-
- if (!bitmap)
- return;
-
- count = ((pool->percpu_len >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;
-
- /* Assert that all items in the pool were freed. */
- for (size_t k = 0; k < count; ++k)
- total_leaks += rseq_hweight_ulong(bitmap[k]);
- if (total_leaks) {
- fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
- __func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
- abort();
- }
-
- check_free_list(pool);
-
- free(bitmap);
-}
-
-/* Always inline for __builtin_return_address(0). */
-static inline __attribute__((always_inline))
-int __rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool)
-{
- int ret;
-
- if (!pool->base) {
- errno = ENOENT;
- ret = -1;
- goto end;
- }
- /*
- * This must be done before releasing pool->base for checking the
- * free-list.
- */
- destroy_alloc_bitmap(pool);
- ret = pool->attr.munmap_func(pool->attr.mmap_priv, pool->base,
- pool->percpu_len * pool->max_nr_cpus);
- if (ret)
- goto end;
- pthread_mutex_destroy(&pool->lock);
- free(pool->name);
- memset(pool, 0, sizeof(*pool));
-end:
- return 0;
-}
-
-int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool)
-{
- int ret;
-
- pthread_mutex_lock(&pool_lock);
- ret = __rseq_percpu_pool_destroy(pool);
- pthread_mutex_unlock(&pool_lock);
- return ret;
-}
-
-struct rseq_percpu_pool *rseq_percpu_pool_create(const char *pool_name,
- size_t item_len, size_t percpu_len, int max_nr_cpus,
- const struct rseq_pool_attr *_attr)
-{
- struct rseq_percpu_pool *pool;
- struct rseq_pool_attr attr = {};
- void *base;
- unsigned int i;
- int order;
-
- /* Make sure each item is large enough to contain free list pointers. */
- if (item_len < sizeof(void *))
- item_len = sizeof(void *);
-
- /* Align item_len on next power of two. */
- order = rseq_get_count_order_ulong(item_len);
- if (order < 0) {
- errno = EINVAL;
- return NULL;
- }
- item_len = 1UL << order;
-
- /* Align percpu_len on page size. */
- percpu_len = rseq_align(percpu_len, rseq_get_page_len());
-
- if (max_nr_cpus < 0 || item_len > percpu_len ||
- percpu_len > (UINTPTR_MAX >> POOL_INDEX_BITS)) {
- errno = EINVAL;
- return NULL;
- }
-
- if (_attr)
- memcpy(&attr, _attr, sizeof(attr));
- if (!attr.mmap_set) {
- attr.mmap_func = default_mmap_func;
- attr.munmap_func = default_munmap_func;
- attr.mmap_priv = NULL;
- }
-
- pthread_mutex_lock(&pool_lock);
- /* Linear scan in array of pools to find empty spot. */
- for (i = FIRST_POOL; i < MAX_NR_POOLS; i++) {
- pool = &rseq_percpu_pool[i];
- if (!pool->base)
- goto found_empty;
- }
- errno = ENOMEM;
- pool = NULL;
- goto end;
-
-found_empty:
- base = attr.mmap_func(attr.mmap_priv, percpu_len * max_nr_cpus);
- if (!base)
- goto error_alloc;
- pthread_mutex_init(&pool->lock, NULL);
- pool->base = base;
- pool->percpu_len = percpu_len;
- pool->max_nr_cpus = max_nr_cpus;
- pool->index = i;
- pool->item_len = item_len;
- pool->item_order = order;
- memcpy(&pool->attr, &attr, sizeof(attr));
-
- if (pool_name) {
- pool->name = strdup(pool_name);
- if (!pool->name)
- goto error_alloc;
- }
-
- if (attr.robust_set) {
- if (create_alloc_bitmap(pool))
- goto error_alloc;
- }
-end:
- pthread_mutex_unlock(&pool_lock);
- return pool;
-
-error_alloc:
- __rseq_percpu_pool_destroy(pool);
- pthread_mutex_unlock(&pool_lock);
- errno = ENOMEM;
- return NULL;
-}
-
-/* Always inline for __builtin_return_address(0). */
-static inline __attribute__((always_inline))
-void set_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset)
-{
- unsigned long *bitmap = pool->alloc_bitmap;
- size_t item_index = item_offset >> pool->item_order;
- unsigned long mask;
- size_t k;
-
- if (!bitmap)
- return;
-
- k = item_index / BIT_PER_ULONG;
- mask = 1ULL << (item_index % BIT_PER_ULONG);
-
- /* Print error if bit is already set. */
- if (bitmap[k] & mask) {
- fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
- __func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
- abort();
- }
- bitmap[k] |= mask;
-}
-
-static
-void __rseq_percpu *__rseq_percpu_malloc(struct rseq_percpu_pool *pool, bool zeroed)
-{
- struct free_list_node *node;
- uintptr_t item_offset;
- void __rseq_percpu *addr;
-
- pthread_mutex_lock(&pool->lock);
- /* Get first entry from free list. */
- node = pool->free_list_head;
- if (node != NULL) {
- /* Remove node from free list (update head). */
- pool->free_list_head = node->next;
- item_offset = (uintptr_t) ((void *) node - pool->base);
- addr = (void *) (((uintptr_t) pool->index << POOL_INDEX_SHIFT) | item_offset);
- goto end;
- }
- if (pool->next_unused + pool->item_len > pool->percpu_len) {
- errno = ENOMEM;
- addr = NULL;
- goto end;
- }
- item_offset = pool->next_unused;
- addr = (void *) (((uintptr_t) pool->index << POOL_INDEX_SHIFT) | item_offset);
- pool->next_unused += pool->item_len;
- set_alloc_slot(pool, item_offset);
-end:
- pthread_mutex_unlock(&pool->lock);
- if (zeroed && addr)
- rseq_percpu_zero_item(pool, item_offset);
- return addr;
-}
-
-void __rseq_percpu *rseq_percpu_malloc(struct rseq_percpu_pool *pool)
-{
- return __rseq_percpu_malloc(pool, false);
-}
-
-void __rseq_percpu *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool)
-{
- return __rseq_percpu_malloc(pool, true);
-}
-
-/* Always inline for __builtin_return_address(0). */
-static inline __attribute__((always_inline))
-void clear_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset)
-{
- unsigned long *bitmap = pool->alloc_bitmap;
- size_t item_index = item_offset >> pool->item_order;
- unsigned long mask;
- size_t k;
-
- if (!bitmap)
- return;
-
- k = item_index / BIT_PER_ULONG;
- mask = 1ULL << (item_index % BIT_PER_ULONG);
-
- /* Print error if bit is not set. */
- if (!(bitmap[k] & mask)) {
- fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
- __func__, get_pool_name(pool), pool, item_offset,
- (void *) __builtin_return_address(0));
- abort();
- }
- bitmap[k] &= ~mask;
-}
-
-void rseq_percpu_free(void __rseq_percpu *_ptr)
-{
- uintptr_t ptr = (uintptr_t) _ptr;
- uintptr_t item_offset = ptr & MAX_POOL_LEN_MASK;
- uintptr_t pool_index = ptr >> POOL_INDEX_SHIFT;
- struct rseq_percpu_pool *pool = &rseq_percpu_pool[pool_index];
- struct free_list_node *head, *item;
-
- pthread_mutex_lock(&pool->lock);
- clear_alloc_slot(pool, item_offset);
- /* Add ptr to head of free list */
- head = pool->free_list_head;
- /* Free-list is in CPU 0 range. */
- item = (struct free_list_node *)__rseq_pool_percpu_ptr(pool, 0, item_offset);
- item->next = head;
- pool->free_list_head = item;
- pthread_mutex_unlock(&pool->lock);
-}
-
-struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void)
-{
- struct rseq_percpu_pool_set *pool_set;
-
- pool_set = calloc(1, sizeof(struct rseq_percpu_pool_set));
- if (!pool_set)
- return NULL;
- pthread_mutex_init(&pool_set->lock, NULL);
- return pool_set;
-}
-
-int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set)
-{
- int order, ret;
-
- for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
- struct rseq_percpu_pool *pool = pool_set->entries[order];
-
- if (!pool)
- continue;
- ret = rseq_percpu_pool_destroy(pool);
- if (ret)
- return ret;
- pool_set->entries[order] = NULL;
- }
- pthread_mutex_destroy(&pool_set->lock);
- free(pool_set);
- return 0;
-}
-
-/* Ownership of pool is handed over to pool set on success. */
-int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set, struct rseq_percpu_pool *pool)
-{
- size_t item_order = pool->item_order;
- int ret = 0;
-
- pthread_mutex_lock(&pool_set->lock);
- if (pool_set->entries[item_order]) {
- errno = EBUSY;
- ret = -1;
- goto end;
- }
- pool_set->entries[pool->item_order] = pool;
-end:
- pthread_mutex_unlock(&pool_set->lock);
- return ret;
-}
-
-static
-void __rseq_percpu *__rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len, bool zeroed)
-{
- int order, min_order = POOL_SET_MIN_ENTRY;
- struct rseq_percpu_pool *pool;
- void __rseq_percpu *addr;
-
- order = rseq_get_count_order_ulong(len);
- if (order > POOL_SET_MIN_ENTRY)
- min_order = order;
-again:
- pthread_mutex_lock(&pool_set->lock);
- /* First smallest present pool where @len fits. */
- for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
- pool = pool_set->entries[order];
-
- if (!pool)
- continue;
- if (pool->item_len >= len)
- goto found;
- }
- pool = NULL;
-found:
- pthread_mutex_unlock(&pool_set->lock);
- if (pool) {
- addr = __rseq_percpu_malloc(pool, zeroed);
- if (addr == NULL && errno == ENOMEM) {
- /*
- * If the allocation failed, try again with a
- * larger pool.
- */
- min_order = order + 1;
- goto again;
- }
- } else {
- /* Not found. */
- errno = ENOMEM;
- addr = NULL;
- }
- return addr;
-}
-
-void __rseq_percpu *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len)
-{
- return __rseq_percpu_pool_set_malloc(pool_set, len, false);
-}
-
-void __rseq_percpu *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len)
-{
- return __rseq_percpu_pool_set_malloc(pool_set, len, true);
-}
-
-struct rseq_pool_attr *rseq_pool_attr_create(void)
-{
- return calloc(1, sizeof(struct rseq_pool_attr));
-}
-
-void rseq_pool_attr_destroy(struct rseq_pool_attr *attr)
-{
- free(attr);
-}
-
-int rseq_pool_attr_set_mmap(struct rseq_pool_attr *attr,
- void *(*mmap_func)(void *priv, size_t len),
- int (*munmap_func)(void *priv, void *ptr, size_t len),
- void *mmap_priv)
-{
- if (!attr) {
- errno = EINVAL;
- return -1;
- }
- attr->mmap_set = true;
- attr->mmap_func = mmap_func;
- attr->munmap_func = munmap_func;
- attr->mmap_priv = mmap_priv;
- return 0;
-}
-
-int rseq_pool_attr_set_robust(struct rseq_pool_attr *attr)
-{
- if (!attr) {
- errno = EINVAL;
- return -1;
- }
- attr->robust_set = true;
- return 0;
-}
--- /dev/null
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
+#ifndef _RSEQ_UTILS_H
+#define _RSEQ_UTILS_H
+
+#define RSEQ_ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define __rseq_align_mask(v, mask) (((v) + (mask)) & ~(mask))
+#define rseq_align(v, align) __rseq_align_mask(v, (__typeof__(v)) (align) - 1)
+
+/*
+ * Find last set: return the 1-based position of the most significant
+ * set bit of @x (1..64), or 0 when @x is 0.
+ */
+static inline
+unsigned int rseq_fls_u64(uint64_t x)
+{
+	unsigned int pos = 64;
+
+	if (x == 0)
+		return 0;
+
+	/*
+	 * Binary search: for widths 32, 16, 8, 4, 2, 1, if the top
+	 * `width` bits are all clear, shift them out and lower the
+	 * candidate position accordingly.
+	 */
+	for (unsigned int width = 32; width != 0; width >>= 1) {
+		if ((x >> (64 - width)) == 0) {
+			x <<= width;
+			pos -= width;
+		}
+	}
+	return pos;
+}
+
+/*
+ * Find last set: return the 1-based position of the most significant
+ * set bit of @x (1..32), or 0 when @x is 0.
+ */
+static inline
+unsigned int rseq_fls_u32(uint32_t x)
+{
+	unsigned int pos = 32;
+
+	if (x == 0)
+		return 0;
+
+	/*
+	 * Binary search: for widths 16, 8, 4, 2, 1, if the top `width`
+	 * bits are all clear, shift them out and lower the candidate
+	 * position accordingly.
+	 */
+	for (unsigned int width = 16; width != 0; width >>= 1) {
+		if ((x >> (32 - width)) == 0) {
+			x <<= width;
+			pos -= width;
+		}
+	}
+	return pos;
+}
+
+/*
+ * Find last set for unsigned long: dispatches to the 32-bit helper when
+ * RSEQ_BITS_PER_LONG is 32, and to the 64-bit helper otherwise.
+ */
+static inline
+unsigned int rseq_fls_ulong(unsigned long x)
+{
+#if RSEQ_BITS_PER_LONG == 32
+ return rseq_fls_u32(x);
+#else
+ return rseq_fls_u64(x);
+#endif
+}
+
+/*
+ * Return the smallest order such that x <= (1UL << order), i.e. the
+ * exponent of the next power of two at or above @x.
+ * Return -1 if x is 0.
+ */
+static inline
+int rseq_get_count_order_ulong(unsigned long x)
+{
+	return x ? (int) rseq_fls_ulong(x - 1) : -1;
+}
+
+#define RSEQ_DEFAULT_PAGE_SIZE 4096
+
+/*
+ * Return the page size reported by sysconf(_SC_PAGE_SIZE), falling
+ * back to RSEQ_DEFAULT_PAGE_SIZE when sysconf() returns a negative
+ * value.
+ */
+static inline
+long rseq_get_page_len(void)
+{
+	long reported = sysconf(_SC_PAGE_SIZE);
+
+	return (reported < 0) ? RSEQ_DEFAULT_PAGE_SIZE : reported;
+}
+
+/*
+ * Hamming weight: return the number of set bits in @v, using the
+ * compiler's popcount builtin.
+ */
+static inline
+int rseq_hweight_ulong(unsigned long v)
+{
+ return __builtin_popcountl(v);
+}
+
+#endif /* _RSEQ_UTILS_H */
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>
-#include <rseq/percpu-alloc.h>
+#include <rseq/mempool.h>
#define PERCPU_POOL_LEN (1024*1024) /* 1MB */