/* librseq — src/rseq.c */
1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3
4 #ifndef _GNU_SOURCE
5 #define _GNU_SOURCE
6 #endif
7 #include <errno.h>
8 #include <sched.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <unistd.h>
13 #include <syscall.h>
14 #include <assert.h>
15 #include <signal.h>
16 #include <limits.h>
17 #include <dlfcn.h>
18 #include <stddef.h>
19 #include <stdint.h>
20 #include <sys/auxv.h>
21 #include <linux/auxvec.h>
22
23 #include <rseq/rseq.h>
24 #include "smp.h"
25
26 #ifndef AT_RSEQ_FEATURE_SIZE
27 # define AT_RSEQ_FEATURE_SIZE 27
28 #endif
29
30 #ifndef AT_RSEQ_ALIGN
31 # define AT_RSEQ_ALIGN 28
32 #endif
33
34 static __attribute__((constructor))
35 void rseq_init(void);
36
37 static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
38 static int init_done;
39
40 static const ptrdiff_t *libc_rseq_offset_p;
41 static const unsigned int *libc_rseq_size_p;
42 static const unsigned int *libc_rseq_flags_p;
43
44 /* Offset from the thread pointer to the rseq area. */
45 ptrdiff_t rseq_offset;
46
47 /*
48 * Size of the registered rseq area. 0 if the registration was
49 * unsuccessful.
50 */
51 unsigned int rseq_size = -1U;
52
53 /* Flags used during rseq registration. */
54 unsigned int rseq_flags;
55
56 /*
57 * rseq feature size supported by the kernel. 0 if the registration was
58 * unsuccessful.
59 */
60 unsigned int rseq_feature_size = -1U;
61
62 static int rseq_ownership;
63 static int rseq_reg_success; /* At least one rseq registration has succeded. */
64
65 /* Allocate a large area for the TLS. */
66 #define RSEQ_THREAD_AREA_ALLOC_SIZE 1024
67
68 /* Original struct rseq feature size is 20 bytes. */
69 #define ORIG_RSEQ_FEATURE_SIZE 20
70
71 /* Original struct rseq allocation size is 32 bytes. */
72 #define ORIG_RSEQ_ALLOC_SIZE 32
73
74 /*
75 * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the
76 * rseq_abi structure allocated size is at least
77 * RSEQ_THREAD_AREA_ALLOC_SIZE bytes to hold extra space for yet unknown
78 * kernel rseq extensions.
79 */
80 static
81 __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
82 .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
83 };
84
/*
 * Invoke the rseq(2) system call directly: libc does not provide a
 * wrapper for it.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		int flags, uint32_t sig)
{
	long ret;

	ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
	return (int) ret;
}
90
/*
 * Invoke the getcpu(2) system call directly. The third (tcache)
 * argument is unused by modern kernels and passed as NULL.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	void *tcache_unused = NULL;

	return syscall(__NR_getcpu, cpu, node, tcache_unused);
}
95
96 bool rseq_available(unsigned int query)
97 {
98 int rc;
99
100 switch (query) {
101 case RSEQ_AVAILABLE_QUERY_KERNEL:
102 rc = sys_rseq(NULL, 0, 0, 0);
103 if (rc != -1)
104 abort();
105 switch (errno) {
106 case ENOSYS:
107 break;
108 case EINVAL:
109 return true;
110 default:
111 abort();
112 }
113 break;
114 case RSEQ_AVAILABLE_QUERY_LIBC:
115 if (rseq_size && !rseq_ownership)
116 return true;
117 break;
118 default:
119 break;
120 }
121 return false;
122 }
123
124 int rseq_register_current_thread(void)
125 {
126 int rc;
127
128 rseq_init();
129
130 if (!rseq_ownership) {
131 /* Treat libc's ownership as a successful registration. */
132 return 0;
133 }
134 rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
135 if (rc) {
136 if (RSEQ_READ_ONCE(rseq_reg_success)) {
137 /* Incoherent success/failure within process. */
138 abort();
139 }
140 return -1;
141 }
142 assert(rseq_current_cpu_raw() >= 0);
143 RSEQ_WRITE_ONCE(rseq_reg_success, 1);
144 return 0;
145 }
146
147 int rseq_unregister_current_thread(void)
148 {
149 int rc;
150
151 if (!rseq_ownership) {
152 /* Treat libc's ownership as a successful unregistration. */
153 return 0;
154 }
155 rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
156 if (rc)
157 return -1;
158 return 0;
159 }
160
161 static
162 unsigned int get_rseq_feature_size(void)
163 {
164 unsigned long auxv_rseq_feature_size, auxv_rseq_align;
165
166 auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
167 assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
168
169 auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
170 assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
171 if (auxv_rseq_feature_size)
172 return auxv_rseq_feature_size;
173 else
174 return ORIG_RSEQ_FEATURE_SIZE;
175 }
176
/*
 * Initialize the public symbols for the rseq offset, size, feature size and
 * flags prior to registering threads. If glibc owns the registration, get the
 * values from its public symbols.
 *
 * Runs at most once per process: guarded by init_done, double-checked
 * under init_lock. Called from the library constructor and lazily from
 * rseq_register_current_thread().
 */
static
void rseq_init(void)
{
	/* Ensure initialization is only done once. */
	if (RSEQ_READ_ONCE(init_done))
		return;

	/*
	 * Take the mutex, check the initialization flag again and atomically
	 * set it to ensure we are the only thread doing the initialization.
	 */
	pthread_mutex_lock(&init_lock);
	if (init_done)
		goto unlock;
	RSEQ_WRITE_ONCE(init_done, 1);

	/*
	 * Check for glibc rseq support, if the 3 public symbols are found and
	 * the rseq_size is not zero, glibc owns the registration.
	 */
	libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
	libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
	libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
	if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
			*libc_rseq_size_p != 0) {
		/* rseq registration owned by glibc */
		rseq_offset = *libc_rseq_offset_p;
		rseq_size = *libc_rseq_size_p;
		rseq_flags = *libc_rseq_flags_p;
		rseq_feature_size = get_rseq_feature_size();

		/*
		 * The registered rseq area could be smaller than the feature
		 * size reported by the kernel auxval. Cap it to the rseq size
		 * so we don't try to access features past the end of the rseq
		 * area.
		 */
		if (rseq_feature_size > rseq_size)
			rseq_feature_size = rseq_size;
		goto unlock;
	}

	/* librseq owns the registration */
	rseq_ownership = 1;

	/* Calculate the offset of the rseq area from the thread pointer. */
	rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer();

	/* rseq flags are deprecated, always set to 0. */
	rseq_flags = 0;

	/*
	 * Check if the rseq syscall is available, if not set the size and
	 * feature_size to 0.
	 */
	if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
		rseq_size = 0;
		rseq_feature_size = 0;
		goto unlock;
	}

	/*
	 * If the feature size matches the original ABI (20), set the size to
	 * match the original ABI allocation (32), otherwise use the allocated
	 * size.
	 */
	rseq_feature_size = get_rseq_feature_size();
	if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
		rseq_size = ORIG_RSEQ_ALLOC_SIZE;
	else
		rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
unlock:
	pthread_mutex_unlock(&init_lock);
}
256
257 static __attribute__((destructor))
258 void rseq_exit(void)
259 {
260 if (!rseq_ownership)
261 return;
262 rseq_offset = 0;
263 rseq_size = -1U;
264 rseq_feature_size = -1U;
265 rseq_ownership = 0;
266 }
267
/*
 * Fallback returning the current CPU number through sched_getcpu(3)
 * when the rseq cpu_id field cannot be used. Aborts on failure.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int cpu = sched_getcpu();

	if (cpu < 0) {
		perror("sched_getcpu()");
		abort();
	}
	return (int32_t) cpu;
}
279
/*
 * Fallback querying the NUMA node of the current CPU through the
 * getcpu(2) system call. Returns the node id on success, or the
 * (negative) syscall return value on error.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_discard, node_id;
	int rc;

	rc = sys_getcpu(&cpu_discard, &node_id);
	if (rc) {
		perror("sys_getcpu()");
		return rc;
	}
	return (int32_t) node_id;
}
292
/*
 * Return the number of possible CPUs on this system, as reported by
 * the smp helper.
 */
int rseq_get_max_nr_cpus(void)
{
	int nr_cpus = get_possible_cpus_array_len();

	return nr_cpus;
}