Use RCU for statedump
[libside.git] / src / side.c
// SPDX-License-Identifier: MIT
/*
 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 */

#include <side/trace.h>
#include <string.h>
#include <assert.h>
#include <pthread.h>	/* pthread_mutex_t */
#include <stdlib.h>	/* calloc(), free(), abort() */

#include "rcu.h"
#include "list.h"
#include "rculist.h"

/* Top 8 bits reserved for kernel tracer use. */
#if SIDE_BITS_PER_LONG == 64
# define SIDE_EVENT_ENABLED_KERNEL_MASK			0xFF00000000000000ULL
# define SIDE_EVENT_ENABLED_KERNEL_USER_EVENT_MASK	0x8000000000000000ULL

/* Allow 2^56 tracer references on an event. */
# define SIDE_EVENT_ENABLED_USER_MASK			0x00FFFFFFFFFFFFFFULL
#else
# define SIDE_EVENT_ENABLED_KERNEL_MASK			0xFF000000UL
# define SIDE_EVENT_ENABLED_KERNEL_USER_EVENT_MASK	0x80000000UL

/* Allow 2^24 tracer references on an event. */
# define SIDE_EVENT_ENABLED_USER_MASK			0x00FFFFFFUL
#endif

struct side_events_register_handle {
	struct side_list_node node;
	struct side_event_description **events;
	uint32_t nr_events;
};

struct side_tracer_handle {
	struct side_list_node node;
	void (*cb)(enum side_tracer_notification notif,
		struct side_event_description **events, uint32_t nr_events, void *priv);
	void *priv;
};

struct side_statedump_request_handle {
	struct side_list_node node;	/* RCU list. */
	void (*cb)(void);
};

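/*
 * Each registered callback carries an opaque key in addition to its
 * private data. A state dump performed with a non-NULL filter key only
 * invokes callbacks whose key matches (callbacks registered with a NULL
 * key always match).
 */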
struct side_callback {
	union {
		void (*call)(const struct side_event_description *desc,
			const struct side_arg_vec *side_arg_vec,
			void *priv);
		void (*call_variadic)(const struct side_event_description *desc,
			const struct side_arg_vec *side_arg_vec,
			const struct side_arg_dynamic_struct *var_struct,
			void *priv);
	} u;
	void *priv;
	void *key;
};

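/*
 * Two independent RCU grace period domains: event_rcu_gp protects the
 * per-event callback arrays, statedump_rcu_gp protects the RCU list of
 * state dump request handles.
 */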
static struct side_rcu_gp_state event_rcu_gp, statedump_rcu_gp;

/*
 * Lazy initialization for early use within library constructors.
 */
static bool initialized;
/*
 * Do not register/unregister any more events after destructor.
 */
static bool finalized;

/*
 * Recursive mutexes to allow tracer callbacks to use the side API.
 */
static pthread_mutex_t side_event_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
static pthread_mutex_t side_statedump_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;

static DEFINE_SIDE_LIST_HEAD(side_events_list);
static DEFINE_SIDE_LIST_HEAD(side_tracer_list);
static DEFINE_SIDE_LIST_HEAD(side_statedump_list);

/*
 * Thread-local callback filter key for state dump: when non-NULL, only
 * callbacks registered with a matching key (or with no key) are invoked.
 */
static __thread void *filter_key;

/*
 * The empty callback has a NULL function callback pointer, which stops
 * iteration on the array of callbacks immediately.
 */
const char side_empty_callback[sizeof(struct side_callback)];

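/*
 * Invoke the callbacks registered for an event under RCU read-side
 * protection of the callback array. A non-NULL key restricts invocation
 * to callbacks registered with a matching (or NULL) key, which is how a
 * state dump is routed to a single requesting tracer.
 */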
static
void _side_call(const struct side_event_state *event_state, const struct side_arg_vec *side_arg_vec, void *key)
{
	struct side_rcu_read_state rcu_read_state;
	const struct side_event_state_0 *es0;
	const struct side_callback *side_cb;
	uintptr_t enabled;

	if (side_unlikely(finalized))
		return;
	if (side_unlikely(!initialized))
		side_init();
	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, const struct side_event_state_0, parent);
	assert(!(es0->desc->flags & SIDE_EVENT_FLAG_VARIADIC));
	enabled = __atomic_load_n(&es0->enabled, __ATOMIC_RELAXED);
	if (side_unlikely(enabled & SIDE_EVENT_ENABLED_KERNEL_USER_EVENT_MASK)) {
		// TODO: call kernel write.
	}
	side_rcu_read_begin(&event_rcu_gp, &rcu_read_state);
	for (side_cb = side_rcu_dereference(es0->callbacks); side_cb->u.call != NULL; side_cb++) {
		/* A NULL key is always a match. */
		if (key && side_cb->key && side_cb->key != key)
			continue;
		side_cb->u.call(es0->desc, side_arg_vec, side_cb->priv);
	}
	side_rcu_read_end(&event_rcu_gp, &rcu_read_state);
}

void side_call(const struct side_event_state *event_state, const struct side_arg_vec *side_arg_vec)
{
	_side_call(event_state, side_arg_vec, NULL);
}

void side_statedump_call(const struct side_event_state *event_state, const struct side_arg_vec *side_arg_vec)
{
	_side_call(event_state, side_arg_vec, filter_key);
}

static
void _side_call_variadic(const struct side_event_state *event_state,
	const struct side_arg_vec *side_arg_vec,
	const struct side_arg_dynamic_struct *var_struct,
	void *key)
{
	struct side_rcu_read_state rcu_read_state;
	const struct side_event_state_0 *es0;
	const struct side_callback *side_cb;
	uintptr_t enabled;

	if (side_unlikely(finalized))
		return;
	if (side_unlikely(!initialized))
		side_init();
	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, const struct side_event_state_0, parent);
	assert(es0->desc->flags & SIDE_EVENT_FLAG_VARIADIC);
	enabled = __atomic_load_n(&es0->enabled, __ATOMIC_RELAXED);
	if (side_unlikely(enabled & SIDE_EVENT_ENABLED_KERNEL_USER_EVENT_MASK)) {
		// TODO: call kernel write.
	}
	side_rcu_read_begin(&event_rcu_gp, &rcu_read_state);
	for (side_cb = side_rcu_dereference(es0->callbacks); side_cb->u.call_variadic != NULL; side_cb++) {
		/* A NULL key is always a match. */
		if (key && side_cb->key && side_cb->key != key)
			continue;
		side_cb->u.call_variadic(es0->desc, side_arg_vec, var_struct, side_cb->priv);
	}
	side_rcu_read_end(&event_rcu_gp, &rcu_read_state);
}

void side_call_variadic(const struct side_event_state *event_state,
	const struct side_arg_vec *side_arg_vec,
	const struct side_arg_dynamic_struct *var_struct)
{
	_side_call_variadic(event_state, side_arg_vec, var_struct, NULL);
}

void side_statedump_call_variadic(const struct side_event_state *event_state,
	const struct side_arg_vec *side_arg_vec,
	const struct side_arg_dynamic_struct *var_struct)
{
	_side_call_variadic(event_state, side_arg_vec, var_struct, filter_key);
}

static
const struct side_callback *side_tracer_callback_lookup(
		const struct side_event_description *desc,
		void *call, void *priv, void *key)
{
	struct side_event_state *event_state = side_ptr_get(desc->state);
	const struct side_event_state_0 *es0;
	const struct side_callback *cb;

	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, const struct side_event_state_0, parent);
	for (cb = es0->callbacks; cb->u.call != NULL; cb++) {
		if ((void *) cb->u.call == call && cb->priv == priv && cb->key == key)
			return cb;
	}
	return NULL;
}

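/*
 * Callback arrays are updated by copy: a new array is allocated,
 * published with side_rcu_assign_pointer(), and the old array is only
 * freed after a grace period on event_rcu_gp, so concurrent side_call()
 * iterations never observe a partially updated array.
 */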
static
int _side_tracer_callback_register(struct side_event_description *desc,
		void *call, void *priv, void *key)
{
	struct side_event_state *event_state;
	struct side_callback *old_cb, *new_cb;
	struct side_event_state_0 *es0;
	int ret = SIDE_ERROR_OK;
	uint32_t old_nr_cb;

	if (!call)
		return SIDE_ERROR_INVAL;
	if (finalized)
		return SIDE_ERROR_EXITING;
	if (!initialized)
		side_init();
	pthread_mutex_lock(&side_event_lock);
	event_state = side_ptr_get(desc->state);
	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, struct side_event_state_0, parent);
	old_nr_cb = es0->nr_callbacks;
	if (old_nr_cb == UINT32_MAX) {
		ret = SIDE_ERROR_INVAL;
		goto unlock;
	}
	/* Reject duplicate (call, priv, key) tuples. */
	if (side_tracer_callback_lookup(desc, call, priv, key)) {
		ret = SIDE_ERROR_EXIST;
		goto unlock;
	}
	old_cb = (struct side_callback *) es0->callbacks;
	/* old_nr_cb + 1 (new cb) + 1 (NULL) */
	new_cb = (struct side_callback *) calloc(old_nr_cb + 2, sizeof(struct side_callback));
	if (!new_cb) {
		ret = SIDE_ERROR_NOMEM;
		goto unlock;
	}
	memcpy(new_cb, old_cb, old_nr_cb * sizeof(struct side_callback));
	if (desc->flags & SIDE_EVENT_FLAG_VARIADIC)
		new_cb[old_nr_cb].u.call_variadic =
			(side_tracer_callback_variadic_func) call;
	else
		new_cb[old_nr_cb].u.call =
			(side_tracer_callback_func) call;
	new_cb[old_nr_cb].priv = priv;
	new_cb[old_nr_cb].key = key;
	/* High order bits are already zeroed. */
	side_rcu_assign_pointer(es0->callbacks, new_cb);
	side_rcu_wait_grace_period(&event_rcu_gp);
	if (old_nr_cb)
		free(old_cb);
	es0->nr_callbacks++;
	/* Increment concurrently with kernel setting the top bits. */
	if (!old_nr_cb)
		(void) __atomic_add_fetch(&es0->enabled, 1, __ATOMIC_RELAXED);
unlock:
	pthread_mutex_unlock(&side_event_lock);
	return ret;
}

int side_tracer_callback_register(struct side_event_description *desc,
		side_tracer_callback_func call,
		void *priv, void *key)
{
	if (desc->flags & SIDE_EVENT_FLAG_VARIADIC)
		return SIDE_ERROR_INVAL;
	return _side_tracer_callback_register(desc, (void *) call, priv, key);
}

int side_tracer_callback_variadic_register(struct side_event_description *desc,
		side_tracer_callback_variadic_func call_variadic,
		void *priv, void *key)
{
	if (!(desc->flags & SIDE_EVENT_FLAG_VARIADIC))
		return SIDE_ERROR_INVAL;
	return _side_tracer_callback_register(desc, (void *) call_variadic, priv, key);
}

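/*
 * Unregistration mirrors registration: the remaining callbacks are
 * copied into a new array (or replaced by the empty callback sentinel
 * when the last callback is removed), which is published with RCU
 * before the old array is freed.
 */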
static
int _side_tracer_callback_unregister(struct side_event_description *desc,
		void *call, void *priv, void *key)
{
	struct side_event_state *event_state;
	struct side_callback *old_cb, *new_cb;
	const struct side_callback *cb_pos;
	struct side_event_state_0 *es0;
	uint32_t pos_idx;
	int ret = SIDE_ERROR_OK;
	uint32_t old_nr_cb;

	if (!call)
		return SIDE_ERROR_INVAL;
	if (finalized)
		return SIDE_ERROR_EXITING;
	if (!initialized)
		side_init();
	pthread_mutex_lock(&side_event_lock);
	event_state = side_ptr_get(desc->state);
	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, struct side_event_state_0, parent);
	cb_pos = side_tracer_callback_lookup(desc, call, priv, key);
	if (!cb_pos) {
		ret = SIDE_ERROR_NOENT;
		goto unlock;
	}
	old_nr_cb = es0->nr_callbacks;
	old_cb = (struct side_callback *) es0->callbacks;
	if (old_nr_cb == 1) {
		new_cb = (struct side_callback *) &side_empty_callback;
	} else {
		pos_idx = cb_pos - es0->callbacks;
		/* Remove entry at pos_idx. */
		/* old_nr_cb - 1 (removed cb) + 1 (NULL) */
		new_cb = (struct side_callback *) calloc(old_nr_cb, sizeof(struct side_callback));
		if (!new_cb) {
			ret = SIDE_ERROR_NOMEM;
			goto unlock;
		}
		memcpy(new_cb, old_cb, pos_idx * sizeof(struct side_callback));
		memcpy(&new_cb[pos_idx], &old_cb[pos_idx + 1],
			(old_nr_cb - pos_idx - 1) * sizeof(struct side_callback));
	}
	/* High order bits are already zeroed. */
	side_rcu_assign_pointer(es0->callbacks, new_cb);
	side_rcu_wait_grace_period(&event_rcu_gp);
	free(old_cb);
	es0->nr_callbacks--;
	/* Decrement concurrently with kernel setting the top bits. */
	if (old_nr_cb == 1)
		(void) __atomic_add_fetch(&es0->enabled, -1, __ATOMIC_RELAXED);
unlock:
	pthread_mutex_unlock(&side_event_lock);
	return ret;
}

int side_tracer_callback_unregister(struct side_event_description *desc,
		side_tracer_callback_func call,
		void *priv, void *key)
{
	if (desc->flags & SIDE_EVENT_FLAG_VARIADIC)
		return SIDE_ERROR_INVAL;
	return _side_tracer_callback_unregister(desc, (void *) call, priv, key);
}

int side_tracer_callback_variadic_unregister(struct side_event_description *desc,
		side_tracer_callback_variadic_func call_variadic,
		void *priv, void *key)
{
	if (!(desc->flags & SIDE_EVENT_FLAG_VARIADIC))
		return SIDE_ERROR_INVAL;
	return _side_tracer_callback_unregister(desc, (void *) call_variadic, priv, key);
}

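/*
 * Register a batch of event descriptions and notify every registered
 * tracer with SIDE_TRACER_NOTIFICATION_INSERT_EVENTS.
 */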
struct side_events_register_handle *side_events_register(struct side_event_description **events, uint32_t nr_events)
{
	struct side_events_register_handle *events_handle = NULL;
	struct side_tracer_handle *tracer_handle;

	if (finalized)
		return NULL;
	if (!initialized)
		side_init();
	events_handle = (struct side_events_register_handle *)
			calloc(1, sizeof(struct side_events_register_handle));
	if (!events_handle)
		return NULL;
	events_handle->events = events;
	events_handle->nr_events = nr_events;

	pthread_mutex_lock(&side_event_lock);
	side_list_insert_node_tail(&side_events_list, &events_handle->node);
	side_list_for_each_entry(tracer_handle, &side_tracer_list, node) {
		tracer_handle->cb(SIDE_TRACER_NOTIFICATION_INSERT_EVENTS,
			events, nr_events, tracer_handle->priv);
	}
	pthread_mutex_unlock(&side_event_lock);
	//TODO: call event batch register ioctl
	return events_handle;
}

static
void side_event_remove_callbacks(struct side_event_description *desc)
{
	struct side_event_state *event_state = side_ptr_get(desc->state);
	struct side_event_state_0 *es0;
	struct side_callback *old_cb;
	uint32_t nr_cb;

	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, struct side_event_state_0, parent);
	nr_cb = es0->nr_callbacks;
	if (!nr_cb)
		return;
	old_cb = (struct side_callback *) es0->callbacks;
	(void) __atomic_add_fetch(&es0->enabled, -1, __ATOMIC_RELAXED);
	/*
	 * Setting the state back to 0 cb and empty callbacks out of
	 * caution. This should not matter because instrumentation is
	 * unreachable.
	 */
	es0->nr_callbacks = 0;
	side_rcu_assign_pointer(es0->callbacks, &side_empty_callback);
	/*
	 * No need to wait for grace period because instrumentation is
	 * unreachable.
	 */
	free(old_cb);
}

/*
 * Unregister event handle. At this point, all side events in that
 * handle should be unreachable.
 */
void side_events_unregister(struct side_events_register_handle *events_handle)
{
	struct side_tracer_handle *tracer_handle;
	uint32_t i;

	if (!events_handle)
		return;
	if (finalized)
		return;
	if (!initialized)
		side_init();
	pthread_mutex_lock(&side_event_lock);
	side_list_remove_node(&events_handle->node);
	side_list_for_each_entry(tracer_handle, &side_tracer_list, node) {
		tracer_handle->cb(SIDE_TRACER_NOTIFICATION_REMOVE_EVENTS,
			events_handle->events, events_handle->nr_events,
			tracer_handle->priv);
	}
	for (i = 0; i < events_handle->nr_events; i++) {
		struct side_event_description *event = events_handle->events[i];

		/* Skip NULL pointers */
		if (!event)
			continue;
		side_event_remove_callbacks(event);
	}
	pthread_mutex_unlock(&side_event_lock);
	//TODO: call event batch unregister ioctl
	free(events_handle);
}

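/*
 * Register a tracer notification callback. The callback is immediately
 * invoked with SIDE_TRACER_NOTIFICATION_INSERT_EVENTS for every event
 * handle already registered, so the tracer does not miss pre-existing
 * events.
 */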
struct side_tracer_handle *side_tracer_event_notification_register(
		void (*cb)(enum side_tracer_notification notif,
			struct side_event_description **events, uint32_t nr_events, void *priv),
		void *priv)
{
	struct side_tracer_handle *tracer_handle;
	struct side_events_register_handle *events_handle;

	if (finalized)
		return NULL;
	if (!initialized)
		side_init();
	tracer_handle = (struct side_tracer_handle *)
			calloc(1, sizeof(struct side_tracer_handle));
	if (!tracer_handle)
		return NULL;
	pthread_mutex_lock(&side_event_lock);
	tracer_handle->cb = cb;
	tracer_handle->priv = priv;
	side_list_insert_node_tail(&side_tracer_list, &tracer_handle->node);
	side_list_for_each_entry(events_handle, &side_events_list, node) {
		cb(SIDE_TRACER_NOTIFICATION_INSERT_EVENTS,
			events_handle->events, events_handle->nr_events, priv);
	}
	pthread_mutex_unlock(&side_event_lock);
	return tracer_handle;
}

void side_tracer_event_notification_unregister(struct side_tracer_handle *tracer_handle)
{
	struct side_events_register_handle *events_handle;

	if (finalized)
		return;
	if (!initialized)
		side_init();
	pthread_mutex_lock(&side_event_lock);
	side_list_for_each_entry(events_handle, &side_events_list, node) {
		tracer_handle->cb(SIDE_TRACER_NOTIFICATION_REMOVE_EVENTS,
			events_handle->events, events_handle->nr_events,
			tracer_handle->priv);
	}
	side_list_remove_node(&tracer_handle->node);
	pthread_mutex_unlock(&side_event_lock);
	free(tracer_handle);
}

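/*
 * Register an application state dump callback. The handle is inserted
 * into an RCU list so side_tracer_statedump_request() can walk it
 * without holding side_statedump_lock, and the callback is invoked once
 * immediately (with a NULL filter key) so all currently registered
 * tracers receive the initial state.
 */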
struct side_statedump_request_handle *side_statedump_request_notification_register(void (*statedump_cb)(void))
{
	struct side_statedump_request_handle *handle;

	if (finalized)
		return NULL;
	if (!initialized)
		side_init();
	/*
	 * The statedump request notification should not be registered
	 * from a notification callback.
	 */
	assert(filter_key == NULL);
	handle = (struct side_statedump_request_handle *)
			calloc(1, sizeof(struct side_statedump_request_handle));
	if (!handle)
		return NULL;
	handle->cb = statedump_cb;

	pthread_mutex_lock(&side_statedump_lock);
	side_list_insert_node_tail_rcu(&side_statedump_list, &handle->node);
	pthread_mutex_unlock(&side_statedump_lock);

	/* Invoke callback for all tracers. */
	statedump_cb();

	return handle;
}

void side_statedump_request_notification_unregister(struct side_statedump_request_handle *handle)
{
	if (finalized)
		return;
	if (!initialized)
		side_init();
	assert(filter_key == NULL);

	pthread_mutex_lock(&side_statedump_lock);
	side_list_remove_node_rcu(&handle->node);
	pthread_mutex_unlock(&side_statedump_lock);

	side_rcu_wait_grace_period(&statedump_rcu_gp);
	free(handle);
}

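/*
 * Request a state dump on behalf of a single tracer, identified by the
 * same key it passed when registering its event callbacks. The state
 * dump handles are walked under RCU, and filter_key ensures that the
 * resulting side_statedump_call() events only reach that tracer.
 *
 * Hypothetical usage sketch (illustrative names, not part of this file):
 *
 *	// Application side: provide state to replay on request.
 *	static void app_statedump_cb(void)
 *	{
 *		// emit current state through side_statedump_call() events
 *	}
 *	statedump_handle = side_statedump_request_notification_register(app_statedump_cb);
 *
 *	// Tracer side: replay state for the callbacks registered with tracer_key.
 *	side_tracer_statedump_request(tracer_key);
 */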
void side_tracer_statedump_request(void *key)
{
	struct side_statedump_request_handle *handle;
	struct side_rcu_read_state rcu_read_state;

	/* Invoke the state dump callback specifically for the tracer key. */
	filter_key = key;
	side_rcu_read_begin(&statedump_rcu_gp, &rcu_read_state);
	side_list_for_each_entry_rcu(handle, &side_statedump_list, node)
		handle->cb();
	side_rcu_read_end(&statedump_rcu_gp, &rcu_read_state);
	filter_key = NULL;
}

void side_init(void)
{
	if (initialized)
		return;
	side_rcu_gp_init(&event_rcu_gp);
	side_rcu_gp_init(&statedump_rcu_gp);
	initialized = true;
}

/*
 * side_exit() is executed from a library destructor. It can be called
 * explicitly at application exit as well. Concurrent side API use is
 * not expected at that point.
 */
void side_exit(void)
{
	struct side_events_register_handle *handle, *tmp;

	if (finalized)
		return;
	side_list_for_each_entry_safe(handle, tmp, &side_events_list, node)
		side_events_unregister(handle);
	side_rcu_gp_exit(&event_rcu_gp);
	side_rcu_gp_exit(&statedump_rcu_gp);
	finalized = true;
}