1 // SPDX-License-Identifier: MIT
3 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6 #include <side/trace.h>
/* Top 8 bits reserved for shared tracer use. */
#if SIDE_BITS_PER_LONG == 64
# define SIDE_EVENT_ENABLED_SHARED_MASK			0xFF00000000000000ULL
# define SIDE_EVENT_ENABLED_SHARED_USER_EVENT_MASK	0x8000000000000000ULL
# define SIDE_EVENT_ENABLED_SHARED_PTRACE_MASK		0x4000000000000000ULL

/* Allow 2^56 private tracer references on an event. */
# define SIDE_EVENT_ENABLED_PRIVATE_MASK		0x00FFFFFFFFFFFFFFULL
#else
# define SIDE_EVENT_ENABLED_SHARED_MASK			0xFF000000UL
# define SIDE_EVENT_ENABLED_SHARED_USER_EVENT_MASK	0x80000000UL
# define SIDE_EVENT_ENABLED_SHARED_PTRACE_MASK		0x40000000UL

/* Allow 2^24 private tracer references on an event. */
# define SIDE_EVENT_ENABLED_PRIVATE_MASK		0x00FFFFFFUL
#endif

/* Key 0x1 is reserved for user event. */
#define SIDE_USER_EVENT_KEY	((void *)0x1UL)
/* Key 0x2 is reserved for ptrace. */
#define SIDE_PTRACE_KEY		((void *)0x2UL)
36 struct side_events_register_handle
{
37 struct side_list_node node
;
38 struct side_event_description
**events
;
42 struct side_tracer_handle
{
43 struct side_list_node node
;
44 void (*cb
)(enum side_tracer_notification notif
,
45 struct side_event_description
**events
, uint32_t nr_events
, void *priv
);
49 struct side_statedump_request_handle
{
50 struct side_list_node node
; /* RCU list. */
/*
 * Per-event tracer callback slot. The function pointer union doubles as
 * the array terminator: a NULL u.call / u.call_variadic ends iteration.
 */
struct side_callback {
	union {
		/* Non-variadic event callback. */
		void (*call)(const struct side_event_description *desc,
			const struct side_arg_vec *side_arg_vec,
			void *priv);
		/* Variadic event callback (extra dynamic struct argument). */
		void (*call_variadic)(const struct side_event_description *desc,
			const struct side_arg_vec *side_arg_vec,
			const struct side_arg_dynamic_struct *var_struct,
			void *priv);
	} u;
	void *priv;	/* Opaque tracer state passed to the callback. */
	void *key;	/* Filter key; NULL matches all (see _side_call). */
};
68 static struct side_rcu_gp_state event_rcu_gp
, statedump_rcu_gp
;
71 * Lazy initialization for early use within library constructors.
73 static bool initialized
;
75 * Do not register/unregister any more events after destructor.
77 static bool finalized
;
80 * Recursive mutex to allow tracer callbacks to use the side API.
82 static pthread_mutex_t side_event_lock
= PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP
;
83 static pthread_mutex_t side_statedump_lock
= PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP
;
85 static DEFINE_SIDE_LIST_HEAD(side_events_list
);
86 static DEFINE_SIDE_LIST_HEAD(side_tracer_list
);
87 static DEFINE_SIDE_LIST_HEAD(side_statedump_list
);
90 * Callback filter key for state dump.
92 static __thread
void *filter_key
;
95 * The empty callback has a NULL function callback pointer, which stops
96 * iteration on the array of callbacks immediately.
98 const char side_empty_callback
[sizeof(struct side_callback
)];
101 * side_ptrace_hook is a place holder for a debugger breakpoint.
102 * var_struct is NULL if not variadic.
104 void side_ptrace_hook(const struct side_event_state
*event_state
__attribute__((unused
)),
105 const struct side_arg_vec
*side_arg_vec
__attribute__((unused
)),
106 const struct side_arg_dynamic_struct
*var_struct
__attribute__((unused
)))
107 __attribute__((noinline
));
108 void side_ptrace_hook(const struct side_event_state
*event_state
__attribute__((unused
)),
109 const struct side_arg_vec
*side_arg_vec
__attribute__((unused
)),
110 const struct side_arg_dynamic_struct
*var_struct
__attribute__((unused
)))
115 void _side_call(const struct side_event_state
*event_state
, const struct side_arg_vec
*side_arg_vec
, void *key
)
117 struct side_rcu_read_state rcu_read_state
;
118 const struct side_event_state_0
*es0
;
119 const struct side_callback
*side_cb
;
122 if (side_unlikely(finalized
))
124 if (side_unlikely(!initialized
))
126 if (side_unlikely(event_state
->version
!= 0))
128 es0
= side_container_of(event_state
, const struct side_event_state_0
, parent
);
129 assert(!(es0
->desc
->flags
& SIDE_EVENT_FLAG_VARIADIC
));
130 enabled
= __atomic_load_n(&es0
->enabled
, __ATOMIC_RELAXED
);
131 if (side_unlikely(enabled
& SIDE_EVENT_ENABLED_SHARED_MASK
)) {
132 if ((enabled
& SIDE_EVENT_ENABLED_SHARED_USER_EVENT_MASK
) &&
133 (!key
|| key
== SIDE_USER_EVENT_KEY
)) {
134 // TODO: call kernel write.
136 if ((enabled
& SIDE_EVENT_ENABLED_SHARED_PTRACE_MASK
) &&
137 (!key
|| key
== SIDE_PTRACE_KEY
))
138 side_ptrace_hook(event_state
, side_arg_vec
, NULL
);
140 side_rcu_read_begin(&event_rcu_gp
, &rcu_read_state
);
141 for (side_cb
= side_rcu_dereference(es0
->callbacks
); side_cb
->u
.call
!= NULL
; side_cb
++) {
142 /* A NULL key is always a match. */
143 if (key
&& side_cb
->key
&& side_cb
->key
!= key
)
145 side_cb
->u
.call(es0
->desc
, side_arg_vec
, side_cb
->priv
);
147 side_rcu_read_end(&event_rcu_gp
, &rcu_read_state
);
150 void side_call(const struct side_event_state
*event_state
, const struct side_arg_vec
*side_arg_vec
)
152 _side_call(event_state
, side_arg_vec
, NULL
);
155 void side_statedump_call(const struct side_event_state
*event_state
, const struct side_arg_vec
*side_arg_vec
)
157 _side_call(event_state
, side_arg_vec
, filter_key
);
161 void _side_call_variadic(const struct side_event_state
*event_state
,
162 const struct side_arg_vec
*side_arg_vec
,
163 const struct side_arg_dynamic_struct
*var_struct
,
166 struct side_rcu_read_state rcu_read_state
;
167 const struct side_event_state_0
*es0
;
168 const struct side_callback
*side_cb
;
171 if (side_unlikely(finalized
))
173 if (side_unlikely(!initialized
))
175 if (side_unlikely(event_state
->version
!= 0))
177 es0
= side_container_of(event_state
, const struct side_event_state_0
, parent
);
178 assert(es0
->desc
->flags
& SIDE_EVENT_FLAG_VARIADIC
);
179 enabled
= __atomic_load_n(&es0
->enabled
, __ATOMIC_RELAXED
);
180 if (side_unlikely(enabled
& SIDE_EVENT_ENABLED_SHARED_MASK
)) {
181 if ((enabled
& SIDE_EVENT_ENABLED_SHARED_USER_EVENT_MASK
) &&
182 (!key
|| key
== SIDE_USER_EVENT_KEY
)) {
183 // TODO: call kernel write.
185 if ((enabled
& SIDE_EVENT_ENABLED_SHARED_PTRACE_MASK
) &&
186 (!key
|| key
== SIDE_PTRACE_KEY
))
187 side_ptrace_hook(event_state
, side_arg_vec
, var_struct
);
189 side_rcu_read_begin(&event_rcu_gp
, &rcu_read_state
);
190 for (side_cb
= side_rcu_dereference(es0
->callbacks
); side_cb
->u
.call_variadic
!= NULL
; side_cb
++) {
191 /* A NULL key is always a match. */
192 if (key
&& side_cb
->key
&& side_cb
->key
!= key
)
194 side_cb
->u
.call_variadic(es0
->desc
, side_arg_vec
, var_struct
, side_cb
->priv
);
196 side_rcu_read_end(&event_rcu_gp
, &rcu_read_state
);
199 void side_call_variadic(const struct side_event_state
*event_state
,
200 const struct side_arg_vec
*side_arg_vec
,
201 const struct side_arg_dynamic_struct
*var_struct
)
203 _side_call_variadic(event_state
, side_arg_vec
, var_struct
, NULL
);
206 void side_statedump_call_variadic(const struct side_event_state
*event_state
,
207 const struct side_arg_vec
*side_arg_vec
,
208 const struct side_arg_dynamic_struct
*var_struct
)
210 _side_call_variadic(event_state
, side_arg_vec
, var_struct
, filter_key
);
214 const struct side_callback
*side_tracer_callback_lookup(
215 const struct side_event_description
*desc
,
216 void *call
, void *priv
, void *key
)
218 struct side_event_state
*event_state
= side_ptr_get(desc
->state
);
219 const struct side_event_state_0
*es0
;
220 const struct side_callback
*cb
;
222 if (side_unlikely(event_state
->version
!= 0))
224 es0
= side_container_of(event_state
, const struct side_event_state_0
, parent
);
225 for (cb
= es0
->callbacks
; cb
->u
.call
!= NULL
; cb
++) {
226 if ((void *) cb
->u
.call
== call
&& cb
->priv
== priv
&& cb
->key
== key
)
233 int _side_tracer_callback_register(struct side_event_description
*desc
,
234 void *call
, void *priv
, void *key
)
236 struct side_event_state
*event_state
;
237 struct side_callback
*old_cb
, *new_cb
;
238 struct side_event_state_0
*es0
;
239 int ret
= SIDE_ERROR_OK
;
243 return SIDE_ERROR_INVAL
;
245 return SIDE_ERROR_EXITING
;
248 pthread_mutex_lock(&side_event_lock
);
249 event_state
= side_ptr_get(desc
->state
);
250 if (side_unlikely(event_state
->version
!= 0))
252 es0
= side_container_of(event_state
, struct side_event_state_0
, parent
);
253 old_nr_cb
= es0
->nr_callbacks
;
254 if (old_nr_cb
== UINT32_MAX
) {
255 ret
= SIDE_ERROR_INVAL
;
258 /* Reject duplicate (call, priv) tuples. */
259 if (side_tracer_callback_lookup(desc
, call
, priv
, key
)) {
260 ret
= SIDE_ERROR_EXIST
;
263 old_cb
= (struct side_callback
*) es0
->callbacks
;
264 /* old_nr_cb + 1 (new cb) + 1 (NULL) */
265 new_cb
= (struct side_callback
*) calloc(old_nr_cb
+ 2, sizeof(struct side_callback
));
267 ret
= SIDE_ERROR_NOMEM
;
270 memcpy(new_cb
, old_cb
, old_nr_cb
);
271 if (desc
->flags
& SIDE_EVENT_FLAG_VARIADIC
)
272 new_cb
[old_nr_cb
].u
.call_variadic
=
273 (side_tracer_callback_variadic_func
) call
;
275 new_cb
[old_nr_cb
].u
.call
=
276 (side_tracer_callback_func
) call
;
277 new_cb
[old_nr_cb
].priv
= priv
;
278 new_cb
[old_nr_cb
].key
= key
;
279 /* High order bits are already zeroed. */
280 side_rcu_assign_pointer(es0
->callbacks
, new_cb
);
281 side_rcu_wait_grace_period(&event_rcu_gp
);
285 /* Increment concurrently with kernel setting the top bits. */
287 (void) __atomic_add_fetch(&es0
->enabled
, 1, __ATOMIC_RELAXED
);
289 pthread_mutex_unlock(&side_event_lock
);
293 int side_tracer_callback_register(struct side_event_description
*desc
,
294 side_tracer_callback_func call
,
295 void *priv
, void *key
)
297 if (desc
->flags
& SIDE_EVENT_FLAG_VARIADIC
)
298 return SIDE_ERROR_INVAL
;
299 return _side_tracer_callback_register(desc
, (void *) call
, priv
, key
);
302 int side_tracer_callback_variadic_register(struct side_event_description
*desc
,
303 side_tracer_callback_variadic_func call_variadic
,
304 void *priv
, void *key
)
306 if (!(desc
->flags
& SIDE_EVENT_FLAG_VARIADIC
))
307 return SIDE_ERROR_INVAL
;
308 return _side_tracer_callback_register(desc
, (void *) call_variadic
, priv
, key
);
311 static int _side_tracer_callback_unregister(struct side_event_description
*desc
,
312 void *call
, void *priv
, void *key
)
314 struct side_event_state
*event_state
;
315 struct side_callback
*old_cb
, *new_cb
;
316 const struct side_callback
*cb_pos
;
317 struct side_event_state_0
*es0
;
319 int ret
= SIDE_ERROR_OK
;
323 return SIDE_ERROR_INVAL
;
325 return SIDE_ERROR_EXITING
;
328 pthread_mutex_lock(&side_event_lock
);
329 event_state
= side_ptr_get(desc
->state
);
330 if (side_unlikely(event_state
->version
!= 0))
332 es0
= side_container_of(event_state
, struct side_event_state_0
, parent
);
333 cb_pos
= side_tracer_callback_lookup(desc
, call
, priv
, key
);
335 ret
= SIDE_ERROR_NOENT
;
338 old_nr_cb
= es0
->nr_callbacks
;
339 old_cb
= (struct side_callback
*) es0
->callbacks
;
340 if (old_nr_cb
== 1) {
341 new_cb
= (struct side_callback
*) &side_empty_callback
;
343 pos_idx
= cb_pos
- es0
->callbacks
;
344 /* Remove entry at pos_idx. */
345 /* old_nr_cb - 1 (removed cb) + 1 (NULL) */
346 new_cb
= (struct side_callback
*) calloc(old_nr_cb
, sizeof(struct side_callback
));
348 ret
= SIDE_ERROR_NOMEM
;
351 memcpy(new_cb
, old_cb
, pos_idx
);
352 memcpy(&new_cb
[pos_idx
], &old_cb
[pos_idx
+ 1], old_nr_cb
- pos_idx
- 1);
354 /* High order bits are already zeroed. */
355 side_rcu_assign_pointer(es0
->callbacks
, new_cb
);
356 side_rcu_wait_grace_period(&event_rcu_gp
);
359 /* Decrement concurrently with kernel setting the top bits. */
361 (void) __atomic_add_fetch(&es0
->enabled
, -1, __ATOMIC_RELAXED
);
363 pthread_mutex_unlock(&side_event_lock
);
367 int side_tracer_callback_unregister(struct side_event_description
*desc
,
368 side_tracer_callback_func call
,
369 void *priv
, void *key
)
371 if (desc
->flags
& SIDE_EVENT_FLAG_VARIADIC
)
372 return SIDE_ERROR_INVAL
;
373 return _side_tracer_callback_unregister(desc
, (void *) call
, priv
, key
);
376 int side_tracer_callback_variadic_unregister(struct side_event_description
*desc
,
377 side_tracer_callback_variadic_func call_variadic
,
378 void *priv
, void *key
)
380 if (!(desc
->flags
& SIDE_EVENT_FLAG_VARIADIC
))
381 return SIDE_ERROR_INVAL
;
382 return _side_tracer_callback_unregister(desc
, (void *) call_variadic
, priv
, key
);
385 struct side_events_register_handle
*side_events_register(struct side_event_description
**events
, uint32_t nr_events
)
387 struct side_events_register_handle
*events_handle
= NULL
;
388 struct side_tracer_handle
*tracer_handle
;
394 events_handle
= (struct side_events_register_handle
*)
395 calloc(1, sizeof(struct side_events_register_handle
));
398 events_handle
->events
= events
;
399 events_handle
->nr_events
= nr_events
;
401 pthread_mutex_lock(&side_event_lock
);
402 side_list_insert_node_tail(&side_events_list
, &events_handle
->node
);
403 side_list_for_each_entry(tracer_handle
, &side_tracer_list
, node
) {
404 tracer_handle
->cb(SIDE_TRACER_NOTIFICATION_INSERT_EVENTS
,
405 events
, nr_events
, tracer_handle
->priv
);
407 pthread_mutex_unlock(&side_event_lock
);
408 //TODO: call event batch register ioctl
409 return events_handle
;
413 void side_event_remove_callbacks(struct side_event_description
*desc
)
415 struct side_event_state
*event_state
= side_ptr_get(desc
->state
);
416 struct side_event_state_0
*es0
;
417 struct side_callback
*old_cb
;
420 if (side_unlikely(event_state
->version
!= 0))
422 es0
= side_container_of(event_state
, struct side_event_state_0
, parent
);
423 nr_cb
= es0
->nr_callbacks
;
426 old_cb
= (struct side_callback
*) es0
->callbacks
;
427 (void) __atomic_add_fetch(&es0
->enabled
, -1, __ATOMIC_RELAXED
);
429 * Setting the state back to 0 cb and empty callbacks out of
430 * caution. This should not matter because instrumentation is
433 es0
->nr_callbacks
= 0;
434 side_rcu_assign_pointer(es0
->callbacks
, &side_empty_callback
);
436 * No need to wait for grace period because instrumentation is
443 * Unregister event handle. At this point, all side events in that
444 * handle should be unreachable.
446 void side_events_unregister(struct side_events_register_handle
*events_handle
)
448 struct side_tracer_handle
*tracer_handle
;
457 pthread_mutex_lock(&side_event_lock
);
458 side_list_remove_node(&events_handle
->node
);
459 side_list_for_each_entry(tracer_handle
, &side_tracer_list
, node
) {
460 tracer_handle
->cb(SIDE_TRACER_NOTIFICATION_REMOVE_EVENTS
,
461 events_handle
->events
, events_handle
->nr_events
,
462 tracer_handle
->priv
);
464 for (i
= 0; i
< events_handle
->nr_events
; i
++) {
465 struct side_event_description
*event
= events_handle
->events
[i
];
467 /* Skip NULL pointers */
470 side_event_remove_callbacks(event
);
472 pthread_mutex_unlock(&side_event_lock
);
473 //TODO: call event batch unregister ioctl
477 struct side_tracer_handle
*side_tracer_event_notification_register(
478 void (*cb
)(enum side_tracer_notification notif
,
479 struct side_event_description
**events
, uint32_t nr_events
, void *priv
),
482 struct side_tracer_handle
*tracer_handle
;
483 struct side_events_register_handle
*events_handle
;
489 tracer_handle
= (struct side_tracer_handle
*)
490 calloc(1, sizeof(struct side_tracer_handle
));
493 pthread_mutex_lock(&side_event_lock
);
494 tracer_handle
->cb
= cb
;
495 tracer_handle
->priv
= priv
;
496 side_list_insert_node_tail(&side_tracer_list
, &tracer_handle
->node
);
497 side_list_for_each_entry(events_handle
, &side_events_list
, node
) {
498 cb(SIDE_TRACER_NOTIFICATION_INSERT_EVENTS
,
499 events_handle
->events
, events_handle
->nr_events
, priv
);
501 pthread_mutex_unlock(&side_event_lock
);
502 return tracer_handle
;
505 void side_tracer_event_notification_unregister(struct side_tracer_handle
*tracer_handle
)
507 struct side_events_register_handle
*events_handle
;
513 pthread_mutex_lock(&side_event_lock
);
514 side_list_for_each_entry(events_handle
, &side_events_list
, node
) {
515 tracer_handle
->cb(SIDE_TRACER_NOTIFICATION_REMOVE_EVENTS
,
516 events_handle
->events
, events_handle
->nr_events
,
517 tracer_handle
->priv
);
519 side_list_remove_node(&tracer_handle
->node
);
520 pthread_mutex_unlock(&side_event_lock
);
524 struct side_statedump_request_handle
*side_statedump_request_notification_register(void (*statedump_cb
)(void))
526 struct side_statedump_request_handle
*handle
;
533 * The statedump request notification should not be registered
534 * from a notification callback.
536 assert(filter_key
== NULL
);
537 handle
= (struct side_statedump_request_handle
*)
538 calloc(1, sizeof(struct side_statedump_request_handle
));
541 handle
->cb
= statedump_cb
;
543 pthread_mutex_lock(&side_statedump_lock
);
544 side_list_insert_node_tail_rcu(&side_statedump_list
, &handle
->node
);
545 pthread_mutex_unlock(&side_statedump_lock
);
547 /* Invoke callback for all tracers. */
553 void side_statedump_request_notification_unregister(struct side_statedump_request_handle
*handle
)
559 assert(filter_key
== NULL
);
561 pthread_mutex_lock(&side_statedump_lock
);
562 side_list_remove_node_rcu(&handle
->node
);
563 pthread_mutex_unlock(&side_statedump_lock
);
565 side_rcu_wait_grace_period(&statedump_rcu_gp
);
569 void side_tracer_statedump_request(void *key
)
571 struct side_statedump_request_handle
*handle
;
572 struct side_rcu_read_state rcu_read_state
;
574 /* Invoke the state dump callback specifically for the tracer key. */
576 side_rcu_read_begin(&statedump_rcu_gp
, &rcu_read_state
);
577 side_list_for_each_entry_rcu(handle
, &side_statedump_list
, node
)
579 side_rcu_read_end(&statedump_rcu_gp
, &rcu_read_state
);
587 side_rcu_gp_init(&event_rcu_gp
);
588 side_rcu_gp_init(&statedump_rcu_gp
);
593 * side_exit() is executed from a library destructor. It can be called
594 * explicitly at application exit as well. Concurrent side API use is
595 * not expected at that point.
599 struct side_events_register_handle
*handle
, *tmp
;
603 side_list_for_each_entry_safe(handle
, tmp
, &side_events_list
, node
)
604 side_events_unregister(handle
);
605 side_rcu_gp_exit(&event_rcu_gp
);
606 side_rcu_gp_exit(&statedump_rcu_gp
);