Handle statedump agent thread state across fork
[libside.git] / src / side.c
1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 */
5
6 #include <side/trace.h>
7 #include <string.h>
8 #include <assert.h>
9 #include <pthread.h>
10 #include <unistd.h>
11 #include <poll.h>
12
13 #include "compiler.h"
14 #include "rcu.h"
15 #include "list.h"
16 #include "rculist.h"
17
18 /* Top 8 bits reserved for shared tracer use. */
19 #if SIDE_BITS_PER_LONG == 64
20 # define SIDE_EVENT_ENABLED_SHARED_MASK 0xFF00000000000000ULL
21 # define SIDE_EVENT_ENABLED_SHARED_USER_EVENT_MASK 0x8000000000000000ULL
22 # define SIDE_EVENT_ENABLED_SHARED_PTRACE_MASK 0x4000000000000000ULL
23
24 /* Allow 2^56 private tracer references on an event. */
25 # define SIDE_EVENT_ENABLED_PRIVATE_MASK 0x00FFFFFFFFFFFFFFULL
26 #else
27 # define SIDE_EVENT_ENABLED_SHARED_MASK 0xFF000000UL
28 # define SIDE_EVENT_ENABLED_SHARED_USER_EVENT_MASK 0x80000000UL
29 # define SIDE_EVENT_ENABLED_SHARED_PTRACE_MASK 0x40000000UL
30
31 /* Allow 2^24 private tracer references on an event. */
32 # define SIDE_EVENT_ENABLED_PRIVATE_MASK 0x00FFFFFFUL
33 #endif
34
35 #define SIDE_KEY_RESERVED_RANGE_END 0x8
36
37 /* Key 0x0 is reserved to match all. */
38 #define SIDE_KEY_MATCH_ALL 0x0
39 /* Key 0x1 is reserved for user event. */
40 #define SIDE_KEY_USER_EVENT 0x1
41 /* Key 0x2 is reserved for ptrace. */
42 #define SIDE_KEY_PTRACE 0x2
43
44 #define SIDE_RETRY_BUSY_LOOP_ATTEMPTS 100
45 #define SIDE_RETRY_DELAY_MS 1
46
47 struct side_events_register_handle {
48 struct side_list_node node;
49 struct side_event_description **events;
50 uint32_t nr_events;
51 };
52
53 struct side_tracer_handle {
54 struct side_list_node node;
55 void (*cb)(enum side_tracer_notification notif,
56 struct side_event_description **events, uint32_t nr_events, void *priv);
57 void *priv;
58 };
59
60 struct side_statedump_notification {
61 struct side_list_node node;
62 uint64_t key;
63 };
64
65 struct side_statedump_request_handle {
66 struct side_list_node node; /* Statedump request RCU list node. */
67 struct side_list_head notification_queue; /* Queue of struct side_statedump_notification */
68 void (*cb)(void);
69 char *name;
70 enum side_statedump_mode mode;
71 };
72
73 struct side_callback {
74 union {
75 void (*call)(const struct side_event_description *desc,
76 const struct side_arg_vec *side_arg_vec,
77 void *priv);
78 void (*call_variadic)(const struct side_event_description *desc,
79 const struct side_arg_vec *side_arg_vec,
80 const struct side_arg_dynamic_struct *var_struct,
81 void *priv);
82 } u;
83 void *priv;
84 uint64_t key;
85 };
86
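/*
 * Agent thread state flags. BLOCKED means no flag is set and the agent
 * thread sleeps on its condition variable. HANDLE_REQUEST wakes it up to
 * process queued statedump notifications, EXIT requests termination, and
 * PAUSE/PAUSE_ACK implement the pause handshake used around fork().
 */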
87 enum agent_thread_state {
88 AGENT_THREAD_STATE_BLOCKED = 0,
89 AGENT_THREAD_STATE_HANDLE_REQUEST = (1 << 0),
90 AGENT_THREAD_STATE_EXIT = (1 << 1),
91 AGENT_THREAD_STATE_PAUSE = (1 << 2),
92 AGENT_THREAD_STATE_PAUSE_ACK = (1 << 3),
93 };
94
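/*
 * Statedump agent thread state. The reference count is protected by
 * side_agent_thread_lock, the state flags are read and updated with
 * atomic operations, and both condition variables are used together
 * with side_statedump_lock.
 */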
95 struct statedump_agent_thread {
96 long ref;
97 pthread_t id;
98 enum agent_thread_state state;
99 pthread_cond_t worker_cond;
100 pthread_cond_t waiter_cond;
101 };
102
103 static struct side_rcu_gp_state event_rcu_gp, statedump_rcu_gp;
104
105 /*
106 * Lazy initialization for early use within library constructors.
107 */
108 static bool initialized;
109 /*
110 * Do not register/unregister any more events after destructor.
111 */
112 static bool finalized;
113
114 /*
115 * Recursive mutex to allow tracer callbacks to use the side API.
116 */
117 static pthread_mutex_t side_event_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
118 static pthread_mutex_t side_statedump_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
119 static pthread_mutex_t side_key_lock = PTHREAD_MUTEX_INITIALIZER;
120 /*
121  * The side_agent_thread_lock protects the lifetime of the agent
122  * thread: reference counting, creation, and join. The agent thread
123  * itself never takes this lock, so there is no circular dependency
124  * between this lock and pthread_join().
125  * The side_statedump_lock nests inside the side_agent_thread_lock.
126 */
127 static pthread_mutex_t side_agent_thread_lock = PTHREAD_MUTEX_INITIALIZER;
128
129 /* Dynamic tracer key allocation. */
130 static uint64_t side_key_next = SIDE_KEY_RESERVED_RANGE_END;
131
132 static struct statedump_agent_thread statedump_agent_thread;
133
134 static DEFINE_SIDE_LIST_HEAD(side_events_list);
135 static DEFINE_SIDE_LIST_HEAD(side_tracer_list);
136
137 /*
138  * The statedump request list is an RCU list, which allows the agent thread
139  * to iterate over it under an RCU read-side lock.
140 */
141 static DEFINE_SIDE_LIST_HEAD(side_statedump_list);
142
143 /*
144 * Callback filter key for state dump.
145 */
146 static __thread uint64_t filter_key = SIDE_KEY_MATCH_ALL;
147
148 /*
149  * The empty callback has a NULL callback function pointer, which stops
150  * iteration over the callback array immediately.
151 */
152 const char side_empty_callback[sizeof(struct side_callback)];
153
154 side_static_event(side_statedump_begin, "side", "statedump_begin",
155 SIDE_LOGLEVEL_INFO, side_field_list(side_field_string("name")));
156 side_static_event(side_statedump_end, "side", "statedump_end",
157 SIDE_LOGLEVEL_INFO, side_field_list(side_field_string("name")));
158
159 /*
160  * side_ptrace_hook is a placeholder for a debugger breakpoint.
161 * var_struct is NULL if not variadic.
162 */
163 void side_ptrace_hook(const struct side_event_state *event_state __attribute__((unused)),
164 const struct side_arg_vec *side_arg_vec __attribute__((unused)),
165 const struct side_arg_dynamic_struct *var_struct __attribute__((unused)))
166 __attribute__((noinline));
167 void side_ptrace_hook(const struct side_event_state *event_state __attribute__((unused)),
168 const struct side_arg_vec *side_arg_vec __attribute__((unused)),
169 const struct side_arg_dynamic_struct *var_struct __attribute__((unused)))
170 {
171 }
172
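/*
 * Invoke the callbacks registered on a non-variadic event. A key other
 * than SIDE_KEY_MATCH_ALL restricts the call to callbacks registered with
 * that key (or with SIDE_KEY_MATCH_ALL); this implements state dump
 * filtering.
 */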
173 static
174 void _side_call(const struct side_event_state *event_state, const struct side_arg_vec *side_arg_vec, uint64_t key)
175 {
176 struct side_rcu_read_state rcu_read_state;
177 const struct side_event_state_0 *es0;
178 const struct side_callback *side_cb;
179 uintptr_t enabled;
180
181 if (side_unlikely(finalized))
182 return;
183 if (side_unlikely(!initialized))
184 side_init();
185 if (side_unlikely(event_state->version != 0))
186 abort();
187 es0 = side_container_of(event_state, const struct side_event_state_0, parent);
188 assert(!(es0->desc->flags & SIDE_EVENT_FLAG_VARIADIC));
189 enabled = __atomic_load_n(&es0->enabled, __ATOMIC_RELAXED);
190 if (side_unlikely(enabled & SIDE_EVENT_ENABLED_SHARED_MASK)) {
191 if ((enabled & SIDE_EVENT_ENABLED_SHARED_USER_EVENT_MASK) &&
192 (key == SIDE_KEY_MATCH_ALL || key == SIDE_KEY_USER_EVENT)) {
193 // TODO: call kernel write.
194 }
195 if ((enabled & SIDE_EVENT_ENABLED_SHARED_PTRACE_MASK) &&
196 (key == SIDE_KEY_MATCH_ALL || key == SIDE_KEY_PTRACE))
197 side_ptrace_hook(event_state, side_arg_vec, NULL);
198 }
199 side_rcu_read_begin(&event_rcu_gp, &rcu_read_state);
200 for (side_cb = side_rcu_dereference(es0->callbacks); side_cb->u.call != NULL; side_cb++) {
201 if (key != SIDE_KEY_MATCH_ALL && side_cb->key != SIDE_KEY_MATCH_ALL && side_cb->key != key)
202 continue;
203 side_cb->u.call(es0->desc, side_arg_vec, side_cb->priv);
204 }
205 side_rcu_read_end(&event_rcu_gp, &rcu_read_state);
206 }
207
208 void side_call(const struct side_event_state *event_state, const struct side_arg_vec *side_arg_vec)
209 {
210 _side_call(event_state, side_arg_vec, SIDE_KEY_MATCH_ALL);
211 }
212
213 void side_statedump_call(const struct side_event_state *event_state, const struct side_arg_vec *side_arg_vec)
214 {
215 _side_call(event_state, side_arg_vec, filter_key);
216 }
217
218 static
219 void _side_call_variadic(const struct side_event_state *event_state,
220 const struct side_arg_vec *side_arg_vec,
221 const struct side_arg_dynamic_struct *var_struct,
222 uint64_t key)
223 {
224 struct side_rcu_read_state rcu_read_state;
225 const struct side_event_state_0 *es0;
226 const struct side_callback *side_cb;
227 uintptr_t enabled;
228
229 if (side_unlikely(finalized))
230 return;
231 if (side_unlikely(!initialized))
232 side_init();
233 if (side_unlikely(event_state->version != 0))
234 abort();
235 es0 = side_container_of(event_state, const struct side_event_state_0, parent);
236 assert(es0->desc->flags & SIDE_EVENT_FLAG_VARIADIC);
237 enabled = __atomic_load_n(&es0->enabled, __ATOMIC_RELAXED);
238 if (side_unlikely(enabled & SIDE_EVENT_ENABLED_SHARED_MASK)) {
239 if ((enabled & SIDE_EVENT_ENABLED_SHARED_USER_EVENT_MASK) &&
240 (key == SIDE_KEY_MATCH_ALL || key == SIDE_KEY_USER_EVENT)) {
241 // TODO: call kernel write.
242 }
243 if ((enabled & SIDE_EVENT_ENABLED_SHARED_PTRACE_MASK) &&
244 (key == SIDE_KEY_MATCH_ALL || key == SIDE_KEY_PTRACE))
245 side_ptrace_hook(event_state, side_arg_vec, var_struct);
246 }
247 side_rcu_read_begin(&event_rcu_gp, &rcu_read_state);
248 for (side_cb = side_rcu_dereference(es0->callbacks); side_cb->u.call_variadic != NULL; side_cb++) {
249 if (key != SIDE_KEY_MATCH_ALL && side_cb->key != SIDE_KEY_MATCH_ALL && side_cb->key != key)
250 continue;
251 side_cb->u.call_variadic(es0->desc, side_arg_vec, var_struct, side_cb->priv);
252 }
253 side_rcu_read_end(&event_rcu_gp, &rcu_read_state);
254 }
255
256 void side_call_variadic(const struct side_event_state *event_state,
257 const struct side_arg_vec *side_arg_vec,
258 const struct side_arg_dynamic_struct *var_struct)
259 {
260 _side_call_variadic(event_state, side_arg_vec, var_struct, SIDE_KEY_MATCH_ALL);
261 }
262
263 void side_statedump_call_variadic(const struct side_event_state *event_state,
264 const struct side_arg_vec *side_arg_vec,
265 const struct side_arg_dynamic_struct *var_struct)
266 {
267 _side_call_variadic(event_state, side_arg_vec, var_struct, filter_key);
268 }
269
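/* Called with side_event_lock held. Return the callback matching (call, priv, key), or NULL. */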
270 static
271 const struct side_callback *side_tracer_callback_lookup(
272 const struct side_event_description *desc,
273 void *call, void *priv, uint64_t key)
274 {
275 struct side_event_state *event_state = side_ptr_get(desc->state);
276 const struct side_event_state_0 *es0;
277 const struct side_callback *cb;
278
279 if (side_unlikely(event_state->version != 0))
280 abort();
281 es0 = side_container_of(event_state, const struct side_event_state_0, parent);
282 for (cb = es0->callbacks; cb->u.call != NULL; cb++) {
283 if ((void *) cb->u.call == call && cb->priv == priv && cb->key == key)
284 return cb;
285 }
286 return NULL;
287 }
288
289 static
290 int _side_tracer_callback_register(struct side_event_description *desc,
291 void *call, void *priv, uint64_t key)
292 {
293 struct side_event_state *event_state;
294 struct side_callback *old_cb, *new_cb;
295 struct side_event_state_0 *es0;
296 int ret = SIDE_ERROR_OK;
297 uint32_t old_nr_cb;
298
299 if (!call)
300 return SIDE_ERROR_INVAL;
301 if (finalized)
302 return SIDE_ERROR_EXITING;
303 if (!initialized)
304 side_init();
305 pthread_mutex_lock(&side_event_lock);
306 event_state = side_ptr_get(desc->state);
307 if (side_unlikely(event_state->version != 0))
308 abort();
309 es0 = side_container_of(event_state, struct side_event_state_0, parent);
310 old_nr_cb = es0->nr_callbacks;
311 if (old_nr_cb == UINT32_MAX) {
312 ret = SIDE_ERROR_INVAL;
313 goto unlock;
314 }
315 /* Reject duplicate (call, priv, key) tuples. */
316 if (side_tracer_callback_lookup(desc, call, priv, key)) {
317 ret = SIDE_ERROR_EXIST;
318 goto unlock;
319 }
320 old_cb = (struct side_callback *) es0->callbacks;
321 /* old_nr_cb + 1 (new cb) + 1 (NULL) */
322 new_cb = (struct side_callback *) calloc(old_nr_cb + 2, sizeof(struct side_callback));
323 if (!new_cb) {
324 ret = SIDE_ERROR_NOMEM;
325 goto unlock;
326 }
327 memcpy(new_cb, old_cb, old_nr_cb * sizeof(struct side_callback));
328 if (desc->flags & SIDE_EVENT_FLAG_VARIADIC)
329 new_cb[old_nr_cb].u.call_variadic =
330 (side_tracer_callback_variadic_func) call;
331 else
332 new_cb[old_nr_cb].u.call =
333 (side_tracer_callback_func) call;
334 new_cb[old_nr_cb].priv = priv;
335 new_cb[old_nr_cb].key = key;
336 /* High order bits are already zeroed. */
337 side_rcu_assign_pointer(es0->callbacks, new_cb);
338 side_rcu_wait_grace_period(&event_rcu_gp);
339 if (old_nr_cb)
340 free(old_cb);
341 es0->nr_callbacks++;
342 /* Increment concurrently with kernel setting the top bits. */
343 if (!old_nr_cb)
344 (void) __atomic_add_fetch(&es0->enabled, 1, __ATOMIC_RELAXED);
345 unlock:
346 pthread_mutex_unlock(&side_event_lock);
347 return ret;
348 }
349
350 int side_tracer_callback_register(struct side_event_description *desc,
351 side_tracer_callback_func call,
352 void *priv, uint64_t key)
353 {
354 if (desc->flags & SIDE_EVENT_FLAG_VARIADIC)
355 return SIDE_ERROR_INVAL;
356 return _side_tracer_callback_register(desc, (void *) call, priv, key);
357 }
358
359 int side_tracer_callback_variadic_register(struct side_event_description *desc,
360 side_tracer_callback_variadic_func call_variadic,
361 void *priv, uint64_t key)
362 {
363 if (!(desc->flags & SIDE_EVENT_FLAG_VARIADIC))
364 return SIDE_ERROR_INVAL;
365 return _side_tracer_callback_register(desc, (void *) call_variadic, priv, key);
366 }
367
368 static int _side_tracer_callback_unregister(struct side_event_description *desc,
369 void *call, void *priv, uint64_t key)
370 {
371 struct side_event_state *event_state;
372 struct side_callback *old_cb, *new_cb;
373 const struct side_callback *cb_pos;
374 struct side_event_state_0 *es0;
375 uint32_t pos_idx;
376 int ret = SIDE_ERROR_OK;
377 uint32_t old_nr_cb;
378
379 if (!call)
380 return SIDE_ERROR_INVAL;
381 if (finalized)
382 return SIDE_ERROR_EXITING;
383 if (!initialized)
384 side_init();
385 pthread_mutex_lock(&side_event_lock);
386 event_state = side_ptr_get(desc->state);
387 if (side_unlikely(event_state->version != 0))
388 abort();
389 es0 = side_container_of(event_state, struct side_event_state_0, parent);
390 cb_pos = side_tracer_callback_lookup(desc, call, priv, key);
391 if (!cb_pos) {
392 ret = SIDE_ERROR_NOENT;
393 goto unlock;
394 }
395 old_nr_cb = es0->nr_callbacks;
396 old_cb = (struct side_callback *) es0->callbacks;
397 if (old_nr_cb == 1) {
398 new_cb = (struct side_callback *) &side_empty_callback;
399 } else {
400 pos_idx = cb_pos - es0->callbacks;
401 /* Remove entry at pos_idx. */
402 /* old_nr_cb - 1 (removed cb) + 1 (NULL) */
403 new_cb = (struct side_callback *) calloc(old_nr_cb, sizeof(struct side_callback));
404 if (!new_cb) {
405 ret = SIDE_ERROR_NOMEM;
406 goto unlock;
407 }
408 memcpy(new_cb, old_cb, pos_idx * sizeof(struct side_callback));
409 memcpy(&new_cb[pos_idx], &old_cb[pos_idx + 1], (old_nr_cb - pos_idx - 1) * sizeof(struct side_callback));
410 }
411 /* High order bits are already zeroed. */
412 side_rcu_assign_pointer(es0->callbacks, new_cb);
413 side_rcu_wait_grace_period(&event_rcu_gp);
414 free(old_cb);
415 es0->nr_callbacks--;
416 /* Decrement concurrently with kernel setting the top bits. */
417 if (old_nr_cb == 1)
418 (void) __atomic_add_fetch(&es0->enabled, -1, __ATOMIC_RELAXED);
419 unlock:
420 pthread_mutex_unlock(&side_event_lock);
421 return ret;
422 }
423
424 int side_tracer_callback_unregister(struct side_event_description *desc,
425 side_tracer_callback_func call,
426 void *priv, uint64_t key)
427 {
428 if (desc->flags & SIDE_EVENT_FLAG_VARIADIC)
429 return SIDE_ERROR_INVAL;
430 return _side_tracer_callback_unregister(desc, (void *) call, priv, key);
431 }
432
433 int side_tracer_callback_variadic_unregister(struct side_event_description *desc,
434 side_tracer_callback_variadic_func call_variadic,
435 void *priv, uint64_t key)
436 {
437 if (!(desc->flags & SIDE_EVENT_FLAG_VARIADIC))
438 return SIDE_ERROR_INVAL;
439 return _side_tracer_callback_unregister(desc, (void *) call_variadic, priv, key);
440 }
441
442 struct side_events_register_handle *side_events_register(struct side_event_description **events, uint32_t nr_events)
443 {
444 struct side_events_register_handle *events_handle = NULL;
445 struct side_tracer_handle *tracer_handle;
446
447 if (finalized)
448 return NULL;
449 if (!initialized)
450 side_init();
451 events_handle = (struct side_events_register_handle *)
452 calloc(1, sizeof(struct side_events_register_handle));
453 if (!events_handle)
454 return NULL;
455 events_handle->events = events;
456 events_handle->nr_events = nr_events;
457
458 pthread_mutex_lock(&side_event_lock);
459 side_list_insert_node_tail(&side_events_list, &events_handle->node);
460 side_list_for_each_entry(tracer_handle, &side_tracer_list, node) {
461 tracer_handle->cb(SIDE_TRACER_NOTIFICATION_INSERT_EVENTS,
462 events, nr_events, tracer_handle->priv);
463 }
464 pthread_mutex_unlock(&side_event_lock);
465 //TODO: call event batch register ioctl
466 return events_handle;
467 }
468
469 static
470 void side_event_remove_callbacks(struct side_event_description *desc)
471 {
472 struct side_event_state *event_state = side_ptr_get(desc->state);
473 struct side_event_state_0 *es0;
474 struct side_callback *old_cb;
475 uint32_t nr_cb;
476
477 if (side_unlikely(event_state->version != 0))
478 abort();
479 es0 = side_container_of(event_state, struct side_event_state_0, parent);
480 nr_cb = es0->nr_callbacks;
481 if (!nr_cb)
482 return;
483 old_cb = (struct side_callback *) es0->callbacks;
484 (void) __atomic_add_fetch(&es0->enabled, -1, __ATOMIC_RELAXED);
485 /*
486 * Setting the state back to 0 cb and empty callbacks out of
487 * caution. This should not matter because instrumentation is
488 * unreachable.
489 */
490 es0->nr_callbacks = 0;
491 side_rcu_assign_pointer(es0->callbacks, &side_empty_callback);
492 /*
493 * No need to wait for grace period because instrumentation is
494 * unreachable.
495 */
496 free(old_cb);
497 }
498
499 /*
500 * Unregister event handle. At this point, all side events in that
501 * handle should be unreachable.
502 */
503 void side_events_unregister(struct side_events_register_handle *events_handle)
504 {
505 struct side_tracer_handle *tracer_handle;
506 uint32_t i;
507
508 if (!events_handle)
509 return;
510 if (finalized)
511 return;
512 if (!initialized)
513 side_init();
514 pthread_mutex_lock(&side_event_lock);
515 side_list_remove_node(&events_handle->node);
516 side_list_for_each_entry(tracer_handle, &side_tracer_list, node) {
517 tracer_handle->cb(SIDE_TRACER_NOTIFICATION_REMOVE_EVENTS,
518 events_handle->events, events_handle->nr_events,
519 tracer_handle->priv);
520 }
521 for (i = 0; i < events_handle->nr_events; i++) {
522 struct side_event_description *event = events_handle->events[i];
523
524 /* Skip NULL pointers */
525 if (!event)
526 continue;
527 side_event_remove_callbacks(event);
528 }
529 pthread_mutex_unlock(&side_event_lock);
530 //TODO: call event batch unregister ioctl
531 free(events_handle);
532 }
533
534 struct side_tracer_handle *side_tracer_event_notification_register(
535 void (*cb)(enum side_tracer_notification notif,
536 struct side_event_description **events, uint32_t nr_events, void *priv),
537 void *priv)
538 {
539 struct side_tracer_handle *tracer_handle;
540 struct side_events_register_handle *events_handle;
541
542 if (finalized)
543 return NULL;
544 if (!initialized)
545 side_init();
546 tracer_handle = (struct side_tracer_handle *)
547 calloc(1, sizeof(struct side_tracer_handle));
548 if (!tracer_handle)
549 return NULL;
550 pthread_mutex_lock(&side_event_lock);
551 tracer_handle->cb = cb;
552 tracer_handle->priv = priv;
553 side_list_insert_node_tail(&side_tracer_list, &tracer_handle->node);
554 side_list_for_each_entry(events_handle, &side_events_list, node) {
555 cb(SIDE_TRACER_NOTIFICATION_INSERT_EVENTS,
556 events_handle->events, events_handle->nr_events, priv);
557 }
558 pthread_mutex_unlock(&side_event_lock);
559 return tracer_handle;
560 }
561
562 void side_tracer_event_notification_unregister(struct side_tracer_handle *tracer_handle)
563 {
564 struct side_events_register_handle *events_handle;
565
566 if (finalized)
567 return;
568 if (!initialized)
569 side_init();
570 pthread_mutex_lock(&side_event_lock);
571 side_list_for_each_entry(events_handle, &side_events_list, node) {
572 tracer_handle->cb(SIDE_TRACER_NOTIFICATION_REMOVE_EVENTS,
573 events_handle->events, events_handle->nr_events,
574 tracer_handle->priv);
575 }
576 side_list_remove_node(&tracer_handle->node);
577 pthread_mutex_unlock(&side_event_lock);
578 free(tracer_handle);
579 }
580
581 /* Called with side_statedump_lock held. */
582 static
583 void queue_statedump_pending(struct side_statedump_request_handle *handle, uint64_t key)
584 {
585 struct side_statedump_notification *notif;
586
587 notif = (struct side_statedump_notification *) calloc(1, sizeof(struct side_statedump_notification));
588 if (!notif)
589 abort();
590 notif->key = key;
591 side_list_insert_node_tail(&handle->notification_queue, &notif->node);
592 if (handle->mode == SIDE_STATEDUMP_MODE_AGENT_THREAD) {
593 (void)__atomic_or_fetch(&statedump_agent_thread.state, AGENT_THREAD_STATE_HANDLE_REQUEST, __ATOMIC_SEQ_CST);
594 pthread_cond_broadcast(&statedump_agent_thread.worker_cond);
595 }
596 }
597
598 /* Called with side_statedump_lock held. */
599 static
600 void unqueue_statedump_pending(struct side_statedump_request_handle *handle, uint64_t key)
601 {
602 struct side_statedump_notification *notif, *tmp;
603
604 side_list_for_each_entry_safe(notif, tmp, &handle->notification_queue, node) {
605 if (key == SIDE_KEY_MATCH_ALL || key == notif->key) {
606 side_list_remove_node(&notif->node);
607 free(notif);
608 }
609 }
610 }
611
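/*
 * Run a single statedump request: set the thread-local filter key so only
 * the requesting tracer's callbacks are invoked, and surround the handle
 * callback with the statedump_begin/statedump_end events.
 */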
612 static
613 void side_statedump_run(struct side_statedump_request_handle *handle,
614 struct side_statedump_notification *notif)
615 {
616 /* Invoke the state dump callback specifically for the tracer key. */
617 filter_key = notif->key;
618 side_statedump_event_call(side_statedump_begin,
619 side_arg_list(side_arg_string(handle->name)));
620 handle->cb();
621 side_statedump_event_call(side_statedump_end,
622 side_arg_list(side_arg_string(handle->name)));
623 filter_key = SIDE_KEY_MATCH_ALL;
624 }
625
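/*
 * Splice the pending notifications into a local list under
 * side_statedump_lock, then run them without holding the lock. In agent
 * thread mode, wake up waiters once the requests have been handled.
 */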
626 static
627 void _side_statedump_run_pending_requests(struct side_statedump_request_handle *handle)
628 {
629 struct side_statedump_notification *notif, *tmp;
630 DEFINE_SIDE_LIST_HEAD(tmp_head);
631
632 pthread_mutex_lock(&side_statedump_lock);
633 side_list_splice(&handle->notification_queue, &tmp_head);
634 side_list_head_init(&handle->notification_queue);
635 pthread_mutex_unlock(&side_statedump_lock);
636
637 /* We are now sole owner of the tmp_head list. */
638 side_list_for_each_entry(notif, &tmp_head, node)
639 side_statedump_run(handle, notif);
640 side_list_for_each_entry_safe(notif, tmp, &tmp_head, node)
641 free(notif);
642
643 if (handle->mode == SIDE_STATEDUMP_MODE_AGENT_THREAD) {
644 pthread_mutex_lock(&side_statedump_lock);
645 pthread_cond_broadcast(&statedump_agent_thread.waiter_cond);
646 pthread_mutex_unlock(&side_statedump_lock);
647 }
648 }
649
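/*
 * Agent thread main loop: sleep until a state flag is raised, then either
 * exit, pause (acknowledging with PAUSE_ACK and spinning until resumed),
 * or walk the RCU statedump request list and run the pending requests.
 */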
650 static
651 void *statedump_agent_func(void *arg __attribute__((unused)))
652 {
653 for (;;) {
654 struct side_statedump_request_handle *handle;
655 struct side_rcu_read_state rcu_read_state;
656 enum agent_thread_state state;
657
658 pthread_mutex_lock(&side_statedump_lock);
659 for (;;) {
660 state = __atomic_load_n(&statedump_agent_thread.state, __ATOMIC_SEQ_CST);
661 if (state == AGENT_THREAD_STATE_BLOCKED)
662 pthread_cond_wait(&statedump_agent_thread.worker_cond, &side_statedump_lock);
663 else
664 break;
665 }
666 pthread_mutex_unlock(&side_statedump_lock);
667 if (state & AGENT_THREAD_STATE_EXIT)
668 break;
669 if (state & AGENT_THREAD_STATE_PAUSE) {
670 int attempt = 0;
671
672 (void)__atomic_or_fetch(&statedump_agent_thread.state, AGENT_THREAD_STATE_PAUSE_ACK, __ATOMIC_SEQ_CST);
673 for (;;) {
674 state = __atomic_load_n(&statedump_agent_thread.state, __ATOMIC_SEQ_CST);
675 if (!(state & AGENT_THREAD_STATE_PAUSE))
676 break;
677 if (attempt > SIDE_RETRY_BUSY_LOOP_ATTEMPTS) {
678 (void)poll(NULL, 0, SIDE_RETRY_DELAY_MS);
679 continue;
680 }
681 attempt++;
682 side_cpu_relax();
683 }
684 continue;
685 }
686 (void)__atomic_and_fetch(&statedump_agent_thread.state, ~AGENT_THREAD_STATE_HANDLE_REQUEST, __ATOMIC_SEQ_CST);
687 side_rcu_read_begin(&statedump_rcu_gp, &rcu_read_state);
688 side_list_for_each_entry_rcu(handle, &side_statedump_list, node)
689 _side_statedump_run_pending_requests(handle);
690 side_rcu_read_end(&statedump_rcu_gp, &rcu_read_state);
691 }
692 return NULL;
693 }
694
695 static
696 void statedump_agent_thread_init(void)
697 {
698 pthread_cond_init(&statedump_agent_thread.worker_cond, NULL);
699 pthread_cond_init(&statedump_agent_thread.waiter_cond, NULL);
700 statedump_agent_thread.state = AGENT_THREAD_STATE_BLOCKED;
701 }
702
703 /* Called with side_agent_thread_lock and side_statedump_lock held. */
704 static
705 void statedump_agent_thread_get(void)
706 {
707 int ret;
708
709 if (statedump_agent_thread.ref++)
710 return;
711 statedump_agent_thread_init();
712 ret = pthread_create(&statedump_agent_thread.id, NULL,
713 statedump_agent_func, NULL);
714 if (ret) {
715 abort();
716 }
717 }
718
719 /*
720 * Called with side_agent_thread_lock and side_statedump_lock held.
721 * Returns true if join for agent thread is needed.
722 */
723 static
724 bool statedump_agent_thread_put(void)
725 {
726 if (--statedump_agent_thread.ref)
727 return false;
728 (void)__atomic_or_fetch(&statedump_agent_thread.state, AGENT_THREAD_STATE_EXIT, __ATOMIC_SEQ_CST);
729 pthread_cond_broadcast(&statedump_agent_thread.worker_cond);
730 return true;
731 }
732
733 static
734 void statedump_agent_thread_fini(void)
735 {
736 statedump_agent_thread.state = AGENT_THREAD_STATE_BLOCKED;
737 if (pthread_cond_destroy(&statedump_agent_thread.worker_cond))
738 abort();
739 if (pthread_cond_destroy(&statedump_agent_thread.waiter_cond))
740 abort();
741 }
742
743 /* Called with side_agent_thread_lock held. */
744 static
745 void statedump_agent_thread_join(void)
746 {
747 int ret;
748 void *retval;
749
750 ret = pthread_join(statedump_agent_thread.id, &retval);
751 if (ret) {
752 abort();
753 }
754 statedump_agent_thread_fini();
755 }
756
757 struct side_statedump_request_handle *
758 side_statedump_request_notification_register(const char *state_name,
759 void (*statedump_cb)(void),
760 enum side_statedump_mode mode)
761 {
762 struct side_statedump_request_handle *handle;
763 char *name;
764
765 if (finalized)
766 return NULL;
767 if (!initialized)
768 side_init();
769 /*
770 * The statedump request notification should not be registered
771 * from a notification callback.
772 */
773 assert(!filter_key);
774 handle = (struct side_statedump_request_handle *)
775 calloc(1, sizeof(struct side_statedump_request_handle));
776 if (!handle)
777 return NULL;
778 name = strdup(state_name);
779 if (!name)
780 goto name_nomem;
781 handle->cb = statedump_cb;
782 handle->name = name;
783 handle->mode = mode;
784 side_list_head_init(&handle->notification_queue);
785
786 if (mode == SIDE_STATEDUMP_MODE_AGENT_THREAD)
787 pthread_mutex_lock(&side_agent_thread_lock);
788 pthread_mutex_lock(&side_statedump_lock);
789 if (mode == SIDE_STATEDUMP_MODE_AGENT_THREAD)
790 statedump_agent_thread_get();
791 side_list_insert_node_tail_rcu(&side_statedump_list, &handle->node);
792 /* Queue a pending statedump request for all tracers. */
793 queue_statedump_pending(handle, SIDE_KEY_MATCH_ALL);
794 pthread_mutex_unlock(&side_statedump_lock);
795
796 if (mode == SIDE_STATEDUMP_MODE_AGENT_THREAD) {
797 pthread_mutex_unlock(&side_agent_thread_lock);
798
799 pthread_mutex_lock(&side_statedump_lock);
800 while (!side_list_empty(&handle->notification_queue))
801 pthread_cond_wait(&statedump_agent_thread.waiter_cond, &side_statedump_lock);
802 pthread_mutex_unlock(&side_statedump_lock);
803 }
804
805 return handle;
806
807 name_nomem:
808 free(handle);
809 return NULL;
810 }
811
812 void side_statedump_request_notification_unregister(struct side_statedump_request_handle *handle)
813 {
814 bool join = false;
815
816 if (finalized)
817 return;
818 if (!initialized)
819 side_init();
820 assert(!filter_key);
821
822 if (handle->mode == SIDE_STATEDUMP_MODE_AGENT_THREAD)
823 pthread_mutex_lock(&side_agent_thread_lock);
824 pthread_mutex_lock(&side_statedump_lock);
825 unqueue_statedump_pending(handle, SIDE_KEY_MATCH_ALL);
826 side_list_remove_node_rcu(&handle->node);
827 if (handle->mode == SIDE_STATEDUMP_MODE_AGENT_THREAD)
828 join = statedump_agent_thread_put();
829 pthread_mutex_unlock(&side_statedump_lock);
830 if (join)
831 statedump_agent_thread_join();
832 if (handle->mode == SIDE_STATEDUMP_MODE_AGENT_THREAD)
833 pthread_mutex_unlock(&side_agent_thread_lock);
834
835 side_rcu_wait_grace_period(&statedump_rcu_gp);
836 free(handle->name);
837 free(handle);
838 }
839
840 /* Returns true if the handle has pending statedump requests. */
841 bool side_statedump_poll_pending_requests(struct side_statedump_request_handle *handle)
842 {
843 bool ret;
844
845 if (handle->mode != SIDE_STATEDUMP_MODE_POLLING)
846 return false;
847 pthread_mutex_lock(&side_statedump_lock);
848 ret = !side_list_empty(&handle->notification_queue);
849 pthread_mutex_unlock(&side_statedump_lock);
850 return ret;
851 }
852
853 /*
854  * Only state dump handles in polling mode allow the application to explicitly
855  * handle the pending requests.
856 */
857 int side_statedump_run_pending_requests(struct side_statedump_request_handle *handle)
858 {
859 if (handle->mode != SIDE_STATEDUMP_MODE_POLLING)
860 return SIDE_ERROR_INVAL;
861 _side_statedump_run_pending_requests(handle);
862 return SIDE_ERROR_OK;
863 }
864
865 /*
866 * Request a state dump for tracer callbacks identified with "key".
867 */
868 int side_tracer_statedump_request(uint64_t key)
869 {
870 struct side_statedump_request_handle *handle;
871
872 if (key == SIDE_KEY_MATCH_ALL)
873 return SIDE_ERROR_INVAL;
874 pthread_mutex_lock(&side_statedump_lock);
875 side_list_for_each_entry(handle, &side_statedump_list, node)
876 queue_statedump_pending(handle, key);
877 pthread_mutex_unlock(&side_statedump_lock);
878 return SIDE_ERROR_OK;
879 }
880
881 /*
882 * Cancel a statedump request.
883 */
884 int side_tracer_statedump_request_cancel(uint64_t key)
885 {
886 struct side_statedump_request_handle *handle;
887
888 if (key == SIDE_KEY_MATCH_ALL)
889 return SIDE_ERROR_INVAL;
890 pthread_mutex_lock(&side_statedump_lock);
891 side_list_for_each_entry(handle, &side_statedump_list, node)
892 unqueue_statedump_pending(handle, key);
893 pthread_mutex_unlock(&side_statedump_lock);
894 return SIDE_ERROR_OK;
895 }
896
897 /*
898  * Tracer keys are 64-bit values. Return SIDE_ERROR_NOMEM on overflow
899  * (which should never happen in practice).
900 */
901 int side_tracer_request_key(uint64_t *key)
902 {
903 int ret = SIDE_ERROR_OK;
904
905 pthread_mutex_lock(&side_key_lock);
906 if (side_key_next == 0) {
907 ret = SIDE_ERROR_NOMEM;
908 goto end;
909 }
910 *key = side_key_next++;
911 end:
912 pthread_mutex_unlock(&side_key_lock);
913 return ret;
914 }
915
916 /*
917  * Use of pthread_atfork() requires glibc >= 2.24 to eliminate hangs when
918  * waiting for the agent thread if the agent thread calls malloc: the
919  * issue is fixed by GNU libc
920  * commit 8a727af925be63aa6ea0f5f90e16751fd541626b.
921 * Ref. https://bugzilla.redhat.com/show_bug.cgi?id=906468
922 */
923 static
924 void side_before_fork(void)
925 {
926 int attempt = 0;
927
928 pthread_mutex_lock(&side_agent_thread_lock);
929 if (!statedump_agent_thread.ref)
930 return;
931 /* Pause agent thread. */
932 pthread_mutex_lock(&side_statedump_lock);
933 (void)__atomic_or_fetch(&statedump_agent_thread.state, AGENT_THREAD_STATE_PAUSE, __ATOMIC_SEQ_CST);
934 pthread_cond_broadcast(&statedump_agent_thread.worker_cond);
935 pthread_mutex_unlock(&side_statedump_lock);
936 /* Wait for agent thread acknowledge. */
937 while (!(__atomic_load_n(&statedump_agent_thread.state, __ATOMIC_SEQ_CST) & AGENT_THREAD_STATE_PAUSE_ACK)) {
938 if (attempt > SIDE_RETRY_BUSY_LOOP_ATTEMPTS) {
939 (void)poll(NULL, 0, SIDE_RETRY_DELAY_MS);
940 continue;
941 }
942 attempt++;
943 side_cpu_relax();
944 }
945 }
946
947 static
948 void side_after_fork_parent(void)
949 {
950 if (statedump_agent_thread.ref)
951 (void)__atomic_and_fetch(&statedump_agent_thread.state,
952 ~(AGENT_THREAD_STATE_PAUSE | AGENT_THREAD_STATE_PAUSE_ACK),
953 __ATOMIC_SEQ_CST);
954 pthread_mutex_unlock(&side_agent_thread_lock);
955 }
956
957 /*
958 * The agent thread does not exist in the child process after a fork.
959 * Re-initialize its data structures and create a new agent thread.
960 */
961 static
962 void side_after_fork_child(void)
963 {
964 if (statedump_agent_thread.ref) {
965 int ret;
966
967 statedump_agent_thread_fini();
968 statedump_agent_thread_init();
969 ret = pthread_create(&statedump_agent_thread.id, NULL,
970 statedump_agent_func, NULL);
971 if (ret) {
972 abort();
973 }
974 }
975 pthread_mutex_unlock(&side_agent_thread_lock);
976 }
977
978 void side_init(void)
979 {
980 if (initialized)
981 return;
982 side_rcu_gp_init(&event_rcu_gp);
983 side_rcu_gp_init(&statedump_rcu_gp);
984 if (pthread_atfork(side_before_fork, side_after_fork_parent, side_after_fork_child))
985 abort();
986 initialized = true;
987 }
988
989 /*
990 * side_exit() is executed from a library destructor. It can be called
991 * explicitly at application exit as well. Concurrent side API use is
992 * not expected at that point.
993 */
994 void side_exit(void)
995 {
996 struct side_events_register_handle *handle, *tmp;
997
998 if (finalized)
999 return;
1000 side_list_for_each_entry_safe(handle, tmp, &side_events_list, node)
1001 side_events_unregister(handle);
1002 side_rcu_gp_exit(&event_rcu_gp);
1003 side_rcu_gp_exit(&statedump_rcu_gp);
1004 finalized = true;
1005 }
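As a rough usage sketch (not part of side.c, and assuming <side/trace.h> exposes the public declarations used here), a tracer could attach to the instrumentation above along these lines: request a tracer key, register a callback on every non-variadic event it is notified about, and ask the instrumented components to replay their state for that key. The tracer_setup(), tracer_event_cb() and tracer_notif_cb() names are illustrative only.

#include <side/trace.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t tracer_key;

static
void tracer_event_cb(const struct side_event_description *desc,
		const struct side_arg_vec *side_arg_vec, void *priv)
{
	(void) desc;
	(void) side_arg_vec;
	(void) priv;
	/* Consume the event payload here. */
	printf("event callback invoked\n");
}

static
void tracer_notif_cb(enum side_tracer_notification notif,
		struct side_event_description **events, uint32_t nr_events, void *priv)
{
	uint32_t i;

	(void) priv;
	if (notif != SIDE_TRACER_NOTIFICATION_INSERT_EVENTS)
		return;
	for (i = 0; i < nr_events; i++) {
		/* Variadic events would need side_tracer_callback_variadic_register(). */
		if (events[i]->flags & SIDE_EVENT_FLAG_VARIADIC)
			continue;
		(void) side_tracer_callback_register(events[i], tracer_event_cb,
				NULL, tracer_key);
	}
}

int tracer_setup(void)
{
	struct side_tracer_handle *handle;

	if (side_tracer_request_key(&tracer_key) != SIDE_ERROR_OK)
		return -1;
	handle = side_tracer_event_notification_register(tracer_notif_cb, NULL);
	if (!handle)
		return -1;
	/* Ask instrumented components to replay their state for this tracer. */
	if (side_tracer_statedump_request(tracer_key) != SIDE_ERROR_OK)
		return -1;
	return 0;
}

Because side_event_lock is a recursive mutex, registering callbacks from within the notification callback, as done above, is supported.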