SoW-2019-0007-2: Dynamic Snapshot: Triggers send partial event payload with notifications
[lttng-tools.git] / src / bin / lttng-sessiond / action-executor.c
1 /*
2 * Copyright (C) 2020 Jérémie Galarneau <jeremie.galarneau@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #include "action-executor.h"
9 #include "cmd.h"
10 #include "health-sessiond.h"
11 #include "lttng-sessiond.h"
12 #include "notification-thread-internal.h"
13 #include "session.h"
14 #include "thread.h"
15 #include <common/macros.h>
16 #include <lttng/action/group.h>
17 #include <lttng/action/notify-internal.h>
18 #include <lttng/action/notify.h>
19 #include <lttng/action/rotate-session.h>
20 #include <lttng/action/snapshot-session.h>
21 #include <lttng/action/start-session.h>
22 #include <lttng/action/stop-session.h>
23 #include <lttng/condition/evaluation.h>
24 #include <lttng/condition/event-rule-internal.h>
25 #include <lttng/lttng-error.h>
26 #include <lttng/trigger/trigger-internal.h>
27 #include <pthread.h>
28 #include <stdbool.h>
29 #include <stddef.h>
30 #include <urcu/list.h>
31
/* Name given to the worker thread when it is created. */
#define THREAD_NAME "Action Executor"
/* Enqueue attempts are refused once this many work items are pending. */
#define MAX_QUEUED_WORK_COUNT 8192
34
35 struct action_work_item {
36 uint64_t id;
37 struct lttng_trigger *trigger;
38 struct notification_client_list *client_list;
39 struct cds_list_head list_node;
40 struct lttng_trigger_notification *trigger_notification;
41 };
42
43 struct action_executor {
44 struct lttng_thread *thread;
45 struct notification_thread_handle *notification_thread_handle;
46 struct {
47 uint64_t pending_count;
48 struct cds_list_head list;
49 pthread_cond_t cond;
50 pthread_mutex_t lock;
51 } work;
52 bool should_quit;
53 uint64_t next_work_item_id;
54 };
55
/*
 * Signature shared by every action handler.
 *
 * A non-zero return value is treated as a fatal error by the callers in this
 * file: it stops the execution of an action group and ends the work
 * execution loop.
 */
typedef int (*action_executor_handler)(struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
59
/*
 * Forward declarations of the action handlers; they are referenced by the
 * dispatch table before being defined.
 */
static int action_executor_notify_handler(struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_start_session_handler(struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_stop_session_handler(struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_rotate_session_handler(struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_snapshot_session_handler(struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_group_handler(struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action_group);
static int action_executor_generic_handler(struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
81
82 static const action_executor_handler action_executors[] = {
83 [LTTNG_ACTION_TYPE_NOTIFY] = action_executor_notify_handler,
84 [LTTNG_ACTION_TYPE_START_SESSION] = action_executor_start_session_handler,
85 [LTTNG_ACTION_TYPE_STOP_SESSION] = action_executor_stop_session_handler,
86 [LTTNG_ACTION_TYPE_ROTATE_SESSION] = action_executor_rotate_session_handler,
87 [LTTNG_ACTION_TYPE_SNAPSHOT_SESSION] = action_executor_snapshot_session_handler,
88 [LTTNG_ACTION_TYPE_GROUP] = action_executor_group_handler,
89 };
90
91 static const char *get_action_name(const struct lttng_action *action)
92 {
93 const char *action_type_names[] = {
94 [LTTNG_ACTION_TYPE_NOTIFY] = "Notify",
95 [LTTNG_ACTION_TYPE_START_SESSION] = "Start session",
96 [LTTNG_ACTION_TYPE_STOP_SESSION] = "Stop session",
97 [LTTNG_ACTION_TYPE_ROTATE_SESSION] = "Rotate session",
98 [LTTNG_ACTION_TYPE_SNAPSHOT_SESSION] = "Snapshot session",
99 [LTTNG_ACTION_TYPE_GROUP] = "Group",
100 };
101
102 return action_type_names[lttng_action_get_type(action)];
103 }
104
105 static const char *get_trigger_name(const struct lttng_trigger *trigger)
106 {
107 const char *trigger_name;
108 enum lttng_trigger_status trigger_status;
109
110 trigger_status = lttng_trigger_get_name(trigger, &trigger_name);
111 assert(trigger_status == LTTNG_TRIGGER_STATUS_OK);
112
113 return trigger_name;
114 }
115
116 static int client_handle_transmission_status(
117 struct notification_client *client,
118 enum client_transmission_status status,
119 void *user_data)
120 {
121 int ret = 0;
122 struct action_executor *executor = user_data;
123 bool update_communication = true;
124
125 ASSERT_LOCKED(client->lock);
126
127 switch (status) {
128 case CLIENT_TRANSMISSION_STATUS_COMPLETE:
129 DBG("Successfully sent full notification to client, client_id = %" PRIu64,
130 client->id);
131 update_communication = false;
132 break;
133 case CLIENT_TRANSMISSION_STATUS_QUEUED:
134 DBG("Queued notification in client outgoing buffer, client_id = %" PRIu64,
135 client->id);
136 break;
137 case CLIENT_TRANSMISSION_STATUS_FAIL:
138 DBG("Communication error occurred while sending notification to client, client_id = %" PRIu64,
139 client->id);
140 client->communication.active = false;
141 break;
142 default:
143 ERR("Fatal error encoutered while sending notification to client, client_id = %" PRIu64,
144 client->id);
145 client->communication.active = false;
146 ret = -1;
147 goto end;
148 }
149
150 if (!update_communication) {
151 goto end;
152 }
153
154 ret = notification_thread_client_communication_update(
155 executor->notification_thread_handle, client->id,
156 status);
157 end:
158 return ret;
159 }
160
161 static int action_executor_notify_handler(struct action_executor *executor,
162 const struct action_work_item *work_item,
163 const struct lttng_action *action)
164 {
165 int ret = 0;
166 struct lttng_evaluation *evaluation = NULL;
167 struct lttng_trigger_notification *notification = work_item->trigger_notification;
168 unsigned int capture_count = 0;
169
170 assert(work_item->client_list);
171
172 if (LTTNG_CONDITION_STATUS_OK !=
173 lttng_condition_event_rule_get_capture_descriptor_count(
174 lttng_trigger_get_const_condition(work_item->trigger),
175 &capture_count)) {
176 ERR("Get capture count");
177 ret = -1;
178 goto end;
179 }
180
181 if (!notification->capture_buffer && capture_count != 0) {
182 ERR("Expected capture but capture buffer is null");
183 ret = -1;
184 goto end;
185 }
186
187 evaluation = lttng_evaluation_event_rule_create(
188 container_of(lttng_trigger_get_const_condition(work_item->trigger),
189 struct lttng_condition_event_rule,
190 parent),
191 get_trigger_name(work_item->trigger),
192 notification->capture_buffer,
193 notification->capture_buf_size, false);
194 if (!evaluation) {
195 ERR("Failed to create event rule hit evaluation");
196 ret = -1;
197 goto end;
198 }
199
200 ret = notification_client_list_send_evaluation(work_item->client_list,
201 lttng_trigger_get_const_condition(work_item->trigger),
202 evaluation,
203 lttng_trigger_get_credentials(work_item->trigger), NULL,
204 client_handle_transmission_status, executor);
205 end:
206 lttng_evaluation_destroy(evaluation);
207 return ret;
208 }
209
210 static int action_executor_start_session_handler(struct action_executor *executor,
211 const struct action_work_item *work_item,
212 const struct lttng_action *action)
213 {
214 int ret = 0;
215 const char *session_name;
216 enum lttng_action_status action_status;
217 struct ltt_session *session;
218
219 action_status = lttng_action_start_session_get_session_name(
220 action, &session_name);
221 if (action_status != LTTNG_ACTION_STATUS_OK) {
222 ERR("Failed to get session name from \"%s\" action",
223 get_action_name(action));
224 ret = -1;
225 goto end;
226 }
227
228 session_lock_list();
229 session = session_find_by_name(session_name);
230 if (session) {
231 enum lttng_error_code cmd_ret;
232
233 session_lock(session);
234 cmd_ret = cmd_start_trace(session);
235 session_unlock(session);
236
237 switch (cmd_ret) {
238 case LTTNG_OK:
239 DBG("Successfully started session \"%s\" on behalf of trigger \"%s\"",
240 session_name,
241 get_trigger_name(work_item->trigger));
242 break;
243 case LTTNG_ERR_TRACE_ALREADY_STARTED:
244 DBG("Attempted to start session \"%s\" on behalf of trigger \"%s\" but it was already started",
245 session_name,
246 get_trigger_name(work_item->trigger));
247 break;
248 default:
249 WARN("Failed to start session \"%s\" on behalf of trigger \"%s\": %s",
250 session_name,
251 get_trigger_name(work_item->trigger),
252 lttng_strerror(-cmd_ret));
253 break;
254 }
255 session_put(session);
256 } else {
257 DBG("Failed to find session \"%s\" by name while executing \"%s\" action of trigger \"%s\"",
258 session_name, get_action_name(action),
259 get_trigger_name(work_item->trigger));
260 }
261 session_unlock_list();
262 end:
263 return ret;
264 }
265
266 static int action_executor_stop_session_handler(struct action_executor *executor,
267 const struct action_work_item *work_item,
268 const struct lttng_action *action)
269 {
270 int ret = 0;
271 const char *session_name;
272 enum lttng_action_status action_status;
273 struct ltt_session *session;
274
275 action_status = lttng_action_stop_session_get_session_name(
276 action, &session_name);
277 if (action_status != LTTNG_ACTION_STATUS_OK) {
278 ERR("Failed to get session name from \"%s\" action",
279 get_action_name(action));
280 ret = -1;
281 goto end;
282 }
283
284 session_lock_list();
285 session = session_find_by_name(session_name);
286 if (session) {
287 enum lttng_error_code cmd_ret;
288
289 session_lock(session);
290 cmd_ret = cmd_stop_trace(session);
291 session_unlock(session);
292
293 switch (cmd_ret) {
294 case LTTNG_OK:
295 DBG("Successfully stopped session \"%s\" on behalf of trigger \"%s\"",
296 session_name,
297 get_trigger_name(work_item->trigger));
298 break;
299 case LTTNG_ERR_TRACE_ALREADY_STOPPED:
300 DBG("Attempted to stop session \"%s\" on behalf of trigger \"%s\" but it was already stopped",
301 session_name,
302 get_trigger_name(work_item->trigger));
303 break;
304 default:
305 WARN("Failed to stop session \"%s\" on behalf of trigger \"%s\": %s",
306 session_name,
307 get_trigger_name(work_item->trigger),
308 lttng_strerror(-cmd_ret));
309 break;
310 }
311 session_put(session);
312 } else {
313 DBG("Failed to find session \"%s\" by name while executing \"%s\" action of trigger \"%s\"",
314 session_name, get_action_name(action),
315 get_trigger_name(work_item->trigger));
316 }
317 session_unlock_list();
318 end:
319 return ret;
320 }
321
322 static int action_executor_rotate_session_handler(struct action_executor *executor,
323 const struct action_work_item *work_item,
324 const struct lttng_action *action)
325 {
326 int ret = 0;
327 const char *session_name;
328 enum lttng_action_status action_status;
329 struct ltt_session *session;
330
331 action_status = lttng_action_rotate_session_get_session_name(
332 action, &session_name);
333 if (action_status != LTTNG_ACTION_STATUS_OK) {
334 ERR("Failed to get session name from \"%s\" action",
335 get_action_name(action));
336 ret = -1;
337 goto end;
338 }
339
340 session_lock_list();
341 session = session_find_by_name(session_name);
342 if (session) {
343 enum lttng_error_code cmd_ret;
344
345 session_lock(session);
346 cmd_ret = cmd_rotate_session(session, NULL, false,
347 LTTNG_TRACE_CHUNK_COMMAND_TYPE_MOVE_TO_COMPLETED);
348 session_unlock(session);
349
350 switch (cmd_ret) {
351 case LTTNG_OK:
352 DBG("Successfully started rotation of session \"%s\" on behalf of trigger \"%s\"",
353 session_name,
354 get_trigger_name(work_item->trigger));
355 break;
356 case LTTNG_ERR_ROTATION_PENDING:
357 DBG("Attempted to start a rotation of session \"%s\" on behalf of trigger \"%s\" but a rotation is already ongoing",
358 session_name,
359 get_trigger_name(work_item->trigger));
360 break;
361 case LTTNG_ERR_ROTATION_MULTIPLE_AFTER_STOP:
362 case LTTNG_ERR_ROTATION_AFTER_STOP_CLEAR:
363 DBG("Attempted to start a rotation of session \"%s\" on behalf of trigger \"%s\" but a rotation has already been completed since the last stop or clear",
364 session_name,
365 get_trigger_name(work_item->trigger));
366 break;
367 default:
368 WARN("Failed to start a rotation of session \"%s\" on behalf of trigger \"%s\": %s",
369 session_name,
370 get_trigger_name(work_item->trigger),
371 lttng_strerror(-cmd_ret));
372 break;
373 }
374 session_put(session);
375 } else {
376 DBG("Failed to find session \"%s\" by name while executing \"%s\" action of trigger \"%s\"",
377 session_name, get_action_name(action),
378 get_trigger_name(work_item->trigger));
379 }
380 session_unlock_list();
381 end:
382 return ret;
383 }
384
385 static int action_executor_snapshot_session_handler(struct action_executor *executor,
386 const struct action_work_item *work_item,
387 const struct lttng_action *action)
388 {
389 int ret = 0;
390 const char *session_name;
391 enum lttng_action_status action_status;
392 struct ltt_session *session;
393 const struct lttng_snapshot_output default_snapshot_output = {
394 .max_size = UINT64_MAX,
395 };
396 const struct lttng_snapshot_output *snapshot_output =
397 &default_snapshot_output;
398
399 action_status = lttng_action_snapshot_session_get_session_name(
400 action, &session_name);
401 if (action_status != LTTNG_ACTION_STATUS_OK) {
402 ERR("Failed to get session name from \"%s\" action",
403 get_action_name(action));
404 ret = -1;
405 goto end;
406 }
407
408 action_status = lttng_action_snapshot_session_get_output_const(
409 action, &snapshot_output);
410 if (action_status != LTTNG_ACTION_STATUS_OK &&
411 action_status != LTTNG_ACTION_STATUS_UNSET) {
412 ERR("Failed to get output from \"%s\" action",
413 get_action_name(action));
414 ret = -1;
415 goto end;
416 }
417
418 session_lock_list();
419 session = session_find_by_name(session_name);
420 if (session) {
421 enum lttng_error_code cmd_ret;
422
423 session_lock(session);
424 cmd_ret = cmd_snapshot_record(session, snapshot_output, 0);
425 session_unlock(session);
426
427 switch (cmd_ret) {
428 case LTTNG_OK:
429 DBG("Successfully recorded snapshot of session \"%s\" on behalf of trigger \"%s\"",
430 session_name,
431 get_trigger_name(work_item->trigger));
432 break;
433 default:
434 WARN("Failed to record snapshot of session \"%s\" on behalf of trigger \"%s\": %s",
435 session_name,
436 get_trigger_name(work_item->trigger),
437 lttng_strerror(-cmd_ret));
438 break;
439 }
440 session_put(session);
441 } else {
442 DBG("Failed to find session \"%s\" by name while executing \"%s\" action of trigger \"%s\"",
443 session_name, get_action_name(action),
444 get_trigger_name(work_item->trigger));
445 }
446 session_unlock_list();
447 end:
448 return ret;
449 }
450
451 static int action_executor_group_handler(struct action_executor *executor,
452 const struct action_work_item *work_item,
453 const struct lttng_action *action_group)
454 {
455 int ret = 0;
456 unsigned int i, count;
457 enum lttng_action_status action_status;
458
459 action_status = lttng_action_group_get_count(action_group, &count);
460 if (action_status != LTTNG_ACTION_STATUS_OK) {
461 /* Fatal error. */
462 ERR("Failed to get count of action in action group");
463 ret = -1;
464 goto end;
465 }
466
467 DBG("Action group has %u action%s", count, count != 1 ? "s" : "");
468 for (i = 0; i < count; i++) {
469 const struct lttng_action *action =
470 lttng_action_group_get_at_index_const(
471 action_group, i);
472
473 ret = action_executor_generic_handler(
474 executor, work_item, action);
475 if (ret) {
476 ERR("Stopping the execution of the action group of trigger \"%s\" following a fatal error",
477 get_trigger_name(work_item->trigger));
478 goto end;
479 }
480 }
481 end:
482 return ret;
483 }
484
485 static int action_executor_generic_handler(struct action_executor *executor,
486 const struct action_work_item *work_item,
487 const struct lttng_action *action)
488 {
489 DBG("Executing action \"%s\" of trigger \"%s\" action work item %" PRIu64,
490 get_action_name(action),
491 get_trigger_name(work_item->trigger),
492 work_item->id);
493
494 return action_executors[lttng_action_get_type(action)](
495 executor, work_item, action);
496 }
497
498 static int action_work_item_execute(struct action_executor *executor,
499 struct action_work_item *work_item)
500 {
501 int ret;
502 const struct lttng_action *action =
503 lttng_trigger_get_const_action(work_item->trigger);
504
505 DBG("Starting execution of action work item %" PRIu64 " of trigger \"%s\"",
506 work_item->id, get_trigger_name(work_item->trigger));
507 ret = action_executor_generic_handler(executor, work_item, action);
508 DBG("Completed execution of action work item %" PRIu64 " of trigger \"%s\"",
509 work_item->id, get_trigger_name(work_item->trigger));
510 return ret;
511 }
512
513 static void action_work_item_destroy(struct action_work_item *work_item)
514 {
515 lttng_trigger_put(work_item->trigger);
516 notification_client_list_put(work_item->client_list);
517 lttng_trigger_notification_destroy(work_item->trigger_notification);
518 free(work_item);
519 }
520
521 static void *action_executor_thread(void *_data)
522 {
523 struct action_executor *executor = _data;
524
525 assert(executor);
526
527 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_ACTION_EXECUTOR);
528
529 rcu_register_thread();
530 rcu_thread_online();
531
532 DBG("Entering work execution loop");
533 pthread_mutex_lock(&executor->work.lock);
534 while (!executor->should_quit) {
535 int ret;
536 struct action_work_item *work_item;
537
538 health_code_update();
539 if (executor->work.pending_count == 0) {
540 health_poll_entry();
541 DBG("No work items enqueued, entering wait");
542 pthread_cond_wait(&executor->work.cond,
543 &executor->work.lock);
544 DBG("Woke-up from wait");
545 health_poll_exit();
546 continue;
547 }
548
549 /* Pop item from front of the list with work lock held. */
550 work_item = cds_list_first_entry(&executor->work.list,
551 struct action_work_item, list_node);
552 cds_list_del(&work_item->list_node);
553 executor->work.pending_count--;
554
555 /*
556 * Work can be performed without holding the work lock,
557 * allowing new items to be queued.
558 */
559 pthread_mutex_unlock(&executor->work.lock);
560 ret = action_work_item_execute(executor, work_item);
561 action_work_item_destroy(work_item);
562 if (ret) {
563 /* Fatal error. */
564 break;
565 }
566 health_code_update();
567 pthread_mutex_lock(&executor->work.lock);
568 }
569 pthread_mutex_unlock(&executor->work.lock);
570 DBG("Left work execution loop");
571
572 health_code_update();
573
574 rcu_thread_offline();
575 rcu_unregister_thread();
576 health_unregister(health_sessiond);
577
578 return NULL;
579 }
580
581 static bool shutdown_action_executor_thread(void *_data)
582 {
583 struct action_executor *executor = _data;
584
585 /* TODO. */
586 executor->should_quit = true;
587 pthread_cond_signal(&executor->work.cond);
588 return true;
589 }
590
591 static void clean_up_action_executor_thread(void *_data)
592 {
593 struct action_executor *executor = _data;
594
595 assert(cds_list_empty(&executor->work.list));
596
597 pthread_mutex_destroy(&executor->work.lock);
598 pthread_cond_destroy(&executor->work.cond);
599 free(executor);
600 }
601
602 struct action_executor *action_executor_create(
603 struct notification_thread_handle *handle)
604 {
605 struct action_executor *executor = zmalloc(sizeof(*executor));
606
607 if (!executor) {
608 goto end;
609 }
610
611 CDS_INIT_LIST_HEAD(&executor->work.list);
612 pthread_cond_init(&executor->work.cond, NULL);
613 pthread_mutex_init(&executor->work.lock, NULL);
614 executor->notification_thread_handle = handle;
615
616 executor->thread = lttng_thread_create(THREAD_NAME,
617 action_executor_thread, shutdown_action_executor_thread,
618 clean_up_action_executor_thread, executor);
619 end:
620 return executor;
621 }
622
623 void action_executor_destroy(struct action_executor *executor)
624 {
625 struct action_work_item *work_item, *tmp;
626
627 /* TODO Wait for work list to drain? */
628 lttng_thread_shutdown(executor->thread);
629 pthread_mutex_lock(&executor->work.lock);
630 if (executor->work.pending_count != 0) {
631 WARN("%" PRIu64
632 " trigger action%s still queued for execution and will be discarded",
633 executor->work.pending_count,
634 executor->work.pending_count == 1 ? " is" :
635 "s are");
636 }
637
638 cds_list_for_each_entry_safe (
639 work_item, tmp, &executor->work.list, list_node) {
640 WARN("Discarding action work item %" PRIu64
641 " associated to trigger \"%s\"",
642 work_item->id, get_trigger_name(work_item->trigger));
643 cds_list_del(&work_item->list_node);
644 action_work_item_destroy(work_item);
645 }
646 pthread_mutex_unlock(&executor->work.lock);
647 lttng_thread_put(executor->thread);
648 }
649
650 /* RCU read-lock must be held by the caller. */
651 enum action_executor_status action_executor_enqueue(
652 struct action_executor *executor,
653 struct lttng_trigger *trigger,
654 struct notification_client_list *client_list,
655 struct lttng_trigger_notification *trigger_notification)
656 {
657 enum action_executor_status executor_status = ACTION_EXECUTOR_STATUS_OK;
658 const uint64_t work_item_id = executor->next_work_item_id++;
659 struct action_work_item *work_item;
660 bool signal = false;
661
662 pthread_mutex_lock(&executor->work.lock);
663 /* Check for queue overflow. */
664 if (executor->work.pending_count >= MAX_QUEUED_WORK_COUNT) {
665 /* Most likely spammy, remove if it is the case. */
666 DBG("Refusing to enqueue action for trigger \"%s\" as work item %" PRIu64
667 " (overflow)",
668 get_trigger_name(trigger), work_item_id);
669 executor_status = ACTION_EXECUTOR_STATUS_OVERFLOW;
670 goto error_unlock;
671 }
672
673 work_item = zmalloc(sizeof(*work_item));
674 if (!work_item) {
675 PERROR("Failed to allocate action executor work item on behalf of trigger \"%s\"",
676 get_trigger_name(trigger));
677 executor_status = ACTION_EXECUTOR_STATUS_ERROR;
678 goto error_unlock;
679 }
680
681 lttng_trigger_get(trigger);
682 if (client_list) {
683 const bool reference_acquired =
684 notification_client_list_get(client_list);
685
686 assert(reference_acquired);
687 }
688
689 *work_item = (typeof(*work_item)){
690 .id = work_item_id,
691 .trigger = trigger,
692 .client_list = client_list,
693 .list_node = CDS_LIST_HEAD_INIT(work_item->list_node),
694 .trigger_notification = trigger_notification,
695 };
696 cds_list_add_tail(&work_item->list_node, &executor->work.list);
697 executor->work.pending_count++;
698 DBG("Enqueued action for trigger \"%s\" as work item %" PRIu64,
699 get_trigger_name(trigger), work_item_id);
700 signal = true;
701
702 /*
703 * Note:
704 * Ownership of the lttng_trigger_notification object passed to the work
705 * item object incidentally to the executor list.
706 * Caller is responsible for freeing in case of error.
707 */
708
709 error_unlock:
710 pthread_mutex_unlock(&executor->work.lock);
711 if (signal) {
712 pthread_cond_signal(&executor->work.cond);
713 }
714 return executor_status;
715 }
This page took 0.045391 seconds and 5 git commands to generate.