Rename sessiond-timer.[hc] to timer.[hc]
1 /*
2 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * 2013 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <getopt.h>
22 #include <grp.h>
23 #include <limits.h>
24 #include <paths.h>
25 #include <pthread.h>
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <inttypes.h>
31 #include <sys/mman.h>
32 #include <sys/mount.h>
33 #include <sys/resource.h>
34 #include <sys/socket.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38 #include <urcu/uatomic.h>
39 #include <unistd.h>
40 #include <ctype.h>
41
42 #include <common/common.h>
43 #include <common/compat/socket.h>
44 #include <common/compat/getenv.h>
45 #include <common/defaults.h>
46 #include <common/kernel-consumer/kernel-consumer.h>
47 #include <common/futex.h>
48 #include <common/relayd/relayd.h>
49 #include <common/utils.h>
50 #include <common/daemonize.h>
51 #include <common/config/session-config.h>
52 #include <common/dynamic-buffer.h>
53 #include <lttng/userspace-probe-internal.h>
54 #include <lttng/event-internal.h>
55
56 #include "lttng-sessiond.h"
57 #include "buffer-registry.h"
58 #include "channel.h"
59 #include "cmd.h"
60 #include "consumer.h"
61 #include "context.h"
62 #include "event.h"
63 #include "kernel.h"
64 #include "kernel-consumer.h"
65 #include "modprobe.h"
66 #include "shm.h"
67 #include "ust-ctl.h"
68 #include "ust-consumer.h"
69 #include "utils.h"
70 #include "fd-limit.h"
71 #include "health-sessiond.h"
72 #include "testpoint.h"
73 #include "ust-thread.h"
74 #include "agent-thread.h"
75 #include "save.h"
76 #include "load-session-thread.h"
77 #include "notification-thread.h"
78 #include "notification-thread-commands.h"
79 #include "rotation-thread.h"
80 #include "lttng-syscall.h"
81 #include "agent.h"
82 #include "ht-cleanup.h"
83 #include "sessiond-config.h"
84 #include "timer.h"
85
86 static const char *help_msg =
87 #ifdef LTTNG_EMBED_HELP
88 #include <lttng-sessiond.8.h>
89 #else
90 NULL
91 #endif
92 ;
93
94 const char *progname;
95 static pid_t ppid; /* Parent PID for --sig-parent option */
96 static pid_t child_ppid; /* Internal parent PID used with daemonize. */
97 static int lockfile_fd = -1;
98
99 /* Set to 1 when a SIGUSR1 signal is received. */
100 static int recv_child_signal;
101
102 static struct lttng_kernel_tracer_version kernel_tracer_version;
103 static struct lttng_kernel_tracer_abi_version kernel_tracer_abi_version;
104
105 /*
106 * Consumer daemon specific control data. Every value not initialized here is
107 * set to 0 by the static definition.
108 */
109 static struct consumer_data kconsumer_data = {
110 .type = LTTNG_CONSUMER_KERNEL,
111 .err_sock = -1,
112 .cmd_sock = -1,
113 .channel_monitor_pipe = -1,
114 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
115 .lock = PTHREAD_MUTEX_INITIALIZER,
116 .cond = PTHREAD_COND_INITIALIZER,
117 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
118 };
119 static struct consumer_data ustconsumer64_data = {
120 .type = LTTNG_CONSUMER64_UST,
121 .err_sock = -1,
122 .cmd_sock = -1,
123 .channel_monitor_pipe = -1,
124 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
125 .lock = PTHREAD_MUTEX_INITIALIZER,
126 .cond = PTHREAD_COND_INITIALIZER,
127 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
128 };
129 static struct consumer_data ustconsumer32_data = {
130 .type = LTTNG_CONSUMER32_UST,
131 .err_sock = -1,
132 .cmd_sock = -1,
133 .channel_monitor_pipe = -1,
134 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
135 .lock = PTHREAD_MUTEX_INITIALIZER,
136 .cond = PTHREAD_COND_INITIALIZER,
137 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
138 };
139
140 /* Command line options */
141 static const struct option long_options[] = {
142 { "client-sock", required_argument, 0, 'c' },
143 { "apps-sock", required_argument, 0, 'a' },
144 { "kconsumerd-cmd-sock", required_argument, 0, '\0' },
145 { "kconsumerd-err-sock", required_argument, 0, '\0' },
146 { "ustconsumerd32-cmd-sock", required_argument, 0, '\0' },
147 { "ustconsumerd32-err-sock", required_argument, 0, '\0' },
148 { "ustconsumerd64-cmd-sock", required_argument, 0, '\0' },
149 { "ustconsumerd64-err-sock", required_argument, 0, '\0' },
150 { "consumerd32-path", required_argument, 0, '\0' },
151 { "consumerd32-libdir", required_argument, 0, '\0' },
152 { "consumerd64-path", required_argument, 0, '\0' },
153 { "consumerd64-libdir", required_argument, 0, '\0' },
154 { "daemonize", no_argument, 0, 'd' },
155 { "background", no_argument, 0, 'b' },
156 { "sig-parent", no_argument, 0, 'S' },
157 { "help", no_argument, 0, 'h' },
158 { "group", required_argument, 0, 'g' },
159 { "version", no_argument, 0, 'V' },
160 { "quiet", no_argument, 0, 'q' },
161 { "verbose", no_argument, 0, 'v' },
162 { "verbose-consumer", no_argument, 0, '\0' },
163 { "no-kernel", no_argument, 0, '\0' },
164 { "pidfile", required_argument, 0, 'p' },
165 { "agent-tcp-port", required_argument, 0, '\0' },
166 { "config", required_argument, 0, 'f' },
167 { "load", required_argument, 0, 'l' },
168 { "kmod-probes", required_argument, 0, '\0' },
169 { "extra-kmod-probes", required_argument, 0, '\0' },
170 { NULL, 0, 0, 0 }
171 };
172
173 struct sessiond_config config;
174
175 /* Command line options to ignore from configuration file */
176 static const char *config_ignore_options[] = { "help", "version", "config" };
177
178 /* Shared between threads */
179 static int dispatch_thread_exit;
180
181 /* Sockets and FDs */
182 static int client_sock = -1;
183 static int apps_sock = -1;
184 int kernel_tracer_fd = -1;
185 static int kernel_poll_pipe[2] = { -1, -1 };
186
187 /*
188 * Quit pipe for all threads. This permits a single cancellation point
189 * for all threads when receiving an event on the pipe.
190 */
191 static int thread_quit_pipe[2] = { -1, -1 };
192
193 /*
194 * This pipe is used to inform the thread managing application communication
195 * that a command is queued and ready to be processed.
196 */
197 static int apps_cmd_pipe[2] = { -1, -1 };
198
199 int apps_cmd_notify_pipe[2] = { -1, -1 };
200
201 /* Pthread, Mutexes and Semaphores */
202 static pthread_t apps_thread;
203 static pthread_t apps_notify_thread;
204 static pthread_t reg_apps_thread;
205 static pthread_t client_thread;
206 static pthread_t kernel_thread;
207 static pthread_t dispatch_thread;
208 static pthread_t health_thread;
209 static pthread_t ht_cleanup_thread;
210 static pthread_t agent_reg_thread;
211 static pthread_t load_session_thread;
212 static pthread_t notification_thread;
213 static pthread_t rotation_thread;
214 static pthread_t timer_thread;
215
216 /*
217 * UST registration command queue. This queue is tied to a futex and uses an
218 * N wakers / 1 waiter scheme implemented and detailed in futex.c/.h
219 *
220 * The thread_registration_apps and thread_dispatch_ust_registration threads use
221 * this queue along with the wait/wake scheme. The thread_manage_apps thread
222 * receives the new application sockets down the line and monitors them for any
223 * I/O error or clean close that triggers an unregistration of the application.
224 */
225 static struct ust_cmd_queue ust_cmd_queue;
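/*
 * Illustrative sketch (not built) of the N wakers / 1 waiter pattern around
 * this queue. The dequeue side matches the dispatch thread below; the
 * enqueue call is assumed to follow the usual wfcqueue API and is not shown
 * in this excerpt.
 *
 *	Waker (registration thread), per incoming command:
 *		cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail,
 *				&ust_cmd->node);
 *		futex_nto1_wake(&ust_cmd_queue.futex);
 *
 *	Waiter (thread_dispatch_ust_registration):
 *		futex_nto1_prepare(&ust_cmd_queue.futex);
 *		while ((node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head,
 *				&ust_cmd_queue.tail))) {
 *			... handle command ...
 *		}
 *		futex_nto1_wait(&ust_cmd_queue.futex);
 */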
226
227 /*
228 * Pointer initialized before thread creation.
229 *
230 * This points to the tracing session list containing the session count and a
231 * mutex lock. The lock MUST be taken if you iterate over the list. The lock
232 * MUST NOT be taken if you call a public function in session.c.
233 *
234 * The lock is nested inside the structure: session_list_ptr->lock. Please use
235 * session_lock_list and session_unlock_list for lock acquisition.
236 */
237 static struct ltt_session_list *session_list_ptr;
238
239 int ust_consumerd64_fd = -1;
240 int ust_consumerd32_fd = -1;
241
242 static const char *module_proc_lttng = "/proc/lttng";
243
244 /*
245 * Consumer daemon state which is changed when spawning it, killing it or in
246 * case of a fatal error.
247 */
248 enum consumerd_state {
249 CONSUMER_STARTED = 1,
250 CONSUMER_STOPPED = 2,
251 CONSUMER_ERROR = 3,
252 };
253
254 /*
255 * This consumer daemon state is used to validate if a client command will be
256 * able to reach the consumer. If not, the client is informed. For instance,
257 * doing a "lttng start" when the consumer state is set to ERROR will return an
258 * error to the client.
259 *
260 * The following example shows a possible race condition of this scheme:
261 *
262 * consumer thread error happens
263 * client cmd arrives
264 * client cmd checks state -> still OK
265 * consumer thread exit, sets error
266 * client cmd try to talk to consumer
267 * ...
268 *
269 * However, since the consumer is a different daemon, we have no way of making
270 * sure the command will reach it safely even with this state flag. This is why
271 * we consider that up to the state validation during command processing, the
272 * command is safe. After that, we cannot guarantee the correctness of the
273 * client request vis-a-vis the consumer.
274 */
275 static enum consumerd_state ust_consumerd_state;
276 static enum consumerd_state kernel_consumerd_state;
277
278 /* Set in main() with the current page size. */
279 long page_size;
280
281 /* Application health monitoring */
282 struct health_app *health_sessiond;
283
284 /* Am I root or not. */
285 int is_root; /* Set to 1 if the daemon is running as root */
286
287 const char * const config_section_name = "sessiond";
288
289 /* Load session thread information to operate. */
290 struct load_session_thread_data *load_info;
291
292 /* Notification thread handle. */
293 struct notification_thread_handle *notification_thread_handle;
294
295 /* Rotation thread handle. */
296 struct rotation_thread_handle *rotation_thread_handle;
297
298 /* Global hash tables */
299 struct lttng_ht *agent_apps_ht_by_sock = NULL;
300
301 /*
302 * The initialization of the session daemon is done in multiple phases.
303 *
304 * While all threads are launched near-simultaneously, only some of them
305 * are needed to ensure the session daemon can start to respond to client
306 * requests.
307 *
308 * There are two important guarantees that we wish to offer with respect
309 * to the initialisation of the session daemon:
310 * - When the daemonize/background launcher process exits, the sessiond
311 * is fully able to respond to client requests,
312 * - Auto-loaded sessions are visible to clients.
313 *
314 * In order to achieve this, a number of support threads have to be launched
315 * to allow the "client" thread to function properly. Moreover, since the
316 * "load session" thread needs the client thread, we must provide a way
317 * for the "load session" thread to know that the "client" thread is up
318 * and running.
319 *
320 * Hence, the support threads decrement the lttng_sessiond_ready counter
321 * while the "client" thread waits for it to reach 0. Once the "client" thread
322 * unblocks, it posts the message_thread_ready semaphore which allows the
323 * "load session" thread to progress.
324 *
325 * This implies that the "load session" thread is the last to be initialized
326 * and will explicitly call sessiond_signal_parents(), which signals the parents
327 * that the session daemon is fully initialized.
328 *
329 * The four (4) support threads are:
330 * - agent_thread
331 * - notification_thread
332 * - rotation_thread
333 * - health_thread
334 */
335 #define NR_LTTNG_SESSIOND_SUPPORT_THREADS 4
336 int lttng_sessiond_ready = NR_LTTNG_SESSIOND_SUPPORT_THREADS;
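/*
 * Illustrative sketch (not built) of the start-up handshake described above.
 * The notify side is sessiond_notify_ready() below; the client-thread side is
 * outside this excerpt, so its exact wait loop is an assumption here.
 *
 *	Support thread (agent/notification/rotation/health), once operational:
 *		sessiond_notify_ready();	decrements lttng_sessiond_ready
 *
 *	Client thread, before serving commands:
 *		while (uatomic_read(&lttng_sessiond_ready) != 0)
 *			wait;
 *		cmm_smp_mb();			pairs with the barrier in
 *						sessiond_notify_ready()
 *		post the message_thread_ready semaphore so the "load session"
 *		thread can proceed (see the comment above; the exact call is
 *		not shown in this excerpt).
 */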
337
338 int sessiond_check_thread_quit_pipe(int fd, uint32_t events)
339 {
340 return (fd == thread_quit_pipe[0] && (events & LPOLLIN)) ? 1 : 0;
341 }
342
343 /* Notify parents that we are ready for cmd and health check */
344 LTTNG_HIDDEN
345 void sessiond_signal_parents(void)
346 {
347 /*
348 * Notify the parent pid that we are ready to accept commands
349 * for the client side. This ppid is the one from the
350 * external process that spawned us.
351 */
352 if (config.sig_parent) {
353 kill(ppid, SIGUSR1);
354 }
355
356 /*
357 * Notify the parent of the fork() process that we are
358 * ready.
359 */
360 if (config.daemonize || config.background) {
361 kill(child_ppid, SIGUSR1);
362 }
363 }
364
365 LTTNG_HIDDEN
366 void sessiond_notify_ready(void)
367 {
368 /*
369 * This memory barrier is paired with the one performed by
370 * the client thread after it has seen that 'lttng_sessiond_ready' is 0.
371 *
372 * The purpose of these memory barriers is to ensure that all
373 * initialization operations of the various threads that call this
374 * function to signal that they are ready are committed/published
375 * before the client thread can see the 'lttng_sessiond_ready' counter
376 * reach 0.
377 *
378 * Note that this could be a 'write' memory barrier, but a full barrier
379 * is used in case the code using this utility changes. The performance
380 * implications of this choice are minimal since this is a slow path.
381 */
382 cmm_smp_mb();
383 uatomic_sub(&lttng_sessiond_ready, 1);
384 }
385
386 static
387 int __sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size,
388 int *a_pipe)
389 {
390 int ret;
391
392 assert(events);
393
394 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
395 if (ret < 0) {
396 goto error;
397 }
398
399 /* Add quit pipe */
400 ret = lttng_poll_add(events, a_pipe[0], LPOLLIN | LPOLLERR);
401 if (ret < 0) {
402 goto error;
403 }
404
405 return 0;
406
407 error:
408 return ret;
409 }
410
411 /*
412 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
413 */
414 int sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size)
415 {
416 return __sessiond_set_thread_pollset(events, size, thread_quit_pipe);
417 }
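/*
 * Illustrative sketch (not built) of how the worker threads below combine
 * these helpers: the quit pipe is part of every poll set and is checked on
 * each wake-up, which is how stop_threads() terminates them.
 *
 *	struct lttng_poll_event events;
 *
 *	ret = sessiond_set_thread_pollset(&events, 2);
 *	...
 *	ret = lttng_poll_wait(&events, -1);
 *	for (i = 0; i < ret; i++) {
 *		revents = LTTNG_POLL_GETEV(&events, i);
 *		pollfd = LTTNG_POLL_GETFD(&events, i);
 *		if (sessiond_check_thread_quit_pipe(pollfd, revents)) {
 *			goto exit;
 *		}
 *		... handle the thread's own fds ...
 *	}
 *	lttng_poll_clean(&events);
 */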
418
419 /*
420 * Init thread quit pipe.
421 *
422 * Return -1 on error or 0 if all pipes are created.
423 */
424 static int __init_thread_quit_pipe(int *a_pipe)
425 {
426 int ret, i;
427
428 ret = pipe(a_pipe);
429 if (ret < 0) {
430 PERROR("thread quit pipe");
431 goto error;
432 }
433
434 for (i = 0; i < 2; i++) {
435 ret = fcntl(a_pipe[i], F_SETFD, FD_CLOEXEC);
436 if (ret < 0) {
437 PERROR("fcntl");
438 goto error;
439 }
440 }
441
442 error:
443 return ret;
444 }
445
446 static int init_thread_quit_pipe(void)
447 {
448 return __init_thread_quit_pipe(thread_quit_pipe);
449 }
450
451 /*
452 * Stop all threads by closing the thread quit pipe.
453 */
454 static void stop_threads(void)
455 {
456 int ret;
457
458 /* Stopping all threads */
459 DBG("Terminating all threads");
460 ret = notify_thread_pipe(thread_quit_pipe[1]);
461 if (ret < 0) {
462 ERR("write error on thread quit pipe");
463 }
464
465 /* Dispatch thread */
466 CMM_STORE_SHARED(dispatch_thread_exit, 1);
467 futex_nto1_wake(&ust_cmd_queue.futex);
468 }
469
470 /*
471 * Close all consumer sockets.
472 */
473 static void close_consumer_sockets(void)
474 {
475 int ret;
476
477 if (kconsumer_data.err_sock >= 0) {
478 ret = close(kconsumer_data.err_sock);
479 if (ret < 0) {
480 PERROR("kernel consumer err_sock close");
481 }
482 }
483 if (ustconsumer32_data.err_sock >= 0) {
484 ret = close(ustconsumer32_data.err_sock);
485 if (ret < 0) {
486 PERROR("UST consumerd32 err_sock close");
487 }
488 }
489 if (ustconsumer64_data.err_sock >= 0) {
490 ret = close(ustconsumer64_data.err_sock);
491 if (ret < 0) {
492 PERROR("UST consumerd64 err_sock close");
493 }
494 }
495 if (kconsumer_data.cmd_sock >= 0) {
496 ret = close(kconsumer_data.cmd_sock);
497 if (ret < 0) {
498 PERROR("kernel consumer cmd_sock close");
499 }
500 }
501 if (ustconsumer32_data.cmd_sock >= 0) {
502 ret = close(ustconsumer32_data.cmd_sock);
503 if (ret < 0) {
504 PERROR("UST consumerd32 cmd_sock close");
505 }
506 }
507 if (ustconsumer64_data.cmd_sock >= 0) {
508 ret = close(ustconsumer64_data.cmd_sock);
509 if (ret < 0) {
510 PERROR("UST consumerd64 cmd_sock close");
511 }
512 }
513 if (kconsumer_data.channel_monitor_pipe >= 0) {
514 ret = close(kconsumer_data.channel_monitor_pipe);
515 if (ret < 0) {
516 PERROR("kernel consumer channel monitor pipe close");
517 }
518 }
519 if (ustconsumer32_data.channel_monitor_pipe >= 0) {
520 ret = close(ustconsumer32_data.channel_monitor_pipe);
521 if (ret < 0) {
522 PERROR("UST consumerd32 channel monitor pipe close");
523 }
524 }
525 if (ustconsumer64_data.channel_monitor_pipe >= 0) {
526 ret = close(ustconsumer64_data.channel_monitor_pipe);
527 if (ret < 0) {
528 PERROR("UST consumerd64 channel monitor pipe close");
529 }
530 }
531 }
532
533 /*
534 * Wait on consumer process termination.
535 *
536 * Needs to be called with the consumer data lock held or from a context
537 * ensuring no concurrent access to data (e.g: cleanup).
538 */
539 static void wait_consumer(struct consumer_data *consumer_data)
540 {
541 pid_t ret;
542 int status;
543
544 if (consumer_data->pid <= 0) {
545 return;
546 }
547
548 DBG("Waiting for complete teardown of consumerd (PID: %d)",
549 consumer_data->pid);
550 ret = waitpid(consumer_data->pid, &status, 0);
551 if (ret == -1) {
552 PERROR("consumerd waitpid pid: %d", consumer_data->pid);
553 } else if (!WIFEXITED(status)) {
554 ERR("consumerd termination with error: %d",
555 WEXITSTATUS(status));
556 }
557 consumer_data->pid = 0;
558 }
559
560 /*
561 * Cleanup the session daemon's data structures.
562 */
563 static void sessiond_cleanup(void)
564 {
565 int ret;
566 struct ltt_session *sess, *stmp;
567
568 DBG("Cleanup sessiond");
569
570 /*
571 * Close the thread quit pipe. It has already done its job,
572 * since we are now called.
573 */
574 utils_close_pipe(thread_quit_pipe);
575
576 ret = remove(config.pid_file_path.value);
577 if (ret < 0) {
578 PERROR("remove pidfile %s", config.pid_file_path.value);
579 }
580
581 DBG("Removing sessiond and consumerd content of directory %s",
582 config.rundir.value);
583
584 /* sessiond */
585 DBG("Removing %s", config.pid_file_path.value);
586 (void) unlink(config.pid_file_path.value);
587
588 DBG("Removing %s", config.agent_port_file_path.value);
589 (void) unlink(config.agent_port_file_path.value);
590
591 /* kconsumerd */
592 DBG("Removing %s", kconsumer_data.err_unix_sock_path);
593 (void) unlink(kconsumer_data.err_unix_sock_path);
594
595 DBG("Removing directory %s", config.kconsumerd_path.value);
596 (void) rmdir(config.kconsumerd_path.value);
597
598 /* ust consumerd 32 */
599 DBG("Removing %s", config.consumerd32_err_unix_sock_path.value);
600 (void) unlink(config.consumerd32_err_unix_sock_path.value);
601
602 DBG("Removing directory %s", config.consumerd32_path.value);
603 (void) rmdir(config.consumerd32_path.value);
604
605 /* ust consumerd 64 */
606 DBG("Removing %s", config.consumerd64_err_unix_sock_path.value);
607 (void) unlink(config.consumerd64_err_unix_sock_path.value);
608
609 DBG("Removing directory %s", config.consumerd64_path.value);
610 (void) rmdir(config.consumerd64_path.value);
611
612 DBG("Cleaning up all sessions");
613
614 /* Destroy session list mutex */
615 if (session_list_ptr != NULL) {
616 pthread_mutex_destroy(&session_list_ptr->lock);
617
618 /* Cleanup ALL session */
619 cds_list_for_each_entry_safe(sess, stmp,
620 &session_list_ptr->head, list) {
621 cmd_destroy_session(sess, kernel_poll_pipe[1],
622 notification_thread_handle);
623 }
624 }
625
626 wait_consumer(&kconsumer_data);
627 wait_consumer(&ustconsumer64_data);
628 wait_consumer(&ustconsumer32_data);
629
630 DBG("Cleaning up all agent apps");
631 agent_app_ht_clean();
632
633 DBG("Closing all UST sockets");
634 ust_app_clean_list();
635 buffer_reg_destroy_registries();
636
637 if (is_root && !config.no_kernel) {
638 DBG2("Closing kernel fd");
639 if (kernel_tracer_fd >= 0) {
640 ret = close(kernel_tracer_fd);
641 if (ret) {
642 PERROR("close");
643 }
644 }
645 DBG("Unloading kernel modules");
646 modprobe_remove_lttng_all();
647 free(syscall_table);
648 }
649
650 close_consumer_sockets();
651
652 if (load_info) {
653 load_session_destroy_data(load_info);
654 free(load_info);
655 }
656
657 /*
658 * We do NOT rmdir rundir because there are other processes
659 * using it, for instance lttng-relayd, which can start in
660 * parallel with this teardown.
661 */
662 }
663
664 /*
665 * Cleanup the daemon's option data structures.
666 */
667 static void sessiond_cleanup_options(void)
668 {
669 DBG("Cleaning up options");
670
671 sessiond_config_fini(&config);
672
673 run_as_destroy_worker();
674 }
675
676 /*
677 * Send data on a unix socket using the liblttsessiondcomm API.
678 *
679 * Return lttcomm error code.
680 */
681 static int send_unix_sock(int sock, void *buf, size_t len)
682 {
683 /* Check valid length */
684 if (len == 0) {
685 return -1;
686 }
687
688 return lttcomm_send_unix_sock(sock, buf, len);
689 }
690
691 /*
692 * Free memory of a command context structure.
693 */
694 static void clean_command_ctx(struct command_ctx **cmd_ctx)
695 {
696 DBG("Clean command context structure");
697 if (*cmd_ctx) {
698 if ((*cmd_ctx)->llm) {
699 free((*cmd_ctx)->llm);
700 }
701 if ((*cmd_ctx)->lsm) {
702 free((*cmd_ctx)->lsm);
703 }
704 free(*cmd_ctx);
705 *cmd_ctx = NULL;
706 }
707 }
708
709 /*
710 * Notify UST applications using the shm mmap futex.
711 */
712 static int notify_ust_apps(int active)
713 {
714 char *wait_shm_mmap;
715
716 DBG("Notifying applications of session daemon state: %d", active);
717
718 /* See shm.c for this call implying mmap, shm and futex calls */
719 wait_shm_mmap = shm_ust_get_mmap(config.wait_shm_path.value, is_root);
720 if (wait_shm_mmap == NULL) {
721 goto error;
722 }
723
724 /* Wake waiting process */
725 futex_wait_update((int32_t *) wait_shm_mmap, active);
726
727 /* Apps notified successfully */
728 return 0;
729
730 error:
731 return -1;
732 }
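/*
 * Note: the waiters on this futex word are the lttng-ust instrumented
 * applications blocked on the "wait shm"; waking them prompts them to
 * (re)register with the session daemon (see shm.c and futex.c for the
 * mechanics not shown here).
 */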
733
734 /*
735 * Setup the outgoing data buffer for the response (llm) by allocating the
736 * right amount of memory and copying the original information from the lsm
737 * structure.
738 *
739 * Return 0 on success, negative value on error.
740 */
741 static int setup_lttng_msg(struct command_ctx *cmd_ctx,
742 const void *payload_buf, size_t payload_len,
743 const void *cmd_header_buf, size_t cmd_header_len)
744 {
745 int ret = 0;
746 const size_t header_len = sizeof(struct lttcomm_lttng_msg);
747 const size_t cmd_header_offset = header_len;
748 const size_t payload_offset = cmd_header_offset + cmd_header_len;
749 const size_t total_msg_size = header_len + cmd_header_len + payload_len;
750
751 cmd_ctx->llm = zmalloc(total_msg_size);
752
753 if (cmd_ctx->llm == NULL) {
754 PERROR("zmalloc");
755 ret = -ENOMEM;
756 goto end;
757 }
758
759 /* Copy common data */
760 cmd_ctx->llm->cmd_type = cmd_ctx->lsm->cmd_type;
761 cmd_ctx->llm->pid = cmd_ctx->lsm->domain.attr.pid;
762 cmd_ctx->llm->cmd_header_size = cmd_header_len;
763 cmd_ctx->llm->data_size = payload_len;
764 cmd_ctx->lttng_msg_size = total_msg_size;
765
766 /* Copy command header */
767 if (cmd_header_len) {
768 memcpy(((uint8_t *) cmd_ctx->llm) + cmd_header_offset, cmd_header_buf,
769 cmd_header_len);
770 }
771
772 /* Copy payload */
773 if (payload_len) {
774 memcpy(((uint8_t *) cmd_ctx->llm) + payload_offset, payload_buf,
775 payload_len);
776 }
777
778 end:
779 return ret;
780 }
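/*
 * Resulting reply buffer layout (illustrative, per the offsets computed
 * above):
 *
 *	cmd_ctx->llm
 *	+--------------------------+--------------------+------------------+
 *	| struct lttcomm_lttng_msg | command header     | payload          |
 *	| (header_len)             | (cmd_header_len)   | (payload_len)    |
 *	+--------------------------+--------------------+------------------+
 *
 *	cmd_ctx->lttng_msg_size = header_len + cmd_header_len + payload_len
 */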
781
782 /*
783 * Version of setup_lttng_msg() without command header.
784 */
785 static int setup_lttng_msg_no_cmd_header(struct command_ctx *cmd_ctx,
786 void *payload_buf, size_t payload_len)
787 {
788 return setup_lttng_msg(cmd_ctx, payload_buf, payload_len, NULL, 0);
789 }
790 /*
791 * Update the kernel poll set with all channel fds available over all tracing
792 * sessions. Add the wakeup pipe at the end of the set.
793 */
794 static int update_kernel_poll(struct lttng_poll_event *events)
795 {
796 int ret;
797 struct ltt_session *session;
798 struct ltt_kernel_channel *channel;
799
800 DBG("Updating kernel poll set");
801
802 session_lock_list();
803 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
804 session_lock(session);
805 if (session->kernel_session == NULL) {
806 session_unlock(session);
807 continue;
808 }
809
810 cds_list_for_each_entry(channel,
811 &session->kernel_session->channel_list.head, list) {
812 /* Add channel fd to the kernel poll set */
813 ret = lttng_poll_add(events, channel->fd, LPOLLIN | LPOLLRDNORM);
814 if (ret < 0) {
815 session_unlock(session);
816 goto error;
817 }
818 DBG("Channel fd %d added to kernel set", channel->fd);
819 }
820 session_unlock(session);
821 }
822 session_unlock_list();
823
824 return 0;
825
826 error:
827 session_unlock_list();
828 return -1;
829 }
830
831 /*
832 * Find the channel fd from 'fd' over all tracing sessions. When found, check
833 * for new channel streams and send those stream fds to the kernel consumer.
834 *
835 * Useful for the CPU hotplug feature.
836 */
837 static int update_kernel_stream(int fd)
838 {
839 int ret = 0;
840 struct ltt_session *session;
841 struct ltt_kernel_session *ksess;
842 struct ltt_kernel_channel *channel;
843
844 DBG("Updating kernel streams for channel fd %d", fd);
845
846 session_lock_list();
847 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
848 session_lock(session);
849 if (session->kernel_session == NULL) {
850 session_unlock(session);
851 continue;
852 }
853 ksess = session->kernel_session;
854
855 cds_list_for_each_entry(channel,
856 &ksess->channel_list.head, list) {
857 struct lttng_ht_iter iter;
858 struct consumer_socket *socket;
859
860 if (channel->fd != fd) {
861 continue;
862 }
863 DBG("Channel found, updating kernel streams");
864 ret = kernel_open_channel_stream(channel);
865 if (ret < 0) {
866 goto error;
867 }
868 /* Update the stream global counter */
869 ksess->stream_count_global += ret;
870
871 /*
872 * Have we already sent fds to the consumer? If yes, it
873 * means that tracing is started so it is safe to send
874 * our updated stream fds.
875 */
876 if (ksess->consumer_fds_sent != 1
877 || ksess->consumer == NULL) {
878 ret = -1;
879 goto error;
880 }
881
882 rcu_read_lock();
883 cds_lfht_for_each_entry(ksess->consumer->socks->ht,
884 &iter.iter, socket, node.node) {
885 pthread_mutex_lock(socket->lock);
886 ret = kernel_consumer_send_channel_streams(socket,
887 channel, ksess,
888 session->output_traces ? 1 : 0);
889 pthread_mutex_unlock(socket->lock);
890 if (ret < 0) {
891 rcu_read_unlock();
892 goto error;
893 }
894 }
895 rcu_read_unlock();
896 }
897 session_unlock(session);
898 }
899 session_unlock_list();
900 return ret;
901
902 error:
903 session_unlock(session);
904 session_unlock_list();
905 return ret;
906 }
907
908 /*
909 * For each tracing session, update newly registered apps. The session list
910 * lock MUST be acquired before calling this.
911 */
912 static void update_ust_app(int app_sock)
913 {
914 struct ltt_session *sess, *stmp;
915
916 /* Consumer is in an ERROR state. Stop any application update. */
917 if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
918 /* Stop the update process since the consumer is dead. */
919 return;
920 }
921
922 /* For all tracing session(s) */
923 cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) {
924 struct ust_app *app;
925
926 session_lock(sess);
927 if (!sess->ust_session) {
928 goto unlock_session;
929 }
930
931 rcu_read_lock();
932 assert(app_sock >= 0);
933 app = ust_app_find_by_sock(app_sock);
934 if (app == NULL) {
935 /*
936 * The application can have been unregistered
937 * beforehand, so this is possible; simply stop
938 * the update.
939 */
940 DBG3("UST app update failed to find app sock %d",
941 app_sock);
942 goto unlock_rcu;
943 }
944 ust_app_global_update(sess->ust_session, app);
945 unlock_rcu:
946 rcu_read_unlock();
947 unlock_session:
948 session_unlock(sess);
949 }
950 }
951
952 /*
953 * This thread manages events coming from the kernel.
954 *
955 * Features supported in this thread:
956 * -) CPU Hotplug
957 */
958 static void *thread_manage_kernel(void *data)
959 {
960 int ret, i, pollfd, update_poll_flag = 1, err = -1;
961 uint32_t revents, nb_fd;
962 char tmp;
963 struct lttng_poll_event events;
964
965 DBG("[thread] Thread manage kernel started");
966
967 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_KERNEL);
968
969 /*
970 * The first step of the while loop is to clean this structure, which could
971 * free non-NULL pointers, so initialize it before the loop.
972 */
973 lttng_poll_init(&events);
974
975 if (testpoint(sessiond_thread_manage_kernel)) {
976 goto error_testpoint;
977 }
978
979 health_code_update();
980
981 if (testpoint(sessiond_thread_manage_kernel_before_loop)) {
982 goto error_testpoint;
983 }
984
985 while (1) {
986 health_code_update();
987
988 if (update_poll_flag == 1) {
989 /* Clean events object. We are about to populate it again. */
990 lttng_poll_clean(&events);
991
992 ret = sessiond_set_thread_pollset(&events, 2);
993 if (ret < 0) {
994 goto error_poll_create;
995 }
996
997 ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
998 if (ret < 0) {
999 goto error;
1000 }
1001
1002 /* This will add the available kernel channel if any. */
1003 ret = update_kernel_poll(&events);
1004 if (ret < 0) {
1005 goto error;
1006 }
1007 update_poll_flag = 0;
1008 }
1009
1010 DBG("Thread kernel polling");
1011
1012 /* Poll with an infinite timeout */
1013 restart:
1014 health_poll_entry();
1015 ret = lttng_poll_wait(&events, -1);
1016 DBG("Thread kernel return from poll on %d fds",
1017 LTTNG_POLL_GETNB(&events));
1018 health_poll_exit();
1019 if (ret < 0) {
1020 /*
1021 * Restart interrupted system call.
1022 */
1023 if (errno == EINTR) {
1024 goto restart;
1025 }
1026 goto error;
1027 } else if (ret == 0) {
1028 /* Should not happen since timeout is infinite */
1029 ERR("Return value of poll is 0 with an infinite timeout.\n"
1030 "This should not have happened! Continuing...");
1031 continue;
1032 }
1033
1034 nb_fd = ret;
1035
1036 for (i = 0; i < nb_fd; i++) {
1037 /* Fetch once the poll data */
1038 revents = LTTNG_POLL_GETEV(&events, i);
1039 pollfd = LTTNG_POLL_GETFD(&events, i);
1040
1041 health_code_update();
1042
1043 if (!revents) {
1044 /* No activity for this FD (poll implementation). */
1045 continue;
1046 }
1047
1048 /* Thread quit pipe has been closed. Killing thread. */
1049 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1050 if (ret) {
1051 err = 0;
1052 goto exit;
1053 }
1054
1055 /* Check for data on kernel pipe */
1056 if (revents & LPOLLIN) {
1057 if (pollfd == kernel_poll_pipe[0]) {
1058 (void) lttng_read(kernel_poll_pipe[0],
1059 &tmp, 1);
1060 /*
1061 * The return value is not important here; if this pipe gets any
1062 * action, an update is required anyway.
1063 */
1064 update_poll_flag = 1;
1065 continue;
1066 } else {
1067 /*
1068 * New CPU detected by the kernel. Adding kernel stream to
1069 * kernel session and updating the kernel consumer
1070 */
1071 ret = update_kernel_stream(pollfd);
1072 if (ret < 0) {
1073 continue;
1074 }
1075 break;
1076 }
1077 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1078 update_poll_flag = 1;
1079 continue;
1080 } else {
1081 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1082 goto error;
1083 }
1084 }
1085 }
1086
1087 exit:
1088 error:
1089 lttng_poll_clean(&events);
1090 error_poll_create:
1091 error_testpoint:
1092 utils_close_pipe(kernel_poll_pipe);
1093 kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
1094 if (err) {
1095 health_error();
1096 ERR("Health error occurred in %s", __func__);
1097 WARN("Kernel thread died unexpectedly. "
1098 "Kernel tracing can continue but CPU hotplug is disabled.");
1099 }
1100 health_unregister(health_sessiond);
1101 DBG("Kernel thread dying");
1102 return NULL;
1103 }
1104
1105 /*
1106 * Signal the pthread condition of the consumer data to report the consumer thread's state.
1107 */
1108 static void signal_consumer_condition(struct consumer_data *data, int state)
1109 {
1110 pthread_mutex_lock(&data->cond_mutex);
1111
1112 /*
1113 * The state is set before signaling. It can be any value, it's the waiter
1114 * job to correctly interpret this condition variable associated to the
1115 * consumer pthread_cond.
1116 *
1117 * A value of 0 means that the corresponding thread of the consumer data
1118 * was not started. 1 indicates that the thread has started and is ready
1119 * for action. A negative value means that there was an error during the
1120 * thread bootstrap.
1121 */
1122 data->consumer_thread_is_ready = state;
1123 (void) pthread_cond_signal(&data->cond);
1124
1125 pthread_mutex_unlock(&data->cond_mutex);
1126 }
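/*
 * Illustrative sketch (not built) of the waiter side of this condition,
 * which lives in the consumer spawn path outside this excerpt (the actual
 * code may use a timed wait):
 *
 *	pthread_mutex_lock(&consumer_data->cond_mutex);
 *	while (consumer_data->consumer_thread_is_ready == 0) {
 *		pthread_cond_wait(&consumer_data->cond,
 *				&consumer_data->cond_mutex);
 *	}
 *	pthread_mutex_unlock(&consumer_data->cond_mutex);
 *	if (consumer_data->consumer_thread_is_ready < 0) {
 *		... bootstrap error ...
 *	}
 */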
1127
1128 /*
1129 * This thread manages the consumer errors sent back to the session daemon.
1130 */
1131 static void *thread_manage_consumer(void *data)
1132 {
1133 int sock = -1, i, ret, pollfd, err = -1, should_quit = 0;
1134 uint32_t revents, nb_fd;
1135 enum lttcomm_return_code code;
1136 struct lttng_poll_event events;
1137 struct consumer_data *consumer_data = data;
1138 struct consumer_socket *cmd_socket_wrapper = NULL;
1139
1140 DBG("[thread] Manage consumer started");
1141
1142 rcu_register_thread();
1143 rcu_thread_online();
1144
1145 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER);
1146
1147 health_code_update();
1148
1149 /*
1150 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
1151 * metadata_sock. Nothing more will be added to this poll set.
1152 */
1153 ret = sessiond_set_thread_pollset(&events, 3);
1154 if (ret < 0) {
1155 goto error_poll;
1156 }
1157
1158 /*
1159 * The error socket here is already in a listening state which was done
1160 * just before spawning this thread to avoid a race between the consumer
1161 * daemon exec trying to connect and the listen() call.
1162 */
1163 ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
1164 if (ret < 0) {
1165 goto error;
1166 }
1167
1168 health_code_update();
1169
1170 /* Infinite blocking call, waiting for transmission */
1171 restart:
1172 health_poll_entry();
1173
1174 if (testpoint(sessiond_thread_manage_consumer)) {
1175 goto error;
1176 }
1177
1178 ret = lttng_poll_wait(&events, -1);
1179 health_poll_exit();
1180 if (ret < 0) {
1181 /*
1182 * Restart interrupted system call.
1183 */
1184 if (errno == EINTR) {
1185 goto restart;
1186 }
1187 goto error;
1188 }
1189
1190 nb_fd = ret;
1191
1192 for (i = 0; i < nb_fd; i++) {
1193 /* Fetch once the poll data */
1194 revents = LTTNG_POLL_GETEV(&events, i);
1195 pollfd = LTTNG_POLL_GETFD(&events, i);
1196
1197 health_code_update();
1198
1199 if (!revents) {
1200 /* No activity for this FD (poll implementation). */
1201 continue;
1202 }
1203
1204 /* Thread quit pipe has been closed. Killing thread. */
1205 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1206 if (ret) {
1207 err = 0;
1208 goto exit;
1209 }
1210
1211 /* Event on the registration socket */
1212 if (pollfd == consumer_data->err_sock) {
1213 if (revents & LPOLLIN) {
1214 continue;
1215 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1216 ERR("consumer err socket poll error");
1217 goto error;
1218 } else {
1219 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1220 goto error;
1221 }
1222 }
1223 }
1224
1225 sock = lttcomm_accept_unix_sock(consumer_data->err_sock);
1226 if (sock < 0) {
1227 goto error;
1228 }
1229
1230 /*
1231 * Set the CLOEXEC flag. Return code is useless because either way, the
1232 * show must go on.
1233 */
1234 (void) utils_set_fd_cloexec(sock);
1235
1236 health_code_update();
1237
1238 DBG2("Receiving code from consumer err_sock");
1239
1240 /* Getting status code from kconsumerd */
1241 ret = lttcomm_recv_unix_sock(sock, &code,
1242 sizeof(enum lttcomm_return_code));
1243 if (ret <= 0) {
1244 goto error;
1245 }
1246
1247 health_code_update();
1248 if (code != LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
1249 ERR("consumer error when waiting for SOCK_READY : %s",
1250 lttcomm_get_readable_code(-code));
1251 goto error;
1252 }
1253
1254 /* Connect both command and metadata sockets. */
1255 consumer_data->cmd_sock =
1256 lttcomm_connect_unix_sock(
1257 consumer_data->cmd_unix_sock_path);
1258 consumer_data->metadata_fd =
1259 lttcomm_connect_unix_sock(
1260 consumer_data->cmd_unix_sock_path);
1261 if (consumer_data->cmd_sock < 0 || consumer_data->metadata_fd < 0) {
1262 PERROR("consumer connect cmd socket");
1263 /* On error, signal condition and quit. */
1264 signal_consumer_condition(consumer_data, -1);
1265 goto error;
1266 }
1267
1268 consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd;
1269
1270 /* Create metadata socket lock. */
1271 consumer_data->metadata_sock.lock = zmalloc(sizeof(pthread_mutex_t));
1272 if (consumer_data->metadata_sock.lock == NULL) {
1273 PERROR("zmalloc pthread mutex");
1274 goto error;
1275 }
1276 pthread_mutex_init(consumer_data->metadata_sock.lock, NULL);
1277
1278 DBG("Consumer command socket ready (fd: %d", consumer_data->cmd_sock);
1279 DBG("Consumer metadata socket ready (fd: %d)",
1280 consumer_data->metadata_fd);
1281
1282 /*
1283 * Remove the consumerd error sock since we've established a connection.
1284 */
1285 ret = lttng_poll_del(&events, consumer_data->err_sock);
1286 if (ret < 0) {
1287 goto error;
1288 }
1289
1290 /* Add new accepted error socket. */
1291 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLRDHUP);
1292 if (ret < 0) {
1293 goto error;
1294 }
1295
1296 /* Add metadata socket that is successfully connected. */
1297 ret = lttng_poll_add(&events, consumer_data->metadata_fd,
1298 LPOLLIN | LPOLLRDHUP);
1299 if (ret < 0) {
1300 goto error;
1301 }
1302
1303 health_code_update();
1304
1305 /*
1306 * Transfer the write-end of the channel monitoring and rotate pipe
1307 * to the consumer by issuing a SET_CHANNEL_MONITOR_PIPE command.
1308 */
1309 cmd_socket_wrapper = consumer_allocate_socket(&consumer_data->cmd_sock);
1310 if (!cmd_socket_wrapper) {
1311 goto error;
1312 }
1313 cmd_socket_wrapper->lock = &consumer_data->lock;
1314
1315 ret = consumer_send_channel_monitor_pipe(cmd_socket_wrapper,
1316 consumer_data->channel_monitor_pipe);
1317 if (ret) {
1318 goto error;
1319 }
1320
1321 /* Discard the socket wrapper as it is no longer needed. */
1322 consumer_destroy_socket(cmd_socket_wrapper);
1323 cmd_socket_wrapper = NULL;
1324
1325 /* The thread is completely initialized, signal that it is ready. */
1326 signal_consumer_condition(consumer_data, 1);
1327
1328 /* Infinite blocking call, waiting for transmission */
1329 restart_poll:
1330 while (1) {
1331 health_code_update();
1332
1333 /* Exit the thread because the thread quit pipe has been triggered. */
1334 if (should_quit) {
1335 /* Not a health error. */
1336 err = 0;
1337 goto exit;
1338 }
1339
1340 health_poll_entry();
1341 ret = lttng_poll_wait(&events, -1);
1342 health_poll_exit();
1343 if (ret < 0) {
1344 /*
1345 * Restart interrupted system call.
1346 */
1347 if (errno == EINTR) {
1348 goto restart_poll;
1349 }
1350 goto error;
1351 }
1352
1353 nb_fd = ret;
1354
1355 for (i = 0; i < nb_fd; i++) {
1356 /* Fetch once the poll data */
1357 revents = LTTNG_POLL_GETEV(&events, i);
1358 pollfd = LTTNG_POLL_GETFD(&events, i);
1359
1360 health_code_update();
1361
1362 if (!revents) {
1363 /* No activity for this FD (poll implementation). */
1364 continue;
1365 }
1366
1367 /*
1368 * Thread quit pipe has been triggered, flag that we should stop
1369 * but continue the current loop to handle potential data from
1370 * consumer.
1371 */
1372 should_quit = sessiond_check_thread_quit_pipe(pollfd, revents);
1373
1374 if (pollfd == sock) {
1375 /* Event on the consumerd socket */
1376 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
1377 && !(revents & LPOLLIN)) {
1378 ERR("consumer err socket second poll error");
1379 goto error;
1380 }
1381 health_code_update();
1382 /* Wait for any kconsumerd error */
1383 ret = lttcomm_recv_unix_sock(sock, &code,
1384 sizeof(enum lttcomm_return_code));
1385 if (ret <= 0) {
1386 ERR("consumer closed the command socket");
1387 goto error;
1388 }
1389
1390 ERR("consumer return code : %s",
1391 lttcomm_get_readable_code(-code));
1392
1393 goto exit;
1394 } else if (pollfd == consumer_data->metadata_fd) {
1395 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
1396 && !(revents & LPOLLIN)) {
1397 ERR("consumer err metadata socket second poll error");
1398 goto error;
1399 }
1400 /* UST metadata requests */
1401 ret = ust_consumer_metadata_request(
1402 &consumer_data->metadata_sock);
1403 if (ret < 0) {
1404 ERR("Handling metadata request");
1405 goto error;
1406 }
1407 }
1408 /* No need for an else branch all FDs are tested prior. */
1409 }
1410 health_code_update();
1411 }
1412
1413 exit:
1414 error:
1415 /*
1416 * We lock here because we are about to close the sockets and some other
1417 * thread might be using them, so get exclusive access, which will abort all
1418 * other consumer commands by other threads.
1419 */
1420 pthread_mutex_lock(&consumer_data->lock);
1421
1422 /* Immediately set the consumerd state to stopped */
1423 if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
1424 uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
1425 } else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
1426 consumer_data->type == LTTNG_CONSUMER32_UST) {
1427 uatomic_set(&ust_consumerd_state, CONSUMER_ERROR);
1428 } else {
1429 /* Code flow error... */
1430 assert(0);
1431 }
1432
1433 if (consumer_data->err_sock >= 0) {
1434 ret = close(consumer_data->err_sock);
1435 if (ret) {
1436 PERROR("close");
1437 }
1438 consumer_data->err_sock = -1;
1439 }
1440 if (consumer_data->cmd_sock >= 0) {
1441 ret = close(consumer_data->cmd_sock);
1442 if (ret) {
1443 PERROR("close");
1444 }
1445 consumer_data->cmd_sock = -1;
1446 }
1447 if (consumer_data->metadata_sock.fd_ptr &&
1448 *consumer_data->metadata_sock.fd_ptr >= 0) {
1449 ret = close(*consumer_data->metadata_sock.fd_ptr);
1450 if (ret) {
1451 PERROR("close");
1452 }
1453 }
1454 if (sock >= 0) {
1455 ret = close(sock);
1456 if (ret) {
1457 PERROR("close");
1458 }
1459 }
1460
1461 unlink(consumer_data->err_unix_sock_path);
1462 unlink(consumer_data->cmd_unix_sock_path);
1463 pthread_mutex_unlock(&consumer_data->lock);
1464
1465 /* Cleanup metadata socket mutex. */
1466 if (consumer_data->metadata_sock.lock) {
1467 pthread_mutex_destroy(consumer_data->metadata_sock.lock);
1468 free(consumer_data->metadata_sock.lock);
1469 }
1470 lttng_poll_clean(&events);
1471
1472 if (cmd_socket_wrapper) {
1473 consumer_destroy_socket(cmd_socket_wrapper);
1474 }
1475 error_poll:
1476 if (err) {
1477 health_error();
1478 ERR("Health error occurred in %s", __func__);
1479 }
1480 health_unregister(health_sessiond);
1481 DBG("consumer thread cleanup completed");
1482
1483 rcu_thread_offline();
1484 rcu_unregister_thread();
1485
1486 return NULL;
1487 }
1488
1489 /*
1490 * This thread receives application command sockets (FDs) on the
1491 * apps_cmd_pipe and waits (polls) on them until they are closed
1492 * or an error occurs.
1493 *
1494 * At that point, it flushes the data (tracing and metadata) associated
1495 * with this application and tears down ust app sessions and other
1496 * associated data structures through ust_app_unregister().
1497 *
1498 * Note that this thread never sends commands to the applications
1499 * through the command sockets; it merely listens for hang-ups
1500 * and errors on those sockets and cleans-up as they occur.
1501 */
1502 static void *thread_manage_apps(void *data)
1503 {
1504 int i, ret, pollfd, err = -1;
1505 ssize_t size_ret;
1506 uint32_t revents, nb_fd;
1507 struct lttng_poll_event events;
1508
1509 DBG("[thread] Manage application started");
1510
1511 rcu_register_thread();
1512 rcu_thread_online();
1513
1514 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_MANAGE);
1515
1516 if (testpoint(sessiond_thread_manage_apps)) {
1517 goto error_testpoint;
1518 }
1519
1520 health_code_update();
1521
1522 ret = sessiond_set_thread_pollset(&events, 2);
1523 if (ret < 0) {
1524 goto error_poll_create;
1525 }
1526
1527 ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1528 if (ret < 0) {
1529 goto error;
1530 }
1531
1532 if (testpoint(sessiond_thread_manage_apps_before_loop)) {
1533 goto error;
1534 }
1535
1536 health_code_update();
1537
1538 while (1) {
1539 DBG("Apps thread polling");
1540
1541 /* Infinite blocking call, waiting for transmission */
1542 restart:
1543 health_poll_entry();
1544 ret = lttng_poll_wait(&events, -1);
1545 DBG("Apps thread return from poll on %d fds",
1546 LTTNG_POLL_GETNB(&events));
1547 health_poll_exit();
1548 if (ret < 0) {
1549 /*
1550 * Restart interrupted system call.
1551 */
1552 if (errno == EINTR) {
1553 goto restart;
1554 }
1555 goto error;
1556 }
1557
1558 nb_fd = ret;
1559
1560 for (i = 0; i < nb_fd; i++) {
1561 /* Fetch once the poll data */
1562 revents = LTTNG_POLL_GETEV(&events, i);
1563 pollfd = LTTNG_POLL_GETFD(&events, i);
1564
1565 health_code_update();
1566
1567 if (!revents) {
1568 /* No activity for this FD (poll implementation). */
1569 continue;
1570 }
1571
1572 /* Thread quit pipe has been closed. Killing thread. */
1573 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1574 if (ret) {
1575 err = 0;
1576 goto exit;
1577 }
1578
1579 /* Inspect the apps cmd pipe */
1580 if (pollfd == apps_cmd_pipe[0]) {
1581 if (revents & LPOLLIN) {
1582 int sock;
1583
1584 /* Empty pipe */
1585 size_ret = lttng_read(apps_cmd_pipe[0], &sock, sizeof(sock));
1586 if (size_ret < sizeof(sock)) {
1587 PERROR("read apps cmd pipe");
1588 goto error;
1589 }
1590
1591 health_code_update();
1592
1593 /*
1594 * Since this is a command socket (write then read),
1595 * we only monitor the error events of the socket.
1596 */
1597 ret = lttng_poll_add(&events, sock,
1598 LPOLLERR | LPOLLHUP | LPOLLRDHUP);
1599 if (ret < 0) {
1600 goto error;
1601 }
1602
1603 DBG("Apps with sock %d added to poll set", sock);
1604 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1605 ERR("Apps command pipe error");
1606 goto error;
1607 } else {
1608 ERR("Unknown poll events %u for sock %d", revents, pollfd);
1609 goto error;
1610 }
1611 } else {
1612 /*
1613 * At this point, we know that a registered application
1614 * triggered the event at poll_wait.
1615 */
1616 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1617 /* Removing from the poll set */
1618 ret = lttng_poll_del(&events, pollfd);
1619 if (ret < 0) {
1620 goto error;
1621 }
1622
1623 /* Socket closed on remote end. */
1624 ust_app_unregister(pollfd);
1625 } else {
1626 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1627 goto error;
1628 }
1629 }
1630
1631 health_code_update();
1632 }
1633 }
1634
1635 exit:
1636 error:
1637 lttng_poll_clean(&events);
1638 error_poll_create:
1639 error_testpoint:
1640 utils_close_pipe(apps_cmd_pipe);
1641 apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;
1642
1643 /*
1644 * We don't clean the UST app hash table here since already registered
1645 * applications can still be controlled so let them be until the session
1646 * daemon dies or the applications stop.
1647 */
1648
1649 if (err) {
1650 health_error();
1651 ERR("Health error occurred in %s", __func__);
1652 }
1653 health_unregister(health_sessiond);
1654 DBG("Application communication apps thread cleanup complete");
1655 rcu_thread_offline();
1656 rcu_unregister_thread();
1657 return NULL;
1658 }
1659
1660 /*
1661 * Send a socket to a thread. This is called from the dispatch UST registration
1662 * thread once all sockets are set for the application.
1663 *
1664 * The sock value can be invalid, we don't really care, the thread will handle
1665 * it and make the necessary cleanup if so.
1666 *
1667 * On success, return 0, else a negative value corresponding to the errno of the
1668 * write().
1669 */
1670 static int send_socket_to_thread(int fd, int sock)
1671 {
1672 ssize_t ret;
1673
1674 /*
1675 * It's possible that the FD is set as invalid with -1 concurrently just
1676 * before calling this function, indicating a shutdown state of the thread.
1677 */
1678 if (fd < 0) {
1679 ret = -EBADF;
1680 goto error;
1681 }
1682
1683 ret = lttng_write(fd, &sock, sizeof(sock));
1684 if (ret < sizeof(sock)) {
1685 PERROR("write apps pipe %d", fd);
1686 if (ret < 0) {
1687 ret = -errno;
1688 }
1689 goto error;
1690 }
1691
1692 /* All good. Don't send back the write positive ret value. */
1693 ret = 0;
1694 error:
1695 return (int) ret;
1696 }
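/*
 * Both ends of this transfer live in this file, so the socket travels as a
 * plain int over an intra-process pipe (no SCM_RIGHTS passing is needed):
 *
 *	Sender (dispatch thread):
 *		send_socket_to_thread(apps_cmd_pipe[1], app->sock);
 *
 *	Receiver (thread_manage_apps):
 *		lttng_read(apps_cmd_pipe[0], &sock, sizeof(sock));
 */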
1697
1698 /*
1699 * Sanitize the wait queue of the dispatch registration thread, that is, remove
1700 * invalid nodes from it. This avoids memory leaks in case the UST
1701 * notify socket is never received.
1702 */
1703 static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
1704 {
1705 int ret, nb_fd = 0, i;
1706 unsigned int fd_added = 0;
1707 struct lttng_poll_event events;
1708 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1709
1710 assert(wait_queue);
1711
1712 lttng_poll_init(&events);
1713
1714 /* Just skip everything for an empty queue. */
1715 if (!wait_queue->count) {
1716 goto end;
1717 }
1718
1719 ret = lttng_poll_create(&events, wait_queue->count, LTTNG_CLOEXEC);
1720 if (ret < 0) {
1721 goto error_create;
1722 }
1723
1724 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1725 &wait_queue->head, head) {
1726 assert(wait_node->app);
1727 ret = lttng_poll_add(&events, wait_node->app->sock,
1728 LPOLLHUP | LPOLLERR);
1729 if (ret < 0) {
1730 goto error;
1731 }
1732
1733 fd_added = 1;
1734 }
1735
1736 if (!fd_added) {
1737 goto end;
1738 }
1739
1740 /*
1741 * Poll but don't block so we can quickly identify the faulty events and
1742 * clean them afterwards from the wait queue.
1743 */
1744 ret = lttng_poll_wait(&events, 0);
1745 if (ret < 0) {
1746 goto error;
1747 }
1748 nb_fd = ret;
1749
1750 for (i = 0; i < nb_fd; i++) {
1751 /* Get faulty FD. */
1752 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1753 int pollfd = LTTNG_POLL_GETFD(&events, i);
1754
1755 if (!revents) {
1756 /* No activity for this FD (poll implementation). */
1757 continue;
1758 }
1759
1760 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1761 &wait_queue->head, head) {
1762 if (pollfd == wait_node->app->sock &&
1763 (revents & (LPOLLHUP | LPOLLERR))) {
1764 cds_list_del(&wait_node->head);
1765 wait_queue->count--;
1766 ust_app_destroy(wait_node->app);
1767 free(wait_node);
1768 /*
1769 * Silence warning of use-after-free in
1770 * cds_list_for_each_entry_safe which uses
1771 * __typeof__(*wait_node).
1772 */
1773 wait_node = NULL;
1774 break;
1775 } else {
1776 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1777 goto error;
1778 }
1779 }
1780 }
1781
1782 if (nb_fd > 0) {
1783 DBG("Wait queue sanitized, %d node were cleaned up", nb_fd);
1784 }
1785
1786 end:
1787 lttng_poll_clean(&events);
1788 return;
1789
1790 error:
1791 lttng_poll_clean(&events);
1792 error_create:
1793 ERR("Unable to sanitize wait queue");
1794 return;
1795 }
1796
1797 /*
1798 * Dispatch request from the registration threads to the application
1799 * communication thread.
1800 */
1801 static void *thread_dispatch_ust_registration(void *data)
1802 {
1803 int ret, err = -1;
1804 struct cds_wfcq_node *node;
1805 struct ust_command *ust_cmd = NULL;
1806 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1807 struct ust_reg_wait_queue wait_queue = {
1808 .count = 0,
1809 };
1810
1811 rcu_register_thread();
1812
1813 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH);
1814
1815 if (testpoint(sessiond_thread_app_reg_dispatch)) {
1816 goto error_testpoint;
1817 }
1818
1819 health_code_update();
1820
1821 CDS_INIT_LIST_HEAD(&wait_queue.head);
1822
1823 DBG("[thread] Dispatch UST command started");
1824
1825 for (;;) {
1826 health_code_update();
1827
1828 /* Atomically prepare the queue futex */
1829 futex_nto1_prepare(&ust_cmd_queue.futex);
1830
1831 if (CMM_LOAD_SHARED(dispatch_thread_exit)) {
1832 break;
1833 }
1834
1835 do {
1836 struct ust_app *app = NULL;
1837 ust_cmd = NULL;
1838
1839 /*
1840 * Make sure we don't have node(s) that have hung up before receiving
1841 * the notify socket. This is to clean the list in order to avoid
1842 * memory leaks from notify sockets that are never seen.
1843 */
1844 sanitize_wait_queue(&wait_queue);
1845
1846 health_code_update();
1847 /* Dequeue command for registration */
1848 node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
1849 if (node == NULL) {
1850 DBG("Woken up but nothing in the UST command queue");
1851 /* Continue thread execution */
1852 break;
1853 }
1854
1855 ust_cmd = caa_container_of(node, struct ust_command, node);
1856
1857 DBG("Dispatching UST registration pid:%d ppid:%d uid:%d"
1858 " gid:%d sock:%d name:%s (version %d.%d)",
1859 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
1860 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
1861 ust_cmd->sock, ust_cmd->reg_msg.name,
1862 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
1863
1864 if (ust_cmd->reg_msg.type == USTCTL_SOCKET_CMD) {
1865 wait_node = zmalloc(sizeof(*wait_node));
1866 if (!wait_node) {
1867 PERROR("zmalloc wait_node dispatch");
1868 ret = close(ust_cmd->sock);
1869 if (ret < 0) {
1870 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1871 }
1872 lttng_fd_put(LTTNG_FD_APPS, 1);
1873 free(ust_cmd);
1874 goto error;
1875 }
1876 CDS_INIT_LIST_HEAD(&wait_node->head);
1877
1878 /* Create application object if socket is CMD. */
1879 wait_node->app = ust_app_create(&ust_cmd->reg_msg,
1880 ust_cmd->sock);
1881 if (!wait_node->app) {
1882 ret = close(ust_cmd->sock);
1883 if (ret < 0) {
1884 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1885 }
1886 lttng_fd_put(LTTNG_FD_APPS, 1);
1887 free(wait_node);
1888 free(ust_cmd);
1889 continue;
1890 }
1891 /*
1892 * Add application to the wait queue so we can set the notify
1893 * socket before putting this object in the global ht.
1894 */
1895 cds_list_add(&wait_node->head, &wait_queue.head);
1896 wait_queue.count++;
1897
1898 free(ust_cmd);
1899 /*
1900 * We have to continue here since we don't have the notify
1901 * socket and the application MUST be added to the hash table
1902 * only at that moment.
1903 */
1904 continue;
1905 } else {
1906 /*
1907 * Look for the application in the local wait queue and set the
1908 * notify socket if found.
1909 */
1910 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1911 &wait_queue.head, head) {
1912 health_code_update();
1913 if (wait_node->app->pid == ust_cmd->reg_msg.pid) {
1914 wait_node->app->notify_sock = ust_cmd->sock;
1915 cds_list_del(&wait_node->head);
1916 wait_queue.count--;
1917 app = wait_node->app;
1918 free(wait_node);
1919 DBG3("UST app notify socket %d is set", ust_cmd->sock);
1920 break;
1921 }
1922 }
1923
1924 /*
1925 * With no application at this stage, the received socket is
1926 * basically useless, so close it before we free the cmd data
1927 * structure for good.
1928 */
1929 if (!app) {
1930 ret = close(ust_cmd->sock);
1931 if (ret < 0) {
1932 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1933 }
1934 lttng_fd_put(LTTNG_FD_APPS, 1);
1935 }
1936 free(ust_cmd);
1937 }
1938
1939 if (app) {
1940 /*
1941 * @session_lock_list
1942 *
1943 * Lock the global session list so from the register up to the
1944 * registration done message, no thread can see the application
1945 * and change its state.
1946 */
1947 session_lock_list();
1948 rcu_read_lock();
1949
1950 /*
1951 * Add application to the global hash table. This needs to be
1952 * done before the update to the UST registry can locate the
1953 * application.
1954 */
1955 ust_app_add(app);
1956
1957 /* Set app version. This call will print an error if needed. */
1958 (void) ust_app_version(app);
1959
1960 /* Send notify socket through the notify pipe. */
1961 ret = send_socket_to_thread(apps_cmd_notify_pipe[1],
1962 app->notify_sock);
1963 if (ret < 0) {
1964 rcu_read_unlock();
1965 session_unlock_list();
1966 /*
1967 * No notify thread, stop the UST tracing. However, this is
1968 * not an internal error of this thread, thus setting
1969 * the health error code to a normal exit.
1970 */
1971 err = 0;
1972 goto error;
1973 }
1974
1975 /*
1976 * Update the newly registered application with the
1977 * already-enabled tracing registry information.
1978 */
1979 update_ust_app(app->sock);
1980
1981 /*
1982 * Don't care about return value. Let the manage apps threads
1983 * handle app unregistration upon socket close.
1984 */
1985 (void) ust_app_register_done(app);
1986
1987 /*
1988 * Even if the application socket has been closed, send the app
1989 * to the thread; its unregistration will be handled
1990 * there.
1991 */
1992 ret = send_socket_to_thread(apps_cmd_pipe[1], app->sock);
1993 if (ret < 0) {
1994 rcu_read_unlock();
1995 session_unlock_list();
1996 /*
1997 * No apps thread, stop the UST tracing. However, this is
1998 * not an internal error of this thread, so set the
1999 * health error code to a normal exit.
2000 */
2001 err = 0;
2002 goto error;
2003 }
2004
2005 rcu_read_unlock();
2006 session_unlock_list();
2007 }
2008 } while (node != NULL);
2009
2010 health_poll_entry();
2011 /* Futex wait on queue. Blocking call on futex() */
2012 futex_nto1_wait(&ust_cmd_queue.futex);
2013 health_poll_exit();
2014 }
2015 /* Normal exit, no error */
2016 err = 0;
2017
2018 error:
2019 /* Clean up wait queue. */
2020 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
2021 &wait_queue.head, head) {
2022 cds_list_del(&wait_node->head);
2023 wait_queue.count--;
2024 free(wait_node);
2025 }
2026
2027 /* Empty command queue. */
2028 for (;;) {
2029 /* Dequeue command for registration */
2030 node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
2031 if (node == NULL) {
2032 break;
2033 }
2034 ust_cmd = caa_container_of(node, struct ust_command, node);
2035 ret = close(ust_cmd->sock);
2036 if (ret < 0) {
2037 PERROR("close ust sock exit dispatch %d", ust_cmd->sock);
2038 }
2039 lttng_fd_put(LTTNG_FD_APPS, 1);
2040 free(ust_cmd);
2041 }
2042
2043 error_testpoint:
2044 DBG("Dispatch thread dying");
2045 if (err) {
2046 health_error();
2047 ERR("Health error occurred in %s", __func__);
2048 }
2049 health_unregister(health_sessiond);
2050 rcu_unregister_thread();
2051 return NULL;
2052 }
2053
2054 /*
2055 * This thread manages application registration.
2056 */
2057 static void *thread_registration_apps(void *data)
2058 {
2059 int sock = -1, i, ret, pollfd, err = -1;
2060 uint32_t revents, nb_fd;
2061 struct lttng_poll_event events;
2062 /*
2063 * Allocated in this thread, enqueued to a global queue, dequeued and
2064 * freed in the manage apps thread.
2065 */
2066 struct ust_command *ust_cmd = NULL;
2067
2068 DBG("[thread] Manage application registration started");
2069
2070 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG);
2071
2072 if (testpoint(sessiond_thread_registration_apps)) {
2073 goto error_testpoint;
2074 }
2075
2076 ret = lttcomm_listen_unix_sock(apps_sock);
2077 if (ret < 0) {
2078 goto error_listen;
2079 }
2080
2081 /*
2082 * Pass 2 as size here for the thread quit pipe and apps socket. Nothing
2083 * more will be added to this poll set.
2084 */
2085 ret = sessiond_set_thread_pollset(&events, 2);
2086 if (ret < 0) {
2087 goto error_create_poll;
2088 }
2089
2090 /* Add the application registration socket */
2091 ret = lttng_poll_add(&events, apps_sock, LPOLLIN | LPOLLRDHUP);
2092 if (ret < 0) {
2093 goto error_poll_add;
2094 }
2095
2096 /* Notify all applications to register */
2097 ret = notify_ust_apps(1);
2098 if (ret < 0) {
2099 ERR("Failed to notify applications or create the wait shared memory.\n"
2100 "Execution continues but there might be problems for already\n"
2101 "running applications that wish to register.");
2102 }
2103
2104 while (1) {
2105 DBG("Accepting application registration");
2106
2107 /* Infinite blocking call, waiting for transmission */
2108 restart:
2109 health_poll_entry();
2110 ret = lttng_poll_wait(&events, -1);
2111 health_poll_exit();
2112 if (ret < 0) {
2113 /*
2114 * Restart interrupted system call.
2115 */
2116 if (errno == EINTR) {
2117 goto restart;
2118 }
2119 goto error;
2120 }
2121
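/* lttng_poll_wait() returned the number of file descriptors with activity. */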
2122 nb_fd = ret;
2123
2124 for (i = 0; i < nb_fd; i++) {
2125 health_code_update();
2126
2127 /* Fetch once the poll data */
2128 revents = LTTNG_POLL_GETEV(&events, i);
2129 pollfd = LTTNG_POLL_GETFD(&events, i);
2130
2131 if (!revents) {
2132 /* No activity for this FD (poll implementation). */
2133 continue;
2134 }
2135
2136 /* Thread quit pipe has been closed. Killing thread. */
2137 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
2138 if (ret) {
2139 err = 0;
2140 goto exit;
2141 }
2142
2143 /* Event on the registration socket */
2144 if (pollfd == apps_sock) {
2145 if (revents & LPOLLIN) {
2146 sock = lttcomm_accept_unix_sock(apps_sock);
2147 if (sock < 0) {
2148 goto error;
2149 }
2150
2151 /*
2152 * Set socket timeout for both receiving and sending.
2153 * app_socket_timeout is in seconds, whereas
2154 * lttcomm_setsockopt_rcv_timeout and
2155 * lttcomm_setsockopt_snd_timeout expect msec as
2156 * parameter.
2157 */
2158 if (config.app_socket_timeout >= 0) {
2159 (void) lttcomm_setsockopt_rcv_timeout(sock,
2160 config.app_socket_timeout * 1000);
2161 (void) lttcomm_setsockopt_snd_timeout(sock,
2162 config.app_socket_timeout * 1000);
2163 }
2164
2165 /*
2166 * Set the CLOEXEC flag. Return code is useless because
2167 * either way, the show must go on.
2168 */
2169 (void) utils_set_fd_cloexec(sock);
2170
2171 /* Create UST registration command for enqueuing */
2172 ust_cmd = zmalloc(sizeof(struct ust_command));
2173 if (ust_cmd == NULL) {
2174 PERROR("ust command zmalloc");
2175 ret = close(sock);
2176 if (ret) {
2177 PERROR("close");
2178 }
2179 goto error;
2180 }
2181
2182 /*
2183 * Using message-based transmissions to ensure we don't
2184 * have to deal with partially received messages.
2185 */
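/*
 * Account for this application's socket against the file descriptor
 * limit reserved for applications; bail out below if that limit is
 * exhausted.
 */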
2186 ret = lttng_fd_get(LTTNG_FD_APPS, 1);
2187 if (ret < 0) {
2188 ERR("Exhausted file descriptors allowed for applications.");
2189 free(ust_cmd);
2190 ret = close(sock);
2191 if (ret) {
2192 PERROR("close");
2193 }
2194 sock = -1;
2195 continue;
2196 }
2197
2198 health_code_update();
2199 ret = ust_app_recv_registration(sock, &ust_cmd->reg_msg);
2200 if (ret < 0) {
2201 free(ust_cmd);
2202 /* Close socket of the application. */
2203 ret = close(sock);
2204 if (ret) {
2205 PERROR("close");
2206 }
2207 lttng_fd_put(LTTNG_FD_APPS, 1);
2208 sock = -1;
2209 continue;
2210 }
2211 health_code_update();
2212
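/*
 * Ownership of the socket is transferred to the command; reset the
 * local variable so the cleanup path does not close it twice.
 */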
2213 ust_cmd->sock = sock;
2214 sock = -1;
2215
2216 DBG("UST registration received with pid:%d ppid:%d uid:%d"
2217 " gid:%d sock:%d name:%s (version %d.%d)",
2218 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
2219 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
2220 ust_cmd->sock, ust_cmd->reg_msg.name,
2221 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
2222
2223 /*
2224 * Lock-free enqueue of the registration request. The red pill
2225 * has been taken! This app will be part of the *system*.
2226 */
2227 cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail, &ust_cmd->node);
2228
2229 /*
2230 * Wake the registration queue futex. Implicit memory
2231 * barrier with the exchange in cds_wfcq_enqueue.
2232 */
2233 futex_nto1_wake(&ust_cmd_queue.futex);
2234 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
2235 ERR("Register apps socket poll error");
2236 goto error;
2237 } else {
2238 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
2239 goto error;
2240 }
2241 }
2242 }
2243 }
2244
2245 exit:
2246 error:
2247 /* Notify that the registration thread is gone */
2248 notify_ust_apps(0);
2249
2250 if (apps_sock >= 0) {
2251 ret = close(apps_sock);
2252 if (ret) {
2253 PERROR("close");
2254 }
2255 }
2256 if (sock >= 0) {
2257 ret = close(sock);
2258 if (ret) {
2259 PERROR("close");
2260 }
2261 lttng_fd_put(LTTNG_FD_APPS, 1);
2262 }
2263 unlink(config.apps_unix_sock_path.value);
2264
2265 error_poll_add:
2266 lttng_poll_clean(&events);
2267 error_listen:
2268 error_create_poll:
2269 error_testpoint:
2270 DBG("UST Registration thread cleanup complete");
2271 if (err) {
2272 health_error();
2273 ERR("Health error occurred in %s", __func__);
2274 }
2275 health_unregister(health_sessiond);
2276
2277 return NULL;
2278 }
2279
2280 /*
2281 * Start the thread_manage_consumer. This must be done after a lttng-consumerd
2282 * exec or it will fail.
2283 */
2284 static int spawn_consumer_thread(struct consumer_data *consumer_data)
2285 {
2286 int ret, clock_ret;
2287 struct timespec timeout;
2288
2289 /*
2290 * Make sure we set the readiness flag to 0 because we are NOT ready.
2291 * This access to consumer_thread_is_ready does not need to be
2292 * protected by consumer_data.cond_mutex (yet) since the consumer
2293 * management thread has not been started at this point.
2294 */
2295 consumer_data->consumer_thread_is_ready = 0;
2296
2297 /* Setup pthread condition */
2298 ret = pthread_condattr_init(&consumer_data->condattr);
2299 if (ret) {
2300 errno = ret;
2301 PERROR("pthread_condattr_init consumer data");
2302 goto error;
2303 }
2304
2305 /*
2306 * Set the monotonic clock in order to make sure we DO NOT jump in time
2307 * between the clock_gettime() call and the timedwait call. See bug #324
2308 * for more details and how we noticed it.
2309 */
2310 ret = pthread_condattr_setclock(&consumer_data->condattr, CLOCK_MONOTONIC);
2311 if (ret) {
2312 errno = ret;
2313 PERROR("pthread_condattr_setclock consumer data");
2314 goto error;
2315 }
2316
2317 ret = pthread_cond_init(&consumer_data->cond, &consumer_data->condattr);
2318 if (ret) {
2319 errno = ret;
2320 PERROR("pthread_cond_init consumer data");
2321 goto error;
2322 }
2323
2324 ret = pthread_create(&consumer_data->thread, default_pthread_attr(),
2325 thread_manage_consumer, consumer_data);
2326 if (ret) {
2327 errno = ret;
2328 PERROR("pthread_create consumer");
2329 ret = -1;
2330 goto error;
2331 }
2332
2333 /* We are about to wait on a pthread condition */
2334 pthread_mutex_lock(&consumer_data->cond_mutex);
2335
2336 /* Get time for the pthread_cond_timedwait() absolute timeout */
2337 clock_ret = lttng_clock_gettime(CLOCK_MONOTONIC, &timeout);
2338 /*
2339 * Set the timeout for the condition timed wait even if the clock gettime
2340 * call fails since we might loop on that call and we want to avoid
2341 * incrementing the timeout too many times.
2342 */
2343 timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
2344
2345 /*
2346 * The following loop COULD be skipped in some conditions, which is why
2347 * we set ret to 0 in order to make sure at least one round of the loop is
2348 * done.
2349 */
2350 ret = 0;
2351
2352 /*
2353 * Loop until the condition is reached or a timeout occurs. Note
2354 * that the pthread_cond_timedwait(P) man page specifies that EINTR can NOT
2355 * be returned but the pthread_cond(3), from the glibc-doc, says that it is
2356 * possible. This loop does not take any chances and works with both of
2357 * them.
2358 */
2359 while (!consumer_data->consumer_thread_is_ready && ret != ETIMEDOUT) {
2360 if (clock_ret < 0) {
2361 PERROR("clock_gettime spawn consumer");
2362 /* Infinite wait for the consumerd thread to be ready */
2363 ret = pthread_cond_wait(&consumer_data->cond,
2364 &consumer_data->cond_mutex);
2365 } else {
2366 ret = pthread_cond_timedwait(&consumer_data->cond,
2367 &consumer_data->cond_mutex, &timeout);
2368 }
2369 }
2370
2371 /* Release the pthread condition */
2372 pthread_mutex_unlock(&consumer_data->cond_mutex);
2373
2374 if (ret != 0) {
2375 errno = ret;
2376 if (ret == ETIMEDOUT) {
2377 int pth_ret;
2378
2379 /*
2380 * Call has timed out so we kill the kconsumerd_thread and return
2381 * an error.
2382 */
2383 ERR("Condition timed out. The consumer thread was never ready."
2384 " Killing it");
2385 pth_ret = pthread_cancel(consumer_data->thread);
2386 if (pth_ret < 0) {
2387 PERROR("pthread_cancel consumer thread");
2388 }
2389 } else {
2390 PERROR("pthread_cond_wait failed consumer thread");
2391 }
2392 /* Caller is expecting a negative value on failure. */
2393 ret = -1;
2394 goto error;
2395 }
2396
2397 pthread_mutex_lock(&consumer_data->pid_mutex);
2398 if (consumer_data->pid == 0) {
2399 ERR("Consumerd did not start");
2400 pthread_mutex_unlock(&consumer_data->pid_mutex);
2401 goto error;
2402 }
2403 pthread_mutex_unlock(&consumer_data->pid_mutex);
2404
2405 return 0;
2406
2407 error:
2408 return ret;
2409 }
2410
2411 /*
2412 * Join consumer thread
2413 */
2414 static int join_consumer_thread(struct consumer_data *consumer_data)
2415 {
2416 void *status;
2417
2418 /* Consumer pid must be a real one. */
2419 if (consumer_data->pid > 0) {
2420 int ret;
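/* Ask the consumer daemon to terminate, then wait for its management thread. */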
2421 ret = kill(consumer_data->pid, SIGTERM);
2422 if (ret) {
2423 PERROR("Error killing consumer daemon");
2424 return ret;
2425 }
2426 return pthread_join(consumer_data->thread, &status);
2427 } else {
2428 return 0;
2429 }
2430 }
2431
2432 /*
2433 * Fork and exec a consumer daemon (consumerd).
2434 *
2435 * Return the child pid on success, else a negative value.
2436 */
2437 static pid_t spawn_consumerd(struct consumer_data *consumer_data)
2438 {
2439 int ret;
2440 pid_t pid;
2441 const char *consumer_to_use;
2442 const char *verbosity;
2443 struct stat st;
2444
2445 DBG("Spawning consumerd");
2446
2447 pid = fork();
2448 if (pid == 0) {
2449 /*
2450 * Exec consumerd.
2451 */
2452 if (config.verbose_consumer) {
2453 verbosity = "--verbose";
2454 } else if (lttng_opt_quiet) {
2455 verbosity = "--quiet";
2456 } else {
2457 verbosity = "";
2458 }
2459
2460 switch (consumer_data->type) {
2461 case LTTNG_CONSUMER_KERNEL:
2462 /*
2463 * Find out which consumerd to execute. We will first try the
2464 * 64-bit path, then the sessiond's installation directory, and
2465 * fall back on the 32-bit one.
2466 */
2467 DBG3("Looking for a kernel consumer at these locations:");
2468 DBG3(" 1) %s", config.consumerd64_bin_path.value ? : "NULL");
2469 DBG3(" 2) %s/%s", INSTALL_BIN_PATH, DEFAULT_CONSUMERD_FILE);
2470 DBG3(" 3) %s", config.consumerd32_bin_path.value ? : "NULL");
2471 if (stat(config.consumerd64_bin_path.value, &st) == 0) {
2472 DBG3("Found location #1");
2473 consumer_to_use = config.consumerd64_bin_path.value;
2474 } else if (stat(INSTALL_BIN_PATH "/" DEFAULT_CONSUMERD_FILE, &st) == 0) {
2475 DBG3("Found location #2");
2476 consumer_to_use = INSTALL_BIN_PATH "/" DEFAULT_CONSUMERD_FILE;
2477 } else if (config.consumerd32_bin_path.value &&
2478 stat(config.consumerd32_bin_path.value, &st) == 0) {
2479 DBG3("Found location #3");
2480 consumer_to_use = config.consumerd32_bin_path.value;
2481 } else {
2482 DBG("Could not find any valid consumerd executable");
2483 ret = -EINVAL;
2484 goto error;
2485 }
2486 DBG("Using kernel consumer at: %s", consumer_to_use);
2487 (void) execl(consumer_to_use,
2488 "lttng-consumerd", verbosity, "-k",
2489 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2490 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2491 "--group", config.tracing_group_name.value,
2492 NULL);
2493 break;
2494 case LTTNG_CONSUMER64_UST:
2495 {
2496 if (config.consumerd64_lib_dir.value) {
2497 char *tmp;
2498 size_t tmplen;
2499 char *tmpnew;
2500
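/*
 * Prepend the configured 64-bit consumer library directory to the
 * inherited LD_LIBRARY_PATH before the exec() below.
 */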
2501 tmp = lttng_secure_getenv("LD_LIBRARY_PATH");
2502 if (!tmp) {
2503 tmp = "";
2504 }
2505 tmplen = strlen(config.consumerd64_lib_dir.value) + 1 /* : */ + strlen(tmp);
2506 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2507 if (!tmpnew) {
2508 ret = -ENOMEM;
2509 goto error;
2510 }
2511 strcat(tmpnew, config.consumerd64_lib_dir.value);
2512 if (tmp[0] != '\0') {
2513 strcat(tmpnew, ":");
2514 strcat(tmpnew, tmp);
2515 }
2516 ret = setenv("LD_LIBRARY_PATH", tmpnew, 1);
2517 free(tmpnew);
2518 if (ret) {
2519 ret = -errno;
2520 goto error;
2521 }
2522 }
2523 DBG("Using 64-bit UST consumer at: %s", config.consumerd64_bin_path.value);
2524 (void) execl(config.consumerd64_bin_path.value, "lttng-consumerd", verbosity, "-u",
2525 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2526 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2527 "--group", config.tracing_group_name.value,
2528 NULL);
2529 break;
2530 }
2531 case LTTNG_CONSUMER32_UST:
2532 {
2533 if (config.consumerd32_lib_dir.value) {
2534 char *tmp;
2535 size_t tmplen;
2536 char *tmpnew;
2537
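/*
 * Prepend the configured 32-bit consumer library directory to the
 * inherited LD_LIBRARY_PATH before the exec() below.
 */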
2538 tmp = lttng_secure_getenv("LD_LIBRARY_PATH");
2539 if (!tmp) {
2540 tmp = "";
2541 }
2542 tmplen = strlen(config.consumerd32_lib_dir.value) + 1 /* : */ + strlen(tmp);
2543 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2544 if (!tmpnew) {
2545 ret = -ENOMEM;
2546 goto error;
2547 }
2548 strcat(tmpnew, config.consumerd32_lib_dir.value);
2549 if (tmp[0] != '\0') {
2550 strcat(tmpnew, ":");
2551 strcat(tmpnew, tmp);
2552 }
2553 ret = setenv("LD_LIBRARY_PATH", tmpnew, 1);
2554 free(tmpnew);
2555 if (ret) {
2556 ret = -errno;
2557 goto error;
2558 }
2559 }
2560 DBG("Using 32-bit UST consumer at: %s", config.consumerd32_bin_path.value);
2561 (void) execl(config.consumerd32_bin_path.value, "lttng-consumerd", verbosity, "-u",
2562 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2563 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2564 "--group", config.tracing_group_name.value,
2565 NULL);
2566 break;
2567 }
2568 default:
2569 ERR("unknown consumer type");
2570 errno = 0;
2571 }
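/*
 * execl() only returns on failure; errno was cleared above for the
 * unknown-type case so that no misleading error is printed.
 */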
2572 if (errno != 0) {
2573 PERROR("Consumer execl()");
2574 }
2575 /* Reaching this point, we got a failure on our execl(). */
2576 exit(EXIT_FAILURE);
2577 } else if (pid > 0) {
2578 ret = pid;
2579 } else {
2580 PERROR("start consumer fork");
2581 ret = -errno;
2582 }
2583 error:
2584 return ret;
2585 }
2586
2587 /*
2588 * Spawn the consumerd daemon and session daemon thread.
2589 */
2590 static int start_consumerd(struct consumer_data *consumer_data)
2591 {
2592 int ret;
2593
2594 /*
2595 * Set the listen() state on the socket since there is a possible race
2596 * between the exec() of the consumer daemon and this call if placed in the
2597 * consumer thread. See bug #366 for more details.
2598 */
2599 ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
2600 if (ret < 0) {
2601 goto error;
2602 }
2603
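/*
 * Do not spawn a second consumer daemon of this type if one is
 * already running.
 */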
2604 pthread_mutex_lock(&consumer_data->pid_mutex);
2605 if (consumer_data->pid != 0) {
2606 pthread_mutex_unlock(&consumer_data->pid_mutex);
2607 goto end;
2608 }
2609
2610 ret = spawn_consumerd(consumer_data);
2611 if (ret < 0) {
2612 ERR("Spawning consumerd failed");
2613 pthread_mutex_unlock(&consumer_data->pid_mutex);
2614 goto error;
2615 }
2616
2617 /* Setting up the consumer_data pid */
2618 consumer_data->pid = ret;
2619 DBG2("Consumer pid %d", consumer_data->pid);
2620 pthread_mutex_unlock(&consumer_data->pid_mutex);
2621
2622 DBG2("Spawning consumer control thread");
2623 ret = spawn_consumer_thread(consumer_data);
2624 if (ret < 0) {
2625 ERR("Fatal error spawning consumer control thread");
2626 goto error;
2627 }
2628
2629 end:
2630 return 0;
2631
2632 error:
2633 /* Cleanup already created sockets on error. */
2634 if (consumer_data->err_sock >= 0) {
2635 int err;
2636
2637 err = close(consumer_data->err_sock);
2638 if (err < 0) {
2639 PERROR("close consumer data error socket");
2640 }
2641 }
2642 return ret;
2643 }
2644
2645 /*
2646 * Set up the data necessary for kernel tracer use.
2647 */
2648 static int init_kernel_tracer(void)
2649 {
2650 int ret;
2651
2652 /* Modprobe lttng kernel modules */
2653 ret = modprobe_lttng_control();
2654 if (ret < 0) {
2655 goto error;
2656 }
2657
2658 /* Open debugfs lttng */
2659 kernel_tracer_fd = open(module_proc_lttng, O_RDWR);
2660 if (kernel_tracer_fd < 0) {
2661 DBG("Failed to open %s", module_proc_lttng);
2662 goto error_open;
2663 }
2664
2665 /* Validate kernel version */
2666 ret = kernel_validate_version(kernel_tracer_fd, &kernel_tracer_version,
2667 &kernel_tracer_abi_version);
2668 if (ret < 0) {
2669 goto error_version;
2670 }
2671
2672 ret = modprobe_lttng_data();
2673 if (ret < 0) {
2674 goto error_modules;
2675 }
2676
2677 ret = kernel_supports_ring_buffer_snapshot_sample_positions(
2678 kernel_tracer_fd);
2679 if (ret < 0) {
2680 goto error_modules;
2681 }
2682
2683 if (ret < 1) {
2684 WARN("Kernel tracer does not support buffer monitoring. "
2685 "The monitoring timer of channels in the kernel domain "
2686 "will be set to 0 (disabled).");
2687 }
2688
2689 DBG("Kernel tracer fd %d", kernel_tracer_fd);
2690 return 0;
2691
2692 error_version:
2693 modprobe_remove_lttng_control();
2694 ret = close(kernel_tracer_fd);
2695 if (ret) {
2696 PERROR("close");
2697 }
2698 kernel_tracer_fd = -1;
2699 return LTTNG_ERR_KERN_VERSION;
2700
2701 error_modules:
2702 ret = close(kernel_tracer_fd);
2703 if (ret) {
2704 PERROR("close");
2705 }
2706
2707 error_open:
2708 modprobe_remove_lttng_control();
2709
2710 error:
2711 WARN("No kernel tracer available");
2712 kernel_tracer_fd = -1;
2713 if (!is_root) {
2714 return LTTNG_ERR_NEED_ROOT_SESSIOND;
2715 } else {
2716 return LTTNG_ERR_KERN_NA;
2717 }
2718 }
2719
2720
2721 /*
2722 * Copy consumer output from the tracing session to the domain session. The
2723 * function also applies the required per-domain modifications to the
2724 * trace files' destination directory.
2725 *
2726 * Should *NOT* be called with RCU read-side lock held.
2727 */
2728 static int copy_session_consumer(int domain, struct ltt_session *session)
2729 {
2730 int ret;
2731 const char *dir_name;
2732 struct consumer_output *consumer;
2733
2734 assert(session);
2735 assert(session->consumer);
2736
2737 switch (domain) {
2738 case LTTNG_DOMAIN_KERNEL:
2739 DBG3("Copying tracing session consumer output in kernel session");
2740 /*
2741 * XXX: We should audit the session creation and what this function
2742 * does "extra" in order to avoid a destroy since this function is used
2743 * in the domain session creation (kernel and ust) only. Same for UST
2744 * domain.
2745 */
2746 if (session->kernel_session->consumer) {
2747 consumer_output_put(session->kernel_session->consumer);
2748 }
2749 session->kernel_session->consumer =
2750 consumer_copy_output(session->consumer);
2751 /* Ease our life a bit for the next part */
2752 consumer = session->kernel_session->consumer;
2753 dir_name = DEFAULT_KERNEL_TRACE_DIR;
2754 break;
2755 case LTTNG_DOMAIN_JUL:
2756 case LTTNG_DOMAIN_LOG4J:
2757 case LTTNG_DOMAIN_PYTHON:
2758 case LTTNG_DOMAIN_UST:
2759 DBG3("Copying tracing session consumer output in UST session");
2760 if (session->ust_session->consumer) {
2761 consumer_output_put(session->ust_session->consumer);
2762 }
2763 session->ust_session->consumer =
2764 consumer_copy_output(session->consumer);
2765 /* Ease our life a bit for the next part */
2766 consumer = session->ust_session->consumer;
2767 dir_name = DEFAULT_UST_TRACE_DIR;
2768 break;
2769 default:
2770 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2771 goto error;
2772 }
2773
2774 /* Append correct directory to subdir */
2775 strncat(consumer->subdir, dir_name,
2776 sizeof(consumer->subdir) - strlen(consumer->subdir) - 1);
2777 DBG3("Copy session consumer subdir %s", consumer->subdir);
2778
2779 ret = LTTNG_OK;
2780
2781 error:
2782 return ret;
2783 }
2784
2785 /*
2786 * Create an UST session and add it to the session ust list.
2787 *
2788 * Should *NOT* be called with RCU read-side lock held.
2789 */
2790 static int create_ust_session(struct ltt_session *session,
2791 struct lttng_domain *domain)
2792 {
2793 int ret;
2794 struct ltt_ust_session *lus = NULL;
2795
2796 assert(session);
2797 assert(domain);
2798 assert(session->consumer);
2799
2800 switch (domain->type) {
2801 case LTTNG_DOMAIN_JUL:
2802 case LTTNG_DOMAIN_LOG4J:
2803 case LTTNG_DOMAIN_PYTHON:
2804 case LTTNG_DOMAIN_UST:
2805 break;
2806 default:
2807 ERR("Unknown UST domain on create session %d", domain->type);
2808 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2809 goto error;
2810 }
2811
2812 DBG("Creating UST session");
2813
2814 lus = trace_ust_create_session(session->id);
2815 if (lus == NULL) {
2816 ret = LTTNG_ERR_UST_SESS_FAIL;
2817 goto error;
2818 }
2819
2820 lus->uid = session->uid;
2821 lus->gid = session->gid;
2822 lus->output_traces = session->output_traces;
2823 lus->snapshot_mode = session->snapshot_mode;
2824 lus->live_timer_interval = session->live_timer;
2825 session->ust_session = lus;
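/*
 * If a shared memory path was specified for the session, derive the
 * UST-specific path by appending "/ust" to it.
 */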
2826 if (session->shm_path[0]) {
2827 strncpy(lus->root_shm_path, session->shm_path,
2828 sizeof(lus->root_shm_path));
2829 lus->root_shm_path[sizeof(lus->root_shm_path) - 1] = '\0';
2830 strncpy(lus->shm_path, session->shm_path,
2831 sizeof(lus->shm_path));
2832 lus->shm_path[sizeof(lus->shm_path) - 1] = '\0';
2833 strncat(lus->shm_path, "/ust",
2834 sizeof(lus->shm_path) - strlen(lus->shm_path) - 1);
2835 }
2836 /* Copy session output to the newly created UST session */
2837 ret = copy_session_consumer(domain->type, session);
2838 if (ret != LTTNG_OK) {
2839 goto error;
2840 }
2841
2842 return LTTNG_OK;
2843
2844 error:
2845 free(lus);
2846 session->ust_session = NULL;
2847 return ret;
2848 }
2849
2850 /*
2851 * Create a kernel tracer session then create the default channel.
2852 */
2853 static int create_kernel_session(struct ltt_session *session)
2854 {
2855 int ret;
2856
2857 DBG("Creating kernel session");
2858
2859 ret = kernel_create_session(session, kernel_tracer_fd);
2860 if (ret < 0) {
2861 ret = LTTNG_ERR_KERN_SESS_FAIL;
2862 goto error;
2863 }
2864
2865 /* Code flow safety */
2866 assert(session->kernel_session);
2867
2868 /* Copy session output to the newly created Kernel session */
2869 ret = copy_session_consumer(LTTNG_DOMAIN_KERNEL, session);
2870 if (ret != LTTNG_OK) {
2871 goto error;
2872 }
2873
2874 session->kernel_session->uid = session->uid;
2875 session->kernel_session->gid = session->gid;
2876 session->kernel_session->output_traces = session->output_traces;
2877 session->kernel_session->snapshot_mode = session->snapshot_mode;
2878
2879 return LTTNG_OK;
2880
2881 error:
2882 trace_kernel_destroy_session(session->kernel_session);
2883 session->kernel_session = NULL;
2884 return ret;
2885 }
2886
2887 /*
2888 * Count the number of sessions the given uid/gid is permitted to access.
2889 */
2890 static unsigned int lttng_sessions_count(uid_t uid, gid_t gid)
2891 {
2892 unsigned int i = 0;
2893 struct ltt_session *session;
2894
2895 DBG("Counting number of available sessions for UID %d GID %d",
2896 uid, gid);
2897 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
2898 /*
2899 * Only list the sessions the user can control.
2900 */
2901 if (!session_access_ok(session, uid, gid)) {
2902 continue;
2903 }
2904 i++;
2905 }
2906 return i;
2907 }
2908
2909 static int receive_userspace_probe(struct command_ctx *cmd_ctx, int sock,
2910 int *sock_error, struct lttng_event *event)
2911 {
2912 int fd, ret;
2913 struct lttng_userspace_probe_location *probe_location;
2914 const struct lttng_userspace_probe_location_lookup_method *lookup = NULL;
2915 struct lttng_dynamic_buffer probe_location_buffer;
2916 struct lttng_buffer_view buffer_view;
2917
2918 /*
2919 * Create a buffer to store the serialized version of the probe
2920 * location.
2921 */
2922 lttng_dynamic_buffer_init(&probe_location_buffer);
2923 ret = lttng_dynamic_buffer_set_size(&probe_location_buffer,
2924 cmd_ctx->lsm->u.enable.userspace_probe_location_len);
2925 if (ret) {
2926 ret = LTTNG_ERR_NOMEM;
2927 goto error;
2928 }
2929
2930 /*
2931 * Receive the probe location.
2932 */
2933 ret = lttcomm_recv_unix_sock(sock, probe_location_buffer.data,
2934 probe_location_buffer.size);
2935 if (ret <= 0) {
2936 DBG("Nothing recv() from client var len data... continuing");
2937 *sock_error = 1;
2938 lttng_dynamic_buffer_reset(&probe_location_buffer);
2939 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2940 goto error;
2941 }
2942
2943 buffer_view = lttng_buffer_view_from_dynamic_buffer(
2944 &probe_location_buffer, 0, probe_location_buffer.size);
2945
2946 /*
2947 * Extract the probe location from the serialized version.
2948 */
2949 ret = lttng_userspace_probe_location_create_from_buffer(
2950 &buffer_view, &probe_location);
2951 if (ret < 0) {
2952 WARN("Failed to create a userspace probe location from the received buffer");
2953 lttng_dynamic_buffer_reset(&probe_location_buffer);
2954 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2955 goto error;
2956 }
2957
2958 /*
2959 * Receive the file descriptor to the target binary from the client.
2960 */
2961 DBG("Receiving userspace probe target FD from client ...");
2962 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
2963 if (ret <= 0) {
2964 DBG("Nothing recv() from client userspace probe fd... continuing");
2965 *sock_error = 1;
2966 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2967 goto error;
2968 }
2969
2970 /*
2971 * Set the file descriptor received from the client through the unix
2972 * socket in the probe location.
2973 */
2974 lookup = lttng_userspace_probe_location_get_lookup_method(probe_location);
2975 if (!lookup) {
2976 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2977 goto error;
2978 }
2979
2980 /*
2981 * From the kernel tracer's perspective, all userspace probe event types
2982 * are the same: a file and an offset.
2983 */
2984 switch (lttng_userspace_probe_location_lookup_method_get_type(lookup)) {
2985 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_FUNCTION_ELF:
2986 ret = lttng_userspace_probe_location_function_set_binary_fd(
2987 probe_location, fd);
2988 break;
2989 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_TRACEPOINT_SDT:
2990 ret = lttng_userspace_probe_location_tracepoint_set_binary_fd(
2991 probe_location, fd);
2992 break;
2993 default:
2994 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2995 goto error;
2996 }
2997
2998 if (ret) {
2999 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
3000 goto error;
3001 }
3002
3003 /* Attach the probe location to the event. */
3004 ret = lttng_event_set_userspace_probe_location(event, probe_location);
3005 if (ret) {
3006 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
3007 goto error;
3008 }
3009
3010 lttng_dynamic_buffer_reset(&probe_location_buffer);
3011 error:
3012 return ret;
3013 }
3014
3015 /*
3016 * Check if the current kernel tracer supports the session rotation feature.
3017 * Return 1 if it does, 0 otherwise.
3018 */
3019 static int check_rotate_compatible(void)
3020 {
3021 int ret = 1;
3022
3023 if (kernel_tracer_version.major != 2 || kernel_tracer_version.minor < 11) {
3024 DBG("Kernel tracer version is not compatible with the rotation feature");
3025 ret = 0;
3026 }
3027
3028 return ret;
3029 }
3030
3031 /*
3032 * Process the command requested by the lttng client within the command
3033 * context structure. This function makes sure that the return structure (llm)
3034 * is set and ready for transmission before returning.
3035 *
3036 * Return any error encountered or 0 for success.
3037 *
3038 * "sock" is only used for special-case var. len data.
3039 *
3040 * Should *NOT* be called with RCU read-side lock held.
3041 */
3042 static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
3043 int *sock_error)
3044 {
3045 int ret = LTTNG_OK;
3046 int need_tracing_session = 1;
3047 int need_domain;
3048
3049 DBG("Processing client command %d", cmd_ctx->lsm->cmd_type);
3050
3051 assert(!rcu_read_ongoing());
3052
3053 *sock_error = 0;
3054
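/* Commands for which the client does not have to provide a domain. */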
3055 switch (cmd_ctx->lsm->cmd_type) {
3056 case LTTNG_CREATE_SESSION:
3057 case LTTNG_CREATE_SESSION_SNAPSHOT:
3058 case LTTNG_CREATE_SESSION_LIVE:
3059 case LTTNG_DESTROY_SESSION:
3060 case LTTNG_LIST_SESSIONS:
3061 case LTTNG_LIST_DOMAINS:
3062 case LTTNG_START_TRACE:
3063 case LTTNG_STOP_TRACE:
3064 case LTTNG_DATA_PENDING:
3065 case LTTNG_SNAPSHOT_ADD_OUTPUT:
3066 case LTTNG_SNAPSHOT_DEL_OUTPUT:
3067 case LTTNG_SNAPSHOT_LIST_OUTPUT:
3068 case LTTNG_SNAPSHOT_RECORD:
3069 case LTTNG_SAVE_SESSION:
3070 case LTTNG_SET_SESSION_SHM_PATH:
3071 case LTTNG_REGENERATE_METADATA:
3072 case LTTNG_REGENERATE_STATEDUMP:
3073 case LTTNG_REGISTER_TRIGGER:
3074 case LTTNG_UNREGISTER_TRIGGER:
3075 case LTTNG_ROTATE_SESSION:
3076 case LTTNG_ROTATION_GET_INFO:
3077 case LTTNG_ROTATION_SET_SCHEDULE:
3078 case LTTNG_SESSION_LIST_ROTATION_SCHEDULES:
3079 need_domain = 0;
3080 break;
3081 default:
3082 need_domain = 1;
3083 }
3084
3085 if (config.no_kernel && need_domain
3086 && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) {
3087 if (!is_root) {
3088 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
3089 } else {
3090 ret = LTTNG_ERR_KERN_NA;
3091 }
3092 goto error;
3093 }
3094
3095 /* Deny register consumer if we already have a spawned consumer. */
3096 if (cmd_ctx->lsm->cmd_type == LTTNG_REGISTER_CONSUMER) {
3097 pthread_mutex_lock(&kconsumer_data.pid_mutex);
3098 if (kconsumer_data.pid > 0) {
3099 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
3100 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
3101 goto error;
3102 }
3103 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
3104 }
3105
3106 /*
3107 * Check for commands that don't need to allocate a returned payload. We do
3108 * this here so we don't have to make the call for no payload at each
3109 * command.
3110 */
3111 switch (cmd_ctx->lsm->cmd_type) {
3112 case LTTNG_LIST_SESSIONS:
3113 case LTTNG_LIST_TRACEPOINTS:
3114 case LTTNG_LIST_TRACEPOINT_FIELDS:
3115 case LTTNG_LIST_DOMAINS:
3116 case LTTNG_LIST_CHANNELS:
3117 case LTTNG_LIST_EVENTS:
3118 case LTTNG_LIST_SYSCALLS:
3119 case LTTNG_LIST_TRACKER_PIDS:
3120 case LTTNG_DATA_PENDING:
3121 case LTTNG_ROTATE_SESSION:
3122 case LTTNG_ROTATION_GET_INFO:
3123 case LTTNG_SESSION_LIST_ROTATION_SCHEDULES:
3124 break;
3125 default:
3126 /* Setup lttng message with no payload */
3127 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, NULL, 0);
3128 if (ret < 0) {
3129 /* This label does not try to unlock the session */
3130 goto init_setup_error;
3131 }
3132 }
3133
3134 /* Commands that DO NOT need a session. */
3135 switch (cmd_ctx->lsm->cmd_type) {
3136 case LTTNG_CREATE_SESSION:
3137 case LTTNG_CREATE_SESSION_SNAPSHOT:
3138 case LTTNG_CREATE_SESSION_LIVE:
3139 case LTTNG_LIST_SESSIONS:
3140 case LTTNG_LIST_TRACEPOINTS:
3141 case LTTNG_LIST_SYSCALLS:
3142 case LTTNG_LIST_TRACEPOINT_FIELDS:
3143 case LTTNG_SAVE_SESSION:
3144 case LTTNG_REGISTER_TRIGGER:
3145 case LTTNG_UNREGISTER_TRIGGER:
3146 need_tracing_session = 0;
3147 break;
3148 default:
3149 DBG("Getting session %s by name", cmd_ctx->lsm->session.name);
3150 /*
3151 * We keep the session list lock across _all_ commands
3152 * for now, because the per-session lock does not
3153 * handle teardown properly.
3154 */
3155 session_lock_list();
3156 cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
3157 if (cmd_ctx->session == NULL) {
3158 ret = LTTNG_ERR_SESS_NOT_FOUND;
3159 goto error;
3160 } else {
3161 /* Acquire lock for the session */
3162 session_lock(cmd_ctx->session);
3163 }
3164 break;
3165 }
3166
3167 /*
3168 * Commands that need a valid session but should NOT create one if none
3169 * exists. Instead of creating one and destroying it when the command is
3170 * handled, perform the check right here so we save a round trip through
3171 * useless code paths.
3172 */
3173 switch (cmd_ctx->lsm->cmd_type) {
3174 case LTTNG_DISABLE_CHANNEL:
3175 case LTTNG_DISABLE_EVENT:
3176 switch (cmd_ctx->lsm->domain.type) {
3177 case LTTNG_DOMAIN_KERNEL:
3178 if (!cmd_ctx->session->kernel_session) {
3179 ret = LTTNG_ERR_NO_CHANNEL;
3180 goto error;
3181 }
3182 break;
3183 case LTTNG_DOMAIN_JUL:
3184 case LTTNG_DOMAIN_LOG4J:
3185 case LTTNG_DOMAIN_PYTHON:
3186 case LTTNG_DOMAIN_UST:
3187 if (!cmd_ctx->session->ust_session) {
3188 ret = LTTNG_ERR_NO_CHANNEL;
3189 goto error;
3190 }
3191 break;
3192 default:
3193 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
3194 goto error;
3195 }
3196 default:
3197 break;
3198 }
3199
3200 if (!need_domain) {
3201 goto skip_domain;
3202 }
3203
3204 /*
3205 * Check domain type for specific "pre-action".
3206 */
3207 switch (cmd_ctx->lsm->domain.type) {
3208 case LTTNG_DOMAIN_KERNEL:
3209 if (!is_root) {
3210 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
3211 goto error;
3212 }
3213
3214 /* Kernel tracer check */
3215 if (kernel_tracer_fd == -1) {
3216 /* Basically, load kernel tracer modules */
3217 ret = init_kernel_tracer();
3218 if (ret != 0) {
3219 goto error;
3220 }
3221 }
3222
3223 /* Consumer is in an ERROR state. Report back to client */
3224 if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
3225 ret = LTTNG_ERR_NO_KERNCONSUMERD;
3226 goto error;
3227 }
3228
3229 /* Need a session for kernel command */
3230 if (need_tracing_session) {
3231 if (cmd_ctx->session->kernel_session == NULL) {
3232 ret = create_kernel_session(cmd_ctx->session);
3233 if (ret < 0) {
3234 ret = LTTNG_ERR_KERN_SESS_FAIL;
3235 goto error;