Fix: leak of sessiond configuration on launch of run-as worker
[lttng-tools.git] / src / bin / lttng-consumerd / lttng-consumerd.c
index 8ddd5a372005490bb37f1ef8c01bc8045dabdbde..ddd07a14279eb3e88bc93b5497e8989a08cb2b3a 100644 (file)
@@ -16,7 +16,7 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-#define _GNU_SOURCE
+#define _LGPL_SOURCE
 #include <fcntl.h>
 #include <getopt.h>
 #include <grp.h>
 #include <unistd.h>
 #include <sys/mman.h>
 #include <assert.h>
-#include <config.h>
 #include <urcu/compiler.h>
 #include <ulimit.h>
 
 #include <common/defaults.h>
 #include <common/common.h>
-#include <common/consumer.h>
-#include <common/consumer-timer.h>
+#include <common/consumer/consumer.h>
+#include <common/consumer/consumer-timer.h>
 #include <common/compat/poll.h>
+#include <common/compat/getenv.h>
 #include <common/sessiond-comm/sessiond-comm.h>
+#include <common/utils.h>
 
 #include "lttng-consumerd.h"
-
-/* TODO : support UST (all direct kernel-ctl accesses). */
+#include "health-consumerd.h"
 
 /* threads (channel handling, poll, metadata, sessiond) */
 
-static pthread_t channel_thread, data_thread, metadata_thread, sessiond_thread;
-static pthread_t metadata_timer_thread;
+static pthread_t channel_thread, data_thread, metadata_thread,
+               sessiond_thread, metadata_timer_thread, health_thread;
+static bool metadata_timer_thread_online;
 
 /* to count the number of times the user pressed ctrl+c */
 static int sigintcount = 0;
@@ -63,6 +64,8 @@ static int sigintcount = 0;
 /* Argument variables */
 int lttng_opt_quiet;    /* not static in error.h */
 int lttng_opt_verbose;  /* not static in error.h */
+int lttng_opt_mi;       /* not static in error.h */
+
 static int opt_daemon;
 static const char *progname;
 static char command_sock_path[PATH_MAX]; /* Global command socket path */
@@ -72,6 +75,21 @@ static enum lttng_consumer_type opt_type = LTTNG_CONSUMER_KERNEL;
 /* the liblttngconsumerd context */
 static struct lttng_consumer_local_data *ctx;
 
+/* Consumerd health monitoring */
+struct health_app *health_consumerd;
+
+const char *tracing_group_name = DEFAULT_TRACING_GROUP;
+
+int lttng_consumer_ready = NR_LTTNG_CONSUMER_READY;
+
+enum lttng_consumer_type lttng_consumer_get_type(void)
+{
+       if (!ctx) {
+               return LTTNG_CONSUMER_UNKNOWN;
+       }
+       return ctx->type;
+}
+
 /*
  * Signal handler for the daemon
  */
@@ -82,15 +100,9 @@ static void sighandler(int sig)
                return;
        }
 
-       /*
-        * Ignore SIGPIPE because it should not stop the consumer whenever a
-        * SIGPIPE is catched through a FD operation.
-        */
-       if (sig == SIGPIPE) {
-               return;
+       if (ctx) {
+               lttng_consumer_should_exit(ctx);
        }
-
-       lttng_consumer_should_exit(ctx);
 }
 
 /*
@@ -104,25 +116,27 @@ static int set_signal_handler(void)
        sigset_t sigset;
 
        if ((ret = sigemptyset(&sigset)) < 0) {
-               perror("sigemptyset");
+               PERROR("sigemptyset");
                return ret;
        }
 
-       sa.sa_handler = sighandler;
        sa.sa_mask = sigset;
        sa.sa_flags = 0;
+
+       sa.sa_handler = sighandler;
        if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) {
-               perror("sigaction");
+               PERROR("sigaction");
                return ret;
        }
 
        if ((ret = sigaction(SIGINT, &sa, NULL)) < 0) {
-               perror("sigaction");
+               PERROR("sigaction");
                return ret;
        }
 
+       sa.sa_handler = SIG_IGN;
        if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) {
-               perror("sigaction");
+               PERROR("sigaction");
                return ret;
        }
 
@@ -137,9 +151,9 @@ static void usage(FILE *fp)
        fprintf(fp, "Usage: %s OPTIONS\n\nOptions:\n", progname);
        fprintf(fp, "  -h, --help                         "
                        "Display this usage.\n");
-       fprintf(fp, "  -c, --consumerd-cmd-sock PATH     "
+       fprintf(fp, "  -c, --consumerd-cmd-sock PATH      "
                        "Specify path for the command socket\n");
-       fprintf(fp, "  -e, --consumerd-err-sock PATH     "
+       fprintf(fp, "  -e, --consumerd-err-sock PATH      "
                        "Specify path for the error socket\n");
        fprintf(fp, "  -d, --daemonize                    "
                        "Start as a daemon.\n");
@@ -149,6 +163,8 @@ static void usage(FILE *fp)
                        "Verbose mode. Activate DBG() macro.\n");
        fprintf(fp, "  -V, --version                      "
                        "Show version number.\n");
+       fprintf(fp, "  -g, --group NAME                   "
+                       "Specify the tracing group name. (default: tracing)\n");
        fprintf(fp, "  -k, --kernel                       "
                        "Consumer kernel buffers (default).\n");
        fprintf(fp, "  -u, --ust                          "
@@ -164,14 +180,15 @@ static void usage(FILE *fp)
 /*
  * daemon argument parsing
  */
-static void parse_args(int argc, char **argv)
+static int parse_args(int argc, char **argv)
 {
-       int c;
+       int c, ret = 0;
 
        static struct option long_options[] = {
                { "consumerd-cmd-sock", 1, 0, 'c' },
                { "consumerd-err-sock", 1, 0, 'e' },
                { "daemonize", 0, 0, 'd' },
+               { "group", 1, 0, 'g' },
                { "help", 0, 0, 'h' },
                { "quiet", 0, 0, 'q' },
                { "verbose", 0, 0, 'v' },
@@ -185,27 +202,49 @@ static void parse_args(int argc, char **argv)
 
        while (1) {
                int option_index = 0;
-               c = getopt_long(argc, argv, "dhqvVku" "c:e:", long_options, &option_index);
+               c = getopt_long(argc, argv, "dhqvVku" "c:e:g:",
+                               long_options, &option_index);
                if (c == -1) {
                        break;
                }
 
                switch (c) {
                case 0:
-                       fprintf(stderr, "option %s", long_options[option_index].name);
+                       fprintf(stderr, "option %s",
+                               long_options[option_index].name);
                        if (optarg) {
                                fprintf(stderr, " with arg %s\n", optarg);
+                               ret = -1;
+                               goto end;
                        }
                        break;
                case 'c':
-                       snprintf(command_sock_path, PATH_MAX, "%s", optarg);
+                       if (lttng_is_setuid_setgid()) {
+                               WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
+                                       "-c, --consumerd-cmd-sock");
+                       } else {
+                               snprintf(command_sock_path, PATH_MAX, "%s", optarg);
+                       }
                        break;
                case 'e':
-                       snprintf(error_sock_path, PATH_MAX, "%s", optarg);
+                       if (lttng_is_setuid_setgid()) {
+                               WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
+                                       "-e, --consumerd-err-sock");
+                       } else {
+                               snprintf(error_sock_path, PATH_MAX, "%s", optarg);
+                       }
                        break;
                case 'd':
                        opt_daemon = 1;
                        break;
+               case 'g':
+                       if (lttng_is_setuid_setgid()) {
+                               WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
+                                       "-g, --group");
+                       } else {
+                               tracing_group_name = optarg;
+                       }
+                       break;
                case 'h':
                        usage(stdout);
                        exit(EXIT_SUCCESS);
@@ -213,7 +252,7 @@ static void parse_args(int argc, char **argv)
                        lttng_opt_quiet = 1;
                        break;
                case 'v':
-                       lttng_opt_verbose = 1;
+                       lttng_opt_verbose = 3;
                        break;
                case 'V':
                        fprintf(stdout, "%s\n", VERSION);
@@ -234,9 +273,12 @@ static void parse_args(int argc, char **argv)
 #endif
                default:
                        usage(stderr);
-                       exit(EXIT_FAILURE);
+                       ret = -1;
+                       goto end;
                }
        }
+end:
+       return ret;
 }
 
 /*
@@ -263,12 +305,23 @@ static void set_ulimit(void)
  */
 int main(int argc, char **argv)
 {
-       int ret = 0;
+       int ret = 0, retval = 0;
        void *status;
+       struct lttng_consumer_local_data *tmp_ctx;
+
+       rcu_register_thread();
+
+       if (set_signal_handler()) {
+               retval = -1;
+               goto exit_set_signal_handler;
+       }
 
        /* Parse arguments */
        progname = argv[0];
-       parse_args(argc, argv);
+       if (parse_args(argc, argv)) {
+               retval = -1;
+               goto exit_options;
+       }
 
        /* Daemonize */
        if (opt_daemon) {
@@ -282,7 +335,8 @@ int main(int argc, char **argv)
                ret = daemon(0, 0);
                if (ret < 0) {
                        PERROR("daemon");
-                       goto error;
+                       retval = -1;
+                       goto exit_options;
                }
                /*
                 * We are in the child. Make sure all other file
@@ -294,188 +348,331 @@ int main(int argc, char **argv)
                }
        }
 
-       /* Set up max poll set size */
-       lttng_poll_set_max_size();
+       /*
+        * Starting from here, we can create threads. This needs to be after
+        * the daemon() call above due to RCU.
+        */
+
+       health_consumerd = health_app_create(NR_HEALTH_CONSUMERD_TYPES);
+       if (!health_consumerd) {
+               retval = -1;
+               goto exit_health_consumerd_cleanup;
+       }
 
        if (*command_sock_path == '\0') {
                switch (opt_type) {
                case LTTNG_CONSUMER_KERNEL:
-                       snprintf(command_sock_path, PATH_MAX, DEFAULT_KCONSUMERD_CMD_SOCK_PATH,
+                       ret = snprintf(command_sock_path, PATH_MAX,
+                                       DEFAULT_KCONSUMERD_CMD_SOCK_PATH,
                                        DEFAULT_LTTNG_RUNDIR);
+                       if (ret < 0) {
+                               retval = -1;
+                               goto exit_init_data;
+                       }
                        break;
                case LTTNG_CONSUMER64_UST:
-                       snprintf(command_sock_path, PATH_MAX,
-                                       DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH, DEFAULT_LTTNG_RUNDIR);
+                       ret = snprintf(command_sock_path, PATH_MAX,
+                                       DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH,
+                                       DEFAULT_LTTNG_RUNDIR);
+                       if (ret < 0) {
+                               retval = -1;
+                               goto exit_init_data;
+                       }
                        break;
                case LTTNG_CONSUMER32_UST:
-                       snprintf(command_sock_path, PATH_MAX,
-                                       DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH, DEFAULT_LTTNG_RUNDIR);
+                       ret = snprintf(command_sock_path, PATH_MAX,
+                                       DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH,
+                                       DEFAULT_LTTNG_RUNDIR);
+                       if (ret < 0) {
+                               retval = -1;
+                               goto exit_init_data;
+                       }
                        break;
                default:
-                       WARN("Unknown consumerd type");
-                       goto error;
+                       ERR("Unknown consumerd type");
+                       retval = -1;
+                       goto exit_init_data;
                }
        }
 
        /* Init */
-       lttng_consumer_init();
+       if (lttng_consumer_init()) {
+               retval = -1;
+               goto exit_init_data;
+       }
+
+       /* Initialize communication library */
+       lttcomm_init();
+       /* Initialize TCP timeout values */
+       lttcomm_inet_init();
 
        if (!getuid()) {
                /* Set limit for open files */
                set_ulimit();
        }
 
+       if (run_as_create_worker(argv[0], NULL, NULL) < 0) {
+               goto exit_init_data;
+       }
+
        /* create the consumer instance with and assign the callbacks */
        ctx = lttng_consumer_create(opt_type, lttng_consumer_read_subbuffer,
                NULL, lttng_consumer_on_recv_stream, NULL);
-       if (ctx == NULL) {
-               goto error;
+       if (!ctx) {
+               retval = -1;
+               goto exit_init_data;
        }
 
        lttng_consumer_set_command_sock_path(ctx, command_sock_path);
        if (*error_sock_path == '\0') {
                switch (opt_type) {
                case LTTNG_CONSUMER_KERNEL:
-                       snprintf(error_sock_path, PATH_MAX, DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
+                       ret = snprintf(error_sock_path, PATH_MAX,
+                                       DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
                                        DEFAULT_LTTNG_RUNDIR);
+                       if (ret < 0) {
+                               retval = -1;
+                               goto exit_init_data;
+                       }
                        break;
                case LTTNG_CONSUMER64_UST:
-                       snprintf(error_sock_path, PATH_MAX,
-                                       DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH, DEFAULT_LTTNG_RUNDIR);
+                       ret = snprintf(error_sock_path, PATH_MAX,
+                                       DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH,
+                                       DEFAULT_LTTNG_RUNDIR);
+                       if (ret < 0) {
+                               retval = -1;
+                               goto exit_init_data;
+                       }
                        break;
                case LTTNG_CONSUMER32_UST:
-                       snprintf(error_sock_path, PATH_MAX,
-                                       DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH, DEFAULT_LTTNG_RUNDIR);
+                       ret = snprintf(error_sock_path, PATH_MAX,
+                                       DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH,
+                                       DEFAULT_LTTNG_RUNDIR);
+                       if (ret < 0) {
+                               retval = -1;
+                               goto exit_init_data;
+                       }
                        break;
                default:
-                       WARN("Unknown consumerd type");
-                       goto error;
+                       ERR("Unknown consumerd type");
+                       retval = -1;
+                       goto exit_init_data;
                }
        }
 
-       if (set_signal_handler() < 0) {
-               goto error;
-       }
-
        /* Connect to the socket created by lttng-sessiond to report errors */
        DBG("Connecting to error socket %s", error_sock_path);
        ret = lttcomm_connect_unix_sock(error_sock_path);
-       /* not a fatal error, but all communication with lttng-sessiond will fail */
+       /*
+        * Not a fatal error, but all communication with lttng-sessiond will
+        * fail.
+        */
        if (ret < 0) {
                WARN("Cannot connect to error socket (is lttng-sessiond started?)");
        }
        lttng_consumer_set_error_sock(ctx, ret);
 
        /*
-        * For UST consumer, we block RT signals used for periodical metadata flush
-        * in main and create a dedicated thread to handle these signals.
+        * Block RT signals used for UST periodical metadata flush and the live
+        * timer in main, and create a dedicated thread to handle these signals.
         */
-       switch (opt_type) {
-       case LTTNG_CONSUMER32_UST:
-       case LTTNG_CONSUMER64_UST:
-               consumer_signal_init();
-               break;
-       default:
-               break;
+       if (consumer_signal_init()) {
+               retval = -1;
+               goto exit_init_data;
        }
+
        ctx->type = opt_type;
 
+       if (utils_create_pipe(health_quit_pipe)) {
+               retval = -1;
+               goto exit_health_pipe;
+       }
+
+       /* Create the health management thread */
+       ret = pthread_create(&health_thread, default_pthread_attr(),
+                       thread_manage_health, (void *) NULL);
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_create health");
+               retval = -1;
+               goto exit_health_thread;
+       }
+
+       /*
+        * Wait for health thread to be initialized before letting the
+        * sessiond thread reply to the sessiond that we are ready.
+        */
+       while (uatomic_read(&lttng_consumer_ready)) {
+               usleep(100000);
+       }
+       cmm_smp_mb();   /* Read ready before following operations */
+
+       /*
+        * Create the thread to manage the UST metadata periodic timer and
+        * live timer.
+        */
+       ret = pthread_create(&metadata_timer_thread, NULL,
+                       consumer_timer_thread, (void *) ctx);
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_create");
+               retval = -1;
+               goto exit_metadata_timer_thread;
+       }
+       metadata_timer_thread_online = true;
+
        /* Create thread to manage channels */
-       ret = pthread_create(&channel_thread, NULL, consumer_thread_channel_poll,
+       ret = pthread_create(&channel_thread, default_pthread_attr(),
+                       consumer_thread_channel_poll,
                        (void *) ctx);
-       if (ret != 0) {
-               perror("pthread_create");
-               goto error;
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_create");
+               retval = -1;
+               goto exit_channel_thread;
        }
 
        /* Create thread to manage the polling/writing of trace metadata */
-       ret = pthread_create(&metadata_thread, NULL, consumer_thread_metadata_poll,
+       ret = pthread_create(&metadata_thread, default_pthread_attr(),
+                       consumer_thread_metadata_poll,
                        (void *) ctx);
-       if (ret != 0) {
-               perror("pthread_create");
-               goto metadata_error;
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_create");
+               retval = -1;
+               goto exit_metadata_thread;
        }
 
        /* Create thread to manage the polling/writing of trace data */
-       ret = pthread_create(&data_thread, NULL, consumer_thread_data_poll,
-                       (void *) ctx);
-       if (ret != 0) {
-               perror("pthread_create");
-               goto data_error;
+       ret = pthread_create(&data_thread, default_pthread_attr(),
+                       consumer_thread_data_poll, (void *) ctx);
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_create");
+               retval = -1;
+               goto exit_data_thread;
        }
 
-       /* Create the thread to manage the receive of fd */
-       ret = pthread_create(&sessiond_thread, NULL, consumer_thread_sessiond_poll,
+       /* Create the thread to manage the reception of fds */
+       ret = pthread_create(&sessiond_thread, default_pthread_attr(),
+                       consumer_thread_sessiond_poll,
                        (void *) ctx);
-       if (ret != 0) {
-               perror("pthread_create");
-               goto sessiond_error;
-       }
-
-       switch (opt_type) {
-       case LTTNG_CONSUMER32_UST:
-       case LTTNG_CONSUMER64_UST:
-               /* Create the thread to manage the metadata periodic timers */
-               ret = pthread_create(&metadata_timer_thread, NULL,
-                               consumer_timer_metadata_thread, (void *) ctx);
-               if (ret != 0) {
-                       perror("pthread_create");
-                       goto metadata_timer_error;
-               }
-
-               ret = pthread_detach(metadata_timer_thread);
-               if (ret) {
-                       errno = ret;
-                       perror("pthread_detach");
-               }
-               break;
-       default:
-               break;
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_create");
+               retval = -1;
+               goto exit_sessiond_thread;
        }
 
-metadata_timer_error:
+
+       /*
+        * This is where we start awaiting program completion (e.g. through
+        * a signal that asks threads to tear down).
+        */
+
        ret = pthread_join(sessiond_thread, &status);
-       if (ret != 0) {
-               perror("pthread_join");
-               goto error;
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_join sessiond_thread");
+               retval = -1;
        }
+exit_sessiond_thread:
 
-sessiond_error:
        ret = pthread_join(data_thread, &status);
-       if (ret != 0) {
-               perror("pthread_join");
-               goto error;
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_join data_thread");
+               retval = -1;
        }
+exit_data_thread:
 
-data_error:
        ret = pthread_join(metadata_thread, &status);
-       if (ret != 0) {
-               perror("pthread_join");
-               goto error;
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_join metadata_thread");
+               retval = -1;
        }
+exit_metadata_thread:
 
-metadata_error:
        ret = pthread_join(channel_thread, &status);
-       if (ret != 0) {
-               perror("pthread_join");
-               goto error;
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_join channel_thread");
+               retval = -1;
        }
+exit_channel_thread:
 
-       if (!ret) {
-               ret = EXIT_SUCCESS;
-               lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_EXIT_SUCCESS);
-               goto end;
-       }
+exit_metadata_timer_thread:
 
-error:
-       ret = EXIT_FAILURE;
-       if (ctx) {
-               lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_EXIT_FAILURE);
+       ret = pthread_join(health_thread, &status);
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_join health_thread");
+               retval = -1;
        }
+exit_health_thread:
 
-end:
-       lttng_consumer_destroy(ctx);
+       utils_close_pipe(health_quit_pipe);
+exit_health_pipe:
+
+exit_init_data:
+       /*
+        * Wait for all pending call_rcu work to complete before tearing
+        * down data structures. call_rcu worker may be trying to
+        * perform lookups in those structures.
+        */
+       rcu_barrier();
        lttng_consumer_cleanup();
+       /*
+        * Tearing down the metadata timer thread in a
+        * non-fully-symmetric fashion compared to its creation in case
+        * lttng_consumer_cleanup() ends up tearing down timers (which
+        * requires the timer thread to be alive).
+        */
+       if (metadata_timer_thread_online) {
+               /*
+                * Ensure the metadata timer thread exits only after all other
+                * threads are gone, because it is required to perform timer
+                * teardown synchronization.
+                */
+               kill(getpid(), LTTNG_CONSUMER_SIG_EXIT);
+               ret = pthread_join(metadata_timer_thread, &status);
+               if (ret) {
+                       errno = ret;
+                       PERROR("pthread_join metadata_timer_thread");
+                       retval = -1;
+               }
+               ret = consumer_timer_thread_get_channel_monitor_pipe();
+               if (ret >= 0) {
+                       ret = close(ret);
+                       if (ret) {
+                               PERROR("close channel monitor pipe");
+                       }
+               }
+               metadata_timer_thread_online = false;
+       }
+       tmp_ctx = ctx;
+       ctx = NULL;
+       cmm_barrier();  /* Clear ctx for signal handler. */
+       lttng_consumer_destroy(tmp_ctx);
 
-       return ret;
+       if (health_consumerd) {
+               health_app_destroy(health_consumerd);
+       }
+       /* Ensure all prior call_rcu are done. */
+       rcu_barrier();
+
+       run_as_destroy_worker();
+
+exit_health_consumerd_cleanup:
+exit_options:
+exit_set_signal_handler:
+
+       rcu_unregister_thread();
+
+       if (!retval) {
+               exit(EXIT_SUCCESS);
+       } else {
+               exit(EXIT_FAILURE);
+       }
 }
This page took 0.031942 seconds and 5 git commands to generate.