X-Git-Url: http://git.efficios.com/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=c878cda957f87f76dbea8e514f9eaa96697db14f;hp=21bc0819db6e498dc2a355c41f23a49c944b9639;hb=2987f3288fbdc2d42e79a159ce4a9b8153f86f2d;hpb=772d6f1ee349ae87f9d6ecd22ac463b654b3f352 diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 21bc0819d..c878cda95 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -90,6 +90,7 @@ static pid_t ppid; /* Parent PID for --sig-parent option */ static pid_t child_ppid; /* Internal parent PID use with daemonize. */ static char *rundir; static int lockfile_fd = -1; +static int opt_print_version; /* Set to 1 when a SIGUSR1 signal is received. */ static int recv_child_signal; @@ -309,12 +310,38 @@ struct load_session_thread_data *load_info; struct lttng_ht *agent_apps_ht_by_sock = NULL; /* - * Whether sessiond is ready for commands/health check requests. - * NR_LTTNG_SESSIOND_READY must match the number of calls to - * sessiond_notify_ready(). + * The initialization of the session daemon is done in multiple phases. + * + * While all threads are launched near-simultaneously, only some of them + * are needed to ensure the session daemon can start to respond to client + * requests. + * + * There are two important guarantees that we wish to offer with respect + * to the initialisation of the session daemon: + * - When the daemonize/background launcher process exits, the sessiond + * is fully able to respond to client requests, + * - Auto-loaded sessions are visible to clients. + * + * In order to achieve this, a number of support threads have to be launched + * to allow the "client" thread to function properly. Moreover, since the + * "load session" thread needs the client thread, we must provide a way + * for the "load session" thread to know that the "client" thread is up + * and running. + * + * Hence, the support threads decrement the lttng_sessiond_ready counter + * while the "client" threads waits for it to reach 0. Once the "client" thread + * unblocks, it posts the message_thread_ready semaphore which allows the + * "load session" thread to progress. + * + * This implies that the "load session" thread is the last to be initialized + * and will explicitly call sessiond_signal_parents(), which signals the parents + * that the session daemon is fully initialized. + * + * The two (2) support threads are: + * - agent_thread + * - health_thread */ -#define NR_LTTNG_SESSIOND_READY 3 -int lttng_sessiond_ready = NR_LTTNG_SESSIOND_READY; +int lttng_sessiond_ready = 2; int sessiond_check_thread_quit_pipe(int fd, uint32_t events) { @@ -323,28 +350,36 @@ int sessiond_check_thread_quit_pipe(int fd, uint32_t events) /* Notify parents that we are ready for cmd and health check */ LTTNG_HIDDEN -void sessiond_notify_ready(void) +void sessiond_signal_parents(void) { - if (uatomic_sub_return(<tng_sessiond_ready, 1) == 0) { - /* - * Notify parent pid that we are ready to accept command - * for client side. This ppid is the one from the - * external process that spawned us. - */ - if (opt_sig_parent) { - kill(ppid, SIGUSR1); - } + /* + * Notify parent pid that we are ready to accept command + * for client side. This ppid is the one from the + * external process that spawned us. + */ + if (opt_sig_parent) { + kill(ppid, SIGUSR1); + } - /* - * Notify the parent of the fork() process that we are - * ready. - */ - if (opt_daemon || opt_background) { - kill(child_ppid, SIGUSR1); - } + /* + * Notify the parent of the fork() process that we are + * ready. + */ + if (opt_daemon || opt_background) { + kill(child_ppid, SIGUSR1); } } +LTTNG_HIDDEN +void sessiond_notify_ready(void) +{ + /* + * The _return variant is used since the implied memory barriers are + * required. + */ + (void) uatomic_sub_return(<tng_sessiond_ready, 1); +} + static void setup_consumerd_path(void) { @@ -564,8 +599,7 @@ static void wait_consumer(struct consumer_data *consumer_data) ret = waitpid(consumer_data->pid, &status, 0); if (ret == -1) { PERROR("consumerd waitpid pid: %d", consumer_data->pid) - } - if (!WIFEXITED(status)) { + } else if (!WIFEXITED(status)) { ERR("consumerd termination with error: %d", WEXITSTATUS(ret)); } @@ -760,12 +794,6 @@ static void sessiond_cleanup_options(void) free(kmod_extra_probes_list); run_as_destroy_worker(); - - /* */ - DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm" - "Matthew, BEET driven development works!%c[%dm", - 27, 1, 31, 27, 0, 27, 1, 33, 27, 0); - /* */ } /* @@ -1861,6 +1889,8 @@ static void *thread_dispatch_ust_registration(void *data) .count = 0, }; + rcu_register_thread(); + health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH); if (testpoint(sessiond_thread_app_reg_dispatch)) { @@ -1873,12 +1903,16 @@ static void *thread_dispatch_ust_registration(void *data) DBG("[thread] Dispatch UST command started"); - while (!CMM_LOAD_SHARED(dispatch_thread_exit)) { + for (;;) { health_code_update(); /* Atomically prepare the queue futex */ futex_nto1_prepare(&ust_cmd_queue.futex); + if (CMM_LOAD_SHARED(dispatch_thread_exit)) { + break; + } + do { struct ust_app *app = NULL; ust_cmd = NULL; @@ -2094,6 +2128,7 @@ error_testpoint: ERR("Health error occurred in %s", __func__); } health_unregister(health_sessiond); + rcu_unregister_thread(); return NULL; } @@ -2201,10 +2236,12 @@ static void *thread_registration_apps(void *data) * lttcomm_setsockopt_snd_timeout expect msec as * parameter. */ - (void) lttcomm_setsockopt_rcv_timeout(sock, - app_socket_timeout * 1000); - (void) lttcomm_setsockopt_snd_timeout(sock, - app_socket_timeout * 1000); + if (app_socket_timeout >= 0) { + (void) lttcomm_setsockopt_rcv_timeout(sock, + app_socket_timeout * 1000); + (void) lttcomm_setsockopt_snd_timeout(sock, + app_socket_timeout * 1000); + } /* * Set the CLOEXEC flag. Return code is useless because @@ -2378,7 +2415,7 @@ static int spawn_consumer_thread(struct consumer_data *consumer_data) pthread_mutex_lock(&consumer_data->cond_mutex); /* Get time for sem_timedwait absolute timeout */ - clock_ret = clock_gettime(CLOCK_MONOTONIC, &timeout); + clock_ret = lttng_clock_gettime(CLOCK_MONOTONIC, &timeout); /* * Set the timeout for the condition timed wait even if the clock gettime * call fails since we might loop on that call and we want to avoid to @@ -2524,7 +2561,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) } else { DBG("Could not find any valid consumerd executable"); ret = -EINVAL; - break; + goto error; } DBG("Using kernel consumer at: %s", consumer_to_use); ret = execl(consumer_to_use, @@ -2621,7 +2658,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) break; } default: - PERROR("unknown consumer type"); + ERR("unknown consumer type"); exit(EXIT_FAILURE); } if (errno != 0) { @@ -2714,7 +2751,6 @@ static int init_kernel_tracer(void) kernel_tracer_fd = open(module_proc_lttng, O_RDWR); if (kernel_tracer_fd < 0) { DBG("Failed to open %s", module_proc_lttng); - ret = -1; goto error_open; } @@ -3062,7 +3098,6 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, case LTTNG_CREATE_SESSION: case LTTNG_CREATE_SESSION_SNAPSHOT: case LTTNG_CREATE_SESSION_LIVE: - case LTTNG_CALIBRATE: case LTTNG_LIST_SESSIONS: case LTTNG_LIST_TRACEPOINTS: case LTTNG_LIST_SYSCALLS: @@ -3904,12 +3939,6 @@ error_add_context: ret = LTTNG_OK; break; } - case LTTNG_CALIBRATE: - { - ret = cmd_calibrate(cmd_ctx->lsm->domain.type, - &cmd_ctx->lsm->u.calibrate); - break; - } case LTTNG_REGISTER_CONSUMER: { struct consumer_data *cdata; @@ -4336,7 +4365,7 @@ error: } lttng_poll_clean(&events); - + stop_threads(); rcu_unregister_thread(); return NULL; } @@ -4381,13 +4410,50 @@ static void *thread_manage_clients(void *data) goto error; } - sessiond_notify_ready(); ret = sem_post(&load_info->message_thread_ready); if (ret) { PERROR("sem_post message_thread_ready"); goto error; } + /* + * Wait until all support threads are initialized before accepting + * commands. + */ + while (uatomic_read(<tng_sessiond_ready) != 0) { + fd_set read_fds; + struct timeval timeout; + + FD_ZERO(&read_fds); + FD_SET(thread_quit_pipe[0], &read_fds); + memset(&timeout, 0, sizeof(timeout)); + timeout.tv_usec = 1000; + + /* + * If a support thread failed to launch, it may signal that + * we must exit and the sessiond would never be marked as + * "ready". + * + * The timeout is set to 1ms, which serves as a way to + * pace down this check. + */ + ret = select(thread_quit_pipe[0] + 1, &read_fds, NULL, NULL, + &timeout); + if (ret > 0 || (ret < 0 && errno != EINTR)) { + goto exit; + } + } + /* + * This barrier is paired with the one in sessiond_notify_ready() to + * ensure that loads accessing data initialized by the other threads, + * on which this thread was waiting, are not performed before this point. + * + * Note that this could be a 'read' memory barrier, but a full barrier + * is used in case the code changes. The performance implications of + * this choice are minimal since this is a slow path. + */ + cmm_smp_mb(); + /* This testpoint is after we signal readiness to the parent. */ if (testpoint(sessiond_thread_manage_clients)) { goto error; @@ -4695,8 +4761,7 @@ static int set_option(int opt, const char *arg, const char *optname) } exit(ret ? EXIT_FAILURE : EXIT_SUCCESS); } else if (string_match(optname, "version") || opt == 'V') { - fprintf(stdout, "%s\n", VERSION); - exit(EXIT_SUCCESS); + opt_print_version = 1; } else if (string_match(optname, "sig-parent") || opt == 'S') { opt_sig_parent = 1; } else if (string_match(optname, "kconsumerd-err-sock")) { @@ -5036,6 +5101,23 @@ end: return ret; } +static void sessiond_config_log(void) +{ + DBG("LTTng-sessiond " VERSION " - " VERSION_NAME "%s%s", + GIT_VERSION[0] == '\0' ? "" : " - " GIT_VERSION, + EXTRA_VERSION_NAME[0] == '\0' ? "" : " - " EXTRA_VERSION_NAME); + if (EXTRA_VERSION_DESCRIPTION[0] != '\0') { + DBG("LTTng-sessiond extra version description:\n\t" EXTRA_VERSION_DESCRIPTION "\n"); + } + if (EXTRA_VERSION_PATCHES[0] != '\0') { + DBG("LTTng-sessiond extra patches:\n\t" EXTRA_VERSION_PATCHES "\n"); + } +} + +static void print_version(void) { + fprintf(stdout, "%s\n", VERSION); +} + /* * daemon configuration loading and argument parsing */ @@ -5374,9 +5456,6 @@ error: static void sighandler(int sig) { switch (sig) { - case SIGPIPE: - DBG("SIGPIPE caught"); - return; case SIGINT: DBG("SIGINT caught"); stop_threads(); @@ -5408,9 +5487,10 @@ static int set_signal_handler(void) return ret; } - sa.sa_handler = sighandler; sa.sa_mask = sigset; sa.sa_flags = 0; + + sa.sa_handler = sighandler; if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) { PERROR("sigaction"); return ret; @@ -5421,12 +5501,13 @@ static int set_signal_handler(void) return ret; } - if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) { + if ((ret = sigaction(SIGUSR1, &sa, NULL)) < 0) { PERROR("sigaction"); return ret; } - if ((ret = sigaction(SIGUSR1, &sa, NULL)) < 0) { + sa.sa_handler = SIG_IGN; + if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) { PERROR("sigaction"); return ret; } @@ -5532,6 +5613,44 @@ error: return ret; } +static int set_clock_plugin_env(void) +{ + int ret = 0; + const char *original_env_value; + char *full_path = NULL; + char *new_env_value = NULL; + + original_env_value = getenv("LTTNG_UST_CLOCK_PLUGIN"); + if (!original_env_value) { + goto end; + } + + full_path = utils_expand_path(original_env_value); + if (!full_path) { + ERR("Failed to expand LTTNG_UST_CLOCK_PLUGIN path \"%s\"", + original_env_value); + ret = -1; + goto end; + } + ret = asprintf(&new_env_value, "LTTNG_UST_CLOCK_PLUGIN=%s", + full_path); + free(full_path); + if (ret < 0) { + PERROR("asprintf"); + goto end; + } + + DBG("Updating environment: %s", new_env_value); + ret = putenv(new_env_value); + if (ret) { + free(new_env_value); + PERROR("putenv of LTTNG_UST_CLOCK_PLUGIN"); + goto end; + } +end: + return ret; +} + /* * main */ @@ -5573,6 +5692,20 @@ int main(int argc, char **argv) goto exit_options; } + sessiond_config_log(); + + if (opt_print_version) { + print_version(); + retval = 0; + goto exit_options; + } + + ret = set_clock_plugin_env(); + if (ret) { + retval = -1; + goto exit_options; + } + /* Daemonize */ if (opt_daemon || opt_background) { int i; @@ -6159,6 +6292,12 @@ exit_client: exit_health: exit_init_data: + /* + * Wait for all pending call_rcu work to complete before tearing + * down data structures. call_rcu worker may be trying to + * perform lookups in those structures. + */ + rcu_barrier(); /* * sessiond_cleanup() is called when no other thread is running, except * the ht_cleanup thread, which is needed to destroy the hash tables.