X-Git-Url: http://git.efficios.com/?a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=27c32e4d8b6bc34d9ec64fc754cd20633be8a41a;hb=4852cdb954cd0d685bd4be7e69abfeb3e5388cc6;hp=10802aa7b2cc66d87d0946ee53623b05b47a1c0f;hpb=87c3f04b102fced6eaaffb2ede4ec14df999f238;p=lttng-tools.git diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 10802aa7b..27c32e4d8 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -77,6 +77,7 @@ #include "agent.h" #include "ht-cleanup.h" #include "sessiond-config.h" +#include "clear.h" static const char *help_msg = #ifdef LTTNG_EMBED_HELP @@ -286,13 +287,39 @@ struct notification_thread_handle *notification_thread_handle; struct lttng_ht *agent_apps_ht_by_sock = NULL; /* - * Whether sessiond is ready for commands/notification channel/health check + * The initialization of the session daemon is done in multiple phases. + * + * While all threads are launched near-simultaneously, only some of them + * are needed to ensure the session daemon can start to respond to client * requests. - * NR_LTTNG_SESSIOND_READY must match the number of calls to - * sessiond_notify_ready(). + * + * There are two important guarantees that we wish to offer with respect + * to the initialisation of the session daemon: + * - When the daemonize/background launcher process exits, the sessiond + * is fully able to respond to client requests, + * - Auto-loaded sessions are visible to clients. + * + * In order to achieve this, a number of support threads have to be launched + * to allow the "client" thread to function properly. Moreover, since the + * "load session" thread needs the client thread, we must provide a way + * for the "load session" thread to know that the "client" thread is up + * and running. + * + * Hence, the support threads decrement the lttng_sessiond_ready counter + * while the "client" threads waits for it to reach 0. 
Once the "client" thread + * unblocks, it posts the message_thread_ready semaphore which allows the + * "load session" thread to progress. + * + * This implies that the "load session" thread is the last to be initialized + * and will explicitly call sessiond_signal_parents(), which signals the parents + * that the session daemon is fully initialized. + * + * The three (3) support threads are: + * - agent_thread + * - notification_thread + * - health_thread */ -#define NR_LTTNG_SESSIOND_READY 4 -int lttng_sessiond_ready = NR_LTTNG_SESSIOND_READY; +int lttng_sessiond_ready = 3; int sessiond_check_thread_quit_pipe(int fd, uint32_t events) { @@ -301,28 +328,36 @@ int sessiond_check_thread_quit_pipe(int fd, uint32_t events) /* Notify parents that we are ready for cmd and health check */ LTTNG_HIDDEN -void sessiond_notify_ready(void) +void sessiond_signal_parents(void) { - if (uatomic_sub_return(&lttng_sessiond_ready, 1) == 0) { - /* - * Notify parent pid that we are ready to accept command - * for client side. This ppid is the one from the - * external process that spawned us. - */ - if (config.sig_parent) { - kill(ppid, SIGUSR1); - } + /* + * Notify parent pid that we are ready to accept command + * for client side. This ppid is the one from the + * external process that spawned us. + */ + if (config.sig_parent) { + kill(ppid, SIGUSR1); + } - /* - * Notify the parent of the fork() process that we are - * ready. - */ - if (config.daemonize || config.background) { - kill(child_ppid, SIGUSR1); - } + /* + * Notify the parent of the fork() process that we are + * ready. + */ + if (config.daemonize || config.background) { + kill(child_ppid, SIGUSR1); } } +LTTNG_HIDDEN +void sessiond_notify_ready(void) +{ + /* + * The _return variant is used since the implied memory barriers are + * required.
+ */ + (void) uatomic_sub_return(&lttng_sessiond_ready, 1); +} + static int __sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size, int *a_pipe) @@ -513,15 +548,9 @@ static void sessiond_cleanup(void) */ utils_close_pipe(thread_quit_pipe); - /* - * If config.pid_file_path.value is undefined, the default file will be - * wiped when removing the rundir. - */ - if (config.pid_file_path.value) { - ret = remove(config.pid_file_path.value); - if (ret < 0) { - PERROR("remove pidfile %s", config.pid_file_path.value); - } + ret = remove(config.pid_file_path.value); + if (ret < 0) { + PERROR("remove pidfile %s", config.pid_file_path.value); } DBG("Removing sessiond and consumerd content of directory %s", @@ -599,21 +628,6 @@ static void sessiond_cleanup(void) free(load_info); } - /* - * Cleanup lock file by deleting it and finaly closing it which will - * release the file system lock. - */ - if (lockfile_fd >= 0) { - ret = remove(config.lock_file_path.value); - if (ret < 0) { - PERROR("remove lock file"); - } - ret = close(lockfile_fd); - if (ret < 0) { - PERROR("close lock file"); - } - } - /* * We do NOT rmdir rundir because there are other processes * using it, for instance lttng-relayd, which can start in @@ -1270,6 +1284,7 @@ restart: if (!cmd_socket_wrapper) { goto error; } + cmd_socket_wrapper->lock = &consumer_data->lock; ret = consumer_send_channel_monitor_pipe(cmd_socket_wrapper, consumer_data->channel_monitor_pipe); @@ -2413,16 +2428,17 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) * fallback on the 32-bit one, */ DBG3("Looking for a kernel consumer at these locations:"); - DBG3(" 1) %s", config.consumerd64_bin_path.value); + DBG3(" 1) %s", config.consumerd64_bin_path.value ? : "NULL"); DBG3(" 2) %s/%s", INSTALL_BIN_PATH, DEFAULT_CONSUMERD_FILE); - DBG3(" 3) %s", config.consumerd32_bin_path.value); + DBG3(" 3) %s", config.consumerd32_bin_path.value ?
: "NULL"); if (stat(config.consumerd64_bin_path.value, &st) == 0) { DBG3("Found location #1"); consumer_to_use = config.consumerd64_bin_path.value; } else if (stat(INSTALL_BIN_PATH "/" DEFAULT_CONSUMERD_FILE, &st) == 0) { DBG3("Found location #2"); consumer_to_use = INSTALL_BIN_PATH "/" DEFAULT_CONSUMERD_FILE; - } else if (stat(config.consumerd32_bin_path.value, &st) == 0) { + } else if (config.consumerd32_bin_path.value && + stat(config.consumerd32_bin_path.value, &st) == 0) { DBG3("Found location #3"); consumer_to_use = config.consumerd32_bin_path.value; } else { @@ -2442,7 +2458,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) { char *tmpnew = NULL; - if (config.consumerd64_lib_dir.value[0] != '\0') { + if (config.consumerd64_lib_dir.value) { char *tmp; size_t tmplen; @@ -2450,20 +2466,18 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) if (!tmp) { tmp = ""; } - tmplen = strlen("LD_LIBRARY_PATH=") - + strlen(config.consumerd64_lib_dir.value) + 1 /* : */ + strlen(tmp); + tmplen = strlen(config.consumerd64_lib_dir.value) + 1 /* : */ + strlen(tmp); tmpnew = zmalloc(tmplen + 1 /* \0 */); if (!tmpnew) { ret = -ENOMEM; goto error; } - strcpy(tmpnew, "LD_LIBRARY_PATH="); strcat(tmpnew, config.consumerd64_lib_dir.value); if (tmp[0] != '\0') { strcat(tmpnew, ":"); strcat(tmpnew, tmp); } - ret = putenv(tmpnew); + ret = setenv("LD_LIBRARY_PATH", tmpnew, 1); if (ret) { ret = -errno; free(tmpnew); @@ -2476,16 +2490,14 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) "--consumerd-err-sock", consumer_data->err_unix_sock_path, "--group", config.tracing_group_name.value, NULL); - if (config.consumerd64_lib_dir.value[0] != '\0') { - free(tmpnew); - } + free(tmpnew); break; } case LTTNG_CONSUMER32_UST: { char *tmpnew = NULL; - if (config.consumerd32_lib_dir.value[0] != '\0') { + if (config.consumerd32_lib_dir.value) { char *tmp; size_t tmplen; @@ -2493,20 +2505,18 @@ static pid_t spawn_consumerd(struct consumer_data 
*consumer_data) if (!tmp) { tmp = ""; } - tmplen = strlen("LD_LIBRARY_PATH=") - + strlen(config.consumerd32_lib_dir.value) + 1 /* : */ + strlen(tmp); + tmplen = strlen(config.consumerd32_lib_dir.value) + 1 /* : */ + strlen(tmp); tmpnew = zmalloc(tmplen + 1 /* \0 */); if (!tmpnew) { ret = -ENOMEM; goto error; } - strcpy(tmpnew, "LD_LIBRARY_PATH="); strcat(tmpnew, config.consumerd32_lib_dir.value); if (tmp[0] != '\0') { strcat(tmpnew, ":"); strcat(tmpnew, tmp); } - ret = putenv(tmpnew); + ret = setenv("LD_LIBRARY_PATH", tmpnew, 1); if (ret) { ret = -errno; free(tmpnew); @@ -2519,13 +2529,11 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) "--consumerd-err-sock", consumer_data->err_unix_sock_path, "--group", config.tracing_group_name.value, NULL); - if (config.consumerd32_lib_dir.value[0] != '\0') { - free(tmpnew); - } + free(tmpnew); break; } default: - PERROR("unknown consumer type"); + ERR("unknown consumer type"); exit(EXIT_FAILURE); } if (errno != 0) { @@ -2922,6 +2930,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, case LTTNG_REGENERATE_STATEDUMP: case LTTNG_REGISTER_TRIGGER: case LTTNG_UNREGISTER_TRIGGER: + case LTTNG_CLEAR_SESSION: need_domain = 0; break; default: @@ -4052,6 +4061,11 @@ error_add_context: notification_thread_handle); break; } + case LTTNG_CLEAR_SESSION: + { + ret = cmd_clear_session(cmd_ctx->session); + break; + } default: ret = LTTNG_ERR_UND; break; @@ -4308,13 +4322,50 @@ static void *thread_manage_clients(void *data) goto error; } - sessiond_notify_ready(); ret = sem_post(&load_info->message_thread_ready); if (ret) { PERROR("sem_post message_thread_ready"); goto error; } + /* + * Wait until all support threads are initialized before accepting + * commands. 
+ */ + while (uatomic_read(&lttng_sessiond_ready) != 0) { + fd_set read_fds; + struct timeval timeout; + + FD_ZERO(&read_fds); + FD_SET(thread_quit_pipe[0], &read_fds); + memset(&timeout, 0, sizeof(timeout)); + timeout.tv_usec = 1000; + + /* + * If a support thread failed to launch, it may signal that + * we must exit and the sessiond would never be marked as + * "ready". + * + * The timeout is set to 1ms, which serves as a way to + * pace down this check. + */ + ret = select(thread_quit_pipe[0] + 1, &read_fds, NULL, NULL, + &timeout); + if (ret > 0 || (ret < 0 && errno != EINTR)) { + goto exit; + } + } + /* + * This barrier is paired with the one in sessiond_notify_ready() to + * ensure that loads accessing data initialized by the other threads, + * on which this thread was waiting, are not performed before this point. + * + * Note that this could be a 'read' memory barrier, but a full barrier + * is used in case the code changes. The performance implications of + * this choice are minimal since this is a slow path. + */ + cmm_smp_mb(); + /* This testpoint is after we signal readiness to the parent.
*/ if (testpoint(sessiond_thread_manage_clients)) { goto error; @@ -4727,7 +4778,7 @@ static int set_option(int opt, const char *arg, const char *optname) } else if (string_match(optname, "no-kernel")) { config.no_kernel = true; } else if (string_match(optname, "quiet") || opt == 'q') { - lttng_opt_quiet = true; + config.quiet = true; } else if (string_match(optname, "verbose") || opt == 'v') { /* Verbose level can increase using multiple -v */ if (arg) { @@ -4846,8 +4897,8 @@ static int set_option(int opt, const char *arg, const char *optname) ERR("Port overflow in --agent-tcp-port parameter: %s", arg); return -1; } - config.agent_tcp_port = (uint32_t) v; - DBG3("Agent TCP port set to non default: %u", config.agent_tcp_port); + config.agent_tcp_port.begin = config.agent_tcp_port.end = (int) v; + DBG3("Agent TCP port set to non default: %i", (int) v); } } else if (string_match(optname, "load") || opt == 'l') { if (!arg || *arg == '\0') { @@ -5130,18 +5181,57 @@ end: return ret; } +/* + * Create lockfile using the rundir and return its fd. + */ +static int create_lockfile(void) +{ + return utils_create_lock_file(config.lock_file_path.value); +} + /* * Check if the global socket is available, and if a daemon is answering at the * other side. If yes, error is returned. + * + * Also attempts to create and hold the lock file. */ static int check_existing_daemon(void) { + int ret = 0; + /* Is there anybody out there ? */ if (lttng_session_daemon_alive()) { - return -EEXIST; + ret = -EEXIST; + goto end; } - return 0; + lockfile_fd = create_lockfile(); + if (lockfile_fd < 0) { + ret = -EEXIST; + goto end; + } +end: + return ret; +} + +static void sessiond_cleanup_lock_file(void) +{ + int ret; + + /* + * Cleanup lock file by deleting it and finaly closing it which will + * release the file system lock. 
+ */ + if (lockfile_fd >= 0) { + ret = remove(config.lock_file_path.value); + if (ret < 0) { + PERROR("remove lock file"); + } + ret = close(lockfile_fd); + if (ret < 0) { + PERROR("close lock file"); + } + } } /* @@ -5400,23 +5490,6 @@ static int write_pidfile(void) return utils_create_pid_file(getpid(), config.pid_file_path.value); } -/* - * Create lockfile using the rundir and return its fd. - */ -static int create_lockfile(void) -{ - return utils_create_lock_file(config.lock_file_path.value); -} - -/* - * Write agent TCP port using the rundir. - */ -static int write_agent_port(void) -{ - return utils_create_pid_file(config.agent_tcp_port, - config.agent_port_file_path.value); -} - static int set_clock_plugin_env(void) { int ret = 0; @@ -5481,6 +5554,12 @@ int main(int argc, char **argv) goto exit_set_signal_handler; } + /* + * Init config from environment variables. + * Command line option override env configuration per-doc. Do env first. + */ + sessiond_config_apply_env_config(&config); + /* * Parse arguments and load the daemon configuration file. * @@ -5495,9 +5574,6 @@ int main(int argc, char **argv) goto exit_options; } - /* Init config from environment variables. */ - sessiond_config_apply_env_config(&config); - /* * Resolve all paths received as arguments, configuration option, or * through environment variable as absolute paths. This is necessary @@ -5528,6 +5604,18 @@ int main(int argc, char **argv) sessiond_config_log(&config); + if (create_lttng_rundir()) { + retval = -1; + goto exit_options; + } + + /* Abort launch if a session daemon is already running. */ + if (check_existing_daemon()) { + ERR("A session daemon is already running."); + retval = -1; + goto exit_options; + } + /* Daemonize */ if (config.daemonize || config.background) { int i; @@ -5542,9 +5630,12 @@ int main(int argc, char **argv) /* * We are in the child. 
Make sure all other file descriptors are * closed, in case we are called with more opened file - * descriptors than the standard ones. + * descriptors than the standard ones and the lock file. */ for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) { + if (i == lockfile_fd) { + continue; + } (void) close(i); } } @@ -5583,13 +5674,8 @@ int main(int argc, char **argv) /* Check if daemon is UID = 0 */ is_root = !getuid(); - if (is_root) { /* Create global run dir with root access */ - if (create_lttng_rundir()) { - retval = -1; - goto exit_init_data; - } kernel_channel_monitor_pipe = lttng_pipe_open(0); if (!kernel_channel_monitor_pipe) { @@ -5606,12 +5692,6 @@ int main(int argc, char **argv) } } - lockfile_fd = create_lockfile(); - if (lockfile_fd < 0) { - retval = -1; - goto exit_init_data; - } - /* Set consumer initial state */ kernel_consumerd_state = CONSUMER_STOPPED; ust_consumerd_state = CONSUMER_STOPPED; @@ -5642,19 +5722,6 @@ int main(int argc, char **argv) goto exit_init_data; } - /* - * See if daemon already exist. - */ - if (check_existing_daemon()) { - ERR("Already running daemon.\n"); - /* - * We do not goto exit because we must not cleanup() - * because a daemon is already running. - */ - retval = -1; - goto exit_init_data; - } - /* * Init UST app hash table. Alloc hash table before this point since * cleanup() can get called after that point. @@ -5782,12 +5849,6 @@ int main(int argc, char **argv) retval = -1; goto exit_init_data; } - ret = write_agent_port(); - if (ret) { - ERR("Error in write_agent_port"); - retval = -1; - goto exit_init_data; - } /* Initialize communication library */ lttcomm_init(); @@ -6066,6 +6127,7 @@ exit_health_sessiond_cleanup: exit_create_run_as_worker_cleanup: exit_options: + sessiond_cleanup_lock_file(); sessiond_cleanup_options(); exit_set_signal_handler: