Add notification session rotation hooks
[lttng-tools.git] / src / bin / lttng-sessiond / cmd.c
index 6722341bb4b1be79f11a88b96c8989a3cf15e734..7f05655ecd7371a60c6fc3b5b9e1bd9eef768f84 100644 (file)
@@ -21,6 +21,7 @@
 #include <inttypes.h>
 #include <urcu/list.h>
 #include <urcu/uatomic.h>
+#include <sys/stat.h>
 
 #include <common/defaults.h>
 #include <common/common.h>
@@ -37,6 +38,7 @@
 #include <lttng/channel.h>
 #include <lttng/channel-internal.h>
 #include <lttng/rotate-internal.h>
+#include <lttng/location-internal.h>
 #include <common/string-utils/string-utils.h>
 
 #include "channel.h"
 
 #include "cmd.h"
 
+/*
+ * Delay, in microseconds (100ms), slept by wait_on_path() between two
+ * consecutive checks for the shm path's deletion.
+ */
+#define SESSION_DESTROY_SHM_PATH_CHECK_DELAY_US 100000
+
+static enum lttng_error_code wait_on_path(void *path); /* Forward declaration; referenced by destroy_completion_handler. */
+
+/*
+ * Command completion handler that is used by the destroy command
+ * when a session that has a non-default shm_path is being destroyed.
+ *
+ * The handler runs wait_on_path() with 'data' pointing at this object's own
+ * shm_path buffer. That buffer is sized like the shm_path member of
+ * struct ltt_session and is filled in by cmd_destroy_session() with a copy
+ * of the session's shm_path.
+ *
+ * See comment in cmd_destroy_session() for the rationale.
+ */
+static struct destroy_completion_handler {
+       struct cmd_completion_handler handler;
+       char shm_path[member_sizeof(struct ltt_session, shm_path)];
+} destroy_completion_handler = {
+       .handler = {
+               .run = wait_on_path,
+               .data = destroy_completion_handler.shm_path
+       },
+       .shm_path = { 0 },
+};
+
+static struct cmd_completion_handler *current_completion_handler; /* Returned and reset to NULL by cmd_pop_completion_handler(). */
+
 /*
  * Used to keep a unique index for each relayd socket created where this value
  * is associated with streams on the consumer so it can match the right relayd
@@ -3030,6 +3056,59 @@ int cmd_destroy_session(struct ltt_session *session, int wpipe,
                PERROR("write kernel poll pipe");
        }
 
+       if (session->shm_path[0]) {
+               /*
+                * When a session is created with an explicit shm_path,
+                * the consumer daemon will create its shared memory files
+                * at that location and will *not* unlink them. This is normal
+                * as the intention of that feature is to make it possible
+                * to retrieve the content of those files should a crash occur.
+                *
+                * To ensure the content of those files can be used, the
+                * session daemon will replicate the content of the metadata
+                * cache in a metadata file.
+                *
+                * On clean-up, it is expected that the consumer daemon will
+                * unlink the shared memory files and that the session daemon
+                * will unlink the metadata file. Then, the session's directory
+                * in the shm path can be removed.
+                *
+                * Unfortunately, a flaw in the design of the sessiond's and
+                * consumerd's tear down of channels makes it impossible to
+                * determine when the sessiond _and_ the consumerd have both
+                * destroyed their representation of a channel. For one, the
+                * unlinking, close, and rmdir happen in deferred 'call_rcu'
+                * callbacks in both daemons.
+                *
+                * However, it is also impossible for the sessiond to know when
+                * the consumer daemon is done destroying its channel(s) since
+                * it occurs as a reaction to the closing of the channel's file
+                * descriptor. There is no resulting communication initiated
+                * from the consumerd to the sessiond to confirm that the
+                * operation is completed (and was successful).
+                *
+                * Until this is all fixed, the session daemon checks for the
+                * removal of the session's shm path which makes it possible
+                * to safely advertise a session as having been destroyed.
+                *
+                * Prior to this fix, it was not possible to reliably save
+                * a session making use of the --shm-path option, destroy it,
+                * and load it again. This is because the creation of the
+                * session would fail upon seeing the session's shm path
+                * already in existence.
+                *
+                * Note that none of the error paths in the check for the
+                * directory's existence return an error. This is normal
+                * as there isn't much that can be done. The session will
+                * be destroyed properly, except that we can't offer the
+                * guarantee that the same session can be re-created.
+                */
+               current_completion_handler = &destroy_completion_handler.handler;
+               ret = lttng_strncpy(destroy_completion_handler.shm_path,
+                               session->shm_path,
+                               sizeof(destroy_completion_handler.shm_path));
+               assert(!ret);
+       }
        ret = session_destroy(session);
 
        return ret;
@@ -4482,6 +4561,14 @@ int cmd_rotate_session(struct ltt_session *session,
        session->current_archive_id++;
        session->rotate_pending = true;
        session->rotation_state = LTTNG_ROTATION_STATE_ONGOING;
+       ret = notification_thread_command_session_rotation_ongoing(
+                       notification_thread_handle,
+                       session->name, session->uid, session->gid,
+                       session->current_archive_id);
+       if (ret != LTTNG_OK) {
+               ERR("Failed to notify notification thread that a session rotation is ongoing for session %s",
+                               session->name);
+       }
 
        /*
         * Create the path name for the next chunk.
@@ -4594,13 +4681,29 @@ int cmd_rotate_session(struct ltt_session *session,
                 * session_list locks.
                 */
                if (!session->kernel_session && !ust_active) {
+                       struct lttng_trace_archive_location *location;
+
+                       session->rotate_pending = false;
+                       session->rotation_state = LTTNG_ROTATION_STATE_COMPLETED;
                        ret = rename_complete_chunk(session, now);
                        if (ret < 0) {
                                ERR("Failed to rename completed rotation chunk");
                                goto end;
                        }
-                       session->rotate_pending = false;
-                       session->rotation_state = LTTNG_ROTATION_STATE_COMPLETED;
+
+                       /* Ownership of location is transferred. */
+                       location = session_get_trace_archive_location(session);
+                       ret = notification_thread_command_session_rotation_completed(
+                                       notification_thread_handle,
+                                       session->name,
+                                       session->uid,
+                                       session->gid,
+                                       session->current_archive_id,
+                                       location);
+                       if (ret != LTTNG_OK) {
+                               ERR("Failed to notify notification thread that rotation is complete for session %s",
+                                               session->name);
+                       }
                }
        }
 
@@ -4721,15 +4824,19 @@ end:
  * Command LTTNG_ROTATION_SET_SCHEDULE from the lttng-ctl library.
  *
  * Configure the automatic rotation parameters.
- * Set to -1ULL to disable them.
+ * Setting 'activate' to true activates the rotation schedule type with 'new_value'.
+ * Setting 'activate' to false deactivates the rotation schedule and validates that
+ * 'new_value' has the same value as the currently active value.
  *
- * Return 0 on success or else an LTTNG_ERR code.
+ * Return 0 on success or else a positive LTTNG_ERR code.
  */
 int cmd_rotation_set_schedule(struct ltt_session *session,
-               uint64_t timer_us, uint64_t size,
+               bool activate, enum lttng_rotation_schedule_type schedule_type,
+               uint64_t new_value,
                struct notification_thread_handle *notification_thread_handle)
 {
        int ret;
+       uint64_t *parameter_value;
 
        assert(session);
 
@@ -4737,71 +4844,117 @@ int cmd_rotation_set_schedule(struct ltt_session *session,
 
        if (session->live_timer || session->snapshot_mode ||
                        !session->output_traces) {
+               DBG("Failing ROTATION_SET_SCHEDULE command as the rotation feature is not available for this session");
                ret = LTTNG_ERR_ROTATION_NOT_AVAILABLE;
                goto end;
        }
 
-       /* Trying to override an already active timer. */
-       if (timer_us && timer_us != -1ULL && session->rotate_timer_period) {
-               ret = LTTNG_ERR_ROTATION_TIMER_SET;
+       switch (schedule_type) {
+       case LTTNG_ROTATION_SCHEDULE_TYPE_SIZE_THRESHOLD:
+               parameter_value = &session->rotate_size;
+               break;
+       case LTTNG_ROTATION_SCHEDULE_TYPE_PERIODIC:
+               parameter_value = &session->rotate_timer_period;
+               if (new_value >= UINT_MAX) {
+                       DBG("Failing ROTATION_SET_SCHEDULE command as the value requested for a periodic rotation schedule is invalid: %" PRIu64 " > %u (UINT_MAX)",
+                                       new_value, UINT_MAX);
+                       ret = LTTNG_ERR_INVALID;
+                       goto end;
+               }
+               break;
+       default:
+               WARN("Failing ROTATION_SET_SCHEDULE command on unknown schedule type");
+               ret = LTTNG_ERR_INVALID;
                goto end;
-       /* Trying to disable an inactive timer. */
-       } else if (timer_us == -1ULL && !session->rotate_timer_period) {
-               ret = LTTNG_ERR_ROTATION_NO_TIMER_SET;
+       }
+
+       /* Improper use of the API. */
+       if (new_value == -1ULL) {
+               WARN("Failing ROTATION_SET_SCHEDULE command as the value requested is -1");
+               ret = LTTNG_ERR_INVALID;
                goto end;
        }
 
-       if (size && size != -1ULL && session->rotate_size) {
-               ret = LTTNG_ERR_ROTATION_SIZE_SET;
+       /*
+        * As indicated in struct ltt_session's comments, a value of 0 means
+        * this rotation schedule type is not in use.
+        *
+        * Reject the command if we were asked to activate a schedule that was
+        * already active.
+        */
+       if (activate && *parameter_value != 0) {
+               DBG("Failing ROTATION_SET_SCHEDULE (activate) command as the schedule is already active");
+               ret = LTTNG_ERR_ROTATION_SCHEDULE_SET;
                goto end;
-       } else if (size == -1ULL && !session->rotate_size) {
-               ret = LTTNG_ERR_ROTATION_NO_SIZE_SET;
+       }
+
+       /*
+        * Reject the command if we were asked to deactivate a schedule that was
+        * not active.
+        */
+       if (!activate && *parameter_value == 0) {
+               DBG("Failing ROTATION_SET_SCHEDULE (deactivate) command as the schedule is already inactive");
+               ret = LTTNG_ERR_ROTATION_SCHEDULE_NOT_SET;
                goto end;
        }
 
-       if (timer_us && !session->rotate_timer_period) {
-               if (timer_us > UINT_MAX) {
-                       ret = LTTNG_ERR_INVALID;
-                       goto end;
-               }
+       /*
+        * Reject the command if we were asked to deactivate a schedule that
+        * doesn't exist.
+        */
+       if (!activate && *parameter_value != new_value) {
+               DBG("Failing ROTATION_SET_SCHEDULE (deactivate) command as an inexistant schedule was provided");
+               ret = LTTNG_ERR_ROTATION_SCHEDULE_NOT_SET;
+               goto end;
+       }
 
-               session->rotate_timer_period = timer_us;
-               /*
-                * Only start the timer if the session is active, otherwise
-                * it will be started when the session starts.
-                */
-               if (session->active) {
-                       ret = sessiond_rotate_timer_start(session, timer_us);
+       *parameter_value = activate ? new_value : 0;
+
+       switch (schedule_type) {
+       case LTTNG_ROTATION_SCHEDULE_TYPE_PERIODIC:
+               if (activate && session->active) {
+                       /*
+                        * Only start the timer if the session is active,
+                        * otherwise it will be started when the session starts.
+                        */
+                       ret = sessiond_rotate_timer_start(session, new_value);
                        if (ret) {
-                               ERR("Failed to enable rotate timer");
+                               ERR("Failed to enable session rotation timer in ROTATION_SET_SCHEDULE command");
+                               ret = LTTNG_ERR_UNK;
+                               goto end;
+                       }
+               } else {
+                       ret = sessiond_rotate_timer_stop(session);
+                       if (ret) {
+                               ERR("Failed to disable session rotation timer in ROTATION_SET_SCHEDULE command");
                                ret = LTTNG_ERR_UNK;
                                goto end;
                        }
                }
-       } else if (timer_us == -1ULL && session->rotate_timer_period > 0) {
-               sessiond_rotate_timer_stop(session);
-               session->rotate_timer_period = 0;
-       }
-
-       if (size > 0) {
-               if (size == -1ULL) {
-                       ret = unsubscribe_session_consumed_size_rotation(session,
-                                       notification_thread_handle);
+               break;
+       case LTTNG_ROTATION_SCHEDULE_TYPE_SIZE_THRESHOLD:
+               if (activate) {
+                       ret = subscribe_session_consumed_size_rotation(session,
+                                       new_value, notification_thread_handle);
                        if (ret) {
+                               ERR("Failed to enable consumed-size notification in ROTATION_SET_SCHEDULE command");
                                ret = LTTNG_ERR_UNK;
                                goto end;
                        }
-                       session->rotate_size = 0;
                } else {
-                       ret = subscribe_session_consumed_size_rotation(session,
-                                       size, notification_thread_handle);
+                       ret = unsubscribe_session_consumed_size_rotation(session,
+                                       notification_thread_handle);
                        if (ret) {
-                               PERROR("Subscribe to session usage");
+                               ERR("Failed to disable consumed-size notification in ROTATION_SET_SCHEDULE command");
                                ret = LTTNG_ERR_UNK;
                                goto end;
                        }
-                       session->rotate_size = size;
+
                }
+               break;
+       default:
+               /* Would have been caught before. */
+               abort();
        }
 
        ret = LTTNG_OK;
@@ -4864,6 +5017,49 @@ end:
        return ret;
 }
 
+/*
+ * Wait for a given path to be removed before continuing.
+ *
+ * 'path_data' is the shm path to monitor, as a NUL-terminated string. The
+ * path is checked with stat() and, as long as it exists and is a directory,
+ * the check is repeated after sleeping for
+ * SESSION_DESTROY_SHM_PATH_CHECK_DELAY_US microseconds.
+ *
+ * The wait ends when stat() fails (ENOENT meaning the path was removed; any
+ * other error is logged) or when the path is not a directory.
+ *
+ * Always returns LTTNG_OK; failures are only logged.
+ *
+ * NOTE(review): there is no upper bound on the wait; the loop runs for as
+ * long as the directory exists.
+ */
+static enum lttng_error_code wait_on_path(void *path_data)
+{
+       const char *shm_path = path_data;
+
+       DBG("Waiting for the shm path at %s to be removed before completing session destruction",
+                       shm_path);
+       while (true) {
+               int ret;
+               struct stat st;
+
+               ret = stat(shm_path, &st);
+               if (ret) {
+                       if (errno != ENOENT) {
+                               PERROR("stat() returned an error while checking for the existence of the shm path");
+                       } else {
+                               DBG("shm path no longer exists, completing the destruction of session");
+                       }
+                       break; /* Any stat() failure ends the wait, ENOENT or not. */
+               } else {
+                       if (!S_ISDIR(st.st_mode)) {
+                               ERR("The type of shm path %s returned by stat() is not a directory; aborting the wait for shm path removal",
+                                               shm_path);
+                               break;
+                       }
+               }
+               usleep(SESSION_DESTROY_SHM_PATH_CHECK_DELAY_US); /* Path still exists as a directory: retry later. */
+       }
+       return LTTNG_OK;
+}
+
+/*
+ * Returns a pointer to a handler to run on completion of a command.
+ * Returns NULL if no handler has to be run for the last command executed.
+ *
+ * The pending handler is cleared by this call: current_completion_handler is
+ * reset to NULL before returning, so a subsequent call returns NULL until a
+ * command sets a handler again.
+ */
+const struct cmd_completion_handler *cmd_pop_completion_handler(void)
+{
+       struct cmd_completion_handler *handler = current_completion_handler;
+
+       current_completion_handler = NULL;
+       return handler;
+}
+
 /*
  * Init command subsystem.
  */
This page took 0.028858 seconds and 5 git commands to generate.