Fix: perform a rotation to a null trace chunk on session destruction
author: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Fri, 9 Aug 2019 20:40:34 +0000 (16:40 -0400)
committer: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Fri, 9 Aug 2019 22:53:26 +0000 (18:53 -0400)
In order to guarantee the consumption of all data by the time a
session's destruction completes, a rotation to a "null" trace chunk is
performed as part of a session's destruction.

This ensures that a session is only reclaimed when all of its
streams (on both the consumers and the relay daemon) have been
destroyed.

The notion of a "quiet" rotation is introduced in order to re-use the
session rotation infrastructure, but without the rotation being
visible to external users through either the notification system or a
rename of the trace output folder.

Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
src/bin/lttng-sessiond/client.c
src/bin/lttng-sessiond/cmd.c
src/bin/lttng-sessiond/cmd.h
src/bin/lttng-sessiond/rotation-thread.c
src/bin/lttng-sessiond/session.c
src/bin/lttng-sessiond/session.h

index aea667cd116d2fea0132bffec7782b065fc71536..02eac96e422f8c8f3067df10c7c040c9eb1d02cd 100644 (file)
@@ -1835,7 +1835,8 @@ error_add_context:
                        goto error;
                }
 
-               ret = cmd_rotate_session(cmd_ctx->session, &rotate_return);
+               ret = cmd_rotate_session(cmd_ctx->session, &rotate_return,
+                               false);
                if (ret < 0) {
                        ret = -ret;
                        goto error;
index f6c44d3be16e63096e856874344dd39e11b4d348..30f40946df57b118ecf2158c754a80bb1b74dbb1 100644 (file)
@@ -3176,7 +3176,7 @@ int cmd_destroy_session(struct ltt_session *session,
                 * Perform a last rotation on destruction if rotations have
                 * occurred during the session's lifetime.
                 */
-               ret = cmd_rotate_session(session, NULL);
+               ret = cmd_rotate_session(session, NULL, false);
                if (ret != LTTNG_OK) {
                        ERR("Failed to perform an implicit rotation as part of the destruction of session \"%s\": %s",
                                        session->name, lttng_strerror(-ret));
@@ -3184,7 +3184,23 @@ int cmd_destroy_session(struct ltt_session *session,
                 if (reply_context) {
                        reply_context->implicit_rotation_on_destroy = true;
                 }
-        }
+        } else if (session->has_been_started && session->current_trace_chunk &&
+                               session_output_supports_trace_chunks(session)) {
+               /*
+                * The user has not triggered a session rotation. However, to
+                * ensure all data has been consumed, the session is rotated
+                * to a 'null' trace chunk before it is destroyed.
+                *
+                * This is a "quiet" rotation meaning that no notification is
+                * emitted and no renaming of the current trace chunk takes
+                * place.
+                */
+               ret = cmd_rotate_session(session, NULL, true);
+               if (ret != LTTNG_OK) {
+                       ERR("Failed to perform a quiet rotation as part of the destruction of session \"%s\": %s",
+                                       session->name, lttng_strerror(-ret));
+               }
+       }
 
        if (session->shm_path[0]) {
                /*
@@ -4713,7 +4729,8 @@ int cmd_set_session_shm_path(struct ltt_session *session,
  * Returns LTTNG_OK on success or else a negative LTTng error code.
  */
 int cmd_rotate_session(struct ltt_session *session,
-               struct lttng_rotate_session_return *rotate_return)
+               struct lttng_rotate_session_return *rotate_return,
+               bool quiet_rotation)
 {
        int ret;
        uint64_t ongoing_rotation_chunk_id;
@@ -4798,13 +4815,16 @@ int cmd_rotate_session(struct ltt_session *session,
        }
 
        ret = session_close_trace_chunk(session, chunk_being_archived,
-                       &((enum lttng_trace_chunk_command_type) {
-                                       LTTNG_TRACE_CHUNK_COMMAND_TYPE_MOVE_TO_COMPLETED}));
+                       quiet_rotation ?
+                                       NULL :
+                                       &((enum lttng_trace_chunk_command_type){
+                                                       LTTNG_TRACE_CHUNK_COMMAND_TYPE_MOVE_TO_COMPLETED}));
        if (ret) {
                cmd_ret = LTTNG_ERR_CLOSE_TRACE_CHUNK_FAIL_CONSUMER;
                goto error;
        }
 
+       session->quiet_rotation = quiet_rotation;
        ret = timer_session_rotation_pending_check_start(session,
                        DEFAULT_ROTATE_PENDING_TIMER);
        if (ret) {
@@ -4822,14 +4842,16 @@ int cmd_rotate_session(struct ltt_session *session,
 
        session->chunk_being_archived = chunk_being_archived;
        chunk_being_archived = NULL;
-       ret = notification_thread_command_session_rotation_ongoing(
-                       notification_thread_handle,
-                       session->name, session->uid, session->gid,
-                       ongoing_rotation_chunk_id);
-       if (ret != LTTNG_OK) {
-               ERR("Failed to notify notification thread that a session rotation is ongoing for session %s",
-                               session->name);
-               cmd_ret = ret;
+       if (!quiet_rotation) {
+               ret = notification_thread_command_session_rotation_ongoing(
+                               notification_thread_handle,
+                               session->name, session->uid, session->gid,
+                               ongoing_rotation_chunk_id);
+               if (ret != LTTNG_OK) {
+                       ERR("Failed to notify notification thread that a session rotation is ongoing for session %s",
+                                       session->name);
+                       cmd_ret = ret;
+               }
        }
 
        DBG("Cmd rotate session %s, archive_id %" PRIu64 " sent",
index 21a12d51f6721566445e54d786059adb927e0f90..5ac24a5069cfc4b000307641aa124d39c070e2af 100644 (file)
@@ -133,7 +133,8 @@ int cmd_unregister_trigger(struct command_ctx *cmd_ctx, int sock,
                struct notification_thread_handle *notification_thread_handle);
 
 int cmd_rotate_session(struct ltt_session *session,
-               struct lttng_rotate_session_return *rotate_return);
+               struct lttng_rotate_session_return *rotate_return,
+               bool quiet_rotation);
 int cmd_rotate_get_info(struct ltt_session *session,
                struct lttng_rotation_get_info_return *info_return,
                uint64_t rotate_id);
index cb06413b64c371e1dd9dc7fc384872530092501e..af09ca0a5cb633bd04681ed382082b60689f7629 100644 (file)
@@ -494,7 +494,6 @@ int check_session_rotation_pending(struct ltt_session *session,
 
        check_session_rotation_pending_on_consumers(session,
                        &rotation_completed);
-
        if (!rotation_completed ||
                        session->rotation_state == LTTNG_ROTATION_STATE_ERROR) {
                goto end;
@@ -514,21 +513,23 @@ int check_session_rotation_pending(struct ltt_session *session,
        }
        session_reset_rotation_state(session, LTTNG_ROTATION_STATE_COMPLETED);
 
-       location = session_get_trace_archive_location(session);
-       /* Ownership of location is transferred. */
-       ret = notification_thread_command_session_rotation_completed(
-                       notification_thread_handle,
-                       session->name,
-                       session->uid,
-                       session->gid,
-                       session->last_archived_chunk_id.value,
-                       location);
-       if (ret != LTTNG_OK) {
-               ERR("[rotation-thread] Failed to notify notification thread of completed rotation for session %s",
-                               session->name);
+       if (!session->quiet_rotation) {
+               location = session_get_trace_archive_location(session);
+               /* Ownership of location is transferred. */
+               ret = notification_thread_command_session_rotation_completed(
+                               notification_thread_handle,
+                               session->name,
+                               session->uid,
+                               session->gid,
+                               session->last_archived_chunk_id.value,
+                               location);
+               if (ret != LTTNG_OK) {
+                       ERR("[rotation-thread] Failed to notify notification thread of completed rotation for session %s",
+                                       session->name);
+               }
        }
 
-       if (!session->active) {
+       if (!session->active && !session->quiet_rotation) {
                /*
                 * A stop command was issued during the rotation, it is
                 * up to the rotation completion check to perform the
@@ -594,7 +595,7 @@ int launch_session_rotation(struct ltt_session *session)
        DBG("[rotation-thread] Launching scheduled time-based rotation on session \"%s\"",
                        session->name);
 
-       ret = cmd_rotate_session(session, &rotation_return);
+       ret = cmd_rotate_session(session, &rotation_return, false);
        if (ret == LTTNG_OK) {
                DBG("[rotation-thread] Scheduled time-based rotation successfully launched on session \"%s\"",
                                session->name);
@@ -741,7 +742,7 @@ int handle_condition(const struct lttng_condition *condition,
                goto end_unlock;
        }
 
-       ret = cmd_rotate_session(session, NULL);
+       ret = cmd_rotate_session(session, NULL, false);
        if (ret == -LTTNG_ERR_ROTATION_PENDING) {
                DBG("Rotate already pending, subscribe to the next threshold value");
        } else if (ret != LTTNG_OK) {
index 09953b48218d6b592d4f8e1ad4d69ef0461ab3d7..6e187f2ff51df9c17fb162f0096e7e6a92d7a372 100644 (file)
@@ -39,6 +39,7 @@
 #include "utils.h"
 #include "trace-ust.h"
 #include "timer.h"
+#include "cmd.h"
 
 struct ltt_session_destroy_notifier_element {
        ltt_session_destroy_notifier notifier;
@@ -794,27 +795,13 @@ void session_release(struct urcu_ref *ref)
        struct ltt_ust_session *usess;
        struct ltt_kernel_session *ksess;
        struct ltt_session *session = container_of(ref, typeof(*session), ref);
+       const bool session_published = session->published;
 
        assert(!session->chunk_being_archived);
 
        usess = session->ust_session;
        ksess = session->kernel_session;
 
-       session_notify_destruction(session);
-       lttng_dynamic_array_reset(&session->destroy_notifiers);
-       if (session->current_trace_chunk) {
-               ret = session_close_trace_chunk(session, session->current_trace_chunk, NULL);
-               if (ret) {
-                       ERR("Failed to close the current trace chunk of session \"%s\" during its release",
-                                       session->name);
-               }
-               ret = _session_set_trace_chunk_no_lock_check(session, NULL, NULL);
-               if (ret) {
-                       ERR("Failed to release the current trace chunk of session \"%s\" during its release",
-                                       session->name);
-               }
-        }
-
         /* Clean kernel session teardown */
        kernel_destroy_session(ksess);
        session->kernel_session = NULL;
@@ -851,14 +838,22 @@ void session_release(struct urcu_ref *ref)
 
        pthread_mutex_destroy(&session->lock);
 
-       if (session->published) {
+       if (session_published) {
                ASSERT_LOCKED(ltt_session_list.lock);
                del_session_list(session);
                del_session_ht(session);
-               pthread_cond_broadcast(&ltt_session_list.removal_cond);
        }
+       session_notify_destruction(session);
+       lttng_dynamic_array_reset(&session->destroy_notifiers);
        free(session->last_archived_chunk_name);
        free(session);
+       if (session_published) {
+               /*
+                * Broadcast after free-ing to ensure the memory is
+                * reclaimed before the main thread exits.
+                */
+               pthread_cond_broadcast(&ltt_session_list.removal_cond);
+       }
 }
 
 /*
index d524df1cc225caa60c0089227c5d77856ece0525..6b7d63c3cba5084d9425f37732cc78277826d19c 100644 (file)
@@ -180,6 +180,7 @@ struct ltt_session {
        struct lttng_trace_chunk *chunk_being_archived;
        /* Current state of a rotation. */
        enum lttng_rotation_state rotation_state;
+       bool quiet_rotation;
        char *last_archived_chunk_name;
        LTTNG_OPTIONAL(uint64_t) last_archived_chunk_id;
        struct lttng_dynamic_array destroy_notifiers;
This page took 0.036088 seconds and 5 git commands to generate.