From 3156892b9633c1fbee3ace6cc2b013e678343083 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?= Date: Wed, 16 Oct 2019 18:53:47 -0400 Subject: [PATCH] Fix: sessiond: leak of trace chunk on destruction error MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit By design, a trace chunk can be leaked on the consumer daemon's end if the session daemon does not close it. This is because the consumer daemon has no "top-level" session object which could bound the lifetime of a trace chunk. It was reported that errors during a session destruction operation could result in a trace chunk leak being reported by the consumer daemon on shut down. In the case that was reported, the failure to launch an application caused the metadata channel to never be created. When the session was destroyed, the rotation of the metadata channel failed with a "channel does not exist" error. This error caused cmd_rotate_session() to abort before the trace chunk close command was sent to the consumer daemon(s). This ultimately results in the leak described earlier. The fix consists in performing the trace chunk close command on the consumer daemon even if the rotation itself fails. Signed-off-by: Jérémie Galarneau --- src/bin/lttng-sessiond/cmd.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/bin/lttng-sessiond/cmd.c b/src/bin/lttng-sessiond/cmd.c index a17cc678e..71c0789b3 100644 --- a/src/bin/lttng-sessiond/cmd.c +++ b/src/bin/lttng-sessiond/cmd.c @@ -4788,6 +4788,8 @@ int cmd_rotate_session(struct ltt_session *session, struct lttng_trace_chunk *chunk_being_archived = NULL; struct lttng_trace_chunk *new_trace_chunk = NULL; enum lttng_trace_chunk_status chunk_status; + bool failed_to_rotate = false; + enum lttng_error_code rotation_fail_code = LTTNG_OK; assert(session); @@ -4845,7 +4847,13 @@ int cmd_rotate_session(struct ltt_session *session, } } - /* The current trace chunk becomes the chunk being archived. */ + /* + * The current trace chunk becomes the chunk being archived. + * + * After this point, "chunk_being_archived" must absolutely + * be closed on the consumer(s), otherwise it will never be + * cleaned-up, which will result in a leak. + */ ret = session_set_trace_chunk(session, new_trace_chunk, &chunk_being_archived); if (ret) { @@ -4861,13 +4869,15 @@ int cmd_rotate_session(struct ltt_session *session, if (session->kernel_session) { cmd_ret = kernel_rotate_session(session); if (cmd_ret != LTTNG_OK) { - goto error; + failed_to_rotate = true; + rotation_fail_code = cmd_ret; } } if (session->ust_session) { cmd_ret = ust_app_rotate_session(session); if (cmd_ret != LTTNG_OK) { - goto error; + failed_to_rotate = true; + rotation_fail_code = cmd_ret; } } @@ -4882,6 +4892,11 @@ int cmd_rotate_session(struct ltt_session *session, goto error; } + if (failed_to_rotate) { + cmd_ret = rotation_fail_code; + goto error; + } + session->quiet_rotation = quiet_rotation; ret = timer_session_rotation_pending_check_start(session, DEFAULT_ROTATE_PENDING_TIMER); -- 2.34.1