From f2a444f17e07f805109c01ab4c7f53cc98b1adf3 Mon Sep 17 00:00:00 2001 From: David Goulet Date: Tue, 18 Jun 2013 16:07:02 -0400 Subject: [PATCH] Fix: channel and stream leak in consumerd Multiple error paths were possibly leaking channel and stream structure in the lttng-consumerd. This has been detected by the stress test killing the relayd randomly during a long period of time. Acked-by: Mathieu Desnoyers Signed-off-by: David Goulet --- src/bin/lttng-sessiond/ust-app.c | 12 ++-- src/common/consumer.c | 31 +++++++++++ src/common/ust-consumer/ust-consumer.c | 76 +++++++++++++++++--------- 3 files changed, 84 insertions(+), 35 deletions(-) diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c index 07c75031a..1736a3e85 100644 --- a/src/bin/lttng-sessiond/ust-app.c +++ b/src/bin/lttng-sessiond/ust-app.c @@ -2610,10 +2610,8 @@ static int create_ust_app_metadata(struct ust_app_session *ua_sess, ret = ust_consumer_ask_channel(ua_sess, metadata, consumer, socket, registry); if (ret < 0) { - /* - * Safe because the metadata obj pointer is not set so the delete below - * will not put a FD back again. - */ + /* Nullify the metadata key so we don't try to close it later on. */ + registry->metadata_key = 0; goto error_consumer; } @@ -2625,10 +2623,8 @@ static int create_ust_app_metadata(struct ust_app_session *ua_sess, */ ret = consumer_setup_metadata(socket, metadata->key); if (ret < 0) { - /* - * Safe because the metadata obj pointer is not set so the delete below - * will not put a FD back again. - */ + /* Nullify the metadata key so we don't try to close it later on. */ + registry->metadata_key = 0; goto error_consumer; } diff --git a/src/common/consumer.c b/src/common/consumer.c index 96bc33726..116441171 100644 --- a/src/common/consumer.c +++ b/src/common/consumer.c @@ -287,6 +287,7 @@ void consumer_del_channel(struct lttng_consumer_channel *channel) { int ret; struct lttng_ht_iter iter; + struct lttng_consumer_stream *stream, *stmp; DBG("Consumer delete channel key %" PRIu64, channel->key); @@ -297,6 +298,13 @@ void consumer_del_channel(struct lttng_consumer_channel *channel) break; case LTTNG_CONSUMER32_UST: case LTTNG_CONSUMER64_UST: + /* Delete streams that might have been left in the stream list. */ + cds_list_for_each_entry_safe(stream, stmp, &channel->streams.head, + send_node) { + cds_list_del(&stream->send_node); + lttng_ustconsumer_del_stream(stream); + free(stream); + } lttng_ustconsumer_del_channel(channel); break; default: @@ -2728,6 +2736,8 @@ restart: break; case CONSUMER_CHANNEL_DEL: { + struct lttng_consumer_stream *stream, *stmp; + rcu_read_lock(); chan = consumer_find_channel(key); if (!chan) { @@ -2741,6 +2751,26 @@ restart: assert(ret == 0); consumer_close_channel_streams(chan); + switch (consumer_data.type) { + case LTTNG_CONSUMER_KERNEL: + break; + case LTTNG_CONSUMER32_UST: + case LTTNG_CONSUMER64_UST: + /* Delete streams that might have been left in the stream list. */ + cds_list_for_each_entry_safe(stream, stmp, &chan->streams.head, + send_node) { + cds_list_del(&stream->send_node); + lttng_ustconsumer_del_stream(stream); + uatomic_sub(&stream->chan->refcount, 1); + assert(&chan->refcount); + free(stream); + } + break; + default: + ERR("Unknown consumer_data type"); + assert(0); + } + /* * Release our own refcount. Force channel deletion even if * streams were not initialized. @@ -2787,6 +2817,7 @@ restart: lttng_poll_del(&events, chan->wait_fd); ret = lttng_ht_del(channel_ht, &iter); assert(ret == 0); + assert(cds_list_empty(&chan->streams.head)); consumer_close_channel_streams(chan); /* Release our own refcount */ diff --git a/src/common/ust-consumer/ust-consumer.c b/src/common/ust-consumer/ust-consumer.c index 780a601f6..f783d4058 100644 --- a/src/common/ust-consumer/ust-consumer.c +++ b/src/common/ust-consumer/ust-consumer.c @@ -394,7 +394,7 @@ static int send_sessiond_channel(int sock, struct lttng_consumer_channel *channel, struct lttng_consumer_local_data *ctx, int *relayd_error) { - int ret; + int ret, ret_code = LTTNG_OK; struct lttng_consumer_stream *stream; assert(channel); @@ -403,18 +403,6 @@ static int send_sessiond_channel(int sock, DBG("UST consumer sending channel %s to sessiond", channel->name); - /* Send channel to sessiond. */ - ret = ustctl_send_channel_to_sessiond(sock, channel->uchan); - if (ret < 0) { - goto error; - } - - ret = ustctl_channel_close_wakeup_fd(channel->uchan); - if (ret < 0) { - goto error; - } - - /* The channel was sent successfully to the sessiond at this point. */ cds_list_for_each_entry(stream, &channel->streams.head, send_node) { /* Try to send the stream to the relayd if one is available. */ ret = send_stream_to_relayd(stream); @@ -426,9 +414,33 @@ static int send_sessiond_channel(int sock, if (relayd_error) { *relayd_error = 1; } - goto error; + ret_code = LTTNG_ERR_RELAYD_CONNECT_FAIL; } + } + /* Inform sessiond that we are about to send channel and streams. */ + ret = consumer_send_status_msg(sock, ret_code); + if (ret < 0 || ret_code != LTTNG_OK) { + /* + * Either the session daemon is not responding or the relayd died so we + * stop now. + */ + goto error; + } + + /* Send channel to sessiond. */ + ret = ustctl_send_channel_to_sessiond(sock, channel->uchan); + if (ret < 0) { + goto error; + } + + ret = ustctl_channel_close_wakeup_fd(channel->uchan); + if (ret < 0) { + goto error; + } + + /* The channel was sent successfully to the sessiond at this point. */ + cds_list_for_each_entry(stream, &channel->streams.head, send_node) { /* Send stream to session daemon. */ ret = send_sessiond_stream(sock, stream); if (ret < 0) { @@ -447,6 +459,9 @@ static int send_sessiond_channel(int sock, return 0; error: + if (ret_code != LTTNG_OK) { + ret = -1; + } return ret; } @@ -677,7 +692,7 @@ static int setup_metadata(struct lttng_consumer_local_data *ctx, uint64_t key) if (!metadata) { ERR("UST consumer push metadata %" PRIu64 " not found", key); ret = LTTNG_ERR_UST_CHAN_NOT_FOUND; - goto error; + goto error_find; } /* @@ -709,9 +724,17 @@ static int setup_metadata(struct lttng_consumer_local_data *ctx, uint64_t key) /* List MUST be empty after or else it could be reused. */ assert(cds_list_empty(&metadata->streams.head)); - ret = 0; + return 0; error: + /* + * Delete metadata channel on error. At this point, the metadata stream can + * NOT be monitored by the metadata thread thus having the guarantee that + * the stream is still in the local stream list of the channel. This call + * will make sure to clean that list. + */ + consumer_del_channel(metadata); +error_find: return ret; } @@ -1007,13 +1030,6 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, goto end_msg_sessiond; } - /* Inform sessiond that we are about to send channel and streams. */ - ret = consumer_send_status_msg(sock, LTTNG_OK); - if (ret < 0) { - /* Somehow, the session daemon is not responding anymore. */ - goto error_fatal; - } - /* Send everything to sessiond. */ ret = send_sessiond_channel(sock, channel, ctx, &relayd_err); if (ret < 0) { @@ -1021,10 +1037,10 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, /* * We were unable to send to the relayd the stream so avoid * sending back a fatal error to the thread since this is OK - * and the consumer can continue its work. + * and the consumer can continue its work. The above call + * has sent the error status message to the sessiond. */ - ret_code = LTTNG_ERR_RELAYD_CONNECT_FAIL; - goto end_msg_sessiond; + goto end_nosignal; } /* * The communicaton was broken hence there is a bad state between @@ -1544,7 +1560,13 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, ret_code = lttng_ustconsumer_recv_metadata(ctx->consumer_metadata_socket, key, offset, len, channel); - (void) consumer_send_status_msg(ctx->consumer_metadata_socket, ret_code); + if (ret_code >= 0) { + /* + * Only send the status msg if the sessiond is alive meaning a positive + * ret code. + */ + (void) consumer_send_status_msg(ctx->consumer_metadata_socket, ret_code); + } ret = 0; end: -- 2.34.1