From: Jérémie Galarneau Date: Tue, 7 Jul 2020 22:55:19 +0000 (-0400) Subject: Fix: kconsumer: missing wait for metadata thread in do_sync_metadata X-Git-Url: http://git.efficios.com/?p=lttng-tools.git;a=commitdiff_plain;h=577eea73132dee4da47752590ed535206678eb34 Fix: kconsumer: missing wait for metadata thread in do_sync_metadata The `do_sync_metadata` function is invoked everytime a data sub-buffer is consumed in live mode. In the user space case, lttng_ustconsumer_sync_metadata() returns EAGAIN (positive) when there is new metadata to consume. This causes the "metadata rendez-vous" synchronization to take place. However, the kernel variant of this function returns 0 when there is new data to consume, causing the "rendez-vous" to be skipped. I have not observed an issue caused by this first-hand, but the check appears bogus and skips over an essential synchronization step. This check has been in place since at least 2013, although the callees and their return values may have changed at some point in the past. Solution -------- The user space and kernel code paths mix various return code conventions (negative errno, positive errno, 0/-1) which makes it difficult to understand the final return codes and most likely lead to this confusion in the first place. Moreover, returning EAGAIN to indicate that data is ready to be consumed is not appropriate in view of the existing conventions in the code base. An explicit `enum sync_metadata_status` is returned by the domains' sync_metadata operations which allows the common code to handle the various conditions in a straight-forward manner and for the "rendez-vous" to take place in the kernel case. Reported-by: Francis Deslauriers Signed-off-by: Jérémie Galarneau Change-Id: Ib022eee97054c0b376853dd05593e3b94bc9a8ca --- diff --git a/src/common/consumer/consumer-stream.c b/src/common/consumer/consumer-stream.c index 0c6dc8f51..fb878d72c 100644 --- a/src/common/consumer/consumer-stream.c +++ b/src/common/consumer/consumer-stream.c @@ -196,6 +196,7 @@ static int do_sync_metadata(struct lttng_consumer_stream *metadata, struct lttng_consumer_local_data *ctx) { int ret; + enum sync_metadata_status status; assert(metadata); assert(metadata->metadata_flag); @@ -243,7 +244,7 @@ static int do_sync_metadata(struct lttng_consumer_stream *metadata, /* * Empty the metadata cache and flush the current stream. */ - ret = lttng_kconsumer_sync_metadata(metadata); + status = lttng_kconsumer_sync_metadata(metadata); break; case LTTNG_CONSUMER32_UST: case LTTNG_CONSUMER64_UST: @@ -251,18 +252,23 @@ static int do_sync_metadata(struct lttng_consumer_stream *metadata, * Ask the sessiond if we have new metadata waiting and update the * consumer metadata cache. */ - ret = lttng_ustconsumer_sync_metadata(ctx, metadata); + status = lttng_ustconsumer_sync_metadata(ctx, metadata); break; default: - assert(0); - ret = -1; - break; + abort(); } - /* - * Error or no new metadata, we exit here. - */ - if (ret <= 0 || ret == ENODATA) { + + switch (status) { + case SYNC_METADATA_STATUS_NEW_DATA: + break; + case SYNC_METADATA_STATUS_NO_DATA: + ret = 0; goto end_unlock_mutex; + case SYNC_METADATA_STATUS_ERROR: + ret = -1; + goto end_unlock_mutex; + default: + abort(); } /* @@ -284,7 +290,7 @@ static int do_sync_metadata(struct lttng_consumer_stream *metadata, */ pthread_cond_wait(&metadata->metadata_rdv, &metadata->metadata_rdv_lock); pthread_mutex_unlock(&metadata->metadata_rdv_lock); - } while (ret == EAGAIN); + } while (status == SYNC_METADATA_STATUS_NEW_DATA); /* Success */ return 0; diff --git a/src/common/consumer/consumer.h b/src/common/consumer/consumer.h index 4770671c0..c5b702357 100644 --- a/src/common/consumer/consumer.h +++ b/src/common/consumer/consumer.h @@ -88,6 +88,12 @@ enum consumer_channel_type { CONSUMER_CHANNEL_TYPE_DATA = 1, }; +enum sync_metadata_status { + SYNC_METADATA_STATUS_NEW_DATA, + SYNC_METADATA_STATUS_NO_DATA, + SYNC_METADATA_STATUS_ERROR, +}; + extern struct lttng_consumer_global_data consumer_data; struct stream_list { diff --git a/src/common/kernel-consumer/kernel-consumer.c b/src/common/kernel-consumer/kernel-consumer.c index dd5cf1761..eeec3a65e 100644 --- a/src/common/kernel-consumer/kernel-consumer.c +++ b/src/common/kernel-consumer/kernel-consumer.c @@ -1348,36 +1348,38 @@ end: * metadata thread can consumer them. * * Metadata stream lock MUST be acquired. - * - * Return 0 if new metadatda is available, EAGAIN if the metadata stream - * is empty or a negative value on error. */ -int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata) +enum sync_metadata_status lttng_kconsumer_sync_metadata( + struct lttng_consumer_stream *metadata) { int ret; + enum sync_metadata_status status; assert(metadata); ret = kernctl_buffer_flush(metadata->wait_fd); if (ret < 0) { ERR("Failed to flush kernel stream"); + status = SYNC_METADATA_STATUS_ERROR; goto end; } ret = kernctl_snapshot(metadata->wait_fd); if (ret < 0) { - if (ret != -EAGAIN) { + if (errno == EAGAIN) { + /* No new metadata, exit. */ + DBG("Sync metadata, no new kernel metadata"); + status = SYNC_METADATA_STATUS_NO_DATA; + } else { ERR("Sync metadata, taking kernel snapshot failed."); - goto end; + status = SYNC_METADATA_STATUS_ERROR; } - DBG("Sync metadata, no new kernel metadata"); - /* No new metadata, exit. */ - ret = ENODATA; - goto end; + } else { + status = SYNC_METADATA_STATUS_NEW_DATA; } end: - return ret; + return status; } static diff --git a/src/common/kernel-consumer/kernel-consumer.h b/src/common/kernel-consumer/kernel-consumer.h index 48c787a46..07b51421d 100644 --- a/src/common/kernel-consumer/kernel-consumer.h +++ b/src/common/kernel-consumer/kernel-consumer.h @@ -24,6 +24,7 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, int sock, struct pollfd *consumer_sockpoll); int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream); int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream); -int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata); +enum sync_metadata_status lttng_kconsumer_sync_metadata( + struct lttng_consumer_stream *metadata); #endif /* _LTTNG_KCONSUMER_H */ diff --git a/src/common/ust-consumer/ust-consumer.c b/src/common/ust-consumer/ust-consumer.c index e5cfbd912..ce2e17f74 100644 --- a/src/common/ust-consumer/ust-consumer.c +++ b/src/common/ust-consumer/ust-consumer.c @@ -2446,8 +2446,8 @@ void metadata_stream_reset_cache_consumed_position( /* * Write up to one packet from the metadata cache to the channel. * - * Returns the number of bytes pushed in the cache, or a negative value - * on error. + * Returns the number of bytes pushed from the cache into the ring buffer, or a + * negative value on error. */ static int commit_one_metadata_packet(struct lttng_consumer_stream *stream) @@ -2531,15 +2531,13 @@ end: * awaiting on metadata to be pushed out. * * The RCU read side lock must be held by the caller. - * - * Return 0 if new metadatda is available, EAGAIN if the metadata stream - * is empty or a negative value on error. */ -int lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx, +enum sync_metadata_status lttng_ustconsumer_sync_metadata( + struct lttng_consumer_local_data *ctx, struct lttng_consumer_stream *metadata_stream) { int ret; - int retry = 0; + enum sync_metadata_status status; struct lttng_consumer_channel *metadata_channel; assert(ctx); @@ -2554,6 +2552,7 @@ int lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx, ret = lttng_ustconsumer_request_metadata(ctx, metadata_channel, 0, 0); pthread_mutex_lock(&metadata_stream->lock); if (ret < 0) { + status = SYNC_METADATA_STATUS_ERROR; goto end; } @@ -2571,38 +2570,30 @@ int lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx, if (consumer_stream_is_deleted(metadata_stream)) { DBG("Metadata stream %" PRIu64 " was deleted during the metadata synchronization", metadata_stream->key); - ret = 0; + status = SYNC_METADATA_STATUS_NO_DATA; goto end; } ret = commit_one_metadata_packet(metadata_stream); - if (ret <= 0) { + if (ret < 0) { + status = SYNC_METADATA_STATUS_ERROR; goto end; } else if (ret > 0) { - retry = 1; + status = SYNC_METADATA_STATUS_NEW_DATA; + } else /* ret == 0 */ { + status = SYNC_METADATA_STATUS_NO_DATA; + goto end; } ret = ustctl_snapshot(metadata_stream->ustream); if (ret < 0) { - if (errno != EAGAIN) { - ERR("Sync metadata, taking UST snapshot"); - goto end; - } - DBG("No new metadata when syncing them."); - /* No new metadata, exit. */ - ret = ENODATA; + ERR("Failed to take a snapshot of the metadata ring-buffer positions, ret = %d", ret); + status = SYNC_METADATA_STATUS_ERROR; goto end; } - /* - * After this flush, we still need to extract metadata. - */ - if (retry) { - ret = EAGAIN; - } - end: - return ret; + return status; } /* diff --git a/src/common/ust-consumer/ust-consumer.h b/src/common/ust-consumer/ust-consumer.h index b0a1df7f3..5059f5b95 100644 --- a/src/common/ust-consumer/ust-consumer.h +++ b/src/common/ust-consumer/ust-consumer.h @@ -53,7 +53,8 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, struct lttng_consumer_channel *channel, int timer, int wait); int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, struct lttng_consumer_channel *channel, int timer, int wait); -int lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx, +enum sync_metadata_status lttng_ustconsumer_sync_metadata( + struct lttng_consumer_local_data *ctx, struct lttng_consumer_stream *metadata); void lttng_ustconsumer_flush_buffer(struct lttng_consumer_stream *stream, int producer); @@ -209,10 +210,10 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, return -ENOSYS; } static inline -int lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx, +enum sync_metadata_status lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx, struct lttng_consumer_stream *metadata) { - return -ENOSYS; + return SYNC_METADATA_STATUS_ERROR; } static inline void lttng_ustconsumer_flush_buffer(struct lttng_consumer_stream *stream,