From ad8bec244fdbb0e7705fd1865ae71f36f06d2b94 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?= Date: Mon, 13 Jul 2020 18:42:22 -0400 Subject: [PATCH] Fix: relayd: viewer metadata is not rotated after a session clear MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Issue observed ============== Following a session clear, babeltrace sometimes doesn't receive the content of the metadata that is announced in the get_metadata reply header. This causes babeltrace2 to assert (a fix has been submitted) since the protocol state becomes de-synchronized, causing babeltrace to interpret follow-up replies as garbage. This was occasionally observed on the CI when running the "clear" tests. Cause ===== There are no provisions made for rotating a viewer metadata stream when a clear is performed on a live session. This means that a client can request metadata that is only present in a newer chunk. In the case of the crashes observed on the CI, the relay daemon attempts to service a get_new_metadata request of size `4096`. It then fails to read the data (as it was never present in the original trace chunk). The relay daemon does not interpret the `0` returned by lttng_read() as an error and sends a reply announcing `4096`bytes of metadata and no payload. Solution ======== Two fixes are rolled into this patch. First, the return of lttng_read is checked for `0` and that situation is handled as an error. However, this still leaves the problem of the metadata stream not being rotated. Secondly, the metadata relay_stream is checked for a rotation on every `get_metadata` command. If a rotation has been detected, a viewer rotation is performed on the metadata stream (very similar to the data stream). This solves the problem, but it leaves a case which the protocol does not account for. Essentially, the following can take place: - relayd sets the "NEW_METADATA" flag as part of a `get_next_index` query reply - A rotation of the metadata stream occurs, no data is sent. - client requests metadata - metadata sent > received (was reset to 0 as part of the rotation) In this scenario, the current implementation returned NO_NEW_METADATA, but it is erroneous. Returning this guarantees to the viewer that it will be able to decode all data packets that follow (until new metadata is signalled, if ever). Ideally, we would return a `RETRY` code, as is done by the data stream handler when it detects that a rotation is taking place. Unfortunately, such a code doesn't exist for the `get_metadata` command. We return ̀ OK` with a length of 0, which is technically correct since viewers are supposed to fetch metadata until the relay daemon returns the `NO_NEW_METADATA` status code. However, supporting this has required changes to babeltrace2's lttng-live source component. I'm anticipating that most implementations don't handle the 0-length case any better. Known drawbacks =============== Older viewers may not handle `OK` replies with a length of 0 gracefully. Sending `NO_NEW_METADATA` is not an option as it breaks the guarantee that all necessary metadata will be received before `NO_NEW_METADATA`. Signed-off-by: Jérémie Galarneau Change-Id: I99cc1cea15125b48f5ee4e407d782b34b5e3dfb5 --- src/bin/lttng-relayd/live.c | 136 +++++++++++++++++++------- src/bin/lttng-relayd/viewer-session.c | 3 +- src/common/trace-chunk.c | 26 +++++ src/common/trace-chunk.h | 4 + 4 files changed, 132 insertions(+), 37 deletions(-) diff --git a/src/bin/lttng-relayd/live.c b/src/bin/lttng-relayd/live.c index 06cfa5c7b..37c546452 100644 --- a/src/bin/lttng-relayd/live.c +++ b/src/bin/lttng-relayd/live.c @@ -1542,42 +1542,18 @@ int viewer_get_next_index(struct relay_connection *conn) goto send_reply; } - if (rstream->trace_chunk) { - uint64_t rchunk_id, vchunk_id; + if (rstream->trace_chunk && !lttng_trace_chunk_ids_equal( + conn->viewer_session->current_trace_chunk, + rstream->trace_chunk)) { + DBG("Metadata relay stream and viewer chunk ids differ"); - /* - * If the relay stream is not yet closed, ensure the viewer - * chunk matches the relay chunk after clear. - */ - if (lttng_trace_chunk_get_id(rstream->trace_chunk, - &rchunk_id) != LTTNG_TRACE_CHUNK_STATUS_OK) { - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR); - goto send_reply; - } - if (lttng_trace_chunk_get_id( - conn->viewer_session->current_trace_chunk, - &vchunk_id) != LTTNG_TRACE_CHUNK_STATUS_OK) { + ret = viewer_session_set_trace_chunk_copy( + conn->viewer_session, + rstream->trace_chunk); + if (ret) { viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR); goto send_reply; } - - if (rchunk_id != vchunk_id) { - DBG("Relay and viewer chunk ids differ: " - "rchunk_id %" PRIu64 " vchunk_id %" PRIu64, - rchunk_id, vchunk_id); - - lttng_trace_chunk_put( - conn->viewer_session->current_trace_chunk); - conn->viewer_session->current_trace_chunk = NULL; - ret = viewer_session_set_trace_chunk_copy( - conn->viewer_session, - rstream->trace_chunk); - if (ret) { - viewer_index.status = - htobe32(LTTNG_VIEWER_INDEX_ERR); - goto send_reply; - } - } } if (conn->viewer_session->current_trace_chunk != vstream->stream_file.trace_chunk) { @@ -1936,10 +1912,45 @@ int viewer_get_metadata(struct relay_connection *conn) goto send_reply; } + if (vstream->stream->trace_chunk && + !lttng_trace_chunk_ids_equal( + conn->viewer_session->current_trace_chunk, + vstream->stream->trace_chunk)) { + /* A rotation has occurred on the relay stream. */ + DBG("Metadata relay stream and viewer chunk ids differ"); + + ret = viewer_session_set_trace_chunk_copy( + conn->viewer_session, + vstream->stream->trace_chunk); + if (ret) { + reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR); + goto send_reply; + } + } + + if (conn->viewer_session->current_trace_chunk != + vstream->stream_file.trace_chunk) { + bool acquired_reference; + + DBG("Viewer session and viewer stream chunk differ: " + "vsession chunk %p vstream chunk %p", + conn->viewer_session->current_trace_chunk, + vstream->stream_file.trace_chunk); + lttng_trace_chunk_put(vstream->stream_file.trace_chunk); + acquired_reference = lttng_trace_chunk_get(conn->viewer_session->current_trace_chunk); + assert(acquired_reference); + vstream->stream_file.trace_chunk = + conn->viewer_session->current_trace_chunk; + viewer_stream_close_files(vstream); + } + len = vstream->stream->metadata_received - vstream->metadata_sent; - /* first time, we open the metadata file */ - if (!vstream->stream_file.handle) { + /* + * Either this is the first time the metadata file is read, or a + * rotation of the corresponding relay stream has occured. + */ + if (!vstream->stream_file.handle && len > 0) { struct fs_handle *fs_handle; char file_path[LTTNG_PATH_MAX]; enum lttng_trace_chunk_status status; @@ -1974,6 +1985,33 @@ int viewer_get_metadata(struct relay_connection *conn) goto error; } vstream->stream_file.handle = fs_handle; + + if (vstream->metadata_sent != 0) { + /* + * The client does not expect to receive any metadata + * it has received and metadata files in successive + * chunks must be a strict superset of one another. + * + * Skip the first `metadata_sent` bytes to ensure + * they are not sent a second time to the client. + * + * Baring a block layer error or an internal error, + * this seek should not fail as + * `vstream->stream->metadata_received` is reset when + * a relay stream is rotated. If this is reached, it is + * safe to assume that + * `metadata_received` > `metadata_sent`. + */ + const off_t seek_ret = fs_handle_seek(fs_handle, + vstream->metadata_sent, SEEK_SET); + + if (seek_ret < 0) { + PERROR("Failed to seek metadata viewer stream file to `sent` position: pos = %" PRId64, + vstream->metadata_sent); + reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR); + goto send_reply; + } + } } reply.len = htobe64(len); @@ -1992,8 +2030,34 @@ int viewer_get_metadata(struct relay_connection *conn) fs_handle_put_fd(vstream->stream_file.handle); fd = -1; if (read_len < len) { - PERROR("Relay reading metadata file"); - goto error; + if (read_len < 0) { + PERROR("Failed to read metadata file"); + goto error; + } else { + /* + * A clear has been performed which prevents the relay + * from sending `len` bytes of metadata. + * + * It is important not to send any metadata if we + * couldn't read all the available metadata in one shot: + * sending partial metadata can cause the client to + * attempt to parse an incomplete (incoherent) metadata + * stream, which would result in an error. + */ + const off_t seek_ret = fs_handle_seek( + vstream->stream_file.handle, -read_len, + SEEK_CUR); + + DBG("Failed to read metadata: requested = %zd, got = %zd", + len, read_len); + read_len = 0; + len = 0; + if (seek_ret < 0) { + PERROR("Failed to restore metadata file position after partial read"); + ret = -1; + goto error; + } + } } vstream->metadata_sent += read_len; reply.status = htobe32(LTTNG_VIEWER_METADATA_OK); diff --git a/src/bin/lttng-relayd/viewer-session.c b/src/bin/lttng-relayd/viewer-session.c index 3a92071a2..0b77fd144 100644 --- a/src/bin/lttng-relayd/viewer-session.c +++ b/src/bin/lttng-relayd/viewer-session.c @@ -38,7 +38,8 @@ int viewer_session_set_trace_chunk_copy(struct relay_viewer_session *vsession, struct lttng_trace_chunk *viewer_chunk; assert(relay_session_trace_chunk); - assert(!vsession->current_trace_chunk); + lttng_trace_chunk_put(vsession->current_trace_chunk); + vsession->current_trace_chunk = NULL; DBG("Copying relay session's current trace chunk to the viewer session"); viewer_chunk = lttng_trace_chunk_copy(relay_session_trace_chunk); diff --git a/src/common/trace-chunk.c b/src/common/trace-chunk.c index 313d49375..ec52e5d2b 100644 --- a/src/common/trace-chunk.c +++ b/src/common/trace-chunk.c @@ -1828,6 +1828,32 @@ const char *lttng_trace_chunk_command_type_get_name( } } +LTTNG_HIDDEN +bool lttng_trace_chunk_ids_equal(const struct lttng_trace_chunk *chunk_a, + const struct lttng_trace_chunk *chunk_b) +{ + bool equal = false; + + if (!chunk_a || !chunk_b) { + goto end; + } + + if (chunk_a->id.is_set ^ chunk_a->id.is_set) { + /* One id is set and not the other, thus they are not equal. */ + goto end; + } + + if (!chunk_a->id.is_set) { + /* Both ids are unset. */ + equal = true; + } else { + equal = chunk_a->id.value == chunk_b->id.value; + } + +end: + return equal; +} + LTTNG_HIDDEN bool lttng_trace_chunk_get(struct lttng_trace_chunk *chunk) { diff --git a/src/common/trace-chunk.h b/src/common/trace-chunk.h index 022bc9f92..ce668bc8b 100644 --- a/src/common/trace-chunk.h +++ b/src/common/trace-chunk.h @@ -200,6 +200,10 @@ LTTNG_HIDDEN const char *lttng_trace_chunk_command_type_get_name( enum lttng_trace_chunk_command_type command); +LTTNG_HIDDEN +bool lttng_trace_chunk_ids_equal(const struct lttng_trace_chunk *chunk_a, + const struct lttng_trace_chunk *chunk_b); + /* Returns true on success. */ LTTNG_HIDDEN bool lttng_trace_chunk_get(struct lttng_trace_chunk *chunk); -- 2.34.1