src.ctf.lttng-live: make `lttng_live_get_one_metadata_packet()` return status
authorFrancis Deslauriers <francis.deslauriers@efficios.com>
Mon, 28 Oct 2019 18:49:08 +0000 (14:49 -0400)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Fri, 1 Nov 2019 14:49:33 +0000 (10:49 -0400)
This is needed to differentiate real critical errors from relay daemon
from errors which are handled.

The relay daemon can return the LTTNG_VIEWER_METADATA_ERR status to
convey that the requested metadata stream is not found of the relay.
This can happen during normal operations if the associated trace is no
longer active. Short lived UST apps in per-pid session may produce such
scenario. In these cases, simply remove the live trace from the session
and go on with the iteration.

Signed-off-by: Francis Deslauriers <francis.deslauriers@efficios.com>
Change-Id: I452e63bd12a3c58d518726e3d178be241464bc2a
Reviewed-on: https://review.lttng.org/c/babeltrace/+/2277

src/plugins/ctf/lttng-live/lttng-live.c
src/plugins/ctf/lttng-live/lttng-live.h
src/plugins/ctf/lttng-live/metadata.c
src/plugins/ctf/lttng-live/viewer-connection.c

index 17818348ba334ab7288fc184ce64d6b560eed55b..774b91c8e39926c9bb7ee6c68b4cc53ca61da9a4 100644 (file)
@@ -456,13 +456,30 @@ enum lttng_live_iterator_status lttng_live_get_session(
                        status != LTTNG_LIVE_ITERATOR_STATUS_END) {
                goto end;
        }
-       for (trace_idx = 0; trace_idx < session->traces->len; trace_idx++) {
+       trace_idx = 0;
+       while (trace_idx < session->traces->len) {
                struct lttng_live_trace *trace =
                        g_ptr_array_index(session->traces, trace_idx);
 
                status = lttng_live_metadata_update(trace);
-               if (status != LTTNG_LIVE_ITERATOR_STATUS_OK &&
-                               status != LTTNG_LIVE_ITERATOR_STATUS_END) {
+               switch (status) {
+               case LTTNG_LIVE_ITERATOR_STATUS_OK:
+                       trace_idx++;
+                       break;
+               case LTTNG_LIVE_ITERATOR_STATUS_END:
+                       /*
+                        * The trace has ended. Remove it of the array an
+                        * continue the iteration.
+                        * We can remove the trace safely when using the
+                        * g_ptr_array_remove_index_fast because it replaces
+                        * the element at trace_idx with the array's last
+                        * element. trace_idx is not incremented because of
+                        * that.
+                        */
+                       (void) g_ptr_array_remove_index_fast(session->traces,
+                               trace_idx);
+                       break;
+               default:
                        goto end;
                }
        }
index 44c78e02be0fb5d7f917b80317a73b1f27c9318a..994563d0fc9afb4cb947350457f58258685555cb 100644 (file)
@@ -133,8 +133,6 @@ struct lttng_live_metadata {
        uint64_t stream_id;
        /* Weak reference. */
        struct ctf_metadata_decoder *decoder;
-
-       bool closed;
 };
 
 struct lttng_live_trace {
@@ -296,8 +294,30 @@ int lttng_live_add_session(struct lttng_live_msg_iter *lttng_live_msg_iter,
                const char *hostname,
                const char *session_name);
 
-ssize_t lttng_live_get_one_metadata_packet(struct lttng_live_trace *trace,
-               FILE *fp);
+enum lttng_live_get_one_metadata_status {
+       /* The end of the metadata stream was reached. */
+       LTTNG_LIVE_GET_ONE_METADATA_STATUS_END      = 1,
+       /* One metadata packet was received and written to file. */
+       LTTNG_LIVE_GET_ONE_METADATA_STATUS_OK       = 0,
+       /* The metadata stream was not found on the relay. */
+       LTTNG_LIVE_GET_ONE_METADATA_STATUS_CLOSED   = -1,
+       /*
+        * A critical error occurred when contacting the relay or while
+        * handling its response.
+        */
+       LTTNG_LIVE_GET_ONE_METADATA_STATUS_ERROR    = -2,
+};
+
+/*
+ * lttng_live_get_one_metadata_packet() asks the Relay Daemon for new metadata.
+ * If new metadata is received, the function writes it to the provided file
+ * handle and updates the reply_len output parameter. This function should be
+ * called in loop until _END status is received to ensure all metadata is
+ * written to the file.
+ */
+enum lttng_live_get_one_metadata_status lttng_live_get_one_metadata_packet(
+               struct lttng_live_trace *trace, FILE *fp, size_t *reply_len);
+
 enum lttng_live_iterator_status lttng_live_get_next_index(
                struct lttng_live_msg_iter *lttng_live_msg_iter,
                struct lttng_live_stream_iterator *stream,
index f77d62f8baf74d1ce049b1e35cb201b10563f03a..df75b15f5e9410d10fb1f9c28d1985adad683526 100644 (file)
@@ -120,15 +120,16 @@ enum lttng_live_iterator_status lttng_live_metadata_update(
 {
        struct lttng_live_session *session = trace->session;
        struct lttng_live_metadata *metadata = trace->metadata;
-       ssize_t ret = 0;
        size_t size, len_read = 0;
        char *metadata_buf = NULL;
+       bool keep_receiving;
        FILE *fp = NULL;
        enum ctf_metadata_decoder_status decoder_status;
        enum lttng_live_iterator_status status =
                LTTNG_LIVE_ITERATOR_STATUS_OK;
        bt_logging_level log_level = trace->log_level;
        bt_self_component *self_comp = trace->self_comp;
+       enum lttng_live_get_one_metadata_status metadata_status;
 
        /* No metadata stream yet. */
        if (!metadata) {
@@ -156,42 +157,49 @@ enum lttng_live_iterator_status lttng_live_metadata_update(
                goto error;
        }
 
+       keep_receiving = true;
        /* Grab all available metadata. */
-       do {
+       while (keep_receiving) {
+               size_t reply_len = 0;
                /*
-                * get_one_metadata_packet returns the number of bytes
-                * received, 0 when we have received everything, a
-                * negative value on error.
+                * lttng_live_get_one_metadata_packet() asks the Relay Daemon
+                * for new metadata. If new metadata is received, the function
+                * writes it to the provided file handle and updates the
+                * reply_len output parameter. We call this function in loop
+                * until it returns _END meaning that no new metadata is
+                * available.
+                * We may receive a _CLOSED status if the metadata stream we
+                * are requesting is no longer available on the relay.
+                * If we receive an _ERROR status, it means there was a
+                * networking, allocating, or some other unrecoverable error.
                 */
-               ret = lttng_live_get_one_metadata_packet(trace, fp);
-               if (ret > 0) {
-                       len_read += ret;
+               metadata_status = lttng_live_get_one_metadata_packet(trace, fp,
+                       &reply_len);
+
+               switch (metadata_status) {
+               case LTTNG_LIVE_GET_ONE_METADATA_STATUS_OK:
+                       len_read += reply_len;
+                       break;
+               case LTTNG_LIVE_GET_ONE_METADATA_STATUS_END:
+                       keep_receiving = false;
+                       break;
+               case LTTNG_LIVE_GET_ONE_METADATA_STATUS_CLOSED:
+                       keep_receiving = false;
+                       break;
+               case LTTNG_LIVE_GET_ONE_METADATA_STATUS_ERROR:
+                       goto error;
+               default:
+                       abort();
                }
-       } while (ret > 0);
+       }
 
        /*
-        * Consider metadata closed as soon as we get an error reading
-        * it (e.g. cannot be found).
+        * A closed metadata stream means the trace is no longer active. Return
+        * _END so that the caller can remove the trace from its list.
         */
-       if (ret < 0) {
-               if (!metadata->closed) {
-                       metadata->closed = true;
-                       /*
-                        * Release our reference on the trace as soon as
-                        * we know the metadata stream is not available
-                        * anymore. This won't necessarily teardown the
-                        * metadata objects immediately, but only when
-                        * the data streams are done.
-                        */
-                       metadata->trace = NULL;
-               }
-               if (errno == EINTR) {
-                       if (lttng_live_graph_is_canceled(
-                                       session->lttng_live_msg_iter)) {
-                               status = LTTNG_LIVE_ITERATOR_STATUS_AGAIN;
-                               goto end;
-                       }
-               }
+       if (metadata_status == LTTNG_LIVE_GET_ONE_METADATA_STATUS_CLOSED) {
+               status = LTTNG_LIVE_ITERATOR_STATUS_END;
+               goto end;
        }
 
        if (bt_close_memstream(&metadata_buf, &size, fp)) {
@@ -260,8 +268,15 @@ enum lttng_live_iterator_status lttng_live_metadata_update(
        }
 
        goto end;
+
 error:
-       status = LTTNG_LIVE_ITERATOR_STATUS_ERROR;
+       if (errno == EINTR) {
+               if (lttng_live_graph_is_canceled(session->lttng_live_msg_iter)) {
+                       status = LTTNG_LIVE_ITERATOR_STATUS_AGAIN;
+               }
+       } else {
+               status = LTTNG_LIVE_ITERATOR_STATUS_ERROR;
+       }
 end:
        if (fp) {
                int closeret;
index c5f7c559bfb1ac4ed479e4091aead3ddd760e423..f094a8f96553f3c9e098c373d3a177df9fc0eecf 100644 (file)
@@ -969,11 +969,11 @@ error:
 }
 
 BT_HIDDEN
-ssize_t lttng_live_get_one_metadata_packet(struct lttng_live_trace *trace,
-               FILE *fp)
+enum lttng_live_get_one_metadata_status lttng_live_get_one_metadata_packet(
+               struct lttng_live_trace *trace, FILE *fp, size_t *reply_len)
 {
        uint64_t len = 0;
-       int ret;
+       enum lttng_live_get_one_metadata_status metadata_status;
        struct lttng_viewer_cmd cmd;
        struct lttng_viewer_get_metadata rq;
        struct lttng_viewer_metadata_packet rp;
@@ -1020,15 +1020,21 @@ ssize_t lttng_live_get_one_metadata_packet(struct lttng_live_trace *trace,
 
        switch (be32toh(rp.status)) {
                case LTTNG_VIEWER_METADATA_OK:
-                       BT_COMP_LOGD("get_metadata : OK");
+                       BT_COMP_LOGD("Received get_metadata response : OK");
                        break;
                case LTTNG_VIEWER_NO_NEW_METADATA:
-                       BT_COMP_LOGD("get_metadata : NO NEW");
-                       ret = 0;
+                       BT_COMP_LOGD("Received get_metadata response: no new");
+                       metadata_status = LTTNG_LIVE_GET_ONE_METADATA_STATUS_END;
                        goto end;
                case LTTNG_VIEWER_METADATA_ERR:
-                       BT_COMP_LOGD("get_metadata : ERR");
-                       goto error;
+                       /*
+                        * The Relayd cannot find this stream id. Maybe its
+                        * gone already. This can happen in short lived UST app
+                        * in a per-pid session.
+                        */
+                       BT_COMP_LOGD("Received get_metadata response: error");
+                       metadata_status = LTTNG_LIVE_GET_ONE_METADATA_STATUS_CLOSED;
+                       goto end;
                default:
                        BT_COMP_LOGD("get_metadata : UNKNOWN");
                        goto error;
@@ -1065,14 +1071,18 @@ ssize_t lttng_live_get_one_metadata_packet(struct lttng_live_trace *trace,
        }
        BT_ASSERT(ret_len == len);
        free(data);
-       ret = len;
-end:
-       return ret;
+       *reply_len = len;
+       metadata_status = LTTNG_LIVE_GET_ONE_METADATA_STATUS_OK;
+
+       goto end;
 
 error_free_data:
        free(data);
 error:
-       return -1;
+       metadata_status = LTTNG_LIVE_GET_ONE_METADATA_STATUS_ERROR;
+
+end:
+       return metadata_status;
 }
 
 /*
This page took 0.029541 seconds and 4 git commands to generate.