CUSTOM: relayd protocol: ignore reply on relayd_send_index and relayd_send_close_stream
[lttng-tools.git] / src / common / relayd / relayd.c
index a4c8a9261c29a9e58fc96e3023a68d8da8c44d68..97a5941f75cb63fbd8c38baafaea73612f7b9970 100644 (file)
@@ -15,7 +15,7 @@
  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-#define _GNU_SOURCE
+#define _LGPL_SOURCE
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -25,8 +25,9 @@
 
 #include <common/common.h>
 #include <common/defaults.h>
+#include <common/compat/endian.h>
 #include <common/sessiond-comm/relayd.h>
-#include <common/index/lttng-index.h>
+#include <common/index/ctf-index.h>
 
 #include "relayd.h"
 
@@ -57,6 +58,7 @@ static int send_command(struct lttcomm_relayd_sock *rsock,
                goto alloc_error;
        }
 
+       memset(&header, 0, sizeof(header));
        header.cmd = htobe32(cmd);
        header.data_size = htobe64(size);
 
@@ -70,20 +72,67 @@ static int send_command(struct lttcomm_relayd_sock *rsock,
                memcpy(buf + sizeof(header), data, size);
        }
 
+       DBG3("Relayd sending command %d of size %" PRIu64, (int) cmd, buf_size);
        ret = rsock->sock.ops->sendmsg(&rsock->sock, buf, buf_size, flags);
        if (ret < 0) {
+               PERROR("Failed to send command %d of size %" PRIu64,
+                               (int) cmd, buf_size);
                ret = -errno;
                goto error;
        }
-
-       DBG3("Relayd sending command %d of size %" PRIu64, cmd, buf_size);
-
 error:
        free(buf);
 alloc_error:
        return ret;
 }
 
+/*
+ * Record that a reply of `size` bytes will arrive on the socket but must not
+ * be handed to any caller: the bytes are only counted in
+ * rsock->bytes_to_ignore_on_recv, this function does not wait for them.
+ *
+ * When the backlog of bytes to ignore has reached the cutoff, half of the
+ * cutoff is consumed from the socket first so the backlog cannot grow without
+ * bound. That receive may block.
+ *
+ * Return 0 on success, -1 if the forced discard failed or the peer performed
+ * an orderly shutdown.
+ */
+static int recv_reply_ignore(struct lttcomm_relayd_sock *rsock, size_t size)
+{
+       const size_t cutoff = 128;
+       const size_t to_discard = cutoff / 2;
+       int nread;
+
+       /*
+        * Nothing forces a real receive on a stopped session driven by a
+        * live_timer, so the backlog of ignored bytes would keep growing.
+        * Once it reaches `cutoff`, drain half of the cutoff here. Most of
+        * those bytes should already be available, and TCP delivers them in
+        * order, so discarding from the head of the stream is safe.
+        */
+       if (rsock->bytes_to_ignore_on_recv < cutoff) {
+               goto account;
+       }
+
+       nread = rsock->sock.ops->recvmsg(&rsock->sock, NULL, to_discard, MSG_TRUNC);
+       if (nread == 0) {
+               /* Orderly shutdown. */
+               DBG("Socket %d has performed an orderly shutdown", rsock->sock.fd);
+               return -1;
+       }
+       if (nread < 0 || nread != to_discard) {
+               DBG("Receiving reply to discard failed on sock %d for size %zu with ret %d",
+                               rsock->sock.fd, to_discard, nread);
+               return -1;
+       }
+
+       DBG("Force discard of %zu bytes for socket %d", to_discard, rsock->sock.fd);
+       rsock->bytes_to_ignore_on_recv -= to_discard;
+
+account:
+       DBG3("Relayd ignore reply of %zu bytes for socket %d.", size, rsock->sock.fd);
+       /* The current reply is only accounted for, never waited on. */
+       rsock->bytes_to_ignore_on_recv += size;
+       return 0;
+}
+
+
 /*
  * Receive reply data on socket. This MUST be call after send_command or else
  * could result in unexpected behavior(s).
@@ -96,6 +145,26 @@ static int recv_reply(struct lttcomm_relayd_sock *rsock, void *data, size_t size
                return -ECONNRESET;
        }
 
+       /*
+        * We have to consume the bytes that are marked to ignore.
+        */
+       if (rsock->bytes_to_ignore_on_recv != 0) {
+               ret = rsock->sock.ops->recvmsg(&rsock->sock, NULL, rsock->bytes_to_ignore_on_recv, MSG_TRUNC);
+               if (ret <= 0 || ret != rsock->bytes_to_ignore_on_recv) {
+                       if (ret == 0) {
+                               /* Orderly shutdown. */
+                               DBG("Socket %d has performed an orderly shutdown", rsock->sock.fd);
+                       } else {
+                               DBG("Receiving reply to skip failed on sock %d for size %zu with ret %d",
+                                               rsock->sock.fd, rsock->bytes_to_ignore_on_recv, ret);
+                       }
+                       ret = -1;
+                       goto error;
+               }
+               DBG("Discarded %zu bytes on sock %d", rsock->bytes_to_ignore_on_recv, rsock->sock.fd);
+               rsock->bytes_to_ignore_on_recv = 0;
+       }
+
        DBG3("Relayd waiting for reply of size %zu", size);
 
        ret = rsock->sock.ops->recvmsg(&rsock->sock, data, size, 0);
@@ -122,14 +191,22 @@ error:
  */
 static int relayd_create_session_2_4(struct lttcomm_relayd_sock *rsock,
                uint64_t *session_id, char *session_name, char *hostname,
-               int session_live_timer)
+               int session_live_timer, unsigned int snapshot)
 {
        int ret;
        struct lttcomm_relayd_create_session_2_4 msg;
 
-       strncpy(msg.session_name, session_name, sizeof(msg.session_name));
-       strncpy(msg.hostname, hostname, sizeof(msg.hostname));
+       if (lttng_strncpy(msg.session_name, session_name,
+                       sizeof(msg.session_name))) {
+               ret = -1;
+               goto error;
+       }
+       if (lttng_strncpy(msg.hostname, hostname, sizeof(msg.hostname))) {
+               ret = -1;
+               goto error;
+       }
        msg.live_timer = htobe32(session_live_timer);
+       msg.snapshot = htobe32(snapshot);
 
        /* Send command */
        ret = send_command(rsock, RELAYD_CREATE_SESSION, &msg, sizeof(msg), 0);
@@ -167,7 +244,8 @@ error:
  * a lttng error code from the relayd.
  */
 int relayd_create_session(struct lttcomm_relayd_sock *rsock, uint64_t *session_id,
-               char *session_name, char *hostname, int session_live_timer)
+               char *session_name, char *hostname, int session_live_timer,
+               unsigned int snapshot)
 {
        int ret;
        struct lttcomm_relayd_status_session reply;
@@ -182,11 +260,12 @@ int relayd_create_session(struct lttcomm_relayd_sock *rsock, uint64_t *session_i
                case 2:
                case 3:
                        ret = relayd_create_session_2_1(rsock, session_id);
+                       break;
                case 4:
                default:
-                       ret = relayd_create_session_2_4(rsock, session_id,
-                                       session_name, hostname,
-                                       session_live_timer);
+                       ret = relayd_create_session_2_4(rsock, session_id, session_name,
+                                       hostname, session_live_timer, snapshot);
+                       break;
        }
 
        if (ret < 0) {
@@ -241,8 +320,17 @@ int relayd_add_stream(struct lttcomm_relayd_sock *rsock, const char *channel_nam
 
        /* Compat with relayd 2.1 */
        if (rsock->minor == 1) {
-               strncpy(msg.channel_name, channel_name, sizeof(msg.channel_name));
-               strncpy(msg.pathname, pathname, sizeof(msg.pathname));
+               memset(&msg, 0, sizeof(msg));
+               if (lttng_strncpy(msg.channel_name, channel_name,
+                               sizeof(msg.channel_name))) {
+                       ret = -1;
+                       goto error;
+               }
+               if (lttng_strncpy(msg.pathname, pathname,
+                               sizeof(msg.pathname))) {
+                       ret = -1;
+                       goto error;
+               }
 
                /* Send command */
                ret = send_command(rsock, RELAYD_ADD_STREAM, (void *) &msg, sizeof(msg), 0);
@@ -250,9 +338,18 @@ int relayd_add_stream(struct lttcomm_relayd_sock *rsock, const char *channel_nam
                        goto error;
                }
        } else {
+               memset(&msg_2_2, 0, sizeof(msg_2_2));
                /* Compat with relayd 2.2+ */
-               strncpy(msg_2_2.channel_name, channel_name, sizeof(msg_2_2.channel_name));
-               strncpy(msg_2_2.pathname, pathname, sizeof(msg_2_2.pathname));
+               if (lttng_strncpy(msg_2_2.channel_name, channel_name,
+                               sizeof(msg_2_2.channel_name))) {
+                       ret = -1;
+                       goto error;
+               }
+               if (lttng_strncpy(msg_2_2.pathname, pathname,
+                               sizeof(msg_2_2.pathname))) {
+                       ret = -1;
+                       goto error;
+               }
                msg_2_2.tracefile_size = htobe64(tracefile_size);
                msg_2_2.tracefile_count = htobe64(tracefile_count);
 
@@ -290,12 +387,169 @@ error:
        return ret;
 }
 
+/*
+ * Send half of the "add stream" exchange with the relayd.
+ *
+ * Builds the RELAYD_ADD_STREAM message matching the protocol minor version of
+ * `rsock` (2.1 layout when minor == 1, 2.2+ layout otherwise, the latter also
+ * carrying the tracefile size and count) and sends it. The reply is NOT read
+ * here (presumably relayd_add_stream_rcv is meant to collect it; confirm
+ * against callers).
+ *
+ * Return 0 on success, -1 if a name does not fit in its message field, or the
+ * negative value returned by send_command.
+ */
+int relayd_add_stream_send(struct lttcomm_relayd_sock *rsock, const char *channel_name,
+               const char *pathname, uint64_t tracefile_size, uint64_t tracefile_count)
+{
+       struct lttcomm_relayd_add_stream legacy_msg;
+       struct lttcomm_relayd_add_stream_2_2 current_msg;
+       void *payload;
+       size_t payload_len;
+       int status;
+
+       /* Code flow error. Safety net. */
+       assert(rsock);
+       assert(channel_name);
+       assert(pathname);
+
+       DBG("Relayd adding stream for channel name %s. Part send", channel_name);
+
+       if (rsock->minor == 1) {
+               /* Compat with relayd 2.1 */
+               memset(&legacy_msg, 0, sizeof(legacy_msg));
+               if (lttng_strncpy(legacy_msg.channel_name, channel_name,
+                               sizeof(legacy_msg.channel_name)) ||
+                               lttng_strncpy(legacy_msg.pathname, pathname,
+                               sizeof(legacy_msg.pathname))) {
+                       return -1;
+               }
+
+               payload = &legacy_msg;
+               payload_len = sizeof(legacy_msg);
+       } else {
+               /* Compat with relayd 2.2+ */
+               memset(&current_msg, 0, sizeof(current_msg));
+               if (lttng_strncpy(current_msg.channel_name, channel_name,
+                               sizeof(current_msg.channel_name)) ||
+                               lttng_strncpy(current_msg.pathname, pathname,
+                               sizeof(current_msg.pathname))) {
+                       return -1;
+               }
+               current_msg.tracefile_size = htobe64(tracefile_size);
+               current_msg.tracefile_count = htobe64(tracefile_count);
+
+               payload = &current_msg;
+               payload_len = sizeof(current_msg);
+       }
+
+       /* Send command */
+       status = send_command(rsock, RELAYD_ADD_STREAM, payload, payload_len, 0);
+       if (status < 0) {
+               return status;
+       }
+
+       DBG("Relayd add stream sent for channel name %s.", channel_name);
+       return 0;
+}
+
+
+/*
+ * Add stream on the relayd. Receive part.
+ *
+ * Reads a struct lttcomm_relayd_status_stream reply from `rsock`, converts it
+ * to host byte order and, when the relayd answered LTTNG_OK, stores the
+ * returned stream handle in *_stream_id. *_stream_id is left untouched on
+ * failure.
+ *
+ * Return 0 on success, -1 if the relayd replied with an error code, or the
+ * negative value returned by recv_reply.
+ */
+int relayd_add_stream_rcv(struct lttcomm_relayd_sock *rsock, uint64_t *_stream_id)
+{
+       int ret;
+       struct lttcomm_relayd_status_stream reply;
+
+       /* Code flow error. Safety net. */
+       assert(rsock);
+
+       /* Waiting for reply */
+       ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
+       if (ret < 0) {
+               goto error;
+       }
+
+       /* Back to host bytes order. */
+       reply.handle = be64toh(reply.handle);
+       reply.ret_code = be32toh(reply.ret_code);
+
+       /* Return stream handle or negative ret code. */
+       if (reply.ret_code != LTTNG_OK) {
+               ret = -1;
+               ERR("Relayd add stream replied error %d", reply.ret_code);
+               /* Skip the success trace below: the stream was not added. */
+               goto error;
+       }
+
+       /* Success */
+       ret = 0;
+       *_stream_id = reply.handle;
+
+       DBG("Relayd stream added successfully with handle %" PRIu64,
+               reply.handle);
+
+error:
+       return ret;
+}
+
+
+/*
+ * Tell the relayd that every stream of the current channel has been sent.
+ *
+ * The command only exists from protocol 2.4 onwards; for an older peer
+ * nothing is sent and the call succeeds.
+ *
+ * Return 0 on success, -1 if the relayd replied with an error code, or the
+ * negative value returned by send_command / recv_reply.
+ */
+int relayd_streams_sent(struct lttcomm_relayd_sock *rsock)
+{
+       struct lttcomm_relayd_generic_reply reply;
+       int status;
+
+       /* Code flow error. Safety net. */
+       assert(rsock);
+
+       DBG("Relayd sending streams sent.");
+
+       /* Introduced in 2.4: silently succeed against earlier versions. */
+       if (rsock->minor < 4) {
+               return 0;
+       }
+
+       /* Send command */
+       status = send_command(rsock, RELAYD_STREAMS_SENT, NULL, 0, 0);
+       if (status < 0) {
+               return status;
+       }
+
+       /* Waiting for reply */
+       status = recv_reply(rsock, (void *) &reply, sizeof(reply));
+       if (status < 0) {
+               return status;
+       }
+
+       /* Back to host bytes order. */
+       reply.ret_code = be32toh(reply.ret_code);
+
+       /* Anything other than LTTNG_OK is reported as a failure. */
+       if (reply.ret_code != LTTNG_OK) {
+               ERR("Relayd streams sent replied error %d", reply.ret_code);
+               return -1;
+       }
+
+       DBG("Relayd streams sent success");
+       return 0;
+}
+
+
 /*
  * Check version numbers on the relayd.
  * If major versions are compatible, we assign minor_to_use to the
  * minor version of the procotol we are going to use for this session.
  *
- * Return 0 if compatible else negative value.
+ * Return 0 if the two daemons are compatible, LTTNG_ERR_RELAYD_VERSION_FAIL
+ * otherwise, or a negative value on network errors.
  */
 int relayd_version_check(struct lttcomm_relayd_sock *rsock)
 {
@@ -308,6 +562,7 @@ int relayd_version_check(struct lttcomm_relayd_sock *rsock)
        DBG("Relayd version check for major.minor %u.%u", rsock->major,
                        rsock->minor);
 
+       memset(&msg, 0, sizeof(msg));
        /* Prepare network byte order before transmission. */
        msg.major = htobe32(rsock->major);
        msg.minor = htobe32(rsock->minor);
@@ -336,7 +591,7 @@ int relayd_version_check(struct lttcomm_relayd_sock *rsock)
         */
        if (msg.major != rsock->major) {
                /* Not compatible */
-               ret = -1;
+               ret = LTTNG_ERR_RELAYD_VERSION_FAIL;
                DBG2("Relayd version is NOT compatible. Relayd version %u != %u (us)",
                                msg.major, rsock->major);
                goto error;
@@ -438,6 +693,27 @@ int relayd_close(struct lttcomm_relayd_sock *rsock)
                goto end;
        }
 
+       /*
+        * This ensures that we do not close the socket while the lttng-relayd
+        * expects to be able to send a response that we skipped.
+        * While we lose some time receiving everything, this keeps the
+        * protocol intact from the point of view of lttng-relayd.
+        */
+       if (rsock->bytes_to_ignore_on_recv != 0) {
+               ret = rsock->sock.ops->recvmsg(&rsock->sock, NULL, rsock->bytes_to_ignore_on_recv, MSG_TRUNC);
+               if (ret <= 0 || ret != rsock->bytes_to_ignore_on_recv) {
+                       if (ret == 0) {
+                               /* Orderly shutdown. */
+                               DBG("Socket %d has performed an orderly shutdown", rsock->sock.fd);
+                       } else {
+                               DBG("Receiving reply to skip failed on sock %d for size %zu with ret %d",
+                                               rsock->sock.fd, rsock->bytes_to_ignore_on_recv, ret);
+                       }
+               }
+               DBG("Discarded %zu bytes on sock %d", rsock->bytes_to_ignore_on_recv, rsock->sock.fd);
+               rsock->bytes_to_ignore_on_recv = 0;
+       }
+
        DBG3("Relayd closing socket %d", rsock->sock.fd);
 
        if (rsock->sock.ops) {
@@ -509,6 +785,7 @@ int relayd_send_close_stream(struct lttcomm_relayd_sock *rsock, uint64_t stream_
 
        DBG("Relayd closing stream id %" PRIu64, stream_id);
 
+       memset(&msg, 0, sizeof(msg));
        msg.stream_id = htobe64(stream_id);
        msg.last_net_seq_num = htobe64(last_net_seq_num);
 
@@ -518,23 +795,16 @@ int relayd_send_close_stream(struct lttcomm_relayd_sock *rsock, uint64_t stream_
                goto error;
        }
 
-       /* Receive response */
-       ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
+       /*
+        * Discard the response since we do not really care about it and TCP
+        * guarantees in-order delivery. As for error handling, there is not
+        * much to do at this point (closing).
+        */
+       ret = recv_reply_ignore(rsock, sizeof(reply));
        if (ret < 0) {
                goto error;
        }
 
-       reply.ret_code = be32toh(reply.ret_code);
-
-       /* Return session id or negative ret code. */
-       if (reply.ret_code != LTTNG_OK) {
-               ret = -1;
-               ERR("Relayd close stream replied error %d", reply.ret_code);
-       } else {
-               /* Success */
-               ret = 0;
-       }
-
        DBG("Relayd close stream id %" PRIu64 " successfully", stream_id);
 
 error:
@@ -558,6 +828,7 @@ int relayd_data_pending(struct lttcomm_relayd_sock *rsock, uint64_t stream_id,
 
        DBG("Relayd data pending for stream id %" PRIu64, stream_id);
 
+       memset(&msg, 0, sizeof(msg));
        msg.stream_id = htobe64(stream_id);
        msg.last_net_seq_num = htobe64(last_net_seq_num);
 
@@ -606,6 +877,7 @@ int relayd_quiescent_control(struct lttcomm_relayd_sock *rsock,
 
        DBG("Relayd checking quiescent control state");
 
+       memset(&msg, 0, sizeof(msg));
        msg.stream_id = htobe64(metadata_stream_id);
 
        /* Send command */
@@ -650,6 +922,7 @@ int relayd_begin_data_pending(struct lttcomm_relayd_sock *rsock, uint64_t id)
 
        DBG("Relayd begin data pending");
 
+       memset(&msg, 0, sizeof(msg));
        msg.session_id = htobe64(id);
 
        /* Send command */
@@ -688,7 +961,7 @@ error:
 int relayd_end_data_pending(struct lttcomm_relayd_sock *rsock, uint64_t id,
                unsigned int *is_data_inflight)
 {
-       int ret;
+       int ret, recv_ret;
        struct lttcomm_relayd_end_data_pending msg;
        struct lttcomm_relayd_generic_reply reply;
 
@@ -697,6 +970,7 @@ int relayd_end_data_pending(struct lttcomm_relayd_sock *rsock, uint64_t id,
 
        DBG("Relayd end data pending");
 
+       memset(&msg, 0, sizeof(msg));
        msg.session_id = htobe64(id);
 
        /* Send command */
@@ -711,15 +985,15 @@ int relayd_end_data_pending(struct lttcomm_relayd_sock *rsock, uint64_t id,
                goto error;
        }
 
-       reply.ret_code = be32toh(reply.ret_code);
-       if (reply.ret_code < 0) {
-               ret = reply.ret_code;
+       recv_ret = be32toh(reply.ret_code);
+       if (recv_ret < 0) {
+               ret = recv_ret;
                goto error;
        }
 
-       *is_data_inflight = reply.ret_code;
+       *is_data_inflight = recv_ret;
 
-       DBG("Relayd end data pending is data inflight: %d", reply.ret_code);
+       DBG("Relayd end data pending is data inflight: %d", recv_ret);
 
        return 0;
 
@@ -731,7 +1005,7 @@ error:
  * Send index to the relayd.
  */
 int relayd_send_index(struct lttcomm_relayd_sock *rsock,
-               struct lttng_packet_index *index, uint64_t relay_stream_id,
+               struct ctf_packet_index *index, uint64_t relay_stream_id,
                uint64_t net_seq_num)
 {
        int ret;
@@ -749,6 +1023,7 @@ int relayd_send_index(struct lttcomm_relayd_sock *rsock,
 
        DBG("Relayd sending index for stream ID %" PRIu64, relay_stream_id);
 
+       memset(&msg, 0, sizeof(msg));
        msg.relay_stream_id = htobe64(relay_stream_id);
        msg.net_seq_num = htobe64(net_seq_num);
 
@@ -760,8 +1035,65 @@ int relayd_send_index(struct lttcomm_relayd_sock *rsock,
        msg.events_discarded = index->events_discarded;
        msg.stream_id = index->stream_id;
 
+       if (rsock->minor >= 8) {
+               msg.stream_instance_id = index->stream_instance_id;
+               msg.packet_seq_num = index->packet_seq_num;
+       }
+
        /* Send command */
-       ret = send_command(rsock, RELAYD_SEND_INDEX, &msg, sizeof(msg), 0);
+       ret = send_command(rsock, RELAYD_SEND_INDEX, &msg,
+               lttcomm_relayd_index_len(lttng_to_index_major(rsock->major,
+                                                               rsock->minor),
+                               lttng_to_index_minor(rsock->major, rsock->minor)),
+                               0);
+       if (ret < 0) {
+               goto error;
+       }
+
+       /*
+        * Ignore the response. TCP guarantees in-order arrival and the overall
+        * protocol does not rely on hard ordering between the control and data
+        * sockets for indexes.
+        * Indexes are sent either at the end of the buffer consumption or
+        * during the live timer.
+        */
+       ret = recv_reply_ignore(rsock, sizeof(reply));
+       if (ret < 0) {
+               goto error;
+       }
+
+error:
+       return ret;
+}
+
+/*
+ * Ask the relay to reset the metadata trace file (regeneration).
+ */
+int relayd_reset_metadata(struct lttcomm_relayd_sock *rsock,
+               uint64_t stream_id, uint64_t version)
+{
+       int ret;
+       struct lttcomm_relayd_reset_metadata msg;
+       struct lttcomm_relayd_generic_reply reply;
+
+       /* Code flow error. Safety net. */
+       assert(rsock);
+
+       /* Should have been prevented by the sessiond. */
+       if (rsock->minor < 8) {
+               ERR("Metadata regeneration unsupported before 2.8");
+               ret = -1;
+               goto error;
+       }
+
+       DBG("Relayd reset metadata stream id %" PRIu64, stream_id);
+
+       memset(&msg, 0, sizeof(msg));
+       msg.stream_id = htobe64(stream_id);
+       msg.version = htobe64(version);
+
+       /* Send command */
+       ret = send_command(rsock, RELAYD_RESET_METADATA, (void *) &msg, sizeof(msg), 0);
        if (ret < 0) {
                goto error;
        }
@@ -777,12 +1109,14 @@ int relayd_send_index(struct lttcomm_relayd_sock *rsock,
        /* Return session id or negative ret code. */
        if (reply.ret_code != LTTNG_OK) {
                ret = -1;
-               ERR("Relayd send index replied error %d", reply.ret_code);
+               ERR("Relayd reset metadata replied error %d", reply.ret_code);
        } else {
                /* Success */
                ret = 0;
        }
 
+       DBG("Relayd reset metadata stream id %" PRIu64 " successfully", stream_id);
+
 error:
        return ret;
 }
This page took 0.032193 seconds and 5 git commands to generate.