Fix: acquire stream lock during kernel metadata snapshot
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
index a8abcd7901121c62dc27ec94f7d01cee62d2a6fa..ccca69e0eaded03676acf8278ac30413991b6bbc 100644 (file)
@@ -60,7 +60,11 @@ int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
        int infd = stream->wait_fd;
 
        ret = kernctl_snapshot(infd);
-       if (ret != 0) {
+       /*
+        * -EAGAIN is not an error; it just means that there is no data to
+        * be read.
+        */
+       if (ret != 0 && ret != -EAGAIN) {
                PERROR("Getting sub-buffer snapshot.");
        }
 
@@ -137,8 +141,6 @@ int lttng_kconsumer_snapshot_channel(uint64_t key, char *path,
        }
 
        cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
-               /* Are we at a position _before_ the first available packet ? */
-               bool before_first_packet = true;
                unsigned long consumed_pos, produced_pos;
 
                health_code_update();
@@ -182,11 +184,23 @@ int lttng_kconsumer_snapshot_channel(uint64_t key, char *path,
                                ERR("sending streams sent to relayd");
                                goto end_unlock;
                        }
+                       channel->streams_sent_to_relayd = true;
                }
 
-               ret = kernctl_buffer_flush(stream->wait_fd);
+               ret = kernctl_buffer_flush_empty(stream->wait_fd);
                if (ret < 0) {
-                       ERR("Failed to flush kernel stream");
+                       /*
+                        * Doing a buffer flush which does not take into
+                        * account empty packets. This is not perfect
+                        * for stream intersection, but required as a
+                        * fall-back when "flush_empty" is not
+                        * implemented by lttng-modules.
+                        */
+                       ret = kernctl_buffer_flush(stream->wait_fd);
+                       if (ret < 0) {
+                               ERR("Failed to flush kernel stream");
+                               goto end_unlock;
+                       }
                        goto end_unlock;
                }
 
@@ -224,7 +238,6 @@ int lttng_kconsumer_snapshot_channel(uint64_t key, char *path,
                while (consumed_pos < produced_pos) {
                        ssize_t read_len;
                        unsigned long len, padded_len;
-                       int lost_packet = 0;
 
                        health_code_update();
 
@@ -238,15 +251,7 @@ int lttng_kconsumer_snapshot_channel(uint64_t key, char *path,
                                }
                                DBG("Kernel consumer get subbuf failed. Skipping it.");
                                consumed_pos += stream->max_sb_size;
-
-                               /*
-                                * Start accounting lost packets only when we
-                                * already have extracted packets (to match the
-                                * content of the final snapshot).
-                                */
-                               if (!before_first_packet) {
-                                       lost_packet = 1;
-                               }
+                               stream->chan->lost_packets++;
                                continue;
                        }
 
@@ -287,16 +292,6 @@ int lttng_kconsumer_snapshot_channel(uint64_t key, char *path,
                                goto end_unlock;
                        }
                        consumed_pos += stream->max_sb_size;
-
-                       /*
-                        * Only account lost packets located between
-                        * succesfully extracted packets (do not account before
-                        * and after since they are not visible in the
-                        * resulting snapshot).
-                        */
-                       stream->chan->lost_packets += lost_packet;
-                       lost_packet = 0;
-                       before_first_packet = false;
                }
 
                if (relayd_id == (uint64_t) -1ULL) {
@@ -336,7 +331,7 @@ end:
  *
  * Returns 0 on success, < 0 on error
  */
-int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path,
+static int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path,
                uint64_t relayd_id, struct lttng_consumer_local_data *ctx)
 {
        int ret, use_relayd = 0;
@@ -355,11 +350,12 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path,
        if (!metadata_channel) {
                ERR("Kernel snapshot metadata not found for key %" PRIu64, key);
                ret = -1;
-               goto error;
+               goto error_no_channel;
        }
 
        metadata_stream = metadata_channel->metadata_stream;
        assert(metadata_stream);
+       pthread_mutex_lock(&metadata_stream->lock);
 
        /* Flag once that we have a valid relayd for the stream. */
        if (relayd_id != (uint64_t) -1ULL) {
@@ -369,7 +365,7 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path,
        if (use_relayd) {
                ret = consumer_send_relayd_stream(metadata_stream, path);
                if (ret < 0) {
-                       goto error;
+                       goto error_snapshot;
                }
        } else {
                ret = utils_create_stream_file(path, metadata_stream->name,
@@ -377,7 +373,7 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path,
                                metadata_stream->tracefile_count_current,
                                metadata_stream->uid, metadata_stream->gid, NULL);
                if (ret < 0) {
-                       goto error;
+                       goto error_snapshot;
                }
                metadata_stream->out_fd = ret;
        }
@@ -390,7 +386,8 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path,
                        if (ret_read != -EAGAIN) {
                                ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
                                                ret_read);
-                               goto error;
+                               ret = ret_read;
+                               goto error_snapshot;
                        }
                        /* ret_read is negative at this point so we will exit the loop. */
                        continue;
@@ -415,11 +412,12 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path,
        }
 
        ret = 0;
-
+error_snapshot:
+       pthread_mutex_unlock(&metadata_stream->lock);
        cds_list_del(&metadata_stream->send_node);
        consumer_stream_destroy(metadata_stream, NULL);
        metadata_channel->metadata_stream = NULL;
-error:
+error_no_channel:
        rcu_read_unlock();
        return ret;
 }
@@ -716,6 +714,19 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                                consumer_stream_free(new_stream);
                                goto end_nosignal;
                        }
+
+                       /*
+                        * If adding an extra stream to an already
+                        * existing channel (e.g. cpu hotplug), we need
+                        * to send the "streams_sent" command to relayd.
+                        */
+                       if (channel->streams_sent_to_relayd) {
+                               ret = consumer_send_relayd_streams_sent(
+                                               new_stream->net_seq_idx);
+                               if (ret < 0) {
+                                       goto end_nosignal;
+                               }
+                       }
                }
 
                /* Get the right pipe where the stream will be sent. */
@@ -809,6 +820,7 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                        if (ret < 0) {
                                goto end_nosignal;
                        }
+                       channel->streams_sent_to_relayd = true;
                }
                break;
        }
@@ -1315,12 +1327,34 @@ ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
                }
                ret = update_stream_stats(stream);
                if (ret < 0) {
+                       err = kernctl_put_subbuf(infd);
+                       if (err != 0) {
+                               if (err == -EFAULT) {
+                                       PERROR("Error in unreserving sub buffer\n");
+                               } else if (err == -EIO) {
+                                       /* Should never happen with newer LTTng versions */
+                                       PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
+                               }
+                               ret = err;
+                               goto end;
+                       }
                        goto end;
                }
        } else {
                write_index = 0;
                ret = metadata_stream_check_version(infd, stream);
                if (ret < 0) {
+                       err = kernctl_put_subbuf(infd);
+                       if (err != 0) {
+                               if (err == -EFAULT) {
+                                       PERROR("Error in unreserving sub buffer\n");
+                               } else if (err == -EIO) {
+                                       /* Should never happen with newer LTTng versions */
+                                       PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
+                               }
+                               ret = err;
+                               goto end;
+                       }
                        goto end;
                }
        }
@@ -1475,14 +1509,17 @@ int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
                stream->tracefile_size_current = 0;
 
                if (!stream->metadata_flag) {
-                       ret = index_create_file(stream->chan->pathname,
+                       struct lttng_index_file *index_file;
+
+                       index_file = lttng_index_file_create(stream->chan->pathname,
                                        stream->name, stream->uid, stream->gid,
                                        stream->chan->tracefile_size,
-                                       stream->tracefile_count_current);
-                       if (ret < 0) {
+                                       stream->tracefile_count_current,
+                                       CTF_INDEX_MAJOR, CTF_INDEX_MINOR);
+                       if (!index_file) {
                                goto error;
                        }
-                       stream->index_fd = ret;
+                       stream->index_file = index_file;
                }
        }
 
This page took 0.029415 seconds and 5 git commands to generate.