+
+static void stream_destroy_rcu(struct rcu_head *rcu_head)
+{
+ struct relay_stream *stream =
+ caa_container_of(rcu_head, struct relay_stream, rcu_node);
+
+ stream_destroy(stream);
+}
+
+/*
+ * No need to take stream->lock since this is only called on the final
+ * stream_put which ensures that a single thread may act on the stream.
+ */
+static void stream_release(struct urcu_ref *ref)
+{
+ struct relay_stream *stream =
+ caa_container_of(ref, struct relay_stream, ref);
+ struct relay_session *session;
+
+ session = stream->trace->session;
+
+ DBG("Releasing stream id %" PRIu64, stream->stream_handle);
+
+ pthread_mutex_lock(&session->recv_list_lock);
+ session->stream_count--;
+ if (stream->in_recv_list) {
+ cds_list_del_rcu(&stream->recv_node);
+ stream->in_recv_list = false;
+ }
+ pthread_mutex_unlock(&session->recv_list_lock);
+
+ stream_unpublish(stream);
+
+ if (stream->stream_fd) {
+ stream_fd_put(stream->stream_fd);
+ stream->stream_fd = NULL;
+ }
+ if (stream->index_file) {
+ lttng_index_file_put(stream->index_file);
+ stream->index_file = NULL;
+ }
+ if (stream->trace) {
+ ctf_trace_put(stream->trace);
+ stream->trace = NULL;
+ }
+ stream_complete_rotation(stream);
+ lttng_trace_chunk_put(stream->trace_chunk);
+ stream->trace_chunk = NULL;
+
+ call_rcu(&stream->rcu_node, stream_destroy_rcu);
+}
+
+void stream_put(struct relay_stream *stream)
+{
+ rcu_read_lock();
+ assert(stream->ref.refcount != 0);
+ /*
+ * Wait until we have processed all the stream packets before
+ * actually putting our last stream reference.
+ */
+ urcu_ref_put(&stream->ref, stream_release);
+ rcu_read_unlock();
+}
+
+int stream_set_pending_rotation(struct relay_stream *stream,
+ struct lttng_trace_chunk *next_trace_chunk,
+ uint64_t rotation_sequence_number)
+{
+ int ret = 0;
+ const struct relay_stream_rotation rotation = {
+ .data_rotated = false,
+ .index_rotated = false,
+ .packet_seq_num = rotation_sequence_number,
+ .prev_data_net_seq = -1ULL,
+ .next_trace_chunk = next_trace_chunk,
+ };
+
+ if (stream->ongoing_rotation.is_set) {
+ ERR("Attempted to set a pending rotation on a stream already being rotated (protocol error)");
+ ret = -1;
+ goto end;
+ }
+
+ if (next_trace_chunk) {
+ const bool reference_acquired =
+ lttng_trace_chunk_get(next_trace_chunk);
+
+ assert(reference_acquired);
+ }
+ LTTNG_OPTIONAL_SET(&stream->ongoing_rotation, rotation);
+
+ DBG("Setting pending rotation: stream_id = %" PRIu64
+ ", rotate_at_packet_seq_num = %" PRIu64,
+ stream->stream_handle, rotation_sequence_number);
+ if (stream->is_metadata) {
+ /*
+ * A metadata stream has no index; consider it already rotated.
+ */
+ stream->ongoing_rotation.value.index_rotated = true;
+ ret = stream_rotate_data_file(stream);
+ } else {
+ ret = try_rotate_stream_index(stream);
+ if (ret < 0) {
+ goto end;
+ }
+
+ ret = try_rotate_stream_data(stream);
+ if (ret < 0) {
+ goto end;
+ }
+ }
+end:
+ return ret;
+}
+
+void try_stream_close(struct relay_stream *stream)
+{
+ bool session_aborted;
+ struct relay_session *session = stream->trace->session;
+
+ DBG("Trying to close stream %" PRIu64, stream->stream_handle);
+
+ pthread_mutex_lock(&session->lock);
+ session_aborted = session->aborted;
+ pthread_mutex_unlock(&session->lock);
+
+ pthread_mutex_lock(&stream->lock);
+ /*
+ * Can be called concurently by connection close and reception of last
+ * pending data.
+ */
+ if (stream->closed) {
+ pthread_mutex_unlock(&stream->lock);
+ DBG("closing stream %" PRIu64 " aborted since it is already marked as closed", stream->stream_handle);
+ return;
+ }
+
+ stream->close_requested = true;
+
+ if (stream->last_net_seq_num == -1ULL) {
+ /*
+ * Handle connection close without explicit stream close
+ * command.
+ *
+ * We can be clever about indexes partially received in
+ * cases where we received the data socket part, but not
+ * the control socket part: since we're currently closing
+ * the stream on behalf of the control socket, we *know*
+ * there won't be any more control information for this
+ * socket. Therefore, we can destroy all indexes for
+ * which we have received only the file descriptor (from
+ * data socket). This takes care of consumerd crashes
+ * between sending the data and control information for
+ * a packet. Since those are sent in that order, we take
+ * care of consumerd crashes.
+ */
+ DBG("relay_index_close_partial_fd");
+ relay_index_close_partial_fd(stream);
+ /*
+ * Use the highest net_seq_num we currently have pending
+ * As end of stream indicator. Leave last_net_seq_num
+ * at -1ULL if we cannot find any index.
+ */
+ stream->last_net_seq_num = relay_index_find_last(stream);
+ DBG("Updating stream->last_net_seq_num to %" PRIu64, stream->last_net_seq_num);
+ /* Fall-through into the next check. */
+ }
+
+ if (stream->last_net_seq_num != -1ULL &&
+ ((int64_t) (stream->prev_data_seq - stream->last_net_seq_num)) < 0
+ && !session_aborted) {
+ /*
+ * Don't close since we still have data pending. This
+ * handles cases where an explicit close command has
+ * been received for this stream, and cases where the
+ * connection has been closed, and we are awaiting for
+ * index information from the data socket. It is
+ * therefore expected that all the index fd information
+ * we need has already been received on the control
+ * socket. Matching index information from data socket
+ * should be Expected Soon(TM).
+ *
+ * TODO: We should implement a timer to garbage collect
+ * streams after a timeout to be resilient against a
+ * consumerd implementation that would not match this
+ * expected behavior.
+ */
+ pthread_mutex_unlock(&stream->lock);
+ DBG("closing stream %" PRIu64 " aborted since it still has data pending", stream->stream_handle);
+ return;
+ }
+ /*
+ * We received all the indexes we can expect.
+ */
+ stream_unpublish(stream);
+ stream->closed = true;
+ /* Relay indexes are only used by the "consumer/sessiond" end. */
+ relay_index_close_all(stream);
+
+ /*
+ * If we are closed by an application exiting (per-pid buffers),
+ * we need to put our reference on the stream trace chunk right
+ * away, because otherwise still holding the reference on the
+ * trace chunk could allow a viewer stream (which holds a reference
+ * to the stream) to postpone destroy waiting for the chunk to cease
+ * to exist endlessly until the viewer is detached.
+ */
+
+ /* Put stream fd before put chunk. */
+ if (stream->stream_fd) {
+ stream_fd_put(stream->stream_fd);
+ stream->stream_fd = NULL;
+ }
+ if (stream->index_file) {
+ lttng_index_file_put(stream->index_file);
+ stream->index_file = NULL;
+ }
+ lttng_trace_chunk_put(stream->trace_chunk);
+ stream->trace_chunk = NULL;
+ pthread_mutex_unlock(&stream->lock);
+ DBG("Succeeded in closing stream %" PRIu64, stream->stream_handle);
+ stream_put(stream);
+}
+
+int stream_init_packet(struct relay_stream *stream, size_t packet_size,
+ bool *file_rotated)
+{
+ int ret = 0;
+
+ ASSERT_LOCKED(stream->lock);
+
+ if (!stream->stream_fd || !stream->trace_chunk) {
+ ERR("Protocol error: received a packet for a stream that doesn't have a current trace chunk: stream_id = %" PRIu64 ", channel_name = %s",
+ stream->stream_handle, stream->channel_name);
+ ret = -1;
+ goto end;
+ }
+
+ if (caa_likely(stream->tracefile_size == 0)) {
+ /* No size limit set; nothing to check. */
+ goto end;
+ }
+
+ /*
+ * Check if writing the new packet would exceed the maximal file size.
+ */
+ if (caa_unlikely((stream->tracefile_size_current + packet_size) >
+ stream->tracefile_size)) {
+ const uint64_t new_file_index =
+ (stream->tracefile_current_index + 1) %
+ stream->tracefile_count;
+
+ if (new_file_index < stream->tracefile_current_index) {
+ stream->tracefile_wrapped_around = true;
+ }
+ DBG("New stream packet causes stream file rotation: stream_id = %" PRIu64
+ ", current_file_size = %" PRIu64
+ ", packet_size = %zu, current_file_index = %" PRIu64
+ " new_file_index = %" PRIu64,
+ stream->stream_handle,
+ stream->tracefile_size_current, packet_size,
+ stream->tracefile_current_index, new_file_index);
+ tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_WRITE);
+ stream->tracefile_current_index = new_file_index;
+
+ if (stream->stream_fd) {
+ stream_fd_put(stream->stream_fd);
+ stream->stream_fd = NULL;
+ }
+ ret = stream_create_data_output_file_from_trace_chunk(stream,
+ stream->trace_chunk, false, &stream->stream_fd);
+ if (ret) {
+ ERR("Failed to perform trace file rotation of stream %" PRIu64,
+ stream->stream_handle);
+ goto end;
+ }
+
+ /*
+ * Reset current size because we just performed a stream
+ * rotation.
+ */
+ DBG("%s: reset tracefile_size_current for stream %" PRIu64 " was %" PRIu64,
+ __func__, stream->stream_handle, stream->tracefile_size_current);
+ stream->tracefile_size_current = 0;
+ *file_rotated = true;
+ } else {
+ *file_rotated = false;
+ }
+end:
+ return ret;
+}
+
+/* Note that the packet is not necessarily complete. */
+int stream_write(struct relay_stream *stream,
+ const struct lttng_buffer_view *packet, size_t padding_len)
+{
+ int ret = 0;
+ ssize_t write_ret;
+ size_t padding_to_write = padding_len;
+ char padding_buffer[FILE_IO_STACK_BUFFER_SIZE];
+
+ ASSERT_LOCKED(stream->lock);
+ memset(padding_buffer, 0,
+ min(sizeof(padding_buffer), padding_to_write));
+
+ if (!stream->stream_fd || !stream->trace_chunk) {
+ ERR("Protocol error: received a packet for a stream that doesn't have a current trace chunk: stream_id = %" PRIu64 ", channel_name = %s",
+ stream->stream_handle, stream->channel_name);
+ ret = -1;
+ goto end;
+ }
+ if (packet) {
+ write_ret = lttng_write(stream->stream_fd->fd,
+ packet->data, packet->size);
+ if (write_ret != packet->size) {
+ PERROR("Failed to write to stream file of %sstream %" PRIu64,
+ stream->is_metadata ? "metadata " : "",
+ stream->stream_handle);
+ ret = -1;
+ goto end;
+ }
+ }
+
+ while (padding_to_write > 0) {
+ const size_t padding_to_write_this_pass =
+ min(padding_to_write, sizeof(padding_buffer));
+
+ write_ret = lttng_write(stream->stream_fd->fd,
+ padding_buffer, padding_to_write_this_pass);
+ if (write_ret != padding_to_write_this_pass) {
+ PERROR("Failed to write padding to file of %sstream %" PRIu64,
+ stream->is_metadata ? "metadata " : "",
+ stream->stream_handle);
+ ret = -1;
+ goto end;
+ }
+ padding_to_write -= padding_to_write_this_pass;
+ }
+
+ if (stream->is_metadata) {
+ size_t recv_len;
+
+ recv_len = packet ? packet->size : 0;
+ recv_len += padding_len;
+ stream->metadata_received += recv_len;
+ if (recv_len) {
+ stream->no_new_metadata_notified = false;
+ }
+ }
+
+ DBG("Wrote to %sstream %" PRIu64 ": data_length = %zu, padding_length = %zu",
+ stream->is_metadata ? "metadata " : "",
+ stream->stream_handle,
+ packet ? packet->size : (size_t) 0, padding_len);
+end:
+ return ret;
+}
+
+/*
+ * Update index after receiving a packet for a data stream.
+ *
+ * Called with the stream lock held.
+ *
+ * Return 0 on success else a negative value.
+ */
+int stream_update_index(struct relay_stream *stream, uint64_t net_seq_num,
+ bool rotate_index, bool *flushed, uint64_t total_size)
+{
+ int ret = 0;
+ uint64_t data_offset;
+ struct relay_index *index;
+
+ assert(stream->trace_chunk);
+ ASSERT_LOCKED(stream->lock);
+ /* Get data offset because we are about to update the index. */
+ data_offset = htobe64(stream->tracefile_size_current);
+
+ DBG("handle_index_data: stream %" PRIu64 " net_seq_num %" PRIu64 " data offset %" PRIu64,
+ stream->stream_handle, net_seq_num, stream->tracefile_size_current);
+
+ /*
+ * Lookup for an existing index for that stream id/sequence
+ * number. If it exists, the control thread has already received the
+ * data for it, thus we need to write it to disk.
+ */
+ index = relay_index_get_by_id_or_create(stream, net_seq_num);
+ if (!index) {
+ ret = -1;
+ goto end;
+ }
+
+ if (rotate_index || !stream->index_file) {
+ ret = create_index_file(stream, stream->trace_chunk);
+ if (ret) {
+ ERR("Failed to create index file for stream %" PRIu64,
+ stream->stream_handle);
+ /* Put self-ref for this index due to error. */
+ relay_index_put(index);
+ index = NULL;
+ goto end;
+ }
+ }
+
+ if (relay_index_set_file(index, stream->index_file, data_offset)) {
+ ret = -1;
+ /* Put self-ref for this index due to error. */
+ relay_index_put(index);
+ index = NULL;
+ goto end;
+ }
+
+ ret = relay_index_try_flush(index);
+ if (ret == 0) {
+ tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_READ);
+ tracefile_array_commit_seq(stream->tfa, stream->index_received_seqcount);
+ stream->index_received_seqcount++;
+ LTTNG_OPTIONAL_SET(&stream->received_packet_seq_num,
+ be64toh(index->index_data.packet_seq_num));
+ *flushed = true;
+ } else if (ret > 0) {
+ index->total_size = total_size;
+ /* No flush. */
+ ret = 0;
+ } else {
+ /*
+ * ret < 0
+ *
+ * relay_index_try_flush is responsible for the self-reference
+ * put of the index object on error.
+ */
+ ERR("relay_index_try_flush error %d", ret);
+ ret = -1;
+ }
+end:
+ return ret;
+}
+
+int stream_complete_packet(struct relay_stream *stream, size_t packet_total_size,
+ uint64_t sequence_number, bool index_flushed)
+{
+ int ret = 0;
+
+ ASSERT_LOCKED(stream->lock);
+
+ stream->tracefile_size_current += packet_total_size;
+ if (index_flushed) {
+ stream->pos_after_last_complete_data_index =
+ stream->tracefile_size_current;
+ stream->prev_index_seq = sequence_number;
+ ret = try_rotate_stream_index(stream);
+ if (ret < 0) {
+ goto end;
+ }
+ }
+
+ stream->prev_data_seq = sequence_number;
+ ret = try_rotate_stream_data(stream);
+
+end:
+ return ret;
+}
+
+int stream_add_index(struct relay_stream *stream,
+ const struct lttcomm_relayd_index *index_info)
+{
+ int ret = 0;
+ struct relay_index *index;
+
+ ASSERT_LOCKED(stream->lock);
+
+ DBG("stream_add_index for stream %" PRIu64, stream->stream_handle);
+
+ /* Live beacon handling */
+ if (index_info->packet_size == 0) {
+ DBG("Received live beacon for stream %" PRIu64,
+ stream->stream_handle);
+
+ /*
+ * Only flag a stream inactive when it has already
+ * received data and no indexes are in flight.
+ */
+ if (stream->index_received_seqcount > 0
+ && stream->indexes_in_flight == 0) {
+ stream->beacon_ts_end = index_info->timestamp_end;
+ }
+ ret = 0;
+ goto end;
+ } else {
+ stream->beacon_ts_end = -1ULL;
+ }
+
+ if (stream->ctf_stream_id == -1ULL) {
+ stream->ctf_stream_id = index_info->stream_id;
+ }
+
+ index = relay_index_get_by_id_or_create(stream, index_info->net_seq_num);
+ if (!index) {
+ ret = -1;
+ ERR("Failed to get or create index %" PRIu64,
+ index_info->net_seq_num);
+ goto end;
+ }
+ if (relay_index_set_control_data(index, index_info,
+ stream->trace->session->minor)) {
+ ERR("set_index_control_data error");
+ relay_index_put(index);
+ ret = -1;
+ goto end;
+ }
+ ret = relay_index_try_flush(index);
+ if (ret == 0) {
+ tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_READ);
+ tracefile_array_commit_seq(stream->tfa, stream->index_received_seqcount);
+ stream->index_received_seqcount++;
+ stream->pos_after_last_complete_data_index += index->total_size;
+ stream->prev_index_seq = index_info->net_seq_num;
+ LTTNG_OPTIONAL_SET(&stream->received_packet_seq_num,
+ index_info->packet_seq_num);
+
+ ret = try_rotate_stream_index(stream);
+ if (ret < 0) {
+ goto end;
+ }
+ ret = try_rotate_stream_data(stream);
+ if (ret < 0) {
+ goto end;
+ }
+ } else if (ret > 0) {
+ /* no flush. */
+ ret = 0;
+ } else {
+ /*
+ * ret < 0
+ *
+ * relay_index_try_flush is responsible for the self-reference
+ * put of the index object on error.
+ */
+ ERR("relay_index_try_flush error %d", ret);
+ ret = -1;
+ }
+end:
+ return ret;
+}
+
+static void print_stream_indexes(struct relay_stream *stream)
+{
+ struct lttng_ht_iter iter;
+ struct relay_index *index;
+
+ rcu_read_lock();
+ cds_lfht_for_each_entry(stream->indexes_ht->ht, &iter.iter, index,
+ index_n.node) {
+ DBG("index %p net_seq_num %" PRIu64 " refcount %ld"
+ " stream %" PRIu64 " trace %" PRIu64
+ " session %" PRIu64,
+ index,
+ index->index_n.key,
+ stream->ref.refcount,
+ index->stream->stream_handle,
+ index->stream->trace->id,
+ index->stream->trace->session->id);
+ }
+ rcu_read_unlock();
+}
+
+int stream_reset_file(struct relay_stream *stream)
+{
+ ASSERT_LOCKED(stream->lock);
+
+ if (stream->stream_fd) {
+ stream_fd_put(stream->stream_fd);
+ stream->stream_fd = NULL;
+ }
+
+ DBG("%s: reset tracefile_size_current for stream %" PRIu64 " was %" PRIu64,
+ __func__, stream->stream_handle, stream->tracefile_size_current);
+ stream->tracefile_size_current = 0;
+ stream->prev_data_seq = 0;
+ stream->prev_index_seq = 0;
+ /* Note that this does not reset the tracefile array. */
+ stream->tracefile_current_index = 0;
+ stream->pos_after_last_complete_data_index = 0;
+
+ return stream_create_data_output_file_from_trace_chunk(stream,
+ stream->trace_chunk, true, &stream->stream_fd);
+}
+
+void print_relay_streams(void)
+{
+ struct lttng_ht_iter iter;
+ struct relay_stream *stream;
+
+ if (!relay_streams_ht) {
+ return;
+ }
+
+ rcu_read_lock();
+ cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, stream,
+ node.node) {
+ if (!stream_get(stream)) {
+ continue;
+ }
+ DBG("stream %p refcount %ld stream %" PRIu64 " trace %" PRIu64
+ " session %" PRIu64,
+ stream,
+ stream->ref.refcount,
+ stream->stream_handle,
+ stream->trace->id,
+ stream->trace->session->id);
+ print_stream_indexes(stream);
+ stream_put(stream);
+ }
+ rcu_read_unlock();
+}