relayd: implement file and session rotation on top of trace chunks
[lttng-tools.git] / src / bin / lttng-relayd / stream.c
CommitLineData
2a174661
DG
1/*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
7591bab1 4 * 2015 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
c35f9726 5 * 2019 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
2a174661
DG
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License, version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
6c1c0768 21#define _LGPL_SOURCE
2a174661 22#include <common/common.h>
7591bab1
MD
23#include <common/utils.h>
24#include <common/defaults.h>
c35f9726 25#include <common/sessiond-comm/relayd.h>
7591bab1
MD
26#include <urcu/rculist.h>
27#include <sys/stat.h>
2a174661 28
7591bab1 29#include "lttng-relayd.h"
2a174661
DG
30#include "index.h"
31#include "stream.h"
32#include "viewer-stream.h"
33
348a81dc
JG
34#include <sys/types.h>
35#include <fcntl.h>
36
c35f9726
JG
37#define FILE_IO_STACK_BUFFER_SIZE 65536
38
7591bab1
MD
39/* Should be called with RCU read-side lock held. */
40bool stream_get(struct relay_stream *stream)
41{
ce4d4083 42 return urcu_ref_get_unless_zero(&stream->ref);
7591bab1
MD
43}
44
2a174661 45/*
7591bab1
MD
46 * Get stream from stream id from the streams hash table. Return stream
47 * if found else NULL. A stream reference is taken when a stream is
48 * returned. stream_put() must be called on that stream.
2a174661 49 */
7591bab1 50struct relay_stream *stream_get_by_id(uint64_t stream_id)
2a174661
DG
51{
52 struct lttng_ht_node_u64 *node;
53 struct lttng_ht_iter iter;
54 struct relay_stream *stream = NULL;
55
7591bab1
MD
56 rcu_read_lock();
57 lttng_ht_lookup(relay_streams_ht, &stream_id, &iter);
2a174661 58 node = lttng_ht_iter_get_node_u64(&iter);
7591bab1 59 if (!node) {
2a174661
DG
60 DBG("Relay stream %" PRIu64 " not found", stream_id);
61 goto end;
62 }
63 stream = caa_container_of(node, struct relay_stream, node);
7591bab1
MD
64 if (!stream_get(stream)) {
65 stream = NULL;
66 }
2a174661 67end:
7591bab1 68 rcu_read_unlock();
2a174661
DG
69 return stream;
70}
71
c35f9726
JG
72static void stream_complete_rotation(struct relay_stream *stream)
73{
74 DBG("Rotation completed for stream %" PRIu64, stream->stream_handle);
75 lttng_trace_chunk_put(stream->trace_chunk);
76 stream->trace_chunk = stream->ongoing_rotation.value.next_trace_chunk;
77 stream->ongoing_rotation = (typeof(stream->ongoing_rotation)) {};
78}
79
80/*
81 * If too much data has been written in a tracefile before we received the
82 * rotation command, we have to move the excess data to the new tracefile and
83 * perform the rotation. This can happen because the control and data
84 * connections are separate, the indexes as well as the commands arrive from
85 * the control connection and we have no control over the order so we could be
86 * in a situation where too much data has been received on the data connection
87 * before the rotation command on the control connection arrives.
88 */
89static int rotate_truncate_stream(struct relay_stream *stream)
90{
91 int ret, new_fd;
92 off_t lseek_ret;
93 uint64_t diff, pos = 0;
94 char buf[FILE_IO_STACK_BUFFER_SIZE];
95
96 assert(!stream->is_metadata);
97
98 assert(stream->tracefile_size_current >
99 stream->pos_after_last_complete_data_index);
100 diff = stream->tracefile_size_current -
101 stream->pos_after_last_complete_data_index;
102
103 /* Create the new tracefile. */
104 new_fd = utils_create_stream_file(stream->path_name,
105 stream->channel_name,
106 stream->tracefile_size, stream->tracefile_count,
107 /* uid */ -1, /* gid */ -1, /* suffix */ NULL);
108 if (new_fd < 0) {
109 ERR("Failed to create new stream file at path %s for channel %s",
110 stream->path_name, stream->channel_name);
111 ret = -1;
112 goto end;
113 }
114
115 /*
116 * Rewind the current tracefile to the position at which the rotation
117 * should have occurred.
118 */
119 lseek_ret = lseek(stream->stream_fd->fd,
120 stream->pos_after_last_complete_data_index, SEEK_SET);
121 if (lseek_ret < 0) {
122 PERROR("seek truncate stream");
123 ret = -1;
124 goto end;
125 }
126
127 /* Move data from the old file to the new file. */
128 while (pos < diff) {
129 uint64_t count, bytes_left;
130 ssize_t io_ret;
131
132 bytes_left = diff - pos;
133 count = bytes_left > sizeof(buf) ? sizeof(buf) : bytes_left;
134 assert(count <= SIZE_MAX);
135
136 io_ret = lttng_read(stream->stream_fd->fd, buf, count);
137 if (io_ret < (ssize_t) count) {
138 char error_string[256];
139
140 snprintf(error_string, sizeof(error_string),
141 "Failed to read %" PRIu64 " bytes from fd %i in rotate_truncate_stream(), returned %zi",
142 count, stream->stream_fd->fd, io_ret);
143 if (io_ret == -1) {
144 PERROR("%s", error_string);
145 } else {
146 ERR("%s", error_string);
147 }
148 ret = -1;
149 goto end;
150 }
151
152 io_ret = lttng_write(new_fd, buf, count);
153 if (io_ret < (ssize_t) count) {
154 char error_string[256];
155
156 snprintf(error_string, sizeof(error_string),
157 "Failed to write %" PRIu64 " bytes from fd %i in rotate_truncate_stream(), returned %zi",
158 count, new_fd, io_ret);
159 if (io_ret == -1) {
160 PERROR("%s", error_string);
161 } else {
162 ERR("%s", error_string);
163 }
164 ret = -1;
165 goto end;
166 }
167
168 pos += count;
169 }
170
171 /* Truncate the file to get rid of the excess data. */
172 ret = ftruncate(stream->stream_fd->fd,
173 stream->pos_after_last_complete_data_index);
174 if (ret) {
175 PERROR("ftruncate");
176 goto end;
177 }
178
179 ret = close(stream->stream_fd->fd);
180 if (ret < 0) {
181 PERROR("Closing tracefile");
182 goto end;
183 }
184
185 /*
186 * Update the offset and FD of all the eventual indexes created by the
187 * data connection before the rotation command arrived.
188 */
189 ret = relay_index_switch_all_files(stream);
190 if (ret < 0) {
191 ERR("Failed to rotate index file");
192 goto end;
193 }
194
195 stream->stream_fd->fd = new_fd;
196 stream->tracefile_size_current = diff;
197 stream->pos_after_last_complete_data_index = 0;
198 stream_complete_rotation(stream);
199
200 ret = 0;
201
202end:
203 return ret;
204}
205
206static int stream_create_data_output_file_from_trace_chunk(
207 struct relay_stream *stream,
208 struct lttng_trace_chunk *trace_chunk,
209 bool force_unlink,
210 struct stream_fd **out_stream_fd)
348a81dc
JG
211{
212 int ret, fd;
c35f9726 213 char stream_path[LTTNG_PATH_MAX];
348a81dc
JG
214 enum lttng_trace_chunk_status status;
215 const int flags = O_RDWR | O_CREAT | O_TRUNC;
216 const mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
348a81dc
JG
217
218 ASSERT_LOCKED(stream->lock);
219 assert(stream->trace_chunk);
220
348a81dc 221 ret = utils_stream_file_path(stream->path_name, stream->channel_name,
c35f9726
JG
222 stream->tracefile_size, stream->tracefile_current_index,
223 NULL, stream_path, sizeof(stream_path));
348a81dc
JG
224 if (ret < 0) {
225 goto end;
226 }
227
c35f9726
JG
228 if (stream->tracefile_wrapped_around || force_unlink) {
229 /*
230 * The on-disk ring-buffer has wrapped around.
231 * Newly created stream files will replace existing files. Since
232 * live clients may be consuming existing files, the file about
233 * to be replaced is unlinked in order to not overwrite its
234 * content.
235 */
236 status = lttng_trace_chunk_unlink_file(trace_chunk,
237 stream_path);
238 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
239 PERROR("Failed to unlink stream file \"%s\" during trace file rotation",
240 stream_path);
241 /*
242 * Don't abort if the file doesn't exist, it is
243 * unexpected, but should not be a fatal error.
244 */
245 if (errno != ENOENT) {
246 ret = -1;
247 goto end;
248 }
249 }
250 }
251
348a81dc 252 status = lttng_trace_chunk_open_file(
c35f9726 253 trace_chunk, stream_path, flags, mode, &fd);
348a81dc
JG
254 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
255 ERR("Failed to open stream file \"%s\"", stream->channel_name);
256 ret = -1;
257 goto end;
258 }
259
c35f9726
JG
260 *out_stream_fd = stream_fd_create(fd);
261 if (!*out_stream_fd) {
348a81dc
JG
262 if (close(ret)) {
263 PERROR("Error closing stream file descriptor %d", ret);
264 }
265 ret = -1;
266 goto end;
267 }
268end:
269 return ret;
270}
271
c35f9726
JG
272static int stream_rotate_data_file(struct relay_stream *stream)
273{
274 int ret = 0;
275
276 DBG("Rotating stream %" PRIu64 " data file",
277 stream->stream_handle);
278
279 if (stream->stream_fd) {
280 stream_fd_put(stream->stream_fd);
281 stream->stream_fd = NULL;
282 }
283
284 stream->tracefile_wrapped_around = false;
285 stream->tracefile_current_index = 0;
286
287 if (stream->ongoing_rotation.value.next_trace_chunk) {
288 struct stream_fd *new_stream_fd = NULL;
289 enum lttng_trace_chunk_status chunk_status;
290
291 chunk_status = lttng_trace_chunk_create_subdirectory(
292 stream->ongoing_rotation.value.next_trace_chunk,
293 stream->path_name);
294 if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
295 ret = -1;
296 goto end;
297 }
298
299 /* Rotate the data file. */
300 ret = stream_create_data_output_file_from_trace_chunk(stream,
301 stream->ongoing_rotation.value.next_trace_chunk,
302 false, &new_stream_fd);
303 stream->stream_fd = new_stream_fd;
304 if (ret < 0) {
305 ERR("Failed to rotate stream data file");
306 goto end;
307 }
308 }
309 stream->tracefile_size_current = 0;
310 stream->pos_after_last_complete_data_index = 0;
311 stream->ongoing_rotation.value.data_rotated = true;
312
313 if (stream->ongoing_rotation.value.index_rotated) {
314 /* Rotation completed; reset its state. */
315 stream_complete_rotation(stream);
316 }
317end:
318 return ret;
319}
320
321/*
322 * Check if a stream's data file (as opposed to index) should be rotated
323 * (for session rotation).
324 * Must be called with the stream lock held.
325 *
326 * Return 0 on success, a negative value on error.
327 */
328static int try_rotate_stream_data(struct relay_stream *stream)
329{
330 int ret = 0;
331
332 if (caa_likely(!stream->ongoing_rotation.is_set)) {
333 /* No rotation expected. */
334 goto end;
335 }
336
337 if (stream->ongoing_rotation.value.data_rotated) {
338 /* Rotation of the data file has already occurred. */
339 goto end;
340 }
341
342 if (stream->prev_data_seq == -1ULL ||
343 stream->prev_data_seq + 1 < stream->ongoing_rotation.value.seq_num) {
344 /*
345 * The next packet that will be written is not part of the next
346 * chunk yet.
347 */
348 DBG("Stream %" PRIu64 " not yet ready for rotation (rotate_at_seq_num = %" PRIu64
349 ", prev_data_seq = %" PRIu64 ")",
350 stream->stream_handle,
351 stream->ongoing_rotation.value.seq_num,
352 stream->prev_data_seq);
353 goto end;
354 } else if (stream->prev_data_seq > stream->ongoing_rotation.value.seq_num) {
355 /*
356 * prev_data_seq is checked here since indexes and rotation
357 * commands are serialized with respect to each other.
358 */
359 DBG("Rotation after too much data has been written in tracefile "
360 "for stream %" PRIu64 ", need to truncate before "
361 "rotating", stream->stream_handle);
362 ret = rotate_truncate_stream(stream);
363 if (ret) {
364 ERR("Failed to truncate stream");
365 goto end;
366 }
367 } else {
368 ret = stream_rotate_data_file(stream);
369 }
370
371end:
372 return ret;
373}
374
375/*
376 * Close the current index file if it is open, and create a new one.
377 *
378 * Return 0 on success, -1 on error.
379 */
380static int create_index_file(struct relay_stream *stream,
381 struct lttng_trace_chunk *chunk)
382{
383 int ret;
384 uint32_t major, minor;
385 char *index_subpath = NULL;
386
387 ASSERT_LOCKED(stream->lock);
388
389 /* Put ref on previous index_file. */
390 if (stream->index_file) {
391 lttng_index_file_put(stream->index_file);
392 stream->index_file = NULL;
393 }
394 major = stream->trace->session->major;
395 minor = stream->trace->session->minor;
396
397 if (!chunk) {
398 ret = 0;
399 goto end;
400 }
401 ret = asprintf(&index_subpath, "%s/%s", stream->path_name,
402 DEFAULT_INDEX_DIR);
403 if (ret < 0) {
404 goto end;
405 }
406
407 ret = lttng_trace_chunk_create_subdirectory(chunk,
408 index_subpath);
409 free(index_subpath);
410 if (ret) {
411 goto end;
412 }
413 stream->index_file = lttng_index_file_create_from_trace_chunk(
414 chunk, stream->path_name,
415 stream->channel_name, stream->tracefile_size,
416 stream->tracefile_current_index,
417 lttng_to_index_major(major, minor),
418 lttng_to_index_minor(major, minor), true);
419 if (!stream->index_file) {
420 ret = -1;
421 goto end;
422 }
423
424 ret = 0;
425
426end:
427 return ret;
428}
429
430/*
431 * Check if a stream's index file should be rotated (for session rotation).
432 * Must be called with the stream lock held.
433 *
434 * Return 0 on success, a negative value on error.
435 */
436static int try_rotate_stream_index(struct relay_stream *stream)
437{
438 int ret = 0;
439
440 if (!stream->ongoing_rotation.is_set) {
441 /* No rotation expected. */
442 goto end;
443 }
444
445 if (stream->ongoing_rotation.value.index_rotated) {
446 /* Rotation of the index has already occurred. */
447 goto end;
448 }
449
450 if (stream->prev_index_seq == -1ULL ||
451 stream->prev_index_seq + 1 < stream->ongoing_rotation.value.seq_num) {
452 DBG("Stream %" PRIu64 " index not yet ready for rotation (rotate_at_seq_num = %" PRIu64 ", prev_index_seq = %" PRIu64 ")",
453 stream->stream_handle,
454 stream->ongoing_rotation.value.seq_num,
455 stream->prev_index_seq);
456 goto end;
457 } else {
458 /* The next index belongs to the new trace chunk; rotate. */
459 assert(stream->prev_index_seq + 1 ==
460 stream->ongoing_rotation.value.seq_num);
461 DBG("Rotating stream %" PRIu64 " index file",
462 stream->stream_handle);
463 ret = create_index_file(stream,
464 stream->ongoing_rotation.value.next_trace_chunk);
465 stream->ongoing_rotation.value.index_rotated = true;
466
467 if (stream->ongoing_rotation.value.data_rotated &&
468 stream->ongoing_rotation.value.index_rotated) {
469 /* Rotation completed; reset its state. */
470 DBG("Rotation completed for stream %" PRIu64,
471 stream->stream_handle);
472 stream_complete_rotation(stream);
473 }
474 }
475
476end:
477 return ret;
478}
479
348a81dc
JG
480static int stream_set_trace_chunk(struct relay_stream *stream,
481 struct lttng_trace_chunk *chunk)
482{
483 int ret = 0;
484 enum lttng_trace_chunk_status status;
485 bool acquired_reference;
c35f9726 486 struct stream_fd *new_stream_fd = NULL;
348a81dc 487
348a81dc
JG
488 status = lttng_trace_chunk_create_subdirectory(chunk,
489 stream->path_name);
490 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
491 ret = -1;
492 goto end;
493 }
494
495 lttng_trace_chunk_put(stream->trace_chunk);
496 acquired_reference = lttng_trace_chunk_get(chunk);
497 assert(acquired_reference);
498 stream->trace_chunk = chunk;
c35f9726
JG
499
500 if (stream->stream_fd) {
501 stream_fd_put(stream->stream_fd);
502 stream->stream_fd = NULL;
503 }
504 ret = stream_create_data_output_file_from_trace_chunk(stream, chunk,
505 false, &new_stream_fd);
506 stream->stream_fd = new_stream_fd;
348a81dc 507end:
348a81dc
JG
508 return ret;
509}
510
2a174661 511/*
7591bab1 512 * We keep ownership of path_name and channel_name.
2a174661 513 */
7591bab1
MD
514struct relay_stream *stream_create(struct ctf_trace *trace,
515 uint64_t stream_handle, char *path_name,
516 char *channel_name, uint64_t tracefile_size,
348a81dc 517 uint64_t tracefile_count)
2a174661 518{
7591bab1
MD
519 int ret;
520 struct relay_stream *stream = NULL;
521 struct relay_session *session = trace->session;
348a81dc
JG
522 bool acquired_reference = false;
523 struct lttng_trace_chunk *current_trace_chunk;
2a174661 524
7591bab1
MD
525 stream = zmalloc(sizeof(struct relay_stream));
526 if (stream == NULL) {
527 PERROR("relay stream zmalloc");
7591bab1
MD
528 goto error_no_alloc;
529 }
2a174661 530
7591bab1 531 stream->stream_handle = stream_handle;
a8f9f353 532 stream->prev_data_seq = -1ULL;
7a45c7e6 533 stream->prev_index_seq = -1ULL;
bda7c7b9 534 stream->last_net_seq_num = -1ULL;
7591bab1
MD
535 stream->ctf_stream_id = -1ULL;
536 stream->tracefile_size = tracefile_size;
537 stream->tracefile_count = tracefile_count;
538 stream->path_name = path_name;
539 stream->channel_name = channel_name;
2f9c3030 540 stream->beacon_ts_end = -1ULL;
7591bab1
MD
541 lttng_ht_node_init_u64(&stream->node, stream->stream_handle);
542 pthread_mutex_init(&stream->lock, NULL);
7591bab1
MD
543 urcu_ref_init(&stream->ref);
544 ctf_trace_get(trace);
545 stream->trace = trace;
2a174661 546
348a81dc
JG
547 pthread_mutex_lock(&trace->session->lock);
548 current_trace_chunk = trace->session->current_trace_chunk;
549 if (current_trace_chunk) {
550 acquired_reference = lttng_trace_chunk_get(current_trace_chunk);
551 }
552 pthread_mutex_unlock(&trace->session->lock);
553 if (!acquired_reference) {
554 ERR("Cannot create stream for channel \"%s\" as a reference to the session's current trace chunk could not be acquired",
555 channel_name);
7591bab1
MD
556 ret = -1;
557 goto end;
2a174661
DG
558 }
559
348a81dc
JG
560 stream->indexes_ht = lttng_ht_new(0, LTTNG_HT_TYPE_U64);
561 if (!stream->indexes_ht) {
562 ERR("Cannot created indexes_ht");
563 ret = -1;
7591bab1
MD
564 goto end;
565 }
2a174661 566
c35f9726 567 pthread_mutex_lock(&stream->lock);
348a81dc 568 ret = stream_set_trace_chunk(stream, current_trace_chunk);
c35f9726 569 pthread_mutex_unlock(&stream->lock);
348a81dc
JG
570 if (ret) {
571 ERR("Failed to set the current trace chunk of session \"%s\" on newly created stream of channel \"%s\"",
572 trace->session->session_name,
573 stream->channel_name);
7591bab1
MD
574 ret = -1;
575 goto end;
2a174661 576 }
a44ca2ca
MD
577 stream->tfa = tracefile_array_create(stream->tracefile_count);
578 if (!stream->tfa) {
579 ret = -1;
580 goto end;
581 }
7591bab1 582
348a81dc
JG
583 stream->is_metadata = !strcmp(stream->channel_name,
584 DEFAULT_METADATA_NAME);
7591bab1
MD
585 stream->in_recv_list = true;
586
587 /*
588 * Add the stream in the recv list of the session. Once the end stream
589 * message is received, all session streams are published.
590 */
591 pthread_mutex_lock(&session->recv_list_lock);
592 cds_list_add_rcu(&stream->recv_node, &session->recv_list);
593 session->stream_count++;
594 pthread_mutex_unlock(&session->recv_list_lock);
595
596 /*
597 * Both in the ctf_trace object and the global stream ht since the data
598 * side of the relayd does not have the concept of session.
599 */
600 lttng_ht_add_unique_u64(relay_streams_ht, &stream->node);
77f7bd85 601 stream->in_stream_ht = true;
2a174661 602
7591bab1
MD
603 DBG("Relay new stream added %s with ID %" PRIu64, stream->channel_name,
604 stream->stream_handle);
605 ret = 0;
606
607end:
608 if (ret) {
609 if (stream->stream_fd) {
610 stream_fd_put(stream->stream_fd);
611 stream->stream_fd = NULL;
2a174661 612 }
7591bab1
MD
613 stream_put(stream);
614 stream = NULL;
2a174661 615 }
348a81dc 616 lttng_trace_chunk_put(current_trace_chunk);
7591bab1 617 return stream;
2a174661 618
7591bab1
MD
619error_no_alloc:
620 /*
621 * path_name and channel_name need to be freed explicitly here
622 * because we cannot rely on stream_put().
623 */
624 free(path_name);
625 free(channel_name);
626 return NULL;
627}
628
629/*
630 * Called with the session lock held.
631 */
632void stream_publish(struct relay_stream *stream)
633{
634 struct relay_session *session;
635
636 pthread_mutex_lock(&stream->lock);
637 if (stream->published) {
638 goto unlock;
2a174661
DG
639 }
640
7591bab1 641 session = stream->trace->session;
2a174661 642
7591bab1
MD
643 pthread_mutex_lock(&session->recv_list_lock);
644 if (stream->in_recv_list) {
645 cds_list_del_rcu(&stream->recv_node);
646 stream->in_recv_list = false;
647 }
648 pthread_mutex_unlock(&session->recv_list_lock);
2a174661 649
7591bab1
MD
650 pthread_mutex_lock(&stream->trace->stream_list_lock);
651 cds_list_add_rcu(&stream->stream_node, &stream->trace->stream_list);
652 pthread_mutex_unlock(&stream->trace->stream_list_lock);
2a174661 653
7591bab1
MD
654 stream->published = true;
655unlock:
2a174661 656 pthread_mutex_unlock(&stream->lock);
2a174661
DG
657}
658
7591bab1 659/*
77f7bd85 660 * Stream must be protected by holding the stream lock or by virtue of being
ce4d4083 661 * called from stream_destroy.
7591bab1
MD
662 */
663static void stream_unpublish(struct relay_stream *stream)
2a174661 664{
77f7bd85
MD
665 if (stream->in_stream_ht) {
666 struct lttng_ht_iter iter;
667 int ret;
668
669 iter.iter.node = &stream->node.node;
670 ret = lttng_ht_del(relay_streams_ht, &iter);
671 assert(!ret);
672 stream->in_stream_ht = false;
673 }
674 if (stream->published) {
675 pthread_mutex_lock(&stream->trace->stream_list_lock);
676 cds_list_del_rcu(&stream->stream_node);
677 pthread_mutex_unlock(&stream->trace->stream_list_lock);
678 stream->published = false;
7591bab1 679 }
7591bab1
MD
680}
681
682static void stream_destroy(struct relay_stream *stream)
683{
684 if (stream->indexes_ht) {
49e614cb
MD
685 /*
686 * Calling lttng_ht_destroy in call_rcu worker thread so
687 * we don't hold the RCU read-side lock while calling
688 * it.
689 */
7591bab1
MD
690 lttng_ht_destroy(stream->indexes_ht);
691 }
a44ca2ca
MD
692 if (stream->tfa) {
693 tracefile_array_destroy(stream->tfa);
694 }
7591bab1
MD
695 free(stream->path_name);
696 free(stream->channel_name);
697 free(stream);
698}
699
700static void stream_destroy_rcu(struct rcu_head *rcu_head)
701{
702 struct relay_stream *stream =
703 caa_container_of(rcu_head, struct relay_stream, rcu_node);
704
705 stream_destroy(stream);
706}
707
708/*
709 * No need to take stream->lock since this is only called on the final
710 * stream_put which ensures that a single thread may act on the stream.
7591bab1
MD
711 */
712static void stream_release(struct urcu_ref *ref)
713{
714 struct relay_stream *stream =
715 caa_container_of(ref, struct relay_stream, ref);
716 struct relay_session *session;
2a174661 717
7591bab1
MD
718 session = stream->trace->session;
719
720 DBG("Releasing stream id %" PRIu64, stream->stream_handle);
721
722 pthread_mutex_lock(&session->recv_list_lock);
723 session->stream_count--;
724 if (stream->in_recv_list) {
725 cds_list_del_rcu(&stream->recv_node);
726 stream->in_recv_list = false;
727 }
728 pthread_mutex_unlock(&session->recv_list_lock);
2a174661 729
7591bab1
MD
730 stream_unpublish(stream);
731
732 if (stream->stream_fd) {
733 stream_fd_put(stream->stream_fd);
734 stream->stream_fd = NULL;
735 }
f8f3885c
MD
736 if (stream->index_file) {
737 lttng_index_file_put(stream->index_file);
738 stream->index_file = NULL;
7591bab1
MD
739 }
740 if (stream->trace) {
741 ctf_trace_put(stream->trace);
742 stream->trace = NULL;
743 }
c35f9726 744 stream_complete_rotation(stream);
348a81dc
JG
745 lttng_trace_chunk_put(stream->trace_chunk);
746 stream->trace_chunk = NULL;
7591bab1
MD
747
748 call_rcu(&stream->rcu_node, stream_destroy_rcu);
2a174661
DG
749}
750
7591bab1 751void stream_put(struct relay_stream *stream)
2a174661 752{
7591bab1 753 rcu_read_lock();
7591bab1
MD
754 assert(stream->ref.refcount != 0);
755 /*
756 * Wait until we have processed all the stream packets before
757 * actually putting our last stream reference.
758 */
7591bab1 759 urcu_ref_put(&stream->ref, stream_release);
7591bab1
MD
760 rcu_read_unlock();
761}
762
c35f9726
JG
763int stream_set_pending_rotation(struct relay_stream *stream,
764 struct lttng_trace_chunk *next_trace_chunk,
765 uint64_t rotation_sequence_number)
766{
767 int ret = 0;
768 const struct relay_stream_rotation rotation = {
769 .seq_num = rotation_sequence_number,
770 .next_trace_chunk = next_trace_chunk,
771 };
772
773 if (stream->ongoing_rotation.is_set) {
774 ERR("Attempted to set a pending rotation on a stream already being rotated (protocol error)");
775 ret = -1;
776 goto end;
777 }
778
779 if (next_trace_chunk) {
780 const bool reference_acquired =
781 lttng_trace_chunk_get(next_trace_chunk);
782
783 assert(reference_acquired);
784 }
785 LTTNG_OPTIONAL_SET(&stream->ongoing_rotation, rotation);
786
787 DBG("Setting pending rotation: stream_id = %" PRIu64 ", rotation_seq_num = %" PRIu64,
788 stream->stream_handle, rotation_sequence_number);
789 if (stream->is_metadata) {
790 /*
791 * A metadata stream has no index; consider it already rotated.
792 */
793 stream->ongoing_rotation.value.index_rotated = true;
794 ret = stream_rotate_data_file(stream);
795 } else {
796 ret = try_rotate_stream_data(stream);
797 if (ret < 0) {
798 goto end;
799 }
800
801 ret = try_rotate_stream_index(stream);
802 if (ret < 0) {
803 goto end;
804 }
805 }
806end:
807 return ret;
808}
809
bda7c7b9 810void try_stream_close(struct relay_stream *stream)
7591bab1 811{
98ba050e
JR
812 bool session_aborted;
813 struct relay_session *session = stream->trace->session;
814
bda7c7b9 815 DBG("Trying to close stream %" PRIu64, stream->stream_handle);
98ba050e
JR
816
817 pthread_mutex_lock(&session->lock);
818 session_aborted = session->aborted;
819 pthread_mutex_unlock(&session->lock);
820
7591bab1 821 pthread_mutex_lock(&stream->lock);
bda7c7b9
JG
822 /*
823 * Can be called concurently by connection close and reception of last
824 * pending data.
825 */
826 if (stream->closed) {
827 pthread_mutex_unlock(&stream->lock);
828 DBG("closing stream %" PRIu64 " aborted since it is already marked as closed", stream->stream_handle);
829 return;
830 }
831
832 stream->close_requested = true;
3d07a857
MD
833
834 if (stream->last_net_seq_num == -1ULL) {
835 /*
836 * Handle connection close without explicit stream close
837 * command.
838 *
839 * We can be clever about indexes partially received in
840 * cases where we received the data socket part, but not
841 * the control socket part: since we're currently closing
842 * the stream on behalf of the control socket, we *know*
843 * there won't be any more control information for this
844 * socket. Therefore, we can destroy all indexes for
845 * which we have received only the file descriptor (from
846 * data socket). This takes care of consumerd crashes
847 * between sending the data and control information for
848 * a packet. Since those are sent in that order, we take
849 * care of consumerd crashes.
850 */
5312a3ed 851 DBG("relay_index_close_partial_fd");
3d07a857
MD
852 relay_index_close_partial_fd(stream);
853 /*
854 * Use the highest net_seq_num we currently have pending
855 * As end of stream indicator. Leave last_net_seq_num
856 * at -1ULL if we cannot find any index.
857 */
858 stream->last_net_seq_num = relay_index_find_last(stream);
5312a3ed 859 DBG("Updating stream->last_net_seq_num to %" PRIu64, stream->last_net_seq_num);
3d07a857
MD
860 /* Fall-through into the next check. */
861 }
862
bda7c7b9 863 if (stream->last_net_seq_num != -1ULL &&
a8f9f353 864 ((int64_t) (stream->prev_data_seq - stream->last_net_seq_num)) < 0
98ba050e 865 && !session_aborted) {
3d07a857
MD
866 /*
867 * Don't close since we still have data pending. This
868 * handles cases where an explicit close command has
869 * been received for this stream, and cases where the
870 * connection has been closed, and we are awaiting for
871 * index information from the data socket. It is
872 * therefore expected that all the index fd information
873 * we need has already been received on the control
874 * socket. Matching index information from data socket
875 * should be Expected Soon(TM).
876 *
877 * TODO: We should implement a timer to garbage collect
878 * streams after a timeout to be resilient against a
879 * consumerd implementation that would not match this
880 * expected behavior.
881 */
bda7c7b9
JG
882 pthread_mutex_unlock(&stream->lock);
883 DBG("closing stream %" PRIu64 " aborted since it still has data pending", stream->stream_handle);
884 return;
885 }
3d07a857
MD
886 /*
887 * We received all the indexes we can expect.
888 */
77f7bd85 889 stream_unpublish(stream);
2229a09c 890 stream->closed = true;
bda7c7b9 891 /* Relay indexes are only used by the "consumer/sessiond" end. */
7591bab1
MD
892 relay_index_close_all(stream);
893 pthread_mutex_unlock(&stream->lock);
bda7c7b9 894 DBG("Succeeded in closing stream %" PRIu64, stream->stream_handle);
7591bab1
MD
895 stream_put(stream);
896}
897
c35f9726
JG
898int stream_init_packet(struct relay_stream *stream, size_t packet_size,
899 bool *file_rotated)
900{
901 int ret = 0;
902
903 ASSERT_LOCKED(stream->lock);
904 if (caa_likely(stream->tracefile_size == 0)) {
905 /* No size limit set; nothing to check. */
906 goto end;
907 }
908
909 /*
910 * Check if writing the new packet would exceed the maximal file size.
911 */
912 if (caa_unlikely((stream->tracefile_size_current + packet_size) >
913 stream->tracefile_size)) {
914 const uint64_t new_file_index =
915 (stream->tracefile_current_index + 1) %
916 stream->tracefile_count;
917
918 if (new_file_index < stream->tracefile_current_index) {
919 stream->tracefile_wrapped_around = true;
920 }
921 DBG("New stream packet causes stream file rotation: stream_id = %" PRIu64
922 ", current_file_size = %" PRIu64
923 ", packet_size = %" PRIu64 ", current_file_index = %" PRIu64
924 " new_file_index = %" PRIu64,
925 stream->stream_handle,
926 stream->tracefile_size_current, packet_size,
927 stream->tracefile_current_index, new_file_index);
928 tracefile_array_file_rotate(stream->tfa);
929 stream->tracefile_current_index = new_file_index;
930
931 if (stream->stream_fd) {
932 stream_fd_put(stream->stream_fd);
933 stream->stream_fd = NULL;
934 }
935 ret = stream_create_data_output_file_from_trace_chunk(stream,
936 stream->trace_chunk, false, &stream->stream_fd);
937 if (ret) {
938 ERR("Failed to perform trace file rotation of stream %" PRIu64,
939 stream->stream_handle);
940 goto end;
941 }
942
943 /*
944 * Reset current size because we just performed a stream
945 * rotation.
946 */
947 stream->tracefile_size_current = 0;
948 *file_rotated = true;
949 } else {
950 *file_rotated = false;
951 }
952end:
953 return ret;
954}
955
956/* Note that the packet is not necessarily complete. */
957int stream_write(struct relay_stream *stream,
958 const struct lttng_buffer_view *packet, size_t padding_len)
959{
960 int ret = 0;
961 ssize_t write_ret;
962 size_t padding_to_write = padding_len;
963 char padding_buffer[FILE_IO_STACK_BUFFER_SIZE];
964
965 ASSERT_LOCKED(stream->lock);
966 memset(padding_buffer, 0,
967 min(sizeof(padding_buffer), padding_to_write));
968
969 if (packet) {
970 write_ret = lttng_write(stream->stream_fd->fd,
971 packet->data, packet->size);
972 if (write_ret != packet->size) {
973 PERROR("Failed to write to stream file of %sstream %" PRIu64,
974 stream->is_metadata ? "metadata " : "",
975 stream->stream_handle);
976 ret = -1;
977 goto end;
978 }
979 }
980
981 while (padding_to_write > 0) {
982 const size_t padding_to_write_this_pass =
983 min(padding_to_write, sizeof(padding_buffer));
984
985 write_ret = lttng_write(stream->stream_fd->fd,
986 padding_buffer, padding_to_write_this_pass);
987 if (write_ret != padding_to_write_this_pass) {
988 PERROR("Failed to write padding to file of %sstream %" PRIu64,
989 stream->is_metadata ? "metadata " : "",
990 stream->stream_handle);
991 ret = -1;
992 goto end;
993 }
994 padding_to_write -= padding_to_write_this_pass;
995 }
996
997 if (stream->is_metadata) {
998 stream->metadata_received += packet->size + padding_len;
999 }
1000
1001 DBG("Wrote to %sstream %" PRIu64 ": data_length = %" PRIu64 ", padding_length = %" PRIu64,
1002 stream->is_metadata ? "metadata " : "",
1003 stream->stream_handle,
1004 packet ? packet->size : 0, padding_len);
1005end:
1006 return ret;
1007}
1008
1009/*
1010 * Update index after receiving a packet for a data stream.
1011 *
1012 * Called with the stream lock held.
1013 *
1014 * Return 0 on success else a negative value.
1015 */
1016int stream_update_index(struct relay_stream *stream, uint64_t net_seq_num,
1017 bool rotate_index, bool *flushed, uint64_t total_size)
1018{
1019 int ret = 0;
1020 uint64_t data_offset;
1021 struct relay_index *index;
1022
1023 ASSERT_LOCKED(stream->lock);
1024 /* Get data offset because we are about to update the index. */
1025 data_offset = htobe64(stream->tracefile_size_current);
1026
1027 DBG("handle_index_data: stream %" PRIu64 " net_seq_num %" PRIu64 " data offset %" PRIu64,
1028 stream->stream_handle, net_seq_num, stream->tracefile_size_current);
1029
1030 /*
1031 * Lookup for an existing index for that stream id/sequence
1032 * number. If it exists, the control thread has already received the
1033 * data for it, thus we need to write it to disk.
1034 */
1035 index = relay_index_get_by_id_or_create(stream, net_seq_num);
1036 if (!index) {
1037 ret = -1;
1038 goto end;
1039 }
1040
1041 if (rotate_index || !stream->index_file) {
1042 ret = create_index_file(stream, stream->trace_chunk);
1043 if (ret) {
1044 ERR("Failed to create index file for stream %" PRIu64,
1045 stream->stream_handle);
1046 /* Put self-ref for this index due to error. */
1047 relay_index_put(index);
1048 index = NULL;
1049 goto end;
1050 }
1051 }
1052
1053 if (relay_index_set_file(index, stream->index_file, data_offset)) {
1054 ret = -1;
1055 /* Put self-ref for this index due to error. */
1056 relay_index_put(index);
1057 index = NULL;
1058 goto end;
1059 }
1060
1061 ret = relay_index_try_flush(index);
1062 if (ret == 0) {
1063 tracefile_array_commit_seq(stream->tfa);
1064 stream->index_received_seqcount++;
1065 *flushed = true;
1066 } else if (ret > 0) {
1067 index->total_size = total_size;
1068 /* No flush. */
1069 ret = 0;
1070 } else {
1071 /*
1072 * ret < 0
1073 *
1074 * relay_index_try_flush is responsible for the self-reference
1075 * put of the index object on error.
1076 */
1077 ERR("relay_index_try_flush error %d", ret);
1078 ret = -1;
1079 }
1080end:
1081 return ret;
1082}
1083
1084int stream_complete_packet(struct relay_stream *stream, size_t packet_total_size,
1085 uint64_t sequence_number, bool index_flushed)
1086{
1087 int ret = 0;
1088
1089 ASSERT_LOCKED(stream->lock);
1090
1091 stream->tracefile_size_current += packet_total_size;
1092 if (index_flushed) {
1093 stream->pos_after_last_complete_data_index =
1094 stream->tracefile_size_current;
1095 stream->prev_index_seq = sequence_number;
1096 ret = try_rotate_stream_index(stream);
1097 if (ret < 0) {
1098 goto end;
1099 }
1100 }
1101
1102 stream->prev_data_seq = sequence_number;
1103 ret = try_rotate_stream_data(stream);
1104 if (ret < 0) {
1105 goto end;
1106 }
1107end:
1108 return ret;
1109}
1110
1111int stream_add_index(struct relay_stream *stream,
1112 const struct lttcomm_relayd_index *index_info)
1113{
1114 int ret = 0;
1115 struct relay_index *index;
1116
1117 ASSERT_LOCKED(stream->lock);
1118
1119 /* Live beacon handling */
1120 if (index_info->packet_size == 0) {
1121 DBG("Received live beacon for stream %" PRIu64,
1122 stream->stream_handle);
1123
1124 /*
1125 * Only flag a stream inactive when it has already
1126 * received data and no indexes are in flight.
1127 */
1128 if (stream->index_received_seqcount > 0
1129 && stream->indexes_in_flight == 0) {
1130 stream->beacon_ts_end = index_info->timestamp_end;
1131 }
1132 ret = 0;
1133 goto end;
1134 } else {
1135 stream->beacon_ts_end = -1ULL;
1136 }
1137
1138 if (stream->ctf_stream_id == -1ULL) {
1139 stream->ctf_stream_id = index_info->stream_id;
1140 }
1141
1142 index = relay_index_get_by_id_or_create(stream, index_info->net_seq_num);
1143 if (!index) {
1144 ret = -1;
1145 ERR("Failed to get or create index %" PRIu64,
1146 index_info->net_seq_num);
1147 goto end;
1148 }
1149 if (relay_index_set_control_data(index, index_info,
1150 stream->trace->session->minor)) {
1151 ERR("set_index_control_data error");
1152 relay_index_put(index);
1153 ret = -1;
1154 goto end;
1155 }
1156 ret = relay_index_try_flush(index);
1157 if (ret == 0) {
1158 tracefile_array_commit_seq(stream->tfa);
1159 stream->index_received_seqcount++;
1160 stream->pos_after_last_complete_data_index += index->total_size;
1161 stream->prev_index_seq = index_info->net_seq_num;
1162
1163 ret = try_rotate_stream_index(stream);
1164 if (ret < 0) {
1165 goto end;
1166 }
1167 } else if (ret > 0) {
1168 /* no flush. */
1169 ret = 0;
1170 } else {
1171 /*
1172 * ret < 0
1173 *
1174 * relay_index_try_flush is responsible for the self-reference
1175 * put of the index object on error.
1176 */
1177 ERR("relay_index_try_flush error %d", ret);
1178 ret = -1;
1179 }
1180end:
1181 return ret;
1182}
1183
da412cde
MD
1184static void print_stream_indexes(struct relay_stream *stream)
1185{
1186 struct lttng_ht_iter iter;
1187 struct relay_index *index;
1188
1189 rcu_read_lock();
1190 cds_lfht_for_each_entry(stream->indexes_ht->ht, &iter.iter, index,
1191 index_n.node) {
1192 DBG("index %p net_seq_num %" PRIu64 " refcount %ld"
1193 " stream %" PRIu64 " trace %" PRIu64
1194 " session %" PRIu64,
1195 index,
1196 index->index_n.key,
1197 stream->ref.refcount,
1198 index->stream->stream_handle,
1199 index->stream->trace->id,
1200 index->stream->trace->session->id);
1201 }
1202 rcu_read_unlock();
1203}
1204
c35f9726
JG
1205int stream_reset_file(struct relay_stream *stream)
1206{
1207 ASSERT_LOCKED(stream->lock);
1208
1209 if (stream->stream_fd) {
1210 stream_fd_put(stream->stream_fd);
1211 stream->stream_fd = NULL;
1212 }
1213
1214 stream->tracefile_size_current = 0;
1215 stream->prev_data_seq = 0;
1216 stream->prev_index_seq = 0;
1217 /* Note that this does not reset the tracefile array. */
1218 stream->tracefile_current_index = 0;
1219 stream->pos_after_last_complete_data_index = 0;
1220
1221 return stream_create_data_output_file_from_trace_chunk(stream,
1222 stream->trace_chunk, true, &stream->stream_fd);
1223}
1224
7591bab1
MD
1225void print_relay_streams(void)
1226{
1227 struct lttng_ht_iter iter;
1228 struct relay_stream *stream;
1229
ce3f3ba3
JG
1230 if (!relay_streams_ht) {
1231 return;
1232 }
1233
7591bab1
MD
1234 rcu_read_lock();
1235 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, stream,
1236 node.node) {
1237 if (!stream_get(stream)) {
1238 continue;
1239 }
1240 DBG("stream %p refcount %ld stream %" PRIu64 " trace %" PRIu64
1241 " session %" PRIu64,
1242 stream,
1243 stream->ref.refcount,
1244 stream->stream_handle,
1245 stream->trace->id,
1246 stream->trace->session->id);
da412cde 1247 print_stream_indexes(stream);
7591bab1
MD
1248 stream_put(stream);
1249 }
1250 rcu_read_unlock();
2a174661 1251}
This page took 0.102646 seconds and 5 git commands to generate.