kconsumer: clean-up: initialize ctf_index before populating it
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <assert.h>
22 #include <poll.h>
23 #include <pthread.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <inttypes.h>
30 #include <unistd.h>
31 #include <sys/stat.h>
32
33 #include <bin/lttng-consumerd/health-consumerd.h>
34 #include <common/common.h>
35 #include <common/kernel-ctl/kernel-ctl.h>
36 #include <common/sessiond-comm/sessiond-comm.h>
37 #include <common/sessiond-comm/relayd.h>
38 #include <common/compat/fcntl.h>
39 #include <common/compat/endian.h>
40 #include <common/pipe.h>
41 #include <common/relayd/relayd.h>
42 #include <common/utils.h>
43 #include <common/consumer/consumer-stream.h>
44 #include <common/index/index.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/optional.h>
47
48 #include "kernel-consumer.h"
49
50 extern struct lttng_consumer_global_data consumer_data;
51 extern int consumer_poll_timeout;
52
53 /*
54 * Take a snapshot for a specific fd
55 *
56 * Returns 0 on success, < 0 on error
57 */
58 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
59 {
60 int ret = 0;
61 int infd = stream->wait_fd;
62
63 ret = kernctl_snapshot(infd);
64 /*
65 * -EAGAIN is not an error, it just means that there is no data to
66 * be read.
67 */
68 if (ret != 0 && ret != -EAGAIN) {
69 PERROR("Getting sub-buffer snapshot.");
70 }
71
72 return ret;
73 }
74
75 /*
76 * Sample consumed and produced positions for a specific fd.
77 *
78 * Returns 0 on success, < 0 on error.
79 */
80 int lttng_kconsumer_sample_snapshot_positions(
81 struct lttng_consumer_stream *stream)
82 {
83 assert(stream);
84
85 return kernctl_snapshot_sample_positions(stream->wait_fd);
86 }
87
88 /*
89 * Get the produced position
90 *
91 * Returns 0 on success, < 0 on error
92 */
93 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
94 unsigned long *pos)
95 {
96 int ret;
97 int infd = stream->wait_fd;
98
99 ret = kernctl_snapshot_get_produced(infd, pos);
100 if (ret != 0) {
101 PERROR("kernctl_snapshot_get_produced");
102 }
103
104 return ret;
105 }
106
107 /*
108 * Get the consumerd position
109 *
110 * Returns 0 on success, < 0 on error
111 */
112 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
113 unsigned long *pos)
114 {
115 int ret;
116 int infd = stream->wait_fd;
117
118 ret = kernctl_snapshot_get_consumed(infd, pos);
119 if (ret != 0) {
120 PERROR("kernctl_snapshot_get_consumed");
121 }
122
123 return ret;
124 }
125
126 /*
127 * Take a snapshot of all the stream of a channel
128 * RCU read-side lock must be held across this function to ensure existence of
129 * channel. The channel lock must be held by the caller.
130 *
131 * Returns 0 on success, < 0 on error
132 */
133 static int lttng_kconsumer_snapshot_channel(
134 struct lttng_consumer_channel *channel,
135 uint64_t key, char *path, uint64_t relayd_id,
136 uint64_t nb_packets_per_stream,
137 struct lttng_consumer_local_data *ctx)
138 {
139 int ret;
140 struct lttng_consumer_stream *stream;
141
142 DBG("Kernel consumer snapshot channel %" PRIu64, key);
143
144 rcu_read_lock();
145
146 /* Splice is not supported yet for channel snapshot. */
147 if (channel->output != CONSUMER_CHANNEL_MMAP) {
148 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
149 channel->name);
150 ret = -1;
151 goto end;
152 }
153
154 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
155 unsigned long consumed_pos, produced_pos;
156
157 health_code_update();
158
159 /*
160 * Lock stream because we are about to change its state.
161 */
162 pthread_mutex_lock(&stream->lock);
163
164 assert(channel->trace_chunk);
165 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
166 /*
167 * Can't happen barring an internal error as the channel
168 * holds a reference to the trace chunk.
169 */
170 ERR("Failed to acquire reference to channel's trace chunk");
171 ret = -1;
172 goto end_unlock;
173 }
174 assert(!stream->trace_chunk);
175 stream->trace_chunk = channel->trace_chunk;
176
177 /*
178 * Assign the received relayd ID so we can use it for streaming. The streams
179 * are not visible to anyone so this is OK to change it.
180 */
181 stream->net_seq_idx = relayd_id;
182 channel->relayd_id = relayd_id;
183 if (relayd_id != (uint64_t) -1ULL) {
184 ret = consumer_send_relayd_stream(stream, path);
185 if (ret < 0) {
186 ERR("sending stream to relayd");
187 goto end_unlock;
188 }
189 } else {
190 ret = consumer_stream_create_output_files(stream,
191 false);
192 if (ret < 0) {
193 goto end_unlock;
194 }
195 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
196 stream->key);
197 }
198
199 ret = kernctl_buffer_flush_empty(stream->wait_fd);
200 if (ret < 0) {
201 /*
202 * Doing a buffer flush which does not take into
203 * account empty packets. This is not perfect
204 * for stream intersection, but required as a
205 * fall-back when "flush_empty" is not
206 * implemented by lttng-modules.
207 */
208 ret = kernctl_buffer_flush(stream->wait_fd);
209 if (ret < 0) {
210 ERR("Failed to flush kernel stream");
211 goto end_unlock;
212 }
213 goto end_unlock;
214 }
215
216 ret = lttng_kconsumer_take_snapshot(stream);
217 if (ret < 0) {
218 ERR("Taking kernel snapshot");
219 goto end_unlock;
220 }
221
222 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
223 if (ret < 0) {
224 ERR("Produced kernel snapshot position");
225 goto end_unlock;
226 }
227
228 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
229 if (ret < 0) {
230 ERR("Consumerd kernel snapshot position");
231 goto end_unlock;
232 }
233
234 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
235 produced_pos, nb_packets_per_stream,
236 stream->max_sb_size);
237
238 while ((long) (consumed_pos - produced_pos) < 0) {
239 ssize_t read_len;
240 unsigned long len, padded_len;
241
242 health_code_update();
243
244 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
245
246 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
247 if (ret < 0) {
248 if (ret != -EAGAIN) {
249 PERROR("kernctl_get_subbuf snapshot");
250 goto end_unlock;
251 }
252 DBG("Kernel consumer get subbuf failed. Skipping it.");
253 consumed_pos += stream->max_sb_size;
254 stream->chan->lost_packets++;
255 continue;
256 }
257
258 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
259 if (ret < 0) {
260 ERR("Snapshot kernctl_get_subbuf_size");
261 goto error_put_subbuf;
262 }
263
264 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
265 if (ret < 0) {
266 ERR("Snapshot kernctl_get_padded_subbuf_size");
267 goto error_put_subbuf;
268 }
269
270 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len,
271 padded_len - len, NULL);
272 /*
273 * We write the padded len in local tracefiles but the data len
274 * when using a relay. Display the error but continue processing
275 * to try to release the subbuffer.
276 */
277 if (relayd_id != (uint64_t) -1ULL) {
278 if (read_len != len) {
279 ERR("Error sending to the relay (ret: %zd != len: %lu)",
280 read_len, len);
281 }
282 } else {
283 if (read_len != padded_len) {
284 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
285 read_len, padded_len);
286 }
287 }
288
289 ret = kernctl_put_subbuf(stream->wait_fd);
290 if (ret < 0) {
291 ERR("Snapshot kernctl_put_subbuf");
292 goto end_unlock;
293 }
294 consumed_pos += stream->max_sb_size;
295 }
296
297 if (relayd_id == (uint64_t) -1ULL) {
298 if (stream->out_fd >= 0) {
299 ret = close(stream->out_fd);
300 if (ret < 0) {
301 PERROR("Kernel consumer snapshot close out_fd");
302 goto end_unlock;
303 }
304 stream->out_fd = -1;
305 }
306 } else {
307 close_relayd_stream(stream);
308 stream->net_seq_idx = (uint64_t) -1ULL;
309 }
310 lttng_trace_chunk_put(stream->trace_chunk);
311 stream->trace_chunk = NULL;
312 pthread_mutex_unlock(&stream->lock);
313 }
314
315 /* All good! */
316 ret = 0;
317 goto end;
318
319 error_put_subbuf:
320 ret = kernctl_put_subbuf(stream->wait_fd);
321 if (ret < 0) {
322 ERR("Snapshot kernctl_put_subbuf error path");
323 }
324 end_unlock:
325 pthread_mutex_unlock(&stream->lock);
326 end:
327 rcu_read_unlock();
328 return ret;
329 }
330
331 /*
332 * Read the whole metadata available for a snapshot.
333 * RCU read-side lock must be held across this function to ensure existence of
334 * metadata_channel. The channel lock must be held by the caller.
335 *
336 * Returns 0 on success, < 0 on error
337 */
338 static int lttng_kconsumer_snapshot_metadata(
339 struct lttng_consumer_channel *metadata_channel,
340 uint64_t key, char *path, uint64_t relayd_id,
341 struct lttng_consumer_local_data *ctx)
342 {
343 int ret, use_relayd = 0;
344 ssize_t ret_read;
345 struct lttng_consumer_stream *metadata_stream;
346
347 assert(ctx);
348
349 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
350 key, path);
351
352 rcu_read_lock();
353
354 metadata_stream = metadata_channel->metadata_stream;
355 assert(metadata_stream);
356
357 pthread_mutex_lock(&metadata_stream->lock);
358 assert(metadata_channel->trace_chunk);
359 assert(metadata_stream->trace_chunk);
360
361 /* Flag once that we have a valid relayd for the stream. */
362 if (relayd_id != (uint64_t) -1ULL) {
363 use_relayd = 1;
364 }
365
366 if (use_relayd) {
367 ret = consumer_send_relayd_stream(metadata_stream, path);
368 if (ret < 0) {
369 goto error_snapshot;
370 }
371 } else {
372 ret = consumer_stream_create_output_files(metadata_stream,
373 false);
374 if (ret < 0) {
375 goto error_snapshot;
376 }
377 }
378
379 do {
380 health_code_update();
381
382 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
383 if (ret_read < 0) {
384 if (ret_read != -EAGAIN) {
385 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
386 ret_read);
387 ret = ret_read;
388 goto error_snapshot;
389 }
390 /* ret_read is negative at this point so we will exit the loop. */
391 continue;
392 }
393 } while (ret_read >= 0);
394
395 if (use_relayd) {
396 close_relayd_stream(metadata_stream);
397 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
398 } else {
399 if (metadata_stream->out_fd >= 0) {
400 ret = close(metadata_stream->out_fd);
401 if (ret < 0) {
402 PERROR("Kernel consumer snapshot metadata close out_fd");
403 /*
404 * Don't go on error here since the snapshot was successful at this
405 * point but somehow the close failed.
406 */
407 }
408 metadata_stream->out_fd = -1;
409 lttng_trace_chunk_put(metadata_stream->trace_chunk);
410 metadata_stream->trace_chunk = NULL;
411 }
412 }
413
414 ret = 0;
415 error_snapshot:
416 pthread_mutex_unlock(&metadata_stream->lock);
417 cds_list_del(&metadata_stream->send_node);
418 consumer_stream_destroy(metadata_stream, NULL);
419 metadata_channel->metadata_stream = NULL;
420 rcu_read_unlock();
421 return ret;
422 }
423
424 /*
425 * Receive command from session daemon and process it.
426 *
427 * Return 1 on success else a negative value or 0.
428 */
429 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
430 int sock, struct pollfd *consumer_sockpoll)
431 {
432 ssize_t ret;
433 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
434 struct lttcomm_consumer_msg msg;
435
436 health_code_update();
437
438 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
439 if (ret != sizeof(msg)) {
440 if (ret > 0) {
441 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
442 ret = -1;
443 }
444 return ret;
445 }
446
447 health_code_update();
448
449 /* Deprecated command */
450 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
451
452 health_code_update();
453
454 /* relayd needs RCU read-side protection */
455 rcu_read_lock();
456
457 switch (msg.cmd_type) {
458 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
459 {
460 /* Session daemon status message are handled in the following call. */
461 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
462 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
463 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
464 msg.u.relayd_sock.relayd_session_id);
465 goto end_nosignal;
466 }
467 case LTTNG_CONSUMER_ADD_CHANNEL:
468 {
469 struct lttng_consumer_channel *new_channel;
470 int ret_recv;
471 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
472
473 health_code_update();
474
475 /* First send a status message before receiving the fds. */
476 ret = consumer_send_status_msg(sock, ret_code);
477 if (ret < 0) {
478 /* Somehow, the session daemon is not responding anymore. */
479 goto error_fatal;
480 }
481
482 health_code_update();
483
484 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
485 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
486 msg.u.channel.session_id,
487 msg.u.channel.chunk_id.is_set ?
488 &chunk_id : NULL,
489 msg.u.channel.pathname,
490 msg.u.channel.name,
491 msg.u.channel.relayd_id, msg.u.channel.output,
492 msg.u.channel.tracefile_size,
493 msg.u.channel.tracefile_count, 0,
494 msg.u.channel.monitor,
495 msg.u.channel.live_timer_interval,
496 NULL, NULL);
497 if (new_channel == NULL) {
498 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
499 goto end_nosignal;
500 }
501 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
502 switch (msg.u.channel.output) {
503 case LTTNG_EVENT_SPLICE:
504 new_channel->output = CONSUMER_CHANNEL_SPLICE;
505 break;
506 case LTTNG_EVENT_MMAP:
507 new_channel->output = CONSUMER_CHANNEL_MMAP;
508 break;
509 default:
510 ERR("Channel output unknown %d", msg.u.channel.output);
511 goto end_nosignal;
512 }
513
514 /* Translate and save channel type. */
515 switch (msg.u.channel.type) {
516 case CONSUMER_CHANNEL_TYPE_DATA:
517 case CONSUMER_CHANNEL_TYPE_METADATA:
518 new_channel->type = msg.u.channel.type;
519 break;
520 default:
521 assert(0);
522 goto end_nosignal;
523 };
524
525 health_code_update();
526
527 if (ctx->on_recv_channel != NULL) {
528 ret_recv = ctx->on_recv_channel(new_channel);
529 if (ret_recv == 0) {
530 ret = consumer_add_channel(new_channel, ctx);
531 } else if (ret_recv < 0) {
532 goto end_nosignal;
533 }
534 } else {
535 ret = consumer_add_channel(new_channel, ctx);
536 }
537 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
538 int monitor_start_ret;
539
540 DBG("Consumer starting monitor timer");
541 consumer_timer_live_start(new_channel,
542 msg.u.channel.live_timer_interval);
543 monitor_start_ret = consumer_timer_monitor_start(
544 new_channel,
545 msg.u.channel.monitor_timer_interval);
546 if (monitor_start_ret < 0) {
547 ERR("Starting channel monitoring timer failed");
548 goto end_nosignal;
549 }
550
551 }
552
553 health_code_update();
554
555 /* If we received an error in add_channel, we need to report it. */
556 if (ret < 0) {
557 ret = consumer_send_status_msg(sock, ret);
558 if (ret < 0) {
559 goto error_fatal;
560 }
561 goto end_nosignal;
562 }
563
564 goto end_nosignal;
565 }
566 case LTTNG_CONSUMER_ADD_STREAM:
567 {
568 int fd;
569 struct lttng_pipe *stream_pipe;
570 struct lttng_consumer_stream *new_stream;
571 struct lttng_consumer_channel *channel;
572 int alloc_ret = 0;
573
574 /*
575 * Get stream's channel reference. Needed when adding the stream to the
576 * global hash table.
577 */
578 channel = consumer_find_channel(msg.u.stream.channel_key);
579 if (!channel) {
580 /*
581 * We could not find the channel. Can happen if cpu hotplug
582 * happens while tearing down.
583 */
584 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
585 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
586 }
587
588 health_code_update();
589
590 /* First send a status message before receiving the fds. */
591 ret = consumer_send_status_msg(sock, ret_code);
592 if (ret < 0) {
593 /* Somehow, the session daemon is not responding anymore. */
594 goto error_add_stream_fatal;
595 }
596
597 health_code_update();
598
599 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
600 /* Channel was not found. */
601 goto error_add_stream_nosignal;
602 }
603
604 /* Blocking call */
605 health_poll_entry();
606 ret = lttng_consumer_poll_socket(consumer_sockpoll);
607 health_poll_exit();
608 if (ret) {
609 goto error_add_stream_fatal;
610 }
611
612 health_code_update();
613
614 /* Get stream file descriptor from socket */
615 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
616 if (ret != sizeof(fd)) {
617 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
618 goto end;
619 }
620
621 health_code_update();
622
623 /*
624 * Send status code to session daemon only if the recv works. If the
625 * above recv() failed, the session daemon is notified through the
626 * error socket and the teardown is eventually done.
627 */
628 ret = consumer_send_status_msg(sock, ret_code);
629 if (ret < 0) {
630 /* Somehow, the session daemon is not responding anymore. */
631 goto error_add_stream_nosignal;
632 }
633
634 health_code_update();
635
636 pthread_mutex_lock(&channel->lock);
637 new_stream = consumer_allocate_stream(channel->key,
638 fd,
639 channel->name,
640 channel->relayd_id,
641 channel->session_id,
642 channel->trace_chunk,
643 msg.u.stream.cpu,
644 &alloc_ret,
645 channel->type,
646 channel->monitor);
647 if (new_stream == NULL) {
648 switch (alloc_ret) {
649 case -ENOMEM:
650 case -EINVAL:
651 default:
652 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
653 break;
654 }
655 pthread_mutex_unlock(&channel->lock);
656 goto error_add_stream_nosignal;
657 }
658
659 new_stream->chan = channel;
660 new_stream->wait_fd = fd;
661 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
662 &new_stream->max_sb_size);
663 if (ret < 0) {
664 pthread_mutex_unlock(&channel->lock);
665 ERR("Failed to get kernel maximal subbuffer size");
666 goto error_add_stream_nosignal;
667 }
668
669 consumer_stream_update_channel_attributes(new_stream,
670 channel);
671 switch (channel->output) {
672 case CONSUMER_CHANNEL_SPLICE:
673 new_stream->output = LTTNG_EVENT_SPLICE;
674 ret = utils_create_pipe(new_stream->splice_pipe);
675 if (ret < 0) {
676 pthread_mutex_unlock(&channel->lock);
677 goto error_add_stream_nosignal;
678 }
679 break;
680 case CONSUMER_CHANNEL_MMAP:
681 new_stream->output = LTTNG_EVENT_MMAP;
682 break;
683 default:
684 ERR("Stream output unknown %d", channel->output);
685 pthread_mutex_unlock(&channel->lock);
686 goto error_add_stream_nosignal;
687 }
688
689 /*
690 * We've just assigned the channel to the stream so increment the
691 * refcount right now. We don't need to increment the refcount for
692 * streams in no monitor because we handle manually the cleanup of
693 * those. It is very important to make sure there is NO prior
694 * consumer_del_stream() calls or else the refcount will be unbalanced.
695 */
696 if (channel->monitor) {
697 uatomic_inc(&new_stream->chan->refcount);
698 }
699
700 /*
701 * The buffer flush is done on the session daemon side for the kernel
702 * so no need for the stream "hangup_flush_done" variable to be
703 * tracked. This is important for a kernel stream since we don't rely
704 * on the flush state of the stream to read data. It's not the case for
705 * user space tracing.
706 */
707 new_stream->hangup_flush_done = 0;
708
709 health_code_update();
710
711 pthread_mutex_lock(&new_stream->lock);
712 if (ctx->on_recv_stream) {
713 ret = ctx->on_recv_stream(new_stream);
714 if (ret < 0) {
715 pthread_mutex_unlock(&new_stream->lock);
716 pthread_mutex_unlock(&channel->lock);
717 consumer_stream_free(new_stream);
718 goto error_add_stream_nosignal;
719 }
720 }
721 health_code_update();
722
723 if (new_stream->metadata_flag) {
724 channel->metadata_stream = new_stream;
725 }
726
727 /* Do not monitor this stream. */
728 if (!channel->monitor) {
729 DBG("Kernel consumer add stream %s in no monitor mode with "
730 "relayd id %" PRIu64, new_stream->name,
731 new_stream->net_seq_idx);
732 cds_list_add(&new_stream->send_node, &channel->streams.head);
733 pthread_mutex_unlock(&new_stream->lock);
734 pthread_mutex_unlock(&channel->lock);
735 goto end_add_stream;
736 }
737
738 /* Send stream to relayd if the stream has an ID. */
739 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
740 ret = consumer_send_relayd_stream(new_stream,
741 new_stream->chan->pathname);
742 if (ret < 0) {
743 pthread_mutex_unlock(&new_stream->lock);
744 pthread_mutex_unlock(&channel->lock);
745 consumer_stream_free(new_stream);
746 goto error_add_stream_nosignal;
747 }
748
749 /*
750 * If adding an extra stream to an already
751 * existing channel (e.g. cpu hotplug), we need
752 * to send the "streams_sent" command to relayd.
753 */
754 if (channel->streams_sent_to_relayd) {
755 ret = consumer_send_relayd_streams_sent(
756 new_stream->net_seq_idx);
757 if (ret < 0) {
758 pthread_mutex_unlock(&new_stream->lock);
759 pthread_mutex_unlock(&channel->lock);
760 goto error_add_stream_nosignal;
761 }
762 }
763 }
764 pthread_mutex_unlock(&new_stream->lock);
765 pthread_mutex_unlock(&channel->lock);
766
767 /* Get the right pipe where the stream will be sent. */
768 if (new_stream->metadata_flag) {
769 consumer_add_metadata_stream(new_stream);
770 stream_pipe = ctx->consumer_metadata_pipe;
771 } else {
772 consumer_add_data_stream(new_stream);
773 stream_pipe = ctx->consumer_data_pipe;
774 }
775
776 /* Visible to other threads */
777 new_stream->globally_visible = 1;
778
779 health_code_update();
780
781 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
782 if (ret < 0) {
783 ERR("Consumer write %s stream to pipe %d",
784 new_stream->metadata_flag ? "metadata" : "data",
785 lttng_pipe_get_writefd(stream_pipe));
786 if (new_stream->metadata_flag) {
787 consumer_del_stream_for_metadata(new_stream);
788 } else {
789 consumer_del_stream_for_data(new_stream);
790 }
791 goto error_add_stream_nosignal;
792 }
793
794 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
795 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
796 end_add_stream:
797 break;
798 error_add_stream_nosignal:
799 goto end_nosignal;
800 error_add_stream_fatal:
801 goto error_fatal;
802 }
803 case LTTNG_CONSUMER_STREAMS_SENT:
804 {
805 struct lttng_consumer_channel *channel;
806
807 /*
808 * Get stream's channel reference. Needed when adding the stream to the
809 * global hash table.
810 */
811 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
812 if (!channel) {
813 /*
814 * We could not find the channel. Can happen if cpu hotplug
815 * happens while tearing down.
816 */
817 ERR("Unable to find channel key %" PRIu64,
818 msg.u.sent_streams.channel_key);
819 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
820 }
821
822 health_code_update();
823
824 /*
825 * Send status code to session daemon.
826 */
827 ret = consumer_send_status_msg(sock, ret_code);
828 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
829 /* Somehow, the session daemon is not responding anymore. */
830 goto error_streams_sent_nosignal;
831 }
832
833 health_code_update();
834
835 /*
836 * We should not send this message if we don't monitor the
837 * streams in this channel.
838 */
839 if (!channel->monitor) {
840 goto end_error_streams_sent;
841 }
842
843 health_code_update();
844 /* Send stream to relayd if the stream has an ID. */
845 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
846 ret = consumer_send_relayd_streams_sent(
847 msg.u.sent_streams.net_seq_idx);
848 if (ret < 0) {
849 goto error_streams_sent_nosignal;
850 }
851 channel->streams_sent_to_relayd = true;
852 }
853 end_error_streams_sent:
854 break;
855 error_streams_sent_nosignal:
856 goto end_nosignal;
857 }
858 case LTTNG_CONSUMER_UPDATE_STREAM:
859 {
860 rcu_read_unlock();
861 return -ENOSYS;
862 }
863 case LTTNG_CONSUMER_DESTROY_RELAYD:
864 {
865 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
866 struct consumer_relayd_sock_pair *relayd;
867
868 DBG("Kernel consumer destroying relayd %" PRIu64, index);
869
870 /* Get relayd reference if exists. */
871 relayd = consumer_find_relayd(index);
872 if (relayd == NULL) {
873 DBG("Unable to find relayd %" PRIu64, index);
874 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
875 }
876
877 /*
878 * Each relayd socket pair has a refcount of stream attached to it
879 * which tells if the relayd is still active or not depending on the
880 * refcount value.
881 *
882 * This will set the destroy flag of the relayd object and destroy it
883 * if the refcount reaches zero when called.
884 *
885 * The destroy can happen either here or when a stream fd hangs up.
886 */
887 if (relayd) {
888 consumer_flag_relayd_for_destroy(relayd);
889 }
890
891 health_code_update();
892
893 ret = consumer_send_status_msg(sock, ret_code);
894 if (ret < 0) {
895 /* Somehow, the session daemon is not responding anymore. */
896 goto error_fatal;
897 }
898
899 goto end_nosignal;
900 }
901 case LTTNG_CONSUMER_DATA_PENDING:
902 {
903 int32_t ret;
904 uint64_t id = msg.u.data_pending.session_id;
905
906 DBG("Kernel consumer data pending command for id %" PRIu64, id);
907
908 ret = consumer_data_pending(id);
909
910 health_code_update();
911
912 /* Send back returned value to session daemon */
913 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
914 if (ret < 0) {
915 PERROR("send data pending ret code");
916 goto error_fatal;
917 }
918
919 /*
920 * No need to send back a status message since the data pending
921 * returned value is the response.
922 */
923 break;
924 }
925 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
926 {
927 struct lttng_consumer_channel *channel;
928 uint64_t key = msg.u.snapshot_channel.key;
929
930 channel = consumer_find_channel(key);
931 if (!channel) {
932 ERR("Channel %" PRIu64 " not found", key);
933 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
934 } else {
935 pthread_mutex_lock(&channel->lock);
936 if (msg.u.snapshot_channel.metadata == 1) {
937 ret = lttng_kconsumer_snapshot_metadata(channel, key,
938 msg.u.snapshot_channel.pathname,
939 msg.u.snapshot_channel.relayd_id, ctx);
940 if (ret < 0) {
941 ERR("Snapshot metadata failed");
942 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
943 }
944 } else {
945 ret = lttng_kconsumer_snapshot_channel(channel, key,
946 msg.u.snapshot_channel.pathname,
947 msg.u.snapshot_channel.relayd_id,
948 msg.u.snapshot_channel.nb_packets_per_stream,
949 ctx);
950 if (ret < 0) {
951 ERR("Snapshot channel failed");
952 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
953 }
954 }
955 pthread_mutex_unlock(&channel->lock);
956 }
957 health_code_update();
958
959 ret = consumer_send_status_msg(sock, ret_code);
960 if (ret < 0) {
961 /* Somehow, the session daemon is not responding anymore. */
962 goto end_nosignal;
963 }
964 break;
965 }
966 case LTTNG_CONSUMER_DESTROY_CHANNEL:
967 {
968 uint64_t key = msg.u.destroy_channel.key;
969 struct lttng_consumer_channel *channel;
970
971 channel = consumer_find_channel(key);
972 if (!channel) {
973 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
974 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
975 }
976
977 health_code_update();
978
979 ret = consumer_send_status_msg(sock, ret_code);
980 if (ret < 0) {
981 /* Somehow, the session daemon is not responding anymore. */
982 goto end_destroy_channel;
983 }
984
985 health_code_update();
986
987 /* Stop right now if no channel was found. */
988 if (!channel) {
989 goto end_destroy_channel;
990 }
991
992 /*
993 * This command should ONLY be issued for channel with streams set in
994 * no monitor mode.
995 */
996 assert(!channel->monitor);
997
998 /*
999 * The refcount should ALWAYS be 0 in the case of a channel in no
1000 * monitor mode.
1001 */
1002 assert(!uatomic_sub_return(&channel->refcount, 1));
1003
1004 consumer_del_channel(channel);
1005 end_destroy_channel:
1006 goto end_nosignal;
1007 }
1008 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1009 {
1010 ssize_t ret;
1011 uint64_t count;
1012 struct lttng_consumer_channel *channel;
1013 uint64_t id = msg.u.discarded_events.session_id;
1014 uint64_t key = msg.u.discarded_events.channel_key;
1015
1016 DBG("Kernel consumer discarded events command for session id %"
1017 PRIu64 ", channel key %" PRIu64, id, key);
1018
1019 channel = consumer_find_channel(key);
1020 if (!channel) {
1021 ERR("Kernel consumer discarded events channel %"
1022 PRIu64 " not found", key);
1023 count = 0;
1024 } else {
1025 count = channel->discarded_events;
1026 }
1027
1028 health_code_update();
1029
1030 /* Send back returned value to session daemon */
1031 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1032 if (ret < 0) {
1033 PERROR("send discarded events");
1034 goto error_fatal;
1035 }
1036
1037 break;
1038 }
1039 case LTTNG_CONSUMER_LOST_PACKETS:
1040 {
1041 ssize_t ret;
1042 uint64_t count;
1043 struct lttng_consumer_channel *channel;
1044 uint64_t id = msg.u.lost_packets.session_id;
1045 uint64_t key = msg.u.lost_packets.channel_key;
1046
1047 DBG("Kernel consumer lost packets command for session id %"
1048 PRIu64 ", channel key %" PRIu64, id, key);
1049
1050 channel = consumer_find_channel(key);
1051 if (!channel) {
1052 ERR("Kernel consumer lost packets channel %"
1053 PRIu64 " not found", key);
1054 count = 0;
1055 } else {
1056 count = channel->lost_packets;
1057 }
1058
1059 health_code_update();
1060
1061 /* Send back returned value to session daemon */
1062 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1063 if (ret < 0) {
1064 PERROR("send lost packets");
1065 goto error_fatal;
1066 }
1067
1068 break;
1069 }
1070 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1071 {
1072 int channel_monitor_pipe;
1073
1074 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1075 /* Successfully received the command's type. */
1076 ret = consumer_send_status_msg(sock, ret_code);
1077 if (ret < 0) {
1078 goto error_fatal;
1079 }
1080
1081 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1082 1);
1083 if (ret != sizeof(channel_monitor_pipe)) {
1084 ERR("Failed to receive channel monitor pipe");
1085 goto error_fatal;
1086 }
1087
1088 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1089 ret = consumer_timer_thread_set_channel_monitor_pipe(
1090 channel_monitor_pipe);
1091 if (!ret) {
1092 int flags;
1093
1094 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1095 /* Set the pipe as non-blocking. */
1096 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1097 if (ret == -1) {
1098 PERROR("fcntl get flags of the channel monitoring pipe");
1099 goto error_fatal;
1100 }
1101 flags = ret;
1102
1103 ret = fcntl(channel_monitor_pipe, F_SETFL,
1104 flags | O_NONBLOCK);
1105 if (ret == -1) {
1106 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1107 goto error_fatal;
1108 }
1109 DBG("Channel monitor pipe set as non-blocking");
1110 } else {
1111 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1112 }
1113 ret = consumer_send_status_msg(sock, ret_code);
1114 if (ret < 0) {
1115 goto error_fatal;
1116 }
1117 break;
1118 }
1119 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1120 {
1121 struct lttng_consumer_channel *channel;
1122 uint64_t key = msg.u.rotate_channel.key;
1123
1124 DBG("Consumer rotate channel %" PRIu64, key);
1125
1126 channel = consumer_find_channel(key);
1127 if (!channel) {
1128 ERR("Channel %" PRIu64 " not found", key);
1129 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1130 } else {
1131 /*
1132 * Sample the rotate position of all the streams in this channel.
1133 */
1134 ret = lttng_consumer_rotate_channel(channel, key,
1135 msg.u.rotate_channel.relayd_id,
1136 msg.u.rotate_channel.metadata,
1137 ctx);
1138 if (ret < 0) {
1139 ERR("Rotate channel failed");
1140 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1141 }
1142
1143 health_code_update();
1144 }
1145 ret = consumer_send_status_msg(sock, ret_code);
1146 if (ret < 0) {
1147 /* Somehow, the session daemon is not responding anymore. */
1148 goto error_rotate_channel;
1149 }
1150 if (channel) {
1151 /* Rotate the streams that are ready right now. */
1152 ret = lttng_consumer_rotate_ready_streams(
1153 channel, key, ctx);
1154 if (ret < 0) {
1155 ERR("Rotate ready streams failed");
1156 }
1157 }
1158 break;
1159 error_rotate_channel:
1160 goto end_nosignal;
1161 }
1162 case LTTNG_CONSUMER_INIT:
1163 {
1164 ret_code = lttng_consumer_init_command(ctx,
1165 msg.u.init.sessiond_uuid);
1166 health_code_update();
1167 ret = consumer_send_status_msg(sock, ret_code);
1168 if (ret < 0) {
1169 /* Somehow, the session daemon is not responding anymore. */
1170 goto end_nosignal;
1171 }
1172 break;
1173 }
1174 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1175 {
1176 const struct lttng_credentials credentials = {
1177 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1178 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1179 };
1180 const bool is_local_trace =
1181 !msg.u.create_trace_chunk.relayd_id.is_set;
1182 const uint64_t relayd_id =
1183 msg.u.create_trace_chunk.relayd_id.value;
1184 const char *chunk_override_name =
1185 *msg.u.create_trace_chunk.override_name ?
1186 msg.u.create_trace_chunk.override_name :
1187 NULL;
1188 LTTNG_OPTIONAL(struct lttng_directory_handle) chunk_directory_handle =
1189 LTTNG_OPTIONAL_INIT;
1190
1191 /*
1192 * The session daemon will only provide a chunk directory file
1193 * descriptor for local traces.
1194 */
1195 if (is_local_trace) {
1196 int chunk_dirfd;
1197
1198 /* Acknowledge the reception of the command. */
1199 ret = consumer_send_status_msg(sock,
1200 LTTCOMM_CONSUMERD_SUCCESS);
1201 if (ret < 0) {
1202 /* Somehow, the session daemon is not responding anymore. */
1203 goto end_nosignal;
1204 }
1205
1206 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1207 if (ret != sizeof(chunk_dirfd)) {
1208 ERR("Failed to receive trace chunk directory file descriptor");
1209 goto error_fatal;
1210 }
1211
1212 DBG("Received trace chunk directory fd (%d)",
1213 chunk_dirfd);
1214 ret = lttng_directory_handle_init_from_dirfd(
1215 &chunk_directory_handle.value,
1216 chunk_dirfd);
1217 if (ret) {
1218 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1219 if (close(chunk_dirfd)) {
1220 PERROR("Failed to close chunk directory file descriptor");
1221 }
1222 goto error_fatal;
1223 }
1224 chunk_directory_handle.is_set = true;
1225 }
1226
1227 ret_code = lttng_consumer_create_trace_chunk(
1228 !is_local_trace ? &relayd_id : NULL,
1229 msg.u.create_trace_chunk.session_id,
1230 msg.u.create_trace_chunk.chunk_id,
1231 (time_t) msg.u.create_trace_chunk
1232 .creation_timestamp,
1233 chunk_override_name,
1234 msg.u.create_trace_chunk.credentials.is_set ?
1235 &credentials :
1236 NULL,
1237 chunk_directory_handle.is_set ?
1238 &chunk_directory_handle.value :
1239 NULL);
1240
1241 if (chunk_directory_handle.is_set) {
1242 lttng_directory_handle_fini(
1243 &chunk_directory_handle.value);
1244 }
1245 goto end_msg_sessiond;
1246 }
1247 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1248 {
1249 enum lttng_trace_chunk_command_type close_command =
1250 msg.u.close_trace_chunk.close_command.value;
1251 const uint64_t relayd_id =
1252 msg.u.close_trace_chunk.relayd_id.value;
1253 struct lttcomm_consumer_close_trace_chunk_reply reply;
1254 char path[LTTNG_PATH_MAX];
1255
1256 ret_code = lttng_consumer_close_trace_chunk(
1257 msg.u.close_trace_chunk.relayd_id.is_set ?
1258 &relayd_id :
1259 NULL,
1260 msg.u.close_trace_chunk.session_id,
1261 msg.u.close_trace_chunk.chunk_id,
1262 (time_t) msg.u.close_trace_chunk.close_timestamp,
1263 msg.u.close_trace_chunk.close_command.is_set ?
1264 &close_command :
1265 NULL, path);
1266 reply.ret_code = ret_code;
1267 reply.path_length = strlen(path) + 1;
1268 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1269 if (ret != sizeof(reply)) {
1270 goto error_fatal;
1271 }
1272 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1273 if (ret != reply.path_length) {
1274 goto error_fatal;
1275 }
1276 goto end_nosignal;
1277 }
1278 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1279 {
1280 const uint64_t relayd_id =
1281 msg.u.trace_chunk_exists.relayd_id.value;
1282
1283 ret_code = lttng_consumer_trace_chunk_exists(
1284 msg.u.trace_chunk_exists.relayd_id.is_set ?
1285 &relayd_id : NULL,
1286 msg.u.trace_chunk_exists.session_id,
1287 msg.u.trace_chunk_exists.chunk_id);
1288 goto end_msg_sessiond;
1289 }
1290 default:
1291 goto end_nosignal;
1292 }
1293
1294 end_nosignal:
1295 /*
1296 * Return 1 to indicate success since the 0 value can be a socket
1297 * shutdown during the recv() or send() call.
1298 */
1299 ret = 1;
1300 goto end;
1301 error_fatal:
1302 /* This will issue a consumer stop. */
1303 ret = -1;
1304 goto end;
1305 end_msg_sessiond:
1306 /*
1307 * The returned value here is not useful since either way we'll return 1 to
1308 * the caller because the session daemon socket management is done
1309 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1310 */
1311 ret = consumer_send_status_msg(sock, ret_code);
1312 if (ret < 0) {
1313 goto error_fatal;
1314 }
1315 ret = 1;
1316 end:
1317 health_code_update();
1318 rcu_read_unlock();
1319 return ret;
1320 }
1321
1322 /*
1323 * Populate index values of a kernel stream. Values are set in big endian order.
1324 *
1325 * Return 0 on success or else a negative value.
1326 */
1327 static int get_index_values(struct ctf_packet_index *index, int infd)
1328 {
1329 int ret;
1330 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1331 events_discarded, stream_id, stream_instance_id,
1332 packet_seq_num;
1333
1334 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1335 if (ret < 0) {
1336 PERROR("kernctl_get_timestamp_begin");
1337 goto error;
1338 }
1339
1340 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1341 if (ret < 0) {
1342 PERROR("kernctl_get_timestamp_end");
1343 goto error;
1344 }
1345
1346 ret = kernctl_get_events_discarded(infd, &events_discarded);
1347 if (ret < 0) {
1348 PERROR("kernctl_get_events_discarded");
1349 goto error;
1350 }
1351
1352 ret = kernctl_get_content_size(infd, &content_size);
1353 if (ret < 0) {
1354 PERROR("kernctl_get_content_size");
1355 goto error;
1356 }
1357
1358 ret = kernctl_get_packet_size(infd, &packet_size);
1359 if (ret < 0) {
1360 PERROR("kernctl_get_packet_size");
1361 goto error;
1362 }
1363
1364 ret = kernctl_get_stream_id(infd, &stream_id);
1365 if (ret < 0) {
1366 PERROR("kernctl_get_stream_id");
1367 goto error;
1368 }
1369
1370 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1371 if (ret < 0) {
1372 if (ret == -ENOTTY) {
1373 /* Command not implemented by lttng-modules. */
1374 stream_instance_id = -1ULL;
1375 } else {
1376 PERROR("kernctl_get_instance_id");
1377 goto error;
1378 }
1379 }
1380
1381 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1382 if (ret < 0) {
1383 if (ret == -ENOTTY) {
1384 /* Command not implemented by lttng-modules. */
1385 packet_seq_num = -1ULL;
1386 ret = 0;
1387 } else {
1388 PERROR("kernctl_get_sequence_number");
1389 goto error;
1390 }
1391 }
1392 index->packet_seq_num = htobe64(index->packet_seq_num);
1393
1394 *index = (typeof(*index)) {
1395 .offset = index->offset,
1396 .packet_size = htobe64(packet_size),
1397 .content_size = htobe64(content_size),
1398 .timestamp_begin = htobe64(timestamp_begin),
1399 .timestamp_end = htobe64(timestamp_end),
1400 .events_discarded = htobe64(events_discarded),
1401 .stream_id = htobe64(stream_id),
1402 .stream_instance_id = htobe64(stream_instance_id),
1403 .packet_seq_num = htobe64(packet_seq_num),
1404 };
1405
1406 error:
1407 return ret;
1408 }
1409 /*
1410 * Sync metadata meaning request them to the session daemon and snapshot to the
1411 * metadata thread can consumer them.
1412 *
1413 * Metadata stream lock MUST be acquired.
1414 *
1415 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1416 * is empty or a negative value on error.
1417 */
1418 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1419 {
1420 int ret;
1421
1422 assert(metadata);
1423
1424 ret = kernctl_buffer_flush(metadata->wait_fd);
1425 if (ret < 0) {
1426 ERR("Failed to flush kernel stream");
1427 goto end;
1428 }
1429
1430 ret = kernctl_snapshot(metadata->wait_fd);
1431 if (ret < 0) {
1432 if (ret != -EAGAIN) {
1433 ERR("Sync metadata, taking kernel snapshot failed.");
1434 goto end;
1435 }
1436 DBG("Sync metadata, no new kernel metadata");
1437 /* No new metadata, exit. */
1438 ret = ENODATA;
1439 goto end;
1440 }
1441
1442 end:
1443 return ret;
1444 }
1445
1446 static
1447 int update_stream_stats(struct lttng_consumer_stream *stream)
1448 {
1449 int ret;
1450 uint64_t seq, discarded;
1451
1452 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1453 if (ret < 0) {
1454 if (ret == -ENOTTY) {
1455 /* Command not implemented by lttng-modules. */
1456 seq = -1ULL;
1457 } else {
1458 PERROR("kernctl_get_sequence_number");
1459 goto end;
1460 }
1461 }
1462
1463 /*
1464 * Start the sequence when we extract the first packet in case we don't
1465 * start at 0 (for example if a consumer is not connected to the
1466 * session immediately after the beginning).
1467 */
1468 if (stream->last_sequence_number == -1ULL) {
1469 stream->last_sequence_number = seq;
1470 } else if (seq > stream->last_sequence_number) {
1471 stream->chan->lost_packets += seq -
1472 stream->last_sequence_number - 1;
1473 } else {
1474 /* seq <= last_sequence_number */
1475 ERR("Sequence number inconsistent : prev = %" PRIu64
1476 ", current = %" PRIu64,
1477 stream->last_sequence_number, seq);
1478 ret = -1;
1479 goto end;
1480 }
1481 stream->last_sequence_number = seq;
1482
1483 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1484 if (ret < 0) {
1485 PERROR("kernctl_get_events_discarded");
1486 goto end;
1487 }
1488 if (discarded < stream->last_discarded_events) {
1489 /*
1490 * Overflow has occurred. We assume only one wrap-around
1491 * has occurred.
1492 */
1493 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1494 stream->last_discarded_events + discarded;
1495 } else {
1496 stream->chan->discarded_events += discarded -
1497 stream->last_discarded_events;
1498 }
1499 stream->last_discarded_events = discarded;
1500 ret = 0;
1501
1502 end:
1503 return ret;
1504 }
1505
1506 /*
1507 * Check if the local version of the metadata stream matches with the version
1508 * of the metadata stream in the kernel. If it was updated, set the reset flag
1509 * on the stream.
1510 */
1511 static
1512 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1513 {
1514 int ret;
1515 uint64_t cur_version;
1516
1517 ret = kernctl_get_metadata_version(infd, &cur_version);
1518 if (ret < 0) {
1519 if (ret == -ENOTTY) {
1520 /*
1521 * LTTng-modules does not implement this
1522 * command.
1523 */
1524 ret = 0;
1525 goto end;
1526 }
1527 ERR("Failed to get the metadata version");
1528 goto end;
1529 }
1530
1531 if (stream->metadata_version == cur_version) {
1532 ret = 0;
1533 goto end;
1534 }
1535
1536 DBG("New metadata version detected");
1537 stream->metadata_version = cur_version;
1538 stream->reset_metadata_flag = 1;
1539 ret = 0;
1540
1541 end:
1542 return ret;
1543 }
1544
1545 /*
1546 * Consume data on a file descriptor and write it on a trace file.
1547 * The stream and channel locks must be held by the caller.
1548 */
1549 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1550 struct lttng_consumer_local_data *ctx)
1551 {
1552 unsigned long len, subbuf_size, padding;
1553 int err, write_index = 1, rotation_ret;
1554 ssize_t ret = 0;
1555 int infd = stream->wait_fd;
1556 struct ctf_packet_index index = {};
1557
1558 DBG("In read_subbuffer (infd : %d)", infd);
1559
1560 /*
1561 * If the stream was flagged to be ready for rotation before we extract the
1562 * next packet, rotate it now.
1563 */
1564 if (stream->rotate_ready) {
1565 DBG("Rotate stream before extracting data");
1566 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1567 if (rotation_ret < 0) {
1568 ERR("Stream rotation error");
1569 ret = -1;
1570 goto error;
1571 }
1572 }
1573
1574 /* Get the next subbuffer */
1575 err = kernctl_get_next_subbuf(infd);
1576 if (err != 0) {
1577 /*
1578 * This is a debug message even for single-threaded consumer,
1579 * because poll() have more relaxed criterions than get subbuf,
1580 * so get_subbuf may fail for short race windows where poll()
1581 * would issue wakeups.
1582 */
1583 DBG("Reserving sub buffer failed (everything is normal, "
1584 "it is due to concurrency)");
1585 ret = err;
1586 goto error;
1587 }
1588
1589 /* Get the full subbuffer size including padding */
1590 err = kernctl_get_padded_subbuf_size(infd, &len);
1591 if (err != 0) {
1592 PERROR("Getting sub-buffer len failed.");
1593 err = kernctl_put_subbuf(infd);
1594 if (err != 0) {
1595 if (err == -EFAULT) {
1596 PERROR("Error in unreserving sub buffer\n");
1597 } else if (err == -EIO) {
1598 /* Should never happen with newer LTTng versions */
1599 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1600 }
1601 ret = err;
1602 goto error;
1603 }
1604 ret = err;
1605 goto error;
1606 }
1607
1608 if (!stream->metadata_flag) {
1609 ret = get_index_values(&index, infd);
1610 if (ret < 0) {
1611 err = kernctl_put_subbuf(infd);
1612 if (err != 0) {
1613 if (err == -EFAULT) {
1614 PERROR("Error in unreserving sub buffer\n");
1615 } else if (err == -EIO) {
1616 /* Should never happen with newer LTTng versions */
1617 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1618 }
1619 ret = err;
1620 goto error;
1621 }
1622 goto error;
1623 }
1624 ret = update_stream_stats(stream);
1625 if (ret < 0) {
1626 err = kernctl_put_subbuf(infd);
1627 if (err != 0) {
1628 if (err == -EFAULT) {
1629 PERROR("Error in unreserving sub buffer\n");
1630 } else if (err == -EIO) {
1631 /* Should never happen with newer LTTng versions */
1632 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1633 }
1634 ret = err;
1635 goto error;
1636 }
1637 goto error;
1638 }
1639 } else {
1640 write_index = 0;
1641 ret = metadata_stream_check_version(infd, stream);
1642 if (ret < 0) {
1643 err = kernctl_put_subbuf(infd);
1644 if (err != 0) {
1645 if (err == -EFAULT) {
1646 PERROR("Error in unreserving sub buffer\n");
1647 } else if (err == -EIO) {
1648 /* Should never happen with newer LTTng versions */
1649 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1650 }
1651 ret = err;
1652 goto error;
1653 }
1654 goto error;
1655 }
1656 }
1657
1658 switch (stream->chan->output) {
1659 case CONSUMER_CHANNEL_SPLICE:
1660 /*
1661 * XXX: The lttng-modules splice "actor" does not handle copying
1662 * partial pages hence only using the subbuffer size without the
1663 * padding makes the splice fail.
1664 */
1665 subbuf_size = len;
1666 padding = 0;
1667
1668 /* splice the subbuffer to the tracefile */
1669 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1670 padding, &index);
1671 /*
1672 * XXX: Splice does not support network streaming so the return value
1673 * is simply checked against subbuf_size and not like the mmap() op.
1674 */
1675 if (ret != subbuf_size) {
1676 /*
1677 * display the error but continue processing to try
1678 * to release the subbuffer
1679 */
1680 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1681 ret, subbuf_size);
1682 write_index = 0;
1683 }
1684 break;
1685 case CONSUMER_CHANNEL_MMAP:
1686 /* Get subbuffer size without padding */
1687 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1688 if (err != 0) {
1689 PERROR("Getting sub-buffer len failed.");
1690 err = kernctl_put_subbuf(infd);
1691 if (err != 0) {
1692 if (err == -EFAULT) {
1693 PERROR("Error in unreserving sub buffer\n");
1694 } else if (err == -EIO) {
1695 /* Should never happen with newer LTTng versions */
1696 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1697 }
1698 ret = err;
1699 goto error;
1700 }
1701 ret = err;
1702 goto error;
1703 }
1704
1705 /* Make sure the tracer is not gone mad on us! */
1706 assert(len >= subbuf_size);
1707
1708 padding = len - subbuf_size;
1709
1710 /* write the subbuffer to the tracefile */
1711 ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, subbuf_size,
1712 padding, &index);
1713 /*
1714 * The mmap operation should write subbuf_size amount of data when
1715 * network streaming or the full padding (len) size when we are _not_
1716 * streaming.
1717 */
1718 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1719 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1720 /*
1721 * Display the error but continue processing to try to release the
1722 * subbuffer. This is a DBG statement since this is possible to
1723 * happen without being a critical error.
1724 */
1725 DBG("Error writing to tracefile "
1726 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1727 ret, len, subbuf_size);
1728 write_index = 0;
1729 }
1730 break;
1731 default:
1732 ERR("Unknown output method");
1733 ret = -EPERM;
1734 }
1735
1736 err = kernctl_put_next_subbuf(infd);
1737 if (err != 0) {
1738 if (err == -EFAULT) {
1739 PERROR("Error in unreserving sub buffer\n");
1740 } else if (err == -EIO) {
1741 /* Should never happen with newer LTTng versions */
1742 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1743 }
1744 ret = err;
1745 goto error;
1746 }
1747
1748 /* Write index if needed. */
1749 if (!write_index) {
1750 goto rotate;
1751 }
1752
1753 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1754 /*
1755 * In live, block until all the metadata is sent.
1756 */
1757 pthread_mutex_lock(&stream->metadata_timer_lock);
1758 assert(!stream->missed_metadata_flush);
1759 stream->waiting_on_metadata = true;
1760 pthread_mutex_unlock(&stream->metadata_timer_lock);
1761
1762 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1763
1764 pthread_mutex_lock(&stream->metadata_timer_lock);
1765 stream->waiting_on_metadata = false;
1766 if (stream->missed_metadata_flush) {
1767 stream->missed_metadata_flush = false;
1768 pthread_mutex_unlock(&stream->metadata_timer_lock);
1769 (void) consumer_flush_kernel_index(stream);
1770 } else {
1771 pthread_mutex_unlock(&stream->metadata_timer_lock);
1772 }
1773 if (err < 0) {
1774 goto error;
1775 }
1776 }
1777
1778 err = consumer_stream_write_index(stream, &index);
1779 if (err < 0) {
1780 goto error;
1781 }
1782
1783 rotate:
1784 /*
1785 * After extracting the packet, we check if the stream is now ready to be
1786 * rotated and perform the action immediately.
1787 */
1788 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1789 if (rotation_ret == 1) {
1790 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1791 if (rotation_ret < 0) {
1792 ERR("Stream rotation error");
1793 ret = -1;
1794 goto error;
1795 }
1796 } else if (rotation_ret < 0) {
1797 ERR("Checking if stream is ready to rotate");
1798 ret = -1;
1799 goto error;
1800 }
1801
1802 error:
1803 return ret;
1804 }
1805
1806 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1807 {
1808 int ret;
1809
1810 assert(stream);
1811
1812 /*
1813 * Don't create anything if this is set for streaming or if there is
1814 * no current trace chunk on the parent channel.
1815 */
1816 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1817 stream->chan->trace_chunk) {
1818 ret = consumer_stream_create_output_files(stream, true);
1819 if (ret) {
1820 goto error;
1821 }
1822 }
1823
1824 if (stream->output == LTTNG_EVENT_MMAP) {
1825 /* get the len of the mmap region */
1826 unsigned long mmap_len;
1827
1828 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1829 if (ret != 0) {
1830 PERROR("kernctl_get_mmap_len");
1831 goto error_close_fd;
1832 }
1833 stream->mmap_len = (size_t) mmap_len;
1834
1835 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1836 MAP_PRIVATE, stream->wait_fd, 0);
1837 if (stream->mmap_base == MAP_FAILED) {
1838 PERROR("Error mmaping");
1839 ret = -1;
1840 goto error_close_fd;
1841 }
1842 }
1843
1844 /* we return 0 to let the library handle the FD internally */
1845 return 0;
1846
1847 error_close_fd:
1848 if (stream->out_fd >= 0) {
1849 int err;
1850
1851 err = close(stream->out_fd);
1852 assert(!err);
1853 stream->out_fd = -1;
1854 }
1855 error:
1856 return ret;
1857 }
1858
1859 /*
1860 * Check if data is still being extracted from the buffers for a specific
1861 * stream. Consumer data lock MUST be acquired before calling this function
1862 * and the stream lock.
1863 *
1864 * Return 1 if the traced data are still getting read else 0 meaning that the
1865 * data is available for trace viewer reading.
1866 */
1867 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1868 {
1869 int ret;
1870
1871 assert(stream);
1872
1873 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1874 ret = 0;
1875 goto end;
1876 }
1877
1878 ret = kernctl_get_next_subbuf(stream->wait_fd);
1879 if (ret == 0) {
1880 /* There is still data so let's put back this subbuffer. */
1881 ret = kernctl_put_subbuf(stream->wait_fd);
1882 assert(ret == 0);
1883 ret = 1; /* Data is pending */
1884 goto end;
1885 }
1886
1887 /* Data is NOT pending and ready to be read. */
1888 ret = 0;
1889
1890 end:
1891 return ret;
1892 }
This page took 0.105101 seconds and 6 git commands to generate.