Make lttng_directory_handle reference countable
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <assert.h>
22 #include <poll.h>
23 #include <pthread.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <inttypes.h>
30 #include <unistd.h>
31 #include <sys/stat.h>
32
33 #include <bin/lttng-consumerd/health-consumerd.h>
34 #include <common/common.h>
35 #include <common/kernel-ctl/kernel-ctl.h>
36 #include <common/sessiond-comm/sessiond-comm.h>
37 #include <common/sessiond-comm/relayd.h>
38 #include <common/compat/fcntl.h>
39 #include <common/compat/endian.h>
40 #include <common/pipe.h>
41 #include <common/relayd/relayd.h>
42 #include <common/utils.h>
43 #include <common/consumer/consumer-stream.h>
44 #include <common/index/index.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/optional.h>
47
48 #include "kernel-consumer.h"
49
50 extern struct lttng_consumer_global_data consumer_data;
51 extern int consumer_poll_timeout;
52
53 /*
54 * Take a snapshot for a specific fd
55 *
56 * Returns 0 on success, < 0 on error
57 */
58 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
59 {
60 int ret = 0;
61 int infd = stream->wait_fd;
62
63 ret = kernctl_snapshot(infd);
64 /*
65 * -EAGAIN is not an error, it just means that there is no data to
66 * be read.
67 */
68 if (ret != 0 && ret != -EAGAIN) {
69 PERROR("Getting sub-buffer snapshot.");
70 }
71
72 return ret;
73 }
74
75 /*
76 * Sample consumed and produced positions for a specific fd.
77 *
78 * Returns 0 on success, < 0 on error.
79 */
80 int lttng_kconsumer_sample_snapshot_positions(
81 struct lttng_consumer_stream *stream)
82 {
83 assert(stream);
84
85 return kernctl_snapshot_sample_positions(stream->wait_fd);
86 }
87
88 /*
89 * Get the produced position
90 *
91 * Returns 0 on success, < 0 on error
92 */
93 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
94 unsigned long *pos)
95 {
96 int ret;
97 int infd = stream->wait_fd;
98
99 ret = kernctl_snapshot_get_produced(infd, pos);
100 if (ret != 0) {
101 PERROR("kernctl_snapshot_get_produced");
102 }
103
104 return ret;
105 }
106
107 /*
108 * Get the consumerd position
109 *
110 * Returns 0 on success, < 0 on error
111 */
112 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
113 unsigned long *pos)
114 {
115 int ret;
116 int infd = stream->wait_fd;
117
118 ret = kernctl_snapshot_get_consumed(infd, pos);
119 if (ret != 0) {
120 PERROR("kernctl_snapshot_get_consumed");
121 }
122
123 return ret;
124 }
125
126 /*
127 * Take a snapshot of all the stream of a channel
128 * RCU read-side lock must be held across this function to ensure existence of
129 * channel. The channel lock must be held by the caller.
130 *
131 * Returns 0 on success, < 0 on error
132 */
133 static int lttng_kconsumer_snapshot_channel(
134 struct lttng_consumer_channel *channel,
135 uint64_t key, char *path, uint64_t relayd_id,
136 uint64_t nb_packets_per_stream,
137 struct lttng_consumer_local_data *ctx)
138 {
139 int ret;
140 struct lttng_consumer_stream *stream;
141
142 DBG("Kernel consumer snapshot channel %" PRIu64, key);
143
144 rcu_read_lock();
145
146 /* Splice is not supported yet for channel snapshot. */
147 if (channel->output != CONSUMER_CHANNEL_MMAP) {
148 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
149 channel->name);
150 ret = -1;
151 goto end;
152 }
153
154 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
155 unsigned long consumed_pos, produced_pos;
156
157 health_code_update();
158
159 /*
160 * Lock stream because we are about to change its state.
161 */
162 pthread_mutex_lock(&stream->lock);
163
164 assert(channel->trace_chunk);
165 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
166 /*
167 * Can't happen barring an internal error as the channel
168 * holds a reference to the trace chunk.
169 */
170 ERR("Failed to acquire reference to channel's trace chunk");
171 ret = -1;
172 goto end_unlock;
173 }
174 assert(!stream->trace_chunk);
175 stream->trace_chunk = channel->trace_chunk;
176
177 /*
178 * Assign the received relayd ID so we can use it for streaming. The streams
179 * are not visible to anyone so this is OK to change it.
180 */
181 stream->net_seq_idx = relayd_id;
182 channel->relayd_id = relayd_id;
183 if (relayd_id != (uint64_t) -1ULL) {
184 ret = consumer_send_relayd_stream(stream, path);
185 if (ret < 0) {
186 ERR("sending stream to relayd");
187 goto end_unlock;
188 }
189 } else {
190 ret = consumer_stream_create_output_files(stream,
191 false);
192 if (ret < 0) {
193 goto end_unlock;
194 }
195 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
196 stream->key);
197 }
198
199 ret = kernctl_buffer_flush_empty(stream->wait_fd);
200 if (ret < 0) {
201 /*
202 * Doing a buffer flush which does not take into
203 * account empty packets. This is not perfect
204 * for stream intersection, but required as a
205 * fall-back when "flush_empty" is not
206 * implemented by lttng-modules.
207 */
208 ret = kernctl_buffer_flush(stream->wait_fd);
209 if (ret < 0) {
210 ERR("Failed to flush kernel stream");
211 goto end_unlock;
212 }
213 goto end_unlock;
214 }
215
216 ret = lttng_kconsumer_take_snapshot(stream);
217 if (ret < 0) {
218 ERR("Taking kernel snapshot");
219 goto end_unlock;
220 }
221
222 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
223 if (ret < 0) {
224 ERR("Produced kernel snapshot position");
225 goto end_unlock;
226 }
227
228 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
229 if (ret < 0) {
230 ERR("Consumerd kernel snapshot position");
231 goto end_unlock;
232 }
233
234 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
235 produced_pos, nb_packets_per_stream,
236 stream->max_sb_size);
237
238 while ((long) (consumed_pos - produced_pos) < 0) {
239 ssize_t read_len;
240 unsigned long len, padded_len;
241
242 health_code_update();
243
244 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
245
246 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
247 if (ret < 0) {
248 if (ret != -EAGAIN) {
249 PERROR("kernctl_get_subbuf snapshot");
250 goto end_unlock;
251 }
252 DBG("Kernel consumer get subbuf failed. Skipping it.");
253 consumed_pos += stream->max_sb_size;
254 stream->chan->lost_packets++;
255 continue;
256 }
257
258 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
259 if (ret < 0) {
260 ERR("Snapshot kernctl_get_subbuf_size");
261 goto error_put_subbuf;
262 }
263
264 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
265 if (ret < 0) {
266 ERR("Snapshot kernctl_get_padded_subbuf_size");
267 goto error_put_subbuf;
268 }
269
270 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len,
271 padded_len - len, NULL);
272 /*
273 * We write the padded len in local tracefiles but the data len
274 * when using a relay. Display the error but continue processing
275 * to try to release the subbuffer.
276 */
277 if (relayd_id != (uint64_t) -1ULL) {
278 if (read_len != len) {
279 ERR("Error sending to the relay (ret: %zd != len: %lu)",
280 read_len, len);
281 }
282 } else {
283 if (read_len != padded_len) {
284 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
285 read_len, padded_len);
286 }
287 }
288
289 ret = kernctl_put_subbuf(stream->wait_fd);
290 if (ret < 0) {
291 ERR("Snapshot kernctl_put_subbuf");
292 goto end_unlock;
293 }
294 consumed_pos += stream->max_sb_size;
295 }
296
297 if (relayd_id == (uint64_t) -1ULL) {
298 if (stream->out_fd >= 0) {
299 ret = close(stream->out_fd);
300 if (ret < 0) {
301 PERROR("Kernel consumer snapshot close out_fd");
302 goto end_unlock;
303 }
304 stream->out_fd = -1;
305 }
306 } else {
307 close_relayd_stream(stream);
308 stream->net_seq_idx = (uint64_t) -1ULL;
309 }
310 lttng_trace_chunk_put(stream->trace_chunk);
311 stream->trace_chunk = NULL;
312 pthread_mutex_unlock(&stream->lock);
313 }
314
315 /* All good! */
316 ret = 0;
317 goto end;
318
319 error_put_subbuf:
320 ret = kernctl_put_subbuf(stream->wait_fd);
321 if (ret < 0) {
322 ERR("Snapshot kernctl_put_subbuf error path");
323 }
324 end_unlock:
325 pthread_mutex_unlock(&stream->lock);
326 end:
327 rcu_read_unlock();
328 return ret;
329 }
330
331 /*
332 * Read the whole metadata available for a snapshot.
333 * RCU read-side lock must be held across this function to ensure existence of
334 * metadata_channel. The channel lock must be held by the caller.
335 *
336 * Returns 0 on success, < 0 on error
337 */
338 static int lttng_kconsumer_snapshot_metadata(
339 struct lttng_consumer_channel *metadata_channel,
340 uint64_t key, char *path, uint64_t relayd_id,
341 struct lttng_consumer_local_data *ctx)
342 {
343 int ret, use_relayd = 0;
344 ssize_t ret_read;
345 struct lttng_consumer_stream *metadata_stream;
346
347 assert(ctx);
348
349 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
350 key, path);
351
352 rcu_read_lock();
353
354 metadata_stream = metadata_channel->metadata_stream;
355 assert(metadata_stream);
356
357 pthread_mutex_lock(&metadata_stream->lock);
358 assert(metadata_channel->trace_chunk);
359 assert(metadata_stream->trace_chunk);
360
361 /* Flag once that we have a valid relayd for the stream. */
362 if (relayd_id != (uint64_t) -1ULL) {
363 use_relayd = 1;
364 }
365
366 if (use_relayd) {
367 ret = consumer_send_relayd_stream(metadata_stream, path);
368 if (ret < 0) {
369 goto error_snapshot;
370 }
371 } else {
372 ret = consumer_stream_create_output_files(metadata_stream,
373 false);
374 if (ret < 0) {
375 goto error_snapshot;
376 }
377 }
378
379 do {
380 health_code_update();
381
382 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
383 if (ret_read < 0) {
384 if (ret_read != -EAGAIN) {
385 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
386 ret_read);
387 ret = ret_read;
388 goto error_snapshot;
389 }
390 /* ret_read is negative at this point so we will exit the loop. */
391 continue;
392 }
393 } while (ret_read >= 0);
394
395 if (use_relayd) {
396 close_relayd_stream(metadata_stream);
397 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
398 } else {
399 if (metadata_stream->out_fd >= 0) {
400 ret = close(metadata_stream->out_fd);
401 if (ret < 0) {
402 PERROR("Kernel consumer snapshot metadata close out_fd");
403 /*
404 * Don't go on error here since the snapshot was successful at this
405 * point but somehow the close failed.
406 */
407 }
408 metadata_stream->out_fd = -1;
409 lttng_trace_chunk_put(metadata_stream->trace_chunk);
410 metadata_stream->trace_chunk = NULL;
411 }
412 }
413
414 ret = 0;
415 error_snapshot:
416 pthread_mutex_unlock(&metadata_stream->lock);
417 cds_list_del(&metadata_stream->send_node);
418 consumer_stream_destroy(metadata_stream, NULL);
419 metadata_channel->metadata_stream = NULL;
420 rcu_read_unlock();
421 return ret;
422 }
423
424 /*
425 * Receive command from session daemon and process it.
426 *
427 * Return 1 on success else a negative value or 0.
428 */
429 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
430 int sock, struct pollfd *consumer_sockpoll)
431 {
432 ssize_t ret;
433 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
434 struct lttcomm_consumer_msg msg;
435
436 health_code_update();
437
438 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
439 if (ret != sizeof(msg)) {
440 if (ret > 0) {
441 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
442 ret = -1;
443 }
444 return ret;
445 }
446
447 health_code_update();
448
449 /* Deprecated command */
450 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
451
452 health_code_update();
453
454 /* relayd needs RCU read-side protection */
455 rcu_read_lock();
456
457 switch (msg.cmd_type) {
458 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
459 {
460 /* Session daemon status message are handled in the following call. */
461 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
462 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
463 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
464 msg.u.relayd_sock.relayd_session_id);
465 goto end_nosignal;
466 }
467 case LTTNG_CONSUMER_ADD_CHANNEL:
468 {
469 struct lttng_consumer_channel *new_channel;
470 int ret_recv;
471 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
472
473 health_code_update();
474
475 /* First send a status message before receiving the fds. */
476 ret = consumer_send_status_msg(sock, ret_code);
477 if (ret < 0) {
478 /* Somehow, the session daemon is not responding anymore. */
479 goto error_fatal;
480 }
481
482 health_code_update();
483
484 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
485 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
486 msg.u.channel.session_id,
487 msg.u.channel.chunk_id.is_set ?
488 &chunk_id : NULL,
489 msg.u.channel.pathname,
490 msg.u.channel.name,
491 msg.u.channel.relayd_id, msg.u.channel.output,
492 msg.u.channel.tracefile_size,
493 msg.u.channel.tracefile_count, 0,
494 msg.u.channel.monitor,
495 msg.u.channel.live_timer_interval,
496 NULL, NULL);
497 if (new_channel == NULL) {
498 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
499 goto end_nosignal;
500 }
501 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
502 switch (msg.u.channel.output) {
503 case LTTNG_EVENT_SPLICE:
504 new_channel->output = CONSUMER_CHANNEL_SPLICE;
505 break;
506 case LTTNG_EVENT_MMAP:
507 new_channel->output = CONSUMER_CHANNEL_MMAP;
508 break;
509 default:
510 ERR("Channel output unknown %d", msg.u.channel.output);
511 goto end_nosignal;
512 }
513
514 /* Translate and save channel type. */
515 switch (msg.u.channel.type) {
516 case CONSUMER_CHANNEL_TYPE_DATA:
517 case CONSUMER_CHANNEL_TYPE_METADATA:
518 new_channel->type = msg.u.channel.type;
519 break;
520 default:
521 assert(0);
522 goto end_nosignal;
523 };
524
525 health_code_update();
526
527 if (ctx->on_recv_channel != NULL) {
528 ret_recv = ctx->on_recv_channel(new_channel);
529 if (ret_recv == 0) {
530 ret = consumer_add_channel(new_channel, ctx);
531 } else if (ret_recv < 0) {
532 goto end_nosignal;
533 }
534 } else {
535 ret = consumer_add_channel(new_channel, ctx);
536 }
537 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
538 int monitor_start_ret;
539
540 DBG("Consumer starting monitor timer");
541 consumer_timer_live_start(new_channel,
542 msg.u.channel.live_timer_interval);
543 monitor_start_ret = consumer_timer_monitor_start(
544 new_channel,
545 msg.u.channel.monitor_timer_interval);
546 if (monitor_start_ret < 0) {
547 ERR("Starting channel monitoring timer failed");
548 goto end_nosignal;
549 }
550
551 }
552
553 health_code_update();
554
555 /* If we received an error in add_channel, we need to report it. */
556 if (ret < 0) {
557 ret = consumer_send_status_msg(sock, ret);
558 if (ret < 0) {
559 goto error_fatal;
560 }
561 goto end_nosignal;
562 }
563
564 goto end_nosignal;
565 }
566 case LTTNG_CONSUMER_ADD_STREAM:
567 {
568 int fd;
569 struct lttng_pipe *stream_pipe;
570 struct lttng_consumer_stream *new_stream;
571 struct lttng_consumer_channel *channel;
572 int alloc_ret = 0;
573
574 /*
575 * Get stream's channel reference. Needed when adding the stream to the
576 * global hash table.
577 */
578 channel = consumer_find_channel(msg.u.stream.channel_key);
579 if (!channel) {
580 /*
581 * We could not find the channel. Can happen if cpu hotplug
582 * happens while tearing down.
583 */
584 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
585 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
586 }
587
588 health_code_update();
589
590 /* First send a status message before receiving the fds. */
591 ret = consumer_send_status_msg(sock, ret_code);
592 if (ret < 0) {
593 /* Somehow, the session daemon is not responding anymore. */
594 goto error_add_stream_fatal;
595 }
596
597 health_code_update();
598
599 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
600 /* Channel was not found. */
601 goto error_add_stream_nosignal;
602 }
603
604 /* Blocking call */
605 health_poll_entry();
606 ret = lttng_consumer_poll_socket(consumer_sockpoll);
607 health_poll_exit();
608 if (ret) {
609 goto error_add_stream_fatal;
610 }
611
612 health_code_update();
613
614 /* Get stream file descriptor from socket */
615 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
616 if (ret != sizeof(fd)) {
617 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
618 goto end;
619 }
620
621 health_code_update();
622
623 /*
624 * Send status code to session daemon only if the recv works. If the
625 * above recv() failed, the session daemon is notified through the
626 * error socket and the teardown is eventually done.
627 */
628 ret = consumer_send_status_msg(sock, ret_code);
629 if (ret < 0) {
630 /* Somehow, the session daemon is not responding anymore. */
631 goto error_add_stream_nosignal;
632 }
633
634 health_code_update();
635
636 pthread_mutex_lock(&channel->lock);
637 new_stream = consumer_allocate_stream(channel->key,
638 fd,
639 channel->name,
640 channel->relayd_id,
641 channel->session_id,
642 channel->trace_chunk,
643 msg.u.stream.cpu,
644 &alloc_ret,
645 channel->type,
646 channel->monitor);
647 if (new_stream == NULL) {
648 switch (alloc_ret) {
649 case -ENOMEM:
650 case -EINVAL:
651 default:
652 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
653 break;
654 }
655 pthread_mutex_unlock(&channel->lock);
656 goto error_add_stream_nosignal;
657 }
658
659 new_stream->chan = channel;
660 new_stream->wait_fd = fd;
661 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
662 &new_stream->max_sb_size);
663 if (ret < 0) {
664 pthread_mutex_unlock(&channel->lock);
665 ERR("Failed to get kernel maximal subbuffer size");
666 goto error_add_stream_nosignal;
667 }
668
669 consumer_stream_update_channel_attributes(new_stream,
670 channel);
671 switch (channel->output) {
672 case CONSUMER_CHANNEL_SPLICE:
673 new_stream->output = LTTNG_EVENT_SPLICE;
674 ret = utils_create_pipe(new_stream->splice_pipe);
675 if (ret < 0) {
676 pthread_mutex_unlock(&channel->lock);
677 goto error_add_stream_nosignal;
678 }
679 break;
680 case CONSUMER_CHANNEL_MMAP:
681 new_stream->output = LTTNG_EVENT_MMAP;
682 break;
683 default:
684 ERR("Stream output unknown %d", channel->output);
685 pthread_mutex_unlock(&channel->lock);
686 goto error_add_stream_nosignal;
687 }
688
689 /*
690 * We've just assigned the channel to the stream so increment the
691 * refcount right now. We don't need to increment the refcount for
692 * streams in no monitor because we handle manually the cleanup of
693 * those. It is very important to make sure there is NO prior
694 * consumer_del_stream() calls or else the refcount will be unbalanced.
695 */
696 if (channel->monitor) {
697 uatomic_inc(&new_stream->chan->refcount);
698 }
699
700 /*
701 * The buffer flush is done on the session daemon side for the kernel
702 * so no need for the stream "hangup_flush_done" variable to be
703 * tracked. This is important for a kernel stream since we don't rely
704 * on the flush state of the stream to read data. It's not the case for
705 * user space tracing.
706 */
707 new_stream->hangup_flush_done = 0;
708
709 health_code_update();
710
711 pthread_mutex_lock(&new_stream->lock);
712 if (ctx->on_recv_stream) {
713 ret = ctx->on_recv_stream(new_stream);
714 if (ret < 0) {
715 pthread_mutex_unlock(&new_stream->lock);
716 pthread_mutex_unlock(&channel->lock);
717 consumer_stream_free(new_stream);
718 goto error_add_stream_nosignal;
719 }
720 }
721 health_code_update();
722
723 if (new_stream->metadata_flag) {
724 channel->metadata_stream = new_stream;
725 }
726
727 /* Do not monitor this stream. */
728 if (!channel->monitor) {
729 DBG("Kernel consumer add stream %s in no monitor mode with "
730 "relayd id %" PRIu64, new_stream->name,
731 new_stream->net_seq_idx);
732 cds_list_add(&new_stream->send_node, &channel->streams.head);
733 pthread_mutex_unlock(&new_stream->lock);
734 pthread_mutex_unlock(&channel->lock);
735 goto end_add_stream;
736 }
737
738 /* Send stream to relayd if the stream has an ID. */
739 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
740 ret = consumer_send_relayd_stream(new_stream,
741 new_stream->chan->pathname);
742 if (ret < 0) {
743 pthread_mutex_unlock(&new_stream->lock);
744 pthread_mutex_unlock(&channel->lock);
745 consumer_stream_free(new_stream);
746 goto error_add_stream_nosignal;
747 }
748
749 /*
750 * If adding an extra stream to an already
751 * existing channel (e.g. cpu hotplug), we need
752 * to send the "streams_sent" command to relayd.
753 */
754 if (channel->streams_sent_to_relayd) {
755 ret = consumer_send_relayd_streams_sent(
756 new_stream->net_seq_idx);
757 if (ret < 0) {
758 pthread_mutex_unlock(&new_stream->lock);
759 pthread_mutex_unlock(&channel->lock);
760 goto error_add_stream_nosignal;
761 }
762 }
763 }
764 pthread_mutex_unlock(&new_stream->lock);
765 pthread_mutex_unlock(&channel->lock);
766
767 /* Get the right pipe where the stream will be sent. */
768 if (new_stream->metadata_flag) {
769 consumer_add_metadata_stream(new_stream);
770 stream_pipe = ctx->consumer_metadata_pipe;
771 } else {
772 consumer_add_data_stream(new_stream);
773 stream_pipe = ctx->consumer_data_pipe;
774 }
775
776 /* Visible to other threads */
777 new_stream->globally_visible = 1;
778
779 health_code_update();
780
781 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
782 if (ret < 0) {
783 ERR("Consumer write %s stream to pipe %d",
784 new_stream->metadata_flag ? "metadata" : "data",
785 lttng_pipe_get_writefd(stream_pipe));
786 if (new_stream->metadata_flag) {
787 consumer_del_stream_for_metadata(new_stream);
788 } else {
789 consumer_del_stream_for_data(new_stream);
790 }
791 goto error_add_stream_nosignal;
792 }
793
794 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
795 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
796 end_add_stream:
797 break;
798 error_add_stream_nosignal:
799 goto end_nosignal;
800 error_add_stream_fatal:
801 goto error_fatal;
802 }
803 case LTTNG_CONSUMER_STREAMS_SENT:
804 {
805 struct lttng_consumer_channel *channel;
806
807 /*
808 * Get stream's channel reference. Needed when adding the stream to the
809 * global hash table.
810 */
811 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
812 if (!channel) {
813 /*
814 * We could not find the channel. Can happen if cpu hotplug
815 * happens while tearing down.
816 */
817 ERR("Unable to find channel key %" PRIu64,
818 msg.u.sent_streams.channel_key);
819 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
820 }
821
822 health_code_update();
823
824 /*
825 * Send status code to session daemon.
826 */
827 ret = consumer_send_status_msg(sock, ret_code);
828 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
829 /* Somehow, the session daemon is not responding anymore. */
830 goto error_streams_sent_nosignal;
831 }
832
833 health_code_update();
834
835 /*
836 * We should not send this message if we don't monitor the
837 * streams in this channel.
838 */
839 if (!channel->monitor) {
840 goto end_error_streams_sent;
841 }
842
843 health_code_update();
844 /* Send stream to relayd if the stream has an ID. */
845 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
846 ret = consumer_send_relayd_streams_sent(
847 msg.u.sent_streams.net_seq_idx);
848 if (ret < 0) {
849 goto error_streams_sent_nosignal;
850 }
851 channel->streams_sent_to_relayd = true;
852 }
853 end_error_streams_sent:
854 break;
855 error_streams_sent_nosignal:
856 goto end_nosignal;
857 }
858 case LTTNG_CONSUMER_UPDATE_STREAM:
859 {
860 rcu_read_unlock();
861 return -ENOSYS;
862 }
863 case LTTNG_CONSUMER_DESTROY_RELAYD:
864 {
865 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
866 struct consumer_relayd_sock_pair *relayd;
867
868 DBG("Kernel consumer destroying relayd %" PRIu64, index);
869
870 /* Get relayd reference if exists. */
871 relayd = consumer_find_relayd(index);
872 if (relayd == NULL) {
873 DBG("Unable to find relayd %" PRIu64, index);
874 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
875 }
876
877 /*
878 * Each relayd socket pair has a refcount of stream attached to it
879 * which tells if the relayd is still active or not depending on the
880 * refcount value.
881 *
882 * This will set the destroy flag of the relayd object and destroy it
883 * if the refcount reaches zero when called.
884 *
885 * The destroy can happen either here or when a stream fd hangs up.
886 */
887 if (relayd) {
888 consumer_flag_relayd_for_destroy(relayd);
889 }
890
891 health_code_update();
892
893 ret = consumer_send_status_msg(sock, ret_code);
894 if (ret < 0) {
895 /* Somehow, the session daemon is not responding anymore. */
896 goto error_fatal;
897 }
898
899 goto end_nosignal;
900 }
901 case LTTNG_CONSUMER_DATA_PENDING:
902 {
903 int32_t ret;
904 uint64_t id = msg.u.data_pending.session_id;
905
906 DBG("Kernel consumer data pending command for id %" PRIu64, id);
907
908 ret = consumer_data_pending(id);
909
910 health_code_update();
911
912 /* Send back returned value to session daemon */
913 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
914 if (ret < 0) {
915 PERROR("send data pending ret code");
916 goto error_fatal;
917 }
918
919 /*
920 * No need to send back a status message since the data pending
921 * returned value is the response.
922 */
923 break;
924 }
925 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
926 {
927 struct lttng_consumer_channel *channel;
928 uint64_t key = msg.u.snapshot_channel.key;
929
930 channel = consumer_find_channel(key);
931 if (!channel) {
932 ERR("Channel %" PRIu64 " not found", key);
933 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
934 } else {
935 pthread_mutex_lock(&channel->lock);
936 if (msg.u.snapshot_channel.metadata == 1) {
937 ret = lttng_kconsumer_snapshot_metadata(channel, key,
938 msg.u.snapshot_channel.pathname,
939 msg.u.snapshot_channel.relayd_id, ctx);
940 if (ret < 0) {
941 ERR("Snapshot metadata failed");
942 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
943 }
944 } else {
945 ret = lttng_kconsumer_snapshot_channel(channel, key,
946 msg.u.snapshot_channel.pathname,
947 msg.u.snapshot_channel.relayd_id,
948 msg.u.snapshot_channel.nb_packets_per_stream,
949 ctx);
950 if (ret < 0) {
951 ERR("Snapshot channel failed");
952 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
953 }
954 }
955 pthread_mutex_unlock(&channel->lock);
956 }
957 health_code_update();
958
959 ret = consumer_send_status_msg(sock, ret_code);
960 if (ret < 0) {
961 /* Somehow, the session daemon is not responding anymore. */
962 goto end_nosignal;
963 }
964 break;
965 }
966 case LTTNG_CONSUMER_DESTROY_CHANNEL:
967 {
968 uint64_t key = msg.u.destroy_channel.key;
969 struct lttng_consumer_channel *channel;
970
971 channel = consumer_find_channel(key);
972 if (!channel) {
973 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
974 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
975 }
976
977 health_code_update();
978
979 ret = consumer_send_status_msg(sock, ret_code);
980 if (ret < 0) {
981 /* Somehow, the session daemon is not responding anymore. */
982 goto end_destroy_channel;
983 }
984
985 health_code_update();
986
987 /* Stop right now if no channel was found. */
988 if (!channel) {
989 goto end_destroy_channel;
990 }
991
992 /*
993 * This command should ONLY be issued for channel with streams set in
994 * no monitor mode.
995 */
996 assert(!channel->monitor);
997
998 /*
999 * The refcount should ALWAYS be 0 in the case of a channel in no
1000 * monitor mode.
1001 */
1002 assert(!uatomic_sub_return(&channel->refcount, 1));
1003
1004 consumer_del_channel(channel);
1005 end_destroy_channel:
1006 goto end_nosignal;
1007 }
1008 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1009 {
1010 ssize_t ret;
1011 uint64_t count;
1012 struct lttng_consumer_channel *channel;
1013 uint64_t id = msg.u.discarded_events.session_id;
1014 uint64_t key = msg.u.discarded_events.channel_key;
1015
1016 DBG("Kernel consumer discarded events command for session id %"
1017 PRIu64 ", channel key %" PRIu64, id, key);
1018
1019 channel = consumer_find_channel(key);
1020 if (!channel) {
1021 ERR("Kernel consumer discarded events channel %"
1022 PRIu64 " not found", key);
1023 count = 0;
1024 } else {
1025 count = channel->discarded_events;
1026 }
1027
1028 health_code_update();
1029
1030 /* Send back returned value to session daemon */
1031 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1032 if (ret < 0) {
1033 PERROR("send discarded events");
1034 goto error_fatal;
1035 }
1036
1037 break;
1038 }
1039 case LTTNG_CONSUMER_LOST_PACKETS:
1040 {
1041 ssize_t ret;
1042 uint64_t count;
1043 struct lttng_consumer_channel *channel;
1044 uint64_t id = msg.u.lost_packets.session_id;
1045 uint64_t key = msg.u.lost_packets.channel_key;
1046
1047 DBG("Kernel consumer lost packets command for session id %"
1048 PRIu64 ", channel key %" PRIu64, id, key);
1049
1050 channel = consumer_find_channel(key);
1051 if (!channel) {
1052 ERR("Kernel consumer lost packets channel %"
1053 PRIu64 " not found", key);
1054 count = 0;
1055 } else {
1056 count = channel->lost_packets;
1057 }
1058
1059 health_code_update();
1060
1061 /* Send back returned value to session daemon */
1062 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1063 if (ret < 0) {
1064 PERROR("send lost packets");
1065 goto error_fatal;
1066 }
1067
1068 break;
1069 }
1070 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1071 {
1072 int channel_monitor_pipe;
1073
1074 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1075 /* Successfully received the command's type. */
1076 ret = consumer_send_status_msg(sock, ret_code);
1077 if (ret < 0) {
1078 goto error_fatal;
1079 }
1080
1081 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1082 1);
1083 if (ret != sizeof(channel_monitor_pipe)) {
1084 ERR("Failed to receive channel monitor pipe");
1085 goto error_fatal;
1086 }
1087
1088 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1089 ret = consumer_timer_thread_set_channel_monitor_pipe(
1090 channel_monitor_pipe);
1091 if (!ret) {
1092 int flags;
1093
1094 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1095 /* Set the pipe as non-blocking. */
1096 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1097 if (ret == -1) {
1098 PERROR("fcntl get flags of the channel monitoring pipe");
1099 goto error_fatal;
1100 }
1101 flags = ret;
1102
1103 ret = fcntl(channel_monitor_pipe, F_SETFL,
1104 flags | O_NONBLOCK);
1105 if (ret == -1) {
1106 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1107 goto error_fatal;
1108 }
1109 DBG("Channel monitor pipe set as non-blocking");
1110 } else {
1111 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1112 }
1113 ret = consumer_send_status_msg(sock, ret_code);
1114 if (ret < 0) {
1115 goto error_fatal;
1116 }
1117 break;
1118 }
1119 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1120 {
1121 struct lttng_consumer_channel *channel;
1122 uint64_t key = msg.u.rotate_channel.key;
1123
1124 DBG("Consumer rotate channel %" PRIu64, key);
1125
1126 channel = consumer_find_channel(key);
1127 if (!channel) {
1128 ERR("Channel %" PRIu64 " not found", key);
1129 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1130 } else {
1131 /*
1132 * Sample the rotate position of all the streams in this channel.
1133 */
1134 ret = lttng_consumer_rotate_channel(channel, key,
1135 msg.u.rotate_channel.relayd_id,
1136 msg.u.rotate_channel.metadata,
1137 ctx);
1138 if (ret < 0) {
1139 ERR("Rotate channel failed");
1140 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1141 }
1142
1143 health_code_update();
1144 }
1145 ret = consumer_send_status_msg(sock, ret_code);
1146 if (ret < 0) {
1147 /* Somehow, the session daemon is not responding anymore. */
1148 goto error_rotate_channel;
1149 }
1150 if (channel) {
1151 /* Rotate the streams that are ready right now. */
1152 ret = lttng_consumer_rotate_ready_streams(
1153 channel, key, ctx);
1154 if (ret < 0) {
1155 ERR("Rotate ready streams failed");
1156 }
1157 }
1158 break;
1159 error_rotate_channel:
1160 goto end_nosignal;
1161 }
1162 case LTTNG_CONSUMER_INIT:
1163 {
1164 ret_code = lttng_consumer_init_command(ctx,
1165 msg.u.init.sessiond_uuid);
1166 health_code_update();
1167 ret = consumer_send_status_msg(sock, ret_code);
1168 if (ret < 0) {
1169 /* Somehow, the session daemon is not responding anymore. */
1170 goto end_nosignal;
1171 }
1172 break;
1173 }
1174 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1175 {
1176 const struct lttng_credentials credentials = {
1177 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1178 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1179 };
1180 const bool is_local_trace =
1181 !msg.u.create_trace_chunk.relayd_id.is_set;
1182 const uint64_t relayd_id =
1183 msg.u.create_trace_chunk.relayd_id.value;
1184 const char *chunk_override_name =
1185 *msg.u.create_trace_chunk.override_name ?
1186 msg.u.create_trace_chunk.override_name :
1187 NULL;
1188 struct lttng_directory_handle *chunk_directory_handle = NULL;
1189
1190 /*
1191 * The session daemon will only provide a chunk directory file
1192 * descriptor for local traces.
1193 */
1194 if (is_local_trace) {
1195 int chunk_dirfd;
1196
1197 /* Acnowledge the reception of the command. */
1198 ret = consumer_send_status_msg(sock,
1199 LTTCOMM_CONSUMERD_SUCCESS);
1200 if (ret < 0) {
1201 /* Somehow, the session daemon is not responding anymore. */
1202 goto end_nosignal;
1203 }
1204
1205 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1206 if (ret != sizeof(chunk_dirfd)) {
1207 ERR("Failed to receive trace chunk directory file descriptor");
1208 goto error_fatal;
1209 }
1210
1211 DBG("Received trace chunk directory fd (%d)",
1212 chunk_dirfd);
1213 chunk_directory_handle = lttng_directory_handle_create_from_dirfd(
1214 chunk_dirfd);
1215 if (!chunk_directory_handle) {
1216 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1217 if (close(chunk_dirfd)) {
1218 PERROR("Failed to close chunk directory file descriptor");
1219 }
1220 goto error_fatal;
1221 }
1222 }
1223
1224 ret_code = lttng_consumer_create_trace_chunk(
1225 !is_local_trace ? &relayd_id : NULL,
1226 msg.u.create_trace_chunk.session_id,
1227 msg.u.create_trace_chunk.chunk_id,
1228 (time_t) msg.u.create_trace_chunk
1229 .creation_timestamp,
1230 chunk_override_name,
1231 msg.u.create_trace_chunk.credentials.is_set ?
1232 &credentials :
1233 NULL,
1234 chunk_directory_handle);
1235 lttng_directory_handle_put(chunk_directory_handle);
1236 goto end_msg_sessiond;
1237 }
1238 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1239 {
1240 enum lttng_trace_chunk_command_type close_command =
1241 msg.u.close_trace_chunk.close_command.value;
1242 const uint64_t relayd_id =
1243 msg.u.close_trace_chunk.relayd_id.value;
1244 struct lttcomm_consumer_close_trace_chunk_reply reply;
1245 char path[LTTNG_PATH_MAX];
1246
1247 ret_code = lttng_consumer_close_trace_chunk(
1248 msg.u.close_trace_chunk.relayd_id.is_set ?
1249 &relayd_id :
1250 NULL,
1251 msg.u.close_trace_chunk.session_id,
1252 msg.u.close_trace_chunk.chunk_id,
1253 (time_t) msg.u.close_trace_chunk.close_timestamp,
1254 msg.u.close_trace_chunk.close_command.is_set ?
1255 &close_command :
1256 NULL, path);
1257 reply.ret_code = ret_code;
1258 reply.path_length = strlen(path) + 1;
1259 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1260 if (ret != sizeof(reply)) {
1261 goto error_fatal;
1262 }
1263 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1264 if (ret != reply.path_length) {
1265 goto error_fatal;
1266 }
1267 goto end_nosignal;
1268 }
1269 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1270 {
1271 const uint64_t relayd_id =
1272 msg.u.trace_chunk_exists.relayd_id.value;
1273
1274 ret_code = lttng_consumer_trace_chunk_exists(
1275 msg.u.trace_chunk_exists.relayd_id.is_set ?
1276 &relayd_id : NULL,
1277 msg.u.trace_chunk_exists.session_id,
1278 msg.u.trace_chunk_exists.chunk_id);
1279 goto end_msg_sessiond;
1280 }
1281 default:
1282 goto end_nosignal;
1283 }
1284
1285 end_nosignal:
1286 /*
1287 * Return 1 to indicate success since the 0 value can be a socket
1288 * shutdown during the recv() or send() call.
1289 */
1290 ret = 1;
1291 goto end;
1292 error_fatal:
1293 /* This will issue a consumer stop. */
1294 ret = -1;
1295 goto end;
1296 end_msg_sessiond:
1297 /*
1298 * The returned value here is not useful since either way we'll return 1 to
1299 * the caller because the session daemon socket management is done
1300 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1301 */
1302 ret = consumer_send_status_msg(sock, ret_code);
1303 if (ret < 0) {
1304 goto error_fatal;
1305 }
1306 ret = 1;
1307 end:
1308 health_code_update();
1309 rcu_read_unlock();
1310 return ret;
1311 }
1312
1313 /*
1314 * Populate index values of a kernel stream. Values are set in big endian order.
1315 *
1316 * Return 0 on success or else a negative value.
1317 */
1318 static int get_index_values(struct ctf_packet_index *index, int infd)
1319 {
1320 int ret;
1321 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1322 events_discarded, stream_id, stream_instance_id,
1323 packet_seq_num;
1324
1325 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1326 if (ret < 0) {
1327 PERROR("kernctl_get_timestamp_begin");
1328 goto error;
1329 }
1330
1331 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1332 if (ret < 0) {
1333 PERROR("kernctl_get_timestamp_end");
1334 goto error;
1335 }
1336
1337 ret = kernctl_get_events_discarded(infd, &events_discarded);
1338 if (ret < 0) {
1339 PERROR("kernctl_get_events_discarded");
1340 goto error;
1341 }
1342
1343 ret = kernctl_get_content_size(infd, &content_size);
1344 if (ret < 0) {
1345 PERROR("kernctl_get_content_size");
1346 goto error;
1347 }
1348
1349 ret = kernctl_get_packet_size(infd, &packet_size);
1350 if (ret < 0) {
1351 PERROR("kernctl_get_packet_size");
1352 goto error;
1353 }
1354
1355 ret = kernctl_get_stream_id(infd, &stream_id);
1356 if (ret < 0) {
1357 PERROR("kernctl_get_stream_id");
1358 goto error;
1359 }
1360
1361 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1362 if (ret < 0) {
1363 if (ret == -ENOTTY) {
1364 /* Command not implemented by lttng-modules. */
1365 stream_instance_id = -1ULL;
1366 } else {
1367 PERROR("kernctl_get_instance_id");
1368 goto error;
1369 }
1370 }
1371
1372 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1373 if (ret < 0) {
1374 if (ret == -ENOTTY) {
1375 /* Command not implemented by lttng-modules. */
1376 packet_seq_num = -1ULL;
1377 ret = 0;
1378 } else {
1379 PERROR("kernctl_get_sequence_number");
1380 goto error;
1381 }
1382 }
1383 index->packet_seq_num = htobe64(index->packet_seq_num);
1384
1385 *index = (typeof(*index)) {
1386 .offset = index->offset,
1387 .packet_size = htobe64(packet_size),
1388 .content_size = htobe64(content_size),
1389 .timestamp_begin = htobe64(timestamp_begin),
1390 .timestamp_end = htobe64(timestamp_end),
1391 .events_discarded = htobe64(events_discarded),
1392 .stream_id = htobe64(stream_id),
1393 .stream_instance_id = htobe64(stream_instance_id),
1394 .packet_seq_num = htobe64(packet_seq_num),
1395 };
1396
1397 error:
1398 return ret;
1399 }
1400 /*
1401 * Sync metadata meaning request them to the session daemon and snapshot to the
1402 * metadata thread can consumer them.
1403 *
1404 * Metadata stream lock MUST be acquired.
1405 *
1406 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1407 * is empty or a negative value on error.
1408 */
1409 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1410 {
1411 int ret;
1412
1413 assert(metadata);
1414
1415 ret = kernctl_buffer_flush(metadata->wait_fd);
1416 if (ret < 0) {
1417 ERR("Failed to flush kernel stream");
1418 goto end;
1419 }
1420
1421 ret = kernctl_snapshot(metadata->wait_fd);
1422 if (ret < 0) {
1423 if (ret != -EAGAIN) {
1424 ERR("Sync metadata, taking kernel snapshot failed.");
1425 goto end;
1426 }
1427 DBG("Sync metadata, no new kernel metadata");
1428 /* No new metadata, exit. */
1429 ret = ENODATA;
1430 goto end;
1431 }
1432
1433 end:
1434 return ret;
1435 }
1436
1437 static
1438 int update_stream_stats(struct lttng_consumer_stream *stream)
1439 {
1440 int ret;
1441 uint64_t seq, discarded;
1442
1443 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1444 if (ret < 0) {
1445 if (ret == -ENOTTY) {
1446 /* Command not implemented by lttng-modules. */
1447 seq = -1ULL;
1448 } else {
1449 PERROR("kernctl_get_sequence_number");
1450 goto end;
1451 }
1452 }
1453
1454 /*
1455 * Start the sequence when we extract the first packet in case we don't
1456 * start at 0 (for example if a consumer is not connected to the
1457 * session immediately after the beginning).
1458 */
1459 if (stream->last_sequence_number == -1ULL) {
1460 stream->last_sequence_number = seq;
1461 } else if (seq > stream->last_sequence_number) {
1462 stream->chan->lost_packets += seq -
1463 stream->last_sequence_number - 1;
1464 } else {
1465 /* seq <= last_sequence_number */
1466 ERR("Sequence number inconsistent : prev = %" PRIu64
1467 ", current = %" PRIu64,
1468 stream->last_sequence_number, seq);
1469 ret = -1;
1470 goto end;
1471 }
1472 stream->last_sequence_number = seq;
1473
1474 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1475 if (ret < 0) {
1476 PERROR("kernctl_get_events_discarded");
1477 goto end;
1478 }
1479 if (discarded < stream->last_discarded_events) {
1480 /*
1481 * Overflow has occurred. We assume only one wrap-around
1482 * has occurred.
1483 */
1484 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1485 stream->last_discarded_events + discarded;
1486 } else {
1487 stream->chan->discarded_events += discarded -
1488 stream->last_discarded_events;
1489 }
1490 stream->last_discarded_events = discarded;
1491 ret = 0;
1492
1493 end:
1494 return ret;
1495 }
1496
1497 /*
1498 * Check if the local version of the metadata stream matches with the version
1499 * of the metadata stream in the kernel. If it was updated, set the reset flag
1500 * on the stream.
1501 */
1502 static
1503 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1504 {
1505 int ret;
1506 uint64_t cur_version;
1507
1508 ret = kernctl_get_metadata_version(infd, &cur_version);
1509 if (ret < 0) {
1510 if (ret == -ENOTTY) {
1511 /*
1512 * LTTng-modules does not implement this
1513 * command.
1514 */
1515 ret = 0;
1516 goto end;
1517 }
1518 ERR("Failed to get the metadata version");
1519 goto end;
1520 }
1521
1522 if (stream->metadata_version == cur_version) {
1523 ret = 0;
1524 goto end;
1525 }
1526
1527 DBG("New metadata version detected");
1528 stream->metadata_version = cur_version;
1529 stream->reset_metadata_flag = 1;
1530 ret = 0;
1531
1532 end:
1533 return ret;
1534 }
1535
1536 /*
1537 * Consume data on a file descriptor and write it on a trace file.
1538 * The stream and channel locks must be held by the caller.
1539 */
1540 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1541 struct lttng_consumer_local_data *ctx)
1542 {
1543 unsigned long len, subbuf_size, padding;
1544 int err, write_index = 1, rotation_ret;
1545 ssize_t ret = 0;
1546 int infd = stream->wait_fd;
1547 struct ctf_packet_index index = {};
1548
1549 DBG("In read_subbuffer (infd : %d)", infd);
1550
1551 /*
1552 * If the stream was flagged to be ready for rotation before we extract the
1553 * next packet, rotate it now.
1554 */
1555 if (stream->rotate_ready) {
1556 DBG("Rotate stream before extracting data");
1557 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1558 if (rotation_ret < 0) {
1559 ERR("Stream rotation error");
1560 ret = -1;
1561 goto error;
1562 }
1563 }
1564
1565 /* Get the next subbuffer */
1566 err = kernctl_get_next_subbuf(infd);
1567 if (err != 0) {
1568 /*
1569 * This is a debug message even for single-threaded consumer,
1570 * because poll() have more relaxed criterions than get subbuf,
1571 * so get_subbuf may fail for short race windows where poll()
1572 * would issue wakeups.
1573 */
1574 DBG("Reserving sub buffer failed (everything is normal, "
1575 "it is due to concurrency)");
1576 ret = err;
1577 goto error;
1578 }
1579
1580 /* Get the full subbuffer size including padding */
1581 err = kernctl_get_padded_subbuf_size(infd, &len);
1582 if (err != 0) {
1583 PERROR("Getting sub-buffer len failed.");
1584 err = kernctl_put_subbuf(infd);
1585 if (err != 0) {
1586 if (err == -EFAULT) {
1587 PERROR("Error in unreserving sub buffer\n");
1588 } else if (err == -EIO) {
1589 /* Should never happen with newer LTTng versions */
1590 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1591 }
1592 ret = err;
1593 goto error;
1594 }
1595 ret = err;
1596 goto error;
1597 }
1598
1599 if (!stream->metadata_flag) {
1600 ret = get_index_values(&index, infd);
1601 if (ret < 0) {
1602 err = kernctl_put_subbuf(infd);
1603 if (err != 0) {
1604 if (err == -EFAULT) {
1605 PERROR("Error in unreserving sub buffer\n");
1606 } else if (err == -EIO) {
1607 /* Should never happen with newer LTTng versions */
1608 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1609 }
1610 ret = err;
1611 goto error;
1612 }
1613 goto error;
1614 }
1615 ret = update_stream_stats(stream);
1616 if (ret < 0) {
1617 err = kernctl_put_subbuf(infd);
1618 if (err != 0) {
1619 if (err == -EFAULT) {
1620 PERROR("Error in unreserving sub buffer\n");
1621 } else if (err == -EIO) {
1622 /* Should never happen with newer LTTng versions */
1623 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1624 }
1625 ret = err;
1626 goto error;
1627 }
1628 goto error;
1629 }
1630 } else {
1631 write_index = 0;
1632 ret = metadata_stream_check_version(infd, stream);
1633 if (ret < 0) {
1634 err = kernctl_put_subbuf(infd);
1635 if (err != 0) {
1636 if (err == -EFAULT) {
1637 PERROR("Error in unreserving sub buffer\n");
1638 } else if (err == -EIO) {
1639 /* Should never happen with newer LTTng versions */
1640 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1641 }
1642 ret = err;
1643 goto error;
1644 }
1645 goto error;
1646 }
1647 }
1648
1649 switch (stream->chan->output) {
1650 case CONSUMER_CHANNEL_SPLICE:
1651 /*
1652 * XXX: The lttng-modules splice "actor" does not handle copying
1653 * partial pages hence only using the subbuffer size without the
1654 * padding makes the splice fail.
1655 */
1656 subbuf_size = len;
1657 padding = 0;
1658
1659 /* splice the subbuffer to the tracefile */
1660 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1661 padding, &index);
1662 /*
1663 * XXX: Splice does not support network streaming so the return value
1664 * is simply checked against subbuf_size and not like the mmap() op.
1665 */
1666 if (ret != subbuf_size) {
1667 /*
1668 * display the error but continue processing to try
1669 * to release the subbuffer
1670 */
1671 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1672 ret, subbuf_size);
1673 write_index = 0;
1674 }
1675 break;
1676 case CONSUMER_CHANNEL_MMAP:
1677 /* Get subbuffer size without padding */
1678 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1679 if (err != 0) {
1680 PERROR("Getting sub-buffer len failed.");
1681 err = kernctl_put_subbuf(infd);
1682 if (err != 0) {
1683 if (err == -EFAULT) {
1684 PERROR("Error in unreserving sub buffer\n");
1685 } else if (err == -EIO) {
1686 /* Should never happen with newer LTTng versions */
1687 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1688 }
1689 ret = err;
1690 goto error;
1691 }
1692 ret = err;
1693 goto error;
1694 }
1695
1696 /* Make sure the tracer is not gone mad on us! */
1697 assert(len >= subbuf_size);
1698
1699 padding = len - subbuf_size;
1700
1701 /* write the subbuffer to the tracefile */
1702 ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, subbuf_size,
1703 padding, &index);
1704 /*
1705 * The mmap operation should write subbuf_size amount of data when
1706 * network streaming or the full padding (len) size when we are _not_
1707 * streaming.
1708 */
1709 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1710 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1711 /*
1712 * Display the error but continue processing to try to release the
1713 * subbuffer. This is a DBG statement since this is possible to
1714 * happen without being a critical error.
1715 */
1716 DBG("Error writing to tracefile "
1717 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1718 ret, len, subbuf_size);
1719 write_index = 0;
1720 }
1721 break;
1722 default:
1723 ERR("Unknown output method");
1724 ret = -EPERM;
1725 }
1726
1727 err = kernctl_put_next_subbuf(infd);
1728 if (err != 0) {
1729 if (err == -EFAULT) {
1730 PERROR("Error in unreserving sub buffer\n");
1731 } else if (err == -EIO) {
1732 /* Should never happen with newer LTTng versions */
1733 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1734 }
1735 ret = err;
1736 goto error;
1737 }
1738
1739 /* Write index if needed. */
1740 if (!write_index) {
1741 goto rotate;
1742 }
1743
1744 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1745 /*
1746 * In live, block until all the metadata is sent.
1747 */
1748 pthread_mutex_lock(&stream->metadata_timer_lock);
1749 assert(!stream->missed_metadata_flush);
1750 stream->waiting_on_metadata = true;
1751 pthread_mutex_unlock(&stream->metadata_timer_lock);
1752
1753 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1754
1755 pthread_mutex_lock(&stream->metadata_timer_lock);
1756 stream->waiting_on_metadata = false;
1757 if (stream->missed_metadata_flush) {
1758 stream->missed_metadata_flush = false;
1759 pthread_mutex_unlock(&stream->metadata_timer_lock);
1760 (void) consumer_flush_kernel_index(stream);
1761 } else {
1762 pthread_mutex_unlock(&stream->metadata_timer_lock);
1763 }
1764 if (err < 0) {
1765 goto error;
1766 }
1767 }
1768
1769 err = consumer_stream_write_index(stream, &index);
1770 if (err < 0) {
1771 goto error;
1772 }
1773
1774 rotate:
1775 /*
1776 * After extracting the packet, we check if the stream is now ready to be
1777 * rotated and perform the action immediately.
1778 */
1779 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1780 if (rotation_ret == 1) {
1781 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1782 if (rotation_ret < 0) {
1783 ERR("Stream rotation error");
1784 ret = -1;
1785 goto error;
1786 }
1787 } else if (rotation_ret < 0) {
1788 ERR("Checking if stream is ready to rotate");
1789 ret = -1;
1790 goto error;
1791 }
1792
1793 error:
1794 return ret;
1795 }
1796
1797 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1798 {
1799 int ret;
1800
1801 assert(stream);
1802
1803 /*
1804 * Don't create anything if this is set for streaming or if there is
1805 * no current trace chunk on the parent channel.
1806 */
1807 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1808 stream->chan->trace_chunk) {
1809 ret = consumer_stream_create_output_files(stream, true);
1810 if (ret) {
1811 goto error;
1812 }
1813 }
1814
1815 if (stream->output == LTTNG_EVENT_MMAP) {
1816 /* get the len of the mmap region */
1817 unsigned long mmap_len;
1818
1819 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1820 if (ret != 0) {
1821 PERROR("kernctl_get_mmap_len");
1822 goto error_close_fd;
1823 }
1824 stream->mmap_len = (size_t) mmap_len;
1825
1826 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1827 MAP_PRIVATE, stream->wait_fd, 0);
1828 if (stream->mmap_base == MAP_FAILED) {
1829 PERROR("Error mmaping");
1830 ret = -1;
1831 goto error_close_fd;
1832 }
1833 }
1834
1835 /* we return 0 to let the library handle the FD internally */
1836 return 0;
1837
1838 error_close_fd:
1839 if (stream->out_fd >= 0) {
1840 int err;
1841
1842 err = close(stream->out_fd);
1843 assert(!err);
1844 stream->out_fd = -1;
1845 }
1846 error:
1847 return ret;
1848 }
1849
1850 /*
1851 * Check if data is still being extracted from the buffers for a specific
1852 * stream. Consumer data lock MUST be acquired before calling this function
1853 * and the stream lock.
1854 *
1855 * Return 1 if the traced data are still getting read else 0 meaning that the
1856 * data is available for trace viewer reading.
1857 */
1858 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1859 {
1860 int ret;
1861
1862 assert(stream);
1863
1864 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1865 ret = 0;
1866 goto end;
1867 }
1868
1869 ret = kernctl_get_next_subbuf(stream->wait_fd);
1870 if (ret == 0) {
1871 /* There is still data so let's put back this subbuffer. */
1872 ret = kernctl_put_subbuf(stream->wait_fd);
1873 assert(ret == 0);
1874 ret = 1; /* Data is pending */
1875 goto end;
1876 }
1877
1878 /* Data is NOT pending and ready to be read. */
1879 ret = 0;
1880
1881 end:
1882 return ret;
1883 }
This page took 0.104169 seconds and 5 git commands to generate.