consumerd: move address computation from on_read_subbuffer_mmap
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-only
7 *
8 */
9
10 #include <stdint.h>
11 #define _LGPL_SOURCE
12 #include <assert.h>
13 #include <poll.h>
14 #include <pthread.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 #include <sys/socket.h>
19 #include <sys/types.h>
20 #include <inttypes.h>
21 #include <unistd.h>
22 #include <sys/stat.h>
23
24 #include <bin/lttng-consumerd/health-consumerd.h>
25 #include <common/common.h>
26 #include <common/kernel-ctl/kernel-ctl.h>
27 #include <common/sessiond-comm/sessiond-comm.h>
28 #include <common/sessiond-comm/relayd.h>
29 #include <common/compat/fcntl.h>
30 #include <common/compat/endian.h>
31 #include <common/pipe.h>
32 #include <common/relayd/relayd.h>
33 #include <common/utils.h>
34 #include <common/consumer/consumer-stream.h>
35 #include <common/index/index.h>
36 #include <common/consumer/consumer-timer.h>
37 #include <common/optional.h>
38
39 #include "kernel-consumer.h"
40
41 extern struct lttng_consumer_global_data consumer_data;
42 extern int consumer_poll_timeout;
43
44 /*
45 * Take a snapshot for a specific fd
46 *
47 * Returns 0 on success, < 0 on error
48 */
49 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
50 {
51 int ret = 0;
52 int infd = stream->wait_fd;
53
54 ret = kernctl_snapshot(infd);
55 /*
56 * -EAGAIN is not an error, it just means that there is no data to
57 * be read.
58 */
59 if (ret != 0 && ret != -EAGAIN) {
60 PERROR("Getting sub-buffer snapshot.");
61 }
62
63 return ret;
64 }
65
66 /*
67 * Sample consumed and produced positions for a specific fd.
68 *
69 * Returns 0 on success, < 0 on error.
70 */
71 int lttng_kconsumer_sample_snapshot_positions(
72 struct lttng_consumer_stream *stream)
73 {
74 assert(stream);
75
76 return kernctl_snapshot_sample_positions(stream->wait_fd);
77 }
78
79 /*
80 * Get the produced position
81 *
82 * Returns 0 on success, < 0 on error
83 */
84 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
85 unsigned long *pos)
86 {
87 int ret;
88 int infd = stream->wait_fd;
89
90 ret = kernctl_snapshot_get_produced(infd, pos);
91 if (ret != 0) {
92 PERROR("kernctl_snapshot_get_produced");
93 }
94
95 return ret;
96 }
97
98 /*
99 * Get the consumerd position
100 *
101 * Returns 0 on success, < 0 on error
102 */
103 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
104 unsigned long *pos)
105 {
106 int ret;
107 int infd = stream->wait_fd;
108
109 ret = kernctl_snapshot_get_consumed(infd, pos);
110 if (ret != 0) {
111 PERROR("kernctl_snapshot_get_consumed");
112 }
113
114 return ret;
115 }
116
117 static
118 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
119 const char **addr)
120 {
121 int ret;
122 unsigned long mmap_offset;
123 const char *mmap_base = stream->mmap_base;
124
125 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
126 if (ret < 0) {
127 PERROR("Failed to get mmap read offset");
128 goto error;
129 }
130
131 *addr = mmap_base + mmap_offset;
132 error:
133 return ret;
134 }
135
136 /*
137 * Take a snapshot of all the stream of a channel
138 * RCU read-side lock must be held across this function to ensure existence of
139 * channel. The channel lock must be held by the caller.
140 *
141 * Returns 0 on success, < 0 on error
142 */
143 static int lttng_kconsumer_snapshot_channel(
144 struct lttng_consumer_channel *channel,
145 uint64_t key, char *path, uint64_t relayd_id,
146 uint64_t nb_packets_per_stream,
147 struct lttng_consumer_local_data *ctx)
148 {
149 int ret;
150 struct lttng_consumer_stream *stream;
151
152 DBG("Kernel consumer snapshot channel %" PRIu64, key);
153
154 rcu_read_lock();
155
156 /* Splice is not supported yet for channel snapshot. */
157 if (channel->output != CONSUMER_CHANNEL_MMAP) {
158 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
159 channel->name);
160 ret = -1;
161 goto end;
162 }
163
164 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
165 unsigned long consumed_pos, produced_pos;
166
167 health_code_update();
168
169 /*
170 * Lock stream because we are about to change its state.
171 */
172 pthread_mutex_lock(&stream->lock);
173
174 assert(channel->trace_chunk);
175 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
176 /*
177 * Can't happen barring an internal error as the channel
178 * holds a reference to the trace chunk.
179 */
180 ERR("Failed to acquire reference to channel's trace chunk");
181 ret = -1;
182 goto end_unlock;
183 }
184 assert(!stream->trace_chunk);
185 stream->trace_chunk = channel->trace_chunk;
186
187 /*
188 * Assign the received relayd ID so we can use it for streaming. The streams
189 * are not visible to anyone so this is OK to change it.
190 */
191 stream->net_seq_idx = relayd_id;
192 channel->relayd_id = relayd_id;
193 if (relayd_id != (uint64_t) -1ULL) {
194 ret = consumer_send_relayd_stream(stream, path);
195 if (ret < 0) {
196 ERR("sending stream to relayd");
197 goto end_unlock;
198 }
199 } else {
200 ret = consumer_stream_create_output_files(stream,
201 false);
202 if (ret < 0) {
203 goto end_unlock;
204 }
205 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
206 stream->key);
207 }
208
209 ret = kernctl_buffer_flush_empty(stream->wait_fd);
210 if (ret < 0) {
211 /*
212 * Doing a buffer flush which does not take into
213 * account empty packets. This is not perfect
214 * for stream intersection, but required as a
215 * fall-back when "flush_empty" is not
216 * implemented by lttng-modules.
217 */
218 ret = kernctl_buffer_flush(stream->wait_fd);
219 if (ret < 0) {
220 ERR("Failed to flush kernel stream");
221 goto end_unlock;
222 }
223 goto end_unlock;
224 }
225
226 ret = lttng_kconsumer_take_snapshot(stream);
227 if (ret < 0) {
228 ERR("Taking kernel snapshot");
229 goto end_unlock;
230 }
231
232 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
233 if (ret < 0) {
234 ERR("Produced kernel snapshot position");
235 goto end_unlock;
236 }
237
238 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
239 if (ret < 0) {
240 ERR("Consumerd kernel snapshot position");
241 goto end_unlock;
242 }
243
244 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
245 produced_pos, nb_packets_per_stream,
246 stream->max_sb_size);
247
248 while ((long) (consumed_pos - produced_pos) < 0) {
249 ssize_t read_len;
250 unsigned long len, padded_len;
251 const char *subbuf_addr;
252
253 health_code_update();
254
255 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
256
257 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
258 if (ret < 0) {
259 if (ret != -EAGAIN) {
260 PERROR("kernctl_get_subbuf snapshot");
261 goto end_unlock;
262 }
263 DBG("Kernel consumer get subbuf failed. Skipping it.");
264 consumed_pos += stream->max_sb_size;
265 stream->chan->lost_packets++;
266 continue;
267 }
268
269 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
270 if (ret < 0) {
271 ERR("Snapshot kernctl_get_subbuf_size");
272 goto error_put_subbuf;
273 }
274
275 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
276 if (ret < 0) {
277 ERR("Snapshot kernctl_get_padded_subbuf_size");
278 goto error_put_subbuf;
279 }
280
281 ret = get_current_subbuf_addr(stream, &subbuf_addr);
282 if (ret) {
283 goto error_put_subbuf;
284 }
285
286 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx,
287 stream, subbuf_addr, len,
288 padded_len - len, NULL);
289 /*
290 * We write the padded len in local tracefiles but the data len
291 * when using a relay. Display the error but continue processing
292 * to try to release the subbuffer.
293 */
294 if (relayd_id != (uint64_t) -1ULL) {
295 if (read_len != len) {
296 ERR("Error sending to the relay (ret: %zd != len: %lu)",
297 read_len, len);
298 }
299 } else {
300 if (read_len != padded_len) {
301 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
302 read_len, padded_len);
303 }
304 }
305
306 ret = kernctl_put_subbuf(stream->wait_fd);
307 if (ret < 0) {
308 ERR("Snapshot kernctl_put_subbuf");
309 goto end_unlock;
310 }
311 consumed_pos += stream->max_sb_size;
312 }
313
314 if (relayd_id == (uint64_t) -1ULL) {
315 if (stream->out_fd >= 0) {
316 ret = close(stream->out_fd);
317 if (ret < 0) {
318 PERROR("Kernel consumer snapshot close out_fd");
319 goto end_unlock;
320 }
321 stream->out_fd = -1;
322 }
323 } else {
324 close_relayd_stream(stream);
325 stream->net_seq_idx = (uint64_t) -1ULL;
326 }
327 lttng_trace_chunk_put(stream->trace_chunk);
328 stream->trace_chunk = NULL;
329 pthread_mutex_unlock(&stream->lock);
330 }
331
332 /* All good! */
333 ret = 0;
334 goto end;
335
336 error_put_subbuf:
337 ret = kernctl_put_subbuf(stream->wait_fd);
338 if (ret < 0) {
339 ERR("Snapshot kernctl_put_subbuf error path");
340 }
341 end_unlock:
342 pthread_mutex_unlock(&stream->lock);
343 end:
344 rcu_read_unlock();
345 return ret;
346 }
347
348 /*
349 * Read the whole metadata available for a snapshot.
350 * RCU read-side lock must be held across this function to ensure existence of
351 * metadata_channel. The channel lock must be held by the caller.
352 *
353 * Returns 0 on success, < 0 on error
354 */
355 static int lttng_kconsumer_snapshot_metadata(
356 struct lttng_consumer_channel *metadata_channel,
357 uint64_t key, char *path, uint64_t relayd_id,
358 struct lttng_consumer_local_data *ctx)
359 {
360 int ret, use_relayd = 0;
361 ssize_t ret_read;
362 struct lttng_consumer_stream *metadata_stream;
363
364 assert(ctx);
365
366 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
367 key, path);
368
369 rcu_read_lock();
370
371 metadata_stream = metadata_channel->metadata_stream;
372 assert(metadata_stream);
373
374 pthread_mutex_lock(&metadata_stream->lock);
375 assert(metadata_channel->trace_chunk);
376 assert(metadata_stream->trace_chunk);
377
378 /* Flag once that we have a valid relayd for the stream. */
379 if (relayd_id != (uint64_t) -1ULL) {
380 use_relayd = 1;
381 }
382
383 if (use_relayd) {
384 ret = consumer_send_relayd_stream(metadata_stream, path);
385 if (ret < 0) {
386 goto error_snapshot;
387 }
388 } else {
389 ret = consumer_stream_create_output_files(metadata_stream,
390 false);
391 if (ret < 0) {
392 goto error_snapshot;
393 }
394 }
395
396 do {
397 health_code_update();
398
399 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
400 if (ret_read < 0) {
401 if (ret_read != -EAGAIN) {
402 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
403 ret_read);
404 ret = ret_read;
405 goto error_snapshot;
406 }
407 /* ret_read is negative at this point so we will exit the loop. */
408 continue;
409 }
410 } while (ret_read >= 0);
411
412 if (use_relayd) {
413 close_relayd_stream(metadata_stream);
414 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
415 } else {
416 if (metadata_stream->out_fd >= 0) {
417 ret = close(metadata_stream->out_fd);
418 if (ret < 0) {
419 PERROR("Kernel consumer snapshot metadata close out_fd");
420 /*
421 * Don't go on error here since the snapshot was successful at this
422 * point but somehow the close failed.
423 */
424 }
425 metadata_stream->out_fd = -1;
426 lttng_trace_chunk_put(metadata_stream->trace_chunk);
427 metadata_stream->trace_chunk = NULL;
428 }
429 }
430
431 ret = 0;
432 error_snapshot:
433 pthread_mutex_unlock(&metadata_stream->lock);
434 cds_list_del(&metadata_stream->send_node);
435 consumer_stream_destroy(metadata_stream, NULL);
436 metadata_channel->metadata_stream = NULL;
437 rcu_read_unlock();
438 return ret;
439 }
440
441 /*
442 * Receive command from session daemon and process it.
443 *
444 * Return 1 on success else a negative value or 0.
445 */
446 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
447 int sock, struct pollfd *consumer_sockpoll)
448 {
449 ssize_t ret;
450 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
451 struct lttcomm_consumer_msg msg;
452
453 health_code_update();
454
455 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
456 if (ret != sizeof(msg)) {
457 if (ret > 0) {
458 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
459 ret = -1;
460 }
461 return ret;
462 }
463
464 health_code_update();
465
466 /* Deprecated command */
467 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
468
469 health_code_update();
470
471 /* relayd needs RCU read-side protection */
472 rcu_read_lock();
473
474 switch (msg.cmd_type) {
475 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
476 {
477 /* Session daemon status message are handled in the following call. */
478 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
479 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
480 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
481 msg.u.relayd_sock.relayd_session_id);
482 goto end_nosignal;
483 }
484 case LTTNG_CONSUMER_ADD_CHANNEL:
485 {
486 struct lttng_consumer_channel *new_channel;
487 int ret_recv;
488 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
489
490 health_code_update();
491
492 /* First send a status message before receiving the fds. */
493 ret = consumer_send_status_msg(sock, ret_code);
494 if (ret < 0) {
495 /* Somehow, the session daemon is not responding anymore. */
496 goto error_fatal;
497 }
498
499 health_code_update();
500
501 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
502 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
503 msg.u.channel.session_id,
504 msg.u.channel.chunk_id.is_set ?
505 &chunk_id : NULL,
506 msg.u.channel.pathname,
507 msg.u.channel.name,
508 msg.u.channel.relayd_id, msg.u.channel.output,
509 msg.u.channel.tracefile_size,
510 msg.u.channel.tracefile_count, 0,
511 msg.u.channel.monitor,
512 msg.u.channel.live_timer_interval,
513 NULL, NULL);
514 if (new_channel == NULL) {
515 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
516 goto end_nosignal;
517 }
518 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
519 switch (msg.u.channel.output) {
520 case LTTNG_EVENT_SPLICE:
521 new_channel->output = CONSUMER_CHANNEL_SPLICE;
522 break;
523 case LTTNG_EVENT_MMAP:
524 new_channel->output = CONSUMER_CHANNEL_MMAP;
525 break;
526 default:
527 ERR("Channel output unknown %d", msg.u.channel.output);
528 goto end_nosignal;
529 }
530
531 /* Translate and save channel type. */
532 switch (msg.u.channel.type) {
533 case CONSUMER_CHANNEL_TYPE_DATA:
534 case CONSUMER_CHANNEL_TYPE_METADATA:
535 new_channel->type = msg.u.channel.type;
536 break;
537 default:
538 assert(0);
539 goto end_nosignal;
540 };
541
542 health_code_update();
543
544 if (ctx->on_recv_channel != NULL) {
545 ret_recv = ctx->on_recv_channel(new_channel);
546 if (ret_recv == 0) {
547 ret = consumer_add_channel(new_channel, ctx);
548 } else if (ret_recv < 0) {
549 goto end_nosignal;
550 }
551 } else {
552 ret = consumer_add_channel(new_channel, ctx);
553 }
554 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
555 int monitor_start_ret;
556
557 DBG("Consumer starting monitor timer");
558 consumer_timer_live_start(new_channel,
559 msg.u.channel.live_timer_interval);
560 monitor_start_ret = consumer_timer_monitor_start(
561 new_channel,
562 msg.u.channel.monitor_timer_interval);
563 if (monitor_start_ret < 0) {
564 ERR("Starting channel monitoring timer failed");
565 goto end_nosignal;
566 }
567
568 }
569
570 health_code_update();
571
572 /* If we received an error in add_channel, we need to report it. */
573 if (ret < 0) {
574 ret = consumer_send_status_msg(sock, ret);
575 if (ret < 0) {
576 goto error_fatal;
577 }
578 goto end_nosignal;
579 }
580
581 goto end_nosignal;
582 }
583 case LTTNG_CONSUMER_ADD_STREAM:
584 {
585 int fd;
586 struct lttng_pipe *stream_pipe;
587 struct lttng_consumer_stream *new_stream;
588 struct lttng_consumer_channel *channel;
589 int alloc_ret = 0;
590
591 /*
592 * Get stream's channel reference. Needed when adding the stream to the
593 * global hash table.
594 */
595 channel = consumer_find_channel(msg.u.stream.channel_key);
596 if (!channel) {
597 /*
598 * We could not find the channel. Can happen if cpu hotplug
599 * happens while tearing down.
600 */
601 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
602 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
603 }
604
605 health_code_update();
606
607 /* First send a status message before receiving the fds. */
608 ret = consumer_send_status_msg(sock, ret_code);
609 if (ret < 0) {
610 /* Somehow, the session daemon is not responding anymore. */
611 goto error_add_stream_fatal;
612 }
613
614 health_code_update();
615
616 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
617 /* Channel was not found. */
618 goto error_add_stream_nosignal;
619 }
620
621 /* Blocking call */
622 health_poll_entry();
623 ret = lttng_consumer_poll_socket(consumer_sockpoll);
624 health_poll_exit();
625 if (ret) {
626 goto error_add_stream_fatal;
627 }
628
629 health_code_update();
630
631 /* Get stream file descriptor from socket */
632 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
633 if (ret != sizeof(fd)) {
634 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
635 goto end;
636 }
637
638 health_code_update();
639
640 /*
641 * Send status code to session daemon only if the recv works. If the
642 * above recv() failed, the session daemon is notified through the
643 * error socket and the teardown is eventually done.
644 */
645 ret = consumer_send_status_msg(sock, ret_code);
646 if (ret < 0) {
647 /* Somehow, the session daemon is not responding anymore. */
648 goto error_add_stream_nosignal;
649 }
650
651 health_code_update();
652
653 pthread_mutex_lock(&channel->lock);
654 new_stream = consumer_allocate_stream(channel->key,
655 fd,
656 channel->name,
657 channel->relayd_id,
658 channel->session_id,
659 channel->trace_chunk,
660 msg.u.stream.cpu,
661 &alloc_ret,
662 channel->type,
663 channel->monitor);
664 if (new_stream == NULL) {
665 switch (alloc_ret) {
666 case -ENOMEM:
667 case -EINVAL:
668 default:
669 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
670 break;
671 }
672 pthread_mutex_unlock(&channel->lock);
673 goto error_add_stream_nosignal;
674 }
675
676 new_stream->chan = channel;
677 new_stream->wait_fd = fd;
678 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
679 &new_stream->max_sb_size);
680 if (ret < 0) {
681 pthread_mutex_unlock(&channel->lock);
682 ERR("Failed to get kernel maximal subbuffer size");
683 goto error_add_stream_nosignal;
684 }
685
686 consumer_stream_update_channel_attributes(new_stream,
687 channel);
688 switch (channel->output) {
689 case CONSUMER_CHANNEL_SPLICE:
690 new_stream->output = LTTNG_EVENT_SPLICE;
691 ret = utils_create_pipe(new_stream->splice_pipe);
692 if (ret < 0) {
693 pthread_mutex_unlock(&channel->lock);
694 goto error_add_stream_nosignal;
695 }
696 break;
697 case CONSUMER_CHANNEL_MMAP:
698 new_stream->output = LTTNG_EVENT_MMAP;
699 break;
700 default:
701 ERR("Stream output unknown %d", channel->output);
702 pthread_mutex_unlock(&channel->lock);
703 goto error_add_stream_nosignal;
704 }
705
706 /*
707 * We've just assigned the channel to the stream so increment the
708 * refcount right now. We don't need to increment the refcount for
709 * streams in no monitor because we handle manually the cleanup of
710 * those. It is very important to make sure there is NO prior
711 * consumer_del_stream() calls or else the refcount will be unbalanced.
712 */
713 if (channel->monitor) {
714 uatomic_inc(&new_stream->chan->refcount);
715 }
716
717 /*
718 * The buffer flush is done on the session daemon side for the kernel
719 * so no need for the stream "hangup_flush_done" variable to be
720 * tracked. This is important for a kernel stream since we don't rely
721 * on the flush state of the stream to read data. It's not the case for
722 * user space tracing.
723 */
724 new_stream->hangup_flush_done = 0;
725
726 health_code_update();
727
728 pthread_mutex_lock(&new_stream->lock);
729 if (ctx->on_recv_stream) {
730 ret = ctx->on_recv_stream(new_stream);
731 if (ret < 0) {
732 pthread_mutex_unlock(&new_stream->lock);
733 pthread_mutex_unlock(&channel->lock);
734 consumer_stream_free(new_stream);
735 goto error_add_stream_nosignal;
736 }
737 }
738 health_code_update();
739
740 if (new_stream->metadata_flag) {
741 channel->metadata_stream = new_stream;
742 }
743
744 /* Do not monitor this stream. */
745 if (!channel->monitor) {
746 DBG("Kernel consumer add stream %s in no monitor mode with "
747 "relayd id %" PRIu64, new_stream->name,
748 new_stream->net_seq_idx);
749 cds_list_add(&new_stream->send_node, &channel->streams.head);
750 pthread_mutex_unlock(&new_stream->lock);
751 pthread_mutex_unlock(&channel->lock);
752 goto end_add_stream;
753 }
754
755 /* Send stream to relayd if the stream has an ID. */
756 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
757 ret = consumer_send_relayd_stream(new_stream,
758 new_stream->chan->pathname);
759 if (ret < 0) {
760 pthread_mutex_unlock(&new_stream->lock);
761 pthread_mutex_unlock(&channel->lock);
762 consumer_stream_free(new_stream);
763 goto error_add_stream_nosignal;
764 }
765
766 /*
767 * If adding an extra stream to an already
768 * existing channel (e.g. cpu hotplug), we need
769 * to send the "streams_sent" command to relayd.
770 */
771 if (channel->streams_sent_to_relayd) {
772 ret = consumer_send_relayd_streams_sent(
773 new_stream->net_seq_idx);
774 if (ret < 0) {
775 pthread_mutex_unlock(&new_stream->lock);
776 pthread_mutex_unlock(&channel->lock);
777 goto error_add_stream_nosignal;
778 }
779 }
780 }
781 pthread_mutex_unlock(&new_stream->lock);
782 pthread_mutex_unlock(&channel->lock);
783
784 /* Get the right pipe where the stream will be sent. */
785 if (new_stream->metadata_flag) {
786 consumer_add_metadata_stream(new_stream);
787 stream_pipe = ctx->consumer_metadata_pipe;
788 } else {
789 consumer_add_data_stream(new_stream);
790 stream_pipe = ctx->consumer_data_pipe;
791 }
792
793 /* Visible to other threads */
794 new_stream->globally_visible = 1;
795
796 health_code_update();
797
798 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
799 if (ret < 0) {
800 ERR("Consumer write %s stream to pipe %d",
801 new_stream->metadata_flag ? "metadata" : "data",
802 lttng_pipe_get_writefd(stream_pipe));
803 if (new_stream->metadata_flag) {
804 consumer_del_stream_for_metadata(new_stream);
805 } else {
806 consumer_del_stream_for_data(new_stream);
807 }
808 goto error_add_stream_nosignal;
809 }
810
811 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
812 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
813 end_add_stream:
814 break;
815 error_add_stream_nosignal:
816 goto end_nosignal;
817 error_add_stream_fatal:
818 goto error_fatal;
819 }
820 case LTTNG_CONSUMER_STREAMS_SENT:
821 {
822 struct lttng_consumer_channel *channel;
823
824 /*
825 * Get stream's channel reference. Needed when adding the stream to the
826 * global hash table.
827 */
828 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
829 if (!channel) {
830 /*
831 * We could not find the channel. Can happen if cpu hotplug
832 * happens while tearing down.
833 */
834 ERR("Unable to find channel key %" PRIu64,
835 msg.u.sent_streams.channel_key);
836 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
837 }
838
839 health_code_update();
840
841 /*
842 * Send status code to session daemon.
843 */
844 ret = consumer_send_status_msg(sock, ret_code);
845 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
846 /* Somehow, the session daemon is not responding anymore. */
847 goto error_streams_sent_nosignal;
848 }
849
850 health_code_update();
851
852 /*
853 * We should not send this message if we don't monitor the
854 * streams in this channel.
855 */
856 if (!channel->monitor) {
857 goto end_error_streams_sent;
858 }
859
860 health_code_update();
861 /* Send stream to relayd if the stream has an ID. */
862 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
863 ret = consumer_send_relayd_streams_sent(
864 msg.u.sent_streams.net_seq_idx);
865 if (ret < 0) {
866 goto error_streams_sent_nosignal;
867 }
868 channel->streams_sent_to_relayd = true;
869 }
870 end_error_streams_sent:
871 break;
872 error_streams_sent_nosignal:
873 goto end_nosignal;
874 }
875 case LTTNG_CONSUMER_UPDATE_STREAM:
876 {
877 rcu_read_unlock();
878 return -ENOSYS;
879 }
880 case LTTNG_CONSUMER_DESTROY_RELAYD:
881 {
882 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
883 struct consumer_relayd_sock_pair *relayd;
884
885 DBG("Kernel consumer destroying relayd %" PRIu64, index);
886
887 /* Get relayd reference if exists. */
888 relayd = consumer_find_relayd(index);
889 if (relayd == NULL) {
890 DBG("Unable to find relayd %" PRIu64, index);
891 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
892 }
893
894 /*
895 * Each relayd socket pair has a refcount of stream attached to it
896 * which tells if the relayd is still active or not depending on the
897 * refcount value.
898 *
899 * This will set the destroy flag of the relayd object and destroy it
900 * if the refcount reaches zero when called.
901 *
902 * The destroy can happen either here or when a stream fd hangs up.
903 */
904 if (relayd) {
905 consumer_flag_relayd_for_destroy(relayd);
906 }
907
908 health_code_update();
909
910 ret = consumer_send_status_msg(sock, ret_code);
911 if (ret < 0) {
912 /* Somehow, the session daemon is not responding anymore. */
913 goto error_fatal;
914 }
915
916 goto end_nosignal;
917 }
918 case LTTNG_CONSUMER_DATA_PENDING:
919 {
920 int32_t ret;
921 uint64_t id = msg.u.data_pending.session_id;
922
923 DBG("Kernel consumer data pending command for id %" PRIu64, id);
924
925 ret = consumer_data_pending(id);
926
927 health_code_update();
928
929 /* Send back returned value to session daemon */
930 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
931 if (ret < 0) {
932 PERROR("send data pending ret code");
933 goto error_fatal;
934 }
935
936 /*
937 * No need to send back a status message since the data pending
938 * returned value is the response.
939 */
940 break;
941 }
942 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
943 {
944 struct lttng_consumer_channel *channel;
945 uint64_t key = msg.u.snapshot_channel.key;
946
947 channel = consumer_find_channel(key);
948 if (!channel) {
949 ERR("Channel %" PRIu64 " not found", key);
950 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
951 } else {
952 pthread_mutex_lock(&channel->lock);
953 if (msg.u.snapshot_channel.metadata == 1) {
954 ret = lttng_kconsumer_snapshot_metadata(channel, key,
955 msg.u.snapshot_channel.pathname,
956 msg.u.snapshot_channel.relayd_id, ctx);
957 if (ret < 0) {
958 ERR("Snapshot metadata failed");
959 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
960 }
961 } else {
962 ret = lttng_kconsumer_snapshot_channel(channel, key,
963 msg.u.snapshot_channel.pathname,
964 msg.u.snapshot_channel.relayd_id,
965 msg.u.snapshot_channel.nb_packets_per_stream,
966 ctx);
967 if (ret < 0) {
968 ERR("Snapshot channel failed");
969 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
970 }
971 }
972 pthread_mutex_unlock(&channel->lock);
973 }
974 health_code_update();
975
976 ret = consumer_send_status_msg(sock, ret_code);
977 if (ret < 0) {
978 /* Somehow, the session daemon is not responding anymore. */
979 goto end_nosignal;
980 }
981 break;
982 }
983 case LTTNG_CONSUMER_DESTROY_CHANNEL:
984 {
985 uint64_t key = msg.u.destroy_channel.key;
986 struct lttng_consumer_channel *channel;
987
988 channel = consumer_find_channel(key);
989 if (!channel) {
990 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
991 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
992 }
993
994 health_code_update();
995
996 ret = consumer_send_status_msg(sock, ret_code);
997 if (ret < 0) {
998 /* Somehow, the session daemon is not responding anymore. */
999 goto end_destroy_channel;
1000 }
1001
1002 health_code_update();
1003
1004 /* Stop right now if no channel was found. */
1005 if (!channel) {
1006 goto end_destroy_channel;
1007 }
1008
1009 /*
1010 * This command should ONLY be issued for channel with streams set in
1011 * no monitor mode.
1012 */
1013 assert(!channel->monitor);
1014
1015 /*
1016 * The refcount should ALWAYS be 0 in the case of a channel in no
1017 * monitor mode.
1018 */
1019 assert(!uatomic_sub_return(&channel->refcount, 1));
1020
1021 consumer_del_channel(channel);
1022 end_destroy_channel:
1023 goto end_nosignal;
1024 }
1025 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1026 {
1027 ssize_t ret;
1028 uint64_t count;
1029 struct lttng_consumer_channel *channel;
1030 uint64_t id = msg.u.discarded_events.session_id;
1031 uint64_t key = msg.u.discarded_events.channel_key;
1032
1033 DBG("Kernel consumer discarded events command for session id %"
1034 PRIu64 ", channel key %" PRIu64, id, key);
1035
1036 channel = consumer_find_channel(key);
1037 if (!channel) {
1038 ERR("Kernel consumer discarded events channel %"
1039 PRIu64 " not found", key);
1040 count = 0;
1041 } else {
1042 count = channel->discarded_events;
1043 }
1044
1045 health_code_update();
1046
1047 /* Send back returned value to session daemon */
1048 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1049 if (ret < 0) {
1050 PERROR("send discarded events");
1051 goto error_fatal;
1052 }
1053
1054 break;
1055 }
1056 case LTTNG_CONSUMER_LOST_PACKETS:
1057 {
1058 ssize_t ret;
1059 uint64_t count;
1060 struct lttng_consumer_channel *channel;
1061 uint64_t id = msg.u.lost_packets.session_id;
1062 uint64_t key = msg.u.lost_packets.channel_key;
1063
1064 DBG("Kernel consumer lost packets command for session id %"
1065 PRIu64 ", channel key %" PRIu64, id, key);
1066
1067 channel = consumer_find_channel(key);
1068 if (!channel) {
1069 ERR("Kernel consumer lost packets channel %"
1070 PRIu64 " not found", key);
1071 count = 0;
1072 } else {
1073 count = channel->lost_packets;
1074 }
1075
1076 health_code_update();
1077
1078 /* Send back returned value to session daemon */
1079 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1080 if (ret < 0) {
1081 PERROR("send lost packets");
1082 goto error_fatal;
1083 }
1084
1085 break;
1086 }
1087 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1088 {
1089 int channel_monitor_pipe;
1090
1091 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1092 /* Successfully received the command's type. */
1093 ret = consumer_send_status_msg(sock, ret_code);
1094 if (ret < 0) {
1095 goto error_fatal;
1096 }
1097
1098 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1099 1);
1100 if (ret != sizeof(channel_monitor_pipe)) {
1101 ERR("Failed to receive channel monitor pipe");
1102 goto error_fatal;
1103 }
1104
1105 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1106 ret = consumer_timer_thread_set_channel_monitor_pipe(
1107 channel_monitor_pipe);
1108 if (!ret) {
1109 int flags;
1110
1111 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1112 /* Set the pipe as non-blocking. */
1113 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1114 if (ret == -1) {
1115 PERROR("fcntl get flags of the channel monitoring pipe");
1116 goto error_fatal;
1117 }
1118 flags = ret;
1119
1120 ret = fcntl(channel_monitor_pipe, F_SETFL,
1121 flags | O_NONBLOCK);
1122 if (ret == -1) {
1123 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1124 goto error_fatal;
1125 }
1126 DBG("Channel monitor pipe set as non-blocking");
1127 } else {
1128 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1129 }
1130 ret = consumer_send_status_msg(sock, ret_code);
1131 if (ret < 0) {
1132 goto error_fatal;
1133 }
1134 break;
1135 }
1136 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1137 {
1138 struct lttng_consumer_channel *channel;
1139 uint64_t key = msg.u.rotate_channel.key;
1140
1141 DBG("Consumer rotate channel %" PRIu64, key);
1142
1143 channel = consumer_find_channel(key);
1144 if (!channel) {
1145 ERR("Channel %" PRIu64 " not found", key);
1146 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1147 } else {
1148 /*
1149 * Sample the rotate position of all the streams in this channel.
1150 */
1151 ret = lttng_consumer_rotate_channel(channel, key,
1152 msg.u.rotate_channel.relayd_id,
1153 msg.u.rotate_channel.metadata,
1154 ctx);
1155 if (ret < 0) {
1156 ERR("Rotate channel failed");
1157 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1158 }
1159
1160 health_code_update();
1161 }
1162 ret = consumer_send_status_msg(sock, ret_code);
1163 if (ret < 0) {
1164 /* Somehow, the session daemon is not responding anymore. */
1165 goto error_rotate_channel;
1166 }
1167 if (channel) {
1168 /* Rotate the streams that are ready right now. */
1169 ret = lttng_consumer_rotate_ready_streams(
1170 channel, key, ctx);
1171 if (ret < 0) {
1172 ERR("Rotate ready streams failed");
1173 }
1174 }
1175 break;
1176 error_rotate_channel:
1177 goto end_nosignal;
1178 }
1179 case LTTNG_CONSUMER_CLEAR_CHANNEL:
1180 {
1181 struct lttng_consumer_channel *channel;
1182 uint64_t key = msg.u.clear_channel.key;
1183
1184 channel = consumer_find_channel(key);
1185 if (!channel) {
1186 DBG("Channel %" PRIu64 " not found", key);
1187 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1188 } else {
1189 ret = lttng_consumer_clear_channel(channel);
1190 if (ret) {
1191 ERR("Clear channel failed");
1192 ret_code = ret;
1193 }
1194
1195 health_code_update();
1196 }
1197 ret = consumer_send_status_msg(sock, ret_code);
1198 if (ret < 0) {
1199 /* Somehow, the session daemon is not responding anymore. */
1200 goto end_nosignal;
1201 }
1202
1203 break;
1204 }
1205 case LTTNG_CONSUMER_INIT:
1206 {
1207 ret_code = lttng_consumer_init_command(ctx,
1208 msg.u.init.sessiond_uuid);
1209 health_code_update();
1210 ret = consumer_send_status_msg(sock, ret_code);
1211 if (ret < 0) {
1212 /* Somehow, the session daemon is not responding anymore. */
1213 goto end_nosignal;
1214 }
1215 break;
1216 }
1217 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1218 {
1219 const struct lttng_credentials credentials = {
1220 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1221 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1222 };
1223 const bool is_local_trace =
1224 !msg.u.create_trace_chunk.relayd_id.is_set;
1225 const uint64_t relayd_id =
1226 msg.u.create_trace_chunk.relayd_id.value;
1227 const char *chunk_override_name =
1228 *msg.u.create_trace_chunk.override_name ?
1229 msg.u.create_trace_chunk.override_name :
1230 NULL;
1231 struct lttng_directory_handle *chunk_directory_handle = NULL;
1232
1233 /*
1234 * The session daemon will only provide a chunk directory file
1235 * descriptor for local traces.
1236 */
1237 if (is_local_trace) {
1238 int chunk_dirfd;
1239
1240 /* Acnowledge the reception of the command. */
1241 ret = consumer_send_status_msg(sock,
1242 LTTCOMM_CONSUMERD_SUCCESS);
1243 if (ret < 0) {
1244 /* Somehow, the session daemon is not responding anymore. */
1245 goto end_nosignal;
1246 }
1247
1248 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1249 if (ret != sizeof(chunk_dirfd)) {
1250 ERR("Failed to receive trace chunk directory file descriptor");
1251 goto error_fatal;
1252 }
1253
1254 DBG("Received trace chunk directory fd (%d)",
1255 chunk_dirfd);
1256 chunk_directory_handle = lttng_directory_handle_create_from_dirfd(
1257 chunk_dirfd);
1258 if (!chunk_directory_handle) {
1259 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1260 if (close(chunk_dirfd)) {
1261 PERROR("Failed to close chunk directory file descriptor");
1262 }
1263 goto error_fatal;
1264 }
1265 }
1266
1267 ret_code = lttng_consumer_create_trace_chunk(
1268 !is_local_trace ? &relayd_id : NULL,
1269 msg.u.create_trace_chunk.session_id,
1270 msg.u.create_trace_chunk.chunk_id,
1271 (time_t) msg.u.create_trace_chunk
1272 .creation_timestamp,
1273 chunk_override_name,
1274 msg.u.create_trace_chunk.credentials.is_set ?
1275 &credentials :
1276 NULL,
1277 chunk_directory_handle);
1278 lttng_directory_handle_put(chunk_directory_handle);
1279 goto end_msg_sessiond;
1280 }
1281 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1282 {
1283 enum lttng_trace_chunk_command_type close_command =
1284 msg.u.close_trace_chunk.close_command.value;
1285 const uint64_t relayd_id =
1286 msg.u.close_trace_chunk.relayd_id.value;
1287 struct lttcomm_consumer_close_trace_chunk_reply reply;
1288 char path[LTTNG_PATH_MAX];
1289
1290 ret_code = lttng_consumer_close_trace_chunk(
1291 msg.u.close_trace_chunk.relayd_id.is_set ?
1292 &relayd_id :
1293 NULL,
1294 msg.u.close_trace_chunk.session_id,
1295 msg.u.close_trace_chunk.chunk_id,
1296 (time_t) msg.u.close_trace_chunk.close_timestamp,
1297 msg.u.close_trace_chunk.close_command.is_set ?
1298 &close_command :
1299 NULL, path);
1300 reply.ret_code = ret_code;
1301 reply.path_length = strlen(path) + 1;
1302 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1303 if (ret != sizeof(reply)) {
1304 goto error_fatal;
1305 }
1306 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1307 if (ret != reply.path_length) {
1308 goto error_fatal;
1309 }
1310 goto end_nosignal;
1311 }
1312 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1313 {
1314 const uint64_t relayd_id =
1315 msg.u.trace_chunk_exists.relayd_id.value;
1316
1317 ret_code = lttng_consumer_trace_chunk_exists(
1318 msg.u.trace_chunk_exists.relayd_id.is_set ?
1319 &relayd_id : NULL,
1320 msg.u.trace_chunk_exists.session_id,
1321 msg.u.trace_chunk_exists.chunk_id);
1322 goto end_msg_sessiond;
1323 }
1324 default:
1325 goto end_nosignal;
1326 }
1327
1328 end_nosignal:
1329 /*
1330 * Return 1 to indicate success since the 0 value can be a socket
1331 * shutdown during the recv() or send() call.
1332 */
1333 ret = 1;
1334 goto end;
1335 error_fatal:
1336 /* This will issue a consumer stop. */
1337 ret = -1;
1338 goto end;
1339 end_msg_sessiond:
1340 /*
1341 * The returned value here is not useful since either way we'll return 1 to
1342 * the caller because the session daemon socket management is done
1343 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1344 */
1345 ret = consumer_send_status_msg(sock, ret_code);
1346 if (ret < 0) {
1347 goto error_fatal;
1348 }
1349 ret = 1;
1350 end:
1351 health_code_update();
1352 rcu_read_unlock();
1353 return ret;
1354 }
1355
1356 /*
1357 * Populate index values of a kernel stream. Values are set in big endian order.
1358 *
1359 * Return 0 on success or else a negative value.
1360 */
1361 static int get_index_values(struct ctf_packet_index *index, int infd)
1362 {
1363 int ret;
1364 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1365 events_discarded, stream_id, stream_instance_id,
1366 packet_seq_num;
1367
1368 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1369 if (ret < 0) {
1370 PERROR("kernctl_get_timestamp_begin");
1371 goto error;
1372 }
1373
1374 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1375 if (ret < 0) {
1376 PERROR("kernctl_get_timestamp_end");
1377 goto error;
1378 }
1379
1380 ret = kernctl_get_events_discarded(infd, &events_discarded);
1381 if (ret < 0) {
1382 PERROR("kernctl_get_events_discarded");
1383 goto error;
1384 }
1385
1386 ret = kernctl_get_content_size(infd, &content_size);
1387 if (ret < 0) {
1388 PERROR("kernctl_get_content_size");
1389 goto error;
1390 }
1391
1392 ret = kernctl_get_packet_size(infd, &packet_size);
1393 if (ret < 0) {
1394 PERROR("kernctl_get_packet_size");
1395 goto error;
1396 }
1397
1398 ret = kernctl_get_stream_id(infd, &stream_id);
1399 if (ret < 0) {
1400 PERROR("kernctl_get_stream_id");
1401 goto error;
1402 }
1403
1404 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1405 if (ret < 0) {
1406 if (ret == -ENOTTY) {
1407 /* Command not implemented by lttng-modules. */
1408 stream_instance_id = -1ULL;
1409 } else {
1410 PERROR("kernctl_get_instance_id");
1411 goto error;
1412 }
1413 }
1414
1415 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1416 if (ret < 0) {
1417 if (ret == -ENOTTY) {
1418 /* Command not implemented by lttng-modules. */
1419 packet_seq_num = -1ULL;
1420 ret = 0;
1421 } else {
1422 PERROR("kernctl_get_sequence_number");
1423 goto error;
1424 }
1425 }
1426 index->packet_seq_num = htobe64(index->packet_seq_num);
1427
1428 *index = (typeof(*index)) {
1429 .offset = index->offset,
1430 .packet_size = htobe64(packet_size),
1431 .content_size = htobe64(content_size),
1432 .timestamp_begin = htobe64(timestamp_begin),
1433 .timestamp_end = htobe64(timestamp_end),
1434 .events_discarded = htobe64(events_discarded),
1435 .stream_id = htobe64(stream_id),
1436 .stream_instance_id = htobe64(stream_instance_id),
1437 .packet_seq_num = htobe64(packet_seq_num),
1438 };
1439
1440 error:
1441 return ret;
1442 }
1443 /*
1444 * Sync metadata meaning request them to the session daemon and snapshot to the
1445 * metadata thread can consumer them.
1446 *
1447 * Metadata stream lock MUST be acquired.
1448 *
1449 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1450 * is empty or a negative value on error.
1451 */
1452 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1453 {
1454 int ret;
1455
1456 assert(metadata);
1457
1458 ret = kernctl_buffer_flush(metadata->wait_fd);
1459 if (ret < 0) {
1460 ERR("Failed to flush kernel stream");
1461 goto end;
1462 }
1463
1464 ret = kernctl_snapshot(metadata->wait_fd);
1465 if (ret < 0) {
1466 if (ret != -EAGAIN) {
1467 ERR("Sync metadata, taking kernel snapshot failed.");
1468 goto end;
1469 }
1470 DBG("Sync metadata, no new kernel metadata");
1471 /* No new metadata, exit. */
1472 ret = ENODATA;
1473 goto end;
1474 }
1475
1476 end:
1477 return ret;
1478 }
1479
1480 static
1481 int update_stream_stats(struct lttng_consumer_stream *stream)
1482 {
1483 int ret;
1484 uint64_t seq, discarded;
1485
1486 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1487 if (ret < 0) {
1488 if (ret == -ENOTTY) {
1489 /* Command not implemented by lttng-modules. */
1490 seq = -1ULL;
1491 stream->sequence_number_unavailable = true;
1492 } else {
1493 PERROR("kernctl_get_sequence_number");
1494 goto end;
1495 }
1496 }
1497
1498 /*
1499 * Start the sequence when we extract the first packet in case we don't
1500 * start at 0 (for example if a consumer is not connected to the
1501 * session immediately after the beginning).
1502 */
1503 if (stream->last_sequence_number == -1ULL) {
1504 stream->last_sequence_number = seq;
1505 } else if (seq > stream->last_sequence_number) {
1506 stream->chan->lost_packets += seq -
1507 stream->last_sequence_number - 1;
1508 } else {
1509 /* seq <= last_sequence_number */
1510 ERR("Sequence number inconsistent : prev = %" PRIu64
1511 ", current = %" PRIu64,
1512 stream->last_sequence_number, seq);
1513 ret = -1;
1514 goto end;
1515 }
1516 stream->last_sequence_number = seq;
1517
1518 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1519 if (ret < 0) {
1520 PERROR("kernctl_get_events_discarded");
1521 goto end;
1522 }
1523 if (discarded < stream->last_discarded_events) {
1524 /*
1525 * Overflow has occurred. We assume only one wrap-around
1526 * has occurred.
1527 */
1528 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1529 stream->last_discarded_events + discarded;
1530 } else {
1531 stream->chan->discarded_events += discarded -
1532 stream->last_discarded_events;
1533 }
1534 stream->last_discarded_events = discarded;
1535 ret = 0;
1536
1537 end:
1538 return ret;
1539 }
1540
1541 /*
1542 * Check if the local version of the metadata stream matches with the version
1543 * of the metadata stream in the kernel. If it was updated, set the reset flag
1544 * on the stream.
1545 */
1546 static
1547 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1548 {
1549 int ret;
1550 uint64_t cur_version;
1551
1552 ret = kernctl_get_metadata_version(infd, &cur_version);
1553 if (ret < 0) {
1554 if (ret == -ENOTTY) {
1555 /*
1556 * LTTng-modules does not implement this
1557 * command.
1558 */
1559 ret = 0;
1560 goto end;
1561 }
1562 ERR("Failed to get the metadata version");
1563 goto end;
1564 }
1565
1566 if (stream->metadata_version == cur_version) {
1567 ret = 0;
1568 goto end;
1569 }
1570
1571 DBG("New metadata version detected");
1572 stream->metadata_version = cur_version;
1573 stream->reset_metadata_flag = 1;
1574 ret = 0;
1575
1576 end:
1577 return ret;
1578 }
1579
1580 /*
1581 * Consume data on a file descriptor and write it on a trace file.
1582 * The stream and channel locks must be held by the caller.
1583 */
1584 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1585 struct lttng_consumer_local_data *ctx)
1586 {
1587 unsigned long len, subbuf_size, padding;
1588 int err, write_index = 1, rotation_ret;
1589 ssize_t ret = 0;
1590 int infd = stream->wait_fd;
1591 struct ctf_packet_index index = {};
1592
1593 DBG("In read_subbuffer (infd : %d)", infd);
1594
1595 /*
1596 * If the stream was flagged to be ready for rotation before we extract the
1597 * next packet, rotate it now.
1598 */
1599 if (stream->rotate_ready) {
1600 DBG("Rotate stream before extracting data");
1601 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1602 if (rotation_ret < 0) {
1603 ERR("Stream rotation error");
1604 ret = -1;
1605 goto error;
1606 }
1607 }
1608
1609 /* Get the next subbuffer */
1610 err = kernctl_get_next_subbuf(infd);
1611 if (err != 0) {
1612 /*
1613 * This is a debug message even for single-threaded consumer,
1614 * because poll() have more relaxed criterions than get subbuf,
1615 * so get_subbuf may fail for short race windows where poll()
1616 * would issue wakeups.
1617 */
1618 DBG("Reserving sub buffer failed (everything is normal, "
1619 "it is due to concurrency)");
1620 ret = err;
1621 goto error;
1622 }
1623
1624 /* Get the full subbuffer size including padding */
1625 err = kernctl_get_padded_subbuf_size(infd, &len);
1626 if (err != 0) {
1627 PERROR("Getting sub-buffer len failed.");
1628 err = kernctl_put_subbuf(infd);
1629 if (err != 0) {
1630 if (err == -EFAULT) {
1631 PERROR("Error in unreserving sub buffer\n");
1632 } else if (err == -EIO) {
1633 /* Should never happen with newer LTTng versions */
1634 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1635 }
1636 ret = err;
1637 goto error;
1638 }
1639 ret = err;
1640 goto error;
1641 }
1642
1643 if (!stream->metadata_flag) {
1644 ret = get_index_values(&index, infd);
1645 if (ret < 0) {
1646 err = kernctl_put_subbuf(infd);
1647 if (err != 0) {
1648 if (err == -EFAULT) {
1649 PERROR("Error in unreserving sub buffer\n");
1650 } else if (err == -EIO) {
1651 /* Should never happen with newer LTTng versions */
1652 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1653 }
1654 ret = err;
1655 goto error;
1656 }
1657 goto error;
1658 }
1659 ret = update_stream_stats(stream);
1660 if (ret < 0) {
1661 err = kernctl_put_subbuf(infd);
1662 if (err != 0) {
1663 if (err == -EFAULT) {
1664 PERROR("Error in unreserving sub buffer\n");
1665 } else if (err == -EIO) {
1666 /* Should never happen with newer LTTng versions */
1667 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1668 }
1669 ret = err;
1670 goto error;
1671 }
1672 goto error;
1673 }
1674 } else {
1675 write_index = 0;
1676 ret = metadata_stream_check_version(infd, stream);
1677 if (ret < 0) {
1678 err = kernctl_put_subbuf(infd);
1679 if (err != 0) {
1680 if (err == -EFAULT) {
1681 PERROR("Error in unreserving sub buffer\n");
1682 } else if (err == -EIO) {
1683 /* Should never happen with newer LTTng versions */
1684 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1685 }
1686 ret = err;
1687 goto error;
1688 }
1689 goto error;
1690 }
1691 }
1692
1693 switch (stream->chan->output) {
1694 case CONSUMER_CHANNEL_SPLICE:
1695 /*
1696 * XXX: The lttng-modules splice "actor" does not handle copying
1697 * partial pages hence only using the subbuffer size without the
1698 * padding makes the splice fail.
1699 */
1700 subbuf_size = len;
1701 padding = 0;
1702
1703 /* splice the subbuffer to the tracefile */
1704 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1705 padding, &index);
1706 /*
1707 * XXX: Splice does not support network streaming so the return value
1708 * is simply checked against subbuf_size and not like the mmap() op.
1709 */
1710 if (ret != subbuf_size) {
1711 /*
1712 * display the error but continue processing to try
1713 * to release the subbuffer
1714 */
1715 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1716 ret, subbuf_size);
1717 write_index = 0;
1718 }
1719 break;
1720 case CONSUMER_CHANNEL_MMAP:
1721 {
1722 const char *subbuf_addr;
1723
1724 /* Get subbuffer size without padding */
1725 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1726 if (err != 0) {
1727 PERROR("Getting sub-buffer len failed.");
1728 err = kernctl_put_subbuf(infd);
1729 if (err != 0) {
1730 if (err == -EFAULT) {
1731 PERROR("Error in unreserving sub buffer\n");
1732 } else if (err == -EIO) {
1733 /* Should never happen with newer LTTng versions */
1734 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1735 }
1736 ret = err;
1737 goto error;
1738 }
1739 ret = err;
1740 goto error;
1741 }
1742
1743 ret = get_current_subbuf_addr(stream, &subbuf_addr);
1744 if (ret) {
1745 goto error_put_subbuf;
1746 }
1747
1748 /* Make sure the tracer is not gone mad on us! */
1749 assert(len >= subbuf_size);
1750
1751 padding = len - subbuf_size;
1752
1753 /* write the subbuffer to the tracefile */
1754 ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream,
1755 subbuf_addr,
1756 subbuf_size,
1757 padding, &index);
1758 /*
1759 * The mmap operation should write subbuf_size amount of data when
1760 * network streaming or the full padding (len) size when we are _not_
1761 * streaming.
1762 */
1763 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1764 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1765 /*
1766 * Display the error but continue processing to try to release the
1767 * subbuffer. This is a DBG statement since this is possible to
1768 * happen without being a critical error.
1769 */
1770 DBG("Error writing to tracefile "
1771 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1772 ret, len, subbuf_size);
1773 write_index = 0;
1774 }
1775 break;
1776 }
1777 default:
1778 ERR("Unknown output method");
1779 ret = -EPERM;
1780 }
1781 error_put_subbuf:
1782 err = kernctl_put_next_subbuf(infd);
1783 if (err != 0) {
1784 if (err == -EFAULT) {
1785 PERROR("Error in unreserving sub buffer\n");
1786 } else if (err == -EIO) {
1787 /* Should never happen with newer LTTng versions */
1788 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1789 }
1790 ret = err;
1791 goto error;
1792 }
1793
1794 /* Write index if needed. */
1795 if (!write_index) {
1796 goto rotate;
1797 }
1798
1799 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1800 /*
1801 * In live, block until all the metadata is sent.
1802 */
1803 pthread_mutex_lock(&stream->metadata_timer_lock);
1804 assert(!stream->missed_metadata_flush);
1805 stream->waiting_on_metadata = true;
1806 pthread_mutex_unlock(&stream->metadata_timer_lock);
1807
1808 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1809
1810 pthread_mutex_lock(&stream->metadata_timer_lock);
1811 stream->waiting_on_metadata = false;
1812 if (stream->missed_metadata_flush) {
1813 stream->missed_metadata_flush = false;
1814 pthread_mutex_unlock(&stream->metadata_timer_lock);
1815 (void) consumer_flush_kernel_index(stream);
1816 } else {
1817 pthread_mutex_unlock(&stream->metadata_timer_lock);
1818 }
1819 if (err < 0) {
1820 goto error;
1821 }
1822 }
1823
1824 err = consumer_stream_write_index(stream, &index);
1825 if (err < 0) {
1826 goto error;
1827 }
1828
1829 rotate:
1830 /*
1831 * After extracting the packet, we check if the stream is now ready to be
1832 * rotated and perform the action immediately.
1833 */
1834 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1835 if (rotation_ret == 1) {
1836 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1837 if (rotation_ret < 0) {
1838 ERR("Stream rotation error");
1839 ret = -1;
1840 goto error;
1841 }
1842 } else if (rotation_ret < 0) {
1843 ERR("Checking if stream is ready to rotate");
1844 ret = -1;
1845 goto error;
1846 }
1847
1848 error:
1849 return ret;
1850 }
1851
1852 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1853 {
1854 int ret;
1855
1856 assert(stream);
1857
1858 /*
1859 * Don't create anything if this is set for streaming or if there is
1860 * no current trace chunk on the parent channel.
1861 */
1862 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1863 stream->chan->trace_chunk) {
1864 ret = consumer_stream_create_output_files(stream, true);
1865 if (ret) {
1866 goto error;
1867 }
1868 }
1869
1870 if (stream->output == LTTNG_EVENT_MMAP) {
1871 /* get the len of the mmap region */
1872 unsigned long mmap_len;
1873
1874 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1875 if (ret != 0) {
1876 PERROR("kernctl_get_mmap_len");
1877 goto error_close_fd;
1878 }
1879 stream->mmap_len = (size_t) mmap_len;
1880
1881 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1882 MAP_PRIVATE, stream->wait_fd, 0);
1883 if (stream->mmap_base == MAP_FAILED) {
1884 PERROR("Error mmaping");
1885 ret = -1;
1886 goto error_close_fd;
1887 }
1888 }
1889
1890 /* we return 0 to let the library handle the FD internally */
1891 return 0;
1892
1893 error_close_fd:
1894 if (stream->out_fd >= 0) {
1895 int err;
1896
1897 err = close(stream->out_fd);
1898 assert(!err);
1899 stream->out_fd = -1;
1900 }
1901 error:
1902 return ret;
1903 }
1904
1905 /*
1906 * Check if data is still being extracted from the buffers for a specific
1907 * stream. Consumer data lock MUST be acquired before calling this function
1908 * and the stream lock.
1909 *
1910 * Return 1 if the traced data are still getting read else 0 meaning that the
1911 * data is available for trace viewer reading.
1912 */
1913 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1914 {
1915 int ret;
1916
1917 assert(stream);
1918
1919 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1920 ret = 0;
1921 goto end;
1922 }
1923
1924 ret = kernctl_get_next_subbuf(stream->wait_fd);
1925 if (ret == 0) {
1926 /* There is still data so let's put back this subbuffer. */
1927 ret = kernctl_put_subbuf(stream->wait_fd);
1928 assert(ret == 0);
1929 ret = 1; /* Data is pending */
1930 goto end;
1931 }
1932
1933 /* Data is NOT pending and ready to be read. */
1934 ret = 0;
1935
1936 end:
1937 return ret;
1938 }
This page took 0.104619 seconds and 6 git commands to generate.