2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 #include <sys/socket.h>
27 #include <sys/types.h>
31 #include <common/common.h>
32 #include <common/kernel-ctl/kernel-ctl.h>
33 #include <common/sessiond-comm/sessiond-comm.h>
34 #include <common/sessiond-comm/relayd.h>
35 #include <common/compat/fcntl.h>
36 #include <common/relayd/relayd.h>
38 #include "kernel-consumer.h"
/*
 * Consumer-wide globals used by this file. They are only declared here;
 * their definitions are not part of this file.
 */
extern struct lttng_consumer_global_data consumer_data;
extern int consumer_poll_timeout;
extern volatile int consumer_quit;
45 * Mmap the ring buffer, read it and write the data to the tracefile.
47 * Returns the number of bytes written
49 ssize_t
lttng_kconsumer_on_read_subbuffer_mmap(
50 struct lttng_consumer_local_data
*ctx
,
51 struct lttng_consumer_stream
*stream
, unsigned long len
)
53 unsigned long mmap_offset
;
54 ssize_t ret
= 0, written
= 0;
55 off_t orig_offset
= stream
->out_fd_offset
;
56 int fd
= stream
->wait_fd
;
57 /* Default is on the disk */
58 int outfd
= stream
->out_fd
;
60 struct consumer_relayd_sock_pair
*relayd
= NULL
;
62 /* RCU lock for the relayd pointer */
65 /* Flag that the current stream if set for network streaming. */
66 if (stream
->net_seq_idx
!= -1) {
67 relayd
= consumer_find_relayd(stream
->net_seq_idx
);
73 /* get the offset inside the fd to mmap */
74 ret
= kernctl_get_mmap_read_offset(fd
, &mmap_offset
);
77 perror("kernctl_get_mmap_read_offset");
82 /* Handle stream on the relayd if the output is on the network */
85 * Lock the control socket for the complete duration of the function
86 * since from this point on we will use the socket.
88 if (stream
->metadata_flag
) {
89 /* Metadata requires the control socket. */
90 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
93 ret
= consumer_handle_stream_before_relayd(stream
, len
);
95 /* Use the returned socket. */
98 /* Write metadata stream id before payload */
99 if (stream
->metadata_flag
) {
100 metadata_id
= htobe64(stream
->relayd_stream_id
);
102 ret
= write(outfd
, (void *) &metadata_id
,
103 sizeof(stream
->relayd_stream_id
));
104 } while (ret
< 0 && errno
== EINTR
);
106 PERROR("write metadata stream id");
110 DBG("Metadata stream id %zu written before data",
111 stream
->relayd_stream_id
);
113 * We do this so the return value can match the len passed as
114 * argument to this function.
116 written
-= sizeof(stream
->relayd_stream_id
);
119 /* Else, use the default set before which is the filesystem. */
124 ret
= write(outfd
, stream
->mmap_base
+ mmap_offset
, len
);
125 } while (ret
< 0 && errno
== EINTR
);
127 perror("Error in file write");
132 } else if (ret
> len
) {
133 perror("Error in file write");
141 /* This call is useless on a socket so better save a syscall. */
143 /* This won't block, but will start writeout asynchronously */
144 lttng_sync_file_range(outfd
, stream
->out_fd_offset
, ret
,
145 SYNC_FILE_RANGE_WRITE
);
146 stream
->out_fd_offset
+= ret
;
150 lttng_consumer_sync_trace_file(stream
, orig_offset
);
153 /* Unlock only if ctrl socket used */
154 if (relayd
&& stream
->metadata_flag
) {
155 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
164 * Splice the data from the ring buffer to the tracefile.
166 * Returns the number of bytes spliced.
168 ssize_t
lttng_kconsumer_on_read_subbuffer_splice(
169 struct lttng_consumer_local_data
*ctx
,
170 struct lttng_consumer_stream
*stream
, unsigned long len
)
172 ssize_t ret
= 0, written
= 0, ret_splice
= 0;
174 off_t orig_offset
= stream
->out_fd_offset
;
175 int fd
= stream
->wait_fd
;
176 /* Default is on the disk */
177 int outfd
= stream
->out_fd
;
178 uint64_t metadata_id
;
179 struct consumer_relayd_sock_pair
*relayd
= NULL
;
181 /* RCU lock for the relayd pointer */
184 /* Flag that the current stream if set for network streaming. */
185 if (stream
->net_seq_idx
!= -1) {
186 relayd
= consumer_find_relayd(stream
->net_seq_idx
);
187 if (relayd
== NULL
) {
192 /* Write metadata stream id before payload */
193 if (stream
->metadata_flag
&& relayd
) {
195 * Lock the control socket for the complete duration of the function
196 * since from this point on we will use the socket.
198 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
200 metadata_id
= htobe64(stream
->relayd_stream_id
);
202 ret
= write(ctx
->consumer_thread_pipe
[1],
203 (void *) &metadata_id
,
204 sizeof(stream
->relayd_stream_id
));
205 } while (ret
< 0 && errno
== EINTR
);
207 PERROR("write metadata stream id");
211 DBG("Metadata stream id %zu written before data",
212 stream
->relayd_stream_id
);
216 DBG("splice chan to pipe offset %lu of len %lu (fd : %d)",
217 (unsigned long)offset
, len
, fd
);
218 ret_splice
= splice(fd
, &offset
, ctx
->consumer_thread_pipe
[1], NULL
, len
,
219 SPLICE_F_MOVE
| SPLICE_F_MORE
);
220 DBG("splice chan to pipe, ret %zd", ret_splice
);
221 if (ret_splice
< 0) {
222 perror("Error in relay splice");
224 written
= ret_splice
;
230 /* Handle stream on the relayd if the output is on the network */
232 if (stream
->metadata_flag
) {
233 /* Update counter to fit the spliced data */
234 ret_splice
+= sizeof(stream
->relayd_stream_id
);
235 len
+= sizeof(stream
->relayd_stream_id
);
237 * We do this so the return value can match the len passed as
238 * argument to this function.
240 written
-= sizeof(stream
->relayd_stream_id
);
243 ret
= consumer_handle_stream_before_relayd(stream
, ret_splice
);
245 /* Use the returned socket. */
249 ERR("Remote relayd disconnected. Stopping");
255 DBG3("Kernel consumer splice data in %d to out %d",
256 ctx
->consumer_thread_pipe
[0], outfd
);
257 ret_splice
= splice(ctx
->consumer_thread_pipe
[0], NULL
, outfd
, NULL
,
258 ret_splice
, SPLICE_F_MOVE
| SPLICE_F_MORE
);
259 DBG("splice pipe to file, ret %zd", ret_splice
);
260 if (ret_splice
< 0) {
261 perror("Error in file splice");
263 written
= ret_splice
;
268 if (ret_splice
> len
) {
270 PERROR("Wrote more data than requested %zd (len: %lu)",
272 written
+= ret_splice
;
278 /* This call is useless on a socket so better save a syscall. */
280 /* This won't block, but will start writeout asynchronously */
281 lttng_sync_file_range(outfd
, stream
->out_fd_offset
, ret_splice
,
282 SYNC_FILE_RANGE_WRITE
);
283 stream
->out_fd_offset
+= ret_splice
;
285 written
+= ret_splice
;
287 lttng_consumer_sync_trace_file(stream
, orig_offset
);
294 /* send the appropriate error description to sessiond */
297 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_EBADF
);
300 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_EINVAL
);
303 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_ENOMEM
);
306 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_ESPIPE
);
311 if (relayd
&& stream
->metadata_flag
) {
312 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
321 * Take a snapshot for a specific fd
323 * Returns 0 on success, < 0 on error
325 int lttng_kconsumer_take_snapshot(struct lttng_consumer_local_data
*ctx
,
326 struct lttng_consumer_stream
*stream
)
329 int infd
= stream
->wait_fd
;
331 ret
= kernctl_snapshot(infd
);
334 perror("Getting sub-buffer snapshot.");
341 * Get the produced position
343 * Returns 0 on success, < 0 on error
/*
 * Query the produced position of a stream.
 *
 * Calls kernctl_snapshot_get_produced() on stream->wait_fd and passes `pos`
 * straight through to it. A perror() tagged "kernctl_snapshot_get_produced"
 * is present, presumably on the failure path.
 *
 * NOTE(review): this listing carries line-number residue and is missing the
 * line that declares the third parameter (`pos`), the declaration of `ret`,
 * the error test and the return statement. Restore them from version
 * control; the parameter type cannot be read from here.
 */
345 int lttng_kconsumer_get_produced_snapshot(
346 struct lttng_consumer_local_data
*ctx
,
347 struct lttng_consumer_stream
*stream
,
/* The kernel-side call is made on the stream's wait_fd. */
351 int infd
= stream
->wait_fd
;
353 ret
= kernctl_snapshot_get_produced(infd
, pos
);
356 perror("kernctl_snapshot_get_produced");
/*
 * Receive one command message from the session daemon on `sock` and apply it.
 *
 * A struct lttcomm_consumer_msg is read with lttcomm_recv_unix_sock(); a
 * read of any other size is reported with CONSUMERD_ERROR_RECV_CMD. The
 * command is then dispatched on msg.cmd_type (ADD_RELAYD_SOCKET,
 * ADD_CHANNEL, ADD_STREAM, UPDATE_STREAM). Near the end of the function a
 * one-byte write to ctx->consumer_poll_pipe[1] wakes up the other end of
 * that pipe.
 *
 * NOTE(review): this listing carries line-number residue and is missing
 * lines (braces, local declarations such as `ret` and `fd`, the bodies of
 * most error branches, and every return statement). Return values and the
 * exact error paths cannot be read from here - restore them from version
 * control before relying on this function.
 */
362 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data
*ctx
,
363 int sock
, struct pollfd
*consumer_sockpoll
)
366 struct lttcomm_consumer_msg msg
;
368 ret
= lttcomm_recv_unix_sock(sock
, &msg
, sizeof(msg
));
369 if (ret
!= sizeof(msg
)) {
370 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_CMD
);
/* STOP is tested ahead of the switch; the action taken is on a missing line. */
373 if (msg
.cmd_type
== LTTNG_CONSUMER_STOP
) {
377 /* relayd needs RCU read-side protection */
380 switch (msg
.cmd_type
) {
/*
 * ADD_RELAYD_SOCKET: attach a socket fd received from the session daemon to
 * the relayd pair identified by msg.u.relayd_sock.net_index, allocating the
 * pair when the lookup finds none.
 */
381 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET
:
384 struct consumer_relayd_sock_pair
*relayd
;
386 DBG("Consumer adding relayd socket");
388 /* Get relayd reference if exists. */
389 relayd
= consumer_find_relayd(msg
.u
.relayd_sock
.net_index
);
390 if (relayd
== NULL
) {
391 /* Not found. Allocate one. */
392 relayd
= consumer_allocate_relayd_sock_pair(
393 msg
.u
.relayd_sock
.net_index
);
394 if (relayd
== NULL
) {
395 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
400 /* Poll on consumer socket. */
401 if (lttng_consumer_poll_socket(consumer_sockpoll
) < 0) {
405 /* Get relayd socket from session daemon */
/* Exactly one fd is requested; any other result size is reported as RECV_FD. */
406 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
407 if (ret
!= sizeof(fd
)) {
408 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_FD
);
412 /* Copy socket information and received FD */
413 switch (msg
.u
.relayd_sock
.type
) {
414 case LTTNG_STREAM_CONTROL
:
415 /* Copy received lttcomm socket */
416 lttcomm_copy_sock(&relayd
->control_sock
, &msg
.u
.relayd_sock
.sock
);
/*
 * The fd produced by lttcomm_create_sock() is closed right below and
 * replaced by the fd received from the session daemon.
 */
418 ret
= lttcomm_create_sock(&relayd
->control_sock
);
423 /* Close the created socket fd which is useless */
424 close(relayd
->control_sock
.fd
);
426 /* Assign new file descriptor */
427 relayd
->control_sock
.fd
= fd
;
/* Same sequence as the control socket, applied to the data socket. */
429 case LTTNG_STREAM_DATA
:
430 /* Copy received lttcomm socket */
431 lttcomm_copy_sock(&relayd
->data_sock
, &msg
.u
.relayd_sock
.sock
);
432 ret
= lttcomm_create_sock(&relayd
->data_sock
);
437 /* Close the created socket fd which is useless */
438 close(relayd
->data_sock
.fd
);
440 /* Assign new file descriptor */
441 relayd
->data_sock
.fd
= fd
;
444 ERR("Unknown relayd socket type");
448 DBG("Consumer %s socket created successfully with net idx %d (fd: %d)",
449 msg
.u
.relayd_sock
.type
== LTTNG_STREAM_CONTROL
? "control" : "data",
450 relayd
->net_seq_idx
, fd
);
453 * Add relayd socket pair to consumer data hashtable. If object already
454 * exists or on error, the function gracefully returns.
456 consumer_add_relayd(relayd
);
/*
 * ADD_CHANNEL: allocate a channel from the message fields, then register it
 * with consumer_add_channel(). That call appears both after the optional
 * on_recv_channel hook and on the no-hook path; the condition guarding the
 * first call is on a missing line.
 */
460 case LTTNG_CONSUMER_ADD_CHANNEL
:
462 struct lttng_consumer_channel
*new_channel
;
464 DBG("consumer_add_channel %d", msg
.u
.channel
.channel_key
);
465 new_channel
= consumer_allocate_channel(msg
.u
.channel
.channel_key
,
467 msg
.u
.channel
.mmap_len
,
468 msg
.u
.channel
.max_sb_size
);
469 if (new_channel
== NULL
) {
470 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
473 if (ctx
->on_recv_channel
!= NULL
) {
474 ret
= ctx
->on_recv_channel(new_channel
);
476 consumer_add_channel(new_channel
);
477 } else if (ret
< 0) {
481 consumer_add_channel(new_channel
);
/*
 * ADD_STREAM: receive the stream fd, allocate the stream, register it on
 * the matching relayd (if one is found) with relayd_add_stream() under
 * ctrl_sock_mutex, then hand it to the optional on_recv_stream hook and
 * consumer_add_stream().
 */
485 case LTTNG_CONSUMER_ADD_STREAM
:
488 struct consumer_relayd_sock_pair
*relayd
= NULL
;
489 struct lttng_consumer_stream
*new_stream
;
492 if (lttng_consumer_poll_socket(consumer_sockpoll
) < 0) {
496 /* Get stream file descriptor from socket */
497 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
498 if (ret
!= sizeof(fd
)) {
499 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_FD
);
/* NOTE(review): several arguments of this call are on missing lines. */
503 new_stream
= consumer_allocate_stream(msg
.u
.stream
.channel_key
,
504 msg
.u
.stream
.stream_key
,
507 msg
.u
.stream
.mmap_len
,
509 msg
.u
.stream
.path_name
,
512 msg
.u
.stream
.net_index
,
513 msg
.u
.stream
.metadata_flag
);
514 if (new_stream
== NULL
) {
515 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
/*
 * NOTE(review): no metadata check is visible before this lookup, despite
 * the wording of the comment below - confirm the intent.
 */
519 /* The stream is not metadata. Get relayd reference if exists. */
520 relayd
= consumer_find_relayd(msg
.u
.stream
.net_index
);
521 if (relayd
!= NULL
) {
522 /* Add stream on the relayd */
523 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
524 ret
= relayd_add_stream(&relayd
->control_sock
,
525 msg
.u
.stream
.name
, msg
.u
.stream
.path_name
,
526 &new_stream
->relayd_stream_id
);
527 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
/* A net_index other than -1 with no relayd found is logged as an error. */
531 } else if (msg
.u
.stream
.net_index
!= -1) {
532 ERR("Network sequence index %d unknown. Not adding stream.",
533 msg
.u
.stream
.net_index
);
538 if (ctx
->on_recv_stream
!= NULL
) {
539 ret
= ctx
->on_recv_stream(new_stream
);
541 consumer_add_stream(new_stream
);
542 } else if (ret
< 0) {
546 consumer_add_stream(new_stream
);
549 DBG("Kernel consumer_add_stream (%d)", fd
);
/*
 * UPDATE_STREAM: apply a stream state change with
 * consumer_change_stream_state(), going through the optional
 * on_update_stream hook when one is set.
 */
552 case LTTNG_CONSUMER_UPDATE_STREAM
:
554 if (ctx
->on_update_stream
!= NULL
) {
555 ret
= ctx
->on_update_stream(msg
.u
.stream
.stream_key
, msg
.u
.stream
.state
);
557 consumer_change_stream_state(msg
.u
.stream
.stream_key
, msg
.u
.stream
.state
);
558 } else if (ret
< 0) {
562 consumer_change_stream_state(msg
.u
.stream
.stream_key
,
572 * Wake-up the other end by writing a null byte in the pipe
573 * (non-blocking). Important note: Because writing into the
574 * pipe is non-blocking (and therefore we allow dropping wakeup
575 * data, as long as there is wakeup data present in the pipe
576 * buffer to wake up the other end), the other end should
577 * perform the following sequence for waiting:
578 * 1) empty the pipe (reads).
579 * 2) perform update operation.
580 * 3) wait on the pipe (poll).
/* The one-byte write is retried for as long as it fails with EINTR. */
583 ret
= write(ctx
->consumer_poll_pipe
[1], "", 1);
584 } while (ret
< 0 && errno
== EINTR
);
591 * Consume data on a file descriptor and write it on a trace file.
593 ssize_t
lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream
*stream
,
594 struct lttng_consumer_local_data
*ctx
)
599 int infd
= stream
->wait_fd
;
601 DBG("In read_subbuffer (infd : %d)", infd
);
602 /* Get the next subbuffer */
603 err
= kernctl_get_next_subbuf(infd
);
606 * This is a debug message even for single-threaded consumer,
607 * because poll() have more relaxed criterions than get subbuf,
608 * so get_subbuf may fail for short race windows where poll()
609 * would issue wakeups.
611 DBG("Reserving sub buffer failed (everything is normal, "
612 "it is due to concurrency)");
616 switch (stream
->output
) {
617 case LTTNG_EVENT_SPLICE
:
618 /* read the whole subbuffer */
619 err
= kernctl_get_padded_subbuf_size(infd
, &len
);
622 perror("Getting sub-buffer len failed.");
626 /* splice the subbuffer to the tracefile */
627 ret
= lttng_consumer_on_read_subbuffer_splice(ctx
, stream
, len
);
630 * display the error but continue processing to try
631 * to release the subbuffer
633 ERR("Error splicing to tracefile (ret: %ld != len: %ld)",
638 case LTTNG_EVENT_MMAP
:
639 /* read the used subbuffer size */
640 err
= kernctl_get_padded_subbuf_size(infd
, &len
);
643 perror("Getting sub-buffer len failed.");
646 /* write the subbuffer to the tracefile */
647 ret
= lttng_consumer_on_read_subbuffer_mmap(ctx
, stream
, len
);
650 * display the error but continue processing to try
651 * to release the subbuffer
653 ERR("Error writing to tracefile");
657 ERR("Unknown output method");
661 err
= kernctl_put_next_subbuf(infd
);
664 if (errno
== EFAULT
) {
665 perror("Error in unreserving sub buffer\n");
666 } else if (errno
== EIO
) {
667 /* Should never happen with newer LTTng versions */
668 perror("Reader has been pushed by the writer, last sub-buffer corrupted.");
677 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream
*stream
)
681 /* Opening the tracefile in write mode */
682 if (strlen(stream
->path_name
) > 0 && stream
->net_seq_idx
== -1) {
683 ret
= run_as_open(stream
->path_name
,
684 O_WRONLY
|O_CREAT
|O_TRUNC
,
685 S_IRWXU
|S_IRWXG
|S_IRWXO
,
686 stream
->uid
, stream
->gid
);
688 ERR("Opening %s", stream
->path_name
);
692 stream
->out_fd
= ret
;
695 if (stream
->output
== LTTNG_EVENT_MMAP
) {
696 /* get the len of the mmap region */
697 unsigned long mmap_len
;
699 ret
= kernctl_get_mmap_len(stream
->wait_fd
, &mmap_len
);
702 perror("kernctl_get_mmap_len");
705 stream
->mmap_len
= (size_t) mmap_len
;
707 stream
->mmap_base
= mmap(NULL
, stream
->mmap_len
,
708 PROT_READ
, MAP_PRIVATE
, stream
->wait_fd
, 0);
709 if (stream
->mmap_base
== MAP_FAILED
) {
710 perror("Error mmaping");
716 /* we return 0 to let the library handle the FD internally */
723 err
= close(stream
->out_fd
);