Fix: notify the viewer if new streams got added
lttng-tools: src/bin/lttng-relayd/live.c
/*
 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
 *                      David Goulet <dgoulet@efficios.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#define _GNU_SOURCE
#include <getopt.h>
#include <grp.h>
#include <limits.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <urcu/futex.h>
#include <urcu/uatomic.h>
#include <unistd.h>
#include <fcntl.h>
#include <config.h>

#include <lttng/lttng.h>
#include <common/common.h>
#include <common/compat/poll.h>
#include <common/compat/socket.h>
#include <common/defaults.h>
#include <common/futex.h>
#include <common/sessiond-comm/sessiond-comm.h>
#include <common/sessiond-comm/inet.h>
#include <common/sessiond-comm/relayd.h>
#include <common/uri.h>
#include <common/utils.h>

#include "cmd.h"
#include "live.h"
#include "lttng-relayd.h"
#include "lttng-viewer.h"
#include "utils.h"
#include "health-relayd.h"
#include "testpoint.h"

static struct lttng_uri *live_uri;

/*
 * This pipe is used to inform the worker thread that a command is queued and
 * ready to be processed.
 */
static int live_relay_cmd_pipe[2] = { -1, -1 };
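/*
 * Note: the dispatcher writes a whole struct relay_command into this pipe
 * and the worker reads it back out in add_connection(), so the pipe carries
 * the command by value, not by pointer.
 */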

/* Shared between threads */
static int live_dispatch_thread_exit;

static pthread_t live_listener_thread;
static pthread_t live_dispatcher_thread;
static pthread_t live_worker_thread;

/*
 * Relay command queue.
 *
 * The live_thread_listener and live_thread_dispatcher communicate with this
 * queue.
 */
static struct relay_cmd_queue viewer_cmd_queue;

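/*
 * Last viewer session id handed out in viewer_connect(). Only the worker
 * thread increments it, so a plain increment is assumed to be safe here
 * (single-writer assumption based on the thread layout of this file).
 */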
static uint64_t last_relay_viewer_session_id;

/*
 * Clean up the daemon.
 */
static
void cleanup(void)
{
	DBG("Cleaning up");

	free(live_uri);
}

/*
 * Write to writable pipe used to notify a thread.
 */
static
int notify_thread_pipe(int wpipe)
{
	ssize_t ret;

	ret = lttng_write(wpipe, "!", 1);
	if (ret < 1) {
		PERROR("write poll pipe");
	}

	return (int) ret;
}

/*
 * Stop all threads by closing the thread quit pipe.
 */
static
void stop_threads(void)
{
	int ret;

	/* Stopping all threads */
	DBG("Terminating all live threads");
	ret = notify_thread_pipe(thread_quit_pipe[1]);
	if (ret < 0) {
		ERR("write error on thread quit pipe");
	}

	/* Dispatch thread */
	CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
	futex_nto1_wake(&viewer_cmd_queue.futex);
}

/*
 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
 */
static
int create_thread_poll_set(struct lttng_poll_event *events, int size)
{
	int ret;

	if (events == NULL || size == 0) {
		ret = -1;
		goto error;
	}

	ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
	if (ret < 0) {
		goto error;
	}

	/* Add quit pipe */
	ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN | LPOLLERR);
	if (ret < 0) {
		goto error;
	}

	return 0;

error:
	return ret;
}

/*
 * Check if the thread quit pipe was triggered.
 *
 * Return 1 if it was triggered, else 0.
 */
static
int check_thread_quit_pipe(int fd, uint32_t events)
{
	if (fd == thread_quit_pipe[0] && (events & LPOLLIN)) {
		return 1;
	}

	return 0;
}

/*
 * Create and init socket from uri.
 */
static
struct lttcomm_sock *init_socket(struct lttng_uri *uri)
{
	int ret;
	struct lttcomm_sock *sock = NULL;

	sock = lttcomm_alloc_sock_from_uri(uri);
	if (sock == NULL) {
		ERR("Allocating socket");
		goto error;
	}

	ret = lttcomm_create_sock(sock);
	if (ret < 0) {
		goto error;
	}
	DBG("Listening on sock %d for live", sock->fd);

	ret = sock->ops->bind(sock);
	if (ret < 0) {
		goto error;
	}

	ret = sock->ops->listen(sock, -1);
	if (ret < 0) {
		goto error;
	}

	return sock;

error:
	if (sock) {
		lttcomm_destroy_sock(sock);
	}
	return NULL;
}

/*
 * This thread manages the listening for new connections on the network.
 */
static
void *thread_listener(void *data)
{
	int i, ret, pollfd, err = -1;
	int val = 1;
	uint32_t revents, nb_fd;
	struct lttng_poll_event events;
	struct lttcomm_sock *live_control_sock;

	DBG("[thread] Relay live listener started");

	health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);

	health_code_update();

	live_control_sock = init_socket(live_uri);
	if (!live_control_sock) {
		goto error_sock_control;
	}

	/* Pass 2 as size here for the thread quit pipe and control sockets. */
	ret = create_thread_poll_set(&events, 2);
	if (ret < 0) {
		goto error_create_poll;
	}

	/* Add the control socket */
	ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
	if (ret < 0) {
		goto error_poll_add;
	}

	lttng_relay_notify_ready();

	if (testpoint(relayd_thread_live_listener)) {
		goto error_testpoint;
	}

	while (1) {
		health_code_update();

		DBG("Listener accepting live viewer connections");

restart:
		health_poll_entry();
		ret = lttng_poll_wait(&events, -1);
		health_poll_exit();
		if (ret < 0) {
			/*
			 * Restart interrupted system call.
			 */
			if (errno == EINTR) {
				goto restart;
			}
			goto error;
		}
		nb_fd = ret;

		DBG("Relay new viewer connection received");
		for (i = 0; i < nb_fd; i++) {
			health_code_update();

			/* Fetch once the poll data */
			revents = LTTNG_POLL_GETEV(&events, i);
			pollfd = LTTNG_POLL_GETFD(&events, i);

			/* Thread quit pipe has been closed. Killing thread. */
			ret = check_thread_quit_pipe(pollfd, revents);
			if (ret) {
				err = 0;
				goto exit;
			}

			if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
				ERR("socket poll error");
				goto error;
			} else if (revents & LPOLLIN) {
				/*
				 * Get allocated in this thread, enqueued to a global queue,
				 * dequeued and freed in the worker thread.
				 */
				struct relay_command *relay_cmd;
				struct lttcomm_sock *newsock;

				relay_cmd = zmalloc(sizeof(*relay_cmd));
				if (!relay_cmd) {
					PERROR("relay command zmalloc");
					goto error;
				}

				assert(pollfd == live_control_sock->fd);
				newsock = live_control_sock->ops->accept(live_control_sock);
				if (!newsock) {
					PERROR("accepting control sock");
					free(relay_cmd);
					goto error;
				}
				DBG("Relay viewer connection accepted socket %d", newsock->fd);
				ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
						sizeof(int));
				if (ret < 0) {
					PERROR("setsockopt inet");
					lttcomm_destroy_sock(newsock);
					free(relay_cmd);
					goto error;
				}
				relay_cmd->sock = newsock;

				/*
				 * Lock free enqueue the request.
				 */
				cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);

				/*
				 * Wake the dispatch queue futex. Implicit memory
				 * barrier with the exchange in cds_wfq_enqueue.
				 */
				futex_nto1_wake(&viewer_cmd_queue.futex);
			}
		}
	}

exit:
error:
error_poll_add:
error_testpoint:
	lttng_poll_clean(&events);
error_create_poll:
	if (live_control_sock->fd >= 0) {
		ret = live_control_sock->ops->close(live_control_sock);
		if (ret) {
			PERROR("close");
		}
	}
	lttcomm_destroy_sock(live_control_sock);
error_sock_control:
	if (err) {
		health_error();
		DBG("Live viewer listener thread exited with error");
	}
	health_unregister(health_relayd);
	DBG("Live viewer listener thread cleanup complete");
	stop_threads();
	return NULL;
}

/*
 * This thread manages the dispatching of the requests to worker threads.
 */
static
void *thread_dispatcher(void *data)
{
	int err = -1;
	ssize_t ret;
	struct cds_wfq_node *node;
	struct relay_command *relay_cmd = NULL;

	DBG("[thread] Live viewer relay dispatcher started");

	health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);

	if (testpoint(relayd_thread_live_dispatcher)) {
		goto error_testpoint;
	}

	health_code_update();

	while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
		health_code_update();

		/* Atomically prepare the queue futex */
		futex_nto1_prepare(&viewer_cmd_queue.futex);

		do {
			health_code_update();

			/* Dequeue commands */
			node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
			if (node == NULL) {
				DBG("Woken up but nothing in the live-viewer "
						"relay command queue");
				/* Continue thread execution */
				break;
			}

			relay_cmd = caa_container_of(node, struct relay_command, node);
			DBG("Dispatching viewer request waiting on sock %d",
					relay_cmd->sock->fd);

			/*
			 * Inform worker thread of the new request. This call is blocking
			 * so we can be assured that the data will be read at some point in
			 * time or wait until the end of the world :)
			 */
			ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
					sizeof(*relay_cmd));
			free(relay_cmd);
			/* Cast avoids a signed/unsigned compare that would miss errors. */
			if (ret < (ssize_t) sizeof(struct relay_command)) {
				PERROR("write cmd pipe");
				goto error;
			}
		} while (node != NULL);

		/* Futex wait on queue. Blocking call on futex() */
		health_poll_entry();
		futex_nto1_wait(&viewer_cmd_queue.futex);
		health_poll_exit();
	}

	/* Normal exit, no error */
	err = 0;

error:
error_testpoint:
	if (err) {
		health_error();
		ERR("Health error occurred in %s", __func__);
	}
	health_unregister(health_relayd);
	DBG("Live viewer dispatch thread dying");
	stop_threads();
	return NULL;
}

/*
 * Establish connection with the viewer and check the versions.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_connect(struct relay_command *cmd)
{
	int ret;
	struct lttng_viewer_connect reply, msg;

	assert(cmd);

	cmd->version_check_done = 1;

	health_code_update();

	/* Get version from the other side. */
	ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0);
	if (ret < 0 || ret != sizeof(msg)) {
		if (ret == 0) {
			/* Orderly shutdown. Not necessary to print an error. */
			DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
		} else {
			ERR("Relay failed to receive the version values.");
		}
		ret = -1;
		goto end;
	}

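	/*
	 * All fields on the wire are big endian: values received from the
	 * viewer go through be32toh()/be64toh(), and replies are converted
	 * back with htobe32()/htobe64() before sendmsg().
	 */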
	health_code_update();

	reply.major = RELAYD_VERSION_COMM_MAJOR;
	reply.minor = RELAYD_VERSION_COMM_MINOR;

	/* Major versions must be the same */
	if (reply.major != be32toh(msg.major)) {
		DBG("Incompatible major versions (%u vs %u)", reply.major,
				be32toh(msg.major));
		ret = -1;
		goto end;
	}

	cmd->major = reply.major;
	/* We adapt to the lowest compatible version */
	if (reply.minor <= be32toh(msg.minor)) {
		cmd->minor = reply.minor;
	} else {
		cmd->minor = be32toh(msg.minor);
	}

	if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
		cmd->type = RELAY_VIEWER_COMMAND;
	} else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
		cmd->type = RELAY_VIEWER_NOTIFICATION;
	} else {
		ERR("Unknown connection type: %u", be32toh(msg.type));
		ret = -1;
		goto end;
	}

	reply.major = htobe32(reply.major);
	reply.minor = htobe32(reply.minor);
	if (cmd->type == RELAY_VIEWER_COMMAND) {
		reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
	}

	health_code_update();

	ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
			sizeof(struct lttng_viewer_connect), 0);
	if (ret < 0) {
		ERR("Relay sending version");
	}

	health_code_update();

	DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
	ret = 0;

end:
	return ret;
}

/*
 * Send the viewer the list of current sessions.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_list_sessions(struct relay_command *cmd,
		struct lttng_ht *sessions_ht)
{
	int ret;
	struct lttng_viewer_list_sessions session_list;
	unsigned long count;
	long approx_before, approx_after;
	struct lttng_ht_node_ulong *node;
	struct lttng_ht_iter iter;
	struct lttng_viewer_session send_session;
	struct relay_session *session;

	DBG("List sessions received");

	if (cmd->version_check_done == 0) {
		ERR("Trying to list sessions before version check");
		ret = -1;
		goto end_no_session;
	}

	rcu_read_lock();
	cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
	session_list.sessions_count = htobe32(count);

	health_code_update();

	ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list,
			sizeof(session_list), 0);
	if (ret < 0) {
		ERR("Relay sending sessions list");
		goto end_unlock;
	}

	health_code_update();

	cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) {
		health_code_update();

		node = lttng_ht_iter_get_node_ulong(&iter);
		if (!node) {
			goto end_unlock;
		}
		session = caa_container_of(node, struct relay_session, session_n);

		strncpy(send_session.session_name, session->session_name,
				sizeof(send_session.session_name));
		strncpy(send_session.hostname, session->hostname,
				sizeof(send_session.hostname));
		send_session.id = htobe64(session->id);
		send_session.live_timer = htobe32(session->live_timer);
		send_session.clients = htobe32(session->viewer_attached);
		send_session.streams = htobe32(session->stream_count);

		health_code_update();

		ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session,
				sizeof(send_session), 0);
		if (ret < 0) {
			ERR("Relay sending session info");
			goto end_unlock;
		}
	}
	health_code_update();

	rcu_read_unlock();
	ret = 0;
	goto end;

end_unlock:
	rcu_read_unlock();

end:
end_no_session:
	return ret;
}

/*
 * Open index file using a given viewer stream.
 *
 * Return 0 on success or else a negative value.
 */
static int open_index(struct relay_viewer_stream *stream)
{
	int ret;
	char fullpath[PATH_MAX];
	struct ctf_packet_index_file_hdr hdr;

	if (stream->tracefile_count > 0) {
		ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%"
				PRIu64 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
				stream->channel_name, stream->tracefile_count_current);
	} else {
		ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s"
				DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
				stream->channel_name);
	}
	if (ret < 0) {
		PERROR("snprintf index path");
		goto error;
	}

	DBG("Opening index file %s in read only", fullpath);
	ret = open(fullpath, O_RDONLY);
	if (ret < 0) {
		if (errno == ENOENT) {
			ret = -ENOENT;
			goto error;
		} else {
			PERROR("opening index in read-only");
		}
		goto error;
	}
	stream->index_read_fd = ret;
	DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);

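	/*
	 * The index file starts with a ctf_packet_index_file_hdr stored in
	 * big endian; validate its magic and major/minor version before
	 * trusting any of the packet entries that follow.
	 */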
	ret = lttng_read(stream->index_read_fd, &hdr, sizeof(hdr));
	if (ret < (ssize_t) sizeof(hdr)) {
		PERROR("Reading index header");
		goto error;
	}
	if (be32toh(hdr.magic) != CTF_INDEX_MAGIC) {
		ERR("Invalid header magic");
		ret = -1;
		goto error;
	}
	if (be32toh(hdr.index_major) != CTF_INDEX_MAJOR ||
			be32toh(hdr.index_minor) != CTF_INDEX_MINOR) {
		ERR("Invalid header version");
		ret = -1;
		goto error;
	}
	ret = 0;

error:
	return ret;
}

/*
 * Allocate and init a new viewer_stream.
 *
 * Copies the values from the stream passed in parameter and inserts the new
 * stream in the viewer_streams_ht.
 *
 * MUST be called with rcu_read_lock held.
 *
 * Returns 0 on success or a negative value on error.
 */
static
int init_viewer_stream(struct relay_stream *stream, int seek_last)
{
	int ret;
	struct relay_viewer_stream *viewer_stream;

	assert(stream);

	viewer_stream = zmalloc(sizeof(*viewer_stream));
	if (!viewer_stream) {
		PERROR("relay viewer stream zmalloc");
		ret = -1;
		goto error;
	}
	viewer_stream->session_id = stream->session->id;
	viewer_stream->stream_handle = stream->stream_handle;
	viewer_stream->path_name = strndup(stream->path_name,
			LTTNG_VIEWER_PATH_MAX);
	viewer_stream->channel_name = strndup(stream->channel_name,
			LTTNG_VIEWER_NAME_MAX);
	viewer_stream->tracefile_count = stream->tracefile_count;
	viewer_stream->metadata_flag = stream->metadata_flag;
	viewer_stream->tracefile_count_last = -1ULL;
	if (seek_last) {
		viewer_stream->tracefile_count_current =
			stream->tracefile_count_current;
	} else {
		viewer_stream->tracefile_count_current =
			stream->oldest_tracefile_id;
	}

	viewer_stream->ctf_trace = stream->ctf_trace;
	if (viewer_stream->metadata_flag) {
		viewer_stream->ctf_trace->viewer_metadata_stream =
			viewer_stream;
	}
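	/*
	 * Each viewer stream holds one reference on its ctf_trace; the
	 * reference is dropped in destroy_viewer_stream().
	 */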
	uatomic_inc(&viewer_stream->ctf_trace->refcount);

	lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
	lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);

	viewer_stream->index_read_fd = -1;
	viewer_stream->read_fd = -1;

	/*
	 * This is to avoid a race between the initialization of this object and
	 * the close of the given stream. If the stream is unable to find this
	 * viewer stream when closing, this copy will at least take the latest
	 * value.
	 * We also need that for the seek_last.
	 */
	viewer_stream->total_index_received = stream->total_index_received;

	/*
	 * If we never received an index for the current stream, delay
	 * the opening of the index, otherwise open it right now.
	 */
	if (viewer_stream->tracefile_count_current ==
			stream->tracefile_count_current &&
			viewer_stream->total_index_received == 0) {
		viewer_stream->index_read_fd = -1;
	} else {
		ret = open_index(viewer_stream);
		if (ret < 0) {
			goto error;
		}
	}

	if (seek_last && viewer_stream->index_read_fd > 0) {
		ret = lseek(viewer_stream->index_read_fd,
				viewer_stream->total_index_received *
					sizeof(struct ctf_packet_index),
				SEEK_CUR);
		if (ret < 0) {
			goto error;
		}
		viewer_stream->last_sent_index =
			viewer_stream->total_index_received;
	}

	ret = 0;

error:
	return ret;
}

/*
 * Rotate a stream to the next tracefile.
 *
 * Returns 0 on success, 1 on EOF, a negative value on error.
 */
static
int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream,
		struct relay_stream *stream)
{
	int ret;
	uint64_t tracefile_id;

	assert(viewer_stream);

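	/*
	 * Tracefiles form a ring of tracefile_count entries, so the next
	 * tracefile is simply the following slot, wrapping around.
	 */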
	tracefile_id = (viewer_stream->tracefile_count_current + 1) %
		viewer_stream->tracefile_count;
	/*
	 * Detect the last tracefile to open.
	 */
	if (viewer_stream->tracefile_count_last != -1ULL &&
			viewer_stream->tracefile_count_last ==
			viewer_stream->tracefile_count_current) {
		ret = 1;
		goto end;
	}

	if (stream) {
		pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
	}
	/*
	 * The writer and the reader are not working in the same
	 * tracefile, we can read up to EOF, we don't care about the
	 * total_index_received.
	 */
	if (!stream || (stream->tracefile_count_current != tracefile_id)) {
		viewer_stream->close_write_flag = 1;
	} else {
		/*
		 * We are opening a file that is still open in write, make
		 * sure we limit our reading to the number of indexes
		 * received.
		 */
		viewer_stream->close_write_flag = 0;
		if (stream) {
			viewer_stream->total_index_received =
				stream->total_index_received;
		}
	}
	viewer_stream->tracefile_count_current = tracefile_id;

	ret = close(viewer_stream->index_read_fd);
	if (ret < 0) {
		PERROR("close index file %d",
				viewer_stream->index_read_fd);
	}
	viewer_stream->index_read_fd = -1;
	ret = close(viewer_stream->read_fd);
	if (ret < 0) {
		PERROR("close tracefile %d",
				viewer_stream->read_fd);
	}
	viewer_stream->read_fd = -1;

	pthread_mutex_lock(&viewer_stream->overwrite_lock);
	viewer_stream->abort_flag = 0;
	pthread_mutex_unlock(&viewer_stream->overwrite_lock);

	if (stream) {
		pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
	}
	ret = open_index(viewer_stream);
	if (ret < 0) {
		goto error;
	}

	ret = 0;

end:
error:
	return ret;
}

/*
 * Attach the viewer to a session and send it the list of streams.
 */
static
int viewer_attach_session(struct relay_command *cmd,
		struct lttng_ht *sessions_ht)
{
	int ret, send_streams = 0;
	uint32_t nb_streams = 0, nb_streams_ready = 0;
	struct lttng_viewer_attach_session_request request;
	struct lttng_viewer_attach_session_response response;
	struct lttng_viewer_stream send_stream;
	struct relay_stream *stream;
	struct relay_viewer_stream *viewer_stream;
	struct lttng_ht_node_ulong *node;
	struct lttng_ht_node_u64 *node64;
	struct lttng_ht_iter iter;
	struct relay_session *session;
	int seek_last = 0;

	assert(cmd);
	assert(sessions_ht);

	DBG("Attach session received");

	if (cmd->version_check_done == 0) {
		ERR("Trying to attach session before version check");
		ret = -1;
		goto end_no_session;
	}

	health_code_update();

	ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0);
	if (ret < 0 || ret != sizeof(request)) {
		if (ret == 0) {
			/* Orderly shutdown. Not necessary to print an error. */
			DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
		} else {
			ERR("Relay failed to receive the attach parameters.");
		}
		ret = -1;
		goto error;
	}

	health_code_update();

	rcu_read_lock();
	lttng_ht_lookup(sessions_ht,
			(void *)((unsigned long) be64toh(request.session_id)), &iter);
	node = lttng_ht_iter_get_node_ulong(&iter);
	if (node == NULL) {
		DBG("Relay session %" PRIu64 " not found",
				be64toh(request.session_id));
		response.status = htobe32(VIEWER_ATTACH_UNK);
		goto send_reply;
	}

	session = caa_container_of(node, struct relay_session, session_n);
	if (cmd->session_id == session->id) {
		/* Same viewer already attached, just send the stream list. */
		send_streams = 1;
		response.status = htobe32(VIEWER_ATTACH_OK);
	} else if (session->viewer_attached != 0) {
		DBG("Already a viewer attached");
		response.status = htobe32(VIEWER_ATTACH_ALREADY);
		goto send_reply;
	} else if (session->live_timer == 0) {
		DBG("Not live session");
		response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
		goto send_reply;
	} else {
		session->viewer_attached++;
		send_streams = 1;
		response.status = htobe32(VIEWER_ATTACH_OK);
		cmd->session_id = session->id;
		cmd->session = session;
	}

	switch (be32toh(request.seek)) {
	case VIEWER_SEEK_BEGINNING:
		/* Default behaviour. */
		break;
	case VIEWER_SEEK_LAST:
		seek_last = 1;
		break;
	default:
		ERR("Wrong seek parameter");
		response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
		send_streams = 0;
		goto send_reply;
	}

	if (send_streams) {
		/* We should only be here if we have a session to attach to. */
		assert(session);

		/*
		 * Fill the viewer_streams_ht to count the number of streams
		 * ready to be sent and avoid concurrency issues on the
		 * relay_streams_ht and don't rely on a total session stream count.
		 */
		cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) {
			struct relay_viewer_stream *vstream;

			health_code_update();

			node = lttng_ht_iter_get_node_ulong(&iter);
			if (!node) {
				continue;
			}
			stream = caa_container_of(node, struct relay_stream, stream_n);
			if (stream->session != cmd->session) {
				continue;
			}
			nb_streams++;

			/*
			 * Don't send streams with no ctf_trace, they are not
			 * ready to be read.
			 */
			if (!stream->ctf_trace || !stream->viewer_ready) {
				continue;
			}
			nb_streams_ready++;

			vstream = live_find_viewer_stream_by_id(stream->stream_handle);
			if (!vstream) {
				ret = init_viewer_stream(stream, seek_last);
				if (ret < 0) {
					goto end_unlock;
				}
			}
		}

		/* We must have the same number of existing and ready streams. */
		if (nb_streams != nb_streams_ready) {
			nb_streams = 0;
		}
		response.streams_count = htobe32(nb_streams);
	}

send_reply:
	health_code_update();
	ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0);
	if (ret < 0) {
		ERR("Relay sending viewer attach response");
		goto end_unlock;
	}
	health_code_update();

	/*
	 * Unknown or empty session, just return gracefully, the viewer knows what
	 * is happening.
	 */
	if (!send_streams || !nb_streams) {
		ret = 0;
		goto end_unlock;
	}

	/* We should only be here if we have a session to attach to. */
	assert(session);
	cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
		health_code_update();

		node64 = lttng_ht_iter_get_node_u64(&iter);
		if (!node64) {
			continue;
		}
		viewer_stream = caa_container_of(node64, struct relay_viewer_stream,
				stream_n);
		if (viewer_stream->session_id != cmd->session->id) {
			continue;
		}

		send_stream.id = htobe64(viewer_stream->stream_handle);
		send_stream.ctf_trace_id = htobe64(viewer_stream->ctf_trace->id);
		send_stream.metadata_flag = htobe32(viewer_stream->metadata_flag);
		strncpy(send_stream.path_name, viewer_stream->path_name,
				sizeof(send_stream.path_name));
		strncpy(send_stream.channel_name, viewer_stream->channel_name,
				sizeof(send_stream.channel_name));

		ret = cmd->sock->ops->sendmsg(cmd->sock, &send_stream,
				sizeof(send_stream), 0);
		if (ret < 0) {
			ERR("Relay sending stream %" PRIu64, viewer_stream->stream_handle);
			goto end_unlock;
		}
		DBG("Sent stream %" PRIu64 " to viewer", viewer_stream->stream_handle);
	}
	ret = 0;

end_unlock:
	rcu_read_unlock();
end_no_session:
error:
	return ret;
}

/*
 * Get viewer stream from stream id.
 *
 * RCU read side lock MUST be acquired.
 */
struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id)
{
	struct lttng_ht_node_u64 *node;
	struct lttng_ht_iter iter;
	struct relay_viewer_stream *stream = NULL;

	lttng_ht_lookup(viewer_streams_ht, &stream_id, &iter);
	node = lttng_ht_iter_get_node_u64(&iter);
	if (node == NULL) {
		DBG("Relay viewer stream %" PRIu64 " not found", stream_id);
		goto end;
	}
	stream = caa_container_of(node, struct relay_viewer_stream, stream_n);

end:
	return stream;
}

static
void deferred_free_viewer_stream(struct rcu_head *head)
{
	struct relay_viewer_stream *stream =
		caa_container_of(head, struct relay_viewer_stream, rcu_node);

	free(stream->path_name);
	free(stream->channel_name);
	free(stream);
}

static
void delete_viewer_stream(struct relay_viewer_stream *vstream)
{
	int delret;
	struct lttng_ht_iter iter;

	iter.iter.node = &vstream->stream_n.node;
	delret = lttng_ht_del(viewer_streams_ht, &iter);
	assert(!delret);
}

static
void destroy_viewer_stream(struct relay_viewer_stream *vstream)
{
	unsigned long ret_ref;
	int ret;

	assert(vstream);
	ret_ref = uatomic_add_return(&vstream->ctf_trace->refcount, -1);
	assert(ret_ref >= 0);

	if (vstream->read_fd >= 0) {
		ret = close(vstream->read_fd);
		if (ret < 0) {
			PERROR("close read_fd");
		}
	}
	if (vstream->index_read_fd >= 0) {
		ret = close(vstream->index_read_fd);
		if (ret < 0) {
			PERROR("close index_read_fd");
		}
	}

	/*
	 * If the only stream left in the HT is the metadata stream,
	 * we need to remove it because we won't detect an EOF for this
	 * stream.
	 */
	if (ret_ref == 1 && vstream->ctf_trace->viewer_metadata_stream) {
		delete_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
		destroy_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
		vstream->ctf_trace->metadata_stream = NULL;
		DBG("Freeing ctf_trace %" PRIu64, vstream->ctf_trace->id);
		/*
		 * The streaming-side is already closed and we can't receive a new
		 * stream concurrently at this point (since the session is being
		 * destroyed), so when we detect the refcount equals 0, we are the
		 * only owners of the ctf_trace and we can free it ourself.
		 */
		free(vstream->ctf_trace);
	}

	call_rcu(&vstream->rcu_node, deferred_free_viewer_stream);
}

/*
 * Atomically check if new streams got added in the session since the last
 * check and reset the flag to 0.
 *
 * Returns 1 if new streams got added, 0 if nothing changed, a negative value
 * on error.
 */
static
int check_new_streams(uint64_t session_id, struct lttng_ht *sessions_ht)
{
	struct lttng_ht_node_ulong *node;
	struct lttng_ht_iter iter;
	struct relay_session *session;
	unsigned long current_val;
	int ret;

	lttng_ht_lookup(sessions_ht,
			(void *)((unsigned long) session_id), &iter);
	node = lttng_ht_iter_get_node_ulong(&iter);
	if (node == NULL) {
		DBG("Relay session %" PRIu64 " not found", session_id);
		ret = -1;
		goto error;
	}

	session = caa_container_of(node, struct relay_session, session_n);

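	/*
	 * uatomic_cmpxchg() reads session->new_streams and, only if it is 1,
	 * swaps it back to 0 in one atomic step. The returned old value tells
	 * us whether streams were added since the last check, and concurrent
	 * checkers cannot both observe the same 1 -> 0 transition.
	 */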
	current_val = uatomic_cmpxchg(&session->new_streams, 1, 0);
	ret = current_val;

error:
	return ret;
}

/*
 * Send the next index for a stream.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_get_next_index(struct relay_command *cmd,
		struct lttng_ht *sessions_ht)
{
	int ret;
	struct lttng_viewer_get_next_index request_index;
	struct lttng_viewer_index viewer_index;
	struct ctf_packet_index packet_index;
	struct relay_viewer_stream *vstream;
	struct relay_stream *rstream;

	assert(cmd);
	assert(sessions_ht);

	DBG("Viewer get next index");

	if (cmd->version_check_done == 0) {
		ERR("Trying to request index before version check");
		ret = -1;
		goto end_no_session;
	}

	health_code_update();
	ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index,
			sizeof(request_index), 0);
	if (ret < 0 || ret != sizeof(request_index)) {
		ret = -1;
		ERR("Relay didn't receive the whole packet");
		goto end;
	}
	health_code_update();

	rcu_read_lock();
	vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id));
	if (!vstream) {
		ret = -1;
		goto end_unlock;
	}

	memset(&viewer_index, 0, sizeof(viewer_index));

	/*
	 * The viewer should not ask for an index on a metadata stream.
	 */
	if (vstream->metadata_flag) {
		viewer_index.status = htobe32(VIEWER_INDEX_HUP);
		goto send_reply;
	}

	/* First time, we open the index file */
	if (vstream->index_read_fd < 0) {
		ret = open_index(vstream);
		if (ret == -ENOENT) {
			/*
			 * The index is created only when the first data packet arrives, it
			 * might not be ready at the beginning of the session
			 */
			viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
			goto send_reply;
		} else if (ret < 0) {
			viewer_index.status = htobe32(VIEWER_INDEX_ERR);
			goto send_reply;
		}
	}

	rstream = relay_stream_find_by_id(vstream->stream_handle);
	if (rstream) {
		if (vstream->abort_flag) {
			/* Rotate on abort (overwrite). */
			DBG("Viewer rotate because of overwrite");
			ret = rotate_viewer_stream(vstream, rstream);
			if (ret < 0) {
				goto end_unlock;
			} else if (ret == 1) {
				viewer_index.status = htobe32(VIEWER_INDEX_HUP);
				delete_viewer_stream(vstream);
				destroy_viewer_stream(vstream);
				goto send_reply;
			}
		}
		pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
		if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
			if (rstream->beacon_ts_end != -1ULL &&
					vstream->last_sent_index == rstream->total_index_received) {
				viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
				viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
				pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
				goto send_reply;
			/*
			 * Reader and writer are working in the same tracefile, so we care
			 * about the number of index received and sent. Otherwise, we read
			 * up to EOF.
			 */
			} else if (rstream->total_index_received <= vstream->last_sent_index
					&& !vstream->close_write_flag) {
				pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
				/* No new index to send, retry later. */
				viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
				goto send_reply;
			}
		}
		pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
	} else if (!rstream && vstream->close_write_flag &&
			vstream->total_index_received == vstream->last_sent_index) {
		/* Last index sent and current tracefile closed in write */
		viewer_index.status = htobe32(VIEWER_INDEX_HUP);
		delete_viewer_stream(vstream);
		destroy_viewer_stream(vstream);
		goto send_reply;
	} else {
		vstream->close_write_flag = 1;
	}

	if (!vstream->ctf_trace->metadata_received ||
			vstream->ctf_trace->metadata_received >
			vstream->ctf_trace->metadata_sent) {
		viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
	}

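	/*
	 * This is the notification path for newly added streams: if streams
	 * were added to the session since the last check, raise the
	 * NEW_STREAM flag on this index reply so the viewer knows it must
	 * fetch an updated stream list before reading further.
	 */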
	ret = check_new_streams(vstream->session_id, sessions_ht);
	if (ret < 0) {
		goto end_unlock;
	} else if (ret == 1) {
		viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM;
	}

	pthread_mutex_lock(&vstream->overwrite_lock);
	if (vstream->abort_flag) {
		/*
		 * The file is being overwritten by the writer, we cannot
		 * use it.
		 */
		viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
		pthread_mutex_unlock(&vstream->overwrite_lock);
		ret = rotate_viewer_stream(vstream, rstream);
		if (ret < 0) {
			goto end_unlock;
		} else if (ret == 1) {
			viewer_index.status = htobe32(VIEWER_INDEX_HUP);
			delete_viewer_stream(vstream);
			destroy_viewer_stream(vstream);
			goto send_reply;
		}
		goto send_reply;
	}
	ret = lttng_read(vstream->index_read_fd, &packet_index,
			sizeof(packet_index));
	pthread_mutex_unlock(&vstream->overwrite_lock);
	if (ret < (ssize_t) sizeof(packet_index)) {
		/*
		 * The tracefile is closed in write, so we read up to EOF.
		 */
		if (vstream->close_write_flag == 1) {
			viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
			/* Rotate on normal EOF */
			ret = rotate_viewer_stream(vstream, rstream);
			if (ret < 0) {
				goto end_unlock;
			} else if (ret == 1) {
				viewer_index.status = htobe32(VIEWER_INDEX_HUP);
				delete_viewer_stream(vstream);
				destroy_viewer_stream(vstream);
				goto send_reply;
			}
		} else {
			PERROR("Relay reading index file %d",
					vstream->index_read_fd);
			viewer_index.status = htobe32(VIEWER_INDEX_ERR);
		}
		goto send_reply;
	} else {
		viewer_index.status = htobe32(VIEWER_INDEX_OK);
		vstream->last_sent_index++;
	}

	/*
	 * Indexes are stored in big endian, no need to switch before sending.
	 */
	viewer_index.offset = packet_index.offset;
	viewer_index.packet_size = packet_index.packet_size;
	viewer_index.content_size = packet_index.content_size;
	viewer_index.timestamp_begin = packet_index.timestamp_begin;
	viewer_index.timestamp_end = packet_index.timestamp_end;
	viewer_index.events_discarded = packet_index.events_discarded;
	viewer_index.stream_id = packet_index.stream_id;

send_reply:
	viewer_index.flags = htobe32(viewer_index.flags);
	health_code_update();
	ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index,
			sizeof(viewer_index), 0);
	if (ret < 0) {
		ERR("Relay index to viewer");
		goto end_unlock;
	}
	health_code_update();

	DBG("Index %" PRIu64 " for stream %" PRIu64 " sent",
			vstream->last_sent_index, vstream->stream_handle);

end_unlock:
	rcu_read_unlock();

end_no_session:
end:
	return ret;
}

/*
 * Send the requested data packet for a stream.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_get_packet(struct relay_command *cmd,
		struct lttng_ht *sessions_ht)
{
	int ret, send_data = 0;
	char *data = NULL;
	uint32_t len = 0;
	ssize_t read_len;
	struct lttng_viewer_get_packet get_packet_info;
	struct lttng_viewer_trace_packet reply;
	struct relay_viewer_stream *stream;

	assert(cmd);

	DBG2("Relay get data packet");

	if (cmd->version_check_done == 0) {
		ERR("Trying to get packet before version check");
		ret = -1;
		goto end;
	}

	health_code_update();
	ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info,
			sizeof(get_packet_info), 0);
	if (ret < 0 || ret != sizeof(get_packet_info)) {
		ret = -1;
		ERR("Relay didn't receive the whole packet");
		goto end;
	}
	health_code_update();

	/* From this point on, the error label can be reached. */
	memset(&reply, 0, sizeof(reply));

	rcu_read_lock();
	stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id));
	if (!stream) {
		goto error;
	}
	assert(stream->ctf_trace);

	/*
	 * The first time we read this stream, we need to open the tracefile.
	 * We should only arrive here if an index has already been sent to the
	 * viewer, so the tracefile must exist; if it does not, it is a fatal
	 * error.
	 */
	if (stream->read_fd < 0) {
		char fullpath[PATH_MAX];

		if (stream->tracefile_count > 0) {
			ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
					stream->channel_name,
					stream->tracefile_count_current);
		} else {
			ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
					stream->channel_name);
		}
		if (ret < 0) {
			goto error;
		}
		ret = open(fullpath, O_RDONLY);
		if (ret < 0) {
			PERROR("Relay opening trace file");
			goto error;
		}
		stream->read_fd = ret;
	}

	if (!stream->ctf_trace->metadata_received ||
			stream->ctf_trace->metadata_received >
			stream->ctf_trace->metadata_sent) {
		reply.status = htobe32(VIEWER_GET_PACKET_ERR);
		reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
		goto send_reply;
	}

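	/*
	 * Same new-stream notification as in viewer_get_next_index(): abort
	 * this packet request with the NEW_STREAM flag so the viewer
	 * refreshes its stream list first.
	 */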
	ret = check_new_streams(stream->session_id, sessions_ht);
	if (ret < 0) {
		goto end_unlock;
	} else if (ret == 1) {
		reply.status = htobe32(VIEWER_GET_PACKET_ERR);
		reply.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM;
		goto send_reply;
	}

	len = be32toh(get_packet_info.len);
	data = zmalloc(len);
	if (!data) {
		PERROR("relay data zmalloc");
		goto error;
	}

	ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
	if (ret < 0) {
		/*
		 * If the read fd was closed by the streaming side, the
		 * abort_flag will be set to 1, otherwise it is an error.
		 */
		if (stream->abort_flag == 0) {
			PERROR("lseek");
			goto error;
		}
		reply.status = htobe32(VIEWER_GET_PACKET_EOF);
		goto send_reply;
	}
	read_len = lttng_read(stream->read_fd, data, len);
	if (read_len < (ssize_t) len) {
		/*
		 * If the read fd was closed by the streaming side, the
		 * abort_flag will be set to 1, otherwise it is an error.
		 */
		if (stream->abort_flag == 0) {
			PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
					stream->read_fd,
					be64toh(get_packet_info.offset));
			goto error;
		} else {
			reply.status = htobe32(VIEWER_GET_PACKET_EOF);
			goto send_reply;
		}
	}
	reply.status = htobe32(VIEWER_GET_PACKET_OK);
	reply.len = htobe32(len);
	send_data = 1;
	goto send_reply;

error:
	reply.status = htobe32(VIEWER_GET_PACKET_ERR);

send_reply:
	reply.flags = htobe32(reply.flags);

	health_code_update();
	ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
	if (ret < 0) {
		ERR("Relay data header to viewer");
		goto end_unlock;
	}
	health_code_update();

	if (send_data) {
		health_code_update();
		ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
		if (ret < 0) {
			ERR("Relay send data to viewer");
			goto end_unlock;
		}
		health_code_update();
	}

	DBG("Sent %u bytes for stream %" PRIu64, len,
			be64toh(get_packet_info.stream_id));

end_unlock:
	free(data);
	rcu_read_unlock();

end:
	return ret;
}

/*
 * Send the session's metadata.
 *
 * Return 0 on success else a negative value.
 */
static
int viewer_get_metadata(struct relay_command *cmd)
{
	int ret = 0;
	ssize_t read_len;
	uint64_t len = 0;
	char *data = NULL;
	struct lttng_viewer_get_metadata request;
	struct lttng_viewer_metadata_packet reply;
	struct relay_viewer_stream *stream;

	assert(cmd);

	DBG("Relay get metadata");

	if (cmd->version_check_done == 0) {
		ERR("Trying to get metadata before version check");
		ret = -1;
		goto end;
	}

	health_code_update();
	ret = cmd->sock->ops->recvmsg(cmd->sock, &request,
			sizeof(request), 0);
	if (ret < 0 || ret != sizeof(request)) {
		ret = -1;
		ERR("Relay didn't receive the whole packet");
		goto end;
	}
	health_code_update();

	rcu_read_lock();
	stream = live_find_viewer_stream_by_id(be64toh(request.stream_id));
	if (!stream || !stream->metadata_flag) {
		ERR("Invalid metadata stream");
		goto error;
	}
	assert(stream->ctf_trace);
	assert(stream->ctf_trace->metadata_sent <=
			stream->ctf_trace->metadata_received);

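	/*
	 * metadata_received only grows, while metadata_sent tracks what this
	 * viewer already got; the difference is exactly the pending chunk to
	 * ship in this reply.
	 */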
	len = stream->ctf_trace->metadata_received -
			stream->ctf_trace->metadata_sent;
	if (len == 0) {
		reply.status = htobe32(VIEWER_NO_NEW_METADATA);
		goto send_reply;
	}

	/* First time, we open the metadata file */
	if (stream->read_fd < 0) {
		char fullpath[PATH_MAX];

		ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
				stream->channel_name);
		if (ret < 0) {
			goto error;
		}
		ret = open(fullpath, O_RDONLY);
		if (ret < 0) {
			PERROR("Relay opening metadata file");
			goto error;
		}
		stream->read_fd = ret;
	}

	reply.len = htobe64(len);
	data = zmalloc(len);
	if (!data) {
		PERROR("viewer metadata zmalloc");
		goto error;
	}

	read_len = lttng_read(stream->read_fd, data, len);
	if (read_len < (ssize_t) len) {
		PERROR("Relay reading metadata file");
		goto error;
	}
	stream->ctf_trace->metadata_sent += read_len;
	reply.status = htobe32(VIEWER_METADATA_OK);
	goto send_reply;

error:
	reply.status = htobe32(VIEWER_METADATA_ERR);

send_reply:
	health_code_update();
	ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
	if (ret < 0) {
		ERR("Relay data header to viewer");
		goto end_unlock;
	}
	health_code_update();

	if (len > 0) {
		ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
		if (ret < 0) {
			ERR("Relay send data to viewer");
			goto end_unlock;
		}
	}

	DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
			be64toh(request.stream_id));

	DBG("Metadata sent");

end_unlock:
	free(data);
	rcu_read_unlock();
end:
	return ret;
}

/*
 * live_relay_unknown_command: send an error reply when an unknown command
 * is received.
 */
static
void live_relay_unknown_command(struct relay_command *cmd)
{
	struct lttcomm_relayd_generic_reply reply;
	int ret;

	reply.ret_code = htobe32(LTTNG_ERR_UNK);
	ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
			sizeof(struct lttcomm_relayd_generic_reply), 0);
	if (ret < 0) {
		ERR("Relay sending unknown command");
	}
}

/*
 * Process the commands received on the control socket.
 */
static
int process_control(struct lttng_viewer_cmd *recv_hdr,
		struct relay_command *cmd, struct lttng_ht *sessions_ht)
{
	int ret = 0;

	switch (be32toh(recv_hdr->cmd)) {
	case VIEWER_CONNECT:
		ret = viewer_connect(cmd);
		break;
	case VIEWER_LIST_SESSIONS:
		ret = viewer_list_sessions(cmd, sessions_ht);
		break;
	case VIEWER_ATTACH_SESSION:
		ret = viewer_attach_session(cmd, sessions_ht);
		break;
	case VIEWER_GET_NEXT_INDEX:
		ret = viewer_get_next_index(cmd, sessions_ht);
		break;
	case VIEWER_GET_PACKET:
		ret = viewer_get_packet(cmd, sessions_ht);
		break;
	case VIEWER_GET_METADATA:
		ret = viewer_get_metadata(cmd);
		break;
	default:
		ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
		live_relay_unknown_command(cmd);
		ret = -1;
		goto end;
	}

end:
	return ret;
}

static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	int ret;

	assert(events);

	lttng_poll_del(events, pollfd);

	ret = close(pollfd);
	if (ret < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}

/*
 * Create and add connection to the given hash table.
 *
 * Return poll add value or else -1 on error.
 */
static
int add_connection(int fd, struct lttng_poll_event *events,
		struct lttng_ht *relay_connections_ht)
{
	int ret;
	struct relay_command *relay_connection;

	assert(events);
	assert(relay_connections_ht);

	relay_connection = zmalloc(sizeof(struct relay_command));
	if (relay_connection == NULL) {
		PERROR("Relay command zmalloc");
		goto error;
	}

	ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
	if (ret < (ssize_t) sizeof(*relay_connection)) {
		PERROR("read relay cmd pipe");
		goto error_read;
	}

	lttng_ht_node_init_ulong(&relay_connection->sock_n,
			(unsigned long) relay_connection->sock->fd);
	rcu_read_lock();
	lttng_ht_add_unique_ulong(relay_connections_ht,
			&relay_connection->sock_n);
	rcu_read_unlock();

	return lttng_poll_add(events, relay_connection->sock->fd,
			LPOLLIN | LPOLLRDHUP);

error_read:
	free(relay_connection);
error:
	return -1;
}

static
void deferred_free_connection(struct rcu_head *head)
{
	struct relay_command *relay_connection =
		caa_container_of(head, struct relay_command, rcu_node);

	if (relay_connection->session &&
			relay_connection->session->viewer_attached > 0) {
		relay_connection->session->viewer_attached--;
	}
	lttcomm_destroy_sock(relay_connection->sock);
	free(relay_connection);
}

/*
 * Delete all streams for a specific session ID.
 */
static
void viewer_del_streams(uint64_t session_id)
{
	struct relay_viewer_stream *stream;
	struct lttng_ht_iter iter;

	rcu_read_lock();
	cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream,
			stream_n.node) {
		health_code_update();

		if (stream->session_id != session_id) {
			continue;
		}

		delete_viewer_stream(stream);
		assert(stream->ctf_trace);

		if (stream->metadata_flag) {
			/*
			 * The metadata viewer stream is destroyed once the refcount on the
			 * ctf trace goes to 0 in the destroy stream function thus there is
			 * no explicit call to that function here.
			 */
			stream->ctf_trace->metadata_sent = 0;
			stream->ctf_trace->viewer_metadata_stream = NULL;
		} else {
			destroy_viewer_stream(stream);
		}
	}
	rcu_read_unlock();
}

/*
 * Delete and free a connection.
 *
 * RCU read side lock MUST be acquired.
 */
static
void del_connection(struct lttng_ht *relay_connections_ht,
		struct lttng_ht_iter *iter, struct relay_command *relay_connection)
{
	int ret;

	assert(relay_connections_ht);
	assert(iter);
	assert(relay_connection);

	DBG("Cleaning connection of session ID %" PRIu64,
			relay_connection->session_id);

	ret = lttng_ht_del(relay_connections_ht, iter);
	assert(!ret);

	viewer_del_streams(relay_connection->session_id);

	call_rcu(&relay_connection->rcu_node, deferred_free_connection);
}

/*
 * This thread processes the viewer commands received on the control socket.
 */
static
void *thread_worker(void *data)
{
	int ret, err = -1;
	uint32_t nb_fd;
	struct relay_command *relay_connection;
	struct lttng_poll_event events;
	struct lttng_ht *relay_connections_ht;
	struct lttng_ht_node_ulong *node;
	struct lttng_ht_iter iter;
	struct lttng_viewer_cmd recv_hdr;
	struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
	struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;

	DBG("[thread] Live viewer relay worker started");

	rcu_register_thread();

	health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);

	if (testpoint(relayd_thread_live_worker)) {
		goto error_testpoint;
	}

	/* table of connections indexed on socket */
	relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
	if (!relay_connections_ht) {
		goto relay_connections_ht_error;
	}

	ret = create_thread_poll_set(&events, 2);
	if (ret < 0) {
		goto error_poll_create;
	}

	ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
	if (ret < 0) {
		goto error;
	}

restart:
	while (1) {
		int i;

		health_code_update();

		/* Infinite blocking call, waiting for transmission */
		DBG3("Relayd live viewer worker thread polling...");
		health_poll_entry();
		ret = lttng_poll_wait(&events, -1);
		health_poll_exit();
		if (ret < 0) {
			/*
			 * Restart interrupted system call.
			 */
			if (errno == EINTR) {
				goto restart;
			}
			goto error;
		}

		nb_fd = ret;

		/*
		 * Process control. The control connection is prioritised so we don't
		 * starve it with high throughput tracing data on the data
		 * connection.
		 */
		for (i = 0; i < nb_fd; i++) {
			/* Fetch once the poll data */
			uint32_t revents = LTTNG_POLL_GETEV(&events, i);
			int pollfd = LTTNG_POLL_GETFD(&events, i);

			health_code_update();

			/* Thread quit pipe has been closed. Killing thread. */
			ret = check_thread_quit_pipe(pollfd, revents);
			if (ret) {
				err = 0;
				goto exit;
			}

			/* Inspect the relay cmd pipe for new connection */
			if (pollfd == live_relay_cmd_pipe[0]) {
				if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
					ERR("Relay live pipe error");
					goto error;
				} else if (revents & LPOLLIN) {
					DBG("Relay live viewer command received");
					ret = add_connection(live_relay_cmd_pipe[0],
							&events, relay_connections_ht);
					if (ret < 0) {
						goto error;
					}
				}
			} else if (revents) {
				rcu_read_lock();
				lttng_ht_lookup(relay_connections_ht,
						(void *)((unsigned long) pollfd), &iter);
				node = lttng_ht_iter_get_node_ulong(&iter);
				if (node == NULL) {
					DBG2("Relay viewer sock %d not found", pollfd);
					rcu_read_unlock();
					goto error;
				}
				relay_connection = caa_container_of(node, struct relay_command,
						sock_n);

				if (revents & (LPOLLERR)) {
					cleanup_poll_connection(&events, pollfd);
					del_connection(relay_connections_ht, &iter,
							relay_connection);
				} else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
					DBG("Viewer socket %d hung up", pollfd);
					cleanup_poll_connection(&events, pollfd);
					del_connection(relay_connections_ht, &iter,
							relay_connection);
				} else if (revents & LPOLLIN) {
					ret = relay_connection->sock->ops->recvmsg(
							relay_connection->sock, &recv_hdr,
							sizeof(struct lttng_viewer_cmd),
							0);
					/* connection closed */
					if (ret <= 0) {
						cleanup_poll_connection(&events, pollfd);
						del_connection(relay_connections_ht, &iter,
								relay_connection);
						DBG("Viewer control connection closed with %d",
								pollfd);
					} else {
						if (relay_connection->session) {
							DBG2("Relay viewer worker receiving data for "
									"session: %" PRIu64,
									relay_connection->session->id);
						}
						ret = process_control(&recv_hdr, relay_connection,
								sessions_ht);
						if (ret < 0) {
							/* Clear the session on error. */
							cleanup_poll_connection(&events, pollfd);
							del_connection(relay_connections_ht, &iter,
									relay_connection);
							DBG("Viewer connection closed with %d", pollfd);
						}
					}
				}
				rcu_read_unlock();
			}
		}
	}

exit:
error:
	lttng_poll_clean(&events);

	/* empty the hash table and free the memory */
	rcu_read_lock();
	cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
		health_code_update();

		node = lttng_ht_iter_get_node_ulong(&iter);
		if (!node) {
			continue;
		}

		relay_connection = caa_container_of(node, struct relay_command,
				sock_n);
		del_connection(relay_connections_ht, &iter, relay_connection);
	}
	rcu_read_unlock();
error_poll_create:
	lttng_ht_destroy(relay_connections_ht);
relay_connections_ht_error:
	/* Close relay cmd pipes */
	utils_close_pipe(live_relay_cmd_pipe);
	if (err) {
		DBG("Viewer worker thread exited with error");
	}
	DBG("Viewer worker thread cleanup complete");
error_testpoint:
	if (err) {
		health_error();
		ERR("Health error occurred in %s", __func__);
	}
	health_unregister(health_relayd);
	stop_threads();
	rcu_unregister_thread();
	return NULL;
}

/*
 * Create the relay command pipe to wake the worker thread.
 * Closed at the end of the worker thread.
 */
static int create_relay_cmd_pipe(void)
{
	int ret;

	ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);

	return ret;
}

void live_stop_threads(void)
{
	int ret;
	void *status;

	stop_threads();

	ret = pthread_join(live_listener_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live listener");
		goto error;	/* join error, exit without cleanup */
	}

	ret = pthread_join(live_worker_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live worker");
		goto error;	/* join error, exit without cleanup */
	}

	ret = pthread_join(live_dispatcher_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live dispatcher");
		goto error;	/* join error, exit without cleanup */
	}

	cleanup();

error:
	return;
}

/*
 * Start the live viewer threads: dispatcher, worker and listener.
 */
int live_start_threads(struct lttng_uri *uri,
		struct relay_local_data *relay_ctx)
{
	int ret = 0;
	void *status;
	int is_root;

	assert(uri);
	live_uri = uri;

	/* Check if daemon is UID = 0 */
	is_root = !getuid();

	if (!is_root) {
		if (live_uri->port < 1024) {
			ERR("Need to be root to use ports < 1024");
			ret = -1;
			goto exit;
		}
	}

	/* Set up the relay command pipe. */
	if ((ret = create_relay_cmd_pipe()) < 0) {
		goto exit;
	}

	/* Init relay command queue. */
	cds_wfq_init(&viewer_cmd_queue.queue);

	/* Set up max poll set size */
	lttng_poll_set_max_size();

	/* Setup the dispatcher thread */
	ret = pthread_create(&live_dispatcher_thread, NULL,
			thread_dispatcher, (void *) NULL);
	if (ret != 0) {
		PERROR("pthread_create viewer dispatcher");
		goto exit_dispatcher;
	}

	/* Setup the worker thread */
	ret = pthread_create(&live_worker_thread, NULL,
			thread_worker, relay_ctx);
	if (ret != 0) {
		PERROR("pthread_create viewer worker");
		goto exit_worker;
	}

	/* Setup the listener thread */
	ret = pthread_create(&live_listener_thread, NULL,
			thread_listener, (void *) NULL);
	if (ret != 0) {
		PERROR("pthread_create viewer listener");
		goto exit_listener;
	}

	ret = 0;
	goto end;

exit_listener:
	ret = pthread_join(live_listener_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live listener");
		goto error;	/* join error, exit without cleanup */
	}

exit_worker:
	ret = pthread_join(live_worker_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live worker");
		goto error;	/* join error, exit without cleanup */
	}

exit_dispatcher:
	ret = pthread_join(live_dispatcher_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live dispatcher");
		goto error;	/* join error, exit without cleanup */
	}

exit:
	cleanup();

end:
error:
	return ret;
}