relayd: use same pipe for live and main
[lttng-tools.git] / src / bin / lttng-relayd / live.c
/*
 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
 *                      David Goulet <dgoulet@efficios.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#define _GNU_SOURCE
#include <getopt.h>
#include <grp.h>
#include <limits.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <urcu/futex.h>
#include <urcu/uatomic.h>
#include <unistd.h>
#include <fcntl.h>
#include <config.h>

#include <lttng/lttng.h>
#include <common/common.h>
#include <common/compat/poll.h>
#include <common/compat/socket.h>
#include <common/defaults.h>
#include <common/futex.h>
#include <common/sessiond-comm/sessiond-comm.h>
#include <common/sessiond-comm/inet.h>
#include <common/sessiond-comm/relayd.h>
#include <common/uri.h>
#include <common/utils.h>

#include "cmd.h"
#include "live.h"
#include "lttng-relayd.h"
#include "lttng-viewer.h"
#include "utils.h"
#include "health-relayd.h"

static struct lttng_uri *live_uri;

/*
 * This pipe is used to inform the worker thread that a command is queued and
 * ready to be processed.
 */
static int live_relay_cmd_pipe[2] = { -1, -1 };

/* Shared between threads */
static int live_dispatch_thread_exit;

static pthread_t live_listener_thread;
static pthread_t live_dispatcher_thread;
static pthread_t live_worker_thread;

/*
 * Relay command queue.
 *
 * The live listener and dispatcher threads communicate through this
 * queue.
 */
static struct relay_cmd_queue viewer_cmd_queue;

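/*
 * Counter used to hand a unique viewer session id to each viewer that
 * connects; it is only incremented from the worker thread (viewer_connect),
 * so no locking is needed.
 */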
static uint64_t last_relay_viewer_session_id;

/*
 * Cleanup the daemon
 */
static
void cleanup(void)
{
	DBG("Cleaning up");

	free(live_uri);
}

/*
 * Write to the writable pipe used to notify a thread.
 */
static
int notify_thread_pipe(int wpipe)
{
	ssize_t ret;

	ret = lttng_write(wpipe, "!", 1);
	if (ret < 1) {
		PERROR("write poll pipe");
	}

	return (int) ret;
}

/*
 * Stop all threads by closing the thread quit pipe.
 */
static
void stop_threads(void)
{
	int ret;

	/* Stopping all threads */
	DBG("Terminating all live threads");
	ret = notify_thread_pipe(thread_quit_pipe[1]);
	if (ret < 0) {
		ERR("write error on thread quit pipe");
	}

	/* Dispatch thread */
	CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
	futex_nto1_wake(&viewer_cmd_queue.futex);
}

/*
 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
 */
static
int create_thread_poll_set(struct lttng_poll_event *events, int size)
{
	int ret;

	if (events == NULL || size == 0) {
		ret = -1;
		goto error;
	}

	ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
	if (ret < 0) {
		goto error;
	}

	/* Add quit pipe */
	ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN | LPOLLERR);
	if (ret < 0) {
		goto error;
	}

	return 0;

error:
	return ret;
}

/*
 * Check if the thread quit pipe was triggered.
 *
 * Return 1 if it was triggered, else 0.
 */
static
int check_thread_quit_pipe(int fd, uint32_t events)
{
	if (fd == thread_quit_pipe[0] && (events & LPOLLIN)) {
		return 1;
	}

	return 0;
}

/*
 * Create and init socket from uri.
 */
static
struct lttcomm_sock *init_socket(struct lttng_uri *uri)
{
	int ret;
	struct lttcomm_sock *sock = NULL;

	sock = lttcomm_alloc_sock_from_uri(uri);
	if (sock == NULL) {
		ERR("Allocating socket");
		goto error;
	}

	ret = lttcomm_create_sock(sock);
	if (ret < 0) {
		goto error;
	}
	DBG("Listening on sock %d for live", sock->fd);

	ret = sock->ops->bind(sock);
	if (ret < 0) {
		goto error;
	}

	ret = sock->ops->listen(sock, -1);
	if (ret < 0) {
		goto error;
	}

	return sock;

error:
	if (sock) {
		lttcomm_destroy_sock(sock);
	}
	return NULL;
}

/*
 * This thread listens on the network for new viewer connections.
 */
static
void *thread_listener(void *data)
{
	int i, ret, pollfd, err = -1;
	int val = 1;
	uint32_t revents, nb_fd;
	struct lttng_poll_event events;
	struct lttcomm_sock *live_control_sock;

	DBG("[thread] Relay live listener started");

	health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);

	health_code_update();

	live_control_sock = init_socket(live_uri);
	if (!live_control_sock) {
		goto error_sock_control;
	}

	/* Pass 2 as size here for the thread quit pipe and control socket. */
	ret = create_thread_poll_set(&events, 2);
	if (ret < 0) {
		goto error_create_poll;
	}

	/* Add the control socket */
	ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
	if (ret < 0) {
		goto error_poll_add;
	}

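	/*
	 * Signal that the live listener is up; the relayd reports overall
	 * readiness once every thread has checked in.
	 */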
	lttng_relay_notify_ready();

	while (1) {
		health_code_update();

		DBG("Listener accepting live viewers connections");

restart:
		health_poll_entry();
		ret = lttng_poll_wait(&events, -1);
		health_poll_exit();
		if (ret < 0) {
			/*
			 * Restart interrupted system call.
			 */
			if (errno == EINTR) {
				goto restart;
			}
			goto error;
		}
		nb_fd = ret;

		DBG("Relay new viewer connection received");
		for (i = 0; i < nb_fd; i++) {
			health_code_update();

			/* Fetch once the poll data */
			revents = LTTNG_POLL_GETEV(&events, i);
			pollfd = LTTNG_POLL_GETFD(&events, i);

			/* Thread quit pipe has been closed. Killing thread. */
			ret = check_thread_quit_pipe(pollfd, revents);
			if (ret) {
				err = 0;
				goto exit;
			}

			if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
				ERR("socket poll error");
				goto error;
			} else if (revents & LPOLLIN) {
				/*
				 * Allocated in this thread, enqueued to a global queue,
				 * dequeued and freed in the worker thread.
				 */
				struct relay_command *relay_cmd;
				struct lttcomm_sock *newsock;

				relay_cmd = zmalloc(sizeof(*relay_cmd));
				if (!relay_cmd) {
					PERROR("relay command zmalloc");
					goto error;
				}

				assert(pollfd == live_control_sock->fd);
				newsock = live_control_sock->ops->accept(live_control_sock);
				if (!newsock) {
					PERROR("accepting control sock");
					free(relay_cmd);
					goto error;
				}
				DBG("Relay viewer connection accepted socket %d", newsock->fd);
				ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
						sizeof(int));
				if (ret < 0) {
					PERROR("setsockopt inet");
					lttcomm_destroy_sock(newsock);
					free(relay_cmd);
					goto error;
				}
				relay_cmd->sock = newsock;

				/*
				 * Lock free enqueue the request.
				 */
				cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);

				/*
				 * Wake the dispatch queue futex. Implicit memory
				 * barrier with the exchange in cds_wfq_enqueue.
				 */
				futex_nto1_wake(&viewer_cmd_queue.futex);
			}
		}
	}

exit:
error:
error_poll_add:
	lttng_poll_clean(&events);
error_create_poll:
	if (live_control_sock->fd >= 0) {
		ret = live_control_sock->ops->close(live_control_sock);
		if (ret) {
			PERROR("close");
		}
	}
	lttcomm_destroy_sock(live_control_sock);
error_sock_control:
	if (err) {
		health_error();
		DBG("Live viewer listener thread exited with error");
	}
	health_unregister(health_relayd);
	DBG("Live viewer listener thread cleanup complete");
	stop_threads();
	return NULL;
}

/*
 * This thread dispatches the viewer requests queued by the listener to the
 * worker thread.
 */
static
void *thread_dispatcher(void *data)
{
	int err = -1;
	ssize_t ret;
	struct cds_wfq_node *node;
	struct relay_command *relay_cmd = NULL;

	DBG("[thread] Live viewer relay dispatcher started");

	health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);

	health_code_update();

	while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
		health_code_update();

		/* Atomically prepare the queue futex */
		futex_nto1_prepare(&viewer_cmd_queue.futex);

		do {
			health_code_update();

			/* Dequeue commands */
			node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
			if (node == NULL) {
				DBG("Woken up but nothing in the live-viewer "
						"relay command queue");
				/* Continue thread execution */
				break;
			}

			relay_cmd = caa_container_of(node, struct relay_command, node);
			DBG("Dispatching viewer request waiting on sock %d",
					relay_cmd->sock->fd);

			/*
			 * Inform worker thread of the new request. This call is blocking
			 * so we can be assured that the data will be read at some point in
			 * time or wait to the end of the world :)
			 */
			ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
					sizeof(*relay_cmd));
			free(relay_cmd);
			if (ret < sizeof(struct relay_command)) {
				PERROR("write cmd pipe");
				goto error;
			}
		} while (node != NULL);

		/* Futex wait on queue. Blocking call on futex() */
		health_poll_entry();
		futex_nto1_wait(&viewer_cmd_queue.futex);
		health_poll_exit();
	}

	/* Normal exit, no error */
	err = 0;

error:
	if (err) {
		health_error();
		ERR("Health error occurred in %s", __func__);
	}
	health_unregister(health_relayd);
	DBG("Live viewer dispatch thread dying");
	stop_threads();
	return NULL;
}

/*
 * Establish a connection with the viewer and check the versions.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_connect(struct relay_command *cmd)
{
	int ret;
	struct lttng_viewer_connect reply, msg;

	assert(cmd);

	cmd->version_check_done = 1;

	health_code_update();

	/* Get version from the other side. */
	ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0);
	if (ret < 0 || ret != sizeof(msg)) {
		if (ret == 0) {
			/* Orderly shutdown. Not necessary to print an error. */
			DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
		} else {
			ERR("Relay failed to receive the version values.");
		}
		ret = -1;
		goto end;
	}

	health_code_update();

	reply.major = RELAYD_VERSION_COMM_MAJOR;
	reply.minor = RELAYD_VERSION_COMM_MINOR;

	/* Major versions must be the same */
	if (reply.major != be32toh(msg.major)) {
		DBG("Incompatible major versions (%u vs %u)", reply.major,
				be32toh(msg.major));
		ret = -1;
		goto end;
	}

	cmd->major = reply.major;
	/* We adapt to the lowest compatible version */
	if (reply.minor <= be32toh(msg.minor)) {
		cmd->minor = reply.minor;
	} else {
		cmd->minor = be32toh(msg.minor);
	}

	if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
		cmd->type = RELAY_VIEWER_COMMAND;
	} else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
		cmd->type = RELAY_VIEWER_NOTIFICATION;
	} else {
		ERR("Unknown connection type: %u", be32toh(msg.type));
		ret = -1;
		goto end;
	}

	reply.major = htobe32(reply.major);
	reply.minor = htobe32(reply.minor);
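	/*
	 * Only command connections get a new viewer session id; notification
	 * connections do not.
	 */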
	if (cmd->type == RELAY_VIEWER_COMMAND) {
		reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
	}

	health_code_update();

	ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
			sizeof(struct lttng_viewer_connect), 0);
	if (ret < 0) {
		ERR("Relay sending version");
	}

	health_code_update();

	DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
	ret = 0;

end:
	return ret;
}

/*
 * Send the viewer the list of current sessions.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_list_sessions(struct relay_command *cmd,
		struct lttng_ht *sessions_ht)
{
	int ret;
	struct lttng_viewer_list_sessions session_list;
	unsigned long count;
	long approx_before, approx_after;
	struct lttng_ht_node_ulong *node;
	struct lttng_ht_iter iter;
	struct lttng_viewer_session send_session;
	struct relay_session *session;

	DBG("List sessions received");

	if (cmd->version_check_done == 0) {
		ERR("Trying to list sessions before version check");
		ret = -1;
		goto end_no_session;
	}

	rcu_read_lock();
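	/*
	 * Count the session nodes; approx_before and approx_after are the
	 * approximate counts seen before and after the traversal.
	 */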
	cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
	session_list.sessions_count = htobe32(count);

	health_code_update();

	ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list,
			sizeof(session_list), 0);
	if (ret < 0) {
		ERR("Relay sending sessions list");
		goto end_unlock;
	}

	health_code_update();

	cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) {
		health_code_update();

		node = lttng_ht_iter_get_node_ulong(&iter);
		if (!node) {
			goto end_unlock;
		}
		session = caa_container_of(node, struct relay_session, session_n);

		strncpy(send_session.session_name, session->session_name,
				sizeof(send_session.session_name));
		strncpy(send_session.hostname, session->hostname,
				sizeof(send_session.hostname));
		send_session.id = htobe64(session->id);
		send_session.live_timer = htobe32(session->live_timer);
		send_session.clients = htobe32(session->viewer_attached);
		send_session.streams = htobe32(session->stream_count);

		health_code_update();

		ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session,
				sizeof(send_session), 0);
		if (ret < 0) {
			ERR("Relay sending session info");
			goto end_unlock;
		}
	}
	health_code_update();

	rcu_read_unlock();
	ret = 0;
	goto end;

end_unlock:
	rcu_read_unlock();

end:
end_no_session:
	return ret;
}

/*
 * Open index file using a given viewer stream.
 *
 * Return 0 on success or else a negative value.
 */
static int open_index(struct relay_viewer_stream *stream)
{
	int ret;
	char fullpath[PATH_MAX];
	struct ctf_packet_index_file_hdr hdr;

	if (stream->tracefile_count > 0) {
		ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%"
				PRIu64 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
				stream->channel_name, stream->tracefile_count_current);
	} else {
		ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s"
				DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
				stream->channel_name);
	}
	if (ret < 0) {
		PERROR("snprintf index path");
		goto error;
	}

	DBG("Opening index file %s in read only", fullpath);
	ret = open(fullpath, O_RDONLY);
	if (ret < 0) {
		if (errno == ENOENT) {
			ret = -ENOENT;
			goto error;
		} else {
			PERROR("opening index in read-only");
		}
		goto error;
	}
	stream->index_read_fd = ret;
	DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);

	ret = lttng_read(stream->index_read_fd, &hdr, sizeof(hdr));
	if (ret < sizeof(hdr)) {
		PERROR("Reading index header");
		goto error;
	}
	if (be32toh(hdr.magic) != CTF_INDEX_MAGIC) {
		ERR("Invalid header magic");
		ret = -1;
		goto error;
	}
	if (be32toh(hdr.index_major) != CTF_INDEX_MAJOR ||
			be32toh(hdr.index_minor) != CTF_INDEX_MINOR) {
		ERR("Invalid header version");
		ret = -1;
		goto error;
	}
	ret = 0;

error:
	return ret;
}

/*
 * Allocate and init a new viewer_stream.
 *
 * Copies the values from the stream passed as a parameter and inserts the new
 * stream in the viewer_streams_ht.
 *
 * MUST be called with rcu_read_lock held.
 *
 * Returns 0 on success or a negative value on error.
 */
static
int init_viewer_stream(struct relay_stream *stream, int seek_last)
{
	int ret;
	struct relay_viewer_stream *viewer_stream;

	assert(stream);

	viewer_stream = zmalloc(sizeof(*viewer_stream));
	if (!viewer_stream) {
		PERROR("relay viewer stream zmalloc");
		ret = -1;
		goto error;
	}
	viewer_stream->session_id = stream->session->id;
	viewer_stream->stream_handle = stream->stream_handle;
	viewer_stream->path_name = strndup(stream->path_name,
			LTTNG_VIEWER_PATH_MAX);
	viewer_stream->channel_name = strndup(stream->channel_name,
			LTTNG_VIEWER_NAME_MAX);
	viewer_stream->tracefile_count = stream->tracefile_count;
	viewer_stream->metadata_flag = stream->metadata_flag;
	viewer_stream->tracefile_count_last = -1ULL;
	if (seek_last) {
		viewer_stream->tracefile_count_current =
				stream->tracefile_count_current;
	} else {
		viewer_stream->tracefile_count_current =
				stream->oldest_tracefile_id;
	}

	viewer_stream->ctf_trace = stream->ctf_trace;
	if (viewer_stream->metadata_flag) {
		viewer_stream->ctf_trace->viewer_metadata_stream =
				viewer_stream;
	}
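	/* Hold a reference on the ctf_trace for the lifetime of this viewer stream. */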
	uatomic_inc(&viewer_stream->ctf_trace->refcount);

	lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
	lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);

	viewer_stream->index_read_fd = -1;
	viewer_stream->read_fd = -1;

	/*
	 * This is to avoid a race between the initialization of this object and
	 * the close of the given stream. If the stream is unable to find this
	 * viewer stream when closing, this copy will at least take the latest
	 * value. We also need this value for the seek_last behaviour.
	 */
	viewer_stream->total_index_received = stream->total_index_received;

	/*
	 * If we never received an index for the current stream, delay
	 * the opening of the index, otherwise open it right now.
	 */
	if (viewer_stream->tracefile_count_current ==
			stream->tracefile_count_current &&
			viewer_stream->total_index_received == 0) {
		viewer_stream->index_read_fd = -1;
	} else {
		ret = open_index(viewer_stream);
		if (ret < 0) {
			goto error;
		}
	}

	if (seek_last && viewer_stream->index_read_fd > 0) {
		ret = lseek(viewer_stream->index_read_fd,
				viewer_stream->total_index_received *
					sizeof(struct ctf_packet_index),
				SEEK_CUR);
		if (ret < 0) {
			goto error;
		}
		viewer_stream->last_sent_index =
				viewer_stream->total_index_received;
	}

	ret = 0;

error:
	return ret;
}

/*
 * Rotate a stream to the next tracefile.
 *
 * Returns 0 on success, 1 on EOF, a negative value on error.
 */
static
int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream,
		struct relay_stream *stream)
{
	int ret;
	uint64_t tracefile_id;

	assert(viewer_stream);

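	/*
	 * Tracefiles form a ring: the next file to read is the current index
	 * plus one, modulo the tracefile count.
	 */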
	tracefile_id = (viewer_stream->tracefile_count_current + 1) %
			viewer_stream->tracefile_count;
	/*
	 * Detect the last tracefile to open.
	 */
	if (viewer_stream->tracefile_count_last != -1ULL &&
			viewer_stream->tracefile_count_last ==
			viewer_stream->tracefile_count_current) {
		ret = 1;
		goto end;
	}

	if (stream) {
		pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
	}
	/*
	 * The writer and the reader are not working in the same
	 * tracefile, we can read up to EOF, we don't care about the
	 * total_index_received.
	 */
	if (!stream || (stream->tracefile_count_current != tracefile_id)) {
		viewer_stream->close_write_flag = 1;
	} else {
		/*
		 * We are opening a file that is still open in write, make
		 * sure we limit our reading to the number of indexes
		 * received.
		 */
		viewer_stream->close_write_flag = 0;
		if (stream) {
			viewer_stream->total_index_received =
					stream->total_index_received;
		}
	}
	viewer_stream->tracefile_count_current = tracefile_id;

	ret = close(viewer_stream->index_read_fd);
	if (ret < 0) {
		PERROR("close index file %d",
				viewer_stream->index_read_fd);
	}
	viewer_stream->index_read_fd = -1;
	ret = close(viewer_stream->read_fd);
	if (ret < 0) {
		PERROR("close tracefile %d",
				viewer_stream->read_fd);
	}
	viewer_stream->read_fd = -1;

	pthread_mutex_lock(&viewer_stream->overwrite_lock);
	viewer_stream->abort_flag = 0;
	pthread_mutex_unlock(&viewer_stream->overwrite_lock);

	if (stream) {
		pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
	}
	ret = open_index(viewer_stream);
	if (ret < 0) {
		goto error;
	}

	ret = 0;

end:
error:
	return ret;
}

/*
 * Attach a viewer to a session and send it the list of streams.
 */
static
int viewer_attach_session(struct relay_command *cmd,
		struct lttng_ht *sessions_ht)
{
	int ret, send_streams = 0;
	uint32_t nb_streams = 0, nb_streams_ready = 0;
	struct lttng_viewer_attach_session_request request;
	struct lttng_viewer_attach_session_response response;
	struct lttng_viewer_stream send_stream;
	struct relay_stream *stream;
	struct relay_viewer_stream *viewer_stream;
	struct lttng_ht_node_ulong *node;
	struct lttng_ht_node_u64 *node64;
	struct lttng_ht_iter iter;
	struct relay_session *session;
	int seek_last = 0;

	assert(cmd);
	assert(sessions_ht);

	DBG("Attach session received");

	if (cmd->version_check_done == 0) {
		ERR("Trying to attach session before version check");
		ret = -1;
		goto end_no_session;
	}

	health_code_update();

	ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0);
	if (ret < 0 || ret != sizeof(request)) {
		if (ret == 0) {
			/* Orderly shutdown. Not necessary to print an error. */
			DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
		} else {
			ERR("Relay failed to receive the attach parameters.");
		}
		ret = -1;
		goto error;
	}

	health_code_update();

	rcu_read_lock();
	lttng_ht_lookup(sessions_ht,
			(void *)((unsigned long) be64toh(request.session_id)), &iter);
	node = lttng_ht_iter_get_node_ulong(&iter);
	if (node == NULL) {
		DBG("Relay session %" PRIu64 " not found",
				be64toh(request.session_id));
		response.status = htobe32(VIEWER_ATTACH_UNK);
		goto send_reply;
	}

	session = caa_container_of(node, struct relay_session, session_n);
	if (cmd->session_id == session->id) {
		/* Same viewer already attached, just send the stream list. */
		send_streams = 1;
		response.status = htobe32(VIEWER_ATTACH_OK);
	} else if (session->viewer_attached != 0) {
		DBG("Already a viewer attached");
		response.status = htobe32(VIEWER_ATTACH_ALREADY);
		goto send_reply;
	} else if (session->live_timer == 0) {
		DBG("Not live session");
		response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
		goto send_reply;
	} else {
		session->viewer_attached++;
		send_streams = 1;
		response.status = htobe32(VIEWER_ATTACH_OK);
		cmd->session_id = session->id;
		cmd->session = session;
	}

	switch (be32toh(request.seek)) {
	case VIEWER_SEEK_BEGINNING:
		/* Default behaviour. */
		break;
	case VIEWER_SEEK_LAST:
		seek_last = 1;
		break;
	default:
		ERR("Wrong seek parameter");
		response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
		send_streams = 0;
		goto send_reply;
	}

	if (send_streams) {
		/* We should only be here if we have a session to attach to. */
		assert(session);

		/*
		 * Fill the viewer_streams_ht to count the number of streams ready
		 * to be sent, and avoid concurrency issues on the relay_streams_ht;
		 * do not rely on a total session stream count.
		 */
		cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) {
			struct relay_viewer_stream *vstream;

			health_code_update();

			node = lttng_ht_iter_get_node_ulong(&iter);
			if (!node) {
				continue;
			}
			stream = caa_container_of(node, struct relay_stream, stream_n);
			if (stream->session != cmd->session) {
				continue;
			}
			nb_streams++;

			/*
			 * Don't send streams with no ctf_trace; they are not
			 * ready to be read.
			 */
			if (!stream->ctf_trace || !stream->viewer_ready) {
				continue;
			}
			nb_streams_ready++;

			vstream = live_find_viewer_stream_by_id(stream->stream_handle);
			if (!vstream) {
				ret = init_viewer_stream(stream, seek_last);
				if (ret < 0) {
					goto end_unlock;
				}
			}
		}

		/* The number of existing streams must match the number of ready streams. */
		if (nb_streams != nb_streams_ready) {
			nb_streams = 0;
		}
		response.streams_count = htobe32(nb_streams);
	}

send_reply:
	health_code_update();
	ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0);
	if (ret < 0) {
		ERR("Relay sending viewer attach response");
		goto end_unlock;
	}
	health_code_update();

	/*
	 * Unknown or empty session: just return gracefully, the viewer knows
	 * what is happening.
	 */
	if (!send_streams || !nb_streams) {
		ret = 0;
		goto end_unlock;
	}

	/* We should only be here if we have a session to attach to. */
	assert(session);
	cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
		health_code_update();

		node64 = lttng_ht_iter_get_node_u64(&iter);
		if (!node64) {
			continue;
		}
		viewer_stream = caa_container_of(node64, struct relay_viewer_stream,
				stream_n);
		if (viewer_stream->session_id != cmd->session->id) {
			continue;
		}

		send_stream.id = htobe64(viewer_stream->stream_handle);
		send_stream.ctf_trace_id = htobe64(viewer_stream->ctf_trace->id);
		send_stream.metadata_flag = htobe32(viewer_stream->metadata_flag);
		strncpy(send_stream.path_name, viewer_stream->path_name,
				sizeof(send_stream.path_name));
		strncpy(send_stream.channel_name, viewer_stream->channel_name,
				sizeof(send_stream.channel_name));

		ret = cmd->sock->ops->sendmsg(cmd->sock, &send_stream,
				sizeof(send_stream), 0);
		if (ret < 0) {
			ERR("Relay sending stream %" PRIu64, viewer_stream->stream_handle);
			goto end_unlock;
		}
		DBG("Sent stream %" PRIu64 " to viewer", viewer_stream->stream_handle);
	}
	ret = 0;

end_unlock:
	rcu_read_unlock();
end_no_session:
error:
	return ret;
}

/*
 * Get viewer stream from stream id.
 *
 * RCU read side lock MUST be acquired.
 */
struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id)
{
	struct lttng_ht_node_u64 *node;
	struct lttng_ht_iter iter;
	struct relay_viewer_stream *stream = NULL;

	lttng_ht_lookup(viewer_streams_ht, &stream_id, &iter);
	node = lttng_ht_iter_get_node_u64(&iter);
	if (node == NULL) {
		DBG("Relay viewer stream %" PRIu64 " not found", stream_id);
		goto end;
	}
	stream = caa_container_of(node, struct relay_viewer_stream, stream_n);

end:
	return stream;
}

static
void deferred_free_viewer_stream(struct rcu_head *head)
{
	struct relay_viewer_stream *stream =
		caa_container_of(head, struct relay_viewer_stream, rcu_node);

	free(stream->path_name);
	free(stream->channel_name);
	free(stream);
}

static
void delete_viewer_stream(struct relay_viewer_stream *vstream)
{
	int delret;
	struct lttng_ht_iter iter;

	iter.iter.node = &vstream->stream_n.node;
	delret = lttng_ht_del(viewer_streams_ht, &iter);
	assert(!delret);
}

static
void destroy_viewer_stream(struct relay_viewer_stream *vstream)
{
	unsigned long ret_ref;
	int ret;

	assert(vstream);
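	/* Drop our reference on the ctf_trace; ret_ref holds the remaining count. */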
	ret_ref = uatomic_add_return(&vstream->ctf_trace->refcount, -1);
	assert(ret_ref >= 0);

	if (vstream->read_fd >= 0) {
		ret = close(vstream->read_fd);
		if (ret < 0) {
			PERROR("close read_fd");
		}
	}
	if (vstream->index_read_fd >= 0) {
		ret = close(vstream->index_read_fd);
		if (ret < 0) {
			PERROR("close index_read_fd");
		}
	}

	/*
	 * If the only stream left in the HT is the metadata stream,
	 * we need to remove it because we won't detect an EOF for this
	 * stream.
	 */
	if (ret_ref == 1 && vstream->ctf_trace->viewer_metadata_stream) {
		delete_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
		destroy_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
		vstream->ctf_trace->metadata_stream = NULL;
		DBG("Freeing ctf_trace %" PRIu64, vstream->ctf_trace->id);
		/*
		 * The streaming-side is already closed and we can't receive a new
		 * stream concurrently at this point (since the session is being
		 * destroyed), so when we detect the refcount equals 0, we are the
		 * only owners of the ctf_trace and we can free it ourself.
		 */
		free(vstream->ctf_trace);
	}

	call_rcu(&vstream->rcu_node, deferred_free_viewer_stream);
}

/*
 * Send the next index for a stream.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_get_next_index(struct relay_command *cmd,
		struct lttng_ht *sessions_ht)
{
	int ret;
	struct lttng_viewer_get_next_index request_index;
	struct lttng_viewer_index viewer_index;
	struct ctf_packet_index packet_index;
	struct relay_viewer_stream *vstream;
	struct relay_stream *rstream;

	assert(cmd);
	assert(sessions_ht);

	DBG("Viewer get next index");

	if (cmd->version_check_done == 0) {
		ERR("Trying to request index before version check");
		ret = -1;
		goto end_no_session;
	}

	health_code_update();
	ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index,
			sizeof(request_index), 0);
	if (ret < 0 || ret != sizeof(request_index)) {
		ret = -1;
		ERR("Relay didn't receive the whole packet");
		goto end;
	}
	health_code_update();

	rcu_read_lock();
	vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id));
	if (!vstream) {
		ret = -1;
		goto end_unlock;
	}

	memset(&viewer_index, 0, sizeof(viewer_index));

	/*
	 * The viewer should not ask for an index on a metadata stream.
	 */
	if (vstream->metadata_flag) {
		viewer_index.status = htobe32(VIEWER_INDEX_HUP);
		goto send_reply;
	}

	/* First time, we open the index file */
	if (vstream->index_read_fd < 0) {
		ret = open_index(vstream);
		if (ret == -ENOENT) {
			/*
			 * The index is created only when the first data packet arrives; it
			 * might not be ready at the beginning of the session.
			 */
			viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
			goto send_reply;
		} else if (ret < 0) {
			viewer_index.status = htobe32(VIEWER_INDEX_ERR);
			goto send_reply;
		}
	}

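	/*
	 * The relay stream may already be gone if the streaming side closed it;
	 * in that case we keep serving whatever is left in the index file.
	 */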
	rstream = relay_stream_find_by_id(vstream->stream_handle);
	if (rstream) {
		if (vstream->abort_flag) {
			/* Rotate on abort (overwrite). */
			DBG("Viewer rotate because of overwrite");
			ret = rotate_viewer_stream(vstream, rstream);
			if (ret < 0) {
				goto end_unlock;
			} else if (ret == 1) {
				viewer_index.status = htobe32(VIEWER_INDEX_HUP);
				delete_viewer_stream(vstream);
				destroy_viewer_stream(vstream);
				goto send_reply;
			}
		}
		pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
		if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
			if (rstream->beacon_ts_end != -1ULL &&
					vstream->last_sent_index == rstream->total_index_received) {
				viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
				viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
				pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
				goto send_reply;
			/*
			 * Reader and writer are working in the same tracefile, so we care
			 * about the number of index received and sent. Otherwise, we read
			 * up to EOF.
			 */
			} else if (rstream->total_index_received <= vstream->last_sent_index
					&& !vstream->close_write_flag) {
				pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
				/* No new index to send, retry later. */
				viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
				goto send_reply;
			}
		}
		pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
	} else if (!rstream && vstream->close_write_flag &&
			vstream->total_index_received == vstream->last_sent_index) {
		/* Last index sent and current tracefile closed in write */
		viewer_index.status = htobe32(VIEWER_INDEX_HUP);
		delete_viewer_stream(vstream);
		destroy_viewer_stream(vstream);
		goto send_reply;
	} else {
		vstream->close_write_flag = 1;
	}

	if (!vstream->ctf_trace->metadata_received ||
			vstream->ctf_trace->metadata_received >
			vstream->ctf_trace->metadata_sent) {
		viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
	}

	pthread_mutex_lock(&vstream->overwrite_lock);
	if (vstream->abort_flag) {
		/*
		 * The file is being overwritten by the writer, we cannot
		 * use it.
		 */
		viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
		pthread_mutex_unlock(&vstream->overwrite_lock);
		ret = rotate_viewer_stream(vstream, rstream);
		if (ret < 0) {
			goto end_unlock;
		} else if (ret == 1) {
			viewer_index.status = htobe32(VIEWER_INDEX_HUP);
			delete_viewer_stream(vstream);
			destroy_viewer_stream(vstream);
			goto send_reply;
		}
		goto send_reply;
	}
	ret = lttng_read(vstream->index_read_fd, &packet_index,
			sizeof(packet_index));
	pthread_mutex_unlock(&vstream->overwrite_lock);
	if (ret < sizeof(packet_index)) {
		/*
		 * The tracefile is closed in write, so we read up to EOF.
		 */
		if (vstream->close_write_flag == 1) {
			viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
			/* Rotate on normal EOF */
			ret = rotate_viewer_stream(vstream, rstream);
			if (ret < 0) {
				goto end_unlock;
			} else if (ret == 1) {
				viewer_index.status = htobe32(VIEWER_INDEX_HUP);
				delete_viewer_stream(vstream);
				destroy_viewer_stream(vstream);
				goto send_reply;
			}
		} else {
			PERROR("Relay reading index file %d",
					vstream->index_read_fd);
			viewer_index.status = htobe32(VIEWER_INDEX_ERR);
		}
		goto send_reply;
	} else {
		viewer_index.status = htobe32(VIEWER_INDEX_OK);
		vstream->last_sent_index++;
	}

	/*
	 * Indexes are stored in big endian, no need to switch before sending.
	 */
	viewer_index.offset = packet_index.offset;
	viewer_index.packet_size = packet_index.packet_size;
	viewer_index.content_size = packet_index.content_size;
	viewer_index.timestamp_begin = packet_index.timestamp_begin;
	viewer_index.timestamp_end = packet_index.timestamp_end;
	viewer_index.events_discarded = packet_index.events_discarded;
	viewer_index.stream_id = packet_index.stream_id;

send_reply:
	viewer_index.flags = htobe32(viewer_index.flags);
	health_code_update();
	ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index,
			sizeof(viewer_index), 0);
	if (ret < 0) {
		ERR("Relay index to viewer");
		goto end_unlock;
	}
	health_code_update();

	DBG("Index %" PRIu64 " for stream %" PRIu64 " sent",
			vstream->last_sent_index, vstream->stream_handle);

end_unlock:
	rcu_read_unlock();

end_no_session:
end:
	return ret;
}

/*
 * Send the requested data packet for a stream.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_get_packet(struct relay_command *cmd)
{
	int ret, send_data = 0;
	char *data = NULL;
	uint32_t len = 0;
	ssize_t read_len;
	struct lttng_viewer_get_packet get_packet_info;
	struct lttng_viewer_trace_packet reply;
	struct relay_viewer_stream *stream;

	assert(cmd);

	DBG2("Relay get data packet");

	if (cmd->version_check_done == 0) {
		ERR("Trying to get packet before version check");
		ret = -1;
		goto end;
	}

	health_code_update();
	ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info,
			sizeof(get_packet_info), 0);
	if (ret < 0 || ret != sizeof(get_packet_info)) {
		ret = -1;
		ERR("Relay didn't receive the whole packet");
		goto end;
	}
	health_code_update();

	/* From this point on, the error label can be reached. */
	memset(&reply, 0, sizeof(reply));

	rcu_read_lock();
	stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id));
	if (!stream) {
		goto error;
	}
	assert(stream->ctf_trace);

	/*
	 * The first time we read this stream, we need to open the tracefile.
	 * We should only arrive here if an index has already been sent to the
	 * viewer, so the tracefile must exist; if it does not, it is a fatal
	 * error.
	 */
	if (stream->read_fd < 0) {
		char fullpath[PATH_MAX];

		if (stream->tracefile_count > 0) {
			ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
					stream->channel_name,
					stream->tracefile_count_current);
		} else {
			ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
					stream->channel_name);
		}
		if (ret < 0) {
			goto error;
		}
		ret = open(fullpath, O_RDONLY);
		if (ret < 0) {
			PERROR("Relay opening trace file");
			goto error;
		}
		stream->read_fd = ret;
	}

	if (!stream->ctf_trace->metadata_received ||
			stream->ctf_trace->metadata_received >
			stream->ctf_trace->metadata_sent) {
		reply.status = htobe32(VIEWER_GET_PACKET_ERR);
		reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
		goto send_reply;
	}

	len = be32toh(get_packet_info.len);
	data = zmalloc(len);
	if (!data) {
		PERROR("relay data zmalloc");
		goto error;
	}

	ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
	if (ret < 0) {
		/*
		 * If the read fd was closed by the streaming side, the
		 * abort_flag will be set to 1, otherwise it is an error.
		 */
		if (stream->abort_flag == 0) {
			PERROR("lseek");
			goto error;
		}
		reply.status = htobe32(VIEWER_GET_PACKET_EOF);
		goto send_reply;
	}
	read_len = lttng_read(stream->read_fd, data, len);
	if (read_len < len) {
		/*
		 * If the read fd was closed by the streaming side, the
		 * abort_flag will be set to 1, otherwise it is an error.
		 */
		if (stream->abort_flag == 0) {
			PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
					stream->read_fd,
					be64toh(get_packet_info.offset));
			goto error;
		} else {
			reply.status = htobe32(VIEWER_GET_PACKET_EOF);
			goto send_reply;
		}
	}
	reply.status = htobe32(VIEWER_GET_PACKET_OK);
	reply.len = htobe32(len);
	send_data = 1;
	goto send_reply;

error:
	reply.status = htobe32(VIEWER_GET_PACKET_ERR);

send_reply:
	reply.flags = htobe32(reply.flags);

	health_code_update();
	ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
	if (ret < 0) {
		ERR("Relay data header to viewer");
		goto end_unlock;
	}
	health_code_update();

	if (send_data) {
		health_code_update();
		ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
		if (ret < 0) {
			ERR("Relay send data to viewer");
			goto end_unlock;
		}
		health_code_update();
	}

	DBG("Sent %u bytes for stream %" PRIu64, len,
			be64toh(get_packet_info.stream_id));

end_unlock:
	free(data);
	rcu_read_unlock();

end:
	return ret;
}

/*
 * Send the session's metadata.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_get_metadata(struct relay_command *cmd)
{
	int ret = 0;
	ssize_t read_len;
	uint64_t len = 0;
	char *data = NULL;
	struct lttng_viewer_get_metadata request;
	struct lttng_viewer_metadata_packet reply;
	struct relay_viewer_stream *stream;

	assert(cmd);

	DBG("Relay get metadata");

	if (cmd->version_check_done == 0) {
		ERR("Trying to get metadata before version check");
		ret = -1;
		goto end;
	}

	health_code_update();
	ret = cmd->sock->ops->recvmsg(cmd->sock, &request,
			sizeof(request), 0);
	if (ret < 0 || ret != sizeof(request)) {
		ret = -1;
		ERR("Relay didn't receive the whole packet");
		goto end;
	}
	health_code_update();

	rcu_read_lock();
	stream = live_find_viewer_stream_by_id(be64toh(request.stream_id));
	if (!stream || !stream->metadata_flag) {
		ERR("Invalid metadata stream");
		goto error;
	}
	assert(stream->ctf_trace);
	assert(stream->ctf_trace->metadata_sent <=
			stream->ctf_trace->metadata_received);

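	/* Only the delta between received and sent metadata is shipped to the viewer. */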
	len = stream->ctf_trace->metadata_received -
			stream->ctf_trace->metadata_sent;
	if (len == 0) {
		reply.status = htobe32(VIEWER_NO_NEW_METADATA);
		goto send_reply;
	}

	/* First time, we open the metadata file. */
	if (stream->read_fd < 0) {
		char fullpath[PATH_MAX];

		ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
				stream->channel_name);
		if (ret < 0) {
			goto error;
		}
		ret = open(fullpath, O_RDONLY);
		if (ret < 0) {
			PERROR("Relay opening metadata file");
			goto error;
		}
		stream->read_fd = ret;
	}

	reply.len = htobe64(len);
	data = zmalloc(len);
	if (!data) {
		PERROR("viewer metadata zmalloc");
		goto error;
	}

	read_len = lttng_read(stream->read_fd, data, len);
	if (read_len < len) {
		PERROR("Relay reading metadata file");
		goto error;
	}
	stream->ctf_trace->metadata_sent += read_len;
	reply.status = htobe32(VIEWER_METADATA_OK);
	goto send_reply;

error:
	reply.status = htobe32(VIEWER_METADATA_ERR);

send_reply:
	health_code_update();
	ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
	if (ret < 0) {
		ERR("Relay data header to viewer");
		goto end_unlock;
	}
	health_code_update();

	if (len > 0) {
		ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
		if (ret < 0) {
			ERR("Relay send data to viewer");
			goto end_unlock;
		}
	}

	DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
			be64toh(request.stream_id));

	DBG("Metadata sent");

end_unlock:
	free(data);
	rcu_read_unlock();
end:
	return ret;
}

/*
 * live_relay_unknown_command: send -1 if an unknown command is received
 */
static
void live_relay_unknown_command(struct relay_command *cmd)
{
	struct lttcomm_relayd_generic_reply reply;
	int ret;

	reply.ret_code = htobe32(LTTNG_ERR_UNK);
	ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
			sizeof(struct lttcomm_relayd_generic_reply), 0);
	if (ret < 0) {
		ERR("Relay sending unknown command");
	}
}

/*
 * Process the commands received on the control socket
 */
static
int process_control(struct lttng_viewer_cmd *recv_hdr,
		struct relay_command *cmd, struct lttng_ht *sessions_ht)
{
	int ret = 0;

	switch (be32toh(recv_hdr->cmd)) {
	case VIEWER_CONNECT:
		ret = viewer_connect(cmd);
		break;
	case VIEWER_LIST_SESSIONS:
		ret = viewer_list_sessions(cmd, sessions_ht);
		break;
	case VIEWER_ATTACH_SESSION:
		ret = viewer_attach_session(cmd, sessions_ht);
		break;
	case VIEWER_GET_NEXT_INDEX:
		ret = viewer_get_next_index(cmd, sessions_ht);
		break;
	case VIEWER_GET_PACKET:
		ret = viewer_get_packet(cmd);
		break;
	case VIEWER_GET_METADATA:
		ret = viewer_get_metadata(cmd);
		break;
	default:
		ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
		live_relay_unknown_command(cmd);
		ret = -1;
		goto end;
	}

end:
	return ret;
}

static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	int ret;

	assert(events);

	lttng_poll_del(events, pollfd);

	ret = close(pollfd);
	if (ret < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}

/*
 * Create and add connection to the given hash table.
 *
 * Return poll add value or else -1 on error.
 */
static
int add_connection(int fd, struct lttng_poll_event *events,
		struct lttng_ht *relay_connections_ht)
{
	int ret;
	struct relay_command *relay_connection;

	assert(events);
	assert(relay_connections_ht);

	relay_connection = zmalloc(sizeof(struct relay_command));
	if (relay_connection == NULL) {
		PERROR("Relay command zmalloc");
		goto error;
	}

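	/*
	 * The dispatcher thread wrote a full struct relay_command on the cmd
	 * pipe; read it back in one piece here.
	 */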
	ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
	if (ret < sizeof(*relay_connection)) {
		PERROR("read relay cmd pipe");
		goto error_read;
	}

	lttng_ht_node_init_ulong(&relay_connection->sock_n,
			(unsigned long) relay_connection->sock->fd);
	rcu_read_lock();
	lttng_ht_add_unique_ulong(relay_connections_ht,
			&relay_connection->sock_n);
	rcu_read_unlock();

	return lttng_poll_add(events, relay_connection->sock->fd,
			LPOLLIN | LPOLLRDHUP);

error_read:
	free(relay_connection);
error:
	return -1;
}

static
void deferred_free_connection(struct rcu_head *head)
{
	struct relay_command *relay_connection =
		caa_container_of(head, struct relay_command, rcu_node);

	if (relay_connection->session &&
			relay_connection->session->viewer_attached > 0) {
		relay_connection->session->viewer_attached--;
	}
	lttcomm_destroy_sock(relay_connection->sock);
	free(relay_connection);
}

/*
 * Delete all streams for a specific session ID.
 */
static
void viewer_del_streams(uint64_t session_id)
{
	struct relay_viewer_stream *stream;
	struct lttng_ht_iter iter;

	rcu_read_lock();
	cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream,
			stream_n.node) {
		health_code_update();

		if (stream->session_id != session_id) {
			continue;
		}

		delete_viewer_stream(stream);
		assert(stream->ctf_trace);

		if (stream->metadata_flag) {
			/*
			 * The metadata viewer stream is destroyed once the refcount on the
			 * ctf trace goes to 0 in the destroy stream function thus there is
			 * no explicit call to that function here.
			 */
			stream->ctf_trace->metadata_sent = 0;
			stream->ctf_trace->viewer_metadata_stream = NULL;
		} else {
			destroy_viewer_stream(stream);
		}
	}
	rcu_read_unlock();
}

/*
 * Delete and free a connection.
 *
 * RCU read side lock MUST be acquired.
 */
static
void del_connection(struct lttng_ht *relay_connections_ht,
		struct lttng_ht_iter *iter, struct relay_command *relay_connection)
{
	int ret;

	assert(relay_connections_ht);
	assert(iter);
	assert(relay_connection);

	DBG("Cleaning connection of session ID %" PRIu64,
			relay_connection->session_id);

	ret = lttng_ht_del(relay_connections_ht, iter);
	assert(!ret);

	viewer_del_streams(relay_connection->session_id);

	call_rcu(&relay_connection->rcu_node, deferred_free_connection);
}

/*
 * This thread does the actual work: it services the viewer requests
 * received on the live sockets.
 */
static
void *thread_worker(void *data)
{
	int ret, err = -1;
	uint32_t nb_fd;
	struct relay_command *relay_connection;
	struct lttng_poll_event events;
	struct lttng_ht *relay_connections_ht;
	struct lttng_ht_node_ulong *node;
	struct lttng_ht_iter iter;
	struct lttng_viewer_cmd recv_hdr;
	struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
	struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;

	DBG("[thread] Live viewer relay worker started");

	rcu_register_thread();

	health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);

	/* table of connections indexed on socket */
	relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
	if (!relay_connections_ht) {
		goto relay_connections_ht_error;
	}

	ret = create_thread_poll_set(&events, 2);
	if (ret < 0) {
		goto error_poll_create;
	}

	ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
	if (ret < 0) {
		goto error;
	}

restart:
	while (1) {
		int i;

		health_code_update();

		/* Infinite blocking call, waiting for transmission */
		DBG3("Relayd live viewer worker thread polling...");
		health_poll_entry();
		ret = lttng_poll_wait(&events, -1);
		health_poll_exit();
		if (ret < 0) {
			/*
			 * Restart interrupted system call.
			 */
			if (errno == EINTR) {
				goto restart;
			}
			goto error;
		}

		nb_fd = ret;

		/*
		 * Process control. The control connection is prioritised so we don't
		 * starve it with high throughput tracing data on the data
		 * connection.
		 */
		for (i = 0; i < nb_fd; i++) {
			/* Fetch once the poll data */
			uint32_t revents = LTTNG_POLL_GETEV(&events, i);
			int pollfd = LTTNG_POLL_GETFD(&events, i);

			health_code_update();

			/* Thread quit pipe has been closed. Killing thread. */
			ret = check_thread_quit_pipe(pollfd, revents);
			if (ret) {
				err = 0;
				goto exit;
			}

			/* Inspect the relay cmd pipe for new connection */
			if (pollfd == live_relay_cmd_pipe[0]) {
				if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
					ERR("Relay live pipe error");
					goto error;
				} else if (revents & LPOLLIN) {
					DBG("Relay live viewer command received");
					ret = add_connection(live_relay_cmd_pipe[0],
							&events, relay_connections_ht);
					if (ret < 0) {
						goto error;
					}
				}
			} else if (revents) {
				rcu_read_lock();
				lttng_ht_lookup(relay_connections_ht,
						(void *)((unsigned long) pollfd), &iter);
				node = lttng_ht_iter_get_node_ulong(&iter);
				if (node == NULL) {
					DBG2("Relay viewer sock %d not found", pollfd);
					rcu_read_unlock();
					goto error;
				}
				relay_connection = caa_container_of(node, struct relay_command,
						sock_n);

				if (revents & (LPOLLERR)) {
					cleanup_poll_connection(&events, pollfd);
					del_connection(relay_connections_ht, &iter,
							relay_connection);
				} else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
					DBG("Viewer socket %d hung up", pollfd);
					cleanup_poll_connection(&events, pollfd);
					del_connection(relay_connections_ht, &iter,
							relay_connection);
				} else if (revents & LPOLLIN) {
					ret = relay_connection->sock->ops->recvmsg(
							relay_connection->sock, &recv_hdr,
							sizeof(struct lttng_viewer_cmd),
							0);
					/* connection closed */
					if (ret <= 0) {
						cleanup_poll_connection(&events, pollfd);
						del_connection(relay_connections_ht, &iter,
								relay_connection);
						DBG("Viewer control connection closed with %d",
								pollfd);
					} else {
						if (relay_connection->session) {
							DBG2("Relay viewer worker receiving data for "
									"session: %" PRIu64,
									relay_connection->session->id);
						}
						ret = process_control(&recv_hdr, relay_connection,
								sessions_ht);
						if (ret < 0) {
							/* Clear the session on error. */
							cleanup_poll_connection(&events, pollfd);
							del_connection(relay_connections_ht, &iter,
									relay_connection);
							DBG("Viewer connection closed with %d", pollfd);
						}
					}
				}
				rcu_read_unlock();
			}
		}
	}

exit:
error:
	lttng_poll_clean(&events);

	/* empty the hash table and free the memory */
	rcu_read_lock();
	cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
		health_code_update();

		node = lttng_ht_iter_get_node_ulong(&iter);
		if (!node) {
			continue;
		}

		relay_connection = caa_container_of(node, struct relay_command,
				sock_n);
		del_connection(relay_connections_ht, &iter, relay_connection);
	}
	rcu_read_unlock();
error_poll_create:
	lttng_ht_destroy(relay_connections_ht);
relay_connections_ht_error:
	/* Close relay cmd pipes */
	utils_close_pipe(live_relay_cmd_pipe);
	if (err) {
		DBG("Viewer worker thread exited with error");
	}
	DBG("Viewer worker thread cleanup complete");
	if (err) {
		health_error();
		ERR("Health error occurred in %s", __func__);
	}
	health_unregister(health_relayd);
	stop_threads();
	rcu_unregister_thread();
	return NULL;
}

/*
 * Create the relay command pipe to wake the live worker thread.
 * Closed at the end of the worker thread.
 */
static int create_relay_cmd_pipe(void)
{
	int ret;

	ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);

	return ret;
}

void live_stop_threads(void)
{
	int ret;
	void *status;

	stop_threads();

	ret = pthread_join(live_listener_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live listener");
		goto error;	/* join error, exit without cleanup */
	}

	ret = pthread_join(live_worker_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live worker");
		goto error;	/* join error, exit without cleanup */
	}

	ret = pthread_join(live_dispatcher_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live dispatcher");
		goto error;	/* join error, exit without cleanup */
	}

	cleanup();

error:
	return;
}

/*
 * Start the live viewer threads.
 */
int live_start_threads(struct lttng_uri *uri,
		struct relay_local_data *relay_ctx)
{
	int ret = 0;
	void *status;
	int is_root;

	assert(uri);
	live_uri = uri;

	/* Check if daemon is UID = 0 */
	is_root = !getuid();

	if (!is_root) {
		if (live_uri->port < 1024) {
			ERR("Need to be root to use ports < 1024");
			ret = -1;
			goto exit;
		}
	}

	/* Setup the live viewer command pipe. */
	if ((ret = create_relay_cmd_pipe()) < 0) {
		goto exit;
	}

	/* Init relay command queue. */
	cds_wfq_init(&viewer_cmd_queue.queue);

	/* Set up max poll set size */
	lttng_poll_set_max_size();

	/* Setup the dispatcher thread */
	ret = pthread_create(&live_dispatcher_thread, NULL,
			thread_dispatcher, (void *) NULL);
	if (ret != 0) {
		PERROR("pthread_create viewer dispatcher");
		goto exit_dispatcher;
	}

	/* Setup the worker thread */
	ret = pthread_create(&live_worker_thread, NULL,
			thread_worker, relay_ctx);
	if (ret != 0) {
		PERROR("pthread_create viewer worker");
		goto exit_worker;
	}

	/* Setup the listener thread */
	ret = pthread_create(&live_listener_thread, NULL,
			thread_listener, (void *) NULL);
	if (ret != 0) {
		PERROR("pthread_create viewer listener");
		goto exit_listener;
	}

	ret = 0;
	goto end;

exit_listener:
	ret = pthread_join(live_listener_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live listener");
		goto error;	/* join error, exit without cleanup */
	}

exit_worker:
	ret = pthread_join(live_worker_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live worker");
		goto error;	/* join error, exit without cleanup */
	}

exit_dispatcher:
	ret = pthread_join(live_dispatcher_thread, &status);
	if (ret != 0) {
		PERROR("pthread_join live dispatcher");
		goto error;	/* join error, exit without cleanup */
	}

exit:
	cleanup();

end:
error:
	return ret;
}