Protect the abort_flag on reset and always close on rotate
[lttng-tools.git] / src / bin / lttng-relayd / live.c
1 /*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #define _GNU_SOURCE
20 #include <getopt.h>
21 #include <grp.h>
22 #include <limits.h>
23 #include <pthread.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/mount.h>
30 #include <sys/resource.h>
31 #include <sys/socket.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <inttypes.h>
36 #include <urcu/futex.h>
37 #include <urcu/uatomic.h>
38 #include <unistd.h>
39 #include <fcntl.h>
40 #include <config.h>
41
42 #include <lttng/lttng.h>
43 #include <common/common.h>
44 #include <common/compat/poll.h>
45 #include <common/compat/socket.h>
46 #include <common/defaults.h>
47 #include <common/futex.h>
48 #include <common/sessiond-comm/sessiond-comm.h>
49 #include <common/sessiond-comm/inet.h>
50 #include <common/sessiond-comm/relayd.h>
51 #include <common/uri.h>
52 #include <common/utils.h>
53
54 #include "cmd.h"
55 #include "live.h"
56 #include "lttng-relayd.h"
57 #include "lttng-viewer.h"
58 #include "utils.h"
59 #include "health-relayd.h"
60
/* URI handed to init_socket() by the listener thread; freed in cleanup(). */
static struct lttng_uri *live_uri;

/*
 * Quit pipe for all threads. This permits a single cancellation point
 * for all threads when receiving an event on the pipe.
 */
static int live_thread_quit_pipe[2] = { -1, -1 };

/*
 * This pipe is used to inform the worker thread that a command is queued and
 * ready to be processed.
 */
static int live_relay_cmd_pipe[2] = { -1, -1 };

/*
 * Shared between threads: set to 1 by stop_threads() and read by the
 * dispatcher thread loop condition.
 */
static int live_dispatch_thread_exit;

static pthread_t live_listener_thread;
static pthread_t live_dispatcher_thread;
static pthread_t live_worker_thread;

/*
 * Relay command queue.
 *
 * thread_listener() enqueues accepted connections on this queue and
 * thread_dispatcher() dequeues them.
 */
static struct relay_cmd_queue viewer_cmd_queue;

/* Last viewer session id handed out; pre-incremented in viewer_connect(). */
static uint64_t last_relay_viewer_session_id;
91
92 /*
93 * Cleanup the daemon
94 */
95 static
96 void cleanup(void)
97 {
98 DBG("Cleaning up");
99
100 free(live_uri);
101 }
102
/*
 * Notify a thread by writing a single byte to the write end of its pipe.
 *
 * Returns the lttng_write() result cast to int; any result below 1 is
 * logged as an error.
 */
static int notify_thread_pipe(int wpipe)
{
	const ssize_t written = lttng_write(wpipe, "!", 1);

	if (written < 1) {
		PERROR("write poll pipe");
	}

	return (int) written;
}
118
119 /*
120 * Stop all threads by closing the thread quit pipe.
121 */
122 static
123 void stop_threads(void)
124 {
125 int ret;
126
127 /* Stopping all threads */
128 DBG("Terminating all live threads");
129 ret = notify_thread_pipe(live_thread_quit_pipe[1]);
130 if (ret < 0) {
131 ERR("write error on thread quit pipe");
132 }
133
134 /* Dispatch thread */
135 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
136 futex_nto1_wake(&viewer_cmd_queue.futex);
137 }
138
139 /*
140 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
141 */
142 static
143 int create_thread_poll_set(struct lttng_poll_event *events, int size)
144 {
145 int ret;
146
147 if (events == NULL || size == 0) {
148 ret = -1;
149 goto error;
150 }
151
152 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
153 if (ret < 0) {
154 goto error;
155 }
156
157 /* Add quit pipe */
158 ret = lttng_poll_add(events, live_thread_quit_pipe[0], LPOLLIN);
159 if (ret < 0) {
160 goto error;
161 }
162
163 return 0;
164
165 error:
166 return ret;
167 }
168
169 /*
170 * Check if the thread quit pipe was triggered.
171 *
172 * Return 1 if it was triggered else 0;
173 */
174 static
175 int check_thread_quit_pipe(int fd, uint32_t events)
176 {
177 if (fd == live_thread_quit_pipe[0] && (events & LPOLLIN)) {
178 return 1;
179 }
180
181 return 0;
182 }
183
184 /*
185 * Create and init socket from uri.
186 */
187 static
188 struct lttcomm_sock *init_socket(struct lttng_uri *uri)
189 {
190 int ret;
191 struct lttcomm_sock *sock = NULL;
192
193 sock = lttcomm_alloc_sock_from_uri(uri);
194 if (sock == NULL) {
195 ERR("Allocating socket");
196 goto error;
197 }
198
199 ret = lttcomm_create_sock(sock);
200 if (ret < 0) {
201 goto error;
202 }
203 DBG("Listening on sock %d for live", sock->fd);
204
205 ret = sock->ops->bind(sock);
206 if (ret < 0) {
207 goto error;
208 }
209
210 ret = sock->ops->listen(sock, -1);
211 if (ret < 0) {
212 goto error;
213
214 }
215
216 return sock;
217
218 error:
219 if (sock) {
220 lttcomm_destroy_sock(sock);
221 }
222 return NULL;
223 }
224
225 /*
226 * This thread manages the listening for new connections on the network
227 */
228 static
229 void *thread_listener(void *data)
230 {
231 int i, ret, pollfd, err = -1;
232 int val = 1;
233 uint32_t revents, nb_fd;
234 struct lttng_poll_event events;
235 struct lttcomm_sock *live_control_sock;
236
237 DBG("[thread] Relay live listener started");
238
239 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
240
241 health_code_update();
242
243 live_control_sock = init_socket(live_uri);
244 if (!live_control_sock) {
245 goto error_sock_control;
246 }
247
248 /*
249 * Pass 3 as size here for the thread quit pipe, control and data socket.
250 */
251 ret = create_thread_poll_set(&events, 2);
252 if (ret < 0) {
253 goto error_create_poll;
254 }
255
256 /* Add the control socket */
257 ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
258 if (ret < 0) {
259 goto error_poll_add;
260 }
261
262 while (1) {
263 health_code_update();
264
265 DBG("Listener accepting live viewers connections");
266
267 restart:
268 health_poll_entry();
269 ret = lttng_poll_wait(&events, -1);
270 health_poll_exit();
271 if (ret < 0) {
272 /*
273 * Restart interrupted system call.
274 */
275 if (errno == EINTR) {
276 goto restart;
277 }
278 goto error;
279 }
280 nb_fd = ret;
281
282 DBG("Relay new viewer connection received");
283 for (i = 0; i < nb_fd; i++) {
284 health_code_update();
285
286 /* Fetch once the poll data */
287 revents = LTTNG_POLL_GETEV(&events, i);
288 pollfd = LTTNG_POLL_GETFD(&events, i);
289
290 /* Thread quit pipe has been closed. Killing thread. */
291 ret = check_thread_quit_pipe(pollfd, revents);
292 if (ret) {
293 err = 0;
294 goto exit;
295 }
296
297 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
298 ERR("socket poll error");
299 goto error;
300 } else if (revents & LPOLLIN) {
301 /*
302 * Get allocated in this thread, enqueued to a global queue,
303 * dequeued and freed in the worker thread.
304 */
305 struct relay_command *relay_cmd;
306 struct lttcomm_sock *newsock;
307
308 relay_cmd = zmalloc(sizeof(*relay_cmd));
309 if (!relay_cmd) {
310 PERROR("relay command zmalloc");
311 goto error;
312 }
313
314 assert(pollfd == live_control_sock->fd);
315 newsock = live_control_sock->ops->accept(live_control_sock);
316 if (!newsock) {
317 PERROR("accepting control sock");
318 free(relay_cmd);
319 goto error;
320 }
321 DBG("Relay viewer connection accepted socket %d", newsock->fd);
322 ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
323 sizeof(int));
324 if (ret < 0) {
325 PERROR("setsockopt inet");
326 lttcomm_destroy_sock(newsock);
327 free(relay_cmd);
328 goto error;
329 }
330 relay_cmd->sock = newsock;
331
332 /*
333 * Lock free enqueue the request.
334 */
335 cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);
336
337 /*
338 * Wake the dispatch queue futex. Implicit memory
339 * barrier with the exchange in cds_wfq_enqueue.
340 */
341 futex_nto1_wake(&viewer_cmd_queue.futex);
342 }
343 }
344 }
345
346 exit:
347 error:
348 error_poll_add:
349 lttng_poll_clean(&events);
350 error_create_poll:
351 if (live_control_sock->fd >= 0) {
352 ret = live_control_sock->ops->close(live_control_sock);
353 if (ret) {
354 PERROR("close");
355 }
356 }
357 lttcomm_destroy_sock(live_control_sock);
358 error_sock_control:
359 if (err) {
360 health_error();
361 DBG("Live viewer listener thread exited with error");
362 }
363 health_unregister(health_relayd);
364 DBG("Live viewer listener thread cleanup complete");
365 stop_threads();
366 return NULL;
367 }
368
369 /*
370 * This thread manages the dispatching of the requests to worker threads
371 */
372 static
373 void *thread_dispatcher(void *data)
374 {
375 int err = -1;
376 ssize_t ret;
377 struct cds_wfq_node *node;
378 struct relay_command *relay_cmd = NULL;
379
380 DBG("[thread] Live viewer relay dispatcher started");
381
382 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
383
384 health_code_update();
385
386 while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
387 health_code_update();
388
389 /* Atomically prepare the queue futex */
390 futex_nto1_prepare(&viewer_cmd_queue.futex);
391
392 do {
393 health_code_update();
394
395 /* Dequeue commands */
396 node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
397 if (node == NULL) {
398 DBG("Woken up but nothing in the live-viewer "
399 "relay command queue");
400 /* Continue thread execution */
401 break;
402 }
403
404 relay_cmd = caa_container_of(node, struct relay_command, node);
405 DBG("Dispatching viewer request waiting on sock %d",
406 relay_cmd->sock->fd);
407
408 /*
409 * Inform worker thread of the new request. This call is blocking
410 * so we can be assured that the data will be read at some point in
411 * time or wait to the end of the world :)
412 */
413 ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
414 sizeof(*relay_cmd));
415 free(relay_cmd);
416 if (ret < sizeof(struct relay_command)) {
417 PERROR("write cmd pipe");
418 goto error;
419 }
420 } while (node != NULL);
421
422 /* Futex wait on queue. Blocking call on futex() */
423 health_poll_entry();
424 futex_nto1_wait(&viewer_cmd_queue.futex);
425 health_poll_exit();
426 }
427
428 /* Normal exit, no error */
429 err = 0;
430
431 error:
432 if (err) {
433 health_error();
434 ERR("Health error occurred in %s", __func__);
435 }
436 health_unregister(health_relayd);
437 DBG("Live viewer dispatch thread dying");
438 stop_threads();
439 return NULL;
440 }
441
442 /*
443 * Establish connection with the viewer and check the versions.
444 *
445 * Return 0 on success or else negative value.
446 */
447 static
448 int viewer_connect(struct relay_command *cmd)
449 {
450 int ret;
451 struct lttng_viewer_connect reply, msg;
452
453 assert(cmd);
454
455 cmd->version_check_done = 1;
456
457 health_code_update();
458
459 /* Get version from the other side. */
460 ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0);
461 if (ret < 0 || ret != sizeof(msg)) {
462 if (ret == 0) {
463 /* Orderly shutdown. Not necessary to print an error. */
464 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
465 } else {
466 ERR("Relay failed to receive the version values.");
467 }
468 ret = -1;
469 goto end;
470 }
471
472 health_code_update();
473
474 reply.major = RELAYD_VERSION_COMM_MAJOR;
475 reply.minor = RELAYD_VERSION_COMM_MINOR;
476
477 /* Major versions must be the same */
478 if (reply.major != be32toh(msg.major)) {
479 DBG("Incompatible major versions (%u vs %u)", reply.major,
480 be32toh(msg.major));
481 ret = 0;
482 goto end;
483 }
484
485 cmd->major = reply.major;
486 /* We adapt to the lowest compatible version */
487 if (reply.minor <= be32toh(msg.minor)) {
488 cmd->minor = reply.minor;
489 } else {
490 cmd->minor = be32toh(msg.minor);
491 }
492
493 if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
494 cmd->type = RELAY_VIEWER_COMMAND;
495 } else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
496 cmd->type = RELAY_VIEWER_NOTIFICATION;
497 } else {
498 ERR("Unknown connection type : %u", be32toh(msg.type));
499 ret = -1;
500 goto end;
501 }
502
503 reply.major = htobe32(reply.major);
504 reply.minor = htobe32(reply.minor);
505 if (cmd->type == RELAY_VIEWER_COMMAND) {
506 reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
507 }
508
509 health_code_update();
510
511 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
512 sizeof(struct lttng_viewer_connect), 0);
513 if (ret < 0) {
514 ERR("Relay sending version");
515 }
516
517 health_code_update();
518
519 DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
520 ret = 0;
521
522 end:
523 return ret;
524 }
525
526 /*
527 * Send the viewer the list of current sessions.
528 *
529 * Return 0 on success or else a negative value.
530 */
531 static
532 int viewer_list_sessions(struct relay_command *cmd,
533 struct lttng_ht *sessions_ht)
534 {
535 int ret;
536 struct lttng_viewer_list_sessions session_list;
537 unsigned long count;
538 long approx_before, approx_after;
539 struct lttng_ht_node_ulong *node;
540 struct lttng_ht_iter iter;
541 struct lttng_viewer_session send_session;
542 struct relay_session *session;
543
544 DBG("List sessions received");
545
546 if (cmd->version_check_done == 0) {
547 ERR("Trying to list sessions before version check");
548 ret = -1;
549 goto end_no_session;
550 }
551
552 rcu_read_lock();
553 cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
554 session_list.sessions_count = htobe32(count);
555
556 health_code_update();
557
558 ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list,
559 sizeof(session_list), 0);
560 if (ret < 0) {
561 ERR("Relay sending sessions list");
562 goto end_unlock;
563 }
564
565 health_code_update();
566
567 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) {
568 health_code_update();
569
570 node = lttng_ht_iter_get_node_ulong(&iter);
571 if (!node) {
572 goto end_unlock;
573 }
574 session = caa_container_of(node, struct relay_session, session_n);
575
576 strncpy(send_session.session_name, session->session_name,
577 sizeof(send_session.session_name));
578 strncpy(send_session.hostname, session->hostname,
579 sizeof(send_session.hostname));
580 send_session.id = htobe64(session->id);
581 send_session.live_timer = htobe32(session->live_timer);
582 send_session.clients = htobe32(session->viewer_attached);
583 send_session.streams = htobe32(session->stream_count);
584
585 health_code_update();
586
587 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session,
588 sizeof(send_session), 0);
589 if (ret < 0) {
590 ERR("Relay sending session info");
591 goto end_unlock;
592 }
593 }
594 health_code_update();
595
596 rcu_read_unlock();
597 ret = 0;
598 goto end;
599
600 end_unlock:
601 rcu_read_unlock();
602
603 end:
604 end_no_session:
605 return ret;
606 }
607
608 /*
609 * Open index file using a given viewer stream.
610 *
611 * Return 0 on success or else a negative value.
612 */
613 static int open_index(struct relay_viewer_stream *stream)
614 {
615 int ret;
616 char fullpath[PATH_MAX];
617 struct lttng_packet_index_file_hdr hdr;
618
619 if (stream->tracefile_count > 0) {
620 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%"
621 PRIu64 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
622 stream->channel_name, stream->tracefile_count_current);
623 } else {
624 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s"
625 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
626 stream->channel_name);
627 }
628 if (ret < 0) {
629 PERROR("snprintf index path");
630 goto error;
631 }
632
633 DBG("Opening index file %s in read only", fullpath);
634 ret = open(fullpath, O_RDONLY);
635 if (ret < 0) {
636 if (errno == ENOENT) {
637 ret = -ENOENT;
638 goto error;
639 } else {
640 PERROR("opening index in read-only");
641 }
642 goto error;
643 }
644 stream->index_read_fd = ret;
645 DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);
646
647 ret = lttng_read(stream->index_read_fd, &hdr, sizeof(hdr));
648 if (ret < sizeof(hdr)) {
649 PERROR("Reading index header");
650 goto error;
651 }
652 if (strncmp(hdr.magic, INDEX_MAGIC, sizeof(hdr.magic)) != 0) {
653 ERR("Invalid header magic");
654 ret = -1;
655 goto error;
656 }
657 if (be32toh(hdr.index_major) != INDEX_MAJOR ||
658 be32toh(hdr.index_minor) != INDEX_MINOR) {
659 ERR("Invalid header version");
660 ret = -1;
661 goto error;
662 }
663 ret = 0;
664
665 error:
666 return ret;
667 }
668
669 /*
670 * Allocate and init a new viewer_stream.
671 *
672 * Copies the values from the stream passed in parameter and insert the new
673 * stream in the viewer_streams_ht.
674 *
675 * MUST be called with rcu_read_lock held.
676 *
677 * Returns 0 on success or a negative value on error.
678 */
679 static
680 int init_viewer_stream(struct relay_stream *stream, int seek_last)
681 {
682 int ret;
683 struct relay_viewer_stream *viewer_stream;
684
685 assert(stream);
686
687 viewer_stream = zmalloc(sizeof(*viewer_stream));
688 if (!viewer_stream) {
689 PERROR("relay viewer stream zmalloc");
690 ret = -1;
691 goto error;
692 }
693 viewer_stream->session_id = stream->session->id;
694 viewer_stream->stream_handle = stream->stream_handle;
695 viewer_stream->path_name = strndup(stream->path_name,
696 LTTNG_VIEWER_PATH_MAX);
697 viewer_stream->channel_name = strndup(stream->channel_name,
698 LTTNG_VIEWER_NAME_MAX);
699 viewer_stream->tracefile_count = stream->tracefile_count;
700 viewer_stream->metadata_flag = stream->metadata_flag;
701 if (seek_last) {
702 viewer_stream->tracefile_count_current =
703 stream->tracefile_count_current;
704 } else {
705 viewer_stream->tracefile_count_current =
706 stream->oldest_tracefile_id;
707 }
708
709 /*
710 * The deletion of this ctf_trace object is only done in a call RCU of the
711 * relay stream making it valid as long as we have the read side lock.
712 */
713 viewer_stream->ctf_trace = stream->ctf_trace;
714 uatomic_inc(&viewer_stream->ctf_trace->refcount);
715
716 lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
717 lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);
718
719 viewer_stream->index_read_fd = -1;
720 viewer_stream->read_fd = -1;
721
722 /*
723 * This is to avoid a race between the initialization of this object and
724 * the close of the given stream. If the stream is unable to find this
725 * viewer stream when closing, this copy will at least take the latest
726 * value.
727 * We also need that for the seek_last.
728 */
729 viewer_stream->total_index_received = stream->total_index_received;
730
731 /*
732 * If we never received an index for the current stream, delay
733 * the opening of the index, otherwise open it right now.
734 */
735 if (viewer_stream->tracefile_count_current ==
736 stream->tracefile_count_current &&
737 viewer_stream->total_index_received == 0) {
738 viewer_stream->index_read_fd = -1;
739 } else {
740 ret = open_index(viewer_stream);
741 if (ret < 0) {
742 goto error;
743 }
744 }
745
746 if (seek_last && viewer_stream->index_read_fd > 0) {
747 ret = lseek(viewer_stream->index_read_fd,
748 viewer_stream->total_index_received *
749 sizeof(struct lttng_packet_index),
750 SEEK_CUR);
751 if (ret < 0) {
752 goto error;
753 }
754 viewer_stream->last_sent_index =
755 viewer_stream->total_index_received;
756 }
757
758 ret = 0;
759
760 error:
761 return ret;
762 }
763
764 /*
765 * Rotate a stream to the next tracefile.
766 *
767 * Returns 0 on success, a negative value on error.
768 */
769 static
770 int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream,
771 struct relay_stream *stream)
772 {
773 int ret;
774 uint64_t tracefile_id;
775
776 assert(viewer_stream);
777
778 tracefile_id = (viewer_stream->tracefile_count_current + 1) %
779 viewer_stream->tracefile_count;
780
781 if (stream) {
782 pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
783 }
784 /*
785 * The writer and the reader are not working in the same
786 * tracefile, we can read up to EOF, we don't care about the
787 * total_index_received.
788 */
789 if (!stream || (stream->tracefile_count_current != tracefile_id)) {
790 viewer_stream->close_write_flag = 1;
791 } else {
792 /*
793 * We are opening a file that is still open in write, make
794 * sure we limit our reading to the number of indexes
795 * received.
796 */
797 viewer_stream->close_write_flag = 0;
798 if (stream) {
799 viewer_stream->total_index_received =
800 stream->total_index_received;
801 }
802 }
803 viewer_stream->tracefile_count_current = tracefile_id;
804
805 ret = close(viewer_stream->index_read_fd);
806 if (ret < 0) {
807 PERROR("close index file %d",
808 viewer_stream->index_read_fd);
809 }
810 viewer_stream->index_read_fd = -1;
811 ret = close(viewer_stream->read_fd);
812 if (ret < 0) {
813 PERROR("close tracefile %d",
814 viewer_stream->read_fd);
815 }
816 viewer_stream->read_fd = -1;
817
818 pthread_mutex_lock(&viewer_stream->overwrite_lock);
819 viewer_stream->abort_flag = 0;
820 pthread_mutex_unlock(&viewer_stream->overwrite_lock);
821
822 viewer_stream->index_read_fd = -1;
823 viewer_stream->read_fd = -1;
824
825 if (stream) {
826 pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
827 }
828 ret = open_index(viewer_stream);
829 if (ret < 0) {
830 goto error;
831 }
832
833 ret = 0;
834
835 error:
836 return ret;
837 }
838
839 /*
840 * Send the viewer the list of current sessions.
841 */
842 static
843 int viewer_attach_session(struct relay_command *cmd,
844 struct lttng_ht *sessions_ht)
845 {
846 int ret, send_streams = 0, nb_streams = 0;
847 struct lttng_viewer_attach_session_request request;
848 struct lttng_viewer_attach_session_response response;
849 struct lttng_viewer_stream send_stream;
850 struct relay_stream *stream;
851 struct relay_viewer_stream *viewer_stream;
852 struct lttng_ht_node_ulong *node;
853 struct lttng_ht_node_u64 *node64;
854 struct lttng_ht_iter iter;
855 struct relay_session *session;
856 int seek_last = 0;
857
858 assert(cmd);
859 assert(sessions_ht);
860
861 DBG("Attach session received");
862
863 if (cmd->version_check_done == 0) {
864 ERR("Trying to attach session before version check");
865 ret = -1;
866 goto end_no_session;
867 }
868
869 health_code_update();
870
871 ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0);
872 if (ret < 0 || ret != sizeof(request)) {
873 if (ret == 0) {
874 /* Orderly shutdown. Not necessary to print an error. */
875 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
876 } else {
877 ERR("Relay failed to receive the attach parameters.");
878 }
879 ret = -1;
880 goto error;
881 }
882
883 health_code_update();
884
885 rcu_read_lock();
886 lttng_ht_lookup(sessions_ht,
887 (void *)((unsigned long) be64toh(request.session_id)), &iter);
888 node = lttng_ht_iter_get_node_ulong(&iter);
889 if (node == NULL) {
890 DBG("Relay session %" PRIu64 " not found",
891 be64toh(request.session_id));
892 response.status = htobe32(VIEWER_ATTACH_UNK);
893 goto send_reply;
894 }
895
896 session = caa_container_of(node, struct relay_session, session_n);
897 if (cmd->session_id == session->id) {
898 /* Same viewer already attached, just send the stream list. */
899 send_streams = 1;
900 response.status = htobe32(VIEWER_ATTACH_OK);
901 } else if (session->viewer_attached != 0) {
902 DBG("Already a viewer attached");
903 response.status = htobe32(VIEWER_ATTACH_ALREADY);
904 goto send_reply;
905 } else if (session->live_timer == 0) {
906 DBG("Not live session");
907 response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
908 goto send_reply;
909 } else {
910 session->viewer_attached++;
911 send_streams = 1;
912 response.status = htobe32(VIEWER_ATTACH_OK);
913 cmd->session_id = session->id;
914 cmd->session = session;
915 }
916
917 switch (be32toh(request.seek)) {
918 case VIEWER_SEEK_BEGINNING:
919 /* Default behaviour. */
920 break;
921 case VIEWER_SEEK_LAST:
922 seek_last = 1;
923 break;
924 default:
925 ERR("Wrong seek parameter");
926 response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
927 send_streams = 0;
928 goto send_reply;
929 }
930
931 if (send_streams) {
932 /* We should only be there if we have a session to attach to. */
933 assert(session);
934
935 /*
936 * Fill the viewer_streams_ht to count the number of streams
937 * ready to be sent and avoid concurrency issues on the
938 * relay_streams_ht and don't rely on a total session stream count.
939 */
940 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) {
941 struct relay_viewer_stream *vstream;
942
943 health_code_update();
944
945 node = lttng_ht_iter_get_node_ulong(&iter);
946 if (!node) {
947 continue;
948 }
949 stream = caa_container_of(node, struct relay_stream, stream_n);
950 if (stream->session != cmd->session) {
951 continue;
952 }
953
954 /*
955 * Don't send streams with no ctf_trace, they are not ready to be
956 * read.
957 */
958 if (!stream->ctf_trace) {
959 continue;
960 }
961
962 vstream = live_find_viewer_stream_by_id(stream->stream_handle);
963 if (!vstream) {
964 ret = init_viewer_stream(stream, seek_last);
965 if (ret < 0) {
966 goto end_unlock;
967 }
968 }
969 nb_streams++;
970 }
971 response.streams_count = htobe32(nb_streams);
972 }
973
974 send_reply:
975 health_code_update();
976 ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0);
977 if (ret < 0) {
978 ERR("Relay sending viewer attach response");
979 goto end_unlock;
980 }
981 health_code_update();
982
983 /*
984 * Unknown or busy session, just return gracefully, the viewer knows what
985 * is happening.
986 */
987 if (!send_streams) {
988 ret = 0;
989 goto end_unlock;
990 }
991
992 /* We should only be there if we have a session to attach to. */
993 assert(session);
994 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
995 health_code_update();
996
997 node64 = lttng_ht_iter_get_node_u64(&iter);
998 if (!node64) {
999 continue;
1000 }
1001 viewer_stream = caa_container_of(node64, struct relay_viewer_stream,
1002 stream_n);
1003 if (viewer_stream->session_id != cmd->session->id) {
1004 continue;
1005 }
1006
1007 send_stream.id = htobe64(viewer_stream->stream_handle);
1008 send_stream.ctf_trace_id = htobe64(viewer_stream->ctf_trace->id);
1009 send_stream.metadata_flag = htobe32(viewer_stream->metadata_flag);
1010 strncpy(send_stream.path_name, viewer_stream->path_name,
1011 sizeof(send_stream.path_name));
1012 strncpy(send_stream.channel_name, viewer_stream->channel_name,
1013 sizeof(send_stream.channel_name));
1014
1015 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_stream,
1016 sizeof(send_stream), 0);
1017 if (ret < 0) {
1018 ERR("Relay sending stream %" PRIu64, viewer_stream->stream_handle);
1019 goto end_unlock;
1020 }
1021 DBG("Sent stream %" PRIu64 " to viewer", viewer_stream->stream_handle);
1022 }
1023 ret = 0;
1024
1025 end_unlock:
1026 rcu_read_unlock();
1027 end_no_session:
1028 error:
1029 return ret;
1030 }
1031
1032 /*
1033 * Get viewer stream from stream id.
1034 *
1035 * RCU read side lock MUST be acquired.
1036 */
1037 struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id)
1038 {
1039 struct lttng_ht_node_u64 *node;
1040 struct lttng_ht_iter iter;
1041 struct relay_viewer_stream *stream = NULL;
1042
1043 lttng_ht_lookup(viewer_streams_ht, &stream_id, &iter);
1044 node = lttng_ht_iter_get_node_u64(&iter);
1045 if (node == NULL) {
1046 DBG("Relay viewer stream %" PRIu64 " not found", stream_id);
1047 goto end;
1048 }
1049 stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
1050
1051 end:
1052 return stream;
1053 }
1054
1055 /*
1056 * Send the next index for a stream.
1057 *
1058 * Return 0 on success or else a negative value.
1059 */
1060 static
1061 int viewer_get_next_index(struct relay_command *cmd,
1062 struct lttng_ht *sessions_ht)
1063 {
1064 int ret;
1065 struct lttng_viewer_get_next_index request_index;
1066 struct lttng_viewer_index viewer_index;
1067 struct lttng_packet_index packet_index;
1068 struct relay_viewer_stream *vstream;
1069 struct relay_stream *rstream;
1070
1071 assert(cmd);
1072 assert(sessions_ht);
1073
1074 DBG("Viewer get next index");
1075
1076 if (cmd->version_check_done == 0) {
1077 ERR("Trying to request index before version check");
1078 ret = -1;
1079 goto end_no_session;
1080 }
1081
1082 health_code_update();
1083 ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index,
1084 sizeof(request_index), 0);
1085 if (ret < 0 || ret != sizeof(request_index)) {
1086 ret = -1;
1087 ERR("Relay didn't receive the whole packet");
1088 goto end;
1089 }
1090 health_code_update();
1091
1092 rcu_read_lock();
1093 vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id));
1094 if (!vstream) {
1095 ret = -1;
1096 goto end_unlock;
1097 }
1098
1099 memset(&viewer_index, 0, sizeof(viewer_index));
1100
1101 /*
1102 * The viewer should not ask for index on metadata stream.
1103 */
1104 if (vstream->metadata_flag) {
1105 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1106 goto send_reply;
1107 }
1108
1109 /* First time, we open the index file */
1110 if (vstream->index_read_fd < 0) {
1111 ret = open_index(vstream);
1112 if (ret == -ENOENT) {
1113 /*
1114 * The index is created only when the first data packet arrives, it
1115 * might not be ready at the beginning of the session
1116 */
1117 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1118 goto send_reply;
1119 } else if (ret < 0) {
1120 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1121 goto send_reply;
1122 }
1123 }
1124
1125 rstream = relay_stream_find_by_id(vstream->stream_handle);
1126 if (rstream) {
1127 if (vstream->abort_flag) {
1128 /* Rotate on abort (overwrite). */
1129 DBG("Viewer rotate because of overwrite");
1130 ret = rotate_viewer_stream(vstream, rstream);
1131 if (ret < 0) {
1132 goto end_unlock;
1133 }
1134 }
1135 pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
1136 if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
1137 if (rstream->beacon_ts_end != -1ULL &&
1138 vstream->last_sent_index == rstream->total_index_received) {
1139 viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
1140 viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
1141 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1142 goto send_reply;
1143 /*
1144 * Reader and writer are working in the same tracefile, so we care
1145 * about the number of index received and sent. Otherwise, we read
1146 * up to EOF.
1147 */
1148 } else if (rstream->total_index_received <= vstream->last_sent_index
1149 && !vstream->close_write_flag) {
1150 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1151 /* No new index to send, retry later. */
1152 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1153 goto send_reply;
1154 }
1155 }
1156 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1157 } else if (!rstream && vstream->close_write_flag &&
1158 vstream->total_index_received == vstream->last_sent_index) {
1159 /* Last index sent and current tracefile closed in write */
1160 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1161 goto send_reply;
1162 } else {
1163 vstream->close_write_flag = 1;
1164 }
1165
1166 if (!vstream->ctf_trace->metadata_received ||
1167 vstream->ctf_trace->metadata_received >
1168 vstream->ctf_trace->metadata_sent) {
1169 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1170 }
1171
1172 pthread_mutex_lock(&vstream->overwrite_lock);
1173 if (vstream->abort_flag) {
1174 /*
1175 * The file is being overwritten by the writer, we cannot
1176 * use it.
1177 */
1178 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1179 pthread_mutex_unlock(&vstream->overwrite_lock);
1180 ret = rotate_viewer_stream(vstream, rstream);
1181 if (ret < 0) {
1182 goto end_unlock;
1183 }
1184 goto send_reply;
1185 }
1186 ret = lttng_read(vstream->index_read_fd, &packet_index,
1187 sizeof(packet_index));
1188 pthread_mutex_unlock(&vstream->overwrite_lock);
1189 if (ret < sizeof(packet_index)) {
1190 /*
1191 * The tracefile is closed in write, so we read up to EOF.
1192 */
1193 if (vstream->close_write_flag == 1) {
1194 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1195 /* Rotate on normal EOF */
1196 ret = rotate_viewer_stream(vstream, rstream);
1197 if (ret < 0) {
1198 goto end_unlock;
1199 }
1200 } else {
1201 PERROR("Relay reading index file %d",
1202 vstream->index_read_fd);
1203 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1204 }
1205 goto send_reply;
1206 } else {
1207 viewer_index.status = htobe32(VIEWER_INDEX_OK);
1208 vstream->last_sent_index++;
1209 }
1210
1211 /*
1212 * Indexes are stored in big endian, no need to switch before sending.
1213 */
1214 viewer_index.offset = packet_index.offset;
1215 viewer_index.packet_size = packet_index.packet_size;
1216 viewer_index.content_size = packet_index.content_size;
1217 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1218 viewer_index.timestamp_end = packet_index.timestamp_end;
1219 viewer_index.events_discarded = packet_index.events_discarded;
1220 viewer_index.stream_id = packet_index.stream_id;
1221
1222 send_reply:
1223 viewer_index.flags = htobe32(viewer_index.flags);
1224 health_code_update();
1225 ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index,
1226 sizeof(viewer_index), 0);
1227 if (ret < 0) {
1228 ERR("Relay index to viewer");
1229 goto end_unlock;
1230 }
1231 health_code_update();
1232
1233 DBG("Index %" PRIu64 "for stream %" PRIu64 "sent",
1234 vstream->last_sent_index, vstream->stream_handle);
1235
1236 end_unlock:
1237 rcu_read_unlock();
1238
1239 end_no_session:
1240 end:
1241 return ret;
1242 }
1243
1244 /*
1245 * Send the next index for a stream
1246 *
1247 * Return 0 on success or else a negative value.
1248 */
1249 static
1250 int viewer_get_packet(struct relay_command *cmd)
1251 {
1252 int ret, send_data = 0;
1253 char *data = NULL;
1254 uint32_t len = 0;
1255 ssize_t read_len;
1256 struct lttng_viewer_get_packet get_packet_info;
1257 struct lttng_viewer_trace_packet reply;
1258 struct relay_viewer_stream *stream;
1259
1260 assert(cmd);
1261
1262 DBG2("Relay get data packet");
1263
1264 if (cmd->version_check_done == 0) {
1265 ERR("Trying to get packet before version check");
1266 ret = -1;
1267 goto end;
1268 }
1269
1270 health_code_update();
1271 ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info,
1272 sizeof(get_packet_info), 0);
1273 if (ret < 0 || ret != sizeof(get_packet_info)) {
1274 ret = -1;
1275 ERR("Relay didn't receive the whole packet");
1276 goto end;
1277 }
1278 health_code_update();
1279
1280 /* From this point on, the error label can be reached. */
1281 memset(&reply, 0, sizeof(reply));
1282
1283 rcu_read_lock();
1284 stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id));
1285 if (!stream) {
1286 goto error;
1287 }
1288 assert(stream->ctf_trace);
1289
1290 /*
1291 * First time we read this stream, we need open the tracefile, we should
1292 * only arrive here if an index has already been sent to the viewer, so the
1293 * tracefile must exist, if it does not it is a fatal error.
1294 */
1295 if (stream->read_fd < 0) {
1296 char fullpath[PATH_MAX];
1297
1298 if (stream->tracefile_count > 0) {
1299 ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
1300 stream->channel_name,
1301 stream->tracefile_count_current);
1302 } else {
1303 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1304 stream->channel_name);
1305 }
1306 if (ret < 0) {
1307 goto error;
1308 }
1309 ret = open(fullpath, O_RDONLY);
1310 if (ret < 0) {
1311 PERROR("Relay opening trace file");
1312 goto error;
1313 }
1314 stream->read_fd = ret;
1315 }
1316
1317 if (!stream->ctf_trace->metadata_received ||
1318 stream->ctf_trace->metadata_received >
1319 stream->ctf_trace->metadata_sent) {
1320 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1321 reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1322 goto send_reply;
1323 }
1324
1325 len = be32toh(get_packet_info.len);
1326 data = zmalloc(len);
1327 if (!data) {
1328 PERROR("relay data zmalloc");
1329 goto error;
1330 }
1331
1332 ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
1333 if (ret < 0) {
1334 /*
1335 * If the read fd was closed by the streaming side, the
1336 * abort_flag will be set to 1, otherwise it is an error.
1337 */
1338 if (stream->abort_flag == 0) {
1339 PERROR("lseek");
1340 goto error;
1341 }
1342 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1343 goto send_reply;
1344 }
1345 read_len = lttng_read(stream->read_fd, data, len);
1346 if (read_len < len) {
1347 /*
1348 * If the read fd was closed by the streaming side, the
1349 * abort_flag will be set to 1, otherwise it is an error.
1350 */
1351 if (stream->abort_flag == 0) {
1352 PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
1353 stream->read_fd,
1354 be64toh(get_packet_info.offset));
1355 goto error;
1356 } else {
1357 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1358 goto send_reply;
1359 }
1360 }
1361 reply.status = htobe32(VIEWER_GET_PACKET_OK);
1362 reply.len = htobe32(len);
1363 send_data = 1;
1364 goto send_reply;
1365
1366 error:
1367 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1368
1369 send_reply:
1370 reply.flags = htobe32(reply.flags);
1371
1372 health_code_update();
1373 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1374 if (ret < 0) {
1375 ERR("Relay data header to viewer");
1376 goto end_unlock;
1377 }
1378 health_code_update();
1379
1380 if (send_data) {
1381 health_code_update();
1382 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1383 if (ret < 0) {
1384 ERR("Relay send data to viewer");
1385 goto end_unlock;
1386 }
1387 health_code_update();
1388 }
1389
1390 DBG("Sent %u bytes for stream %" PRIu64, len,
1391 be64toh(get_packet_info.stream_id));
1392
1393 end_unlock:
1394 free(data);
1395 rcu_read_unlock();
1396
1397 end:
1398 return ret;
1399 }
1400
1401 /*
1402 * Send the session's metadata
1403 *
1404 * Return 0 on success else a negative value.
1405 */
1406 static
1407 int viewer_get_metadata(struct relay_command *cmd)
1408 {
1409 int ret = 0;
1410 ssize_t read_len;
1411 uint64_t len = 0;
1412 char *data = NULL;
1413 struct lttng_viewer_get_metadata request;
1414 struct lttng_viewer_metadata_packet reply;
1415 struct relay_viewer_stream *stream;
1416
1417 assert(cmd);
1418
1419 DBG("Relay get metadata");
1420
1421 if (cmd->version_check_done == 0) {
1422 ERR("Trying to get metadata before version check");
1423 ret = -1;
1424 goto end;
1425 }
1426
1427 health_code_update();
1428 ret = cmd->sock->ops->recvmsg(cmd->sock, &request,
1429 sizeof(request), 0);
1430 if (ret < 0 || ret != sizeof(request)) {
1431 ret = -1;
1432 ERR("Relay didn't receive the whole packet");
1433 goto end;
1434 }
1435 health_code_update();
1436
1437 rcu_read_lock();
1438 stream = live_find_viewer_stream_by_id(be64toh(request.stream_id));
1439 if (!stream || !stream->metadata_flag) {
1440 ERR("Invalid metadata stream");
1441 goto error;
1442 }
1443 assert(stream->ctf_trace);
1444 assert(stream->ctf_trace->metadata_sent <=
1445 stream->ctf_trace->metadata_received);
1446
1447 len = stream->ctf_trace->metadata_received -
1448 stream->ctf_trace->metadata_sent;
1449 if (len == 0) {
1450 reply.status = htobe32(VIEWER_NO_NEW_METADATA);
1451 goto send_reply;
1452 }
1453
1454 /* first time, we open the metadata file */
1455 if (stream->read_fd < 0) {
1456 char fullpath[PATH_MAX];
1457
1458 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1459 stream->channel_name);
1460 if (ret < 0) {
1461 goto error;
1462 }
1463 ret = open(fullpath, O_RDONLY);
1464 if (ret < 0) {
1465 PERROR("Relay opening metadata file");
1466 goto error;
1467 }
1468 stream->read_fd = ret;
1469 }
1470
1471 reply.len = htobe64(len);
1472 data = zmalloc(len);
1473 if (!data) {
1474 PERROR("viewer metadata zmalloc");
1475 goto error;
1476 }
1477
1478 read_len = lttng_read(stream->read_fd, data, len);
1479 if (read_len < len) {
1480 PERROR("Relay reading metadata file");
1481 goto error;
1482 }
1483 stream->ctf_trace->metadata_sent += read_len;
1484 reply.status = htobe32(VIEWER_METADATA_OK);
1485 goto send_reply;
1486
1487 error:
1488 reply.status = htobe32(VIEWER_METADATA_ERR);
1489
1490 send_reply:
1491 health_code_update();
1492 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1493 if (ret < 0) {
1494 ERR("Relay data header to viewer");
1495 goto end_unlock;
1496 }
1497 health_code_update();
1498
1499 if (len > 0) {
1500 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1501 if (ret < 0) {
1502 ERR("Relay send data to viewer");
1503 goto end_unlock;
1504 }
1505 }
1506
1507 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
1508 be64toh(request.stream_id));
1509
1510 DBG("Metadata sent");
1511
1512 end_unlock:
1513 free(data);
1514 rcu_read_unlock();
1515 end:
1516 return ret;
1517 }
1518
1519 /*
1520 * live_relay_unknown_command: send -1 if received unknown command
1521 */
1522 static
1523 void live_relay_unknown_command(struct relay_command *cmd)
1524 {
1525 struct lttcomm_relayd_generic_reply reply;
1526 int ret;
1527
1528 reply.ret_code = htobe32(LTTNG_ERR_UNK);
1529 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
1530 sizeof(struct lttcomm_relayd_generic_reply), 0);
1531 if (ret < 0) {
1532 ERR("Relay sending unknown command");
1533 }
1534 }
1535
1536 /*
1537 * Process the commands received on the control socket
1538 */
1539 static
1540 int process_control(struct lttng_viewer_cmd *recv_hdr,
1541 struct relay_command *cmd, struct lttng_ht *sessions_ht)
1542 {
1543 int ret = 0;
1544
1545 switch (be32toh(recv_hdr->cmd)) {
1546 case VIEWER_CONNECT:
1547 ret = viewer_connect(cmd);
1548 break;
1549 case VIEWER_LIST_SESSIONS:
1550 ret = viewer_list_sessions(cmd, sessions_ht);
1551 break;
1552 case VIEWER_ATTACH_SESSION:
1553 ret = viewer_attach_session(cmd, sessions_ht);
1554 break;
1555 case VIEWER_GET_NEXT_INDEX:
1556 ret = viewer_get_next_index(cmd, sessions_ht);
1557 break;
1558 case VIEWER_GET_PACKET:
1559 ret = viewer_get_packet(cmd);
1560 break;
1561 case VIEWER_GET_METADATA:
1562 ret = viewer_get_metadata(cmd);
1563 break;
1564 default:
1565 ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
1566 live_relay_unknown_command(cmd);
1567 ret = -1;
1568 goto end;
1569 }
1570
1571 end:
1572 return ret;
1573 }
1574
/*
 * Remove pollfd from the poll set and close it.
 *
 * A close failure is only logged.
 */
static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	assert(events);

	lttng_poll_del(events, pollfd);

	if (close(pollfd) < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}
1589
1590 /*
1591 * Create and add connection to the given hash table.
1592 *
1593 * Return poll add value or else -1 on error.
1594 */
1595 static
1596 int add_connection(int fd, struct lttng_poll_event *events,
1597 struct lttng_ht *relay_connections_ht)
1598 {
1599 int ret;
1600 struct relay_command *relay_connection;
1601
1602 assert(events);
1603 assert(relay_connections_ht);
1604
1605 relay_connection = zmalloc(sizeof(struct relay_command));
1606 if (relay_connection == NULL) {
1607 PERROR("Relay command zmalloc");
1608 goto error;
1609 }
1610
1611 ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
1612 if (ret < sizeof(*relay_connection)) {
1613 PERROR("read relay cmd pipe");
1614 goto error_read;
1615 }
1616
1617 lttng_ht_node_init_ulong(&relay_connection->sock_n,
1618 (unsigned long) relay_connection->sock->fd);
1619 rcu_read_lock();
1620 lttng_ht_add_unique_ulong(relay_connections_ht,
1621 &relay_connection->sock_n);
1622 rcu_read_unlock();
1623
1624 return lttng_poll_add(events, relay_connection->sock->fd,
1625 LPOLLIN | LPOLLRDHUP);
1626
1627 error_read:
1628 free(relay_connection);
1629 error:
1630 return -1;
1631 }
1632
1633 static
1634 void deferred_free_connection(struct rcu_head *head)
1635 {
1636 struct relay_command *relay_connection =
1637 caa_container_of(head, struct relay_command, rcu_node);
1638
1639 if (relay_connection->session &&
1640 relay_connection->session->viewer_attached > 0) {
1641 relay_connection->session->viewer_attached--;
1642 }
1643 lttcomm_destroy_sock(relay_connection->sock);
1644 free(relay_connection);
1645 }
1646
1647 static
1648 void deferred_free_viewer_stream(struct rcu_head *head)
1649 {
1650 struct relay_viewer_stream *stream =
1651 caa_container_of(head, struct relay_viewer_stream, rcu_node);
1652
1653 if (stream->ctf_trace) {
1654 uatomic_dec(&stream->ctf_trace->refcount);
1655 assert(uatomic_read(&stream->ctf_trace->refcount) >= 0);
1656 if (uatomic_read(&stream->ctf_trace->refcount) == 0) {
1657 DBG("Freeing ctf_trace %" PRIu64, stream->ctf_trace->id);
1658 free(stream->ctf_trace);
1659 }
1660 }
1661
1662 free(stream->path_name);
1663 free(stream->channel_name);
1664 free(stream);
1665 }
1666
1667 static
1668 void viewer_del_streams(uint64_t session_id)
1669 {
1670 int ret;
1671 struct relay_viewer_stream *stream;
1672 struct lttng_ht_node_u64 *node;
1673 struct lttng_ht_iter iter;
1674
1675 rcu_read_lock();
1676 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
1677 health_code_update();
1678
1679 node = lttng_ht_iter_get_node_u64(&iter);
1680 if (!node) {
1681 continue;
1682 }
1683
1684 stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
1685 if (stream->session_id != session_id) {
1686 continue;
1687 }
1688
1689 if (stream->read_fd >= 0) {
1690 ret = close(stream->read_fd);
1691 if (ret < 0) {
1692 PERROR("close read_fd");
1693 }
1694 }
1695 if (stream->index_read_fd >= 0) {
1696 ret = close(stream->index_read_fd);
1697 if (ret < 0) {
1698 PERROR("close index_read_fd");
1699 }
1700 }
1701 if (stream->metadata_flag && stream->ctf_trace) {
1702 stream->ctf_trace->metadata_sent = 0;
1703 }
1704 ret = lttng_ht_del(viewer_streams_ht, &iter);
1705 assert(!ret);
1706 call_rcu(&stream->rcu_node, deferred_free_viewer_stream);
1707 }
1708 rcu_read_unlock();
1709 }
1710
1711 /*
1712 * Delete and free a connection.
1713 *
1714 * RCU read side lock MUST be acquired.
1715 */
1716 static
1717 void del_connection(struct lttng_ht *relay_connections_ht,
1718 struct lttng_ht_iter *iter, struct relay_command *relay_connection)
1719 {
1720 int ret;
1721
1722 assert(relay_connections_ht);
1723 assert(iter);
1724 assert(relay_connection);
1725
1726 ret = lttng_ht_del(relay_connections_ht, iter);
1727 assert(!ret);
1728
1729 viewer_del_streams(relay_connection->session_id);
1730
1731 call_rcu(&relay_connection->rcu_node, deferred_free_connection);
1732 }
1733
1734 /*
1735 * This thread does the actual work
1736 */
1737 static
1738 void *thread_worker(void *data)
1739 {
1740 int ret, err = -1;
1741 uint32_t nb_fd;
1742 struct relay_command *relay_connection;
1743 struct lttng_poll_event events;
1744 struct lttng_ht *relay_connections_ht;
1745 struct lttng_ht_node_ulong *node;
1746 struct lttng_ht_iter iter;
1747 struct lttng_viewer_cmd recv_hdr;
1748 struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
1749 struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;
1750
1751 DBG("[thread] Live viewer relay worker started");
1752
1753 rcu_register_thread();
1754
1755 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
1756
1757 /* table of connections indexed on socket */
1758 relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1759 if (!relay_connections_ht) {
1760 goto relay_connections_ht_error;
1761 }
1762
1763 ret = create_thread_poll_set(&events, 2);
1764 if (ret < 0) {
1765 goto error_poll_create;
1766 }
1767
1768 ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1769 if (ret < 0) {
1770 goto error;
1771 }
1772
1773 restart:
1774 while (1) {
1775 int i;
1776
1777 health_code_update();
1778
1779 /* Infinite blocking call, waiting for transmission */
1780 DBG3("Relayd live viewer worker thread polling...");
1781 health_poll_entry();
1782 ret = lttng_poll_wait(&events, -1);
1783 health_poll_exit();
1784 if (ret < 0) {
1785 /*
1786 * Restart interrupted system call.
1787 */
1788 if (errno == EINTR) {
1789 goto restart;
1790 }
1791 goto error;
1792 }
1793
1794 nb_fd = ret;
1795
1796 /*
1797 * Process control. The control connection is prioritised so we don't
1798 * starve it with high throughput tracing data on the data
1799 * connection.
1800 */
1801 for (i = 0; i < nb_fd; i++) {
1802 /* Fetch once the poll data */
1803 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1804 int pollfd = LTTNG_POLL_GETFD(&events, i);
1805
1806 health_code_update();
1807
1808 /* Thread quit pipe has been closed. Killing thread. */
1809 ret = check_thread_quit_pipe(pollfd, revents);
1810 if (ret) {
1811 err = 0;
1812 goto exit;
1813 }
1814
1815 /* Inspect the relay cmd pipe for new connection */
1816 if (pollfd == live_relay_cmd_pipe[0]) {
1817 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1818 ERR("Relay live pipe error");
1819 goto error;
1820 } else if (revents & LPOLLIN) {
1821 DBG("Relay live viewer command received");
1822 ret = add_connection(live_relay_cmd_pipe[0],
1823 &events, relay_connections_ht);
1824 if (ret < 0) {
1825 goto error;
1826 }
1827 }
1828 } else if (revents) {
1829 rcu_read_lock();
1830 lttng_ht_lookup(relay_connections_ht,
1831 (void *)((unsigned long) pollfd), &iter);
1832 node = lttng_ht_iter_get_node_ulong(&iter);
1833 if (node == NULL) {
1834 DBG2("Relay viewer sock %d not found", pollfd);
1835 rcu_read_unlock();
1836 goto error;
1837 }
1838 relay_connection = caa_container_of(node, struct relay_command,
1839 sock_n);
1840
1841 if (revents & (LPOLLERR)) {
1842 cleanup_poll_connection(&events, pollfd);
1843 del_connection(relay_connections_ht, &iter,
1844 relay_connection);
1845 } else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
1846 DBG("Viewer socket %d hung up", pollfd);
1847 cleanup_poll_connection(&events, pollfd);
1848 del_connection(relay_connections_ht, &iter,
1849 relay_connection);
1850 } else if (revents & LPOLLIN) {
1851 ret = relay_connection->sock->ops->recvmsg(
1852 relay_connection->sock, &recv_hdr,
1853 sizeof(struct lttng_viewer_cmd),
1854 0);
1855 /* connection closed */
1856 if (ret <= 0) {
1857 cleanup_poll_connection(&events, pollfd);
1858 del_connection(relay_connections_ht, &iter,
1859 relay_connection);
1860 DBG("Viewer control connection closed with %d",
1861 pollfd);
1862 } else {
1863 if (relay_connection->session) {
1864 DBG2("Relay viewer worker receiving data for "
1865 "session: %" PRIu64,
1866 relay_connection->session->id);
1867 }
1868 ret = process_control(&recv_hdr, relay_connection,
1869 sessions_ht);
1870 if (ret < 0) {
1871 /* Clear the session on error. */
1872 cleanup_poll_connection(&events, pollfd);
1873 del_connection(relay_connections_ht, &iter,
1874 relay_connection);
1875 DBG("Viewer connection closed with %d", pollfd);
1876 }
1877 }
1878 }
1879 rcu_read_unlock();
1880 }
1881 }
1882 }
1883
1884 exit:
1885 error:
1886 lttng_poll_clean(&events);
1887
1888 /* empty the hash table and free the memory */
1889 rcu_read_lock();
1890 cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
1891 health_code_update();
1892
1893 node = lttng_ht_iter_get_node_ulong(&iter);
1894 if (!node) {
1895 continue;
1896 }
1897
1898 relay_connection = caa_container_of(node, struct relay_command,
1899 sock_n);
1900 del_connection(relay_connections_ht, &iter, relay_connection);
1901 }
1902 rcu_read_unlock();
1903 error_poll_create:
1904 lttng_ht_destroy(relay_connections_ht);
1905 relay_connections_ht_error:
1906 /* Close relay cmd pipes */
1907 utils_close_pipe(live_relay_cmd_pipe);
1908 if (err) {
1909 DBG("Viewer worker thread exited with error");
1910 }
1911 DBG("Viewer worker thread cleanup complete");
1912 if (err) {
1913 health_error();
1914 ERR("Health error occurred in %s", __func__);
1915 }
1916 health_unregister(health_relayd);
1917 stop_threads();
1918 rcu_unregister_thread();
1919 return NULL;
1920 }
1921
1922 /*
1923 * Create the relay command pipe to wake thread_manage_apps.
1924 * Closed in cleanup().
1925 */
1926 static int create_relay_cmd_pipe(void)
1927 {
1928 int ret;
1929
1930 ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);
1931
1932 return ret;
1933 }
1934
1935 void live_stop_threads(void)
1936 {
1937 int ret;
1938 void *status;
1939
1940 stop_threads();
1941
1942 ret = pthread_join(live_listener_thread, &status);
1943 if (ret != 0) {
1944 PERROR("pthread_join live listener");
1945 goto error; /* join error, exit without cleanup */
1946 }
1947
1948 ret = pthread_join(live_worker_thread, &status);
1949 if (ret != 0) {
1950 PERROR("pthread_join live worker");
1951 goto error; /* join error, exit without cleanup */
1952 }
1953
1954 ret = pthread_join(live_dispatcher_thread, &status);
1955 if (ret != 0) {
1956 PERROR("pthread_join live dispatcher");
1957 goto error; /* join error, exit without cleanup */
1958 }
1959
1960 cleanup();
1961
1962 error:
1963 return;
1964 }
1965
1966 /*
1967 * main
1968 */
1969 int live_start_threads(struct lttng_uri *uri,
1970 struct relay_local_data *relay_ctx, int quit_pipe[2])
1971 {
1972 int ret = 0;
1973 void *status;
1974 int is_root;
1975
1976 assert(uri);
1977 live_uri = uri;
1978
1979 live_thread_quit_pipe[0] = quit_pipe[0];
1980 live_thread_quit_pipe[1] = quit_pipe[1];
1981
1982 /* Check if daemon is UID = 0 */
1983 is_root = !getuid();
1984
1985 if (!is_root) {
1986 if (live_uri->port < 1024) {
1987 ERR("Need to be root to use ports < 1024");
1988 ret = -1;
1989 goto exit;
1990 }
1991 }
1992
1993 /* Setup the thread apps communication pipe. */
1994 if ((ret = create_relay_cmd_pipe()) < 0) {
1995 goto exit;
1996 }
1997
1998 /* Init relay command queue. */
1999 cds_wfq_init(&viewer_cmd_queue.queue);
2000
2001 /* Set up max poll set size */
2002 lttng_poll_set_max_size();
2003
2004 /* Setup the dispatcher thread */
2005 ret = pthread_create(&live_dispatcher_thread, NULL,
2006 thread_dispatcher, (void *) NULL);
2007 if (ret != 0) {
2008 PERROR("pthread_create viewer dispatcher");
2009 goto exit_dispatcher;
2010 }
2011
2012 /* Setup the worker thread */
2013 ret = pthread_create(&live_worker_thread, NULL,
2014 thread_worker, relay_ctx);
2015 if (ret != 0) {
2016 PERROR("pthread_create viewer worker");
2017 goto exit_worker;
2018 }
2019
2020 /* Setup the listener thread */
2021 ret = pthread_create(&live_listener_thread, NULL,
2022 thread_listener, (void *) NULL);
2023 if (ret != 0) {
2024 PERROR("pthread_create viewer listener");
2025 goto exit_listener;
2026 }
2027
2028 ret = 0;
2029 goto end;
2030
2031 exit_listener:
2032 ret = pthread_join(live_listener_thread, &status);
2033 if (ret != 0) {
2034 PERROR("pthread_join live listener");
2035 goto error; /* join error, exit without cleanup */
2036 }
2037
2038 exit_worker:
2039 ret = pthread_join(live_worker_thread, &status);
2040 if (ret != 0) {
2041 PERROR("pthread_join live worker");
2042 goto error; /* join error, exit without cleanup */
2043 }
2044
2045 exit_dispatcher:
2046 ret = pthread_join(live_dispatcher_thread, &status);
2047 if (ret != 0) {
2048 PERROR("pthread_join live dispatcher");
2049 goto error; /* join error, exit without cleanup */
2050 }
2051
2052 exit:
2053 cleanup();
2054
2055 end:
2056 error:
2057 return ret;
2058 }
This page took 0.12524 seconds and 6 git commands to generate.