Fix: big relayd cleanup and refactor
[lttng-tools.git] / src / bin / lttng-relayd / live.c
1 /*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #define _GNU_SOURCE
20 #include <getopt.h>
21 #include <grp.h>
22 #include <limits.h>
23 #include <pthread.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/mount.h>
30 #include <sys/resource.h>
31 #include <sys/socket.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <inttypes.h>
36 #include <urcu/futex.h>
37 #include <urcu/uatomic.h>
38 #include <unistd.h>
39 #include <fcntl.h>
40 #include <config.h>
41
42 #include <lttng/lttng.h>
43 #include <common/common.h>
44 #include <common/compat/poll.h>
45 #include <common/compat/socket.h>
46 #include <common/defaults.h>
47 #include <common/futex.h>
48 #include <common/index/index.h>
49 #include <common/sessiond-comm/sessiond-comm.h>
50 #include <common/sessiond-comm/inet.h>
51 #include <common/sessiond-comm/relayd.h>
52 #include <common/uri.h>
53 #include <common/utils.h>
54
55 #include "cmd.h"
56 #include "live.h"
57 #include "lttng-relayd.h"
58 #include "utils.h"
59 #include "health-relayd.h"
60 #include "testpoint.h"
61 #include "viewer-stream.h"
62 #include "stream.h"
63 #include "session.h"
64 #include "ctf-trace.h"
65
/* URI handed to init_socket() by the listener thread; freed in cleanup(). */
static struct lttng_uri *live_uri;

/*
 * This pipe is used to inform the worker thread that a command is queued and
 * ready to be processed. The dispatcher thread writes each dequeued command
 * to live_relay_cmd_pipe[1].
 */
static int live_relay_cmd_pipe[2] = { -1, -1 };

/*
 * Shared between threads. Set to 1 by stop_threads() and polled by the
 * dispatcher loop to know when to exit.
 */
static int live_dispatch_thread_exit;

/* Thread handles; presumably filled in where the threads are created. */
static pthread_t live_listener_thread;
static pthread_t live_dispatcher_thread;
static pthread_t live_worker_thread;

/*
 * Relay command queue.
 *
 * The listener thread enqueues accepted viewer connections in this queue and
 * the dispatcher thread dequeues them.
 */
static struct relay_cmd_queue viewer_cmd_queue;

/* Last viewer session id handed out; incremented by viewer_connect(). */
static uint64_t last_relay_viewer_session_id;
90
91 /*
92 * Cleanup the daemon
93 */
94 static
95 void cleanup(void)
96 {
97 DBG("Cleaning up");
98
99 free(live_uri);
100 }
101
102 /*
103 * Receive a request buffer using a given socket, destination allocated buffer
104 * of length size.
105 *
106 * Return the size of the received message or else a negative value on error
107 * with errno being set by recvmsg() syscall.
108 */
109 static
110 ssize_t recv_request(struct lttcomm_sock *sock, void *buf, size_t size)
111 {
112 ssize_t ret;
113
114 assert(sock);
115 assert(buf);
116
117 ret = sock->ops->recvmsg(sock, buf, size, 0);
118 if (ret < 0 || ret != size) {
119 if (ret == 0) {
120 /* Orderly shutdown. Not necessary to print an error. */
121 DBG("Socket %d did an orderly shutdown", sock->fd);
122 } else {
123 ERR("Relay failed to receive request.");
124 }
125 ret = -1;
126 }
127
128 return ret;
129 }
130
131 /*
132 * Send a response buffer using a given socket, source allocated buffer of
133 * length size.
134 *
135 * Return the size of the sent message or else a negative value on error with
136 * errno being set by sendmsg() syscall.
137 */
138 static
139 ssize_t send_response(struct lttcomm_sock *sock, void *buf, size_t size)
140 {
141 ssize_t ret;
142
143 assert(sock);
144 assert(buf);
145
146 ret = sock->ops->sendmsg(sock, buf, size, 0);
147 if (ret < 0) {
148 ERR("Relayd failed to send response.");
149 }
150
151 return ret;
152 }
153
154 /*
155 * Atomically check if new streams got added in the session since the last
156 * check and reset the flag to 0.
157 *
158 * Returns 1 if new streams got added, 0 if nothing changed, a negative value
159 * on error.
160 */
161 static
162 int check_new_streams(uint64_t session_id, struct lttng_ht *sessions_ht)
163 {
164 int ret;
165 unsigned long current_val;
166 struct relay_session *session;
167
168 assert(sessions_ht);
169
170 session = session_find_by_id(sessions_ht, session_id);
171 if (!session) {
172 DBG("Relay session %" PRIu64 " not found", session_id);
173 ret = -1;
174 goto error;
175 }
176
177 current_val = uatomic_cmpxchg(&session->new_streams, 1, 0);
178 ret = current_val;
179
180 error:
181 return ret;
182 }
183
184 /*
185 * Send viewer streams to the given socket. The ignore_sent_flag indicates if
186 * this function should ignore the sent flag or not.
187 *
188 * Return 0 on success or else a negative value.
189 */
190 static
191 ssize_t send_viewer_streams(struct lttcomm_sock *sock,
192 struct relay_session *session, unsigned int ignore_sent_flag)
193 {
194 ssize_t ret;
195 struct lttng_viewer_stream send_stream;
196 struct lttng_ht_iter iter;
197 struct relay_viewer_stream *vstream;
198
199 assert(session);
200
201 rcu_read_lock();
202
203 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, vstream,
204 stream_n.node) {
205 struct ctf_trace *ctf_trace;
206
207 health_code_update();
208
209 /* Ignore if not the same session. */
210 if (vstream->session_id != session->id ||
211 (!ignore_sent_flag && vstream->sent_flag)) {
212 continue;
213 }
214
215 ctf_trace = ctf_trace_find_by_path(session->ctf_traces_ht,
216 vstream->path_name);
217 assert(ctf_trace);
218
219 send_stream.id = htobe64(vstream->stream_handle);
220 send_stream.ctf_trace_id = htobe64(ctf_trace->id);
221 send_stream.metadata_flag = htobe32(vstream->metadata_flag);
222 strncpy(send_stream.path_name, vstream->path_name,
223 sizeof(send_stream.path_name));
224 strncpy(send_stream.channel_name, vstream->channel_name,
225 sizeof(send_stream.channel_name));
226
227 DBG("Sending stream %" PRIu64 " to viewer", vstream->stream_handle);
228 ret = send_response(sock, &send_stream, sizeof(send_stream));
229 if (ret < 0) {
230 goto end_unlock;
231 }
232 vstream->sent_flag = 1;
233 }
234
235 ret = 0;
236
237 end_unlock:
238 rcu_read_unlock();
239 return ret;
240 }
241
242 /*
243 * Create every viewer stream possible for the given session with the seek
244 * type. Three counters *can* be return which are in order the total amount of
245 * viewer stream of the session, the number of unsent stream and the number of
246 * stream created. Those counters can be NULL and thus will be ignored.
247 *
248 * Return 0 on success or else a negative value.
249 */
250 static
251 int make_viewer_streams(struct relay_session *session,
252 enum lttng_viewer_seek seek_t, uint32_t *nb_total, uint32_t *nb_unsent,
253 uint32_t *nb_created)
254 {
255 int ret;
256 struct lttng_ht_iter iter;
257 struct ctf_trace *ctf_trace;
258
259 assert(session);
260
261 /*
262 * This is to make sure we create viewer streams for a full received
263 * channel. For instance, if we have 8 streams for a channel that are
264 * concurrently being flagged ready, we can end up creating just a subset
265 * of the 8 streams (the ones that are flagged). This lock avoids this
266 * limbo state.
267 */
268 pthread_mutex_lock(&session->viewer_ready_lock);
269
270 /*
271 * Create viewer streams for relay streams that are ready to be used for a
272 * the given session id only.
273 */
274 rcu_read_lock();
275 cds_lfht_for_each_entry(session->ctf_traces_ht->ht, &iter.iter, ctf_trace,
276 node.node) {
277 struct relay_stream *stream;
278
279 health_code_update();
280
281 if (ctf_trace->invalid_flag) {
282 continue;
283 }
284
285 cds_list_for_each_entry(stream, &ctf_trace->stream_list, trace_list) {
286 struct relay_viewer_stream *vstream;
287
288 if (!stream->viewer_ready) {
289 continue;
290 }
291
292 vstream = viewer_stream_find_by_id(stream->stream_handle);
293 if (!vstream) {
294 vstream = viewer_stream_create(stream, seek_t, ctf_trace);
295 if (!vstream) {
296 ret = -1;
297 goto error_unlock;
298 }
299 /* Acquire reference to ctf_trace. */
300 ctf_trace_get_ref(ctf_trace);
301
302 if (nb_created) {
303 /* Update number of created stream counter. */
304 (*nb_created)++;
305 }
306 } else if (!vstream->sent_flag && nb_unsent) {
307 /* Update number of unsent stream counter. */
308 (*nb_unsent)++;
309 }
310 /* Update number of total stream counter. */
311 if (nb_total) {
312 (*nb_total)++;
313 }
314 }
315 }
316
317 ret = 0;
318
319 error_unlock:
320 rcu_read_unlock();
321 pthread_mutex_unlock(&session->viewer_ready_lock);
322 return ret;
323 }
324
/*
 * Wake up a thread by writing a single byte to the write end of its pipe.
 *
 * Return the lttng_write() result cast to int (1 when the byte was written).
 */
static
int notify_thread_pipe(int wpipe)
{
	ssize_t written = lttng_write(wpipe, "!", 1);

	if (written < 1) {
		PERROR("write poll pipe");
	}

	return (int) written;
}
340
341 /*
342 * Stop all threads by closing the thread quit pipe.
343 */
344 static
345 void stop_threads(void)
346 {
347 int ret;
348
349 /* Stopping all threads */
350 DBG("Terminating all live threads");
351 ret = notify_thread_pipe(live_conn_pipe[1]);
352 if (ret < 0) {
353 ERR("write error on thread quit pipe");
354 }
355
356 /* Dispatch thread */
357 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
358 futex_nto1_wake(&viewer_cmd_queue.futex);
359 }
360
361 /*
362 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
363 */
364 static
365 int create_thread_poll_set(struct lttng_poll_event *events, int size)
366 {
367 int ret;
368
369 if (events == NULL || size == 0) {
370 ret = -1;
371 goto error;
372 }
373
374 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
375 if (ret < 0) {
376 goto error;
377 }
378
379 /* Add quit pipe */
380 ret = lttng_poll_add(events, live_conn_pipe[0], LPOLLIN | LPOLLERR);
381 if (ret < 0) {
382 goto error;
383 }
384
385 return 0;
386
387 error:
388 return ret;
389 }
390
391 /*
392 * Check if the thread quit pipe was triggered.
393 *
394 * Return 1 if it was triggered else 0;
395 */
396 static
397 int check_live_conn_pipe(int fd, uint32_t events)
398 {
399 if (fd == live_conn_pipe[0] && (events & LPOLLIN)) {
400 return 1;
401 }
402
403 return 0;
404 }
405
406 /*
407 * Create and init socket from uri.
408 */
409 static
410 struct lttcomm_sock *init_socket(struct lttng_uri *uri)
411 {
412 int ret;
413 struct lttcomm_sock *sock = NULL;
414
415 sock = lttcomm_alloc_sock_from_uri(uri);
416 if (sock == NULL) {
417 ERR("Allocating socket");
418 goto error;
419 }
420
421 ret = lttcomm_create_sock(sock);
422 if (ret < 0) {
423 goto error;
424 }
425 DBG("Listening on sock %d for live", sock->fd);
426
427 ret = sock->ops->bind(sock);
428 if (ret < 0) {
429 goto error;
430 }
431
432 ret = sock->ops->listen(sock, -1);
433 if (ret < 0) {
434 goto error;
435
436 }
437
438 return sock;
439
440 error:
441 if (sock) {
442 lttcomm_destroy_sock(sock);
443 }
444 return NULL;
445 }
446
447 /*
448 * This thread manages the listening for new connections on the network
449 */
450 static
451 void *thread_listener(void *data)
452 {
453 int i, ret, pollfd, err = -1;
454 int val = 1;
455 uint32_t revents, nb_fd;
456 struct lttng_poll_event events;
457 struct lttcomm_sock *live_control_sock;
458
459 DBG("[thread] Relay live listener started");
460
461 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
462
463 health_code_update();
464
465 live_control_sock = init_socket(live_uri);
466 if (!live_control_sock) {
467 goto error_sock_control;
468 }
469
470 /* Pass 2 as size here for the thread quit pipe and control sockets. */
471 ret = create_thread_poll_set(&events, 2);
472 if (ret < 0) {
473 goto error_create_poll;
474 }
475
476 /* Add the control socket */
477 ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
478 if (ret < 0) {
479 goto error_poll_add;
480 }
481
482 lttng_relay_notify_ready();
483
484 if (testpoint(relayd_thread_live_listener)) {
485 goto error_testpoint;
486 }
487
488 while (1) {
489 health_code_update();
490
491 DBG("Listener accepting live viewers connections");
492
493 restart:
494 health_poll_entry();
495 ret = lttng_poll_wait(&events, -1);
496 health_poll_exit();
497 if (ret < 0) {
498 /*
499 * Restart interrupted system call.
500 */
501 if (errno == EINTR) {
502 goto restart;
503 }
504 goto error;
505 }
506 nb_fd = ret;
507
508 DBG("Relay new viewer connection received");
509 for (i = 0; i < nb_fd; i++) {
510 health_code_update();
511
512 /* Fetch once the poll data */
513 revents = LTTNG_POLL_GETEV(&events, i);
514 pollfd = LTTNG_POLL_GETFD(&events, i);
515
516 /* Thread quit pipe has been closed. Killing thread. */
517 ret = check_live_conn_pipe(pollfd, revents);
518 if (ret) {
519 err = 0;
520 goto exit;
521 }
522
523 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
524 ERR("socket poll error");
525 goto error;
526 } else if (revents & LPOLLIN) {
527 /*
528 * Get allocated in this thread, enqueued to a global queue,
529 * dequeued and freed in the worker thread.
530 */
531 struct relay_command *relay_cmd;
532 struct lttcomm_sock *newsock;
533
534 relay_cmd = zmalloc(sizeof(*relay_cmd));
535 if (!relay_cmd) {
536 PERROR("relay command zmalloc");
537 goto error;
538 }
539
540 assert(pollfd == live_control_sock->fd);
541 newsock = live_control_sock->ops->accept(live_control_sock);
542 if (!newsock) {
543 PERROR("accepting control sock");
544 free(relay_cmd);
545 goto error;
546 }
547 DBG("Relay viewer connection accepted socket %d", newsock->fd);
548 ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
549 sizeof(int));
550 if (ret < 0) {
551 PERROR("setsockopt inet");
552 lttcomm_destroy_sock(newsock);
553 free(relay_cmd);
554 goto error;
555 }
556 relay_cmd->sock = newsock;
557
558 /*
559 * Lock free enqueue the request.
560 */
561 cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);
562
563 /*
564 * Wake the dispatch queue futex. Implicit memory
565 * barrier with the exchange in cds_wfq_enqueue.
566 */
567 futex_nto1_wake(&viewer_cmd_queue.futex);
568 }
569 }
570 }
571
572 exit:
573 error:
574 error_poll_add:
575 error_testpoint:
576 lttng_poll_clean(&events);
577 error_create_poll:
578 if (live_control_sock->fd >= 0) {
579 ret = live_control_sock->ops->close(live_control_sock);
580 if (ret) {
581 PERROR("close");
582 }
583 }
584 lttcomm_destroy_sock(live_control_sock);
585 error_sock_control:
586 if (err) {
587 health_error();
588 DBG("Live viewer listener thread exited with error");
589 }
590 health_unregister(health_relayd);
591 DBG("Live viewer listener thread cleanup complete");
592 stop_threads();
593 return NULL;
594 }
595
596 /*
597 * This thread manages the dispatching of the requests to worker threads
598 */
599 static
600 void *thread_dispatcher(void *data)
601 {
602 int err = -1;
603 ssize_t ret;
604 struct cds_wfq_node *node;
605 struct relay_command *relay_cmd = NULL;
606
607 DBG("[thread] Live viewer relay dispatcher started");
608
609 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
610
611 if (testpoint(relayd_thread_live_dispatcher)) {
612 goto error_testpoint;
613 }
614
615 health_code_update();
616
617 while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
618 health_code_update();
619
620 /* Atomically prepare the queue futex */
621 futex_nto1_prepare(&viewer_cmd_queue.futex);
622
623 do {
624 health_code_update();
625
626 /* Dequeue commands */
627 node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
628 if (node == NULL) {
629 DBG("Woken up but nothing in the live-viewer "
630 "relay command queue");
631 /* Continue thread execution */
632 break;
633 }
634
635 relay_cmd = caa_container_of(node, struct relay_command, node);
636 DBG("Dispatching viewer request waiting on sock %d",
637 relay_cmd->sock->fd);
638
639 /*
640 * Inform worker thread of the new request. This call is blocking
641 * so we can be assured that the data will be read at some point in
642 * time or wait to the end of the world :)
643 */
644 ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
645 sizeof(*relay_cmd));
646 free(relay_cmd);
647 if (ret < sizeof(struct relay_command)) {
648 PERROR("write cmd pipe");
649 goto error;
650 }
651 } while (node != NULL);
652
653 /* Futex wait on queue. Blocking call on futex() */
654 health_poll_entry();
655 futex_nto1_wait(&viewer_cmd_queue.futex);
656 health_poll_exit();
657 }
658
659 /* Normal exit, no error */
660 err = 0;
661
662 error:
663 error_testpoint:
664 if (err) {
665 health_error();
666 ERR("Health error occurred in %s", __func__);
667 }
668 health_unregister(health_relayd);
669 DBG("Live viewer dispatch thread dying");
670 stop_threads();
671 return NULL;
672 }
673
674 /*
675 * Establish connection with the viewer and check the versions.
676 *
677 * Return 0 on success or else negative value.
678 */
679 static
680 int viewer_connect(struct relay_command *cmd)
681 {
682 int ret;
683 struct lttng_viewer_connect reply, msg;
684
685 assert(cmd);
686
687 cmd->version_check_done = 1;
688
689 health_code_update();
690
691 DBG("Viewer is establishing a connection to the relayd.");
692
693 ret = recv_request(cmd->sock, &msg, sizeof(msg));
694 if (ret < 0) {
695 goto end;
696 }
697
698 health_code_update();
699
700 reply.major = RELAYD_VERSION_COMM_MAJOR;
701 reply.minor = RELAYD_VERSION_COMM_MINOR;
702
703 /* Major versions must be the same */
704 if (reply.major != be32toh(msg.major)) {
705 DBG("Incompatible major versions ([relayd] %u vs [client] %u)",
706 reply.major, be32toh(msg.major));
707 ret = -1;
708 goto end;
709 }
710
711 cmd->major = reply.major;
712 /* We adapt to the lowest compatible version */
713 if (reply.minor <= be32toh(msg.minor)) {
714 cmd->minor = reply.minor;
715 } else {
716 cmd->minor = be32toh(msg.minor);
717 }
718
719 if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
720 cmd->type = RELAY_VIEWER_COMMAND;
721 } else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
722 cmd->type = RELAY_VIEWER_NOTIFICATION;
723 } else {
724 ERR("Unknown connection type : %u", be32toh(msg.type));
725 ret = -1;
726 goto end;
727 }
728
729 reply.major = htobe32(reply.major);
730 reply.minor = htobe32(reply.minor);
731 if (cmd->type == RELAY_VIEWER_COMMAND) {
732 reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
733 }
734
735 health_code_update();
736
737 ret = send_response(cmd->sock, &reply, sizeof(reply));
738 if (ret < 0) {
739 goto end;
740 }
741
742 health_code_update();
743
744 DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
745 ret = 0;
746
747 end:
748 return ret;
749 }
750
751 /*
752 * Send the viewer the list of current sessions.
753 *
754 * Return 0 on success or else a negative value.
755 */
756 static
757 int viewer_list_sessions(struct relay_command *cmd,
758 struct lttng_ht *sessions_ht)
759 {
760 int ret;
761 struct lttng_viewer_list_sessions session_list;
762 unsigned long count;
763 long approx_before, approx_after;
764 struct lttng_ht_iter iter;
765 struct lttng_viewer_session send_session;
766 struct relay_session *session;
767
768 DBG("List sessions received");
769
770 rcu_read_lock();
771 cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
772 session_list.sessions_count = htobe32(count);
773
774 health_code_update();
775
776 ret = send_response(cmd->sock, &session_list, sizeof(session_list));
777 if (ret < 0) {
778 goto end_unlock;
779 }
780
781 health_code_update();
782
783 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, session,
784 session_n.node) {
785 health_code_update();
786
787 strncpy(send_session.session_name, session->session_name,
788 sizeof(send_session.session_name));
789 strncpy(send_session.hostname, session->hostname,
790 sizeof(send_session.hostname));
791 send_session.id = htobe64(session->id);
792 send_session.live_timer = htobe32(session->live_timer);
793 send_session.clients = htobe32(session->viewer_refcount);
794 send_session.streams = htobe32(session->stream_count);
795
796 health_code_update();
797
798 ret = send_response(cmd->sock, &send_session, sizeof(send_session));
799 if (ret < 0) {
800 goto end_unlock;
801 }
802 }
803 health_code_update();
804
805 rcu_read_unlock();
806 ret = 0;
807 goto end;
808
809 end_unlock:
810 rcu_read_unlock();
811
812 end:
813 return ret;
814 }
815
816 /*
817 * Send the viewer the list of current sessions.
818 */
819 static
820 int viewer_get_new_streams(struct relay_command *cmd,
821 struct lttng_ht *sessions_ht)
822 {
823 int ret, send_streams = 0;
824 uint32_t nb_created = 0, nb_unsent = 0, nb_streams = 0;
825 struct lttng_viewer_new_streams_request request;
826 struct lttng_viewer_new_streams_response response;
827 struct relay_session *session;
828
829 assert(cmd);
830 assert(sessions_ht);
831
832 DBG("Get new streams received");
833
834 health_code_update();
835
836 /* Receive the request from the connected client. */
837 ret = recv_request(cmd->sock, &request, sizeof(request));
838 if (ret < 0) {
839 goto error;
840 }
841
842 health_code_update();
843
844 rcu_read_lock();
845 session = session_find_by_id(sessions_ht, be64toh(request.session_id));
846 if (!session) {
847 DBG("Relay session %" PRIu64 " not found",
848 be64toh(request.session_id));
849 response.status = htobe32(VIEWER_NEW_STREAMS_ERR);
850 goto send_reply;
851 }
852
853 if (cmd->session_id == session->id) {
854 /* We confirmed the viewer is asking for the same session. */
855 send_streams = 1;
856 response.status = htobe32(VIEWER_NEW_STREAMS_OK);
857 } else {
858 send_streams = 0;
859 response.status = htobe32(VIEWER_NEW_STREAMS_ERR);
860 goto send_reply;
861 }
862
863 if (!send_streams) {
864 goto send_reply;
865 }
866
867 ret = make_viewer_streams(session, VIEWER_SEEK_LAST, NULL, &nb_unsent,
868 &nb_created);
869 if (ret < 0) {
870 goto end_unlock;
871 }
872 /* Only send back the newly created streams with the unsent ones. */
873 nb_streams = nb_created + nb_unsent;
874 response.streams_count = htobe32(nb_streams);
875
876 send_reply:
877 health_code_update();
878 ret = send_response(cmd->sock, &response, sizeof(response));
879 if (ret < 0) {
880 goto end_unlock;
881 }
882 health_code_update();
883
884 /*
885 * Unknown or empty session, just return gracefully, the viewer knows what
886 * is happening.
887 */
888 if (!send_streams || !nb_streams) {
889 ret = 0;
890 goto end_unlock;
891 }
892
893 /*
894 * Send stream and *DON'T* ignore the sent flag so every viewer streams
895 * that were not sent from that point will be sent to the viewer.
896 */
897 ret = send_viewer_streams(cmd->sock, session, 0);
898 if (ret < 0) {
899 goto end_unlock;
900 }
901
902 end_unlock:
903 rcu_read_unlock();
904 error:
905 return ret;
906 }
907
908 /*
909 * Send the viewer the list of current sessions.
910 */
911 static
912 int viewer_attach_session(struct relay_command *cmd,
913 struct lttng_ht *sessions_ht)
914 {
915 int send_streams = 0;
916 ssize_t ret;
917 uint32_t nb_streams = 0;
918 enum lttng_viewer_seek seek_type;
919 struct lttng_viewer_attach_session_request request;
920 struct lttng_viewer_attach_session_response response;
921 struct relay_session *session;
922
923 assert(cmd);
924 assert(sessions_ht);
925
926 health_code_update();
927
928 /* Receive the request from the connected client. */
929 ret = recv_request(cmd->sock, &request, sizeof(request));
930 if (ret < 0) {
931 goto error;
932 }
933
934 health_code_update();
935
936 rcu_read_lock();
937 session = session_find_by_id(sessions_ht, be64toh(request.session_id));
938 if (!session) {
939 DBG("Relay session %" PRIu64 " not found",
940 be64toh(request.session_id));
941 response.status = htobe32(VIEWER_ATTACH_UNK);
942 goto send_reply;
943 }
944 session_viewer_attach(session);
945 DBG("Attach session ID %" PRIu64 " received", be64toh(request.session_id));
946
947 if (uatomic_read(&session->viewer_refcount) > 1) {
948 DBG("Already a viewer attached");
949 response.status = htobe32(VIEWER_ATTACH_ALREADY);
950 session_viewer_detach(session);
951 goto send_reply;
952 } else if (session->live_timer == 0) {
953 DBG("Not live session");
954 response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
955 goto send_reply;
956 } else {
957 send_streams = 1;
958 response.status = htobe32(VIEWER_ATTACH_OK);
959 cmd->session_id = session->id;
960 cmd->session = session;
961 }
962
963 switch (be32toh(request.seek)) {
964 case VIEWER_SEEK_BEGINNING:
965 case VIEWER_SEEK_LAST:
966 seek_type = be32toh(request.seek);
967 break;
968 default:
969 ERR("Wrong seek parameter");
970 response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
971 send_streams = 0;
972 goto send_reply;
973 }
974
975 if (!send_streams) {
976 goto send_reply;
977 }
978
979 ret = make_viewer_streams(session, seek_type, &nb_streams, NULL, NULL);
980 if (ret < 0) {
981 goto end_unlock;
982 }
983 response.streams_count = htobe32(nb_streams);
984
985 send_reply:
986 health_code_update();
987 ret = send_response(cmd->sock, &response, sizeof(response));
988 if (ret < 0) {
989 goto end_unlock;
990 }
991 health_code_update();
992
993 /*
994 * Unknown or empty session, just return gracefully, the viewer knows what
995 * is happening.
996 */
997 if (!send_streams || !nb_streams) {
998 ret = 0;
999 goto end_unlock;
1000 }
1001
1002 /* Send stream and ignore the sent flag. */
1003 ret = send_viewer_streams(cmd->sock, session, 1);
1004 if (ret < 0) {
1005 goto end_unlock;
1006 }
1007
1008 end_unlock:
1009 rcu_read_unlock();
1010 error:
1011 return ret;
1012 }
1013
1014 /*
1015 * Send the next index for a stream.
1016 *
1017 * Return 0 on success or else a negative value.
1018 */
1019 static
1020 int viewer_get_next_index(struct relay_command *cmd,
1021 struct lttng_ht *sessions_ht)
1022 {
1023 int ret;
1024 struct lttng_viewer_get_next_index request_index;
1025 struct lttng_viewer_index viewer_index;
1026 struct ctf_packet_index packet_index;
1027 struct relay_viewer_stream *vstream;
1028 struct relay_stream *rstream;
1029 struct ctf_trace *ctf_trace;
1030 struct relay_session *session;
1031
1032 assert(cmd);
1033 assert(sessions_ht);
1034
1035 DBG("Viewer get next index");
1036
1037 health_code_update();
1038
1039 ret = recv_request(cmd->sock, &request_index, sizeof(request_index));
1040 if (ret < 0) {
1041 goto end;
1042 }
1043 health_code_update();
1044
1045 rcu_read_lock();
1046 session = session_find_by_id(sessions_ht, cmd->session_id);
1047 if (!session) {
1048 ret = -1;
1049 goto end_unlock;
1050 }
1051
1052 vstream = viewer_stream_find_by_id(be64toh(request_index.stream_id));
1053 if (!vstream) {
1054 ret = -1;
1055 goto end_unlock;
1056 }
1057
1058 ctf_trace = ctf_trace_find_by_path(session->ctf_traces_ht, vstream->path_name);
1059 assert(ctf_trace);
1060
1061 memset(&viewer_index, 0, sizeof(viewer_index));
1062
1063 /*
1064 * The viewer should not ask for index on metadata stream.
1065 */
1066 if (vstream->metadata_flag) {
1067 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1068 goto send_reply;
1069 }
1070
1071 /* First time, we open the index file */
1072 if (vstream->index_read_fd < 0) {
1073 ret = index_open(vstream->path_name, vstream->channel_name,
1074 vstream->tracefile_count, vstream->tracefile_count_current);
1075 if (ret == -ENOENT) {
1076 /*
1077 * The index is created only when the first data packet arrives, it
1078 * might not be ready at the beginning of the session
1079 */
1080 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1081 goto send_reply;
1082 } else if (ret < 0) {
1083 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1084 goto send_reply;
1085 }
1086 vstream->index_read_fd = ret;
1087 }
1088
1089 rstream = stream_find_by_id(relay_streams_ht, vstream->stream_handle);
1090 assert(rstream);
1091
1092 if (!rstream->close_flag) {
1093 if (vstream->abort_flag) {
1094 /* Rotate on abort (overwrite). */
1095 DBG("Viewer rotate because of overwrite");
1096 ret = viewer_stream_rotate(vstream, rstream);
1097 if (ret < 0) {
1098 goto end_unlock;
1099 } else if (ret == 1) {
1100 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1101 viewer_stream_delete(vstream);
1102 viewer_stream_destroy(ctf_trace, vstream);
1103 goto send_reply;
1104 }
1105 /* ret == 0 means successful so we continue. */
1106 }
1107
1108 pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
1109 if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
1110 if (rstream->beacon_ts_end != -1ULL &&
1111 vstream->last_sent_index == rstream->total_index_received) {
1112 viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
1113 viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
1114 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1115 goto send_reply;
1116 } else if (rstream->total_index_received <= vstream->last_sent_index
1117 && !vstream->close_write_flag) {
1118 /*
1119 * Reader and writer are working in the same tracefile, so we care
1120 * about the number of index received and sent. Otherwise, we read
1121 * up to EOF.
1122 */
1123 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1124 /* No new index to send, retry later. */
1125 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1126 goto send_reply;
1127 }
1128 }
1129 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1130 } else if (rstream->close_flag && vstream->close_write_flag &&
1131 vstream->total_index_received == vstream->last_sent_index) {
1132 /* Last index sent and current tracefile closed in write */
1133 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1134 viewer_stream_delete(vstream);
1135 viewer_stream_destroy(ctf_trace, vstream);
1136 goto send_reply;
1137 } else {
1138 vstream->close_write_flag = 1;
1139 }
1140
1141 if (!ctf_trace->metadata_received ||
1142 ctf_trace->metadata_received > ctf_trace->metadata_sent) {
1143 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1144 }
1145
1146 ret = check_new_streams(vstream->session_id, sessions_ht);
1147 if (ret < 0) {
1148 goto end_unlock;
1149 } else if (ret == 1) {
1150 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM;
1151 }
1152
1153 pthread_mutex_lock(&vstream->overwrite_lock);
1154 if (vstream->abort_flag) {
1155 /*
1156 * The file is being overwritten by the writer, we cannot * use it.
1157 */
1158 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1159 pthread_mutex_unlock(&vstream->overwrite_lock);
1160 ret = viewer_stream_rotate(vstream, rstream);
1161 if (ret < 0) {
1162 goto end_unlock;
1163 } else if (ret == 1) {
1164 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1165 viewer_stream_delete(vstream);
1166 viewer_stream_destroy(ctf_trace, vstream);
1167 goto send_reply;
1168 }
1169 goto send_reply;
1170 }
1171
1172 ret = lttng_read(vstream->index_read_fd, &packet_index,
1173 sizeof(packet_index));
1174 pthread_mutex_unlock(&vstream->overwrite_lock);
1175 if (ret < sizeof(packet_index)) {
1176 /*
1177 * The tracefile is closed in write, so we read up to EOF.
1178 */
1179 if (vstream->close_write_flag == 1) {
1180 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1181 /* Rotate on normal EOF */
1182 ret = viewer_stream_rotate(vstream, rstream);
1183 if (ret < 0) {
1184 goto end_unlock;
1185 } else if (ret == 1) {
1186 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1187 viewer_stream_delete(vstream);
1188 viewer_stream_destroy(ctf_trace, vstream);
1189 goto send_reply;
1190 }
1191 } else {
1192 PERROR("Relay reading index file %d", vstream->index_read_fd);
1193 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1194 }
1195 goto send_reply;
1196 } else {
1197 viewer_index.status = htobe32(VIEWER_INDEX_OK);
1198 vstream->last_sent_index++;
1199 }
1200
1201 /*
1202 * Indexes are stored in big endian, no need to switch before sending.
1203 */
1204 viewer_index.offset = packet_index.offset;
1205 viewer_index.packet_size = packet_index.packet_size;
1206 viewer_index.content_size = packet_index.content_size;
1207 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1208 viewer_index.timestamp_end = packet_index.timestamp_end;
1209 viewer_index.events_discarded = packet_index.events_discarded;
1210 viewer_index.stream_id = packet_index.stream_id;
1211
1212 send_reply:
1213 viewer_index.flags = htobe32(viewer_index.flags);
1214 health_code_update();
1215
1216 ret = send_response(cmd->sock, &viewer_index, sizeof(viewer_index));
1217 if (ret < 0) {
1218 goto end_unlock;
1219 }
1220 health_code_update();
1221
1222 DBG("Index %" PRIu64 " for stream %" PRIu64 " sent",
1223 vstream->last_sent_index, vstream->stream_handle);
1224
1225 end_unlock:
1226 rcu_read_unlock();
1227
1228 end:
1229 return ret;
1230 }
1231
1232 /*
1233 * Send the next index for a stream
1234 *
1235 * Return 0 on success or else a negative value.
1236 */
1237 static
1238 int viewer_get_packet(struct relay_command *cmd,
1239 struct lttng_ht *sessions_ht)
1240 {
1241 int ret, send_data = 0;
1242 char *data = NULL;
1243 uint32_t len = 0;
1244 ssize_t read_len;
1245 struct lttng_viewer_get_packet get_packet_info;
1246 struct lttng_viewer_trace_packet reply;
1247 struct relay_viewer_stream *stream;
1248 struct ctf_trace *ctf_trace;
1249
1250 assert(cmd);
1251
1252 DBG2("Relay get data packet");
1253
1254 health_code_update();
1255
1256 ret = recv_request(cmd->sock, &get_packet_info, sizeof(get_packet_info));
1257 if (ret < 0) {
1258 goto end;
1259 }
1260 health_code_update();
1261
1262 /* From this point on, the error label can be reached. */
1263 memset(&reply, 0, sizeof(reply));
1264
1265 rcu_read_lock();
1266 stream = viewer_stream_find_by_id(be64toh(get_packet_info.stream_id));
1267 if (!stream) {
1268 goto error;
1269 }
1270
1271 ctf_trace = ctf_trace_find_by_path(cmd->session->ctf_traces_ht,
1272 stream->path_name);
1273 assert(ctf_trace);
1274
1275 /*
1276 * First time we read this stream, we need open the tracefile, we should
1277 * only arrive here if an index has already been sent to the viewer, so the
1278 * tracefile must exist, if it does not it is a fatal error.
1279 */
1280 if (stream->read_fd < 0) {
1281 char fullpath[PATH_MAX];
1282
1283 if (stream->tracefile_count > 0) {
1284 ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
1285 stream->channel_name,
1286 stream->tracefile_count_current);
1287 } else {
1288 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1289 stream->channel_name);
1290 }
1291 if (ret < 0) {
1292 goto error;
1293 }
1294 ret = open(fullpath, O_RDONLY);
1295 if (ret < 0) {
1296 PERROR("Relay opening trace file");
1297 goto error;
1298 }
1299 stream->read_fd = ret;
1300 }
1301
1302 if (!ctf_trace->metadata_received ||
1303 ctf_trace->metadata_received > ctf_trace->metadata_sent) {
1304 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1305 reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1306 goto send_reply;
1307 }
1308
1309 ret = check_new_streams(stream->session_id, sessions_ht);
1310 if (ret < 0) {
1311 goto end_unlock;
1312 } else if (ret == 1) {
1313 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1314 reply.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM;
1315 goto send_reply;
1316 }
1317
1318 len = be32toh(get_packet_info.len);
1319 data = zmalloc(len);
1320 if (!data) {
1321 PERROR("relay data zmalloc");
1322 goto error;
1323 }
1324
1325 ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
1326 if (ret < 0) {
1327 /*
1328 * If the read fd was closed by the streaming side, the
1329 * abort_flag will be set to 1, otherwise it is an error.
1330 */
1331 if (stream->abort_flag == 0) {
1332 PERROR("lseek");
1333 goto error;
1334 }
1335 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1336 goto send_reply;
1337 }
1338 read_len = lttng_read(stream->read_fd, data, len);
1339 if (read_len < len) {
1340 /*
1341 * If the read fd was closed by the streaming side, the
1342 * abort_flag will be set to 1, otherwise it is an error.
1343 */
1344 if (stream->abort_flag == 0) {
1345 PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
1346 stream->read_fd,
1347 be64toh(get_packet_info.offset));
1348 goto error;
1349 } else {
1350 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1351 goto send_reply;
1352 }
1353 }
1354 reply.status = htobe32(VIEWER_GET_PACKET_OK);
1355 reply.len = htobe32(len);
1356 send_data = 1;
1357 goto send_reply;
1358
1359 error:
1360 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1361
1362 send_reply:
1363 reply.flags = htobe32(reply.flags);
1364
1365 health_code_update();
1366
1367 ret = send_response(cmd->sock, &reply, sizeof(reply));
1368 if (ret < 0) {
1369 goto end_unlock;
1370 }
1371 health_code_update();
1372
1373 if (send_data) {
1374 health_code_update();
1375 ret = send_response(cmd->sock, data, len);
1376 if (ret < 0) {
1377 goto end_unlock;
1378 }
1379 health_code_update();
1380 }
1381
1382 DBG("Sent %u bytes for stream %" PRIu64, len,
1383 be64toh(get_packet_info.stream_id));
1384
1385 end_unlock:
1386 free(data);
1387 rcu_read_unlock();
1388
1389 end:
1390 return ret;
1391 }
1392
1393 /*
1394 * Send the session's metadata
1395 *
1396 * Return 0 on success else a negative value.
1397 */
1398 static
1399 int viewer_get_metadata(struct relay_command *cmd)
1400 {
1401 int ret = 0;
1402 ssize_t read_len;
1403 uint64_t len = 0;
1404 char *data = NULL;
1405 struct lttng_viewer_get_metadata request;
1406 struct lttng_viewer_metadata_packet reply;
1407 struct relay_viewer_stream *stream;
1408 struct ctf_trace *ctf_trace;
1409
1410 assert(cmd);
1411
1412 DBG("Relay get metadata");
1413
1414 health_code_update();
1415
1416 ret = recv_request(cmd->sock, &request, sizeof(request));
1417 if (ret < 0) {
1418 goto end;
1419 }
1420 health_code_update();
1421
1422 rcu_read_lock();
1423 stream = viewer_stream_find_by_id(be64toh(request.stream_id));
1424 if (!stream || !stream->metadata_flag) {
1425 ERR("Invalid metadata stream");
1426 goto error;
1427 }
1428
1429 ctf_trace = ctf_trace_find_by_path(cmd->session->ctf_traces_ht,
1430 stream->path_name);
1431 assert(ctf_trace);
1432 assert(ctf_trace->metadata_sent <= ctf_trace->metadata_received);
1433
1434 len = ctf_trace->metadata_received - ctf_trace->metadata_sent;
1435 if (len == 0) {
1436 reply.status = htobe32(VIEWER_NO_NEW_METADATA);
1437 goto send_reply;
1438 }
1439
1440 /* first time, we open the metadata file */
1441 if (stream->read_fd < 0) {
1442 char fullpath[PATH_MAX];
1443
1444 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1445 stream->channel_name);
1446 if (ret < 0) {
1447 goto error;
1448 }
1449 ret = open(fullpath, O_RDONLY);
1450 if (ret < 0) {
1451 PERROR("Relay opening metadata file");
1452 goto error;
1453 }
1454 stream->read_fd = ret;
1455 }
1456
1457 reply.len = htobe64(len);
1458 data = zmalloc(len);
1459 if (!data) {
1460 PERROR("viewer metadata zmalloc");
1461 goto error;
1462 }
1463
1464 read_len = lttng_read(stream->read_fd, data, len);
1465 if (read_len < len) {
1466 PERROR("Relay reading metadata file");
1467 goto error;
1468 }
1469 ctf_trace->metadata_sent += read_len;
1470 reply.status = htobe32(VIEWER_METADATA_OK);
1471 goto send_reply;
1472
1473 error:
1474 reply.status = htobe32(VIEWER_METADATA_ERR);
1475
1476 send_reply:
1477 health_code_update();
1478 ret = send_response(cmd->sock, &reply, sizeof(reply));
1479 if (ret < 0) {
1480 goto end_unlock;
1481 }
1482 health_code_update();
1483
1484 if (len > 0) {
1485 ret = send_response(cmd->sock, data, len);
1486 if (ret < 0) {
1487 goto end_unlock;
1488 }
1489 }
1490
1491 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
1492 be64toh(request.stream_id));
1493
1494 DBG("Metadata sent");
1495
1496 end_unlock:
1497 free(data);
1498 rcu_read_unlock();
1499 end:
1500 return ret;
1501 }
1502
1503 /*
1504 * live_relay_unknown_command: send -1 if received unknown command
1505 */
1506 static
1507 void live_relay_unknown_command(struct relay_command *cmd)
1508 {
1509 struct lttcomm_relayd_generic_reply reply;
1510
1511 reply.ret_code = htobe32(LTTNG_ERR_UNK);
1512 (void) send_response(cmd->sock, &reply, sizeof(reply));
1513 }
1514
1515 /*
1516 * Process the commands received on the control socket
1517 */
1518 static
1519 int process_control(struct lttng_viewer_cmd *recv_hdr,
1520 struct relay_command *cmd, struct lttng_ht *sessions_ht)
1521 {
1522 int ret = 0;
1523 uint32_t msg_value;
1524
1525 assert(recv_hdr);
1526 assert(cmd);
1527 assert(sessions_ht);
1528
1529 msg_value = be32toh(recv_hdr->cmd);
1530
1531 /*
1532 * Make sure we've done the version check before any command other then a
1533 * new client connection.
1534 */
1535 if (msg_value != VIEWER_CONNECT && !cmd->version_check_done) {
1536 ERR("Viewer cmd value %" PRIu32 " before version check", msg_value);
1537 ret = -1;
1538 goto end;
1539 }
1540
1541 switch (msg_value) {
1542 case VIEWER_CONNECT:
1543 ret = viewer_connect(cmd);
1544 break;
1545 case VIEWER_LIST_SESSIONS:
1546 ret = viewer_list_sessions(cmd, sessions_ht);
1547 break;
1548 case VIEWER_ATTACH_SESSION:
1549 ret = viewer_attach_session(cmd, sessions_ht);
1550 break;
1551 case VIEWER_GET_NEXT_INDEX:
1552 ret = viewer_get_next_index(cmd, sessions_ht);
1553 break;
1554 case VIEWER_GET_PACKET:
1555 ret = viewer_get_packet(cmd, sessions_ht);
1556 break;
1557 case VIEWER_GET_METADATA:
1558 ret = viewer_get_metadata(cmd);
1559 break;
1560 case VIEWER_GET_NEW_STREAMS:
1561 ret = viewer_get_new_streams(cmd, sessions_ht);
1562 break;
1563 default:
1564 ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
1565 live_relay_unknown_command(cmd);
1566 ret = -1;
1567 goto end;
1568 }
1569
1570 end:
1571 return ret;
1572 }
1573
/*
 * Remove a file descriptor from the poll set, then close it. A close failure
 * is only logged.
 */
static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	assert(events);

	lttng_poll_del(events, pollfd);

	if (close(pollfd) < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}
1588
1589 /*
1590 * Create and add connection to the given hash table.
1591 *
1592 * Return poll add value or else -1 on error.
1593 */
1594 static
1595 int add_connection(int fd, struct lttng_poll_event *events,
1596 struct lttng_ht *relay_connections_ht)
1597 {
1598 int ret;
1599 struct relay_command *relay_connection;
1600
1601 assert(events);
1602 assert(relay_connections_ht);
1603
1604 relay_connection = zmalloc(sizeof(struct relay_command));
1605 if (relay_connection == NULL) {
1606 PERROR("Relay command zmalloc");
1607 goto error;
1608 }
1609
1610 ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
1611 if (ret < sizeof(*relay_connection)) {
1612 PERROR("read relay cmd pipe");
1613 goto error_read;
1614 }
1615
1616 lttng_ht_node_init_ulong(&relay_connection->sock_n,
1617 (unsigned long) relay_connection->sock->fd);
1618 rcu_read_lock();
1619 lttng_ht_add_unique_ulong(relay_connections_ht,
1620 &relay_connection->sock_n);
1621 rcu_read_unlock();
1622
1623 return lttng_poll_add(events, relay_connection->sock->fd,
1624 LPOLLIN | LPOLLRDHUP);
1625
1626 error_read:
1627 free(relay_connection);
1628 error:
1629 return -1;
1630 }
1631
1632 static
1633 void deferred_free_connection(struct rcu_head *head)
1634 {
1635 struct relay_command *relay_connection =
1636 caa_container_of(head, struct relay_command, rcu_node);
1637
1638 lttcomm_destroy_sock(relay_connection->sock);
1639 free(relay_connection);
1640 }
1641
1642 /*
1643 * Delete all streams for a specific session ID.
1644 */
1645 static void destroy_viewer_streams_by_session(struct relay_session *session)
1646 {
1647 struct relay_viewer_stream *stream;
1648 struct lttng_ht_iter iter;
1649
1650 assert(session);
1651
1652 rcu_read_lock();
1653 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream,
1654 stream_n.node) {
1655 struct ctf_trace *ctf_trace;
1656
1657 health_code_update();
1658 if (stream->session_id != session->id) {
1659 continue;
1660 }
1661
1662 ctf_trace = ctf_trace_find_by_path(session->ctf_traces_ht,
1663 stream->path_name);
1664 assert(ctf_trace);
1665
1666 viewer_stream_delete(stream);
1667
1668 if (stream->metadata_flag) {
1669 ctf_trace->metadata_sent = 0;
1670 ctf_trace->viewer_metadata_stream = NULL;
1671 }
1672
1673 viewer_stream_destroy(ctf_trace, stream);
1674 }
1675 rcu_read_unlock();
1676 }
1677
1678 static void try_destroy_streams(struct relay_session *session)
1679 {
1680 struct ctf_trace *ctf_trace;
1681 struct lttng_ht_iter iter;
1682
1683 assert(session);
1684
1685 cds_lfht_for_each_entry(session->ctf_traces_ht->ht, &iter.iter, ctf_trace,
1686 node.node) {
1687 /* Attempt to destroy the ctf trace of that session. */
1688 ctf_trace_try_destroy(session, ctf_trace);
1689 }
1690 }
1691
1692 /*
1693 * Delete and free a connection.
1694 *
1695 * RCU read side lock MUST be acquired.
1696 */
1697 static
1698 void del_connection(struct lttng_ht *relay_connections_ht,
1699 struct lttng_ht_iter *iter, struct relay_command *relay_connection,
1700 struct lttng_ht *sessions_ht)
1701 {
1702 int ret;
1703 struct relay_session *session;
1704
1705 assert(relay_connections_ht);
1706 assert(iter);
1707 assert(relay_connection);
1708 assert(sessions_ht);
1709
1710 DBG("Cleaning connection of session ID %" PRIu64,
1711 relay_connection->session_id);
1712
1713 rcu_read_lock();
1714 ret = lttng_ht_del(relay_connections_ht, iter);
1715 assert(!ret);
1716
1717 session = session_find_by_id(sessions_ht, relay_connection->session_id);
1718 if (session) {
1719 /*
1720 * Very important that this is done before destroying the session so we
1721 * can put back every viewer stream reference from the ctf_trace.
1722 */
1723 destroy_viewer_streams_by_session(session);
1724 try_destroy_streams(session);
1725 session_viewer_try_destroy(sessions_ht, session);
1726 }
1727 rcu_read_unlock();
1728
1729 call_rcu(&relay_connection->rcu_node, deferred_free_connection);
1730 }
1731
1732 /*
1733 * This thread does the actual work
1734 */
1735 static
1736 void *thread_worker(void *data)
1737 {
1738 int ret, err = -1;
1739 uint32_t nb_fd;
1740 struct relay_command *relay_connection;
1741 struct lttng_poll_event events;
1742 struct lttng_ht *relay_connections_ht;
1743 struct lttng_ht_node_ulong *node;
1744 struct lttng_ht_iter iter;
1745 struct lttng_viewer_cmd recv_hdr;
1746 struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
1747 struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;
1748
1749 DBG("[thread] Live viewer relay worker started");
1750
1751 rcu_register_thread();
1752
1753 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
1754
1755 if (testpoint(relayd_thread_live_worker)) {
1756 goto error_testpoint;
1757 }
1758
1759 /* table of connections indexed on socket */
1760 relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1761 if (!relay_connections_ht) {
1762 goto relay_connections_ht_error;
1763 }
1764
1765 ret = create_thread_poll_set(&events, 2);
1766 if (ret < 0) {
1767 goto error_poll_create;
1768 }
1769
1770 ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1771 if (ret < 0) {
1772 goto error;
1773 }
1774
1775 restart:
1776 while (1) {
1777 int i;
1778
1779 health_code_update();
1780
1781 /* Infinite blocking call, waiting for transmission */
1782 DBG3("Relayd live viewer worker thread polling...");
1783 health_poll_entry();
1784 ret = lttng_poll_wait(&events, -1);
1785 health_poll_exit();
1786 if (ret < 0) {
1787 /*
1788 * Restart interrupted system call.
1789 */
1790 if (errno == EINTR) {
1791 goto restart;
1792 }
1793 goto error;
1794 }
1795
1796 nb_fd = ret;
1797
1798 /*
1799 * Process control. The control connection is prioritised so we don't
1800 * starve it with high throughput tracing data on the data
1801 * connection.
1802 */
1803 for (i = 0; i < nb_fd; i++) {
1804 /* Fetch once the poll data */
1805 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1806 int pollfd = LTTNG_POLL_GETFD(&events, i);
1807
1808 health_code_update();
1809
1810 /* Thread quit pipe has been closed. Killing thread. */
1811 ret = check_live_conn_pipe(pollfd, revents);
1812 if (ret) {
1813 err = 0;
1814 goto exit;
1815 }
1816
1817 /* Inspect the relay cmd pipe for new connection */
1818 if (pollfd == live_relay_cmd_pipe[0]) {
1819 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1820 ERR("Relay live pipe error");
1821 goto error;
1822 } else if (revents & LPOLLIN) {
1823 DBG("Relay live viewer command received");
1824 ret = add_connection(live_relay_cmd_pipe[0],
1825 &events, relay_connections_ht);
1826 if (ret < 0) {
1827 goto error;
1828 }
1829 }
1830 } else if (revents) {
1831 rcu_read_lock();
1832 lttng_ht_lookup(relay_connections_ht,
1833 (void *)((unsigned long) pollfd), &iter);
1834 node = lttng_ht_iter_get_node_ulong(&iter);
1835 if (node == NULL) {
1836 DBG2("Relay viewer sock %d not found", pollfd);
1837 rcu_read_unlock();
1838 goto error;
1839 }
1840 relay_connection = caa_container_of(node, struct relay_command,
1841 sock_n);
1842
1843 if (revents & (LPOLLERR)) {
1844 cleanup_poll_connection(&events, pollfd);
1845 del_connection(relay_connections_ht, &iter,
1846 relay_connection, relay_ctx->sessions_ht);
1847 } else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
1848 DBG("Viewer socket %d hung up", pollfd);
1849 cleanup_poll_connection(&events, pollfd);
1850 del_connection(relay_connections_ht, &iter,
1851 relay_connection, relay_ctx->sessions_ht);
1852 } else if (revents & LPOLLIN) {
1853 ret = relay_connection->sock->ops->recvmsg(
1854 relay_connection->sock, &recv_hdr,
1855 sizeof(struct lttng_viewer_cmd),
1856 0);
1857 /* connection closed */
1858 if (ret <= 0) {
1859 cleanup_poll_connection(&events, pollfd);
1860 del_connection(relay_connections_ht, &iter,
1861 relay_connection, relay_ctx->sessions_ht);
1862 DBG("Viewer control connection closed with %d",
1863 pollfd);
1864 } else {
1865 if (relay_connection->session) {
1866 DBG2("Relay viewer worker receiving data for "
1867 "session: %" PRIu64,
1868 relay_connection->session->id);
1869 }
1870 ret = process_control(&recv_hdr, relay_connection,
1871 sessions_ht);
1872 if (ret < 0) {
1873 /* Clear the session on error. */
1874 cleanup_poll_connection(&events, pollfd);
1875 del_connection(relay_connections_ht, &iter,
1876 relay_connection, relay_ctx->sessions_ht);
1877 DBG("Viewer connection closed with %d", pollfd);
1878 }
1879 }
1880 }
1881 rcu_read_unlock();
1882 }
1883 }
1884 }
1885
1886 exit:
1887 error:
1888 lttng_poll_clean(&events);
1889
1890 /* empty the hash table and free the memory */
1891 rcu_read_lock();
1892 cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
1893 health_code_update();
1894
1895 node = lttng_ht_iter_get_node_ulong(&iter);
1896 if (!node) {
1897 continue;
1898 }
1899
1900 relay_connection = caa_container_of(node, struct relay_command,
1901 sock_n);
1902 del_connection(relay_connections_ht, &iter, relay_connection,
1903 relay_ctx->sessions_ht);
1904 }
1905 rcu_read_unlock();
1906 error_poll_create:
1907 lttng_ht_destroy(relay_connections_ht);
1908 relay_connections_ht_error:
1909 /* Close relay cmd pipes */
1910 utils_close_pipe(live_relay_cmd_pipe);
1911 if (err) {
1912 DBG("Viewer worker thread exited with error");
1913 }
1914 DBG("Viewer worker thread cleanup complete");
1915 error_testpoint:
1916 if (err) {
1917 health_error();
1918 ERR("Health error occurred in %s", __func__);
1919 }
1920 health_unregister(health_relayd);
1921 stop_threads();
1922 rcu_unregister_thread();
1923 return NULL;
1924 }
1925
1926 /*
1927 * Create the relay command pipe to wake thread_manage_apps.
1928 * Closed in cleanup().
1929 */
1930 static int create_relay_cmd_pipe(void)
1931 {
1932 int ret;
1933
1934 ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);
1935
1936 return ret;
1937 }
1938
1939 void live_stop_threads(void)
1940 {
1941 int ret;
1942 void *status;
1943
1944 stop_threads();
1945
1946 ret = pthread_join(live_listener_thread, &status);
1947 if (ret != 0) {
1948 PERROR("pthread_join live listener");
1949 goto error; /* join error, exit without cleanup */
1950 }
1951
1952 ret = pthread_join(live_worker_thread, &status);
1953 if (ret != 0) {
1954 PERROR("pthread_join live worker");
1955 goto error; /* join error, exit without cleanup */
1956 }
1957
1958 ret = pthread_join(live_dispatcher_thread, &status);
1959 if (ret != 0) {
1960 PERROR("pthread_join live dispatcher");
1961 goto error; /* join error, exit without cleanup */
1962 }
1963
1964 cleanup();
1965
1966 error:
1967 return;
1968 }
1969
1970 /*
1971 * main
1972 */
1973 int live_start_threads(struct lttng_uri *uri,
1974 struct relay_local_data *relay_ctx)
1975 {
1976 int ret = 0;
1977 void *status;
1978 int is_root;
1979
1980 assert(uri);
1981 live_uri = uri;
1982
1983 /* Check if daemon is UID = 0 */
1984 is_root = !getuid();
1985
1986 if (!is_root) {
1987 if (live_uri->port < 1024) {
1988 ERR("Need to be root to use ports < 1024");
1989 ret = -1;
1990 goto exit;
1991 }
1992 }
1993
1994 /* Setup the thread apps communication pipe. */
1995 if ((ret = create_relay_cmd_pipe()) < 0) {
1996 goto exit;
1997 }
1998
1999 /* Init relay command queue. */
2000 cds_wfq_init(&viewer_cmd_queue.queue);
2001
2002 /* Set up max poll set size */
2003 lttng_poll_set_max_size();
2004
2005 /* Setup the dispatcher thread */
2006 ret = pthread_create(&live_dispatcher_thread, NULL,
2007 thread_dispatcher, (void *) NULL);
2008 if (ret != 0) {
2009 PERROR("pthread_create viewer dispatcher");
2010 goto exit_dispatcher;
2011 }
2012
2013 /* Setup the worker thread */
2014 ret = pthread_create(&live_worker_thread, NULL,
2015 thread_worker, relay_ctx);
2016 if (ret != 0) {
2017 PERROR("pthread_create viewer worker");
2018 goto exit_worker;
2019 }
2020
2021 /* Setup the listener thread */
2022 ret = pthread_create(&live_listener_thread, NULL,
2023 thread_listener, (void *) NULL);
2024 if (ret != 0) {
2025 PERROR("pthread_create viewer listener");
2026 goto exit_listener;
2027 }
2028
2029 ret = 0;
2030 goto end;
2031
2032 exit_listener:
2033 ret = pthread_join(live_listener_thread, &status);
2034 if (ret != 0) {
2035 PERROR("pthread_join live listener");
2036 goto error; /* join error, exit without cleanup */
2037 }
2038
2039 exit_worker:
2040 ret = pthread_join(live_worker_thread, &status);
2041 if (ret != 0) {
2042 PERROR("pthread_join live worker");
2043 goto error; /* join error, exit without cleanup */
2044 }
2045
2046 exit_dispatcher:
2047 ret = pthread_join(live_dispatcher_thread, &status);
2048 if (ret != 0) {
2049 PERROR("pthread_join live dispatcher");
2050 goto error; /* join error, exit without cleanup */
2051 }
2052
2053 exit:
2054 cleanup();
2055
2056 end:
2057 error:
2058 return ret;
2059 }
This page took 0.105599 seconds and 5 git commands to generate.