Fix concurrency issues while overwriting tracefiles in live
[lttng-tools.git] / src / bin / lttng-relayd / live.c
CommitLineData
d3e2ba59
JD
1/*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#define _GNU_SOURCE
20#include <getopt.h>
21#include <grp.h>
22#include <limits.h>
23#include <pthread.h>
24#include <signal.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <sys/mman.h>
29#include <sys/mount.h>
30#include <sys/resource.h>
31#include <sys/socket.h>
32#include <sys/stat.h>
33#include <sys/types.h>
34#include <sys/wait.h>
35#include <inttypes.h>
36#include <urcu/futex.h>
37#include <urcu/uatomic.h>
38#include <unistd.h>
39#include <fcntl.h>
40#include <config.h>
41
42#include <lttng/lttng.h>
43#include <common/common.h>
44#include <common/compat/poll.h>
45#include <common/compat/socket.h>
46#include <common/defaults.h>
47#include <common/futex.h>
48#include <common/sessiond-comm/sessiond-comm.h>
49#include <common/sessiond-comm/inet.h>
50#include <common/sessiond-comm/relayd.h>
51#include <common/uri.h>
52#include <common/utils.h>
53
54#include "cmd.h"
55#include "live.h"
56#include "lttng-relayd.h"
57#include "lttng-viewer.h"
58#include "utils.h"
eea7556c 59#include "health-relayd.h"
d3e2ba59
JD
60
61static struct lttng_uri *live_uri;
62
63/*
64 * Quit pipe for all threads. This permits a single cancellation point
65 * for all threads when receiving an event on the pipe.
66 */
67static int live_thread_quit_pipe[2] = { -1, -1 };
68
69/*
70 * This pipe is used to inform the worker thread that a command is queued and
71 * ready to be processed.
72 */
73static int live_relay_cmd_pipe[2] = { -1, -1 };
74
75/* Shared between threads */
76static int live_dispatch_thread_exit;
77
78static pthread_t live_listener_thread;
79static pthread_t live_dispatcher_thread;
80static pthread_t live_worker_thread;
81
82/*
83 * Relay command queue.
84 *
85 * The live_thread_listener and live_thread_dispatcher communicate with this
86 * queue.
87 */
88static struct relay_cmd_queue viewer_cmd_queue;
89
90static uint64_t last_relay_viewer_session_id;
91
92/*
93 * Cleanup the daemon
94 */
95static
96void cleanup(void)
97{
98 DBG("Cleaning up");
99
d3e2ba59
JD
100 free(live_uri);
101}
102
/*
 * Wake up a thread by writing a single byte to the write end of its pipe.
 *
 * Returns the value returned by lttng_write(), cast to int; anything below 1
 * means the notification byte was not written.
 */
static
int notify_thread_pipe(int wpipe)
{
	const ssize_t written = lttng_write(wpipe, "!", 1);

	if (written < 1) {
		PERROR("write poll pipe");
	}
	return (int) written;
}
118
119/*
120 * Stop all threads by closing the thread quit pipe.
121 */
122static
123void stop_threads(void)
124{
125 int ret;
126
127 /* Stopping all threads */
128 DBG("Terminating all live threads");
129 ret = notify_thread_pipe(live_thread_quit_pipe[1]);
130 if (ret < 0) {
131 ERR("write error on thread quit pipe");
132 }
133
134 /* Dispatch thread */
135 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
136 futex_nto1_wake(&viewer_cmd_queue.futex);
137}
138
d3e2ba59
JD
139/*
140 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
141 */
142static
143int create_thread_poll_set(struct lttng_poll_event *events, int size)
144{
145 int ret;
146
147 if (events == NULL || size == 0) {
148 ret = -1;
149 goto error;
150 }
151
152 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
153 if (ret < 0) {
154 goto error;
155 }
156
157 /* Add quit pipe */
158 ret = lttng_poll_add(events, live_thread_quit_pipe[0], LPOLLIN);
159 if (ret < 0) {
160 goto error;
161 }
162
163 return 0;
164
165error:
166 return ret;
167}
168
169/*
170 * Check if the thread quit pipe was triggered.
171 *
172 * Return 1 if it was triggered else 0;
173 */
174static
175int check_thread_quit_pipe(int fd, uint32_t events)
176{
177 if (fd == live_thread_quit_pipe[0] && (events & LPOLLIN)) {
178 return 1;
179 }
180
181 return 0;
182}
183
184/*
185 * Create and init socket from uri.
186 */
187static
188struct lttcomm_sock *init_socket(struct lttng_uri *uri)
189{
190 int ret;
191 struct lttcomm_sock *sock = NULL;
192
193 sock = lttcomm_alloc_sock_from_uri(uri);
194 if (sock == NULL) {
195 ERR("Allocating socket");
196 goto error;
197 }
198
199 ret = lttcomm_create_sock(sock);
200 if (ret < 0) {
201 goto error;
202 }
203 DBG("Listening on sock %d for live", sock->fd);
204
205 ret = sock->ops->bind(sock);
206 if (ret < 0) {
207 goto error;
208 }
209
210 ret = sock->ops->listen(sock, -1);
211 if (ret < 0) {
212 goto error;
213
214 }
215
216 return sock;
217
218error:
219 if (sock) {
220 lttcomm_destroy_sock(sock);
221 }
222 return NULL;
223}
224
225/*
226 * This thread manages the listening for new connections on the network
227 */
228static
229void *thread_listener(void *data)
230{
231 int i, ret, pollfd, err = -1;
232 int val = 1;
233 uint32_t revents, nb_fd;
234 struct lttng_poll_event events;
235 struct lttcomm_sock *live_control_sock;
236
237 DBG("[thread] Relay live listener started");
238
eea7556c
MD
239 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
240
241 health_code_update();
242
d3e2ba59
JD
243 live_control_sock = init_socket(live_uri);
244 if (!live_control_sock) {
245 goto error_sock_control;
246 }
247
248 /*
249 * Pass 3 as size here for the thread quit pipe, control and data socket.
250 */
251 ret = create_thread_poll_set(&events, 2);
252 if (ret < 0) {
253 goto error_create_poll;
254 }
255
256 /* Add the control socket */
257 ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
258 if (ret < 0) {
259 goto error_poll_add;
260 }
261
262 while (1) {
eea7556c
MD
263 health_code_update();
264
d3e2ba59
JD
265 DBG("Listener accepting live viewers connections");
266
267restart:
eea7556c 268 health_poll_entry();
d3e2ba59 269 ret = lttng_poll_wait(&events, -1);
eea7556c 270 health_poll_exit();
d3e2ba59
JD
271 if (ret < 0) {
272 /*
273 * Restart interrupted system call.
274 */
275 if (errno == EINTR) {
276 goto restart;
277 }
278 goto error;
279 }
280 nb_fd = ret;
281
282 DBG("Relay new viewer connection received");
283 for (i = 0; i < nb_fd; i++) {
eea7556c
MD
284 health_code_update();
285
d3e2ba59
JD
286 /* Fetch once the poll data */
287 revents = LTTNG_POLL_GETEV(&events, i);
288 pollfd = LTTNG_POLL_GETFD(&events, i);
289
290 /* Thread quit pipe has been closed. Killing thread. */
291 ret = check_thread_quit_pipe(pollfd, revents);
292 if (ret) {
293 err = 0;
294 goto exit;
295 }
296
297 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
298 ERR("socket poll error");
299 goto error;
300 } else if (revents & LPOLLIN) {
301 /*
302 * Get allocated in this thread, enqueued to a global queue,
303 * dequeued and freed in the worker thread.
304 */
305 struct relay_command *relay_cmd;
306 struct lttcomm_sock *newsock;
307
308 relay_cmd = zmalloc(sizeof(*relay_cmd));
309 if (!relay_cmd) {
310 PERROR("relay command zmalloc");
311 goto error;
312 }
313
314 assert(pollfd == live_control_sock->fd);
315 newsock = live_control_sock->ops->accept(live_control_sock);
316 if (!newsock) {
317 PERROR("accepting control sock");
318 free(relay_cmd);
319 goto error;
320 }
321 DBG("Relay viewer connection accepted socket %d", newsock->fd);
322 ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
323 sizeof(int));
324 if (ret < 0) {
325 PERROR("setsockopt inet");
326 lttcomm_destroy_sock(newsock);
327 free(relay_cmd);
328 goto error;
329 }
330 relay_cmd->sock = newsock;
331
332 /*
333 * Lock free enqueue the request.
334 */
335 cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);
336
337 /*
338 * Wake the dispatch queue futex. Implicit memory
339 * barrier with the exchange in cds_wfq_enqueue.
340 */
341 futex_nto1_wake(&viewer_cmd_queue.futex);
342 }
343 }
344 }
345
346exit:
347error:
348error_poll_add:
349 lttng_poll_clean(&events);
350error_create_poll:
351 if (live_control_sock->fd >= 0) {
352 ret = live_control_sock->ops->close(live_control_sock);
353 if (ret) {
354 PERROR("close");
355 }
356 }
357 lttcomm_destroy_sock(live_control_sock);
358error_sock_control:
359 if (err) {
eea7556c 360 health_error();
d3e2ba59
JD
361 DBG("Live viewer listener thread exited with error");
362 }
eea7556c 363 health_unregister(health_relayd);
d3e2ba59
JD
364 DBG("Live viewer listener thread cleanup complete");
365 stop_threads();
366 return NULL;
367}
368
369/*
370 * This thread manages the dispatching of the requests to worker threads
371 */
372static
373void *thread_dispatcher(void *data)
374{
6cd525e8
MD
375 int err = -1;
376 ssize_t ret;
d3e2ba59
JD
377 struct cds_wfq_node *node;
378 struct relay_command *relay_cmd = NULL;
379
380 DBG("[thread] Live viewer relay dispatcher started");
381
eea7556c
MD
382 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
383
384 health_code_update();
385
d3e2ba59 386 while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
eea7556c
MD
387 health_code_update();
388
d3e2ba59
JD
389 /* Atomically prepare the queue futex */
390 futex_nto1_prepare(&viewer_cmd_queue.futex);
391
392 do {
eea7556c
MD
393 health_code_update();
394
d3e2ba59
JD
395 /* Dequeue commands */
396 node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
397 if (node == NULL) {
398 DBG("Woken up but nothing in the live-viewer "
399 "relay command queue");
400 /* Continue thread execution */
401 break;
402 }
403
404 relay_cmd = caa_container_of(node, struct relay_command, node);
405 DBG("Dispatching viewer request waiting on sock %d",
406 relay_cmd->sock->fd);
407
408 /*
409 * Inform worker thread of the new request. This call is blocking
410 * so we can be assured that the data will be read at some point in
411 * time or wait to the end of the world :)
412 */
6cd525e8
MD
413 ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
414 sizeof(*relay_cmd));
d3e2ba59 415 free(relay_cmd);
6cd525e8 416 if (ret < sizeof(struct relay_command)) {
d3e2ba59
JD
417 PERROR("write cmd pipe");
418 goto error;
419 }
420 } while (node != NULL);
421
422 /* Futex wait on queue. Blocking call on futex() */
eea7556c 423 health_poll_entry();
d3e2ba59 424 futex_nto1_wait(&viewer_cmd_queue.futex);
eea7556c 425 health_poll_exit();
d3e2ba59
JD
426 }
427
eea7556c
MD
428 /* Normal exit, no error */
429 err = 0;
430
d3e2ba59 431error:
eea7556c
MD
432 if (err) {
433 health_error();
434 ERR("Health error occurred in %s", __func__);
435 }
436 health_unregister(health_relayd);
d3e2ba59
JD
437 DBG("Live viewer dispatch thread dying");
438 stop_threads();
439 return NULL;
440}
441
442/*
443 * Establish connection with the viewer and check the versions.
444 *
445 * Return 0 on success or else negative value.
446 */
447static
448int viewer_connect(struct relay_command *cmd)
449{
450 int ret;
451 struct lttng_viewer_connect reply, msg;
452
453 assert(cmd);
454
455 cmd->version_check_done = 1;
456
eea7556c
MD
457 health_code_update();
458
d3e2ba59
JD
459 /* Get version from the other side. */
460 ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0);
461 if (ret < 0 || ret != sizeof(msg)) {
462 if (ret == 0) {
463 /* Orderly shutdown. Not necessary to print an error. */
464 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
465 } else {
466 ERR("Relay failed to receive the version values.");
467 }
468 ret = -1;
469 goto end;
470 }
471
eea7556c
MD
472 health_code_update();
473
d3e2ba59
JD
474 reply.major = RELAYD_VERSION_COMM_MAJOR;
475 reply.minor = RELAYD_VERSION_COMM_MINOR;
476
477 /* Major versions must be the same */
478 if (reply.major != be32toh(msg.major)) {
479 DBG("Incompatible major versions (%u vs %u)", reply.major,
480 be32toh(msg.major));
481 ret = 0;
482 goto end;
483 }
484
485 cmd->major = reply.major;
486 /* We adapt to the lowest compatible version */
487 if (reply.minor <= be32toh(msg.minor)) {
488 cmd->minor = reply.minor;
489 } else {
490 cmd->minor = be32toh(msg.minor);
491 }
492
493 if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
494 cmd->type = RELAY_VIEWER_COMMAND;
495 } else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
496 cmd->type = RELAY_VIEWER_NOTIFICATION;
497 } else {
498 ERR("Unknown connection type : %u", be32toh(msg.type));
499 ret = -1;
500 goto end;
501 }
502
503 reply.major = htobe32(reply.major);
504 reply.minor = htobe32(reply.minor);
505 if (cmd->type == RELAY_VIEWER_COMMAND) {
506 reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
507 }
eea7556c
MD
508
509 health_code_update();
510
d3e2ba59
JD
511 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
512 sizeof(struct lttng_viewer_connect), 0);
513 if (ret < 0) {
514 ERR("Relay sending version");
515 }
516
eea7556c
MD
517 health_code_update();
518
d3e2ba59
JD
519 DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
520 ret = 0;
521
522end:
523 return ret;
524}
525
526/*
527 * Send the viewer the list of current sessions.
528 *
529 * Return 0 on success or else a negative value.
530 */
531static
532int viewer_list_sessions(struct relay_command *cmd,
533 struct lttng_ht *sessions_ht)
534{
535 int ret;
536 struct lttng_viewer_list_sessions session_list;
537 unsigned long count;
538 long approx_before, approx_after;
539 struct lttng_ht_node_ulong *node;
540 struct lttng_ht_iter iter;
541 struct lttng_viewer_session send_session;
542 struct relay_session *session;
543
544 DBG("List sessions received");
545
546 if (cmd->version_check_done == 0) {
547 ERR("Trying to list sessions before version check");
548 ret = -1;
549 goto end_no_session;
550 }
551
552 rcu_read_lock();
553 cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
554 session_list.sessions_count = htobe32(count);
555
eea7556c
MD
556 health_code_update();
557
d3e2ba59
JD
558 ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list,
559 sizeof(session_list), 0);
560 if (ret < 0) {
561 ERR("Relay sending sessions list");
562 goto end_unlock;
563 }
564
eea7556c
MD
565 health_code_update();
566
d3e2ba59 567 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) {
eea7556c
MD
568 health_code_update();
569
d3e2ba59
JD
570 node = lttng_ht_iter_get_node_ulong(&iter);
571 if (!node) {
572 goto end_unlock;
573 }
574 session = caa_container_of(node, struct relay_session, session_n);
575
576 strncpy(send_session.session_name, session->session_name,
577 sizeof(send_session.session_name));
578 strncpy(send_session.hostname, session->hostname,
579 sizeof(send_session.hostname));
580 send_session.id = htobe64(session->id);
581 send_session.live_timer = htobe32(session->live_timer);
582 send_session.clients = htobe32(session->viewer_attached);
87b576ec 583 send_session.streams = htobe32(session->stream_count);
d3e2ba59 584
eea7556c
MD
585 health_code_update();
586
d3e2ba59
JD
587 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session,
588 sizeof(send_session), 0);
589 if (ret < 0) {
590 ERR("Relay sending session info");
591 goto end_unlock;
592 }
593 }
eea7556c
MD
594 health_code_update();
595
d3e2ba59
JD
596 rcu_read_unlock();
597 ret = 0;
598 goto end;
599
600end_unlock:
601 rcu_read_unlock();
602
603end:
604end_no_session:
605 return ret;
606}
607
0e6830aa
JD
608/*
609 * Open index file using a given viewer stream.
610 *
611 * Return 0 on success or else a negative value.
612 */
613static int open_index(struct relay_viewer_stream *stream)
614{
615 int ret;
616 char fullpath[PATH_MAX];
617 struct lttng_packet_index_file_hdr hdr;
618
6b6b9a5a
JD
619 if (stream->tracefile_count > 0) {
620 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%"
621 PRIu64 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
622 stream->channel_name, stream->tracefile_count_current);
623 } else {
624 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s"
625 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
626 stream->channel_name);
0e6830aa 627 }
0e6830aa
JD
628 if (ret < 0) {
629 PERROR("snprintf index path");
630 goto error;
631 }
632
633 DBG("Opening index file %s in read only", fullpath);
634 ret = open(fullpath, O_RDONLY);
635 if (ret < 0) {
636 if (errno == ENOENT) {
637 ret = -ENOENT;
638 goto error;
639 } else {
640 PERROR("opening index in read-only");
641 }
642 goto error;
643 }
644 stream->index_read_fd = ret;
645 DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);
646
6cd525e8
MD
647 ret = lttng_read(stream->index_read_fd, &hdr, sizeof(hdr));
648 if (ret < sizeof(hdr)) {
0e6830aa
JD
649 PERROR("Reading index header");
650 goto error;
651 }
652 if (strncmp(hdr.magic, INDEX_MAGIC, sizeof(hdr.magic)) != 0) {
653 ERR("Invalid header magic");
654 ret = -1;
655 goto error;
656 }
657 if (be32toh(hdr.index_major) != INDEX_MAJOR ||
658 be32toh(hdr.index_minor) != INDEX_MINOR) {
659 ERR("Invalid header version");
660 ret = -1;
661 goto error;
662 }
663 ret = 0;
664
665error:
0e6830aa
JD
666 return ret;
667}
668
d3e2ba59
JD
669/*
670 * Allocate and init a new viewer_stream.
671 *
672 * Copies the values from the stream passed in parameter and insert the new
673 * stream in the viewer_streams_ht.
674 *
675 * MUST be called with rcu_read_lock held.
676 *
677 * Returns 0 on success or a negative value on error.
678 */
679static
0e6830aa 680int init_viewer_stream(struct relay_stream *stream, int seek_last)
d3e2ba59
JD
681{
682 int ret;
683 struct relay_viewer_stream *viewer_stream;
684
685 assert(stream);
d3e2ba59
JD
686
687 viewer_stream = zmalloc(sizeof(*viewer_stream));
688 if (!viewer_stream) {
689 PERROR("relay viewer stream zmalloc");
690 ret = -1;
691 goto error;
692 }
d3e2ba59
JD
693 viewer_stream->session_id = stream->session->id;
694 viewer_stream->stream_handle = stream->stream_handle;
695 viewer_stream->path_name = strndup(stream->path_name,
696 LTTNG_VIEWER_PATH_MAX);
697 viewer_stream->channel_name = strndup(stream->channel_name,
698 LTTNG_VIEWER_NAME_MAX);
d3e2ba59
JD
699 viewer_stream->tracefile_count = stream->tracefile_count;
700 viewer_stream->metadata_flag = stream->metadata_flag;
6b6b9a5a
JD
701 if (seek_last) {
702 viewer_stream->tracefile_count_current =
703 stream->tracefile_count_current;
704 } else {
705 viewer_stream->tracefile_count_current =
706 stream->oldest_tracefile_id;
707 }
708
709 /*
710 * The deletion of this ctf_trace object is only done in a call RCU of the
711 * relay stream making it valid as long as we have the read side lock.
712 */
713 viewer_stream->ctf_trace = stream->ctf_trace;
714 uatomic_inc(&viewer_stream->ctf_trace->refcount);
d3e2ba59 715
6b6b9a5a
JD
716 lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
717 lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);
718
719 viewer_stream->index_read_fd = -1;
720 viewer_stream->read_fd = -1;
721
722 /*
723 * This is to avoid a race between the initialization of this object and
724 * the close of the given stream. If the stream is unable to find this
725 * viewer stream when closing, this copy will at least take the latest
726 * value.
727 * We also need that for the seek_last.
728 */
729 viewer_stream->total_index_received = stream->total_index_received;
730
731 /*
732 * If we never received an index for the current stream, delay
733 * the opening of the index, otherwise open it right now.
734 */
735 if (viewer_stream->tracefile_count_current ==
736 stream->tracefile_count_current &&
737 viewer_stream->total_index_received == 0) {
738 viewer_stream->index_read_fd = -1;
739 } else {
0e6830aa
JD
740 ret = open_index(viewer_stream);
741 if (ret < 0) {
742 goto error;
743 }
6b6b9a5a
JD
744 }
745
746 if (seek_last && viewer_stream->index_read_fd > 0) {
0e6830aa
JD
747 ret = lseek(viewer_stream->index_read_fd,
748 viewer_stream->total_index_received *
749 sizeof(struct lttng_packet_index),
750 SEEK_CUR);
751 if (ret < 0) {
752 goto error;
753 }
754 viewer_stream->last_sent_index =
755 viewer_stream->total_index_received;
756 }
757
6b6b9a5a
JD
758 ret = 0;
759
760error:
761 return ret;
762}
763
764/*
765 * Rotate a stream to the next tracefile.
766 *
767 * Returns 0 on success, a negative value on error.
768 */
769static
770int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream,
771 struct relay_stream *stream)
772{
773 int ret;
774 uint64_t tracefile_id;
775
776 assert(viewer_stream);
777
778 tracefile_id = (viewer_stream->tracefile_count_current + 1) %
779 viewer_stream->tracefile_count;
d3e2ba59 780
6b6b9a5a
JD
781 if (stream) {
782 pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
783 }
d3e2ba59 784 /*
6b6b9a5a
JD
785 * The writer and the reader are not working in the same
786 * tracefile, we can read up to EOF, we don't care about the
787 * total_index_received.
d3e2ba59 788 */
6b6b9a5a
JD
789 if (!stream || (stream->tracefile_count_current != tracefile_id)) {
790 viewer_stream->close_write_flag = 1;
791 } else {
792 /*
793 * We are opening a file that is still open in write, make
794 * sure we limit our reading to the number of indexes
795 * received.
796 */
797 viewer_stream->close_write_flag = 0;
798 if (stream) {
799 viewer_stream->total_index_received =
800 stream->total_index_received;
801 }
802 }
803 viewer_stream->tracefile_count_current = tracefile_id;
d3e2ba59 804
6b6b9a5a 805 if (viewer_stream->abort_flag == 0) {
cef0f7d5
JD
806 if (viewer_stream->index_read_fd > 0) {
807 ret = close(viewer_stream->index_read_fd);
808 if (ret < 0) {
809 PERROR("close index file %d",
810 viewer_stream->index_read_fd);
811 }
812 viewer_stream->index_read_fd = -1;
6b6b9a5a 813 }
cef0f7d5
JD
814 if (viewer_stream->read_fd > 0) {
815 ret = close(viewer_stream->read_fd);
816 if (ret < 0) {
817 PERROR("close tracefile %d",
818 viewer_stream->read_fd);
819 }
820 viewer_stream->read_fd = -1;
6b6b9a5a
JD
821 }
822 } else {
823 viewer_stream->abort_flag = 0;
824 }
825
cef0f7d5 826 viewer_stream->index_read_fd = -1;
6b6b9a5a
JD
827 viewer_stream->read_fd = -1;
828
cef0f7d5
JD
829 if (stream) {
830 pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
831 }
6b6b9a5a
JD
832 ret = open_index(viewer_stream);
833 if (ret < 0) {
834 goto error;
835 }
d3e2ba59
JD
836
837 ret = 0;
838
839error:
840 return ret;
841}
842
843/*
844 * Send the viewer the list of current sessions.
845 */
846static
847int viewer_attach_session(struct relay_command *cmd,
92c6ca54 848 struct lttng_ht *sessions_ht)
d3e2ba59
JD
849{
850 int ret, send_streams = 0, nb_streams = 0;
851 struct lttng_viewer_attach_session_request request;
852 struct lttng_viewer_attach_session_response response;
853 struct lttng_viewer_stream send_stream;
854 struct relay_stream *stream;
855 struct relay_viewer_stream *viewer_stream;
856 struct lttng_ht_node_ulong *node;
857 struct lttng_ht_node_u64 *node64;
858 struct lttng_ht_iter iter;
859 struct relay_session *session;
0e6830aa 860 int seek_last = 0;
d3e2ba59
JD
861
862 assert(cmd);
863 assert(sessions_ht);
d3e2ba59
JD
864
865 DBG("Attach session received");
866
867 if (cmd->version_check_done == 0) {
868 ERR("Trying to attach session before version check");
869 ret = -1;
870 goto end_no_session;
871 }
872
eea7556c
MD
873 health_code_update();
874
d3e2ba59
JD
875 ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0);
876 if (ret < 0 || ret != sizeof(request)) {
877 if (ret == 0) {
878 /* Orderly shutdown. Not necessary to print an error. */
879 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
880 } else {
881 ERR("Relay failed to receive the attach parameters.");
882 }
883 ret = -1;
884 goto error;
885 }
886
eea7556c
MD
887 health_code_update();
888
d3e2ba59
JD
889 rcu_read_lock();
890 lttng_ht_lookup(sessions_ht,
891 (void *)((unsigned long) be64toh(request.session_id)), &iter);
892 node = lttng_ht_iter_get_node_ulong(&iter);
893 if (node == NULL) {
894 DBG("Relay session %" PRIu64 " not found",
895 be64toh(request.session_id));
896 response.status = htobe32(VIEWER_ATTACH_UNK);
897 goto send_reply;
898 }
899
900 session = caa_container_of(node, struct relay_session, session_n);
b92fdc2b 901 if (cmd->session_id == session->id) {
d3e2ba59
JD
902 /* Same viewer already attached, just send the stream list. */
903 send_streams = 1;
904 response.status = htobe32(VIEWER_ATTACH_OK);
905 } else if (session->viewer_attached != 0) {
906 DBG("Already a viewer attached");
907 response.status = htobe32(VIEWER_ATTACH_ALREADY);
908 goto send_reply;
909 } else if (session->live_timer == 0) {
910 DBG("Not live session");
911 response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
912 goto send_reply;
913 } else {
914 session->viewer_attached++;
915 send_streams = 1;
916 response.status = htobe32(VIEWER_ATTACH_OK);
b92fdc2b 917 cmd->session_id = session->id;
d3e2ba59
JD
918 cmd->session = session;
919 }
920
921 switch (be32toh(request.seek)) {
922 case VIEWER_SEEK_BEGINNING:
923 /* Default behaviour. */
924 break;
925 case VIEWER_SEEK_LAST:
0e6830aa 926 seek_last = 1;
d3e2ba59
JD
927 break;
928 default:
929 ERR("Wrong seek parameter");
930 response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
931 send_streams = 0;
932 goto send_reply;
933 }
934
935 if (send_streams) {
936 /* We should only be there if we have a session to attach to. */
937 assert(session);
938
939 /*
940 * Fill the viewer_streams_ht to count the number of streams
941 * ready to be sent and avoid concurrency issues on the
942 * relay_streams_ht and don't rely on a total session stream count.
943 */
944 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) {
945 struct relay_viewer_stream *vstream;
946
eea7556c
MD
947 health_code_update();
948
d3e2ba59
JD
949 node = lttng_ht_iter_get_node_ulong(&iter);
950 if (!node) {
951 continue;
952 }
953 stream = caa_container_of(node, struct relay_stream, stream_n);
954 if (stream->session != cmd->session) {
955 continue;
956 }
957
958 /*
959 * Don't send streams with no ctf_trace, they are not ready to be
960 * read.
961 */
962 if (!stream->ctf_trace) {
963 continue;
964 }
965
92c6ca54 966 vstream = live_find_viewer_stream_by_id(stream->stream_handle);
d3e2ba59 967 if (!vstream) {
0e6830aa 968 ret = init_viewer_stream(stream, seek_last);
d3e2ba59
JD
969 if (ret < 0) {
970 goto end_unlock;
971 }
972 }
973 nb_streams++;
974 }
975 response.streams_count = htobe32(nb_streams);
976 }
977
978send_reply:
eea7556c 979 health_code_update();
d3e2ba59
JD
980 ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0);
981 if (ret < 0) {
982 ERR("Relay sending viewer attach response");
983 goto end_unlock;
984 }
eea7556c 985 health_code_update();
d3e2ba59
JD
986
987 /*
988 * Unknown or busy session, just return gracefully, the viewer knows what
989 * is happening.
990 */
991 if (!send_streams) {
992 ret = 0;
993 goto end_unlock;
994 }
995
996 /* We should only be there if we have a session to attach to. */
997 assert(session);
998 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
eea7556c
MD
999 health_code_update();
1000
d3e2ba59
JD
1001 node64 = lttng_ht_iter_get_node_u64(&iter);
1002 if (!node64) {
1003 continue;
1004 }
1005 viewer_stream = caa_container_of(node64, struct relay_viewer_stream,
1006 stream_n);
1007 if (viewer_stream->session_id != cmd->session->id) {
1008 continue;
1009 }
1010
1011 send_stream.id = htobe64(viewer_stream->stream_handle);
1012 send_stream.ctf_trace_id = htobe64(viewer_stream->ctf_trace->id);
1013 send_stream.metadata_flag = htobe32(viewer_stream->metadata_flag);
1014 strncpy(send_stream.path_name, viewer_stream->path_name,
1015 sizeof(send_stream.path_name));
1016 strncpy(send_stream.channel_name, viewer_stream->channel_name,
1017 sizeof(send_stream.channel_name));
1018
1019 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_stream,
1020 sizeof(send_stream), 0);
1021 if (ret < 0) {
1022 ERR("Relay sending stream %" PRIu64, viewer_stream->stream_handle);
1023 goto end_unlock;
1024 }
1025 DBG("Sent stream %" PRIu64 " to viewer", viewer_stream->stream_handle);
1026 }
1027 ret = 0;
1028
1029end_unlock:
1030 rcu_read_unlock();
1031end_no_session:
1032error:
1033 return ret;
1034}
1035
d3e2ba59
JD
1036/*
1037 * Get viewer stream from stream id.
1038 *
1039 * RCU read side lock MUST be acquired.
1040 */
92c6ca54 1041struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id)
d3e2ba59
JD
1042{
1043 struct lttng_ht_node_u64 *node;
1044 struct lttng_ht_iter iter;
1045 struct relay_viewer_stream *stream = NULL;
1046
d3e2ba59
JD
1047 lttng_ht_lookup(viewer_streams_ht, &stream_id, &iter);
1048 node = lttng_ht_iter_get_node_u64(&iter);
1049 if (node == NULL) {
1050 DBG("Relay viewer stream %" PRIu64 " not found", stream_id);
1051 goto end;
1052 }
1053 stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
1054
1055end:
1056 return stream;
1057}
1058
1059/*
1060 * Send the next index for a stream.
1061 *
1062 * Return 0 on success or else a negative value.
1063 */
1064static
1065int viewer_get_next_index(struct relay_command *cmd,
92c6ca54 1066 struct lttng_ht *sessions_ht)
d3e2ba59
JD
1067{
1068 int ret;
1069 struct lttng_viewer_get_next_index request_index;
1070 struct lttng_viewer_index viewer_index;
1071 struct lttng_packet_index packet_index;
1072 struct relay_viewer_stream *vstream;
1073 struct relay_stream *rstream;
1074
1075 assert(cmd);
d3e2ba59
JD
1076 assert(sessions_ht);
1077
1078 DBG("Viewer get next index");
1079
1080 if (cmd->version_check_done == 0) {
1081 ERR("Trying to request index before version check");
1082 ret = -1;
1083 goto end_no_session;
1084 }
1085
eea7556c 1086 health_code_update();
d3e2ba59
JD
1087 ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index,
1088 sizeof(request_index), 0);
1089 if (ret < 0 || ret != sizeof(request_index)) {
1090 ret = -1;
1091 ERR("Relay didn't receive the whole packet");
1092 goto end;
1093 }
eea7556c 1094 health_code_update();
d3e2ba59
JD
1095
1096 rcu_read_lock();
92c6ca54 1097 vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id));
d3e2ba59
JD
1098 if (!vstream) {
1099 ret = -1;
1100 goto end_unlock;
1101 }
1102
1103 memset(&viewer_index, 0, sizeof(viewer_index));
1104
1105 /*
1106 * The viewer should not ask for index on metadata stream.
1107 */
1108 if (vstream->metadata_flag) {
1109 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1110 goto send_reply;
1111 }
1112
1113 /* First time, we open the index file */
1114 if (vstream->index_read_fd < 0) {
1115 ret = open_index(vstream);
0e6830aa 1116 if (ret == -ENOENT) {
d3e2ba59
JD
1117 /*
1118 * The index is created only when the first data packet arrives, it
1119 * might not be ready at the beginning of the session
1120 */
1121 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1122 goto send_reply;
1123 } else if (ret < 0) {
1124 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1125 goto send_reply;
1126 }
1127 }
1128
1129 rstream = relay_stream_find_by_id(vstream->stream_handle);
1130 if (rstream) {
6b6b9a5a
JD
1131 if (vstream->abort_flag) {
1132 /* Rotate on abort (overwrite). */
1133 DBG("Viewer rotate because of overwrite");
1134 ret = rotate_viewer_stream(vstream, rstream);
1135 if (ret < 0) {
1136 goto end_unlock;
1137 }
1138 }
6b6b9a5a 1139 pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
cef0f7d5
JD
1140 if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
1141 if (rstream->beacon_ts_end != -1ULL &&
1142 vstream->last_sent_index == rstream->total_index_received) {
1143 viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
1144 viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
1145 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1146 goto send_reply;
1147 /*
1148 * Reader and writer are working in the same tracefile, so we care
1149 * about the number of index received and sent. Otherwise, we read
1150 * up to EOF.
1151 */
1152 } else if (rstream->total_index_received <= vstream->last_sent_index
1153 && !vstream->close_write_flag) {
1154 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1155 /* No new index to send, retry later. */
1156 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1157 goto send_reply;
1158 }
d3e2ba59 1159 }
6b6b9a5a
JD
1160 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1161 } else if (!rstream && vstream->close_write_flag &&
d3e2ba59 1162 vstream->total_index_received == vstream->last_sent_index) {
6b6b9a5a 1163 /* Last index sent and current tracefile closed in write */
d3e2ba59
JD
1164 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1165 goto send_reply;
6b6b9a5a
JD
1166 } else {
1167 vstream->close_write_flag = 1;
d3e2ba59
JD
1168 }
1169
1170 if (!vstream->ctf_trace->metadata_received ||
1171 vstream->ctf_trace->metadata_received >
1172 vstream->ctf_trace->metadata_sent) {
1173 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1174 }
1175
cef0f7d5
JD
1176 pthread_mutex_lock(&vstream->overwrite_lock);
1177 if (vstream->abort_flag) {
1178 /*
1179 * The file is being overwritten by the writer, we cannot
1180 * use it.
1181 */
1182 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1183 pthread_mutex_unlock(&vstream->overwrite_lock);
1184 ret = rotate_viewer_stream(vstream, rstream);
1185 if (ret < 0) {
1186 goto end_unlock;
1187 }
1188 goto send_reply;
1189 }
6cd525e8
MD
1190 ret = lttng_read(vstream->index_read_fd, &packet_index,
1191 sizeof(packet_index));
cef0f7d5 1192 pthread_mutex_unlock(&vstream->overwrite_lock);
d3e2ba59 1193 if (ret < sizeof(packet_index)) {
6b6b9a5a
JD
1194 /*
1195 * The tracefile is closed in write, so we read up to EOF.
1196 */
1197 if (vstream->close_write_flag == 1) {
1198 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1199 /* Rotate on normal EOF */
1200 ret = rotate_viewer_stream(vstream, rstream);
1201 if (ret < 0) {
1202 goto end_unlock;
1203 }
1204 } else {
cef0f7d5
JD
1205 PERROR("Relay reading index file %d",
1206 vstream->index_read_fd);
1207 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
6b6b9a5a
JD
1208 }
1209 goto send_reply;
d3e2ba59
JD
1210 } else {
1211 viewer_index.status = htobe32(VIEWER_INDEX_OK);
1212 vstream->last_sent_index++;
1213 }
1214
1215 /*
1216 * Indexes are stored in big endian, no need to switch before sending.
1217 */
1218 viewer_index.offset = packet_index.offset;
1219 viewer_index.packet_size = packet_index.packet_size;
1220 viewer_index.content_size = packet_index.content_size;
1221 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1222 viewer_index.timestamp_end = packet_index.timestamp_end;
1223 viewer_index.events_discarded = packet_index.events_discarded;
1224 viewer_index.stream_id = packet_index.stream_id;
1225
1226send_reply:
1227 viewer_index.flags = htobe32(viewer_index.flags);
eea7556c 1228 health_code_update();
d3e2ba59
JD
1229 ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index,
1230 sizeof(viewer_index), 0);
1231 if (ret < 0) {
1232 ERR("Relay index to viewer");
1233 goto end_unlock;
1234 }
eea7556c 1235 health_code_update();
d3e2ba59
JD
1236
1237 DBG("Index %" PRIu64 "for stream %" PRIu64 "sent",
1238 vstream->last_sent_index, vstream->stream_handle);
1239
1240end_unlock:
1241 rcu_read_unlock();
1242
1243end_no_session:
1244end:
1245 return ret;
1246}
1247
1248/*
1249 * Send the next index for a stream
1250 *
1251 * Return 0 on success or else a negative value.
1252 */
1253static
92c6ca54 1254int viewer_get_packet(struct relay_command *cmd)
d3e2ba59
JD
1255{
1256 int ret, send_data = 0;
1257 char *data = NULL;
1258 uint32_t len = 0;
1259 ssize_t read_len;
1260 struct lttng_viewer_get_packet get_packet_info;
1261 struct lttng_viewer_trace_packet reply;
1262 struct relay_viewer_stream *stream;
1263
1264 assert(cmd);
d3e2ba59
JD
1265
1266 DBG2("Relay get data packet");
1267
1268 if (cmd->version_check_done == 0) {
1269 ERR("Trying to get packet before version check");
1270 ret = -1;
1271 goto end;
1272 }
1273
eea7556c 1274 health_code_update();
d3e2ba59
JD
1275 ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info,
1276 sizeof(get_packet_info), 0);
1277 if (ret < 0 || ret != sizeof(get_packet_info)) {
1278 ret = -1;
1279 ERR("Relay didn't receive the whole packet");
1280 goto end;
1281 }
eea7556c 1282 health_code_update();
d3e2ba59 1283
0233a6a5
DG
1284 /* From this point on, the error label can be reached. */
1285 memset(&reply, 0, sizeof(reply));
1286
d3e2ba59 1287 rcu_read_lock();
92c6ca54 1288 stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id));
d3e2ba59
JD
1289 if (!stream) {
1290 goto error;
1291 }
1292 assert(stream->ctf_trace);
1293
1294 /*
1295 * First time we read this stream, we need open the tracefile, we should
1296 * only arrive here if an index has already been sent to the viewer, so the
1297 * tracefile must exist, if it does not it is a fatal error.
1298 */
1299 if (stream->read_fd < 0) {
1300 char fullpath[PATH_MAX];
1301
6b6b9a5a
JD
1302 if (stream->tracefile_count > 0) {
1303 ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
1304 stream->channel_name,
1305 stream->tracefile_count_current);
1306 } else {
1307 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1308 stream->channel_name);
1309 }
d3e2ba59
JD
1310 if (ret < 0) {
1311 goto error;
1312 }
1313 ret = open(fullpath, O_RDONLY);
1314 if (ret < 0) {
1315 PERROR("Relay opening trace file");
1316 goto error;
1317 }
1318 stream->read_fd = ret;
1319 }
1320
d3e2ba59
JD
1321 if (!stream->ctf_trace->metadata_received ||
1322 stream->ctf_trace->metadata_received >
1323 stream->ctf_trace->metadata_sent) {
1324 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1325 reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
d3e2ba59
JD
1326 goto send_reply;
1327 }
1328
1329 len = be32toh(get_packet_info.len);
1330 data = zmalloc(len);
1331 if (!data) {
1332 PERROR("relay data zmalloc");
1333 goto error;
1334 }
1335
1336 ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
1337 if (ret < 0) {
6b6b9a5a
JD
1338 /*
1339 * If the read fd was closed by the streaming side, the
1340 * abort_flag will be set to 1, otherwise it is an error.
1341 */
1342 if (stream->abort_flag == 0) {
1343 PERROR("lseek");
1344 goto error;
1345 }
1346 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1347 goto send_reply;
d3e2ba59 1348 }
6cd525e8
MD
1349 read_len = lttng_read(stream->read_fd, data, len);
1350 if (read_len < len) {
6b6b9a5a
JD
1351 /*
1352 * If the read fd was closed by the streaming side, the
1353 * abort_flag will be set to 1, otherwise it is an error.
1354 */
1355 if (stream->abort_flag == 0) {
1356 PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
1357 stream->read_fd,
1358 be64toh(get_packet_info.offset));
1359 goto error;
1360 } else {
1361 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1362 goto send_reply;
1363 }
d3e2ba59
JD
1364 }
1365 reply.status = htobe32(VIEWER_GET_PACKET_OK);
1366 reply.len = htobe32(len);
1367 send_data = 1;
1368 goto send_reply;
1369
1370error:
1371 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1372
1373send_reply:
1374 reply.flags = htobe32(reply.flags);
eea7556c
MD
1375
1376 health_code_update();
d3e2ba59
JD
1377 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1378 if (ret < 0) {
1379 ERR("Relay data header to viewer");
1380 goto end_unlock;
1381 }
eea7556c 1382 health_code_update();
d3e2ba59
JD
1383
1384 if (send_data) {
eea7556c 1385 health_code_update();
d3e2ba59
JD
1386 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1387 if (ret < 0) {
1388 ERR("Relay send data to viewer");
1389 goto end_unlock;
1390 }
eea7556c 1391 health_code_update();
d3e2ba59
JD
1392 }
1393
1394 DBG("Sent %u bytes for stream %" PRIu64, len,
1395 be64toh(get_packet_info.stream_id));
1396
1397end_unlock:
1398 free(data);
1399 rcu_read_unlock();
1400
1401end:
1402 return ret;
1403}
1404
1405/*
1406 * Send the session's metadata
1407 *
1408 * Return 0 on success else a negative value.
1409 */
1410static
92c6ca54 1411int viewer_get_metadata(struct relay_command *cmd)
d3e2ba59
JD
1412{
1413 int ret = 0;
1414 ssize_t read_len;
1415 uint64_t len = 0;
1416 char *data = NULL;
1417 struct lttng_viewer_get_metadata request;
1418 struct lttng_viewer_metadata_packet reply;
1419 struct relay_viewer_stream *stream;
1420
1421 assert(cmd);
d3e2ba59
JD
1422
1423 DBG("Relay get metadata");
1424
1425 if (cmd->version_check_done == 0) {
1426 ERR("Trying to get metadata before version check");
1427 ret = -1;
1428 goto end;
1429 }
1430
eea7556c 1431 health_code_update();
d3e2ba59
JD
1432 ret = cmd->sock->ops->recvmsg(cmd->sock, &request,
1433 sizeof(request), 0);
1434 if (ret < 0 || ret != sizeof(request)) {
1435 ret = -1;
1436 ERR("Relay didn't receive the whole packet");
1437 goto end;
1438 }
eea7556c 1439 health_code_update();
d3e2ba59
JD
1440
1441 rcu_read_lock();
92c6ca54 1442 stream = live_find_viewer_stream_by_id(be64toh(request.stream_id));
d3e2ba59
JD
1443 if (!stream || !stream->metadata_flag) {
1444 ERR("Invalid metadata stream");
1445 goto error;
1446 }
1447 assert(stream->ctf_trace);
1448 assert(stream->ctf_trace->metadata_sent <=
1449 stream->ctf_trace->metadata_received);
1450
1451 len = stream->ctf_trace->metadata_received -
1452 stream->ctf_trace->metadata_sent;
1453 if (len == 0) {
1454 reply.status = htobe32(VIEWER_NO_NEW_METADATA);
1455 goto send_reply;
1456 }
1457
1458 /* first time, we open the metadata file */
1459 if (stream->read_fd < 0) {
1460 char fullpath[PATH_MAX];
1461
1462 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1463 stream->channel_name);
1464 if (ret < 0) {
1465 goto error;
1466 }
1467 ret = open(fullpath, O_RDONLY);
1468 if (ret < 0) {
1469 PERROR("Relay opening metadata file");
1470 goto error;
1471 }
1472 stream->read_fd = ret;
1473 }
1474
1475 reply.len = htobe64(len);
1476 data = zmalloc(len);
1477 if (!data) {
1478 PERROR("viewer metadata zmalloc");
1479 goto error;
1480 }
1481
6cd525e8
MD
1482 read_len = lttng_read(stream->read_fd, data, len);
1483 if (read_len < len) {
d3e2ba59
JD
1484 PERROR("Relay reading metadata file");
1485 goto error;
1486 }
1487 stream->ctf_trace->metadata_sent += read_len;
1488 reply.status = htobe32(VIEWER_METADATA_OK);
1489 goto send_reply;
1490
1491error:
1492 reply.status = htobe32(VIEWER_METADATA_ERR);
1493
1494send_reply:
eea7556c 1495 health_code_update();
d3e2ba59
JD
1496 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1497 if (ret < 0) {
1498 ERR("Relay data header to viewer");
1499 goto end_unlock;
1500 }
eea7556c 1501 health_code_update();
d3e2ba59
JD
1502
1503 if (len > 0) {
1504 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1505 if (ret < 0) {
1506 ERR("Relay send data to viewer");
1507 goto end_unlock;
1508 }
1509 }
1510
1511 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
1512 be64toh(request.stream_id));
1513
1514 DBG("Metadata sent");
1515
1516end_unlock:
1517 free(data);
1518 rcu_read_unlock();
1519end:
1520 return ret;
1521}
1522
1523/*
1524 * live_relay_unknown_command: send -1 if received unknown command
1525 */
1526static
1527void live_relay_unknown_command(struct relay_command *cmd)
1528{
1529 struct lttcomm_relayd_generic_reply reply;
1530 int ret;
1531
1532 reply.ret_code = htobe32(LTTNG_ERR_UNK);
1533 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
1534 sizeof(struct lttcomm_relayd_generic_reply), 0);
1535 if (ret < 0) {
1536 ERR("Relay sending unknown command");
1537 }
1538}
1539
1540/*
1541 * Process the commands received on the control socket
1542 */
1543static
1544int process_control(struct lttng_viewer_cmd *recv_hdr,
92c6ca54 1545 struct relay_command *cmd, struct lttng_ht *sessions_ht)
d3e2ba59
JD
1546{
1547 int ret = 0;
1548
1549 switch (be32toh(recv_hdr->cmd)) {
1550 case VIEWER_CONNECT:
1551 ret = viewer_connect(cmd);
1552 break;
1553 case VIEWER_LIST_SESSIONS:
1554 ret = viewer_list_sessions(cmd, sessions_ht);
1555 break;
1556 case VIEWER_ATTACH_SESSION:
92c6ca54 1557 ret = viewer_attach_session(cmd, sessions_ht);
d3e2ba59
JD
1558 break;
1559 case VIEWER_GET_NEXT_INDEX:
92c6ca54 1560 ret = viewer_get_next_index(cmd, sessions_ht);
d3e2ba59
JD
1561 break;
1562 case VIEWER_GET_PACKET:
92c6ca54 1563 ret = viewer_get_packet(cmd);
d3e2ba59
JD
1564 break;
1565 case VIEWER_GET_METADATA:
92c6ca54 1566 ret = viewer_get_metadata(cmd);
d3e2ba59
JD
1567 break;
1568 default:
1569 ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
1570 live_relay_unknown_command(cmd);
1571 ret = -1;
1572 goto end;
1573 }
1574
1575end:
1576 return ret;
1577}
1578
/*
 * Remove a file descriptor from the poll set, then close it. A close failure
 * is only logged.
 */
static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	assert(events);

	lttng_poll_del(events, pollfd);

	if (close(pollfd) < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}
1593
1594/*
1595 * Create and add connection to the given hash table.
1596 *
1597 * Return poll add value or else -1 on error.
1598 */
1599static
1600int add_connection(int fd, struct lttng_poll_event *events,
1601 struct lttng_ht *relay_connections_ht)
1602{
1603 int ret;
1604 struct relay_command *relay_connection;
1605
1606 assert(events);
1607 assert(relay_connections_ht);
1608
1609 relay_connection = zmalloc(sizeof(struct relay_command));
1610 if (relay_connection == NULL) {
1611 PERROR("Relay command zmalloc");
1612 goto error;
1613 }
1614
6cd525e8
MD
1615 ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
1616 if (ret < sizeof(*relay_connection)) {
d3e2ba59
JD
1617 PERROR("read relay cmd pipe");
1618 goto error_read;
1619 }
1620
1621 lttng_ht_node_init_ulong(&relay_connection->sock_n,
1622 (unsigned long) relay_connection->sock->fd);
1623 rcu_read_lock();
1624 lttng_ht_add_unique_ulong(relay_connections_ht,
1625 &relay_connection->sock_n);
1626 rcu_read_unlock();
1627
1628 return lttng_poll_add(events, relay_connection->sock->fd,
1629 LPOLLIN | LPOLLRDHUP);
1630
1631error_read:
1632 free(relay_connection);
1633error:
1634 return -1;
1635}
1636
1637static
1638void deferred_free_connection(struct rcu_head *head)
1639{
1640 struct relay_command *relay_connection =
1641 caa_container_of(head, struct relay_command, rcu_node);
1642
1643 if (relay_connection->session &&
1644 relay_connection->session->viewer_attached > 0) {
1645 relay_connection->session->viewer_attached--;
1646 }
1647 lttcomm_destroy_sock(relay_connection->sock);
1648 free(relay_connection);
1649}
1650
1651static
1652void deferred_free_viewer_stream(struct rcu_head *head)
1653{
1654 struct relay_viewer_stream *stream =
1655 caa_container_of(head, struct relay_viewer_stream, rcu_node);
1656
1657 if (stream->ctf_trace) {
1658 uatomic_dec(&stream->ctf_trace->refcount);
1659 assert(uatomic_read(&stream->ctf_trace->refcount) >= 0);
1660 if (uatomic_read(&stream->ctf_trace->refcount) == 0) {
1661 DBG("Freeing ctf_trace %" PRIu64, stream->ctf_trace->id);
1662 free(stream->ctf_trace);
1663 }
1664 }
1665
1666 free(stream->path_name);
1667 free(stream->channel_name);
1668 free(stream);
1669}
1670
1671static
b92fdc2b 1672void viewer_del_streams(uint64_t session_id)
d3e2ba59
JD
1673{
1674 int ret;
1675 struct relay_viewer_stream *stream;
1676 struct lttng_ht_node_u64 *node;
1677 struct lttng_ht_iter iter;
1678
d3e2ba59
JD
1679 rcu_read_lock();
1680 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
eea7556c
MD
1681 health_code_update();
1682
d3e2ba59
JD
1683 node = lttng_ht_iter_get_node_u64(&iter);
1684 if (!node) {
1685 continue;
1686 }
1687
1688 stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
b92fdc2b 1689 if (stream->session_id != session_id) {
d3e2ba59
JD
1690 continue;
1691 }
1692
6b6b9a5a 1693 if (stream->read_fd >= 0) {
d3e2ba59
JD
1694 ret = close(stream->read_fd);
1695 if (ret < 0) {
1696 PERROR("close read_fd");
1697 }
1698 }
6b6b9a5a 1699 if (stream->index_read_fd >= 0) {
d3e2ba59
JD
1700 ret = close(stream->index_read_fd);
1701 if (ret < 0) {
1702 PERROR("close index_read_fd");
1703 }
1704 }
1705 if (stream->metadata_flag && stream->ctf_trace) {
1706 stream->ctf_trace->metadata_sent = 0;
1707 }
1708 ret = lttng_ht_del(viewer_streams_ht, &iter);
1709 assert(!ret);
1710 call_rcu(&stream->rcu_node, deferred_free_viewer_stream);
1711 }
1712 rcu_read_unlock();
1713}
1714
1715/*
1716 * Delete and free a connection.
1717 *
1718 * RCU read side lock MUST be acquired.
1719 */
1720static
1721void del_connection(struct lttng_ht *relay_connections_ht,
92c6ca54 1722 struct lttng_ht_iter *iter, struct relay_command *relay_connection)
d3e2ba59
JD
1723{
1724 int ret;
1725
1726 assert(relay_connections_ht);
1727 assert(iter);
1728 assert(relay_connection);
d3e2ba59
JD
1729
1730 ret = lttng_ht_del(relay_connections_ht, iter);
1731 assert(!ret);
1732
b92fdc2b 1733 viewer_del_streams(relay_connection->session_id);
d3e2ba59
JD
1734
1735 call_rcu(&relay_connection->rcu_node, deferred_free_connection);
1736}
1737
1738/*
1739 * This thread does the actual work
1740 */
1741static
1742void *thread_worker(void *data)
1743{
1744 int ret, err = -1;
1745 uint32_t nb_fd;
1746 struct relay_command *relay_connection;
1747 struct lttng_poll_event events;
1748 struct lttng_ht *relay_connections_ht;
1749 struct lttng_ht_node_ulong *node;
1750 struct lttng_ht_iter iter;
1751 struct lttng_viewer_cmd recv_hdr;
1752 struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
1753 struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;
d3e2ba59
JD
1754
1755 DBG("[thread] Live viewer relay worker started");
1756
1757 rcu_register_thread();
1758
eea7556c
MD
1759 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
1760
d3e2ba59
JD
1761 /* table of connections indexed on socket */
1762 relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1763 if (!relay_connections_ht) {
1764 goto relay_connections_ht_error;
1765 }
1766
1767 ret = create_thread_poll_set(&events, 2);
1768 if (ret < 0) {
1769 goto error_poll_create;
1770 }
1771
1772 ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1773 if (ret < 0) {
1774 goto error;
1775 }
1776
1777restart:
1778 while (1) {
1779 int i;
1780
eea7556c
MD
1781 health_code_update();
1782
d3e2ba59
JD
1783 /* Infinite blocking call, waiting for transmission */
1784 DBG3("Relayd live viewer worker thread polling...");
eea7556c 1785 health_poll_entry();
d3e2ba59 1786 ret = lttng_poll_wait(&events, -1);
eea7556c 1787 health_poll_exit();
d3e2ba59
JD
1788 if (ret < 0) {
1789 /*
1790 * Restart interrupted system call.
1791 */
1792 if (errno == EINTR) {
1793 goto restart;
1794 }
1795 goto error;
1796 }
1797
1798 nb_fd = ret;
1799
1800 /*
1801 * Process control. The control connection is prioritised so we don't
1802 * starve it with high throughput tracing data on the data
1803 * connection.
1804 */
1805 for (i = 0; i < nb_fd; i++) {
1806 /* Fetch once the poll data */
1807 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1808 int pollfd = LTTNG_POLL_GETFD(&events, i);
1809
eea7556c
MD
1810 health_code_update();
1811
d3e2ba59
JD
1812 /* Thread quit pipe has been closed. Killing thread. */
1813 ret = check_thread_quit_pipe(pollfd, revents);
1814 if (ret) {
1815 err = 0;
1816 goto exit;
1817 }
1818
1819 /* Inspect the relay cmd pipe for new connection */
1820 if (pollfd == live_relay_cmd_pipe[0]) {
1821 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1822 ERR("Relay live pipe error");
1823 goto error;
1824 } else if (revents & LPOLLIN) {
1825 DBG("Relay live viewer command received");
1826 ret = add_connection(live_relay_cmd_pipe[0],
1827 &events, relay_connections_ht);
1828 if (ret < 0) {
1829 goto error;
1830 }
1831 }
1832 } else if (revents) {
1833 rcu_read_lock();
1834 lttng_ht_lookup(relay_connections_ht,
1835 (void *)((unsigned long) pollfd), &iter);
1836 node = lttng_ht_iter_get_node_ulong(&iter);
1837 if (node == NULL) {
1838 DBG2("Relay viewer sock %d not found", pollfd);
1839 rcu_read_unlock();
1840 goto error;
1841 }
1842 relay_connection = caa_container_of(node, struct relay_command,
1843 sock_n);
1844
1845 if (revents & (LPOLLERR)) {
d3e2ba59
JD
1846 cleanup_poll_connection(&events, pollfd);
1847 del_connection(relay_connections_ht, &iter,
92c6ca54 1848 relay_connection);
d3e2ba59
JD
1849 } else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
1850 DBG("Viewer socket %d hung up", pollfd);
1851 cleanup_poll_connection(&events, pollfd);
1852 del_connection(relay_connections_ht, &iter,
92c6ca54 1853 relay_connection);
d3e2ba59
JD
1854 } else if (revents & LPOLLIN) {
1855 ret = relay_connection->sock->ops->recvmsg(
1856 relay_connection->sock, &recv_hdr,
1857 sizeof(struct lttng_viewer_cmd),
1858 0);
1859 /* connection closed */
1860 if (ret <= 0) {
1861 cleanup_poll_connection(&events, pollfd);
aaec7998 1862 del_connection(relay_connections_ht, &iter,
92c6ca54 1863 relay_connection);
d3e2ba59
JD
1864 DBG("Viewer control connection closed with %d",
1865 pollfd);
1866 } else {
1867 if (relay_connection->session) {
1868 DBG2("Relay viewer worker receiving data for "
1869 "session: %" PRIu64,
1870 relay_connection->session->id);
1871 }
1872 ret = process_control(&recv_hdr, relay_connection,
92c6ca54 1873 sessions_ht);
d3e2ba59
JD
1874 if (ret < 0) {
1875 /* Clear the session on error. */
1876 cleanup_poll_connection(&events, pollfd);
1877 del_connection(relay_connections_ht, &iter,
92c6ca54 1878 relay_connection);
d3e2ba59
JD
1879 DBG("Viewer connection closed with %d", pollfd);
1880 }
1881 }
1882 }
1883 rcu_read_unlock();
1884 }
1885 }
1886 }
1887
1888exit:
1889error:
1890 lttng_poll_clean(&events);
1891
1892 /* empty the hash table and free the memory */
1893 rcu_read_lock();
1894 cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
eea7556c
MD
1895 health_code_update();
1896
d3e2ba59
JD
1897 node = lttng_ht_iter_get_node_ulong(&iter);
1898 if (!node) {
1899 continue;
1900 }
1901
1902 relay_connection = caa_container_of(node, struct relay_command,
1903 sock_n);
92c6ca54 1904 del_connection(relay_connections_ht, &iter, relay_connection);
d3e2ba59
JD
1905 }
1906 rcu_read_unlock();
1907error_poll_create:
1908 lttng_ht_destroy(relay_connections_ht);
1909relay_connections_ht_error:
1910 /* Close relay cmd pipes */
1911 utils_close_pipe(live_relay_cmd_pipe);
1912 if (err) {
1913 DBG("Viewer worker thread exited with error");
1914 }
1915 DBG("Viewer worker thread cleanup complete");
eea7556c
MD
1916 if (err) {
1917 health_error();
1918 ERR("Health error occurred in %s", __func__);
1919 }
1920 health_unregister(health_relayd);
d3e2ba59
JD
1921 stop_threads();
1922 rcu_unregister_thread();
1923 return NULL;
1924}
1925
1926/*
1927 * Create the relay command pipe to wake thread_manage_apps.
1928 * Closed in cleanup().
1929 */
1930static int create_relay_cmd_pipe(void)
1931{
1932 int ret;
1933
1934 ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);
1935
1936 return ret;
1937}
1938
aaec7998 1939void live_stop_threads(void)
d3e2ba59
JD
1940{
1941 int ret;
1942 void *status;
1943
1944 stop_threads();
1945
1946 ret = pthread_join(live_listener_thread, &status);
1947 if (ret != 0) {
1948 PERROR("pthread_join live listener");
1949 goto error; /* join error, exit without cleanup */
1950 }
1951
1952 ret = pthread_join(live_worker_thread, &status);
1953 if (ret != 0) {
1954 PERROR("pthread_join live worker");
1955 goto error; /* join error, exit without cleanup */
1956 }
1957
1958 ret = pthread_join(live_dispatcher_thread, &status);
1959 if (ret != 0) {
1960 PERROR("pthread_join live dispatcher");
1961 goto error; /* join error, exit without cleanup */
1962 }
1963
1964 cleanup();
1965
1966error:
1967 return;
1968}
1969
1970/*
1971 * main
1972 */
1973int live_start_threads(struct lttng_uri *uri,
42415026 1974 struct relay_local_data *relay_ctx, int quit_pipe[2])
d3e2ba59
JD
1975{
1976 int ret = 0;
1977 void *status;
1978 int is_root;
1979
1980 assert(uri);
1981 live_uri = uri;
1982
42415026
DG
1983 live_thread_quit_pipe[0] = quit_pipe[0];
1984 live_thread_quit_pipe[1] = quit_pipe[1];
d3e2ba59
JD
1985
1986 /* Check if daemon is UID = 0 */
1987 is_root = !getuid();
1988
1989 if (!is_root) {
1990 if (live_uri->port < 1024) {
1991 ERR("Need to be root to use ports < 1024");
1992 ret = -1;
1993 goto exit;
1994 }
1995 }
1996
1997 /* Setup the thread apps communication pipe. */
1998 if ((ret = create_relay_cmd_pipe()) < 0) {
1999 goto exit;
2000 }
2001
2002 /* Init relay command queue. */
2003 cds_wfq_init(&viewer_cmd_queue.queue);
2004
2005 /* Set up max poll set size */
2006 lttng_poll_set_max_size();
2007
2008 /* Setup the dispatcher thread */
2009 ret = pthread_create(&live_dispatcher_thread, NULL,
2010 thread_dispatcher, (void *) NULL);
2011 if (ret != 0) {
2012 PERROR("pthread_create viewer dispatcher");
2013 goto exit_dispatcher;
2014 }
2015
2016 /* Setup the worker thread */
2017 ret = pthread_create(&live_worker_thread, NULL,
2018 thread_worker, relay_ctx);
2019 if (ret != 0) {
2020 PERROR("pthread_create viewer worker");
2021 goto exit_worker;
2022 }
2023
2024 /* Setup the listener thread */
2025 ret = pthread_create(&live_listener_thread, NULL,
2026 thread_listener, (void *) NULL);
2027 if (ret != 0) {
2028 PERROR("pthread_create viewer listener");
2029 goto exit_listener;
2030 }
2031
2032 ret = 0;
2033 goto end;
2034
2035exit_listener:
2036 ret = pthread_join(live_listener_thread, &status);
2037 if (ret != 0) {
2038 PERROR("pthread_join live listener");
2039 goto error; /* join error, exit without cleanup */
2040 }
2041
2042exit_worker:
2043 ret = pthread_join(live_worker_thread, &status);
2044 if (ret != 0) {
2045 PERROR("pthread_join live worker");
2046 goto error; /* join error, exit without cleanup */
2047 }
2048
2049exit_dispatcher:
2050 ret = pthread_join(live_dispatcher_thread, &status);
2051 if (ret != 0) {
2052 PERROR("pthread_join live dispatcher");
2053 goto error; /* join error, exit without cleanup */
2054 }
2055
2056exit:
2057 cleanup();
2058
2059end:
2060error:
2061 return ret;
2062}
This page took 0.10821 seconds and 5 git commands to generate.