Implement the relayd live features
[lttng-tools.git] / src / common / relayd / relayd.c
1 /*
2 * Copyright (C) 2012 - David Goulet <dgoulet@efficios.com>
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License, version 2 only, as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
18 #define _GNU_SOURCE
19 #include <assert.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/stat.h>
24 #include <inttypes.h>
25
26 #include <common/common.h>
27 #include <common/defaults.h>
28 #include <common/sessiond-comm/relayd.h>
29 #include <common/index/lttng-index.h>
30
31 #include "relayd.h"
32
33 /*
34 * Send command. Fill up the header and append the data.
35 */
36 static int send_command(struct lttcomm_relayd_sock *rsock,
37 enum lttcomm_relayd_command cmd, void *data, size_t size,
38 int flags)
39 {
40 int ret;
41 struct lttcomm_relayd_hdr header;
42 char *buf;
43 uint64_t buf_size = sizeof(header);
44
45 if (rsock->sock.fd < 0) {
46 return -ECONNRESET;
47 }
48
49 if (data) {
50 buf_size += size;
51 }
52
53 buf = zmalloc(buf_size);
54 if (buf == NULL) {
55 PERROR("zmalloc relayd send command buf");
56 ret = -1;
57 goto alloc_error;
58 }
59
60 header.cmd = htobe32(cmd);
61 header.data_size = htobe64(size);
62
63 /* Zeroed for now since not used. */
64 header.cmd_version = 0;
65 header.circuit_id = 0;
66
67 /* Prepare buffer to send. */
68 memcpy(buf, &header, sizeof(header));
69 if (data) {
70 memcpy(buf + sizeof(header), data, size);
71 }
72
73 ret = rsock->sock.ops->sendmsg(&rsock->sock, buf, buf_size, flags);
74 if (ret < 0) {
75 ret = -errno;
76 goto error;
77 }
78
79 DBG3("Relayd sending command %d of size %" PRIu64, cmd, buf_size);
80
81 error:
82 free(buf);
83 alloc_error:
84 return ret;
85 }
86
87 /*
88 * Receive reply data on socket. This MUST be call after send_command or else
89 * could result in unexpected behavior(s).
90 */
91 static int recv_reply(struct lttcomm_relayd_sock *rsock, void *data, size_t size)
92 {
93 int ret;
94
95 if (rsock->sock.fd < 0) {
96 return -ECONNRESET;
97 }
98
99 DBG3("Relayd waiting for reply of size %zu", size);
100
101 ret = rsock->sock.ops->recvmsg(&rsock->sock, data, size, 0);
102 if (ret <= 0 || ret != size) {
103 if (ret == 0) {
104 /* Orderly shutdown. */
105 DBG("Socket %d has performed an orderly shutdown", rsock->sock.fd);
106 } else {
107 DBG("Receiving reply failed on sock %d for size %zu with ret %d",
108 rsock->sock.fd, size, ret);
109 }
110 /* Always return -1 here and the caller can use errno. */
111 ret = -1;
112 goto error;
113 }
114
115 error:
116 return ret;
117 }
118
119 /*
120 * Starting at 2.4, RELAYD_CREATE_SESSION takes additional parameters to
121 * support the live reading capability.
122 */
123 static int relayd_create_session_2_4(struct lttcomm_relayd_sock *rsock,
124 uint64_t *session_id, char *session_name, char *hostname,
125 int session_live_timer)
126 {
127 int ret;
128 struct lttcomm_relayd_create_session_2_4 msg;
129
130 strncpy(msg.session_name, session_name, sizeof(msg.session_name));
131 strncpy(msg.hostname, hostname, sizeof(msg.hostname));
132 msg.live_timer = htobe32(session_live_timer);
133
134 /* Send command */
135 ret = send_command(rsock, RELAYD_CREATE_SESSION, &msg, sizeof(msg), 0);
136 if (ret < 0) {
137 goto error;
138 }
139
140 error:
141 return ret;
142 }
143
144 /*
145 * RELAYD_CREATE_SESSION from 2.1 to 2.3.
146 */
147 static int relayd_create_session_2_1(struct lttcomm_relayd_sock *rsock,
148 uint64_t *session_id)
149 {
150 int ret;
151
152 /* Send command */
153 ret = send_command(rsock, RELAYD_CREATE_SESSION, NULL, 0, 0);
154 if (ret < 0) {
155 goto error;
156 }
157
158 error:
159 return ret;
160 }
161
162 /*
163 * Send a RELAYD_CREATE_SESSION command to the relayd with the given socket and
164 * set session_id of the relayd if we have a successful reply from the relayd.
165 *
166 * On success, return 0 else a negative value which is either an errno error or
167 * a lttng error code from the relayd.
168 */
169 int relayd_create_session(struct lttcomm_relayd_sock *rsock, uint64_t *session_id,
170 char *session_name, char *hostname, int session_live_timer)
171 {
172 int ret;
173 struct lttcomm_relayd_status_session reply;
174
175 assert(rsock);
176 assert(session_id);
177
178 DBG("Relayd create session");
179
180 switch(rsock->minor) {
181 case 1:
182 case 2:
183 case 3:
184 ret = relayd_create_session_2_1(rsock, session_id);
185 case 4:
186 default:
187 ret = relayd_create_session_2_4(rsock, session_id,
188 session_name, hostname,
189 session_live_timer);
190 }
191
192 if (ret < 0) {
193 goto error;
194 }
195
196 /* Receive response */
197 ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
198 if (ret < 0) {
199 goto error;
200 }
201
202 reply.session_id = be64toh(reply.session_id);
203 reply.ret_code = be32toh(reply.ret_code);
204
205 /* Return session id or negative ret code. */
206 if (reply.ret_code != LTTNG_OK) {
207 ret = -1;
208 ERR("Relayd create session replied error %d", reply.ret_code);
209 goto error;
210 } else {
211 ret = 0;
212 *session_id = reply.session_id;
213 }
214
215 DBG("Relayd session created with id %" PRIu64, reply.session_id);
216
217 error:
218 return ret;
219 }
220
221 /*
222 * Add stream on the relayd and assign stream handle to the stream_id argument.
223 *
224 * On success return 0 else return ret_code negative value.
225 */
226 int relayd_add_stream(struct lttcomm_relayd_sock *rsock, const char *channel_name,
227 const char *pathname, uint64_t *stream_id,
228 uint64_t tracefile_size, uint64_t tracefile_count)
229 {
230 int ret;
231 struct lttcomm_relayd_add_stream msg;
232 struct lttcomm_relayd_add_stream_2_2 msg_2_2;
233 struct lttcomm_relayd_status_stream reply;
234
235 /* Code flow error. Safety net. */
236 assert(rsock);
237 assert(channel_name);
238 assert(pathname);
239
240 DBG("Relayd adding stream for channel name %s", channel_name);
241
242 /* Compat with relayd 2.1 */
243 if (rsock->minor == 1) {
244 strncpy(msg.channel_name, channel_name, sizeof(msg.channel_name));
245 strncpy(msg.pathname, pathname, sizeof(msg.pathname));
246
247 /* Send command */
248 ret = send_command(rsock, RELAYD_ADD_STREAM, (void *) &msg, sizeof(msg), 0);
249 if (ret < 0) {
250 goto error;
251 }
252 } else {
253 /* Compat with relayd 2.2+ */
254 strncpy(msg_2_2.channel_name, channel_name, sizeof(msg_2_2.channel_name));
255 strncpy(msg_2_2.pathname, pathname, sizeof(msg_2_2.pathname));
256 msg_2_2.tracefile_size = htobe64(tracefile_size);
257 msg_2_2.tracefile_count = htobe64(tracefile_count);
258
259 /* Send command */
260 ret = send_command(rsock, RELAYD_ADD_STREAM, (void *) &msg_2_2, sizeof(msg_2_2), 0);
261 if (ret < 0) {
262 goto error;
263 }
264 }
265
266 /* Waiting for reply */
267 ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
268 if (ret < 0) {
269 goto error;
270 }
271
272 /* Back to host bytes order. */
273 reply.handle = be64toh(reply.handle);
274 reply.ret_code = be32toh(reply.ret_code);
275
276 /* Return session id or negative ret code. */
277 if (reply.ret_code != LTTNG_OK) {
278 ret = -1;
279 ERR("Relayd add stream replied error %d", reply.ret_code);
280 } else {
281 /* Success */
282 ret = 0;
283 *stream_id = reply.handle;
284 }
285
286 DBG("Relayd stream added successfully with handle %" PRIu64,
287 reply.handle);
288
289 error:
290 return ret;
291 }
292
293 /*
294 * Check version numbers on the relayd.
295 * If major versions are compatible, we assign minor_to_use to the
296 * minor version of the procotol we are going to use for this session.
297 *
298 * Return 0 if compatible else negative value.
299 */
300 int relayd_version_check(struct lttcomm_relayd_sock *rsock)
301 {
302 int ret;
303 struct lttcomm_relayd_version msg;
304
305 /* Code flow error. Safety net. */
306 assert(rsock);
307
308 DBG("Relayd version check for major.minor %u.%u", rsock->major,
309 rsock->minor);
310
311 /* Prepare network byte order before transmission. */
312 msg.major = htobe32(rsock->major);
313 msg.minor = htobe32(rsock->minor);
314
315 /* Send command */
316 ret = send_command(rsock, RELAYD_VERSION, (void *) &msg, sizeof(msg), 0);
317 if (ret < 0) {
318 goto error;
319 }
320
321 /* Receive response */
322 ret = recv_reply(rsock, (void *) &msg, sizeof(msg));
323 if (ret < 0) {
324 goto error;
325 }
326
327 /* Set back to host bytes order */
328 msg.major = be32toh(msg.major);
329 msg.minor = be32toh(msg.minor);
330
331 /*
332 * Only validate the major version. If the other side is higher,
333 * communication is not possible. Only major version equal can talk to each
334 * other. If the minor version differs, the lowest version is used by both
335 * sides.
336 */
337 if (msg.major != rsock->major) {
338 /* Not compatible */
339 ret = -1;
340 DBG2("Relayd version is NOT compatible. Relayd version %u != %u (us)",
341 msg.major, rsock->major);
342 goto error;
343 }
344
345 /*
346 * If the relayd's minor version is higher, it will adapt to our version so
347 * we can continue to use the latest relayd communication data structure.
348 * If the received minor version is higher, the relayd should adapt to us.
349 */
350 if (rsock->minor > msg.minor) {
351 rsock->minor = msg.minor;
352 }
353
354 /* Version number compatible */
355 DBG2("Relayd version is compatible, using protocol version %u.%u",
356 rsock->major, rsock->minor);
357 ret = 0;
358
359 error:
360 return ret;
361 }
362
363 /*
364 * Add stream on the relayd and assign stream handle to the stream_id argument.
365 *
366 * On success return 0 else return ret_code negative value.
367 */
368 int relayd_send_metadata(struct lttcomm_relayd_sock *rsock, size_t len)
369 {
370 int ret;
371
372 /* Code flow error. Safety net. */
373 assert(rsock);
374
375 DBG("Relayd sending metadata of size %zu", len);
376
377 /* Send command */
378 ret = send_command(rsock, RELAYD_SEND_METADATA, NULL, len, 0);
379 if (ret < 0) {
380 goto error;
381 }
382
383 DBG2("Relayd metadata added successfully");
384
385 /*
386 * After that call, the metadata data MUST be sent to the relayd so the
387 * receive size on the other end matches the len of the metadata packet
388 * header. This is why we don't wait for a reply here.
389 */
390
391 error:
392 return ret;
393 }
394
395 /*
396 * Connect to relay daemon with an allocated lttcomm_relayd_sock.
397 */
398 int relayd_connect(struct lttcomm_relayd_sock *rsock)
399 {
400 /* Code flow error. Safety net. */
401 assert(rsock);
402
403 if (!rsock->sock.ops) {
404 /*
405 * Attempting a connect on a non-initialized socket.
406 */
407 return -ECONNRESET;
408 }
409
410 DBG3("Relayd connect ...");
411
412 return rsock->sock.ops->connect(&rsock->sock);
413 }
414
415 /*
416 * Close relayd socket with an allocated lttcomm_relayd_sock.
417 *
418 * If no socket operations are found, simply return 0 meaning that everything
419 * is fine. Without operations, the socket can not possibly be opened or used.
420 * This is possible if the socket was allocated but not created. However, the
421 * caller could simply use it to store a valid file descriptor for instance
422 * passed over a Unix socket and call this to cleanup but still without a valid
423 * ops pointer.
424 *
425 * Return the close returned value. On error, a negative value is usually
426 * returned back from close(2).
427 */
428 int relayd_close(struct lttcomm_relayd_sock *rsock)
429 {
430 int ret;
431
432 /* Code flow error. Safety net. */
433 assert(rsock);
434
435 /* An invalid fd is fine, return success. */
436 if (rsock->sock.fd < 0) {
437 ret = 0;
438 goto end;
439 }
440
441 DBG3("Relayd closing socket %d", rsock->sock.fd);
442
443 if (rsock->sock.ops) {
444 ret = rsock->sock.ops->close(&rsock->sock);
445 } else {
446 /* Default call if no specific ops found. */
447 ret = close(rsock->sock.fd);
448 if (ret < 0) {
449 PERROR("relayd_close default close");
450 }
451 }
452 rsock->sock.fd = -1;
453
454 end:
455 return ret;
456 }
457
458 /*
459 * Send data header structure to the relayd.
460 */
461 int relayd_send_data_hdr(struct lttcomm_relayd_sock *rsock,
462 struct lttcomm_relayd_data_hdr *hdr, size_t size)
463 {
464 int ret;
465
466 /* Code flow error. Safety net. */
467 assert(rsock);
468 assert(hdr);
469
470 if (rsock->sock.fd < 0) {
471 return -ECONNRESET;
472 }
473
474 DBG3("Relayd sending data header of size %zu", size);
475
476 /* Again, safety net */
477 if (size == 0) {
478 size = sizeof(struct lttcomm_relayd_data_hdr);
479 }
480
481 /* Only send data header. */
482 ret = rsock->sock.ops->sendmsg(&rsock->sock, hdr, size, 0);
483 if (ret < 0) {
484 ret = -errno;
485 goto error;
486 }
487
488 /*
489 * The data MUST be sent right after that command for the receive on the
490 * other end to match the size in the header.
491 */
492
493 error:
494 return ret;
495 }
496
497 /*
498 * Send close stream command to the relayd.
499 */
500 int relayd_send_close_stream(struct lttcomm_relayd_sock *rsock, uint64_t stream_id,
501 uint64_t last_net_seq_num)
502 {
503 int ret;
504 struct lttcomm_relayd_close_stream msg;
505 struct lttcomm_relayd_generic_reply reply;
506
507 /* Code flow error. Safety net. */
508 assert(rsock);
509
510 DBG("Relayd closing stream id %" PRIu64, stream_id);
511
512 msg.stream_id = htobe64(stream_id);
513 msg.last_net_seq_num = htobe64(last_net_seq_num);
514
515 /* Send command */
516 ret = send_command(rsock, RELAYD_CLOSE_STREAM, (void *) &msg, sizeof(msg), 0);
517 if (ret < 0) {
518 goto error;
519 }
520
521 /* Receive response */
522 ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
523 if (ret < 0) {
524 goto error;
525 }
526
527 reply.ret_code = be32toh(reply.ret_code);
528
529 /* Return session id or negative ret code. */
530 if (reply.ret_code != LTTNG_OK) {
531 ret = -1;
532 ERR("Relayd close stream replied error %d", reply.ret_code);
533 } else {
534 /* Success */
535 ret = 0;
536 }
537
538 DBG("Relayd close stream id %" PRIu64 " successfully", stream_id);
539
540 error:
541 return ret;
542 }
543
544 /*
545 * Check for data availability for a given stream id.
546 *
547 * Return 0 if NOT pending, 1 if so and a negative value on error.
548 */
549 int relayd_data_pending(struct lttcomm_relayd_sock *rsock, uint64_t stream_id,
550 uint64_t last_net_seq_num)
551 {
552 int ret;
553 struct lttcomm_relayd_data_pending msg;
554 struct lttcomm_relayd_generic_reply reply;
555
556 /* Code flow error. Safety net. */
557 assert(rsock);
558
559 DBG("Relayd data pending for stream id %" PRIu64, stream_id);
560
561 msg.stream_id = htobe64(stream_id);
562 msg.last_net_seq_num = htobe64(last_net_seq_num);
563
564 /* Send command */
565 ret = send_command(rsock, RELAYD_DATA_PENDING, (void *) &msg,
566 sizeof(msg), 0);
567 if (ret < 0) {
568 goto error;
569 }
570
571 /* Receive response */
572 ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
573 if (ret < 0) {
574 goto error;
575 }
576
577 reply.ret_code = be32toh(reply.ret_code);
578
579 /* Return session id or negative ret code. */
580 if (reply.ret_code >= LTTNG_OK) {
581 ERR("Relayd data pending replied error %d", reply.ret_code);
582 }
583
584 /* At this point, the ret code is either 1 or 0 */
585 ret = reply.ret_code;
586
587 DBG("Relayd data is %s pending for stream id %" PRIu64,
588 ret == 1 ? "" : "NOT", stream_id);
589
590 error:
591 return ret;
592 }
593
594 /*
595 * Check on the relayd side for a quiescent state on the control socket.
596 */
597 int relayd_quiescent_control(struct lttcomm_relayd_sock *rsock,
598 uint64_t metadata_stream_id)
599 {
600 int ret;
601 struct lttcomm_relayd_quiescent_control msg;
602 struct lttcomm_relayd_generic_reply reply;
603
604 /* Code flow error. Safety net. */
605 assert(rsock);
606
607 DBG("Relayd checking quiescent control state");
608
609 msg.stream_id = htobe64(metadata_stream_id);
610
611 /* Send command */
612 ret = send_command(rsock, RELAYD_QUIESCENT_CONTROL, &msg, sizeof(msg), 0);
613 if (ret < 0) {
614 goto error;
615 }
616
617 /* Receive response */
618 ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
619 if (ret < 0) {
620 goto error;
621 }
622
623 reply.ret_code = be32toh(reply.ret_code);
624
625 /* Return session id or negative ret code. */
626 if (reply.ret_code != LTTNG_OK) {
627 ret = -1;
628 ERR("Relayd quiescent control replied error %d", reply.ret_code);
629 goto error;
630 }
631
632 /* Control socket is quiescent */
633 return 0;
634
635 error:
636 return ret;
637 }
638
639 /*
640 * Begin a data pending command for a specific session id.
641 */
642 int relayd_begin_data_pending(struct lttcomm_relayd_sock *rsock, uint64_t id)
643 {
644 int ret;
645 struct lttcomm_relayd_begin_data_pending msg;
646 struct lttcomm_relayd_generic_reply reply;
647
648 /* Code flow error. Safety net. */
649 assert(rsock);
650
651 DBG("Relayd begin data pending");
652
653 msg.session_id = htobe64(id);
654
655 /* Send command */
656 ret = send_command(rsock, RELAYD_BEGIN_DATA_PENDING, &msg, sizeof(msg), 0);
657 if (ret < 0) {
658 goto error;
659 }
660
661 /* Receive response */
662 ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
663 if (ret < 0) {
664 goto error;
665 }
666
667 reply.ret_code = be32toh(reply.ret_code);
668
669 /* Return session id or negative ret code. */
670 if (reply.ret_code != LTTNG_OK) {
671 ret = -1;
672 ERR("Relayd begin data pending replied error %d", reply.ret_code);
673 goto error;
674 }
675
676 return 0;
677
678 error:
679 return ret;
680 }
681
682 /*
683 * End a data pending command for a specific session id.
684 *
685 * Return 0 on success and set is_data_inflight to 0 if no data is being
686 * streamed or 1 if it is the case.
687 */
688 int relayd_end_data_pending(struct lttcomm_relayd_sock *rsock, uint64_t id,
689 unsigned int *is_data_inflight)
690 {
691 int ret;
692 struct lttcomm_relayd_end_data_pending msg;
693 struct lttcomm_relayd_generic_reply reply;
694
695 /* Code flow error. Safety net. */
696 assert(rsock);
697
698 DBG("Relayd end data pending");
699
700 msg.session_id = htobe64(id);
701
702 /* Send command */
703 ret = send_command(rsock, RELAYD_END_DATA_PENDING, &msg, sizeof(msg), 0);
704 if (ret < 0) {
705 goto error;
706 }
707
708 /* Receive response */
709 ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
710 if (ret < 0) {
711 goto error;
712 }
713
714 reply.ret_code = be32toh(reply.ret_code);
715 if (reply.ret_code < 0) {
716 ret = reply.ret_code;
717 goto error;
718 }
719
720 *is_data_inflight = reply.ret_code;
721
722 DBG("Relayd end data pending is data inflight: %d", reply.ret_code);
723
724 return 0;
725
726 error:
727 return ret;
728 }
729
730 /*
731 * Send index to the relayd.
732 */
733 int relayd_send_index(struct lttcomm_relayd_sock *rsock,
734 struct lttng_packet_index *index, uint64_t relay_stream_id,
735 uint64_t net_seq_num)
736 {
737 int ret;
738 struct lttcomm_relayd_index msg;
739 struct lttcomm_relayd_generic_reply reply;
740
741 /* Code flow error. Safety net. */
742 assert(rsock);
743
744 if (rsock->minor < 4) {
745 DBG("Not sending indexes before protocol 2.4");
746 ret = 0;
747 goto error;
748 }
749
750 DBG("Relayd sending index for stream ID %" PRIu64, relay_stream_id);
751
752 msg.relay_stream_id = htobe64(relay_stream_id);
753 msg.net_seq_num = htobe64(net_seq_num);
754
755 /* The index is already in big endian. */
756 msg.packet_size = index->packet_size;
757 msg.content_size = index->content_size;
758 msg.timestamp_begin = index->timestamp_begin;
759 msg.timestamp_end = index->timestamp_end;
760 msg.events_discarded = index->events_discarded;
761 msg.stream_id = index->stream_id;
762
763 /* Send command */
764 ret = send_command(rsock, RELAYD_SEND_INDEX, &msg, sizeof(msg), 0);
765 if (ret < 0) {
766 goto error;
767 }
768
769 /* Receive response */
770 ret = recv_reply(rsock, (void *) &reply, sizeof(reply));
771 if (ret < 0) {
772 goto error;
773 }
774
775 reply.ret_code = be32toh(reply.ret_code);
776
777 /* Return session id or negative ret code. */
778 if (reply.ret_code != LTTNG_OK) {
779 ret = -1;
780 ERR("Relayd send index replied error %d", reply.ret_code);
781 } else {
782 /* Success */
783 ret = 0;
784 }
785
786 error:
787 return ret;
788 }
This page took 0.04728 seconds and 6 git commands to generate.