Implement $PATH binary searching function for userspace-probe
[lttng-tools.git] / src / bin / lttng-sessiond / kernel.c
1 /*
2 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2 only,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
18 #define _LGPL_SOURCE
19 #include <fcntl.h>
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25
26 #include <common/common.h>
27 #include <common/kernel-ctl/kernel-ctl.h>
28 #include <common/kernel-ctl/kernel-ioctl.h>
29 #include <common/sessiond-comm/sessiond-comm.h>
30
31 #include "consumer.h"
32 #include "kernel.h"
33 #include "kernel-consumer.h"
34 #include "kern-modules.h"
35 #include "utils.h"
36 #include "rotate.h"
37
38 /*
39 * Key used to reference a channel between the sessiond and the consumer. This
40 * is only read and updated with the session_list lock held.
41 */
42 static uint64_t next_kernel_channel_key;
43
44 #include <lttng/userspace-probe.h>
45 #include <lttng/userspace-probe-internal.h>
46 /*
47 * Add context on a kernel channel.
48 *
49 * Assumes the ownership of ctx.
50 */
51 int kernel_add_channel_context(struct ltt_kernel_channel *chan,
52 struct ltt_kernel_context *ctx)
53 {
54 int ret;
55
56 assert(chan);
57 assert(ctx);
58
59 DBG("Adding context to channel %s", chan->channel->name);
60 ret = kernctl_add_context(chan->fd, &ctx->ctx);
61 if (ret < 0) {
62 switch (-ret) {
63 case ENOSYS:
64 /* Exists but not available for this kernel */
65 ret = LTTNG_ERR_KERN_CONTEXT_UNAVAILABLE;
66 goto error;
67 case EEXIST:
68 /* If EEXIST, we just ignore the error */
69 ret = 0;
70 goto end;
71 default:
72 PERROR("add context ioctl");
73 ret = LTTNG_ERR_KERN_CONTEXT_FAIL;
74 goto error;
75 }
76 }
77 ret = 0;
78
79 end:
80 cds_list_add_tail(&ctx->list, &chan->ctx_list);
81 ctx->in_list = true;
82 ctx = NULL;
83 error:
84 if (ctx) {
85 trace_kernel_destroy_context(ctx);
86 }
87 return ret;
88 }
89
90 /*
91 * Create a new kernel session, register it to the kernel tracer and add it to
92 * the session daemon session.
93 */
94 int kernel_create_session(struct ltt_session *session, int tracer_fd)
95 {
96 int ret;
97 struct ltt_kernel_session *lks;
98
99 assert(session);
100
101 /* Allocate data structure */
102 lks = trace_kernel_create_session();
103 if (lks == NULL) {
104 ret = -1;
105 goto error;
106 }
107
108 /* Kernel tracer session creation */
109 ret = kernctl_create_session(tracer_fd);
110 if (ret < 0) {
111 PERROR("ioctl kernel create session");
112 goto error;
113 }
114
115 lks->fd = ret;
116 /* Prevent fd duplication after execlp() */
117 ret = fcntl(lks->fd, F_SETFD, FD_CLOEXEC);
118 if (ret < 0) {
119 PERROR("fcntl session fd");
120 }
121
122 lks->id = session->id;
123 lks->consumer_fds_sent = 0;
124 session->kernel_session = lks;
125
126 DBG("Kernel session created (fd: %d)", lks->fd);
127
128 return 0;
129
130 error:
131 if (lks) {
132 trace_kernel_destroy_session(lks);
133 }
134 return ret;
135 }
136
137 /*
138 * Create a kernel channel, register it to the kernel tracer and add it to the
139 * kernel session.
140 */
141 int kernel_create_channel(struct ltt_kernel_session *session,
142 struct lttng_channel *chan)
143 {
144 int ret;
145 struct ltt_kernel_channel *lkc;
146
147 assert(session);
148 assert(chan);
149
150 /* Allocate kernel channel */
151 lkc = trace_kernel_create_channel(chan);
152 if (lkc == NULL) {
153 goto error;
154 }
155
156 DBG3("Kernel create channel %s with attr: %d, %" PRIu64 ", %" PRIu64 ", %u, %u, %d, %d",
157 chan->name, lkc->channel->attr.overwrite,
158 lkc->channel->attr.subbuf_size, lkc->channel->attr.num_subbuf,
159 lkc->channel->attr.switch_timer_interval, lkc->channel->attr.read_timer_interval,
160 lkc->channel->attr.live_timer_interval, lkc->channel->attr.output);
161
162 /* Kernel tracer channel creation */
163 ret = kernctl_create_channel(session->fd, &lkc->channel->attr);
164 if (ret < 0) {
165 PERROR("ioctl kernel create channel");
166 goto error;
167 }
168
169 /* Setup the channel fd */
170 lkc->fd = ret;
171 /* Prevent fd duplication after execlp() */
172 ret = fcntl(lkc->fd, F_SETFD, FD_CLOEXEC);
173 if (ret < 0) {
174 PERROR("fcntl session fd");
175 }
176
177 /* Add channel to session */
178 cds_list_add(&lkc->list, &session->channel_list.head);
179 session->channel_count++;
180 lkc->session = session;
181 lkc->key = ++next_kernel_channel_key;
182
183 DBG("Kernel channel %s created (fd: %d, key: %" PRIu64 ")",
184 lkc->channel->name, lkc->fd, lkc->key);
185
186 return 0;
187
188 error:
189 if (lkc) {
190 free(lkc->channel);
191 free(lkc);
192 }
193 return -1;
194 }
195
196 /*
197 * Compute the offset of the instrumentation byte in the binary based on the
198 * function probe location using the ELF lookup method.
199 *
200 * Returns 0 on success and set the offset out parameter to the offset of the
201 * elf symbol
202 * Returns -1 on error
203 */
204 static
205 int extract_userspace_probe_offset_function_elf(
206 struct lttng_userspace_probe_location *probe_location,
207 struct ltt_kernel_session *session, uint64_t *offset)
208 {
209 int fd;
210 int ret = 0;
211 const char *symbol = NULL;
212 struct lttng_userspace_probe_location_lookup_method *lookup = NULL;
213 enum lttng_userspace_probe_location_lookup_method_type lookup_method_type;
214
215
216 assert(lttng_userspace_probe_location_get_type(probe_location) ==
217 LTTNG_USERSPACE_PROBE_LOCATION_TYPE_FUNCTION);
218
219 lookup = lttng_userspace_probe_location_get_lookup_method(
220 probe_location);
221 if (!lookup) {
222 ret = -1;
223 goto end;
224 }
225
226 lookup_method_type =
227 lttng_userspace_probe_location_lookup_method_get_type(lookup);
228
229 assert(lookup_method_type ==
230 LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_FUNCTION_ELF);
231
232 symbol = lttng_userspace_probe_location_function_get_function_name(
233 probe_location);
234 if (!symbol) {
235 ret = -1;
236 goto end;
237 }
238
239 fd = lttng_userspace_probe_location_function_get_binary_fd(probe_location);
240 if (fd < 0) {
241 ret = -1;
242 goto end;
243 }
244
245 ret = run_as_extract_elf_symbol_offset(fd, symbol, session->uid,
246 session->gid, offset);
247 if (ret < 0) {
248 DBG("userspace probe offset calculation failed for "
249 "function %s", symbol);
250 goto end;
251 }
252
253 DBG("userspace probe elf offset for %s is 0x%jd", symbol, (intmax_t)(*offset));
254 end:
255 return ret;
256 }
257
258 /*
259 * Compute the offsets of the instrumentation bytes in the binary based on the
260 * tracepoint probe location using the SDT lookup method. This function
261 * allocates the offsets buffer, the caller must free it.
262 *
263 * Returns 0 on success and set the offset out parameter to the offsets of the
264 * SDT tracepoint.
265 * Returns -1 on error.
266 */
267 static
268 int extract_userspace_probe_offset_tracepoint_sdt(
269 struct lttng_userspace_probe_location *probe_location,
270 struct ltt_kernel_session *session, uint64_t **offsets,
271 uint32_t *offsets_count)
272 {
273 enum lttng_userspace_probe_location_lookup_method_type lookup_method_type;
274 struct lttng_userspace_probe_location_lookup_method *lookup = NULL;
275 const char *probe_name = NULL, *provider_name = NULL;
276 int ret = 0;
277 int fd, i;
278
279 assert(lttng_userspace_probe_location_get_type(probe_location) ==
280 LTTNG_USERSPACE_PROBE_LOCATION_TYPE_TRACEPOINT);
281
282 lookup = lttng_userspace_probe_location_get_lookup_method(probe_location);
283 if (!lookup) {
284 ret = -1;
285 goto end;
286 }
287
288 lookup_method_type =
289 lttng_userspace_probe_location_lookup_method_get_type(lookup);
290
291 assert(lookup_method_type ==
292 LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_TRACEPOINT_SDT);
293
294
295 probe_name = lttng_userspace_probe_location_tracepoint_get_probe_name(
296 probe_location);
297 if (!probe_name) {
298 ret = -1;
299 goto end;
300 }
301
302 provider_name = lttng_userspace_probe_location_tracepoint_get_provider_name(
303 probe_location);
304 if (!provider_name) {
305 ret = -1;
306 goto end;
307 }
308
309 fd = lttng_userspace_probe_location_tracepoint_get_binary_fd(probe_location);
310 if (fd < 0) {
311 ret = -1;
312 goto end;
313 }
314
315 ret = run_as_extract_sdt_probe_offsets(fd, provider_name, probe_name,
316 session->uid, session->gid, offsets, offsets_count);
317 if (ret < 0) {
318 DBG("userspace probe offset calculation failed for sdt "
319 "probe %s:%s", provider_name, probe_name);
320 goto end;
321 }
322
323 if (*offsets_count == 0) {
324 DBG("no userspace probe offset found");
325 goto end;
326 }
327
328 DBG("%u userspace probe SDT offsets found for %s:%s at:",
329 *offsets_count, provider_name, probe_name);
330 for (i = 0; i < *offsets_count; i++) {
331 DBG("\t0x%jd", (intmax_t)((*offsets)[i]));
332 }
333 end:
334 return ret;
335 }
336
337 /*
338 * Extract the offsets of the instrumentation point for the different lookup
339 * methods.
340 */
341 static
342 int userspace_probe_add_callsites(struct lttng_event *ev,
343 struct ltt_kernel_session *session, int fd)
344 {
345 struct lttng_userspace_probe_location_lookup_method *lookup_method = NULL;
346 enum lttng_userspace_probe_location_lookup_method_type type;
347 struct lttng_userspace_probe_location *location = NULL;
348 int ret;
349
350 assert(ev);
351 assert(ev->type == LTTNG_EVENT_USERSPACE_PROBE);
352
353 location = lttng_event_get_userspace_probe_location(ev);
354 if (!location) {
355 ret = -1;
356 goto end;
357 }
358 lookup_method =
359 lttng_userspace_probe_location_get_lookup_method(location);
360 if (!lookup_method) {
361 ret = -1;
362 goto end;
363 }
364
365 type = lttng_userspace_probe_location_lookup_method_get_type(lookup_method);
366 switch (type) {
367 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_FUNCTION_ELF:
368 {
369 struct lttng_kernel_event_callsite callsite;
370 uint64_t offset;
371
372 ret = extract_userspace_probe_offset_function_elf(location, session, &offset);
373 if (ret) {
374 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
375 goto end;
376 }
377
378 callsite.u.uprobe.offset = offset;
379 ret = kernctl_add_callsite(fd, &callsite);
380 if (ret) {
381 WARN("Adding callsite to userspace probe "
382 "event %s failed.", ev->name);
383 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
384 goto end;
385 }
386 break;
387 }
388 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_TRACEPOINT_SDT:
389 {
390 int i;
391 uint64_t *offsets = NULL;
392 uint32_t offsets_count;
393 struct lttng_kernel_event_callsite callsite;
394
395 /*
396 * This call allocates the offsets buffer. This buffer must be freed
397 * by the caller
398 */
399 ret = extract_userspace_probe_offset_tracepoint_sdt(location, session,
400 &offsets, &offsets_count);
401 if (ret) {
402 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
403 goto end;
404 }
405 for (i = 0; i < offsets_count; i++) {
406 callsite.u.uprobe.offset = offsets[i];
407 ret = kernctl_add_callsite(fd, &callsite);
408 if (ret) {
409 WARN("Adding callsite to userspace probe "
410 "event %s failed.", ev->name);
411 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
412 free(offsets);
413 goto end;
414 }
415 }
416 free(offsets);
417 break;
418 }
419 default:
420 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
421 goto end;
422 }
423 end:
424 return ret;
425 }
426
427 /*
428 * Create a kernel event, enable it to the kernel tracer and add it to the
429 * channel event list of the kernel session.
430 * We own filter_expression and filter.
431 */
432 int kernel_create_event(struct lttng_event *ev,
433 struct ltt_kernel_channel *channel,
434 char *filter_expression,
435 struct lttng_filter_bytecode *filter)
436 {
437 int err, fd;
438 enum lttng_error_code ret;
439 struct ltt_kernel_event *event;
440
441 assert(ev);
442 assert(channel);
443
444 /* We pass ownership of filter_expression and filter */
445 ret = trace_kernel_create_event(ev, filter_expression,
446 filter, &event);
447 if (ret != LTTNG_OK) {
448 goto error;
449 }
450
451 fd = kernctl_create_event(channel->fd, event->event);
452 if (fd < 0) {
453 switch (-fd) {
454 case EEXIST:
455 ret = LTTNG_ERR_KERN_EVENT_EXIST;
456 break;
457 case ENOSYS:
458 WARN("Event type not implemented");
459 ret = LTTNG_ERR_KERN_EVENT_ENOSYS;
460 break;
461 case ENOENT:
462 WARN("Event %s not found!", ev->name);
463 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
464 break;
465 default:
466 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
467 PERROR("create event ioctl");
468 }
469 goto free_event;
470 }
471
472 event->type = ev->type;
473 event->fd = fd;
474 /* Prevent fd duplication after execlp() */
475 err = fcntl(event->fd, F_SETFD, FD_CLOEXEC);
476 if (err < 0) {
477 PERROR("fcntl session fd");
478 }
479
480 if (filter) {
481 err = kernctl_filter(event->fd, filter);
482 if (err < 0) {
483 switch (-err) {
484 case ENOMEM:
485 ret = LTTNG_ERR_FILTER_NOMEM;
486 break;
487 default:
488 ret = LTTNG_ERR_FILTER_INVAL;
489 break;
490 }
491 goto filter_error;
492 }
493 }
494
495 err = kernctl_enable(event->fd);
496 if (err < 0) {
497 switch (-err) {
498 case EEXIST:
499 ret = LTTNG_ERR_KERN_EVENT_EXIST;
500 break;
501 default:
502 PERROR("enable kernel event");
503 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
504 break;
505 }
506 goto enable_error;
507 }
508
509 /* Add event to event list */
510 cds_list_add(&event->list, &channel->events_list.head);
511 channel->event_count++;
512
513 DBG("Event %s created (fd: %d)", ev->name, event->fd);
514
515 return 0;
516
517 enable_error:
518 filter_error:
519 {
520 int closeret;
521
522 closeret = close(event->fd);
523 if (closeret) {
524 PERROR("close event fd");
525 }
526 }
527 free_event:
528 free(event);
529 error:
530 return ret;
531 }
532
533 /*
534 * Disable a kernel channel.
535 */
536 int kernel_disable_channel(struct ltt_kernel_channel *chan)
537 {
538 int ret;
539
540 assert(chan);
541
542 ret = kernctl_disable(chan->fd);
543 if (ret < 0) {
544 PERROR("disable chan ioctl");
545 goto error;
546 }
547
548 chan->enabled = 0;
549 DBG("Kernel channel %s disabled (fd: %d, key: %" PRIu64 ")",
550 chan->channel->name, chan->fd, chan->key);
551
552 return 0;
553
554 error:
555 return ret;
556 }
557
558 /*
559 * Enable a kernel channel.
560 */
561 int kernel_enable_channel(struct ltt_kernel_channel *chan)
562 {
563 int ret;
564
565 assert(chan);
566
567 ret = kernctl_enable(chan->fd);
568 if (ret < 0 && ret != -EEXIST) {
569 PERROR("Enable kernel chan");
570 goto error;
571 }
572
573 chan->enabled = 1;
574 DBG("Kernel channel %s enabled (fd: %d, key: %" PRIu64 ")",
575 chan->channel->name, chan->fd, chan->key);
576
577 return 0;
578
579 error:
580 return ret;
581 }
582
583 /*
584 * Enable a kernel event.
585 */
586 int kernel_enable_event(struct ltt_kernel_event *event)
587 {
588 int ret;
589
590 assert(event);
591
592 ret = kernctl_enable(event->fd);
593 if (ret < 0) {
594 switch (-ret) {
595 case EEXIST:
596 ret = LTTNG_ERR_KERN_EVENT_EXIST;
597 break;
598 default:
599 PERROR("enable kernel event");
600 break;
601 }
602 goto error;
603 }
604
605 event->enabled = 1;
606 DBG("Kernel event %s enabled (fd: %d)", event->event->name, event->fd);
607
608 return 0;
609
610 error:
611 return ret;
612 }
613
614 /*
615 * Disable a kernel event.
616 */
617 int kernel_disable_event(struct ltt_kernel_event *event)
618 {
619 int ret;
620
621 assert(event);
622
623 ret = kernctl_disable(event->fd);
624 if (ret < 0) {
625 switch (-ret) {
626 case EEXIST:
627 ret = LTTNG_ERR_KERN_EVENT_EXIST;
628 break;
629 default:
630 PERROR("disable kernel event");
631 break;
632 }
633 goto error;
634 }
635
636 event->enabled = 0;
637 DBG("Kernel event %s disabled (fd: %d)", event->event->name, event->fd);
638
639 return 0;
640
641 error:
642 return ret;
643 }
644
645
646 int kernel_track_pid(struct ltt_kernel_session *session, int pid)
647 {
648 int ret;
649
650 DBG("Kernel track PID %d for session id %" PRIu64 ".",
651 pid, session->id);
652 ret = kernctl_track_pid(session->fd, pid);
653 if (!ret) {
654 return LTTNG_OK;
655 }
656 switch (-ret) {
657 case EINVAL:
658 return LTTNG_ERR_INVALID;
659 case ENOMEM:
660 return LTTNG_ERR_NOMEM;
661 case EEXIST:
662 return LTTNG_ERR_PID_TRACKED;
663 default:
664 return LTTNG_ERR_UNK;
665 }
666 }
667
668 int kernel_untrack_pid(struct ltt_kernel_session *session, int pid)
669 {
670 int ret;
671
672 DBG("Kernel untrack PID %d for session id %" PRIu64 ".",
673 pid, session->id);
674 ret = kernctl_untrack_pid(session->fd, pid);
675 if (!ret) {
676 return LTTNG_OK;
677 }
678 switch (-ret) {
679 case EINVAL:
680 return LTTNG_ERR_INVALID;
681 case ENOMEM:
682 return LTTNG_ERR_NOMEM;
683 case ENOENT:
684 return LTTNG_ERR_PID_NOT_TRACKED;
685 default:
686 return LTTNG_ERR_UNK;
687 }
688 }
689
690 ssize_t kernel_list_tracker_pids(struct ltt_kernel_session *session,
691 int **_pids)
692 {
693 int fd, ret;
694 int pid;
695 ssize_t nbmem, count = 0;
696 FILE *fp;
697 int *pids;
698
699 fd = kernctl_list_tracker_pids(session->fd);
700 if (fd < 0) {
701 PERROR("kernel tracker pids list");
702 goto error;
703 }
704
705 fp = fdopen(fd, "r");
706 if (fp == NULL) {
707 PERROR("kernel tracker pids list fdopen");
708 goto error_fp;
709 }
710
711 nbmem = KERNEL_TRACKER_PIDS_INIT_LIST_SIZE;
712 pids = zmalloc(sizeof(*pids) * nbmem);
713 if (pids == NULL) {
714 PERROR("alloc list pids");
715 count = -ENOMEM;
716 goto end;
717 }
718
719 while (fscanf(fp, "process { pid = %u; };\n", &pid) == 1) {
720 if (count >= nbmem) {
721 int *new_pids;
722 size_t new_nbmem;
723
724 new_nbmem = nbmem << 1;
725 DBG("Reallocating pids list from %zu to %zu entries",
726 nbmem, new_nbmem);
727 new_pids = realloc(pids, new_nbmem * sizeof(*new_pids));
728 if (new_pids == NULL) {
729 PERROR("realloc list events");
730 free(pids);
731 count = -ENOMEM;
732 goto end;
733 }
734 /* Zero the new memory */
735 memset(new_pids + nbmem, 0,
736 (new_nbmem - nbmem) * sizeof(*new_pids));
737 nbmem = new_nbmem;
738 pids = new_pids;
739 }
740 pids[count++] = pid;
741 }
742
743 *_pids = pids;
744 DBG("Kernel list tracker pids done (%zd pids)", count);
745 end:
746 ret = fclose(fp); /* closes both fp and fd */
747 if (ret) {
748 PERROR("fclose");
749 }
750 return count;
751
752 error_fp:
753 ret = close(fd);
754 if (ret) {
755 PERROR("close");
756 }
757 error:
758 return -1;
759 }
760
761 /*
762 * Create kernel metadata, open from the kernel tracer and add it to the
763 * kernel session.
764 */
765 int kernel_open_metadata(struct ltt_kernel_session *session)
766 {
767 int ret;
768 struct ltt_kernel_metadata *lkm = NULL;
769
770 assert(session);
771
772 /* Allocate kernel metadata */
773 lkm = trace_kernel_create_metadata();
774 if (lkm == NULL) {
775 goto error;
776 }
777
778 /* Kernel tracer metadata creation */
779 ret = kernctl_open_metadata(session->fd, &lkm->conf->attr);
780 if (ret < 0) {
781 goto error_open;
782 }
783
784 lkm->fd = ret;
785 lkm->key = ++next_kernel_channel_key;
786 /* Prevent fd duplication after execlp() */
787 ret = fcntl(lkm->fd, F_SETFD, FD_CLOEXEC);
788 if (ret < 0) {
789 PERROR("fcntl session fd");
790 }
791
792 session->metadata = lkm;
793
794 DBG("Kernel metadata opened (fd: %d)", lkm->fd);
795
796 return 0;
797
798 error_open:
799 trace_kernel_destroy_metadata(lkm);
800 error:
801 return -1;
802 }
803
804 /*
805 * Start tracing session.
806 */
807 int kernel_start_session(struct ltt_kernel_session *session)
808 {
809 int ret;
810
811 assert(session);
812
813 ret = kernctl_start_session(session->fd);
814 if (ret < 0) {
815 PERROR("ioctl start session");
816 goto error;
817 }
818
819 DBG("Kernel session started");
820
821 return 0;
822
823 error:
824 return ret;
825 }
826
/*
 * Ask the kernel tracer, through fd, to wait until in-flight probes have
 * completed. Failures are logged and otherwise ignored.
 */
void kernel_wait_quiescent(int fd)
{
	DBG("Kernel quiescent wait on %d", fd);

	if (kernctl_wait_quiescent(fd) < 0) {
		PERROR("wait quiescent ioctl");
		ERR("Kernel quiescent wait failed");
	}
}
842
/*
 * Force a flush of the metadata buffer behind fd.
 *
 * A flush failure is logged only; this function always returns 0.
 */
int kernel_metadata_flush_buffer(int fd)
{
	int status;

	DBG("Kernel flushing metadata buffer on fd %d", fd);

	status = kernctl_buffer_flush(fd);
	if (status < 0) {
		ERR("Fail to flush metadata buffers %d (ret: %d)", fd, status);
	}

	return 0;
}
859
860 /*
861 * Force flush buffer for channel.
862 */
863 int kernel_flush_buffer(struct ltt_kernel_channel *channel)
864 {
865 int ret;
866 struct ltt_kernel_stream *stream;
867
868 assert(channel);
869
870 DBG("Flush buffer for channel %s", channel->channel->name);
871
872 cds_list_for_each_entry(stream, &channel->stream_list.head, list) {
873 DBG("Flushing channel stream %d", stream->fd);
874 ret = kernctl_buffer_flush(stream->fd);
875 if (ret < 0) {
876 PERROR("ioctl");
877 ERR("Fail to flush buffer for stream %d (ret: %d)",
878 stream->fd, ret);
879 }
880 }
881
882 return 0;
883 }
884
885 /*
886 * Stop tracing session.
887 */
888 int kernel_stop_session(struct ltt_kernel_session *session)
889 {
890 int ret;
891
892 assert(session);
893
894 ret = kernctl_stop_session(session->fd);
895 if (ret < 0) {
896 goto error;
897 }
898
899 DBG("Kernel session stopped");
900
901 return 0;
902
903 error:
904 return ret;
905 }
906
907 /*
908 * Open stream of channel, register it to the kernel tracer and add it
909 * to the stream list of the channel.
910 *
911 * Note: given that the streams may appear in random order wrt CPU
912 * number (e.g. cpu hotplug), the index value of the stream number in
913 * the stream name is not necessarily linked to the CPU number.
914 *
915 * Return the number of created stream. Else, a negative value.
916 */
917 int kernel_open_channel_stream(struct ltt_kernel_channel *channel)
918 {
919 int ret;
920 struct ltt_kernel_stream *lks;
921
922 assert(channel);
923
924 while ((ret = kernctl_create_stream(channel->fd)) >= 0) {
925 lks = trace_kernel_create_stream(channel->channel->name,
926 channel->stream_count);
927 if (lks == NULL) {
928 ret = close(ret);
929 if (ret) {
930 PERROR("close");
931 }
932 goto error;
933 }
934
935 lks->fd = ret;
936 /* Prevent fd duplication after execlp() */
937 ret = fcntl(lks->fd, F_SETFD, FD_CLOEXEC);
938 if (ret < 0) {
939 PERROR("fcntl session fd");
940 }
941
942 lks->tracefile_size = channel->channel->attr.tracefile_size;
943 lks->tracefile_count = channel->channel->attr.tracefile_count;
944
945 /* Add stream to channel stream list */
946 cds_list_add(&lks->list, &channel->stream_list.head);
947 channel->stream_count++;
948
949 DBG("Kernel stream %s created (fd: %d, state: %d)", lks->name, lks->fd,
950 lks->state);
951 }
952
953 return channel->stream_count;
954
955 error:
956 return -1;
957 }
958
959 /*
960 * Open the metadata stream and set it to the kernel session.
961 */
962 int kernel_open_metadata_stream(struct ltt_kernel_session *session)
963 {
964 int ret;
965
966 assert(session);
967
968 ret = kernctl_create_stream(session->metadata->fd);
969 if (ret < 0) {
970 PERROR("kernel create metadata stream");
971 goto error;
972 }
973
974 DBG("Kernel metadata stream created (fd: %d)", ret);
975 session->metadata_stream_fd = ret;
976 /* Prevent fd duplication after execlp() */
977 ret = fcntl(session->metadata_stream_fd, F_SETFD, FD_CLOEXEC);
978 if (ret < 0) {
979 PERROR("fcntl session fd");
980 }
981
982 return 0;
983
984 error:
985 return -1;
986 }
987
988 /*
989 * Get the event list from the kernel tracer and return the number of elements.
990 */
991 ssize_t kernel_list_events(int tracer_fd, struct lttng_event **events)
992 {
993 int fd, ret;
994 char *event;
995 size_t nbmem, count = 0;
996 FILE *fp;
997 struct lttng_event *elist;
998
999 assert(events);
1000
1001 fd = kernctl_tracepoint_list(tracer_fd);
1002 if (fd < 0) {
1003 PERROR("kernel tracepoint list");
1004 goto error;
1005 }
1006
1007 fp = fdopen(fd, "r");
1008 if (fp == NULL) {
1009 PERROR("kernel tracepoint list fdopen");
1010 goto error_fp;
1011 }
1012
1013 /*
1014 * Init memory size counter
1015 * See kernel-ctl.h for explanation of this value
1016 */
1017 nbmem = KERNEL_EVENT_INIT_LIST_SIZE;
1018 elist = zmalloc(sizeof(struct lttng_event) * nbmem);
1019 if (elist == NULL) {
1020 PERROR("alloc list events");
1021 count = -ENOMEM;
1022 goto end;
1023 }
1024
1025 while (fscanf(fp, "event { name = %m[^;]; };\n", &event) == 1) {
1026 if (count >= nbmem) {
1027 struct lttng_event *new_elist;
1028 size_t new_nbmem;
1029
1030 new_nbmem = nbmem << 1;
1031 DBG("Reallocating event list from %zu to %zu bytes",
1032 nbmem, new_nbmem);
1033 new_elist = realloc(elist, new_nbmem * sizeof(struct lttng_event));
1034 if (new_elist == NULL) {
1035 PERROR("realloc list events");
1036 free(event);
1037 free(elist);
1038 count = -ENOMEM;
1039 goto end;
1040 }
1041 /* Zero the new memory */
1042 memset(new_elist + nbmem, 0,
1043 (new_nbmem - nbmem) * sizeof(struct lttng_event));
1044 nbmem = new_nbmem;
1045 elist = new_elist;
1046 }
1047 strncpy(elist[count].name, event, LTTNG_SYMBOL_NAME_LEN);
1048 elist[count].name[LTTNG_SYMBOL_NAME_LEN - 1] = '\0';
1049 elist[count].enabled = -1;
1050 count++;
1051 free(event);
1052 }
1053
1054 *events = elist;
1055 DBG("Kernel list events done (%zu events)", count);
1056 end:
1057 ret = fclose(fp); /* closes both fp and fd */
1058 if (ret) {
1059 PERROR("fclose");
1060 }
1061 return count;
1062
1063 error_fp:
1064 ret = close(fd);
1065 if (ret) {
1066 PERROR("close");
1067 }
1068 error:
1069 return -1;
1070 }
1071
1072 /*
1073 * Get kernel version and validate it.
1074 */
1075 int kernel_validate_version(int tracer_fd,
1076 struct lttng_kernel_tracer_version *version,
1077 struct lttng_kernel_tracer_abi_version *abi_version)
1078 {
1079 int ret;
1080
1081 ret = kernctl_tracer_version(tracer_fd, version);
1082 if (ret < 0) {
1083 ERR("Failed to retrieve the lttng-modules version");
1084 goto error;
1085 }
1086
1087 /* Validate version */
1088 if (version->major != VERSION_MAJOR) {
1089 ERR("Kernel tracer major version (%d) is not compatible with lttng-tools major version (%d)",
1090 version->major, VERSION_MAJOR);
1091 goto error_version;
1092 }
1093 ret = kernctl_tracer_abi_version(tracer_fd, abi_version);
1094 if (ret < 0) {
1095 ERR("Failed to retrieve lttng-modules ABI version");
1096 goto error;
1097 }
1098 if (abi_version->major != LTTNG_MODULES_ABI_MAJOR_VERSION) {
1099 ERR("Kernel tracer ABI version (%d.%d) does not match the expected ABI major version (%d.*)",
1100 abi_version->major, abi_version->minor,
1101 LTTNG_MODULES_ABI_MAJOR_VERSION);
1102 goto error;
1103 }
1104 DBG2("Kernel tracer version validated (%d.%d, ABI %d.%d)",
1105 version->major, version->minor,
1106 abi_version->major, abi_version->minor);
1107 return 0;
1108
1109 error_version:
1110 ret = -1;
1111
1112 error:
1113 ERR("Kernel tracer version check failed; kernel tracing will not be available");
1114 return ret;
1115 }
1116
/*
 * Kernel work-arounds called at the start of sessiond main().
 *
 * Always returns 0: every failure in here is deliberately ignored (or only
 * logged), since the work-around is best-effort.
 */
int init_kernel_workarounds(void)
{
	int ret;
	FILE *fp;
	char buf[37];

	/*
	 * boot_id needs to be read once before being used concurrently
	 * to deal with a Linux kernel race. A fix is proposed for
	 * upstream, but the work-around is needed for older kernels.
	 */
	fp = fopen("/proc/sys/kernel/random/boot_id", "r");
	if (!fp) {
		goto end_boot_id;
	}

	/*
	 * Drain the file; the content itself is irrelevant. A short read means
	 * either end-of-file or a read error, and both end the loop. Testing
	 * feof() alone would spin forever on a persistent read error.
	 */
	while (fread(buf, 1, sizeof(buf), fp) == sizeof(buf)) {
		/* Keep reading. */
	}

	ret = fclose(fp);
	if (ret) {
		PERROR("fclose");
	}
end_boot_id:
	return 0;
}
1149
1150 /*
1151 * Complete teardown of a kernel session.
1152 */
1153 void kernel_destroy_session(struct ltt_kernel_session *ksess)
1154 {
1155 if (ksess == NULL) {
1156 DBG3("No kernel session when tearing down session");
1157 return;
1158 }
1159
1160 DBG("Tearing down kernel session");
1161
1162 /*
1163 * Destroy channels on the consumer if at least one FD has been sent and we
1164 * are in no output mode because the streams are in *no* monitor mode so we
1165 * have to send a command to clean them up or else they leaked.
1166 */
1167 if (!ksess->output_traces && ksess->consumer_fds_sent) {
1168 int ret;
1169 struct consumer_socket *socket;
1170 struct lttng_ht_iter iter;
1171
1172 /* For each consumer socket. */
1173 rcu_read_lock();
1174 cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter,
1175 socket, node.node) {
1176 struct ltt_kernel_channel *chan;
1177
1178 /* For each channel, ask the consumer to destroy it. */
1179 cds_list_for_each_entry(chan, &ksess->channel_list.head, list) {
1180 ret = kernel_consumer_destroy_channel(socket, chan);
1181 if (ret < 0) {
1182 /* Consumer is probably dead. Use next socket. */
1183 continue;
1184 }
1185 }
1186 }
1187 rcu_read_unlock();
1188 }
1189
1190 /* Close any relayd session */
1191 consumer_output_send_destroy_relayd(ksess->consumer);
1192
1193 trace_kernel_destroy_session(ksess);
1194 }
1195
1196 /*
1197 * Destroy a kernel channel object. It does not do anything on the tracer side.
1198 */
1199 void kernel_destroy_channel(struct ltt_kernel_channel *kchan)
1200 {
1201 struct ltt_kernel_session *ksess = NULL;
1202
1203 assert(kchan);
1204 assert(kchan->channel);
1205
1206 DBG3("Kernel destroy channel %s", kchan->channel->name);
1207
1208 /* Update channel count of associated session. */
1209 if (kchan->session) {
1210 /* Keep pointer reference so we can update it after the destroy. */
1211 ksess = kchan->session;
1212 }
1213
1214 trace_kernel_destroy_channel(kchan);
1215
1216 /*
1217 * At this point the kernel channel is not visible anymore. This is safe
1218 * since in order to work on a visible kernel session, the tracing session
1219 * lock (ltt_session.lock) MUST be acquired.
1220 */
1221 if (ksess) {
1222 ksess->channel_count--;
1223 }
1224 }
1225
1226 /*
1227 * Take a snapshot for a given kernel session.
1228 *
1229 * Return 0 on success or else return a LTTNG_ERR code.
1230 */
1231 int kernel_snapshot_record(struct ltt_kernel_session *ksess,
1232 struct snapshot_output *output, int wait,
1233 uint64_t nb_packets_per_stream)
1234 {
1235 int err, ret, saved_metadata_fd;
1236 struct consumer_socket *socket;
1237 struct lttng_ht_iter iter;
1238 struct ltt_kernel_metadata *saved_metadata;
1239 struct ltt_session *session;
1240 uint64_t trace_archive_id;
1241
1242 assert(ksess);
1243 assert(ksess->consumer);
1244 assert(output);
1245
1246 DBG("Kernel snapshot record started");
1247
1248 session = session_find_by_id(ksess->id);
1249 assert(session);
1250 assert(pthread_mutex_trylock(&session->lock));
1251 assert(session_trylock_list());
1252 trace_archive_id = session->current_archive_id;
1253
1254 /* Save current metadata since the following calls will change it. */
1255 saved_metadata = ksess->metadata;
1256 saved_metadata_fd = ksess->metadata_stream_fd;
1257
1258 rcu_read_lock();
1259
1260 ret = kernel_open_metadata(ksess);
1261 if (ret < 0) {
1262 ret = LTTNG_ERR_KERN_META_FAIL;
1263 goto error;
1264 }
1265
1266 ret = kernel_open_metadata_stream(ksess);
1267 if (ret < 0) {
1268 ret = LTTNG_ERR_KERN_META_FAIL;
1269 goto error_open_stream;
1270 }
1271
1272 /* Send metadata to consumer and snapshot everything. */
1273 cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter,
1274 socket, node.node) {
1275 struct consumer_output *saved_output;
1276 struct ltt_kernel_channel *chan;
1277
1278 /*
1279 * Temporarly switch consumer output for our snapshot output. As long
1280 * as the session lock is taken, this is safe.
1281 */
1282 saved_output = ksess->consumer;
1283 ksess->consumer = output->consumer;
1284
1285 pthread_mutex_lock(socket->lock);
1286 /* This stream must not be monitored by the consumer. */
1287 ret = kernel_consumer_add_metadata(socket, ksess, 0);
1288 pthread_mutex_unlock(socket->lock);
1289 /* Put back the saved consumer output into the session. */
1290 ksess->consumer = saved_output;
1291 if (ret < 0) {
1292 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
1293 goto error_consumer;
1294 }
1295
1296 /* For each channel, ask the consumer to snapshot it. */
1297 cds_list_for_each_entry(chan, &ksess->channel_list.head, list) {
1298 ret = consumer_snapshot_channel(socket, chan->key, output, 0,
1299 ksess->uid, ksess->gid,
1300 DEFAULT_KERNEL_TRACE_DIR, wait,
1301 nb_packets_per_stream,
1302 trace_archive_id);
1303 if (ret < 0) {
1304 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
1305 (void) kernel_consumer_destroy_metadata(socket,
1306 ksess->metadata);
1307 goto error_consumer;
1308 }
1309 }
1310
1311 /* Snapshot metadata, */
1312 ret = consumer_snapshot_channel(socket, ksess->metadata->key, output,
1313 1, ksess->uid, ksess->gid,
1314 DEFAULT_KERNEL_TRACE_DIR, wait, 0,
1315 trace_archive_id);
1316 if (ret < 0) {
1317 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
1318 goto error_consumer;
1319 }
1320
1321 /*
1322 * The metadata snapshot is done, ask the consumer to destroy it since
1323 * it's not monitored on the consumer side.
1324 */
1325 (void) kernel_consumer_destroy_metadata(socket, ksess->metadata);
1326 }
1327
1328 ret = LTTNG_OK;
1329
1330 error_consumer:
1331 /* Close newly opened metadata stream. It's now on the consumer side. */
1332 err = close(ksess->metadata_stream_fd);
1333 if (err < 0) {
1334 PERROR("close snapshot kernel");
1335 }
1336
1337 error_open_stream:
1338 trace_kernel_destroy_metadata(ksess->metadata);
1339 error:
1340 /* Restore metadata state.*/
1341 ksess->metadata = saved_metadata;
1342 ksess->metadata_stream_fd = saved_metadata_fd;
1343
1344 rcu_read_unlock();
1345 return ret;
1346 }
1347
/*
 * Fetch the syscall mask array of a channel from the kernel tracer.
 *
 * Thin wrapper around kernctl_syscall_mask(); both output pointers must be
 * non-NULL.
 *
 * Return 0 on success else a negative value. In both cases, syscall_mask
 * should be freed by the caller.
 */
int kernel_syscall_mask(int chan_fd, char **syscall_mask, uint32_t *nr_bits)
{
	int ret;

	assert(syscall_mask);
	assert(nr_bits);

	ret = kernctl_syscall_mask(chan_fd, syscall_mask, nr_bits);
	return ret;
}
1361
1362 /*
1363 * Check for the support of the RING_BUFFER_SNAPSHOT_SAMPLE_POSITIONS via abi
1364 * version number.
1365 *
1366 * Return 1 on success, 0 when feature is not supported, negative value in case
1367 * of errors.
1368 */
1369 int kernel_supports_ring_buffer_snapshot_sample_positions(int tracer_fd)
1370 {
1371 int ret = 0; // Not supported by default
1372 struct lttng_kernel_tracer_abi_version abi;
1373
1374 ret = kernctl_tracer_abi_version(tracer_fd, &abi);
1375 if (ret < 0) {
1376 ERR("Failed to retrieve lttng-modules ABI version");
1377 goto error;
1378 }
1379
1380 /*
1381 * RING_BUFFER_SNAPSHOT_SAMPLE_POSITIONS was introduced in 2.3
1382 */
1383 if (abi.major >= 2 && abi.minor >= 3) {
1384 /* Supported */
1385 ret = 1;
1386 } else {
1387 /* Not supported */
1388 ret = 0;
1389 }
1390 error:
1391 return ret;
1392 }
1393
1394 /*
1395 * Rotate a kernel session.
1396 *
1397 * Return 0 on success or else return a LTTNG_ERR code.
1398 */
1399 int kernel_rotate_session(struct ltt_session *session)
1400 {
1401 int ret;
1402 struct consumer_socket *socket;
1403 struct lttng_ht_iter iter;
1404 struct ltt_kernel_session *ksess = session->kernel_session;
1405
1406 assert(ksess);
1407 assert(ksess->consumer);
1408
1409 DBG("Rotate kernel session %s started (session %" PRIu64 ")",
1410 session->name, session->id);
1411
1412 rcu_read_lock();
1413
1414 /*
1415 * Note that this loop will end after one iteration given that there is
1416 * only one kernel consumer.
1417 */
1418 cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter,
1419 socket, node.node) {
1420 struct ltt_kernel_channel *chan;
1421
1422 /*
1423 * Account the metadata channel first to make sure the
1424 * number of channels waiting for a rotation cannot
1425 * reach 0 before we complete the iteration over all
1426 * the channels.
1427 */
1428 ret = rotate_add_channel_pending(ksess->metadata->key,
1429 LTTNG_DOMAIN_KERNEL, session);
1430 if (ret < 0) {
1431 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
1432 goto error;
1433 }
1434
1435 /* For each channel, ask the consumer to rotate it. */
1436 cds_list_for_each_entry(chan, &ksess->channel_list.head, list) {
1437 ret = rotate_add_channel_pending(chan->key,
1438 LTTNG_DOMAIN_KERNEL, session);
1439 if (ret < 0) {
1440 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
1441 goto error;
1442 }
1443
1444 DBG("Rotate channel %" PRIu64 ", session %s", chan->key, session->name);
1445 ret = consumer_rotate_channel(socket, chan->key,
1446 ksess->uid, ksess->gid, ksess->consumer,
1447 ksess->consumer->subdir,
1448 /* is_metadata_channel */ false,
1449 session->current_archive_id,
1450 &session->rotate_pending_relay);
1451 if (ret < 0) {
1452 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
1453 goto error;
1454 }
1455 }
1456
1457 /*
1458 * Rotate the metadata channel.
1459 */
1460 ret = consumer_rotate_channel(socket, ksess->metadata->key,
1461 ksess->uid, ksess->gid, ksess->consumer,
1462 ksess->consumer->subdir,
1463 /* is_metadata_channel */ true,
1464 session->current_archive_id,
1465 &session->rotate_pending_relay);
1466 if (ret < 0) {
1467 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
1468 goto error;
1469 }
1470 }
1471
1472 ret = LTTNG_OK;
1473
1474 error:
1475 rcu_read_unlock();
1476 return ret;
1477 }
This page took 0.091157 seconds and 5 git commands to generate.