1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
17 #include <linux/futex.h>
19 static size_t syscall_arg__scnprintf_hex(char *bf
, size_t size
,
21 u8 arg_idx __maybe_unused
,
22 u8
*arg_mask __maybe_unused
)
24 return scnprintf(bf
, size
, "%#lx", arg
);
27 #define SCA_HEX syscall_arg__scnprintf_hex
29 static size_t syscall_arg__scnprintf_whence(char *bf
, size_t size
,
31 u8 arg_idx __maybe_unused
,
32 u8
*arg_mask __maybe_unused
)
37 #define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n)
51 return scnprintf(bf
, size
, "%#x", whence
);
54 #define SCA_WHENCE syscall_arg__scnprintf_whence
56 static size_t syscall_arg__scnprintf_mmap_prot(char *bf
, size_t size
,
58 u8 arg_idx __maybe_unused
,
59 u8
*arg_mask __maybe_unused
)
61 int printed
= 0, prot
= arg
;
63 if (prot
== PROT_NONE
)
64 return scnprintf(bf
, size
, "NONE");
65 #define P_MMAP_PROT(n) \
66 if (prot & PROT_##n) { \
67 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
77 P_MMAP_PROT(GROWSDOWN
);
82 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", prot
);
87 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
89 static size_t syscall_arg__scnprintf_mmap_flags(char *bf
, size_t size
,
90 unsigned long arg
, u8 arg_idx __maybe_unused
,
91 u8
*arg_mask __maybe_unused
)
93 int printed
= 0, flags
= arg
;
95 #define P_MMAP_FLAG(n) \
96 if (flags & MAP_##n) { \
97 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
102 P_MMAP_FLAG(PRIVATE
);
104 P_MMAP_FLAG(ANONYMOUS
);
105 P_MMAP_FLAG(DENYWRITE
);
106 P_MMAP_FLAG(EXECUTABLE
);
109 P_MMAP_FLAG(GROWSDOWN
);
111 P_MMAP_FLAG(HUGETLB
);
114 P_MMAP_FLAG(NONBLOCK
);
115 P_MMAP_FLAG(NORESERVE
);
116 P_MMAP_FLAG(POPULATE
);
118 #ifdef MAP_UNINITIALIZED
119 P_MMAP_FLAG(UNINITIALIZED
);
124 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
129 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
131 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf
, size_t size
,
132 unsigned long arg
, u8 arg_idx __maybe_unused
,
133 u8
*arg_mask __maybe_unused
)
138 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
141 P_MADV_BHV(SEQUENTIAL
);
142 P_MADV_BHV(WILLNEED
);
143 P_MADV_BHV(DONTNEED
);
145 P_MADV_BHV(DONTFORK
);
147 P_MADV_BHV(HWPOISON
);
148 #ifdef MADV_SOFT_OFFLINE
149 P_MADV_BHV(SOFT_OFFLINE
);
151 P_MADV_BHV(MERGEABLE
);
152 P_MADV_BHV(UNMERGEABLE
);
154 P_MADV_BHV(HUGEPAGE
);
156 #ifdef MADV_NOHUGEPAGE
157 P_MADV_BHV(NOHUGEPAGE
);
160 P_MADV_BHV(DONTDUMP
);
169 return scnprintf(bf
, size
, "%#x", behavior
);
172 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
174 static size_t syscall_arg__scnprintf_futex_op(char *bf
, size_t size
, unsigned long arg
,
175 u8 arg_idx __maybe_unused
, u8
*arg_mask
)
177 enum syscall_futex_args
{
178 SCF_UADDR
= (1 << 0),
181 SCF_TIMEOUT
= (1 << 3),
182 SCF_UADDR2
= (1 << 4),
186 int cmd
= op
& FUTEX_CMD_MASK
;
190 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
191 P_FUTEX_OP(WAIT
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
; break;
192 P_FUTEX_OP(WAKE
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
193 P_FUTEX_OP(FD
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
194 P_FUTEX_OP(REQUEUE
); *arg_mask
|= SCF_VAL3
|SCF_TIMEOUT
; break;
195 P_FUTEX_OP(CMP_REQUEUE
); *arg_mask
|= SCF_TIMEOUT
; break;
196 P_FUTEX_OP(CMP_REQUEUE_PI
); *arg_mask
|= SCF_TIMEOUT
; break;
197 P_FUTEX_OP(WAKE_OP
); break;
198 P_FUTEX_OP(LOCK_PI
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
199 P_FUTEX_OP(UNLOCK_PI
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
200 P_FUTEX_OP(TRYLOCK_PI
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
; break;
201 P_FUTEX_OP(WAIT_BITSET
); *arg_mask
|= SCF_UADDR2
; break;
202 P_FUTEX_OP(WAKE_BITSET
); *arg_mask
|= SCF_UADDR2
; break;
203 P_FUTEX_OP(WAIT_REQUEUE_PI
); break;
204 default: printed
= scnprintf(bf
, size
, "%#x", cmd
); break;
207 if (op
& FUTEX_PRIVATE_FLAG
)
208 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|PRIV");
210 if (op
& FUTEX_CLOCK_REALTIME
)
211 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|CLKRT");
216 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
218 static size_t syscall_arg__scnprintf_open_flags(char *bf
, size_t size
,
220 u8 arg_idx
, u8
*arg_mask
)
222 int printed
= 0, flags
= arg
;
224 if (!(flags
& O_CREAT
))
225 *arg_mask
|= 1 << (arg_idx
+ 1); /* Mask the mode parm */
228 return scnprintf(bf
, size
, "RDONLY");
230 if (flags & O_##n) { \
231 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
255 if ((flags
& O_SYNC
) == O_SYNC
)
256 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%s", printed
? "|" : "", "SYNC");
268 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
273 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
275 static struct syscall_fmt
{
278 size_t (*arg_scnprintf
[6])(char *bf
, size_t size
, unsigned long arg
, u8 arg_idx
, u8
*arg_mask
);
283 { .name
= "access", .errmsg
= true, },
284 { .name
= "arch_prctl", .errmsg
= true, .alias
= "prctl", },
285 { .name
= "brk", .hexret
= true,
286 .arg_scnprintf
= { [0] = SCA_HEX
, /* brk */ }, },
287 { .name
= "mmap", .hexret
= true, },
288 { .name
= "connect", .errmsg
= true, },
289 { .name
= "fstat", .errmsg
= true, .alias
= "newfstat", },
290 { .name
= "fstatat", .errmsg
= true, .alias
= "newfstatat", },
291 { .name
= "futex", .errmsg
= true,
292 .arg_scnprintf
= { [1] = SCA_FUTEX_OP
, /* op */ }, },
293 { .name
= "ioctl", .errmsg
= true,
294 .arg_scnprintf
= { [2] = SCA_HEX
, /* arg */ }, },
295 { .name
= "lseek", .errmsg
= true,
296 .arg_scnprintf
= { [2] = SCA_WHENCE
, /* whence */ }, },
297 { .name
= "lstat", .errmsg
= true, .alias
= "newlstat", },
298 { .name
= "madvise", .errmsg
= true,
299 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
300 [2] = SCA_MADV_BHV
, /* behavior */ }, },
301 { .name
= "mmap", .hexret
= true,
302 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
303 [2] = SCA_MMAP_PROT
, /* prot */
304 [3] = SCA_MMAP_FLAGS
, /* flags */ }, },
305 { .name
= "mprotect", .errmsg
= true,
306 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
307 [2] = SCA_MMAP_PROT
, /* prot */ }, },
308 { .name
= "mremap", .hexret
= true,
309 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
310 [4] = SCA_HEX
, /* new_addr */ }, },
311 { .name
= "munmap", .errmsg
= true,
312 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
313 { .name
= "open", .errmsg
= true,
314 .arg_scnprintf
= { [1] = SCA_OPEN_FLAGS
, /* flags */ }, },
315 { .name
= "open_by_handle_at", .errmsg
= true,
316 .arg_scnprintf
= { [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
317 { .name
= "openat", .errmsg
= true,
318 .arg_scnprintf
= { [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
319 { .name
= "poll", .errmsg
= true, .timeout
= true, },
320 { .name
= "ppoll", .errmsg
= true, .timeout
= true, },
321 { .name
= "pread", .errmsg
= true, .alias
= "pread64", },
322 { .name
= "pwrite", .errmsg
= true, .alias
= "pwrite64", },
323 { .name
= "read", .errmsg
= true, },
324 { .name
= "recvfrom", .errmsg
= true, },
325 { .name
= "select", .errmsg
= true, .timeout
= true, },
326 { .name
= "socket", .errmsg
= true, },
327 { .name
= "stat", .errmsg
= true, .alias
= "newstat", },
328 { .name
= "uname", .errmsg
= true, .alias
= "newuname", },
331 static int syscall_fmt__cmp(const void *name
, const void *fmtp
)
333 const struct syscall_fmt
*fmt
= fmtp
;
334 return strcmp(name
, fmt
->name
);
337 static struct syscall_fmt
*syscall_fmt__find(const char *name
)
339 const int nmemb
= ARRAY_SIZE(syscall_fmts
);
340 return bsearch(name
, syscall_fmts
, nmemb
, sizeof(struct syscall_fmt
), syscall_fmt__cmp
);
344 struct event_format
*tp_format
;
347 struct syscall_fmt
*fmt
;
348 size_t (**arg_scnprintf
)(char *bf
, size_t size
,
349 unsigned long arg
, u8 arg_idx
, u8
*args_mask
);
352 static size_t fprintf_duration(unsigned long t
, FILE *fp
)
354 double duration
= (double)t
/ NSEC_PER_MSEC
;
355 size_t printed
= fprintf(fp
, "(");
358 printed
+= color_fprintf(fp
, PERF_COLOR_RED
, "%6.3f ms", duration
);
359 else if (duration
>= 0.01)
360 printed
+= color_fprintf(fp
, PERF_COLOR_YELLOW
, "%6.3f ms", duration
);
362 printed
+= color_fprintf(fp
, PERF_COLOR_NORMAL
, "%6.3f ms", duration
);
363 return printed
+ fprintf(fp
, "): ");
366 struct thread_trace
{
370 unsigned long nr_events
;
375 static struct thread_trace
*thread_trace__new(void)
377 return zalloc(sizeof(struct thread_trace
));
380 static struct thread_trace
*thread__trace(struct thread
*thread
, FILE *fp
)
382 struct thread_trace
*ttrace
;
387 if (thread
->priv
== NULL
)
388 thread
->priv
= thread_trace__new();
390 if (thread
->priv
== NULL
)
393 ttrace
= thread
->priv
;
398 color_fprintf(fp
, PERF_COLOR_RED
,
399 "WARNING: not enough memory, dropping samples!\n");
404 struct perf_tool tool
;
408 struct syscall
*table
;
410 struct perf_record_opts opts
;
414 unsigned long nr_events
;
415 struct strlist
*ev_qualifier
;
416 bool not_ev_qualifier
;
417 struct intlist
*tid_list
;
418 struct intlist
*pid_list
;
420 bool multiple_threads
;
421 double duration_filter
;
425 static bool trace__filter_duration(struct trace
*trace
, double t
)
427 return t
< (trace
->duration_filter
* NSEC_PER_MSEC
);
430 static size_t trace__fprintf_tstamp(struct trace
*trace
, u64 tstamp
, FILE *fp
)
432 double ts
= (double)(tstamp
- trace
->base_time
) / NSEC_PER_MSEC
;
434 return fprintf(fp
, "%10.3f ", ts
);
437 static bool done
= false;
439 static void sig_handler(int sig __maybe_unused
)
444 static size_t trace__fprintf_entry_head(struct trace
*trace
, struct thread
*thread
,
445 u64 duration
, u64 tstamp
, FILE *fp
)
447 size_t printed
= trace__fprintf_tstamp(trace
, tstamp
, fp
);
448 printed
+= fprintf_duration(duration
, fp
);
450 if (trace
->multiple_threads
)
451 printed
+= fprintf(fp
, "%d ", thread
->tid
);
456 static int trace__process_event(struct trace
*trace
, struct machine
*machine
,
457 union perf_event
*event
)
461 switch (event
->header
.type
) {
462 case PERF_RECORD_LOST
:
463 color_fprintf(trace
->output
, PERF_COLOR_RED
,
464 "LOST %" PRIu64
" events!\n", event
->lost
.lost
);
465 ret
= machine__process_lost_event(machine
, event
);
467 ret
= machine__process_event(machine
, event
);
474 static int trace__tool_process(struct perf_tool
*tool
,
475 union perf_event
*event
,
476 struct perf_sample
*sample __maybe_unused
,
477 struct machine
*machine
)
479 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
480 return trace__process_event(trace
, machine
, event
);
483 static int trace__symbols_init(struct trace
*trace
, struct perf_evlist
*evlist
)
485 int err
= symbol__init();
490 machine__init(&trace
->host
, "", HOST_KERNEL_ID
);
491 machine__create_kernel_maps(&trace
->host
);
493 if (perf_target__has_task(&trace
->opts
.target
)) {
494 err
= perf_event__synthesize_thread_map(&trace
->tool
, evlist
->threads
,
498 err
= perf_event__synthesize_threads(&trace
->tool
, trace__tool_process
,
508 static int syscall__set_arg_fmts(struct syscall
*sc
)
510 struct format_field
*field
;
513 sc
->arg_scnprintf
= calloc(sc
->tp_format
->format
.nr_fields
- 1, sizeof(void *));
514 if (sc
->arg_scnprintf
== NULL
)
517 for (field
= sc
->tp_format
->format
.fields
->next
; field
; field
= field
->next
) {
518 if (sc
->fmt
&& sc
->fmt
->arg_scnprintf
[idx
])
519 sc
->arg_scnprintf
[idx
] = sc
->fmt
->arg_scnprintf
[idx
];
520 else if (field
->flags
& FIELD_IS_POINTER
)
521 sc
->arg_scnprintf
[idx
] = syscall_arg__scnprintf_hex
;
528 static int trace__read_syscall_info(struct trace
*trace
, int id
)
532 const char *name
= audit_syscall_to_name(id
, trace
->audit_machine
);
537 if (id
> trace
->syscalls
.max
) {
538 struct syscall
*nsyscalls
= realloc(trace
->syscalls
.table
, (id
+ 1) * sizeof(*sc
));
540 if (nsyscalls
== NULL
)
543 if (trace
->syscalls
.max
!= -1) {
544 memset(nsyscalls
+ trace
->syscalls
.max
+ 1, 0,
545 (id
- trace
->syscalls
.max
) * sizeof(*sc
));
547 memset(nsyscalls
, 0, (id
+ 1) * sizeof(*sc
));
550 trace
->syscalls
.table
= nsyscalls
;
551 trace
->syscalls
.max
= id
;
554 sc
= trace
->syscalls
.table
+ id
;
557 if (trace
->ev_qualifier
) {
558 bool in
= strlist__find(trace
->ev_qualifier
, name
) != NULL
;
560 if (!(in
^ trace
->not_ev_qualifier
)) {
563 * No need to do read tracepoint information since this will be
570 sc
->fmt
= syscall_fmt__find(sc
->name
);
572 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->name
);
573 sc
->tp_format
= event_format__new("syscalls", tp_name
);
575 if (sc
->tp_format
== NULL
&& sc
->fmt
&& sc
->fmt
->alias
) {
576 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->fmt
->alias
);
577 sc
->tp_format
= event_format__new("syscalls", tp_name
);
580 if (sc
->tp_format
== NULL
)
583 return syscall__set_arg_fmts(sc
);
586 static size_t syscall__scnprintf_args(struct syscall
*sc
, char *bf
, size_t size
,
592 if (sc
->tp_format
!= NULL
) {
593 struct format_field
*field
;
594 u8 mask
= 0, bit
= 1;
596 for (field
= sc
->tp_format
->format
.fields
->next
; field
;
597 field
= field
->next
, ++i
, bit
<<= 1) {
601 printed
+= scnprintf(bf
+ printed
, size
- printed
,
602 "%s%s: ", printed
? ", " : "", field
->name
);
604 if (sc
->arg_scnprintf
&& sc
->arg_scnprintf
[i
]) {
605 printed
+= sc
->arg_scnprintf
[i
](bf
+ printed
, size
- printed
,
608 printed
+= scnprintf(bf
+ printed
, size
- printed
,
614 printed
+= scnprintf(bf
+ printed
, size
- printed
,
616 printed
? ", " : "", i
, args
[i
]);
624 typedef int (*tracepoint_handler
)(struct trace
*trace
, struct perf_evsel
*evsel
,
625 struct perf_sample
*sample
);
627 static struct syscall
*trace__syscall_info(struct trace
*trace
,
628 struct perf_evsel
*evsel
,
629 struct perf_sample
*sample
)
631 int id
= perf_evsel__intval(evsel
, sample
, "id");
636 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
637 * before that, leaving at a higher verbosity level till that is
638 * explained. Reproduced with plain ftrace with:
640 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
641 * grep "NR -1 " /t/trace_pipe
643 * After generating some load on the machine.
647 fprintf(trace
->output
, "Invalid syscall %d id, skipping (%s, %" PRIu64
") ...\n",
648 id
, perf_evsel__name(evsel
), ++n
);
653 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
) &&
654 trace__read_syscall_info(trace
, id
))
657 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
))
660 return &trace
->syscalls
.table
[id
];
664 fprintf(trace
->output
, "Problems reading syscall %d", id
);
665 if (id
<= trace
->syscalls
.max
&& trace
->syscalls
.table
[id
].name
!= NULL
)
666 fprintf(trace
->output
, "(%s)", trace
->syscalls
.table
[id
].name
);
667 fputs(" information\n", trace
->output
);
672 static int trace__sys_enter(struct trace
*trace
, struct perf_evsel
*evsel
,
673 struct perf_sample
*sample
)
678 struct thread
*thread
;
679 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, sample
);
680 struct thread_trace
*ttrace
;
688 thread
= machine__findnew_thread(&trace
->host
, sample
->pid
,
690 ttrace
= thread__trace(thread
, trace
->output
);
694 args
= perf_evsel__rawptr(evsel
, sample
, "args");
696 fprintf(trace
->output
, "Problems reading syscall arguments\n");
700 ttrace
= thread
->priv
;
702 if (ttrace
->entry_str
== NULL
) {
703 ttrace
->entry_str
= malloc(1024);
704 if (!ttrace
->entry_str
)
708 ttrace
->entry_time
= sample
->time
;
709 msg
= ttrace
->entry_str
;
710 printed
+= scnprintf(msg
+ printed
, 1024 - printed
, "%s(", sc
->name
);
712 printed
+= syscall__scnprintf_args(sc
, msg
+ printed
, 1024 - printed
, args
);
714 if (!strcmp(sc
->name
, "exit_group") || !strcmp(sc
->name
, "exit")) {
715 if (!trace
->duration_filter
) {
716 trace__fprintf_entry_head(trace
, thread
, 1, sample
->time
, trace
->output
);
717 fprintf(trace
->output
, "%-70s\n", ttrace
->entry_str
);
720 ttrace
->entry_pending
= true;
725 static int trace__sys_exit(struct trace
*trace
, struct perf_evsel
*evsel
,
726 struct perf_sample
*sample
)
730 struct thread
*thread
;
731 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, sample
);
732 struct thread_trace
*ttrace
;
740 thread
= machine__findnew_thread(&trace
->host
, sample
->pid
,
742 ttrace
= thread__trace(thread
, trace
->output
);
746 ret
= perf_evsel__intval(evsel
, sample
, "ret");
748 ttrace
= thread
->priv
;
750 ttrace
->exit_time
= sample
->time
;
752 if (ttrace
->entry_time
) {
753 duration
= sample
->time
- ttrace
->entry_time
;
754 if (trace__filter_duration(trace
, duration
))
756 } else if (trace
->duration_filter
)
759 trace__fprintf_entry_head(trace
, thread
, duration
, sample
->time
, trace
->output
);
761 if (ttrace
->entry_pending
) {
762 fprintf(trace
->output
, "%-70s", ttrace
->entry_str
);
764 fprintf(trace
->output
, " ... [");
765 color_fprintf(trace
->output
, PERF_COLOR_YELLOW
, "continued");
766 fprintf(trace
->output
, "]: %s()", sc
->name
);
769 if (sc
->fmt
== NULL
) {
771 fprintf(trace
->output
, ") = %d", ret
);
772 } else if (ret
< 0 && sc
->fmt
->errmsg
) {
774 const char *emsg
= strerror_r(-ret
, bf
, sizeof(bf
)),
775 *e
= audit_errno_to_name(-ret
);
777 fprintf(trace
->output
, ") = -1 %s %s", e
, emsg
);
778 } else if (ret
== 0 && sc
->fmt
->timeout
)
779 fprintf(trace
->output
, ") = 0 Timeout");
780 else if (sc
->fmt
->hexret
)
781 fprintf(trace
->output
, ") = %#x", ret
);
785 fputc('\n', trace
->output
);
787 ttrace
->entry_pending
= false;
792 static int trace__sched_stat_runtime(struct trace
*trace
, struct perf_evsel
*evsel
,
793 struct perf_sample
*sample
)
795 u64 runtime
= perf_evsel__intval(evsel
, sample
, "runtime");
796 double runtime_ms
= (double)runtime
/ NSEC_PER_MSEC
;
797 struct thread
*thread
= machine__findnew_thread(&trace
->host
,
800 struct thread_trace
*ttrace
= thread__trace(thread
, trace
->output
);
805 ttrace
->runtime_ms
+= runtime_ms
;
806 trace
->runtime_ms
+= runtime_ms
;
810 fprintf(trace
->output
, "%s: comm=%s,pid=%u,runtime=%" PRIu64
",vruntime=%" PRIu64
")\n",
812 perf_evsel__strval(evsel
, sample
, "comm"),
813 (pid_t
)perf_evsel__intval(evsel
, sample
, "pid"),
815 perf_evsel__intval(evsel
, sample
, "vruntime"));
819 static bool skip_sample(struct trace
*trace
, struct perf_sample
*sample
)
821 if ((trace
->pid_list
&& intlist__find(trace
->pid_list
, sample
->pid
)) ||
822 (trace
->tid_list
&& intlist__find(trace
->tid_list
, sample
->tid
)))
825 if (trace
->pid_list
|| trace
->tid_list
)
831 static int trace__process_sample(struct perf_tool
*tool
,
832 union perf_event
*event __maybe_unused
,
833 struct perf_sample
*sample
,
834 struct perf_evsel
*evsel
,
835 struct machine
*machine __maybe_unused
)
837 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
840 tracepoint_handler handler
= evsel
->handler
.func
;
842 if (skip_sample(trace
, sample
))
845 if (trace
->base_time
== 0)
846 trace
->base_time
= sample
->time
;
849 handler(trace
, evsel
, sample
);
855 perf_session__has_tp(struct perf_session
*session
, const char *name
)
857 struct perf_evsel
*evsel
;
859 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
, name
);
861 return evsel
!= NULL
;
864 static int parse_target_str(struct trace
*trace
)
866 if (trace
->opts
.target
.pid
) {
867 trace
->pid_list
= intlist__new(trace
->opts
.target
.pid
);
868 if (trace
->pid_list
== NULL
) {
869 pr_err("Error parsing process id string\n");
874 if (trace
->opts
.target
.tid
) {
875 trace
->tid_list
= intlist__new(trace
->opts
.target
.tid
);
876 if (trace
->tid_list
== NULL
) {
877 pr_err("Error parsing thread id string\n");
885 static int trace__run(struct trace
*trace
, int argc
, const char **argv
)
887 struct perf_evlist
*evlist
= perf_evlist__new();
888 struct perf_evsel
*evsel
;
890 unsigned long before
;
891 const bool forks
= argc
> 0;
893 if (evlist
== NULL
) {
894 fprintf(trace
->output
, "Not enough memory to run!\n");
898 if (perf_evlist__add_newtp(evlist
, "raw_syscalls", "sys_enter", trace__sys_enter
) ||
899 perf_evlist__add_newtp(evlist
, "raw_syscalls", "sys_exit", trace__sys_exit
)) {
900 fprintf(trace
->output
, "Couldn't read the raw_syscalls tracepoints information!\n");
901 goto out_delete_evlist
;
905 perf_evlist__add_newtp(evlist
, "sched", "sched_stat_runtime",
906 trace__sched_stat_runtime
)) {
907 fprintf(trace
->output
, "Couldn't read the sched_stat_runtime tracepoint information!\n");
908 goto out_delete_evlist
;
911 err
= perf_evlist__create_maps(evlist
, &trace
->opts
.target
);
913 fprintf(trace
->output
, "Problems parsing the target to trace, check your options!\n");
914 goto out_delete_evlist
;
917 err
= trace__symbols_init(trace
, evlist
);
919 fprintf(trace
->output
, "Problems initializing symbol libraries!\n");
920 goto out_delete_maps
;
923 perf_evlist__config(evlist
, &trace
->opts
);
925 signal(SIGCHLD
, sig_handler
);
926 signal(SIGINT
, sig_handler
);
929 err
= perf_evlist__prepare_workload(evlist
, &trace
->opts
.target
,
932 fprintf(trace
->output
, "Couldn't run the workload!\n");
933 goto out_delete_maps
;
937 err
= perf_evlist__open(evlist
);
939 fprintf(trace
->output
, "Couldn't create the events: %s\n", strerror(errno
));
940 goto out_delete_maps
;
943 err
= perf_evlist__mmap(evlist
, UINT_MAX
, false);
945 fprintf(trace
->output
, "Couldn't mmap the events: %s\n", strerror(errno
));
946 goto out_close_evlist
;
949 perf_evlist__enable(evlist
);
952 perf_evlist__start_workload(evlist
);
954 trace
->multiple_threads
= evlist
->threads
->map
[0] == -1 || evlist
->threads
->nr
> 1;
956 before
= trace
->nr_events
;
958 for (i
= 0; i
< evlist
->nr_mmaps
; i
++) {
959 union perf_event
*event
;
961 while ((event
= perf_evlist__mmap_read(evlist
, i
)) != NULL
) {
962 const u32 type
= event
->header
.type
;
963 tracepoint_handler handler
;
964 struct perf_sample sample
;
968 err
= perf_evlist__parse_sample(evlist
, event
, &sample
);
970 fprintf(trace
->output
, "Can't parse sample, err = %d, skipping...\n", err
);
974 if (trace
->base_time
== 0)
975 trace
->base_time
= sample
.time
;
977 if (type
!= PERF_RECORD_SAMPLE
) {
978 trace__process_event(trace
, &trace
->host
, event
);
982 evsel
= perf_evlist__id2evsel(evlist
, sample
.id
);
984 fprintf(trace
->output
, "Unknown tp ID %" PRIu64
", skipping...\n", sample
.id
);
988 if (sample
.raw_data
== NULL
) {
989 fprintf(trace
->output
, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
990 perf_evsel__name(evsel
), sample
.tid
,
991 sample
.cpu
, sample
.raw_size
);
995 handler
= evsel
->handler
.func
;
996 handler(trace
, evsel
, &sample
);
1000 if (trace
->nr_events
== before
) {
1002 goto out_unmap_evlist
;
1004 poll(evlist
->pollfd
, evlist
->nr_fds
, -1);
1008 perf_evlist__disable(evlist
);
1013 perf_evlist__munmap(evlist
);
1015 perf_evlist__close(evlist
);
1017 perf_evlist__delete_maps(evlist
);
1019 perf_evlist__delete(evlist
);
1024 static int trace__replay(struct trace
*trace
)
1026 const struct perf_evsel_str_handler handlers
[] = {
1027 { "raw_syscalls:sys_enter", trace__sys_enter
, },
1028 { "raw_syscalls:sys_exit", trace__sys_exit
, },
1031 struct perf_session
*session
;
1034 trace
->tool
.sample
= trace__process_sample
;
1035 trace
->tool
.mmap
= perf_event__process_mmap
;
1036 trace
->tool
.comm
= perf_event__process_comm
;
1037 trace
->tool
.exit
= perf_event__process_exit
;
1038 trace
->tool
.fork
= perf_event__process_fork
;
1039 trace
->tool
.attr
= perf_event__process_attr
;
1040 trace
->tool
.tracing_data
= perf_event__process_tracing_data
;
1041 trace
->tool
.build_id
= perf_event__process_build_id
;
1043 trace
->tool
.ordered_samples
= true;
1044 trace
->tool
.ordering_requires_timestamps
= true;
1046 /* add tid to output */
1047 trace
->multiple_threads
= true;
1049 if (symbol__init() < 0)
1052 session
= perf_session__new(input_name
, O_RDONLY
, 0, false,
1054 if (session
== NULL
)
1057 err
= perf_session__set_tracepoints_handlers(session
, handlers
);
1061 if (!perf_session__has_tp(session
, "raw_syscalls:sys_enter")) {
1062 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1066 if (!perf_session__has_tp(session
, "raw_syscalls:sys_exit")) {
1067 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1071 err
= parse_target_str(trace
);
1077 err
= perf_session__process_events(session
, &trace
->tool
);
1079 pr_err("Failed to process events, error %d", err
);
1082 perf_session__delete(session
);
/*
 * Print the banner for the per-thread summary table.
 * Returns the number of characters written.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t printed;

	printed  = fprintf(fp, "\n _____________________________________________________________________\n");
	printed += fprintf(fp, " __) Summary of events (__\n\n");
	printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
	printed += fprintf(fp, " _____________________________________________________________________\n\n");

	return printed;
}
1099 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
)
1101 size_t printed
= trace__fprintf_threads_header(fp
);
1104 for (nd
= rb_first(&trace
->host
.threads
); nd
; nd
= rb_next(nd
)) {
1105 struct thread
*thread
= rb_entry(nd
, struct thread
, rb_node
);
1106 struct thread_trace
*ttrace
= thread
->priv
;
1113 ratio
= (double)ttrace
->nr_events
/ trace
->nr_events
* 100.0;
1115 color
= PERF_COLOR_NORMAL
;
1117 color
= PERF_COLOR_RED
;
1118 else if (ratio
> 25.0)
1119 color
= PERF_COLOR_GREEN
;
1120 else if (ratio
> 5.0)
1121 color
= PERF_COLOR_YELLOW
;
1123 printed
+= color_fprintf(fp
, color
, "%20s", thread
->comm
);
1124 printed
+= fprintf(fp
, " - %-5d :%11lu [", thread
->tid
, ttrace
->nr_events
);
1125 printed
+= color_fprintf(fp
, color
, "%5.1f%%", ratio
);
1126 printed
+= fprintf(fp
, " ] %10.3f ms\n", ttrace
->runtime_ms
);
1132 static int trace__set_duration(const struct option
*opt
, const char *str
,
1133 int unset __maybe_unused
)
1135 struct trace
*trace
= opt
->value
;
1137 trace
->duration_filter
= atof(str
);
1141 static int trace__open_output(struct trace
*trace
, const char *filename
)
1145 if (!stat(filename
, &st
) && st
.st_size
) {
1146 char oldname
[PATH_MAX
];
1148 scnprintf(oldname
, sizeof(oldname
), "%s.old", filename
);
1150 rename(filename
, oldname
);
1153 trace
->output
= fopen(filename
, "w");
1155 return trace
->output
== NULL
? -errno
: 0;
1158 int cmd_trace(int argc
, const char **argv
, const char *prefix __maybe_unused
)
1160 const char * const trace_usage
[] = {
1161 "perf trace [<options>] [<command>]",
1162 "perf trace [<options>] -- <command> [<options>]",
1165 struct trace trace
= {
1166 .audit_machine
= audit_detect_machine(),
1175 .user_freq
= UINT_MAX
,
1176 .user_interval
= ULLONG_MAX
,
1182 const char *output_name
= NULL
;
1183 const char *ev_qualifier_str
= NULL
;
1184 const struct option trace_options
[] = {
1185 OPT_STRING('e', "expr", &ev_qualifier_str
, "expr",
1186 "list of events to trace"),
1187 OPT_STRING('o', "output", &output_name
, "file", "output file name"),
1188 OPT_STRING('i', "input", &input_name
, "file", "Analyze events in file"),
1189 OPT_STRING('p', "pid", &trace
.opts
.target
.pid
, "pid",
1190 "trace events on existing process id"),
1191 OPT_STRING('t', "tid", &trace
.opts
.target
.tid
, "tid",
1192 "trace events on existing thread id"),
1193 OPT_BOOLEAN('a', "all-cpus", &trace
.opts
.target
.system_wide
,
1194 "system-wide collection from all CPUs"),
1195 OPT_STRING('C', "cpu", &trace
.opts
.target
.cpu_list
, "cpu",
1196 "list of cpus to monitor"),
1197 OPT_BOOLEAN(0, "no-inherit", &trace
.opts
.no_inherit
,
1198 "child tasks do not inherit counters"),
1199 OPT_UINTEGER('m', "mmap-pages", &trace
.opts
.mmap_pages
,
1200 "number of mmap data pages"),
1201 OPT_STRING('u', "uid", &trace
.opts
.target
.uid_str
, "user",
1203 OPT_CALLBACK(0, "duration", &trace
, "float",
1204 "show only events with duration > N.M ms",
1205 trace__set_duration
),
1206 OPT_BOOLEAN(0, "sched", &trace
.sched
, "show blocking scheduler events"),
1207 OPT_INCR('v', "verbose", &verbose
, "be more verbose"),
1213 argc
= parse_options(argc
, argv
, trace_options
, trace_usage
, 0);
1215 if (output_name
!= NULL
) {
1216 err
= trace__open_output(&trace
, output_name
);
1218 perror("failed to create output file");
1223 if (ev_qualifier_str
!= NULL
) {
1224 const char *s
= ev_qualifier_str
;
1226 trace
.not_ev_qualifier
= *s
== '!';
1227 if (trace
.not_ev_qualifier
)
1229 trace
.ev_qualifier
= strlist__new(true, s
);
1230 if (trace
.ev_qualifier
== NULL
) {
1231 fputs("Not enough memory to parse event qualifier",
1238 err
= perf_target__validate(&trace
.opts
.target
);
1240 perf_target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
1241 fprintf(trace
.output
, "%s", bf
);
1245 err
= perf_target__parse_uid(&trace
.opts
.target
);
1247 perf_target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
1248 fprintf(trace
.output
, "%s", bf
);
1252 if (!argc
&& perf_target__none(&trace
.opts
.target
))
1253 trace
.opts
.target
.system_wide
= true;
1256 err
= trace__replay(&trace
);
1258 err
= trace__run(&trace
, argc
, argv
);
1260 if (trace
.sched
&& !err
)
1261 trace__fprintf_thread_summary(&trace
, trace
.output
);
1264 if (output_name
!= NULL
)
1265 fclose(trace
.output
);