Merge remote-tracking branch 'xen-tip/linux-next'
[deliverable/linux.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
514f1c67 24#include "util/evlist.h"
4b6ab94e 25#include <subcmd/exec-cmd.h>
752fde44 26#include "util/machine.h"
6810fc91 27#include "util/session.h"
752fde44 28#include "util/thread.h"
4b6ab94e 29#include <subcmd/parse-options.h>
2ae3a312 30#include "util/strlist.h"
bdc89661 31#include "util/intlist.h"
514f1c67 32#include "util/thread_map.h"
bf2575c1 33#include "util/stat.h"
97978b3e 34#include "trace-event.h"
9aca7f17 35#include "util/parse-events.h"
ba504235 36#include "util/bpf-loader.h"
566a0885 37#include "callchain.h"
fd0db102 38#include "syscalltbl.h"
96c14451 39#include "rb_resort.h"
514f1c67 40
fd0db102 41#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
514f1c67 42#include <stdlib.h>
8dd2a131 43#include <linux/err.h>
997bba8c
ACM
44#include <linux/filter.h>
45#include <linux/audit.h>
39878d49 46#include <linux/random.h>
c6d4a494 47#include <linux/stringify.h>
bd48c63e 48#include <linux/time64.h>
514f1c67 49
c188e7ac
ACM
50#ifndef O_CLOEXEC
51# define O_CLOEXEC 02000000
52#endif
53
d1d438a3
ACM
54struct trace {
55 struct perf_tool tool;
fd0db102 56 struct syscalltbl *sctbl;
d1d438a3
ACM
57 struct {
58 int max;
59 struct syscall *table;
60 struct {
61 struct perf_evsel *sys_enter,
62 *sys_exit;
63 } events;
64 } syscalls;
65 struct record_opts opts;
66 struct perf_evlist *evlist;
67 struct machine *host;
68 struct thread *current;
69 u64 base_time;
70 FILE *output;
71 unsigned long nr_events;
72 struct strlist *ev_qualifier;
73 struct {
74 size_t nr;
75 int *entries;
76 } ev_qualifier_ids;
77 struct intlist *tid_list;
78 struct intlist *pid_list;
79 struct {
80 size_t nr;
81 pid_t *entries;
82 } filter_pids;
83 double duration_filter;
84 double runtime_ms;
85 struct {
86 u64 vfs_getname,
87 proc_getname;
88 } stats;
c6d4a494 89 unsigned int max_stack;
5cf9c84e 90 unsigned int min_stack;
d1d438a3
ACM
91 bool not_ev_qualifier;
92 bool live;
93 bool full_time;
94 bool sched;
95 bool multiple_threads;
96 bool summary;
97 bool summary_only;
98 bool show_comm;
99 bool show_tool_stats;
100 bool trace_syscalls;
44621819 101 bool kernel_syscallchains;
d1d438a3
ACM
102 bool force;
103 bool vfs_getname;
104 int trace_pgfaults;
fd0db102 105 int open_id;
d1d438a3 106};
a1c2552d 107
77170988
ACM
108struct tp_field {
109 int offset;
110 union {
111 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
112 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
113 };
114};
115
116#define TP_UINT_FIELD(bits) \
117static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
118{ \
55d43bca
DA
119 u##bits value; \
120 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
121 return value; \
77170988
ACM
122}
123
124TP_UINT_FIELD(8);
125TP_UINT_FIELD(16);
126TP_UINT_FIELD(32);
127TP_UINT_FIELD(64);
128
129#define TP_UINT_FIELD__SWAPPED(bits) \
130static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
131{ \
55d43bca
DA
132 u##bits value; \
133 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
134 return bswap_##bits(value);\
135}
136
137TP_UINT_FIELD__SWAPPED(16);
138TP_UINT_FIELD__SWAPPED(32);
139TP_UINT_FIELD__SWAPPED(64);
140
141static int tp_field__init_uint(struct tp_field *field,
142 struct format_field *format_field,
143 bool needs_swap)
144{
145 field->offset = format_field->offset;
146
147 switch (format_field->size) {
148 case 1:
149 field->integer = tp_field__u8;
150 break;
151 case 2:
152 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 break;
154 case 4:
155 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 break;
157 case 8:
158 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 break;
160 default:
161 return -1;
162 }
163
164 return 0;
165}
166
167static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168{
169 return sample->raw_data + field->offset;
170}
171
172static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173{
174 field->offset = format_field->offset;
175 field->pointer = tp_field__ptr;
176 return 0;
177}
178
179struct syscall_tp {
180 struct tp_field id;
181 union {
182 struct tp_field args, ret;
183 };
184};
185
186static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 struct tp_field *field,
188 const char *name)
189{
190 struct format_field *format_field = perf_evsel__field(evsel, name);
191
192 if (format_field == NULL)
193 return -1;
194
195 return tp_field__init_uint(field, format_field, evsel->needs_swap);
196}
197
198#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
199 ({ struct syscall_tp *sc = evsel->priv;\
200 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
201
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff == NULL ? -1 : tp_field__init_ptr(field, ff);
}

/*
 * Pointer flavour of the helper above: initializes the member @name of the
 * struct syscall_tp hanging off evsel->priv.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
217
218static void perf_evsel__delete_priv(struct perf_evsel *evsel)
219{
04662523 220 zfree(&evsel->priv);
77170988
ACM
221 perf_evsel__delete(evsel);
222}
223
96695d44
NK
224static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225{
226 evsel->priv = malloc(sizeof(struct syscall_tp));
227 if (evsel->priv != NULL) {
228 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 goto out_delete;
230
231 evsel->handler = handler;
232 return 0;
233 }
234
235 return -ENOMEM;
236
237out_delete:
04662523 238 zfree(&evsel->priv);
96695d44
NK
239 return -ENOENT;
240}
241
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint, falling back
 * to syscalls:<direction>, and prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created or
 * the initialization failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
262
/*
 * Read member @name of the struct syscall_tp stored in evsel->priv from
 * @sample, through the integer reader installed for that field.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same as above, but through the pointer reader installed for the field. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
270
01533e97
ACM
271struct syscall_arg {
272 unsigned long val;
75b757ca
ACM
273 struct thread *thread;
274 struct trace *trace;
1f115cb7 275 void *parm;
01533e97
ACM
276 u8 idx;
277 u8 mask;
278};
279
/*
 * A table of names for an integer argument: entries[val - offset] is the
 * name of value val, for nr_entries consecutive values starting at offset.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> wrapping @array, with an offset of zero. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same, for tables whose first entry names the value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
296
975b7c2f
ACM
297static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 const char *intfmt,
299 struct syscall_arg *arg)
1f115cb7 300{
1f115cb7 301 struct strarray *sa = arg->parm;
03e3adc9 302 int idx = arg->val - sa->offset;
1f115cb7
ACM
303
304 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 305 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
306
307 return scnprintf(bf, size, "%s", sa->entries[idx]);
308}
309
975b7c2f
ACM
/* strarray formatter that prints values missing from the table in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
317
844ae5b4
ACM
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 *
 * strarray formatter that prints values missing from the table in hex.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 331
75b757ca
ACM
332static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333 struct syscall_arg *arg);
334
335#define SCA_FD syscall_arg__scnprintf_fd
336
48e1f91a
ACM
337#ifndef AT_FDCWD
338#define AT_FDCWD -100
339#endif
340
75b757ca
ACM
341static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
342 struct syscall_arg *arg)
343{
344 int fd = arg->val;
345
346 if (fd == AT_FDCWD)
347 return scnprintf(bf, size, "CWD");
348
349 return syscall_arg__scnprintf_fd(bf, size, arg);
350}
351
352#define SCA_FDAT syscall_arg__scnprintf_fd_at
353
354static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
355 struct syscall_arg *arg);
356
357#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
358
6e7eeb51 359static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
01533e97 360 struct syscall_arg *arg)
13d4ff3e 361{
01533e97 362 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
363}
364
beccb2b5
ACM
365#define SCA_HEX syscall_arg__scnprintf_hex
366
a1c2552d
ACM
367static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
368 struct syscall_arg *arg)
369{
370 return scnprintf(bf, size, "%d", arg->val);
371}
372
373#define SCA_INT syscall_arg__scnprintf_int
374
729a7841
ACM
375static const char *bpf_cmd[] = {
376 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
377 "MAP_GET_NEXT_KEY", "PROG_LOAD",
378};
379static DEFINE_STRARRAY(bpf_cmd);
380
03e3adc9
ACM
381static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
382static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 383
1f115cb7
ACM
384static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
385static DEFINE_STRARRAY(itimers);
386
b62bee1b
ACM
387static const char *keyctl_options[] = {
388 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
389 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
390 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
391 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
392 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
393};
394static DEFINE_STRARRAY(keyctl_options);
395
efe6b882
ACM
396static const char *whences[] = { "SET", "CUR", "END",
397#ifdef SEEK_DATA
398"DATA",
399#endif
400#ifdef SEEK_HOLE
401"HOLE",
402#endif
403};
404static DEFINE_STRARRAY(whences);
f9da0b0c 405
80f587d5
ACM
406static const char *fcntl_cmds[] = {
407 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
408 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
409 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
410 "F_GETOWNER_UIDS",
411};
412static DEFINE_STRARRAY(fcntl_cmds);
413
c045bf02
ACM
414static const char *rlimit_resources[] = {
415 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
416 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
417 "RTTIME",
418};
419static DEFINE_STRARRAY(rlimit_resources);
420
eb5b1b14
ACM
421static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
422static DEFINE_STRARRAY(sighow);
423
4f8c1b74
DA
424static const char *clockid[] = {
425 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
426 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
427 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
428};
429static DEFINE_STRARRAY(clockid);
430
e10bce81
ACM
431static const char *socket_families[] = {
432 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
433 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
434 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
435 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
436 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
437 "ALG", "NFC", "VSOCK",
438};
439static DEFINE_STRARRAY(socket_families);
440
51108999
ACM
441static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
442 struct syscall_arg *arg)
443{
444 size_t printed = 0;
445 int mode = arg->val;
446
447 if (mode == F_OK) /* 0 */
448 return scnprintf(bf, size, "F");
449#define P_MODE(n) \
450 if (mode & n##_OK) { \
451 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
452 mode &= ~n##_OK; \
453 }
454
455 P_MODE(R);
456 P_MODE(W);
457 P_MODE(X);
458#undef P_MODE
459
460 if (mode)
461 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
462
463 return printed;
464}
465
466#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
467
f994592d
ACM
468static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
469 struct syscall_arg *arg);
470
471#define SCA_FILENAME syscall_arg__scnprintf_filename
472
46cce19b
ACM
473static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
474 struct syscall_arg *arg)
475{
476 int printed = 0, flags = arg->val;
477
478#define P_FLAG(n) \
479 if (flags & O_##n) { \
480 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
481 flags &= ~O_##n; \
482 }
483
484 P_FLAG(CLOEXEC);
485 P_FLAG(NONBLOCK);
486#undef P_FLAG
487
488 if (flags)
489 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
490
491 return printed;
492}
493
494#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
495
844ae5b4
ACM
496#if defined(__i386__) || defined(__x86_64__)
497/*
498 * FIXME: Make this available to all arches.
499 */
78645cf3
ACM
500#define TCGETS 0x5401
501
502static const char *tioctls[] = {
503 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
504 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
505 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
506 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
507 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
508 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
509 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
510 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
511 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
512 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
513 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
514 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
515 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
516 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
517 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
518};
519
520static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 521#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 522
a355a61e
ACM
523#ifndef GRND_NONBLOCK
524#define GRND_NONBLOCK 0x0001
525#endif
526#ifndef GRND_RANDOM
527#define GRND_RANDOM 0x0002
528#endif
529
39878d49
ACM
530static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
531 struct syscall_arg *arg)
532{
533 int printed = 0, flags = arg->val;
534
535#define P_FLAG(n) \
536 if (flags & GRND_##n) { \
537 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
538 flags &= ~GRND_##n; \
539 }
540
541 P_FLAG(RANDOM);
542 P_FLAG(NONBLOCK);
543#undef P_FLAG
544
545 if (flags)
546 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
547
548 return printed;
549}
550
551#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
552
453350dd
ACM
/*
 * Shorthand for struct syscall_fmt initializers: format argument number @arg
 * with SCA_STRARRAY using the table strarray__<array>. @name is not used by
 * the expansion; it only documents the argument's name at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
556
ea8dc3ce 557#include "trace/beauty/eventfd.c"
8bf382ce 558#include "trace/beauty/flock.c"
d5d71e86 559#include "trace/beauty/futex_op.c"
df4cb167 560#include "trace/beauty/mmap.c"
ba2f22cf 561#include "trace/beauty/mode_t.c"
a30e6259 562#include "trace/beauty/msg_flags.c"
8f48df69 563#include "trace/beauty/open_flags.c"
62de344e 564#include "trace/beauty/perf_event_open.c"
d5d71e86 565#include "trace/beauty/pid.c"
a3bca91f 566#include "trace/beauty/sched_policy.c"
f5cd95ea 567#include "trace/beauty/seccomp.c"
12199d8e 568#include "trace/beauty/signum.c"
bbf86c43 569#include "trace/beauty/socket_type.c"
7206b900 570#include "trace/beauty/waitid_options.c"
a3bca91f 571
514f1c67
ACM
572static struct syscall_fmt {
573 const char *name;
aec1930b 574 const char *alias;
01533e97 575 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 576 void *arg_parm[6];
514f1c67 577 bool errmsg;
11c8e39f 578 bool errpid;
514f1c67 579 bool timeout;
04b34729 580 bool hexret;
514f1c67 581} syscall_fmts[] = {
51108999 582 { .name = "access", .errmsg = true,
12f3ca4f 583 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 584 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 585 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
586 { .name = "brk", .hexret = true,
587 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
588 { .name = "chdir", .errmsg = true, },
589 { .name = "chmod", .errmsg = true, },
590 { .name = "chroot", .errmsg = true, },
4f8c1b74 591 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 592 { .name = "clone", .errpid = true, },
75b757ca 593 { .name = "close", .errmsg = true,
48000a1a 594 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 595 { .name = "connect", .errmsg = true, },
12f3ca4f 596 { .name = "creat", .errmsg = true, },
b6565c90
ACM
597 { .name = "dup", .errmsg = true, },
598 { .name = "dup2", .errmsg = true, },
599 { .name = "dup3", .errmsg = true, },
453350dd 600 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
601 { .name = "eventfd2", .errmsg = true,
602 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 603 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
604 { .name = "fadvise64", .errmsg = true, },
605 { .name = "fallocate", .errmsg = true, },
606 { .name = "fchdir", .errmsg = true, },
607 { .name = "fchmod", .errmsg = true, },
75b757ca 608 { .name = "fchmodat", .errmsg = true,
12f3ca4f 609 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 610 { .name = "fchown", .errmsg = true, },
75b757ca 611 { .name = "fchownat", .errmsg = true,
12f3ca4f 612 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 613 { .name = "fcntl", .errmsg = true,
b6565c90 614 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 615 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 616 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 617 { .name = "flock", .errmsg = true,
b6565c90
ACM
618 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
619 { .name = "fsetxattr", .errmsg = true, },
620 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 621 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
622 { .name = "fstatfs", .errmsg = true, },
623 { .name = "fsync", .errmsg = true, },
624 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
625 { .name = "futex", .errmsg = true,
626 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 627 { .name = "futimesat", .errmsg = true,
12f3ca4f 628 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
629 { .name = "getdents", .errmsg = true, },
630 { .name = "getdents64", .errmsg = true, },
453350dd 631 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 632 { .name = "getpid", .errpid = true, },
d1d438a3 633 { .name = "getpgid", .errpid = true, },
c65f1070 634 { .name = "getppid", .errpid = true, },
39878d49
ACM
635 { .name = "getrandom", .errmsg = true,
636 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 637 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
638 { .name = "getxattr", .errmsg = true, },
639 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 640 { .name = "ioctl", .errmsg = true,
b6565c90 641 .arg_scnprintf = {
844ae5b4
ACM
642#if defined(__i386__) || defined(__x86_64__)
643/*
644 * FIXME: Make this available to all arches.
645 */
78645cf3
ACM
646 [1] = SCA_STRHEXARRAY, /* cmd */
647 [2] = SCA_HEX, /* arg */ },
648 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
649#else
650 [2] = SCA_HEX, /* arg */ }, },
651#endif
b62bee1b 652 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
653 { .name = "kill", .errmsg = true,
654 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
655 { .name = "lchown", .errmsg = true, },
656 { .name = "lgetxattr", .errmsg = true, },
75b757ca 657 { .name = "linkat", .errmsg = true,
48000a1a 658 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
659 { .name = "listxattr", .errmsg = true, },
660 { .name = "llistxattr", .errmsg = true, },
661 { .name = "lremovexattr", .errmsg = true, },
75b757ca 662 { .name = "lseek", .errmsg = true,
b6565c90 663 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 664 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
665 { .name = "lsetxattr", .errmsg = true, },
666 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
667 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
668 { .name = "madvise", .errmsg = true,
669 .arg_scnprintf = { [0] = SCA_HEX, /* start */
670 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 671 { .name = "mkdir", .errmsg = true, },
75b757ca 672 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
673 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
674 { .name = "mknod", .errmsg = true, },
75b757ca 675 { .name = "mknodat", .errmsg = true,
12f3ca4f 676 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
677 { .name = "mlock", .errmsg = true,
678 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
679 { .name = "mlockall", .errmsg = true,
680 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 681 { .name = "mmap", .hexret = true,
ae685380 682 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 683 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 684 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 685 { .name = "mprotect", .errmsg = true,
ae685380
ACM
686 .arg_scnprintf = { [0] = SCA_HEX, /* start */
687 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
688 { .name = "mq_unlink", .errmsg = true,
689 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
690 { .name = "mremap", .hexret = true,
691 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 692 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 693 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
694 { .name = "munlock", .errmsg = true,
695 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
696 { .name = "munmap", .errmsg = true,
697 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 698 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 699 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 700 { .name = "newfstatat", .errmsg = true,
12f3ca4f 701 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 702 { .name = "open", .errmsg = true,
12f3ca4f 703 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 704 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
705 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
706 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 707 { .name = "openat", .errmsg = true,
75b757ca
ACM
708 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
709 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 710 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 711 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
712 [3] = SCA_FD, /* group_fd */
713 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
714 { .name = "pipe2", .errmsg = true,
715 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
716 { .name = "poll", .errmsg = true, .timeout = true, },
717 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
718 { .name = "pread", .errmsg = true, .alias = "pread64", },
719 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 720 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
721 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
722 { .name = "pwritev", .errmsg = true, },
723 { .name = "read", .errmsg = true, },
12f3ca4f 724 { .name = "readlink", .errmsg = true, },
75b757ca 725 { .name = "readlinkat", .errmsg = true,
12f3ca4f 726 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 727 { .name = "readv", .errmsg = true, },
b2cc99fd 728 { .name = "recvfrom", .errmsg = true,
b6565c90 729 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 730 { .name = "recvmmsg", .errmsg = true,
b6565c90 731 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 732 { .name = "recvmsg", .errmsg = true,
b6565c90 733 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 734 { .name = "removexattr", .errmsg = true, },
75b757ca 735 { .name = "renameat", .errmsg = true,
48000a1a 736 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 737 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
738 { .name = "rt_sigaction", .errmsg = true,
739 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 740 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
741 { .name = "rt_sigqueueinfo", .errmsg = true,
742 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
743 { .name = "rt_tgsigqueueinfo", .errmsg = true,
744 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
a3bca91f
ACM
745 { .name = "sched_setscheduler", .errmsg = true,
746 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
747 { .name = "seccomp", .errmsg = true,
748 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
749 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 750 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 751 { .name = "sendmmsg", .errmsg = true,
b6565c90 752 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 753 { .name = "sendmsg", .errmsg = true,
b6565c90 754 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 755 { .name = "sendto", .errmsg = true,
b6565c90 756 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 757 { .name = "set_tid_address", .errpid = true, },
453350dd 758 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 759 { .name = "setpgid", .errmsg = true, },
453350dd 760 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 761 { .name = "setxattr", .errmsg = true, },
b6565c90 762 { .name = "shutdown", .errmsg = true, },
e10bce81 763 { .name = "socket", .errmsg = true,
a28b24b2
ACM
764 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
765 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
766 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
767 { .name = "socketpair", .errmsg = true,
768 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
769 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 770 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
771 { .name = "stat", .errmsg = true, .alias = "newstat", },
772 { .name = "statfs", .errmsg = true, },
34221118
ACM
773 { .name = "swapoff", .errmsg = true,
774 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
775 { .name = "swapon", .errmsg = true,
776 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 777 { .name = "symlinkat", .errmsg = true,
48000a1a 778 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
779 { .name = "tgkill", .errmsg = true,
780 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
781 { .name = "tkill", .errmsg = true,
782 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 783 { .name = "truncate", .errmsg = true, },
e5959683 784 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 785 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
786 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
787 { .name = "utime", .errmsg = true, },
75b757ca 788 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
789 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
790 { .name = "utimes", .errmsg = true, },
b6565c90 791 { .name = "vmsplice", .errmsg = true, },
11c8e39f 792 { .name = "wait4", .errpid = true,
7206b900 793 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 794 { .name = "waitid", .errpid = true,
7206b900 795 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
796 { .name = "write", .errmsg = true, },
797 { .name = "writev", .errmsg = true, },
514f1c67
ACM
798};
799
800static int syscall_fmt__cmp(const void *name, const void *fmtp)
801{
802 const struct syscall_fmt *fmt = fmtp;
803 return strcmp(name, fmt->name);
804}
805
806static struct syscall_fmt *syscall_fmt__find(const char *name)
807{
808 const int nmemb = ARRAY_SIZE(syscall_fmts);
809 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
810}
811
/*
 * What is known about one syscall.
 *
 * @fmt:	   matching syscall_fmts[] entry.
 *		   NOTE(review): presumably NULL when the syscall has no
 *		   entry -- confirm where ->fmt is assigned.
 * @arg_scnprintf: per-argument formatters.
 * @arg_parm:	   per-argument formatter data.
 * NOTE(review): the remaining fields are filled in outside this section;
 * their meaning is inferred from the names only.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
822
60c907ab
ACM
823static size_t fprintf_duration(unsigned long t, FILE *fp)
824{
825 double duration = (double)t / NSEC_PER_MSEC;
826 size_t printed = fprintf(fp, "(");
827
828 if (duration >= 1.0)
829 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
830 else if (duration >= 0.01)
831 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
832 else
833 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 834 return printed + fprintf(fp, "): ");
60c907ab
ACM
835}
836
f994592d
ACM
/**
 * struct thread_trace - per thread state, stored as the thread's priv area
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
struct thread_trace {
	/* sample->time of the last sys_enter seen for this thread */
	u64		  entry_time;
	/* sample->time of the last sys_exit seen for this thread */
	u64		  exit_time;
	/* a sys_enter was formatted into ->entry_str but not printed yet */
	bool		  entry_pending;
	/* bumped each time thread__trace() is called for this thread */
	unsigned long	  nr_events;
	/* major/minor page fault counters */
	unsigned long	  pfmaj, pfmin;
	/* buffer of trace__entry_str_size bytes holding "name(args" */
	char		  *entry_str;
	/* accumulated sched_stat_runtime, in milliseconds */
	double		  runtime_ms;
	struct {
		unsigned long ptr;
		short int     entry_str_pos;
		/* a vfs_getname was seen and is waiting for the open() return */
		bool	      pending_open;
		/* capacity of ->name, not counting the terminating NUL */
		unsigned int  namelen;
		/* copy of the last pathname seen by the vfs_getname probe */
		char	      *name;
	} filename;
	struct {
		/* highest valid index into ->table, -1 while it is empty */
		int	  max;
		/* fd -> strdup()'ed pathname, NULL for unknown fds */
		char	  **table;
	} paths;

	/* syscall id -> struct stats (in int_node->priv) */
	struct intlist *syscall_stats;
};
864
865static struct thread_trace *thread_trace__new(void)
866{
75b757ca
ACM
867 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
868
869 if (ttrace)
870 ttrace->paths.max = -1;
871
bf2575c1
DA
872 ttrace->syscall_stats = intlist__new(NULL);
873
75b757ca 874 return ttrace;
752fde44
ACM
875}
876
c24ff998 877static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 878{
efd5745e
ACM
879 struct thread_trace *ttrace;
880
752fde44
ACM
881 if (thread == NULL)
882 goto fail;
883
89dceb22
NK
884 if (thread__priv(thread) == NULL)
885 thread__set_priv(thread, thread_trace__new());
48000a1a 886
89dceb22 887 if (thread__priv(thread) == NULL)
752fde44
ACM
888 goto fail;
889
89dceb22 890 ttrace = thread__priv(thread);
efd5745e
ACM
891 ++ttrace->nr_events;
892
893 return ttrace;
752fde44 894fail:
c24ff998 895 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
896 "WARNING: not enough memory, dropping samples!\n");
897 return NULL;
898}
899
598d02c5
SF
/* Bit flags; presumably select major/minor page fault tracing - confirm at the users */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, of the per thread ->entry_str buffer */
static const size_t trace__entry_str_size = 2048;
904
97119f37 905static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 906{
89dceb22 907 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
908
909 if (fd > ttrace->paths.max) {
910 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
911
912 if (npath == NULL)
913 return -1;
914
915 if (ttrace->paths.max != -1) {
916 memset(npath + ttrace->paths.max + 1, 0,
917 (fd - ttrace->paths.max) * sizeof(char *));
918 } else {
919 memset(npath, 0, (fd + 1) * sizeof(char *));
920 }
921
922 ttrace->paths.table = npath;
923 ttrace->paths.max = fd;
924 }
925
926 ttrace->paths.table[fd] = strdup(pathname);
927
928 return ttrace->paths.table[fd] != NULL ? 0 : -1;
929}
930
97119f37
ACM
931static int thread__read_fd_path(struct thread *thread, int fd)
932{
933 char linkname[PATH_MAX], pathname[PATH_MAX];
934 struct stat st;
935 int ret;
936
937 if (thread->pid_ == thread->tid) {
938 scnprintf(linkname, sizeof(linkname),
939 "/proc/%d/fd/%d", thread->pid_, fd);
940 } else {
941 scnprintf(linkname, sizeof(linkname),
942 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
943 }
944
945 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
946 return -1;
947
948 ret = readlink(linkname, pathname, sizeof(pathname));
949
950 if (ret < 0 || ret > st.st_size)
951 return -1;
952
953 pathname[ret] = '\0';
954 return trace__set_fd_pathname(thread, fd, pathname);
955}
956
c522739d
ACM
957static const char *thread__fd_path(struct thread *thread, int fd,
958 struct trace *trace)
75b757ca 959{
89dceb22 960 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
961
962 if (ttrace == NULL)
963 return NULL;
964
965 if (fd < 0)
966 return NULL;
967
cdcd1e6b 968 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
969 if (!trace->live)
970 return NULL;
971 ++trace->stats.proc_getname;
cdcd1e6b 972 if (thread__read_fd_path(thread, fd))
c522739d
ACM
973 return NULL;
974 }
75b757ca
ACM
975
976 return ttrace->paths.table[fd];
977}
978
979static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
980 struct syscall_arg *arg)
981{
982 int fd = arg->val;
983 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 984 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
985
986 if (path)
987 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
988
989 return printed;
990}
991
992static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
993 struct syscall_arg *arg)
994{
995 int fd = arg->val;
996 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 997 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 998
04662523
ACM
999 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1000 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1001
1002 return printed;
1003}
1004
f994592d
ACM
1005static void thread__set_filename_pos(struct thread *thread, const char *bf,
1006 unsigned long ptr)
1007{
1008 struct thread_trace *ttrace = thread__priv(thread);
1009
1010 ttrace->filename.ptr = ptr;
1011 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1012}
1013
1014static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1015 struct syscall_arg *arg)
1016{
1017 unsigned long ptr = arg->val;
1018
1019 if (!arg->trace->vfs_getname)
1020 return scnprintf(bf, size, "%#x", ptr);
1021
1022 thread__set_filename_pos(arg->thread, bf, ptr);
1023 return 0;
1024}
1025
ae9ed035
ACM
1026static bool trace__filter_duration(struct trace *trace, double t)
1027{
1028 return t < (trace->duration_filter * NSEC_PER_MSEC);
1029}
1030
752fde44
ACM
1031static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1032{
1033 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1034
60c907ab 1035 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1036}
1037
/* Set by sig_handler(): a signal asked us to stop */
static bool done = false;
/* Set by sig_handler(): the last signal handled was SIGINT */
static bool interrupted = false;

/*
 * Signal handler: flag that tracing should finish and note whether the
 * request came from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;

	if (sig == SIGINT)
		interrupted = true;
	else
		interrupted = false;
}
1046
752fde44 1047static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
60c907ab 1048 u64 duration, u64 tstamp, FILE *fp)
752fde44
ACM
1049{
1050 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
60c907ab 1051 printed += fprintf_duration(duration, fp);
752fde44 1052
50c95cbd
ACM
1053 if (trace->multiple_threads) {
1054 if (trace->show_comm)
1902efe7 1055 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1056 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1057 }
752fde44
ACM
1058
1059 return printed;
1060}
1061
c24ff998 1062static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1063 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1064{
1065 int ret = 0;
1066
1067 switch (event->header.type) {
1068 case PERF_RECORD_LOST:
c24ff998 1069 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1070 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1071 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1072 break;
752fde44 1073 default:
162f0bef 1074 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1075 break;
1076 }
1077
1078 return ret;
1079}
1080
c24ff998 1081static int trace__tool_process(struct perf_tool *tool,
752fde44 1082 union perf_event *event,
162f0bef 1083 struct perf_sample *sample,
752fde44
ACM
1084 struct machine *machine)
1085{
c24ff998 1086 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1087 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1088}
1089
caf8a0d0
ACM
1090static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1091{
1092 struct machine *machine = vmachine;
1093
1094 if (machine->kptr_restrict_warned)
1095 return NULL;
1096
1097 if (symbol_conf.kptr_restrict) {
1098 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1099 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1100 "Kernel samples will not be resolved.\n");
1101 machine->kptr_restrict_warned = true;
1102 return NULL;
1103 }
1104
1105 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1106}
1107
752fde44
ACM
1108static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1109{
0a7e6d1b 1110 int err = symbol__init(NULL);
752fde44
ACM
1111
1112 if (err)
1113 return err;
1114
8fb598e5
DA
1115 trace->host = machine__new_host();
1116 if (trace->host == NULL)
1117 return -ENOMEM;
752fde44 1118
caf8a0d0 1119 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1120 return -errno;
1121
a33fbd56 1122 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1123 evlist->threads, trace__tool_process, false,
1124 trace->opts.proc_map_timeout);
752fde44
ACM
1125 if (err)
1126 symbol__exit();
1127
1128 return err;
1129}
1130
13d4ff3e
ACM
1131static int syscall__set_arg_fmts(struct syscall *sc)
1132{
1133 struct format_field *field;
b6565c90 1134 int idx = 0, len;
13d4ff3e 1135
f208bd8d 1136 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1137 if (sc->arg_scnprintf == NULL)
1138 return -1;
1139
1f115cb7
ACM
1140 if (sc->fmt)
1141 sc->arg_parm = sc->fmt->arg_parm;
1142
f208bd8d 1143 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1144 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1145 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1146 else if (strcmp(field->type, "const char *") == 0 &&
1147 (strcmp(field->name, "filename") == 0 ||
1148 strcmp(field->name, "path") == 0 ||
1149 strcmp(field->name, "pathname") == 0))
1150 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1151 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1152 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1153 else if (strcmp(field->type, "pid_t") == 0)
1154 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1155 else if (strcmp(field->type, "umode_t") == 0)
1156 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1157 else if ((strcmp(field->type, "int") == 0 ||
1158 strcmp(field->type, "unsigned int") == 0 ||
1159 strcmp(field->type, "long") == 0) &&
1160 (len = strlen(field->name)) >= 2 &&
1161 strcmp(field->name + len - 2, "fd") == 0) {
1162 /*
1163 * /sys/kernel/tracing/events/syscalls/sys_enter*
1164 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1165 * 65 int
1166 * 23 unsigned int
1167 * 7 unsigned long
1168 */
1169 sc->arg_scnprintf[idx] = SCA_FD;
1170 }
13d4ff3e
ACM
1171 ++idx;
1172 }
1173
1174 return 0;
1175}
1176
514f1c67
ACM
1177static int trace__read_syscall_info(struct trace *trace, int id)
1178{
1179 char tp_name[128];
1180 struct syscall *sc;
fd0db102 1181 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1182
1183 if (name == NULL)
1184 return -1;
514f1c67
ACM
1185
1186 if (id > trace->syscalls.max) {
1187 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1188
1189 if (nsyscalls == NULL)
1190 return -1;
1191
1192 if (trace->syscalls.max != -1) {
1193 memset(nsyscalls + trace->syscalls.max + 1, 0,
1194 (id - trace->syscalls.max) * sizeof(*sc));
1195 } else {
1196 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1197 }
1198
1199 trace->syscalls.table = nsyscalls;
1200 trace->syscalls.max = id;
1201 }
1202
1203 sc = trace->syscalls.table + id;
3a531260 1204 sc->name = name;
2ae3a312 1205
3a531260 1206 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1207
aec1930b 1208 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1209 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1210
8dd2a131 1211 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1212 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1213 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1214 }
514f1c67 1215
8dd2a131 1216 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1217 return -1;
1218
f208bd8d
ACM
1219 sc->args = sc->tp_format->format.fields;
1220 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1221 /*
1222 * We need to check and discard the first variable '__syscall_nr'
1223 * or 'nr' that mean the syscall number. It is needless here.
1224 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1225 */
1226 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1227 sc->args = sc->args->next;
1228 --sc->nr_args;
1229 }
1230
5089f20e
ACM
1231 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1232
13d4ff3e 1233 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1234}
1235
d0cc439b
ACM
1236static int trace__validate_ev_qualifier(struct trace *trace)
1237{
8b3ce757 1238 int err = 0, i;
d0cc439b
ACM
1239 struct str_node *pos;
1240
8b3ce757
ACM
1241 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1242 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1243 sizeof(trace->ev_qualifier_ids.entries[0]));
1244
1245 if (trace->ev_qualifier_ids.entries == NULL) {
1246 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1247 trace->output);
1248 err = -EINVAL;
1249 goto out;
1250 }
1251
1252 i = 0;
1253
602a1f4d 1254 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1255 const char *sc = pos->s;
fd0db102 1256 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1257
8b3ce757 1258 if (id < 0) {
d0cc439b
ACM
1259 if (err == 0) {
1260 fputs("Error:\tInvalid syscall ", trace->output);
1261 err = -EINVAL;
1262 } else {
1263 fputs(", ", trace->output);
1264 }
1265
1266 fputs(sc, trace->output);
1267 }
8b3ce757
ACM
1268
1269 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1270 }
1271
1272 if (err < 0) {
1273 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1274 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1275 zfree(&trace->ev_qualifier_ids.entries);
1276 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1277 }
8b3ce757 1278out:
d0cc439b
ACM
1279 return err;
1280}
1281
55d43bca
DA
1282/*
1283 * args is to be interpreted as a series of longs but we need to handle
1284 * 8-byte unaligned accesses. args points to raw_data within the event
1285 * and raw_data is guaranteed to be 8-byte unaligned because it is
1286 * preceded by raw_size which is a u32. So we need to copy args to a temp
1287 * variable to read it. Most notably this avoids extended load instructions
1288 * on unaligned addresses
1289 */
1290
752fde44 1291static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1292 unsigned char *args, struct trace *trace,
75b757ca 1293 struct thread *thread)
514f1c67 1294{
514f1c67 1295 size_t printed = 0;
55d43bca
DA
1296 unsigned char *p;
1297 unsigned long val;
514f1c67 1298
f208bd8d 1299 if (sc->args != NULL) {
514f1c67 1300 struct format_field *field;
01533e97
ACM
1301 u8 bit = 1;
1302 struct syscall_arg arg = {
75b757ca
ACM
1303 .idx = 0,
1304 .mask = 0,
1305 .trace = trace,
1306 .thread = thread,
01533e97 1307 };
6e7eeb51 1308
f208bd8d 1309 for (field = sc->args; field;
01533e97
ACM
1310 field = field->next, ++arg.idx, bit <<= 1) {
1311 if (arg.mask & bit)
6e7eeb51 1312 continue;
55d43bca
DA
1313
1314 /* special care for unaligned accesses */
1315 p = args + sizeof(unsigned long) * arg.idx;
1316 memcpy(&val, p, sizeof(val));
1317
4aa58232
ACM
1318 /*
1319 * Suppress this argument if its value is zero and
1320 * and we don't have a string associated in an
1321 * strarray for it.
1322 */
55d43bca 1323 if (val == 0 &&
4aa58232
ACM
1324 !(sc->arg_scnprintf &&
1325 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1326 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1327 continue;
1328
752fde44 1329 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1330 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1331 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1332 arg.val = val;
1f115cb7
ACM
1333 if (sc->arg_parm)
1334 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1335 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1336 size - printed, &arg);
6e7eeb51 1337 } else {
13d4ff3e 1338 printed += scnprintf(bf + printed, size - printed,
55d43bca 1339 "%ld", val);
6e7eeb51 1340 }
514f1c67 1341 }
4c4d6e51
ACM
1342 } else if (IS_ERR(sc->tp_format)) {
1343 /*
1344 * If we managed to read the tracepoint /format file, then we
1345 * may end up not having any args, like with gettid(), so only
1346 * print the raw args when we didn't manage to read it.
1347 */
01533e97
ACM
1348 int i = 0;
1349
514f1c67 1350 while (i < 6) {
55d43bca
DA
1351 /* special care for unaligned accesses */
1352 p = args + sizeof(unsigned long) * i;
1353 memcpy(&val, p, sizeof(val));
752fde44
ACM
1354 printed += scnprintf(bf + printed, size - printed,
1355 "%sarg%d: %ld",
55d43bca 1356 printed ? ", " : "", i, val);
514f1c67
ACM
1357 ++i;
1358 }
1359 }
1360
1361 return printed;
1362}
1363
/*
 * Signature shared by the per tracepoint sample handlers in this file, e.g.
 * trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1367
1368static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1369 struct perf_evsel *evsel, int id)
ba3d7dee 1370{
ba3d7dee
ACM
1371
1372 if (id < 0) {
adaa18bf
ACM
1373
1374 /*
1375 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1376 * before that, leaving at a higher verbosity level till that is
1377 * explained. Reproduced with plain ftrace with:
1378 *
1379 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1380 * grep "NR -1 " /t/trace_pipe
1381 *
1382 * After generating some load on the machine.
1383 */
1384 if (verbose > 1) {
1385 static u64 n;
1386 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1387 id, perf_evsel__name(evsel), ++n);
1388 }
ba3d7dee
ACM
1389 return NULL;
1390 }
1391
1392 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1393 trace__read_syscall_info(trace, id))
1394 goto out_cant_read;
1395
1396 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1397 goto out_cant_read;
1398
1399 return &trace->syscalls.table[id];
1400
1401out_cant_read:
7c304ee0
ACM
1402 if (verbose) {
1403 fprintf(trace->output, "Problems reading syscall %d", id);
1404 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1405 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1406 fputs(" information\n", trace->output);
1407 }
ba3d7dee
ACM
1408 return NULL;
1409}
1410
bf2575c1
DA
1411static void thread__update_stats(struct thread_trace *ttrace,
1412 int id, struct perf_sample *sample)
1413{
1414 struct int_node *inode;
1415 struct stats *stats;
1416 u64 duration = 0;
1417
1418 inode = intlist__findnew(ttrace->syscall_stats, id);
1419 if (inode == NULL)
1420 return;
1421
1422 stats = inode->priv;
1423 if (stats == NULL) {
1424 stats = malloc(sizeof(struct stats));
1425 if (stats == NULL)
1426 return;
1427 init_stats(stats);
1428 inode->priv = stats;
1429 }
1430
1431 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1432 duration = sample->time - ttrace->entry_time;
1433
1434 update_stats(stats, duration);
1435}
1436
e596663e
ACM
1437static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1438{
1439 struct thread_trace *ttrace;
1440 u64 duration;
1441 size_t printed;
1442
1443 if (trace->current == NULL)
1444 return 0;
1445
1446 ttrace = thread__priv(trace->current);
1447
1448 if (!ttrace->entry_pending)
1449 return 0;
1450
1451 duration = sample->time - ttrace->entry_time;
1452
1453 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1454 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1455 ttrace->entry_pending = false;
1456
1457 return printed;
1458}
1459
ba3d7dee 1460static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1461 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1462 struct perf_sample *sample)
1463{
752fde44 1464 char *msg;
ba3d7dee 1465 void *args;
752fde44 1466 size_t printed = 0;
2ae3a312 1467 struct thread *thread;
b91fc39f 1468 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1469 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1470 struct thread_trace *ttrace;
1471
1472 if (sc == NULL)
1473 return -1;
ba3d7dee 1474
8fb598e5 1475 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1476 ttrace = thread__trace(thread, trace->output);
2ae3a312 1477 if (ttrace == NULL)
b91fc39f 1478 goto out_put;
ba3d7dee 1479
77170988 1480 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1481
1482 if (ttrace->entry_str == NULL) {
e4d44e83 1483 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1484 if (!ttrace->entry_str)
b91fc39f 1485 goto out_put;
752fde44
ACM
1486 }
1487
5cf9c84e 1488 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1489 trace__printf_interrupted_entry(trace, sample);
e596663e 1490
752fde44
ACM
1491 ttrace->entry_time = sample->time;
1492 msg = ttrace->entry_str;
e4d44e83 1493 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1494
e4d44e83 1495 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1496 args, trace, thread);
752fde44 1497
5089f20e 1498 if (sc->is_exit) {
5cf9c84e 1499 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
c24ff998 1500 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
c008f78f 1501 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1502 }
7f4f8001 1503 } else {
752fde44 1504 ttrace->entry_pending = true;
7f4f8001
ACM
1505 /* See trace__vfs_getname & trace__sys_exit */
1506 ttrace->filename.pending_open = false;
1507 }
ba3d7dee 1508
f3b623b8
ACM
1509 if (trace->current != thread) {
1510 thread__put(trace->current);
1511 trace->current = thread__get(thread);
1512 }
b91fc39f
ACM
1513 err = 0;
1514out_put:
1515 thread__put(thread);
1516 return err;
ba3d7dee
ACM
1517}
1518
5cf9c84e
ACM
1519static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1520 struct perf_sample *sample,
1521 struct callchain_cursor *cursor)
202ff968
ACM
1522{
1523 struct addr_location al;
5cf9c84e
ACM
1524
1525 if (machine__resolve(trace->host, &al, sample) < 0 ||
1526 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1527 return -1;
1528
1529 return 0;
1530}
1531
1532static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1533{
202ff968 1534 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1535 const unsigned int print_opts = EVSEL__PRINT_SYM |
1536 EVSEL__PRINT_DSO |
1537 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1538
d327e60c 1539 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1540}
1541
ba3d7dee 1542static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1543 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1544 struct perf_sample *sample)
1545{
2c82c3ad 1546 long ret;
60c907ab 1547 u64 duration = 0;
2ae3a312 1548 struct thread *thread;
5cf9c84e 1549 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1550 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1551 struct thread_trace *ttrace;
1552
1553 if (sc == NULL)
1554 return -1;
ba3d7dee 1555
8fb598e5 1556 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1557 ttrace = thread__trace(thread, trace->output);
2ae3a312 1558 if (ttrace == NULL)
b91fc39f 1559 goto out_put;
ba3d7dee 1560
bf2575c1
DA
1561 if (trace->summary)
1562 thread__update_stats(ttrace, id, sample);
1563
77170988 1564 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1565
fd0db102 1566 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1567 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1568 ttrace->filename.pending_open = false;
c522739d
ACM
1569 ++trace->stats.vfs_getname;
1570 }
1571
752fde44
ACM
1572 ttrace->exit_time = sample->time;
1573
ae9ed035 1574 if (ttrace->entry_time) {
60c907ab 1575 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1576 if (trace__filter_duration(trace, duration))
1577 goto out;
1578 } else if (trace->duration_filter)
1579 goto out;
60c907ab 1580
5cf9c84e
ACM
1581 if (sample->callchain) {
1582 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1583 if (callchain_ret == 0) {
1584 if (callchain_cursor.nr < trace->min_stack)
1585 goto out;
1586 callchain_ret = 1;
1587 }
1588 }
1589
fd2eabaf
DA
1590 if (trace->summary_only)
1591 goto out;
1592
c24ff998 1593 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
752fde44
ACM
1594
1595 if (ttrace->entry_pending) {
c24ff998 1596 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1597 } else {
c24ff998
ACM
1598 fprintf(trace->output, " ... [");
1599 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1600 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1601 }
1602
da3c9a44
ACM
1603 if (sc->fmt == NULL) {
1604signed_print:
2c82c3ad 1605 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1606 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1607 char bf[STRERR_BUFSIZE];
c8b5f2c9 1608 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1609 *e = audit_errno_to_name(-ret);
1610
c24ff998 1611 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1612 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1613 fprintf(trace->output, ") = 0 Timeout");
04b34729 1614 else if (sc->fmt->hexret)
2c82c3ad 1615 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1616 else if (sc->fmt->errpid) {
1617 struct thread *child = machine__find_thread(trace->host, ret, ret);
1618
1619 if (child != NULL) {
1620 fprintf(trace->output, ") = %ld", ret);
1621 if (child->comm_set)
1622 fprintf(trace->output, " (%s)", thread__comm_str(child));
1623 thread__put(child);
1624 }
1625 } else
da3c9a44 1626 goto signed_print;
ba3d7dee 1627
c24ff998 1628 fputc('\n', trace->output);
566a0885 1629
5cf9c84e
ACM
1630 if (callchain_ret > 0)
1631 trace__fprintf_callchain(trace, sample);
1632 else if (callchain_ret < 0)
1633 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1634out:
752fde44 1635 ttrace->entry_pending = false;
b91fc39f
ACM
1636 err = 0;
1637out_put:
1638 thread__put(thread);
1639 return err;
ba3d7dee
ACM
1640}
1641
c522739d 1642static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1643 union perf_event *event __maybe_unused,
c522739d
ACM
1644 struct perf_sample *sample)
1645{
f994592d
ACM
1646 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1647 struct thread_trace *ttrace;
1648 size_t filename_len, entry_str_len, to_move;
1649 ssize_t remaining_space;
1650 char *pos;
7f4f8001 1651 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1652
1653 if (!thread)
1654 goto out;
1655
1656 ttrace = thread__priv(thread);
1657 if (!ttrace)
1658 goto out;
1659
7f4f8001
ACM
1660 filename_len = strlen(filename);
1661
1662 if (ttrace->filename.namelen < filename_len) {
1663 char *f = realloc(ttrace->filename.name, filename_len + 1);
1664
1665 if (f == NULL)
1666 goto out;
1667
1668 ttrace->filename.namelen = filename_len;
1669 ttrace->filename.name = f;
1670 }
1671
1672 strcpy(ttrace->filename.name, filename);
1673 ttrace->filename.pending_open = true;
1674
f994592d
ACM
1675 if (!ttrace->filename.ptr)
1676 goto out;
1677
1678 entry_str_len = strlen(ttrace->entry_str);
1679 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1680 if (remaining_space <= 0)
1681 goto out;
1682
f994592d
ACM
1683 if (filename_len > (size_t)remaining_space) {
1684 filename += filename_len - remaining_space;
1685 filename_len = remaining_space;
1686 }
1687
1688 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1689 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1690 memmove(pos + filename_len, pos, to_move);
1691 memcpy(pos, filename, filename_len);
1692
1693 ttrace->filename.ptr = 0;
1694 ttrace->filename.entry_str_pos = 0;
1695out:
c522739d
ACM
1696 return 0;
1697}
1698
1302d88e 1699static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1700 union perf_event *event __maybe_unused,
1302d88e
ACM
1701 struct perf_sample *sample)
1702{
1703 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1704 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1705 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1706 sample->pid,
1707 sample->tid);
c24ff998 1708 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1709
1710 if (ttrace == NULL)
1711 goto out_dump;
1712
1713 ttrace->runtime_ms += runtime_ms;
1714 trace->runtime_ms += runtime_ms;
b91fc39f 1715 thread__put(thread);
1302d88e
ACM
1716 return 0;
1717
1718out_dump:
c24ff998 1719 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1720 evsel->name,
1721 perf_evsel__strval(evsel, sample, "comm"),
1722 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1723 runtime,
1724 perf_evsel__intval(evsel, sample, "vruntime"));
b91fc39f 1725 thread__put(thread);
1302d88e
ACM
1726 return 0;
1727}
1728
1d6c9407
WN
1729static void bpf_output__printer(enum binary_printer_ops op,
1730 unsigned int val, void *extra)
1731{
1732 FILE *output = extra;
1733 unsigned char ch = (unsigned char)val;
1734
1735 switch (op) {
1736 case BINARY_PRINT_CHAR_DATA:
1737 fprintf(output, "%c", isprint(ch) ? ch : '.');
1738 break;
1739 case BINARY_PRINT_DATA_BEGIN:
1740 case BINARY_PRINT_LINE_BEGIN:
1741 case BINARY_PRINT_ADDR:
1742 case BINARY_PRINT_NUM_DATA:
1743 case BINARY_PRINT_NUM_PAD:
1744 case BINARY_PRINT_SEP:
1745 case BINARY_PRINT_CHAR_PAD:
1746 case BINARY_PRINT_LINE_END:
1747 case BINARY_PRINT_DATA_END:
1748 default:
1749 break;
1750 }
1751}
1752
1753static void bpf_output__fprintf(struct trace *trace,
1754 struct perf_sample *sample)
1755{
1756 print_binary(sample->raw_data, sample->raw_size, 8,
1757 bpf_output__printer, trace->output);
1758}
1759
14a052df
ACM
1760static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1761 union perf_event *event __maybe_unused,
1762 struct perf_sample *sample)
1763{
7ad35615
ACM
1764 int callchain_ret = 0;
1765
1766 if (sample->callchain) {
1767 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1768 if (callchain_ret == 0) {
1769 if (callchain_cursor.nr < trace->min_stack)
1770 goto out;
1771 callchain_ret = 1;
1772 }
1773 }
1774
14a052df
ACM
1775 trace__printf_interrupted_entry(trace, sample);
1776 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1777
1778 if (trace->trace_syscalls)
1779 fprintf(trace->output, "( ): ");
1780
1781 fprintf(trace->output, "%s:", evsel->name);
14a052df 1782
1d6c9407
WN
1783 if (perf_evsel__is_bpf_output(evsel)) {
1784 bpf_output__fprintf(trace, sample);
1785 } else if (evsel->tp_format) {
14a052df
ACM
1786 event_format__fprintf(evsel->tp_format, sample->cpu,
1787 sample->raw_data, sample->raw_size,
1788 trace->output);
1789 }
1790
1791 fprintf(trace->output, ")\n");
202ff968 1792
7ad35615
ACM
1793 if (callchain_ret > 0)
1794 trace__fprintf_callchain(trace, sample);
1795 else if (callchain_ret < 0)
1796 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1797out:
14a052df
ACM
1798 return 0;
1799}
1800
598d02c5
SF
1801static void print_location(FILE *f, struct perf_sample *sample,
1802 struct addr_location *al,
1803 bool print_dso, bool print_sym)
1804{
1805
1806 if ((verbose || print_dso) && al->map)
1807 fprintf(f, "%s@", al->map->dso->long_name);
1808
1809 if ((verbose || print_sym) && al->sym)
4414a3c5 1810 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1811 al->addr - al->sym->start);
1812 else if (al->map)
4414a3c5 1813 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1814 else
4414a3c5 1815 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1816}
1817
1818static int trace__pgfault(struct trace *trace,
1819 struct perf_evsel *evsel,
473398a2 1820 union perf_event *event __maybe_unused,
598d02c5
SF
1821 struct perf_sample *sample)
1822{
1823 struct thread *thread;
598d02c5
SF
1824 struct addr_location al;
1825 char map_type = 'd';
a2ea67d7 1826 struct thread_trace *ttrace;
b91fc39f 1827 int err = -1;
1df54290 1828 int callchain_ret = 0;
598d02c5
SF
1829
1830 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1831
1832 if (sample->callchain) {
1833 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1834 if (callchain_ret == 0) {
1835 if (callchain_cursor.nr < trace->min_stack)
1836 goto out_put;
1837 callchain_ret = 1;
1838 }
1839 }
1840
a2ea67d7
SF
1841 ttrace = thread__trace(thread, trace->output);
1842 if (ttrace == NULL)
b91fc39f 1843 goto out_put;
a2ea67d7
SF
1844
1845 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1846 ttrace->pfmaj++;
1847 else
1848 ttrace->pfmin++;
1849
1850 if (trace->summary_only)
b91fc39f 1851 goto out;
598d02c5 1852
473398a2 1853 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1854 sample->ip, &al);
1855
1856 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1857
1858 fprintf(trace->output, "%sfault [",
1859 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1860 "maj" : "min");
1861
1862 print_location(trace->output, sample, &al, false, true);
1863
1864 fprintf(trace->output, "] => ");
1865
473398a2 1866 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1867 sample->addr, &al);
1868
1869 if (!al.map) {
473398a2 1870 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1871 MAP__FUNCTION, sample->addr, &al);
1872
1873 if (al.map)
1874 map_type = 'x';
1875 else
1876 map_type = '?';
1877 }
1878
1879 print_location(trace->output, sample, &al, true, false);
1880
1881 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1882
1df54290
ACM
1883 if (callchain_ret > 0)
1884 trace__fprintf_callchain(trace, sample);
1885 else if (callchain_ret < 0)
1886 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1887out:
1888 err = 0;
1889out_put:
1890 thread__put(thread);
1891 return err;
598d02c5
SF
1892}
1893
bdc89661
DA
1894static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1895{
1896 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1897 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1898 return false;
1899
1900 if (trace->pid_list || trace->tid_list)
1901 return true;
1902
1903 return false;
1904}
1905
e6001980 1906static void trace__set_base_time(struct trace *trace,
8a07a809 1907 struct perf_evsel *evsel,
e6001980
ACM
1908 struct perf_sample *sample)
1909{
8a07a809
ACM
1910 /*
1911 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1912 * and don't use sample->time unconditionally, we may end up having
1913 * some other event in the future without PERF_SAMPLE_TIME for good
1914 * reason, i.e. we may not be interested in its timestamps, just in
1915 * it taking place, picking some piece of information when it
1916 * appears in our event stream (vfs_getname comes to mind).
1917 */
1918 if (trace->base_time == 0 && !trace->full_time &&
1919 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1920 trace->base_time = sample->time;
1921}
1922
6810fc91 1923static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1924 union perf_event *event,
6810fc91
DA
1925 struct perf_sample *sample,
1926 struct perf_evsel *evsel,
1927 struct machine *machine __maybe_unused)
1928{
1929 struct trace *trace = container_of(tool, struct trace, tool);
1930 int err = 0;
1931
744a9719 1932 tracepoint_handler handler = evsel->handler;
6810fc91 1933
bdc89661
DA
1934 if (skip_sample(trace, sample))
1935 return 0;
1936
e6001980 1937 trace__set_base_time(trace, evsel, sample);
6810fc91 1938
3160565f
DA
1939 if (handler) {
1940 ++trace->nr_events;
0c82adcf 1941 handler(trace, evsel, event, sample);
3160565f 1942 }
6810fc91
DA
1943
1944 return err;
1945}
1946
bdc89661
DA
1947static int parse_target_str(struct trace *trace)
1948{
1949 if (trace->opts.target.pid) {
1950 trace->pid_list = intlist__new(trace->opts.target.pid);
1951 if (trace->pid_list == NULL) {
1952 pr_err("Error parsing process id string\n");
1953 return -EINVAL;
1954 }
1955 }
1956
1957 if (trace->opts.target.tid) {
1958 trace->tid_list = intlist__new(trace->opts.target.tid);
1959 if (trace->tid_list == NULL) {
1960 pr_err("Error parsing thread id string\n");
1961 return -EINVAL;
1962 }
1963 }
1964
1965 return 0;
1966}
1967
1e28fe0a 1968static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1969{
1970 unsigned int rec_argc, i, j;
1971 const char **rec_argv;
1972 const char * const record_args[] = {
1973 "record",
1974 "-R",
1975 "-m", "1024",
1976 "-c", "1",
5e2485b1
DA
1977 };
1978
1e28fe0a
SF
1979 const char * const sc_args[] = { "-e", };
1980 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1981 const char * const majpf_args[] = { "-e", "major-faults" };
1982 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1983 const char * const minpf_args[] = { "-e", "minor-faults" };
1984 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1985
9aca7f17 1986 /* +1 is for the event string below */
1e28fe0a
SF
1987 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1988 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1989 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1990
1991 if (rec_argv == NULL)
1992 return -ENOMEM;
1993
1e28fe0a 1994 j = 0;
5e2485b1 1995 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
1996 rec_argv[j++] = record_args[i];
1997
e281a960
SF
1998 if (trace->trace_syscalls) {
1999 for (i = 0; i < sc_args_nr; i++)
2000 rec_argv[j++] = sc_args[i];
2001
2002 /* event string may be different for older kernels - e.g., RHEL6 */
2003 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2004 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2005 else if (is_valid_tracepoint("syscalls:sys_enter"))
2006 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2007 else {
2008 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2009 return -1;
2010 }
9aca7f17 2011 }
9aca7f17 2012
1e28fe0a
SF
2013 if (trace->trace_pgfaults & TRACE_PFMAJ)
2014 for (i = 0; i < majpf_args_nr; i++)
2015 rec_argv[j++] = majpf_args[i];
2016
2017 if (trace->trace_pgfaults & TRACE_PFMIN)
2018 for (i = 0; i < minpf_args_nr; i++)
2019 rec_argv[j++] = minpf_args[i];
2020
2021 for (i = 0; i < (unsigned int)argc; i++)
2022 rec_argv[j++] = argv[i];
5e2485b1 2023
1e28fe0a 2024 return cmd_record(j, rec_argv, NULL);
5e2485b1
DA
2025}
2026
bf2575c1
DA
2027static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2028
08c98776 2029static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2030{
ef503831 2031 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2032
2033 if (IS_ERR(evsel))
08c98776 2034 return false;
c522739d
ACM
2035
2036 if (perf_evsel__field(evsel, "pathname") == NULL) {
2037 perf_evsel__delete(evsel);
08c98776 2038 return false;
c522739d
ACM
2039 }
2040
744a9719 2041 evsel->handler = trace__vfs_getname;
c522739d 2042 perf_evlist__add(evlist, evsel);
08c98776 2043 return true;
c522739d
ACM
2044}
2045
0ae537cb 2046static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2047{
2048 struct perf_evsel *evsel;
2049 struct perf_event_attr attr = {
2050 .type = PERF_TYPE_SOFTWARE,
2051 .mmap_data = 1,
598d02c5
SF
2052 };
2053
2054 attr.config = config;
0524798c 2055 attr.sample_period = 1;
598d02c5
SF
2056
2057 event_attr_init(&attr);
2058
2059 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2060 if (evsel)
2061 evsel->handler = trace__pgfault;
598d02c5 2062
0ae537cb 2063 return evsel;
598d02c5
SF
2064}
2065
ddbb1b13
ACM
2066static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2067{
2068 const u32 type = event->header.type;
2069 struct perf_evsel *evsel;
2070
ddbb1b13
ACM
2071 if (type != PERF_RECORD_SAMPLE) {
2072 trace__process_event(trace, trace->host, event, sample);
2073 return;
2074 }
2075
2076 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2077 if (evsel == NULL) {
2078 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2079 return;
2080 }
2081
e6001980
ACM
2082 trace__set_base_time(trace, evsel, sample);
2083
ddbb1b13
ACM
2084 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2085 sample->raw_data == NULL) {
2086 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2087 perf_evsel__name(evsel), sample->tid,
2088 sample->cpu, sample->raw_size);
2089 } else {
2090 tracepoint_handler handler = evsel->handler;
2091 handler(trace, evsel, event, sample);
2092 }
2093}
2094
c27366f0
ACM
2095static int trace__add_syscall_newtp(struct trace *trace)
2096{
2097 int ret = -1;
2098 struct perf_evlist *evlist = trace->evlist;
2099 struct perf_evsel *sys_enter, *sys_exit;
2100
2101 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2102 if (sys_enter == NULL)
2103 goto out;
2104
2105 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2106 goto out_delete_sys_enter;
2107
2108 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2109 if (sys_exit == NULL)
2110 goto out_delete_sys_enter;
2111
2112 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2113 goto out_delete_sys_exit;
2114
2115 perf_evlist__add(evlist, sys_enter);
2116 perf_evlist__add(evlist, sys_exit);
2117
2ddd5c04 2118 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2119 /*
2120 * We're interested only in the user space callchain
2121 * leading to the syscall, allow overriding that for
2122 * debugging reasons using --kernel_syscall_callchains
2123 */
2124 sys_exit->attr.exclude_callchain_kernel = 1;
2125 }
2126
8b3ce757
ACM
2127 trace->syscalls.events.sys_enter = sys_enter;
2128 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2129
2130 ret = 0;
2131out:
2132 return ret;
2133
2134out_delete_sys_exit:
2135 perf_evsel__delete_priv(sys_exit);
2136out_delete_sys_enter:
2137 perf_evsel__delete_priv(sys_enter);
2138 goto out;
2139}
2140
19867b61
ACM
2141static int trace__set_ev_qualifier_filter(struct trace *trace)
2142{
2143 int err = -1;
2144 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2145 trace->ev_qualifier_ids.nr,
2146 trace->ev_qualifier_ids.entries);
2147
2148 if (filter == NULL)
2149 goto out_enomem;
2150
2151 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2152 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2153
2154 free(filter);
2155out:
2156 return err;
2157out_enomem:
2158 errno = ENOMEM;
2159 goto out;
2160}
c27366f0 2161
f15eb531 2162static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2163{
14a052df 2164 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2165 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2166 int err = -1, i;
2167 unsigned long before;
f15eb531 2168 const bool forks = argc > 0;
46fb3c21 2169 bool draining = false;
514f1c67 2170
75b757ca
ACM
2171 trace->live = true;
2172
c27366f0 2173 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2174 goto out_error_raw_syscalls;
514f1c67 2175
e281a960 2176 if (trace->trace_syscalls)
08c98776 2177 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2178
0ae537cb
ACM
2179 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2180 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2181 if (pgfault_maj == NULL)
2182 goto out_error_mem;
2183 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2184 }
598d02c5 2185
0ae537cb
ACM
2186 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2187 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2188 if (pgfault_min == NULL)
2189 goto out_error_mem;
2190 perf_evlist__add(evlist, pgfault_min);
2191 }
598d02c5 2192
1302d88e 2193 if (trace->sched &&
2cc990ba
ACM
2194 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2195 trace__sched_stat_runtime))
2196 goto out_error_sched_stat_runtime;
1302d88e 2197
514f1c67
ACM
2198 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2199 if (err < 0) {
c24ff998 2200 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2201 goto out_delete_evlist;
2202 }
2203
752fde44
ACM
2204 err = trace__symbols_init(trace, evlist);
2205 if (err < 0) {
c24ff998 2206 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2207 goto out_delete_evlist;
752fde44
ACM
2208 }
2209
fde54b78
ACM
2210 perf_evlist__config(evlist, &trace->opts, NULL);
2211
0c3a6ef4
ACM
2212 if (callchain_param.enabled) {
2213 bool use_identifier = false;
2214
2215 if (trace->syscalls.events.sys_exit) {
2216 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2217 &trace->opts, &callchain_param);
2218 use_identifier = true;
2219 }
2220
2221 if (pgfault_maj) {
2222 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2223 use_identifier = true;
2224 }
2225
2226 if (pgfault_min) {
2227 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2228 use_identifier = true;
2229 }
2230
2231 if (use_identifier) {
2232 /*
2233 * Now we have evsels with different sample_ids, use
2234 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2235 * from a fixed position in each ring buffer record.
2236 *
2237 * As of this the changeset introducing this comment, this
2238 * isn't strictly needed, as the fields that can come before
2239 * PERF_SAMPLE_ID are all used, but we'll probably disable
2240 * some of those for things like copying the payload of
2241 * pointer syscall arguments, and for vfs_getname we don't
2242 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2243 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2244 */
2245 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2246 perf_evlist__reset_sample_bit(evlist, ID);
2247 }
fde54b78 2248 }
514f1c67 2249
f15eb531
NK
2250 signal(SIGCHLD, sig_handler);
2251 signal(SIGINT, sig_handler);
2252
2253 if (forks) {
6ef73ec4 2254 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2255 argv, false, NULL);
f15eb531 2256 if (err < 0) {
c24ff998 2257 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2258 goto out_delete_evlist;
f15eb531
NK
2259 }
2260 }
2261
514f1c67 2262 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2263 if (err < 0)
2264 goto out_error_open;
514f1c67 2265
ba504235
WN
2266 err = bpf__apply_obj_config();
2267 if (err) {
2268 char errbuf[BUFSIZ];
2269
2270 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2271 pr_err("ERROR: Apply config to BPF failed: %s\n",
2272 errbuf);
2273 goto out_error_open;
2274 }
2275
241b057c
ACM
2276 /*
2277 * Better not use !target__has_task() here because we need to cover the
2278 * case where no threads were specified in the command line, but a
2279 * workload was, and in that case we will fill in the thread_map when
2280 * we fork the workload in perf_evlist__prepare_workload.
2281 */
f078c385
ACM
2282 if (trace->filter_pids.nr > 0)
2283 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2284 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2285 err = perf_evlist__set_filter_pid(evlist, getpid());
2286
94ad89bc
ACM
2287 if (err < 0)
2288 goto out_error_mem;
2289
19867b61
ACM
2290 if (trace->ev_qualifier_ids.nr > 0) {
2291 err = trace__set_ev_qualifier_filter(trace);
2292 if (err < 0)
2293 goto out_errno;
19867b61 2294
2e5e5f87
ACM
2295 pr_debug("event qualifier tracepoint filter: %s\n",
2296 trace->syscalls.events.sys_exit->filter);
2297 }
19867b61 2298
94ad89bc
ACM
2299 err = perf_evlist__apply_filters(evlist, &evsel);
2300 if (err < 0)
2301 goto out_error_apply_filters;
241b057c 2302
f885037e 2303 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2304 if (err < 0)
2305 goto out_error_mmap;
514f1c67 2306
cb24d01d
ACM
2307 if (!target__none(&trace->opts.target))
2308 perf_evlist__enable(evlist);
2309
f15eb531
NK
2310 if (forks)
2311 perf_evlist__start_workload(evlist);
2312
e13798c7 2313 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2314 evlist->threads->nr > 1 ||
2315 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2316again:
efd5745e 2317 before = trace->nr_events;
514f1c67
ACM
2318
2319 for (i = 0; i < evlist->nr_mmaps; i++) {
2320 union perf_event *event;
2321
2322 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2323 struct perf_sample sample;
514f1c67 2324
efd5745e 2325 ++trace->nr_events;
514f1c67 2326
514f1c67
ACM
2327 err = perf_evlist__parse_sample(evlist, event, &sample);
2328 if (err) {
c24ff998 2329 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2330 goto next_event;
514f1c67
ACM
2331 }
2332
ddbb1b13 2333 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2334next_event:
2335 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2336
ba209f85
ACM
2337 if (interrupted)
2338 goto out_disable;
02ac5421
ACM
2339
2340 if (done && !draining) {
2341 perf_evlist__disable(evlist);
2342 draining = true;
2343 }
514f1c67
ACM
2344 }
2345 }
2346
efd5745e 2347 if (trace->nr_events == before) {
ba209f85 2348 int timeout = done ? 100 : -1;
f15eb531 2349
46fb3c21
ACM
2350 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2351 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2352 draining = true;
2353
ba209f85 2354 goto again;
46fb3c21 2355 }
ba209f85
ACM
2356 } else {
2357 goto again;
f15eb531
NK
2358 }
2359
ba209f85 2360out_disable:
f3b623b8
ACM
2361 thread__zput(trace->current);
2362
ba209f85 2363 perf_evlist__disable(evlist);
514f1c67 2364
c522739d
ACM
2365 if (!err) {
2366 if (trace->summary)
2367 trace__fprintf_thread_summary(trace, trace->output);
2368
2369 if (trace->show_tool_stats) {
2370 fprintf(trace->output, "Stats:\n "
2371 " vfs_getname : %" PRIu64 "\n"
2372 " proc_getname: %" PRIu64 "\n",
2373 trace->stats.vfs_getname,
2374 trace->stats.proc_getname);
2375 }
2376 }
bf2575c1 2377
514f1c67
ACM
2378out_delete_evlist:
2379 perf_evlist__delete(evlist);
14a052df 2380 trace->evlist = NULL;
75b757ca 2381 trace->live = false;
514f1c67 2382 return err;
6ef068cb
ACM
2383{
2384 char errbuf[BUFSIZ];
a8f23d8f 2385
2cc990ba 2386out_error_sched_stat_runtime:
988bdb31 2387 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2388 goto out_error;
2389
801c67b0 2390out_error_raw_syscalls:
988bdb31 2391 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2392 goto out_error;
2393
e09b18d4
ACM
2394out_error_mmap:
2395 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2396 goto out_error;
2397
a8f23d8f
ACM
2398out_error_open:
2399 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2400
2401out_error:
6ef068cb 2402 fprintf(trace->output, "%s\n", errbuf);
87f91868 2403 goto out_delete_evlist;
94ad89bc
ACM
2404
2405out_error_apply_filters:
2406 fprintf(trace->output,
2407 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2408 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2409 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2410 goto out_delete_evlist;
514f1c67 2411}
5ed08dae
ACM
2412out_error_mem:
2413 fprintf(trace->output, "Not enough memory to run!\n");
2414 goto out_delete_evlist;
19867b61
ACM
2415
2416out_errno:
2417 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2418 goto out_delete_evlist;
a8f23d8f 2419}
514f1c67 2420
6810fc91
DA
2421static int trace__replay(struct trace *trace)
2422{
2423 const struct perf_evsel_str_handler handlers[] = {
c522739d 2424 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2425 };
f5fc1412
JO
2426 struct perf_data_file file = {
2427 .path = input_name,
2428 .mode = PERF_DATA_MODE_READ,
e366a6d8 2429 .force = trace->force,
f5fc1412 2430 };
6810fc91 2431 struct perf_session *session;
003824e8 2432 struct perf_evsel *evsel;
6810fc91
DA
2433 int err = -1;
2434
2435 trace->tool.sample = trace__process_sample;
2436 trace->tool.mmap = perf_event__process_mmap;
384c671e 2437 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2438 trace->tool.comm = perf_event__process_comm;
2439 trace->tool.exit = perf_event__process_exit;
2440 trace->tool.fork = perf_event__process_fork;
2441 trace->tool.attr = perf_event__process_attr;
2442 trace->tool.tracing_data = perf_event__process_tracing_data;
2443 trace->tool.build_id = perf_event__process_build_id;
2444
0a8cb85c 2445 trace->tool.ordered_events = true;
6810fc91
DA
2446 trace->tool.ordering_requires_timestamps = true;
2447
2448 /* add tid to output */
2449 trace->multiple_threads = true;
2450
f5fc1412 2451 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2452 if (session == NULL)
52e02834 2453 return -1;
6810fc91 2454
0a7e6d1b 2455 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2456 goto out;
2457
8fb598e5
DA
2458 trace->host = &session->machines.host;
2459
6810fc91
DA
2460 err = perf_session__set_tracepoints_handlers(session, handlers);
2461 if (err)
2462 goto out;
2463
003824e8
NK
2464 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2465 "raw_syscalls:sys_enter");
9aca7f17
DA
2466 /* older kernels have syscalls tp versus raw_syscalls */
2467 if (evsel == NULL)
2468 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2469 "syscalls:sys_enter");
003824e8 2470
e281a960
SF
2471 if (evsel &&
2472 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2473 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2474 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2475 goto out;
2476 }
2477
2478 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2479 "raw_syscalls:sys_exit");
9aca7f17
DA
2480 if (evsel == NULL)
2481 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2482 "syscalls:sys_exit");
e281a960
SF
2483 if (evsel &&
2484 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2485 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2486 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2487 goto out;
2488 }
2489
e5cadb93 2490 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2491 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2492 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2493 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2494 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2495 evsel->handler = trace__pgfault;
2496 }
2497
bdc89661
DA
2498 err = parse_target_str(trace);
2499 if (err != 0)
2500 goto out;
2501
6810fc91
DA
2502 setup_pager();
2503
b7b61cbe 2504 err = perf_session__process_events(session);
6810fc91
DA
2505 if (err)
2506 pr_err("Failed to process events, error %d", err);
2507
bf2575c1
DA
2508 else if (trace->summary)
2509 trace__fprintf_thread_summary(trace, trace->output);
2510
6810fc91
DA
2511out:
2512 perf_session__delete(session);
2513
2514 return err;
2515}
2516
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp and return what
 * fprintf() reported, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2525
b535d523
ACM
2526DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2527 struct stats *stats;
2528 double msecs;
2529 int syscall;
2530)
2531{
2532 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2533 struct stats *stats = source->priv;
2534
2535 entry->syscall = source->i;
2536 entry->stats = stats;
2537 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2538}
2539
bf2575c1
DA
2540static size_t thread__dump_stats(struct thread_trace *ttrace,
2541 struct trace *trace, FILE *fp)
2542{
bf2575c1
DA
2543 size_t printed = 0;
2544 struct syscall *sc;
b535d523
ACM
2545 struct rb_node *nd;
2546 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2547
b535d523 2548 if (syscall_stats == NULL)
bf2575c1
DA
2549 return 0;
2550
2551 printed += fprintf(fp, "\n");
2552
834fd46d
MW
2553 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2554 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2555 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2556
98a91837 2557 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2558 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2559 if (stats) {
2560 double min = (double)(stats->min) / NSEC_PER_MSEC;
2561 double max = (double)(stats->max) / NSEC_PER_MSEC;
2562 double avg = avg_stats(stats);
2563 double pct;
2564 u64 n = (u64) stats->n;
2565
2566 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2567 avg /= NSEC_PER_MSEC;
2568
b535d523 2569 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2570 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2571 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2572 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2573 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2574 }
bf2575c1
DA
2575 }
2576
b535d523 2577 resort_rb__delete(syscall_stats);
bf2575c1 2578 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2579
2580 return printed;
2581}
2582
96c14451 2583static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2584{
96c14451 2585 size_t printed = 0;
89dceb22 2586 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2587 double ratio;
2588
2589 if (ttrace == NULL)
2590 return 0;
2591
2592 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2593
15e65c69 2594 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2595 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2596 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2597 if (ttrace->pfmaj)
2598 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2599 if (ttrace->pfmin)
2600 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2601 if (trace->sched)
2602 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2603 else if (fputc('\n', fp) != EOF)
2604 ++printed;
2605
bf2575c1 2606 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2607
96c14451
ACM
2608 return printed;
2609}
896cbb56 2610
96c14451
ACM
2611static unsigned long thread__nr_events(struct thread_trace *ttrace)
2612{
2613 return ttrace ? ttrace->nr_events : 0;
2614}
2615
2616DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2617 struct thread *thread;
2618)
2619{
2620 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2621}
2622
1302d88e
ACM
2623static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2624{
96c14451
ACM
2625 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2626 size_t printed = trace__fprintf_threads_header(fp);
2627 struct rb_node *nd;
1302d88e 2628
96c14451
ACM
2629 if (threads == NULL) {
2630 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2631 return 0;
2632 }
2633
98a91837 2634 resort_rb__for_each_entry(nd, threads)
96c14451 2635 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2636
96c14451
ACM
2637 resort_rb__delete(threads);
2638
2639 return printed;
1302d88e
ACM
2640}
2641
ae9ed035
ACM
2642static int trace__set_duration(const struct option *opt, const char *str,
2643 int unset __maybe_unused)
2644{
2645 struct trace *trace = opt->value;
2646
2647 trace->duration_filter = atof(str);
2648 return 0;
2649}
2650
f078c385
ACM
2651static int trace__set_filter_pids(const struct option *opt, const char *str,
2652 int unset __maybe_unused)
2653{
2654 int ret = -1;
2655 size_t i;
2656 struct trace *trace = opt->value;
2657 /*
2658 * FIXME: introduce a intarray class, plain parse csv and create a
2659 * { int nr, int entries[] } struct...
2660 */
2661 struct intlist *list = intlist__new(str);
2662
2663 if (list == NULL)
2664 return -1;
2665
2666 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2667 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2668
2669 if (trace->filter_pids.entries == NULL)
2670 goto out;
2671
2672 trace->filter_pids.entries[0] = getpid();
2673
2674 for (i = 1; i < trace->filter_pids.nr; ++i)
2675 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2676
2677 intlist__delete(list);
2678 ret = 0;
2679out:
2680 return ret;
2681}
2682
c24ff998
ACM
2683static int trace__open_output(struct trace *trace, const char *filename)
2684{
2685 struct stat st;
2686
2687 if (!stat(filename, &st) && st.st_size) {
2688 char oldname[PATH_MAX];
2689
2690 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2691 unlink(oldname);
2692 rename(filename, oldname);
2693 }
2694
2695 trace->output = fopen(filename, "w");
2696
2697 return trace->output == NULL ? -errno : 0;
2698}
2699
598d02c5
SF
2700static int parse_pagefaults(const struct option *opt, const char *str,
2701 int unset __maybe_unused)
2702{
2703 int *trace_pgfaults = opt->value;
2704
2705 if (strcmp(str, "all") == 0)
2706 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2707 else if (strcmp(str, "maj") == 0)
2708 *trace_pgfaults |= TRACE_PFMAJ;
2709 else if (strcmp(str, "min") == 0)
2710 *trace_pgfaults |= TRACE_PFMIN;
2711 else
2712 return -1;
2713
2714 return 0;
2715}
2716
14a052df
ACM
2717static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2718{
2719 struct perf_evsel *evsel;
2720
e5cadb93 2721 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2722 evsel->handler = handler;
2723}
2724
514f1c67
ACM
2725int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2726{
6fdd9cb7 2727 const char *trace_usage[] = {
f15eb531
NK
2728 "perf trace [<options>] [<command>]",
2729 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2730 "perf trace record [<options>] [<command>]",
2731 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2732 NULL
2733 };
2734 struct trace trace = {
514f1c67
ACM
2735 .syscalls = {
2736 . max = -1,
2737 },
2738 .opts = {
2739 .target = {
2740 .uid = UINT_MAX,
2741 .uses_mmap = true,
2742 },
2743 .user_freq = UINT_MAX,
2744 .user_interval = ULLONG_MAX,
509051ea 2745 .no_buffering = true,
38d5447d 2746 .mmap_pages = UINT_MAX,
9d9cad76 2747 .proc_map_timeout = 500,
514f1c67 2748 },
007d66a0 2749 .output = stderr,
50c95cbd 2750 .show_comm = true,
e281a960 2751 .trace_syscalls = true,
44621819 2752 .kernel_syscallchains = false,
05614993 2753 .max_stack = UINT_MAX,
514f1c67 2754 };
c24ff998 2755 const char *output_name = NULL;
2ae3a312 2756 const char *ev_qualifier_str = NULL;
514f1c67 2757 const struct option trace_options[] = {
14a052df
ACM
2758 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2759 "event selector. use 'perf list' to list available events",
2760 parse_events_option),
50c95cbd
ACM
2761 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2762 "show the thread COMM next to its id"),
c522739d 2763 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
d303e85a 2764 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
c24ff998 2765 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2766 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2767 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2768 "trace events on existing process id"),
ac9be8ee 2769 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2770 "trace events on existing thread id"),
fa0e4ffe
ACM
2771 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2772 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2773 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2774 "system-wide collection from all CPUs"),
ac9be8ee 2775 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2776 "list of cpus to monitor"),
6810fc91 2777 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2778 "child tasks do not inherit counters"),
994a1f78
JO
2779 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2780 "number of mmap data pages",
2781 perf_evlist__parse_mmap_pages),
ac9be8ee 2782 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2783 "user to profile"),
ae9ed035
ACM
2784 OPT_CALLBACK(0, "duration", &trace, "float",
2785 "show only events with duration > N.M ms",
2786 trace__set_duration),
1302d88e 2787 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2788 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2789 OPT_BOOLEAN('T', "time", &trace.full_time,
2790 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2791 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2792 "Show only syscall summary with statistics"),
2793 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2794 "Show all syscalls and summary with statistics"),
598d02c5
SF
2795 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2796 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2797 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2798 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2799 OPT_CALLBACK(0, "call-graph", &trace.opts,
2800 "record_mode[,record_size]", record_callchain_help,
2801 &record_parse_callchain_opt),
44621819
ACM
2802 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2803 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2804 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2805 "Set the minimum stack depth when parsing the callchain, "
2806 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2807 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2808 "Set the maximum stack depth when parsing the callchain, "
2809 "anything beyond the specified depth will be ignored. "
4cb93446 2810 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2811 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2812 "per thread proc mmap processing timeout in ms"),
514f1c67
ACM
2813 OPT_END()
2814 };
ccd62a89 2815 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2816 bool mmap_pages_user_set = true;
6fdd9cb7 2817 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2818 int err;
32caf0d1 2819 char bf[BUFSIZ];
514f1c67 2820
4d08cb80
ACM
2821 signal(SIGSEGV, sighandler_dump_stack);
2822 signal(SIGFPE, sighandler_dump_stack);
2823
14a052df 2824 trace.evlist = perf_evlist__new();
fd0db102 2825 trace.sctbl = syscalltbl__new();
14a052df 2826
fd0db102 2827 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2828 pr_err("Not enough memory to run!\n");
ff8f695c 2829 err = -ENOMEM;
14a052df
ACM
2830 goto out;
2831 }
2832
6fdd9cb7
YS
2833 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2834 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2835
d7888573
WN
2836 err = bpf__setup_stdout(trace.evlist);
2837 if (err) {
2838 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2839 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2840 goto out;
2841 }
2842
59247e33
ACM
2843 err = -1;
2844
598d02c5
SF
2845 if (trace.trace_pgfaults) {
2846 trace.opts.sample_address = true;
2847 trace.opts.sample_time = true;
2848 }
2849
f3e459d1
ACM
2850 if (trace.opts.mmap_pages == UINT_MAX)
2851 mmap_pages_user_set = false;
2852
05614993 2853 if (trace.max_stack == UINT_MAX) {
fe176085 2854 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2855 max_stack_user_set = false;
2856 }
2857
2858#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2859 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2860 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2861#endif
2862
2ddd5c04 2863 if (callchain_param.enabled) {
f3e459d1
ACM
2864 if (!mmap_pages_user_set && geteuid() == 0)
2865 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2866
566a0885 2867 symbol_conf.use_callchain = true;
f3e459d1 2868 }
566a0885 2869
14a052df
ACM
2870 if (trace.evlist->nr_entries > 0)
2871 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2872
1e28fe0a
SF
2873 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2874 return trace__record(&trace, argc-1, &argv[1]);
2875
2876 /* summary_only implies summary option, but don't overwrite summary if set */
2877 if (trace.summary_only)
2878 trace.summary = trace.summary_only;
2879
726f3234
ACM
2880 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2881 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2882 pr_err("Please specify something to trace.\n");
2883 return -1;
2884 }
2885
59247e33
ACM
2886 if (!trace.trace_syscalls && ev_qualifier_str) {
2887 pr_err("The -e option can't be used with --no-syscalls.\n");
2888 goto out;
2889 }
2890
c24ff998
ACM
2891 if (output_name != NULL) {
2892 err = trace__open_output(&trace, output_name);
2893 if (err < 0) {
2894 perror("failed to create output file");
2895 goto out;
2896 }
2897 }
2898
fd0db102
ACM
2899 trace.open_id = syscalltbl__id(trace.sctbl, "open");
2900
2ae3a312 2901 if (ev_qualifier_str != NULL) {
b059efdf 2902 const char *s = ev_qualifier_str;
005438a8
ACM
2903 struct strlist_config slist_config = {
2904 .dirname = system_path(STRACE_GROUPS_DIR),
2905 };
b059efdf
ACM
2906
2907 trace.not_ev_qualifier = *s == '!';
2908 if (trace.not_ev_qualifier)
2909 ++s;
005438a8 2910 trace.ev_qualifier = strlist__new(s, &slist_config);
2ae3a312 2911 if (trace.ev_qualifier == NULL) {
c24ff998
ACM
2912 fputs("Not enough memory to parse event qualifier",
2913 trace.output);
2914 err = -ENOMEM;
2915 goto out_close;
2ae3a312 2916 }
d0cc439b
ACM
2917
2918 err = trace__validate_ev_qualifier(&trace);
2919 if (err)
2920 goto out_close;
2ae3a312
ACM
2921 }
2922
602ad878 2923 err = target__validate(&trace.opts.target);
32caf0d1 2924 if (err) {
602ad878 2925 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2926 fprintf(trace.output, "%s", bf);
2927 goto out_close;
32caf0d1
NK
2928 }
2929
602ad878 2930 err = target__parse_uid(&trace.opts.target);
514f1c67 2931 if (err) {
602ad878 2932 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2933 fprintf(trace.output, "%s", bf);
2934 goto out_close;
514f1c67
ACM
2935 }
2936
602ad878 2937 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
2938 trace.opts.target.system_wide = true;
2939
6810fc91
DA
2940 if (input_name)
2941 err = trace__replay(&trace);
2942 else
2943 err = trace__run(&trace, argc, argv);
1302d88e 2944
c24ff998
ACM
2945out_close:
2946 if (output_name != NULL)
2947 fclose(trace.output);
2948out:
1302d88e 2949 return err;
514f1c67 2950}
This page took 0.326397 seconds and 5 git commands to generate.