tools/perf/builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
17 #include <libaudit.h>
18 #include <stdlib.h>
19 #include <sys/mman.h>
20 #include <linux/futex.h>
21
22 /* For older distros: */
23 #ifndef MAP_STACK
24 # define MAP_STACK 0x20000
25 #endif
26
27 #ifndef MADV_HWPOISON
28 # define MADV_HWPOISON 100
29 #endif
30
31 #ifndef MADV_MERGEABLE
32 # define MADV_MERGEABLE 12
33 #endif
34
35 #ifndef MADV_UNMERGEABLE
36 # define MADV_UNMERGEABLE 13
37 #endif
38
39 #ifndef EFD_SEMAPHORE
40 # define EFD_SEMAPHORE 1
41 #endif
42
43 #ifndef EFD_NONBLOCK
44 # define EFD_NONBLOCK 00004000
45 #endif
46
47 #ifndef EFD_CLOEXEC
48 # define EFD_CLOEXEC 02000000
49 #endif
50
51 #ifndef O_CLOEXEC
52 # define O_CLOEXEC 02000000
53 #endif
54
55 #ifndef SOCK_DCCP
56 # define SOCK_DCCP 6
57 #endif
58
59 #ifndef SOCK_CLOEXEC
60 # define SOCK_CLOEXEC 02000000
61 #endif
62
63 #ifndef SOCK_NONBLOCK
64 # define SOCK_NONBLOCK 00004000
65 #endif
66
67 #ifndef MSG_CMSG_CLOEXEC
68 # define MSG_CMSG_CLOEXEC 0x40000000
69 #endif
70
71 #ifndef PERF_FLAG_FD_NO_GROUP
72 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
73 #endif
74
75 #ifndef PERF_FLAG_FD_OUTPUT
76 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
77 #endif
78
79 #ifndef PERF_FLAG_PID_CGROUP
80 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
81 #endif
82
83 #ifndef PERF_FLAG_FD_CLOEXEC
84 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
85 #endif
86
87
88 struct tp_field {
89 int offset;
90 union {
91 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
92 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
93 };
94 };
95
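/*
 * Generate tp_field__u{8,16,32,64}() readers: each memcpy()s the integer out
 * of sample->raw_data at the offset resolved from the tracepoint format (so
 * potentially unaligned raw_data is never dereferenced directly) and returns
 * it widened to u64. The __SWAPPED variants below additionally byte-swap the
 * value, for perf.data files recorded on a host of the opposite endianness.
 */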
96 #define TP_UINT_FIELD(bits) \
97 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
98 { \
99 u##bits value; \
100 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
101 return value; \
102 }
103
104 TP_UINT_FIELD(8);
105 TP_UINT_FIELD(16);
106 TP_UINT_FIELD(32);
107 TP_UINT_FIELD(64);
108
109 #define TP_UINT_FIELD__SWAPPED(bits) \
110 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
111 { \
112 u##bits value; \
113 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
114 return bswap_##bits(value);\
115 }
116
117 TP_UINT_FIELD__SWAPPED(16);
118 TP_UINT_FIELD__SWAPPED(32);
119 TP_UINT_FIELD__SWAPPED(64);
120
121 static int tp_field__init_uint(struct tp_field *field,
122 struct format_field *format_field,
123 bool needs_swap)
124 {
125 field->offset = format_field->offset;
126
127 switch (format_field->size) {
128 case 1:
129 field->integer = tp_field__u8;
130 break;
131 case 2:
132 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
133 break;
134 case 4:
135 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
136 break;
137 case 8:
138 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
139 break;
140 default:
141 return -1;
142 }
143
144 return 0;
145 }
146
147 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
148 {
149 return sample->raw_data + field->offset;
150 }
151
152 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
153 {
154 field->offset = format_field->offset;
155 field->pointer = tp_field__ptr;
156 return 0;
157 }
158
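/*
 * Per-evsel private data for the raw_syscalls:sys_enter/sys_exit tracepoints:
 * both carry an "id" field, while sys_enter has the "args" array and sys_exit
 * the "ret" value, hence the union for the second field.
 */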
159 struct syscall_tp {
160 struct tp_field id;
161 union {
162 struct tp_field args, ret;
163 };
164 };
165
166 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
167 struct tp_field *field,
168 const char *name)
169 {
170 struct format_field *format_field = perf_evsel__field(evsel, name);
171
172 if (format_field == NULL)
173 return -1;
174
175 return tp_field__init_uint(field, format_field, evsel->needs_swap);
176 }
177
178 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
179 ({ struct syscall_tp *sc = evsel->priv;\
180 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
181
182 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
183 struct tp_field *field,
184 const char *name)
185 {
186 struct format_field *format_field = perf_evsel__field(evsel, name);
187
188 if (format_field == NULL)
189 return -1;
190
191 return tp_field__init_ptr(field, format_field);
192 }
193
194 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
195 ({ struct syscall_tp *sc = evsel->priv;\
196 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
197
198 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
199 {
200 zfree(&evsel->priv);
201 perf_evsel__delete(evsel);
202 }
203
204 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
205 {
206 evsel->priv = malloc(sizeof(struct syscall_tp));
207 if (evsel->priv != NULL) {
208 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
209 goto out_delete;
210
211 evsel->handler = handler;
212 return 0;
213 }
214
215 return -ENOMEM;
216
217 out_delete:
218 zfree(&evsel->priv);
219 return -ENOENT;
220 }
221
222 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
223 {
224 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
225
226 	/* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
227 if (evsel == NULL)
228 evsel = perf_evsel__newtp("syscalls", direction);
229
230 if (evsel) {
231 if (perf_evsel__init_syscall_tp(evsel, handler))
232 goto out_delete;
233 }
234
235 return evsel;
236
237 out_delete:
238 perf_evsel__delete_priv(evsel);
239 return NULL;
240 }
241
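/*
 * Accessors for the fields resolved above, used as e.g.:
 *
 *	int id     = perf_evsel__sc_tp_uint(evsel, id, sample);
 *	void *args = perf_evsel__sc_tp_ptr(evsel, args, sample);
 *
 * in trace__sys_enter()/trace__sys_exit() below: they just invoke the reader
 * cached in evsel->priv, so no per-event field lookup by name is needed.
 */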
242 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
243 ({ struct syscall_tp *fields = evsel->priv; \
244 fields->name.integer(&fields->name, sample); })
245
246 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
247 ({ struct syscall_tp *fields = evsel->priv; \
248 fields->name.pointer(&fields->name, sample); })
249
250 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
251 void *sys_enter_handler,
252 void *sys_exit_handler)
253 {
254 int ret = -1;
255 struct perf_evsel *sys_enter, *sys_exit;
256
257 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
258 if (sys_enter == NULL)
259 goto out;
260
261 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
262 goto out_delete_sys_enter;
263
264 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
265 if (sys_exit == NULL)
266 goto out_delete_sys_enter;
267
268 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
269 goto out_delete_sys_exit;
270
271 perf_evlist__add(evlist, sys_enter);
272 perf_evlist__add(evlist, sys_exit);
273
274 ret = 0;
275 out:
276 return ret;
277
278 out_delete_sys_exit:
279 perf_evsel__delete_priv(sys_exit);
280 out_delete_sys_enter:
281 perf_evsel__delete_priv(sys_enter);
282 goto out;
283 }
284
285
286 struct syscall_arg {
287 unsigned long val;
288 struct thread *thread;
289 struct trace *trace;
290 void *parm;
291 u8 idx;
292 u8 mask;
293 };
294
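/*
 * Maps small integer argument values to names. "offset" handles arrays whose
 * first entry does not correspond to the value 0, e.g. the epoll_ctl ops
 * below start at EPOLL_CTL_ADD == 1 and thus use
 * DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1).
 */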
295 struct strarray {
296 int offset;
297 int nr_entries;
298 const char **entries;
299 };
300
301 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
302 .nr_entries = ARRAY_SIZE(array), \
303 .entries = array, \
304 }
305
306 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
307 .offset = off, \
308 .nr_entries = ARRAY_SIZE(array), \
309 .entries = array, \
310 }
311
312 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
313 const char *intfmt,
314 struct syscall_arg *arg)
315 {
316 struct strarray *sa = arg->parm;
317 int idx = arg->val - sa->offset;
318
319 if (idx < 0 || idx >= sa->nr_entries)
320 return scnprintf(bf, size, intfmt, arg->val);
321
322 return scnprintf(bf, size, "%s", sa->entries[idx]);
323 }
324
325 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
326 struct syscall_arg *arg)
327 {
328 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
329 }
330
331 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
332
333 #if defined(__i386__) || defined(__x86_64__)
334 /*
335 * FIXME: Make this available to all arches as soon as the ioctl beautifier
336 * gets rewritten to support all arches.
337 */
338 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
339 struct syscall_arg *arg)
340 {
341 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
342 }
343
344 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
345 #endif /* defined(__i386__) || defined(__x86_64__) */
346
347 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
348 struct syscall_arg *arg);
349
350 #define SCA_FD syscall_arg__scnprintf_fd
351
352 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
353 struct syscall_arg *arg)
354 {
355 int fd = arg->val;
356
357 if (fd == AT_FDCWD)
358 return scnprintf(bf, size, "CWD");
359
360 return syscall_arg__scnprintf_fd(bf, size, arg);
361 }
362
363 #define SCA_FDAT syscall_arg__scnprintf_fd_at
364
365 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
366 struct syscall_arg *arg);
367
368 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
369
370 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
371 struct syscall_arg *arg)
372 {
373 return scnprintf(bf, size, "%#lx", arg->val);
374 }
375
376 #define SCA_HEX syscall_arg__scnprintf_hex
377
378 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
379 struct syscall_arg *arg)
380 {
381 return scnprintf(bf, size, "%d", arg->val);
382 }
383
384 #define SCA_INT syscall_arg__scnprintf_int
385
386 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
387 struct syscall_arg *arg)
388 {
389 int printed = 0, prot = arg->val;
390
391 if (prot == PROT_NONE)
392 return scnprintf(bf, size, "NONE");
393 #define P_MMAP_PROT(n) \
394 if (prot & PROT_##n) { \
395 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
396 prot &= ~PROT_##n; \
397 }
398
399 P_MMAP_PROT(EXEC);
400 P_MMAP_PROT(READ);
401 P_MMAP_PROT(WRITE);
402 #ifdef PROT_SEM
403 P_MMAP_PROT(SEM);
404 #endif
405 P_MMAP_PROT(GROWSDOWN);
406 P_MMAP_PROT(GROWSUP);
407 #undef P_MMAP_PROT
408
409 if (prot)
410 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
411
412 return printed;
413 }
414
415 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
416
417 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
418 struct syscall_arg *arg)
419 {
420 int printed = 0, flags = arg->val;
421
422 #define P_MMAP_FLAG(n) \
423 if (flags & MAP_##n) { \
424 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
425 flags &= ~MAP_##n; \
426 }
427
428 P_MMAP_FLAG(SHARED);
429 P_MMAP_FLAG(PRIVATE);
430 #ifdef MAP_32BIT
431 P_MMAP_FLAG(32BIT);
432 #endif
433 P_MMAP_FLAG(ANONYMOUS);
434 P_MMAP_FLAG(DENYWRITE);
435 P_MMAP_FLAG(EXECUTABLE);
436 P_MMAP_FLAG(FILE);
437 P_MMAP_FLAG(FIXED);
438 P_MMAP_FLAG(GROWSDOWN);
439 #ifdef MAP_HUGETLB
440 P_MMAP_FLAG(HUGETLB);
441 #endif
442 P_MMAP_FLAG(LOCKED);
443 P_MMAP_FLAG(NONBLOCK);
444 P_MMAP_FLAG(NORESERVE);
445 P_MMAP_FLAG(POPULATE);
446 P_MMAP_FLAG(STACK);
447 #ifdef MAP_UNINITIALIZED
448 P_MMAP_FLAG(UNINITIALIZED);
449 #endif
450 #undef P_MMAP_FLAG
451
452 if (flags)
453 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
454
455 return printed;
456 }
457
458 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
459
460 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
461 struct syscall_arg *arg)
462 {
463 int printed = 0, flags = arg->val;
464
465 #define P_MREMAP_FLAG(n) \
466 if (flags & MREMAP_##n) { \
467 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
468 flags &= ~MREMAP_##n; \
469 }
470
471 P_MREMAP_FLAG(MAYMOVE);
472 #ifdef MREMAP_FIXED
473 P_MREMAP_FLAG(FIXED);
474 #endif
475 #undef P_MREMAP_FLAG
476
477 if (flags)
478 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
479
480 return printed;
481 }
482
483 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
484
485 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
486 struct syscall_arg *arg)
487 {
488 int behavior = arg->val;
489
490 switch (behavior) {
491 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
492 P_MADV_BHV(NORMAL);
493 P_MADV_BHV(RANDOM);
494 P_MADV_BHV(SEQUENTIAL);
495 P_MADV_BHV(WILLNEED);
496 P_MADV_BHV(DONTNEED);
497 P_MADV_BHV(REMOVE);
498 P_MADV_BHV(DONTFORK);
499 P_MADV_BHV(DOFORK);
500 P_MADV_BHV(HWPOISON);
501 #ifdef MADV_SOFT_OFFLINE
502 P_MADV_BHV(SOFT_OFFLINE);
503 #endif
504 P_MADV_BHV(MERGEABLE);
505 P_MADV_BHV(UNMERGEABLE);
506 #ifdef MADV_HUGEPAGE
507 P_MADV_BHV(HUGEPAGE);
508 #endif
509 #ifdef MADV_NOHUGEPAGE
510 P_MADV_BHV(NOHUGEPAGE);
511 #endif
512 #ifdef MADV_DONTDUMP
513 P_MADV_BHV(DONTDUMP);
514 #endif
515 #ifdef MADV_DODUMP
516 P_MADV_BHV(DODUMP);
517 #endif
518 #undef P_MADV_BHV
519 default: break;
520 }
521
522 return scnprintf(bf, size, "%#x", behavior);
523 }
524
525 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
526
527 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
528 struct syscall_arg *arg)
529 {
530 int printed = 0, op = arg->val;
531
532 if (op == 0)
533 return scnprintf(bf, size, "NONE");
534 #define P_CMD(cmd) \
535 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
536 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
537 op &= ~LOCK_##cmd; \
538 }
539
540 P_CMD(SH);
541 P_CMD(EX);
542 P_CMD(NB);
543 P_CMD(UN);
544 P_CMD(MAND);
545 P_CMD(RW);
546 P_CMD(READ);
547 P_CMD(WRITE);
548 #undef P_CMD
549
550 if (op)
551 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
552
553 return printed;
554 }
555
556 #define SCA_FLOCK syscall_arg__scnprintf_flock
557
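/*
 * Prints the futex command name and, as a side effect, sets bits in arg->mask
 * so that the arguments that are meaningless for that command (e.g. timeout
 * and uaddr2 for FUTEX_WAKE) get suppressed by syscall__scnprintf_args().
 */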
558 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
559 {
560 enum syscall_futex_args {
561 SCF_UADDR = (1 << 0),
562 SCF_OP = (1 << 1),
563 SCF_VAL = (1 << 2),
564 SCF_TIMEOUT = (1 << 3),
565 SCF_UADDR2 = (1 << 4),
566 SCF_VAL3 = (1 << 5),
567 };
568 int op = arg->val;
569 int cmd = op & FUTEX_CMD_MASK;
570 size_t printed = 0;
571
572 switch (cmd) {
573 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
574 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
575 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
576 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
577 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
578 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
579 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
580 P_FUTEX_OP(WAKE_OP); break;
581 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
582 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
583 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
584 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
585 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
586 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
587 default: printed = scnprintf(bf, size, "%#x", cmd); break;
588 }
589
590 if (op & FUTEX_PRIVATE_FLAG)
591 printed += scnprintf(bf + printed, size - printed, "|PRIV");
592
593 if (op & FUTEX_CLOCK_REALTIME)
594 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
595
596 return printed;
597 }
598
599 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
600
601 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
602 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
603
604 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
605 static DEFINE_STRARRAY(itimers);
606
607 static const char *whences[] = { "SET", "CUR", "END",
608 #ifdef SEEK_DATA
609 "DATA",
610 #endif
611 #ifdef SEEK_HOLE
612 "HOLE",
613 #endif
614 };
615 static DEFINE_STRARRAY(whences);
616
617 static const char *fcntl_cmds[] = {
618 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
619 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
620 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
621 "F_GETOWNER_UIDS",
622 };
623 static DEFINE_STRARRAY(fcntl_cmds);
624
625 static const char *rlimit_resources[] = {
626 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
627 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
628 "RTTIME",
629 };
630 static DEFINE_STRARRAY(rlimit_resources);
631
632 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
633 static DEFINE_STRARRAY(sighow);
634
635 static const char *clockid[] = {
636 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
637 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
638 };
639 static DEFINE_STRARRAY(clockid);
640
641 static const char *socket_families[] = {
642 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
643 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
644 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
645 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
646 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
647 "ALG", "NFC", "VSOCK",
648 };
649 static DEFINE_STRARRAY(socket_families);
650
651 #ifndef SOCK_TYPE_MASK
652 #define SOCK_TYPE_MASK 0xf
653 #endif
654
655 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
656 struct syscall_arg *arg)
657 {
658 size_t printed;
659 int type = arg->val,
660 flags = type & ~SOCK_TYPE_MASK;
661
662 type &= SOCK_TYPE_MASK;
663 /*
664 * Can't use a strarray, MIPS may override for ABI reasons.
665 */
666 switch (type) {
667 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
668 P_SK_TYPE(STREAM);
669 P_SK_TYPE(DGRAM);
670 P_SK_TYPE(RAW);
671 P_SK_TYPE(RDM);
672 P_SK_TYPE(SEQPACKET);
673 P_SK_TYPE(DCCP);
674 P_SK_TYPE(PACKET);
675 #undef P_SK_TYPE
676 default:
677 printed = scnprintf(bf, size, "%#x", type);
678 }
679
680 #define P_SK_FLAG(n) \
681 if (flags & SOCK_##n) { \
682 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
683 flags &= ~SOCK_##n; \
684 }
685
686 P_SK_FLAG(CLOEXEC);
687 P_SK_FLAG(NONBLOCK);
688 #undef P_SK_FLAG
689
690 if (flags)
691 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
692
693 return printed;
694 }
695
696 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
697
698 #ifndef MSG_PROBE
699 #define MSG_PROBE 0x10
700 #endif
701 #ifndef MSG_WAITFORONE
702 #define MSG_WAITFORONE 0x10000
703 #endif
704 #ifndef MSG_SENDPAGE_NOTLAST
705 #define MSG_SENDPAGE_NOTLAST 0x20000
706 #endif
707 #ifndef MSG_FASTOPEN
708 #define MSG_FASTOPEN 0x20000000
709 #endif
710
711 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
712 struct syscall_arg *arg)
713 {
714 int printed = 0, flags = arg->val;
715
716 if (flags == 0)
717 return scnprintf(bf, size, "NONE");
718 #define P_MSG_FLAG(n) \
719 if (flags & MSG_##n) { \
720 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
721 flags &= ~MSG_##n; \
722 }
723
724 P_MSG_FLAG(OOB);
725 P_MSG_FLAG(PEEK);
726 P_MSG_FLAG(DONTROUTE);
727 P_MSG_FLAG(TRYHARD);
728 P_MSG_FLAG(CTRUNC);
729 P_MSG_FLAG(PROBE);
730 P_MSG_FLAG(TRUNC);
731 P_MSG_FLAG(DONTWAIT);
732 P_MSG_FLAG(EOR);
733 P_MSG_FLAG(WAITALL);
734 P_MSG_FLAG(FIN);
735 P_MSG_FLAG(SYN);
736 P_MSG_FLAG(CONFIRM);
737 P_MSG_FLAG(RST);
738 P_MSG_FLAG(ERRQUEUE);
739 P_MSG_FLAG(NOSIGNAL);
740 P_MSG_FLAG(MORE);
741 P_MSG_FLAG(WAITFORONE);
742 P_MSG_FLAG(SENDPAGE_NOTLAST);
743 P_MSG_FLAG(FASTOPEN);
744 P_MSG_FLAG(CMSG_CLOEXEC);
745 #undef P_MSG_FLAG
746
747 if (flags)
748 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
749
750 return printed;
751 }
752
753 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
754
755 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
756 struct syscall_arg *arg)
757 {
758 size_t printed = 0;
759 int mode = arg->val;
760
761 if (mode == F_OK) /* 0 */
762 return scnprintf(bf, size, "F");
763 #define P_MODE(n) \
764 if (mode & n##_OK) { \
765 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
766 mode &= ~n##_OK; \
767 }
768
769 P_MODE(R);
770 P_MODE(W);
771 P_MODE(X);
772 #undef P_MODE
773
774 if (mode)
775 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
776
777 return printed;
778 }
779
780 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
781
782 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
783 struct syscall_arg *arg)
784 {
785 int printed = 0, flags = arg->val;
786
787 if (!(flags & O_CREAT))
788 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
789
790 if (flags == 0)
791 return scnprintf(bf, size, "RDONLY");
792 #define P_FLAG(n) \
793 if (flags & O_##n) { \
794 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
795 flags &= ~O_##n; \
796 }
797
798 P_FLAG(APPEND);
799 P_FLAG(ASYNC);
800 P_FLAG(CLOEXEC);
801 P_FLAG(CREAT);
802 P_FLAG(DIRECT);
803 P_FLAG(DIRECTORY);
804 P_FLAG(EXCL);
805 P_FLAG(LARGEFILE);
806 P_FLAG(NOATIME);
807 P_FLAG(NOCTTY);
808 #ifdef O_NONBLOCK
809 P_FLAG(NONBLOCK);
810 #elif O_NDELAY
811 P_FLAG(NDELAY);
812 #endif
813 #ifdef O_PATH
814 P_FLAG(PATH);
815 #endif
816 P_FLAG(RDWR);
817 #ifdef O_DSYNC
818 if ((flags & O_SYNC) == O_SYNC)
819 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
820 else {
821 P_FLAG(DSYNC);
822 }
823 #else
824 P_FLAG(SYNC);
825 #endif
826 P_FLAG(TRUNC);
827 P_FLAG(WRONLY);
828 #undef P_FLAG
829
830 if (flags)
831 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
832
833 return printed;
834 }
835
836 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
837
838 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
839 struct syscall_arg *arg)
840 {
841 int printed = 0, flags = arg->val;
842
843 if (flags == 0)
844 return 0;
845
846 #define P_FLAG(n) \
847 if (flags & PERF_FLAG_##n) { \
848 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
849 flags &= ~PERF_FLAG_##n; \
850 }
851
852 P_FLAG(FD_NO_GROUP);
853 P_FLAG(FD_OUTPUT);
854 P_FLAG(PID_CGROUP);
855 P_FLAG(FD_CLOEXEC);
856 #undef P_FLAG
857
858 if (flags)
859 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
860
861 return printed;
862 }
863
864 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
865
866 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
867 struct syscall_arg *arg)
868 {
869 int printed = 0, flags = arg->val;
870
871 if (flags == 0)
872 return scnprintf(bf, size, "NONE");
873 #define P_FLAG(n) \
874 if (flags & EFD_##n) { \
875 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
876 flags &= ~EFD_##n; \
877 }
878
879 P_FLAG(SEMAPHORE);
880 P_FLAG(CLOEXEC);
881 P_FLAG(NONBLOCK);
882 #undef P_FLAG
883
884 if (flags)
885 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
886
887 return printed;
888 }
889
890 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
891
892 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
893 struct syscall_arg *arg)
894 {
895 int printed = 0, flags = arg->val;
896
897 #define P_FLAG(n) \
898 if (flags & O_##n) { \
899 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
900 flags &= ~O_##n; \
901 }
902
903 P_FLAG(CLOEXEC);
904 P_FLAG(NONBLOCK);
905 #undef P_FLAG
906
907 if (flags)
908 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
909
910 return printed;
911 }
912
913 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
914
915 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
916 {
917 int sig = arg->val;
918
919 switch (sig) {
920 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
921 P_SIGNUM(HUP);
922 P_SIGNUM(INT);
923 P_SIGNUM(QUIT);
924 P_SIGNUM(ILL);
925 P_SIGNUM(TRAP);
926 P_SIGNUM(ABRT);
927 P_SIGNUM(BUS);
928 P_SIGNUM(FPE);
929 P_SIGNUM(KILL);
930 P_SIGNUM(USR1);
931 P_SIGNUM(SEGV);
932 P_SIGNUM(USR2);
933 P_SIGNUM(PIPE);
934 P_SIGNUM(ALRM);
935 P_SIGNUM(TERM);
936 P_SIGNUM(CHLD);
937 P_SIGNUM(CONT);
938 P_SIGNUM(STOP);
939 P_SIGNUM(TSTP);
940 P_SIGNUM(TTIN);
941 P_SIGNUM(TTOU);
942 P_SIGNUM(URG);
943 P_SIGNUM(XCPU);
944 P_SIGNUM(XFSZ);
945 P_SIGNUM(VTALRM);
946 P_SIGNUM(PROF);
947 P_SIGNUM(WINCH);
948 P_SIGNUM(IO);
949 P_SIGNUM(PWR);
950 P_SIGNUM(SYS);
951 #ifdef SIGEMT
952 P_SIGNUM(EMT);
953 #endif
954 #ifdef SIGSTKFLT
955 P_SIGNUM(STKFLT);
956 #endif
957 #ifdef SIGSWI
958 P_SIGNUM(SWI);
959 #endif
960 default: break;
961 }
962
963 return scnprintf(bf, size, "%#x", sig);
964 }
965
966 #define SCA_SIGNUM syscall_arg__scnprintf_signum
967
968 #if defined(__i386__) || defined(__x86_64__)
969 /*
970 * FIXME: Make this available to all arches.
971 */
972 #define TCGETS 0x5401
973
974 static const char *tioctls[] = {
975 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
976 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
977 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
978 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
979 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
980 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
981 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
982 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
983 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
984 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
985 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
986 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
987 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
988 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
989 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
990 };
991
992 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
993 #endif /* defined(__i386__) || defined(__x86_64__) */
994
995 #define STRARRAY(arg, name, array) \
996 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
997 .arg_parm = { [arg] = &strarray__##array, }
998
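/*
 * Keep this table sorted by ->name: syscall_fmt__find() looks entries up
 * with bsearch().
 */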
999 static struct syscall_fmt {
1000 const char *name;
1001 const char *alias;
1002 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1003 void *arg_parm[6];
1004 bool errmsg;
1005 bool timeout;
1006 bool hexret;
1007 } syscall_fmts[] = {
1008 { .name = "access", .errmsg = true,
1009 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
1010 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1011 { .name = "brk", .hexret = true,
1012 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1013 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1014 { .name = "close", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1016 { .name = "connect", .errmsg = true, },
1017 { .name = "dup", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1019 { .name = "dup2", .errmsg = true,
1020 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1021 { .name = "dup3", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1023 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1024 { .name = "eventfd2", .errmsg = true,
1025 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1026 { .name = "faccessat", .errmsg = true,
1027 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1028 { .name = "fadvise64", .errmsg = true,
1029 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030 { .name = "fallocate", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1032 { .name = "fchdir", .errmsg = true,
1033 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1034 { .name = "fchmod", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036 { .name = "fchmodat", .errmsg = true,
1037 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1038 { .name = "fchown", .errmsg = true,
1039 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1040 { .name = "fchownat", .errmsg = true,
1041 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1042 { .name = "fcntl", .errmsg = true,
1043 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1044 [1] = SCA_STRARRAY, /* cmd */ },
1045 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1046 { .name = "fdatasync", .errmsg = true,
1047 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1048 { .name = "flock", .errmsg = true,
1049 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1050 [1] = SCA_FLOCK, /* cmd */ }, },
1051 { .name = "fsetxattr", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1056 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1057 { .name = "fstatfs", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059 { .name = "fsync", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "ftruncate", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063 { .name = "futex", .errmsg = true,
1064 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1065 { .name = "futimesat", .errmsg = true,
1066 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1067 { .name = "getdents", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 { .name = "getdents64", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1071 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1072 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1073 { .name = "ioctl", .errmsg = true,
1074 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1075 #if defined(__i386__) || defined(__x86_64__)
1076 /*
1077 * FIXME: Make this available to all arches.
1078 */
1079 [1] = SCA_STRHEXARRAY, /* cmd */
1080 [2] = SCA_HEX, /* arg */ },
1081 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1082 #else
1083 [2] = SCA_HEX, /* arg */ }, },
1084 #endif
1085 { .name = "kill", .errmsg = true,
1086 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1087 { .name = "linkat", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1089 { .name = "lseek", .errmsg = true,
1090 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1091 [2] = SCA_STRARRAY, /* whence */ },
1092 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1093 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1094 { .name = "madvise", .errmsg = true,
1095 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1096 [2] = SCA_MADV_BHV, /* behavior */ }, },
1097 { .name = "mkdirat", .errmsg = true,
1098 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1099 { .name = "mknodat", .errmsg = true,
1100 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1101 { .name = "mlock", .errmsg = true,
1102 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1103 { .name = "mlockall", .errmsg = true,
1104 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1105 { .name = "mmap", .hexret = true,
1106 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1107 [2] = SCA_MMAP_PROT, /* prot */
1108 [3] = SCA_MMAP_FLAGS, /* flags */
1109 [4] = SCA_FD, /* fd */ }, },
1110 { .name = "mprotect", .errmsg = true,
1111 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1112 [2] = SCA_MMAP_PROT, /* prot */ }, },
1113 { .name = "mremap", .hexret = true,
1114 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1115 [3] = SCA_MREMAP_FLAGS, /* flags */
1116 [4] = SCA_HEX, /* new_addr */ }, },
1117 { .name = "munlock", .errmsg = true,
1118 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1119 { .name = "munmap", .errmsg = true,
1120 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1121 { .name = "name_to_handle_at", .errmsg = true,
1122 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1123 { .name = "newfstatat", .errmsg = true,
1124 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1125 { .name = "open", .errmsg = true,
1126 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1127 { .name = "open_by_handle_at", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1129 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1130 { .name = "openat", .errmsg = true,
1131 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1132 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1133 { .name = "perf_event_open", .errmsg = true,
1134 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1135 [2] = SCA_INT, /* cpu */
1136 [3] = SCA_FD, /* group_fd */
1137 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1138 { .name = "pipe2", .errmsg = true,
1139 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1140 { .name = "poll", .errmsg = true, .timeout = true, },
1141 { .name = "ppoll", .errmsg = true, .timeout = true, },
1142 { .name = "pread", .errmsg = true, .alias = "pread64",
1143 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1144 { .name = "preadv", .errmsg = true, .alias = "pread",
1145 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1146 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1147 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1148 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1149 { .name = "pwritev", .errmsg = true,
1150 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1151 { .name = "read", .errmsg = true,
1152 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1153 { .name = "readlinkat", .errmsg = true,
1154 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1155 { .name = "readv", .errmsg = true,
1156 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1157 { .name = "recvfrom", .errmsg = true,
1158 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1159 { .name = "recvmmsg", .errmsg = true,
1160 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1161 { .name = "recvmsg", .errmsg = true,
1162 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1163 { .name = "renameat", .errmsg = true,
1164 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1165 { .name = "rt_sigaction", .errmsg = true,
1166 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1167 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1168 { .name = "rt_sigqueueinfo", .errmsg = true,
1169 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1170 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1171 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1172 { .name = "select", .errmsg = true, .timeout = true, },
1173 { .name = "sendmmsg", .errmsg = true,
1174 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1175 { .name = "sendmsg", .errmsg = true,
1176 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1177 { .name = "sendto", .errmsg = true,
1178 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1179 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1180 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1181 { .name = "shutdown", .errmsg = true,
1182 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1183 { .name = "socket", .errmsg = true,
1184 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1185 [1] = SCA_SK_TYPE, /* type */ },
1186 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1187 { .name = "socketpair", .errmsg = true,
1188 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1189 [1] = SCA_SK_TYPE, /* type */ },
1190 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1191 { .name = "stat", .errmsg = true, .alias = "newstat", },
1192 { .name = "symlinkat", .errmsg = true,
1193 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1194 { .name = "tgkill", .errmsg = true,
1195 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1196 { .name = "tkill", .errmsg = true,
1197 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1198 { .name = "uname", .errmsg = true, .alias = "newuname", },
1199 { .name = "unlinkat", .errmsg = true,
1200 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1201 { .name = "utimensat", .errmsg = true,
1202 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1203 { .name = "write", .errmsg = true,
1204 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1205 { .name = "writev", .errmsg = true,
1206 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207 };
1208
1209 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1210 {
1211 const struct syscall_fmt *fmt = fmtp;
1212 return strcmp(name, fmt->name);
1213 }
1214
1215 static struct syscall_fmt *syscall_fmt__find(const char *name)
1216 {
1217 const int nmemb = ARRAY_SIZE(syscall_fmts);
1218 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1219 }
1220
1221 struct syscall {
1222 struct event_format *tp_format;
1223 int nr_args;
1224 struct format_field *args;
1225 const char *name;
1226 bool filtered;
1227 bool is_exit;
1228 struct syscall_fmt *fmt;
1229 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1230 void **arg_parm;
1231 };
1232
1233 static size_t fprintf_duration(unsigned long t, FILE *fp)
1234 {
1235 double duration = (double)t / NSEC_PER_MSEC;
1236 size_t printed = fprintf(fp, "(");
1237
1238 if (duration >= 1.0)
1239 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1240 else if (duration >= 0.01)
1241 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1242 else
1243 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1244 return printed + fprintf(fp, "): ");
1245 }
1246
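/*
 * State kept per traced thread: entry/exit timestamps and the pending,
 * already formatted entry string (so enter + exit can be printed as a single
 * line, or flagged as interrupted when other events arrive in between),
 * page fault counters, the fd -> pathname table and per-syscall stats used
 * for the summary.
 */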
1247 struct thread_trace {
1248 u64 entry_time;
1249 u64 exit_time;
1250 bool entry_pending;
1251 unsigned long nr_events;
1252 unsigned long pfmaj, pfmin;
1253 char *entry_str;
1254 double runtime_ms;
1255 struct {
1256 int max;
1257 char **table;
1258 } paths;
1259
1260 struct intlist *syscall_stats;
1261 };
1262
1263 static struct thread_trace *thread_trace__new(void)
1264 {
1265 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1266
1267 	if (ttrace) {
1268 		ttrace->paths.max = -1;
1269 		ttrace->syscall_stats = intlist__new(NULL);
1270 	}
1271 
1272 return ttrace;
1273 }
1274
1275 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1276 {
1277 struct thread_trace *ttrace;
1278
1279 if (thread == NULL)
1280 goto fail;
1281
1282 if (thread__priv(thread) == NULL)
1283 thread__set_priv(thread, thread_trace__new());
1284
1285 if (thread__priv(thread) == NULL)
1286 goto fail;
1287
1288 ttrace = thread__priv(thread);
1289 ++ttrace->nr_events;
1290
1291 return ttrace;
1292 fail:
1293 color_fprintf(fp, PERF_COLOR_RED,
1294 "WARNING: not enough memory, dropping samples!\n");
1295 return NULL;
1296 }
1297
1298 #define TRACE_PFMAJ (1 << 0)
1299 #define TRACE_PFMIN (1 << 1)
1300
1301 struct trace {
1302 struct perf_tool tool;
1303 struct {
1304 int machine;
1305 int open_id;
1306 } audit;
1307 struct {
1308 int max;
1309 struct syscall *table;
1310 } syscalls;
1311 struct record_opts opts;
1312 struct perf_evlist *evlist;
1313 struct machine *host;
1314 struct thread *current;
1315 u64 base_time;
1316 FILE *output;
1317 unsigned long nr_events;
1318 struct strlist *ev_qualifier;
1319 const char *last_vfs_getname;
1320 struct intlist *tid_list;
1321 struct intlist *pid_list;
1322 struct {
1323 size_t nr;
1324 pid_t *entries;
1325 } filter_pids;
1326 double duration_filter;
1327 double runtime_ms;
1328 struct {
1329 u64 vfs_getname,
1330 proc_getname;
1331 } stats;
1332 bool not_ev_qualifier;
1333 bool live;
1334 bool full_time;
1335 bool sched;
1336 bool multiple_threads;
1337 bool summary;
1338 bool summary_only;
1339 bool show_comm;
1340 bool show_tool_stats;
1341 bool trace_syscalls;
1342 bool force;
1343 int trace_pgfaults;
1344 };
1345
1346 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1347 {
1348 struct thread_trace *ttrace = thread__priv(thread);
1349
1350 if (fd > ttrace->paths.max) {
1351 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1352
1353 if (npath == NULL)
1354 return -1;
1355
1356 if (ttrace->paths.max != -1) {
1357 memset(npath + ttrace->paths.max + 1, 0,
1358 (fd - ttrace->paths.max) * sizeof(char *));
1359 } else {
1360 memset(npath, 0, (fd + 1) * sizeof(char *));
1361 }
1362
1363 ttrace->paths.table = npath;
1364 ttrace->paths.max = fd;
1365 }
1366
1367 ttrace->paths.table[fd] = strdup(pathname);
1368
1369 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1370 }
1371
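/*
 * Lazily resolve an fd to its pathname by readlink()ing /proc/<pid>/fd/<fd>
 * (or /proc/<pid>/task/<tid>/fd/<fd> for non-leader threads) and cache the
 * result in ttrace->paths, so that later references print as fd<path>.
 */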
1372 static int thread__read_fd_path(struct thread *thread, int fd)
1373 {
1374 char linkname[PATH_MAX], pathname[PATH_MAX];
1375 struct stat st;
1376 int ret;
1377
1378 if (thread->pid_ == thread->tid) {
1379 scnprintf(linkname, sizeof(linkname),
1380 "/proc/%d/fd/%d", thread->pid_, fd);
1381 } else {
1382 scnprintf(linkname, sizeof(linkname),
1383 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1384 }
1385
1386 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1387 return -1;
1388
1389 ret = readlink(linkname, pathname, sizeof(pathname));
1390
1391 if (ret < 0 || ret > st.st_size)
1392 return -1;
1393
1394 pathname[ret] = '\0';
1395 return trace__set_fd_pathname(thread, fd, pathname);
1396 }
1397
1398 static const char *thread__fd_path(struct thread *thread, int fd,
1399 struct trace *trace)
1400 {
1401 struct thread_trace *ttrace = thread__priv(thread);
1402
1403 if (ttrace == NULL)
1404 return NULL;
1405
1406 if (fd < 0)
1407 return NULL;
1408
1409 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1410 if (!trace->live)
1411 return NULL;
1412 ++trace->stats.proc_getname;
1413 if (thread__read_fd_path(thread, fd))
1414 return NULL;
1415 }
1416
1417 return ttrace->paths.table[fd];
1418 }
1419
1420 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1421 struct syscall_arg *arg)
1422 {
1423 int fd = arg->val;
1424 size_t printed = scnprintf(bf, size, "%d", fd);
1425 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1426
1427 if (path)
1428 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1429
1430 return printed;
1431 }
1432
1433 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1434 struct syscall_arg *arg)
1435 {
1436 int fd = arg->val;
1437 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1438 struct thread_trace *ttrace = thread__priv(arg->thread);
1439
1440 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1441 zfree(&ttrace->paths.table[fd]);
1442
1443 return printed;
1444 }
1445
1446 static bool trace__filter_duration(struct trace *trace, double t)
1447 {
1448 return t < (trace->duration_filter * NSEC_PER_MSEC);
1449 }
1450
1451 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1452 {
1453 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1454
1455 return fprintf(fp, "%10.3f ", ts);
1456 }
1457
1458 static bool done = false;
1459 static bool interrupted = false;
1460
1461 static void sig_handler(int sig)
1462 {
1463 done = true;
1464 interrupted = sig == SIGINT;
1465 }
1466
1467 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1468 u64 duration, u64 tstamp, FILE *fp)
1469 {
1470 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1471 printed += fprintf_duration(duration, fp);
1472
1473 if (trace->multiple_threads) {
1474 if (trace->show_comm)
1475 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1476 printed += fprintf(fp, "%d ", thread->tid);
1477 }
1478
1479 return printed;
1480 }
1481
1482 static int trace__process_event(struct trace *trace, struct machine *machine,
1483 union perf_event *event, struct perf_sample *sample)
1484 {
1485 int ret = 0;
1486
1487 switch (event->header.type) {
1488 case PERF_RECORD_LOST:
1489 color_fprintf(trace->output, PERF_COLOR_RED,
1490 "LOST %" PRIu64 " events!\n", event->lost.lost);
1491 		ret = machine__process_lost_event(machine, event, sample);
		break;
1492 default:
1493 ret = machine__process_event(machine, event, sample);
1494 break;
1495 }
1496
1497 return ret;
1498 }
1499
1500 static int trace__tool_process(struct perf_tool *tool,
1501 union perf_event *event,
1502 struct perf_sample *sample,
1503 struct machine *machine)
1504 {
1505 struct trace *trace = container_of(tool, struct trace, tool);
1506 return trace__process_event(trace, machine, event, sample);
1507 }
1508
1509 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1510 {
1511 int err = symbol__init(NULL);
1512
1513 if (err)
1514 return err;
1515
1516 trace->host = machine__new_host();
1517 if (trace->host == NULL)
1518 return -ENOMEM;
1519
1520 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1521 evlist->threads, trace__tool_process, false,
1522 trace->opts.proc_map_timeout);
1523 if (err)
1524 symbol__exit();
1525
1526 return err;
1527 }
1528
1529 static int syscall__set_arg_fmts(struct syscall *sc)
1530 {
1531 struct format_field *field;
1532 int idx = 0;
1533
1534 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1535 if (sc->arg_scnprintf == NULL)
1536 return -1;
1537
1538 if (sc->fmt)
1539 sc->arg_parm = sc->fmt->arg_parm;
1540
1541 for (field = sc->args; field; field = field->next) {
1542 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1543 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1544 else if (field->flags & FIELD_IS_POINTER)
1545 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1546 ++idx;
1547 }
1548
1549 return 0;
1550 }
1551
1552 static int trace__read_syscall_info(struct trace *trace, int id)
1553 {
1554 char tp_name[128];
1555 struct syscall *sc;
1556 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1557
1558 if (name == NULL)
1559 return -1;
1560
1561 if (id > trace->syscalls.max) {
1562 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1563
1564 if (nsyscalls == NULL)
1565 return -1;
1566
1567 if (trace->syscalls.max != -1) {
1568 memset(nsyscalls + trace->syscalls.max + 1, 0,
1569 (id - trace->syscalls.max) * sizeof(*sc));
1570 } else {
1571 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1572 }
1573
1574 trace->syscalls.table = nsyscalls;
1575 trace->syscalls.max = id;
1576 }
1577
1578 sc = trace->syscalls.table + id;
1579 sc->name = name;
1580
1581 if (trace->ev_qualifier) {
1582 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1583
1584 if (!(in ^ trace->not_ev_qualifier)) {
1585 sc->filtered = true;
1586 /*
1587 * No need to do read tracepoint information since this will be
1588 			 * No need to read tracepoint information since this will be
1589 */
1590 return 0;
1591 }
1592 }
1593
1594 sc->fmt = syscall_fmt__find(sc->name);
1595
1596 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1597 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1598
1599 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1600 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1601 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1602 }
1603
1604 if (sc->tp_format == NULL)
1605 return -1;
1606
1607 sc->args = sc->tp_format->format.fields;
1608 sc->nr_args = sc->tp_format->format.nr_fields;
1609 /* drop nr field - not relevant here; does not exist on older kernels */
1610 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1611 sc->args = sc->args->next;
1612 --sc->nr_args;
1613 }
1614
1615 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1616
1617 return syscall__set_arg_fmts(sc);
1618 }
1619
1620 static int trace__validate_ev_qualifier(struct trace *trace)
1621 {
1622 int err = 0;
1623 struct str_node *pos;
1624
1625 strlist__for_each(pos, trace->ev_qualifier) {
1626 const char *sc = pos->s;
1627
1628 if (audit_name_to_syscall(sc, trace->audit.machine) < 0) {
1629 if (err == 0) {
1630 fputs("Error:\tInvalid syscall ", trace->output);
1631 err = -EINVAL;
1632 } else {
1633 fputs(", ", trace->output);
1634 }
1635
1636 fputs(sc, trace->output);
1637 }
1638 }
1639
1640 if (err < 0) {
1641 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1642 "\nHint:\tand: 'man syscalls'\n", trace->output);
1643 }
1644
1645 return err;
1646 }
1647
1648 /*
1649 * args is to be interpreted as a series of longs but we need to handle
1650 * 8-byte unaligned accesses. args points to raw_data within the event
1651 * and raw_data is guaranteed to be 8-byte unaligned because it is
1652 * preceded by raw_size which is a u32. So we need to copy args to a temp
1653 * variable to read it. Most notably this avoids extended load instructions
1654 * on unaligned addresses
1655 */
1656
1657 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1658 unsigned char *args, struct trace *trace,
1659 struct thread *thread)
1660 {
1661 size_t printed = 0;
1662 unsigned char *p;
1663 unsigned long val;
1664
1665 if (sc->args != NULL) {
1666 struct format_field *field;
1667 u8 bit = 1;
1668 struct syscall_arg arg = {
1669 .idx = 0,
1670 .mask = 0,
1671 .trace = trace,
1672 .thread = thread,
1673 };
1674
1675 for (field = sc->args; field;
1676 field = field->next, ++arg.idx, bit <<= 1) {
1677 if (arg.mask & bit)
1678 continue;
1679
1680 /* special care for unaligned accesses */
1681 p = args + sizeof(unsigned long) * arg.idx;
1682 memcpy(&val, p, sizeof(val));
1683
1684 /*
1685 			 * Suppress this argument if its value is zero and
1686 			 * we don't have a string associated with it in a
1687 			 * strarray.
1688 */
1689 if (val == 0 &&
1690 !(sc->arg_scnprintf &&
1691 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1692 sc->arg_parm[arg.idx]))
1693 continue;
1694
1695 printed += scnprintf(bf + printed, size - printed,
1696 "%s%s: ", printed ? ", " : "", field->name);
1697 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1698 arg.val = val;
1699 if (sc->arg_parm)
1700 arg.parm = sc->arg_parm[arg.idx];
1701 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1702 size - printed, &arg);
1703 } else {
1704 printed += scnprintf(bf + printed, size - printed,
1705 "%ld", val);
1706 }
1707 }
1708 } else {
1709 int i = 0;
1710
1711 while (i < 6) {
1712 /* special care for unaligned accesses */
1713 p = args + sizeof(unsigned long) * i;
1714 memcpy(&val, p, sizeof(val));
1715 printed += scnprintf(bf + printed, size - printed,
1716 "%sarg%d: %ld",
1717 printed ? ", " : "", i, val);
1718 ++i;
1719 }
1720 }
1721
1722 return printed;
1723 }
1724
1725 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1726 union perf_event *event,
1727 struct perf_sample *sample);
1728
1729 static struct syscall *trace__syscall_info(struct trace *trace,
1730 struct perf_evsel *evsel, int id)
1731 {
1732
1733 if (id < 0) {
1734
1735 /*
1736 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1737 * before that, leaving at a higher verbosity level till that is
1738 * explained. Reproduced with plain ftrace with:
1739 *
1740 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1741 * grep "NR -1 " /t/trace_pipe
1742 *
1743 * After generating some load on the machine.
1744 */
1745 if (verbose > 1) {
1746 static u64 n;
1747 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1748 id, perf_evsel__name(evsel), ++n);
1749 }
1750 return NULL;
1751 }
1752
1753 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1754 trace__read_syscall_info(trace, id))
1755 goto out_cant_read;
1756
1757 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1758 goto out_cant_read;
1759
1760 return &trace->syscalls.table[id];
1761
1762 out_cant_read:
1763 if (verbose) {
1764 fprintf(trace->output, "Problems reading syscall %d", id);
1765 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1766 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1767 fputs(" information\n", trace->output);
1768 }
1769 return NULL;
1770 }
1771
1772 static void thread__update_stats(struct thread_trace *ttrace,
1773 int id, struct perf_sample *sample)
1774 {
1775 struct int_node *inode;
1776 struct stats *stats;
1777 u64 duration = 0;
1778
1779 inode = intlist__findnew(ttrace->syscall_stats, id);
1780 if (inode == NULL)
1781 return;
1782
1783 stats = inode->priv;
1784 if (stats == NULL) {
1785 stats = malloc(sizeof(struct stats));
1786 if (stats == NULL)
1787 return;
1788 init_stats(stats);
1789 inode->priv = stats;
1790 }
1791
1792 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1793 duration = sample->time - ttrace->entry_time;
1794
1795 update_stats(stats, duration);
1796 }
1797
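/*
 * If a syscall entry is still pending for the current thread when an event
 * from another context arrives, print it now, terminated with ") ...", and
 * clear entry_pending so the matching sys_exit shows up as "... [continued]".
 */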
1798 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1799 {
1800 struct thread_trace *ttrace;
1801 u64 duration;
1802 size_t printed;
1803
1804 if (trace->current == NULL)
1805 return 0;
1806
1807 ttrace = thread__priv(trace->current);
1808
1809 if (!ttrace->entry_pending)
1810 return 0;
1811
1812 duration = sample->time - ttrace->entry_time;
1813
1814 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1815 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1816 ttrace->entry_pending = false;
1817
1818 return printed;
1819 }
1820
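/*
 * raw_syscalls:sys_enter handler: formats "name(args" into ttrace->entry_str
 * and normally defers printing until the matching sys_exit, so that duration
 * and return value end up on the same line. exit/exit_group never return, so
 * those are printed right away.
 */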
1821 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1822 union perf_event *event __maybe_unused,
1823 struct perf_sample *sample)
1824 {
1825 char *msg;
1826 void *args;
1827 size_t printed = 0;
1828 struct thread *thread;
1829 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1830 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1831 struct thread_trace *ttrace;
1832
1833 if (sc == NULL)
1834 return -1;
1835
1836 if (sc->filtered)
1837 return 0;
1838
1839 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1840 ttrace = thread__trace(thread, trace->output);
1841 if (ttrace == NULL)
1842 goto out_put;
1843
1844 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1845
1846 if (ttrace->entry_str == NULL) {
1847 ttrace->entry_str = malloc(1024);
1848 if (!ttrace->entry_str)
1849 goto out_put;
1850 }
1851
1852 if (!trace->summary_only)
1853 trace__printf_interrupted_entry(trace, sample);
1854
1855 ttrace->entry_time = sample->time;
1856 msg = ttrace->entry_str;
1857 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1858
1859 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1860 args, trace, thread);
1861
1862 if (sc->is_exit) {
1863 if (!trace->duration_filter && !trace->summary_only) {
1864 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1865 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1866 }
1867 } else
1868 ttrace->entry_pending = true;
1869
1870 if (trace->current != thread) {
1871 thread__put(trace->current);
1872 trace->current = thread__get(thread);
1873 }
1874 err = 0;
1875 out_put:
1876 thread__put(thread);
1877 return err;
1878 }
1879
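/*
 * raw_syscalls:sys_exit handler: computes the duration from the pending
 * entry, applies the duration filter and prints the completed line, with the
 * return value decoded according to the syscall_fmt: errno name, hex pointer,
 * "0 Timeout" or plain signed decimal.
 */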
1880 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1881 union perf_event *event __maybe_unused,
1882 struct perf_sample *sample)
1883 {
1884 long ret;
1885 u64 duration = 0;
1886 struct thread *thread;
1887 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1888 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1889 struct thread_trace *ttrace;
1890
1891 if (sc == NULL)
1892 return -1;
1893
1894 if (sc->filtered)
1895 return 0;
1896
1897 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1898 ttrace = thread__trace(thread, trace->output);
1899 if (ttrace == NULL)
1900 goto out_put;
1901
1902 if (trace->summary)
1903 thread__update_stats(ttrace, id, sample);
1904
1905 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1906
1907 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1908 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1909 trace->last_vfs_getname = NULL;
1910 ++trace->stats.vfs_getname;
1911 }
1912
1913 ttrace->exit_time = sample->time;
1914
1915 if (ttrace->entry_time) {
1916 duration = sample->time - ttrace->entry_time;
1917 if (trace__filter_duration(trace, duration))
1918 goto out;
1919 } else if (trace->duration_filter)
1920 goto out;
1921
1922 if (trace->summary_only)
1923 goto out;
1924
1925 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1926
1927 if (ttrace->entry_pending) {
1928 fprintf(trace->output, "%-70s", ttrace->entry_str);
1929 } else {
1930 fprintf(trace->output, " ... [");
1931 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1932 fprintf(trace->output, "]: %s()", sc->name);
1933 }
1934
1935 if (sc->fmt == NULL) {
1936 signed_print:
1937 fprintf(trace->output, ") = %ld", ret);
1938 } else if (ret < 0 && sc->fmt->errmsg) {
1939 char bf[STRERR_BUFSIZE];
1940 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1941 *e = audit_errno_to_name(-ret);
1942
1943 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1944 } else if (ret == 0 && sc->fmt->timeout)
1945 fprintf(trace->output, ") = 0 Timeout");
1946 else if (sc->fmt->hexret)
1947 fprintf(trace->output, ") = %#lx", ret);
1948 else
1949 goto signed_print;
1950
1951 fputc('\n', trace->output);
1952 out:
1953 ttrace->entry_pending = false;
1954 err = 0;
1955 out_put:
1956 thread__put(thread);
1957 return err;
1958 }
1959
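/* Remember the pathname reported by the probe:vfs_getname tracepoint. */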
1960 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1961 union perf_event *event __maybe_unused,
1962 struct perf_sample *sample)
1963 {
1964 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1965 return 0;
1966 }
1967
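/*
* sched:sched_stat_runtime handler: accumulate per-thread and global
* on-CPU time in milliseconds; dump the raw payload if the thread has no
* trace state.
*/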
1968 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1969 union perf_event *event __maybe_unused,
1970 struct perf_sample *sample)
1971 {
1972 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1973 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1974 struct thread *thread = machine__findnew_thread(trace->host,
1975 sample->pid,
1976 sample->tid);
1977 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1978
1979 if (ttrace == NULL)
1980 goto out_dump;
1981
1982 ttrace->runtime_ms += runtime_ms;
1983 trace->runtime_ms += runtime_ms;
1984 thread__put(thread);
1985 return 0;
1986
1987 out_dump:
1988 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 "\n",
1989 evsel->name,
1990 perf_evsel__strval(evsel, sample, "comm"),
1991 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1992 runtime,
1993 perf_evsel__intval(evsel, sample, "vruntime"));
1994 thread__put(thread);
1995 return 0;
1996 }
1997
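/*
* Handler for the extra events requested with --event: print any pending
* interrupted syscall entry, the timestamp, the event name and, when a
* tracepoint format is available, its formatted payload.
*/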
1998 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1999 union perf_event *event __maybe_unused,
2000 struct perf_sample *sample)
2001 {
2002 trace__printf_interrupted_entry(trace, sample);
2003 trace__fprintf_tstamp(trace, sample->time, trace->output);
2004
2005 if (trace->trace_syscalls)
2006 fprintf(trace->output, "( ): ");
2007
2008 fprintf(trace->output, "%s:", evsel->name);
2009
2010 if (evsel->tp_format) {
2011 event_format__fprintf(evsel->tp_format, sample->cpu,
2012 sample->raw_data, sample->raw_size,
2013 trace->output);
2014 }
2015
2016 fprintf(trace->output, ")\n");
2017 return 0;
2018 }
2019
2020 static void print_location(FILE *f, struct perf_sample *sample,
2021 struct addr_location *al,
2022 bool print_dso, bool print_sym)
2023 {
2024
2025 if ((verbose || print_dso) && al->map)
2026 fprintf(f, "%s@", al->map->dso->long_name);
2027
2028 if ((verbose || print_sym) && al->sym)
2029 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2030 al->addr - al->sym->start);
2031 else if (al->map)
2032 fprintf(f, "0x%" PRIx64, al->addr);
2033 else
2034 fprintf(f, "0x%" PRIx64, sample->addr);
2035 }
2036
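/*
* Page fault software event handler: count major/minor faults per thread
* and, unless in summary-only mode, print the faulting IP and the target
* address resolved to symbol/DSO.
*/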
2037 static int trace__pgfault(struct trace *trace,
2038 struct perf_evsel *evsel,
2039 union perf_event *event,
2040 struct perf_sample *sample)
2041 {
2042 struct thread *thread;
2043 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2044 struct addr_location al;
2045 char map_type = 'd';
2046 struct thread_trace *ttrace;
2047 int err = -1;
2048
2049 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2050 ttrace = thread__trace(thread, trace->output);
2051 if (ttrace == NULL)
2052 goto out_put;
2053
2054 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2055 ttrace->pfmaj++;
2056 else
2057 ttrace->pfmin++;
2058
2059 if (trace->summary_only)
2060 goto out;
2061
2062 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2063 sample->ip, &al);
2064
2065 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2066
2067 fprintf(trace->output, "%sfault [",
2068 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2069 "maj" : "min");
2070
2071 print_location(trace->output, sample, &al, false, true);
2072
2073 fprintf(trace->output, "] => ");
2074
2075 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2076 sample->addr, &al);
2077
2078 if (!al.map) {
2079 thread__find_addr_location(thread, cpumode,
2080 MAP__FUNCTION, sample->addr, &al);
2081
2082 if (al.map)
2083 map_type = 'x';
2084 else
2085 map_type = '?';
2086 }
2087
2088 print_location(trace->output, sample, &al, true, false);
2089
2090 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2091 out:
2092 err = 0;
2093 out_put:
2094 thread__put(thread);
2095 return err;
2096 }
2097
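/* In replay mode, skip samples whose pid/tid is not in the -p/-t lists. */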
2098 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2099 {
2100 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2101 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2102 return false;
2103
2104 if (trace->pid_list || trace->tid_list)
2105 return true;
2106
2107 return false;
2108 }
2109
2110 static int trace__process_sample(struct perf_tool *tool,
2111 union perf_event *event,
2112 struct perf_sample *sample,
2113 struct perf_evsel *evsel,
2114 struct machine *machine __maybe_unused)
2115 {
2116 struct trace *trace = container_of(tool, struct trace, tool);
2117 int err = 0;
2118
2119 tracepoint_handler handler = evsel->handler;
2120
2121 if (skip_sample(trace, sample))
2122 return 0;
2123
2124 if (!trace->full_time && trace->base_time == 0)
2125 trace->base_time = sample->time;
2126
2127 if (handler) {
2128 ++trace->nr_events;
2129 handler(trace, evsel, event, sample);
2130 }
2131
2132 return err;
2133 }
2134
2135 static int parse_target_str(struct trace *trace)
2136 {
2137 if (trace->opts.target.pid) {
2138 trace->pid_list = intlist__new(trace->opts.target.pid);
2139 if (trace->pid_list == NULL) {
2140 pr_err("Error parsing process id string\n");
2141 return -EINVAL;
2142 }
2143 }
2144
2145 if (trace->opts.target.tid) {
2146 trace->tid_list = intlist__new(trace->opts.target.tid);
2147 if (trace->tid_list == NULL) {
2148 pr_err("Error parsing thread id string\n");
2149 return -EINVAL;
2150 }
2151 }
2152
2153 return 0;
2154 }
2155
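/*
* 'perf trace record': build a 'perf record' command line with the
* raw_syscalls (or syscalls) tracepoints plus any requested page fault
* events, then hand it to cmd_record().
*/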
2156 static int trace__record(struct trace *trace, int argc, const char **argv)
2157 {
2158 unsigned int rec_argc, i, j;
2159 const char **rec_argv;
2160 const char * const record_args[] = {
2161 "record",
2162 "-R",
2163 "-m", "1024",
2164 "-c", "1",
2165 };
2166
2167 const char * const sc_args[] = { "-e", };
2168 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2169 const char * const majpf_args[] = { "-e", "major-faults" };
2170 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2171 const char * const minpf_args[] = { "-e", "minor-faults" };
2172 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2173
2174 /* +1 is for the event string below */
2175 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2176 majpf_args_nr + minpf_args_nr + argc;
2177 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2178
2179 if (rec_argv == NULL)
2180 return -ENOMEM;
2181
2182 j = 0;
2183 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2184 rec_argv[j++] = record_args[i];
2185
2186 if (trace->trace_syscalls) {
2187 for (i = 0; i < sc_args_nr; i++)
2188 rec_argv[j++] = sc_args[i];
2189
2190 /* event string may be different for older kernels - e.g., RHEL6 */
2191 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2192 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2193 else if (is_valid_tracepoint("syscalls:sys_enter"))
2194 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2195 else {
2196 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
free(rec_argv);
2197 return -1;
2198 }
2199 }
2200
2201 if (trace->trace_pgfaults & TRACE_PFMAJ)
2202 for (i = 0; i < majpf_args_nr; i++)
2203 rec_argv[j++] = majpf_args[i];
2204
2205 if (trace->trace_pgfaults & TRACE_PFMIN)
2206 for (i = 0; i < minpf_args_nr; i++)
2207 rec_argv[j++] = minpf_args[i];
2208
2209 for (i = 0; i < (unsigned int)argc; i++)
2210 rec_argv[j++] = argv[i];
2211
2212 return cmd_record(j, rec_argv, NULL);
2213 }
2214
2215 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2216
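/*
* If a probe:vfs_getname tracepoint with a "pathname" field is available,
* add it so that fds returned by open() can be mapped to file names.
*/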
2217 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2218 {
2219 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2220 if (evsel == NULL)
2221 return;
2222
2223 if (perf_evsel__field(evsel, "pathname") == NULL) {
2224 perf_evsel__delete(evsel);
2225 return;
2226 }
2227
2228 evsel->handler = trace__vfs_getname;
2229 perf_evlist__add(evlist, evsel);
2230 }
2231
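/*
* Add a software page fault event (major or minor, per 'config') that
* samples every fault and requests data mmaps so fault addresses can be
* resolved.
*/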
2232 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2233 u64 config)
2234 {
2235 struct perf_evsel *evsel;
2236 struct perf_event_attr attr = {
2237 .type = PERF_TYPE_SOFTWARE,
2238 .mmap_data = 1,
2239 };
2240
2241 attr.config = config;
2242 attr.sample_period = 1;
2243
2244 event_attr_init(&attr);
2245
2246 evsel = perf_evsel__new(&attr);
2247 if (!evsel)
2248 return -ENOMEM;
2249
2250 evsel->handler = trace__pgfault;
2251 perf_evlist__add(evlist, evsel);
2252
2253 return 0;
2254 }
2255
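/*
* Dispatch one mmapped event: non-sample events go to the generic machine
* handling, samples are routed to the handler of the evsel they belong to.
*/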
2256 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2257 {
2258 const u32 type = event->header.type;
2259 struct perf_evsel *evsel;
2260
2261 if (!trace->full_time && trace->base_time == 0)
2262 trace->base_time = sample->time;
2263
2264 if (type != PERF_RECORD_SAMPLE) {
2265 trace__process_event(trace, trace->host, event, sample);
2266 return;
2267 }
2268
2269 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2270 if (evsel == NULL) {
2271 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2272 return;
2273 }
2274
2275 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2276 sample->raw_data == NULL) {
2277 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2278 perf_evsel__name(evsel), sample->tid,
2279 sample->cpu, sample->raw_size);
2280 } else {
2281 tracepoint_handler handler = evsel->handler;
2282 handler(trace, evsel, event, sample);
2283 }
2284 }
2285
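/*
* Live mode: populate the evlist (syscall tracepoints, vfs_getname probe,
* page fault and sched_stat_runtime events as requested), start the
* workload if one was given, then keep reading the mmapped ring buffers
* and dispatching events until interrupted or drained.
*/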
2286 static int trace__run(struct trace *trace, int argc, const char **argv)
2287 {
2288 struct perf_evlist *evlist = trace->evlist;
2289 int err = -1, i;
2290 unsigned long before;
2291 const bool forks = argc > 0;
2292 bool draining = false;
2293
2294 trace->live = true;
2295
2296 if (trace->trace_syscalls &&
2297 perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2298 trace__sys_exit))
2299 goto out_error_raw_syscalls;
2300
2301 if (trace->trace_syscalls)
2302 perf_evlist__add_vfs_getname(evlist);
2303
2304 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2305 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2306 goto out_error_mem;
2307 }
2308
2309 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2310 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2311 goto out_error_mem;
2312
2313 if (trace->sched &&
2314 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2315 trace__sched_stat_runtime))
2316 goto out_error_sched_stat_runtime;
2317
2318 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2319 if (err < 0) {
2320 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2321 goto out_delete_evlist;
2322 }
2323
2324 err = trace__symbols_init(trace, evlist);
2325 if (err < 0) {
2326 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2327 goto out_delete_evlist;
2328 }
2329
2330 perf_evlist__config(evlist, &trace->opts);
2331
2332 signal(SIGCHLD, sig_handler);
2333 signal(SIGINT, sig_handler);
2334
2335 if (forks) {
2336 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2337 argv, false, NULL);
2338 if (err < 0) {
2339 fprintf(trace->output, "Couldn't run the workload!\n");
2340 goto out_delete_evlist;
2341 }
2342 }
2343
2344 err = perf_evlist__open(evlist);
2345 if (err < 0)
2346 goto out_error_open;
2347
2348 /*
2349 * Better not use !target__has_task() here because we need to cover the
2350 * case where no threads were specified in the command line, but a
2351 * workload was, and in that case we will fill in the thread_map when
2352 * we fork the workload in perf_evlist__prepare_workload.
2353 */
2354 if (trace->filter_pids.nr > 0)
2355 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2356 else if (thread_map__pid(evlist->threads, 0) == -1)
2357 err = perf_evlist__set_filter_pid(evlist, getpid());
2358
2359 if (err < 0) {
2360 printf("err=%d,%s\n", -err, strerror(-err));
2361 exit(1);
2362 }
2363
2364 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2365 if (err < 0)
2366 goto out_error_mmap;
2367
2368 if (!target__none(&trace->opts.target))
2369 perf_evlist__enable(evlist);
2370
2371 if (forks)
2372 perf_evlist__start_workload(evlist);
2373
2374 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2375 evlist->threads->nr > 1 ||
2376 perf_evlist__first(evlist)->attr.inherit;
2377 again:
2378 before = trace->nr_events;
2379
2380 for (i = 0; i < evlist->nr_mmaps; i++) {
2381 union perf_event *event;
2382
2383 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2384 struct perf_sample sample;
2385
2386 ++trace->nr_events;
2387
2388 err = perf_evlist__parse_sample(evlist, event, &sample);
2389 if (err) {
2390 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2391 goto next_event;
2392 }
2393
2394 trace__handle_event(trace, event, &sample);
2395 next_event:
2396 perf_evlist__mmap_consume(evlist, i);
2397
2398 if (interrupted)
2399 goto out_disable;
2400
2401 if (done && !draining) {
2402 perf_evlist__disable(evlist);
2403 draining = true;
2404 }
2405 }
2406 }
2407
2408 if (trace->nr_events == before) {
2409 int timeout = done ? 100 : -1;
2410
2411 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2412 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2413 draining = true;
2414
2415 goto again;
2416 }
2417 } else {
2418 goto again;
2419 }
2420
2421 out_disable:
2422 thread__zput(trace->current);
2423
2424 perf_evlist__disable(evlist);
2425
2426 if (!err) {
2427 if (trace->summary)
2428 trace__fprintf_thread_summary(trace, trace->output);
2429
2430 if (trace->show_tool_stats) {
2431 fprintf(trace->output, "Stats:\n "
2432 " vfs_getname : %" PRIu64 "\n"
2433 " proc_getname: %" PRIu64 "\n",
2434 trace->stats.vfs_getname,
2435 trace->stats.proc_getname);
2436 }
2437 }
2438
2439 out_delete_evlist:
2440 perf_evlist__delete(evlist);
2441 trace->evlist = NULL;
2442 trace->live = false;
2443 return err;
2444 {
2445 char errbuf[BUFSIZ];
2446
2447 out_error_sched_stat_runtime:
2448 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2449 goto out_error;
2450
2451 out_error_raw_syscalls:
2452 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2453 goto out_error;
2454
2455 out_error_mmap:
2456 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2457 goto out_error;
2458
2459 out_error_open:
2460 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2461
2462 out_error:
2463 fprintf(trace->output, "%s\n", errbuf);
2464 goto out_delete_evlist;
2465 }
2466 out_error_mem:
2467 fprintf(trace->output, "Not enough memory to run!\n");
2468 goto out_delete_evlist;
2469 }
2470
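/*
* Replay mode (-i): process a recorded perf.data file, wiring the same
* sys_enter/sys_exit/pgfault handlers used in live mode to the events
* found in the session.
*/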
2471 static int trace__replay(struct trace *trace)
2472 {
2473 const struct perf_evsel_str_handler handlers[] = {
2474 { "probe:vfs_getname", trace__vfs_getname, },
2475 };
2476 struct perf_data_file file = {
2477 .path = input_name,
2478 .mode = PERF_DATA_MODE_READ,
2479 .force = trace->force,
2480 };
2481 struct perf_session *session;
2482 struct perf_evsel *evsel;
2483 int err = -1;
2484
2485 trace->tool.sample = trace__process_sample;
2486 trace->tool.mmap = perf_event__process_mmap;
2487 trace->tool.mmap2 = perf_event__process_mmap2;
2488 trace->tool.comm = perf_event__process_comm;
2489 trace->tool.exit = perf_event__process_exit;
2490 trace->tool.fork = perf_event__process_fork;
2491 trace->tool.attr = perf_event__process_attr;
2492 trace->tool.tracing_data = perf_event__process_tracing_data;
2493 trace->tool.build_id = perf_event__process_build_id;
2494
2495 trace->tool.ordered_events = true;
2496 trace->tool.ordering_requires_timestamps = true;
2497
2498 /* add tid to output */
2499 trace->multiple_threads = true;
2500
2501 session = perf_session__new(&file, false, &trace->tool);
2502 if (session == NULL)
2503 return -1;
2504
2505 if (symbol__init(&session->header.env) < 0)
2506 goto out;
2507
2508 trace->host = &session->machines.host;
2509
2510 err = perf_session__set_tracepoints_handlers(session, handlers);
2511 if (err)
2512 goto out;
2513
2514 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2515 "raw_syscalls:sys_enter");
2516 /* older kernels have syscalls tp versus raw_syscalls */
2517 if (evsel == NULL)
2518 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2519 "syscalls:sys_enter");
2520
2521 if (evsel &&
2522 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2523 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2524 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2525 goto out;
2526 }
2527
2528 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2529 "raw_syscalls:sys_exit");
2530 if (evsel == NULL)
2531 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2532 "syscalls:sys_exit");
2533 if (evsel &&
2534 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2535 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2536 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2537 goto out;
2538 }
2539
2540 evlist__for_each(session->evlist, evsel) {
2541 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2542 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2543 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2544 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2545 evsel->handler = trace__pgfault;
2546 }
2547
2548 err = parse_target_str(trace);
2549 if (err != 0)
2550 goto out;
2551
2552 setup_pager();
2553
2554 err = perf_session__process_events(session);
2555 if (err)
2556 pr_err("Failed to process events, error %d", err);
2557
2558 else if (trace->summary)
2559 trace__fprintf_thread_summary(trace, trace->output);
2560
2561 out:
2562 perf_session__delete(session);
2563
2564 return err;
2565 }
2566
2567 static size_t trace__fprintf_threads_header(FILE *fp)
2568 {
2569 size_t printed;
2570
2571 printed = fprintf(fp, "\n Summary of events:\n\n");
2572
2573 return printed;
2574 }
2575
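/*
* Print one thread's per-syscall table: call count, min/avg/max latency
* in milliseconds and the stddev as a percentage of the average.
*/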
2576 static size_t thread__dump_stats(struct thread_trace *ttrace,
2577 struct trace *trace, FILE *fp)
2578 {
2579 struct stats *stats;
2580 size_t printed = 0;
2581 struct syscall *sc;
2582 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2583
2584 if (inode == NULL)
2585 return 0;
2586
2587 printed += fprintf(fp, "\n");
2588
2589 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2590 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2591 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2592
2593 /* each int_node is a syscall */
2594 while (inode) {
2595 stats = inode->priv;
2596 if (stats) {
2597 double min = (double)(stats->min) / NSEC_PER_MSEC;
2598 double max = (double)(stats->max) / NSEC_PER_MSEC;
2599 double avg = avg_stats(stats);
2600 double pct;
2601 u64 n = (u64) stats->n;
2602
2603 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2604 avg /= NSEC_PER_MSEC;
2605
2606 sc = &trace->syscalls.table[inode->i];
2607 printed += fprintf(fp, " %-15s", sc->name);
2608 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2609 n, min, avg);
2610 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2611 }
2612
2613 inode = intlist__next(inode);
2614 }
2615
2616 printed += fprintf(fp, "\n\n");
2617
2618 return printed;
2619 }
2620
2621 /* struct used to pass data to per-thread function */
2622 struct summary_data {
2623 FILE *fp;
2624 struct trace *trace;
2625 size_t printed;
2626 };
2627
2628 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2629 {
2630 struct summary_data *data = priv;
2631 FILE *fp = data->fp;
2632 size_t printed = data->printed;
2633 struct trace *trace = data->trace;
2634 struct thread_trace *ttrace = thread__priv(thread);
2635 double ratio;
2636
2637 if (ttrace == NULL)
2638 return 0;
2639
2640 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2641
2642 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2643 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2644 printed += fprintf(fp, "%.1f%%", ratio);
2645 if (ttrace->pfmaj)
2646 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2647 if (ttrace->pfmin)
2648 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2649 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2650 printed += thread__dump_stats(ttrace, trace, fp);
2651
2652 data->printed += printed;
2653
2654 return 0;
2655 }
2656
2657 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2658 {
2659 struct summary_data data = {
2660 .fp = fp,
2661 .trace = trace
2662 };
2663 data.printed = trace__fprintf_threads_header(fp);
2664
2665 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2666
2667 return data.printed;
2668 }
2669
2670 static int trace__set_duration(const struct option *opt, const char *str,
2671 int unset __maybe_unused)
2672 {
2673 struct trace *trace = opt->value;
2674
2675 trace->duration_filter = atof(str);
2676 return 0;
2677 }
2678
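/*
* Parse the --filter-pids CSV list into trace->filter_pids, always putting
* our own pid first so that perf trace does not trace itself.
*/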
2679 static int trace__set_filter_pids(const struct option *opt, const char *str,
2680 int unset __maybe_unused)
2681 {
2682 int ret = -1;
2683 size_t i;
2684 struct trace *trace = opt->value;
2685 /*
2686 * FIXME: introduce an intarray class, parse the CSV directly and create a
2687 * { int nr, int entries[] } struct...
2688 */
2689 struct intlist *list = intlist__new(str);
2690
2691 if (list == NULL)
2692 return -1;
2693
2694 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2695 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2696
2697 if (trace->filter_pids.entries == NULL)
2698 goto out;
2699
2700 trace->filter_pids.entries[0] = getpid();
2701
2702 for (i = 1; i < trace->filter_pids.nr; ++i)
2703 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2704
2705 intlist__delete(list);
2706 ret = 0;
2707 out:
2708 return ret;
2709 }
2710
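/*
* Open the -o output file, first rotating a pre-existing non-empty file
* of the same name to <name>.old.
*/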
2711 static int trace__open_output(struct trace *trace, const char *filename)
2712 {
2713 struct stat st;
2714
2715 if (!stat(filename, &st) && st.st_size) {
2716 char oldname[PATH_MAX];
2717
2718 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2719 unlink(oldname);
2720 rename(filename, oldname);
2721 }
2722
2723 trace->output = fopen(filename, "w");
2724
2725 return trace->output == NULL ? -errno : 0;
2726 }
2727
2728 static int parse_pagefaults(const struct option *opt, const char *str,
2729 int unset __maybe_unused)
2730 {
2731 int *trace_pgfaults = opt->value;
2732
2733 if (strcmp(str, "all") == 0)
2734 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2735 else if (strcmp(str, "maj") == 0)
2736 *trace_pgfaults |= TRACE_PFMAJ;
2737 else if (strcmp(str, "min") == 0)
2738 *trace_pgfaults |= TRACE_PFMIN;
2739 else
2740 return -1;
2741
2742 return 0;
2743 }
2744
2745 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2746 {
2747 struct perf_evsel *evsel;
2748
2749 evlist__for_each(evlist, evsel)
2750 evsel->handler = handler;
2751 }
2752
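/*
* Entry point for 'perf trace': parse options, validate the target and
* event qualifier, then either replay a perf.data file (-i) or trace live.
*/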
2753 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2754 {
2755 const char *trace_usage[] = {
2756 "perf trace [<options>] [<command>]",
2757 "perf trace [<options>] -- <command> [<options>]",
2758 "perf trace record [<options>] [<command>]",
2759 "perf trace record [<options>] -- <command> [<options>]",
2760 NULL
2761 };
2762 struct trace trace = {
2763 .audit = {
2764 .machine = audit_detect_machine(),
2765 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2766 },
2767 .syscalls = {
2768 .max = -1,
2769 },
2770 .opts = {
2771 .target = {
2772 .uid = UINT_MAX,
2773 .uses_mmap = true,
2774 },
2775 .user_freq = UINT_MAX,
2776 .user_interval = ULLONG_MAX,
2777 .no_buffering = true,
2778 .mmap_pages = UINT_MAX,
2779 .proc_map_timeout = 500,
2780 },
2781 .output = stdout,
2782 .show_comm = true,
2783 .trace_syscalls = true,
2784 };
2785 const char *output_name = NULL;
2786 const char *ev_qualifier_str = NULL;
2787 const struct option trace_options[] = {
2788 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2789 "event selector. use 'perf list' to list available events",
2790 parse_events_option),
2791 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2792 "show the thread COMM next to its id"),
2793 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2794 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2795 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2796 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2797 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2798 "trace events on existing process id"),
2799 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2800 "trace events on existing thread id"),
2801 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2802 "pids to filter (by the kernel)", trace__set_filter_pids),
2803 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2804 "system-wide collection from all CPUs"),
2805 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2806 "list of cpus to monitor"),
2807 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2808 "child tasks do not inherit counters"),
2809 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2810 "number of mmap data pages",
2811 perf_evlist__parse_mmap_pages),
2812 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2813 "user to profile"),
2814 OPT_CALLBACK(0, "duration", &trace, "float",
2815 "show only events with duration > N.M ms",
2816 trace__set_duration),
2817 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2818 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2819 OPT_BOOLEAN('T', "time", &trace.full_time,
2820 "Show full timestamp, not time relative to first start"),
2821 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2822 "Show only syscall summary with statistics"),
2823 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2824 "Show all syscalls and summary with statistics"),
2825 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2826 "Trace pagefaults", parse_pagefaults, "maj"),
2827 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2828 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2829 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2830 "per thread proc mmap processing timeout in ms"),
2831 OPT_END()
2832 };
2833 const char * const trace_subcommands[] = { "record", NULL };
2834 int err;
2835 char bf[BUFSIZ];
2836
2837 signal(SIGSEGV, sighandler_dump_stack);
2838 signal(SIGFPE, sighandler_dump_stack);
2839
2840 trace.evlist = perf_evlist__new();
2841
2842 if (trace.evlist == NULL) {
2843 pr_err("Not enough memory to run!\n");
2844 err = -ENOMEM;
2845 goto out;
2846 }
2847
2848 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2849 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2850
2851 if (trace.trace_pgfaults) {
2852 trace.opts.sample_address = true;
2853 trace.opts.sample_time = true;
2854 }
2855
2856 if (trace.evlist->nr_entries > 0)
2857 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2858
2859 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2860 return trace__record(&trace, argc-1, &argv[1]);
2861
2862 /* summary_only implies summary option, but don't overwrite summary if set */
2863 if (trace.summary_only)
2864 trace.summary = trace.summary_only;
2865
2866 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2867 trace.evlist->nr_entries == 0 /* Was --event used? */) {
2868 pr_err("Please specify something to trace.\n");
2869 return -1;
2870 }
2871
2872 if (output_name != NULL) {
2873 err = trace__open_output(&trace, output_name);
2874 if (err < 0) {
2875 perror("failed to create output file");
2876 goto out;
2877 }
2878 }
2879
2880 if (ev_qualifier_str != NULL) {
2881 const char *s = ev_qualifier_str;
2882
2883 trace.not_ev_qualifier = *s == '!';
2884 if (trace.not_ev_qualifier)
2885 ++s;
2886 trace.ev_qualifier = strlist__new(true, s);
2887 if (trace.ev_qualifier == NULL) {
2888 fputs("Not enough memory to parse event qualifier",
2889 trace.output);
2890 err = -ENOMEM;
2891 goto out_close;
2892 }
2893
2894 err = trace__validate_ev_qualifier(&trace);
2895 if (err)
2896 goto out_close;
2897 }
2898
2899 err = target__validate(&trace.opts.target);
2900 if (err) {
2901 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2902 fprintf(trace.output, "%s", bf);
2903 goto out_close;
2904 }
2905
2906 err = target__parse_uid(&trace.opts.target);
2907 if (err) {
2908 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2909 fprintf(trace.output, "%s", bf);
2910 goto out_close;
2911 }
2912
2913 if (!argc && target__none(&trace.opts.target))
2914 trace.opts.target.system_wide = true;
2915
2916 if (input_name)
2917 err = trace__replay(&trace);
2918 else
2919 err = trace__run(&trace, argc, argv);
2920
2921 out_close:
2922 if (output_name != NULL)
2923 fclose(trace.output);
2924 out:
2925 return err;
2926 }