Commit | Line | Data |
---|---|---|
ddcacfa0 IM |
1 | /* |
2 | * kerneltop.c: show top kernel functions - performance counters showcase | |
3 | ||
4 | Build with: | |
5 | ||
6 | cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt | |
7 | ||
8 | Sample output: | |
9 | ||
10 | ------------------------------------------------------------------------------ | |
11 | KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) | |
12 | ------------------------------------------------------------------------------ | |
13 | ||
14 | weight RIP kernel function | |
15 | ______ ________________ _______________ | |
16 | ||
17 | 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev | |
18 | 33.00 - ffffffff804cb740 : sock_alloc_send_skb | |
19 | 31.26 - ffffffff804ce808 : skb_push | |
20 | 22.43 - ffffffff80510004 : tcp_established_options | |
21 | 19.00 - ffffffff8027d250 : find_get_page | |
22 | 15.76 - ffffffff804e4fc9 : eth_type_trans | |
23 | 15.20 - ffffffff804d8baa : dst_release | |
24 | 14.86 - ffffffff804cf5d8 : skb_release_head_state | |
25 | 14.00 - ffffffff802217d5 : read_hpet | |
26 | 12.00 - ffffffff804ffb7f : __ip_local_out | |
27 | 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish | |
28 | 8.54 - ffffffff805001a3 : ip_queue_xmit | |
29 | */ | |
30 | ||
31 | /* | |
32 | * perfstat: /usr/bin/time -alike performance counter statistics utility | |
33 | ||
34 | It summarizes the counter events of all tasks (and child tasks), | |
35 | covering all CPUs that the command (or workload) executes on. | |
36 | It only counts the per-task events of the workload started, | |
37 | independent of how many other tasks run on those CPUs. | |
38 | ||
39 | Sample output: | |
40 | ||
41 | $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null | |
42 | ||
43 | Performance counter stats for 'ls': | |
44 | ||
45 | 163516953 instructions | |
46 | 2295 cache-misses | |
47 | 2855182 branch-misses | |
48 | */ | |
49 | ||
50 | /* | |
51 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | |
52 | * | |
53 | * Improvements and fixes by: | |
54 | * | |
55 | * Arjan van de Ven <arjan@linux.intel.com> | |
56 | * Yanmin Zhang <yanmin.zhang@intel.com> | |
57 | * Wu Fengguang <fengguang.wu@intel.com> | |
58 | * Mike Galbraith <efault@gmx.de> | |
59 | * Paul Mackerras <paulus@samba.org> | |
60 | * | |
61 | * Released under the GPL v2. (and only v2, not any later version) | |
62 | */ | |
63 | ||
148be2c1 | 64 | #include "util/util.h" |
ddcacfa0 IM |
65 | |
66 | #include <getopt.h> | |
67 | #include <assert.h> | |
68 | #include <fcntl.h> | |
69 | #include <stdio.h> | |
70 | #include <errno.h> | |
ddcacfa0 IM |
71 | #include <time.h> |
72 | #include <sched.h> | |
73 | #include <pthread.h> | |
74 | ||
75 | #include <sys/syscall.h> | |
76 | #include <sys/ioctl.h> | |
77 | #include <sys/poll.h> | |
78 | #include <sys/prctl.h> | |
79 | #include <sys/wait.h> | |
80 | #include <sys/uio.h> | |
81 | #include <sys/mman.h> | |
82 | ||
83 | #include <linux/unistd.h> | |
84 | #include <linux/types.h> | |
85 | ||
86 | #include "../../include/linux/perf_counter.h" | |
87 | ||
6eda5838 | 88 | #include "perf.h" |
ddcacfa0 | 89 | |
16c8a109 PZ |
/*
 * Per-event modifier bits kept in event_mask[].
 *
 * match_event_symbols() sets them from the optional ":k" / ":u" suffix of a
 * numeric event spec, and create_perfstat_counter() feeds them into the
 * exclude_kernel / exclude_user fields of the counter request.
 */
#define EVENT_MASK_KERNEL	(1 << 0)
#define EVENT_MASK_USER		(1 << 1)
92 | ||
ddcacfa0 IM |
93 | static int system_wide = 0; |
94 | ||
95 | static int nr_counters = 0; | |
96 | static __u64 event_id[MAX_COUNTERS] = { | |
97 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), | |
98 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), | |
99 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), | |
100 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), | |
101 | ||
102 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), | |
103 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), | |
104 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), | |
105 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), | |
106 | }; | |
107 | static int default_interval = 100000; | |
108 | static int event_count[MAX_COUNTERS]; | |
109 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | |
16c8a109 | 110 | static int event_mask[MAX_COUNTERS]; |
ddcacfa0 IM |
111 | |
112 | static int tid = -1; | |
113 | static int profile_cpu = -1; | |
114 | static int nr_cpus = 0; | |
115 | static int nmi = 1; | |
116 | static int group = 0; | |
117 | static unsigned int page_size; | |
118 | ||
119 | static int zero; | |
120 | ||
66cf7829 | 121 | static int scale = 1; |
ddcacfa0 IM |
122 | |
/*
 * Table of six default count values.
 * NOTE(review): nothing in the visible part of this file reads it; it looks
 * like a per-event default sampling period - confirm before relying on it.
 */
static const unsigned int default_count[] = {
	1000000, 1000000,
	  10000,   10000,
	1000000,   10000,
};
131 | ||
/*
 * Display names for hardware events. event_name() indexes this table with
 * the event id extracted from the counter config, so the order matters.
 */
static char *hw_event_names[] = {
	/* 0 */ "CPU cycles",
	/* 1 */ "instructions",
	/* 2 */ "cache references",
	/* 3 */ "cache misses",
	/* 4 */ "branches",
	/* 5 */ "branch misses",
	/* 6 */ "bus cycles",
};
141 | ||
/*
 * Display names for software events. event_name() indexes this table with
 * the event id extracted from the counter config, so the order matters.
 */
static char *sw_event_names[] = {
	/* 0 */ "cpu clock ticks",
	/* 1 */ "task clock ticks",
	/* 2 */ "pagefaults",
	/* 3 */ "context switches",
	/* 4 */ "CPU migrations",
	/* 5 */ "minor faults",
	/* 6 */ "major faults",
};
151 | ||
152 | struct event_symbol { | |
153 | __u64 event; | |
154 | char *symbol; | |
155 | }; | |
156 | ||
157 | static struct event_symbol event_symbols[] = { | |
158 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, | |
159 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, | |
160 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, | |
161 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, | |
162 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, | |
163 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, | |
164 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, | |
165 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, | |
166 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, | |
167 | ||
168 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, | |
169 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, | |
170 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, | |
171 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, | |
172 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, | |
173 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, | |
174 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, | |
175 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, | |
176 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, | |
177 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, | |
178 | }; | |
179 | ||
/*
 * Extract one bit-field from a counter config word: mask it with
 * PERF_COUNTER_<name>_MASK, then shift it down by PERF_COUNTER_<name>_SHIFT.
 *
 * The argument is parenthesized so that compound expressions (for example
 * "a | b") are masked as a whole rather than only their last operand.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

/* Accessors for the individual fields of a counter config word. */
#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
187 | ||
188 | static void display_events_help(void) | |
189 | { | |
190 | unsigned int i; | |
191 | __u64 e; | |
192 | ||
193 | printf( | |
194 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | |
195 | ||
196 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | |
197 | int type, id; | |
198 | ||
199 | e = event_symbols[i].event; | |
200 | type = PERF_COUNTER_TYPE(e); | |
201 | id = PERF_COUNTER_ID(e); | |
202 | ||
203 | printf("\n %d:%d: %-20s", | |
204 | type, id, event_symbols[i].symbol); | |
205 | } | |
206 | ||
207 | printf("\n" | |
208 | " rNNN: raw PMU events (eventsel+umask)\n\n"); | |
209 | } | |
210 | ||
211 | static void display_help(void) | |
212 | { | |
213 | printf( | |
214 | "Usage: perfstat [<events...>] <cmd...>\n\n" | |
215 | "PerfStat Options (up to %d event types can be specified):\n\n", | |
216 | MAX_COUNTERS); | |
217 | ||
218 | display_events_help(); | |
219 | ||
220 | printf( | |
221 | " -l # scale counter values\n" | |
222 | " -a # system-wide collection\n"); | |
223 | exit(0); | |
224 | } | |
225 | ||
226 | static char *event_name(int ctr) | |
227 | { | |
228 | __u64 config = event_id[ctr]; | |
229 | int type = PERF_COUNTER_TYPE(config); | |
230 | int id = PERF_COUNTER_ID(config); | |
231 | static char buf[32]; | |
232 | ||
233 | if (PERF_COUNTER_RAW(config)) { | |
234 | sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); | |
235 | return buf; | |
236 | } | |
237 | ||
238 | switch (type) { | |
239 | case PERF_TYPE_HARDWARE: | |
240 | if (id < PERF_HW_EVENTS_MAX) | |
241 | return hw_event_names[id]; | |
242 | return "unknown-hardware"; | |
243 | ||
244 | case PERF_TYPE_SOFTWARE: | |
245 | if (id < PERF_SW_EVENTS_MAX) | |
246 | return sw_event_names[id]; | |
247 | return "unknown-software"; | |
248 | ||
249 | default: | |
250 | break; | |
251 | } | |
252 | ||
253 | return "unknown"; | |
254 | } | |
255 | ||
256 | /* | |
257 | * Each event can have multiple symbolic names. | |
258 | * Symbolic names are (almost) exactly matched. | |
259 | */ | |
260 | static __u64 match_event_symbols(char *str) | |
261 | { | |
262 | __u64 config, id; | |
263 | int type; | |
264 | unsigned int i; | |
16c8a109 | 265 | char mask_str[4]; |
ddcacfa0 IM |
266 | |
267 | if (sscanf(str, "r%llx", &config) == 1) | |
268 | return config | PERF_COUNTER_RAW_MASK; | |
269 | ||
16c8a109 PZ |
270 | switch (sscanf(str, "%d:%llu:%2s", &type, &id, mask_str)) { |
271 | case 3: | |
272 | if (strchr(mask_str, 'u')) | |
273 | event_mask[nr_counters] |= EVENT_MASK_USER; | |
274 | if (strchr(mask_str, 'k')) | |
275 | event_mask[nr_counters] |= EVENT_MASK_KERNEL; | |
276 | case 2: | |
277 | return EID(type, id); | |
278 | ||
279 | default: | |
280 | break; | |
281 | } | |
ddcacfa0 IM |
282 | |
283 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | |
284 | if (!strncmp(str, event_symbols[i].symbol, | |
285 | strlen(event_symbols[i].symbol))) | |
286 | return event_symbols[i].event; | |
287 | } | |
288 | ||
289 | return ~0ULL; | |
290 | } | |
291 | ||
292 | static int parse_events(char *str) | |
293 | { | |
294 | __u64 config; | |
295 | ||
296 | again: | |
297 | if (nr_counters == MAX_COUNTERS) | |
298 | return -1; | |
299 | ||
300 | config = match_event_symbols(str); | |
301 | if (config == ~0ULL) | |
302 | return -1; | |
303 | ||
304 | event_id[nr_counters] = config; | |
305 | nr_counters++; | |
306 | ||
307 | str = strstr(str, ","); | |
308 | if (str) { | |
309 | str++; | |
310 | goto again; | |
311 | } | |
312 | ||
313 | return 0; | |
314 | } | |
315 | ||
316 | ||
317 | /* | |
318 | * perfstat | |
319 | */ | |
320 | ||
/*
 * Large global byte buffer with external linkage. Nothing in the visible
 * part of this file refers to it.
 * NOTE(review): presumably a leftover scratch area for provoking page
 * faults while testing - confirm that no other file uses it before removing.
 */
321 | char fault_here[1000000]; | |
322 | ||
323 | static void create_perfstat_counter(int counter) | |
324 | { | |
325 | struct perf_counter_hw_event hw_event; | |
326 | ||
327 | memset(&hw_event, 0, sizeof(hw_event)); | |
328 | hw_event.config = event_id[counter]; | |
329 | hw_event.record_type = 0; | |
330 | hw_event.nmi = 0; | |
16c8a109 PZ |
331 | hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL; |
332 | hw_event.exclude_user = event_mask[counter] & EVENT_MASK_USER; | |
333 | ||
ddcacfa0 IM |
334 | if (scale) |
335 | hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | |
336 | PERF_FORMAT_TOTAL_TIME_RUNNING; | |
337 | ||
338 | if (system_wide) { | |
339 | int cpu; | |
340 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | |
341 | fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); | |
342 | if (fd[cpu][counter] < 0) { | |
343 | printf("perfstat error: syscall returned with %d (%s)\n", | |
344 | fd[cpu][counter], strerror(errno)); | |
345 | exit(-1); | |
346 | } | |
347 | } | |
348 | } else { | |
349 | hw_event.inherit = 1; | |
350 | hw_event.disabled = 1; | |
351 | ||
352 | fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); | |
353 | if (fd[0][counter] < 0) { | |
354 | printf("perfstat error: syscall returned with %d (%s)\n", | |
355 | fd[0][counter], strerror(errno)); | |
356 | exit(-1); | |
357 | } | |
358 | } | |
359 | } | |
360 | ||
361 | int do_perfstat(int argc, char *argv[]) | |
362 | { | |
363 | unsigned long long t0, t1; | |
364 | int counter; | |
365 | ssize_t res; | |
366 | int status; | |
367 | int pid; | |
368 | ||
369 | if (!system_wide) | |
370 | nr_cpus = 1; | |
371 | ||
372 | for (counter = 0; counter < nr_counters; counter++) | |
373 | create_perfstat_counter(counter); | |
374 | ||
375 | argc -= optind; | |
376 | argv += optind; | |
377 | ||
378 | if (!argc) | |
379 | display_help(); | |
380 | ||
381 | /* | |
382 | * Enable counters and exec the command: | |
383 | */ | |
384 | t0 = rdclock(); | |
385 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | |
386 | ||
387 | if ((pid = fork()) < 0) | |
388 | perror("failed to fork"); | |
389 | if (!pid) { | |
390 | if (execvp(argv[0], argv)) { | |
391 | perror(argv[0]); | |
392 | exit(-1); | |
393 | } | |
394 | } | |
395 | while (wait(&status) >= 0) | |
396 | ; | |
397 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | |
398 | t1 = rdclock(); | |
399 | ||
400 | fflush(stdout); | |
401 | ||
402 | fprintf(stderr, "\n"); | |
403 | fprintf(stderr, " Performance counter stats for \'%s\':\n", | |
404 | argv[0]); | |
405 | fprintf(stderr, "\n"); | |
406 | ||
407 | for (counter = 0; counter < nr_counters; counter++) { | |
408 | int cpu, nv; | |
409 | __u64 count[3], single_count[3]; | |
410 | int scaled; | |
411 | ||
412 | count[0] = count[1] = count[2] = 0; | |
413 | nv = scale ? 3 : 1; | |
414 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | |
415 | res = read(fd[cpu][counter], | |
416 | single_count, nv * sizeof(__u64)); | |
417 | assert(res == nv * sizeof(__u64)); | |
418 | ||
419 | count[0] += single_count[0]; | |
420 | if (scale) { | |
421 | count[1] += single_count[1]; | |
422 | count[2] += single_count[2]; | |
423 | } | |
424 | } | |
425 | ||
426 | scaled = 0; | |
427 | if (scale) { | |
428 | if (count[2] == 0) { | |
429 | fprintf(stderr, " %14s %-20s\n", | |
430 | "<not counted>", event_name(counter)); | |
431 | continue; | |
432 | } | |
433 | if (count[2] < count[1]) { | |
434 | scaled = 1; | |
435 | count[0] = (unsigned long long) | |
436 | ((double)count[0] * count[1] / count[2] + 0.5); | |
437 | } | |
438 | } | |
439 | ||
440 | if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || | |
441 | event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { | |
442 | ||
443 | double msecs = (double)count[0] / 1000000; | |
444 | ||
445 | fprintf(stderr, " %14.6f %-20s (msecs)", | |
446 | msecs, event_name(counter)); | |
447 | } else { | |
448 | fprintf(stderr, " %14Ld %-20s (events)", | |
449 | count[0], event_name(counter)); | |
450 | } | |
451 | if (scaled) | |
452 | fprintf(stderr, " (scaled from %.2f%%)", | |
453 | (double) count[2] / count[1] * 100); | |
454 | fprintf(stderr, "\n"); | |
455 | } | |
456 | fprintf(stderr, "\n"); | |
457 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | |
458 | (double)(t1-t0)/1e6); | |
459 | fprintf(stderr, "\n"); | |
460 | ||
461 | return 0; | |
462 | } | |
463 | ||
464 | static void process_options(int argc, char **argv) | |
465 | { | |
466 | int error = 0, counter; | |
467 | ||
468 | for (;;) { | |
469 | int option_index = 0; | |
470 | /** Options for getopt */ | |
471 | static struct option long_options[] = { | |
472 | {"count", required_argument, NULL, 'c'}, | |
473 | {"cpu", required_argument, NULL, 'C'}, | |
474 | {"delay", required_argument, NULL, 'd'}, | |
475 | {"dump_symtab", no_argument, NULL, 'D'}, | |
476 | {"event", required_argument, NULL, 'e'}, | |
477 | {"filter", required_argument, NULL, 'f'}, | |
478 | {"group", required_argument, NULL, 'g'}, | |
479 | {"help", no_argument, NULL, 'h'}, | |
480 | {"nmi", required_argument, NULL, 'n'}, | |
481 | {"munmap_info", no_argument, NULL, 'U'}, | |
482 | {"pid", required_argument, NULL, 'p'}, | |
483 | {"realtime", required_argument, NULL, 'r'}, | |
484 | {"scale", no_argument, NULL, 'l'}, | |
485 | {"symbol", required_argument, NULL, 's'}, | |
486 | {"stat", no_argument, NULL, 'S'}, | |
487 | {"vmlinux", required_argument, NULL, 'x'}, | |
488 | {"zero", no_argument, NULL, 'z'}, | |
489 | {NULL, 0, NULL, 0 } | |
490 | }; | |
491 | int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU", | |
492 | long_options, &option_index); | |
493 | if (c == -1) | |
494 | break; | |
495 | ||
496 | switch (c) { | |
497 | case 'a': system_wide = 1; break; | |
498 | case 'c': default_interval = atoi(optarg); break; | |
499 | case 'C': | |
500 | /* CPU and PID are mutually exclusive */ | |
501 | if (tid != -1) { | |
502 | printf("WARNING: CPU switch overriding PID\n"); | |
503 | sleep(1); | |
504 | tid = -1; | |
505 | } | |
506 | profile_cpu = atoi(optarg); break; | |
507 | ||
508 | case 'e': error = parse_events(optarg); break; | |
509 | ||
510 | case 'g': group = atoi(optarg); break; | |
511 | case 'h': display_help(); break; | |
512 | case 'l': scale = 1; break; | |
513 | case 'n': nmi = atoi(optarg); break; | |
514 | case 'p': | |
515 | /* CPU and PID are mutually exclusive */ | |
516 | if (profile_cpu != -1) { | |
517 | printf("WARNING: PID switch overriding CPU\n"); | |
518 | sleep(1); | |
519 | profile_cpu = -1; | |
520 | } | |
521 | tid = atoi(optarg); break; | |
522 | case 'z': zero = 1; break; | |
523 | default: error = 1; break; | |
524 | } | |
525 | } | |
526 | if (error) | |
527 | display_help(); | |
528 | ||
529 | if (!nr_counters) { | |
530 | nr_counters = 8; | |
531 | } | |
532 | ||
533 | for (counter = 0; counter < nr_counters; counter++) { | |
534 | if (event_count[counter]) | |
535 | continue; | |
536 | ||
537 | event_count[counter] = default_interval; | |
538 | } | |
539 | } | |
540 | ||
541 | int cmd_stat(int argc, char **argv, const char *prefix) | |
542 | { | |
543 | page_size = sysconf(_SC_PAGE_SIZE); | |
544 | ||
545 | process_options(argc, argv); | |
546 | ||
547 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | |
548 | assert(nr_cpus <= MAX_NR_CPUS); | |
549 | assert(nr_cpus >= 0); | |
550 | ||
551 | return do_perfstat(argc, argv); | |
552 | } |