tracing: Move fields from event to class structure
kernel/trace/trace_syscalls.c
#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

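/*
 * syscall_trace_lock protects the refcounts and bitmaps below: the
 * sys_enter/sys_exit tracepoints are registered only once, and the
 * per-syscall bitmaps select which syscalls the shared probes record.
 */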
static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

static int syscall_enter_register(struct ftrace_event_call *event,
				  enum trace_reg type);
static int syscall_exit_register(struct ftrace_event_call *event,
				  enum trace_reg type);

static int syscall_enter_define_fields(struct ftrace_event_call *call);
static int syscall_exit_define_fields(struct ftrace_event_call *call);

static struct list_head *
syscall_get_enter_fields(struct ftrace_event_call *call)
{
	struct syscall_metadata *entry = call->data;

	return &entry->enter_fields;
}

static struct list_head *
syscall_get_exit_fields(struct ftrace_event_call *call)
{
	struct syscall_metadata *entry = call->data;

	return &entry->exit_fields;
}

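/*
 * All syscall enter events share one event class and all exit events
 * share another: the reg(), define_fields() and get_fields() callbacks
 * live here rather than in each ftrace_event_call, while the field lists
 * themselves are kept in the per-syscall metadata.
 */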
struct ftrace_event_class event_class_syscall_enter = {
	.system		= "syscalls",
	.reg		= syscall_enter_register,
	.define_fields	= syscall_enter_define_fields,
	.get_fields	= syscall_get_enter_fields,
};

struct ftrace_event_class event_class_syscall_exit = {
	.system		= "syscalls",
	.reg		= syscall_exit_register,
	.define_fields	= syscall_exit_define_fields,
	.get_fields	= syscall_get_exit_fields,
};

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;

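/*
 * Each SYSCALL_DEFINEx() expansion emits a struct syscall_metadata into
 * the __syscalls_metadata linker section; find_syscall_meta() scans that
 * section for the entry whose name matches the symbol at the given
 * syscall address.
 */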
static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
	struct syscall_metadata *start;
	struct syscall_metadata *stop;
	char str[KSYM_SYMBOL_LEN];

	start = (struct syscall_metadata *)__start_syscalls_metadata;
	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

	for ( ; start < stop; start++) {
		/*
		 * Only compare after the "sys" prefix. Archs that use
		 * syscall wrappers may alias their syscall symbols with a
		 * "SyS" prefix instead of "sys", which would otherwise
		 * cause a spurious mismatch.
		 */
		if (start->name && !strcmp(start->name + 3, str + 3))
			return start;
	}
	return NULL;
}

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
		return NULL;

	return syscalls_metadata[nr];
}

enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry)
		goto end;

	if (entry->enter_event->id != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types */
		if (trace_flags & TRACE_ITER_VERBOSE) {
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				       trace->args[i],
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return TRACE_TYPE_PARTIAL_LINE;
	}

	ret = trace_seq_putc(s, ')');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

end:
	ret = trace_seq_putc(s, '\n');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;
	int ret;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry) {
		trace_seq_printf(s, "\n");
		return TRACE_TYPE_HANDLED;
	}

	if (entry->exit_event->id != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
			       trace->ret);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

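/*
 * __bad_type_size() is declared but never defined: if SYSCALL_FIELD() is
 * used with a type whose size does not match the traced structure member,
 * the reference survives to link time and the build fails.
 */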
extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name)				      \
	sizeof(type) != sizeof(trace.name) ?			      \
		__bad_type_size() :				      \
		#type, #name, offsetof(typeof(trace), name),	      \
		sizeof(trace.name), is_signed_type(type)

static
int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
	int i;
	int pos = 0;

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
	for (i = 0; i < entry->nb_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
				entry->args[i], sizeof(unsigned long),
				i == entry->nb_args - 1 ? "" : ", ");
	}
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	for (i = 0; i < entry->nb_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO,
				", ((unsigned long)(REC->%s))", entry->args[i]);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}
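
/*
 * For example, on a 64-bit build (sizeof(unsigned long) == 8), the
 * print_fmt generated above for sys_read would be:
 *
 *   "fd: 0x%08lx, buf: 0x%08lx, count: 0x%08lx",
 *   ((unsigned long)(REC->fd)), ((unsigned long)(REC->buf)),
 *   ((unsigned long)(REC->count))
 */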

static int set_syscall_print_fmt(struct ftrace_event_call *call)
{
	char *print_fmt;
	int len;
	struct syscall_metadata *entry = call->data;

	if (entry->enter_event != call) {
		call->print_fmt = "\"0x%lx\", REC->ret";
		return 0;
	}

	/* First: called with 0 length to calculate the needed length */
	len = __set_enter_print_fmt(entry, NULL, 0);

	print_fmt = kmalloc(len + 1, GFP_KERNEL);
	if (!print_fmt)
		return -ENOMEM;

	/* Second: actually write the @print_fmt */
	__set_enter_print_fmt(entry, print_fmt, len + 1);
	call->print_fmt = print_fmt;

	return 0;
}

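/*
 * Only enter events own a kmalloc()'ed print_fmt; exit events share the
 * static "0x%lx" literal assigned in set_syscall_print_fmt(), so there is
 * nothing to free for them.
 */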
static void free_syscall_print_fmt(struct ftrace_event_call *call)
{
	struct syscall_metadata *entry = call->data;

	if (entry->enter_event == call)
		kfree(call->print_fmt);
}

static int syscall_enter_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		/* don't let a later success mask an earlier failure */
		if (ret)
			break;
		offset += sizeof(unsigned long);
	}

	return ret;
}

static int syscall_exit_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_exit trace;
	int ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
				 FILTER_OTHER);

	return ret;
}

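/*
 * Probe attached to the sys_enter tracepoint: look up the syscall's
 * metadata, reserve a ring buffer event large enough for its arguments,
 * and record the syscall number plus the raw argument words.
 */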
void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->enter_event->id, size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->exit_event->id, sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

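/*
 * Registration is two-level: the first event enabled attaches the shared
 * probe to the tracepoint, and every event sets its own syscall's bit so
 * the probe records only enabled syscalls.
 */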
int reg_event_syscall_enter(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
	if (!ret) {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_enter--;
	clear_bit(num, enabled_enter_syscalls);
	if (!sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
	mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
	if (!ret) {
		set_bit(num, enabled_exit_syscalls);
		sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_exit--;
	clear_bit(num, enabled_exit_syscalls);
	if (!sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
	mutex_unlock(&syscall_trace_lock);
}

int init_syscall_trace(struct ftrace_event_call *call)
{
	int id;

	if (set_syscall_print_fmt(call) < 0)
		return -ENOMEM;

	id = trace_event_raw_init(call);

	if (id < 0) {
		free_syscall_print_fmt(call);
		return id;
	}

	return id;
}

unsigned long __init arch_syscall_addr(int nr)
{
	return (unsigned long)sys_call_table[nr];
}

int __init init_ftrace_syscalls(void)
{
	struct syscall_metadata *meta;
	unsigned long addr;
	int i;

	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
					NR_syscalls, GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	for (i = 0; i < NR_syscalls; i++) {
		addr = arch_syscall_addr(i);
		meta = find_syscall_meta(addr);
		if (!meta)
			continue;

		meta->syscall_nr = i;
		syscalls_metadata[i] = meta;
	}

	return 0;
}
core_initcall(init_ftrace_syscalls);

#ifdef CONFIG_PERF_EVENTS

static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;

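/*
 * The perf probes below mirror the ftrace ones, but hand their records to
 * perf via perf_trace_buf_prepare()/perf_trace_buf_submit() instead of
 * committing them to the ftrace ring buffer.
 */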
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	unsigned long flags;
	int syscall_nr;
	int rctx;
	int size;

	syscall_nr = syscall_get_nr(current, regs);
	/* syscall_get_nr() can fail; don't index the bitmap with a negative nr */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		      "perf buffer not large enough"))
		return;

	rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
				sys_data->enter_event->id, &rctx, &flags);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			      (unsigned long *)&rec->args);
	perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
}

int perf_sysenter_enable(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_perf_refcount_enter)
		ret = register_trace_sys_enter(perf_syscall_enter, NULL);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_perf_enter_syscalls);
		sys_perf_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void perf_sysenter_disable(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_enter--;
	clear_bit(num, enabled_perf_enter_syscalls);
	if (!sys_perf_refcount_enter)
		unregister_trace_sys_enter(perf_syscall_enter, NULL);
	mutex_unlock(&syscall_trace_lock);
}

static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	unsigned long flags;
	int syscall_nr;
	int rctx;
	int size;

	syscall_nr = syscall_get_nr(current, regs);
	/* syscall_get_nr() can fail; don't index the bitmap with a negative nr */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	/*
	 * This should never happen, but stay paranoid about the record
	 * growing in the future; ideally this check would move to build time.
	 */
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		      "exit event has grown above perf buffer size"))
		return;

	rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
				sys_data->exit_event->id, &rctx, &flags);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
}

int perf_sysexit_enable(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_perf_refcount_exit)
		ret = register_trace_sys_exit(perf_syscall_exit, NULL);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_perf_exit_syscalls);
		sys_perf_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void perf_sysexit_disable(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_exit--;
	clear_bit(num, enabled_perf_exit_syscalls);
	if (!sys_perf_refcount_exit)
		unregister_trace_sys_exit(perf_syscall_exit, NULL);
	mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_PERF_EVENTS */

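/*
 * Class-level reg() callbacks: dispatch TRACE_REG_* requests from the
 * event core to the ftrace or perf registration helpers above.
 */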
static int syscall_enter_register(struct ftrace_event_call *event,
				  enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_enter(event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_enter(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysenter_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysenter_disable(event);
		return 0;
#endif
	}
	return 0;
}

static int syscall_exit_register(struct ftrace_event_call *event,
				 enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_exit(event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_exit(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysexit_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysexit_disable(event);
		return 0;
#endif
	}
	return 0;
}