1 /*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
13 */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49 * On boot up, the ring buffer is set to the minimum size, so that
50 * we do not waste memory on systems that are not using tracing.
51 */
52 bool ring_buffer_expanded;
53
54 /*
55 * We need to change this state when a selftest is running.
56 * A selftest will look into the ring-buffer to count the
57 * entries inserted during the selftest, although some concurrent
58 * insertions into the ring-buffer, such as trace_printk, could occur
59 * at the same time, giving false positive or negative results.
60 */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64 * If a tracer is running, we do not want to run SELFTEST.
65 */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 return 0;
81 }
82
83 /*
84 * To prevent the comm cache from being overwritten when no
85 * tracing is active, only save the comm when a trace event
86 * occurred.
87 */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91 * Kill all tracing for good (never come back).
92 * It is initialized to 1 but will turn to zero if the initialization
93 * of the tracer is successful. But that is the only place that sets
94 * this back to zero.
95 */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly tracing_buffer_mask;
99
100 /*
101 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102 *
103 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104 * is set, then ftrace_dump is called. This will output the contents
105 * of the ftrace buffers to the console. This is very useful for
106 * capturing traces that lead to crashes and outputting them to a
107 * serial console.
108 *
109 * It is off by default, but you can enable it either by specifying
110 * "ftrace_dump_on_oops" on the kernel command line, or by setting
111 * /proc/sys/kernel/ftrace_dump_on_oops
112 * Set 1 if you want to dump buffers of all CPUs
113 * Set 2 if you want to dump the buffer of the CPU that triggered oops
114 */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
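/*
 * A quick usage sketch of the settings described above: the dump mode
 * can be chosen on the kernel command line, e.g.
 *
 *	ftrace_dump_on_oops		(dump the buffers of all CPUs)
 *	ftrace_dump_on_oops=orig_cpu	(dump only the CPU that triggered the oops)
 *
 * or at run time via the sysctl mentioned above:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */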
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124 struct module *mod;
125 unsigned long length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131 /*
132 * "end" is first and points to NULL, as it must be different
133 * from "mod" or "enum_string"
134 */
135 union trace_enum_map_item *next;
136 const char *end; /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142 * The trace_enum_maps are saved in an array with two extra elements,
143 * one at the beginning, and one at the end. The beginning item contains
144 * the count of the saved maps (head.length), and the module they
145 * belong to if not built in (head.mod). The ending item contains a
146 * pointer to the next array of saved enum_map items.
147 */
148 union trace_enum_map_item {
149 struct trace_enum_map map;
150 struct trace_enum_map_head head;
151 struct trace_enum_map_tail tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
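/*
 * A rough sketch of the resulting layout (illustrative only):
 *
 *	[ head: mod, length = N ][ map 0 ] ... [ map N-1 ][ tail: next, end ]
 *
 * Because head and tail are unioned with struct trace_enum_map, the
 * whole thing can be walked as a single array of
 * union trace_enum_map_item.
 */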
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE 100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 default_bootup_tracer = bootup_tracer_buf;
169 /* We are using ftrace early, expand it */
170 ring_buffer_expanded = true;
171 return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 if (*str++ != '=' || !*str) {
178 ftrace_dump_on_oops = DUMP_ALL;
179 return 1;
180 }
181
182 if (!strcmp("orig_cpu", str)) {
183 ftrace_dump_on_oops = DUMP_ORIG;
184 return 1;
185 }
186
187 return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 __disable_trace_on_warning = 1;
195 return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 allocate_snapshot = true;
202 /* We also need the main ring buffer expanded */
203 ring_buffer_expanded = true;
204 return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 trace_boot_clock = trace_boot_clock_buf;
225 return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 tracepoint_printk = 1;
233 return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239 nsec += 500;
240 do_div(nsec, 1000);
241 return nsec;
242 }
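/*
 * The "+ 500" rounds to the nearest microsecond instead of truncating,
 * e.g. ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */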
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS \
246 (FUNCTION_DEFAULT_FLAGS | \
247 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
248 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
249 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
250 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
254 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 TRACE_ITER_EVENT_FORK
259
260 /*
261 * The global_trace is the descriptor that holds the tracing
262 * buffers for the live tracing. For each CPU, it contains
263 * a linked list of pages that will store trace entries. The
264 * page descriptor of the pages in memory is used to hold
265 * the linked list by linking the lru item in the page descriptor
266 * to each of the pages in the buffer per CPU.
267 *
268 * For each active CPU there is a data field that holds the
269 * pages for the buffer for that CPU. Each CPU has the same number
270 * of pages allocated for its buffer.
271 */
272 static struct trace_array global_trace = {
273 .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 struct trace_array *tr;
281 int ret = -ENODEV;
282
283 mutex_lock(&trace_types_lock);
284 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 if (tr == this_tr) {
286 tr->ref++;
287 ret = 0;
288 break;
289 }
290 }
291 mutex_unlock(&trace_types_lock);
292
293 return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 WARN_ON(!this_tr->ref);
299 this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 mutex_lock(&trace_types_lock);
305 __trace_array_put(this_tr);
306 mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 struct ring_buffer *buffer,
311 struct ring_buffer_event *event)
312 {
313 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 !filter_match_preds(call->filter, rec)) {
315 __trace_event_discard_commit(buffer, event);
316 return 1;
317 }
318
319 return 0;
320 }
321
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 vfree(pid_list->pids);
325 kfree(pid_list);
326 }
327
328 /**
329 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330 * @filtered_pids: The list of pids to check
331 * @search_pid: The PID to find in @filtered_pids
332 *
333 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334 */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 /*
339 * If pid_max changed after filtered_pids was created, we
340 * by default ignore all pids greater than the previous pid_max.
341 */
342 if (search_pid >= filtered_pids->pid_max)
343 return false;
344
345 return test_bit(search_pid, filtered_pids->pids);
346 }
347
348 /**
349 * trace_ignore_this_task - should a task be ignored for tracing
350 * @filtered_pids: The list of pids to check
351 * @task: The task that should be ignored if not filtered
352 *
353 * Checks if @task should be traced or not from @filtered_pids.
354 * Returns true if @task should *NOT* be traced.
355 * Returns false if @task should be traced.
356 */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 /*
361 * Return false, because if filtered_pids does not exist,
362 * all pids are good to trace.
363 */
364 if (!filtered_pids)
365 return false;
366
367 return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369
370 /**
371 * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372 * @pid_list: The list to modify
373 * @self: The current task for fork or NULL for exit
374 * @task: The task to add or remove
375 *
376 * When adding a task, if @self is defined, the task is only added if @self
377 * is also included in @pid_list. This happens on fork and tasks should
378 * only be added when the parent is listed. If @self is NULL, then the
379 * @task pid will be removed from the list, which would happen on exit
380 * of a task.
381 */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 struct task_struct *self,
384 struct task_struct *task)
385 {
386 if (!pid_list)
387 return;
388
389 /* For forks, we only add if the forking task is listed */
390 if (self) {
391 if (!trace_find_filtered_pid(pid_list, self->pid))
392 return;
393 }
394
395 /* Sorry, but we don't support pid_max changing after setting */
396 if (task->pid >= pid_list->pid_max)
397 return;
398
399 /* "self" is set for forks, and NULL for exits */
400 if (self)
401 set_bit(task->pid, pid_list->pids);
402 else
403 clear_bit(task->pid, pid_list->pids);
404 }
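/*
 * A minimal sketch of how a user of this helper might hook it up to the
 * sched fork/exit tracepoints; the probe names below are illustrative
 * and the pid_list is assumed to be passed as the probe's data pointer:
 *
 *	static void probe_sched_fork(void *data, struct task_struct *self,
 *				     struct task_struct *task)
 *	{
 *		trace_filter_add_remove_task(data, self, task);
 *	}
 *
 *	static void probe_sched_exit(void *data, struct task_struct *task)
 *	{
 *		trace_filter_add_remove_task(data, NULL, task);
 *	}
 */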
405
406 /**
407 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408 * @pid_list: The pid list to show
409 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
410 * @pos: The position of the file
411 *
412 * This is used by the seq_file "next" operation to iterate the pids
413 * listed in a trace_pid_list structure.
414 *
415 * Returns the pid+1 as we want to display pid of zero, but NULL would
416 * stop the iteration.
417 */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 unsigned long pid = (unsigned long)v;
421
422 (*pos)++;
423
424 /* pid already is +1 of the actual previous bit */
425 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426
427 /* Return pid + 1 to allow zero to be represented */
428 if (pid < pid_list->pid_max)
429 return (void *)(pid + 1);
430
431 return NULL;
432 }
433
434 /**
435 * trace_pid_start - Used for seq_file to start reading pid lists
436 * @pid_list: The pid list to show
437 * @pos: The position of the file
438 *
439 * This is used by seq_file "start" operation to start the iteration
440 * of listing pids.
441 *
442 * Returns the pid+1 as we want to display pid of zero, but NULL would
443 * stop the iteration.
444 */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 unsigned long pid;
448 loff_t l = 0;
449
450 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 if (pid >= pid_list->pid_max)
452 return NULL;
453
454 /* Return pid + 1 so that zero can be the exit value */
455 for (pid++; pid && l < *pos;
456 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 ;
458 return (void *)pid;
459 }
460
461 /**
462 * trace_pid_show - show the current pid in seq_file processing
463 * @m: The seq_file structure to write into
464 * @v: A void pointer of the pid (+1) value to display
465 *
466 * Can be directly used by seq_file operations to display the current
467 * pid value.
468 */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471 unsigned long pid = (unsigned long)v - 1;
472
473 seq_printf(m, "%lu\n", pid);
474 return 0;
475 }
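/*
 * A sketch of how the helpers above could back a seq_file; the wrapper
 * names and the use of m->private to carry the pid list are assumptions
 * for the example, not something defined here:
 *
 *	static void *pid_seq_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static void pid_seq_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= pid_seq_start,
 *		.next	= pid_seq_next,
 *		.stop	= pid_seq_stop,
 *		.show	= trace_pid_show,
 *	};
 */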
476
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE 127
479
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 struct trace_pid_list **new_pid_list,
482 const char __user *ubuf, size_t cnt)
483 {
484 struct trace_pid_list *pid_list;
485 struct trace_parser parser;
486 unsigned long val;
487 int nr_pids = 0;
488 ssize_t read = 0;
489 ssize_t ret = 0;
490 loff_t pos;
491 pid_t pid;
492
493 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 return -ENOMEM;
495
496 /*
497 * Always recreate a new array. The write is an all or nothing
498 * operation: a new array is always created when the user adds
499 * new pids, and if the operation fails, then the current list
500 * is not modified.
501 */
502 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 if (!pid_list)
504 return -ENOMEM;
505
506 pid_list->pid_max = READ_ONCE(pid_max);
507
508 /* Only truncating will shrink pid_max */
509 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 pid_list->pid_max = filtered_pids->pid_max;
511
512 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 if (!pid_list->pids) {
514 kfree(pid_list);
515 return -ENOMEM;
516 }
517
518 if (filtered_pids) {
519 /* copy the current bits to the new max */
520 pid = find_first_bit(filtered_pids->pids,
521 filtered_pids->pid_max);
522 while (pid < filtered_pids->pid_max) {
523 set_bit(pid, pid_list->pids);
524 pid = find_next_bit(filtered_pids->pids,
525 filtered_pids->pid_max,
526 pid + 1);
527 nr_pids++;
528 }
529 }
530
531 while (cnt > 0) {
532
533 pos = 0;
534
535 ret = trace_get_user(&parser, ubuf, cnt, &pos);
536 if (ret < 0 || !trace_parser_loaded(&parser))
537 break;
538
539 read += ret;
540 ubuf += ret;
541 cnt -= ret;
542
543 parser.buffer[parser.idx] = 0;
544
545 ret = -EINVAL;
546 if (kstrtoul(parser.buffer, 0, &val))
547 break;
548 if (val >= pid_list->pid_max)
549 break;
550
551 pid = (pid_t)val;
552
553 set_bit(pid, pid_list->pids);
554 nr_pids++;
555
556 trace_parser_clear(&parser);
557 ret = 0;
558 }
559 trace_parser_put(&parser);
560
561 if (ret < 0) {
562 trace_free_pid_list(pid_list);
563 return ret;
564 }
565
566 if (!nr_pids) {
567 /* Cleared the list of pids */
568 trace_free_pid_list(pid_list);
569 read = ret;
570 pid_list = NULL;
571 }
572
573 *new_pid_list = pid_list;
574
575 return read;
576 }
577
578 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
579 {
580 u64 ts;
581
582 /* Early boot up does not have a buffer yet */
583 if (!buf->buffer)
584 return trace_clock_local();
585
586 ts = ring_buffer_time_stamp(buf->buffer, cpu);
587 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
588
589 return ts;
590 }
591
592 cycle_t ftrace_now(int cpu)
593 {
594 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
595 }
596
597 /**
598 * tracing_is_enabled - Show if global_trace has been enabled
599 *
600 * Shows if the global trace has been enabled or not. It uses the
601 * mirror flag "buffer_disabled" to be used in fast paths such as for
602 * the irqsoff tracer. But it may be inaccurate due to races. If you
603 * need to know the accurate state, use tracing_is_on() which is a little
604 * slower, but accurate.
605 */
606 int tracing_is_enabled(void)
607 {
608 /*
609 * For quick access (irqsoff uses this in fast path), just
610 * return the mirror variable of the state of the ring buffer.
611 * It's a little racy, but we don't really care.
612 */
613 smp_rmb();
614 return !global_trace.buffer_disabled;
615 }
616
617 /*
618 * trace_buf_size is the size in bytes that is allocated
619 * for a buffer. Note, the number of bytes is always rounded
620 * to page size.
621 *
622 * This number is purposely set to a low number of 16384.
623 * If the dump on oops happens, it will be much appreciated
624 * to not have to wait for all that output. Anyway this can be
625 * configured at both boot time and run time.
626 */
627 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
628
629 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
630
631 /* trace_types holds a linked list of available tracers. */
632 static struct tracer *trace_types __read_mostly;
633
634 /*
635 * trace_types_lock is used to protect the trace_types list.
636 */
637 DEFINE_MUTEX(trace_types_lock);
638
639 /*
640 * serialize the access of the ring buffer
641 *
642 * The ring buffer serializes readers, but that is only low level protection.
643 * The validity of the events (which are returned by ring_buffer_peek(), etc.)
644 * is not protected by the ring buffer.
645 *
646 * The content of events may become garbage if we allow other processes to
647 * consume these events concurrently:
648 * A) the page of the consumed events may become a normal page
649 * (not a reader page) in the ring buffer, and this page will be rewritten
650 * by the events producer.
651 * B) The page of the consumed events may become a page for splice_read,
652 * and this page will be returned to the system.
653 *
654 * These primitives allow multiple processes to access different cpu ring
655 * buffers concurrently.
656 *
657 * These primitives don't distinguish read-only and read-consume access.
658 * Multiple read-only accesses are also serialized.
659 */
660
661 #ifdef CONFIG_SMP
662 static DECLARE_RWSEM(all_cpu_access_lock);
663 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
664
665 static inline void trace_access_lock(int cpu)
666 {
667 if (cpu == RING_BUFFER_ALL_CPUS) {
668 /* gain it for accessing the whole ring buffer. */
669 down_write(&all_cpu_access_lock);
670 } else {
671 /* gain it for accessing a cpu ring buffer. */
672
673 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
674 down_read(&all_cpu_access_lock);
675
676 /* Secondly block other access to this @cpu ring buffer. */
677 mutex_lock(&per_cpu(cpu_access_lock, cpu));
678 }
679 }
680
681 static inline void trace_access_unlock(int cpu)
682 {
683 if (cpu == RING_BUFFER_ALL_CPUS) {
684 up_write(&all_cpu_access_lock);
685 } else {
686 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
687 up_read(&all_cpu_access_lock);
688 }
689 }
690
691 static inline void trace_access_lock_init(void)
692 {
693 int cpu;
694
695 for_each_possible_cpu(cpu)
696 mutex_init(&per_cpu(cpu_access_lock, cpu));
697 }
698
699 #else
700
701 static DEFINE_MUTEX(access_lock);
702
703 static inline void trace_access_lock(int cpu)
704 {
705 (void)cpu;
706 mutex_lock(&access_lock);
707 }
708
709 static inline void trace_access_unlock(int cpu)
710 {
711 (void)cpu;
712 mutex_unlock(&access_lock);
713 }
714
715 static inline void trace_access_lock_init(void)
716 {
717 }
718
719 #endif
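/*
 * Typical use of the primitives above, as a minimal sketch: a reader
 * brackets its consuming read of one CPU buffer with the access lock,
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, NULL);
 *	trace_access_unlock(cpu);
 *
 * while passing RING_BUFFER_ALL_CPUS to trace_access_lock() serializes
 * against every per-cpu reader at once.
 */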
720
721 #ifdef CONFIG_STACKTRACE
722 static void __ftrace_trace_stack(struct ring_buffer *buffer,
723 unsigned long flags,
724 int skip, int pc, struct pt_regs *regs);
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726 struct ring_buffer *buffer,
727 unsigned long flags,
728 int skip, int pc, struct pt_regs *regs);
729
730 #else
731 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
732 unsigned long flags,
733 int skip, int pc, struct pt_regs *regs)
734 {
735 }
736 static inline void ftrace_trace_stack(struct trace_array *tr,
737 struct ring_buffer *buffer,
738 unsigned long flags,
739 int skip, int pc, struct pt_regs *regs)
740 {
741 }
742
743 #endif
744
745 static void tracer_tracing_on(struct trace_array *tr)
746 {
747 if (tr->trace_buffer.buffer)
748 ring_buffer_record_on(tr->trace_buffer.buffer);
749 /*
750 * This flag is looked at when buffers haven't been allocated
751 * yet, or by some tracers (like irqsoff), that just want to
752 * know if the ring buffer has been disabled, but it can handle
753 * races where it gets disabled but we still do a record.
754 * As the check is in the fast path of the tracers, it is more
755 * important to be fast than accurate.
756 */
757 tr->buffer_disabled = 0;
758 /* Make the flag seen by readers */
759 smp_wmb();
760 }
761
762 /**
763 * tracing_on - enable tracing buffers
764 *
765 * This function enables tracing buffers that may have been
766 * disabled with tracing_off.
767 */
768 void tracing_on(void)
769 {
770 tracer_tracing_on(&global_trace);
771 }
772 EXPORT_SYMBOL_GPL(tracing_on);
773
774 /**
775 * __trace_puts - write a constant string into the trace buffer.
776 * @ip: The address of the caller
777 * @str: The constant string to write
778 * @size: The size of the string.
779 */
780 int __trace_puts(unsigned long ip, const char *str, int size)
781 {
782 struct ring_buffer_event *event;
783 struct ring_buffer *buffer;
784 struct print_entry *entry;
785 unsigned long irq_flags;
786 int alloc;
787 int pc;
788
789 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
790 return 0;
791
792 pc = preempt_count();
793
794 if (unlikely(tracing_selftest_running || tracing_disabled))
795 return 0;
796
797 alloc = sizeof(*entry) + size + 2; /* possible \n added */
798
799 local_save_flags(irq_flags);
800 buffer = global_trace.trace_buffer.buffer;
801 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
802 irq_flags, pc);
803 if (!event)
804 return 0;
805
806 entry = ring_buffer_event_data(event);
807 entry->ip = ip;
808
809 memcpy(&entry->buf, str, size);
810
811 /* Add a newline if necessary */
812 if (entry->buf[size - 1] != '\n') {
813 entry->buf[size] = '\n';
814 entry->buf[size + 1] = '\0';
815 } else
816 entry->buf[size] = '\0';
817
818 __buffer_unlock_commit(buffer, event);
819 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
820
821 return size;
822 }
823 EXPORT_SYMBOL_GPL(__trace_puts);
824
825 /**
826 * __trace_bputs - write the pointer to a constant string into trace buffer
827 * @ip: The address of the caller
828 * @str: The constant string to write to the buffer
829 */
830 int __trace_bputs(unsigned long ip, const char *str)
831 {
832 struct ring_buffer_event *event;
833 struct ring_buffer *buffer;
834 struct bputs_entry *entry;
835 unsigned long irq_flags;
836 int size = sizeof(struct bputs_entry);
837 int pc;
838
839 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
840 return 0;
841
842 pc = preempt_count();
843
844 if (unlikely(tracing_selftest_running || tracing_disabled))
845 return 0;
846
847 local_save_flags(irq_flags);
848 buffer = global_trace.trace_buffer.buffer;
849 event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
850 irq_flags, pc);
851 if (!event)
852 return 0;
853
854 entry = ring_buffer_event_data(event);
855 entry->ip = ip;
856 entry->str = str;
857
858 __buffer_unlock_commit(buffer, event);
859 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
860
861 return 1;
862 }
863 EXPORT_SYMBOL_GPL(__trace_bputs);
864
865 #ifdef CONFIG_TRACER_SNAPSHOT
866 /**
867 * trace_snapshot - take a snapshot of the current buffer.
868 *
869 * This causes a swap between the snapshot buffer and the current live
870 * tracing buffer. You can use this to take snapshots of the live
871 * trace when some condition is triggered, but continue to trace.
872 *
873 * Note, make sure to allocate the snapshot with either
874 * a tracing_snapshot_alloc(), or by doing it manually
875 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
876 *
877 * If the snapshot buffer is not allocated, it will stop tracing.
878 * Basically making a permanent snapshot.
879 */
880 void tracing_snapshot(void)
881 {
882 struct trace_array *tr = &global_trace;
883 struct tracer *tracer = tr->current_trace;
884 unsigned long flags;
885
886 if (in_nmi()) {
887 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
888 internal_trace_puts("*** snapshot is being ignored ***\n");
889 return;
890 }
891
892 if (!tr->allocated_snapshot) {
893 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
894 internal_trace_puts("*** stopping trace here! ***\n");
895 tracing_off();
896 return;
897 }
898
899 /* Note, snapshot can not be used when the tracer uses it */
900 if (tracer->use_max_tr) {
901 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
902 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
903 return;
904 }
905
906 local_irq_save(flags);
907 update_max_tr(tr, current, smp_processor_id());
908 local_irq_restore(flags);
909 }
910 EXPORT_SYMBOL_GPL(tracing_snapshot);
911
912 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
913 struct trace_buffer *size_buf, int cpu_id);
914 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
915
916 static int alloc_snapshot(struct trace_array *tr)
917 {
918 int ret;
919
920 if (!tr->allocated_snapshot) {
921
922 /* allocate spare buffer */
923 ret = resize_buffer_duplicate_size(&tr->max_buffer,
924 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
925 if (ret < 0)
926 return ret;
927
928 tr->allocated_snapshot = true;
929 }
930
931 return 0;
932 }
933
934 static void free_snapshot(struct trace_array *tr)
935 {
936 /*
937 * We don't free the ring buffer; instead, we resize it because
938 * the max_tr ring buffer has some state (e.g. ring->clock) and
939 * we want to preserve it.
940 */
941 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
942 set_buffer_entries(&tr->max_buffer, 1);
943 tracing_reset_online_cpus(&tr->max_buffer);
944 tr->allocated_snapshot = false;
945 }
946
947 /**
948 * tracing_alloc_snapshot - allocate snapshot buffer.
949 *
950 * This only allocates the snapshot buffer if it isn't already
951 * allocated - it doesn't also take a snapshot.
952 *
953 * This is meant to be used in cases where the snapshot buffer needs
954 * to be set up for events that can't sleep but need to be able to
955 * trigger a snapshot.
956 */
957 int tracing_alloc_snapshot(void)
958 {
959 struct trace_array *tr = &global_trace;
960 int ret;
961
962 ret = alloc_snapshot(tr);
963 WARN_ON(ret < 0);
964
965 return ret;
966 }
967 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
968
969 /**
970 * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
971 *
972 * This is similar to trace_snapshot(), but it will allocate the
973 * snapshot buffer if it isn't already allocated. Use this only
974 * where it is safe to sleep, as the allocation may sleep.
975 *
976 * This causes a swap between the snapshot buffer and the current live
977 * tracing buffer. You can use this to take snapshots of the live
978 * trace when some condition is triggered, but continue to trace.
979 */
980 void tracing_snapshot_alloc(void)
981 {
982 int ret;
983
984 ret = tracing_alloc_snapshot();
985 if (ret < 0)
986 return;
987
988 tracing_snapshot();
989 }
990 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
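/*
 * A minimal usage sketch for the snapshot API above; "condition_hit" is
 * just a stand-in for whatever trigger condition applies:
 *
 *	// during setup, from a context that may sleep
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, in the interesting path
 *	if (condition_hit)
 *		tracing_snapshot();
 *
 * tracing_snapshot_alloc() combines both steps where sleeping is safe.
 */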
991 #else
992 void tracing_snapshot(void)
993 {
994 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
995 }
996 EXPORT_SYMBOL_GPL(tracing_snapshot);
997 int tracing_alloc_snapshot(void)
998 {
999 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1000 return -ENODEV;
1001 }
1002 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1003 void tracing_snapshot_alloc(void)
1004 {
1005 /* Give warning */
1006 tracing_snapshot();
1007 }
1008 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1009 #endif /* CONFIG_TRACER_SNAPSHOT */
1010
1011 static void tracer_tracing_off(struct trace_array *tr)
1012 {
1013 if (tr->trace_buffer.buffer)
1014 ring_buffer_record_off(tr->trace_buffer.buffer);
1015 /*
1016 * This flag is looked at when buffers haven't been allocated
1017 * yet, or by some tracers (like irqsoff), that just want to
1018 * know if the ring buffer has been disabled, but it can handle
1019 * races where it gets disabled but we still do a record.
1020 * As the check is in the fast path of the tracers, it is more
1021 * important to be fast than accurate.
1022 */
1023 tr->buffer_disabled = 1;
1024 /* Make the flag seen by readers */
1025 smp_wmb();
1026 }
1027
1028 /**
1029 * tracing_off - turn off tracing buffers
1030 *
1031 * This function stops the tracing buffers from recording data.
1032 * It does not disable any overhead the tracers themselves may
1033 * be causing. This function simply causes all recording to
1034 * the ring buffers to fail.
1035 */
1036 void tracing_off(void)
1037 {
1038 tracer_tracing_off(&global_trace);
1039 }
1040 EXPORT_SYMBOL_GPL(tracing_off);
1041
1042 void disable_trace_on_warning(void)
1043 {
1044 if (__disable_trace_on_warning)
1045 tracing_off();
1046 }
1047
1048 /**
1049 * tracer_tracing_is_on - show real state of ring buffer enabled
1050 * @tr: the trace array whose ring buffer state to check
1051 *
1052 * Shows the real state of the ring buffer: whether it is enabled or not.
1053 */
1054 static int tracer_tracing_is_on(struct trace_array *tr)
1055 {
1056 if (tr->trace_buffer.buffer)
1057 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1058 return !tr->buffer_disabled;
1059 }
1060
1061 /**
1062 * tracing_is_on - show state of ring buffers enabled
1063 */
1064 int tracing_is_on(void)
1065 {
1066 return tracer_tracing_is_on(&global_trace);
1067 }
1068 EXPORT_SYMBOL_GPL(tracing_is_on);
1069
1070 static int __init set_buf_size(char *str)
1071 {
1072 unsigned long buf_size;
1073
1074 if (!str)
1075 return 0;
1076 buf_size = memparse(str, &str);
1077 /* nr_entries can not be zero */
1078 if (buf_size == 0)
1079 return 0;
1080 trace_buf_size = buf_size;
1081 return 1;
1082 }
1083 __setup("trace_buf_size=", set_buf_size);
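/*
 * A usage sketch: memparse() accepts the usual size suffixes, so for
 * example
 *
 *	trace_buf_size=4M
 *
 * on the kernel command line requests a buffer of roughly 4MB, rounded
 * to page size as noted above.
 */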
1084
1085 static int __init set_tracing_thresh(char *str)
1086 {
1087 unsigned long threshold;
1088 int ret;
1089
1090 if (!str)
1091 return 0;
1092 ret = kstrtoul(str, 0, &threshold);
1093 if (ret < 0)
1094 return 0;
1095 tracing_thresh = threshold * 1000;
1096 return 1;
1097 }
1098 __setup("tracing_thresh=", set_tracing_thresh);
1099
1100 unsigned long nsecs_to_usecs(unsigned long nsecs)
1101 {
1102 return nsecs / 1000;
1103 }
1104
1105 /*
1106 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1107 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1108 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1109 * of strings in the order that the enums were defined.
1110 */
1111 #undef C
1112 #define C(a, b) b
1113
1114 /* These must match the bit positions in trace_iterator_flags */
1115 static const char *trace_options[] = {
1116 TRACE_FLAGS
1117 NULL
1118 };
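/*
 * For illustration, if TRACE_FLAGS contained, say,
 *
 *	C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"),
 *
 * then with C(a, b) defined as b the array above expands to
 *
 *	{ "print-parent", "sym-offset", NULL };
 *
 * i.e. the strings in the same order the enum bits were defined.
 */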
1119
1120 static struct {
1121 u64 (*func)(void);
1122 const char *name;
1123 int in_ns; /* is this clock in nanoseconds? */
1124 } trace_clocks[] = {
1125 { trace_clock_local, "local", 1 },
1126 { trace_clock_global, "global", 1 },
1127 { trace_clock_counter, "counter", 0 },
1128 { trace_clock_jiffies, "uptime", 0 },
1129 { trace_clock, "perf", 1 },
1130 { ktime_get_mono_fast_ns, "mono", 1 },
1131 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1132 ARCH_TRACE_CLOCKS
1133 };
1134
1135 /*
1136 * trace_parser_get_init - gets the buffer for trace parser
1137 */
1138 int trace_parser_get_init(struct trace_parser *parser, int size)
1139 {
1140 memset(parser, 0, sizeof(*parser));
1141
1142 parser->buffer = kmalloc(size, GFP_KERNEL);
1143 if (!parser->buffer)
1144 return 1;
1145
1146 parser->size = size;
1147 return 0;
1148 }
1149
1150 /*
1151 * trace_parser_put - frees the buffer for trace parser
1152 */
1153 void trace_parser_put(struct trace_parser *parser)
1154 {
1155 kfree(parser->buffer);
1156 }
1157
1158 /*
1159 * trace_get_user - reads the user input string separated by space
1160 * (matched by isspace(ch))
1161 *
1162 * For each string found the 'struct trace_parser' is updated,
1163 * and the function returns.
1164 *
1165 * Returns number of bytes read.
1166 *
1167 * See kernel/trace/trace.h for 'struct trace_parser' details.
1168 */
1169 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1170 size_t cnt, loff_t *ppos)
1171 {
1172 char ch;
1173 size_t read = 0;
1174 ssize_t ret;
1175
1176 if (!*ppos)
1177 trace_parser_clear(parser);
1178
1179 ret = get_user(ch, ubuf++);
1180 if (ret)
1181 goto out;
1182
1183 read++;
1184 cnt--;
1185
1186 /*
1187 * The parser is not finished with the last write,
1188 * continue reading the user input without skipping spaces.
1189 */
1190 if (!parser->cont) {
1191 /* skip white space */
1192 while (cnt && isspace(ch)) {
1193 ret = get_user(ch, ubuf++);
1194 if (ret)
1195 goto out;
1196 read++;
1197 cnt--;
1198 }
1199
1200 /* only spaces were written */
1201 if (isspace(ch)) {
1202 *ppos += read;
1203 ret = read;
1204 goto out;
1205 }
1206
1207 parser->idx = 0;
1208 }
1209
1210 /* read the non-space input */
1211 while (cnt && !isspace(ch)) {
1212 if (parser->idx < parser->size - 1)
1213 parser->buffer[parser->idx++] = ch;
1214 else {
1215 ret = -EINVAL;
1216 goto out;
1217 }
1218 ret = get_user(ch, ubuf++);
1219 if (ret)
1220 goto out;
1221 read++;
1222 cnt--;
1223 }
1224
1225 /* We either got finished input or we have to wait for another call. */
1226 if (isspace(ch)) {
1227 parser->buffer[parser->idx] = 0;
1228 parser->cont = false;
1229 } else if (parser->idx < parser->size - 1) {
1230 parser->cont = true;
1231 parser->buffer[parser->idx++] = ch;
1232 } else {
1233 ret = -EINVAL;
1234 goto out;
1235 }
1236
1237 *ppos += read;
1238 ret = read;
1239
1240 out:
1241 return ret;
1242 }
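/*
 * A minimal sketch of the parser lifecycle around trace_get_user();
 * the buffer size here is arbitrary:
 *
 *	struct trace_parser parser;
 *	ssize_t ret;
 *
 *	if (trace_parser_get_init(&parser, 64))
 *		return -ENOMEM;
 *
 *	ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (ret >= 0 && trace_parser_loaded(&parser)) {
 *		parser.buffer[parser.idx] = 0;
 *		// use the parsed token in parser.buffer
 *	}
 *
 *	trace_parser_put(&parser);
 */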
1243
1244 /* TODO add a seq_buf_to_buffer() */
1245 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1246 {
1247 int len;
1248
1249 if (trace_seq_used(s) <= s->seq.readpos)
1250 return -EBUSY;
1251
1252 len = trace_seq_used(s) - s->seq.readpos;
1253 if (cnt > len)
1254 cnt = len;
1255 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1256
1257 s->seq.readpos += cnt;
1258 return cnt;
1259 }
1260
1261 unsigned long __read_mostly tracing_thresh;
1262
1263 #ifdef CONFIG_TRACER_MAX_TRACE
1264 /*
1265 * Copy the new maximum trace into the separate maximum-trace
1266 * structure. (this way the maximum trace is permanently saved,
1267 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1268 */
1269 static void
1270 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1271 {
1272 struct trace_buffer *trace_buf = &tr->trace_buffer;
1273 struct trace_buffer *max_buf = &tr->max_buffer;
1274 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1275 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1276
1277 max_buf->cpu = cpu;
1278 max_buf->time_start = data->preempt_timestamp;
1279
1280 max_data->saved_latency = tr->max_latency;
1281 max_data->critical_start = data->critical_start;
1282 max_data->critical_end = data->critical_end;
1283
1284 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1285 max_data->pid = tsk->pid;
1286 /*
1287 * If tsk == current, then use current_uid(), as that does not use
1288 * RCU. The irq tracer can be called out of RCU scope.
1289 */
1290 if (tsk == current)
1291 max_data->uid = current_uid();
1292 else
1293 max_data->uid = task_uid(tsk);
1294
1295 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1296 max_data->policy = tsk->policy;
1297 max_data->rt_priority = tsk->rt_priority;
1298
1299 /* record this task's comm */
1300 tracing_record_cmdline(tsk);
1301 }
1302
1303 /**
1304 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1305 * @tr: tracer
1306 * @tsk: the task with the latency
1307 * @cpu: The cpu that initiated the trace.
1308 *
1309 * Flip the buffers between the @tr and the max_tr and record information
1310 * about which task was the cause of this latency.
1311 */
1312 void
1313 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1314 {
1315 struct ring_buffer *buf;
1316
1317 if (tr->stop_count)
1318 return;
1319
1320 WARN_ON_ONCE(!irqs_disabled());
1321
1322 if (!tr->allocated_snapshot) {
1323 /* Only the nop tracer should hit this when disabling */
1324 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1325 return;
1326 }
1327
1328 arch_spin_lock(&tr->max_lock);
1329
1330 buf = tr->trace_buffer.buffer;
1331 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1332 tr->max_buffer.buffer = buf;
1333
1334 __update_max_tr(tr, tsk, cpu);
1335 arch_spin_unlock(&tr->max_lock);
1336 }
1337
1338 /**
1339 * update_max_tr_single - only copy one trace over, and reset the rest
1340 * @tr: tracer
1341 * @tsk: task with the latency
1342 * @cpu: the cpu of the buffer to copy.
1343 *
1344 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1345 */
1346 void
1347 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1348 {
1349 int ret;
1350
1351 if (tr->stop_count)
1352 return;
1353
1354 WARN_ON_ONCE(!irqs_disabled());
1355 if (!tr->allocated_snapshot) {
1356 /* Only the nop tracer should hit this when disabling */
1357 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1358 return;
1359 }
1360
1361 arch_spin_lock(&tr->max_lock);
1362
1363 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1364
1365 if (ret == -EBUSY) {
1366 /*
1367 * We failed to swap the buffer due to a commit taking
1368 * place on this CPU. We fail to record, but we reset
1369 * the max trace buffer (no one writes directly to it)
1370 * and flag that it failed.
1371 */
1372 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1373 "Failed to swap buffers due to commit in progress\n");
1374 }
1375
1376 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1377
1378 __update_max_tr(tr, tsk, cpu);
1379 arch_spin_unlock(&tr->max_lock);
1380 }
1381 #endif /* CONFIG_TRACER_MAX_TRACE */
1382
1383 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1384 {
1385 /* Iterators are static, they should be filled or empty */
1386 if (trace_buffer_iter(iter, iter->cpu_file))
1387 return 0;
1388
1389 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1390 full);
1391 }
1392
1393 #ifdef CONFIG_FTRACE_STARTUP_TEST
1394 static int run_tracer_selftest(struct tracer *type)
1395 {
1396 struct trace_array *tr = &global_trace;
1397 struct tracer *saved_tracer = tr->current_trace;
1398 int ret;
1399
1400 if (!type->selftest || tracing_selftest_disabled)
1401 return 0;
1402
1403 /*
1404 * Run a selftest on this tracer.
1405 * Here we reset the trace buffer, and set the current
1406 * tracer to be this tracer. The tracer can then run some
1407 * internal tracing to verify that everything is in order.
1408 * If we fail, we do not register this tracer.
1409 */
1410 tracing_reset_online_cpus(&tr->trace_buffer);
1411
1412 tr->current_trace = type;
1413
1414 #ifdef CONFIG_TRACER_MAX_TRACE
1415 if (type->use_max_tr) {
1416 /* If we expanded the buffers, make sure the max is expanded too */
1417 if (ring_buffer_expanded)
1418 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1419 RING_BUFFER_ALL_CPUS);
1420 tr->allocated_snapshot = true;
1421 }
1422 #endif
1423
1424 /* the test is responsible for initializing and enabling */
1425 pr_info("Testing tracer %s: ", type->name);
1426 ret = type->selftest(type, tr);
1427 /* the test is responsible for resetting too */
1428 tr->current_trace = saved_tracer;
1429 if (ret) {
1430 printk(KERN_CONT "FAILED!\n");
1431 /* Add the warning after printing 'FAILED' */
1432 WARN_ON(1);
1433 return -1;
1434 }
1435 /* Only reset on passing, to avoid touching corrupted buffers */
1436 tracing_reset_online_cpus(&tr->trace_buffer);
1437
1438 #ifdef CONFIG_TRACER_MAX_TRACE
1439 if (type->use_max_tr) {
1440 tr->allocated_snapshot = false;
1441
1442 /* Shrink the max buffer again */
1443 if (ring_buffer_expanded)
1444 ring_buffer_resize(tr->max_buffer.buffer, 1,
1445 RING_BUFFER_ALL_CPUS);
1446 }
1447 #endif
1448
1449 printk(KERN_CONT "PASSED\n");
1450 return 0;
1451 }
1452 #else
1453 static inline int run_tracer_selftest(struct tracer *type)
1454 {
1455 return 0;
1456 }
1457 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1458
1459 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1460
1461 static void __init apply_trace_boot_options(void);
1462
1463 /**
1464 * register_tracer - register a tracer with the ftrace system.
1465 * @type: the plugin for the tracer
1466 *
1467 * Register a new plugin tracer.
1468 */
1469 int __init register_tracer(struct tracer *type)
1470 {
1471 struct tracer *t;
1472 int ret = 0;
1473
1474 if (!type->name) {
1475 pr_info("Tracer must have a name\n");
1476 return -1;
1477 }
1478
1479 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1480 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1481 return -1;
1482 }
1483
1484 mutex_lock(&trace_types_lock);
1485
1486 tracing_selftest_running = true;
1487
1488 for (t = trace_types; t; t = t->next) {
1489 if (strcmp(type->name, t->name) == 0) {
1490 /* already found */
1491 pr_info("Tracer %s already registered\n",
1492 type->name);
1493 ret = -1;
1494 goto out;
1495 }
1496 }
1497
1498 if (!type->set_flag)
1499 type->set_flag = &dummy_set_flag;
1500 if (!type->flags) {
1501 /* allocate a dummy tracer_flags */
1502 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1503 if (!type->flags) {
1504 ret = -ENOMEM;
1505 goto out;
1506 }
1507 type->flags->val = 0;
1508 type->flags->opts = dummy_tracer_opt;
1509 } else
1510 if (!type->flags->opts)
1511 type->flags->opts = dummy_tracer_opt;
1512
1513 /* store the tracer for __set_tracer_option */
1514 type->flags->trace = type;
1515
1516 ret = run_tracer_selftest(type);
1517 if (ret < 0)
1518 goto out;
1519
1520 type->next = trace_types;
1521 trace_types = type;
1522 add_tracer_options(&global_trace, type);
1523
1524 out:
1525 tracing_selftest_running = false;
1526 mutex_unlock(&trace_types_lock);
1527
1528 if (ret || !default_bootup_tracer)
1529 goto out_unlock;
1530
1531 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1532 goto out_unlock;
1533
1534 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1535 /* Do we want this tracer to start on bootup? */
1536 tracing_set_tracer(&global_trace, type->name);
1537 default_bootup_tracer = NULL;
1538
1539 apply_trace_boot_options();
1540
1541 /* Disable other selftests, since this will break them. */
1542 tracing_selftest_disabled = true;
1543 #ifdef CONFIG_FTRACE_STARTUP_TEST
1544 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1545 type->name);
1546 #endif
1547
1548 out_unlock:
1549 return ret;
1550 }
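/*
 * A minimal sketch of what registering a plugin tracer looks like; the
 * tracer name and callbacks below are illustrative only:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */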
1551
1552 void tracing_reset(struct trace_buffer *buf, int cpu)
1553 {
1554 struct ring_buffer *buffer = buf->buffer;
1555
1556 if (!buffer)
1557 return;
1558
1559 ring_buffer_record_disable(buffer);
1560
1561 /* Make sure all commits have finished */
1562 synchronize_sched();
1563 ring_buffer_reset_cpu(buffer, cpu);
1564
1565 ring_buffer_record_enable(buffer);
1566 }
1567
1568 void tracing_reset_online_cpus(struct trace_buffer *buf)
1569 {
1570 struct ring_buffer *buffer = buf->buffer;
1571 int cpu;
1572
1573 if (!buffer)
1574 return;
1575
1576 ring_buffer_record_disable(buffer);
1577
1578 /* Make sure all commits have finished */
1579 synchronize_sched();
1580
1581 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1582
1583 for_each_online_cpu(cpu)
1584 ring_buffer_reset_cpu(buffer, cpu);
1585
1586 ring_buffer_record_enable(buffer);
1587 }
1588
1589 /* Must have trace_types_lock held */
1590 void tracing_reset_all_online_cpus(void)
1591 {
1592 struct trace_array *tr;
1593
1594 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1595 tracing_reset_online_cpus(&tr->trace_buffer);
1596 #ifdef CONFIG_TRACER_MAX_TRACE
1597 tracing_reset_online_cpus(&tr->max_buffer);
1598 #endif
1599 }
1600 }
1601
1602 #define SAVED_CMDLINES_DEFAULT 128
1603 #define NO_CMDLINE_MAP UINT_MAX
1604 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1605 struct saved_cmdlines_buffer {
1606 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1607 unsigned *map_cmdline_to_pid;
1608 unsigned cmdline_num;
1609 int cmdline_idx;
1610 char *saved_cmdlines;
1611 };
1612 static struct saved_cmdlines_buffer *savedcmd;
1613
1614 /* temporarily disable recording */
1615 static atomic_t trace_record_cmdline_disabled __read_mostly;
1616
1617 static inline char *get_saved_cmdlines(int idx)
1618 {
1619 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1620 }
1621
1622 static inline void set_cmdline(int idx, const char *cmdline)
1623 {
1624 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1625 }
1626
1627 static int allocate_cmdlines_buffer(unsigned int val,
1628 struct saved_cmdlines_buffer *s)
1629 {
1630 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1631 GFP_KERNEL);
1632 if (!s->map_cmdline_to_pid)
1633 return -ENOMEM;
1634
1635 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1636 if (!s->saved_cmdlines) {
1637 kfree(s->map_cmdline_to_pid);
1638 return -ENOMEM;
1639 }
1640
1641 s->cmdline_idx = 0;
1642 s->cmdline_num = val;
1643 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1644 sizeof(s->map_pid_to_cmdline));
1645 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1646 val * sizeof(*s->map_cmdline_to_pid));
1647
1648 return 0;
1649 }
1650
1651 static int trace_create_savedcmd(void)
1652 {
1653 int ret;
1654
1655 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1656 if (!savedcmd)
1657 return -ENOMEM;
1658
1659 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1660 if (ret < 0) {
1661 kfree(savedcmd);
1662 savedcmd = NULL;
1663 return -ENOMEM;
1664 }
1665
1666 return 0;
1667 }
1668
1669 int is_tracing_stopped(void)
1670 {
1671 return global_trace.stop_count;
1672 }
1673
1674 /**
1675 * tracing_start - quick start of the tracer
1676 *
1677 * If tracing is enabled but was stopped by tracing_stop,
1678 * this will start the tracer back up.
1679 */
1680 void tracing_start(void)
1681 {
1682 struct ring_buffer *buffer;
1683 unsigned long flags;
1684
1685 if (tracing_disabled)
1686 return;
1687
1688 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1689 if (--global_trace.stop_count) {
1690 if (global_trace.stop_count < 0) {
1691 /* Someone screwed up their debugging */
1692 WARN_ON_ONCE(1);
1693 global_trace.stop_count = 0;
1694 }
1695 goto out;
1696 }
1697
1698 /* Prevent the buffers from switching */
1699 arch_spin_lock(&global_trace.max_lock);
1700
1701 buffer = global_trace.trace_buffer.buffer;
1702 if (buffer)
1703 ring_buffer_record_enable(buffer);
1704
1705 #ifdef CONFIG_TRACER_MAX_TRACE
1706 buffer = global_trace.max_buffer.buffer;
1707 if (buffer)
1708 ring_buffer_record_enable(buffer);
1709 #endif
1710
1711 arch_spin_unlock(&global_trace.max_lock);
1712
1713 out:
1714 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1715 }
1716
1717 static void tracing_start_tr(struct trace_array *tr)
1718 {
1719 struct ring_buffer *buffer;
1720 unsigned long flags;
1721
1722 if (tracing_disabled)
1723 return;
1724
1725 /* If global, we need to also start the max tracer */
1726 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1727 return tracing_start();
1728
1729 raw_spin_lock_irqsave(&tr->start_lock, flags);
1730
1731 if (--tr->stop_count) {
1732 if (tr->stop_count < 0) {
1733 /* Someone screwed up their debugging */
1734 WARN_ON_ONCE(1);
1735 tr->stop_count = 0;
1736 }
1737 goto out;
1738 }
1739
1740 buffer = tr->trace_buffer.buffer;
1741 if (buffer)
1742 ring_buffer_record_enable(buffer);
1743
1744 out:
1745 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1746 }
1747
1748 /**
1749 * tracing_stop - quick stop of the tracer
1750 *
1751 * Lightweight way to stop tracing. Use in conjunction with
1752 * tracing_start.
1753 */
1754 void tracing_stop(void)
1755 {
1756 struct ring_buffer *buffer;
1757 unsigned long flags;
1758
1759 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1760 if (global_trace.stop_count++)
1761 goto out;
1762
1763 /* Prevent the buffers from switching */
1764 arch_spin_lock(&global_trace.max_lock);
1765
1766 buffer = global_trace.trace_buffer.buffer;
1767 if (buffer)
1768 ring_buffer_record_disable(buffer);
1769
1770 #ifdef CONFIG_TRACER_MAX_TRACE
1771 buffer = global_trace.max_buffer.buffer;
1772 if (buffer)
1773 ring_buffer_record_disable(buffer);
1774 #endif
1775
1776 arch_spin_unlock(&global_trace.max_lock);
1777
1778 out:
1779 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1780 }
1781
1782 static void tracing_stop_tr(struct trace_array *tr)
1783 {
1784 struct ring_buffer *buffer;
1785 unsigned long flags;
1786
1787 /* If global, we need to also stop the max tracer */
1788 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1789 return tracing_stop();
1790
1791 raw_spin_lock_irqsave(&tr->start_lock, flags);
1792 if (tr->stop_count++)
1793 goto out;
1794
1795 buffer = tr->trace_buffer.buffer;
1796 if (buffer)
1797 ring_buffer_record_disable(buffer);
1798
1799 out:
1800 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1801 }
1802
1803 void trace_stop_cmdline_recording(void);
1804
1805 static int trace_save_cmdline(struct task_struct *tsk)
1806 {
1807 unsigned pid, idx;
1808
1809 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1810 return 0;
1811
1812 /*
1813 * It's not the end of the world if we don't get
1814 * the lock, but we also don't want to spin
1815 * nor do we want to disable interrupts,
1816 * so if we miss here, then better luck next time.
1817 */
1818 if (!arch_spin_trylock(&trace_cmdline_lock))
1819 return 0;
1820
1821 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1822 if (idx == NO_CMDLINE_MAP) {
1823 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1824
1825 /*
1826 * Check whether the cmdline buffer at idx has a pid
1827 * mapped. We are going to overwrite that entry so we
1828 * need to clear the map_pid_to_cmdline. Otherwise we
1829 * would read the new comm for the old pid.
1830 */
1831 pid = savedcmd->map_cmdline_to_pid[idx];
1832 if (pid != NO_CMDLINE_MAP)
1833 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1834
1835 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1836 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1837
1838 savedcmd->cmdline_idx = idx;
1839 }
1840
1841 set_cmdline(idx, tsk->comm);
1842
1843 arch_spin_unlock(&trace_cmdline_lock);
1844
1845 return 1;
1846 }
1847
1848 static void __trace_find_cmdline(int pid, char comm[])
1849 {
1850 unsigned map;
1851
1852 if (!pid) {
1853 strcpy(comm, "<idle>");
1854 return;
1855 }
1856
1857 if (WARN_ON_ONCE(pid < 0)) {
1858 strcpy(comm, "<XXX>");
1859 return;
1860 }
1861
1862 if (pid > PID_MAX_DEFAULT) {
1863 strcpy(comm, "<...>");
1864 return;
1865 }
1866
1867 map = savedcmd->map_pid_to_cmdline[pid];
1868 if (map != NO_CMDLINE_MAP)
1869 strcpy(comm, get_saved_cmdlines(map));
1870 else
1871 strcpy(comm, "<...>");
1872 }
1873
1874 void trace_find_cmdline(int pid, char comm[])
1875 {
1876 preempt_disable();
1877 arch_spin_lock(&trace_cmdline_lock);
1878
1879 __trace_find_cmdline(pid, comm);
1880
1881 arch_spin_unlock(&trace_cmdline_lock);
1882 preempt_enable();
1883 }
1884
1885 void tracing_record_cmdline(struct task_struct *tsk)
1886 {
1887 if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1888 return;
1889
1890 if (!__this_cpu_read(trace_cmdline_save))
1891 return;
1892
1893 if (trace_save_cmdline(tsk))
1894 __this_cpu_write(trace_cmdline_save, false);
1895 }
1896
1897 void
1898 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1899 int pc)
1900 {
1901 struct task_struct *tsk = current;
1902
1903 entry->preempt_count = pc & 0xff;
1904 entry->pid = (tsk) ? tsk->pid : 0;
1905 entry->flags =
1906 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1907 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1908 #else
1909 TRACE_FLAG_IRQS_NOSUPPORT |
1910 #endif
1911 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
1912 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1913 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1914 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1915 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1916 }
1917 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1918
1919 static __always_inline void
1920 trace_event_setup(struct ring_buffer_event *event,
1921 int type, unsigned long flags, int pc)
1922 {
1923 struct trace_entry *ent = ring_buffer_event_data(event);
1924
1925 tracing_generic_entry_update(ent, flags, pc);
1926 ent->type = type;
1927 }
1928
1929 struct ring_buffer_event *
1930 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1931 int type,
1932 unsigned long len,
1933 unsigned long flags, int pc)
1934 {
1935 struct ring_buffer_event *event;
1936
1937 event = ring_buffer_lock_reserve(buffer, len);
1938 if (event != NULL)
1939 trace_event_setup(event, type, flags, pc);
1940
1941 return event;
1942 }
1943
1944 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1945 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1946 static int trace_buffered_event_ref;
1947
1948 /**
1949 * trace_buffered_event_enable - enable buffering events
1950 *
1951 * When events are being filtered, it is quicker to use a temporary
1952 * buffer to write the event data into if there's a likely chance
1953 * that it will not be committed. The discard of the ring buffer
1954 * is not as fast as committing, and is much slower than copying
1955 * a commit.
1956 *
1957 * When an event is to be filtered, allocate per cpu buffers to
1958 * write the event data into, and if the event is filtered and discarded
1959 * it is simply dropped, otherwise, the entire data is to be committed
1960 * in one shot.
1961 */
1962 void trace_buffered_event_enable(void)
1963 {
1964 struct ring_buffer_event *event;
1965 struct page *page;
1966 int cpu;
1967
1968 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1969
1970 if (trace_buffered_event_ref++)
1971 return;
1972
1973 for_each_tracing_cpu(cpu) {
1974 page = alloc_pages_node(cpu_to_node(cpu),
1975 GFP_KERNEL | __GFP_NORETRY, 0);
1976 if (!page)
1977 goto failed;
1978
1979 event = page_address(page);
1980 memset(event, 0, sizeof(*event));
1981
1982 per_cpu(trace_buffered_event, cpu) = event;
1983
1984 preempt_disable();
1985 if (cpu == smp_processor_id() &&
1986 this_cpu_read(trace_buffered_event) !=
1987 per_cpu(trace_buffered_event, cpu))
1988 WARN_ON_ONCE(1);
1989 preempt_enable();
1990 }
1991
1992 return;
1993 failed:
1994 trace_buffered_event_disable();
1995 }
1996
1997 static void enable_trace_buffered_event(void *data)
1998 {
1999 /* Probably not needed, but do it anyway */
2000 smp_rmb();
2001 this_cpu_dec(trace_buffered_event_cnt);
2002 }
2003
2004 static void disable_trace_buffered_event(void *data)
2005 {
2006 this_cpu_inc(trace_buffered_event_cnt);
2007 }
2008
2009 /**
2010 * trace_buffered_event_disable - disable buffering events
2011 *
2012 * When a filter is removed, it is faster to not use the buffered
2013 * events, and to commit directly into the ring buffer. Free up
2014 * the temp buffers when there are no more users. This requires
2015 * special synchronization with current events.
2016 */
2017 void trace_buffered_event_disable(void)
2018 {
2019 int cpu;
2020
2021 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2022
2023 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2024 return;
2025
2026 if (--trace_buffered_event_ref)
2027 return;
2028
2029 preempt_disable();
2030 /* For each CPU, set the buffer as used. */
2031 smp_call_function_many(tracing_buffer_mask,
2032 disable_trace_buffered_event, NULL, 1);
2033 preempt_enable();
2034
2035 /* Wait for all current users to finish */
2036 synchronize_sched();
2037
2038 for_each_tracing_cpu(cpu) {
2039 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2040 per_cpu(trace_buffered_event, cpu) = NULL;
2041 }
2042 /*
2043 * Make sure trace_buffered_event is NULL before clearing
2044 * trace_buffered_event_cnt.
2045 */
2046 smp_wmb();
2047
2048 preempt_disable();
2049 /* Do the work on each cpu */
2050 smp_call_function_many(tracing_buffer_mask,
2051 enable_trace_buffered_event, NULL, 1);
2052 preempt_enable();
2053 }
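/*
 * Example (illustrative sketch): callers pair the enable/disable under
 * event_mutex when a filter is attached to or removed from an event.
 * The filter helpers named here are hypothetical placeholders.
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	attach_my_filter(file);			// hypothetical
 *	mutex_unlock(&event_mutex);
 *
 *	...
 *
 *	mutex_lock(&event_mutex);
 *	remove_my_filter(file);			// hypothetical
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */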
2054
2055 void
2056 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2057 {
2058 __this_cpu_write(trace_cmdline_save, true);
2059
2060 /* If this is the temp buffer, we need to commit fully */
2061 if (this_cpu_read(trace_buffered_event) == event) {
2062 /* Length is in event->array[0] */
2063 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2064 /* Release the temp buffer */
2065 this_cpu_dec(trace_buffered_event_cnt);
2066 } else
2067 ring_buffer_unlock_commit(buffer, event);
2068 }
2069
2070 static struct ring_buffer *temp_buffer;
2071
2072 struct ring_buffer_event *
2073 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2074 struct trace_event_file *trace_file,
2075 int type, unsigned long len,
2076 unsigned long flags, int pc)
2077 {
2078 struct ring_buffer_event *entry;
2079 int val;
2080
2081 *current_rb = trace_file->tr->trace_buffer.buffer;
2082
2083 if ((trace_file->flags &
2084 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2085 (entry = this_cpu_read(trace_buffered_event))) {
2086 /* Try to use the per cpu buffer first */
2087 val = this_cpu_inc_return(trace_buffered_event_cnt);
2088 if (val == 1) {
2089 trace_event_setup(entry, type, flags, pc);
2090 entry->array[0] = len;
2091 return entry;
2092 }
2093 this_cpu_dec(trace_buffered_event_cnt);
2094 }
2095
2096 entry = trace_buffer_lock_reserve(*current_rb,
2097 type, len, flags, pc);
2098 /*
2099 * If tracing is off, but we have triggers enabled,
2100 * we still need to look at the event data. Use the temp_buffer
2101 * to store the trace event for the trigger to use. It's recursion
2102 * safe and will not be recorded anywhere.
2103 */
2104 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2105 *current_rb = temp_buffer;
2106 entry = trace_buffer_lock_reserve(*current_rb,
2107 type, len, flags, pc);
2108 }
2109 return entry;
2110 }
2111 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
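/*
 * Example (illustrative sketch): a trace event records itself with the
 * usual reserve/fill/commit pattern, roughly as below. The entry type and
 * field are hypothetical, and flags, pc, event_type and trace_file come
 * from the caller's context; real events go through generated helpers.
 *
 *	struct ring_buffer *buffer;
 *	struct ring_buffer_event *event;
 *	struct my_entry *entry;			// hypothetical entry type
 *
 *	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
 *						event_type, sizeof(*entry),
 *						flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->value = value;			// fill in the payload
 *	__buffer_unlock_commit(buffer, event);
 */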
2112
2113 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2114 struct ring_buffer *buffer,
2115 struct ring_buffer_event *event,
2116 unsigned long flags, int pc,
2117 struct pt_regs *regs)
2118 {
2119 __buffer_unlock_commit(buffer, event);
2120
2121 ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
2122 ftrace_trace_userstack(buffer, flags, pc);
2123 }
2124
2125 void
2126 trace_function(struct trace_array *tr,
2127 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2128 int pc)
2129 {
2130 struct trace_event_call *call = &event_function;
2131 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2132 struct ring_buffer_event *event;
2133 struct ftrace_entry *entry;
2134
2135 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2136 flags, pc);
2137 if (!event)
2138 return;
2139 entry = ring_buffer_event_data(event);
2140 entry->ip = ip;
2141 entry->parent_ip = parent_ip;
2142
2143 if (!call_filter_check_discard(call, entry, buffer, event))
2144 __buffer_unlock_commit(buffer, event);
2145 }
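/*
 * Example (illustrative sketch): a function tracer callback feeds the
 * instruction pointers straight into trace_function(). The callback name
 * is hypothetical; the signature matches ftrace_func_t.
 *
 *	static void my_function_call(unsigned long ip, unsigned long parent_ip,
 *				     struct ftrace_ops *op, struct pt_regs *regs)
 *	{
 *		struct trace_array *tr = op->private;
 *		unsigned long flags;
 *
 *		local_save_flags(flags);
 *		trace_function(tr, ip, parent_ip, flags, preempt_count());
 *	}
 */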
2146
2147 #ifdef CONFIG_STACKTRACE
2148
2149 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2150 struct ftrace_stack {
2151 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2152 };
2153
2154 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2155 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2156
2157 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2158 unsigned long flags,
2159 int skip, int pc, struct pt_regs *regs)
2160 {
2161 struct trace_event_call *call = &event_kernel_stack;
2162 struct ring_buffer_event *event;
2163 struct stack_entry *entry;
2164 struct stack_trace trace;
2165 int use_stack;
2166 int size = FTRACE_STACK_ENTRIES;
2167
2168 trace.nr_entries = 0;
2169 trace.skip = skip;
2170
2171 /*
2172 * Since events can happen in NMIs there's no safe way to
2173 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2174 * or NMI comes in, it will just have to use the default
2175 * FTRACE_STACK_ENTRIES saved directly into the ring buffer entry.
2176 */
2177 preempt_disable_notrace();
2178
2179 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2180 /*
2181 * We don't need any atomic variables, just a barrier.
2182 * If an interrupt comes in, we don't care, because it would
2183 * have exited and put the counter back to what we want.
2184 * We just need a barrier to keep gcc from moving things
2185 * around.
2186 */
2187 barrier();
2188 if (use_stack == 1) {
2189 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2190 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2191
2192 if (regs)
2193 save_stack_trace_regs(regs, &trace);
2194 else
2195 save_stack_trace(&trace);
2196
2197 if (trace.nr_entries > size)
2198 size = trace.nr_entries;
2199 } else
2200 /* From now on, use_stack is a boolean */
2201 use_stack = 0;
2202
2203 size *= sizeof(unsigned long);
2204
2205 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2206 sizeof(*entry) + size, flags, pc);
2207 if (!event)
2208 goto out;
2209 entry = ring_buffer_event_data(event);
2210
2211 memset(&entry->caller, 0, size);
2212
2213 if (use_stack)
2214 memcpy(&entry->caller, trace.entries,
2215 trace.nr_entries * sizeof(unsigned long));
2216 else {
2217 trace.max_entries = FTRACE_STACK_ENTRIES;
2218 trace.entries = entry->caller;
2219 if (regs)
2220 save_stack_trace_regs(regs, &trace);
2221 else
2222 save_stack_trace(&trace);
2223 }
2224
2225 entry->size = trace.nr_entries;
2226
2227 if (!call_filter_check_discard(call, entry, buffer, event))
2228 __buffer_unlock_commit(buffer, event);
2229
2230 out:
2231 /* Again, don't let gcc optimize things here */
2232 barrier();
2233 __this_cpu_dec(ftrace_stack_reserve);
2234 preempt_enable_notrace();
2235
2236 }
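/*
 * Illustrative note: the reservation counter above makes nesting safe
 * without atomics. If an NMI fires while this CPU is already saving a
 * stack trace:
 *
 *	first level:  __this_cpu_inc_return() == 1 -> use the per cpu
 *						      ftrace_stack
 *	nested (NMI): __this_cpu_inc_return() == 2 -> save directly into
 *						      the ring buffer entry
 *
 * and each level decrements the counter again on its way out.
 */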
2237
2238 static inline void ftrace_trace_stack(struct trace_array *tr,
2239 struct ring_buffer *buffer,
2240 unsigned long flags,
2241 int skip, int pc, struct pt_regs *regs)
2242 {
2243 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2244 return;
2245
2246 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2247 }
2248
2249 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2250 int pc)
2251 {
2252 __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2253 }
2254
2255 /**
2256 * trace_dump_stack - record a stack back trace in the trace buffer
2257 * @skip: Number of functions to skip (helper handlers)
2258 */
2259 void trace_dump_stack(int skip)
2260 {
2261 unsigned long flags;
2262
2263 if (tracing_disabled || tracing_selftest_running)
2264 return;
2265
2266 local_save_flags(flags);
2267
2268 /*
2269 * Skip 3 more; that seems to get us to the caller of
2270 * this function.
2271 */
2272 skip += 3;
2273 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2274 flags, skip, preempt_count(), NULL);
2275 }
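/*
 * Example (illustrative usage): a debug site can record a backtrace into
 * the trace buffer instead of spamming the console. The condition below is
 * hypothetical.
 *
 *	if (unexpected_state)			// hypothetical condition
 *		trace_dump_stack(0);		// 0: skip no extra callers
 */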
2276
2277 static DEFINE_PER_CPU(int, user_stack_count);
2278
2279 void
2280 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2281 {
2282 struct trace_event_call *call = &event_user_stack;
2283 struct ring_buffer_event *event;
2284 struct userstack_entry *entry;
2285 struct stack_trace trace;
2286
2287 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2288 return;
2289
2290 /*
2291 * NMIs can not handle page faults, even with fixups.
2292 * Saving the user stack can (and often does) fault.
2293 */
2294 if (unlikely(in_nmi()))
2295 return;
2296
2297 /*
2298 * prevent recursion, since the user stack tracing may
2299 * trigger other kernel events.
2300 */
2301 preempt_disable();
2302 if (__this_cpu_read(user_stack_count))
2303 goto out;
2304
2305 __this_cpu_inc(user_stack_count);
2306
2307 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2308 sizeof(*entry), flags, pc);
2309 if (!event)
2310 goto out_drop_count;
2311 entry = ring_buffer_event_data(event);
2312
2313 entry->tgid = current->tgid;
2314 memset(&entry->caller, 0, sizeof(entry->caller));
2315
2316 trace.nr_entries = 0;
2317 trace.max_entries = FTRACE_STACK_ENTRIES;
2318 trace.skip = 0;
2319 trace.entries = entry->caller;
2320
2321 save_stack_trace_user(&trace);
2322 if (!call_filter_check_discard(call, entry, buffer, event))
2323 __buffer_unlock_commit(buffer, event);
2324
2325 out_drop_count:
2326 __this_cpu_dec(user_stack_count);
2327 out:
2328 preempt_enable();
2329 }
2330
2331 #ifdef UNUSED
2332 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2333 {
2334 ftrace_trace_userstack(tr, flags, preempt_count());
2335 }
2336 #endif /* UNUSED */
2337
2338 #endif /* CONFIG_STACKTRACE */
2339
2340 /* created for use with alloc_percpu */
2341 struct trace_buffer_struct {
2342 char buffer[TRACE_BUF_SIZE];
2343 };
2344
2345 static struct trace_buffer_struct *trace_percpu_buffer;
2346 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
2347 static struct trace_buffer_struct *trace_percpu_irq_buffer;
2348 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
2349
2350 /*
2351 * The buffer used is dependent on the context. There is a per cpu
2352 * buffer for normal context, softirq context, hard irq context and
2353 * for NMI context. This allows for lockless recording.
2354 *
2355 * Note, if the buffers failed to be allocated, then this returns NULL
2356 */
2357 static char *get_trace_buf(void)
2358 {
2359 struct trace_buffer_struct *percpu_buffer;
2360
2361 /*
2362 * If we have allocated per cpu buffers, then we do not
2363 * need to do any locking.
2364 */
2365 if (in_nmi())
2366 percpu_buffer = trace_percpu_nmi_buffer;
2367 else if (in_irq())
2368 percpu_buffer = trace_percpu_irq_buffer;
2369 else if (in_softirq())
2370 percpu_buffer = trace_percpu_sirq_buffer;
2371 else
2372 percpu_buffer = trace_percpu_buffer;
2373
2374 if (!percpu_buffer)
2375 return NULL;
2376
2377 return this_cpu_ptr(&percpu_buffer->buffer[0]);
2378 }
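/*
 * Illustrative note: the per context split keeps nested users from
 * clobbering each other. If an interrupt arrives in the middle of a
 * trace_printk() in process context on the same CPU:
 *
 *	process context -> this CPU's trace_percpu_buffer
 *	irq handler     -> this CPU's trace_percpu_irq_buffer
 *
 * so each context formats into its own buffer without locking.
 */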
2379
2380 static int alloc_percpu_trace_buffer(void)
2381 {
2382 struct trace_buffer_struct *buffers;
2383 struct trace_buffer_struct *sirq_buffers;
2384 struct trace_buffer_struct *irq_buffers;
2385 struct trace_buffer_struct *nmi_buffers;
2386
2387 buffers = alloc_percpu(struct trace_buffer_struct);
2388 if (!buffers)
2389 goto err_warn;
2390
2391 sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2392 if (!sirq_buffers)
2393 goto err_sirq;
2394
2395 irq_buffers = alloc_percpu(struct trace_buffer_struct);
2396 if (!irq_buffers)
2397 goto err_irq;
2398
2399 nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2400 if (!nmi_buffers)
2401 goto err_nmi;
2402
2403 trace_percpu_buffer = buffers;
2404 trace_percpu_sirq_buffer = sirq_buffers;
2405 trace_percpu_irq_buffer = irq_buffers;
2406 trace_percpu_nmi_buffer = nmi_buffers;
2407
2408 return 0;
2409
2410 err_nmi:
2411 free_percpu(irq_buffers);
2412 err_irq:
2413 free_percpu(sirq_buffers);
2414 err_sirq:
2415 free_percpu(buffers);
2416 err_warn:
2417 WARN(1, "Could not allocate percpu trace_printk buffer");
2418 return -ENOMEM;
2419 }
2420
2421 static int buffers_allocated;
2422
2423 void trace_printk_init_buffers(void)
2424 {
2425 if (buffers_allocated)
2426 return;
2427
2428 if (alloc_percpu_trace_buffer())
2429 return;
2430
2431 /* trace_printk() is for debug use only. Don't use it in production. */
2432
2433 pr_warn("\n");
2434 pr_warn("**********************************************************\n");
2435 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2436 pr_warn("** **\n");
2437 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2438 pr_warn("** **\n");
2439 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2440 pr_warn("** unsafe for production use. **\n");
2441 pr_warn("** **\n");
2442 pr_warn("** If you see this message and you are not debugging **\n");
2443 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2444 pr_warn("** **\n");
2445 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2446 pr_warn("**********************************************************\n");
2447
2448 /* Expand the buffers to the set size */
2449 tracing_update_buffers();
2450
2451 buffers_allocated = 1;
2452
2453 /*
2454 * trace_printk_init_buffers() can be called by modules.
2455 * If that happens, then we need to start cmdline recording
2456 * directly here. If the global_trace.buffer is already
2457 * allocated here, then this was called by module code.
2458 */
2459 if (global_trace.trace_buffer.buffer)
2460 tracing_start_cmdline_record();
2461 }
2462
2463 void trace_printk_start_comm(void)
2464 {
2465 /* Start tracing comms if trace printk is set */
2466 if (!buffers_allocated)
2467 return;
2468 tracing_start_cmdline_record();
2469 }
2470
2471 static void trace_printk_start_stop_comm(int enabled)
2472 {
2473 if (!buffers_allocated)
2474 return;
2475
2476 if (enabled)
2477 tracing_start_cmdline_record();
2478 else
2479 tracing_stop_cmdline_record();
2480 }
2481
2482 /**
2483 * trace_vbprintk - write binary msg to tracing buffer
2484 * @ip: caller address; @fmt: binary printf format string; @args: arguments for @fmt
2485 */
2486 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2487 {
2488 struct trace_event_call *call = &event_bprint;
2489 struct ring_buffer_event *event;
2490 struct ring_buffer *buffer;
2491 struct trace_array *tr = &global_trace;
2492 struct bprint_entry *entry;
2493 unsigned long flags;
2494 char *tbuffer;
2495 int len = 0, size, pc;
2496
2497 if (unlikely(tracing_selftest_running || tracing_disabled))
2498 return 0;
2499
2500 /* Don't pollute graph traces with trace_vprintk internals */
2501 pause_graph_tracing();
2502
2503 pc = preempt_count();
2504 preempt_disable_notrace();
2505
2506 tbuffer = get_trace_buf();
2507 if (!tbuffer) {
2508 len = 0;
2509 goto out;
2510 }
2511
2512 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2513
2514 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2515 goto out;
2516
2517 local_save_flags(flags);
2518 size = sizeof(*entry) + sizeof(u32) * len;
2519 buffer = tr->trace_buffer.buffer;
2520 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2521 flags, pc);
2522 if (!event)
2523 goto out;
2524 entry = ring_buffer_event_data(event);
2525 entry->ip = ip;
2526 entry->fmt = fmt;
2527
2528 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2529 if (!call_filter_check_discard(call, entry, buffer, event)) {
2530 __buffer_unlock_commit(buffer, event);
2531 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2532 }
2533
2534 out:
2535 preempt_enable_notrace();
2536 unpause_graph_tracing();
2537
2538 return len;
2539 }
2540 EXPORT_SYMBOL_GPL(trace_vbprintk);
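/*
 * Example (illustrative usage): trace_vbprintk() is the path behind the
 * trace_printk() macro when a constant format is used with arguments, e.g.:
 *
 *	trace_printk("processing item %d, state %lu\n", id, state);
 *
 * Only the format pointer and the binary-packed arguments are stored in
 * the bprint entry; the expensive string formatting is deferred until the
 * trace is read.
 */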
2541
2542 static int
2543 __trace_array_vprintk(struct ring_buffer *buffer,
2544 unsigned long ip, const char *fmt, va_list args)
2545 {
2546 struct trace_event_call *call = &event_print;
2547 struct ring_buffer_event *event;
2548 int len = 0, size, pc;
2549 struct print_entry *entry;
2550 unsigned long flags;
2551 char *tbuffer;
2552
2553 if (tracing_disabled || tracing_selftest_running)
2554 return 0;
2555
2556 /* Don't pollute graph traces with trace_vprintk internals */
2557 pause_graph_tracing();
2558
2559 pc = preempt_count();
2560 preempt_disable_notrace();
2561
2562
2563 tbuffer = get_trace_buf();
2564 if (!tbuffer) {
2565 len = 0;
2566 goto out;
2567 }
2568
2569 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2570
2571 local_save_flags(flags);
2572 size = sizeof(*entry) + len + 1;
2573 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2574 flags, pc);
2575 if (!event)
2576 goto out;
2577 entry = ring_buffer_event_data(event);
2578 entry->ip = ip;
2579
2580 memcpy(&entry->buf, tbuffer, len + 1);
2581 if (!call_filter_check_discard(call, entry, buffer, event)) {
2582 __buffer_unlock_commit(buffer, event);
2583 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2584 }
2585 out:
2586 preempt_enable_notrace();
2587 unpause_graph_tracing();
2588
2589 return len;
2590 }
2591
2592 int trace_array_vprintk(struct trace_array *tr,
2593 unsigned long ip, const char *fmt, va_list args)
2594 {
2595 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2596 }
2597
2598 int trace_array_printk(struct trace_array *tr,
2599 unsigned long ip, const char *fmt, ...)
2600 {
2601 int ret;
2602 va_list ap;
2603
2604 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2605 return 0;
2606
2607 va_start(ap, fmt);
2608 ret = trace_array_vprintk(tr, ip, fmt, ap);
2609 va_end(ap);
2610 return ret;
2611 }
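/*
 * Example (illustrative sketch): code that owns a trace instance can write
 * printk style messages into just that instance's buffer. The instance
 * pointer below is hypothetical.
 *
 *	struct trace_array *my_tr;	// hypothetical, set up elsewhere
 *
 *	trace_array_printk(my_tr, _THIS_IP_, "widget %d reset\n", id);
 */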
2612
2613 int trace_array_printk_buf(struct ring_buffer *buffer,
2614 unsigned long ip, const char *fmt, ...)
2615 {
2616 int ret;
2617 va_list ap;
2618
2619 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2620 return 0;
2621
2622 va_start(ap, fmt);
2623 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2624 va_end(ap);
2625 return ret;
2626 }
2627
2628 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2629 {
2630 return trace_array_vprintk(&global_trace, ip, fmt, args);
2631 }
2632 EXPORT_SYMBOL_GPL(trace_vprintk);
2633
2634 static void trace_iterator_increment(struct trace_iterator *iter)
2635 {
2636 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2637
2638 iter->idx++;
2639 if (buf_iter)
2640 ring_buffer_read(buf_iter, NULL);
2641 }
2642
2643 static struct trace_entry *
2644 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2645 unsigned long *lost_events)
2646 {
2647 struct ring_buffer_event *event;
2648 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2649
2650 if (buf_iter)
2651 event = ring_buffer_iter_peek(buf_iter, ts);
2652 else
2653 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2654 lost_events);
2655
2656 if (event) {
2657 iter->ent_size = ring_buffer_event_length(event);
2658 return ring_buffer_event_data(event);
2659 }
2660 iter->ent_size = 0;
2661 return NULL;
2662 }
2663
2664 static struct trace_entry *
2665 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2666 unsigned long *missing_events, u64 *ent_ts)
2667 {
2668 struct ring_buffer *buffer = iter->trace_buffer->buffer;
2669 struct trace_entry *ent, *next = NULL;
2670 unsigned long lost_events = 0, next_lost = 0;
2671 int cpu_file = iter->cpu_file;
2672 u64 next_ts = 0, ts;
2673 int next_cpu = -1;
2674 int next_size = 0;
2675 int cpu;
2676
2677 /*
2678 * If we are in a per_cpu trace file, don't bother iterating over
2679 * all the cpus; peek at that cpu directly.
2680 */
2681 if (cpu_file > RING_BUFFER_ALL_CPUS) {
2682 if (ring_buffer_empty_cpu(buffer, cpu_file))
2683 return NULL;
2684 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2685 if (ent_cpu)
2686 *ent_cpu = cpu_file;
2687
2688 return ent;
2689 }
2690
2691 for_each_tracing_cpu(cpu) {
2692
2693 if (ring_buffer_empty_cpu(buffer, cpu))
2694 continue;
2695
2696 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2697
2698 /*
2699 * Pick the entry with the smallest timestamp:
2700 */
2701 if (ent && (!next || ts < next_ts)) {
2702 next = ent;
2703 next_cpu = cpu;
2704 next_ts = ts;
2705 next_lost = lost_events;
2706 next_size = iter->ent_size;
2707 }
2708 }
2709
2710 iter->ent_size = next_size;
2711
2712 if (ent_cpu)
2713 *ent_cpu = next_cpu;
2714
2715 if (ent_ts)
2716 *ent_ts = next_ts;
2717
2718 if (missing_events)
2719 *missing_events = next_lost;
2720
2721 return next;
2722 }
2723
2724 /* Find the next real entry, without updating the iterator itself */
2725 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2726 int *ent_cpu, u64 *ent_ts)
2727 {
2728 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2729 }
2730
2731 /* Find the next real entry, and increment the iterator to the next entry */
2732 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2733 {
2734 iter->ent = __find_next_entry(iter, &iter->cpu,
2735 &iter->lost_events, &iter->ts);
2736
2737 if (iter->ent)
2738 trace_iterator_increment(iter);
2739
2740 return iter->ent ? iter : NULL;
2741 }
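/*
 * Example (illustrative sketch): readers walk the merged per-cpu streams
 * by repeatedly advancing the iterator and printing each entry, roughly:
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		print_trace_line(iter);
 *		if (trace_seq_has_overflowed(&iter->seq))
 *			break;
 *	}
 */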
2742
2743 static void trace_consume(struct trace_iterator *iter)
2744 {
2745 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2746 &iter->lost_events);
2747 }
2748
2749 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2750 {
2751 struct trace_iterator *iter = m->private;
2752 int i = (int)*pos;
2753 void *ent;
2754
2755 WARN_ON_ONCE(iter->leftover);
2756
2757 (*pos)++;
2758
2759 /* can't go backwards */
2760 if (iter->idx > i)
2761 return NULL;
2762
2763 if (iter->idx < 0)
2764 ent = trace_find_next_entry_inc(iter);
2765 else
2766 ent = iter;
2767
2768 while (ent && iter->idx < i)
2769 ent = trace_find_next_entry_inc(iter);
2770
2771 iter->pos = *pos;
2772
2773 return ent;
2774 }
2775
2776 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2777 {
2778 struct ring_buffer_event *event;
2779 struct ring_buffer_iter *buf_iter;
2780 unsigned long entries = 0;
2781 u64 ts;
2782
2783 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2784
2785 buf_iter = trace_buffer_iter(iter, cpu);
2786 if (!buf_iter)
2787 return;
2788
2789 ring_buffer_iter_reset(buf_iter);
2790
2791 /*
2792 * With the max latency tracers, a reset may never have taken
2793 * place on a cpu. This is evident from the timestamps being
2794 * before the start of the buffer.
2795 */
2796 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2797 if (ts >= iter->trace_buffer->time_start)
2798 break;
2799 entries++;
2800 ring_buffer_read(buf_iter, NULL);
2801 }
2802
2803 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2804 }
2805
2806 /*
2807 * The current tracer is copied to avoid taking a global lock
2808 * all around.
2809 */
2810 static void *s_start(struct seq_file *m, loff_t *pos)
2811 {
2812 struct trace_iterator *iter = m->private;
2813 struct trace_array *tr = iter->tr;
2814 int cpu_file = iter->cpu_file;
2815 void *p = NULL;
2816 loff_t l = 0;
2817 int cpu;
2818
2819 /*
2820 * copy the tracer to avoid using a global lock all around.
2821 * iter->trace is a copy of current_trace, the pointer to the
2822 * name may be used instead of a strcmp(), as iter->trace->name
2823 * will point to the same string as current_trace->name.
2824 */
2825 mutex_lock(&trace_types_lock);
2826 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2827 *iter->trace = *tr->current_trace;
2828 mutex_unlock(&trace_types_lock);
2829
2830 #ifdef CONFIG_TRACER_MAX_TRACE
2831 if (iter->snapshot && iter->trace->use_max_tr)
2832 return ERR_PTR(-EBUSY);
2833 #endif
2834
2835 if (!iter->snapshot)
2836 atomic_inc(&trace_record_cmdline_disabled);
2837
2838 if (*pos != iter->pos) {
2839 iter->ent = NULL;
2840 iter->cpu = 0;
2841 iter->idx = -1;
2842
2843 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2844 for_each_tracing_cpu(cpu)
2845 tracing_iter_reset(iter, cpu);
2846 } else
2847 tracing_iter_reset(iter, cpu_file);
2848
2849 iter->leftover = 0;
2850 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2851 ;
2852
2853 } else {
2854 /*
2855 * If we overflowed the seq_file before, then we want
2856 * to just reuse the trace_seq buffer again.
2857 */
2858 if (iter->leftover)
2859 p = iter;
2860 else {
2861 l = *pos - 1;
2862 p = s_next(m, p, &l);
2863 }
2864 }
2865
2866 trace_event_read_lock();
2867 trace_access_lock(cpu_file);
2868 return p;
2869 }
2870
2871 static void s_stop(struct seq_file *m, void *p)
2872 {
2873 struct trace_iterator *iter = m->private;
2874
2875 #ifdef CONFIG_TRACER_MAX_TRACE
2876 if (iter->snapshot && iter->trace->use_max_tr)
2877 return;
2878 #endif
2879
2880 if (!iter->snapshot)
2881 atomic_dec(&trace_record_cmdline_disabled);
2882
2883 trace_access_unlock(iter->cpu_file);
2884 trace_event_read_unlock();
2885 }
2886
2887 static void
2888 get_total_entries(struct trace_buffer *buf,
2889 unsigned long *total, unsigned long *entries)
2890 {
2891 unsigned long count;
2892 int cpu;
2893
2894 *total = 0;
2895 *entries = 0;
2896
2897 for_each_tracing_cpu(cpu) {
2898 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2899 /*
2900 * If this buffer has skipped entries, then we hold all
2901 * entries for the trace and we need to ignore the
2902 * ones before the time stamp.
2903 */
2904 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2905 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2906 /* total is the same as the entries */
2907 *total += count;
2908 } else
2909 *total += count +
2910 ring_buffer_overrun_cpu(buf->buffer, cpu);
2911 *entries += count;
2912 }
2913 }
2914
2915 static void print_lat_help_header(struct seq_file *m)
2916 {
2917 seq_puts(m, "# _------=> CPU# \n"
2918 "# / _-----=> irqs-off \n"
2919 "# | / _----=> need-resched \n"
2920 "# || / _---=> hardirq/softirq \n"
2921 "# ||| / _--=> preempt-depth \n"
2922 "# |||| / delay \n"
2923 "# cmd pid ||||| time | caller \n"
2924 "# \\ / ||||| \\ | / \n");
2925 }
2926
2927 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2928 {
2929 unsigned long total;
2930 unsigned long entries;
2931
2932 get_total_entries(buf, &total, &entries);
2933 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
2934 entries, total, num_online_cpus());
2935 seq_puts(m, "#\n");
2936 }
2937
2938 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2939 {
2940 print_event_info(buf, m);
2941 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"
2942 "# | | | | |\n");
2943 }
2944
2945 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2946 {
2947 print_event_info(buf, m);
2948 seq_puts(m, "# _-----=> irqs-off\n"
2949 "# / _----=> need-resched\n"
2950 "# | / _---=> hardirq/softirq\n"
2951 "# || / _--=> preempt-depth\n"
2952 "# ||| / delay\n"
2953 "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
2954 "# | | | |||| | |\n");
2955 }
2956
2957 void
2958 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2959 {
2960 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2961 struct trace_buffer *buf = iter->trace_buffer;
2962 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2963 struct tracer *type = iter->trace;
2964 unsigned long entries;
2965 unsigned long total;
2966 const char *name = "preemption";
2967
2968 name = type->name;
2969
2970 get_total_entries(buf, &total, &entries);
2971
2972 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2973 name, UTS_RELEASE);
2974 seq_puts(m, "# -----------------------------------"
2975 "---------------------------------\n");
2976 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2977 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2978 nsecs_to_usecs(data->saved_latency),
2979 entries,
2980 total,
2981 buf->cpu,
2982 #if defined(CONFIG_PREEMPT_NONE)
2983 "server",
2984 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2985 "desktop",
2986 #elif defined(CONFIG_PREEMPT)
2987 "preempt",
2988 #else
2989 "unknown",
2990 #endif
2991 /* These are reserved for later use */
2992 0, 0, 0, 0);
2993 #ifdef CONFIG_SMP
2994 seq_printf(m, " #P:%d)\n", num_online_cpus());
2995 #else
2996 seq_puts(m, ")\n");
2997 #endif
2998 seq_puts(m, "# -----------------\n");
2999 seq_printf(m, "# | task: %.16s-%d "
3000 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3001 data->comm, data->pid,
3002 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3003 data->policy, data->rt_priority);
3004 seq_puts(m, "# -----------------\n");
3005
3006 if (data->critical_start) {
3007 seq_puts(m, "# => started at: ");
3008 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3009 trace_print_seq(m, &iter->seq);
3010 seq_puts(m, "\n# => ended at: ");
3011 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3012 trace_print_seq(m, &iter->seq);
3013 seq_puts(m, "\n#\n");
3014 }
3015
3016 seq_puts(m, "#\n");
3017 }
3018
3019 static void test_cpu_buff_start(struct trace_iterator *iter)
3020 {
3021 struct trace_seq *s = &iter->seq;
3022 struct trace_array *tr = iter->tr;
3023
3024 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3025 return;
3026
3027 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3028 return;
3029
3030 if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3031 return;
3032
3033 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3034 return;
3035
3036 if (iter->started)
3037 cpumask_set_cpu(iter->cpu, iter->started);
3038
3039 /* Don't print started cpu buffer for the first entry of the trace */
3040 if (iter->idx > 1)
3041 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3042 iter->cpu);
3043 }
3044
3045 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3046 {
3047 struct trace_array *tr = iter->tr;
3048 struct trace_seq *s = &iter->seq;
3049 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3050 struct trace_entry *entry;
3051 struct trace_event *event;
3052
3053 entry = iter->ent;
3054
3055 test_cpu_buff_start(iter);
3056
3057 event = ftrace_find_event(entry->type);
3058
3059 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3060 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3061 trace_print_lat_context(iter);
3062 else
3063 trace_print_context(iter);
3064 }
3065
3066 if (trace_seq_has_overflowed(s))
3067 return TRACE_TYPE_PARTIAL_LINE;
3068
3069 if (event)
3070 return event->funcs->trace(iter, sym_flags, event);
3071
3072 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3073
3074 return trace_handle_return(s);
3075 }
3076
3077 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3078 {
3079 struct trace_array *tr = iter->tr;
3080 struct trace_seq *s = &iter->seq;
3081 struct trace_entry *entry;
3082 struct trace_event *event;
3083
3084 entry = iter->ent;
3085
3086 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3087 trace_seq_printf(s, "%d %d %llu ",
3088 entry->pid, iter->cpu, iter->ts);
3089
3090 if (trace_seq_has_overflowed(s))
3091 return TRACE_TYPE_PARTIAL_LINE;
3092
3093 event = ftrace_find_event(entry->type);
3094 if (event)
3095 return event->funcs->raw(iter, 0, event);
3096
3097 trace_seq_printf(s, "%d ?\n", entry->type);
3098
3099 return trace_handle_return(s);
3100 }
3101
3102 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3103 {
3104 struct trace_array *tr = iter->tr;
3105 struct trace_seq *s = &iter->seq;
3106 unsigned char newline = '\n';
3107 struct trace_entry *entry;
3108 struct trace_event *event;
3109
3110 entry = iter->ent;
3111
3112 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3113 SEQ_PUT_HEX_FIELD(s, entry->pid);
3114 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3115 SEQ_PUT_HEX_FIELD(s, iter->ts);
3116 if (trace_seq_has_overflowed(s))
3117 return TRACE_TYPE_PARTIAL_LINE;
3118 }
3119
3120 event = ftrace_find_event(entry->type);
3121 if (event) {
3122 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3123 if (ret != TRACE_TYPE_HANDLED)
3124 return ret;
3125 }
3126
3127 SEQ_PUT_FIELD(s, newline);
3128
3129 return trace_handle_return(s);
3130 }
3131
3132 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3133 {
3134 struct trace_array *tr = iter->tr;
3135 struct trace_seq *s = &iter->seq;
3136 struct trace_entry *entry;
3137 struct trace_event *event;
3138
3139 entry = iter->ent;
3140
3141 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3142 SEQ_PUT_FIELD(s, entry->pid);
3143 SEQ_PUT_FIELD(s, iter->cpu);
3144 SEQ_PUT_FIELD(s, iter->ts);
3145 if (trace_seq_has_overflowed(s))
3146 return TRACE_TYPE_PARTIAL_LINE;
3147 }
3148
3149 event = ftrace_find_event(entry->type);
3150 return event ? event->funcs->binary(iter, 0, event) :
3151 TRACE_TYPE_HANDLED;
3152 }
3153
3154 int trace_empty(struct trace_iterator *iter)
3155 {
3156 struct ring_buffer_iter *buf_iter;
3157 int cpu;
3158
3159 /* If we are looking at one CPU buffer, only check that one */
3160 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3161 cpu = iter->cpu_file;
3162 buf_iter = trace_buffer_iter(iter, cpu);
3163 if (buf_iter) {
3164 if (!ring_buffer_iter_empty(buf_iter))
3165 return 0;
3166 } else {
3167 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3168 return 0;
3169 }
3170 return 1;
3171 }
3172
3173 for_each_tracing_cpu(cpu) {
3174 buf_iter = trace_buffer_iter(iter, cpu);
3175 if (buf_iter) {
3176 if (!ring_buffer_iter_empty(buf_iter))
3177 return 0;
3178 } else {
3179 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3180 return 0;
3181 }
3182 }
3183
3184 return 1;
3185 }
3186
3187 /* Called with trace_event_read_lock() held. */
3188 enum print_line_t print_trace_line(struct trace_iterator *iter)
3189 {
3190 struct trace_array *tr = iter->tr;
3191 unsigned long trace_flags = tr->trace_flags;
3192 enum print_line_t ret;
3193
3194 if (iter->lost_events) {
3195 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3196 iter->cpu, iter->lost_events);
3197 if (trace_seq_has_overflowed(&iter->seq))
3198 return TRACE_TYPE_PARTIAL_LINE;
3199 }
3200
3201 if (iter->trace && iter->trace->print_line) {
3202 ret = iter->trace->print_line(iter);
3203 if (ret != TRACE_TYPE_UNHANDLED)
3204 return ret;
3205 }
3206
3207 if (iter->ent->type == TRACE_BPUTS &&
3208 trace_flags & TRACE_ITER_PRINTK &&
3209 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3210 return trace_print_bputs_msg_only(iter);
3211
3212 if (iter->ent->type == TRACE_BPRINT &&
3213 trace_flags & TRACE_ITER_PRINTK &&
3214 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3215 return trace_print_bprintk_msg_only(iter);
3216
3217 if (iter->ent->type == TRACE_PRINT &&
3218 trace_flags & TRACE_ITER_PRINTK &&
3219 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3220 return trace_print_printk_msg_only(iter);
3221
3222 if (trace_flags & TRACE_ITER_BIN)
3223 return print_bin_fmt(iter);
3224
3225 if (trace_flags & TRACE_ITER_HEX)
3226 return print_hex_fmt(iter);
3227
3228 if (trace_flags & TRACE_ITER_RAW)
3229 return print_raw_fmt(iter);
3230
3231 return print_trace_fmt(iter);
3232 }
3233
3234 void trace_latency_header(struct seq_file *m)
3235 {
3236 struct trace_iterator *iter = m->private;
3237 struct trace_array *tr = iter->tr;
3238
3239 /* print nothing if the buffers are empty */
3240 if (trace_empty(iter))
3241 return;
3242
3243 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3244 print_trace_header(m, iter);
3245
3246 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3247 print_lat_help_header(m);
3248 }
3249
3250 void trace_default_header(struct seq_file *m)
3251 {
3252 struct trace_iterator *iter = m->private;
3253 struct trace_array *tr = iter->tr;
3254 unsigned long trace_flags = tr->trace_flags;
3255
3256 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3257 return;
3258
3259 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3260 /* print nothing if the buffers are empty */
3261 if (trace_empty(iter))
3262 return;
3263 print_trace_header(m, iter);
3264 if (!(trace_flags & TRACE_ITER_VERBOSE))
3265 print_lat_help_header(m);
3266 } else {
3267 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3268 if (trace_flags & TRACE_ITER_IRQ_INFO)
3269 print_func_help_header_irq(iter->trace_buffer, m);
3270 else
3271 print_func_help_header(iter->trace_buffer, m);
3272 }
3273 }
3274 }
3275
3276 static void test_ftrace_alive(struct seq_file *m)
3277 {
3278 if (!ftrace_is_dead())
3279 return;
3280 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3281 "# MAY BE MISSING FUNCTION EVENTS\n");
3282 }
3283
3284 #ifdef CONFIG_TRACER_MAX_TRACE
3285 static void show_snapshot_main_help(struct seq_file *m)
3286 {
3287 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3288 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3289 "# Takes a snapshot of the main buffer.\n"
3290 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3291 "# (Doesn't have to be '2' works with any number that\n"
3292 "# is not a '0' or '1')\n");
3293 }
3294
3295 static void show_snapshot_percpu_help(struct seq_file *m)
3296 {
3297 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3298 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3299 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3300 "# Takes a snapshot of the main buffer for this cpu.\n");
3301 #else
3302 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3303 "# Must use main snapshot file to allocate.\n");
3304 #endif
3305 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3306 "# (Doesn't have to be '2' works with any number that\n"
3307 "# is not a '0' or '1')\n");
3308 }
3309
3310 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3311 {
3312 if (iter->tr->allocated_snapshot)
3313 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3314 else
3315 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3316
3317 seq_puts(m, "# Snapshot commands:\n");
3318 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3319 show_snapshot_main_help(m);
3320 else
3321 show_snapshot_percpu_help(m);
3322 }
3323 #else
3324 /* Should never be called */
3325 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3326 #endif
3327
3328 static int s_show(struct seq_file *m, void *v)
3329 {
3330 struct trace_iterator *iter = v;
3331 int ret;
3332
3333 if (iter->ent == NULL) {
3334 if (iter->tr) {
3335 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3336 seq_puts(m, "#\n");
3337 test_ftrace_alive(m);
3338 }
3339 if (iter->snapshot && trace_empty(iter))
3340 print_snapshot_help(m, iter);
3341 else if (iter->trace && iter->trace->print_header)
3342 iter->trace->print_header(m);
3343 else
3344 trace_default_header(m);
3345
3346 } else if (iter->leftover) {
3347 /*
3348 * If we filled the seq_file buffer earlier, we
3349 * want to just show it now.
3350 */
3351 ret = trace_print_seq(m, &iter->seq);
3352
3353 /* ret should this time be zero, but you never know */
3354 iter->leftover = ret;
3355
3356 } else {
3357 print_trace_line(iter);
3358 ret = trace_print_seq(m, &iter->seq);
3359 /*
3360 * If we overflow the seq_file buffer, then it will
3361 * ask us for this data again at start up.
3362 * Use that instead.
3363 * ret is 0 if seq_file write succeeded.
3364 * -1 otherwise.
3365 */
3366 iter->leftover = ret;
3367 }
3368
3369 return 0;
3370 }
3371
3372 /*
3373 * Should be used after trace_array_get(); trace_types_lock
3374 * ensures that i_cdev was already initialized.
3375 */
3376 static inline int tracing_get_cpu(struct inode *inode)
3377 {
3378 if (inode->i_cdev) /* See trace_create_cpu_file() */
3379 return (long)inode->i_cdev - 1;
3380 return RING_BUFFER_ALL_CPUS;
3381 }
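/*
 * Illustrative example: the per_cpu trace files store "cpu + 1" in i_cdev
 * so that NULL can mean "no cpu selected". For per_cpu/cpu2/trace, i_cdev
 * holds (void *)3 and tracing_get_cpu() returns 2; for the top level trace
 * file i_cdev is NULL and RING_BUFFER_ALL_CPUS is returned.
 */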
3382
3383 static const struct seq_operations tracer_seq_ops = {
3384 .start = s_start,
3385 .next = s_next,
3386 .stop = s_stop,
3387 .show = s_show,
3388 };
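/*
 * Illustrative note: for one read of the "trace" file the seq_file core
 * drives these callbacks roughly as:
 *
 *	p = s_start(m, &pos);
 *	while (p) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * so s_start()/s_stop() bracket the locking and s_show() emits one entry.
 */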
3389
3390 static struct trace_iterator *
3391 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3392 {
3393 struct trace_array *tr = inode->i_private;
3394 struct trace_iterator *iter;
3395 int cpu;
3396
3397 if (tracing_disabled)
3398 return ERR_PTR(-ENODEV);
3399
3400 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3401 if (!iter)
3402 return ERR_PTR(-ENOMEM);
3403
3404 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3405 GFP_KERNEL);
3406 if (!iter->buffer_iter)
3407 goto release;
3408
3409 /*
3410 * We make a copy of the current tracer to avoid concurrent
3411 * changes on it while we are reading.
3412 */
3413 mutex_lock(&trace_types_lock);
3414 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3415 if (!iter->trace)
3416 goto fail;
3417
3418 *iter->trace = *tr->current_trace;
3419
3420 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3421 goto fail;
3422
3423 iter->tr = tr;
3424
3425 #ifdef CONFIG_TRACER_MAX_TRACE
3426 /* Currently only the top directory has a snapshot */
3427 if (tr->current_trace->print_max || snapshot)
3428 iter->trace_buffer = &tr->max_buffer;
3429 else
3430 #endif
3431 iter->trace_buffer = &tr->trace_buffer;
3432 iter->snapshot = snapshot;
3433 iter->pos = -1;
3434 iter->cpu_file = tracing_get_cpu(inode);
3435 mutex_init(&iter->mutex);
3436
3437 /* Notify the tracer early; before we stop tracing. */
3438 if (iter->trace && iter->trace->open)
3439 iter->trace->open(iter);
3440
3441 /* Annotate start of buffers if we had overruns */
3442 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3443 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3444
3445 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3446 if (trace_clocks[tr->clock_id].in_ns)
3447 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3448
3449 /* stop the trace while dumping if we are not opening "snapshot" */
3450 if (!iter->snapshot)
3451 tracing_stop_tr(tr);
3452
3453 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3454 for_each_tracing_cpu(cpu) {
3455 iter->buffer_iter[cpu] =
3456 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3457 }
3458 ring_buffer_read_prepare_sync();
3459 for_each_tracing_cpu(cpu) {
3460 ring_buffer_read_start(iter->buffer_iter[cpu]);
3461 tracing_iter_reset(iter, cpu);
3462 }
3463 } else {
3464 cpu = iter->cpu_file;
3465 iter->buffer_iter[cpu] =
3466 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3467 ring_buffer_read_prepare_sync();
3468 ring_buffer_read_start(iter->buffer_iter[cpu]);
3469 tracing_iter_reset(iter, cpu);
3470 }
3471
3472 mutex_unlock(&trace_types_lock);
3473
3474 return iter;
3475
3476 fail:
3477 mutex_unlock(&trace_types_lock);
3478 kfree(iter->trace);
3479 kfree(iter->buffer_iter);
3480 release:
3481 seq_release_private(inode, file);
3482 return ERR_PTR(-ENOMEM);
3483 }
3484
3485 int tracing_open_generic(struct inode *inode, struct file *filp)
3486 {
3487 if (tracing_disabled)
3488 return -ENODEV;
3489
3490 filp->private_data = inode->i_private;
3491 return 0;
3492 }
3493
3494 bool tracing_is_disabled(void)
3495 {
3496 return (tracing_disabled) ? true : false;
3497 }
3498
3499 /*
3500 * Open and update trace_array ref count.
3501 * Must have the current trace_array passed to it.
3502 */
3503 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3504 {
3505 struct trace_array *tr = inode->i_private;
3506
3507 if (tracing_disabled)
3508 return -ENODEV;
3509
3510 if (trace_array_get(tr) < 0)
3511 return -ENODEV;
3512
3513 filp->private_data = inode->i_private;
3514
3515 return 0;
3516 }
3517
3518 static int tracing_release(struct inode *inode, struct file *file)
3519 {
3520 struct trace_array *tr = inode->i_private;
3521 struct seq_file *m = file->private_data;
3522 struct trace_iterator *iter;
3523 int cpu;
3524
3525 if (!(file->f_mode & FMODE_READ)) {
3526 trace_array_put(tr);
3527 return 0;
3528 }
3529
3530 /* Writes do not use seq_file */
3531 iter = m->private;
3532 mutex_lock(&trace_types_lock);
3533
3534 for_each_tracing_cpu(cpu) {
3535 if (iter->buffer_iter[cpu])
3536 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3537 }
3538
3539 if (iter->trace && iter->trace->close)
3540 iter->trace->close(iter);
3541
3542 if (!iter->snapshot)
3543 /* reenable tracing if it was previously enabled */
3544 tracing_start_tr(tr);
3545
3546 __trace_array_put(tr);
3547
3548 mutex_unlock(&trace_types_lock);
3549
3550 mutex_destroy(&iter->mutex);
3551 free_cpumask_var(iter->started);
3552 kfree(iter->trace);
3553 kfree(iter->buffer_iter);
3554 seq_release_private(inode, file);
3555
3556 return 0;
3557 }
3558
3559 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3560 {
3561 struct trace_array *tr = inode->i_private;
3562
3563 trace_array_put(tr);
3564 return 0;
3565 }
3566
3567 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3568 {
3569 struct trace_array *tr = inode->i_private;
3570
3571 trace_array_put(tr);
3572
3573 return single_release(inode, file);
3574 }
3575
3576 static int tracing_open(struct inode *inode, struct file *file)
3577 {
3578 struct trace_array *tr = inode->i_private;
3579 struct trace_iterator *iter;
3580 int ret = 0;
3581
3582 if (trace_array_get(tr) < 0)
3583 return -ENODEV;
3584
3585 /* If this file was open for write, then erase contents */
3586 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3587 int cpu = tracing_get_cpu(inode);
3588
3589 if (cpu == RING_BUFFER_ALL_CPUS)
3590 tracing_reset_online_cpus(&tr->trace_buffer);
3591 else
3592 tracing_reset(&tr->trace_buffer, cpu);
3593 }
3594
3595 if (file->f_mode & FMODE_READ) {
3596 iter = __tracing_open(inode, file, false);
3597 if (IS_ERR(iter))
3598 ret = PTR_ERR(iter);
3599 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3600 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3601 }
3602
3603 if (ret < 0)
3604 trace_array_put(tr);
3605
3606 return ret;
3607 }
3608
3609 /*
3610 * Some tracers are not suitable for instance buffers.
3611 * A tracer is always available for the global array (toplevel),
3612 * or for an instance if it explicitly allows instances.
3613 */
3614 static bool
3615 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3616 {
3617 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3618 }
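/*
 * Example (illustrative sketch): a tracer opts in to running inside
 * instance buffers by setting allow_instances when it registers. The
 * tracer and its callbacks below are hypothetical.
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name		 = "my_tracer",
 *		.init		 = my_tracer_init,	// hypothetical
 *		.reset		 = my_tracer_reset,	// hypothetical
 *		.allow_instances = true,
 *	};
 */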
3619
3620 /* Find the next tracer that this trace array may use */
3621 static struct tracer *
3622 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3623 {
3624 while (t && !trace_ok_for_array(t, tr))
3625 t = t->next;
3626
3627 return t;
3628 }
3629
3630 static void *
3631 t_next(struct seq_file *m, void *v, loff_t *pos)
3632 {
3633 struct trace_array *tr = m->private;
3634 struct tracer *t = v;
3635
3636 (*pos)++;
3637
3638 if (t)
3639 t = get_tracer_for_array(tr, t->next);
3640
3641 return t;
3642 }
3643
3644 static void *t_start(struct seq_file *m, loff_t *pos)
3645 {
3646 struct trace_array *tr = m->private;
3647 struct tracer *t;
3648 loff_t l = 0;
3649
3650 mutex_lock(&trace_types_lock);
3651
3652 t = get_tracer_for_array(tr, trace_types);
3653 for (; t && l < *pos; t = t_next(m, t, &l))
3654 ;
3655
3656 return t;
3657 }
3658
3659 static void t_stop(struct seq_file *m, void *p)
3660 {
3661 mutex_unlock(&trace_types_lock);
3662 }
3663
3664 static int t_show(struct seq_file *m, void *v)
3665 {
3666 struct tracer *t = v;
3667
3668 if (!t)
3669 return 0;
3670
3671 seq_puts(m, t->name);
3672 if (t->next)
3673 seq_putc(m, ' ');
3674 else
3675 seq_putc(m, '\n');
3676
3677 return 0;
3678 }
3679
3680 static const struct seq_operations show_traces_seq_ops = {
3681 .start = t_start,
3682 .next = t_next,
3683 .stop = t_stop,
3684 .show = t_show,
3685 };
3686
3687 static int show_traces_open(struct inode *inode, struct file *file)
3688 {
3689 struct trace_array *tr = inode->i_private;
3690 struct seq_file *m;
3691 int ret;
3692
3693 if (tracing_disabled)
3694 return -ENODEV;
3695
3696 ret = seq_open(file, &show_traces_seq_ops);
3697 if (ret)
3698 return ret;
3699
3700 m = file->private_data;
3701 m->private = tr;
3702
3703 return 0;
3704 }
3705
3706 static ssize_t
3707 tracing_write_stub(struct file *filp, const char __user *ubuf,
3708 size_t count, loff_t *ppos)
3709 {
3710 return count;
3711 }
3712
3713 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3714 {
3715 int ret;
3716
3717 if (file->f_mode & FMODE_READ)
3718 ret = seq_lseek(file, offset, whence);
3719 else
3720 file->f_pos = ret = 0;
3721
3722 return ret;
3723 }
3724
3725 static const struct file_operations tracing_fops = {
3726 .open = tracing_open,
3727 .read = seq_read,
3728 .write = tracing_write_stub,
3729 .llseek = tracing_lseek,
3730 .release = tracing_release,
3731 };
3732
3733 static const struct file_operations show_traces_fops = {
3734 .open = show_traces_open,
3735 .read = seq_read,
3736 .release = seq_release,
3737 .llseek = seq_lseek,
3738 };
3739
3740 /*
3741 * The tracer itself will not take this lock, but we still want
3742 * to provide a consistent cpumask to user-space:
3743 */
3744 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3745
3746 /*
3747 * Temporary storage for the character representation of the
3748 * CPU bitmask (and one more byte for the newline):
3749 */
3750 static char mask_str[NR_CPUS + 1];
3751
3752 static ssize_t
3753 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3754 size_t count, loff_t *ppos)
3755 {
3756 struct trace_array *tr = file_inode(filp)->i_private;
3757 int len;
3758
3759 mutex_lock(&tracing_cpumask_update_lock);
3760
3761 len = snprintf(mask_str, count, "%*pb\n",
3762 cpumask_pr_args(tr->tracing_cpumask));
3763 if (len >= count) {
3764 count = -EINVAL;
3765 goto out_err;
3766 }
3767 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3768
3769 out_err:
3770 mutex_unlock(&tracing_cpumask_update_lock);
3771
3772 return count;
3773 }
3774
3775 static ssize_t
3776 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3777 size_t count, loff_t *ppos)
3778 {
3779 struct trace_array *tr = file_inode(filp)->i_private;
3780 cpumask_var_t tracing_cpumask_new;
3781 int err, cpu;
3782
3783 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3784 return -ENOMEM;
3785
3786 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3787 if (err)
3788 goto err_unlock;
3789
3790 mutex_lock(&tracing_cpumask_update_lock);
3791
3792 local_irq_disable();
3793 arch_spin_lock(&tr->max_lock);
3794 for_each_tracing_cpu(cpu) {
3795 /*
3796 * Increase/decrease the disabled counter if we are
3797 * about to flip a bit in the cpumask:
3798 */
3799 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3800 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3801 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3802 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3803 }
3804 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3805 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3806 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3807 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3808 }
3809 }
3810 arch_spin_unlock(&tr->max_lock);
3811 local_irq_enable();
3812
3813 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3814
3815 mutex_unlock(&tracing_cpumask_update_lock);
3816 free_cpumask_var(tracing_cpumask_new);
3817
3818 return count;
3819
3820 err_unlock:
3821 free_cpumask_var(tracing_cpumask_new);
3822
3823 return err;
3824 }
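/*
 * Illustrative example: writing "3" to tracing_cpumask keeps only CPUs 0
 * and 1 recording. Bits that flip from 1 to 0 get their per-cpu "disabled"
 * counter bumped and ring buffer recording turned off; bits that flip from
 * 0 to 1 get the reverse.
 */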
3825
3826 static const struct file_operations tracing_cpumask_fops = {
3827 .open = tracing_open_generic_tr,
3828 .read = tracing_cpumask_read,
3829 .write = tracing_cpumask_write,
3830 .release = tracing_release_generic_tr,
3831 .llseek = generic_file_llseek,
3832 };
3833
3834 static int tracing_trace_options_show(struct seq_file *m, void *v)
3835 {
3836 struct tracer_opt *trace_opts;
3837 struct trace_array *tr = m->private;
3838 u32 tracer_flags;
3839 int i;
3840
3841 mutex_lock(&trace_types_lock);
3842 tracer_flags = tr->current_trace->flags->val;
3843 trace_opts = tr->current_trace->flags->opts;
3844
3845 for (i = 0; trace_options[i]; i++) {
3846 if (tr->trace_flags & (1 << i))
3847 seq_printf(m, "%s\n", trace_options[i]);
3848 else
3849 seq_printf(m, "no%s\n", trace_options[i]);
3850 }
3851
3852 for (i = 0; trace_opts[i].name; i++) {
3853 if (tracer_flags & trace_opts[i].bit)
3854 seq_printf(m, "%s\n", trace_opts[i].name);
3855 else
3856 seq_printf(m, "no%s\n", trace_opts[i].name);
3857 }
3858 mutex_unlock(&trace_types_lock);
3859
3860 return 0;
3861 }
3862
3863 static int __set_tracer_option(struct trace_array *tr,
3864 struct tracer_flags *tracer_flags,
3865 struct tracer_opt *opts, int neg)
3866 {
3867 struct tracer *trace = tracer_flags->trace;
3868 int ret;
3869
3870 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3871 if (ret)
3872 return ret;
3873
3874 if (neg)
3875 tracer_flags->val &= ~opts->bit;
3876 else
3877 tracer_flags->val |= opts->bit;
3878 return 0;
3879 }
3880
3881 /* Try to assign a tracer specific option */
3882 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3883 {
3884 struct tracer *trace = tr->current_trace;
3885 struct tracer_flags *tracer_flags = trace->flags;
3886 struct tracer_opt *opts = NULL;
3887 int i;
3888
3889 for (i = 0; tracer_flags->opts[i].name; i++) {
3890 opts = &tracer_flags->opts[i];
3891
3892 if (strcmp(cmp, opts->name) == 0)
3893 return __set_tracer_option(tr, trace->flags, opts, neg);
3894 }
3895
3896 return -EINVAL;
3897 }
3898
3899 /* Some tracers require overwrite to stay enabled */
3900 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3901 {
3902 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3903 return -1;
3904
3905 return 0;
3906 }
3907
3908 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3909 {
3910 /* do nothing if flag is already set */
3911 if (!!(tr->trace_flags & mask) == !!enabled)
3912 return 0;
3913
3914 /* Give the tracer a chance to approve the change */
3915 if (tr->current_trace->flag_changed)
3916 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3917 return -EINVAL;
3918
3919 if (enabled)
3920 tr->trace_flags |= mask;
3921 else
3922 tr->trace_flags &= ~mask;
3923
3924 if (mask == TRACE_ITER_RECORD_CMD)
3925 trace_event_enable_cmd_record(enabled);
3926
3927 if (mask == TRACE_ITER_EVENT_FORK)
3928 trace_event_follow_fork(tr, enabled);
3929
3930 if (mask == TRACE_ITER_OVERWRITE) {
3931 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3932 #ifdef CONFIG_TRACER_MAX_TRACE
3933 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3934 #endif
3935 }
3936
3937 if (mask == TRACE_ITER_PRINTK) {
3938 trace_printk_start_stop_comm(enabled);
3939 trace_printk_control(enabled);
3940 }
3941
3942 return 0;
3943 }
3944
3945 static int trace_set_options(struct trace_array *tr, char *option)
3946 {
3947 char *cmp;
3948 int neg = 0;
3949 int ret = -ENODEV;
3950 int i;
3951 size_t orig_len = strlen(option);
3952
3953 cmp = strstrip(option);
3954
3955 if (strncmp(cmp, "no", 2) == 0) {
3956 neg = 1;
3957 cmp += 2;
3958 }
3959
3960 mutex_lock(&trace_types_lock);
3961
3962 for (i = 0; trace_options[i]; i++) {
3963 if (strcmp(cmp, trace_options[i]) == 0) {
3964 ret = set_tracer_flag(tr, 1 << i, !neg);
3965 break;
3966 }
3967 }
3968
3969 /* If no option could be set, test the specific tracer options */
3970 if (!trace_options[i])
3971 ret = set_tracer_option(tr, cmp, neg);
3972
3973 mutex_unlock(&trace_types_lock);
3974
3975 /*
3976 * If the first trailing whitespace is replaced with '\0' by strstrip,
3977 * turn it back into a space.
3978 */
3979 if (orig_len > strlen(option))
3980 option[strlen(option)] = ' ';
3981
3982 return ret;
3983 }
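/*
 * Example (illustrative usage): option strings from trace_options writes
 * or the boot command line resolve as:
 *
 *	trace_set_options(tr, "stacktrace");	// set TRACE_ITER_STACKTRACE
 *	trace_set_options(tr, "nostacktrace");	// clear it ("no" prefix)
 *	trace_set_options(tr, "sleep-time");	// not a core flag; falls
 *						// through to the current
 *						// tracer's own options
 */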
3984
3985 static void __init apply_trace_boot_options(void)
3986 {
3987 char *buf = trace_boot_options_buf;
3988 char *option;
3989
3990 while (true) {
3991 option = strsep(&buf, ",");
3992
3993 if (!option)
3994 break;
3995
3996 if (*option)
3997 trace_set_options(&global_trace, option);
3998
3999 /* Put back the comma to allow this to be called again */
4000 if (buf)
4001 *(buf - 1) = ',';
4002 }
4003 }
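/*
 * Example (illustrative usage): with a kernel command line containing
 *
 *	trace_options=stacktrace,noirq-info
 *
 * the saved buffer is split on ',' and each piece is handed to
 * trace_set_options() for the global trace array during boot.
 */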
4004
4005 static ssize_t
4006 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4007 size_t cnt, loff_t *ppos)
4008 {
4009 struct seq_file *m = filp->private_data;
4010 struct trace_array *tr = m->private;
4011 char buf[64];
4012 int ret;
4013
4014 if (cnt >= sizeof(buf))
4015 return -EINVAL;
4016
4017 if (copy_from_user(buf, ubuf, cnt))
4018 return -EFAULT;
4019
4020 buf[cnt] = 0;
4021
4022 ret = trace_set_options(tr, buf);
4023 if (ret < 0)
4024 return ret;
4025
4026 *ppos += cnt;
4027
4028 return cnt;
4029 }
4030
4031 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4032 {
4033 struct trace_array *tr = inode->i_private;
4034 int ret;
4035
4036 if (tracing_disabled)
4037 return -ENODEV;
4038
4039 if (trace_array_get(tr) < 0)
4040 return -ENODEV;
4041
4042 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4043 if (ret < 0)
4044 trace_array_put(tr);
4045
4046 return ret;
4047 }
4048
4049 static const struct file_operations tracing_iter_fops = {
4050 .open = tracing_trace_options_open,
4051 .read = seq_read,
4052 .llseek = seq_lseek,
4053 .release = tracing_single_release_tr,
4054 .write = tracing_trace_options_write,
4055 };
4056
4057 static const char readme_msg[] =
4058 "tracing mini-HOWTO:\n\n"
4059 "# echo 0 > tracing_on : quick way to disable tracing\n"
4060 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4061 " Important files:\n"
4062 " trace\t\t\t- The static contents of the buffer\n"
4063 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4064 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4065 " current_tracer\t- function and latency tracers\n"
4066 " available_tracers\t- list of configured tracers for current_tracer\n"
4067 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4068 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4069 " trace_clock\t\t-change the clock used to order events\n"
4070 " local: Per cpu clock but may not be synced across CPUs\n"
4071 " global: Synced across CPUs but slows tracing down.\n"
4072 " counter: Not a clock, but just an increment\n"
4073 " uptime: Jiffy counter from time of boot\n"
4074 " perf: Same clock that perf events use\n"
4075 #ifdef CONFIG_X86_64
4076 " x86-tsc: TSC cycle counter\n"
4077 #endif
4078 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4079 " tracing_cpumask\t- Limit which CPUs to trace\n"
4080 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4081 "\t\t\t Remove sub-buffer with rmdir\n"
4082 " trace_options\t\t- Set format or modify how tracing happens\n"
4083 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
4084 "\t\t\t option name\n"
4085 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4086 #ifdef CONFIG_DYNAMIC_FTRACE
4087 "\n available_filter_functions - list of functions that can be filtered on\n"
4088 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4089 "\t\t\t functions\n"
4090 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4091 "\t modules: Can select a group via module\n"
4092 "\t Format: :mod:<module-name>\n"
4093 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4094 "\t triggers: a command to perform when function is hit\n"
4095 "\t Format: <function>:<trigger>[:count]\n"
4096 "\t trigger: traceon, traceoff\n"
4097 "\t\t enable_event:<system>:<event>\n"
4098 "\t\t disable_event:<system>:<event>\n"
4099 #ifdef CONFIG_STACKTRACE
4100 "\t\t stacktrace\n"
4101 #endif
4102 #ifdef CONFIG_TRACER_SNAPSHOT
4103 "\t\t snapshot\n"
4104 #endif
4105 "\t\t dump\n"
4106 "\t\t cpudump\n"
4107 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4108 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4109 "\t The first one will disable tracing every time do_fault is hit\n"
4110 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4111 "\t The first time do trap is hit and it disables tracing, the\n"
4112 "\t counter will decrement to 2. If tracing is already disabled,\n"
4113 "\t the counter will not decrement. It only decrements when the\n"
4114 "\t trigger did work\n"
4115 "\t To remove trigger without count:\n"
4116 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4117 "\t To remove trigger with a count:\n"
4118 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4119 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4120 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4121 "\t modules: Can select a group via module command :mod:\n"
4122 "\t Does not accept triggers\n"
4123 #endif /* CONFIG_DYNAMIC_FTRACE */
4124 #ifdef CONFIG_FUNCTION_TRACER
4125 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4126 "\t\t (function)\n"
4127 #endif
4128 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4129 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4130 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4131 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4132 #endif
4133 #ifdef CONFIG_TRACER_SNAPSHOT
4134 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4135 "\t\t\t snapshot buffer. Read the contents for more\n"
4136 "\t\t\t information\n"
4137 #endif
4138 #ifdef CONFIG_STACK_TRACER
4139 " stack_trace\t\t- Shows the max stack trace when active\n"
4140 " stack_max_size\t- Shows current max stack size that was traced\n"
4141 "\t\t\t Write into this file to reset the max size (trigger a\n"
4142 "\t\t\t new trace)\n"
4143 #ifdef CONFIG_DYNAMIC_FTRACE
4144 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4145 "\t\t\t traces\n"
4146 #endif
4147 #endif /* CONFIG_STACK_TRACER */
4148 " events/\t\t- Directory containing all trace event subsystems:\n"
4149 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4150 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4151 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4152 "\t\t\t events\n"
4153 " filter\t\t- If set, only events passing filter are traced\n"
4154 " events/<system>/<event>/\t- Directory containing control files for\n"
4155 "\t\t\t <event>:\n"
4156 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4157 " filter\t\t- If set, only events passing filter are traced\n"
4158 " trigger\t\t- If set, a command to perform when event is hit\n"
4159 "\t Format: <trigger>[:count][if <filter>]\n"
4160 "\t trigger: traceon, traceoff\n"
4161 "\t enable_event:<system>:<event>\n"
4162 "\t disable_event:<system>:<event>\n"
4163 #ifdef CONFIG_HIST_TRIGGERS
4164 "\t enable_hist:<system>:<event>\n"
4165 "\t disable_hist:<system>:<event>\n"
4166 #endif
4167 #ifdef CONFIG_STACKTRACE
4168 "\t\t stacktrace\n"
4169 #endif
4170 #ifdef CONFIG_TRACER_SNAPSHOT
4171 "\t\t snapshot\n"
4172 #endif
4173 #ifdef CONFIG_HIST_TRIGGERS
4174 "\t\t hist (see below)\n"
4175 #endif
4176 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4177 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4178 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4179 "\t events/block/block_unplug/trigger\n"
4180 "\t The first disables tracing every time block_unplug is hit.\n"
4181 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4182 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4183 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4184 "\t Like function triggers, the counter is only decremented if it\n"
4185 "\t enabled or disabled tracing.\n"
4186 "\t To remove a trigger without a count:\n"
4187 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4188 "\t To remove a trigger with a count:\n"
4189 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4190 "\t Filters can be ignored when removing a trigger.\n"
4191 #ifdef CONFIG_HIST_TRIGGERS
4192 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4193 "\t Format: hist:keys=<field1[,field2,...]>\n"
4194 "\t [:values=<field1[,field2,...]>]\n"
4195 "\t [:sort=<field1[,field2,...]>]\n"
4196 "\t [:size=#entries]\n"
4197 "\t [:pause][:continue][:clear]\n"
4198 "\t [:name=histname1]\n"
4199 "\t [if <filter>]\n\n"
4200 "\t When a matching event is hit, an entry is added to a hash\n"
4201 "\t table using the key(s) and value(s) named, and the value of a\n"
4202 "\t sum called 'hitcount' is incremented. Keys and values\n"
4203 "\t correspond to fields in the event's format description. Keys\n"
4204 "\t can be any field, or the special string 'stacktrace'.\n"
4205 "\t Compound keys consisting of up to two fields can be specified\n"
4206 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4207 "\t fields. Sort keys consisting of up to two fields can be\n"
4208 "\t specified using the 'sort' keyword. The sort direction can\n"
4209 "\t be modified by appending '.descending' or '.ascending' to a\n"
4210 "\t sort field. The 'size' parameter can be used to specify more\n"
4211 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4212 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4213 "\t its histogram data will be shared with other triggers of the\n"
4214 "\t same name, and trigger hits will update this common data.\n\n"
4215 "\t Reading the 'hist' file for the event will dump the hash\n"
4216 "\t table in its entirety to stdout. If there are multiple hist\n"
4217 "\t triggers attached to an event, there will be a table for each\n"
4218 "\t trigger in the output. The table displayed for a named\n"
4219 "\t trigger will be the same as any other instance having the\n"
4220 "\t same name. The default format used to display a given field\n"
4221 "\t can be modified by appending any of the following modifiers\n"
4222 "\t to the field name, as applicable:\n\n"
4223 "\t .hex display a number as a hex value\n"
4224 "\t .sym display an address as a symbol\n"
4225 "\t .sym-offset display an address as a symbol and offset\n"
4226 "\t .execname display a common_pid as a program name\n"
4227 "\t .syscall display a syscall id as a syscall name\n\n"
4228 "\t .log2 display log2 value rather than raw number\n\n"
4229 "\t The 'pause' parameter can be used to pause an existing hist\n"
4230 "\t trigger or to start a hist trigger but not log any events\n"
4231 "\t until told to do so. 'continue' can be used to start or\n"
4232 "\t restart a paused hist trigger.\n\n"
4233 "\t The 'clear' parameter will clear the contents of a running\n"
4234 "\t hist trigger and leave its current paused/active state\n"
4235 "\t unchanged.\n\n"
4236 "\t The enable_hist and disable_hist triggers can be used to\n"
4237 "\t have one event conditionally start and stop another event's\n"
4238 "\t already-attached hist trigger. The syntax is analagous to\n"
4239 "\t the enable_event and disable_event triggers.\n"
4240 #endif
4241 ;
4242
4243 static ssize_t
4244 tracing_readme_read(struct file *filp, char __user *ubuf,
4245 size_t cnt, loff_t *ppos)
4246 {
4247 return simple_read_from_buffer(ubuf, cnt, ppos,
4248 readme_msg, strlen(readme_msg));
4249 }
4250
4251 static const struct file_operations tracing_readme_fops = {
4252 .open = tracing_open_generic,
4253 .read = tracing_readme_read,
4254 .llseek = generic_file_llseek,
4255 };
4256
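/*
 * seq_file iterator over the saved cmdline map: walk the
 * map_cmdline_to_pid[] array, skipping unused slots, while holding
 * trace_cmdline_lock.
 */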
4257 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4258 {
4259 unsigned int *ptr = v;
4260
4261 if (*pos || m->count)
4262 ptr++;
4263
4264 (*pos)++;
4265
4266 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4267 ptr++) {
4268 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4269 continue;
4270
4271 return ptr;
4272 }
4273
4274 return NULL;
4275 }
4276
4277 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4278 {
4279 void *v;
4280 loff_t l = 0;
4281
4282 preempt_disable();
4283 arch_spin_lock(&trace_cmdline_lock);
4284
4285 v = &savedcmd->map_cmdline_to_pid[0];
4286 while (l <= *pos) {
4287 v = saved_cmdlines_next(m, v, &l);
4288 if (!v)
4289 return NULL;
4290 }
4291
4292 return v;
4293 }
4294
4295 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4296 {
4297 arch_spin_unlock(&trace_cmdline_lock);
4298 preempt_enable();
4299 }
4300
4301 static int saved_cmdlines_show(struct seq_file *m, void *v)
4302 {
4303 char buf[TASK_COMM_LEN];
4304 unsigned int *pid = v;
4305
4306 __trace_find_cmdline(*pid, buf);
4307 seq_printf(m, "%d %s\n", *pid, buf);
4308 return 0;
4309 }
4310
4311 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4312 .start = saved_cmdlines_start,
4313 .next = saved_cmdlines_next,
4314 .stop = saved_cmdlines_stop,
4315 .show = saved_cmdlines_show,
4316 };
4317
4318 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4319 {
4320 if (tracing_disabled)
4321 return -ENODEV;
4322
4323 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4324 }
4325
4326 static const struct file_operations tracing_saved_cmdlines_fops = {
4327 .open = tracing_saved_cmdlines_open,
4328 .read = seq_read,
4329 .llseek = seq_lseek,
4330 .release = seq_release,
4331 };
4332
4333 static ssize_t
4334 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4335 size_t cnt, loff_t *ppos)
4336 {
4337 char buf[64];
4338 int r;
4339
4340 arch_spin_lock(&trace_cmdline_lock);
4341 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4342 arch_spin_unlock(&trace_cmdline_lock);
4343
4344 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4345 }
4346
4347 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4348 {
4349 kfree(s->saved_cmdlines);
4350 kfree(s->map_cmdline_to_pid);
4351 kfree(s);
4352 }
4353
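/*
 * Allocate a new saved_cmdlines buffer with @val entries, swap it in
 * under trace_cmdline_lock, and free the old one.
 */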
4354 static int tracing_resize_saved_cmdlines(unsigned int val)
4355 {
4356 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4357
4358 s = kmalloc(sizeof(*s), GFP_KERNEL);
4359 if (!s)
4360 return -ENOMEM;
4361
4362 if (allocate_cmdlines_buffer(val, s) < 0) {
4363 kfree(s);
4364 return -ENOMEM;
4365 }
4366
4367 arch_spin_lock(&trace_cmdline_lock);
4368 savedcmd_temp = savedcmd;
4369 savedcmd = s;
4370 arch_spin_unlock(&trace_cmdline_lock);
4371 free_saved_cmdlines_buffer(savedcmd_temp);
4372
4373 return 0;
4374 }
4375
4376 static ssize_t
4377 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4378 size_t cnt, loff_t *ppos)
4379 {
4380 unsigned long val;
4381 int ret;
4382
4383 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4384 if (ret)
4385 return ret;
4386
4387 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4388 if (!val || val > PID_MAX_DEFAULT)
4389 return -EINVAL;
4390
4391 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4392 if (ret < 0)
4393 return ret;
4394
4395 *ppos += cnt;
4396
4397 return cnt;
4398 }
4399
4400 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4401 .open = tracing_open_generic,
4402 .read = tracing_saved_cmdlines_size_read,
4403 .write = tracing_saved_cmdlines_size_write,
4404 };
4405
4406 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
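/*
 * The enum maps are chained arrays, each with a head and tail marker.
 * If @ptr landed on a tail item, follow tail.next to the next array
 * and step over its head to reach the first real map entry.
 */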
4407 static union trace_enum_map_item *
4408 update_enum_map(union trace_enum_map_item *ptr)
4409 {
4410 if (!ptr->map.enum_string) {
4411 if (ptr->tail.next) {
4412 ptr = ptr->tail.next;
4413 /* Set ptr to the next real item (skip head) */
4414 ptr++;
4415 } else
4416 return NULL;
4417 }
4418 return ptr;
4419 }
4420
4421 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4422 {
4423 union trace_enum_map_item *ptr = v;
4424
4425 /*
4426 * Paranoid! If ptr points to end, we don't want to increment past it.
4427 * This really should never happen.
4428 */
4429 ptr = update_enum_map(ptr);
4430 if (WARN_ON_ONCE(!ptr))
4431 return NULL;
4432
4433 ptr++;
4434
4435 (*pos)++;
4436
4437 ptr = update_enum_map(ptr);
4438
4439 return ptr;
4440 }
4441
4442 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4443 {
4444 union trace_enum_map_item *v;
4445 loff_t l = 0;
4446
4447 mutex_lock(&trace_enum_mutex);
4448
4449 v = trace_enum_maps;
4450 if (v)
4451 v++;
4452
4453 while (v && l < *pos) {
4454 v = enum_map_next(m, v, &l);
4455 }
4456
4457 return v;
4458 }
4459
4460 static void enum_map_stop(struct seq_file *m, void *v)
4461 {
4462 mutex_unlock(&trace_enum_mutex);
4463 }
4464
4465 static int enum_map_show(struct seq_file *m, void *v)
4466 {
4467 union trace_enum_map_item *ptr = v;
4468
4469 seq_printf(m, "%s %ld (%s)\n",
4470 ptr->map.enum_string, ptr->map.enum_value,
4471 ptr->map.system);
4472
4473 return 0;
4474 }
4475
4476 static const struct seq_operations tracing_enum_map_seq_ops = {
4477 .start = enum_map_start,
4478 .next = enum_map_next,
4479 .stop = enum_map_stop,
4480 .show = enum_map_show,
4481 };
4482
4483 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4484 {
4485 if (tracing_disabled)
4486 return -ENODEV;
4487
4488 return seq_open(filp, &tracing_enum_map_seq_ops);
4489 }
4490
4491 static const struct file_operations tracing_enum_map_fops = {
4492 .open = tracing_enum_map_open,
4493 .read = seq_read,
4494 .llseek = seq_lseek,
4495 .release = seq_release,
4496 };
4497
4498 static inline union trace_enum_map_item *
4499 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4500 {
4501 /* Return tail of array given the head */
4502 return ptr + ptr->head.length + 1;
4503 }
4504
4505 static void
4506 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4507 int len)
4508 {
4509 struct trace_enum_map **stop;
4510 struct trace_enum_map **map;
4511 union trace_enum_map_item *map_array;
4512 union trace_enum_map_item *ptr;
4513
4514 stop = start + len;
4515
4516 /*
4517 * The trace_enum_maps contains the map plus a head and tail item,
4518 * where the head holds the module and the length of the array, and the
4519 * tail holds a pointer to the next list.
4520 */
4521 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4522 if (!map_array) {
4523 pr_warn("Unable to allocate trace enum mapping\n");
4524 return;
4525 }
4526
4527 mutex_lock(&trace_enum_mutex);
4528
4529 if (!trace_enum_maps)
4530 trace_enum_maps = map_array;
4531 else {
4532 ptr = trace_enum_maps;
4533 for (;;) {
4534 ptr = trace_enum_jmp_to_tail(ptr);
4535 if (!ptr->tail.next)
4536 break;
4537 ptr = ptr->tail.next;
4538
4539 }
4540 ptr->tail.next = map_array;
4541 }
4542 map_array->head.mod = mod;
4543 map_array->head.length = len;
4544 map_array++;
4545
4546 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4547 map_array->map = **map;
4548 map_array++;
4549 }
4550 memset(map_array, 0, sizeof(*map_array));
4551
4552 mutex_unlock(&trace_enum_mutex);
4553 }
4554
4555 static void trace_create_enum_file(struct dentry *d_tracer)
4556 {
4557 trace_create_file("enum_map", 0444, d_tracer,
4558 NULL, &tracing_enum_map_fops);
4559 }
4560
4561 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4562 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4563 static inline void trace_insert_enum_map_file(struct module *mod,
4564 struct trace_enum_map **start, int len) { }
4565 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4566
4567 static void trace_insert_enum_map(struct module *mod,
4568 struct trace_enum_map **start, int len)
4569 {
4570 struct trace_enum_map **map;
4571
4572 if (len <= 0)
4573 return;
4574
4575 map = start;
4576
4577 trace_event_enum_update(map, len);
4578
4579 trace_insert_enum_map_file(mod, start, len);
4580 }
4581
4582 static ssize_t
4583 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4584 size_t cnt, loff_t *ppos)
4585 {
4586 struct trace_array *tr = filp->private_data;
4587 char buf[MAX_TRACER_SIZE+2];
4588 int r;
4589
4590 mutex_lock(&trace_types_lock);
4591 r = sprintf(buf, "%s\n", tr->current_trace->name);
4592 mutex_unlock(&trace_types_lock);
4593
4594 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4595 }
4596
4597 int tracer_init(struct tracer *t, struct trace_array *tr)
4598 {
4599 tracing_reset_online_cpus(&tr->trace_buffer);
4600 return t->init(tr);
4601 }
4602
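/* Update the per-cpu bookkeeping of @buf's entry count to @val. */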
4603 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4604 {
4605 int cpu;
4606
4607 for_each_tracing_cpu(cpu)
4608 per_cpu_ptr(buf->data, cpu)->entries = val;
4609 }
4610
4611 #ifdef CONFIG_TRACER_MAX_TRACE
4612 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
4613 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4614 struct trace_buffer *size_buf, int cpu_id)
4615 {
4616 int cpu, ret = 0;
4617
4618 if (cpu_id == RING_BUFFER_ALL_CPUS) {
4619 for_each_tracing_cpu(cpu) {
4620 ret = ring_buffer_resize(trace_buf->buffer,
4621 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4622 if (ret < 0)
4623 break;
4624 per_cpu_ptr(trace_buf->data, cpu)->entries =
4625 per_cpu_ptr(size_buf->data, cpu)->entries;
4626 }
4627 } else {
4628 ret = ring_buffer_resize(trace_buf->buffer,
4629 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4630 if (ret == 0)
4631 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4632 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4633 }
4634
4635 return ret;
4636 }
4637 #endif /* CONFIG_TRACER_MAX_TRACE */
4638
4639 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4640 unsigned long size, int cpu)
4641 {
4642 int ret;
4643
4644 /*
4645 * If kernel or user changes the size of the ring buffer
4646 * we use the size that was given, and we can forget about
4647 * expanding it later.
4648 */
4649 ring_buffer_expanded = true;
4650
4651 /* May be called before buffers are initialized */
4652 if (!tr->trace_buffer.buffer)
4653 return 0;
4654
4655 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4656 if (ret < 0)
4657 return ret;
4658
4659 #ifdef CONFIG_TRACER_MAX_TRACE
4660 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4661 !tr->current_trace->use_max_tr)
4662 goto out;
4663
4664 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4665 if (ret < 0) {
4666 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4667 &tr->trace_buffer, cpu);
4668 if (r < 0) {
4669 /*
4670 * AARGH! We are left with different
4671 * size max buffer!!!!
4672 * The max buffer is our "snapshot" buffer.
4673 * When a tracer needs a snapshot (one of the
4674 * latency tracers), it swaps the max buffer
4675 * with the saved snap shot. We succeeded to
4676 * update the size of the main buffer, but failed to
4677 * update the size of the max buffer. But when we tried
4678 * to reset the main buffer to the original size, we
4679 * failed there too. This is very unlikely to
4680 * happen, but if it does, warn and kill all
4681 * tracing.
4682 */
4683 WARN_ON(1);
4684 tracing_disabled = 1;
4685 }
4686 return ret;
4687 }
4688
4689 if (cpu == RING_BUFFER_ALL_CPUS)
4690 set_buffer_entries(&tr->max_buffer, size);
4691 else
4692 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4693
4694 out:
4695 #endif /* CONFIG_TRACER_MAX_TRACE */
4696
4697 if (cpu == RING_BUFFER_ALL_CPUS)
4698 set_buffer_entries(&tr->trace_buffer, size);
4699 else
4700 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4701
4702 return ret;
4703 }
4704
4705 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4706 unsigned long size, int cpu_id)
4707 {
4708 int ret = size;
4709
4710 mutex_lock(&trace_types_lock);
4711
4712 if (cpu_id != RING_BUFFER_ALL_CPUS) {
4713 /* make sure this cpu is enabled in the mask */
4714 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4715 ret = -EINVAL;
4716 goto out;
4717 }
4718 }
4719
4720 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4721 if (ret < 0)
4722 ret = -ENOMEM;
4723
4724 out:
4725 mutex_unlock(&trace_types_lock);
4726
4727 return ret;
4728 }
4729
4730
4731 /**
4732 * tracing_update_buffers - used by tracing facility to expand ring buffers
4733 *
4734 * To save memory when tracing is configured in but never used, the
4735 * ring buffers are initially set to a minimum size. Once a user
4736 * starts to use the tracing facility, they need to grow to their
4737 * default size.
4738 *
4739 * This function is to be called when a tracer is about to be used.
4740 */
4741 int tracing_update_buffers(void)
4742 {
4743 int ret = 0;
4744
4745 mutex_lock(&trace_types_lock);
4746 if (!ring_buffer_expanded)
4747 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4748 RING_BUFFER_ALL_CPUS);
4749 mutex_unlock(&trace_types_lock);
4750
4751 return ret;
4752 }
4753
4754 struct trace_option_dentry;
4755
4756 static void
4757 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4758
4759 /*
4760 * Used to clear out the tracer before deletion of an instance.
4761 * Must have trace_types_lock held.
4762 */
4763 static void tracing_set_nop(struct trace_array *tr)
4764 {
4765 if (tr->current_trace == &nop_trace)
4766 return;
4767
4768 tr->current_trace->enabled--;
4769
4770 if (tr->current_trace->reset)
4771 tr->current_trace->reset(tr);
4772
4773 tr->current_trace = &nop_trace;
4774 }
4775
4776 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4777 {
4778 /* Only enable if the directory has been created already. */
4779 if (!tr->dir)
4780 return;
4781
4782 create_trace_option_files(tr, t);
4783 }
4784
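/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it
 * has not been expanded yet, tear down the current tracer (dropping
 * the snapshot buffer if the new tracer does not use it), allocate a
 * snapshot if the new tracer needs one, then init and enable it.
 */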
4785 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4786 {
4787 struct tracer *t;
4788 #ifdef CONFIG_TRACER_MAX_TRACE
4789 bool had_max_tr;
4790 #endif
4791 int ret = 0;
4792
4793 mutex_lock(&trace_types_lock);
4794
4795 if (!ring_buffer_expanded) {
4796 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4797 RING_BUFFER_ALL_CPUS);
4798 if (ret < 0)
4799 goto out;
4800 ret = 0;
4801 }
4802
4803 for (t = trace_types; t; t = t->next) {
4804 if (strcmp(t->name, buf) == 0)
4805 break;
4806 }
4807 if (!t) {
4808 ret = -EINVAL;
4809 goto out;
4810 }
4811 if (t == tr->current_trace)
4812 goto out;
4813
4814 /* Some tracers are only allowed for the top level buffer */
4815 if (!trace_ok_for_array(t, tr)) {
4816 ret = -EINVAL;
4817 goto out;
4818 }
4819
4820 /* If trace pipe files are being read, we can't change the tracer */
4821 if (tr->current_trace->ref) {
4822 ret = -EBUSY;
4823 goto out;
4824 }
4825
4826 trace_branch_disable();
4827
4828 tr->current_trace->enabled--;
4829
4830 if (tr->current_trace->reset)
4831 tr->current_trace->reset(tr);
4832
4833 /* Current trace needs to be nop_trace before synchronize_sched */
4834 tr->current_trace = &nop_trace;
4835
4836 #ifdef CONFIG_TRACER_MAX_TRACE
4837 had_max_tr = tr->allocated_snapshot;
4838
4839 if (had_max_tr && !t->use_max_tr) {
4840 /*
4841 * We need to make sure that the update_max_tr sees that
4842 * current_trace changed to nop_trace to keep it from
4843 * swapping the buffers after we resize it.
4844 * update_max_tr() is called with interrupts disabled,
4845 * so a synchronize_sched() is sufficient.
4846 */
4847 synchronize_sched();
4848 free_snapshot(tr);
4849 }
4850 #endif
4851
4852 #ifdef CONFIG_TRACER_MAX_TRACE
4853 if (t->use_max_tr && !had_max_tr) {
4854 ret = alloc_snapshot(tr);
4855 if (ret < 0)
4856 goto out;
4857 }
4858 #endif
4859
4860 if (t->init) {
4861 ret = tracer_init(t, tr);
4862 if (ret)
4863 goto out;
4864 }
4865
4866 tr->current_trace = t;
4867 tr->current_trace->enabled++;
4868 trace_branch_enable(tr);
4869 out:
4870 mutex_unlock(&trace_types_lock);
4871
4872 return ret;
4873 }
4874
4875 static ssize_t
4876 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4877 size_t cnt, loff_t *ppos)
4878 {
4879 struct trace_array *tr = filp->private_data;
4880 char buf[MAX_TRACER_SIZE+1];
4881 int i;
4882 size_t ret;
4883 int err;
4884
4885 ret = cnt;
4886
4887 if (cnt > MAX_TRACER_SIZE)
4888 cnt = MAX_TRACER_SIZE;
4889
4890 if (copy_from_user(buf, ubuf, cnt))
4891 return -EFAULT;
4892
4893 buf[cnt] = 0;
4894
4895 /* strip ending whitespace. */
4896 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4897 buf[i] = 0;
4898
4899 err = tracing_set_tracer(tr, buf);
4900 if (err)
4901 return err;
4902
4903 *ppos += ret;
4904
4905 return ret;
4906 }
4907
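/*
 * Helpers for latency values that are stored in nanoseconds but
 * exposed to userspace in microseconds (a stored -1 reads back as -1).
 */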
4908 static ssize_t
4909 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4910 size_t cnt, loff_t *ppos)
4911 {
4912 char buf[64];
4913 int r;
4914
4915 r = snprintf(buf, sizeof(buf), "%ld\n",
4916 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4917 if (r > sizeof(buf))
4918 r = sizeof(buf);
4919 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4920 }
4921
4922 static ssize_t
4923 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4924 size_t cnt, loff_t *ppos)
4925 {
4926 unsigned long val;
4927 int ret;
4928
4929 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4930 if (ret)
4931 return ret;
4932
4933 *ptr = val * 1000;
4934
4935 return cnt;
4936 }
4937
4938 static ssize_t
4939 tracing_thresh_read(struct file *filp, char __user *ubuf,
4940 size_t cnt, loff_t *ppos)
4941 {
4942 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4943 }
4944
4945 static ssize_t
4946 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4947 size_t cnt, loff_t *ppos)
4948 {
4949 struct trace_array *tr = filp->private_data;
4950 int ret;
4951
4952 mutex_lock(&trace_types_lock);
4953 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4954 if (ret < 0)
4955 goto out;
4956
4957 if (tr->current_trace->update_thresh) {
4958 ret = tr->current_trace->update_thresh(tr);
4959 if (ret < 0)
4960 goto out;
4961 }
4962
4963 ret = cnt;
4964 out:
4965 mutex_unlock(&trace_types_lock);
4966
4967 return ret;
4968 }
4969
4970 #ifdef CONFIG_TRACER_MAX_TRACE
4971
4972 static ssize_t
4973 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4974 size_t cnt, loff_t *ppos)
4975 {
4976 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4977 }
4978
4979 static ssize_t
4980 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4981 size_t cnt, loff_t *ppos)
4982 {
4983 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4984 }
4985
4986 #endif
4987
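/*
 * Open trace_pipe: allocate a trace_iterator for consuming reads and
 * take a reference on both the trace array and the current tracer so
 * the tracer cannot be switched out while the pipe is open.
 */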
4988 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4989 {
4990 struct trace_array *tr = inode->i_private;
4991 struct trace_iterator *iter;
4992 int ret = 0;
4993
4994 if (tracing_disabled)
4995 return -ENODEV;
4996
4997 if (trace_array_get(tr) < 0)
4998 return -ENODEV;
4999
5000 mutex_lock(&trace_types_lock);
5001
5002 /* create a buffer to store the information to pass to userspace */
5003 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5004 if (!iter) {
5005 ret = -ENOMEM;
5006 __trace_array_put(tr);
5007 goto out;
5008 }
5009
5010 trace_seq_init(&iter->seq);
5011 iter->trace = tr->current_trace;
5012
5013 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5014 ret = -ENOMEM;
5015 goto fail;
5016 }
5017
5018 /* trace pipe does not show start of buffer */
5019 cpumask_setall(iter->started);
5020
5021 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5022 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5023
5024 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5025 if (trace_clocks[tr->clock_id].in_ns)
5026 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5027
5028 iter->tr = tr;
5029 iter->trace_buffer = &tr->trace_buffer;
5030 iter->cpu_file = tracing_get_cpu(inode);
5031 mutex_init(&iter->mutex);
5032 filp->private_data = iter;
5033
5034 if (iter->trace->pipe_open)
5035 iter->trace->pipe_open(iter);
5036
5037 nonseekable_open(inode, filp);
5038
5039 tr->current_trace->ref++;
5040 out:
5041 mutex_unlock(&trace_types_lock);
5042 return ret;
5043
5044 fail:
5045 kfree(iter->trace);
5046 kfree(iter);
5047 __trace_array_put(tr);
5048 mutex_unlock(&trace_types_lock);
5049 return ret;
5050 }
5051
5052 static int tracing_release_pipe(struct inode *inode, struct file *file)
5053 {
5054 struct trace_iterator *iter = file->private_data;
5055 struct trace_array *tr = inode->i_private;
5056
5057 mutex_lock(&trace_types_lock);
5058
5059 tr->current_trace->ref--;
5060
5061 if (iter->trace->pipe_close)
5062 iter->trace->pipe_close(iter);
5063
5064 mutex_unlock(&trace_types_lock);
5065
5066 free_cpumask_var(iter->started);
5067 mutex_destroy(&iter->mutex);
5068 kfree(iter);
5069
5070 trace_array_put(tr);
5071
5072 return 0;
5073 }
5074
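/*
 * Poll helper shared by trace_pipe and the buffer files: static
 * iterators are always readable; otherwise report readable right away
 * in blocking mode or wait on the ring buffer.
 */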
5075 static unsigned int
5076 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5077 {
5078 struct trace_array *tr = iter->tr;
5079
5080 /* Iterators are static; they should be either filled or empty */
5081 if (trace_buffer_iter(iter, iter->cpu_file))
5082 return POLLIN | POLLRDNORM;
5083
5084 if (tr->trace_flags & TRACE_ITER_BLOCK)
5085 /*
5086 * Always select as readable when in blocking mode
5087 */
5088 return POLLIN | POLLRDNORM;
5089 else
5090 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5091 filp, poll_table);
5092 }
5093
5094 static unsigned int
5095 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5096 {
5097 struct trace_iterator *iter = filp->private_data;
5098
5099 return trace_poll(iter, filp, poll_table);
5100 }
5101
5102 /* Must be called with iter->mutex held. */
5103 static int tracing_wait_pipe(struct file *filp)
5104 {
5105 struct trace_iterator *iter = filp->private_data;
5106 int ret;
5107
5108 while (trace_empty(iter)) {
5109
5110 if ((filp->f_flags & O_NONBLOCK)) {
5111 return -EAGAIN;
5112 }
5113
5114 /*
5115 * Keep blocking while the buffer is empty, even if tracing is
5116 * disabled, as long as we have never read anything. This allows
5117 * a user to cat this file, and then enable tracing. But after
5118 * we have read something, we give an EOF when tracing is
5119 * disabled again.
5120 *
5121 * iter->pos will be 0 if we haven't read anything.
5122 */
5123 if (!tracing_is_on() && iter->pos)
5124 break;
5125
5126 mutex_unlock(&iter->mutex);
5127
5128 ret = wait_on_pipe(iter, false);
5129
5130 mutex_lock(&iter->mutex);
5131
5132 if (ret)
5133 return ret;
5134 }
5135
5136 return 1;
5137 }
5138
5139 /*
5140 * Consumer reader.
5141 */
5142 static ssize_t
5143 tracing_read_pipe(struct file *filp, char __user *ubuf,
5144 size_t cnt, loff_t *ppos)
5145 {
5146 struct trace_iterator *iter = filp->private_data;
5147 ssize_t sret;
5148
5149 /* return any leftover data */
5150 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5151 if (sret != -EBUSY)
5152 return sret;
5153
5154 trace_seq_init(&iter->seq);
5155
5156 /*
5157 * Avoid more than one consumer on a single file descriptor
5158 * This is just a matter of traces coherency, the ring buffer itself
5159 * is protected.
5160 */
5161 mutex_lock(&iter->mutex);
5162 if (iter->trace->read) {
5163 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5164 if (sret)
5165 goto out;
5166 }
5167
5168 waitagain:
5169 sret = tracing_wait_pipe(filp);
5170 if (sret <= 0)
5171 goto out;
5172
5173 /* stop when tracing is finished */
5174 if (trace_empty(iter)) {
5175 sret = 0;
5176 goto out;
5177 }
5178
5179 if (cnt >= PAGE_SIZE)
5180 cnt = PAGE_SIZE - 1;
5181
5182 /* reset all but tr, trace, and overruns */
5183 memset(&iter->seq, 0,
5184 sizeof(struct trace_iterator) -
5185 offsetof(struct trace_iterator, seq));
5186 cpumask_clear(iter->started);
5187 iter->pos = -1;
5188
5189 trace_event_read_lock();
5190 trace_access_lock(iter->cpu_file);
5191 while (trace_find_next_entry_inc(iter) != NULL) {
5192 enum print_line_t ret;
5193 int save_len = iter->seq.seq.len;
5194
5195 ret = print_trace_line(iter);
5196 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5197 /* don't print partial lines */
5198 iter->seq.seq.len = save_len;
5199 break;
5200 }
5201 if (ret != TRACE_TYPE_NO_CONSUME)
5202 trace_consume(iter);
5203
5204 if (trace_seq_used(&iter->seq) >= cnt)
5205 break;
5206
5207 /*
5208 * Setting the full flag means we reached the trace_seq buffer
5209 * size and we should leave by partial output condition above.
5210 * One of the trace_seq_* functions is not used properly.
5211 */
5212 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5213 iter->ent->type);
5214 }
5215 trace_access_unlock(iter->cpu_file);
5216 trace_event_read_unlock();
5217
5218 /* Now copy what we have to the user */
5219 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5220 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5221 trace_seq_init(&iter->seq);
5222
5223 /*
5224 * If there was nothing to send to user, in spite of consuming trace
5225 * entries, go back to wait for more entries.
5226 */
5227 if (sret == -EBUSY)
5228 goto waitagain;
5229
5230 out:
5231 mutex_unlock(&iter->mutex);
5232
5233 return sret;
5234 }
5235
5236 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5237 unsigned int idx)
5238 {
5239 __free_page(spd->pages[idx]);
5240 }
5241
5242 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5243 .can_merge = 0,
5244 .confirm = generic_pipe_buf_confirm,
5245 .release = generic_pipe_buf_release,
5246 .steal = generic_pipe_buf_steal,
5247 .get = generic_pipe_buf_get,
5248 };
5249
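/*
 * Format trace entries into the page-sized seq buffer for splice,
 * consuming entries as they are printed. Returns how much of @rem
 * remains.
 */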
5250 static size_t
5251 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5252 {
5253 size_t count;
5254 int save_len;
5255 int ret;
5256
5257 /* Seq buffer is page-sized, exactly what we need. */
5258 for (;;) {
5259 save_len = iter->seq.seq.len;
5260 ret = print_trace_line(iter);
5261
5262 if (trace_seq_has_overflowed(&iter->seq)) {
5263 iter->seq.seq.len = save_len;
5264 break;
5265 }
5266
5267 /*
5268 * This should not be hit, because it should only
5269 * be set if the iter->seq overflowed. But check it
5270 * anyway to be safe.
5271 */
5272 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5273 iter->seq.seq.len = save_len;
5274 break;
5275 }
5276
5277 count = trace_seq_used(&iter->seq) - save_len;
5278 if (rem < count) {
5279 rem = 0;
5280 iter->seq.seq.len = save_len;
5281 break;
5282 }
5283
5284 if (ret != TRACE_TYPE_NO_CONSUME)
5285 trace_consume(iter);
5286 rem -= count;
5287 if (!trace_find_next_entry_inc(iter)) {
5288 rem = 0;
5289 iter->ent = NULL;
5290 break;
5291 }
5292 }
5293
5294 return rem;
5295 }
5296
5297 static ssize_t tracing_splice_read_pipe(struct file *filp,
5298 loff_t *ppos,
5299 struct pipe_inode_info *pipe,
5300 size_t len,
5301 unsigned int flags)
5302 {
5303 struct page *pages_def[PIPE_DEF_BUFFERS];
5304 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5305 struct trace_iterator *iter = filp->private_data;
5306 struct splice_pipe_desc spd = {
5307 .pages = pages_def,
5308 .partial = partial_def,
5309 .nr_pages = 0, /* This gets updated below. */
5310 .nr_pages_max = PIPE_DEF_BUFFERS,
5311 .flags = flags,
5312 .ops = &tracing_pipe_buf_ops,
5313 .spd_release = tracing_spd_release_pipe,
5314 };
5315 ssize_t ret;
5316 size_t rem;
5317 unsigned int i;
5318
5319 if (splice_grow_spd(pipe, &spd))
5320 return -ENOMEM;
5321
5322 mutex_lock(&iter->mutex);
5323
5324 if (iter->trace->splice_read) {
5325 ret = iter->trace->splice_read(iter, filp,
5326 ppos, pipe, len, flags);
5327 if (ret)
5328 goto out_err;
5329 }
5330
5331 ret = tracing_wait_pipe(filp);
5332 if (ret <= 0)
5333 goto out_err;
5334
5335 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5336 ret = -EFAULT;
5337 goto out_err;
5338 }
5339
5340 trace_event_read_lock();
5341 trace_access_lock(iter->cpu_file);
5342
5343 /* Fill as many pages as possible. */
5344 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5345 spd.pages[i] = alloc_page(GFP_KERNEL);
5346 if (!spd.pages[i])
5347 break;
5348
5349 rem = tracing_fill_pipe_page(rem, iter);
5350
5351 /* Copy the data into the page, so we can start over. */
5352 ret = trace_seq_to_buffer(&iter->seq,
5353 page_address(spd.pages[i]),
5354 trace_seq_used(&iter->seq));
5355 if (ret < 0) {
5356 __free_page(spd.pages[i]);
5357 break;
5358 }
5359 spd.partial[i].offset = 0;
5360 spd.partial[i].len = trace_seq_used(&iter->seq);
5361
5362 trace_seq_init(&iter->seq);
5363 }
5364
5365 trace_access_unlock(iter->cpu_file);
5366 trace_event_read_unlock();
5367 mutex_unlock(&iter->mutex);
5368
5369 spd.nr_pages = i;
5370
5371 if (i)
5372 ret = splice_to_pipe(pipe, &spd);
5373 else
5374 ret = 0;
5375 out:
5376 splice_shrink_spd(&spd);
5377 return ret;
5378
5379 out_err:
5380 mutex_unlock(&iter->mutex);
5381 goto out;
5382 }
5383
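/*
 * Read buffer_size_kb: report the per-cpu buffer size in KB, or "X"
 * when all CPUs were requested but their sizes differ.
 */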
5384 static ssize_t
5385 tracing_entries_read(struct file *filp, char __user *ubuf,
5386 size_t cnt, loff_t *ppos)
5387 {
5388 struct inode *inode = file_inode(filp);
5389 struct trace_array *tr = inode->i_private;
5390 int cpu = tracing_get_cpu(inode);
5391 char buf[64];
5392 int r = 0;
5393 ssize_t ret;
5394
5395 mutex_lock(&trace_types_lock);
5396
5397 if (cpu == RING_BUFFER_ALL_CPUS) {
5398 int cpu, buf_size_same;
5399 unsigned long size;
5400
5401 size = 0;
5402 buf_size_same = 1;
5403 /* check if all cpu sizes are same */
5404 for_each_tracing_cpu(cpu) {
5405 /* fill in the size from first enabled cpu */
5406 if (size == 0)
5407 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5408 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5409 buf_size_same = 0;
5410 break;
5411 }
5412 }
5413
5414 if (buf_size_same) {
5415 if (!ring_buffer_expanded)
5416 r = sprintf(buf, "%lu (expanded: %lu)\n",
5417 size >> 10,
5418 trace_buf_size >> 10);
5419 else
5420 r = sprintf(buf, "%lu\n", size >> 10);
5421 } else
5422 r = sprintf(buf, "X\n");
5423 } else
5424 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5425
5426 mutex_unlock(&trace_types_lock);
5427
5428 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5429 return ret;
5430 }
5431
5432 static ssize_t
5433 tracing_entries_write(struct file *filp, const char __user *ubuf,
5434 size_t cnt, loff_t *ppos)
5435 {
5436 struct inode *inode = file_inode(filp);
5437 struct trace_array *tr = inode->i_private;
5438 unsigned long val;
5439 int ret;
5440
5441 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5442 if (ret)
5443 return ret;
5444
5445 /* must have at least 1 entry */
5446 if (!val)
5447 return -EINVAL;
5448
5449 /* value is in KB */
5450 val <<= 10;
5451 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5452 if (ret < 0)
5453 return ret;
5454
5455 *ppos += cnt;
5456
5457 return cnt;
5458 }
5459
5460 static ssize_t
5461 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5462 size_t cnt, loff_t *ppos)
5463 {
5464 struct trace_array *tr = filp->private_data;
5465 char buf[64];
5466 int r, cpu;
5467 unsigned long size = 0, expanded_size = 0;
5468
5469 mutex_lock(&trace_types_lock);
5470 for_each_tracing_cpu(cpu) {
5471 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5472 if (!ring_buffer_expanded)
5473 expanded_size += trace_buf_size >> 10;
5474 }
5475 if (ring_buffer_expanded)
5476 r = sprintf(buf, "%lu\n", size);
5477 else
5478 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5479 mutex_unlock(&trace_types_lock);
5480
5481 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5482 }
5483
5484 static ssize_t
5485 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5486 size_t cnt, loff_t *ppos)
5487 {
5488 /*
5489 * There is no need to read what the user has written; this function
5490 * only exists so that using "echo" on the file does not return an error
5491 */
5492
5493 *ppos += cnt;
5494
5495 return cnt;
5496 }
5497
5498 static int
5499 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5500 {
5501 struct trace_array *tr = inode->i_private;
5502
5503 /* disable tracing ? */
5504 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5505 tracer_tracing_off(tr);
5506 /* resize the ring buffer to 0 */
5507 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5508
5509 trace_array_put(tr);
5510
5511 return 0;
5512 }
5513
5514 static ssize_t
5515 tracing_mark_write(struct file *filp, const char __user *ubuf,
5516 size_t cnt, loff_t *fpos)
5517 {
5518 unsigned long addr = (unsigned long)ubuf;
5519 struct trace_array *tr = filp->private_data;
5520 struct ring_buffer_event *event;
5521 struct ring_buffer *buffer;
5522 struct print_entry *entry;
5523 unsigned long irq_flags;
5524 struct page *pages[2];
5525 void *map_page[2];
5526 int nr_pages = 1;
5527 ssize_t written;
5528 int offset;
5529 int size;
5530 int len;
5531 int ret;
5532 int i;
5533
5534 if (tracing_disabled)
5535 return -EINVAL;
5536
5537 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5538 return -EINVAL;
5539
5540 if (cnt > TRACE_BUF_SIZE)
5541 cnt = TRACE_BUF_SIZE;
5542
5543 /*
5544 * Userspace is injecting traces into the kernel trace buffer.
5545 * We want to be as non intrusive as possible.
5546 * To do so, we do not want to allocate any special buffers
5547 * or take any locks, but instead write the userspace data
5548 * straight into the ring buffer.
5549 *
5550 * First we need to pin the userspace buffer into memory.
5551 * Most likely it already is, because the caller just referenced
5552 * it, but there's no guarantee. By using get_user_pages_fast()
5553 * and kmap_atomic/kunmap_atomic() we can get access to the
5554 * pages directly. We then write the data directly into the
5555 * ring buffer.
5556 */
5557 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5558
5559 /* check if we cross pages */
5560 if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5561 nr_pages = 2;
5562
5563 offset = addr & (PAGE_SIZE - 1);
5564 addr &= PAGE_MASK;
5565
5566 ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5567 if (ret < nr_pages) {
5568 while (--ret >= 0)
5569 put_page(pages[ret]);
5570 written = -EFAULT;
5571 goto out;
5572 }
5573
5574 for (i = 0; i < nr_pages; i++)
5575 map_page[i] = kmap_atomic(pages[i]);
5576
5577 local_save_flags(irq_flags);
5578 size = sizeof(*entry) + cnt + 2; /* possible \n added */
5579 buffer = tr->trace_buffer.buffer;
5580 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5581 irq_flags, preempt_count());
5582 if (!event) {
5583 /* Ring buffer disabled, return as if not open for write */
5584 written = -EBADF;
5585 goto out_unlock;
5586 }
5587
5588 entry = ring_buffer_event_data(event);
5589 entry->ip = _THIS_IP_;
5590
5591 if (nr_pages == 2) {
5592 len = PAGE_SIZE - offset;
5593 memcpy(&entry->buf, map_page[0] + offset, len);
5594 memcpy(&entry->buf[len], map_page[1], cnt - len);
5595 } else
5596 memcpy(&entry->buf, map_page[0] + offset, cnt);
5597
5598 if (entry->buf[cnt - 1] != '\n') {
5599 entry->buf[cnt] = '\n';
5600 entry->buf[cnt + 1] = '\0';
5601 } else
5602 entry->buf[cnt] = '\0';
5603
5604 __buffer_unlock_commit(buffer, event);
5605
5606 written = cnt;
5607
5608 *fpos += written;
5609
5610 out_unlock:
5611 for (i = nr_pages - 1; i >= 0; i--) {
5612 kunmap_atomic(map_page[i]);
5613 put_page(pages[i]);
5614 }
5615 out:
5616 return written;
5617 }
5618
5619 static int tracing_clock_show(struct seq_file *m, void *v)
5620 {
5621 struct trace_array *tr = m->private;
5622 int i;
5623
5624 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5625 seq_printf(m,
5626 "%s%s%s%s", i ? " " : "",
5627 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5628 i == tr->clock_id ? "]" : "");
5629 seq_putc(m, '\n');
5630
5631 return 0;
5632 }
5633
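/*
 * Select the trace clock named @clockstr and reset the buffers, since
 * timestamps taken with different clocks cannot be compared.
 */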
5634 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5635 {
5636 int i;
5637
5638 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5639 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5640 break;
5641 }
5642 if (i == ARRAY_SIZE(trace_clocks))
5643 return -EINVAL;
5644
5645 mutex_lock(&trace_types_lock);
5646
5647 tr->clock_id = i;
5648
5649 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5650
5651 /*
5652 * New clock may not be consistent with the previous clock.
5653 * Reset the buffer so that it doesn't have incomparable timestamps.
5654 */
5655 tracing_reset_online_cpus(&tr->trace_buffer);
5656
5657 #ifdef CONFIG_TRACER_MAX_TRACE
5658 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5659 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5660 tracing_reset_online_cpus(&tr->max_buffer);
5661 #endif
5662
5663 mutex_unlock(&trace_types_lock);
5664
5665 return 0;
5666 }
5667
5668 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5669 size_t cnt, loff_t *fpos)
5670 {
5671 struct seq_file *m = filp->private_data;
5672 struct trace_array *tr = m->private;
5673 char buf[64];
5674 const char *clockstr;
5675 int ret;
5676
5677 if (cnt >= sizeof(buf))
5678 return -EINVAL;
5679
5680 if (copy_from_user(buf, ubuf, cnt))
5681 return -EFAULT;
5682
5683 buf[cnt] = 0;
5684
5685 clockstr = strstrip(buf);
5686
5687 ret = tracing_set_clock(tr, clockstr);
5688 if (ret)
5689 return ret;
5690
5691 *fpos += cnt;
5692
5693 return cnt;
5694 }
5695
5696 static int tracing_clock_open(struct inode *inode, struct file *file)
5697 {
5698 struct trace_array *tr = inode->i_private;
5699 int ret;
5700
5701 if (tracing_disabled)
5702 return -ENODEV;
5703
5704 if (trace_array_get(tr))
5705 return -ENODEV;
5706
5707 ret = single_open(file, tracing_clock_show, inode->i_private);
5708 if (ret < 0)
5709 trace_array_put(tr);
5710
5711 return ret;
5712 }
5713
5714 struct ftrace_buffer_info {
5715 struct trace_iterator iter;
5716 void *spare;
5717 unsigned int read;
5718 };
5719
5720 #ifdef CONFIG_TRACER_SNAPSHOT
5721 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5722 {
5723 struct trace_array *tr = inode->i_private;
5724 struct trace_iterator *iter;
5725 struct seq_file *m;
5726 int ret = 0;
5727
5728 if (trace_array_get(tr) < 0)
5729 return -ENODEV;
5730
5731 if (file->f_mode & FMODE_READ) {
5732 iter = __tracing_open(inode, file, true);
5733 if (IS_ERR(iter))
5734 ret = PTR_ERR(iter);
5735 } else {
5736 /* Writes still need the seq_file to hold the private data */
5737 ret = -ENOMEM;
5738 m = kzalloc(sizeof(*m), GFP_KERNEL);
5739 if (!m)
5740 goto out;
5741 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5742 if (!iter) {
5743 kfree(m);
5744 goto out;
5745 }
5746 ret = 0;
5747
5748 iter->tr = tr;
5749 iter->trace_buffer = &tr->max_buffer;
5750 iter->cpu_file = tracing_get_cpu(inode);
5751 m->private = iter;
5752 file->private_data = m;
5753 }
5754 out:
5755 if (ret < 0)
5756 trace_array_put(tr);
5757
5758 return ret;
5759 }
5760
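/*
 * Write to the snapshot file: 0 frees the snapshot buffer, 1 allocates
 * it if needed and swaps it with the live buffer, any other value
 * clears the snapshot contents.
 */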
5761 static ssize_t
5762 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5763 loff_t *ppos)
5764 {
5765 struct seq_file *m = filp->private_data;
5766 struct trace_iterator *iter = m->private;
5767 struct trace_array *tr = iter->tr;
5768 unsigned long val;
5769 int ret;
5770
5771 ret = tracing_update_buffers();
5772 if (ret < 0)
5773 return ret;
5774
5775 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5776 if (ret)
5777 return ret;
5778
5779 mutex_lock(&trace_types_lock);
5780
5781 if (tr->current_trace->use_max_tr) {
5782 ret = -EBUSY;
5783 goto out;
5784 }
5785
5786 switch (val) {
5787 case 0:
5788 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5789 ret = -EINVAL;
5790 break;
5791 }
5792 if (tr->allocated_snapshot)
5793 free_snapshot(tr);
5794 break;
5795 case 1:
5796 /* Only allow per-cpu swap if the ring buffer supports it */
5797 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5798 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5799 ret = -EINVAL;
5800 break;
5801 }
5802 #endif
5803 if (!tr->allocated_snapshot) {
5804 ret = alloc_snapshot(tr);
5805 if (ret < 0)
5806 break;
5807 }
5808 local_irq_disable();
5809 /* Now, we're going to swap */
5810 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5811 update_max_tr(tr, current, smp_processor_id());
5812 else
5813 update_max_tr_single(tr, current, iter->cpu_file);
5814 local_irq_enable();
5815 break;
5816 default:
5817 if (tr->allocated_snapshot) {
5818 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5819 tracing_reset_online_cpus(&tr->max_buffer);
5820 else
5821 tracing_reset(&tr->max_buffer, iter->cpu_file);
5822 }
5823 break;
5824 }
5825
5826 if (ret >= 0) {
5827 *ppos += cnt;
5828 ret = cnt;
5829 }
5830 out:
5831 mutex_unlock(&trace_types_lock);
5832 return ret;
5833 }
5834
5835 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5836 {
5837 struct seq_file *m = file->private_data;
5838 int ret;
5839
5840 ret = tracing_release(inode, file);
5841
5842 if (file->f_mode & FMODE_READ)
5843 return ret;
5844
5845 /* If write only, the seq_file is just a stub */
5846 if (m)
5847 kfree(m->private);
5848 kfree(m);
5849
5850 return 0;
5851 }
5852
5853 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5854 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5855 size_t count, loff_t *ppos);
5856 static int tracing_buffers_release(struct inode *inode, struct file *file);
5857 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5858 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5859
5860 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5861 {
5862 struct ftrace_buffer_info *info;
5863 int ret;
5864
5865 ret = tracing_buffers_open(inode, filp);
5866 if (ret < 0)
5867 return ret;
5868
5869 info = filp->private_data;
5870
5871 if (info->iter.trace->use_max_tr) {
5872 tracing_buffers_release(inode, filp);
5873 return -EBUSY;
5874 }
5875
5876 info->iter.snapshot = true;
5877 info->iter.trace_buffer = &info->iter.tr->max_buffer;
5878
5879 return ret;
5880 }
5881
5882 #endif /* CONFIG_TRACER_SNAPSHOT */
5883
5884
5885 static const struct file_operations tracing_thresh_fops = {
5886 .open = tracing_open_generic,
5887 .read = tracing_thresh_read,
5888 .write = tracing_thresh_write,
5889 .llseek = generic_file_llseek,
5890 };
5891
5892 #ifdef CONFIG_TRACER_MAX_TRACE
5893 static const struct file_operations tracing_max_lat_fops = {
5894 .open = tracing_open_generic,
5895 .read = tracing_max_lat_read,
5896 .write = tracing_max_lat_write,
5897 .llseek = generic_file_llseek,
5898 };
5899 #endif
5900
5901 static const struct file_operations set_tracer_fops = {
5902 .open = tracing_open_generic,
5903 .read = tracing_set_trace_read,
5904 .write = tracing_set_trace_write,
5905 .llseek = generic_file_llseek,
5906 };
5907
5908 static const struct file_operations tracing_pipe_fops = {
5909 .open = tracing_open_pipe,
5910 .poll = tracing_poll_pipe,
5911 .read = tracing_read_pipe,
5912 .splice_read = tracing_splice_read_pipe,
5913 .release = tracing_release_pipe,
5914 .llseek = no_llseek,
5915 };
5916
5917 static const struct file_operations tracing_entries_fops = {
5918 .open = tracing_open_generic_tr,
5919 .read = tracing_entries_read,
5920 .write = tracing_entries_write,
5921 .llseek = generic_file_llseek,
5922 .release = tracing_release_generic_tr,
5923 };
5924
5925 static const struct file_operations tracing_total_entries_fops = {
5926 .open = tracing_open_generic_tr,
5927 .read = tracing_total_entries_read,
5928 .llseek = generic_file_llseek,
5929 .release = tracing_release_generic_tr,
5930 };
5931
5932 static const struct file_operations tracing_free_buffer_fops = {
5933 .open = tracing_open_generic_tr,
5934 .write = tracing_free_buffer_write,
5935 .release = tracing_free_buffer_release,
5936 };
5937
5938 static const struct file_operations tracing_mark_fops = {
5939 .open = tracing_open_generic_tr,
5940 .write = tracing_mark_write,
5941 .llseek = generic_file_llseek,
5942 .release = tracing_release_generic_tr,
5943 };
5944
5945 static const struct file_operations trace_clock_fops = {
5946 .open = tracing_clock_open,
5947 .read = seq_read,
5948 .llseek = seq_lseek,
5949 .release = tracing_single_release_tr,
5950 .write = tracing_clock_write,
5951 };
5952
5953 #ifdef CONFIG_TRACER_SNAPSHOT
5954 static const struct file_operations snapshot_fops = {
5955 .open = tracing_snapshot_open,
5956 .read = seq_read,
5957 .write = tracing_snapshot_write,
5958 .llseek = tracing_lseek,
5959 .release = tracing_snapshot_release,
5960 };
5961
5962 static const struct file_operations snapshot_raw_fops = {
5963 .open = snapshot_raw_open,
5964 .read = tracing_buffers_read,
5965 .release = tracing_buffers_release,
5966 .splice_read = tracing_buffers_splice_read,
5967 .llseek = no_llseek,
5968 };
5969
5970 #endif /* CONFIG_TRACER_SNAPSHOT */
5971
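/*
 * Open a raw ring-buffer file: allocate an ftrace_buffer_info wrapping
 * a trace_iterator, and pin the trace array and current tracer.
 */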
5972 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5973 {
5974 struct trace_array *tr = inode->i_private;
5975 struct ftrace_buffer_info *info;
5976 int ret;
5977
5978 if (tracing_disabled)
5979 return -ENODEV;
5980
5981 if (trace_array_get(tr) < 0)
5982 return -ENODEV;
5983
5984 info = kzalloc(sizeof(*info), GFP_KERNEL);
5985 if (!info) {
5986 trace_array_put(tr);
5987 return -ENOMEM;
5988 }
5989
5990 mutex_lock(&trace_types_lock);
5991
5992 info->iter.tr = tr;
5993 info->iter.cpu_file = tracing_get_cpu(inode);
5994 info->iter.trace = tr->current_trace;
5995 info->iter.trace_buffer = &tr->trace_buffer;
5996 info->spare = NULL;
5997 /* Force reading ring buffer for first read */
5998 info->read = (unsigned int)-1;
5999
6000 filp->private_data = info;
6001
6002 tr->current_trace->ref++;
6003
6004 mutex_unlock(&trace_types_lock);
6005
6006 ret = nonseekable_open(inode, filp);
6007 if (ret < 0)
6008 trace_array_put(tr);
6009
6010 return ret;
6011 }
6012
6013 static unsigned int
6014 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6015 {
6016 struct ftrace_buffer_info *info = filp->private_data;
6017 struct trace_iterator *iter = &info->iter;
6018
6019 return trace_poll(iter, filp, poll_table);
6020 }
6021
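/*
 * Read raw ring-buffer pages into the spare page and copy them to
 * userspace, blocking (unless O_NONBLOCK) while the buffer is empty.
 */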
6022 static ssize_t
6023 tracing_buffers_read(struct file *filp, char __user *ubuf,
6024 size_t count, loff_t *ppos)
6025 {
6026 struct ftrace_buffer_info *info = filp->private_data;
6027 struct trace_iterator *iter = &info->iter;
6028 ssize_t ret;
6029 ssize_t size;
6030
6031 if (!count)
6032 return 0;
6033
6034 #ifdef CONFIG_TRACER_MAX_TRACE
6035 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6036 return -EBUSY;
6037 #endif
6038
6039 if (!info->spare)
6040 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6041 iter->cpu_file);
6042 if (!info->spare)
6043 return -ENOMEM;
6044
6045 /* Do we have previous read data to read? */
6046 if (info->read < PAGE_SIZE)
6047 goto read;
6048
6049 again:
6050 trace_access_lock(iter->cpu_file);
6051 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6052 &info->spare,
6053 count,
6054 iter->cpu_file, 0);
6055 trace_access_unlock(iter->cpu_file);
6056
6057 if (ret < 0) {
6058 if (trace_empty(iter)) {
6059 if ((filp->f_flags & O_NONBLOCK))
6060 return -EAGAIN;
6061
6062 ret = wait_on_pipe(iter, false);
6063 if (ret)
6064 return ret;
6065
6066 goto again;
6067 }
6068 return 0;
6069 }
6070
6071 info->read = 0;
6072 read:
6073 size = PAGE_SIZE - info->read;
6074 if (size > count)
6075 size = count;
6076
6077 ret = copy_to_user(ubuf, info->spare + info->read, size);
6078 if (ret == size)
6079 return -EFAULT;
6080
6081 size -= ret;
6082
6083 *ppos += size;
6084 info->read += size;
6085
6086 return size;
6087 }
6088
6089 static int tracing_buffers_release(struct inode *inode, struct file *file)
6090 {
6091 struct ftrace_buffer_info *info = file->private_data;
6092 struct trace_iterator *iter = &info->iter;
6093
6094 mutex_lock(&trace_types_lock);
6095
6096 iter->tr->current_trace->ref--;
6097
6098 __trace_array_put(iter->tr);
6099
6100 if (info->spare)
6101 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6102 kfree(info);
6103
6104 mutex_unlock(&trace_types_lock);
6105
6106 return 0;
6107 }
6108
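/*
 * A buffer_ref is a reference counted wrapper around a ring buffer
 * read page. It lets the same page be handed to the pipe as a
 * pipe_buffer and be freed only when the last reference is dropped.
 */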
6109 struct buffer_ref {
6110 struct ring_buffer *buffer;
6111 void *page;
6112 int ref;
6113 };
6114
6115 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6116 struct pipe_buffer *buf)
6117 {
6118 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6119
6120 if (--ref->ref)
6121 return;
6122
6123 ring_buffer_free_read_page(ref->buffer, ref->page);
6124 kfree(ref);
6125 buf->private = 0;
6126 }
6127
6128 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6129 struct pipe_buffer *buf)
6130 {
6131 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6132
6133 ref->ref++;
6134 }
6135
6136 /* Pipe buffer operations for a ring buffer page. */
6137 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6138 .can_merge = 0,
6139 .confirm = generic_pipe_buf_confirm,
6140 .release = buffer_pipe_buf_release,
6141 .steal = generic_pipe_buf_steal,
6142 .get = buffer_pipe_buf_get,
6143 };
6144
6145 /*
6146 * Callback from splice_to_pipe(); releases pages left at the end of
6147 * the spd in case we errored out while filling the pipe.
6148 */
6149 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6150 {
6151 struct buffer_ref *ref =
6152 (struct buffer_ref *)spd->partial[i].private;
6153
6154 if (--ref->ref)
6155 return;
6156
6157 ring_buffer_free_read_page(ref->buffer, ref->page);
6158 kfree(ref);
6159 spd->partial[i].private = 0;
6160 }
6161
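/*
 * Splice handler for trace_pipe_raw: reads whole ring buffer pages,
 * wraps each one in a buffer_ref and feeds them to the pipe without
 * copying the data. *ppos must be page aligned and len is trimmed
 * down to whole pages.
 */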
6162 static ssize_t
6163 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6164 struct pipe_inode_info *pipe, size_t len,
6165 unsigned int flags)
6166 {
6167 struct ftrace_buffer_info *info = file->private_data;
6168 struct trace_iterator *iter = &info->iter;
6169 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6170 struct page *pages_def[PIPE_DEF_BUFFERS];
6171 struct splice_pipe_desc spd = {
6172 .pages = pages_def,
6173 .partial = partial_def,
6174 .nr_pages_max = PIPE_DEF_BUFFERS,
6175 .flags = flags,
6176 .ops = &buffer_pipe_buf_ops,
6177 .spd_release = buffer_spd_release,
6178 };
6179 struct buffer_ref *ref;
6180 int entries, size, i;
6181 ssize_t ret = 0;
6182
6183 #ifdef CONFIG_TRACER_MAX_TRACE
6184 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6185 return -EBUSY;
6186 #endif
6187
6188 if (*ppos & (PAGE_SIZE - 1))
6189 return -EINVAL;
6190 
6191 if (len & (PAGE_SIZE - 1)) {
6192 if (len < PAGE_SIZE)
6193 return -EINVAL;
6194 len &= PAGE_MASK;
6195 }
6196 
6197 if (splice_grow_spd(pipe, &spd))
6198 return -ENOMEM;
6199 
6200 again:
6201 trace_access_lock(iter->cpu_file);
6202 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6203
6204 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6205 struct page *page;
6206 int r;
6207
6208 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6209 if (!ref) {
6210 ret = -ENOMEM;
6211 break;
6212 }
6213
6214 ref->ref = 1;
6215 ref->buffer = iter->trace_buffer->buffer;
6216 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6217 if (!ref->page) {
6218 ret = -ENOMEM;
6219 kfree(ref);
6220 break;
6221 }
6222
6223 r = ring_buffer_read_page(ref->buffer, &ref->page,
6224 len, iter->cpu_file, 1);
6225 if (r < 0) {
6226 ring_buffer_free_read_page(ref->buffer, ref->page);
6227 kfree(ref);
6228 break;
6229 }
6230
6231 /*
6232 * Zero out any leftover data; this page is handed
6233 * to user land.
6234 */
6235 size = ring_buffer_page_len(ref->page);
6236 if (size < PAGE_SIZE)
6237 memset(ref->page + size, 0, PAGE_SIZE - size);
6238
6239 page = virt_to_page(ref->page);
6240
6241 spd.pages[i] = page;
6242 spd.partial[i].len = PAGE_SIZE;
6243 spd.partial[i].offset = 0;
6244 spd.partial[i].private = (unsigned long)ref;
6245 spd.nr_pages++;
6246 *ppos += PAGE_SIZE;
6247
6248 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6249 }
6250
6251 trace_access_unlock(iter->cpu_file);
6252 spd.nr_pages = i;
6253
6254 /* did we read anything? */
6255 if (!spd.nr_pages) {
6256 if (ret)
6257 goto out;
6258 
6259 ret = -EAGAIN;
6260 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6261 goto out;
6262 
6263 ret = wait_on_pipe(iter, true);
6264 if (ret)
6265 goto out;
6266 goto again;
6267 }
6268 
6269 ret = splice_to_pipe(pipe, &spd);
6270 out:
6271 splice_shrink_spd(&spd);
6272 return ret;
6273 }
6274
6275 static const struct file_operations tracing_buffers_fops = {
6276 .open = tracing_buffers_open,
6277 .read = tracing_buffers_read,
6278 .poll = tracing_buffers_poll,
6279 .release = tracing_buffers_release,
6280 .splice_read = tracing_buffers_splice_read,
6281 .llseek = no_llseek,
6282 };
6283
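/*
 * Read handler for the per-CPU "stats" file: reports the entry,
 * overrun, byte and event counters of one CPU's ring buffer, plus
 * the oldest event timestamp and current time in the trace clock.
 */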
6284 static ssize_t
6285 tracing_stats_read(struct file *filp, char __user *ubuf,
6286 size_t count, loff_t *ppos)
6287 {
6288 struct inode *inode = file_inode(filp);
6289 struct trace_array *tr = inode->i_private;
6290 struct trace_buffer *trace_buf = &tr->trace_buffer;
6291 int cpu = tracing_get_cpu(inode);
6292 struct trace_seq *s;
6293 unsigned long cnt;
6294 unsigned long long t;
6295 unsigned long usec_rem;
6296
6297 s = kmalloc(sizeof(*s), GFP_KERNEL);
6298 if (!s)
6299 return -ENOMEM;
6300
6301 trace_seq_init(s);
6302
6303 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6304 trace_seq_printf(s, "entries: %ld\n", cnt);
6305
6306 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6307 trace_seq_printf(s, "overrun: %ld\n", cnt);
6308
6309 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6310 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6311
6312 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6313 trace_seq_printf(s, "bytes: %ld\n", cnt);
6314
6315 if (trace_clocks[tr->clock_id].in_ns) {
6316 /* local or global for trace_clock */
6317 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6318 usec_rem = do_div(t, USEC_PER_SEC);
6319 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6320 t, usec_rem);
6321
6322 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6323 usec_rem = do_div(t, USEC_PER_SEC);
6324 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6325 } else {
6326 /* counter or tsc mode for trace_clock */
6327 trace_seq_printf(s, "oldest event ts: %llu\n",
6328 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6329
6330 trace_seq_printf(s, "now ts: %llu\n",
6331 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6332 }
6333
6334 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6335 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6336
6337 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6338 trace_seq_printf(s, "read events: %ld\n", cnt);
6339
6340 count = simple_read_from_buffer(ubuf, count, ppos,
6341 s->buffer, trace_seq_used(s));
6342
6343 kfree(s);
6344
6345 return count;
6346 }
6347
6348 static const struct file_operations tracing_stats_fops = {
6349 .open = tracing_open_generic_tr,
6350 .read = tracing_stats_read,
6351 .llseek = generic_file_llseek,
6352 .release = tracing_release_generic_tr,
6353 };
6354
6355 #ifdef CONFIG_DYNAMIC_FTRACE
6356
6357 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6358 {
6359 return 0;
6360 }
6361
6362 static ssize_t
6363 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6364 size_t cnt, loff_t *ppos)
6365 {
6366 static char ftrace_dyn_info_buffer[1024];
6367 static DEFINE_MUTEX(dyn_info_mutex);
6368 unsigned long *p = filp->private_data;
6369 char *buf = ftrace_dyn_info_buffer;
6370 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6371 int r;
6372
6373 mutex_lock(&dyn_info_mutex);
6374 r = sprintf(buf, "%ld ", *p);
6375
6376 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6377 buf[r++] = '\n';
6378
6379 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6380
6381 mutex_unlock(&dyn_info_mutex);
6382
6383 return r;
6384 }
6385
6386 static const struct file_operations tracing_dyn_info_fops = {
6387 .open = tracing_open_generic,
6388 .read = tracing_read_dyn_info,
6389 .llseek = generic_file_llseek,
6390 };
6391 #endif /* CONFIG_DYNAMIC_FTRACE */
6392
6393 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6394 static void
6395 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6396 {
6397 tracing_snapshot();
6398 }
6399
6400 static void
6401 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6402 {
6403 unsigned long *count = (long *)data;
6404
6405 if (!*count)
6406 return;
6407
6408 if (*count != -1)
6409 (*count)--;
6410
6411 tracing_snapshot();
6412 }
6413
6414 static int
6415 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6416 struct ftrace_probe_ops *ops, void *data)
6417 {
6418 long count = (long)data;
6419
6420 seq_printf(m, "%ps:", (void *)ip);
6421
6422 seq_puts(m, "snapshot");
6423
6424 if (count == -1)
6425 seq_puts(m, ":unlimited\n");
6426 else
6427 seq_printf(m, ":count=%ld\n", count);
6428
6429 return 0;
6430 }
6431
6432 static struct ftrace_probe_ops snapshot_probe_ops = {
6433 .func = ftrace_snapshot,
6434 .print = ftrace_snapshot_print,
6435 };
6436
6437 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6438 .func = ftrace_count_snapshot,
6439 .print = ftrace_snapshot_print,
6440 };
6441
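/*
 * Parse the "snapshot" command written to set_ftrace_filter, for
 * example (function name chosen arbitrarily):
 *
 *   echo 'do_trap:snapshot'   > set_ftrace_filter
 *   echo 'do_trap:snapshot:5' > set_ftrace_filter
 *
 * A leading '!' removes the probe; the optional count limits how
 * many snapshots will be taken.
 */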
6442 static int
6443 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6444 char *glob, char *cmd, char *param, int enable)
6445 {
6446 struct ftrace_probe_ops *ops;
6447 void *count = (void *)-1;
6448 char *number;
6449 int ret;
6450
6451 /* hash funcs only work with set_ftrace_filter */
6452 if (!enable)
6453 return -EINVAL;
6454
6455 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6456
6457 if (glob[0] == '!') {
6458 unregister_ftrace_function_probe_func(glob+1, ops);
6459 return 0;
6460 }
6461
6462 if (!param)
6463 goto out_reg;
6464
6465 number = strsep(&param, ":");
6466
6467 if (!strlen(number))
6468 goto out_reg;
6469
6470 /*
6471 * We use the callback data field (which is a pointer)
6472 * as our counter.
6473 */
6474 ret = kstrtoul(number, 0, (unsigned long *)&count);
6475 if (ret)
6476 return ret;
6477
6478 out_reg:
6479 ret = register_ftrace_function_probe(glob, ops, count);
6480
6481 if (ret >= 0)
6482 alloc_snapshot(&global_trace);
6483
6484 return ret < 0 ? ret : 0;
6485 }
6486
6487 static struct ftrace_func_command ftrace_snapshot_cmd = {
6488 .name = "snapshot",
6489 .func = ftrace_trace_snapshot_callback,
6490 };
6491
6492 static __init int register_snapshot_cmd(void)
6493 {
6494 return register_ftrace_command(&ftrace_snapshot_cmd);
6495 }
6496 #else
6497 static inline __init int register_snapshot_cmd(void) { return 0; }
6498 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6499
6500 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6501 {
6502 if (WARN_ON(!tr->dir))
6503 return ERR_PTR(-ENODEV);
6504
6505 /* Top directory uses NULL as the parent */
6506 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6507 return NULL;
6508
6509 /* All sub buffers have a descriptor */
6510 return tr->dir;
6511 }
6512
6513 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6514 {
6515 struct dentry *d_tracer;
6516
6517 if (tr->percpu_dir)
6518 return tr->percpu_dir;
6519
6520 d_tracer = tracing_get_dentry(tr);
6521 if (IS_ERR(d_tracer))
6522 return NULL;
6523
6524 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6525
6526 WARN_ONCE(!tr->percpu_dir,
6527 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6528
6529 return tr->percpu_dir;
6530 }
6531
6532 static struct dentry *
6533 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6534 void *data, long cpu, const struct file_operations *fops)
6535 {
6536 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6537
6538 if (ret) /* See tracing_get_cpu() */
6539 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6540 return ret;
6541 }
6542
6543 static void
6544 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6545 {
6546 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6547 struct dentry *d_cpu;
6548 char cpu_dir[30]; /* 30 characters should be more than enough */
6549
6550 if (!d_percpu)
6551 return;
6552
6553 snprintf(cpu_dir, 30, "cpu%ld", cpu);
6554 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6555 if (!d_cpu) {
6556 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6557 return;
6558 }
6559
6560 /* per cpu trace_pipe */
6561 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6562 tr, cpu, &tracing_pipe_fops);
6563
6564 /* per cpu trace */
6565 trace_create_cpu_file("trace", 0644, d_cpu,
6566 tr, cpu, &tracing_fops);
6567
6568 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6569 tr, cpu, &tracing_buffers_fops);
6570
6571 trace_create_cpu_file("stats", 0444, d_cpu,
6572 tr, cpu, &tracing_stats_fops);
6573
6574 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6575 tr, cpu, &tracing_entries_fops);
6576
6577 #ifdef CONFIG_TRACER_SNAPSHOT
6578 trace_create_cpu_file("snapshot", 0644, d_cpu,
6579 tr, cpu, &snapshot_fops);
6580
6581 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6582 tr, cpu, &snapshot_raw_fops);
6583 #endif
6584 }
6585
6586 #ifdef CONFIG_FTRACE_SELFTEST
6587 /* Let selftest have access to static functions in this file */
6588 #include "trace_selftest.c"
6589 #endif
6590
6591 static ssize_t
6592 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6593 loff_t *ppos)
6594 {
6595 struct trace_option_dentry *topt = filp->private_data;
6596 char *buf;
6597
6598 if (topt->flags->val & topt->opt->bit)
6599 buf = "1\n";
6600 else
6601 buf = "0\n";
6602
6603 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6604 }
6605
6606 static ssize_t
6607 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6608 loff_t *ppos)
6609 {
6610 struct trace_option_dentry *topt = filp->private_data;
6611 unsigned long val;
6612 int ret;
6613
6614 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6615 if (ret)
6616 return ret;
6617
6618 if (val != 0 && val != 1)
6619 return -EINVAL;
6620
6621 if (!!(topt->flags->val & topt->opt->bit) != val) {
6622 mutex_lock(&trace_types_lock);
6623 ret = __set_tracer_option(topt->tr, topt->flags,
6624 topt->opt, !val);
6625 mutex_unlock(&trace_types_lock);
6626 if (ret)
6627 return ret;
6628 }
6629
6630 *ppos += cnt;
6631
6632 return cnt;
6633 }
6634
6635
6636 static const struct file_operations trace_options_fops = {
6637 .open = tracing_open_generic,
6638 .read = trace_options_read,
6639 .write = trace_options_write,
6640 .llseek = generic_file_llseek,
6641 };
6642
6643 /*
6644 * In order to pass in both the trace_array descriptor as well as the index
6645 * to the flag that the trace option file represents, the trace_array
6646 * has a character array of trace_flags_index[], which holds the index
6647 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6648 * The address of this character array is passed to the flag option file
6649 * read/write callbacks.
6650 *
6651 * In order to extract both the index and the trace_array descriptor,
6652 * get_tr_index() uses the following algorithm.
6653 *
6654 * idx = *ptr;
6655 *
6656 * The pointer passed in is the address of one element of that index
6657 * array, so dereferencing it yields the index (remember, index[1] == 1).
6658 *
6659 * Then, to get the trace_array descriptor, subtract that index from
6660 * the pointer to get back to the start of the index array itself:
6661 *
6662 * ptr - idx == &index[0]
6663 *
6664 * Then a simple container_of() from that pointer gets us to the
6665 * trace_array descriptor.
6666 */
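/*
 * For example, with made up addresses: if trace_flags_index[] starts
 * at 0x1000 and data points at &index[3] (0x1003), then *data == 3,
 * data - 3 == 0x1000 == &index[0], and container_of() on that address
 * gives the enclosing trace_array.
 */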
6667 static void get_tr_index(void *data, struct trace_array **ptr,
6668 unsigned int *pindex)
6669 {
6670 *pindex = *(unsigned char *)data;
6671
6672 *ptr = container_of(data - *pindex, struct trace_array,
6673 trace_flags_index);
6674 }
6675
6676 static ssize_t
6677 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6678 loff_t *ppos)
6679 {
6680 void *tr_index = filp->private_data;
6681 struct trace_array *tr;
6682 unsigned int index;
6683 char *buf;
6684
6685 get_tr_index(tr_index, &tr, &index);
6686
6687 if (tr->trace_flags & (1 << index))
6688 buf = "1\n";
6689 else
6690 buf = "0\n";
6691
6692 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6693 }
6694
6695 static ssize_t
6696 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6697 loff_t *ppos)
6698 {
6699 void *tr_index = filp->private_data;
6700 struct trace_array *tr;
6701 unsigned int index;
6702 unsigned long val;
6703 int ret;
6704
6705 get_tr_index(tr_index, &tr, &index);
6706
6707 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6708 if (ret)
6709 return ret;
6710
6711 if (val != 0 && val != 1)
6712 return -EINVAL;
6713
6714 mutex_lock(&trace_types_lock);
6715 ret = set_tracer_flag(tr, 1 << index, val);
6716 mutex_unlock(&trace_types_lock);
6717
6718 if (ret < 0)
6719 return ret;
6720
6721 *ppos += cnt;
6722
6723 return cnt;
6724 }
6725
6726 static const struct file_operations trace_options_core_fops = {
6727 .open = tracing_open_generic,
6728 .read = trace_options_core_read,
6729 .write = trace_options_core_write,
6730 .llseek = generic_file_llseek,
6731 };
6732
6733 struct dentry *trace_create_file(const char *name,
6734 umode_t mode,
6735 struct dentry *parent,
6736 void *data,
6737 const struct file_operations *fops)
6738 {
6739 struct dentry *ret;
6740
6741 ret = tracefs_create_file(name, mode, parent, data, fops);
6742 if (!ret)
6743 pr_warn("Could not create tracefs '%s' entry\n", name);
6744
6745 return ret;
6746 }
6747
6748
6749 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6750 {
6751 struct dentry *d_tracer;
6752
6753 if (tr->options)
6754 return tr->options;
6755
6756 d_tracer = tracing_get_dentry(tr);
6757 if (IS_ERR(d_tracer))
6758 return NULL;
6759
6760 tr->options = tracefs_create_dir("options", d_tracer);
6761 if (!tr->options) {
6762 pr_warn("Could not create tracefs directory 'options'\n");
6763 return NULL;
6764 }
6765
6766 return tr->options;
6767 }
6768
6769 static void
6770 create_trace_option_file(struct trace_array *tr,
6771 struct trace_option_dentry *topt,
6772 struct tracer_flags *flags,
6773 struct tracer_opt *opt)
6774 {
6775 struct dentry *t_options;
6776
6777 t_options = trace_options_init_dentry(tr);
6778 if (!t_options)
6779 return;
6780
6781 topt->flags = flags;
6782 topt->opt = opt;
6783 topt->tr = tr;
6784
6785 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6786 &trace_options_fops);
6787
6788 }
6789
6790 static void
6791 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6792 {
6793 struct trace_option_dentry *topts;
6794 struct trace_options *tr_topts;
6795 struct tracer_flags *flags;
6796 struct tracer_opt *opts;
6797 int cnt;
6798 int i;
6799
6800 if (!tracer)
6801 return;
6802
6803 flags = tracer->flags;
6804
6805 if (!flags || !flags->opts)
6806 return;
6807
6808 /*
6809 * If this is an instance, only create flags for tracers
6810 * the instance may have.
6811 */
6812 if (!trace_ok_for_array(tracer, tr))
6813 return;
6814
6815 for (i = 0; i < tr->nr_topts; i++) {
6816 /* Make sure there are no duplicate flags. */
6817 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6818 return;
6819 }
6820
6821 opts = flags->opts;
6822
6823 for (cnt = 0; opts[cnt].name; cnt++)
6824 ;
6825
6826 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6827 if (!topts)
6828 return;
6829
6830 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6831 GFP_KERNEL);
6832 if (!tr_topts) {
6833 kfree(topts);
6834 return;
6835 }
6836
6837 tr->topts = tr_topts;
6838 tr->topts[tr->nr_topts].tracer = tracer;
6839 tr->topts[tr->nr_topts].topts = topts;
6840 tr->nr_topts++;
6841
6842 for (cnt = 0; opts[cnt].name; cnt++) {
6843 create_trace_option_file(tr, &topts[cnt], flags,
6844 &opts[cnt]);
6845 WARN_ONCE(topts[cnt].entry == NULL,
6846 "Failed to create trace option: %s",
6847 opts[cnt].name);
6848 }
6849 }
6850
6851 static struct dentry *
6852 create_trace_option_core_file(struct trace_array *tr,
6853 const char *option, long index)
6854 {
6855 struct dentry *t_options;
6856
6857 t_options = trace_options_init_dentry(tr);
6858 if (!t_options)
6859 return NULL;
6860
6861 return trace_create_file(option, 0644, t_options,
6862 (void *)&tr->trace_flags_index[index],
6863 &trace_options_core_fops);
6864 }
6865
6866 static void create_trace_options_dir(struct trace_array *tr)
6867 {
6868 struct dentry *t_options;
6869 bool top_level = tr == &global_trace;
6870 int i;
6871
6872 t_options = trace_options_init_dentry(tr);
6873 if (!t_options)
6874 return;
6875
6876 for (i = 0; trace_options[i]; i++) {
6877 if (top_level ||
6878 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6879 create_trace_option_core_file(tr, trace_options[i], i);
6880 }
6881 }
6882
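/*
 * Read/write handlers for the "tracing_on" file. Reading reports
 * whether the ring buffer is recording; writing 0 or 1 stops or
 * starts recording and calls the tracer's stop/start callbacks.
 */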
6883 static ssize_t
6884 rb_simple_read(struct file *filp, char __user *ubuf,
6885 size_t cnt, loff_t *ppos)
6886 {
6887 struct trace_array *tr = filp->private_data;
6888 char buf[64];
6889 int r;
6890
6891 r = tracer_tracing_is_on(tr);
6892 r = sprintf(buf, "%d\n", r);
6893
6894 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6895 }
6896
6897 static ssize_t
6898 rb_simple_write(struct file *filp, const char __user *ubuf,
6899 size_t cnt, loff_t *ppos)
6900 {
6901 struct trace_array *tr = filp->private_data;
6902 struct ring_buffer *buffer = tr->trace_buffer.buffer;
6903 unsigned long val;
6904 int ret;
6905
6906 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6907 if (ret)
6908 return ret;
6909
6910 if (buffer) {
6911 mutex_lock(&trace_types_lock);
6912 if (val) {
6913 tracer_tracing_on(tr);
6914 if (tr->current_trace->start)
6915 tr->current_trace->start(tr);
6916 } else {
6917 tracer_tracing_off(tr);
6918 if (tr->current_trace->stop)
6919 tr->current_trace->stop(tr);
6920 }
6921 mutex_unlock(&trace_types_lock);
6922 }
6923
6924 (*ppos)++;
6925
6926 return cnt;
6927 }
6928
6929 static const struct file_operations rb_simple_fops = {
6930 .open = tracing_open_generic_tr,
6931 .read = rb_simple_read,
6932 .write = rb_simple_write,
6933 .release = tracing_release_generic_tr,
6934 .llseek = default_llseek,
6935 };
6936
6937 struct dentry *trace_instance_dir;
6938
6939 static void
6940 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6941
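/*
 * Allocate the ring buffer and the per-CPU trace_array_cpu data for
 * one trace buffer, honoring the trace array's overwrite flag.
 */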
6942 static int
6943 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6944 {
6945 enum ring_buffer_flags rb_flags;
6946
6947 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6948
6949 buf->tr = tr;
6950
6951 buf->buffer = ring_buffer_alloc(size, rb_flags);
6952 if (!buf->buffer)
6953 return -ENOMEM;
6954
6955 buf->data = alloc_percpu(struct trace_array_cpu);
6956 if (!buf->data) {
6957 ring_buffer_free(buf->buffer);
6958 return -ENOMEM;
6959 }
6960
6961 /* Allocate the first page for all buffers */
6962 set_buffer_entries(&tr->trace_buffer,
6963 ring_buffer_size(tr->trace_buffer.buffer, 0));
6964
6965 return 0;
6966 }
6967
6968 static int allocate_trace_buffers(struct trace_array *tr, int size)
6969 {
6970 int ret;
6971
6972 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6973 if (ret)
6974 return ret;
6975
6976 #ifdef CONFIG_TRACER_MAX_TRACE
6977 ret = allocate_trace_buffer(tr, &tr->max_buffer,
6978 allocate_snapshot ? size : 1);
6979 if (WARN_ON(ret)) {
6980 ring_buffer_free(tr->trace_buffer.buffer);
6981 free_percpu(tr->trace_buffer.data);
6982 return -ENOMEM;
6983 }
6984 tr->allocated_snapshot = allocate_snapshot;
6985
6986 /*
6987 * Only the top level trace array gets its snapshot allocated
6988 * from the kernel command line.
6989 */
6990 allocate_snapshot = false;
6991 #endif
6992 return 0;
6993 }
6994
6995 static void free_trace_buffer(struct trace_buffer *buf)
6996 {
6997 if (buf->buffer) {
6998 ring_buffer_free(buf->buffer);
6999 buf->buffer = NULL;
7000 free_percpu(buf->data);
7001 buf->data = NULL;
7002 }
7003 }
7004
7005 static void free_trace_buffers(struct trace_array *tr)
7006 {
7007 if (!tr)
7008 return;
7009
7010 free_trace_buffer(&tr->trace_buffer);
7011
7012 #ifdef CONFIG_TRACER_MAX_TRACE
7013 free_trace_buffer(&tr->max_buffer);
7014 #endif
7015 }
7016
7017 static void init_trace_flags_index(struct trace_array *tr)
7018 {
7019 int i;
7020
7021 /* Used by the trace options files */
7022 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7023 tr->trace_flags_index[i] = i;
7024 }
7025
7026 static void __update_tracer_options(struct trace_array *tr)
7027 {
7028 struct tracer *t;
7029
7030 for (t = trace_types; t; t = t->next)
7031 add_tracer_options(tr, t);
7032 }
7033
7034 static void update_tracer_options(struct trace_array *tr)
7035 {
7036 mutex_lock(&trace_types_lock);
7037 __update_tracer_options(tr);
7038 mutex_unlock(&trace_types_lock);
7039 }
7040
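/*
 * Back end of "mkdir instances/<name>": allocate a new trace_array
 * with its own buffers, flags and tracefs files, and link it into
 * ftrace_trace_arrays. Fails with -EEXIST if the name is taken.
 */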
7041 static int instance_mkdir(const char *name)
7042 {
7043 struct trace_array *tr;
7044 int ret;
7045
7046 mutex_lock(&trace_types_lock);
7047
7048 ret = -EEXIST;
7049 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7050 if (tr->name && strcmp(tr->name, name) == 0)
7051 goto out_unlock;
7052 }
7053
7054 ret = -ENOMEM;
7055 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7056 if (!tr)
7057 goto out_unlock;
7058
7059 tr->name = kstrdup(name, GFP_KERNEL);
7060 if (!tr->name)
7061 goto out_free_tr;
7062
7063 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7064 goto out_free_tr;
7065
7066 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7067
7068 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7069
7070 raw_spin_lock_init(&tr->start_lock);
7071
7072 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7073
7074 tr->current_trace = &nop_trace;
7075
7076 INIT_LIST_HEAD(&tr->systems);
7077 INIT_LIST_HEAD(&tr->events);
7078
7079 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7080 goto out_free_tr;
7081
7082 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7083 if (!tr->dir)
7084 goto out_free_tr;
7085
7086 ret = event_trace_add_tracer(tr->dir, tr);
7087 if (ret) {
7088 tracefs_remove_recursive(tr->dir);
7089 goto out_free_tr;
7090 }
7091
7092 init_tracer_tracefs(tr, tr->dir);
7093 init_trace_flags_index(tr);
7094 __update_tracer_options(tr);
7095
7096 list_add(&tr->list, &ftrace_trace_arrays);
7097
7098 mutex_unlock(&trace_types_lock);
7099
7100 return 0;
7101
7102 out_free_tr:
7103 free_trace_buffers(tr);
7104 free_cpumask_var(tr->tracing_cpumask);
7105 kfree(tr->name);
7106 kfree(tr);
7107
7108 out_unlock:
7109 mutex_unlock(&trace_types_lock);
7110
7111 return ret;
7112
7113 }
7114
7115 static int instance_rmdir(const char *name)
7116 {
7117 struct trace_array *tr;
7118 int found = 0;
7119 int ret;
7120 int i;
7121
7122 mutex_lock(&trace_types_lock);
7123
7124 ret = -ENODEV;
7125 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7126 if (tr->name && strcmp(tr->name, name) == 0) {
7127 found = 1;
7128 break;
7129 }
7130 }
7131 if (!found)
7132 goto out_unlock;
7133
7134 ret = -EBUSY;
7135 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7136 goto out_unlock;
7137
7138 list_del(&tr->list);
7139
7140 /* Disable all the flags that were enabled coming in */
7141 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7142 if ((1 << i) & ZEROED_TRACE_FLAGS)
7143 set_tracer_flag(tr, 1 << i, 0);
7144 }
7145
7146 tracing_set_nop(tr);
7147 event_trace_del_tracer(tr);
7148 ftrace_destroy_function_files(tr);
7149 tracefs_remove_recursive(tr->dir);
7150 free_trace_buffers(tr);
7151
7152 for (i = 0; i < tr->nr_topts; i++) {
7153 kfree(tr->topts[i].topts);
7154 }
7155 kfree(tr->topts);
7156
7157 kfree(tr->name);
7158 kfree(tr);
7159
7160 ret = 0;
7161
7162 out_unlock:
7163 mutex_unlock(&trace_types_lock);
7164
7165 return ret;
7166 }
7167
7168 static __init void create_trace_instances(struct dentry *d_tracer)
7169 {
7170 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7171 instance_mkdir,
7172 instance_rmdir);
7173 if (WARN_ON(!trace_instance_dir))
7174 return;
7175 }
7176
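/*
 * Create the standard control files (current_tracer, trace,
 * trace_pipe, buffer_size_kb, trace_marker, tracing_on, ...) for a
 * trace array, along with its options and per_cpu directories.
 */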
7177 static void
7178 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7179 {
7180 int cpu;
7181
7182 trace_create_file("available_tracers", 0444, d_tracer,
7183 tr, &show_traces_fops);
7184
7185 trace_create_file("current_tracer", 0644, d_tracer,
7186 tr, &set_tracer_fops);
7187
7188 trace_create_file("tracing_cpumask", 0644, d_tracer,
7189 tr, &tracing_cpumask_fops);
7190
7191 trace_create_file("trace_options", 0644, d_tracer,
7192 tr, &tracing_iter_fops);
7193
7194 trace_create_file("trace", 0644, d_tracer,
7195 tr, &tracing_fops);
7196
7197 trace_create_file("trace_pipe", 0444, d_tracer,
7198 tr, &tracing_pipe_fops);
7199
7200 trace_create_file("buffer_size_kb", 0644, d_tracer,
7201 tr, &tracing_entries_fops);
7202
7203 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7204 tr, &tracing_total_entries_fops);
7205
7206 trace_create_file("free_buffer", 0200, d_tracer,
7207 tr, &tracing_free_buffer_fops);
7208
7209 trace_create_file("trace_marker", 0220, d_tracer,
7210 tr, &tracing_mark_fops);
7211
7212 trace_create_file("trace_clock", 0644, d_tracer, tr,
7213 &trace_clock_fops);
7214
7215 trace_create_file("tracing_on", 0644, d_tracer,
7216 tr, &rb_simple_fops);
7217
7218 create_trace_options_dir(tr);
7219
7220 #ifdef CONFIG_TRACER_MAX_TRACE
7221 trace_create_file("tracing_max_latency", 0644, d_tracer,
7222 &tr->max_latency, &tracing_max_lat_fops);
7223 #endif
7224
7225 if (ftrace_create_function_files(tr, d_tracer))
7226 WARN(1, "Could not allocate function filter files");
7227
7228 #ifdef CONFIG_TRACER_SNAPSHOT
7229 trace_create_file("snapshot", 0644, d_tracer,
7230 tr, &snapshot_fops);
7231 #endif
7232
7233 for_each_tracing_cpu(cpu)
7234 tracing_init_tracefs_percpu(tr, cpu);
7235
7236 ftrace_init_tracefs(tr, d_tracer);
7237 }
7238
7239 static struct vfsmount *trace_automount(void *ignore)
7240 {
7241 struct vfsmount *mnt;
7242 struct file_system_type *type;
7243
7244 /*
7245 * To maintain backward compatibility for tools that mount
7246 * debugfs to get to the tracing facility, tracefs is automatically
7247 * mounted to the debugfs/tracing directory.
7248 */
7249 type = get_fs_type("tracefs");
7250 if (!type)
7251 return NULL;
7252 mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7253 put_filesystem(type);
7254 if (IS_ERR(mnt))
7255 return NULL;
7256 mntget(mnt);
7257
7258 return mnt;
7259 }
7260
7261 /**
7262 * tracing_init_dentry - initialize top level trace array
7263 *
7264 * This is called when creating files or directories in the tracing
7265 * directory. It is called via fs_initcall() by any of the boot up code
7266 * and expects to return the dentry of the top level tracing directory.
7267 */
7268 struct dentry *tracing_init_dentry(void)
7269 {
7270 struct trace_array *tr = &global_trace;
7271
7272 /* The top level trace array uses NULL as parent */
7273 if (tr->dir)
7274 return NULL;
7275
7276 if (WARN_ON(!tracefs_initialized()) ||
7277 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7278 WARN_ON(!debugfs_initialized())))
7279 return ERR_PTR(-ENODEV);
7280
7281 /*
7282 * As there may still be users that expect the tracing
7283 * files to exist in debugfs/tracing, we must automount
7284 * the tracefs file system there, so older tools still
7285 * work with the newer kernel.
7286 */
7287 tr->dir = debugfs_create_automount("tracing", NULL,
7288 trace_automount, NULL);
7289 if (!tr->dir) {
7290 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7291 return ERR_PTR(-ENOMEM);
7292 }
7293
7294 return NULL;
7295 }
7296
7297 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7298 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7299
7300 static void __init trace_enum_init(void)
7301 {
7302 int len;
7303
7304 len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7305 trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7306 }
7307
7308 #ifdef CONFIG_MODULES
7309 static void trace_module_add_enums(struct module *mod)
7310 {
7311 if (!mod->num_trace_enums)
7312 return;
7313
7314 /*
7315 * Modules with bad taint do not have events created, do
7316 * not bother with enums either.
7317 */
7318 if (trace_module_has_bad_taint(mod))
7319 return;
7320
7321 trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7322 }
7323
7324 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7325 static void trace_module_remove_enums(struct module *mod)
7326 {
7327 union trace_enum_map_item *map;
7328 union trace_enum_map_item **last = &trace_enum_maps;
7329
7330 if (!mod->num_trace_enums)
7331 return;
7332
7333 mutex_lock(&trace_enum_mutex);
7334
7335 map = trace_enum_maps;
7336
7337 while (map) {
7338 if (map->head.mod == mod)
7339 break;
7340 map = trace_enum_jmp_to_tail(map);
7341 last = &map->tail.next;
7342 map = map->tail.next;
7343 }
7344 if (!map)
7345 goto out;
7346
7347 *last = trace_enum_jmp_to_tail(map)->tail.next;
7348 kfree(map);
7349 out:
7350 mutex_unlock(&trace_enum_mutex);
7351 }
7352 #else
7353 static inline void trace_module_remove_enums(struct module *mod) { }
7354 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7355
7356 static int trace_module_notify(struct notifier_block *self,
7357 unsigned long val, void *data)
7358 {
7359 struct module *mod = data;
7360
7361 switch (val) {
7362 case MODULE_STATE_COMING:
7363 trace_module_add_enums(mod);
7364 break;
7365 case MODULE_STATE_GOING:
7366 trace_module_remove_enums(mod);
7367 break;
7368 }
7369
7370 return 0;
7371 }
7372
7373 static struct notifier_block trace_module_nb = {
7374 .notifier_call = trace_module_notify,
7375 .priority = 0,
7376 };
7377 #endif /* CONFIG_MODULES */
7378
7379 static __init int tracer_init_tracefs(void)
7380 {
7381 struct dentry *d_tracer;
7382
7383 trace_access_lock_init();
7384
7385 d_tracer = tracing_init_dentry();
7386 if (IS_ERR(d_tracer))
7387 return 0;
7388
7389 init_tracer_tracefs(&global_trace, d_tracer);
7390
7391 trace_create_file("tracing_thresh", 0644, d_tracer,
7392 &global_trace, &tracing_thresh_fops);
7393
7394 trace_create_file("README", 0444, d_tracer,
7395 NULL, &tracing_readme_fops);
7396
7397 trace_create_file("saved_cmdlines", 0444, d_tracer,
7398 NULL, &tracing_saved_cmdlines_fops);
7399
7400 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7401 NULL, &tracing_saved_cmdlines_size_fops);
7402
7403 trace_enum_init();
7404
7405 trace_create_enum_file(d_tracer);
7406
7407 #ifdef CONFIG_MODULES
7408 register_module_notifier(&trace_module_nb);
7409 #endif
7410
7411 #ifdef CONFIG_DYNAMIC_FTRACE
7412 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7413 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7414 #endif
7415
7416 create_trace_instances(d_tracer);
7417
7418 update_tracer_options(&global_trace);
7419
7420 return 0;
7421 }
7422
7423 static int trace_panic_handler(struct notifier_block *this,
7424 unsigned long event, void *unused)
7425 {
7426 if (ftrace_dump_on_oops)
7427 ftrace_dump(ftrace_dump_on_oops);
7428 return NOTIFY_OK;
7429 }
7430
7431 static struct notifier_block trace_panic_notifier = {
7432 .notifier_call = trace_panic_handler,
7433 .next = NULL,
7434 .priority = 150 /* priority: INT_MAX >= x >= 0 */
7435 };
7436
7437 static int trace_die_handler(struct notifier_block *self,
7438 unsigned long val,
7439 void *data)
7440 {
7441 switch (val) {
7442 case DIE_OOPS:
7443 if (ftrace_dump_on_oops)
7444 ftrace_dump(ftrace_dump_on_oops);
7445 break;
7446 default:
7447 break;
7448 }
7449 return NOTIFY_OK;
7450 }
7451
7452 static struct notifier_block trace_die_notifier = {
7453 .notifier_call = trace_die_handler,
7454 .priority = 200
7455 };
7456
7457 /*
7458 * printk is limited to a max of 1024 bytes; we really don't need it that big.
7459 * Nothing should be printing 1000 characters anyway.
7460 */
7461 #define TRACE_MAX_PRINT 1000
7462
7463 /*
7464 * Define here KERN_TRACE so that we have one place to modify
7465 * it if we decide to change what log level the ftrace dump
7466 * should be at.
7467 */
7468 #define KERN_TRACE KERN_EMERG
7469
7470 void
7471 trace_printk_seq(struct trace_seq *s)
7472 {
7473 /* Probably should print a warning here. */
7474 if (s->seq.len >= TRACE_MAX_PRINT)
7475 s->seq.len = TRACE_MAX_PRINT;
7476
7477 /*
7478 * More paranoid code. Although the buffer size is set to
7479 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7480 * an extra layer of protection.
7481 */
7482 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7483 s->seq.len = s->seq.size - 1;
7484
7485 /* Should already be zero terminated, but we are paranoid. */
7486 s->buffer[s->seq.len] = 0;
7487
7488 printk(KERN_TRACE "%s", s->buffer);
7489
7490 trace_seq_init(s);
7491 }
7492
7493 void trace_init_global_iter(struct trace_iterator *iter)
7494 {
7495 iter->tr = &global_trace;
7496 iter->trace = iter->tr->current_trace;
7497 iter->cpu_file = RING_BUFFER_ALL_CPUS;
7498 iter->trace_buffer = &global_trace.trace_buffer;
7499
7500 if (iter->trace && iter->trace->open)
7501 iter->trace->open(iter);
7502
7503 /* Annotate start of buffers if we had overruns */
7504 if (ring_buffer_overruns(iter->trace_buffer->buffer))
7505 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7506
7507 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7508 if (trace_clocks[iter->tr->clock_id].in_ns)
7509 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7510 }
7511
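/*
 * Dump the ring buffer(s) to the console. Called from the panic and
 * die notifiers above when ftrace_dump_on_oops is set, and from
 * sysrq-z. Tracing is turned off first and stays off afterwards.
 */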
7512 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7513 {
7514 /* use static because iter can be a bit big for the stack */
7515 static struct trace_iterator iter;
7516 static atomic_t dump_running;
7517 struct trace_array *tr = &global_trace;
7518 unsigned int old_userobj;
7519 unsigned long flags;
7520 int cnt = 0, cpu;
7521
7522 /* Only allow one dump user at a time. */
7523 if (atomic_inc_return(&dump_running) != 1) {
7524 atomic_dec(&dump_running);
7525 return;
7526 }
7527
7528 /*
7529 * Always turn off tracing when we dump.
7530 * We don't need to show trace output of what happens
7531 * between multiple crashes.
7532 *
7533 * If the user does a sysrq-z, then they can re-enable
7534 * tracing with echo 1 > tracing_on.
7535 */
7536 tracing_off();
7537
7538 local_irq_save(flags);
7539
7540 /* Simulate the iterator */
7541 trace_init_global_iter(&iter);
7542
7543 for_each_tracing_cpu(cpu) {
7544 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7545 }
7546
7547 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7548
7549 /* don't look at user memory in panic mode */
7550 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7551
7552 switch (oops_dump_mode) {
7553 case DUMP_ALL:
7554 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7555 break;
7556 case DUMP_ORIG:
7557 iter.cpu_file = raw_smp_processor_id();
7558 break;
7559 case DUMP_NONE:
7560 goto out_enable;
7561 default:
7562 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7563 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7564 }
7565
7566 printk(KERN_TRACE "Dumping ftrace buffer:\n");
7567
7568 /* Did function tracer already get disabled? */
7569 if (ftrace_is_dead()) {
7570 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7571 printk("# MAY BE MISSING FUNCTION EVENTS\n");
7572 }
7573
7574 /*
7575 * We need to stop all tracing on all CPUs to read
7576 * the next buffer. This is a bit expensive, but it
7577 * is not done often. We read everything we can,
7578 * and then release the locks again.
7579 */
7580
7581 while (!trace_empty(&iter)) {
7582
7583 if (!cnt)
7584 printk(KERN_TRACE "---------------------------------\n");
7585
7586 cnt++;
7587
7588 /* reset all but tr, trace, and overruns */
7589 memset(&iter.seq, 0,
7590 sizeof(struct trace_iterator) -
7591 offsetof(struct trace_iterator, seq));
7592 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7593 iter.pos = -1;
7594
7595 if (trace_find_next_entry_inc(&iter) != NULL) {
7596 int ret;
7597
7598 ret = print_trace_line(&iter);
7599 if (ret != TRACE_TYPE_NO_CONSUME)
7600 trace_consume(&iter);
7601 }
7602 touch_nmi_watchdog();
7603
7604 trace_printk_seq(&iter.seq);
7605 }
7606
7607 if (!cnt)
7608 printk(KERN_TRACE " (ftrace buffer empty)\n");
7609 else
7610 printk(KERN_TRACE "---------------------------------\n");
7611
7612 out_enable:
7613 tr->trace_flags |= old_userobj;
7614
7615 for_each_tracing_cpu(cpu) {
7616 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7617 }
7618 atomic_dec(&dump_running);
7619 local_irq_restore(flags);
7620 }
7621 EXPORT_SYMBOL_GPL(ftrace_dump);
7622
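/*
 * Early tracing setup: allocate the cpumasks, the saved cmdlines
 * buffer, the global ring buffer and the temp buffer used for event
 * triggers, register the nop tracer and hook up the panic and die
 * notifiers. Called from trace_init() below.
 */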
7623 __init static int tracer_alloc_buffers(void)
7624 {
7625 int ring_buf_size;
7626 int ret = -ENOMEM;
7627
7628 /*
7629 * Make sure we don't accidentally add more trace options
7630 * than we have bits for.
7631 */
7632 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7633
7634 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7635 goto out;
7636
7637 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7638 goto out_free_buffer_mask;
7639
7640 /* Only allocate trace_printk buffers if a trace_printk exists */
7641 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7642 /* Must be called before global_trace.buffer is allocated */
7643 trace_printk_init_buffers();
7644
7645 /* To save memory, keep the ring buffer size to its minimum */
7646 if (ring_buffer_expanded)
7647 ring_buf_size = trace_buf_size;
7648 else
7649 ring_buf_size = 1;
7650
7651 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7652 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7653
7654 raw_spin_lock_init(&global_trace.start_lock);
7655
7656 /* Used for event triggers */
7657 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7658 if (!temp_buffer)
7659 goto out_free_cpumask;
7660
7661 if (trace_create_savedcmd() < 0)
7662 goto out_free_temp_buffer;
7663
7664 /* TODO: make the number of buffers hot pluggable with CPUS */
7665 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7666 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7667 WARN_ON(1);
7668 goto out_free_savedcmd;
7669 }
7670
7671 if (global_trace.buffer_disabled)
7672 tracing_off();
7673
7674 if (trace_boot_clock) {
7675 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7676 if (ret < 0)
7677 pr_warn("Trace clock %s not defined, going back to default\n",
7678 trace_boot_clock);
7679 }
7680
7681 /*
7682 * register_tracer() might reference current_trace, so it
7683 * needs to be set before we register anything. This is
7684 * just a bootstrap of current_trace anyway.
7685 */
7686 global_trace.current_trace = &nop_trace;
7687
7688 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7689
7690 ftrace_init_global_array_ops(&global_trace);
7691
7692 init_trace_flags_index(&global_trace);
7693
7694 register_tracer(&nop_trace);
7695
7696 /* All seems OK, enable tracing */
7697 tracing_disabled = 0;
7698
7699 atomic_notifier_chain_register(&panic_notifier_list,
7700 &trace_panic_notifier);
7701
7702 register_die_notifier(&trace_die_notifier);
7703
7704 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7705
7706 INIT_LIST_HEAD(&global_trace.systems);
7707 INIT_LIST_HEAD(&global_trace.events);
7708 list_add(&global_trace.list, &ftrace_trace_arrays);
7709
7710 apply_trace_boot_options();
7711
7712 register_snapshot_cmd();
7713
7714 return 0;
7715
7716 out_free_savedcmd:
7717 free_saved_cmdlines_buffer(savedcmd);
7718 out_free_temp_buffer:
7719 ring_buffer_free(temp_buffer);
7720 out_free_cpumask:
7721 free_cpumask_var(global_trace.tracing_cpumask);
7722 out_free_buffer_mask:
7723 free_cpumask_var(tracing_buffer_mask);
7724 out:
7725 return ret;
7726 }
7727
7728 void __init trace_init(void)
7729 {
7730 if (tracepoint_printk) {
7731 tracepoint_print_iter =
7732 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7733 if (WARN_ON(!tracepoint_print_iter))
7734 tracepoint_printk = 0;
7735 }
7736 tracer_alloc_buffers();
7737 trace_event_init();
7738 }
7739
7740 __init static int clear_boot_tracer(void)
7741 {
7742 /*
7743 * The name of the default bootup tracer is kept in an init section
7744 * that is freed after boot. This function is called at late_initcall
7745 * time; if the boot tracer was never registered, clear the pointer
7746 * to prevent a later registration from accessing the buffer that
7747 * is about to be freed.
7748 */
7749 if (!default_bootup_tracer)
7750 return 0;
7751
7752 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7753 default_bootup_tracer);
7754 default_bootup_tracer = NULL;
7755
7756 return 0;
7757 }
7758
7759 fs_initcall(tracer_init_tracefs);
7760 late_initcall(clear_boot_tracer);