2 #define TRACE_SYSTEM sched
4 #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
7 #include <linux/sched.h>
8 #include <linux/tracepoint.h>
9 #include <linux/binfmts.h>
/*
 * Table mapping scheduling-policy constants to printable names.
 * Expanded twice via the EM()/EMe() helper macros: once to emit
 * TRACE_DEFINE_ENUM() records for userspace, and once to build the
 * {value, "name"} list consumed by __print_symbolic().
 */
#define SCHEDULING_POLICY				\
	EM( SCHED_NORMAL,	"SCHED_NORMAL")		\
	EM( SCHED_FIFO,		"SCHED_FIFO")		\
	EM( SCHED_RR,		"SCHED_RR")		\
	EM( SCHED_BATCH,	"SCHED_BATCH")		\
	EM( SCHED_IDLE,		"SCHED_IDLE")		\
	EMe(SCHED_DEADLINE,	"SCHED_DEADLINE")
20 * First define the enums in the above macros to be exported to userspace
21 * via TRACE_DEFINE_ENUM().
25 #define EM(a, b) TRACE_DEFINE_ENUM(a);
26 #define EMe(a, b) TRACE_DEFINE_ENUM(a);
31 * Now redefine the EM() and EMe() macros to map the enums to the strings
32 * that will be printed in the output.
36 #define EM(a, b) {a, b},
37 #define EMe(a, b) {a, b}
40 * Tracepoint for calling kthread_stop, performed to end a kthread:
42 TRACE_EVENT(sched_kthread_stop
,
44 TP_PROTO(struct task_struct
*t
),
49 __array( char, comm
, TASK_COMM_LEN
)
54 memcpy(__entry
->comm
, t
->comm
, TASK_COMM_LEN
);
55 __entry
->pid
= t
->pid
;
58 TP_printk("comm=%s pid=%d", __entry
->comm
, __entry
->pid
)
62 * Tracepoint for the return value of the kthread stopping:
64 TRACE_EVENT(sched_kthread_stop_ret
,
78 TP_printk("ret=%d", __entry
->ret
)
82 * Tracepoint for waking up a task:
84 DECLARE_EVENT_CLASS(sched_wakeup_template
,
86 TP_PROTO(struct task_struct
*p
),
88 TP_ARGS(__perf_task(p
)),
91 __array( char, comm
, TASK_COMM_LEN
)
94 __field( int, success
)
95 __field( int, target_cpu
)
99 memcpy(__entry
->comm
, p
->comm
, TASK_COMM_LEN
);
100 __entry
->pid
= p
->pid
;
101 __entry
->prio
= p
->prio
;
102 __entry
->success
= 1; /* rudiment, kill when possible */
103 __entry
->target_cpu
= task_cpu(p
);
106 TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
107 __entry
->comm
, __entry
->pid
, __entry
->prio
,
112 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
113 * called from the waking context.
115 DEFINE_EVENT(sched_wakeup_template
, sched_waking
,
116 TP_PROTO(struct task_struct
*p
),
120 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
121 * It is not always called from the waking context.
123 DEFINE_EVENT(sched_wakeup_template
, sched_wakeup
,
124 TP_PROTO(struct task_struct
*p
),
128 * Tracepoint for waking up a new task:
130 DEFINE_EVENT(sched_wakeup_template
, sched_wakeup_new
,
131 TP_PROTO(struct task_struct
*p
),
134 #ifdef CREATE_TRACE_POINTS
135 static inline long __trace_sched_switch_state(bool preempt
, struct task_struct
*p
)
137 #ifdef CONFIG_SCHED_DEBUG
138 BUG_ON(p
!= current
);
139 #endif /* CONFIG_SCHED_DEBUG */
142 * Preemption ignores task state, therefore preempted tasks are always
143 * RUNNING (we will not have dequeued if state != RUNNING).
145 return preempt
? TASK_RUNNING
| TASK_STATE_MAX
: p
->state
;
147 #endif /* CREATE_TRACE_POINTS */
150 * Tracepoint for task switches, performed by the scheduler:
152 TRACE_EVENT(sched_switch
,
154 TP_PROTO(bool preempt
,
155 struct task_struct
*prev
,
156 struct task_struct
*next
),
158 TP_ARGS(preempt
, prev
, next
),
161 __array( char, prev_comm
, TASK_COMM_LEN
)
162 __field( pid_t
, prev_pid
)
163 __field( int, prev_prio
)
164 __field( long, prev_state
)
165 __array( char, next_comm
, TASK_COMM_LEN
)
166 __field( pid_t
, next_pid
)
167 __field( int, next_prio
)
171 memcpy(__entry
->next_comm
, next
->comm
, TASK_COMM_LEN
);
172 __entry
->prev_pid
= prev
->pid
;
173 __entry
->prev_prio
= prev
->prio
;
174 __entry
->prev_state
= __trace_sched_switch_state(preempt
, prev
);
175 memcpy(__entry
->prev_comm
, prev
->comm
, TASK_COMM_LEN
);
176 __entry
->next_pid
= next
->pid
;
177 __entry
->next_prio
= next
->prio
;
180 TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
181 __entry
->prev_comm
, __entry
->prev_pid
, __entry
->prev_prio
,
182 __entry
->prev_state
& (TASK_STATE_MAX
-1) ?
183 __print_flags(__entry
->prev_state
& (TASK_STATE_MAX
-1), "|",
184 { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
185 { 16, "Z" }, { 32, "X" }, { 64, "x" },
186 { 128, "K" }, { 256, "W" }, { 512, "P" },
187 { 1024, "N" }) : "R",
188 __entry
->prev_state
& TASK_STATE_MAX
? "+" : "",
189 __entry
->next_comm
, __entry
->next_pid
, __entry
->next_prio
)
193 * Tracepoint for a task being migrated:
195 TRACE_EVENT(sched_migrate_task
,
197 TP_PROTO(struct task_struct
*p
, int dest_cpu
),
199 TP_ARGS(p
, dest_cpu
),
202 __array( char, comm
, TASK_COMM_LEN
)
203 __field( pid_t
, pid
)
205 __field( int, orig_cpu
)
206 __field( int, dest_cpu
)
210 memcpy(__entry
->comm
, p
->comm
, TASK_COMM_LEN
);
211 __entry
->pid
= p
->pid
;
212 __entry
->prio
= p
->prio
;
213 __entry
->orig_cpu
= task_cpu(p
);
214 __entry
->dest_cpu
= dest_cpu
;
217 TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
218 __entry
->comm
, __entry
->pid
, __entry
->prio
,
219 __entry
->orig_cpu
, __entry
->dest_cpu
)
222 DECLARE_EVENT_CLASS(sched_process_template
,
224 TP_PROTO(struct task_struct
*p
),
229 __array( char, comm
, TASK_COMM_LEN
)
230 __field( pid_t
, pid
)
235 memcpy(__entry
->comm
, p
->comm
, TASK_COMM_LEN
);
236 __entry
->pid
= p
->pid
;
237 __entry
->prio
= p
->prio
;
240 TP_printk("comm=%s pid=%d prio=%d",
241 __entry
->comm
, __entry
->pid
, __entry
->prio
)
245 * Tracepoint for freeing a task:
247 DEFINE_EVENT(sched_process_template
, sched_process_free
,
248 TP_PROTO(struct task_struct
*p
),
253 * Tracepoint for a task exiting:
255 DEFINE_EVENT(sched_process_template
, sched_process_exit
,
256 TP_PROTO(struct task_struct
*p
),
260 * Tracepoint for waiting on task to unschedule:
262 DEFINE_EVENT(sched_process_template
, sched_wait_task
,
263 TP_PROTO(struct task_struct
*p
),
267 * Tracepoint for a waiting task:
269 TRACE_EVENT(sched_process_wait
,
271 TP_PROTO(struct pid
*pid
),
276 __array( char, comm
, TASK_COMM_LEN
)
277 __field( pid_t
, pid
)
282 memcpy(__entry
->comm
, current
->comm
, TASK_COMM_LEN
);
283 __entry
->pid
= pid_nr(pid
);
284 __entry
->prio
= current
->prio
;
287 TP_printk("comm=%s pid=%d prio=%d",
288 __entry
->comm
, __entry
->pid
, __entry
->prio
)
292 * Tracepoint for do_fork:
294 TRACE_EVENT(sched_process_fork
,
296 TP_PROTO(struct task_struct
*parent
, struct task_struct
*child
),
298 TP_ARGS(parent
, child
),
301 __array( char, parent_comm
, TASK_COMM_LEN
)
302 __field( pid_t
, parent_pid
)
303 __array( char, child_comm
, TASK_COMM_LEN
)
304 __field( pid_t
, child_pid
)
308 memcpy(__entry
->parent_comm
, parent
->comm
, TASK_COMM_LEN
);
309 __entry
->parent_pid
= parent
->pid
;
310 memcpy(__entry
->child_comm
, child
->comm
, TASK_COMM_LEN
);
311 __entry
->child_pid
= child
->pid
;
314 TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
315 __entry
->parent_comm
, __entry
->parent_pid
,
316 __entry
->child_comm
, __entry
->child_pid
)
320 * Tracepoint for exec:
322 TRACE_EVENT(sched_process_exec
,
324 TP_PROTO(struct task_struct
*p
, pid_t old_pid
,
325 struct linux_binprm
*bprm
),
327 TP_ARGS(p
, old_pid
, bprm
),
330 __string( filename
, bprm
->filename
)
331 __field( pid_t
, pid
)
332 __field( pid_t
, old_pid
)
336 __assign_str(filename
, bprm
->filename
);
337 __entry
->pid
= p
->pid
;
338 __entry
->old_pid
= old_pid
;
341 TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename
),
342 __entry
->pid
, __entry
->old_pid
)
346 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
347 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
349 DECLARE_EVENT_CLASS(sched_stat_template
,
351 TP_PROTO(struct task_struct
*tsk
, u64 delay
),
353 TP_ARGS(__perf_task(tsk
), __perf_count(delay
)),
356 __array( char, comm
, TASK_COMM_LEN
)
357 __field( pid_t
, pid
)
358 __field( u64
, delay
)
362 memcpy(__entry
->comm
, tsk
->comm
, TASK_COMM_LEN
);
363 __entry
->pid
= tsk
->pid
;
364 __entry
->delay
= delay
;
367 TP_printk("comm=%s pid=%d delay=%Lu [ns]",
368 __entry
->comm
, __entry
->pid
,
369 (unsigned long long)__entry
->delay
)
/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_wait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay));
/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay));
/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay));
/*
 * Tracepoint for accounting blocked time (time the task is in uninterruptible
 * sleep).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay));
405 * Tracepoint for accounting runtime (time the task is executing
408 DECLARE_EVENT_CLASS(sched_stat_runtime
,
410 TP_PROTO(struct task_struct
*tsk
, u64 runtime
, u64 vruntime
),
412 TP_ARGS(tsk
, __perf_count(runtime
), vruntime
),
415 __array( char, comm
, TASK_COMM_LEN
)
416 __field( pid_t
, pid
)
417 __field( u64
, runtime
)
418 __field( u64
, vruntime
)
422 memcpy(__entry
->comm
, tsk
->comm
, TASK_COMM_LEN
);
423 __entry
->pid
= tsk
->pid
;
424 __entry
->runtime
= runtime
;
425 __entry
->vruntime
= vruntime
;
428 TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
429 __entry
->comm
, __entry
->pid
,
430 (unsigned long long)__entry
->runtime
,
431 (unsigned long long)__entry
->vruntime
)
/*
 * Instantiate the sched_stat_runtime event class declared above as the
 * sched_stat_runtime tracepoint (class and event share the same name).
 */
DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
	TP_ARGS(tsk, runtime, vruntime));
439 * Tracepoint for showing priority inheritance modifying a tasks
442 TRACE_EVENT(sched_pi_setprio
,
444 TP_PROTO(struct task_struct
*tsk
, int newprio
),
446 TP_ARGS(tsk
, newprio
),
449 __array( char, comm
, TASK_COMM_LEN
)
450 __field( pid_t
, pid
)
451 __field( int, oldprio
)
452 __field( int, newprio
)
456 memcpy(__entry
->comm
, tsk
->comm
, TASK_COMM_LEN
);
457 __entry
->pid
= tsk
->pid
;
458 __entry
->oldprio
= tsk
->prio
;
459 __entry
->newprio
= newprio
;
462 TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
463 __entry
->comm
, __entry
->pid
,
464 __entry
->oldprio
, __entry
->newprio
)
467 #ifdef CONFIG_DETECT_HUNG_TASK
468 TRACE_EVENT(sched_process_hang
,
469 TP_PROTO(struct task_struct
*tsk
),
473 __array( char, comm
, TASK_COMM_LEN
)
474 __field( pid_t
, pid
)
478 memcpy(__entry
->comm
, tsk
->comm
, TASK_COMM_LEN
);
479 __entry
->pid
= tsk
->pid
;
482 TP_printk("comm=%s pid=%d", __entry
->comm
, __entry
->pid
)
484 #endif /* CONFIG_DETECT_HUNG_TASK */
486 DECLARE_EVENT_CLASS(sched_move_task_template
,
488 TP_PROTO(struct task_struct
*tsk
, int src_cpu
, int dst_cpu
),
490 TP_ARGS(tsk
, src_cpu
, dst_cpu
),
493 __field( pid_t
, pid
)
494 __field( pid_t
, tgid
)
495 __field( pid_t
, ngid
)
496 __field( int, src_cpu
)
497 __field( int, src_nid
)
498 __field( int, dst_cpu
)
499 __field( int, dst_nid
)
503 __entry
->pid
= task_pid_nr(tsk
);
504 __entry
->tgid
= task_tgid_nr(tsk
);
505 __entry
->ngid
= task_numa_group_id(tsk
);
506 __entry
->src_cpu
= src_cpu
;
507 __entry
->src_nid
= cpu_to_node(src_cpu
);
508 __entry
->dst_cpu
= dst_cpu
;
509 __entry
->dst_nid
= cpu_to_node(dst_cpu
);
512 TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
513 __entry
->pid
, __entry
->tgid
, __entry
->ngid
,
514 __entry
->src_cpu
, __entry
->src_nid
,
515 __entry
->dst_cpu
, __entry
->dst_nid
)
519 * Tracks migration of tasks from one runqueue to another. Can be used to
520 * detect if automatic NUMA balancing is bouncing between nodes
522 DEFINE_EVENT(sched_move_task_template
, sched_move_numa
,
523 TP_PROTO(struct task_struct
*tsk
, int src_cpu
, int dst_cpu
),
525 TP_ARGS(tsk
, src_cpu
, dst_cpu
)
528 DEFINE_EVENT(sched_move_task_template
, sched_stick_numa
,
529 TP_PROTO(struct task_struct
*tsk
, int src_cpu
, int dst_cpu
),
531 TP_ARGS(tsk
, src_cpu
, dst_cpu
)
534 TRACE_EVENT(sched_swap_numa
,
536 TP_PROTO(struct task_struct
*src_tsk
, int src_cpu
,
537 struct task_struct
*dst_tsk
, int dst_cpu
),
539 TP_ARGS(src_tsk
, src_cpu
, dst_tsk
, dst_cpu
),
542 __field( pid_t
, src_pid
)
543 __field( pid_t
, src_tgid
)
544 __field( pid_t
, src_ngid
)
545 __field( int, src_cpu
)
546 __field( int, src_nid
)
547 __field( pid_t
, dst_pid
)
548 __field( pid_t
, dst_tgid
)
549 __field( pid_t
, dst_ngid
)
550 __field( int, dst_cpu
)
551 __field( int, dst_nid
)
555 __entry
->src_pid
= task_pid_nr(src_tsk
);
556 __entry
->src_tgid
= task_tgid_nr(src_tsk
);
557 __entry
->src_ngid
= task_numa_group_id(src_tsk
);
558 __entry
->src_cpu
= src_cpu
;
559 __entry
->src_nid
= cpu_to_node(src_cpu
);
560 __entry
->dst_pid
= task_pid_nr(dst_tsk
);
561 __entry
->dst_tgid
= task_tgid_nr(dst_tsk
);
562 __entry
->dst_ngid
= task_numa_group_id(dst_tsk
);
563 __entry
->dst_cpu
= dst_cpu
;
564 __entry
->dst_nid
= cpu_to_node(dst_cpu
);
567 TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
568 __entry
->src_pid
, __entry
->src_tgid
, __entry
->src_ngid
,
569 __entry
->src_cpu
, __entry
->src_nid
,
570 __entry
->dst_pid
, __entry
->dst_tgid
, __entry
->dst_ngid
,
571 __entry
->dst_cpu
, __entry
->dst_nid
)
575 * Tracepoint for waking a polling cpu without an IPI.
577 TRACE_EVENT(sched_wake_idle_without_ipi
,
591 TP_printk("cpu=%d", __entry
->cpu
)
595 * Tracepoint for showing scheduling priority changes.
597 TRACE_EVENT(sched_update_prio
,
599 TP_PROTO(struct task_struct
*tsk
),
604 __array( char, comm
, TASK_COMM_LEN
)
605 __field( pid_t
, pid
)
606 __field( unsigned int, policy
)
608 __field( unsigned int, rt_priority
)
609 __field( u64
, dl_runtime
)
610 __field( u64
, dl_deadline
)
611 __field( u64
, dl_period
)
615 memcpy(__entry
->comm
, tsk
->comm
, TASK_COMM_LEN
);
616 __entry
->pid
= tsk
->pid
;
617 __entry
->policy
= tsk
->policy
;
618 __entry
->nice
= task_nice(tsk
);
619 __entry
->rt_priority
= tsk
->rt_priority
;
620 __entry
->dl_runtime
= tsk
->dl
.dl_runtime
;
621 __entry
->dl_deadline
= tsk
->dl
.dl_deadline
;
622 __entry
->dl_period
= tsk
->dl
.dl_period
;
625 TP_printk("comm=%s pid=%d, policy=%s, nice=%d, rt_priority=%u, "
626 "dl_runtime=%Lu, dl_deadline=%Lu, dl_period=%Lu",
627 __entry
->comm
, __entry
->pid
,
628 __print_symbolic(__entry
->policy
, SCHEDULING_POLICY
),
629 __entry
->nice
, __entry
->rt_priority
,
630 __entry
->dl_runtime
, __entry
->dl_deadline
,
633 #endif /* _TRACE_SCHED_H */
635 /* This part must be outside protection */
636 #include <trace/define_trace.h>