tracing: add sched_update_prio
authorJulien Desfossez <jdesfossez@efficios.com>
Tue, 28 Jun 2016 20:31:12 +0000 (16:31 -0400)
committerJulien Desfossez <jdesfossez@efficios.com>
Fri, 16 Sep 2016 21:02:15 +0000 (17:02 -0400)
This tracepoint allows to keep track of all explicit priority changes of
a task. It outputs the scheduling policy, the nice value, the
rt_priority and the deadline-related attributes (dl_runtime, dl_deadline
and dl_period).

It is emitted in the code path of the sched_setscheduler, sched_setattr,
sched_setparam, and nice system calls.

This allows the analysis of real-time scheduling delays based on the
configured scheduling priorities and policies, which cannot be performed
with the current instrumentation in sched_switch. Also, instead of
exposing the internal kernel prio field, this tracepoint only outputs
the user-visible priority attributes.

The effective priority of running threads can also be temporarily
changed in the PI code, but a dedicated tracepoint is already in place
to cover this case.

Here are a few output examples:
After fork of a normal task:
sched_update_prio: comm=bash pid=2104, policy=SCHED_NORMAL, nice=0,
rt_priority=0, dl_runtime=0, dl_deadline=0, dl_period=0

renice -n 10 of a normal task:
sched_update_prio: comm=sleep pid=2130, policy=SCHED_NORMAL, nice=10,
rt_priority=0, dl_runtime=0, dl_deadline=0, dl_period=0

SCHED_FIFO 60:
sched_update_prio: comm=chrt pid=2105, policy=SCHED_FIFO, nice=0,
rt_priority=60, dl_runtime=0, dl_deadline=0, dl_period=0

SCHED_RR 60:
sched_update_prio: comm=chrt pid=2109, policy=SCHED_RR, nice=0,
rt_priority=60, dl_runtime=0, dl_deadline=0, dl_period=0

SCHED_DEADLINE:
sched_update_prio: comm=b pid=2110, policy=SCHED_DEADLINE, nice=0,
rt_priority=0, dl_runtime=10000000, dl_deadline=30000000,
dl_period=30000000

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt (Red Hat) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Julien Desfossez <jdesfossez@efficios.com>
include/trace/events/sched.h
kernel/sched/core.c

index 9b90c57517a918687189933ae6920b80d251e98e..bc695e48f2d8bbfb09f5f2101877d8e37a660cce 100644 (file)
@@ -8,6 +8,34 @@
 #include <linux/tracepoint.h>
 #include <linux/binfmts.h>
 
+#define SCHEDULING_POLICY                              \
+       EM( SCHED_NORMAL,       "SCHED_NORMAL")         \
+       EM( SCHED_FIFO,         "SCHED_FIFO")           \
+       EM( SCHED_RR,           "SCHED_RR")             \
+       EM( SCHED_BATCH,        "SCHED_BATCH")          \
+       EM( SCHED_IDLE,         "SCHED_IDLE")           \
+       EMe(SCHED_DEADLINE,     "SCHED_DEADLINE")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)       TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)      TRACE_DEFINE_ENUM(a);
+
+SCHEDULING_POLICY
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)       {a, b},
+#define EMe(a, b)      {a, b}
+
 /*
  * Tracepoint for calling kthread_stop, performed to end a kthread:
  */
@@ -562,6 +590,46 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
 
        TP_printk("cpu=%d", __entry->cpu)
 );
+
+/*
+ * Tracepoint for showing scheduling priority changes.
+ */
+TRACE_EVENT(sched_update_prio,
+
+       TP_PROTO(struct task_struct *tsk),
+
+       TP_ARGS(tsk),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( unsigned int,  policy          )
+               __field( int,   nice                    )
+               __field( unsigned int,  rt_priority     )
+               __field( u64,   dl_runtime              )
+               __field( u64,   dl_deadline             )
+               __field( u64,   dl_period               )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid            = tsk->pid;
+               __entry->policy         = tsk->policy;
+               __entry->nice           = task_nice(tsk);
+               __entry->rt_priority    = tsk->rt_priority;
+               __entry->dl_runtime     = tsk->dl.dl_runtime;
+               __entry->dl_deadline    = tsk->dl.dl_deadline;
+               __entry->dl_period      = tsk->dl.dl_period;
+       ),
+
+       TP_printk("comm=%s pid=%d, policy=%s, nice=%d, rt_priority=%u, "
+                       "dl_runtime=%Lu, dl_deadline=%Lu, dl_period=%Lu",
+                       __entry->comm, __entry->pid,
+                       __print_symbolic(__entry->policy, SCHEDULING_POLICY),
+                       __entry->nice, __entry->rt_priority,
+                       __entry->dl_runtime, __entry->dl_deadline,
+                       __entry->dl_period)
+);
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
index f5f7b3cdf0be0577d69fc44e4390e7d9a1e75c88..979a635962979993b0b4710d33c2de2be51c953e 100644 (file)
@@ -3751,6 +3751,7 @@ void set_user_nice(struct task_struct *p, long nice)
                        resched_curr(rq);
        }
 out_unlock:
+       trace_sched_update_prio(p);
        task_rq_unlock(rq, p, &rf);
 }
 EXPORT_SYMBOL(set_user_nice);
@@ -3955,6 +3956,8 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
                p->sched_class = &rt_sched_class;
        else
                p->sched_class = &fair_sched_class;
+
+       trace_sched_update_prio(p);
 }
 
 static void
This page took 0.028186 seconds and 5 git commands to generate.