From b9f05f8d1d26088d952d6f93c68b92d0113f08b2 Mon Sep 17 00:00:00 2001 From: Antoine Busque Date: Mon, 2 Nov 2015 17:58:13 -0500 Subject: [PATCH] Implement sched switch latency analysis The sched switch latency analysis measures the latency between a sched_wakeup or sched_waking event and its corresponding sched_switch event. It features log, top, and stats output modes. Signed-off-by: Antoine Busque --- lttng-schedlog | 28 ++ lttng-schedstats | 28 ++ lttng-schedtop | 28 ++ lttnganalyses/cli/command.py | 33 +- lttnganalyses/cli/cputop.py | 1 + lttnganalyses/cli/io.py | 7 +- lttnganalyses/cli/irq.py | 21 +- lttnganalyses/cli/memtop.py | 15 +- lttnganalyses/cli/mi.py | 1 + lttnganalyses/cli/sched.py | 522 ++++++++++++++++++++++ lttnganalyses/cli/syscallstats.py | 11 +- lttnganalyses/core/analysis.py | 2 - lttnganalyses/core/sched.py | 142 ++++++ lttnganalyses/linuxautomaton/sched.py | 39 +- lttnganalyses/linuxautomaton/statedump.py | 3 +- lttnganalyses/linuxautomaton/sv.py | 2 + setup.py | 6 + 17 files changed, 847 insertions(+), 42 deletions(-) create mode 100755 lttng-schedlog create mode 100755 lttng-schedstats create mode 100755 lttng-schedtop create mode 100644 lttnganalyses/cli/sched.py create mode 100644 lttnganalyses/core/sched.py diff --git a/lttng-schedlog b/lttng-schedlog new file mode 100755 index 0000000..0e96d61 --- /dev/null +++ b/lttng-schedlog @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# +# The MIT License (MIT) +# +# Copyright (C) 2015 - Julien Desfossez +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from lttnganalyses.cli import sched + +if __name__ == '__main__': + sched.runlog() diff --git a/lttng-schedstats b/lttng-schedstats new file mode 100755 index 0000000..45795d5 --- /dev/null +++ b/lttng-schedstats @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# +# The MIT License (MIT) +# +# Copyright (C) 2015 - Julien Desfossez +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from lttnganalyses.cli import sched + +if __name__ == '__main__': + sched.runstats() diff --git a/lttng-schedtop b/lttng-schedtop new file mode 100755 index 0000000..72e51ee --- /dev/null +++ b/lttng-schedtop @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# +# The MIT License (MIT) +# +# Copyright (C) 2015 - Julien Desfossez +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from lttnganalyses.cli import sched + +if __name__ == '__main__': + sched.runtop() diff --git a/lttnganalyses/cli/command.py b/lttnganalyses/cli/command.py index a0466e2..cc8950b 100644 --- a/lttnganalyses/cli/command.py +++ b/lttnganalyses/cli/command.py @@ -171,8 +171,8 @@ class Command: subprocess.check_output('babeltrace "%s"' % self._args.path, shell=True) except subprocess.CalledProcessError: - self._gen_error('Cannot run babeltrace on the trace, cannot verify if ' - 'events were lost during the trace recording') + self._gen_error('Cannot run babeltrace on the trace, cannot verify' + ' if events were lost during the trace recording') def _pre_analysis(self): pass @@ -271,8 +271,6 @@ class Command: ap.add_argument('--gmt', action='store_true', help='Manipulate timestamps based on GMT instead ' 'of local time') - ap.add_argument('--limit', type=int, default=10, - help='Limit to top X (default = 10)') ap.add_argument('--skip-validation', action='store_true', help='Skip the trace validation') ap.add_argument('--begin', type=str, help='start time: ' @@ -285,8 +283,8 @@ class Command: help='Analysis period end marker event name ' '(requires --period-begin)') ap.add_argument('--period-key', type=str, default='cpu_id', - help='Optional, list of event field names used to match ' - 'period markers (default: cpu_id)') + help='Optional, list of event field names used to ' + 'match period markers (default: cpu_id)') ap.add_argument('--timerange', type=str, help='time range: ' '[begin,end]') ap.add_argument('-V', '--version', action='version', @@ -295,13 +293,14 @@ class Command: # MI mode-dependent arguments if self._mi_mode: ap.add_argument('--metadata', action='store_true', - help="Show analysis's metadata") - ap.add_argument('path', metavar='', help='trace path', - nargs='*') + help='Show analysis\'s metadata') + ap.add_argument('path', metavar='', + help='trace path', nargs='*') else: ap.add_argument('--no-progress', action='store_true', help='Don\'t display the progress bar') - ap.add_argument('path', metavar='', help='trace path') + ap.add_argument('path', metavar='', + help='trace path') # Used to add command-specific args self._add_arguments(ap) @@ -343,6 +342,15 @@ class Command: ap.add_argument('--log', action='store_true', help=help) + @staticmethod + def _add_top_args(ap, help=None): + if not help: + help = 'Output the top results' + + ap.add_argument('--limit', type=int, default=10, + help='Limit to top X (default = 10)') + ap.add_argument('--top', action='store_true', help=help) + @staticmethod def _add_stats_args(ap, help=None): if not help: @@ -371,7 +379,8 @@ class Command: self._args.timerange, self._args.gmt) if None in [begin_ts, end_ts]: - self._cmdline_error('Invalid time format: "{}"'.format(self._args.timerange)) + self._cmdline_error( + 'Invalid time format: "{}"'.format(self._args.timerange)) else: if self._args.begin: begin_ts = date_to_epoch_nsec(self._args.begin) @@ -406,8 +415,6 @@ class Command: begin_ns = kwargs['begin_ns'] end_ns = kwargs['end_ns'] - # TODO allow output of results to some other place/in other - # format than plain text-cli self._analysis_tick(begin_ns, end_ns) self._ticks += 1 diff --git a/lttnganalyses/cli/cputop.py b/lttnganalyses/cli/cputop.py index 95e5448..7885740 100644 --- a/lttnganalyses/cli/cputop.py +++ b/lttnganalyses/cli/cputop.py @@ -211,6 +211,7 @@ class Cputop(Command): def _add_arguments(self, ap): Command._add_proc_filter_args(ap) + Command._add_top_args(ap) def _run(mi_mode): diff --git a/lttnganalyses/cli/io.py 
b/lttnganalyses/cli/io.py index 9c0eb89..112768f 100644 --- a/lttnganalyses/cli/io.py +++ b/lttnganalyses/cli/io.py @@ -530,8 +530,7 @@ class IoAnalysisCommand(Command): result_table) def _fill_file_usage_result_tables(self, read_table, write_table): - files = self._analysis.get_files_stats(self._args.pid_list, - self._args.proc_list) + files = self._analysis.get_files_stats() self._fill_file_read_usage_result_table(files, read_table) self._fill_file_write_usage_result_table(files, write_table) @@ -1156,6 +1155,8 @@ class IoAnalysisCommand(Command): Command._add_min_max_args(ap) Command._add_log_args( ap, help='Output the I/O requests in chronological order') + Command._add_top_args( + ap, help='Output the top I/O latencies by category') Command._add_stats_args(ap, help='Output the I/O latency statistics') Command._add_freq_args( ap, help='Output the I/O latency frequency distribution') @@ -1167,8 +1168,6 @@ class IoAnalysisCommand(Command): ap.add_argument('--maxsize', type=float, help='Filter out, I/O operations working with ' 'more that maxsize bytes') - ap.add_argument('--top', action='store_true', - help='Output the top I/O latencies by category') def _run(mi_mode): diff --git a/lttnganalyses/cli/irq.py b/lttnganalyses/cli/irq.py index 9671d1c..29f2970 100644 --- a/lttnganalyses/cli/irq.py +++ b/lttnganalyses/cli/irq.py @@ -62,7 +62,8 @@ class IrqAnalysisCommand(Command): ('min_duration', 'Minimum duration', mi.Duration), ('avg_duration', 'Average duration', mi.Duration), ('max_duration', 'Maximum duration', mi.Duration), - ('stdev_duration', "Interrupt duration standard deviation", mi.Duration), + ('stdev_duration', 'Interrupt duration standard deviation', + mi.Duration), ] ), ( @@ -73,12 +74,15 @@ class IrqAnalysisCommand(Command): ('min_duration', 'Minimum duration', mi.Duration), ('avg_duration', 'Average duration', mi.Duration), ('max_duration', 'Maximum duration', mi.Duration), - ('stdev_duration', "Interrupt duration standard deviation", mi.Duration), - ('raise_count', 'Interrupt raise count', mi.Integer, 'interrupt raises'), + ('stdev_duration', 'Interrupt duration standard deviation', + mi.Duration), + ('raise_count', 'Interrupt raise count', mi.Integer, + 'interrupt raises'), ('min_latency', 'Minimum raise latency', mi.Duration), ('avg_latency', 'Average raise latency', mi.Duration), ('max_latency', 'Maximum raise latency', mi.Duration), - ('stdev_latency', "Interrupt raise latency standard deviation", mi.Duration), + ('stdev_latency', 'Interrupt raise latency standard deviation', + mi.Duration), ] ), ( @@ -137,7 +141,7 @@ class IrqAnalysisCommand(Command): self._mi_get_result_tables(self._MI_TABLE_CLASS_HARD_STATS) soft_stats_tables = \ self._mi_get_result_tables(self._MI_TABLE_CLASS_SOFT_STATS) - assert(len(hard_stats_table) == len(soft_stats_tables)) + assert len(hard_stats_tables) == len(soft_stats_tables) begin = hard_stats_tables[0].timerange.begin end = hard_stats_tables[-1].timerange.end summary_table = \ @@ -145,7 +149,7 @@ class IrqAnalysisCommand(Command): begin, end) for hs_table, ss_table in zip(hard_stats_tables, soft_stats_tables): - assert(hs_table.timerange == ss_table.timerange) + assert hs_table.timerange == ss_table.timerange for row in itertools.chain(hs_table.rows, ss_table.rows): summary_table.append_row( @@ -278,7 +282,8 @@ class IrqAnalysisCommand(Command): for irq in irq_stats.irq_list: duration_us = (irq.end_ts - irq.begin_ts) / 1000 - index = min(int((duration_us - min_duration_us) / step), resolution - 1) + index = min(int((duration_us - 
min_duration_us) / step), + resolution - 1) counts[index] += 1 graph_data = [] @@ -456,7 +461,7 @@ class IrqAnalysisCommand(Command): if self._args.softirq_filter_list: return str(irq.id) in self._args.softirq_filter_list if self._args.irq_filter_list: - return FORalse + return False return True diff --git a/lttnganalyses/cli/memtop.py b/lttnganalyses/cli/memtop.py index 69c6471..9bde9ae 100644 --- a/lttnganalyses/cli/memtop.py +++ b/lttnganalyses/cli/memtop.py @@ -129,16 +129,15 @@ class Memtop(Command): return result_table - def _get_per_tid_allocd_result_table(self, begin_ns, end_ns): return self._get_per_tid_attr_result_table(self._MI_TABLE_CLASS_ALLOCD, - 'allocated_pages', - begin_ns, end_ns) + 'allocated_pages', + begin_ns, end_ns) def _get_per_tid_freed_result_table(self, begin_ns, end_ns): return self._get_per_tid_attr_result_table(self._MI_TABLE_CLASS_FREED, - 'freed_pages', - begin_ns, end_ns) + 'freed_pages', + begin_ns, end_ns) def _get_total_result_table(self, begin_ns, end_ns): result_table = self._mi_create_result_table(self._MI_TABLE_CLASS_TOTAL, @@ -168,7 +167,7 @@ class Memtop(Command): process_do = row.process pages = row.pages.value values.append(('%s (%d)' % (process_do.name, process_do.tid), - pages)) + pages)) for line in graph.graph(title, values, unit=' pages'): print(line) @@ -177,7 +176,8 @@ class Memtop(Command): self._print_per_tid_result(result_table, 'Per-TID Memory Allocations') def _print_per_tid_freed(self, result_table): - self._print_per_tid_result(result_table, 'Per-TID Memory Deallocations') + self._print_per_tid_result(result_table, + 'Per-TID Memory Deallocations') def _print_total(self, result_table): alloc = result_table.rows[0].allocd.value @@ -187,6 +187,7 @@ class Memtop(Command): def _add_arguments(self, ap): Command._add_proc_filter_args(ap) + Command._add_top_args(ap) def _run(mi_mode): diff --git a/lttnganalyses/cli/mi.py b/lttnganalyses/cli/mi.py index b5457be..550c382 100644 --- a/lttnganalyses/cli/mi.py +++ b/lttnganalyses/cli/mi.py @@ -27,6 +27,7 @@ class Tags: CPU = 'cpu' MEMORY = 'memory' INTERRUPT = 'interrupt' + SCHED = 'sched' SYSCALL = 'syscall' IO = 'io' TOP = 'top' diff --git a/lttnganalyses/cli/sched.py b/lttnganalyses/cli/sched.py new file mode 100644 index 0000000..7c34dee --- /dev/null +++ b/lttnganalyses/cli/sched.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python3 +# +# The MIT License (MIT) +# +# Copyright (C) 2015 - Julien Desfossez +# 2015 - Antoine Busque +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from .command import Command +from ..core import sched +from ..linuxautomaton import common +from ..ascii_graph import Pyasciigraph +from . import mi +import math +import operator +import statistics +import sys + + +class SchedAnalysisCommand(Command): + _DESC = """The sched command.""" + _ANALYSIS_CLASS = sched.SchedAnalysis + _MI_TITLE = 'Scheduling latencies analysis' + _MI_DESCRIPTION = \ + 'Scheduling latencies frequency distribution, statistics, top, and log' + _MI_TAGS = [mi.Tags.SCHED, mi.Tags.STATS, mi.Tags.FREQ, mi.Tags.TOP, + mi.Tags.LOG] + _MI_TABLE_CLASS_LOG = 'log' + _MI_TABLE_CLASS_TOP = 'top' + _MI_TABLE_CLASS_TOTAL_STATS = 'total_stats' + _MI_TABLE_CLASS_PER_TID_STATS = 'per_tid_stats' + _MI_TABLE_CLASS_PER_PRIO_STATS = 'per_prio_stats' + # _MI_TABLE_CLASS_FREQ = 'freq' + # _MI_TABLE_CLASS_SUMMARY = 'summary' + _MI_TABLE_CLASSES = [ + ( + _MI_TABLE_CLASS_LOG, + 'Sched switch log', [ + ('wakeup_ts', 'Wakeup timestamp', mi.Timestamp), + ('switch_ts', 'Switch timestamp', mi.Timestamp), + ('latency', 'Scheduling latency', mi.Duration), + ('prio', 'Priority', mi.Integer), + ('wakee_proc', 'Wakee process', mi.Process), + ('waker_proc', 'Waker process', mi.Process), + ] + ), + ( + _MI_TABLE_CLASS_TOP, + 'Sched switch top', [ + ('wakeup_ts', 'Wakeup timestamp', mi.Timestamp), + ('switch_ts', 'Switch timestamp', mi.Timestamp), + ('latency', 'Scheduling latency', mi.Duration), + ('prio', 'Priority', mi.Integer), + ('wakee_proc', 'Wakee process', mi.Process), + ('waker_proc', 'Waker process', mi.Process), + ] + ), + ( + _MI_TABLE_CLASS_TOTAL_STATS, + 'Sched switch latency stats (total)', [ + ('count', 'Sched switch count', mi.Integer, 'sched switches'), + ('min_latency', 'Minimum latency', mi.Duration), + ('avg_latency', 'Average latency', mi.Duration), + ('max_latency', 'Maximum latency', mi.Duration), + ('stdev_latency', 'Scheduling latency standard deviation', + mi.Duration), + ] + ), + ( + _MI_TABLE_CLASS_PER_TID_STATS, + 'Sched switch latency stats (per-TID)', [ + ('process', 'Wakee process', mi.Process), + ('count', 'Sched switch count', mi.Integer, 'sched switches'), + ('min_latency', 'Minimum latency', mi.Duration), + ('avg_latency', 'Average latency', mi.Duration), + ('max_latency', 'Maximum latency', mi.Duration), + ('stdev_latency', 'Scheduling latency standard deviation', + mi.Duration), + ] + ), + ( + _MI_TABLE_CLASS_PER_PRIO_STATS, + 'Sched switch latency stats (per-prio)', [ + ('prio', 'Priority', mi.Integer), + ('count', 'Sched switch count', mi.Integer, 'sched switches'), + ('min_latency', 'Minimum latency', mi.Duration), + ('avg_latency', 'Average latency', mi.Duration), + ('max_latency', 'Maximum latency', mi.Duration), + ('stdev_latency', 'Scheduling latency standard deviation', + mi.Duration), + ] + ), + ] + + def _analysis_tick(self, begin_ns, end_ns): + log_table = None + top_table = None + total_stats_table = None + per_tid_stats_table = None + per_prio_stats_table = None + + if self._args.log: + log_table = self._get_log_result_table(begin_ns, end_ns) + + if self._args.top: + top_table = self._get_top_result_table(begin_ns, end_ns) + + if self._args.stats: + if self._args.total: + total_stats_table = self._get_total_stats_result_table( + begin_ns, end_ns) + + if self._args.per_tid: + per_tid_stats_table = self._get_per_tid_stats_result_table( + begin_ns, end_ns) + + if self._args.per_prio: + per_prio_stats_table = self._get_per_prio_stats_result_table( + begin_ns, end_ns) + + if self._mi_mode: + if log_table: + self._mi_append_result_table(log_table) + + 
if top_table: + self._mi_append_result_table(top_table) + + if total_stats_table and total_stats_table.rows: + self._mi_append_result_table(total_stats_table) + + if per_tid_stats_table and per_tid_stats_table.rows: + self._mi_append_result_table(per_tid_stats_table) + + if per_prio_stats_table and per_prio_stats_table.rows: + self._mi_append_result_table(per_prio_stats_table) + else: + self._print_date(begin_ns, end_ns) + + if self._args.stats: + if total_stats_table: + self._print_total_stats(total_stats_table) + if per_tid_stats_table: + self._print_per_tid_stats(per_tid_stats_table) + if per_prio_stats_table: + self._print_per_prio_stats(per_prio_stats_table) + + if log_table: + self._print_sched_events(log_table) + + if top_table: + self._print_sched_events(top_table) + + def _get_log_result_table(self, begin_ns, end_ns): + result_table = self._mi_create_result_table(self._MI_TABLE_CLASS_LOG, + begin_ns, end_ns) + + for sched_event in self._analysis.sched_list: + wakee_proc = mi.Process(sched_event.wakee_proc.comm, + sched_event.wakee_proc.pid, + sched_event.wakee_proc.tid) + + if sched_event.waker_proc: + waker_proc = mi.Process(sched_event.waker_proc.comm, + sched_event.waker_proc.pid, + sched_event.waker_proc.tid) + else: + waker_proc = mi.Empty() + + result_table.append_row( + wakeup_ts=mi.Timestamp(sched_event.wakeup_ts), + switch_ts=mi.Timestamp(sched_event.switch_ts), + latency=mi.Duration(sched_event.latency), + prio=mi.Integer(sched_event.prio), + wakee_proc=wakee_proc, + waker_proc=waker_proc, + ) + + return result_table + + def _get_top_result_table(self, begin_ns, end_ns): + result_table = self._mi_create_result_table(self._MI_TABLE_CLASS_TOP, + begin_ns, end_ns) + + top_events = sorted(self._analysis.sched_list, + key=operator.attrgetter('latency'), + reverse=True) + top_events = top_events[:self._args.limit] + + for sched_event in top_events: + wakee_proc = mi.Process(sched_event.wakee_proc.comm, + sched_event.wakee_proc.pid, + sched_event.wakee_proc.tid) + + if sched_event.waker_proc: + waker_proc = mi.Process(sched_event.waker_proc.comm, + sched_event.waker_proc.pid, + sched_event.waker_proc.tid) + else: + waker_proc = mi.Empty() + + result_table.append_row( + wakeup_ts=mi.Timestamp(sched_event.wakeup_ts), + switch_ts=mi.Timestamp(sched_event.switch_ts), + latency=mi.Duration(sched_event.latency), + prio=mi.Integer(sched_event.prio), + wakee_proc=wakee_proc, + waker_proc=waker_proc, + ) + + return result_table + + def _get_total_stats_result_table(self, begin_ns, end_ns): + stats_table = \ + self._mi_create_result_table(self._MI_TABLE_CLASS_TOTAL_STATS, + begin_ns, end_ns) + + stdev = self._compute_sched_latency_stdev(self._analysis.sched_list) + if math.isnan(stdev): + stdev = mi.Unknown() + else: + stdev = mi.Duration(stdev) + + stats_table.append_row( + count=mi.Integer(self._analysis.count), + min_latency=mi.Duration(self._analysis.min_latency), + avg_latency=mi.Duration(self._analysis.total_latency / + self._analysis.count), + max_latency=mi.Duration(self._analysis.max_latency), + stdev_latency=stdev, + ) + + return stats_table + + def _get_per_tid_stats_result_table(self, begin_ns, end_ns): + stats_table = \ + self._mi_create_result_table(self._MI_TABLE_CLASS_PER_TID_STATS, + begin_ns, end_ns) + + tid_stats_list = sorted(list(self._analysis.tids.values()), + key=operator.attrgetter('comm')) + + for tid_stats in tid_stats_list: + if not tid_stats.sched_list: + continue + + stdev = self._compute_sched_latency_stdev(tid_stats.sched_list) + if math.isnan(stdev):
stdev = mi.Unknown() + else: + stdev = mi.Duration(stdev) + + stats_table.append_row( + process=mi.Process(tid=tid_stats.tid, name=tid_stats.comm), + count=mi.Integer(tid_stats.count), + min_latency=mi.Duration(tid_stats.min_latency), + avg_latency=mi.Duration(tid_stats.total_latency / + tid_stats.count), + max_latency=mi.Duration(tid_stats.max_latency), + stdev_latency=stdev, + ) + + return stats_table + + def _get_per_prio_stats_result_table(self, begin_ns, end_ns): + stats_table = \ + self._mi_create_result_table(self._MI_TABLE_CLASS_PER_PRIO_STATS, + begin_ns, end_ns) + + prio_sched_lists = {} + for sched_event in self._analysis.sched_list: + if sched_event.prio not in prio_sched_lists: + prio_sched_lists[sched_event.prio] = [] + + prio_sched_lists[sched_event.prio].append(sched_event) + + for prio in sorted(prio_sched_lists): + sched_list = prio_sched_lists[prio] + if not sched_list: + continue + + stdev = self._compute_sched_latency_stdev(sched_list) + if math.isnan(stdev): + stdev = mi.Unknown() + else: + stdev = mi.Duration(stdev) + + latencies = [sched.latency for sched in sched_list] + count = len(latencies) + min_latency = min(latencies) + max_latency = max(latencies) + total_latency = sum(latencies) + + stats_table.append_row( + prio=mi.Integer(prio), + count=mi.Integer(count), + min_latency=mi.Duration(min_latency), + avg_latency=mi.Duration(total_latency / count), + max_latency=mi.Duration(max_latency), + stdev_latency=stdev, + ) + + return stats_table + + def _compute_sched_latency_stdev(self, sched_events): + if len(sched_events) < 2: + return float('nan') + + sched_latencies = [] + for sched_event in sched_events: + sched_latencies.append(sched_event.latency) + + return statistics.stdev(sched_latencies) + + def _ns_to_hour_nsec(self, ts): + return common.ns_to_hour_nsec(ts, self._args.multi_day, self._args.gmt) + + def _print_sched_events(self, result_table): + fmt = '[{:<18}, {:<18}] {:>15} {:>10} {:<25} {:<25}' + title_fmt = '{:<20} {:<19} {:>15} {:>10} {:<25} {:<25}' + print() + print(result_table.title) + print(title_fmt.format('Wakeup', 'Switch', 'Latency (us)', 'Priority', + 'Wakee', 'Waker')) + for row in result_table.rows: + wakeup_ts = row.wakeup_ts.value + switch_ts = row.switch_ts.value + latency = row.latency.value + prio = row.prio.value + wakee_proc = row.wakee_proc + waker_proc = row.waker_proc + + wakee_str = '%s (%d)' % (wakee_proc.name, wakee_proc.tid) + if isinstance(waker_proc, mi.Empty): + waker_str = 'Unknown (N/A)' + else: + waker_str = '%s (%d)' % (waker_proc.name, waker_proc.tid) + + print(fmt.format(self._ns_to_hour_nsec(wakeup_ts), + self._ns_to_hour_nsec(switch_ts), + '%0.03f' % (latency / 1000), prio, + wakee_str, waker_str)) + + def _print_total_stats(self, stats_table): + row_format = '{:<12} {:<12} {:<12} {:<12} {:<12}' + header = row_format.format( + 'Count', 'Min', 'Avg', 'Max', 'Stdev' + ) + + if stats_table.rows: + print() + print(stats_table.title + ' (us)') + print(header) + for row in stats_table.rows: + if type(row.stdev_latency) is mi.Unknown: + stdev_str = '?' 
+ else: + stdev_str = '%0.03f' % row.stdev_latency.to_us() + + row_str = row_format.format( + '%d' % row.count.value, + '%0.03f' % row.min_latency.to_us(), + '%0.03f' % row.avg_latency.to_us(), + '%0.03f' % row.max_latency.to_us(), + '%s' % stdev_str, + ) + + print(row_str) + + def _print_per_tid_stats(self, stats_table): + row_format = '{:<25} {:>8} {:>12} {:>12} {:>12} {:>12}' + header = row_format.format( + 'Process', 'Count', 'Min', 'Avg', 'Max', 'Stdev' + ) + + if stats_table.rows: + print() + print(stats_table.title + ' (us)') + print(header) + for row in stats_table.rows: + if type(row.stdev_latency) is mi.Unknown: + stdev_str = '?' + else: + stdev_str = '%0.03f' % row.stdev_latency.to_us() + + proc = row.process + proc_str = '%s (%d)' % (proc.name, proc.tid) + + row_str = row_format.format( + '%s' % proc_str, + '%d' % row.count.value, + '%0.03f' % row.min_latency.to_us(), + '%0.03f' % row.avg_latency.to_us(), + '%0.03f' % row.max_latency.to_us(), + '%s' % stdev_str, + ) + + print(row_str) + + def _print_per_prio_stats(self, stats_table): + row_format = '{:>4} {:>8} {:>12} {:>12} {:>12} {:>12}' + header = row_format.format( + 'Prio', 'Count', 'Min', 'Avg', 'Max', 'Stdev' + ) + + if stats_table.rows: + print() + print(stats_table.title + ' (us)') + print(header) + for row in stats_table.rows: + if type(row.stdev_latency) is mi.Unknown: + stdev_str = '?' + else: + stdev_str = '%0.03f' % row.stdev_latency.to_us() + + row_str = row_format.format( + '%d' % row.prio.value, + '%d' % row.count.value, + '%0.03f' % row.min_latency.to_us(), + '%0.03f' % row.avg_latency.to_us(), + '%0.03f' % row.max_latency.to_us(), + '%s' % stdev_str, + ) + + print(row_str) + + def _validate_transform_args(self, args): + # If neither --total nor --per-prio are specified, default + # to --per-tid + if not (args.total or args.per_prio): + args.per_tid = True + + def _add_arguments(self, ap): + Command._add_min_max_args(ap) + Command._add_freq_args( + ap, help='Output the frequency distribution of sched switch ' + 'latencies') + Command._add_top_args(ap, help='Output the top sched switch latencies') + Command._add_log_args( + ap, help='Output the sched switches in chronological order') + Command._add_stats_args(ap, help='Output sched switch statistics') + ap.add_argument('--total', action='store_true', + help='Group all results (applies to stats and freq)') + ap.add_argument('--per-tid', action='store_true', + help='Group results per-TID (applies to stats and ' + 'freq) (default)') + ap.add_argument('--per-prio', action='store_true', + help='Group results per-prio (applies to stats and ' + 'freq)') + + +def _run(mi_mode): + schedcmd = SchedAnalysisCommand(mi_mode=mi_mode) + schedcmd.run() + + +def _runstats(mi_mode): + sys.argv.insert(1, '--stats') + _run(mi_mode) + + +def _runlog(mi_mode): + sys.argv.insert(1, '--log') + _run(mi_mode) + + +def _runtop(mi_mode): + sys.argv.insert(1, '--top') + _run(mi_mode) + + +def _runfreq(mi_mode): + sys.argv.insert(1, '--freq') + _run(mi_mode) + + +def runstats(): + _runstats(mi_mode=False) + + +def runlog(): + _runlog(mi_mode=False) + + +def runtop(): + _runtop(mi_mode=False) + + +def runfreq(): + _runfreq(mi_mode=False) + + +def runstats_mi(): + _runstats(mi_mode=True) + + +def runlog_mi(): + _runlog(mi_mode=True) + + +def runtop_mi(): + _runtop(mi_mode=True) + + +def runfreq_mi(): + _runfreq(mi_mode=True) diff --git a/lttnganalyses/cli/syscallstats.py b/lttnganalyses/cli/syscallstats.py index b8c8337..9de4b78 100644 --- a/lttnganalyses/cli/syscallstats.py +++ 
b/lttnganalyses/cli/syscallstats.py @@ -48,7 +48,8 @@ class SyscallsAnalysis(Command): ('min_duration', 'Minimum call duration', mi.Duration), ('avg_duration', 'Average call duration', mi.Duration), ('max_duration', 'Maximum call duration', mi.Duration), - ('stdev_duration', 'Call duration standard deviation', mi.Duration), + ('stdev_duration', 'Call duration standard deviation', + mi.Duration), ('return_values', 'Return values count', mi.String), ] ), @@ -129,8 +130,9 @@ class SyscallsAnalysis(Command): subtitle = '%s (%s, TID: %d)' % (proc_stats.comm, pid, proc_stats.tid) result_table = \ - self._mi_create_result_table(self._MI_TABLE_CLASS_PER_TID_STATS, - begin_ns, end_ns, subtitle) + self._mi_create_result_table( + self._MI_TABLE_CLASS_PER_TID_STATS, begin_ns, end_ns, + subtitle) for syscall in sorted(proc_stats.syscalls.values(), key=operator.attrgetter('count'), @@ -163,7 +165,8 @@ class SyscallsAnalysis(Command): syscall=mi.Syscall(syscall.name), count=mi.Integer(syscall.count), min_duration=mi.Duration(syscall.min_duration), - avg_duration=mi.Duration(syscall.total_duration / syscall.count), + avg_duration=mi.Duration(syscall.total_duration / + syscall.count), max_duration=mi.Duration(syscall.max_duration), stdev_duration=stdev, return_values=mi.String(str(return_count)), diff --git a/lttnganalyses/core/analysis.py b/lttnganalyses/core/analysis.py index 4cd19ee..75e8eed 100644 --- a/lttnganalyses/core/analysis.py +++ b/lttnganalyses/core/analysis.py @@ -22,8 +22,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from ..linuxautomaton import common - class AnalysisConfig: def __init__(self): diff --git a/lttnganalyses/core/sched.py b/lttnganalyses/core/sched.py new file mode 100644 index 0000000..311b325 --- /dev/null +++ b/lttnganalyses/core/sched.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +# +# The MIT License (MIT) +# +# Copyright (C) 2015 - Julien Desfossez +# 2015 - Antoine Busque +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from .analysis import Analysis + + +class SchedAnalysis(Analysis): + def __init__(self, state, conf): + notification_cbs = { + 'sched_switch_per_tid': self._process_sched_switch, + } + + super().__init__(state, conf) + self._state.register_notification_cbs(notification_cbs) + + # Log of individual wake scheduling events + self.sched_list = [] + # Scheduling latency stats indexed by TID + self.tids = {} + # Stats + self.min_latency = None + self.max_latency = None + self.total_latency = 0 + + @property + def count(self): + return len(self.sched_list) + + def reset(self): + self.sched_list = [] + self.min_latency = None + self.max_latency = None + self.total_latency = 0 + for tid in self.tids: + self.tids[tid].reset() + + def _process_sched_switch(self, **kwargs): + switch_ts = kwargs['timestamp'] + wakeup_ts = kwargs['wakeup_ts'] + wakee_proc = kwargs['wakee_proc'] + waker_proc = kwargs['waker_proc'] + next_tid = kwargs['next_tid'] + next_prio = kwargs['next_prio'] + + if wakeup_ts is None: + return + + latency = switch_ts - wakeup_ts + if self._conf.min_duration is not None and \ + latency < self._conf.min_duration: + return + if self._conf.max_duration is not None and \ + latency > self._conf.max_duration: + return + + if waker_proc is not None and waker_proc.tid not in self.tids: + self.tids[waker_proc.tid] = \ + SchedStats.new_from_process(waker_proc) + + if next_tid not in self.tids: + self.tids[next_tid] = SchedStats.new_from_process(wakee_proc) + + sched_event = SchedEvent(wakeup_ts, switch_ts, wakee_proc, waker_proc, + next_prio) + self.tids[next_tid].update_stats(sched_event) + self._update_stats(sched_event) + + def _update_stats(self, sched_event): + if self.min_latency is None or sched_event.latency < self.min_latency: + self.min_latency = sched_event.latency + + if self.max_latency is None or sched_event.latency > self.max_latency: + self.max_latency = sched_event.latency + + self.total_latency += sched_event.latency + self.sched_list.append(sched_event) + + +class SchedStats(): + def __init__(self, tid, comm): + self.tid = tid + self.comm = comm + self.min_latency = None + self.max_latency = None + self.total_latency = 0 + self.sched_list = [] + + @classmethod + def new_from_process(cls, proc): + return cls(proc.tid, proc.comm) + + @property + def count(self): + return len(self.sched_list) + + def update_stats(self, sched_event): + if self.min_latency is None or sched_event.latency < self.min_latency: + self.min_latency = sched_event.latency + + if self.max_latency is None or sched_event.latency > self.max_latency: + self.max_latency = sched_event.latency + + self.total_latency += sched_event.latency + self.sched_list.append(sched_event) + + def reset(self): + self.min_latency = None + self.max_latency = None + self.total_latency = 0 + self.sched_list = [] + + +class SchedEvent(): + def __init__(self, wakeup_ts, switch_ts, wakee_proc, waker_proc, prio): + self.wakeup_ts = wakeup_ts + self.switch_ts = switch_ts + self.wakee_proc = wakee_proc + self.waker_proc = waker_proc + self.prio = prio + self.latency = switch_ts - wakeup_ts diff --git a/lttnganalyses/linuxautomaton/sched.py b/lttnganalyses/linuxautomaton/sched.py index c4a0c4f..03917b3 100644 --- a/lttnganalyses/linuxautomaton/sched.py +++ b/lttnganalyses/linuxautomaton/sched.py @@ -33,6 +33,7 @@ class SchedStateProvider(sp.StateProvider): 'sched_migrate_task': self._process_sched_migrate_task, 'sched_wakeup': self._process_sched_wakeup, 'sched_wakeup_new': self._process_sched_wakeup, + 'sched_waking': 
self._process_sched_wakeup, 'sched_process_fork': self._process_sched_process_fork, 'sched_process_exec': self._process_sched_process_exec, } @@ -82,10 +83,16 @@ class SchedStateProvider(sp.StateProvider): next_tid = event['next_tid'] next_comm = event['next_comm'] prev_tid = event['prev_tid'] + next_prio = event['next_prio'] self._sched_switch_per_cpu(cpu_id, next_tid) self._sched_switch_per_tid(next_tid, next_comm, prev_tid) + wakee_proc = self._state.tids[next_tid] + waker_proc = None + if wakee_proc.last_waker is not None: + waker_proc = self._state.tids[wakee_proc.last_waker] + self._state.send_notification_cb('sched_switch_per_cpu', timestamp=timestamp, cpu_id=cpu_id, @@ -94,7 +101,14 @@ class SchedStateProvider(sp.StateProvider): timestamp=timestamp, prev_tid=prev_tid, next_tid=next_tid, - next_comm=next_comm) + next_comm=next_comm, + wakee_proc=wakee_proc, + wakeup_ts=wakee_proc.last_wakeup, + waker_proc=waker_proc, + next_prio=next_prio) + + wakee_proc.last_wakeup = None + wakee_proc.last_waker = None def _process_sched_migrate_task(self, event): tid = event['tid'] @@ -111,14 +125,31 @@ class SchedStateProvider(sp.StateProvider): def _process_sched_wakeup(self, event): target_cpu = event['target_cpu'] tid = event['tid'] + current_cpu = event['cpu_id'] if target_cpu not in self._state.cpus: self._state.cpus[target_cpu] = sv.CPU(target_cpu) + if current_cpu not in self._state.cpus: + self._state.cpus[current_cpu] = sv.CPU(current_cpu) + + # If the TID is already executing on a CPU, ignore this wakeup + for cpu_id in self._state.cpus: + cpu = self._state.cpus[cpu_id] + if cpu.current_tid == tid: + return + if tid not in self._state.tids: proc = sv.Process() proc.tid = tid self._state.tids[tid] = proc + # a process can be woken up multiple times, only record + # the first one + if self._state.tids[tid].last_wakeup is None: + self._state.tids[tid].last_wakeup = event.timestamp + if self._state.cpus[current_cpu].current_tid is not None: + self._state.tids[tid].last_waker = \ + self._state.cpus[current_cpu].current_tid def _process_sched_process_fork(self, event): child_tid = event['child_tid'] @@ -142,7 +173,8 @@ class SchedStateProvider(sp.StateProvider): self._state.send_notification_cb('create_fd', fd=fd, parent_proc=child_proc, - timestamp=event.timestamp) + timestamp=event.timestamp, + cpu_id=event['cpu_id']) self._state.tids[child_tid] = child_proc @@ -168,5 +200,6 @@ class SchedStateProvider(sp.StateProvider): self._state.send_notification_cb('close_fd', fd=fd, parent_proc=proc, - timestamp=event.timestamp) + timestamp=event.timestamp, + cpu_id=event['cpu_id']) del proc.fds[fd] diff --git a/lttnganalyses/linuxautomaton/statedump.py b/lttnganalyses/linuxautomaton/statedump.py index a82ba61..ead0497 100644 --- a/lttnganalyses/linuxautomaton/statedump.py +++ b/lttnganalyses/linuxautomaton/statedump.py @@ -91,7 +91,8 @@ class StatedumpStateProvider(sp.StateProvider): self._state.send_notification_cb('create_fd', fd=fd, parent_proc=proc, - timestamp=event.timestamp) + timestamp=event.timestamp, + cpu_id=event['cpu_id']) else: # just fix the filename proc.fds[fd].filename = filename diff --git a/lttnganalyses/linuxautomaton/sv.py b/lttnganalyses/linuxautomaton/sv.py index a90d2bf..562599f 100644 --- a/lttnganalyses/linuxautomaton/sv.py +++ b/lttnganalyses/linuxautomaton/sv.py @@ -41,6 +41,8 @@ class Process(): self.current_syscall = None # the process scheduled before this one self.prev_tid = None + self.last_wakeup = None + self.last_waker = None class CPU(): diff --git a/setup.py 
b/setup.py index 0a1e9b4..356b499 100755 --- a/setup.py +++ b/setup.py @@ -95,6 +95,9 @@ setup( 'lttng-irqstats = lttnganalyses.cli.irq:runstats', 'lttng-memtop = lttnganalyses.cli.memtop:run', 'lttng-syscallstats = lttnganalyses.cli.syscallstats:run', + 'lttng-schedlog = lttnganalyses.cli.sched:runlog', + 'lttng-schedtop = lttnganalyses.cli.sched:runtop', + 'lttng-schedstats = lttnganalyses.cli.sched:runstats', # MI mode 'lttng-cputop-mi = lttnganalyses.cli.cputop:run_mi', @@ -108,6 +111,9 @@ setup( 'lttng-iolatencytop-mi = lttnganalyses.cli.io:runlatencytop_mi', 'lttng-iolog-mi = lttnganalyses.cli.io:runlog_mi', 'lttng-iousagetop-mi = lttnganalyses.cli.io:runusage_mi', + 'lttng-schedlog-mi = lttnganalyses.cli.sched:runlog_mi', + 'lttng-schedtop-mi = lttnganalyses.cli.sched:runtop_mi', + 'lttng-schedstats-mi = lttnganalyses.cli.sched:runstats_mi', ], }, -- 2.34.1
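
For readers who want the mechanism without walking the whole diff: the state provider records, per TID, the timestamp of the first pending sched_waking/sched_wakeup and, on the next sched_switch that schedules that TID in, reports the difference as the scheduling latency. The following standalone sketch is not part of the patch; the event tuples and helper names are invented for illustration, and it mirrors in simplified form what SchedStateProvider._process_sched_wakeup and _process_sched_switch do with Process.last_wakeup.

# Simplified, self-contained illustration of the wakeup/switch pairing
# implemented by this patch; the synthetic events below stand in for the
# babeltrace events a real trace would provide.

last_wakeup = {}   # tid -> timestamp (ns) of the first pending wakeup
latencies = []     # (tid, wakeup_ts, switch_ts, latency_ns)


def on_wakeup(tid, ts):
    # A task may be woken several times before it actually runs; only the
    # first wakeup is kept, as _process_sched_wakeup does with last_wakeup.
    last_wakeup.setdefault(tid, ts)


def on_switch(next_tid, ts):
    # The scheduling latency is the delay between the recorded wakeup and
    # the sched_switch that finally puts the woken task on a CPU.
    wakeup_ts = last_wakeup.pop(next_tid, None)
    if wakeup_ts is not None:
        latencies.append((next_tid, wakeup_ts, ts, ts - wakeup_ts))


events = [
    ('sched_waking', 42, 1000),
    ('sched_wakeup', 42, 1500),   # duplicate wakeup, ignored
    ('sched_switch', 42, 9000),   # latency = 9000 - 1000 = 8000 ns
]

for name, tid, ts in events:
    if name in ('sched_waking', 'sched_wakeup', 'sched_wakeup_new'):
        on_wakeup(tid, ts)
    else:
        on_switch(tid, ts)

print(latencies)   # [(42, 1000, 9000, 8000)]

On a real trace, the same computation is exposed through the entry points the patch registers in setup.py: lttng-schedlog, lttng-schedtop (with --limit), and lttng-schedstats (with --total, --per-tid, or --per-prio), each taking a trace path like the other lttng-analyses commands.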