From 5f62672c9d7c60346c916c54e2fe21d2907510fd Mon Sep 17 00:00:00 2001 From: Julien Desfossez Date: Fri, 18 Apr 2014 16:06:40 -0400 Subject: [PATCH] various fixes for iotop handling Signed-off-by: Julien Desfossez --- LTTngAnalyzes/common.py | 11 ++++- LTTngAnalyzes/sched.py | 1 + LTTngAnalyzes/statedump.py | 7 ++- LTTngAnalyzes/syscalls.py | 93 +++++++++++++++++++++++++++++--------- iotop.py | 14 ++++-- 5 files changed, 98 insertions(+), 28 deletions(-) diff --git a/LTTngAnalyzes/common.py b/LTTngAnalyzes/common.py index a70f907..8083786 100644 --- a/LTTngAnalyzes/common.py +++ b/LTTngAnalyzes/common.py @@ -22,6 +22,7 @@ class Process(): self.fds = {} # indexed by filename self.closed_fds = {} + self.current_syscall = {} class CPU(): def __init__(self): @@ -29,7 +30,6 @@ class CPU(): self.cpu_ns = 0 self.current_tid = -1 self.start_task_ns = 0 - self.current_syscall = {} class Syscall(): def __init__(self): @@ -39,6 +39,7 @@ class Syscall(): class Disk(): def __init__(self): self.name = "" + self.prettyname = "" self.nr_sector = 0 self.nr_requests = 0 self.completed_requests = 0 @@ -69,6 +70,7 @@ def get_disk(dev, disks): if not dev in disks: d = Disk() d.name = "%d" % dev + d.prettyname = "%d" % dev disks[dev] = d else: d = disks[dev] @@ -97,6 +99,13 @@ def ns_to_hour(ns): d = time.localtime(ns/NSEC_PER_SEC) return "%02d:%02d:%02d" % (d.tm_hour, d.tm_min, d.tm_sec) +def ns_to_hour_nsec(ns): + d = time.localtime(ns/NSEC_PER_SEC) + return "%02d:%02d:%02d.%09d" % (d.tm_hour, d.tm_min, d.tm_sec, ns % NSEC_PER_SEC) + +def ns_to_sec(ns): + return "%lu.%09u" % (ns/NSEC_PER_SEC, ns % NSEC_PER_SEC) + def sec_to_hour(ns): d = time.localtime(ns) return "%02d:%02d:%02d" % (d.tm_hour, d.tm_min, d.tm_sec) diff --git a/LTTngAnalyzes/sched.py b/LTTngAnalyzes/sched.py index 3ea64e6..28f768b 100644 --- a/LTTngAnalyzes/sched.py +++ b/LTTngAnalyzes/sched.py @@ -46,6 +46,7 @@ class Sched(): self.tids[next_tid] = p else: p = self.tids[next_tid] + p.comm = next_comm p.last_sched = ts def switch(self, event): diff --git a/LTTngAnalyzes/statedump.py b/LTTngAnalyzes/statedump.py index 5f2a329..7c04dae 100644 --- a/LTTngAnalyzes/statedump.py +++ b/LTTngAnalyzes/statedump.py @@ -1,8 +1,9 @@ from LTTngAnalyzes.common import * class Statedump(): - def __init__(self, tids): + def __init__(self, tids, disks): self.tids = tids + self.disks = disks def merge_fd_dict(self, p, parent): if len(p.fds.keys()) != 0: @@ -89,3 +90,7 @@ class Statedump(): else: # just fix the filename p.fds[fd].filename = filename + + def block_device(self, event): + d = get_disk(event["dev"], self.disks) + d.prettyname = event["diskname"] diff --git a/LTTngAnalyzes/syscalls.py b/LTTngAnalyzes/syscalls.py index 34ab1e5..8f7e77d 100644 --- a/LTTngAnalyzes/syscalls.py +++ b/LTTngAnalyzes/syscalls.py @@ -18,6 +18,9 @@ class Syscalls(): # generic names assigned to special FDs, don't try to match these in the # closed_fds dict self.generic_names = ["unknown", "socket"] + # TS, syscall_name, filename, ms/bytes + self.read_timing = [] + self.write_timing = [] def global_syscall_entry(self, name): if not name in self.syscalls: @@ -46,32 +49,33 @@ class Syscalls(): s.count += 1 def track_open(self, name, proc, event, cpu): - cpu.current_syscall = {} + self.tids[cpu.current_tid].current_syscall = {} + current_syscall = self.tids[cpu.current_tid].current_syscall if name in ["sys_open", "sys_openat"]: - cpu.current_syscall["filename"] = event["filename"] + current_syscall["filename"] = event["filename"] if event["flags"] & O_CLOEXEC == O_CLOEXEC: - cpu.current_syscall["cloexec"] = 1 + current_syscall["cloexec"] = 1 elif name in ["sys_accept", "sys_socket"]: - cpu.current_syscall["filename"] = "socket" + current_syscall["filename"] = "socket" elif name in ["sys_dup2"]: newfd = event["newfd"] oldfd = event["oldfd"] if newfd in proc.fds.keys(): self.close_fd(proc, newfd) if oldfd in proc.fds.keys(): - cpu.current_syscall["filename"] = proc.fds[oldfd].filename + current_syscall["filename"] = proc.fds[oldfd].filename else: - cpu.current_syscall["filename"] = "" + current_syscall["filename"] = "" elif name in ["sys_fcntl"]: # F_DUPFD if event["cmd"] != 0: return oldfd = event["fd"] if oldfd in proc.fds.keys(): - cpu.current_syscall["filename"] = proc.fds[oldfd].filename + current_syscall["filename"] = proc.fds[oldfd].filename else: - cpu.current_syscall["filename"] = "" - cpu.current_syscall["name"] = name + current_syscall["filename"] = "" + current_syscall["name"] = name def close_fd(self, proc, fd): filename = proc.fds[fd].filename @@ -133,14 +137,25 @@ class Syscalls(): # if it's a thread, we want the parent if t.pid != -1 and t.tid != t.pid: t = self.tids[t.pid] - c.current_syscall["name"] = name + current_syscall = self.tids[c.current_tid].current_syscall + current_syscall["name"] = name if name == "sys_splice": - c.current_syscall["fd_in"] = self.get_fd(t, event["fd_in"]) - c.current_syscall["fd_out"] = self.get_fd(t, event["fd_out"]) + # FIXME : FD() for read and write here + current_syscall["fd_in"] = self.get_fd(t, event["fd_in"]) + current_syscall["fd_out"] = self.get_fd(t, event["fd_out"]) return fd = event["fd"] f = self.get_fd(t, fd) - c.current_syscall["fd"] = f + current_syscall["fd"] = f + current_syscall["start"] = event.timestamp + if name in ["sys_writev"]: + current_syscall["count"] = event["vlen"] + elif name in ["sys_recvfrom"]: + current_syscall["count"] = event["size"] + elif name in ["sys_recvmsg"]: + current_syscall["count"] = "" + else: + current_syscall["count"] = event["count"] def add_tid_fd(self, event, cpu): ret = event["ret"] @@ -148,7 +163,8 @@ class Syscalls(): # if it's a thread, we want the parent if t.pid != -1 and t.tid != t.pid: t = self.tids[t.pid] - name = cpu.current_syscall["filename"] + current_syscall = self.tids[cpu.current_tid].current_syscall + name = current_syscall["filename"] if name not in self.generic_names and name in t.closed_fds.keys(): fd = t.closed_fds[name] fd.open += 1 @@ -163,7 +179,7 @@ class Syscalls(): # if fd.fd in t.fds.keys(): # print("%lu : FD %d in tid %d was already there, untracked close" % # (event.timestamp, fd.fd, t.tid)) - if "cloexec" in cpu.current_syscall.keys(): + if "cloexec" in current_syscall.keys(): fd.cloexec = 1 t.fds[fd.fd] = fd #print("%lu : %s opened %s (%d times)" % (event.timestamp, t.comm, @@ -177,20 +193,51 @@ class Syscalls(): # if it's a thread, we want the parent if proc.pid != -1 and proc.tid != proc.pid: proc = self.tids[proc.pid] + current_syscall = self.tids[cpu.current_tid].current_syscall if name == "sys_splice": - cpu.current_syscall["fd_in"].read += ret + current_syscall["fd_in"].read += ret proc.read += ret - cpu.current_syscall["fd_out"].write += ret + current_syscall["fd_out"].write += ret proc.write += ret elif name in self.read_syscalls: if ret > 0: - cpu.current_syscall["fd"].read += ret + current_syscall["fd"].read += ret proc.read += ret elif name in self.write_syscalls: if ret > 0: - cpu.current_syscall["fd"].write += ret + current_syscall["fd"].write += ret proc.write += ret + def track_rw_latency(self, name, ret, c, ts): + current_syscall = self.tids[c.current_tid].current_syscall + if not "start" in current_syscall.keys(): + return + if "fd" in current_syscall.keys(): + filename = current_syscall["fd"].filename + else: + filename = "unknown" + if ret > 0: + ms = (ts - current_syscall["start"]) / MSEC_PER_NSEC + sec = (ts - current_syscall["start"]) / NSEC_PER_SEC + thr = ret / sec + speed = "%0.03f ms, %s/sec" % (ms, convert_size(thr)) + else: + ms = (ts - current_syscall["start"]) / MSEC_PER_NSEC + speed = "%0.03f ms" % ms + + ts_start = ns_to_hour_nsec(current_syscall["start"]) + ts_end = ns_to_hour_nsec(ts) + procname = self.tids[c.current_tid].comm + if name in ["sys_recvmsg"]: + count = "" + else: + count = ", count = %d" % (current_syscall["count"]) + if ms > 1.000: + print("[%s - %s] %s (%d) %s(fd = %d <%s>%s) = %d, %s" % \ + (ts_start, ts_end, procname, c.current_tid, name, + current_syscall["fd"].fd, filename, count, ret, + speed)) + def entry(self, event): name = event.name cpu_id = event["cpu_id"] @@ -207,12 +254,14 @@ class Syscalls(): c = self.cpus[cpu_id] if c.current_tid == -1: return - if len(c.current_syscall.keys()) == 0: + current_syscall = self.tids[c.current_tid].current_syscall + if len(current_syscall.keys()) == 0: return - name = c.current_syscall["name"] + name = current_syscall["name"] ret = event["ret"] if name in self.open_syscalls: self.add_tid_fd(event, c) elif name in self.read_syscalls or name in self.write_syscalls: self.track_read_write_return(name, ret, c) - c.current_syscall = {} + self.track_rw_latency(name, ret, c, event.timestamp) + self.tids[c.current_tid].current_syscall = {} diff --git a/iotop.py b/iotop.py index 46b462c..b6a2520 100755 --- a/iotop.py +++ b/iotop.py @@ -45,7 +45,7 @@ class IOTop(): syscall = Syscalls(self.cpus, self.tids, self.syscalls) block_bio = BlockBio(self.cpus, self.disks) net = Net(self.ifaces) - statedump = Statedump(self.tids) + statedump = Statedump(self.tids, self.disks) for event in self.traces.events: if self.start_ns == 0: @@ -77,6 +77,8 @@ class IOTop(): statedump.process_state(event) elif event.name == "lttng_statedump_file_descriptor": statedump.file_descriptor(event) + elif event.name == "lttng_statedump_block_device": + statedump.block_device(event) if args.refresh == 0: # stats for the whole trace self.output(args, self.trace_start_ts, self.trace_end_ts, final=1) @@ -172,7 +174,9 @@ class IOTop(): values = [] for disk in sorted(self.disks.values(), key=operator.attrgetter('nr_sector'), reverse=True): - values.append((disk.name, disk.nr_sector)) + if disk.nr_sector == 0: + continue + values.append((disk.prettyname, disk.nr_sector)) for line in graph.graph('Disk nr_sector', values): print(line) @@ -182,7 +186,9 @@ class IOTop(): values = [] for disk in sorted(self.disks.values(), key=operator.attrgetter('nr_requests'), reverse=True): - values.append((disk.name, disk.nr_requests)) + if disk.nr_sector == 0: + continue + values.append((disk.prettyname, disk.nr_requests)) for line in graph.graph('Disk nr_requests', values): print(line) @@ -195,7 +201,7 @@ class IOTop(): continue total = (disk.request_time / disk.completed_requests) / MSEC_PER_NSEC total = float("%0.03f" % total) - values.append(("ms %s" % disk.name, total)) + values.append(("ms %s" % disk.prettyname, total)) for line in graph.graph('Disk request time/sector', values, sort=2): print(line) -- 2.34.1