| 1 | # The MIT License (MIT) |
| 2 | # |
| 3 | # Copyright (C) 2015 - Julien Desfossez <jdesfossez@efficios.com> |
| 4 | # 2015 - Antoine Busque <abusque@efficios.com> |
| 5 | # |
| 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | # of this software and associated documentation files (the "Software"), to deal |
| 8 | # in the Software without restriction, including without limitation the rights |
| 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 10 | # copies of the Software, and to permit persons to whom the Software is |
| 11 | # furnished to do so, subject to the following conditions: |
| 12 | # |
| 13 | # The above copyright notice and this permission notice shall be included in |
| 14 | # all copies or substantial portions of the Software. |
| 15 | # |
| 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 22 | # SOFTWARE. |
| 23 | |
| 24 | import os |
| 25 | import socket |
| 26 | from ..common import format_utils, trace_utils |
| 27 | |
| 28 | |
class Process:
    """Bookkeeping record for a process, identified by its tid and pid."""

    def __init__(self, tid=None, pid=None, comm='', prio=None):
        # Identity attributes, stored exactly as received
        self.tid, self.pid = tid, pid
        self.comm, self.prio = comm, prio
        # File descriptors of this process, keyed by fd
        self.fds = {}
        # No syscall is recorded at construction time
        self.current_syscall = None
        # tid of the process that was scheduled before this one
        self.prev_tid = None
        # Wakeup information, unset until assigned by the caller
        self.last_wakeup = self.last_waker = None
| 42 | |
| 43 | |
class CPU:
    """Per-CPU state: current tid plus in-flight hard and soft IRQs."""

    def __init__(self, cpu_id):
        self.cpu_id = cpu_id
        # Nothing is recorded as running or interrupting at creation
        self.current_tid = self.current_hard_irq = None
        # Several softirqs can be raised before any of them gets
        # handled, hence a dict: keys are the vec, values are lists
        # kept in chronological order
        self.current_softirqs = {}
| 53 | |
| 54 | |
class MemoryManagement:
    """Holder for the page counter, which starts at zero."""

    def __init__(self):
        self.page_count = 0
| 58 | |
| 59 | |
class SyscallEvent:
    """A syscall invocation, tracked from its entry to its exit."""

    def __init__(self, name, begin_ts):
        self.name, self.begin_ts = name, begin_ts
        # Filled in by process_exit()
        self.end_ts = self.ret = self.duration = None
        # Only applicable to I/O syscalls
        self.io_rq = None

    def process_exit(self, event):
        """Record the exit timestamp, return value and duration.

        `event` must expose a `timestamp` attribute and a dict-like
        `get()` method.
        """
        self.end_ts = exit_ts = event.timestamp
        # lttng-modules versions prior to 2.8 would, on certain
        # architectures (notably arm32), erroneously trace some
        # syscalls (e.g. mmap2) without their return value. get()
        # then yields None, and such syscalls with a None return
        # value should simply be ignored down the line.
        self.ret = event.get('ret')
        self.duration = exit_ts - self.begin_ts

    @classmethod
    def new_from_entry(cls, event):
        """Create an instance from a syscall entry event."""
        return cls(trace_utils.get_syscall_name(event), event.timestamp)
| 85 | |
| 86 | |
class Disk:
    """Container for the block I/O requests pending on a disk."""

    def __init__(self):
        # Block I/O requests still pending, keyed by sector
        self.pending_requests = {}
| 91 | |
| 92 | |
class FDType:
    """Integer tags describing what a file descriptor refers to."""

    unknown, disk, net = 0, 1, 2
    # FDs that are probably, but not certainly, network FDs (assumed
    # when net_dev_xmit is called during a write syscall while the
    # type is still unknown).
    maybe_net = 3

    @staticmethod
    def get_fd_type(name, family):
        """Return the FD type for an opening syscall and socket family.

        `name` is looked up in the SyscallConsts syscall lists and
        `family` in its family lists; FDType.unknown is returned when
        nothing matches.
        """
        opens_net_fd = name in SyscallConsts.NET_OPEN_SYSCALLS
        if opens_net_fd and family in SyscallConsts.INET_FAMILIES:
            return FDType.net
        if opens_net_fd and family in SyscallConsts.DISK_FAMILIES:
            return FDType.disk
        if name in SyscallConsts.DISK_OPEN_SYSCALLS:
            return FDType.disk

        return FDType.unknown
| 113 | |
| 114 | |
class FD:
    """A file descriptor together with what is known about it."""

    def __init__(self, fd, filename='unknown', fd_type=FDType.unknown,
                 cloexec=False, family=None):
        self.fd, self.filename = fd, filename
        self.fd_type = fd_type
        self.cloexec, self.family = cloexec, family

    @classmethod
    def new_from_fd(cls, fd):
        """Build a new instance copying every field of the FD `fd`."""
        fields = (fd.fd, fd.filename, fd.fd_type, fd.cloexec, fd.family)
        return cls(*fields)

    @classmethod
    def new_from_open_rq(cls, io_rq):
        """Build a new instance from the fields of an open I/O request."""
        fields = (io_rq.fd, io_rq.filename, io_rq.fd_type, io_rq.cloexec,
                  io_rq.family)
        return cls(*fields)
| 132 | |
| 133 | |
class IRQ:
    """Base record for an interrupt handled on a given CPU.

    Attributes:
        id: identifier of the interrupt, as given by the caller.
        cpu_id: identifier of the CPU, as given by the caller.
        begin_ts: timestamp at which handling began, or None.
        end_ts: timestamp at which handling ended; None until set.
    """

    def __init__(self, id, cpu_id, begin_ts=None):
        self.id = id
        self.cpu_id = cpu_id
        self.begin_ts = begin_ts
        self.end_ts = None

    @property
    def duration(self):
        """Return end_ts - begin_ts, or None if either one is unset.

        The bounds are compared against None explicitly rather than
        by truthiness, so that a timestamp of 0 is treated as a valid
        value instead of as a missing one.
        """
        if self.end_ts is None or self.begin_ts is None:
            return None

        return self.end_ts - self.begin_ts
| 147 | |
| 148 | |
class HardIRQ(IRQ):
    """Hardware interrupt record; adds a `ret` field to IRQ."""

    def __init__(self, id, cpu_id, begin_ts):
        super().__init__(id, cpu_id, begin_ts)
        # Unset at creation
        self.ret = None

    @classmethod
    def new_from_irq_handler_entry(cls, event):
        """Create an instance from an irq_handler_entry event.

        Reads the 'irq' and 'cpu_id' fields and the event timestamp.
        """
        return cls(event['irq'], event['cpu_id'], event.timestamp)
| 160 | |
| 161 | |
class SoftIRQ(IRQ):
    """Software interrupt record; also keeps the raise timestamp."""

    def __init__(self, id, cpu_id, raise_ts=None, begin_ts=None):
        super().__init__(id, cpu_id, begin_ts)
        self.raise_ts = raise_ts

    @classmethod
    def new_from_softirq_raise(cls, event):
        """Create an instance from a softirq_raise event.

        The event timestamp becomes raise_ts; begin_ts stays unset.
        """
        vec, cpu_id = event['vec'], event['cpu_id']
        raised_at = event.timestamp
        return cls(vec, cpu_id, raised_at)

    @classmethod
    def new_from_softirq_entry(cls, event):
        """Create an instance from a softirq_entry event.

        The event timestamp becomes begin_ts; raise_ts stays unset.
        """
        vec, cpu_id = event['vec'], event['cpu_id']
        return cls(vec, cpu_id, begin_ts=event.timestamp)
| 180 | |
| 181 | |
class IORequest:
    """Base class describing an I/O request and its outcome."""

    # I/O operations
    OP_OPEN, OP_READ, OP_WRITE, OP_CLOSE, OP_SYNC = 1, 2, 3, 4, 5
    # Operation for requests that read and write at the same time,
    # e.g. splice and sendfile
    OP_READ_WRITE = 6

    def __init__(self, begin_ts, size, tid, operation):
        self.begin_ts = begin_ts
        # Completion data, unset until the request ends
        self.end_ts = self.duration = None
        # Size of the request, in bytes
        self.size = size
        self.operation = operation
        # tid of the process which triggered the request
        self.tid = tid
        # Error number, if the request failed
        self.errno = None

    @staticmethod
    def is_equivalent_operation(left_op, right_op):
        """Tell whether two I/O operations are equivalent.

        Plain equality is not enough because OP_READ_WRITE acts as
        a set holding both OP_READ and OP_WRITE: it is equivalent to
        either of them, as well as to itself.
        """
        read, write = IORequest.OP_READ, IORequest.OP_WRITE
        both = IORequest.OP_READ_WRITE

        if left_op == both:
            return right_op in (read, write, both)
        if left_op in (read, write):
            # A read (resp. write) matches itself or a read/write
            return right_op in (left_op, both)

        return left_op == right_op
| 222 | |
| 223 | |
class SyscallIORequest(IORequest):
    """I/O request carried out through a syscall.

    On top of the IORequest fields, keeps the syscall name, the fd
    the request applies to and memory-related counters.
    """

    def __init__(self, begin_ts, size, tid, operation, syscall_name):
        # Forward the requested size to the base class. A hard-coded
        # None used to be passed here, which silently discarded the
        # size supplied by callers (e.g. the 'len' of splice or the
        # 'nbytes' of sync_file_range).
        super().__init__(begin_ts, size, tid, operation)
        # Unset here; assigned afterwards by the code building the rq
        self.fd = None
        self.syscall_name = syscall_name
        # Number of pages alloc'd/freed/written to disk during the rq
        self.pages_allocated = 0
        self.pages_freed = 0
        self.pages_written = 0
        # Whether kswapd was forced to wakeup during the rq
        self.woke_kswapd = False

    def update_from_exit(self, event):
        """Complete the request from the syscall exit event.

        Sets end_ts and duration from the event timestamp. When the
        'ret' field is negative, its opposite is stored in errno.
        """
        self.end_ts = event.timestamp
        self.duration = self.end_ts - self.begin_ts
        ret = event['ret']
        if ret < 0:
            self.errno = -ret
| 241 | |
| 242 | |
class OpenIORequest(SyscallIORequest):
    """Syscall I/O request which yields a new file descriptor."""

    def __init__(self, begin_ts, tid, syscall_name, filename,
                 fd_type):
        super().__init__(begin_ts, None, tid, IORequest.OP_OPEN, syscall_name)
        # The FD is only known on syscall exit
        self.fd = None
        self.filename = filename
        self.fd_type = fd_type
        self.family = None
        self.cloexec = False

    def update_from_exit(self, event):
        """Complete the request, storing a non-negative 'ret' as the fd."""
        super().update_from_exit(event)
        ret = event['ret']
        if ret >= 0:
            self.fd = ret

    @classmethod
    def new_from_disk_open(cls, event, tid):
        """Create a disk open request from a syscall entry event.

        Reads the 'filename' and 'flags' fields; cloexec is set when
        the flags include os.O_CLOEXEC.
        """
        req = cls(event.timestamp, tid, trace_utils.get_syscall_name(event),
                  event['filename'], FDType.disk)
        cloexec_flag = os.O_CLOEXEC
        req.cloexec = (event['flags'] & cloexec_flag) == cloexec_flag

        return req

    @classmethod
    def new_from_accept(cls, event, tid):
        """Create a network open request from an accept/accept4 event."""
        req = cls(event.timestamp, tid, trace_utils.get_syscall_name(event),
                  'socket', FDType.net)

        if 'family' not in event:
            return req

        req.family = event['family']
        # For an INET socket, use ip:port as the filename
        if req.family == socket.AF_INET:
            req.filename = format_utils.format_ipv4(
                event['v4addr'], event['sport']
            )

        return req

    @classmethod
    def new_from_socket(cls, event, tid):
        """Create a network open request from a socket syscall event."""
        req = cls(event.timestamp, tid, 'socket', 'socket', FDType.net)

        if 'family' in event:
            req.family = event['family']

        return req

    @classmethod
    def new_from_old_fd(cls, event, tid, old_fd):
        """Create an open request inheriting its details from `old_fd`.

        The filename and fd type are copied from `old_fd`; when it is
        None they default to 'unknown' and FDType.unknown.
        """
        begin_ts = event.timestamp
        name = trace_utils.get_syscall_name(event)
        if old_fd is not None:
            inherited = (old_fd.filename, old_fd.fd_type)
        else:
            inherited = ('unknown', FDType.unknown)

        return cls(begin_ts, tid, name, *inherited)
| 309 | |
| 310 | |
class CloseIORequest(SyscallIORequest):
    """Syscall I/O request for a 'close' on a given fd."""

    def __init__(self, begin_ts, tid, fd):
        # A close carries no size
        super().__init__(begin_ts, None, tid, operation=IORequest.OP_CLOSE,
                         syscall_name='close')
        self.fd = fd
| 315 | |
| 316 | |
class ReadWriteIORequest(SyscallIORequest):
    """Syscall I/O request which reads and/or writes data."""

    def __init__(self, begin_ts, size, tid, operation, syscall_name):
        super().__init__(begin_ts, size, tid, operation, syscall_name)
        # Size in bytes returned on syscall exit, which may differ
        # from the size requested initially
        self.returned_size = None
        # Unused if fd is set
        self.fd_in = self.fd_out = None

    def update_from_exit(self, event):
        """Complete the request from the syscall exit event.

        A non-negative 'ret' is stored as the returned size.
        """
        super().update_from_exit(event)
        ret = event['ret']
        if ret < 0:
            return

        self.returned_size = ret
        # When no size was set at entry, as with recvmsg or sendmsg,
        # fall back on the returned one
        if self.size is None:
            self.size = ret

    @classmethod
    def new_from_splice(cls, event, tid):
        """Create a read/write request from a splice entry event."""
        req = cls(event.timestamp, event['len'], tid,
                  IORequest.OP_READ_WRITE, 'splice')
        req.fd_in, req.fd_out = event['fd_in'], event['fd_out']

        return req

    @classmethod
    def new_from_sendfile64(cls, event, tid):
        """Create a read/write request from a sendfile64 entry event."""
        req = cls(event.timestamp, event['count'], tid,
                  IORequest.OP_READ_WRITE, 'sendfile64')
        req.fd_in, req.fd_out = event['in_fd'], event['out_fd']

        return req

    @classmethod
    def new_from_fd_event(cls, event, tid, size_key):
        """Create a read or write request from an event with an 'fd'.

        `size_key` names the event field holding the requested size;
        pass None for events, like recvmsg or sendmsg, which only
        have size information on return. The operation is OP_READ if
        the syscall is one of SyscallConsts.READ_SYSCALLS, OP_WRITE
        otherwise.
        """
        begin_ts = event.timestamp
        size = None if size_key is None else event[size_key]

        syscall_name = trace_utils.get_syscall_name(event)
        is_read = syscall_name in SyscallConsts.READ_SYSCALLS
        operation = IORequest.OP_READ if is_read else IORequest.OP_WRITE

        req = cls(begin_ts, size, tid, operation, syscall_name)
        req.fd = event['fd']

        return req
| 378 | |
| 379 | |
class SyncIORequest(SyscallIORequest):
    """Syscall I/O request for the sync family of syscalls."""

    def __init__(self, begin_ts, size, tid, syscall_name):
        super().__init__(begin_ts, size, tid, IORequest.OP_SYNC, syscall_name)

    @classmethod
    def new_from_sync(cls, event, tid):
        """Create a request from a sync event; it has no size or fd."""
        return cls(event.timestamp, None, tid, 'sync')

    @classmethod
    def new_from_fsync(cls, event, tid):
        """Create a request from an fsync or fdatasync event."""
        req = cls(event.timestamp, None, tid,
                  trace_utils.get_syscall_name(event))
        req.fd = event['fd']

        return req

    @classmethod
    def new_from_sync_file_range(cls, event, tid):
        """Create a request from a sync_file_range event.

        The requested size is read from the 'nbytes' field.
        """
        req = cls(event.timestamp, event['nbytes'], tid, 'sync_file_range')
        req.fd = event['fd']

        return req
| 412 | |
| 413 | |
class BlockIORequest(IORequest):
    """I/O request issued at the block layer."""

    # Logical sector size in bytes, according to the kernel
    SECTOR_SIZE = 512

    def __init__(self, begin_ts, tid, operation, dev, sector, nr_sector):
        # The size in bytes is derived from the number of sectors
        super().__init__(begin_ts, nr_sector * BlockIORequest.SECTOR_SIZE,
                         tid, operation)
        self.dev = dev
        self.sector = sector
        self.nr_sector = nr_sector

    def update_from_rq_complete(self, event):
        """Set end_ts and duration from the completion event."""
        self.end_ts = completed_at = event.timestamp
        self.duration = completed_at - self.begin_ts

    @classmethod
    def new_from_rq_issue(cls, event):
        """Create a request from a block request issue event.

        Reads the 'dev', 'sector', 'nr_sector', 'tid' and 'rwbs'
        fields of the event.
        """
        begin_ts = event.timestamp
        dev, sector = event['dev'], event['sector']
        nr_sector, tid = event['nr_sector'], event['tid']
        # An odd rwbs denotes a write operation, an even one a read
        is_write = event['rwbs'] % 2 != 0
        operation = IORequest.OP_WRITE if is_write else IORequest.OP_READ

        return cls(begin_ts, tid, operation, dev, sector, nr_sector)
| 443 | |
| 444 | |
class BlockRemapRequest:
    """Pairs a (dev, sector) location with the old one it replaces."""

    def __init__(self, dev, sector, old_dev, old_sector):
        self.dev, self.sector = dev, sector
        self.old_dev, self.old_sector = old_dev, old_sector
| 451 | |
| 452 | |
class SyscallConsts:
    """Lists grouping syscall names (and socket families) by role."""

    # TODO: decouple socket/family logic from this class
    INET_FAMILIES = [
        socket.AF_INET,
        socket.AF_INET6,
    ]
    DISK_FAMILIES = [
        socket.AF_UNIX,
    ]
    # Syscalls that open a FD on disk (in the exit_syscall event)
    DISK_OPEN_SYSCALLS = ['open', 'openat']
    # Syscalls that open a FD on the network (in the exit_syscall
    # event)
    NET_OPEN_SYSCALLS = ['socket']
    # Syscalls that can duplicate a FD
    DUP_OPEN_SYSCALLS = ['fcntl', 'dup', 'dup2', 'dup3']
    SYNC_SYSCALLS = ['sync', 'sync_file_range', 'fsync', 'fdatasync']
    # Concatenation of the three open lists
    OPEN_SYSCALLS = (
        DISK_OPEN_SYSCALLS + NET_OPEN_SYSCALLS + DUP_OPEN_SYSCALLS
    )
    # Syscalls that close a FD (in the 'fd =' field)
    CLOSE_SYSCALLS = ['close']
    # Syscalls that read on a FD, value in the exit_syscall following
    READ_SYSCALLS = [
        'read', 'recvmsg', 'recvfrom', 'readv', 'pread', 'pread64',
        'preadv',
    ]
    # Syscalls that write on a FD, value in the exit_syscall following
    WRITE_SYSCALLS = [
        'write', 'sendmsg', 'sendto', 'writev', 'pwrite', 'pwrite64',
        'pwritev',
    ]
    # Syscalls that both read and write on two FDs
    READ_WRITE_SYSCALLS = ['splice', 'sendfile64']
    # Every I/O related syscall
    IO_SYSCALLS = (
        OPEN_SYSCALLS + CLOSE_SYSCALLS + READ_SYSCALLS
        + WRITE_SYSCALLS + SYNC_SYSCALLS + READ_WRITE_SYSCALLS
    )