Handle short replies, and fix bandwidth calculation when delay != 1s
[iotop.git] / iotop.py
blob3c1e22a5b5afeb06443c1752cc222dbeee80554b
1 #!/usr/bin/python
2 # iotop: Display I/O usage of processes in a top like UI
3 # Copyright (c) 2007 Guillaume Chazarain <guichaz@yahoo.fr>, GPLv2
4 # See ./iotop.py --help for some help
6 # 20070723: Added support for taskstats version > 4
7 # 20070813: Handle short replies, and fix bandwidth calculation when delay != 1s
9 import curses
10 import errno
11 import optparse
12 import os
13 import pwd
14 import select
15 import socket
16 import struct
17 import sys
18 import time
# Check for requirements:
#   o Python >= 2.5 for AF_NETLINK sockets
#   o Linux >= 2.6.20 with I/O accounting
#
# This section has to stay parseable by interpreters older than the one it
# asks for, otherwise the explanation below could never be displayed.

# The socket module only exposes netlink constants when it supports netlink.
python25 = hasattr(socket, 'NETLINK_ROUTE')

# The presence of /proc/self/io is used as the I/O accounting probe.
ioaccounting = os.path.exists('/proc/self/io')

if not (python25 and ioaccounting):
    def boolean2string(boolean):
        """Render the outcome of a requirement probe for the report."""
        if boolean:
            return 'Found'
        return 'Not found'

    sys.stdout.write(
        'Could not run iotop as some of the requirements are not met:\n')
    sys.stdout.write('- Python >= 2.5 for AF_NETLINK support: %s\n'
                     % boolean2string(python25))
    sys.stdout.write('- Linux >= 2.6.20 with I/O accounting support: %s\n'
                     % boolean2string(ioaccounting))
    sys.exit(1)
43 # Netlink stuff
44 # Based on code from pynl80211: Netlink message generation/parsing
45 # http://git.sipsolutions.net/?p=pynl80211.git
46 # Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
47 # GPLv2
# Netlink message header: flag values
NLM_F_REQUEST = 0x01

# Netlink message header: type values
NLMSG_ERROR = 0x02
NLMSG_MIN_TYPE = 0x10
class Attr:
    """A single netlink attribute: a numeric type plus its raw payload."""

    def __init__(self, type, str, *kw):
        """Create an attribute of the given type.

        When extra arguments are supplied, `str` is a struct format and the
        extra arguments are the values packed with it.  Otherwise `str` is
        stored as the already encoded payload.
        """
        self.type = type
        if kw:
            self.data = struct.pack(str, *kw)
        else:
            self.data = str

    def _dump(self):
        """Serialize as a (length, type) header, the payload, then padding."""
        payload_length = len(self.data)
        # The length field counts the 4 header bytes but not the padding.
        header = struct.pack('HH', payload_length + 4, self.type)
        # Same padding amount as the original expression: zero bytes up to
        # the rounded boundary.
        padding = ((payload_length + 4 - 1) & ~3) - payload_length
        return header + self.data + struct.pack('%dx' % padding)

    def u16(self):
        """Decode the payload as one native unsigned 16-bit integer."""
        (value,) = struct.unpack('H', self.data)
        return value
class NulStrAttr(Attr):
    """Attribute whose payload is a string followed by one zero byte."""

    def __init__(self, type, str):
        # '<n>sB' packs the n characters and then a single terminating byte.
        fmt = '%dsB' % len(str)
        Attr.__init__(self, type, fmt, str, 0)
class U32Attr(Attr):
    """Attribute whose payload is a single unsigned 32-bit integer."""

    def __init__(self, type, val):
        """Create the attribute `type` carrying the integer `val`."""
        # 'I' (unsigned int) rather than 'L' (unsigned long): in struct's
        # native mode 'L' is eight bytes on LP64 systems such as 64-bit
        # Linux, which would emit a 64-bit payload for a 32-bit attribute.
        Attr.__init__(self, type, 'I', val)
# Protocol number passed to Connection() for generic netlink
NETLINK_GENERIC = 16


class Message:
    """A netlink message: header fields plus an already serialized payload."""

    def __init__(self, tp, flags=0, seq=-1, payload=[]):
        """Store the header fields and flatten `payload`.

        `payload` is either a list of objects providing `_dump()` (they are
        serialized and concatenated) or a ready-made string.  A `seq` of -1
        means "allocate one from the connection when sending".
        """
        self.type = tp
        self.flags = flags
        self.seq = seq
        self.pid = -1
        if type(payload) is list:
            self.payload = ''.join([attr._dump() for attr in payload])
        else:
            self.payload = payload

    def send(self, conn):
        """Prefix the payload with its 16-byte header and send it on `conn`."""
        if self.seq == -1:
            self.seq = conn.seq()

        self.pid = conn.pid
        # The length field covers the header itself (four 4-byte words).
        total_length = len(self.payload) + 4 * 4
        header = struct.pack('IHHII', total_length, self.type, self.flags,
                             self.seq, self.pid)
        conn.send(header + self.payload)
class Connection:
    """A raw AF_NETLINK socket together with a message sequence counter."""

    def __init__(self, nltype, groups=0, unexpected_msg_handler=None):
        """Open and bind a netlink socket for protocol `nltype`.

        `groups` is passed as the second element of the bind address and
        `unexpected_msg_handler` is only stored (as `self.unexpected`).
        """
        self.fd = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, nltype)
        self.fd.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, 65536)
        self.fd.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 65536)
        self.fd.bind((0, groups))
        # The address reported by the socket after binding to id 0.
        self.pid, self.groups = self.fd.getsockname()
        self._seq = 0
        self.unexpected = unexpected_msg_handler

    def send(self, msg):
        """Send the serialized message `msg` on the socket."""
        self.fd.send(msg)

    def recv(self):
        """Receive one message and return it as a Message.

        Raises OSError, with `errno` set, when the reply is an NLMSG_ERROR
        message carrying a non-zero error code.
        """
        contents = self.fd.recv(65536)
        # should check msgflags for TRUNC!
        _length, msg_type, flags, seq, pid = struct.unpack('IHHII',
                                                           contents[:16])
        m = Message(msg_type, flags, seq, contents[16:])
        m.pid = pid
        if m.type == NLMSG_ERROR:
            # The payload starts with the negated error code.
            code = -struct.unpack('i', m.payload[:4])[0]
            if code != 0:
                e = OSError('Netlink error: %s (%d)' %
                            (os.strerror(code), code))
                e.errno = code
                # The exception used to be built and then dropped, so
                # callers checking e.errno never saw it.
                raise e
        return m

    def seq(self):
        """Return the next sequence number for this connection."""
        self._seq += 1
        return self._seq
def parse_attributes(str):
    """Split a serialized attribute stream into a {type: Attr} dictionary.

    Each attribute starts with a 4-byte (length, type) header, the length
    including that header, and the next attribute starts at the following
    4-byte boundary.  Parsing stops at the first malformed header instead of
    failing or looping: a remainder shorter than a header is ignored, and so
    is everything after a declared length smaller than the header.
    """
    attrs = {}
    while len(str) >= 4:
        length, attr_type = struct.unpack('HH', str[:4])
        if length < 4:
            # Such a length would never move past this header, so the loop
            # could not terminate.
            break
        attrs[attr_type] = Attr(attr_type, str[4:length])
        # Skip the attribute together with its alignment padding.
        str = str[(length + 4 - 1) & ~3:]
    return attrs
# Command sent by Controller.get_family_id()
CTRL_CMD_GETFAMILY = 3

# Attribute types exchanged with that command: the family id is read from
# the reply, the family name is sent in the request
CTRL_ATTR_FAMILY_ID = 1
CTRL_ATTR_FAMILY_NAME = 2
class GenlHdr:
    """Generic netlink header: a command byte and a version byte."""

    def __init__(self, cmd, version=0):
        self.cmd = cmd
        self.version = version

    def _dump(self):
        """Serialize as command, version and two padding bytes."""
        fields = (self.cmd, self.version)
        return struct.pack('BBxx', *fields)
def _genl_hdr_parse(data):
    """Decode a 4-byte packed generic netlink header into a GenlHdr."""
    cmd, version = struct.unpack('BBxx', data)
    return GenlHdr(cmd, version)
# Family value used for requests to the controller (see
# Controller.get_family_id)
GENL_ID_CTRL = NLMSG_MIN_TYPE


class GeNlMessage(Message):
    """A netlink message whose payload begins with a generic netlink header."""

    def __init__(self, family, cmd, attrs=[], flags=0):
        """Build a message of type `family` for command `cmd`.

        `attrs` is the list of attributes serialized after the header.
        """
        self.cmd = cmd
        self.attrs = attrs
        self.family = family
        header = GenlHdr(cmd)
        # Concatenation creates a new list, `attrs` itself is left untouched.
        Message.__init__(self, family, flags=flags, payload=[header] + attrs)
class Controller:
    """Resolves generic netlink family names over a Connection."""

    def __init__(self, conn):
        self.conn = conn

    def get_family_id(self, family):
        """Return the numeric id of the family named `family`."""
        name_attr = NulStrAttr(CTRL_ATTR_FAMILY_NAME, family)
        request = GeNlMessage(GENL_ID_CTRL, CTRL_CMD_GETFAMILY,
                              flags=NLM_F_REQUEST, attrs=[name_attr])
        request.send(self.conn)
        reply = self.conn.recv()
        # The header is decoded, but none of its fields are used afterwards.
        _genl_hdr_parse(reply.payload[:4])
        reply_attrs = parse_attributes(reply.payload[4:])
        return reply_attrs[CTRL_ATTR_FAMILY_ID].u16()
189 # Netlink usage for taskstats
TASKSTATS_CMD_GET = 1
TASKSTATS_CMD_ATTR_PID = 1
TASKSTATS_CMD_ATTR_TGID = 2


class TaskStatsNetlink(object):
    """Fetches per-task statistics through the TASKSTATS netlink family."""

    # (member name, byte offset of its 64-bit value in the statistics data)
    # Keep in sync with human_stats(stats)
    members_offsets = [
        ('blkio_delay_total', 40),
        ('swapin_delay_total', 56),
        ('ac_etime', 144),
        ('read_bytes', 248),
        ('write_bytes', 256),
        ('cancelled_write_bytes', 264),
    ]

    def __init__(self, options):
        """Open a generic netlink connection and look up the family id."""
        self.options = options
        self.connection = Connection(NETLINK_GENERIC)
        self.family_id = Controller(self.connection).get_family_id('TASKSTATS')

    def get_task_stats(self, pid):
        """Return {member name: value} for `pid`, or None when unavailable.

        None is returned when the request fails with ESRCH or when the reply
        is too short to contain every member.  Other OSErrors propagate.
        """
        # Query the whole thread group when only processes are displayed.
        attr = TASKSTATS_CMD_ATTR_PID
        if self.options.processes:
            attr = TASKSTATS_CMD_ATTR_TGID
        request = GeNlMessage(self.family_id, cmd=TASKSTATS_CMD_GET,
                              attrs=[U32Attr(attr, pid)],
                              flags=NLM_F_REQUEST)
        request.send(self.connection)
        try:
            reply = self.connection.recv()
        except OSError:
            if sys.exc_info()[1].errno == errno.ESRCH:
                # OSError: Netlink error: No such process (3)
                return
            raise
        payload = reply.payload
        if len(payload) < 292:
            # Short reply
            return

        # The statistics data starts 20 bytes into the payload and begins
        # with its version number.
        reply_data = payload[20:]
        reply_length, reply_type = struct.unpack('HH', payload[4:8])
        reply_version = struct.unpack('H', payload[20:22])[0]
        assert reply_length >= 288
        assert reply_type == attr + 3
        assert reply_version >= 4

        return dict([(name, struct.unpack('Q', reply_data[offset:offset + 8])[0])
                     for name, offset in TaskStatsNetlink.members_offsets])
248 # PIDs manipulations
def find_uids(options):
    """Resolve `options.users` into the list of numeric ids `options.uids`.

    Each entry is either a number, taken as is, or a user name looked up in
    the password database.  Every unknown name is reported on stderr and the
    program then exits with status 1.
    """
    resolved = []
    options.uids = resolved
    failed = False
    for user in (options.users or []):
        try:
            uid = int(user)
        except ValueError:
            try:
                uid = pwd.getpwnam(user).pw_uid
            except KeyError:
                sys.stderr.write('Unknown user: %s\n' % (user,))
                failed = True
        # Once a lookup has failed nothing more is collected, the remaining
        # entries are only checked so that all errors get reported.
        if not failed:
            resolved.append(uid)
    if failed:
        sys.exit(1)
class pinfo(object):
    """Information tracked for one monitored task.

    Attributes set here: `pid`, `mark` (set by ProcessList before a refresh
    and cleared for tasks that still report statistics), `stats` (member
    name -> (total, delta)), `name`, `user` and `valid` (whether the task
    passes the user/pid filters).
    """

    def __init__(self, pid, options):
        """Initialize zeroed statistics and read /proc/<pid>/status.

        Raises IOError when the status file cannot be opened.
        """
        self.mark = False
        self.pid = pid
        self.stats = {}
        for name, offset in TaskStatsNetlink.members_offsets:
            self.stats[name] = (0, 0) # Total, Delta
        self.parse_status('/proc/%d/status' % pid, options)

    def check_if_valid(self, uid, options):
        """Set `self.valid`: no filter given, or `uid` / `self.pid` selected."""
        self.valid = not options.uids and not options.pids
        if not self.valid:
            self.valid = uid in options.uids
        if not self.valid:
            self.valid = self.pid in options.pids

    def parse_status(self, path, options):
        """Read the task name and owner from the status file at `path`."""
        status = open(path)
        try:
            for line in status:
                if line.startswith('Name:'):
                    # Bracketed name, displayed by get_cmdline() when the
                    # command line is empty.  Only the first separator is
                    # split on so that names containing spaces stay whole,
                    # and an empty name does not raise.
                    fields = line.split(None, 1)
                    if len(fields) > 1:
                        task_name = fields[1].strip()
                    else:
                        task_name = ''
                    self.name = '[' + task_name + ']'
                elif line.startswith('Uid:'):
                    uid = int(line.split()[1])
                    # We check monitored PIDs only here
                    self.check_if_valid(uid, options)
                    try:
                        self.user = pwd.getpwuid(uid).pw_name
                    except KeyError:
                        self.user = str(uid)
                    break
        finally:
            # Do not rely on garbage collection to release the descriptor.
            status.close()

    def add_stats(self, stats):
        """Record new totals and compute the delta against the previous ones."""
        self.stats_timestamp = time.time()
        for name in stats:
            value = stats[name]
            prev_value = self.stats[name][0]
            self.stats[name] = (value, value - prev_value)

    def get_cmdline(self, max_length):
        """Return the escaped command line, or the bracketed name if empty.

        At most `max_length` characters are read.  '{no such process}' is
        returned when the cmdline file cannot be opened or read.
        """
        # A process may exec, so we must always reread its cmdline
        try:
            proc_cmdline = open('/proc/%d/cmdline' % self.pid)
            try:
                # The task may vanish between open() and read(), hence the
                # read being covered by the same handler.
                cmdline = proc_cmdline.read(max_length)
            finally:
                proc_cmdline.close()
        except IOError:
            return '{no such process}'
        parts = cmdline.split('\0')
        # Strip the directory part of the executable.
        first_command_char = parts[0].rfind('/') + 1
        parts[0] = parts[0][first_command_char:]
        cmdline = ' '.join(parts).strip()
        return cmdline.encode('string_escape') or self.name
class ProcessList(object):
    """Keeps one pinfo per monitored task and refreshes their statistics."""

    def __init__(self, taskstats_connection, options):
        # {pid: pinfo}
        self.processes = {}
        self.taskstats_connection = taskstats_connection
        self.options = options

        # A first time as we are interested in the delta
        self.update_process_counts()

    def get_process(self, pid):
        """Return the pinfo for `pid`, creating it on first sight.

        None is returned when the task cannot be read from /proc or does not
        pass the filters; such tasks are not remembered.
        """
        known = self.processes.get(pid, None)
        if known:
            return known
        try:
            created = pinfo(pid, self.options)
        except IOError:
            # IOError: [Errno 2] No such file or directory: '/proc/...'
            return
        if not created.valid:
            return
        self.processes[pid] = created
        return created

    def list_pids(self, tgid):
        """Return the ids to query for `tgid`: itself, or all its threads."""
        if self.options.processes:
            return [tgid]
        try:
            entries = os.listdir('/proc/%d/task' % tgid)
        except OSError:
            return []
        return [int(entry) for entry in entries]

    def update_process_counts(self):
        """Sample every task once.

        Returns (total_read, total_write, duration): the summed byte deltas
        and the 'ac_etime' delta, in seconds, of the first task that
        reported statistics (None when no task did).
        """
        total_read = 0
        total_write = 0
        duration = None
        # Entries of /proc starting with a digit are the task directories.
        tgids = [int(entry) for entry in os.listdir('/proc')
                 if '0' <= entry[0] <= '9']
        for tgid in tgids:
            for pid in self.list_pids(tgid):
                process = self.get_process(pid)
                if not process:
                    continue
                stats = self.taskstats_connection.get_task_stats(pid)
                if not stats:
                    continue
                process.mark = False
                process.add_stats(stats)
                total_read += process.stats['read_bytes'][1]
                total_write += process.stats['write_bytes'][1]
                if duration is None:
                    duration = process.stats['ac_etime'][1] / 1000000.0
        return total_read, total_write, duration

    def refresh_processes(self):
        """Resample all tasks and forget those that no longer report stats."""
        # Everything still marked after the update did not answer this time.
        for process in self.processes.values():
            process.mark = True
        totals = self.update_process_counts()
        stale = [pid for pid, process in self.processes.items()
                 if process.mark]
        for pid in stale:
            del self.processes[pid]
        return totals
383 # Utility functions for the UI
UNITS = ['B', 'K', 'M', 'G', 'T', 'P', 'E']


def human_bandwidth(size, duration):
    """Format `size` bytes transferred in `duration` seconds as a rate.

    The largest unit in which the rate exceeds 2 is used, with two decimals;
    smaller rates are given in bytes.  When nothing was transferred, or when
    no duration is available (None or 0), the rate is reported as zero
    instead of raising.
    """
    if size and duration:
        bw = float(size) / duration
    else:
        bw = 0.0
    for i in range(len(UNITS) - 1, 0, -1):
        base = 1 << (10 * i)
        # The unit follows the displayed rate, not the raw byte count,
        # otherwise it is off whenever the duration is not one second.
        if 2 * base < bw:
            res = '%.2f %s' % (bw / base, UNITS[i])
            break
    else:
        res = str(bw) + ' ' + UNITS[0]
    return res + '/s'
def human_stats(stats):
    """Format the deltas of a task's statistics for display.

    `stats` maps member names to (total, delta) pairs.  Returns the strings
    (io_delay, swapin_delay, read_bytes, write_bytes): two percentages of
    the elapsed time and two bandwidths.  When the elapsed time delta is not
    positive nothing can be computed and idle values are returned.
    """
    # Keep in sync with TaskStatsNetlink.members_offsets and
    # IOTopUI.get_data(self)
    duration = stats['ac_etime'][1] / 1000000.0
    if duration <= 0:
        # Avoids dividing by zero below.
        idle_bandwidth = human_bandwidth(0, 1.0)
        return '0.00 %', '0.00 %', idle_bandwidth, idle_bandwidth

    def delay2percent(name): # delay in ns, duration in s
        return '%.2f %%' % min(99.99, stats[name][1] / (duration * 10000000.0))

    io_delay = delay2percent('blkio_delay_total')
    swapin_delay = delay2percent('swapin_delay_total')
    read_bytes = human_bandwidth(stats['read_bytes'][1], duration)
    # Cancelled writes are subtracted from the writes, never going below 0.
    written_bytes = stats['write_bytes'][1] - stats['cancelled_write_bytes'][1]
    written_bytes = max(0, written_bytes)
    write_bytes = human_bandwidth(written_bytes, duration)
    return io_delay, swapin_delay, read_bytes, write_bytes
414 # The UI
class IOTopUI(object):
    """Samples the process list periodically and displays it.

    In batch mode the output is written to stdout; otherwise it is drawn in
    the curses window given to the constructor and keys are handled.
    """

    # (key function, reverse by default), one entry per displayed column in
    # the order of the titles used by refresh_display()
    sorting_keys = [
        (lambda p: p.pid, False),
        (lambda p: p.user, False),
        (lambda p: p.stats['read_bytes'][1], True),
        (lambda p: p.stats['write_bytes'][1] -
                   p.stats['cancelled_write_bytes'][1], True),
        (lambda p: p.stats['swapin_delay_total'][1], True),
        # The default sorting (by I/O % time) should show processes doing
        # only writes, without waiting on them
        (lambda p: p.stats['blkio_delay_total'][1] or
                   int(not(not(p.stats['read_bytes'][1] or
                               p.stats['write_bytes'][1]))), True),
        (lambda p: p.get_cmdline(4096), False),
    ]

    def __init__(self, win, process_list, options):
        """Remember the collaborators and, unless in batch mode, set up curses."""
        self.process_list = process_list
        self.options = options
        self.sorting_key = 5
        self.sorting_reverse = IOTopUI.sorting_keys[5][1]
        if not self.options.batch:
            self.win = win
            self.resize()
            curses.use_default_colors()
            curses.start_color()
            curses.curs_set(0)

    def resize(self, *unused):
        """Re-read the dimensions of the window."""
        self.height, self.width = self.win.getmaxyx()

    def run(self):
        """Refresh and display until the requested iterations are done."""
        iterations = 0
        poll = select.poll()
        if not self.options.batch:
            poll.register(sys.stdin.fileno(), select.POLLIN|select.POLLPRI)
        while self.options.iterations is None or \
              iterations < self.options.iterations:
            total = self.process_list.refresh_processes()
            total_read, total_write, duration = total
            self.refresh_display(total_read, total_write, duration)
            if self.options.iterations is not None:
                iterations += 1
                if iterations >= self.options.iterations:
                    break

            # Wait for the delay to elapse or for a key to be pressed.
            try:
                events = poll.poll(self.options.delay_seconds * 1000.0)
            except select.error:
                error = sys.exc_info()[1]
                if not error.args or error.args[0] != errno.EINTR:
                    raise
                # Interrupted by a signal, for instance when the terminal
                # is resized: simply refresh earlier than planned.
                events = []
            if not self.options.batch:
                # The window may have changed size since the last refresh.
                self.resize()
            if events:
                key = self.win.getch()
                self.handle_key(key)

    def reverse_sorting(self):
        """Invert the current sorting order."""
        self.sorting_reverse = not self.sorting_reverse

    def adjust_sorting_key(self, delta):
        """Move the sorting column by `delta`, clamped to the valid columns.

        When the column actually changes, its default order is restored.
        """
        orig_sorting_key = self.sorting_key
        last_key = len(IOTopUI.sorting_keys) - 1
        self.sorting_key = min(last_key, max(0, self.sorting_key + delta))
        if orig_sorting_key != self.sorting_key:
            self.sorting_reverse = IOTopUI.sorting_keys[self.sorting_key][1]

    def handle_key(self, key):
        """Run the action bound to `key`; unbound keys do nothing."""
        key_bindings = {
            ord('q'):
                lambda: sys.exit(0),
            ord('Q'):
                lambda: sys.exit(0),
            ord('r'):
                lambda: self.reverse_sorting(),
            ord('R'):
                lambda: self.reverse_sorting(),
            curses.KEY_LEFT:
                lambda: self.adjust_sorting_key(-1),
            curses.KEY_RIGHT:
                lambda: self.adjust_sorting_key(1),
            curses.KEY_HOME:
                lambda: self.adjust_sorting_key(-len(IOTopUI.sorting_keys)),
            curses.KEY_END:
                lambda: self.adjust_sorting_key(len(IOTopUI.sorting_keys)),
        }

        action = key_bindings.get(key, lambda: None)
        action()

    def get_data(self):
        """Return the formatted lines of the processes, sorted for display."""
        if self.options.batch:
            max_length = 4096
        else:
            max_length = self.width

        def format_process(p):
            stats = human_stats(p.stats)
            io_delay, swapin_delay, read_bytes, write_bytes = stats
            return '%5d %-8s %11s %11s %7s %7s %s' % \
                (p.pid, p.user[:8], read_bytes, write_bytes, swapin_delay,
                 io_delay, p.get_cmdline(max_length))

        key = IOTopUI.sorting_keys[self.sorting_key][0]
        processes = sorted(self.process_list.processes.values(), key=key,
                           reverse=self.sorting_reverse)
        if not self.options.batch:
            # Two rows are taken by the summary and the titles.  The bound
            # is clamped as a negative one would keep almost every process.
            del processes[max(0, self.height - 2):]
        return [format_process(p) for p in processes]

    def refresh_display(self, total_read, total_write, duration):
        """Output the summary, the column titles and the process lines."""
        summary = 'Total DISK READ: %s | Total DISK WRITE: %s' % (
                                        human_bandwidth(total_read, duration),
                                        human_bandwidth(total_write, duration))
        titles = ['  PID', ' USER', '      DISK READ', '  DISK WRITE',
                  '   SWAPIN', '    IO', '    COMMAND']
        lines = self.get_data()
        if self.options.batch:
            sys.stdout.write(summary + '\n')
            sys.stdout.write(''.join(titles) + '\n')
            for line in lines:
                sys.stdout.write(line + '\n')
        else:
            self.win.clear()
            self.win.addstr(summary)
            self.win.hline(1, 0, ord(' ') | curses.A_REVERSE, self.width)
            for i, title in enumerate(titles):
                attr = curses.A_REVERSE
                if i == self.sorting_key:
                    # Highlight the sorting column and show its direction.
                    attr |= curses.A_BOLD
                    title += self.sorting_reverse and '>' or '<'
                self.win.addstr(title, attr)
            for i, line in enumerate(lines):
                self.win.insstr(i + 2, 0, line)
            self.win.refresh()
def run_iotop(win, options):
    """Build the statistics source, the process list and the UI, then run it.

    `win` is the curses window to draw in (None in batch mode).
    """
    process_list = ProcessList(TaskStatsNetlink(options), options)
    IOTopUI(win, process_list, options).run()
555 # Main program
VERSION = '0.1'

# Help text handed to optparse; the first line names the invoked script.
USAGE = 'Usage: %s [OPTIONS]' % sys.argv[0] + '''

DISK READ and DISK WRITE are the block I/O bandwidth used during the sampling
period. SWAPIN and IO are the percentages of time the thread spent respectively
while swapping in and waiting on I/O more generally.
Controls: left and right arrows to change the sorting column, r to invert the
sorting order, q to quit, any other key to force a refresh'''
def main():
    """Parse the command line and start iotop in batch or curses mode."""
    parser = optparse.OptionParser(usage=USAGE, version='iotop ' + VERSION)
    # (option strings, settings), in the order they appear in the help
    option_specs = [
        (('-d', '--delay'),
         dict(type='float', dest='delay_seconds',
              help='delay between iterations [1 second]',
              metavar='SEC', default=1)),
        (('-p', '--pid'),
         dict(type='int', dest='pids', action='append',
              help='processes to monitor [all]', metavar='PID')),
        (('-u', '--user'),
         dict(type='str', dest='users', action='append',
              help='users to monitor [all]', metavar='USER')),
        (('-b', '--batch'),
         dict(action='store_true', dest='batch',
              help='non-interactive mode')),
        (('-P', '--processes'),
         dict(action='store_true', dest='processes',
              help='show only processes, not all threads')),
        (('-n', '--iter'),
         dict(type='int', dest='iterations', metavar='NUM',
              help='number of iterations before ending [infinite]')),
    ]
    for option_strings, settings in option_specs:
        parser.add_option(*option_strings, **settings)

    options, args = parser.parse_args()
    if args:
        parser.error('Unexpected arguments: ' + ' '.join(args))
    find_uids(options)
    if not options.pids:
        options.pids = []
    if options.batch:
        run_iotop(None, options)
    else:
        # The wrapper hands the curses window to run_iotop as first argument.
        curses.wrapper(run_iotop, options)


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Leaving with Ctrl-C is not an error.
        pass
    sys.exit(0)