Fix typo, reported by Iain Lea <iain@bricbrac.de>
[iotop.git] / iotop.py
blobf1ce2cc44ff6fdafad3eb47800a38942dfec0f18
1 #!/usr/bin/python
2 # iotop: Display I/O usage of processes in a top like UI
3 # Copyright (c) 2007 Guillaume Chazarain <guichaz@yahoo.fr>, GPLv2
4 # See ./iotop.py --help for some help
6 # 20070723: Added support for taskstats version > 4
7 # 20070813: Handle short replies, and fix bandwidth calculation when delay != 1s
8 # 20070819: Fix "-P -p NOT_A_TGID", optimize -p, handle empty process list
9 # 20070825: More accurate cutting of the command line, handle terminal resizing
10 # 20070826: Document taskstats bug: http://lkml.org/lkml/2007/8/2/185
11 # 20070930: Fixed -b
12 # 20071219: Tolerate misconfigured terminals
14 import curses
15 import errno
16 import optparse
17 import os
18 import pwd
19 import select
20 import socket
21 import struct
22 import sys
23 import time
26 # Check for requirements:
27 # o Python >= 2.5 for AF_NETLINK sockets
28 # o Linux >= 2.6.20 with I/O accounting
30 try:
31 socket.NETLINK_ROUTE
32 python25 = True
33 except AttributeError:
34 python25 = False
36 ioaccounting = os.path.exists('/proc/self/io')
38 if not python25 or not ioaccounting:
39 def boolean2string(boolean):
40 return boolean and 'Found' or 'Not found'
41 print 'Could not run iotop as some of the requirements are not met:'
42 print '- Python >= 2.5 for AF_NETLINK support:', boolean2string(python25)
43 print '- Linux >= 2.6.20 with I/O accounting support:', \
44 boolean2string(ioaccounting)
45 sys.exit(1)
48 # Netlink stuff
49 # Based on code from pynl80211: Netlink message generation/parsing
50 # http://git.sipsolutions.net/?p=pynl80211.git
51 # Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
52 # GPLv2
# Flags carried in the netlink message header
NLM_F_REQUEST = 0x01

# Message types carried in the netlink message header
NLMSG_ERROR = 0x02
NLMSG_MIN_TYPE = 0x10
class Attr:
    """One netlink attribute: a numeric type and its raw payload."""

    def __init__(self, type, str, *kw):
        # With extra arguments, `str` is a struct format describing them;
        # without, `str` already is the payload.
        self.type = type
        if kw:
            self.data = struct.pack(str, *kw)
        else:
            self.data = str

    def _dump(self):
        """Serialize as a 4-byte (length, type) header, payload and padding."""
        payload_len = len(self.data)
        # The length field counts header + payload, not the padding.
        header = struct.pack('HH', payload_len + 4, self.type)
        # Pad the payload up to the next multiple of 4 bytes.
        padding = '\0' * (((payload_len + 3) & ~3) - payload_len)
        return header + self.data + padding

    def u16(self):
        """Decode the payload as a single native unsigned 16-bit integer."""
        (value,) = struct.unpack('H', self.data)
        return value
class NulStrAttr(Attr):
    """Attribute whose payload is a string followed by one NUL byte."""

    def __init__(self, type, str):
        # '<n>s' packs the string itself, 'B' appends the terminating zero.
        fmt = '%dsB' % len(str)
        Attr.__init__(self, type, fmt, str, 0)
class U32Attr(Attr):
    """Attribute whose payload is one native unsigned 32-bit integer."""

    def __init__(self, type, val):
        # 'I' is 4 bytes in native mode. The previous 'L' is a C long, which
        # is 8 bytes on 64-bit Linux: the attribute came out twice as wide as
        # a u32 and only read back correctly on little-endian machines.
        Attr.__init__(self, type, 'I', val)
# Netlink protocol number passed to socket() for generic netlink
NETLINK_GENERIC = 0x10
class Message:
    """A netlink message: header fields plus a serialized payload.

    `payload` is either a list of objects providing `_dump()` (their
    serializations are concatenated) or an already-serialized string that is
    stored as is. Omitting it yields an empty payload.
    """

    def __init__(self, tp, flags=0, seq=-1, payload=None):
        self.type = tp
        self.flags = flags
        self.seq = seq      # -1 means "not assigned yet", see send()
        self.pid = -1       # filled in from the connection by send()
        # None instead of a shared mutable [] default.
        if payload is None:
            payload = []
        if isinstance(payload, list):
            self.payload = ''.join([attr._dump() for attr in payload])
        else:
            self.payload = payload

    def send(self, conn):
        """Prepend the 16-byte header and hand the message to `conn`.

        `conn` must provide `seq()`, `pid` and `send(data)`.
        """
        if self.seq == -1:
            self.seq = conn.seq()

        self.pid = conn.pid
        # The header is 4 + 2 + 2 + 4 + 4 = 16 bytes and its first field is
        # the total length, header included.
        total_length = len(self.payload) + 4*4
        hdr = struct.pack('IHHII', total_length, self.type, self.flags,
                          self.seq, self.pid)
        conn.send(hdr + self.payload)
class Connection:
    """A bound AF_NETLINK raw socket with a per-connection sequence counter."""

    def __init__(self, nltype, groups=0, unexpected_msg_handler=None):
        self.fd = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, nltype)
        self.fd.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, 65536)
        self.fd.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 65536)
        # Binding with 0 as the first address member, then reading the
        # address back, yields the id to put in outgoing headers.
        self.fd.bind((0, groups))
        self.pid, self.groups = self.fd.getsockname()
        self._seq = 0
        # Stored for callers; nothing in this class invokes it.
        self.unexpected = unexpected_msg_handler

    def send(self, msg):
        """Send an already-serialized message."""
        self.fd.send(msg)

    def recv(self):
        """Receive one message and return it as a Message.

        Raises OSError, with `errno` set, when the reply is an NLMSG_ERROR
        carrying a non-zero error code.
        """
        cntnts = self.fd.recv(65536)
        # should check msgflags for TRUNC!
        msg_len, msg_type, flags, seq, pid = struct.unpack('IHHII',
                                                           cntnts[:16])
        m = Message(msg_type, flags, seq, cntnts[16:])
        m.pid = pid
        if m.type == NLMSG_ERROR:
            # The payload starts with the negated errno; 0 is a plain ack.
            code = -struct.unpack('i', m.payload[:4])[0]
            if code != 0:
                e = OSError('Netlink error: %s (%d)' %
                            (os.strerror(code), code))
                e.errno = code
                # The exception used to be built and then dropped, so callers
                # catching OSError (e.g. for ESRCH) never saw it.
                raise e
        return m

    def seq(self):
        """Return the next sequence number for this connection."""
        self._seq += 1
        return self._seq
def parse_attributes(str):
    """Split a serialized attribute stream into a {type: Attr} dict.

    Each attribute is a 4-byte (length, type) header followed by its payload
    and padded to a multiple of 4 bytes; `length` covers header and payload.
    A later attribute replaces an earlier one of the same type.

    Raises ValueError if an attribute announces a length shorter than its own
    header, and struct.error if fewer than 4 bytes are left for a header.
    """
    attrs = {}
    while str:
        length, tp = struct.unpack('HH', str[:4])
        if length < 4:
            # Such an attribute would not advance the buffer and the loop
            # would never terminate.
            raise ValueError('Malformed netlink attribute: length %d' % length)
        attrs[tp] = Attr(tp, str[4:length])
        # Skip the padding that aligns the next attribute on 4 bytes.
        str = str[(length + 3) & ~3:]
    return attrs
# Command understood by the generic netlink controller
CTRL_CMD_GETFAMILY = 0x03

# Attribute types used when talking to the controller
CTRL_ATTR_FAMILY_ID = 0x01
CTRL_ATTR_FAMILY_NAME = 0x02
class GenlHdr:
    """The 4-byte generic netlink header: command, version, two zero bytes."""

    def __init__(self, cmd, version = 0):
        self.version = version
        self.cmd = cmd

    def _dump(self):
        # Two single bytes followed by a zeroed 16-bit field: same 4 bytes
        # as the 'BBxx' layout used when parsing.
        return struct.pack('BBH', self.cmd, self.version, 0)
def _genl_hdr_parse(data):
    """Build a GenlHdr from its 4-byte serialized form."""
    cmd, version = struct.unpack('BBxx', data)
    return GenlHdr(cmd, version)
# Message type addressing the generic netlink controller: the first type
# value at or above NLMSG_MIN_TYPE.
GENL_ID_CTRL = NLMSG_MIN_TYPE
class GeNlMessage(Message):
    """A generic netlink message: a GenlHdr followed by attributes.

    `family` becomes the netlink message type, `cmd` goes into the generic
    netlink header and `attrs` is a list of attribute objects.
    """

    def __init__(self, family, cmd, attrs=None, flags=0):
        # A fresh list per message: the former shared [] default was stored
        # in self.attrs, so mutating one message's attrs leaked into others.
        if attrs is None:
            attrs = []
        self.cmd = cmd
        self.attrs = attrs
        self.family = family
        Message.__init__(self, family, flags=flags,
                         payload=[GenlHdr(self.cmd)] + attrs)
class Controller:
    """Client for the generic netlink controller on a given connection."""

    def __init__(self, conn):
        self.conn = conn

    def get_family_id(self, family):
        """Return the numeric id of the generic netlink family `family`.

        Raises KeyError if the reply carries no CTRL_ATTR_FAMILY_ID.
        """
        a = NulStrAttr(CTRL_ATTR_FAMILY_NAME, family)
        m = GeNlMessage(GENL_ID_CTRL, CTRL_CMD_GETFAMILY,
                        flags=NLM_F_REQUEST, attrs=[a])
        m.send(self.conn)
        m = self.conn.recv()
        # The first 4 payload bytes are the generic netlink header; it was
        # parsed into an unused local before, so it is simply skipped now.
        attrs = parse_attributes(m.payload[4:])
        return attrs[CTRL_ATTR_FAMILY_ID].u16()
194 # Netlink usage for taskstats
# Command sent to the TASKSTATS family
TASKSTATS_CMD_GET = 0x01
# Request attributes: query one thread (PID) or one thread group (TGID)
TASKSTATS_CMD_ATTR_PID = 0x01
TASKSTATS_CMD_ATTR_TGID = 0x02
201 class TaskStatsNetlink(object):
202 # Keep in sync with human_stats(stats, duration)
203 members_offsets = [
204 ('blkio_delay_total', 40),
205 ('swapin_delay_total', 56),
206 ('ac_etime', 144),
207 ('read_bytes', 248),
208 ('write_bytes', 256),
209 ('cancelled_write_bytes', 264)
212 def __init__(self, options):
213 self.options = options
214 self.connection = Connection(NETLINK_GENERIC)
215 controller = Controller(self.connection)
216 self.family_id = controller.get_family_id('TASKSTATS')
218 def get_task_stats(self, pid):
219 if self.options.processes:
220 attr = TASKSTATS_CMD_ATTR_TGID
221 else:
222 attr = TASKSTATS_CMD_ATTR_PID
223 request = GeNlMessage(self.family_id, cmd=TASKSTATS_CMD_GET,
224 attrs=[U32Attr(attr, pid)],
225 flags=NLM_F_REQUEST)
226 request.send(self.connection)
227 try:
228 reply = self.connection.recv()
229 except OSError, e:
230 if e.errno == errno.ESRCH:
231 # OSError: Netlink error: No such process (3)
232 return
233 raise
234 if len(reply.payload) < 292:
235 # Short reply
236 return
237 reply_data = reply.payload[20:]
239 reply_length, reply_type = struct.unpack('HH', reply.payload[4:8])
240 reply_version = struct.unpack('H', reply.payload[20:22])[0]
241 assert reply_length >= 288
242 assert reply_type == attr + 3
243 assert reply_version >= 4
245 res = {}
246 for name, offset in TaskStatsNetlink.members_offsets:
247 data = reply_data[offset: offset + 8]
248 res[name] = struct.unpack('Q', data)[0]
250 return res
253 # PIDs manipulations
def find_uids(options):
    """Resolve options.users (names or numeric ids) into options.uids.

    Every unknown user name is reported on stderr; if there was at least
    one, the program exits with status 1 once all names have been checked.
    """
    resolved = []
    options.uids = resolved
    failed = False
    for user in options.users or []:
        try:
            uid = int(user)
        except ValueError:
            # Not a number: look it up as a login name.
            try:
                uid = pwd.getpwnam(user).pw_uid
            except KeyError:
                print >> sys.stderr, 'Unknown user:', user
                failed = True
        # After a failure nothing more is collected, the loop only goes on
        # to report the remaining unknown names.
        if not failed:
            resolved.append(uid)
    if failed:
        sys.exit(1)
class pinfo(object):
    """What iotop tracks about one task: name, owner and I/O counters.

    `stats` maps each counter name to a (total, delta) pair, where delta is
    the change since the previous add_stats() call.
    """

    def __init__(self, pid, options):
        self.mark = False
        self.pid = pid
        self.stats = {}
        for name, offset in TaskStatsNetlink.members_offsets:
            self.stats[name] = (0, 0) # Total, Delta
        # Raises IOError if the task is already gone.
        self.parse_status('/proc/%d/status' % pid, options)

    def check_if_valid(self, uid, options):
        """Set self.valid: explicit PIDs win, else apply the user filter."""
        self.valid = options.pids or not options.uids or uid in options.uids

    def parse_status(self, path, options):
        """Read the name and owner from a /proc/<pid>/status style file."""
        status = open(path)
        try:
            for line in status:
                if line.startswith('Name:'):
                    # Name kernel threads
                    # (get_cmdline() falls back on this bracketed name when
                    # the command line is empty)
                    self.name = '[' + line.split()[1].strip() + ']'
                elif line.startswith('Uid:'):
                    uid = int(line.split()[1])
                    # We check monitored PIDs only here
                    self.check_if_valid(uid, options)
                    try:
                        self.user = pwd.getpwuid(uid).pw_name
                    except KeyError:
                        # No passwd entry: show the numeric uid instead
                        self.user = str(uid)
                    break
        finally:
            # Close explicitly rather than relying on garbage collection.
            status.close()

    def add_stats(self, stats):
        """Record new totals and their delta against the previous totals."""
        self.stats_timestamp = time.time()
        for name, value in stats.items():
            prev_value = self.stats[name][0]
            self.stats[name] = (value, value - prev_value)

    def get_cmdline(self):
        """Return the escaped command line, or the task name if it is empty.

        Returns '{no such process}' when the cmdline file cannot be opened
        or read.
        """
        # A process may exec, so we must always reread its cmdline
        try:
            proc_cmdline = open('/proc/%d/cmdline' % self.pid)
            try:
                # The read is guarded too: it can fail with IOError when the
                # task exits between open() and read().
                cmdline = proc_cmdline.read(4096)
            finally:
                proc_cmdline.close()
        except IOError:
            return '{no such process}'
        parts = cmdline.split('\0')
        # Keep only the basename of the executable
        first_command_char = parts[0].rfind('/') + 1
        parts[0] = parts[0][first_command_char:]
        cmdline = ' '.join(parts).strip()
        return cmdline.encode('string_escape') or self.name
class ProcessList(object):
    """The set of monitored tasks and their accumulated I/O statistics."""

    def __init__(self, taskstats_connection, options):
        # {pid: pinfo}
        self.processes = {}
        self.taskstats_connection = taskstats_connection
        self.options = options

        # A first time as we are interested in the delta
        self.update_process_counts()

    def get_process(self, pid):
        """Return the pinfo for `pid`, creating it on first sight.

        Returns None when the task has vanished or is filtered out.
        """
        known = self.processes.get(pid, None)
        if known:
            return known
        try:
            fresh = pinfo(pid, self.options)
        except IOError:
            # IOError: [Errno 2] No such file or directory: '/proc/...'
            return
        if not fresh.valid:
            return
        self.processes[pid] = fresh
        return fresh

    def list_pids(self, tgid):
        """Return the ids to query for `tgid`: itself or all its threads."""
        if self.options.processes or self.options.pids:
            return [tgid]
        try:
            task_entries = os.listdir('/proc/%d/task' % tgid)
        except OSError:
            return []
        return [int(entry) for entry in task_entries]

    def update_process_counts(self):
        """Poll every task and return (total_read, total_write, duration).

        The totals are sums of per-task byte deltas; duration is the first
        non-zero 'ac_etime' delta seen, divided by 1000000.0.
        """
        total_read = total_write = duration = 0
        tgids = self.options.pids
        if not tgids:
            # Entries of /proc whose name starts with a digit
            tgids = [int(entry) for entry in os.listdir('/proc')
                     if '0' <= entry[0] <= '9']
        for tgid in tgids:
            for pid in self.list_pids(tgid):
                process = self.get_process(pid)
                if not process:
                    continue
                stats = self.taskstats_connection.get_task_stats(pid)
                if not stats:
                    continue
                # Seen alive during this pass
                process.mark = False
                process.add_stats(stats)
                total_read += process.stats['read_bytes'][1]
                total_write += process.stats['write_bytes'][1]
                if not duration:
                    duration = process.stats['ac_etime'][1] / 1000000.0
        return total_read, total_write, duration

    def refresh_processes(self):
        """Update all counters, forget tasks that produced no stats."""
        # Mark everything, let the update unmark what is still there, then
        # sweep what stayed marked.
        for process in self.processes.values():
            process.mark = True
        totals = self.update_process_counts()
        stale = [pid for pid, process in self.processes.items()
                 if process.mark]
        for pid in stale:
            del self.processes[pid]
        return totals
383 # Utility functions for the UI
# Unit suffixes: UNITS[i] stands for 2 ** (10 * i) bytes
UNITS = ['B', 'K', 'M', 'G', 'T', 'P', 'E']

def human_bandwidth(size, duration):
    """Format `size` bytes transferred over `duration` seconds as a rate.

    The unit is the largest one of which `size` exceeds two, e.g.
    human_bandwidth(4096, 2) == '2.00 K/s'. Rates in plain bytes are shown
    unrounded. A zero `duration` (the case reported as KERNBUG by
    human_stats) yields a zero rate instead of a ZeroDivisionError.
    """
    if size and duration:
        bw = float(size) / duration
    else:
        bw = 0
    # Largest unit first, stopping before plain bytes
    for i in reversed(range(1, len(UNITS))):
        base = 1 << (10 * i)
        # The unit is chosen from the byte count, the value shown is the rate
        if 2 * base < size:
            return '%.2f %s/s' % (float(bw) / base, UNITS[i])
    return '%s %s/s' % (bw, UNITS[0])
def human_stats(stats):
    """Turn one task's (total, delta) counters into four display strings.

    Returns (io_delay, swapin_delay, read_bytes, write_bytes); only the
    deltas are used.
    """
    # Keep in sync with TaskStatsNetlink.members_offsets and
    # IOTopUI.get_data(self)
    duration = stats['ac_etime'][1] / 1000000.0

    def delay2percent(name): # delay in ns, duration in s
        if duration:
            percent = stats[name][1] / (duration * 10000000.0)
            return '%.2f %%' % min(99.99, percent)
        # No elapsed time was accounted for this task
        return 'KERNBUG'

    # Cancelled writes never reach the disk; never show a negative amount
    written = max(0, stats['write_bytes'][1] -
                     stats['cancelled_write_bytes'][1])
    return (delay2percent('blkio_delay_total'),
            delay2percent('swapin_delay_total'),
            human_bandwidth(stats['read_bytes'][1], duration),
            human_bandwidth(written, duration))
416 # The UI
419 class IOTopUI(object):
420 # key, reverse
421 sorting_keys = [
422 (lambda p: p.pid, False),
423 (lambda p: p.user, False),
424 (lambda p: p.stats['read_bytes'][1], True),
425 (lambda p: p.stats['write_bytes'][1] -
426 p.stats['cancelled_write_bytes'][1], True),
427 (lambda p: p.stats['swapin_delay_total'][1], True),
428 # The default sorting (by I/O % time) should show processes doing
429 # only writes, without waiting on them
430 (lambda p: p.stats['blkio_delay_total'][1] or
431 int(not(not(p.stats['read_bytes'][1] or
432 p.stats['write_bytes'][1]))), True),
433 (lambda p: p.get_cmdline(), False),
436 def __init__(self, win, process_list, options):
437 self.process_list = process_list
438 self.options = options
439 self.sorting_key = 5
440 self.sorting_reverse = IOTopUI.sorting_keys[5][1]
441 if not self.options.batch:
442 self.win = win
443 self.resize()
444 curses.use_default_colors()
445 curses.start_color()
446 try:
447 curses.curs_set(0)
448 except curses.error:
449 # This call can fail with misconfigured terminals, for example
450 # TERM=xterm-color. This is harmless
451 pass
453 def resize(self):
454 self.height, self.width = self.win.getmaxyx()
456 def run(self):
457 iterations = 0
458 poll = select.poll()
459 if not self.options.batch:
460 poll.register(sys.stdin.fileno(), select.POLLIN|select.POLLPRI)
461 while self.options.iterations is None or \
462 iterations < self.options.iterations:
463 total = self.process_list.refresh_processes()
464 total_read, total_write, duration = total
465 self.refresh_display(total_read, total_write, duration)
466 if self.options.iterations is not None:
467 iterations += 1
468 if iterations >= self.options.iterations:
469 break
471 try:
472 events = poll.poll(self.options.delay_seconds * 1000.0)
473 except select.error, e:
474 if e.args and e.args[0] == errno.EINTR:
475 events = 0
476 else:
477 raise
478 if not self.options.batch:
479 self.resize()
480 if events:
481 key = self.win.getch()
482 self.handle_key(key)
484 def reverse_sorting(self):
485 self.sorting_reverse = not self.sorting_reverse
487 def adjust_sorting_key(self, delta):
488 orig_sorting_key = self.sorting_key
489 self.sorting_key += delta
490 self.sorting_key = max(0, self.sorting_key)
491 self.sorting_key = min(len(IOTopUI.sorting_keys) - 1, self.sorting_key)
492 if orig_sorting_key != self.sorting_key:
493 self.sorting_reverse = IOTopUI.sorting_keys[self.sorting_key][1]
495 def handle_key(self, key):
496 key_bindings = {
497 ord('q'):
498 lambda: sys.exit(0),
499 ord('Q'):
500 lambda: sys.exit(0),
501 ord('r'):
502 lambda: self.reverse_sorting(),
503 ord('R'):
504 lambda: self.reverse_sorting(),
505 curses.KEY_LEFT:
506 lambda: self.adjust_sorting_key(-1),
507 curses.KEY_RIGHT:
508 lambda: self.adjust_sorting_key(1),
509 curses.KEY_HOME:
510 lambda: self.adjust_sorting_key(-len(IOTopUI.sorting_keys)),
511 curses.KEY_END:
512 lambda: self.adjust_sorting_key(len(IOTopUI.sorting_keys))
515 action = key_bindings.get(key, lambda: None)
516 action()
518 def get_data(self):
519 def format(p):
520 stats = human_stats(p.stats)
521 io_delay, swapin_delay, read_bytes, write_bytes = stats
522 line = '%5d %-8s %11s %11s %7s %7s ' % (p.pid, p.user[:8],
523 read_bytes, write_bytes, swapin_delay, io_delay)
524 if self.options.batch:
525 max_cmdline_length = 4096
526 else:
527 max_cmdline_length = self.width - len(line)
528 line += p.get_cmdline()[:max_cmdline_length]
529 return line
531 processes = self.process_list.processes.values()
532 key = IOTopUI.sorting_keys[self.sorting_key][0]
533 processes.sort(key=key, reverse=self.sorting_reverse)
534 if not self.options.batch:
535 del processes[self.height - 2:]
536 return map(format, processes)
538 def refresh_display(self, total_read, total_write, duration):
539 summary = 'Total DISK READ: %s | Total DISK WRITE: %s' % (
540 human_bandwidth(total_read, duration),
541 human_bandwidth(total_write, duration))
542 titles = [' PID', ' USER', ' DISK READ', ' DISK WRITE',
543 ' SWAPIN', ' IO', ' COMMAND']
544 lines = self.get_data()
545 if self.options.batch:
546 print summary
547 print ''.join(titles)
548 for l in lines:
549 print l
550 else:
551 self.win.clear()
552 self.win.addstr(summary)
553 self.win.hline(1, 0, ord(' ') | curses.A_REVERSE, self.width)
554 for i in xrange(len(titles)):
555 attr = curses.A_REVERSE
556 title = titles[i]
557 if i == self.sorting_key:
558 attr |= curses.A_BOLD
559 title += self.sorting_reverse and '>' or '<'
560 self.win.addstr(title, attr)
561 for i in xrange(len(lines)):
562 self.win.insstr(i + 2, 0, lines[i])
563 self.win.refresh()
def run_iotop(win, options):
    """Wire the netlink source, the process list and the UI, then run it.

    `win` is the curses window, or None in batch mode.
    """
    process_list = ProcessList(TaskStatsNetlink(options), options)
    IOTopUI(win, process_list, options).run()
572 # Main program
VERSION = '0.1'

# Help text handed to optparse: the usage line, a blank line, then notes
USAGE = ('Usage: %s [OPTIONS]' % sys.argv[0]) + '''

DISK READ and DISK WRITE are the block I/O bandwidth used during the sampling
period. SWAPIN and IO are the percentages of time the thread spent respectively
while swapping in and waiting on I/O more generally.
Controls: left and right arrows to show the sorting column, r to invert the
sorting order, q to quit, any other key to force a refresh'''
def main():
    """Parse the command line and start iotop in curses or batch mode."""
    parser = optparse.OptionParser(usage=USAGE, version='iotop ' + VERSION)
    add = parser.add_option
    add('-d', '--delay', type='float', dest='delay_seconds',
        help='delay between iterations [1 second]',
        metavar='SEC', default=1)
    add('-p', '--pid', type='int', dest='pids', action='append',
        help='processes to monitor [all]', metavar='PID')
    add('-u', '--user', type='str', dest='users', action='append',
        help='users to monitor [all]', metavar='USER')
    add('-b', '--batch', action='store_true', dest='batch',
        help='non-interactive mode')
    add('-P', '--processes', action='store_true', dest='processes',
        help='show only processes, not all threads')
    add('-n', '--iter', type='int', dest='iterations', metavar='NUM',
        help='number of iterations before ending [infinite]')

    options, args = parser.parse_args()
    if args:
        parser.error('Unexpected arguments: ' + ' '.join(args))
    find_uids(options)
    # optparse leaves an unused 'append' option at None
    options.pids = options.pids or []

    if not options.batch:
        # wrapper() sets curses up and restores the terminal on exit
        curses.wrapper(run_iotop, options)
    else:
        run_iotop(None, options)
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C ends the program without a traceback
        pass
    sys.exit(0)