move sections
[python/dscho.git] / Lib / pstats.py
blob8b6081040a589a9e7489209243084ee094656af2
1 """Class for printing reports on profiled python code."""
3 # Class for printing reports on profiled python code. rev 1.0 4/1/94
5 # Based on prior profile module by Sjoerd Mullender...
6 # which was hacked somewhat by: Guido van Rossum
8 # see profile.doc and profile.py for more info.
10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
11 # Written by James Roskind
13 # Permission to use, copy, modify, and distribute this Python software
14 # and its associated documentation for any purpose (subject to the
15 # restriction in the following sentence) without fee is hereby granted,
16 # provided that the above copyright notice appears in all copies, and
17 # that both that copyright notice and this permission notice appear in
18 # supporting documentation, and that the name of InfoSeek not be used in
19 # advertising or publicity pertaining to distribution of the software
20 # without specific, written prior permission. This permission is
21 # explicitly restricted to the copying and modification of the software
22 # to remain in Python, compiled Python, or other languages (such as C)
23 # wherein the modified or derived code is exclusively imported into a
24 # Python module.
26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
35 import sys
36 import os
37 import time
38 import marshal
39 import re
40 from functools import cmp_to_key
42 __all__ = ["Stats"]
class Stats:
    """This class is used for creating reports from data generated by the
    Profile class.  It is a "friend" of that class, and imports data either
    by direct access to members of Profile class, or by reading in a dictionary
    that was emitted (via marshal) from the Profile class.

    The big change from the previous Profiler (in terms of raw functionality)
    is that an "add()" method has been provided to combine Stats from
    several distinct profile runs.  Both the constructor and the add()
    method now take arbitrarily many file names as arguments.

    All the print methods now take an argument that indicates how many lines
    to print.  If the arg is a floating point number between 0 and 1.0, then
    it is taken as a decimal percentage of the available lines to be printed
    (e.g., .1 means print 10% of all available lines).  If it is an integer,
    it is taken to mean the number of lines of data that you wish to have
    printed.  If it is a string, it is used as a regular expression that
    the standard function name must match.

    The sort_stats() method now processes some additional options (i.e., in
    addition to the old -1, 0, 1, or 2).  It takes an arbitrary number of
    quoted strings to select the sort order.  For example sort_stats('time',
    'name') sorts on the major key of 'internal function time', and on the
    minor key of 'the name of the function'.  Look at the two tables in
    sort_stats() and get_sort_arg_defs(self) for more examples.

    The sorting, stripping, adding and printing methods return self, so you
    can string together commands like:
        Stats('foo', 'goo').strip_dirs().sort_stats('calls').\
                            print_stats(5).print_callers(5)
    """

    def __init__(self, *args, **kwds):
        """Build a Stats object from zero or more profile sources.

        The first positional argument is loaded via init(); any remaining
        ones are merged in with add().  The only accepted keyword argument
        is "stream", the file-like object reports are printed to (defaults
        to sys.stdout).  Any other keyword raises ValueError.
        """
        # Python 2 cannot declare a keyword-only argument after *args
        #   def __init__(self, *args, stream=sys.stdout): ...
        # so "stream" is taken out of **kwds and anything else is rejected.
        self.stream = kwds.pop("stream", sys.stdout)
        if kwds:
            extras = ", ".join(["%s=%s" % (k, kwds[k]) for k in sorted(kwds)])
            raise ValueError("unrecognized keyword args: %s" % extras)
        if args:
            arg, args = args[0], args[1:]
        else:
            arg = None
        self.init(arg)
        self.add(*args)

    def init(self, arg):
        """Reset every accumulator and load the statistics found in arg."""
        self.all_callees = None  # calc only if needed
        self.files = []
        self.fcn_list = None
        self.total_tt = 0
        self.total_calls = 0
        self.prim_calls = 0
        self.max_name_len = 0
        self.top_level = {}
        self.stats = {}
        self.sort_arg_dict = {}
        self.load_stats(arg)
        succeeded = False
        try:
            self.get_top_level_stats()
            succeeded = True
        finally:
            # Report which input was malformed; the exception itself still
            # propagates to the caller.
            if not succeeded:
                print >> self.stream, "Invalid timing data",
                if self.files:
                    print >> self.stream, self.files[-1],
                print >> self.stream

    def load_stats(self, arg):
        """Populate self.stats from a file name or a profiler-like object.

        A string is treated as the path of a marshal dump; any object with
        a create_stats() method has its .stats dictionary taken over (and
        reset to {} on that object).  Raises TypeError when no statistics
        could be obtained.
        """
        if not arg:
            self.stats = {}
        elif isinstance(arg, basestring):
            # NOTE: marshal is not safe against malicious data; only load
            # dump files that come from a trusted source.
            with open(arg, 'rb') as f:
                self.stats = marshal.load(f)
            try:
                file_stats = os.stat(arg)
                arg = time.ctime(file_stats.st_mtime) + " " + arg
            except Exception:
                # Best effort only (e.g. not unix): keep the bare file name
                # when the modification time cannot be obtained.
                pass
            self.files = [arg]
        elif hasattr(arg, 'create_stats'):
            arg.create_stats()
            self.stats = arg.stats
            arg.stats = {}
        if not self.stats:
            raise TypeError("Cannot create or construct a %r object from '%r''" % (
                self.__class__, arg))
        return

    def get_top_level_stats(self):
        """Accumulate the call/time totals and the widest function name."""
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            self.total_calls += nc
            self.prim_calls += cc
            self.total_tt += tt
            if ("jprofile", 0, "profiler") in callers:
                self.top_level[func] = None
            name_len = len(func_std_string(func))
            if name_len > self.max_name_len:
                self.max_name_len = name_len

    def add(self, *arg_list):
        """Merge further statistics (Stats objects or loadable sources).

        Arguments are merged last-to-first, so self.files ends up in the
        reverse of the order given.  Returns self.
        """
        for other in reversed(arg_list):
            if type(self) != type(other) or self.__class__ != other.__class__:
                other = Stats(other)
            self.files += other.files
            self.total_calls += other.total_calls
            self.prim_calls += other.prim_calls
            self.total_tt += other.total_tt
            for func in other.top_level:
                self.top_level[func] = None

            if self.max_name_len < other.max_name_len:
                self.max_name_len = other.max_name_len

            # Cached orderings and callee tables no longer describe the
            # merged data.
            self.fcn_list = None
            self.all_callees = None

            for func, stat in other.stats.items():
                old_func_stat = self.stats.get(func, (0, 0, 0, 0, {}))
                self.stats[func] = add_func_stats(old_func_stat, stat)
        return self

    def dump_stats(self, filename):
        """Write the profile data to a file we know how to load back."""
        with open(filename, 'wb') as f:
            marshal.dump(self.stats, f)

    # list the tuple indices and directions for sorting,
    # along with some printable description
    sort_arg_dict_default = {
        "calls":      (((1, -1),), "call count"),
        "cumulative": (((3, -1),), "cumulative time"),
        "file":       (((4, 1),), "file name"),
        "line":       (((5, 1),), "line number"),
        "module":     (((4, 1),), "file name"),
        "name":       (((6, 1),), "function name"),
        "nfl":        (((6, 1), (4, 1), (5, 1),), "name/file/line"),
        "pcalls":     (((0, -1),), "call count"),
        "stdname":    (((7, 1),), "standard name"),
        "time":       (((2, -1),), "internal time"),
    }

    def get_sort_arg_defs(self):
        """Expand all abbreviations that are unique.

        Returns a dictionary mapping every sort key, and every prefix of a
        sort key that identifies exactly one key, to that key's entry in
        sort_arg_dict_default.  The table is computed once and cached in
        self.sort_arg_dict.
        """
        if not self.sort_arg_dict:
            defaults = self.sort_arg_dict_default
            abbrevs = {}
            ambiguous = set()
            for word, tup in defaults.items():
                for end in range(len(word), 0, -1):
                    fragment = word[:end]
                    if fragment in abbrevs:
                        ambiguous.add(fragment)
                    else:
                        abbrevs[fragment] = tup
            for fragment in ambiguous:
                if fragment in defaults:
                    # A complete key always refers to itself, even when it
                    # is also a prefix of a longer key.
                    abbrevs[fragment] = defaults[fragment]
                else:
                    del abbrevs[fragment]
            self.sort_arg_dict = abbrevs
        return self.sort_arg_dict

    def sort_stats(self, *field):
        """Order the function list by the given sort keys; returns self.

        Keys are names (or unique prefixes) from sort_arg_dict_default.  A
        single integer -1, 0, 1 or 2 is accepted for compatibility with the
        old profiler.  With no arguments the ordering is discarded.
        """
        if not field:
            self.fcn_list = 0
            return self
        if len(field) == 1 and isinstance(field[0], int):
            # Be compatible with old profiler
            field = [{-1: "stdname",
                      0: "calls",
                      1: "time",
                      2: "cumulative"}[field[0]]]

        sort_arg_defs = self.get_sort_arg_defs()
        sort_tuple = ()
        descriptions = []
        for word in field:
            indices, description = sort_arg_defs[word]
            sort_tuple = sort_tuple + indices
            descriptions.append(description)
        self.sort_type = ", ".join(descriptions)

        # Each row is (cc, nc, tt, ct, file, line, name, stdname, func);
        # the indices in sort_arg_dict_default refer to these positions.
        stats_list = []
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            stats_list.append((cc, nc, tt, ct) + func +
                              (func_std_string(func), func))

        stats_list.sort(key=cmp_to_key(TupleComp(sort_tuple).compare))

        self.fcn_list = [row[-1] for row in stats_list]
        return self

    def reverse_order(self):
        """Reverse the current ordering, if there is one; returns self."""
        if self.fcn_list:
            self.fcn_list.reverse()
        return self

    def strip_dirs(self):
        """Drop directory components from every file name; returns self.

        Entries that become identical once the path is removed are merged
        with add_func_stats().  Cached orderings are discarded.
        """
        oldstats = self.stats
        self.stats = newstats = {}
        max_name_len = 0
        for func, (cc, nc, tt, ct, callers) in oldstats.items():
            newfunc = func_strip_path(func)
            name_len = len(func_std_string(newfunc))
            if name_len > max_name_len:
                max_name_len = name_len
            newcallers = {}
            for func2, caller in callers.items():
                newcallers[func_strip_path(func2)] = caller

            if newfunc in newstats:
                newstats[newfunc] = add_func_stats(
                    newstats[newfunc],
                    (cc, nc, tt, ct, newcallers))
            else:
                newstats[newfunc] = (cc, nc, tt, ct, newcallers)
        old_top = self.top_level
        self.top_level = new_top = {}
        for func in old_top:
            new_top[func_strip_path(func)] = None

        self.max_name_len = max_name_len

        self.fcn_list = None
        self.all_callees = None
        return self

    def calc_callees(self):
        """Build self.all_callees (caller -> {callee: data}) from the
        per-function caller tables, unless it is already available."""
        if self.all_callees:
            return
        self.all_callees = all_callees = {}
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            if func not in all_callees:
                all_callees[func] = {}
            for func2, caller in callers.items():
                if func2 not in all_callees:
                    all_callees[func2] = {}
                all_callees[func2][func] = caller
        return

    #******************************************************************
    # The following functions support actual printing of reports
    #******************************************************************

    # Optional "amount" is either a line count, or a percentage of lines.

    def eval_print_amount(self, sel, list, msg):
        """Apply one restriction to a function list.

        sel is a regular expression (string), a fraction in [0, 1) of the
        list to keep, or a maximum number of entries.  Returns the reduced
        list and msg, extended with a note when entries were dropped.
        """
        new_list = list
        if isinstance(sel, basestring):
            new_list = [func for func in list
                        if re.search(sel, func_std_string(func))]
        else:
            count = len(list)
            if isinstance(sel, float) and 0.0 <= sel < 1.0:
                count = int(count * sel + .5)
                new_list = list[:count]
            elif isinstance(sel, int) and 0 <= sel < count:
                count = sel
                new_list = list[:count]
        if len(list) != len(new_list):
            msg = msg + " List reduced from %r to %r due to restriction <%r>\n" % (
                len(list), len(new_list), sel)

        return new_list, msg

    def get_print_list(self, sel_list):
        """Return (name column width, functions to print) for sel_list.

        Also prints the heading that describes the ordering and any
        restrictions applied.  Returns (0, []) when nothing is selected.
        """
        width = self.max_name_len
        if self.fcn_list:
            func_list = self.fcn_list[:]
            msg = " Ordered by: " + self.sort_type + '\n'
        else:
            func_list = [func for func in self.stats]
            msg = " Random listing order was used\n"

        for selection in sel_list:
            func_list, msg = self.eval_print_amount(selection, func_list, msg)

        count = len(func_list)

        if not func_list:
            return 0, func_list
        print >> self.stream, msg
        if count < len(self.stats):
            # Only a subset is shown: size the column for that subset.
            width = 0
            for func in func_list:
                name_len = len(func_std_string(func))
                if name_len > width:
                    width = name_len
        return width + 2, func_list

    def print_stats(self, *amount):
        """Print the summary and one line per selected function."""
        for filename in self.files:
            print >> self.stream, filename
        if self.files:
            print >> self.stream
        indent = ' ' * 8
        for func in self.top_level:
            print >> self.stream, indent, func_get_function_name(func)

        print >> self.stream, indent, self.total_calls, "function calls",
        if self.total_calls != self.prim_calls:
            print >> self.stream, "(%d primitive calls)" % self.prim_calls,
        print >> self.stream, "in %.3f CPU seconds" % self.total_tt
        print >> self.stream
        width, func_list = self.get_print_list(amount)
        if func_list:
            self.print_title()
            for func in func_list:
                self.print_line(func)
            print >> self.stream
            print >> self.stream
        return self

    def print_callees(self, *amount):
        """Print, for each selected function, the functions it called."""
        width, func_list = self.get_print_list(amount)
        if func_list:
            self.calc_callees()

            self.print_call_heading(width, "called...")
            for func in func_list:
                self.print_call_line(width, func,
                                     self.all_callees.get(func, {}))
            print >> self.stream
            print >> self.stream
        return self

    def print_callers(self, *amount):
        """Print, for each selected function, the functions that called it."""
        width, func_list = self.get_print_list(amount)
        if func_list:
            self.print_call_heading(width, "was called by...")
            for func in func_list:
                cc, nc, tt, ct, callers = self.stats[func]
                self.print_call_line(width, func, callers, "<-")
            print >> self.stream
            print >> self.stream
        return self

    def print_call_heading(self, name_size, column_title):
        """Print the heading used by print_callers() and print_callees()."""
        print >> self.stream, "Function ".ljust(name_size) + column_title
        # print sub-header only if we have new-style callers
        subheader = False
        for cc, nc, tt, ct, callers in self.stats.values():
            if callers:
                value = next(iter(callers.values()))
                subheader = isinstance(value, tuple)
                break
        if subheader:
            # Padded to sit above the 7/8/8 wide columns written by
            # print_call_line() after the two-character arrow.
            print >> self.stream, " "*name_size + "    ncalls  tottime  cumtime"

    def print_call_line(self, name_size, source, call_dict, arrow="->"):
        """Print source followed by one line per entry of call_dict.

        Values of call_dict are either tuples (nc, cc, tt, ct) or plain
        call counts; both layouts are supported.
        """
        print >> self.stream, func_std_string(source).ljust(name_size) + arrow,
        if not call_dict:
            print >> self.stream
            return
        indent = ""
        for func in sorted(call_dict):
            name = func_std_string(func)
            value = call_dict[func]
            if isinstance(value, tuple):
                nc, cc, tt, ct = value
                if nc != cc:
                    substats = '%d/%d' % (nc, cc)
                else:
                    substats = '%d' % (nc,)
                substats = '%s %s %s %s' % (substats.rjust(7+2*len(indent)),
                                            f8(tt), f8(ct), name)
                left_width = name_size + 1
            else:
                substats = '%s(%r) %s' % (name, value, f8(self.stats[func][3]))
                left_width = name_size + 3
            print >> self.stream, indent*left_width + substats
            # Every line after the first is indented under the arrow.
            indent = " "

    def print_title(self):
        """Print the column titles for print_stats()."""
        # Padded to match the rjust(9) and f8() columns of print_line().
        print >> self.stream, '   ncalls  tottime  percall  cumtime  percall',
        print >> self.stream, 'filename:lineno(function)'

    def print_line(self, func):  # hack : should print percentages
        """Print the statistics line of a single function."""
        cc, nc, tt, ct, callers = self.stats[func]
        c = str(nc)
        if nc != cc:
            c = c + '/' + str(cc)
        print >> self.stream, c.rjust(9),
        print >> self.stream, f8(tt),
        if nc == 0:
            print >> self.stream, ' '*8,
        else:
            print >> self.stream, f8(float(tt)/nc),
        print >> self.stream, f8(ct),
        if cc == 0:
            print >> self.stream, ' '*8,
        else:
            print >> self.stream, f8(float(ct)/cc),
        print >> self.stream, func_std_string(func)
class TupleComp:
    """Generic comparison of two tuples on a configurable list of fields.

    An instance is built from a sequence of (tuple-index, direction) pairs,
    ordered from most to least significant, where direction is 1 for
    ascending and -1 for descending.  compare() can then be handed to a
    sort (for example through functools.cmp_to_key) to order a list of
    tuples accordingly.
    """

    def __init__(self, comp_select_list):
        self.comp_select_list = comp_select_list

    def compare(self, left, right):
        """Return a negative, positive or zero number, cmp()-style."""
        for position, sign in self.comp_select_list:
            lhs, rhs = left[position], right[position]
            if lhs < rhs:
                return -sign
            elif lhs > rhs:
                return sign
        # Every selected field compared equal.
        return 0
475 #**************************************************************************
476 # func_name is a triple (file:string, line:int, name:string)
def func_strip_path(func_name):
    """Return the (file, line, name) triple with the file reduced to its
    base name."""
    path, lineno, function = func_name
    short_path = os.path.basename(path)
    return short_path, lineno, function
def func_get_function_name(func):
    """Return the function-name element (index 2) of a func triple."""
    function_name = func[2]
    return function_name
def func_std_string(func_name):  # match what old profile produced
    """Format a (file, line, name) triple as "file:line(name)".

    Triples whose first two elements are ('~', 0) are the special case for
    built-in functions: only the name is returned, with a surrounding
    <...> rewritten as {...}.
    """
    if func_name[:2] != ('~', 0):
        return "%s:%d(%s)" % func_name
    builtin_name = func_name[2]
    if builtin_name.startswith('<') and builtin_name.endswith('>'):
        return '{%s}' % builtin_name[1:-1]
    return builtin_name
496 #**************************************************************************
497 # The following functions combine statistics for pairs of functions.
498 # The bulk of the processing involves correctly handling "call" lists,
499 # such as callers and callees.
500 #**************************************************************************
def add_func_stats(target, source):
    """Return the element-wise sum of two (cc, nc, tt, ct, callers) entries.

    The four numeric fields are added; the two callers dictionaries are
    merged with add_callers().
    """
    src_cc, src_nc, src_tt, src_ct, src_callers = source
    dst_cc, dst_nc, dst_tt, dst_ct, dst_callers = target
    totals = (src_cc + dst_cc, src_nc + dst_nc,
              src_tt + dst_tt, src_ct + dst_ct)
    return totals + (add_callers(dst_callers, src_callers),)
def add_callers(target, source):
    """Combine two caller dictionaries into a new one.

    Both arguments map a function to its caller data; neither is modified.
    Caller data is either a tuple of numbers, which is summed element by
    element, or a plain call count, which is summed directly.  Functions
    present in only one dictionary are copied unchanged.
    """
    new_callers = dict(target)
    for func, caller in source.items():
        if func not in new_callers:
            new_callers[func] = caller
        elif isinstance(caller, tuple):
            # Tuple layout: add the matching fields.
            new_callers[func] = tuple([mine + theirs for mine, theirs in
                                       zip(caller, new_callers[func])])
        else:
            # Plain call count: zip() would fail on numbers.
            new_callers[func] = new_callers[func] + caller
    return new_callers
def count_calls(callers):
    """Sum the caller statistics to get total number of calls received."""
    # sum() starts from 0, exactly like the running total it replaces.
    return sum(callers.values())
529 #**************************************************************************
530 # The following functions support printing of reports
531 #**************************************************************************
def f8(x):
    """Format a number in a field 8 characters wide with 3 decimals."""
    formatted = "%8.3f" % x
    return formatted
536 #**************************************************************************
537 # Statistics browser added by ESR, April 2001
538 #**************************************************************************
if __name__ == '__main__':
    import cmd
    try:
        # Not referenced directly below; presumably imported so that the
        # cmd prompt gets line editing where the module is available.
        import readline
    except ImportError:
        pass

    class ProfileBrowser(cmd.Cmd):
        """Interactive command loop for inspecting a Stats object."""

        def __init__(self, profile=None):
            cmd.Cmd.__init__(self)
            self.prompt = "% "
            if profile is not None:
                self.stats = Stats(profile)
                self.stream = self.stats.stream
            else:
                self.stats = None
                self.stream = sys.stdout

        def _have_stats(self):
            """Return True if statistics are loaded, else print a notice."""
            if self.stats:
                return True
            print >> self.stream, "No statistics object is loaded."
            return False

        def generic(self, fn, line):
            """Parse line into restrictions and call self.stats.<fn>.

            Each word becomes an int, a float in [0, 1], or is passed
            through as a string (regular expression).
            """
            args = line.split()
            processed = []
            for term in args:
                try:
                    processed.append(int(term))
                    continue
                except ValueError:
                    pass
                try:
                    frac = float(term)
                    if frac > 1 or frac < 0:
                        print >> self.stream, "Fraction argument must be in [0, 1]"
                        continue
                    processed.append(frac)
                    continue
                except ValueError:
                    pass
                processed.append(term)
            if self._have_stats():
                getattr(self.stats, fn)(*processed)
            return 0

        def generic_help(self):
            print >> self.stream, "Arguments may be:"
            print >> self.stream, "* An integer maximum number of entries to print."
            print >> self.stream, "* A decimal fractional number between 0 and 1, controlling"
            print >> self.stream, " what fraction of selected entries to print."
            print >> self.stream, "* A regular expression; only entries with function names"
            print >> self.stream, " that match it are printed."

        def do_add(self, line):
            if self._have_stats():
                try:
                    self.stats.add(line)
                except IOError as err:
                    # Keep the browser alive when the file cannot be read.
                    print >> self.stream, "Failed to load statistics for %s: %s" % (line, err)
            return 0

        def help_add(self):
            print >> self.stream, "Add profile info from given file to current statistics object."

        def do_callees(self, line):
            return self.generic('print_callees', line)

        def help_callees(self):
            print >> self.stream, "Print callees statistics from the current stat object."
            self.generic_help()

        def do_callers(self, line):
            return self.generic('print_callers', line)

        def help_callers(self):
            print >> self.stream, "Print callers statistics from the current stat object."
            self.generic_help()

        def do_EOF(self, line):
            print >> self.stream, ""
            return 1

        def help_EOF(self):
            print >> self.stream, "Leave the profile browser."

        def do_quit(self, line):
            return 1

        def help_quit(self):
            print >> self.stream, "Leave the profile browser."

        def do_read(self, line):
            """Load statistics from the named file, or reload the current
            file (taken from the prompt) when no name is given."""
            if line:
                try:
                    self.stats = Stats(line)
                except IOError as err:
                    print >> self.stream, err.strerror or err
                    return
                except Exception as err:
                    # Not a usable dump file: report it, keep the session.
                    print >> self.stream, err.__class__.__name__ + ':', err
                    return
                self.prompt = line + "% "
            elif len(self.prompt) > 2:
                # The prompt is "<file>% "; strip the suffix to recover
                # the file name and read it again.
                self.do_read(self.prompt[:-2])
            else:
                print >> self.stream, "No statistics object is current -- cannot reload."
            return 0

        def help_read(self):
            print >> self.stream, "Read in profile data from a specified file."

        def do_reverse(self, line):
            if self._have_stats():
                self.stats.reverse_order()
            return 0

        def help_reverse(self):
            print >> self.stream, "Reverse the sort order of the profiling report."

        def do_sort(self, line):
            if not self._have_stats():
                return 0
            abbrevs = self.stats.get_sort_arg_defs()
            keys = line.split()
            if keys and all((key in abbrevs) for key in keys):
                self.stats.sort_stats(*keys)
            else:
                print >> self.stream, "Valid sort keys (unique prefixes are accepted):"
                for (key, value) in Stats.sort_arg_dict_default.items():
                    print >> self.stream, "%s -- %s" % (key, value[1])
            return 0

        def help_sort(self):
            print >> self.stream, "Sort profile data according to specified keys."
            print >> self.stream, "(Typing `sort' without arguments lists valid keys.)"

        def complete_sort(self, text, *args):
            return [a for a in Stats.sort_arg_dict_default if a.startswith(text)]

        def do_stats(self, line):
            return self.generic('print_stats', line)

        def help_stats(self):
            print >> self.stream, "Print statistics from the current stat object."
            self.generic_help()

        def do_strip(self, line):
            if self._have_stats():
                self.stats.strip_dirs()
            return 0

        def help_strip(self):
            print >> self.stream, "Strip leading path information from filenames in the report."

        def postcmd(self, stop, line):
            # Normalise any false value to None so only a true "stop"
            # ends the command loop.
            if stop:
                return stop
            return None

    import sys
    if len(sys.argv) > 1:
        initprofile = sys.argv[1]
    else:
        initprofile = None
    try:
        browser = ProfileBrowser(initprofile)
        print >> browser.stream, "Welcome to the profile statistics browser."
        browser.cmdloop()
        print >> browser.stream, "Goodbye."
    except KeyboardInterrupt:
        pass
686 # That's all, folks.