Lib/pstats.py

   1 """Class for printing reports on profiled python code."""
   2
   3 # Class for printing reports on profiled python code. rev 1.0  4/1/94
   4 #
   5 # Based on prior profile module by Sjoerd Mullender...
   6 #   which was hacked somewhat by: Guido van Rossum
   7 #
   8 # see profile.doc and profile.py for more info.
   9
  10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  11 # Written by James Roskind
  12 #
  13 # Permission to use, copy, modify, and distribute this Python software
  14 # and its associated documentation for any purpose (subject to the
  15 # restriction in the following sentence) without fee is hereby granted,
  16 # provided that the above copyright notice appears in all copies, and
  17 # that both that copyright notice and this permission notice appear in
  18 # supporting documentation, and that the name of InfoSeek not be used in
  19 # advertising or publicity pertaining to distribution of the software
  20 # without specific, written prior permission.  This permission is
  21 # explicitly restricted to the copying and modification of the software
  22 # to remain in Python, compiled Python, or other languages (such as C)
  23 # wherein the modified or derived code is exclusively imported into a
  24 # Python module.
  25 #
  26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  33
  34
  35 import os
  36 import time
  37 import marshal
  38 import re
  39
  40 __all__ = ["Stats"]
  41
  42 class Stats:
  43     """This class is used for creating reports from data generated by the
  44     Profile class.  It is a "friend" of that class, and imports data either
  45     by direct access to members of Profile class, or by reading in a dictionary
  46     that was emitted (via marshal) from the Profile class.
  47
  48     The big change from the previous Profiler (in terms of raw functionality)
  49     is that an "add()" method has been provided to combine Stats from
  50     several distinct profile runs.  Both the constructor and the add()
  51     method now take arbitrarily many file names as arguments.
  52
  53     All the print methods now take an argument that indicates how many lines
  54     to print.  If the arg is a floating point number between 0 and 1.0, then
  55     it is taken as a decimal percentage of the available lines to be printed
  56     (e.g., .1 means print 10% of all available lines).  If it is an integer,
  57     it is taken to mean the number of lines of data that you wish to have
  58     printed.
  59
  60     The sort_stats() method now processes some additional options (i.e., in
  61     addition to the old -1, 0, 1, or 2).  It takes an arbitrary number of quoted
  62     strings to select the sort order.  For example sort_stats('time', 'name')
  63     sorts on the major key of "internal function time", and on the minor
  64     key of 'the name of the function'.  Look at the two tables in sort_stats()
  65     and get_sort_arg_defs(self) for more examples.
  66
  67     All methods now return "self",  so you can string together commands like:
  68         Stats('foo', 'goo').strip_dirs().sort_stats('calls').\
  69                             print_stats(5).print_callers(5)
  70     """
  71
  72     def __init__(self, *args):
  73         if not len(args):
  74             arg = None
  75         else:
  76             arg = args[0]
  77             args = args[1:]
  78         self.init(arg)
  79         self.add(*args)
  80
  81     def init(self, arg):
  82         self.all_callees = None  # calc only if needed
  83         self.files = []
  84         self.fcn_list = None
  85         self.total_tt = 0
  86         self.total_calls = 0
  87         self.prim_calls = 0
  88         self.max_name_len = 0
  89         self.top_level = {}
  90         self.stats = {}
  91         self.sort_arg_dict = {}
  92         self.load_stats(arg)
  93         trouble = 1
  94         try:
  95             self.get_top_level_stats()
  96             trouble = 0
  97         finally:
  98             if trouble:
  99                 print "Invalid timing data",
 100                 if self.files: print self.files[-1],
 101                 print
 102
 103     def load_stats(self, arg):
 104         if not arg:  self.stats = {}
 105         elif type(arg) == type(""):
 106             f = open(arg, 'rb')
 107             self.stats = marshal.load(f)
 108             f.close()
 109             try:
 110                 file_stats = os.stat(arg)
 111                 arg = time.ctime(file_stats.st_mtime) + "    " + arg
 112             except:  # in case this is not unix
 113                 pass
 114             self.files = [ arg ]
 115         elif hasattr(arg, 'create_stats'):
 116             arg.create_stats()
 117             self.stats = arg.stats
 118             arg.stats = {}
 119         if not self.stats:
 120             raise TypeError,  "Cannot create or construct a %r object from '%r''" % (
 121                               self.__class__, arg)
 122         return
 123
 124     def get_top_level_stats(self):
 125         for func, (cc, nc, tt, ct, callers) in self.stats.items():
 126             self.total_calls += nc
 127             self.prim_calls  += cc
 128             self.total_tt    += tt
 129             if callers.has_key(("jprofile", 0, "profiler")):
 130                 self.top_level[func] = None
 131             if len(func_std_string(func)) > self.max_name_len:
 132                 self.max_name_len = len(func_std_string(func))
 133
 134     def add(self, *arg_list):
 135         if not arg_list: return self
 136         if len(arg_list) > 1: self.add(*arg_list[1:])
 137         other = arg_list[0]
 138         if type(self) != type(other) or self.__class__ != other.__class__:
 139             other = Stats(other)
 140         self.files += other.files
 141         self.total_calls += other.total_calls
 142         self.prim_calls += other.prim_calls
 143         self.total_tt += other.total_tt
 144         for func in other.top_level:
 145             self.top_level[func] = None
 146
 147         if self.max_name_len < other.max_name_len:
 148             self.max_name_len = other.max_name_len
 149
 150         self.fcn_list = None
 151
 152         for func, stat in other.stats.iteritems():
 153             if func in self.stats:
 154                 old_func_stat = self.stats[func]
 155             else:
 156                 old_func_stat = (0, 0, 0, 0, {},)
 157             self.stats[func] = add_func_stats(old_func_stat, stat)
 158         return self
 159
 160     def dump_stats(self, filename):
 161         """Write the profile data to a file we know how to load back."""
 162         f = file(filename, 'wb')
 163         try:
 164             marshal.dump(self.stats, f)
 165         finally:
 166             f.close()
 167
 168     # list the tuple indices and directions for sorting,
 169     # along with some printable description
 170     sort_arg_dict_default = {
 171               "calls"     : (((1,-1),              ), "call count"),
 172               "cumulative": (((3,-1),              ), "cumulative time"),
 173               "file"      : (((4, 1),              ), "file name"),
 174               "line"      : (((5, 1),              ), "line number"),
 175               "module"    : (((4, 1),              ), "file name"),
 176               "name"      : (((6, 1),              ), "function name"),
 177               "nfl"       : (((6, 1),(4, 1),(5, 1),), "name/file/line"),
 178               "pcalls"    : (((0,-1),              ), "call count"),
 179               "stdname"   : (((7, 1),              ), "standard name"),
 180               "time"      : (((2,-1),              ), "internal time"),
 181               }
 182
 183     def get_sort_arg_defs(self):
 184         """Expand all abbreviations that are unique."""
 185         if not self.sort_arg_dict:
 186             self.sort_arg_dict = dict = {}
 187             bad_list = {}
 188             for word, tup in self.sort_arg_dict_default.iteritems():
 189                 fragment = word
 190                 while fragment:
 191                     if not fragment:
 192                         break
 193                     if fragment in dict:
 194                         bad_list[fragment] = 0
 195                         break
 196                     dict[fragment] = tup
 197                     fragment = fragment[:-1]
 198             for word in bad_list:
 199                 del dict[word]
 200         return self.sort_arg_dict
 201
 202     def sort_stats(self, *field):
 203         if not field:
 204             self.fcn_list = 0
 205             return self
 206         if len(field) == 1 and type(field[0]) == type(1):
 207             # Be compatible with old profiler
 208             field = [ {-1: "stdname",
 209                       0:"calls",
 210                       1:"time",
 211                       2: "cumulative" }  [ field[0] ] ]
 212
 213         sort_arg_defs = self.get_sort_arg_defs()
 214         sort_tuple = ()
 215         self.sort_type = ""
 216         connector = ""
 217         for word in field:
 218             sort_tuple = sort_tuple + sort_arg_defs[word][0]
 219             self.sort_type += connector + sort_arg_defs[word][1]
 220             connector = ", "
 221
 222         stats_list = []
 223         for func, (cc, nc, tt, ct, callers) in self.stats.iteritems():
 224             stats_list.append((cc, nc, tt, ct) + func +
 225                               (func_std_string(func), func))
 226
 227         stats_list.sort(TupleComp(sort_tuple).compare)
 228
 229         self.fcn_list = fcn_list = []
 230         for tuple in stats_list:
 231             fcn_list.append(tuple[-1])
 232         return self
 233
 234     def reverse_order(self):
 235         if self.fcn_list:
 236             self.fcn_list.reverse()
 237         return self
 238
 239     def strip_dirs(self):
 240         oldstats = self.stats
 241         self.stats = newstats = {}
 242         max_name_len = 0
 243         for func, (cc, nc, tt, ct, callers) in oldstats.iteritems():
 244             newfunc = func_strip_path(func)
 245             if len(func_std_string(newfunc)) > max_name_len:
 246                 max_name_len = len(func_std_string(newfunc))
 247             newcallers = {}
 248             for func2, caller in callers.iteritems():
 249                 newcallers[func_strip_path(func2)] = caller
 250
 251             if newfunc in newstats:
 252                 newstats[newfunc] = add_func_stats(
 253                                         newstats[newfunc],
 254                                         (cc, nc, tt, ct, newcallers))
 255             else:
 256                 newstats[newfunc] = (cc, nc, tt, ct, newcallers)
 257         old_top = self.top_level
 258         self.top_level = new_top = {}
 259         for func in old_top:
 260             new_top[func_strip_path(func)] = None
 261
 262         self.max_name_len = max_name_len
 263
 264         self.fcn_list = None
 265         self.all_callees = None
 266         return self
 267
 268     def calc_callees(self):
 269         if self.all_callees: return
 270         self.all_callees = all_callees = {}
 271         for func, (cc, nc, tt, ct, callers) in self.stats.iteritems():
 272             if not func in all_callees:
 273                 all_callees[func] = {}
 274             for func2, caller in callers.iteritems():
 275                 if not func2 in all_callees:
 276                     all_callees[func2] = {}
 277                 all_callees[func2][func]  = caller
 278         return
 279
 280     #******************************************************************
 281     # The following functions support actual printing of reports
 282     #******************************************************************
 283
 284     # Optional "amount" is either a line count, or a percentage of lines.
 285
 286     def eval_print_amount(self, sel, list, msg):
 287         new_list = list
 288         if type(sel) == type(""):
 289             new_list = []
 290             for func in list:
 291                 if re.search(sel, func_std_string(func)):
 292                     new_list.append(func)
 293         else:
 294             count = len(list)
 295             if type(sel) == type(1.0) and 0.0 <= sel < 1.0:
 296                 count = int(count * sel + .5)
 297                 new_list = list[:count]
 298             elif type(sel) == type(1) and 0 <= sel < count:
 299                 count = sel
 300                 new_list = list[:count]
 301         if len(list) != len(new_list):
 302             msg = msg + "   List reduced from %r to %r due to restriction <%r>\n" % (
 303                          len(list), len(new_list), sel)
 304
 305         return new_list, msg
 306
 307     def get_print_list(self, sel_list):
 308         width = self.max_name_len
 309         if self.fcn_list:
 310             list = self.fcn_list[:]
 311             msg = "   Ordered by: " + self.sort_type + '\n'
 312         else:
 313             list = self.stats.keys()
 314             msg = "   Random listing order was used\n"
 315
 316         for selection in sel_list:
 317             list, msg = self.eval_print_amount(selection, list, msg)
 318
 319         count = len(list)
 320
 321         if not list:
 322             return 0, list
 323         print msg
 324         if count < len(self.stats):
 325             width = 0
 326             for func in list:
 327                 if  len(func_std_string(func)) > width:
 328                     width = len(func_std_string(func))
 329         return width+2, list
 330
 331     def print_stats(self, *amount):
 332         for filename in self.files:
 333             print filename
 334         if self.files: print
 335         indent = ' ' * 8
 336         for func in self.top_level:
 337             print indent, func_get_function_name(func)
 338
 339         print indent, self.total_calls, "function calls",
 340         if self.total_calls != self.prim_calls:
 341             print "(%d primitive calls)" % self.prim_calls,
 342         print "in %.3f CPU seconds" % self.total_tt
 343         print
 344         width, list = self.get_print_list(amount)
 345         if list:
 346             self.print_title()
 347             for func in list:
 348                 self.print_line(func)
 349             print
 350             print
 351         return self
 352
 353     def print_callees(self, *amount):
 354         width, list = self.get_print_list(amount)
 355         if list:
 356             self.calc_callees()
 357
 358             self.print_call_heading(width, "called...")
 359             for func in list:
 360                 if func in self.all_callees:
 361                     self.print_call_line(width, func, self.all_callees[func])
 362                 else:
 363                     self.print_call_line(width, func, {})
 364             print
 365             print
 366         return self
 367
 368     def print_callers(self, *amount):
 369         width, list = self.get_print_list(amount)
 370         if list:
 371             self.print_call_heading(width, "was called by...")
 372             for func in list:
 373                 cc, nc, tt, ct, callers = self.stats[func]
 374                 self.print_call_line(width, func, callers)
 375             print
 376             print
 377         return self
 378
 379     def print_call_heading(self, name_size, column_title):
 380         print "Function ".ljust(name_size) + column_title
 381
 382     def print_call_line(self, name_size, source, call_dict):
 383         print func_std_string(source).ljust(name_size),
 384         if not call_dict:
 385             print "--"
 386             return
 387         clist = call_dict.keys()
 388         clist.sort()
 389         name_size = name_size + 1
 390         indent = ""
 391         for func in clist:
 392             name = func_std_string(func)
 393             print indent*name_size + name + '(%r)' % (call_dict[func],), \
 394                       f8(self.stats[func][3])
 395             indent = " "
 396
 397     def print_title(self):
 398         print '   ncalls  tottime  percall  cumtime  percall', \
 399               'filename:lineno(function)'
 400
 401     def print_line(self, func):  # hack : should print percentages
 402         cc, nc, tt, ct, callers = self.stats[func]
 403         c = str(nc)
 404         if nc != cc:
 405             c = c + '/' + str(cc)
 406         print c.rjust(9),
 407         print f8(tt),
 408         if nc == 0:
 409             print ' '*8,
 410         else:
 411             print f8(tt/nc),
 412         print f8(ct),
 413         if cc == 0:
 414             print ' '*8,
 415         else:
 416             print f8(ct/cc),
 417         print func_std_string(func)
 418
 419 class TupleComp:
 420     """This class provides a generic function for comparing any two tuples.
 421     Each instance records a list of tuple-indices (from most significant
 422     to least significant), and sort direction (ascending or decending) for
 423     each tuple-index.  The compare functions can then be used as the function
 424     argument to the system sort() function when a list of tuples need to be
 425     sorted in the instances order."""
 426
 427     def __init__(self, comp_select_list):
 428         self.comp_select_list = comp_select_list
 429
 430     def compare (self, left, right):
 431         for index, direction in self.comp_select_list:
 432             l = left[index]
 433             r = right[index]
 434             if l < r:
 435                 return -direction
 436             if l > r:
 437                 return direction
 438         return 0
 439
 440 #**************************************************************************
 441 # func_name is a triple (file:string, line:int, name:string)
 442
 443 def func_strip_path(func_name):
 444     filename, line, name = func_name
 445     return os.path.basename(filename), line, name
 446
 447 def func_get_function_name(func):
 448     return func[2]
 449
 450 def func_std_string(func_name): # match what old profile produced
 451     return "%s:%d(%s)" % func_name
 452
 453 #**************************************************************************
  454 # The following functions combine statistics for pairs of functions.
 455 # The bulk of the processing involves correctly handling "call" lists,
 456 # such as callers and callees.
 457 #**************************************************************************
 458
 459 def add_func_stats(target, source):
 460     """Add together all the stats for two profile entries."""
 461     cc, nc, tt, ct, callers = source
 462     t_cc, t_nc, t_tt, t_ct, t_callers = target
 463     return (cc+t_cc, nc+t_nc, tt+t_tt, ct+t_ct,
 464               add_callers(t_callers, callers))
 465
 466 def add_callers(target, source):
 467     """Combine two caller lists in a single list."""
 468     new_callers = {}
 469     for func, caller in target.iteritems():
 470         new_callers[func] = caller
 471     for func, caller in source.iteritems():
 472         if func in new_callers:
 473             new_callers[func] = caller + new_callers[func]
 474         else:
 475             new_callers[func] = caller
 476     return new_callers
 477
 478 def count_calls(callers):
 479     """Sum the caller statistics to get total number of calls received."""
 480     nc = 0
 481     for calls in callers.itervalues():
 482         nc += calls
 483     return nc
 484
 485 #**************************************************************************
 486 # The following functions support printing of reports
 487 #**************************************************************************
 488
 489 def f8(x):
 490     return "%8.3f" % x
 491
 492 #**************************************************************************
 493 # Statistics browser added by ESR, April 2001
 494 #**************************************************************************
 495
 496 if __name__ == '__main__':
 497     import cmd
 498     try:
 499         import readline
 500     except ImportError:
 501         pass
 502
 503     class ProfileBrowser(cmd.Cmd):
 504         def __init__(self, profile=None):
 505             cmd.Cmd.__init__(self)
 506             self.prompt = "% "
 507             if profile is not None:
 508                 self.stats = Stats(profile)
 509             else:
 510                 self.stats = None
 511
 512         def generic(self, fn, line):
 513             args = line.split()
 514             processed = []
 515             for term in args:
 516                 try:
 517                     processed.append(int(term))
 518                     continue
 519                 except ValueError:
 520                     pass
 521                 try:
 522                     frac = float(term)
 523                     if frac > 1 or frac < 0:
 524                         print "Fraction argument mus be in [0, 1]"
 525                         continue
 526                     processed.append(frac)
 527                     continue
 528                 except ValueError:
 529                     pass
 530                 processed.append(term)
 531             if self.stats:
 532                 getattr(self.stats, fn)(*processed)
 533             else:
 534                 print "No statistics object is loaded."
 535             return 0
 536         def generic_help(self):
 537             print "Arguments may be:"
 538             print "* An integer maximum number of entries to print."
 539             print "* A decimal fractional number between 0 and 1, controlling"
 540             print "  what fraction of selected entries to print."
 541             print "* A regular expression; only entries with function names"
 542             print "  that match it are printed."
 543
 544         def do_add(self, line):
 545             self.stats.add(line)
 546             return 0
 547         def help_add(self):
 548             print "Add profile info from given file to current statistics object."
 549
 550         def do_callees(self, line):
 551             return self.generic('print_callees', line)
 552         def help_callees(self):
 553             print "Print callees statistics from the current stat object."
 554             self.generic_help()
 555
 556         def do_callers(self, line):
 557             return self.generic('print_callers', line)
 558         def help_callers(self):
 559             print "Print callers statistics from the current stat object."
 560             self.generic_help()
 561
 562         def do_EOF(self, line):
 563             print ""
 564             return 1
 565         def help_EOF(self):
 566             print "Leave the profile brower."
 567
 568         def do_quit(self, line):
 569             return 1
 570         def help_quit(self):
 571             print "Leave the profile brower."
 572
 573         def do_read(self, line):
 574             if line:
 575                 try:
 576                     self.stats = Stats(line)
 577                 except IOError, args:
 578                     print args[1]
 579                     return
 580                 self.prompt = line + "% "
 581             elif len(self.prompt) > 2:
 582                 line = self.prompt[-2:]
 583             else:
 584                 print "No statistics object is current -- cannot reload."
 585             return 0
 586         def help_read(self):
 587             print "Read in profile data from a specified file."
 588
 589         def do_reverse(self, line):
 590             self.stats.reverse_order()
 591             return 0
 592         def help_reverse(self):
 593             print "Reverse the sort order of the profiling report."
 594
 595         def do_sort(self, line):
 596             abbrevs = self.stats.get_sort_arg_defs()
 597             if line and not filter(lambda x,a=abbrevs: x not in a,line.split()):
 598                 self.stats.sort_stats(*line.split())
 599             else:
 600                 print "Valid sort keys (unique prefixes are accepted):"
 601                 for (key, value) in Stats.sort_arg_dict_default.iteritems():
 602                     print "%s -- %s" % (key, value[1])
 603             return 0
 604         def help_sort(self):
 605             print "Sort profile data according to specified keys."
 606             print "(Typing `sort' without arguments lists valid keys.)"
 607         def complete_sort(self, text, *args):
 608             return [a for a in Stats.sort_arg_dict_default if a.startswith(text)]
 609
 610         def do_stats(self, line):
 611             return self.generic('print_stats', line)
 612         def help_stats(self):
 613             print "Print statistics from the current stat object."
 614             self.generic_help()
 615
 616         def do_strip(self, line):
 617             self.stats.strip_dirs()
 618             return 0
 619         def help_strip(self):
 620             print "Strip leading path information from filenames in the report."
 621
 622         def postcmd(self, stop, line):
 623             if stop:
 624                 return stop
 625             return None
 626
 627     import sys
 628     print "Welcome to the profile statistics browser."
 629     if len(sys.argv) > 1:
 630         initprofile = sys.argv[1]
 631     else:
 632         initprofile = None
 633     try:
 634         ProfileBrowser(initprofile).cmdloop()
 635         print "Goodbye."
 636     except KeyboardInterrupt:
 637         pass
 638
 639 # That's all, folks.