Lib/profile.py

   1 #! /usr/bin/env python
   2 #
   3 # Class for profiling python code. rev 1.0  6/2/94
   4 #
   5 # Based on prior profile module by Sjoerd Mullender...
   6 #   which was hacked somewhat by: Guido van Rossum
   7
   8 """Class for profiling Python code."""
   9
  10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  11 # Written by James Roskind
  12 #
  13 # Permission to use, copy, modify, and distribute this Python software
  14 # and its associated documentation for any purpose (subject to the
  15 # restriction in the following sentence) without fee is hereby granted,
  16 # provided that the above copyright notice appears in all copies, and
  17 # that both that copyright notice and this permission notice appear in
  18 # supporting documentation, and that the name of InfoSeek not be used in
  19 # advertising or publicity pertaining to distribution of the software
  20 # without specific, written prior permission.  This permission is
  21 # explicitly restricted to the copying and modification of the software
  22 # to remain in Python, compiled Python, or other languages (such as C)
  23 # wherein the modified or derived code is exclusively imported into a
  24 # Python module.
  25 #
  26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  33
  34
  35
  36 import sys
  37 import os
  38 import time
  39 import marshal
  40 from optparse import OptionParser
  41
  42 __all__ = ["run", "runctx", "help", "Profile"]
  43
  44 # Sample timer for use with
  45 #i_count = 0
  46 #def integer_timer():
  47 #       global i_count
  48 #       i_count = i_count + 1
  49 #       return i_count
  50 #itimes = integer_timer # replace with C coded timer returning integers
  51
  52 #**************************************************************************
  53 # The following are the static member functions for the profiler class
  54 # Note that an instance of Profile() is *not* needed to call them.
  55 #**************************************************************************
  56
  57 def run(statement, filename=None, sort=-1):
  58     """Run statement under profiler optionally saving results in filename
  59
  60     This function takes a single argument that can be passed to the
  61     "exec" statement, and an optional file name.  In all cases this
  62     routine attempts to "exec" its first argument and gather profiling
  63     statistics from the execution. If no file name is present, then this
  64     function automatically prints a simple profiling report, sorted by the
  65     standard name string (file/line/function-name) that is presented in
  66     each line.
  67     """
  68     prof = Profile()
  69     try:
  70         prof = prof.run(statement)
  71     except SystemExit:
  72         pass
  73     if filename is not None:
  74         prof.dump_stats(filename)
  75     else:
  76         return prof.print_stats(sort)
  77
  78 def runctx(statement, globals, locals, filename=None):
  79     """Run statement under profiler, supplying your own globals and locals,
  80     optionally saving results in filename.
  81
  82     statement and filename have the same semantics as profile.run
  83     """
  84     prof = Profile()
  85     try:
  86         prof = prof.runctx(statement, globals, locals)
  87     except SystemExit:
  88         pass
  89
  90     if filename is not None:
  91         prof.dump_stats(filename)
  92     else:
  93         return prof.print_stats()
  94
  95 # Backwards compatibility.
  96 def help():
  97     print "Documentation for the profile module can be found "
  98     print "in the Python Library Reference, section 'The Python Profiler'."
  99
 100 if os.name == "mac":
 101     import MacOS
 102     def _get_time_mac(timer=MacOS.GetTicks):
 103         return timer() / 60.0
 104
 105 if hasattr(os, "times"):
 106     def _get_time_times(timer=os.times):
 107         t = timer()
 108         return t[0] + t[1]
 109
 110 # Using getrusage(3) is better than clock(3) if available:
 111 # on some systems (e.g. FreeBSD), getrusage has a higher resolution
 112 # Furthermore, on a POSIX system, returns microseconds, which
 113 # wrap around after 36min.
 114 _has_res = 0
 115 try:
 116     import resource
 117     resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
 118     def _get_time_resource(timer=resgetrusage):
 119         t = timer()
 120         return t[0] + t[1]
 121     _has_res = 1
 122 except ImportError:
 123     pass
 124
 125 class Profile:
 126     """Profiler class.
 127
 128     self.cur is always a tuple.  Each such tuple corresponds to a stack
 129     frame that is currently active (self.cur[-2]).  The following are the
 130     definitions of its members.  We use this external "parallel stack" to
 131     avoid contaminating the program that we are profiling. (old profiler
 132     used to write into the frames local dictionary!!) Derived classes
 133     can change the definition of some entries, as long as they leave
 134     [-2:] intact (frame and previous tuple).  In case an internal error is
 135     detected, the -3 element is used as the function name.
 136
 137     [ 0] = Time that needs to be charged to the parent frame's function.
 138            It is used so that a function call will not have to access the
 139            timing data for the parent frame.
 140     [ 1] = Total time spent in this frame's function, excluding time in
 141            subfunctions (this latter is tallied in cur[2]).
 142     [ 2] = Total time spent in subfunctions, excluding time executing the
 143            frame's function (this latter is tallied in cur[1]).
 144     [-3] = Name of the function that corresponds to this frame.
 145     [-2] = Actual frame that we correspond to (used to sync exception handling).
 146     [-1] = Our parent 6-tuple (corresponds to frame.f_back).
 147
 148     Timing data for each function is stored as a 5-tuple in the dictionary
 149     self.timings[].  The index is always the name stored in self.cur[-3].
 150     The following are the definitions of the members:
 151
 152     [0] = The number of times this function was called, not counting direct
 153           or indirect recursion,
 154     [1] = Number of times this function appears on the stack, minus one
 155     [2] = Total time spent internal to this function
 156     [3] = Cumulative time that this function was present on the stack.  In
 157           non-recursive functions, this is the total execution time from start
 158           to finish of each invocation of a function, including time spent in
 159           all subfunctions.
 160     [4] = A dictionary indicating for each function name, the number of times
 161           it was called by us.
 162     """
 163
 164     bias = 0  # calibration constant
 165
 166     def __init__(self, timer=None, bias=None):
 167         self.timings = {}
 168         self.cur = None
 169         self.cmd = ""
 170         self.c_func_name = ""
 171
 172         if bias is None:
 173             bias = self.bias
 174         self.bias = bias     # Materialize in local dict for lookup speed.
 175
 176         if not timer:
 177             if _has_res:
 178                 self.timer = resgetrusage
 179                 self.dispatcher = self.trace_dispatch
 180                 self.get_time = _get_time_resource
 181             elif os.name == 'mac':
 182                 self.timer = MacOS.GetTicks
 183                 self.dispatcher = self.trace_dispatch_mac
 184                 self.get_time = _get_time_mac
 185             elif hasattr(time, 'clock'):
 186                 self.timer = self.get_time = time.clock
 187                 self.dispatcher = self.trace_dispatch_i
 188             elif hasattr(os, 'times'):
 189                 self.timer = os.times
 190                 self.dispatcher = self.trace_dispatch
 191                 self.get_time = _get_time_times
 192             else:
 193                 self.timer = self.get_time = time.time
 194                 self.dispatcher = self.trace_dispatch_i
 195         else:
 196             self.timer = timer
 197             t = self.timer() # test out timer function
 198             try:
 199                 length = len(t)
 200             except TypeError:
 201                 self.get_time = timer
 202                 self.dispatcher = self.trace_dispatch_i
 203             else:
 204                 if length == 2:
 205                     self.dispatcher = self.trace_dispatch
 206                 else:
 207                     self.dispatcher = self.trace_dispatch_l
 208                 # This get_time() implementation needs to be defined
 209                 # here to capture the passed-in timer in the parameter
 210                 # list (for performance).  Note that we can't assume
 211                 # the timer() result contains two values in all
 212                 # cases.
 213                 def get_time_timer(timer=timer, sum=sum):
 214                     return sum(timer())
 215                 self.get_time = get_time_timer
 216         self.t = self.get_time()
 217         self.simulate_call('profiler')
 218
 219     # Heavily optimized dispatch routine for os.times() timer
 220
 221     def trace_dispatch(self, frame, event, arg):
 222         timer = self.timer
 223         t = timer()
 224         t = t[0] + t[1] - self.t - self.bias
 225
 226         if event == "c_call":
 227             self.c_func_name = arg.__name__
 228
 229         if self.dispatch[event](self, frame,t):
 230             t = timer()
 231             self.t = t[0] + t[1]
 232         else:
 233             r = timer()
 234             self.t = r[0] + r[1] - t # put back unrecorded delta
 235
 236     # Dispatch routine for best timer program (return = scalar, fastest if
 237     # an integer but float works too -- and time.clock() relies on that).
 238
 239     def trace_dispatch_i(self, frame, event, arg):
 240         timer = self.timer
 241         t = timer() - self.t - self.bias
 242
 243         if event == "c_call":
 244             self.c_func_name = arg.__name__
 245
 246         if self.dispatch[event](self, frame, t):
 247             self.t = timer()
 248         else:
 249             self.t = timer() - t  # put back unrecorded delta
 250
 251     # Dispatch routine for macintosh (timer returns time in ticks of
 252     # 1/60th second)
 253
 254     def trace_dispatch_mac(self, frame, event, arg):
 255         timer = self.timer
 256         t = timer()/60.0 - self.t - self.bias
 257
 258         if event == "c_call":
 259             self.c_func_name = arg.__name__
 260
 261         if self.dispatch[event](self, frame, t):
 262             self.t = timer()/60.0
 263         else:
 264             self.t = timer()/60.0 - t  # put back unrecorded delta
 265
 266     # SLOW generic dispatch routine for timer returning lists of numbers
 267
 268     def trace_dispatch_l(self, frame, event, arg):
 269         get_time = self.get_time
 270         t = get_time() - self.t - self.bias
 271
 272         if event == "c_call":
 273             self.c_func_name = arg.__name__
 274
 275         if self.dispatch[event](self, frame, t):
 276             self.t = get_time()
 277         else:
 278             self.t = get_time() - t # put back unrecorded delta
 279
 280     # In the event handlers, the first 3 elements of self.cur are unpacked
 281     # into vrbls w/ 3-letter names.  The last two characters are meant to be
 282     # mnemonic:
 283     #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
 284     #     _it  self.cur[1] "internal time" time spent directly in the function
 285     #     _et  self.cur[2] "external time" time spent in subfunctions
 286
 287     def trace_dispatch_exception(self, frame, t):
 288         rpt, rit, ret, rfn, rframe, rcur = self.cur
 289         if (rframe is not frame) and rcur:
 290             return self.trace_dispatch_return(rframe, t)
 291         self.cur = rpt, rit+t, ret, rfn, rframe, rcur
 292         return 1
 293
 294
 295     def trace_dispatch_call(self, frame, t):
 296         if self.cur and frame.f_back is not self.cur[-2]:
 297             rpt, rit, ret, rfn, rframe, rcur = self.cur
 298             if not isinstance(rframe, Profile.fake_frame):
 299                 assert rframe.f_back is frame.f_back, ("Bad call", rfn,
 300                                                        rframe, rframe.f_back,
 301                                                        frame, frame.f_back)
 302                 self.trace_dispatch_return(rframe, 0)
 303                 assert (self.cur is None or \
 304                         frame.f_back is self.cur[-2]), ("Bad call",
 305                                                         self.cur[-3])
 306         fcode = frame.f_code
 307         fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
 308         self.cur = (t, 0, 0, fn, frame, self.cur)
 309         timings = self.timings
 310         if fn in timings:
 311             cc, ns, tt, ct, callers = timings[fn]
 312             timings[fn] = cc, ns + 1, tt, ct, callers
 313         else:
 314             timings[fn] = 0, 0, 0, 0, {}
 315         return 1
 316
 317     def trace_dispatch_c_call (self, frame, t):
 318         fn = ("", 0, self.c_func_name)
 319         self.cur = (t, 0, 0, fn, frame, self.cur)
 320         timings = self.timings
 321         if fn in timings:
 322             cc, ns, tt, ct, callers = timings[fn]
 323             timings[fn] = cc, ns+1, tt, ct, callers
 324         else:
 325             timings[fn] = 0, 0, 0, 0, {}
 326         return 1
 327
 328     def trace_dispatch_return(self, frame, t):
 329         if frame is not self.cur[-2]:
 330             assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
 331             self.trace_dispatch_return(self.cur[-2], 0)
 332
 333         # Prefix "r" means part of the Returning or exiting frame.
 334         # Prefix "p" means part of the Previous or Parent or older frame.
 335
 336         rpt, rit, ret, rfn, frame, rcur = self.cur
 337         rit = rit + t
 338         frame_total = rit + ret
 339
 340         ppt, pit, pet, pfn, pframe, pcur = rcur
 341         self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
 342
 343         timings = self.timings
 344         cc, ns, tt, ct, callers = timings[rfn]
 345         if not ns:
 346             # This is the only occurrence of the function on the stack.
 347             # Else this is a (directly or indirectly) recursive call, and
 348             # its cumulative time will get updated when the topmost call to
 349             # it returns.
 350             ct = ct + frame_total
 351             cc = cc + 1
 352
 353         if pfn in callers:
 354             callers[pfn] = callers[pfn] + 1  # hack: gather more
 355             # stats such as the amount of time added to ct courtesy
 356             # of this specific call, and the contribution to cc
 357             # courtesy of this call.
 358         else:
 359             callers[pfn] = 1
 360
 361         timings[rfn] = cc, ns - 1, tt + rit, ct, callers
 362
 363         return 1
 364
 365
 366     dispatch = {
 367         "call": trace_dispatch_call,
 368         "exception": trace_dispatch_exception,
 369         "return": trace_dispatch_return,
 370         "c_call": trace_dispatch_c_call,
 371         "c_exception": trace_dispatch_return,  # the C function returned
 372         "c_return": trace_dispatch_return,
 373         }
 374
 375
 376     # The next few functions play with self.cmd. By carefully preloading
 377     # our parallel stack, we can force the profiled result to include
 378     # an arbitrary string as the name of the calling function.
 379     # We use self.cmd as that string, and the resulting stats look
 380     # very nice :-).
 381
 382     def set_cmd(self, cmd):
 383         if self.cur[-1]: return   # already set
 384         self.cmd = cmd
 385         self.simulate_call(cmd)
 386
 387     class fake_code:
 388         def __init__(self, filename, line, name):
 389             self.co_filename = filename
 390             self.co_line = line
 391             self.co_name = name
 392             self.co_firstlineno = 0
 393
 394         def __repr__(self):
 395             return repr((self.co_filename, self.co_line, self.co_name))
 396
 397     class fake_frame:
 398         def __init__(self, code, prior):
 399             self.f_code = code
 400             self.f_back = prior
 401
 402     def simulate_call(self, name):
 403         code = self.fake_code('profile', 0, name)
 404         if self.cur:
 405             pframe = self.cur[-2]
 406         else:
 407             pframe = None
 408         frame = self.fake_frame(code, pframe)
 409         self.dispatch['call'](self, frame, 0)
 410
 411     # collect stats from pending stack, including getting final
 412     # timings for self.cmd frame.
 413
 414     def simulate_cmd_complete(self):
 415         get_time = self.get_time
 416         t = get_time() - self.t
 417         while self.cur[-1]:
 418             # We *can* cause assertion errors here if
 419             # dispatch_trace_return checks for a frame match!
 420             self.dispatch['return'](self, self.cur[-2], t)
 421             t = 0
 422         self.t = get_time() - t
 423
 424
 425     def print_stats(self, sort=-1):
 426         import pstats
 427         pstats.Stats(self).strip_dirs().sort_stats(sort). \
 428                   print_stats()
 429
 430     def dump_stats(self, file):
 431         f = open(file, 'wb')
 432         self.create_stats()
 433         marshal.dump(self.stats, f)
 434         f.close()
 435
 436     def create_stats(self):
 437         self.simulate_cmd_complete()
 438         self.snapshot_stats()
 439
 440     def snapshot_stats(self):
 441         self.stats = {}
 442         for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
 443             callers = callers.copy()
 444             nc = 0
 445             for callcnt in callers.itervalues():
 446                 nc += callcnt
 447             self.stats[func] = cc, nc, tt, ct, callers
 448
 449
 450     # The following two methods can be called by clients to use
 451     # a profiler to profile a statement, given as a string.
 452
 453     def run(self, cmd):
 454         import __main__
 455         dict = __main__.__dict__
 456         return self.runctx(cmd, dict, dict)
 457
 458     def runctx(self, cmd, globals, locals):
 459         self.set_cmd(cmd)
 460         sys.setprofile(self.dispatcher)
 461         try:
 462             exec cmd in globals, locals
 463         finally:
 464             sys.setprofile(None)
 465         return self
 466
 467     # This method is more useful to profile a single function call.
 468     def runcall(self, func, *args, **kw):
 469         self.set_cmd(repr(func))
 470         sys.setprofile(self.dispatcher)
 471         try:
 472             return func(*args, **kw)
 473         finally:
 474             sys.setprofile(None)
 475
 476
 477     #******************************************************************
 478     # The following calculates the overhead for using a profiler.  The
 479     # problem is that it takes a fair amount of time for the profiler
 480     # to stop the stopwatch (from the time it receives an event).
 481     # Similarly, there is a delay from the time that the profiler
 482     # re-starts the stopwatch before the user's code really gets to
 483     # continue.  The following code tries to measure the difference on
 484     # a per-event basis.
 485     #
 486     # Note that this difference is only significant if there are a lot of
 487     # events, and relatively little user code per event.  For example,
 488     # code with small functions will typically benefit from having the
 489     # profiler calibrated for the current platform.  This *could* be
 490     # done on the fly during init() time, but it is not worth the
 491     # effort.  Also note that if too large a value specified, then
 492     # execution time on some functions will actually appear as a
 493     # negative number.  It is *normal* for some functions (with very
 494     # low call counts) to have such negative stats, even if the
 495     # calibration figure is "correct."
 496     #
 497     # One alternative to profile-time calibration adjustments (i.e.,
 498     # adding in the magic little delta during each event) is to track
 499     # more carefully the number of events (and cumulatively, the number
 500     # of events during sub functions) that are seen.  If this were
 501     # done, then the arithmetic could be done after the fact (i.e., at
 502     # display time).  Currently, we track only call/return events.
 503     # These values can be deduced by examining the callees and callers
 504     # vectors for each functions.  Hence we *can* almost correct the
 505     # internal time figure at print time (note that we currently don't
 506     # track exception event processing counts).  Unfortunately, there
 507     # is currently no similar information for cumulative sub-function
 508     # time.  It would not be hard to "get all this info" at profiler
 509     # time.  Specifically, we would have to extend the tuples to keep
 510     # counts of this in each frame, and then extend the defs of timing
 511     # tuples to include the significant two figures. I'm a bit fearful
 512     # that this additional feature will slow the heavily optimized
 513     # event/time ratio (i.e., the profiler would run slower, fur a very
 514     # low "value added" feature.)
 515     #**************************************************************
 516
 517     def calibrate(self, m, verbose=0):
 518         if self.__class__ is not Profile:
 519             raise TypeError("Subclasses must override .calibrate().")
 520
 521         saved_bias = self.bias
 522         self.bias = 0
 523         try:
 524             return self._calibrate_inner(m, verbose)
 525         finally:
 526             self.bias = saved_bias
 527
 528     def _calibrate_inner(self, m, verbose):
 529         get_time = self.get_time
 530
 531         # Set up a test case to be run with and without profiling.  Include
 532         # lots of calls, because we're trying to quantify stopwatch overhead.
 533         # Do not raise any exceptions, though, because we want to know
 534         # exactly how many profile events are generated (one call event, +
 535         # one return event, per Python-level call).
 536
 537         def f1(n):
 538             for i in range(n):
 539                 x = 1
 540
 541         def f(m, f1=f1):
 542             for i in range(m):
 543                 f1(100)
 544
 545         f(m)    # warm up the cache
 546
 547         # elapsed_noprofile <- time f(m) takes without profiling.
 548         t0 = get_time()
 549         f(m)
 550         t1 = get_time()
 551         elapsed_noprofile = t1 - t0
 552         if verbose:
 553             print "elapsed time without profiling =", elapsed_noprofile
 554
 555         # elapsed_profile <- time f(m) takes with profiling.  The difference
 556         # is profiling overhead, only some of which the profiler subtracts
 557         # out on its own.
 558         p = Profile()
 559         t0 = get_time()
 560         p.runctx('f(m)', globals(), locals())
 561         t1 = get_time()
 562         elapsed_profile = t1 - t0
 563         if verbose:
 564             print "elapsed time with profiling =", elapsed_profile
 565
 566         # reported_time <- "CPU seconds" the profiler charged to f and f1.
 567         total_calls = 0.0
 568         reported_time = 0.0
 569         for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
 570                 p.timings.items():
 571             if funcname in ("f", "f1"):
 572                 total_calls += cc
 573                 reported_time += tt
 574
 575         if verbose:
 576             print "'CPU seconds' profiler reported =", reported_time
 577             print "total # calls =", total_calls
 578         if total_calls != m + 1:
 579             raise ValueError("internal error: total calls = %d" % total_calls)
 580
 581         # reported_time - elapsed_noprofile = overhead the profiler wasn't
 582         # able to measure.  Divide by twice the number of calls (since there
 583         # are two profiler events per call in this test) to get the hidden
 584         # overhead per event.
 585         mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
 586         if verbose:
 587             print "mean stopwatch overhead per profile event =", mean
 588         return mean
 589
 590 #****************************************************************************
 591 def Stats(*args):
 592     print 'Report generating functions are in the "pstats" module\a'
 593
 594 def main():
 595     usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
 596     parser = OptionParser(usage=usage)
 597     parser.allow_interspersed_args = False
 598     parser.add_option('-o', '--outfile', dest="outfile",
 599         help="Save stats to <outfile>", default=None)
 600     parser.add_option('-s', '--sort', dest="sort",
 601         help="Sort order when printing to stdout, based on pstats.Stats class", default=-1)
 602
 603     if not sys.argv[1:]:
 604         parser.print_usage()
 605         sys.exit(2)
 606
 607     (options, args) = parser.parse_args()
 608
 609     if (len(args) > 0):
 610         sys.argv[:] = args
 611         sys.path.insert(0, os.path.dirname(sys.argv[0]))
 612         run('execfile(%r)' % (sys.argv[0],), options.outfile, options.sort)
 613     else:
 614         parser.print_usage()
 615     return parser
 616
 617 # When invoked as main program, invoke the profiler on a script
 618 if __name__ == '__main__':
 619     main()