3 # Class for profiling python code. rev 1.0 6/2/94
5 # Based on prior profile module by Sjoerd Mullender...
6 # which was hacked somewhat by: Guido van Rossum
8 """Class for profiling Python code."""
10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
11 # Written by James Roskind
13 # Permission to use, copy, modify, and distribute this Python software
14 # and its associated documentation for any purpose (subject to the
15 # restriction in the following sentence) without fee is hereby granted,
16 # provided that the above copyright notice appears in all copies, and
17 # that both that copyright notice and this permission notice appear in
18 # supporting documentation, and that the name of InfoSeek not be used in
19 # advertising or publicity pertaining to distribution of the software
20 # without specific, written prior permission. This permission is
21 # explicitly restricted to the copying and modification of the software
22 # to remain in Python, compiled Python, or other languages (such as C)
23 # wherein the modified or derived code is exclusively imported into a
26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
40 from optparse
import OptionParser
42 __all__
= ["run", "runctx", "help", "Profile"]
44 # Sample timer for use with
48 # i_count = i_count + 1
50 #itimes = integer_timer # replace with C coded timer returning integers
52 #**************************************************************************
53 # The following are the static member functions for the profiler class
54 # Note that an instance of Profile() is *not* needed to call them.
55 #**************************************************************************
57 def run(statement
, filename
=None, sort
=-1):
58 """Run statement under profiler optionally saving results in filename
60 This function takes a single argument that can be passed to the
61 "exec" statement, and an optional file name. In all cases this
62 routine attempts to "exec" its first argument and gather profiling
63 statistics from the execution. If no file name is present, then this
64 function automatically prints a simple profiling report, sorted by the
65 standard name string (file/line/function-name) that is presented in
70 prof
= prof
.run(statement
)
73 if filename
is not None:
74 prof
.dump_stats(filename
)
76 return prof
.print_stats(sort
)
78 def runctx(statement
, globals, locals, filename
=None):
79 """Run statement under profiler, supplying your own globals and locals,
80 optionally saving results in filename.
82 statement and filename have the same semantics as profile.run
86 prof
= prof
.runctx(statement
, globals, locals)
90 if filename
is not None:
91 prof
.dump_stats(filename
)
93 return prof
.print_stats()
95 # Backwards compatibility.
97 print "Documentation for the profile module can be found "
98 print "in the Python Library Reference, section 'The Python Profiler'."
102 def _get_time_mac(timer
=MacOS
.GetTicks
):
103 return timer() / 60.0
105 if hasattr(os
, "times"):
106 def _get_time_times(timer
=os
.times
):
110 # Using getrusage(3) is better than clock(3) if available:
111 # on some systems (e.g. FreeBSD), getrusage has a higher resolution
112 # Furthermore, on a POSIX system, returns microseconds, which
113 # wrap around after 36min.
117 resgetrusage
= lambda: resource
.getrusage(resource
.RUSAGE_SELF
)
118 def _get_time_resource(timer
=resgetrusage
):
128 self.cur is always a tuple. Each such tuple corresponds to a stack
129 frame that is currently active (self.cur[-2]). The following are the
130 definitions of its members. We use this external "parallel stack" to
131 avoid contaminating the program that we are profiling. (old profiler
132 used to write into the frames local dictionary!!) Derived classes
133 can change the definition of some entries, as long as they leave
134 [-2:] intact (frame and previous tuple). In case an internal error is
135 detected, the -3 element is used as the function name.
137 [ 0] = Time that needs to be charged to the parent frame's function.
138 It is used so that a function call will not have to access the
139 timing data for the parent frame.
140 [ 1] = Total time spent in this frame's function, excluding time in
141 subfunctions (this latter is tallied in cur[2]).
142 [ 2] = Total time spent in subfunctions, excluding time executing the
143 frame's function (this latter is tallied in cur[1]).
144 [-3] = Name of the function that corresponds to this frame.
145 [-2] = Actual frame that we correspond to (used to sync exception handling).
146 [-1] = Our parent 6-tuple (corresponds to frame.f_back).
148 Timing data for each function is stored as a 5-tuple in the dictionary
149 self.timings[]. The index is always the name stored in self.cur[-3].
150 The following are the definitions of the members:
152 [0] = The number of times this function was called, not counting direct
153 or indirect recursion,
154 [1] = Number of times this function appears on the stack, minus one
155 [2] = Total time spent internal to this function
156 [3] = Cumulative time that this function was present on the stack. In
157 non-recursive functions, this is the total execution time from start
158 to finish of each invocation of a function, including time spent in
160 [4] = A dictionary indicating for each function name, the number of times
164 bias
= 0 # calibration constant
166 def __init__(self
, timer
=None, bias
=None):
170 self
.c_func_name
= ""
174 self
.bias
= bias
# Materialize in local dict for lookup speed.
178 self
.timer
= resgetrusage
179 self
.dispatcher
= self
.trace_dispatch
180 self
.get_time
= _get_time_resource
181 elif os
.name
== 'mac':
182 self
.timer
= MacOS
.GetTicks
183 self
.dispatcher
= self
.trace_dispatch_mac
184 self
.get_time
= _get_time_mac
185 elif hasattr(time
, 'clock'):
186 self
.timer
= self
.get_time
= time
.clock
187 self
.dispatcher
= self
.trace_dispatch_i
188 elif hasattr(os
, 'times'):
189 self
.timer
= os
.times
190 self
.dispatcher
= self
.trace_dispatch
191 self
.get_time
= _get_time_times
193 self
.timer
= self
.get_time
= time
.time
194 self
.dispatcher
= self
.trace_dispatch_i
197 t
= self
.timer() # test out timer function
201 self
.get_time
= timer
202 self
.dispatcher
= self
.trace_dispatch_i
205 self
.dispatcher
= self
.trace_dispatch
207 self
.dispatcher
= self
.trace_dispatch_l
208 # This get_time() implementation needs to be defined
209 # here to capture the passed-in timer in the parameter
210 # list (for performance). Note that we can't assume
211 # the timer() result contains two values in all
213 def get_time_timer(timer
=timer
, sum=sum):
215 self
.get_time
= get_time_timer
216 self
.t
= self
.get_time()
217 self
.simulate_call('profiler')
219 # Heavily optimized dispatch routine for os.times() timer
221 def trace_dispatch(self
, frame
, event
, arg
):
224 t
= t
[0] + t
[1] - self
.t
- self
.bias
226 if event
== "c_call":
227 self
.c_func_name
= arg
.__name
__
229 if self
.dispatch
[event
](self
, frame
,t
):
234 self
.t
= r
[0] + r
[1] - t
# put back unrecorded delta
236 # Dispatch routine for best timer program (return = scalar, fastest if
237 # an integer but float works too -- and time.clock() relies on that).
239 def trace_dispatch_i(self
, frame
, event
, arg
):
241 t
= timer() - self
.t
- self
.bias
243 if event
== "c_call":
244 self
.c_func_name
= arg
.__name
__
246 if self
.dispatch
[event
](self
, frame
, t
):
249 self
.t
= timer() - t
# put back unrecorded delta
251 # Dispatch routine for macintosh (timer returns time in ticks of
254 def trace_dispatch_mac(self
, frame
, event
, arg
):
256 t
= timer()/60.0 - self
.t
- self
.bias
258 if event
== "c_call":
259 self
.c_func_name
= arg
.__name
__
261 if self
.dispatch
[event
](self
, frame
, t
):
262 self
.t
= timer()/60.0
264 self
.t
= timer()/60.0 - t
# put back unrecorded delta
266 # SLOW generic dispatch routine for timer returning lists of numbers
268 def trace_dispatch_l(self
, frame
, event
, arg
):
269 get_time
= self
.get_time
270 t
= get_time() - self
.t
- self
.bias
272 if event
== "c_call":
273 self
.c_func_name
= arg
.__name
__
275 if self
.dispatch
[event
](self
, frame
, t
):
278 self
.t
= get_time() - t
# put back unrecorded delta
280 # In the event handlers, the first 3 elements of self.cur are unpacked
281 # into vrbls w/ 3-letter names. The last two characters are meant to be
283 # _pt self.cur[0] "parent time" time to be charged to parent frame
284 # _it self.cur[1] "internal time" time spent directly in the function
285 # _et self.cur[2] "external time" time spent in subfunctions
287 def trace_dispatch_exception(self
, frame
, t
):
288 rpt
, rit
, ret
, rfn
, rframe
, rcur
= self
.cur
289 if (rframe
is not frame
) and rcur
:
290 return self
.trace_dispatch_return(rframe
, t
)
291 self
.cur
= rpt
, rit
+t
, ret
, rfn
, rframe
, rcur
295 def trace_dispatch_call(self
, frame
, t
):
296 if self
.cur
and frame
.f_back
is not self
.cur
[-2]:
297 rpt
, rit
, ret
, rfn
, rframe
, rcur
= self
.cur
298 if not isinstance(rframe
, Profile
.fake_frame
):
299 assert rframe
.f_back
is frame
.f_back
, ("Bad call", rfn
,
300 rframe
, rframe
.f_back
,
302 self
.trace_dispatch_return(rframe
, 0)
303 assert (self
.cur
is None or \
304 frame
.f_back
is self
.cur
[-2]), ("Bad call",
307 fn
= (fcode
.co_filename
, fcode
.co_firstlineno
, fcode
.co_name
)
308 self
.cur
= (t
, 0, 0, fn
, frame
, self
.cur
)
309 timings
= self
.timings
311 cc
, ns
, tt
, ct
, callers
= timings
[fn
]
312 timings
[fn
] = cc
, ns
+ 1, tt
, ct
, callers
314 timings
[fn
] = 0, 0, 0, 0, {}
317 def trace_dispatch_c_call (self
, frame
, t
):
318 fn
= ("", 0, self
.c_func_name
)
319 self
.cur
= (t
, 0, 0, fn
, frame
, self
.cur
)
320 timings
= self
.timings
322 cc
, ns
, tt
, ct
, callers
= timings
[fn
]
323 timings
[fn
] = cc
, ns
+1, tt
, ct
, callers
325 timings
[fn
] = 0, 0, 0, 0, {}
328 def trace_dispatch_return(self
, frame
, t
):
329 if frame
is not self
.cur
[-2]:
330 assert frame
is self
.cur
[-2].f_back
, ("Bad return", self
.cur
[-3])
331 self
.trace_dispatch_return(self
.cur
[-2], 0)
333 # Prefix "r" means part of the Returning or exiting frame.
334 # Prefix "p" means part of the Previous or Parent or older frame.
336 rpt
, rit
, ret
, rfn
, frame
, rcur
= self
.cur
338 frame_total
= rit
+ ret
340 ppt
, pit
, pet
, pfn
, pframe
, pcur
= rcur
341 self
.cur
= ppt
, pit
+ rpt
, pet
+ frame_total
, pfn
, pframe
, pcur
343 timings
= self
.timings
344 cc
, ns
, tt
, ct
, callers
= timings
[rfn
]
346 # This is the only occurrence of the function on the stack.
347 # Else this is a (directly or indirectly) recursive call, and
348 # its cumulative time will get updated when the topmost call to
350 ct
= ct
+ frame_total
354 callers
[pfn
] = callers
[pfn
] + 1 # hack: gather more
355 # stats such as the amount of time added to ct courtesy
356 # of this specific call, and the contribution to cc
357 # courtesy of this call.
361 timings
[rfn
] = cc
, ns
- 1, tt
+ rit
, ct
, callers
367 "call": trace_dispatch_call
,
368 "exception": trace_dispatch_exception
,
369 "return": trace_dispatch_return
,
370 "c_call": trace_dispatch_c_call
,
371 "c_exception": trace_dispatch_return
, # the C function returned
372 "c_return": trace_dispatch_return
,
376 # The next few functions play with self.cmd. By carefully preloading
377 # our parallel stack, we can force the profiled result to include
378 # an arbitrary string as the name of the calling function.
379 # We use self.cmd as that string, and the resulting stats look
382 def set_cmd(self
, cmd
):
383 if self
.cur
[-1]: return # already set
385 self
.simulate_call(cmd
)
388 def __init__(self
, filename
, line
, name
):
389 self
.co_filename
= filename
392 self
.co_firstlineno
= 0
395 return repr((self
.co_filename
, self
.co_line
, self
.co_name
))
398 def __init__(self
, code
, prior
):
402 def simulate_call(self
, name
):
403 code
= self
.fake_code('profile', 0, name
)
405 pframe
= self
.cur
[-2]
408 frame
= self
.fake_frame(code
, pframe
)
409 self
.dispatch
['call'](self
, frame
, 0)
411 # collect stats from pending stack, including getting final
412 # timings for self.cmd frame.
414 def simulate_cmd_complete(self
):
415 get_time
= self
.get_time
416 t
= get_time() - self
.t
418 # We *can* cause assertion errors here if
419 # dispatch_trace_return checks for a frame match!
420 self
.dispatch
['return'](self
, self
.cur
[-2], t
)
422 self
.t
= get_time() - t
425 def print_stats(self
, sort
=-1):
427 pstats
.Stats(self
).strip_dirs().sort_stats(sort
). \
430 def dump_stats(self
, file):
433 marshal
.dump(self
.stats
, f
)
436 def create_stats(self
):
437 self
.simulate_cmd_complete()
438 self
.snapshot_stats()
440 def snapshot_stats(self
):
442 for func
, (cc
, ns
, tt
, ct
, callers
) in self
.timings
.iteritems():
443 callers
= callers
.copy()
445 for callcnt
in callers
.itervalues():
447 self
.stats
[func
] = cc
, nc
, tt
, ct
, callers
450 # The following two methods can be called by clients to use
451 # a profiler to profile a statement, given as a string.
455 dict = __main__
.__dict
__
456 return self
.runctx(cmd
, dict, dict)
458 def runctx(self
, cmd
, globals, locals):
460 sys
.setprofile(self
.dispatcher
)
462 exec cmd
in globals, locals
467 # This method is more useful to profile a single function call.
468 def runcall(self
, func
, *args
, **kw
):
469 self
.set_cmd(repr(func
))
470 sys
.setprofile(self
.dispatcher
)
472 return func(*args
, **kw
)
477 #******************************************************************
478 # The following calculates the overhead for using a profiler. The
479 # problem is that it takes a fair amount of time for the profiler
480 # to stop the stopwatch (from the time it receives an event).
481 # Similarly, there is a delay from the time that the profiler
482 # re-starts the stopwatch before the user's code really gets to
483 # continue. The following code tries to measure the difference on
486 # Note that this difference is only significant if there are a lot of
487 # events, and relatively little user code per event. For example,
488 # code with small functions will typically benefit from having the
489 # profiler calibrated for the current platform. This *could* be
490 # done on the fly during init() time, but it is not worth the
491 # effort. Also note that if too large a value specified, then
492 # execution time on some functions will actually appear as a
493 # negative number. It is *normal* for some functions (with very
494 # low call counts) to have such negative stats, even if the
495 # calibration figure is "correct."
497 # One alternative to profile-time calibration adjustments (i.e.,
498 # adding in the magic little delta during each event) is to track
499 # more carefully the number of events (and cumulatively, the number
500 # of events during sub functions) that are seen. If this were
501 # done, then the arithmetic could be done after the fact (i.e., at
502 # display time). Currently, we track only call/return events.
503 # These values can be deduced by examining the callees and callers
504 # vectors for each functions. Hence we *can* almost correct the
505 # internal time figure at print time (note that we currently don't
506 # track exception event processing counts). Unfortunately, there
507 # is currently no similar information for cumulative sub-function
508 # time. It would not be hard to "get all this info" at profiler
509 # time. Specifically, we would have to extend the tuples to keep
510 # counts of this in each frame, and then extend the defs of timing
511 # tuples to include the significant two figures. I'm a bit fearful
512 # that this additional feature will slow the heavily optimized
513 # event/time ratio (i.e., the profiler would run slower, fur a very
514 # low "value added" feature.)
515 #**************************************************************
517 def calibrate(self
, m
, verbose
=0):
518 if self
.__class
__ is not Profile
:
519 raise TypeError("Subclasses must override .calibrate().")
521 saved_bias
= self
.bias
524 return self
._calibrate
_inner
(m
, verbose
)
526 self
.bias
= saved_bias
528 def _calibrate_inner(self
, m
, verbose
):
529 get_time
= self
.get_time
531 # Set up a test case to be run with and without profiling. Include
532 # lots of calls, because we're trying to quantify stopwatch overhead.
533 # Do not raise any exceptions, though, because we want to know
534 # exactly how many profile events are generated (one call event, +
535 # one return event, per Python-level call).
545 f(m
) # warm up the cache
547 # elapsed_noprofile <- time f(m) takes without profiling.
551 elapsed_noprofile
= t1
- t0
553 print "elapsed time without profiling =", elapsed_noprofile
555 # elapsed_profile <- time f(m) takes with profiling. The difference
556 # is profiling overhead, only some of which the profiler subtracts
560 p
.runctx('f(m)', globals(), locals())
562 elapsed_profile
= t1
- t0
564 print "elapsed time with profiling =", elapsed_profile
566 # reported_time <- "CPU seconds" the profiler charged to f and f1.
569 for (filename
, line
, funcname
), (cc
, ns
, tt
, ct
, callers
) in \
571 if funcname
in ("f", "f1"):
576 print "'CPU seconds' profiler reported =", reported_time
577 print "total # calls =", total_calls
578 if total_calls
!= m
+ 1:
579 raise ValueError("internal error: total calls = %d" % total_calls
)
581 # reported_time - elapsed_noprofile = overhead the profiler wasn't
582 # able to measure. Divide by twice the number of calls (since there
583 # are two profiler events per call in this test) to get the hidden
584 # overhead per event.
585 mean
= (reported_time
- elapsed_noprofile
) / 2.0 / total_calls
587 print "mean stopwatch overhead per profile event =", mean
590 #****************************************************************************
592 print 'Report generating functions are in the "pstats" module\a'
595 usage
= "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
596 parser
= OptionParser(usage
=usage
)
597 parser
.allow_interspersed_args
= False
598 parser
.add_option('-o', '--outfile', dest
="outfile",
599 help="Save stats to <outfile>", default
=None)
600 parser
.add_option('-s', '--sort', dest
="sort",
601 help="Sort order when printing to stdout, based on pstats.Stats class", default
=-1)
607 (options
, args
) = parser
.parse_args()
611 sys
.path
.insert(0, os
.path
.dirname(sys
.argv
[0]))
612 run('execfile(%r)' % (sys
.argv
[0],), options
.outfile
, options
.sort
)
617 # When invoked as main program, invoke the profiler on a script
618 if __name__
== '__main__':