More maybe-terminate-block.
[sbcl.git] / contrib / sb-sprof / sb-sprof.lisp
blob705bea26876f36db8d5ad60892ffc16ddd0f364b
1 ;;; Copyright (C) 2003 Gerd Moellmann <gerd.moellmann@t-online.de>
2 ;;; All rights reserved.
3 ;;;
4 ;;; Redistribution and use in source and binary forms, with or without
5 ;;; modification, are permitted provided that the following conditions
6 ;;; are met:
7 ;;;
8 ;;; 1. Redistributions of source code must retain the above copyright
9 ;;; notice, this list of conditions and the following disclaimer.
10 ;;; 2. Redistributions in binary form must reproduce the above copyright
11 ;;; notice, this list of conditions and the following disclaimer in the
12 ;;; documentation and/or other materials provided with the distribution.
13 ;;; 3. The name of the author may not be used to endorse or promote
14 ;;; products derived from this software without specific prior written
15 ;;; permission.
16 ;;;
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
21 ;;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 ;;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
23 ;;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 ;;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
27 ;;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28 ;;; DAMAGE.
30 ;;; Statistical profiler.
32 ;;; Overview:
33 ;;;
34 ;;; This profiler arranges for SIGPROF interrupts to interrupt a
35 ;;; running program at regular intervals. Each time a SIGPROF occurs,
36 ;;; the current program counter and return address is recorded in a
37 ;;; vector, until a configurable maximum number of samples have been
38 ;;; taken.
39 ;;;
40 ;;; A profiling report is generated from the samples array by
41 ;;; determining the Lisp functions corresponding to the recorded
42 ;;; addresses. Each program counter/return address pair forms one
43 ;;; edge in a call graph.
45 ;;; Problems:
46 ;;;
47 ;;; The code being generated on x86 makes determining callers reliably
48 ;;; something between extremely difficult and impossible. Example:
49 ;;;
50 ;;; 10979F00: .entry eval::eval-stack-args(arg-count)
51 ;;; 18: pop dword ptr [ebp-8]
52 ;;; 1B: lea esp, [ebp-32]
53 ;;; 1E: mov edi, edx
54 ;;;
55 ;;; 20: cmp ecx, 4
56 ;;; 23: jne L4
57 ;;; 29: mov [ebp-12], edi
58 ;;; 2C: mov dword ptr [ebp-16], #x28F0000B ; nil
59 ;;; ; No-arg-parsing entry point
60 ;;; 33: mov dword ptr [ebp-20], 0
61 ;;; 3A: jmp L3
62 ;;; 3C: L0: mov edx, esp
63 ;;; 3E: sub esp, 12
64 ;;; 41: mov eax, [#x10979EF8] ; #<FDEFINITION object for eval::eval-stack-pop>
65 ;;; 47: xor ecx, ecx
66 ;;; 49: mov [edx-4], ebp
67 ;;; 4C: mov ebp, edx
68 ;;; 4E: call dword ptr [eax+5]
69 ;;; 51: mov esp, ebx
70 ;;;
71 ;;; Suppose this function is interrupted by SIGPROF at 4E. At that
72 ;;; point, the frame pointer EBP has been modified so that the
73 ;;; original return address of the caller of eval-stack-args is no
74 ;;; longer where it can be found by x86-call-context, and the new
75 ;;; return address, for the call to eval-stack-pop, is not yet on the
76 ;;; stack. The effect is that x86-call-context returns something
77 ;;; bogus, which leads to wrong edges in the call graph.
78 ;;;
79 ;;; One thing that one might try is filtering cases where the program
80 ;;; is interrupted at a call instruction. But since the above example
81 ;;; of an interrupt at a call instruction isn't the only case where
82 ;;; the stack is something x86-call-context can't really cope with,
83 ;;; this is not a general solution.
84 ;;;
85 ;;; Random ideas for implementation:
86 ;;;
87 ;;; * Space profiler. Sample when new pages are allocated instead of
88 ;;; at SIGPROF.
89 ;;;
90 ;;; * Record a configurable number of callers up the stack. That
91 ;;; could give a more complete graph when there are many small
92 ;;; functions.
93 ;;;
94 ;;; * Print help strings for reports, include hints to the problem
95 ;;; explained above.
96 ;;;
97 ;;; * Make flat report the default since call-graph isn't that
98 ;;; reliable?
100 (defpackage #:sb-sprof
101 (:use #:cl #:sb-ext #:sb-unix #:sb-alien #:sb-sys :sb-int)
102 (:export #:*sample-interval* #:*max-samples* #:*alloc-interval*
103 #:*report-sort-by* #:*report-sort-order*
104 #:start-sampling #:stop-sampling #:with-sampling
105 #:with-profiling #:start-profiling #:stop-profiling
106 #:profile-call-counts #:unprofile-call-counts
107 #:reset #:report))
109 (in-package #:sb-sprof)
112 ;;;; Graph Utilities
114 (defstruct (vertex (:constructor make-vertex)
115 (:constructor make-scc (scc-vertices edges)))
116 (visited nil :type boolean)
117 (root nil :type (or null vertex))
118 (dfn 0 :type fixnum)
119 (edges () :type list)
120 (scc-vertices () :type list))
122 (defstruct edge
123 (vertex (sb-impl::missing-arg) :type vertex))
125 (defstruct graph
126 (vertices () :type list))
128 (declaim (inline scc-p))
129 (defun scc-p (vertex)
130 (not (null (vertex-scc-vertices vertex))))
132 (defmacro do-vertices ((vertex graph) &body body)
133 `(dolist (,vertex (graph-vertices ,graph))
134 ,@body))
136 (defmacro do-edges ((edge edge-to vertex) &body body)
137 `(dolist (,edge (vertex-edges ,vertex))
138 (let ((,edge-to (edge-vertex ,edge)))
139 ,@body)))
141 (defun self-cycle-p (vertex)
142 (do-edges (e to vertex)
143 (when (eq to vertex)
144 (return t))))
146 (defun map-vertices (fn vertices)
147 (dolist (v vertices)
148 (setf (vertex-visited v) nil))
149 (dolist (v vertices)
150 (unless (vertex-visited v)
151 (funcall fn v))))
153 ;;; Eeko Nuutila, Eljas Soisalon-Soininen, around 1992. Improves on
154 ;;; Tarjan's original algorithm by not using the stack when processing
155 ;;; trivial components. Trivial components should appear frequently
156 ;;; in a call-graph such as ours, I think. Same complexity O(V+E) as
157 ;;; Tarjan.
158 (defun strong-components (vertices)
159 (let ((in-component (make-array (length vertices)
160 :element-type 'boolean
161 :initial-element nil))
162 (stack ())
163 (components ())
164 (dfn -1))
165 (labels ((min-root (x y)
166 (let ((rx (vertex-root x))
167 (ry (vertex-root y)))
168 (if (< (vertex-dfn rx) (vertex-dfn ry))
170 ry)))
171 (in-component (v)
172 (aref in-component (vertex-dfn v)))
173 ((setf in-component) (in v)
174 (setf (aref in-component (vertex-dfn v)) in))
175 (vertex-> (x y)
176 (> (vertex-dfn x) (vertex-dfn y)))
177 (visit (v)
178 (setf (vertex-dfn v) (incf dfn)
179 (in-component v) nil
180 (vertex-root v) v
181 (vertex-visited v) t)
182 (do-edges (e w v)
183 (unless (vertex-visited w)
184 (visit w))
185 (unless (in-component w)
186 (setf (vertex-root v) (min-root v w))))
187 (if (eq v (vertex-root v))
188 (loop while (and stack (vertex-> (car stack) v))
189 as w = (pop stack)
190 collect w into this-component
191 do (setf (in-component w) t)
192 finally
193 (setf (in-component v) t)
194 (push (cons v this-component) components))
195 (push v stack))))
196 (map-vertices #'visit vertices)
197 components)))
199 ;;; Given a dag as a list of vertices, return the list sorted
200 ;;; topologically, children first.
201 (defun topological-sort (dag)
202 (let ((sorted ())
203 (dfn -1))
204 (labels ((rec-sort (v)
205 (setf (vertex-visited v) t)
206 (setf (vertex-dfn v) (incf dfn))
207 (dolist (e (vertex-edges v))
208 (unless (vertex-visited (edge-vertex e))
209 (rec-sort (edge-vertex e))))
210 (push v sorted)))
211 (map-vertices #'rec-sort dag)
212 (nreverse sorted))))
214 ;;; Reduce graph G to a dag by coalescing strongly connected components
215 ;;; into vertices. Sort the result topologically.
216 (defun reduce-graph (graph &optional (scc-constructor #'make-scc))
217 (sb-int:collect ((sccs) (trivial))
218 (dolist (c (strong-components (graph-vertices graph)))
219 (if (or (cdr c) (self-cycle-p (car c)))
220 (sb-int:collect ((outgoing))
221 (dolist (v c)
222 (do-edges (e w v)
223 (unless (member w c)
224 (outgoing e))))
225 (sccs (funcall scc-constructor c (outgoing))))
226 (trivial (car c))))
227 (dolist (scc (sccs))
228 (dolist (v (trivial))
229 (do-edges (e w v)
230 (when (member w (vertex-scc-vertices scc))
231 (setf (edge-vertex e) scc)))))
232 (setf (graph-vertices graph)
233 (topological-sort (nconc (sccs) (trivial))))))
235 ;;;; The Profiler
237 (deftype address ()
238 "Type used for addresses, for instance, program counters,
239 code start/end locations etc."
240 '(unsigned-byte #.sb-vm::n-machine-word-bits))
242 (defconstant +unknown-address+ 0
243 "Constant representing an address that cannot be determined.")
245 ;;; A call graph. Vertices are NODE structures, edges are CALL
246 ;;; structures.
247 (defstruct (call-graph (:include graph)
248 (:constructor %make-call-graph))
249 ;; the value of *SAMPLE-INTERVAL* or *ALLOC-INTERVAL* at the time
250 ;; the graph was created (depending on the current allocation mode)
251 (sample-interval (sb-impl::missing-arg) :type number)
252 ;; the sampling-mode that was used for the profiling run
253 (sampling-mode (sb-impl::missing-arg) :type (member :cpu :alloc :time))
254 ;; number of samples taken
255 (nsamples (sb-impl::missing-arg) :type sb-int:index)
256 ;; threads that have been sampled
257 (sampled-threads nil :type list)
258 ;; sample count for samples not in any function
259 (elsewhere-count (sb-impl::missing-arg) :type sb-int:index)
260 ;; a flat list of NODEs, sorted by sample count
261 (flat-nodes () :type list))
263 ;;; A node in a call graph, representing a function that has been
264 ;;; sampled. The edges of a node are CALL structures that represent
265 ;;; functions called from a given node.
266 (defstruct (node (:include vertex)
267 (:constructor %make-node))
268 ;; A numeric label for the node. The most frequently called function
269 ;; gets label 1. This is just for identification purposes in the
270 ;; profiling report.
271 (index 0 :type fixnum)
272 ;; Start and end address of the function's code. Depending on the
273 ;; debug-info, this might be either as absolute addresses for things
274 ;; that won't move around in memory, or as relative offsets from
275 ;; some point for things that might move.
276 (start-pc-or-offset 0 :type address)
277 (end-pc-or-offset 0 :type address)
278 ;; the name of the function
279 (name nil :type t)
280 ;; sample count for this function
281 (count 0 :type fixnum)
282 ;; count including time spent in functions called from this one
283 (accrued-count 0 :type fixnum)
284 ;; the debug-info that this node was created from
285 (debug-info nil :type t)
286 ;; list of NODEs for functions calling this one
287 (callers () :type list)
288 ;; the call count for the function that corresponds to this node (or NIL
289 ;; if call counting wasn't enabled for this function)
290 (call-count nil :type (or null integer)))
292 ;;; A cycle in a call graph. The functions forming the cycle are
293 ;;; found in the SCC-VERTICES slot of the VERTEX structure.
294 (defstruct (cycle (:include node)))
296 ;;; An edge in a call graph. EDGE-VERTEX is the function being
297 ;;; called.
298 (defstruct (call (:include edge)
299 (:constructor make-call (vertex)))
300 ;; number of times the call was sampled
301 (count 1 :type sb-int:index))
303 (defvar *sample-interval* 0.01
304 "Default number of seconds between samples.")
305 (declaim (type number *sample-interval*))
307 (defvar *alloc-interval* 4
308 "Default number of allocation region openings between samples.")
309 (declaim (type number *alloc-interval*))
311 (defvar *max-samples* 50000
312 "Default number of traces taken. This variable is somewhat misnamed:
313 each trace may actually consist of an arbitrary number of samples, depending
314 on the depth of the call stack.")
315 (declaim (type sb-int:index *max-samples*))
317 ;;; Encapsulate all the information about a sampling run
318 (defstruct (samples)
319 ;; When this vector fills up, we allocate a new one and copy over
320 ;; the old contents.
321 (vector (make-array (* *max-samples*
322 ;; Arbitrary guess at how many samples we'll be
323 ;; taking for each trace. The exact amount doesn't
324 ;; matter, this is just to decrease the amount of
325 ;; re-allocation that will need to be done.
327 ;; Each sample takes two cells in the vector
329 :type simple-vector)
330 (trace-count 0 :type sb-int:index)
331 (index 0 :type sb-int:index)
332 (mode nil :type (member :cpu :alloc :time))
333 (sample-interval (sb-int:missing-arg) :type number)
334 (alloc-interval (sb-int:missing-arg) :type number)
335 (max-depth most-positive-fixnum :type number)
336 (max-samples (sb-int:missing-arg) :type sb-int:index)
337 (sampled-threads nil :type list))
339 (defmethod print-object ((samples samples) stream)
340 (print-unreadable-object (samples stream :type t :identity t)
341 (let ((*print-array* nil))
342 (call-next-method))))
344 (defmethod print-object ((call-graph call-graph) stream)
345 (print-unreadable-object (call-graph stream :type t :identity t)
346 (format stream "~d samples" (call-graph-nsamples call-graph))))
348 (defmethod print-object ((node node) stream)
349 (print-unreadable-object (node stream :type t :identity t)
350 (format stream "~s [~d]" (node-name node) (node-index node))))
352 (defmethod print-object ((call call) stream)
353 (print-unreadable-object (call stream :type t :identity t)
354 (format stream "~s [~d]" (node-name (call-vertex call))
355 (node-index (call-vertex call)))))
357 (deftype report-type ()
358 '(member nil :flat :graph))
360 (defvar *sampling-mode* :cpu
361 "Default sampling mode. :CPU for cpu profiling, :ALLOC for allocation
362 profiling, and :TIME for wallclock profiling.")
363 (declaim (type (member :cpu :alloc :time) *sampling-mode*))
365 (defvar *alloc-region-size*
366 #-gencgc
367 (get-page-size)
368 #+gencgc
369 (max sb-vm:gencgc-alloc-granularity sb-vm:gencgc-card-bytes))
370 (declaim (type number *alloc-region-size*))
372 (defvar *samples* nil)
373 (declaim (type (or null samples) *samples*))
375 (defvar *profiling* nil)
376 (declaim (type (member nil :alloc :cpu :time) *profiling*))
377 (defvar *sampling* nil)
378 (declaim (type boolean *sampling*))
380 (defvar *show-progress* nil)
382 (defvar *old-sampling* nil)
384 ;; Call count encapsulation information
385 (defvar *encapsulations* (make-hash-table :test 'equal))
387 (defun turn-off-sampling ()
388 (setq *old-sampling* *sampling*)
389 (setq *sampling* nil))
391 (defun turn-on-sampling ()
392 (setq *sampling* *old-sampling*))
394 (defun show-progress (format-string &rest args)
395 (when *show-progress*
396 (apply #'format t format-string args)
397 (finish-output)))
399 (defun start-sampling ()
400 "Switch on statistical sampling."
401 (setq *sampling* t))
403 (defun stop-sampling ()
404 "Switch off statistical sampling."
405 (setq *sampling* nil))
407 (defmacro with-sampling ((&optional (on t)) &body body)
408 "Evaluate body with statistical sampling turned on or off."
409 `(let ((*sampling* ,on)
410 (sb-vm:*alloc-signal* sb-vm:*alloc-signal*))
411 ,@body))
413 ;;; Return something serving as debug info for address PC.
414 (declaim (inline debug-info))
415 (defun debug-info (pc)
416 (declare (type system-area-pointer pc)
417 (muffle-conditions compiler-note))
418 (let ((code (sb-di::code-header-from-pc pc)))
419 (cond ((not code)
420 (let ((name (sap-foreign-symbol pc)))
421 (if name
422 (values (format nil "foreign function ~a" name)
423 (sap-int pc) :foreign)
424 (values nil (sap-int pc) :foreign))))
426 (let* ((code-header-len (* (sb-kernel:code-header-words code)
427 sb-vm:n-word-bytes))
428 ;; Give up if we land in the 2 or 3 instructions of a
429 ;; code component sans simple-fun that is not an asm routine.
430 ;; While it's conceivable that this could be improved,
431 ;; the problem will be different or nonexistent after
432 ;; funcallable-instances each contain their own trampoline.
433 #+immobile-code
434 (di (unless (typep (sb-kernel:%code-debug-info code)
435 'sb-c::compiled-debug-info)
436 (return-from debug-info
437 (values code (sap-int pc)))))
438 (pc-offset (- (sap-int pc)
439 (- (sb-kernel:get-lisp-obj-address code)
440 sb-vm:other-pointer-lowtag)
441 code-header-len))
442 (df (sb-di::debug-fun-from-pc code pc-offset)))
443 #+immobile-code (declare (ignorable di))
444 (cond ((typep df 'sb-di::bogus-debug-fun)
445 (values code (sap-int pc) nil))
447 ;; The code component might be moved by the GC. Store
448 ;; a PC offset, and reconstruct the data in
449 ;; SAMPLE-PC-FROM-PC-OR-OFFSET.
450 (values df pc-offset nil))
452 (values nil 0 nil))))))))
454 (defun ensure-samples-vector (samples)
455 (let ((vector (samples-vector samples))
456 (index (samples-index samples)))
457 ;; Allocate a new sample vector if the old one is full
458 (if (= (length vector) index)
459 (let ((new-vector (make-array (* 2 index))))
460 (format *trace-output* "Profiler sample vector full (~a traces / ~a samples), doubling the size~%"
461 (samples-trace-count samples)
462 (truncate index 2))
463 (replace new-vector vector)
464 (setf (samples-vector samples) new-vector))
465 vector)))
467 (declaim (inline record))
468 (defun record (samples pc)
469 (declare (type system-area-pointer pc)
470 (muffle-conditions compiler-note))
471 (multiple-value-bind (info pc-or-offset foreign)
472 (debug-info pc)
473 (let ((vector (ensure-samples-vector samples))
474 (index (samples-index samples)))
475 (declare (type simple-vector vector))
476 ;; Allocate a new sample vector if the old one is full
477 (when (= (length vector) index)
478 (let ((new-vector (make-array (* 2 index))))
479 (format *trace-output* "Profiler sample vector full (~a traces / ~a samples), doubling the size~%"
480 (samples-trace-count samples)
481 (truncate index 2))
482 (replace new-vector vector)
483 (setf vector new-vector
484 (samples-vector samples) new-vector)))
485 ;; For each sample, store the debug-info and the PC/offset into
486 ;; adjacent cells.
487 (setf (aref vector index) info
488 (aref vector (1+ index)) pc-or-offset))
489 (incf (samples-index samples) 2)
490 foreign))
492 (defun record-trace-start (samples)
493 ;; Mark the start of the trace.
494 (let ((vector (ensure-samples-vector samples)))
495 (declare (type simple-vector vector))
496 (setf (aref vector (samples-index samples))
497 'trace-start))
498 (incf (samples-index samples) 2))
500 ;;; List of thread currently profiled, or :ALL for all threads.
501 (defvar *profiled-threads* nil)
502 (declaim (type (or list (member :all)) *profiled-threads*))
504 ;;; Thread which runs the wallclock timers, if any.
505 (defvar *timer-thread* nil)
507 (defun profiled-threads ()
508 (let ((profiled-threads *profiled-threads*))
509 (remove *timer-thread*
510 (if (eq :all profiled-threads)
511 (sb-thread:list-all-threads)
512 profiled-threads))))
514 (defun profiled-thread-p (thread)
515 (let ((profiled-threads *profiled-threads*))
516 (or (and (eq :all profiled-threads)
517 (not (eq *timer-thread* thread)))
518 (member thread profiled-threads :test #'eq))))
520 #+(and (or x86 x86-64) (not win32))
521 (progn
522 ;; Ensure that only one thread at a time will be doing profiling stuff.
523 (defvar *profiler-lock* (sb-thread:make-mutex :name "Statistical Profiler"))
524 (defvar *distribution-lock* (sb-thread:make-mutex :name "Wallclock profiling lock"))
526 #+sb-thread
527 (declaim (inline pthread-kill))
528 #+sb-thread
529 (define-alien-routine pthread-kill int (os-thread unsigned-long) (signal int))
531 ;;; A random thread will call this in response to either a timer firing,
532 ;;; This in turn will distribute the notice to those threads we are
533 ;;; interested using SIGPROF.
534 (defun thread-distribution-handler ()
535 (declare (optimize speed (space 0)))
536 #+sb-thread
537 (let ((lock *distribution-lock*))
538 ;; Don't flood the system with more interrupts if the last
539 ;; set is still being delivered.
540 (unless (sb-thread:mutex-value lock)
541 (sb-thread::with-system-mutex (lock)
542 (dolist (thread (profiled-threads))
543 ;; This may occasionally fail to deliver the signal, but that
544 ;; seems better then using kill_thread_safely with it's 1
545 ;; second backoff.
546 (let ((os-thread (sb-thread::thread-os-thread thread)))
547 (when os-thread
548 (pthread-kill os-thread sb-unix:sigprof)))))))
549 #-sb-thread
550 (unix-kill 0 sb-unix:sigprof))
552 (defun sigprof-handler (signal code scp)
553 (declare (ignore signal code) (optimize speed (space 0))
554 (disable-package-locks sb-di::x86-call-context)
555 (muffle-conditions compiler-note)
556 (type system-area-pointer scp))
557 (let ((self sb-thread:*current-thread*)
558 (profiling *profiling*))
559 ;; Turn off allocation counter when it is not needed. Doing this in the
560 ;; signal handler means we don't have to worry about racing with the runtime
561 (unless (eq :alloc profiling)
562 (setf sb-vm::*alloc-signal* nil))
563 (when (and *sampling*
564 ;; Normal SIGPROF gets practically speaking delivered to threads
565 ;; depending on the run time they use, so we need to filter
566 ;; out those we don't care about. For :ALLOC and :TIME profiling
567 ;; only the interesting threads get SIGPROF in the first place.
569 ;; ...except that Darwin at least doesn't seem to work like we
570 ;; would want it to, which makes multithreaded :CPU profiling pretty
571 ;; pointless there -- though it may be that our mach magic is
572 ;; partially to blame?
573 (or (not (eq :cpu profiling)) (profiled-thread-p self)))
574 (sb-thread::with-system-mutex (*profiler-lock* :without-gcing t)
575 (let ((samples *samples*))
576 (when (and samples
577 (< (samples-trace-count samples)
578 (samples-max-samples samples)))
579 (with-alien ((scp (* os-context-t) :local scp))
580 (let* ((pc-ptr (sb-vm:context-pc scp))
581 (fp (sb-vm::context-register scp #.sb-vm::ebp-offset)))
582 ;; foreign code might not have a useful frame
583 ;; pointer in ebp/rbp, so make sure it looks
584 ;; reasonable before walking the stack
585 (unless (sb-di::control-stack-pointer-valid-p (sb-sys:int-sap fp))
586 (record samples pc-ptr)
587 (return-from sigprof-handler nil))
588 (incf (samples-trace-count samples))
589 (pushnew self (samples-sampled-threads samples))
590 (let ((fp (int-sap fp))
591 (ok t))
592 (declare (type system-area-pointer fp pc-ptr))
593 ;; FIXME: How annoying. The XC doesn't store enough
594 ;; type information about SB-DI::X86-CALL-CONTEXT,
595 ;; even if we declaim the ftype explicitly in
596 ;; src/code/debug-int. And for some reason that type
597 ;; information is needed for the inlined version to
598 ;; be compiled without boxing the returned saps. So
599 ;; we declare the correct ftype here manually, even
600 ;; if the compiler should be able to deduce this
601 ;; exact same information.
602 (declare (ftype (function (system-area-pointer)
603 (values (member nil t)
604 system-area-pointer
605 system-area-pointer))
606 sb-di::x86-call-context))
607 (record-trace-start samples)
608 (dotimes (i (samples-max-depth samples))
609 (record samples pc-ptr)
610 (setf (values ok pc-ptr fp)
611 (sb-di::x86-call-context fp))
612 (unless ok
613 (return))))))
614 ;; Reset thread-local allocation counter before interrupts
615 ;; are enabled.
616 (when (eq t sb-vm::*alloc-signal*)
617 (setf sb-vm:*alloc-signal* (1- (samples-alloc-interval samples)))))))))
618 nil))
620 ;; FIXME: On non-x86 platforms we don't yet walk the call stack deeper
621 ;; than one level.
622 #-(or x86 x86-64)
623 (defun sigprof-handler (signal code scp)
624 (declare (ignore signal code))
625 (sb-sys:without-interrupts
626 (let ((samples *samples*))
627 (when (and *sampling*
628 samples
629 (< (samples-trace-count samples)
630 (samples-max-samples samples)))
631 (sb-sys:without-gcing
632 (with-alien ((scp (* os-context-t) :local scp))
633 (locally (declare (optimize (inhibit-warnings 2)))
634 (incf (samples-trace-count samples))
635 (record-trace-start samples)
636 (let ((pc-ptr (sb-vm:context-pc scp))
637 (fp (sb-vm::context-register scp #.sb-vm::cfp-offset)))
638 (unless (eq (record samples pc-ptr) :foreign)
639 (record samples (sap-ref-sap
640 (int-sap fp)
641 (* sb-vm::lra-save-offset sb-vm::n-word-bytes))))))))))))
643 ;;; Return the start address of CODE.
644 (defun code-start (code)
645 (declare (type sb-kernel:code-component code))
646 (sap-int (sb-kernel:code-instructions code)))
648 ;;; Return start and end address of CODE as multiple values.
649 (defun code-bounds (code)
650 (declare (type sb-kernel:code-component code))
651 (let* ((start (code-start code))
652 (end (+ start (sb-kernel:%code-code-size code))))
653 (values start end)))
655 (defmacro with-profiling ((&key (sample-interval '*sample-interval*)
656 (alloc-interval '*alloc-interval*)
657 (max-samples '*max-samples*)
658 (reset nil)
659 (mode '*sampling-mode*)
660 (loop nil)
661 (max-depth most-positive-fixnum)
662 show-progress
663 (threads '(list sb-thread:*current-thread*))
664 (report nil report-p))
665 &body body)
666 "Evaluate BODY with statistical profiling turned on. If LOOP is true,
667 loop around the BODY until a sufficient number of samples has been collected.
668 Returns the values from the last evaluation of BODY.
670 In multithreaded operation, only the thread in which WITH-PROFILING was
671 evaluated will be profiled by default. If you want to profile multiple
672 threads, invoke the profiler with START-PROFILING.
674 The following keyword args are recognized:
676 :SAMPLE-INTERVAL <n>
677 Take a sample every <n> seconds. Default is *SAMPLE-INTERVAL*.
679 :ALLOC-INTERVAL <n>
680 Take a sample every time <n> allocation regions (approximately
681 8kB) have been allocated since the last sample. Default is
682 *ALLOC-INTERVAL*.
684 :MODE <mode>
685 If :CPU, run the profiler in CPU profiling mode. If :ALLOC, run the
686 profiler in allocation profiling mode. If :TIME, run the profiler
687 in wallclock profiling mode.
689 :MAX-SAMPLES <max>
690 Repeat evaluating body until <max> samples are taken.
691 Default is *MAX-SAMPLES*.
693 :MAX-DEPTH <max>
694 Maximum call stack depth that the profiler should consider. Only
695 has an effect on x86 and x86-64.
697 :REPORT <type>
698 If specified, call REPORT with :TYPE <type> at the end.
700 :RESET <bool>
701 It true, call RESET at the beginning.
703 :THREADS <list-form>
704 Form that evaluates to the list threads to profile, or :ALL to indicate
705 that all threads should be profiled. Defaults to the current
706 thread. (Note: START-PROFILING defaults to all threads.)
708 :THREADS has no effect on call-counting at the moment.
710 On some platforms (eg. Darwin) the signals used by the profiler are
711 not properly delivered to threads in proportion to their CPU usage
712 when doing :CPU profiling. If you see empty call graphs, or are obviously
713 missing several samples from certain threads, you may be falling afoul
714 of this. In this case using :MODE :TIME is likely to work better.
716 :LOOP <bool>
717 If false (the default), evaluate BODY only once. If true repeatedly
718 evaluate BODY."
719 (declare (type report-type report))
720 (check-type loop boolean)
721 (with-unique-names (values last-index oops)
722 `(let* ((*sample-interval* ,sample-interval)
723 (*alloc-interval* ,alloc-interval)
724 (*sampling* nil)
725 (*sampling-mode* ,mode)
726 (*max-samples* ,max-samples))
727 ,@(when reset '((reset)))
728 (flet ((,oops ()
729 (warn "~@<No sampling progress; run too short, sampling interval ~
730 too long, inappropriate set of sampled thread, or possibly ~
731 a profiler bug.~:@>")))
732 (unwind-protect
733 (progn
734 (start-profiling :max-depth ,max-depth :threads ,threads)
735 ,(if loop
736 `(let (,values)
737 (loop
738 (when (>= (samples-trace-count *samples*)
739 (samples-max-samples *samples*))
740 (return))
741 ,@(when show-progress
742 `((format t "~&===> ~d of ~d samples taken.~%"
743 (samples-trace-count *samples*)
744 (samples-max-samples *samples*))))
745 (let ((,last-index (samples-index *samples*)))
746 (setf ,values (multiple-value-list (progn ,@body)))
747 (when (= ,last-index (samples-index *samples*))
748 (,oops)
749 (return))))
750 (values-list ,values))
751 `(let ((,last-index (samples-index *samples*)))
752 (multiple-value-prog1 (progn ,@body)
753 (when (= ,last-index (samples-index *samples*))
754 (,oops))))))
755 (stop-profiling)))
756 ,@(when report-p `((report :type ,report))))))
758 (defvar *timer* nil)
760 (defvar *old-alloc-interval* nil)
761 (defvar *old-sample-interval* nil)
763 #-win32
764 (defun start-profiling (&key (max-samples *max-samples*)
765 (mode *sampling-mode*)
766 (sample-interval *sample-interval*)
767 (alloc-interval *alloc-interval*)
768 (max-depth most-positive-fixnum)
769 (threads :all)
770 (sampling t))
771 "Start profiling statistically in the current thread if not already profiling.
772 The following keyword args are recognized:
774 :SAMPLE-INTERVAL <n>
775 Take a sample every <n> seconds. Default is *SAMPLE-INTERVAL*.
777 :ALLOC-INTERVAL <n>
778 Take a sample every time <n> allocation regions (approximately
779 8kB) have been allocated since the last sample. Default is
780 *ALLOC-INTERVAL*.
782 :MODE <mode>
783 If :CPU, run the profiler in CPU profiling mode. If :ALLOC, run
784 the profiler in allocation profiling mode. If :TIME, run the profiler
785 in wallclock profiling mode.
787 :MAX-SAMPLES <max>
788 Maximum number of samples. Default is *MAX-SAMPLES*.
790 :MAX-DEPTH <max>
791 Maximum call stack depth that the profiler should consider. Only
792 has an effect on x86 and x86-64.
794 :THREADS <list>
795 List threads to profile, or :ALL to indicate that all threads should be
796 profiled. Defaults to :ALL. (Note: WITH-PROFILING defaults to the current
797 thread.)
799 :THREADS has no effect on call-counting at the moment.
801 On some platforms (eg. Darwin) the signals used by the profiler are
802 not properly delivered to threads in proportion to their CPU usage
803 when doing :CPU profiling. If you see empty call graphs, or are obviously
804 missing several samples from certain threads, you may be falling afoul
805 of this.
807 :SAMPLING <bool>
808 If true, the default, start sampling right away.
809 If false, START-SAMPLING can be used to turn sampling on."
810 #-gencgc
811 (when (eq mode :alloc)
812 (error "Allocation profiling is only supported for builds using the generational garbage collector."))
813 (unless *profiling*
814 (multiple-value-bind (secs usecs)
815 (multiple-value-bind (secs rest)
816 (truncate sample-interval)
817 (values secs (truncate (* rest 1000000))))
818 (setf *sampling* sampling
819 *samples* (make-samples :max-depth max-depth
820 :max-samples max-samples
821 :sample-interval sample-interval
822 :alloc-interval alloc-interval
823 :mode mode))
824 (enable-call-counting)
825 (setf *profiled-threads* threads)
826 (sb-sys:enable-interrupt sb-unix:sigprof
827 #'sigprof-handler
828 :synchronous t)
829 (ecase mode
830 (:alloc
831 (let ((alloc-signal (1- alloc-interval)))
832 #+sb-thread
833 (progn
834 (when (eq :all threads)
835 ;; Set the value new threads inherit.
836 (sb-thread::with-all-threads-lock
837 (setf sb-thread::*default-alloc-signal* alloc-signal)))
838 ;; Turn on allocation profiling in existing threads.
839 (dolist (thread (profiled-threads))
840 (sb-thread::%set-symbol-value-in-thread 'sb-vm::*alloc-signal* thread alloc-signal)))
841 #-sb-thread
842 (setf sb-vm:*alloc-signal* alloc-signal)))
843 (:cpu
844 (unix-setitimer :profile secs usecs secs usecs))
845 (:time
846 #+sb-thread
847 (let ((setup (sb-thread:make-semaphore :name "Timer thread setup semaphore")))
848 (setf *timer-thread*
849 (sb-thread:make-thread (lambda ()
850 (sb-thread:wait-on-semaphore setup)
851 (loop while (eq sb-thread:*current-thread* *timer-thread*)
852 do (sleep 1.0)))
853 :name "SB-SPROF wallclock timer thread"))
854 (sb-thread:signal-semaphore setup))
855 #-sb-thread
856 (setf *timer-thread* nil)
857 (setf *timer* (make-timer #'thread-distribution-handler :name "SB-PROF wallclock timer"
858 :thread *timer-thread*))
859 (schedule-timer *timer* sample-interval :repeat-interval sample-interval)))
860 (setq *profiling* mode)))
861 (values))
863 (defun stop-profiling ()
864 "Stop profiling if profiling."
865 (let ((profiling *profiling*))
866 (when profiling
867 ;; Even with the timers shut down we cannot be sure that there is no
868 ;; undelivered sigprof. The handler is also responsible for turning the
869 ;; *ALLOC-SIGNAL* off in individual threads.
870 (ecase profiling
871 (:alloc
872 #+sb-thread
873 (setf sb-thread::*default-alloc-signal* nil)
874 #-sb-thread
875 (setf sb-vm:*alloc-signal* nil))
876 (:cpu
877 (unix-setitimer :profile 0 0 0 0))
878 (:time
879 (unschedule-timer *timer*)
880 (setf *timer* nil
881 *timer-thread* nil)))
882 (disable-call-counting)
883 (setf *profiling* nil
884 *sampling* nil
885 *profiled-threads* nil)))
886 (values))
888 (defun reset ()
889 "Reset the profiler."
890 (stop-profiling)
891 (setq *sampling* nil)
892 (setq *samples* nil)
893 (values))
895 ;;; Make a NODE for debug-info INFO.
896 (defun make-node (info)
897 (flet ((clean-name (name)
898 (if (and (consp name)
899 (member (first name)
900 '(sb-c::xep sb-c::tl-xep sb-c::&more-processor
901 sb-c::top-level-form
902 sb-c::&optional-processor)))
903 (second name)
904 name)))
905 (typecase info
906 (sb-kernel::code-component
907 (multiple-value-bind (start end)
908 (code-bounds info)
909 (values
910 (%make-node :name (or (sb-disassem::find-assembler-routine start)
911 (format nil "~a" info))
912 :debug-info info
913 :start-pc-or-offset start
914 :end-pc-or-offset end)
915 info)))
916 (sb-di::compiled-debug-fun
917 (let* ((name (sb-di::debug-fun-name info))
918 (cdf (sb-di::compiled-debug-fun-compiler-debug-fun info))
919 (start-offset (sb-c::compiled-debug-fun-start-pc cdf))
920 (end-offset (sb-c::compiled-debug-fun-elsewhere-pc cdf))
921 (component (sb-di::compiled-debug-fun-component info))
922 (start-pc (code-start component)))
923 ;; Call graphs are mostly useless unless we somehow
924 ;; distinguish a gazillion different (LAMBDA ())'s.
925 (when (equal name '(lambda ()))
926 (setf name (format nil "Unknown component: #x~x" start-pc)))
927 (values (%make-node :name (clean-name name)
928 :debug-info info
929 :start-pc-or-offset start-offset
930 :end-pc-or-offset end-offset)
931 component)))
932 (sb-di::debug-fun
933 (%make-node :name (clean-name (sb-di::debug-fun-name info))
934 :debug-info info))
936 (%make-node :name (coerce info 'string)
937 :debug-info info)))))
939 ;;; One function can have more than one COMPILED-DEBUG-FUNCTION with
940 ;;; the same name. Reduce the number of calls to Debug-Info by first
941 ;;; looking for a given PC in a red-black tree. If not found in the
942 ;;; tree, get debug info, and look for a node in a hash-table by
943 ;;; function name. If not found in the hash-table, make a new node.
945 (defvar *name->node*)
947 (defmacro with-lookup-tables (() &body body)
948 `(let ((*name->node* (make-hash-table :test 'equal)))
949 ,@body))
951 ;;; Find or make a new node for INFO. Value is the NODE found or
952 ;;; made; NIL if not enough information exists to make a NODE for INFO.
953 (defun lookup-node (info)
954 (when info
955 (multiple-value-bind (new key)
956 (make-node info)
957 (when (eql (node-name new) 'call-counter)
958 (return-from lookup-node (values nil nil)))
959 (let* ((key (cons (node-name new) key))
960 (found (gethash key *name->node*)))
961 (cond (found
962 (setf (node-start-pc-or-offset found)
963 (min (node-start-pc-or-offset found)
964 (node-start-pc-or-offset new)))
965 (setf (node-end-pc-or-offset found)
966 (max (node-end-pc-or-offset found)
967 (node-end-pc-or-offset new)))
968 found)
970 (let ((call-count-info (gethash (node-name new)
971 *encapsulations*)))
972 (when call-count-info
973 (setf (node-call-count new)
974 (car call-count-info))))
975 (setf (gethash key *name->node*) new)
976 new))))))
978 ;;; Return a list of all nodes created by LOOKUP-NODE.
979 (defun collect-nodes ()
980 (loop for node being the hash-values of *name->node*
981 collect node))
983 ;;; Value is a CALL-GRAPH for the current contents of *SAMPLES*.
984 (defun make-call-graph-1 (max-depth)
985 (let ((elsewhere-count 0)
986 visited-nodes)
987 (with-lookup-tables ()
988 (loop for i below (- (samples-index *samples*) 2) by 2
989 with depth = 0
990 for debug-info = (aref (samples-vector *samples*) i)
991 for next-info = (aref (samples-vector *samples*)
992 (+ i 2))
993 do (if (eq debug-info 'trace-start)
994 (setf depth 0)
995 (let ((callee (lookup-node debug-info))
996 (caller (unless (eq next-info 'trace-start)
997 (lookup-node next-info))))
998 (when (< depth max-depth)
999 (when (zerop depth)
1000 (setf visited-nodes nil)
1001 (cond (callee
1002 (incf (node-accrued-count callee))
1003 (incf (node-count callee)))
1005 (incf elsewhere-count))))
1006 (incf depth)
1007 (when callee
1008 (push callee visited-nodes))
1009 (when caller
1010 (unless (member caller visited-nodes)
1011 (incf (node-accrued-count caller)))
1012 (when callee
1013 (let ((call (find callee (node-edges caller)
1014 :key #'call-vertex)))
1015 (pushnew caller (node-callers callee))
1016 (if call
1017 (unless (member caller visited-nodes)
1018 (incf (call-count call)))
1019 (push (make-call callee)
1020 (node-edges caller))))))))))
1021 (let ((sorted-nodes (sort (collect-nodes) #'> :key #'node-count)))
1022 (loop for node in sorted-nodes and i from 1 do
1023 (setf (node-index node) i))
1024 (%make-call-graph :nsamples (samples-trace-count *samples*)
1025 :sample-interval (if (eq (samples-mode *samples*)
1026 :alloc)
1027 (samples-alloc-interval *samples*)
1028 (samples-sample-interval *samples*))
1029 :sampling-mode (samples-mode *samples*)
1030 :sampled-threads (samples-sampled-threads *samples*)
1031 :elsewhere-count elsewhere-count
1032 :vertices sorted-nodes)))))
1034 ;;; Reduce CALL-GRAPH to a dag, creating CYCLE structures for call
1035 ;;; cycles.
1036 (defun reduce-call-graph (call-graph)
1037 (let ((cycle-no 0))
1038 (flet ((make-one-cycle (vertices edges)
1039 (let* ((name (format nil "<Cycle ~d>" (incf cycle-no)))
1040 (count (loop for v in vertices sum (node-count v))))
1041 (make-cycle :name name
1042 :index cycle-no
1043 :count count
1044 :scc-vertices vertices
1045 :edges edges))))
1046 (reduce-graph call-graph #'make-one-cycle))))
1048 ;;; For all nodes in CALL-GRAPH, compute times including the time
1049 ;;; spent in functions called from them. Note that the call-graph
1050 ;;; vertices are in reverse topological order, children first, so we
1051 ;;; will have computed accrued counts of called functions before they
1052 ;;; are used to compute accrued counts for callers.
1053 (defun compute-accrued-counts (call-graph)
1054 (do-vertices (from call-graph)
1055 (setf (node-accrued-count from) (node-count from))
1056 (do-edges (call to from)
1057 (incf (node-accrued-count from)
1058 (round (* (/ (call-count call) (node-count to))
1059 (node-accrued-count to)))))))
1061 ;;; Return a CALL-GRAPH structure for the current contents of
1062 ;;; *SAMPLES*. The result contain a list of nodes sorted by self-time
1063 ;;; in the FLAT-NODES slot, and a dag in VERTICES, with call cycles
1064 ;;; reduced to CYCLE structures.
1065 (defun make-call-graph (max-depth)
1066 (stop-profiling)
1067 (show-progress "~&Computing call graph ")
1068 (let ((call-graph (without-gcing (make-call-graph-1 max-depth))))
1069 (setf (call-graph-flat-nodes call-graph)
1070 (copy-list (graph-vertices call-graph)))
1071 (show-progress "~&Finding cycles")
1072 #+nil
1073 (reduce-call-graph call-graph)
1074 (show-progress "~&Propagating counts")
1075 #+nil
1076 (compute-accrued-counts call-graph)
1077 call-graph))
1080 ;;;; Reporting
1082 (defun print-separator (&key (length 72) (char #\-))
1083 (format t "~&~V,,,V<~>~%" length char))
1085 (defun samples-percent (call-graph count)
1086 (if (> count 0)
1087 (* 100.0 (/ count (call-graph-nsamples call-graph)))
1090 (defun print-call-graph-header (call-graph)
1091 (let ((nsamples (call-graph-nsamples call-graph))
1092 (interval (call-graph-sample-interval call-graph))
1093 (ncycles (loop for v in (graph-vertices call-graph)
1094 count (scc-p v))))
1095 (if (eq (call-graph-sampling-mode call-graph) :alloc)
1096 (format t "~2&Number of samples: ~d~%~
1097 Alloc interval: ~a regions (approximately ~a kB)~%~
1098 Total sampling amount: ~a regions (approximately ~a kB)~%~
1099 Number of cycles: ~d~%~
1100 Sampled threads:~{~% ~S~}~2%"
1101 nsamples
1102 interval
1103 (truncate (* interval *alloc-region-size*) 1024)
1104 (* nsamples interval)
1105 (truncate (* nsamples interval *alloc-region-size*) 1024)
1106 ncycles
1107 (call-graph-sampled-threads call-graph))
1108 (format t "~2&Number of samples: ~d~%~
1109 Sample interval: ~f seconds~%~
1110 Total sampling time: ~f seconds~%~
1111 Number of cycles: ~d~%~
1112 Sampled threads:~{~% ~S~}~2%"
1113 nsamples
1114 interval
1115 (* nsamples interval)
1116 ncycles
1117 (call-graph-sampled-threads call-graph)))))
1119 (declaim (type (member :samples :cumulative-samples) *report-sort-by*))
1120 (defvar *report-sort-by* :samples
1121 "Method for sorting the flat report: either by :SAMPLES or by :CUMULATIVE-SAMPLES.")
1123 (declaim (type (member :descending :ascending) *report-sort-order*))
1124 (defvar *report-sort-order* :descending
1125 "Order for sorting the flat report: either :DESCENDING or :ASCENDING.")
1127 (defun print-flat (call-graph &key (stream *standard-output*) max
1128 min-percent (print-header t)
1129 (sort-by *report-sort-by*)
1130 (sort-order *report-sort-order*))
1131 (declare (type (member :descending :ascending) sort-order)
1132 (type (member :samples :cumulative-samples) sort-by))
1133 (let ((*standard-output* stream)
1134 (*print-pretty* nil)
1135 (total-count 0)
1136 (total-percent 0)
1137 (min-count (if min-percent
1138 (round (* (/ min-percent 100.0)
1139 (call-graph-nsamples call-graph)))
1140 0)))
1141 (when print-header
1142 (print-call-graph-header call-graph))
1143 (format t "~& Self Total Cumul~%")
1144 (format t "~& Nr Count % Count % Count % Calls Function~%")
1145 (print-separator)
1146 (let ((elsewhere-count (call-graph-elsewhere-count call-graph))
1147 (i 0)
1148 (nodes (stable-sort (copy-list (call-graph-flat-nodes call-graph))
1149 (let ((cmp (if (eq :descending sort-order) #'> #'<)))
1150 (multiple-value-bind (primary secondary)
1151 (if (eq :samples sort-by)
1152 (values #'node-count #'node-accrued-count)
1153 (values #'node-accrued-count #'node-count))
1154 (lambda (x y)
1155 (let ((cx (funcall primary x))
1156 (cy (funcall primary y)))
1157 (if (= cx cy)
1158 (funcall cmp (funcall secondary x) (funcall secondary y))
1159 (funcall cmp cx cy)))))))))
1160 (dolist (node nodes)
1161 (when (or (and max (> (incf i) max))
1162 (< (node-count node) min-count))
1163 (return))
1164 (let* ((count (node-count node))
1165 (percent (samples-percent call-graph count))
1166 (accrued-count (node-accrued-count node))
1167 (accrued-percent (samples-percent call-graph accrued-count)))
1168 (incf total-count count)
1169 (incf total-percent percent)
1170 (format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~6d ~5,1f ~8@a ~s~%"
1171 (incf i)
1172 count
1173 percent
1174 accrued-count
1175 accrued-percent
1176 total-count
1177 total-percent
1178 (or (node-call-count node) "-")
1179 (node-name node))
1180 (finish-output)))
1181 (print-separator)
1182 (format t "~& ~6d ~5,1f~36a elsewhere~%"
1183 elsewhere-count
1184 (samples-percent call-graph elsewhere-count)
1185 ""))))
1187 (defun print-cycles (call-graph)
1188 (when (some #'cycle-p (graph-vertices call-graph))
1189 (format t "~& Cycle~%")
1190 (format t "~& Count % Parts~%")
1191 (do-vertices (node call-graph)
1192 (when (cycle-p node)
1193 (flet ((print-info (indent index count percent name)
1194 (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
1195 count percent indent name index)))
1196 (print-separator)
1197 (format t "~&~6d ~5,1f ~a...~%"
1198 (node-count node)
1199 (samples-percent call-graph (cycle-count node))
1200 (node-name node))
1201 (dolist (v (vertex-scc-vertices node))
1202 (print-info 4 (node-index v) (node-count v)
1203 (samples-percent call-graph (node-count v))
1204 (node-name v))))))
1205 (print-separator)
1206 (format t "~2%")))
1208 (defun print-graph (call-graph &key (stream *standard-output*)
1209 max min-percent)
1210 (let ((*standard-output* stream)
1211 (*print-pretty* nil))
1212 (print-call-graph-header call-graph)
1213 (print-cycles call-graph)
1214 (flet ((find-call (from to)
1215 (find to (node-edges from) :key #'call-vertex))
1216 (print-info (indent index count percent name)
1217 (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
1218 count percent indent name index)))
1219 (format t "~& Callers~%")
1220 (format t "~& Total. Function~%")
1221 (format t "~& Count % Count % Callees~%")
1222 (do-vertices (node call-graph)
1223 (print-separator)
1225 ;; Print caller information.
1226 (dolist (caller (node-callers node))
1227 (let ((call (find-call caller node)))
1228 (print-info 4 (node-index caller)
1229 (call-count call)
1230 (samples-percent call-graph (call-count call))
1231 (node-name caller))))
1232 ;; Print the node itself.
1233 (format t "~&~6d ~5,1f ~6d ~5,1f ~s [~d]~%"
1234 (node-count node)
1235 (samples-percent call-graph (node-count node))
1236 (node-accrued-count node)
1237 (samples-percent call-graph (node-accrued-count node))
1238 (node-name node)
1239 (node-index node))
1240 ;; Print callees.
1241 (do-edges (call called node)
1242 (print-info 4 (node-index called)
1243 (call-count call)
1244 (samples-percent call-graph (call-count call))
1245 (node-name called))))
1246 (print-separator)
1247 (format t "~2%")
1248 (print-flat call-graph :stream stream :max max
1249 :min-percent min-percent :print-header nil))))
1251 (defun report (&key (type :graph) max min-percent call-graph
1252 ((:sort-by *report-sort-by*) *report-sort-by*)
1253 ((:sort-order *report-sort-order*) *report-sort-order*)
1254 (stream *standard-output*) ((:show-progress *show-progress*)))
1255 "Report statistical profiling results. The following keyword
1256 args are recognized:
1258 :TYPE <type>
1259 Specifies the type of report to generate. If :FLAT, show
1260 flat report, if :GRAPH show a call graph and a flat report.
1261 If nil, don't print out a report.
1263 :STREAM <stream>
1264 Specify a stream to print the report on. Default is
1265 *STANDARD-OUTPUT*.
1267 :MAX <max>
1268 Don't show more than <max> entries in the flat report.
1270 :MIN-PERCENT <min-percent>
1271 Don't show functions taking less than <min-percent> of the
1272 total time in the flat report.
1274 :SORT-BY <column>
1275 If :SAMPLES, sort flat report by number of samples taken.
1276 If :CUMULATIVE-SAMPLES, sort flat report by cumulative number of samples
1277 taken (shows how much time each function spent on stack.) Default
1278 is *REPORT-SORT-BY*.
1280 :SORT-ORDER <order>
1281 If :DESCENDING, sort flat report in descending order. If :ASCENDING,
1282 sort flat report in ascending order. Default is *REPORT-SORT-ORDER*.
1284 :SHOW-PROGRESS <bool>
1285 If true, print progress messages while generating the call graph.
1287 :CALL-GRAPH <graph>
1288 Print a report from <graph> instead of the latest profiling
1289 results.
1291 Value of this function is a CALL-GRAPH object representing the
1292 resulting call-graph, or NIL if there are no samples (eg. right after
1293 calling RESET.)
1295 Profiling is stopped before the call graph is generated."
1296 (cond (*samples*
1297 (let ((graph (or call-graph (make-call-graph most-positive-fixnum))))
1298 (ecase type
1299 (:flat
1300 (print-flat graph :stream stream :max max :min-percent min-percent))
1301 (:graph
1302 (print-graph graph :stream stream :max max :min-percent min-percent))
1303 ((nil)))
1304 graph))
1306 (format stream "~&; No samples to report.~%")
1307 nil)))
1309 ;;; Interface to DISASSEMBLE
1311 (defun sample-pc-from-pc-or-offset (sample pc-or-offset)
1312 (etypecase sample
1313 ;; Assembly routines or foreign functions don't move around, so we've
1314 ;; stored a raw PC
1315 ((or sb-kernel:code-component string)
1316 pc-or-offset)
1317 ;; Lisp functions might move, so we've stored a offset from the
1318 ;; start of the code component.
1319 (sb-di::compiled-debug-fun
1320 (let* ((component (sb-di::compiled-debug-fun-component sample))
1321 (start-pc (code-start component)))
1322 (+ start-pc pc-or-offset)))))
1324 (defun add-disassembly-profile-note (chunk stream dstate)
1325 (declare (ignore chunk stream))
1326 (when *samples*
1327 (let* ((location (+ (sb-disassem::seg-virtual-location
1328 (sb-disassem:dstate-segment dstate))
1329 (sb-disassem::dstate-cur-offs dstate)))
1330 (samples (loop with index = (samples-index *samples*)
1331 for x from 0 below (- index 2) by 2
1332 for last-sample = nil then sample
1333 for sample = (aref (samples-vector *samples*) x)
1334 for pc-or-offset = (aref (samples-vector *samples*)
1335 (1+ x))
1336 when (and sample (eq last-sample 'trace-start))
1337 count (= location
1338 (sample-pc-from-pc-or-offset sample
1339 pc-or-offset)))))
1340 (unless (zerop samples)
1341 (sb-disassem::note (format nil "~A/~A samples"
1342 samples (samples-trace-count *samples*))
1343 dstate)))))
1345 (pushnew 'add-disassembly-profile-note sb-disassem::*default-dstate-hooks*)
1348 ;;;; Call counting
1350 ;;; The following functions tell sb-sprof to do call count profiling
1351 ;;; for the named functions in addition to normal statistical
1352 ;;; profiling. The benefit of this over using SB-PROFILE is that this
1353 ;;; encapsulation is a lot more lightweight, due to not needing to
1354 ;;; track cpu usage / consing. (For example, compiling asdf 20 times
1355 ;;; took 13s normally, 15s with call counting for all functions in
1356 ;;; SB-C, and 94s with SB-PROFILE profiling SB-C).
1358 (defun profile-call-counts (&rest names)
1359 "Mark the functions named by NAMES as being subject to call counting
1360 during statistical profiling. If a string is used as a name, it will
1361 be interpreted as a package name. In this case call counting will be
1362 done for all functions with names like X or (SETF X), where X is a symbol
1363 with the package as its home package."
1364 (dolist (name names)
1365 (if (stringp name)
1366 (let ((package (find-package name)))
1367 (do-symbols (symbol package)
1368 (when (eql (symbol-package symbol) package)
1369 (dolist (function-name (list symbol (list 'setf symbol)))
1370 (profile-call-counts-for-function function-name)))))
1371 (profile-call-counts-for-function name))))
1373 (defun profile-call-counts-for-function (function-name)
1374 (unless (gethash function-name *encapsulations*)
1375 (setf (gethash function-name *encapsulations*) nil)))
1377 (defun unprofile-call-counts ()
1378 "Clear all call counting information. Call counting will be done for no
1379 functions during statistical profiling."
1380 (clrhash *encapsulations*))
1382 ;;; Called when profiling is started to enable the call counting
1383 ;;; encapsulation. Wrap all the call counted functions
1384 (defun enable-call-counting ()
1385 (maphash (lambda (k v)
1386 (declare (ignore v))
1387 (enable-call-counting-for-function k))
1388 *encapsulations*))
1390 ;;; Called when profiling is stopped to disable the encapsulation. Restore
1391 ;;; the original functions.
1392 (defun disable-call-counting ()
1393 (maphash (lambda (k v)
1394 (when v
1395 (assert (cdr v))
1396 (without-package-locks
1397 (setf (fdefinition k) (cdr v)))
1398 (setf (cdr v) nil)))
1399 *encapsulations*))
1401 (defun enable-call-counting-for-function (function-name)
1402 (let ((info (gethash function-name *encapsulations*)))
1403 ;; We should never try to encapsulate an fdefn multiple times.
1404 (assert (or (null info)
1405 (null (cdr info))))
1406 (when (and (fboundp function-name)
1407 (or (not (symbolp function-name))
1408 (and (not (special-operator-p function-name))
1409 (not (macro-function function-name)))))
1410 (let* ((original-fun (fdefinition function-name))
1411 (info (cons 0 original-fun)))
1412 (setf (gethash function-name *encapsulations*) info)
1413 (without-package-locks
1414 (setf (fdefinition function-name)
1415 (sb-int:named-lambda call-counter (sb-int:&more more-context more-count)
1416 (declare (optimize speed (safety 0)))
1417 ;; 2^59 calls should be enough for anybody, and it
1418 ;; allows using fixnum arithmetic on x86-64. 2^32
1419 ;; isn't enough, so we can't do that on 32 bit platforms.
1420 (incf (the (unsigned-byte 59)
1421 (car info)))
1422 (multiple-value-call original-fun
1423 (sb-c:%more-arg-values more-context
1425 more-count)))))))))