1 /* Profiler implementation.
3 Copyright (C) 2012-2015 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
22 #include "syssignal.h"
25 /* Return A + B, but return the maximum fixnum if the result would overflow.
26 Assume A and B are nonnegative and in fixnum range. */
29 saturated_add (EMACS_INT a
, EMACS_INT b
)
31 return min (a
+ b
, MOST_POSITIVE_FIXNUM
);
36 typedef struct Lisp_Hash_Table log_t
;
38 static struct hash_table_test hashtest_profiler
;
41 make_log (int heap_size
, int max_stack_depth
)
43 /* We use a standard Elisp hash-table object, but we use it in
44 a special way. This is OK as long as the object is not exposed
45 to Elisp, i.e. until it is returned by *-profiler-log, after which
46 it can't be used any more. */
47 Lisp_Object log
= make_hash_table (hashtest_profiler
,
48 make_number (heap_size
),
49 make_float (DEFAULT_REHASH_SIZE
),
50 make_float (DEFAULT_REHASH_THRESHOLD
),
52 struct Lisp_Hash_Table
*h
= XHASH_TABLE (log
);
54 /* What is special about our hash-tables is that the keys are pre-filled
55 with the vectors we'll put in them. */
56 int i
= ASIZE (h
->key_and_value
) / 2;
58 set_hash_key_slot (h
, --i
,
59 Fmake_vector (make_number (max_stack_depth
), Qnil
));
63 /* Evict the least used half of the hash_table.
65 When the table is full, we have to evict someone.
66 The easiest and most efficient is to evict the value we're about to add
67 (i.e. once the table is full, stop sampling).
69 We could also pick the element with the lowest count and evict it,
70 but finding it is O(N) and for that amount of work we get very
71 little in return: for the next sample, this latest sample will have
72 count==1 and will hence be a prime candidate for eviction :-(
74 So instead, we take O(N) time to eliminate more or less half of the
75 entries (the half with the lowest counts). So we get an amortized
76 cost of O(1) and we get O(N) time for a new entry to grow larger
77 than the other least counts before a new round of eviction. */
79 static EMACS_INT
approximate_median (log_t
*log
,
80 ptrdiff_t start
, ptrdiff_t size
)
84 return XINT (HASH_VALUE (log
, start
));
86 /* Not an actual median, but better for our application than
87 choosing either of the two numbers. */
88 return ((XINT (HASH_VALUE (log
, start
))
89 + XINT (HASH_VALUE (log
, start
+ 1)))
93 ptrdiff_t newsize
= size
/ 3;
94 ptrdiff_t start2
= start
+ newsize
;
95 EMACS_INT i1
= approximate_median (log
, start
, newsize
);
96 EMACS_INT i2
= approximate_median (log
, start2
, newsize
);
97 EMACS_INT i3
= approximate_median (log
, start2
+ newsize
,
100 ? (i2
< i3
? i2
: (i1
< i3
? i3
: i1
))
101 : (i1
< i3
? i1
: (i2
< i3
? i3
: i2
)));
105 static void evict_lower_half (log_t
*log
)
107 ptrdiff_t size
= ASIZE (log
->key_and_value
) / 2;
108 EMACS_INT median
= approximate_median (log
, 0, size
);
111 for (i
= 0; i
< size
; i
++)
112 /* Evict not only values smaller but also values equal to the median,
113 so as to make sure we evict something no matter what. */
114 if (XINT (HASH_VALUE (log
, i
)) <= median
)
116 Lisp_Object key
= HASH_KEY (log
, i
);
117 { /* FIXME: we could make this more efficient. */
119 XSET_HASH_TABLE (tmp
, log
); /* FIXME: Use make_lisp_ptr. */
122 eassert (EQ (log
->next_free
, make_number (i
)));
125 eassert (VECTORP (key
));
126 for (j
= 0; j
< ASIZE (key
); j
++)
129 set_hash_key_slot (log
, i
, key
);
133 /* Record the current backtrace in LOG. COUNT is the weight of this
134 current backtrace: interrupt counts for CPU, and the allocation
138 record_backtrace (log_t
*log
, EMACS_INT count
)
140 Lisp_Object backtrace
;
143 if (!INTEGERP (log
->next_free
))
144 /* FIXME: transfer the evicted counts to a special entry rather
145 than dropping them on the floor. */
146 evict_lower_half (log
);
147 index
= XINT (log
->next_free
);
149 /* Get a "working memory" vector. */
150 backtrace
= HASH_KEY (log
, index
);
151 get_backtrace (backtrace
);
153 { /* We basically do a `gethash+puthash' here, except that we have to be
154 careful to avoid memory allocation since we're in a signal
155 handler, and we optimize the code to try and avoid computing the
156 hash+lookup twice. See fns.c:Fputhash for reference. */
158 ptrdiff_t j
= hash_lookup (log
, backtrace
, &hash
);
161 EMACS_INT old_val
= XINT (HASH_VALUE (log
, j
));
162 EMACS_INT new_val
= saturated_add (old_val
, count
);
163 set_hash_value_slot (log
, j
, make_number (new_val
));
166 { /* BEWARE! hash_put in general can allocate memory.
167 But currently it only does that if log->next_free is nil. */
169 eassert (!NILP (log
->next_free
));
170 j
= hash_put (log
, backtrace
, make_number (count
), hash
);
171 /* Let's make sure we've put `backtrace' right where it
172 already was to start with. */
173 eassert (index
== j
);
175 /* FIXME: If the hash-table is almost full, we should set
176 some global flag so that some Elisp code can offload its
177 data elsewhere, so as to avoid the eviction code.
178 There are 2 ways to do that, AFAICT:
179 - Set a flag checked in QUIT, such that QUIT can then call
180 Fprofiler_cpu_log and stash the full log for later use.
181 - Set a flag check in post-gc-hook, so that Elisp code can call
182 profiler-cpu-log. That gives us more flexibility since that
183 Elisp code can then do all kinds of fun stuff like write
184 the log to disk. Or turn it right away into a call tree.
185 Of course, using Elisp is generally preferable, but it may
186 take longer until we get a chance to run the Elisp code, so
187 there's more risk that the table will get full before we
193 /* Sampling profiler. */
195 #ifdef PROFILER_CPU_SUPPORT
197 /* The profiler timer and whether it was properly initialized, if
198 POSIX timers are available. */
199 #ifdef HAVE_ITIMERSPEC
200 static timer_t profiler_timer
;
201 static bool profiler_timer_ok
;
204 /* Status of sampling profiler. */
205 static enum profiler_cpu_running
206 { NOT_RUNNING
, TIMER_SETTIME_RUNNING
, SETITIMER_RUNNING
}
207 profiler_cpu_running
;
209 /* Hash-table log of CPU profiler. */
210 static Lisp_Object cpu_log
;
212 /* Separate counter for the time spent in the GC. */
213 static EMACS_INT cpu_gc_count
;
215 /* The current sampling interval in nanoseconds. */
216 static EMACS_INT current_sampling_interval
;
218 /* Signal handler for sampling profiler. */
220 /* timer_getoverrun is not implemented on Cygwin, but the following
221 seems to be good enough for profiling. */
223 #define timer_getoverrun(x) 0
227 handle_profiler_signal (int signal
)
229 if (EQ (backtrace_top_function (), Qautomatic_gc
))
230 /* Special case the time-count inside GC because the hash-table
231 code is not prepared to be used while the GC is running.
232 More specifically it uses ASIZE at many places where it does
233 not expect the ARRAY_MARK_FLAG to be set. We could try and
234 harden the hash-table code, but it doesn't seem worth the
236 cpu_gc_count
= saturated_add (cpu_gc_count
, 1);
240 #ifdef HAVE_ITIMERSPEC
241 if (profiler_timer_ok
)
243 int overruns
= timer_getoverrun (profiler_timer
);
244 eassert (overruns
>= 0);
248 eassert (HASH_TABLE_P (cpu_log
));
249 record_backtrace (XHASH_TABLE (cpu_log
), count
);
254 deliver_profiler_signal (int signal
)
256 deliver_process_signal (signal
, handle_profiler_signal
);
260 setup_cpu_timer (Lisp_Object sampling_interval
)
262 struct sigaction action
;
263 struct itimerval timer
;
264 struct timespec interval
;
265 int billion
= 1000000000;
267 if (! RANGED_INTEGERP (1, sampling_interval
,
268 (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX
/ billion
269 ? ((EMACS_INT
) TYPE_MAXIMUM (time_t) * billion
274 current_sampling_interval
= XINT (sampling_interval
);
275 interval
= make_timespec (current_sampling_interval
/ billion
,
276 current_sampling_interval
% billion
);
277 emacs_sigaction_init (&action
, deliver_profiler_signal
);
278 sigaction (SIGPROF
, &action
, 0);
280 #ifdef HAVE_ITIMERSPEC
281 if (! profiler_timer_ok
)
283 /* System clocks to try, in decreasing order of desirability. */
284 static clockid_t
const system_clock
[] = {
285 #ifdef CLOCK_THREAD_CPUTIME_ID
286 CLOCK_THREAD_CPUTIME_ID
,
288 #ifdef CLOCK_PROCESS_CPUTIME_ID
289 CLOCK_PROCESS_CPUTIME_ID
,
291 #ifdef CLOCK_MONOTONIC
297 struct sigevent sigev
;
298 sigev
.sigev_value
.sival_ptr
= &profiler_timer
;
299 sigev
.sigev_signo
= SIGPROF
;
300 sigev
.sigev_notify
= SIGEV_SIGNAL
;
302 for (i
= 0; i
< ARRAYELTS (system_clock
); i
++)
303 if (timer_create (system_clock
[i
], &sigev
, &profiler_timer
) == 0)
305 profiler_timer_ok
= 1;
310 if (profiler_timer_ok
)
312 struct itimerspec ispec
;
313 ispec
.it_value
= ispec
.it_interval
= interval
;
314 if (timer_settime (profiler_timer
, 0, &ispec
, 0) == 0)
315 return TIMER_SETTIME_RUNNING
;
319 #ifdef HAVE_SETITIMER
320 timer
.it_value
= timer
.it_interval
= make_timeval (interval
);
321 if (setitimer (ITIMER_PROF
, &timer
, 0) == 0)
322 return SETITIMER_RUNNING
;
328 DEFUN ("profiler-cpu-start", Fprofiler_cpu_start
, Sprofiler_cpu_start
,
330 doc
: /* Start or restart the cpu profiler.
331 It takes call-stack samples each SAMPLING-INTERVAL nanoseconds, approximately.
332 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
333 (Lisp_Object sampling_interval
)
335 if (profiler_cpu_running
)
336 error ("CPU profiler is already running");
341 cpu_log
= make_log (profiler_log_size
,
342 profiler_max_stack_depth
);
345 int status
= setup_cpu_timer (sampling_interval
);
348 profiler_cpu_running
= NOT_RUNNING
;
349 error ("Invalid sampling interval");
353 profiler_cpu_running
= status
;
354 if (! profiler_cpu_running
)
355 error ("Unable to start profiler timer");
361 DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop
, Sprofiler_cpu_stop
,
363 doc
: /* Stop the cpu profiler. The profiler log is not affected.
364 Return non-nil if the profiler was running. */)
367 switch (profiler_cpu_running
)
372 #ifdef HAVE_ITIMERSPEC
373 case TIMER_SETTIME_RUNNING
:
375 struct itimerspec disable
;
376 memset (&disable
, 0, sizeof disable
);
377 timer_settime (profiler_timer
, 0, &disable
, 0);
382 #ifdef HAVE_SETITIMER
383 case SETITIMER_RUNNING
:
385 struct itimerval disable
;
386 memset (&disable
, 0, sizeof disable
);
387 setitimer (ITIMER_PROF
, &disable
, 0);
393 signal (SIGPROF
, SIG_IGN
);
394 profiler_cpu_running
= NOT_RUNNING
;
398 DEFUN ("profiler-cpu-running-p",
399 Fprofiler_cpu_running_p
, Sprofiler_cpu_running_p
,
401 doc
: /* Return non-nil if cpu profiler is running. */)
404 return profiler_cpu_running
? Qt
: Qnil
;
407 DEFUN ("profiler-cpu-log", Fprofiler_cpu_log
, Sprofiler_cpu_log
,
409 doc
: /* Return the current cpu profiler log.
410 The log is a hash-table mapping backtraces to counters which represent
411 the amount of time spent at those points. Every backtrace is a vector
412 of functions, where the last few elements may be nil.
413 Before returning, a new log is allocated for future samples. */)
416 Lisp_Object result
= cpu_log
;
417 /* Here we're making the log visible to Elisp, so it's not safe any
418 more for our use afterwards since we can't rely on its special
419 pre-allocated keys anymore. So we have to allocate a new one. */
420 cpu_log
= (profiler_cpu_running
421 ? make_log (profiler_log_size
, profiler_max_stack_depth
)
423 Fputhash (Fmake_vector (make_number (1), Qautomatic_gc
),
424 make_number (cpu_gc_count
),
429 #endif /* PROFILER_CPU_SUPPORT */
431 /* Memory profiler. */
433 /* True if memory profiler is running. */
434 bool profiler_memory_running
;
436 static Lisp_Object memory_log
;
438 DEFUN ("profiler-memory-start", Fprofiler_memory_start
, Sprofiler_memory_start
,
440 doc
: /* Start/restart the memory profiler.
441 The memory profiler will take samples of the call-stack whenever a new
442 allocation takes place. Note that most small allocations only trigger
443 the profiler occasionally.
444 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
447 if (profiler_memory_running
)
448 error ("Memory profiler is already running");
450 if (NILP (memory_log
))
451 memory_log
= make_log (profiler_log_size
,
452 profiler_max_stack_depth
);
454 profiler_memory_running
= true;
459 DEFUN ("profiler-memory-stop",
460 Fprofiler_memory_stop
, Sprofiler_memory_stop
,
462 doc
: /* Stop the memory profiler. The profiler log is not affected.
463 Return non-nil if the profiler was running. */)
466 if (!profiler_memory_running
)
468 profiler_memory_running
= false;
472 DEFUN ("profiler-memory-running-p",
473 Fprofiler_memory_running_p
, Sprofiler_memory_running_p
,
475 doc
: /* Return non-nil if memory profiler is running. */)
478 return profiler_memory_running
? Qt
: Qnil
;
481 DEFUN ("profiler-memory-log",
482 Fprofiler_memory_log
, Sprofiler_memory_log
,
484 doc
: /* Return the current memory profiler log.
485 The log is a hash-table mapping backtraces to counters which represent
486 the amount of memory allocated at those points. Every backtrace is a vector
487 of functions, where the last few elements may be nil.
488 Before returning, a new log is allocated for future samples. */)
491 Lisp_Object result
= memory_log
;
492 /* Here we're making the log visible to Elisp , so it's not safe any
493 more for our use afterwards since we can't rely on its special
494 pre-allocated keys anymore. So we have to allocate a new one. */
495 memory_log
= (profiler_memory_running
496 ? make_log (profiler_log_size
, profiler_max_stack_depth
)
502 /* Signals and probes. */
504 /* Record that the current backtrace allocated SIZE bytes. */
506 malloc_probe (size_t size
)
508 eassert (HASH_TABLE_P (memory_log
));
509 record_backtrace (XHASH_TABLE (memory_log
), min (size
, MOST_POSITIVE_FIXNUM
));
512 DEFUN ("function-equal", Ffunction_equal
, Sfunction_equal
, 2, 2, 0,
513 doc
: /* Return non-nil if F1 and F2 come from the same source.
514 Used to determine if different closures are just different instances of
515 the same lambda expression, or are really unrelated function. */)
516 (Lisp_Object f1
, Lisp_Object f2
)
521 else if (COMPILEDP (f1
) && COMPILEDP (f2
))
522 res
= EQ (AREF (f1
, COMPILED_BYTECODE
), AREF (f2
, COMPILED_BYTECODE
));
523 else if (CONSP (f1
) && CONSP (f2
) && CONSP (XCDR (f1
)) && CONSP (XCDR (f2
))
524 && EQ (Qclosure
, XCAR (f1
))
525 && EQ (Qclosure
, XCAR (f2
)))
526 res
= EQ (XCDR (XCDR (f1
)), XCDR (XCDR (f2
)));
529 return res
? Qt
: Qnil
;
533 cmpfn_profiler (struct hash_table_test
*t
,
534 Lisp_Object bt1
, Lisp_Object bt2
)
536 if (VECTORP (bt1
) && VECTORP (bt2
))
538 ptrdiff_t i
, l
= ASIZE (bt1
);
539 if (l
!= ASIZE (bt2
))
541 for (i
= 0; i
< l
; i
++)
542 if (NILP (Ffunction_equal (AREF (bt1
, i
), AREF (bt2
, i
))))
547 return EQ (bt1
, bt2
);
551 hashfn_profiler (struct hash_table_test
*ht
, Lisp_Object bt
)
556 ptrdiff_t i
, l
= ASIZE (bt
);
557 for (i
= 0; i
< l
; i
++)
559 Lisp_Object f
= AREF (bt
, i
);
561 = (COMPILEDP (f
) ? XHASH (AREF (f
, COMPILED_BYTECODE
))
562 : (CONSP (f
) && CONSP (XCDR (f
)) && EQ (Qclosure
, XCAR (f
)))
563 ? XHASH (XCDR (XCDR (f
))) : XHASH (f
));
564 hash
= sxhash_combine (hash
, hash1
);
566 return SXHASH_REDUCE (hash
);
573 syms_of_profiler (void)
575 DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth
,
576 doc
: /* Number of elements from the call-stack recorded in the log. */);
577 profiler_max_stack_depth
= 16;
578 DEFVAR_INT ("profiler-log-size", profiler_log_size
,
579 doc
: /* Number of distinct call-stacks that can be recorded in a profiler log.
580 If the log gets full, some of the least-seen call-stacks will be evicted
581 to make room for new entries. */);
582 profiler_log_size
= 10000;
584 DEFSYM (Qprofiler_backtrace_equal
, "profiler-backtrace-equal");
586 hashtest_profiler
.name
= Qprofiler_backtrace_equal
;
587 hashtest_profiler
.user_hash_function
= Qnil
;
588 hashtest_profiler
.user_cmp_function
= Qnil
;
589 hashtest_profiler
.cmpfn
= cmpfn_profiler
;
590 hashtest_profiler
.hashfn
= hashfn_profiler
;
592 defsubr (&Sfunction_equal
);
594 #ifdef PROFILER_CPU_SUPPORT
595 profiler_cpu_running
= NOT_RUNNING
;
597 staticpro (&cpu_log
);
598 defsubr (&Sprofiler_cpu_start
);
599 defsubr (&Sprofiler_cpu_stop
);
600 defsubr (&Sprofiler_cpu_running_p
);
601 defsubr (&Sprofiler_cpu_log
);
603 profiler_memory_running
= false;
605 staticpro (&memory_log
);
606 defsubr (&Sprofiler_memory_start
);
607 defsubr (&Sprofiler_memory_stop
);
608 defsubr (&Sprofiler_memory_running_p
);
609 defsubr (&Sprofiler_memory_log
);