/* Profiler implementation.

Copyright (C) 2012-2015 Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
#include "syssignal.h"
25 /* Return A + B, but return the maximum fixnum if the result would overflow.
26 Assume A and B are nonnegative and in fixnum range. */
29 saturated_add (EMACS_INT a
, EMACS_INT b
)
31 return min (a
+ b
, MOST_POSITIVE_FIXNUM
);
36 typedef struct Lisp_Hash_Table log_t
;
38 static struct hash_table_test hashtest_profiler
;
41 make_log (int heap_size
, int max_stack_depth
)
43 /* We use a standard Elisp hash-table object, but we use it in
44 a special way. This is OK as long as the object is not exposed
45 to Elisp, i.e. until it is returned by *-profiler-log, after which
46 it can't be used any more. */
47 Lisp_Object log
= make_hash_table (hashtest_profiler
,
48 make_number (heap_size
),
49 make_float (DEFAULT_REHASH_SIZE
),
50 make_float (DEFAULT_REHASH_THRESHOLD
),
52 struct Lisp_Hash_Table
*h
= XHASH_TABLE (log
);
54 /* What is special about our hash-tables is that the keys are pre-filled
55 with the vectors we'll put in them. */
56 int i
= ASIZE (h
->key_and_value
) / 2;
58 set_hash_key_slot (h
, --i
,
59 Fmake_vector (make_number (max_stack_depth
), Qnil
));
63 /* Evict the least used half of the hash_table.
65 When the table is full, we have to evict someone.
66 The easiest and most efficient is to evict the value we're about to add
67 (i.e. once the table is full, stop sampling).
69 We could also pick the element with the lowest count and evict it,
70 but finding it is O(N) and for that amount of work we get very
71 little in return: for the next sample, this latest sample will have
72 count==1 and will hence be a prime candidate for eviction :-(
74 So instead, we take O(N) time to eliminate more or less half of the
75 entries (the half with the lowest counts). So we get an amortized
76 cost of O(1) and we get O(N) time for a new entry to grow larger
77 than the other least counts before a new round of eviction. */
79 static EMACS_INT
approximate_median (log_t
*log
,
80 ptrdiff_t start
, ptrdiff_t size
)
84 return XINT (HASH_VALUE (log
, start
));
86 /* Not an actual median, but better for our application than
87 choosing either of the two numbers. */
88 return ((XINT (HASH_VALUE (log
, start
))
89 + XINT (HASH_VALUE (log
, start
+ 1)))
93 ptrdiff_t newsize
= size
/ 3;
94 ptrdiff_t start2
= start
+ newsize
;
95 EMACS_INT i1
= approximate_median (log
, start
, newsize
);
96 EMACS_INT i2
= approximate_median (log
, start2
, newsize
);
97 EMACS_INT i3
= approximate_median (log
, start2
+ newsize
,
100 ? (i2
< i3
? i2
: (i1
< i3
? i3
: i1
))
101 : (i1
< i3
? i1
: (i2
< i3
? i3
: i2
)));
105 static void evict_lower_half (log_t
*log
)
107 ptrdiff_t size
= ASIZE (log
->key_and_value
) / 2;
108 EMACS_INT median
= approximate_median (log
, 0, size
);
111 for (i
= 0; i
< size
; i
++)
112 /* Evict not only values smaller but also values equal to the median,
113 so as to make sure we evict something no matter what. */
114 if (XINT (HASH_VALUE (log
, i
)) <= median
)
116 Lisp_Object key
= HASH_KEY (log
, i
);
117 { /* FIXME: we could make this more efficient. */
119 XSET_HASH_TABLE (tmp
, log
); /* FIXME: Use make_lisp_ptr. */
122 eassert (EQ (log
->next_free
, make_number (i
)));
125 eassert (VECTORP (key
));
126 for (j
= 0; j
< ASIZE (key
); j
++)
129 set_hash_key_slot (log
, i
, key
);
133 /* Record the current backtrace in LOG. COUNT is the weight of this
134 current backtrace: interrupt counts for CPU, and the allocation
138 record_backtrace (log_t
*log
, EMACS_INT count
)
140 Lisp_Object backtrace
;
143 if (!INTEGERP (log
->next_free
))
144 /* FIXME: transfer the evicted counts to a special entry rather
145 than dropping them on the floor. */
146 evict_lower_half (log
);
147 index
= XINT (log
->next_free
);
149 /* Get a "working memory" vector. */
150 backtrace
= HASH_KEY (log
, index
);
151 get_backtrace (backtrace
);
153 { /* We basically do a `gethash+puthash' here, except that we have to be
154 careful to avoid memory allocation since we're in a signal
155 handler, and we optimize the code to try and avoid computing the
156 hash+lookup twice. See fns.c:Fputhash for reference. */
158 ptrdiff_t j
= hash_lookup (log
, backtrace
, &hash
);
161 EMACS_INT old_val
= XINT (HASH_VALUE (log
, j
));
162 EMACS_INT new_val
= saturated_add (old_val
, count
);
163 set_hash_value_slot (log
, j
, make_number (new_val
));
166 { /* BEWARE! hash_put in general can allocate memory.
167 But currently it only does that if log->next_free is nil. */
169 eassert (!NILP (log
->next_free
));
170 j
= hash_put (log
, backtrace
, make_number (count
), hash
);
171 /* Let's make sure we've put `backtrace' right where it
172 already was to start with. */
173 eassert (index
== j
);
175 /* FIXME: If the hash-table is almost full, we should set
176 some global flag so that some Elisp code can offload its
177 data elsewhere, so as to avoid the eviction code.
178 There are 2 ways to do that, AFAICT:
179 - Set a flag checked in QUIT, such that QUIT can then call
180 Fprofiler_cpu_log and stash the full log for later use.
181 - Set a flag check in post-gc-hook, so that Elisp code can call
182 profiler-cpu-log. That gives us more flexibility since that
183 Elisp code can then do all kinds of fun stuff like write
184 the log to disk. Or turn it right away into a call tree.
185 Of course, using Elisp is generally preferable, but it may
186 take longer until we get a chance to run the Elisp code, so
187 there's more risk that the table will get full before we
193 /* Sampling profiler. */
195 #ifdef PROFILER_CPU_SUPPORT
197 /* The profiler timer and whether it was properly initialized, if
198 POSIX timers are available. */
199 #ifdef HAVE_ITIMERSPEC
200 static timer_t profiler_timer
;
201 static bool profiler_timer_ok
;
204 /* Status of sampling profiler. */
205 static enum profiler_cpu_running
206 { NOT_RUNNING
, TIMER_SETTIME_RUNNING
, SETITIMER_RUNNING
}
207 profiler_cpu_running
;
209 /* Hash-table log of CPU profiler. */
210 static Lisp_Object cpu_log
;
212 /* Separate counter for the time spent in the GC. */
213 static EMACS_INT cpu_gc_count
;
215 /* The current sampling interval in nanoseconds. */
216 static EMACS_INT current_sampling_interval
;
218 /* Signal handler for sampling profiler. */
221 handle_profiler_signal (int signal
)
223 if (EQ (backtrace_top_function (), Qautomatic_gc
))
224 /* Special case the time-count inside GC because the hash-table
225 code is not prepared to be used while the GC is running.
226 More specifically it uses ASIZE at many places where it does
227 not expect the ARRAY_MARK_FLAG to be set. We could try and
228 harden the hash-table code, but it doesn't seem worth the
230 cpu_gc_count
= saturated_add (cpu_gc_count
, 1);
234 #ifdef HAVE_ITIMERSPEC
235 if (profiler_timer_ok
)
237 int overruns
= timer_getoverrun (profiler_timer
);
238 eassert (overruns
>= 0);
242 eassert (HASH_TABLE_P (cpu_log
));
243 record_backtrace (XHASH_TABLE (cpu_log
), count
);
248 deliver_profiler_signal (int signal
)
250 deliver_process_signal (signal
, handle_profiler_signal
);
253 static enum profiler_cpu_running
254 setup_cpu_timer (Lisp_Object sampling_interval
)
256 struct sigaction action
;
257 struct itimerval timer
;
258 struct timespec interval
;
259 int billion
= 1000000000;
261 if (! RANGED_INTEGERP (1, sampling_interval
,
262 (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX
/ billion
263 ? ((EMACS_INT
) TYPE_MAXIMUM (time_t) * billion
268 current_sampling_interval
= XINT (sampling_interval
);
269 interval
= make_timespec (current_sampling_interval
/ billion
,
270 current_sampling_interval
% billion
);
271 emacs_sigaction_init (&action
, deliver_profiler_signal
);
272 sigaction (SIGPROF
, &action
, 0);
274 #ifdef HAVE_ITIMERSPEC
275 if (! profiler_timer_ok
)
277 /* System clocks to try, in decreasing order of desirability. */
278 static clockid_t
const system_clock
[] = {
279 #ifdef CLOCK_THREAD_CPUTIME_ID
280 CLOCK_THREAD_CPUTIME_ID
,
282 #ifdef CLOCK_PROCESS_CPUTIME_ID
283 CLOCK_PROCESS_CPUTIME_ID
,
285 #ifdef CLOCK_MONOTONIC
291 struct sigevent sigev
;
292 sigev
.sigev_value
.sival_ptr
= &profiler_timer
;
293 sigev
.sigev_signo
= SIGPROF
;
294 sigev
.sigev_notify
= SIGEV_SIGNAL
;
296 for (i
= 0; i
< ARRAYELTS (system_clock
); i
++)
297 if (timer_create (system_clock
[i
], &sigev
, &profiler_timer
) == 0)
299 profiler_timer_ok
= 1;
304 if (profiler_timer_ok
)
306 struct itimerspec ispec
;
307 ispec
.it_value
= ispec
.it_interval
= interval
;
308 if (timer_settime (profiler_timer
, 0, &ispec
, 0) == 0)
309 return TIMER_SETTIME_RUNNING
;
313 #ifdef HAVE_SETITIMER
314 timer
.it_value
= timer
.it_interval
= make_timeval (interval
);
315 if (setitimer (ITIMER_PROF
, &timer
, 0) == 0)
316 return SETITIMER_RUNNING
;
322 DEFUN ("profiler-cpu-start", Fprofiler_cpu_start
, Sprofiler_cpu_start
,
324 doc
: /* Start or restart the cpu profiler.
325 It takes call-stack samples each SAMPLING-INTERVAL nanoseconds, approximately.
326 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
327 (Lisp_Object sampling_interval
)
329 if (profiler_cpu_running
)
330 error ("CPU profiler is already running");
335 cpu_log
= make_log (profiler_log_size
,
336 profiler_max_stack_depth
);
339 profiler_cpu_running
= setup_cpu_timer (sampling_interval
);
340 if (! profiler_cpu_running
)
341 error ("Invalid sampling interval");
346 DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop
, Sprofiler_cpu_stop
,
348 doc
: /* Stop the cpu profiler. The profiler log is not affected.
349 Return non-nil if the profiler was running. */)
352 switch (profiler_cpu_running
)
357 #ifdef HAVE_ITIMERSPEC
358 case TIMER_SETTIME_RUNNING
:
360 struct itimerspec disable
;
361 memset (&disable
, 0, sizeof disable
);
362 timer_settime (profiler_timer
, 0, &disable
, 0);
367 #ifdef HAVE_SETITIMER
368 case SETITIMER_RUNNING
:
370 struct itimerval disable
;
371 memset (&disable
, 0, sizeof disable
);
372 setitimer (ITIMER_PROF
, &disable
, 0);
378 signal (SIGPROF
, SIG_IGN
);
379 profiler_cpu_running
= NOT_RUNNING
;
383 DEFUN ("profiler-cpu-running-p",
384 Fprofiler_cpu_running_p
, Sprofiler_cpu_running_p
,
386 doc
: /* Return non-nil if cpu profiler is running. */)
389 return profiler_cpu_running
? Qt
: Qnil
;
392 DEFUN ("profiler-cpu-log", Fprofiler_cpu_log
, Sprofiler_cpu_log
,
394 doc
: /* Return the current cpu profiler log.
395 The log is a hash-table mapping backtraces to counters which represent
396 the amount of time spent at those points. Every backtrace is a vector
397 of functions, where the last few elements may be nil.
398 Before returning, a new log is allocated for future samples. */)
401 Lisp_Object result
= cpu_log
;
402 /* Here we're making the log visible to Elisp, so it's not safe any
403 more for our use afterwards since we can't rely on its special
404 pre-allocated keys anymore. So we have to allocate a new one. */
405 cpu_log
= (profiler_cpu_running
406 ? make_log (profiler_log_size
, profiler_max_stack_depth
)
408 Fputhash (Fmake_vector (make_number (1), Qautomatic_gc
),
409 make_number (cpu_gc_count
),
414 #endif /* PROFILER_CPU_SUPPORT */
416 /* Memory profiler. */
418 /* True if memory profiler is running. */
419 bool profiler_memory_running
;
421 static Lisp_Object memory_log
;
423 DEFUN ("profiler-memory-start", Fprofiler_memory_start
, Sprofiler_memory_start
,
425 doc
: /* Start/restart the memory profiler.
426 The memory profiler will take samples of the call-stack whenever a new
427 allocation takes place. Note that most small allocations only trigger
428 the profiler occasionally.
429 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
432 if (profiler_memory_running
)
433 error ("Memory profiler is already running");
435 if (NILP (memory_log
))
436 memory_log
= make_log (profiler_log_size
,
437 profiler_max_stack_depth
);
439 profiler_memory_running
= true;
444 DEFUN ("profiler-memory-stop",
445 Fprofiler_memory_stop
, Sprofiler_memory_stop
,
447 doc
: /* Stop the memory profiler. The profiler log is not affected.
448 Return non-nil if the profiler was running. */)
451 if (!profiler_memory_running
)
453 profiler_memory_running
= false;
457 DEFUN ("profiler-memory-running-p",
458 Fprofiler_memory_running_p
, Sprofiler_memory_running_p
,
460 doc
: /* Return non-nil if memory profiler is running. */)
463 return profiler_memory_running
? Qt
: Qnil
;
466 DEFUN ("profiler-memory-log",
467 Fprofiler_memory_log
, Sprofiler_memory_log
,
469 doc
: /* Return the current memory profiler log.
470 The log is a hash-table mapping backtraces to counters which represent
471 the amount of memory allocated at those points. Every backtrace is a vector
472 of functions, where the last few elements may be nil.
473 Before returning, a new log is allocated for future samples. */)
476 Lisp_Object result
= memory_log
;
477 /* Here we're making the log visible to Elisp , so it's not safe any
478 more for our use afterwards since we can't rely on its special
479 pre-allocated keys anymore. So we have to allocate a new one. */
480 memory_log
= (profiler_memory_running
481 ? make_log (profiler_log_size
, profiler_max_stack_depth
)
487 /* Signals and probes. */
489 /* Record that the current backtrace allocated SIZE bytes. */
491 malloc_probe (size_t size
)
493 eassert (HASH_TABLE_P (memory_log
));
494 record_backtrace (XHASH_TABLE (memory_log
), min (size
, MOST_POSITIVE_FIXNUM
));
497 DEFUN ("function-equal", Ffunction_equal
, Sfunction_equal
, 2, 2, 0,
498 doc
: /* Return non-nil if F1 and F2 come from the same source.
499 Used to determine if different closures are just different instances of
500 the same lambda expression, or are really unrelated function. */)
501 (Lisp_Object f1
, Lisp_Object f2
)
506 else if (COMPILEDP (f1
) && COMPILEDP (f2
))
507 res
= EQ (AREF (f1
, COMPILED_BYTECODE
), AREF (f2
, COMPILED_BYTECODE
));
508 else if (CONSP (f1
) && CONSP (f2
) && CONSP (XCDR (f1
)) && CONSP (XCDR (f2
))
509 && EQ (Qclosure
, XCAR (f1
))
510 && EQ (Qclosure
, XCAR (f2
)))
511 res
= EQ (XCDR (XCDR (f1
)), XCDR (XCDR (f2
)));
514 return res
? Qt
: Qnil
;
518 cmpfn_profiler (struct hash_table_test
*t
,
519 Lisp_Object bt1
, Lisp_Object bt2
)
521 if (VECTORP (bt1
) && VECTORP (bt2
))
523 ptrdiff_t i
, l
= ASIZE (bt1
);
524 if (l
!= ASIZE (bt2
))
526 for (i
= 0; i
< l
; i
++)
527 if (NILP (Ffunction_equal (AREF (bt1
, i
), AREF (bt2
, i
))))
532 return EQ (bt1
, bt2
);
536 hashfn_profiler (struct hash_table_test
*ht
, Lisp_Object bt
)
541 ptrdiff_t i
, l
= ASIZE (bt
);
542 for (i
= 0; i
< l
; i
++)
544 Lisp_Object f
= AREF (bt
, i
);
546 = (COMPILEDP (f
) ? XHASH (AREF (f
, COMPILED_BYTECODE
))
547 : (CONSP (f
) && CONSP (XCDR (f
)) && EQ (Qclosure
, XCAR (f
)))
548 ? XHASH (XCDR (XCDR (f
))) : XHASH (f
));
549 hash
= sxhash_combine (hash
, hash1
);
551 return SXHASH_REDUCE (hash
);
558 syms_of_profiler (void)
560 DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth
,
561 doc
: /* Number of elements from the call-stack recorded in the log. */);
562 profiler_max_stack_depth
= 16;
563 DEFVAR_INT ("profiler-log-size", profiler_log_size
,
564 doc
: /* Number of distinct call-stacks that can be recorded in a profiler log.
565 If the log gets full, some of the least-seen call-stacks will be evicted
566 to make room for new entries. */);
567 profiler_log_size
= 10000;
569 DEFSYM (Qprofiler_backtrace_equal
, "profiler-backtrace-equal");
571 hashtest_profiler
.name
= Qprofiler_backtrace_equal
;
572 hashtest_profiler
.user_hash_function
= Qnil
;
573 hashtest_profiler
.user_cmp_function
= Qnil
;
574 hashtest_profiler
.cmpfn
= cmpfn_profiler
;
575 hashtest_profiler
.hashfn
= hashfn_profiler
;
577 defsubr (&Sfunction_equal
);
579 #ifdef PROFILER_CPU_SUPPORT
580 profiler_cpu_running
= NOT_RUNNING
;
582 staticpro (&cpu_log
);
583 defsubr (&Sprofiler_cpu_start
);
584 defsubr (&Sprofiler_cpu_stop
);
585 defsubr (&Sprofiler_cpu_running_p
);
586 defsubr (&Sprofiler_cpu_log
);
588 profiler_memory_running
= false;
590 staticpro (&memory_log
);
591 defsubr (&Sprofiler_memory_start
);
592 defsubr (&Sprofiler_memory_stop
);
593 defsubr (&Sprofiler_memory_running_p
);
594 defsubr (&Sprofiler_memory_log
);