* NEWS: Fix the last change.
[emacs.git] / src / profiler.c
blob8b092dcc818b7b26b2a3ae15364f14efbf7efe32
1 /* Profiler implementation.
3 Copyright (C) 2012-2014 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 #include <config.h>
21 #include "lisp.h"
22 #include "syssignal.h"
23 #include "systime.h"
25 /* Return A + B, but return the maximum fixnum if the result would overflow.
26 Assume A and B are nonnegative and in fixnum range. */
28 static EMACS_INT
29 saturated_add (EMACS_INT a, EMACS_INT b)
31 return min (a + b, MOST_POSITIVE_FIXNUM);
34 /* Logs. */
36 typedef struct Lisp_Hash_Table log_t;
38 static Lisp_Object Qprofiler_backtrace_equal;
39 static struct hash_table_test hashtest_profiler;
41 static Lisp_Object
42 make_log (int heap_size, int max_stack_depth)
44 /* We use a standard Elisp hash-table object, but we use it in
45 a special way. This is OK as long as the object is not exposed
46 to Elisp, i.e. until it is returned by *-profiler-log, after which
47 it can't be used any more. */
48 Lisp_Object log = make_hash_table (hashtest_profiler,
49 make_number (heap_size),
50 make_float (DEFAULT_REHASH_SIZE),
51 make_float (DEFAULT_REHASH_THRESHOLD),
52 Qnil);
53 struct Lisp_Hash_Table *h = XHASH_TABLE (log);
55 /* What is special about our hash-tables is that the keys are pre-filled
56 with the vectors we'll put in them. */
57 int i = ASIZE (h->key_and_value) / 2;
58 while (i > 0)
59 set_hash_key_slot (h, --i,
60 Fmake_vector (make_number (max_stack_depth), Qnil));
61 return log;
64 /* Evict the least used half of the hash_table.
66 When the table is full, we have to evict someone.
67 The easiest and most efficient is to evict the value we're about to add
68 (i.e. once the table is full, stop sampling).
70 We could also pick the element with the lowest count and evict it,
71 but finding it is O(N) and for that amount of work we get very
72 little in return: for the next sample, this latest sample will have
73 count==1 and will hence be a prime candidate for eviction :-(
75 So instead, we take O(N) time to eliminate more or less half of the
76 entries (the half with the lowest counts). So we get an amortized
77 cost of O(1) and we get O(N) time for a new entry to grow larger
78 than the other least counts before a new round of eviction. */
80 static EMACS_INT approximate_median (log_t *log,
81 ptrdiff_t start, ptrdiff_t size)
83 eassert (size > 0);
84 if (size < 2)
85 return XINT (HASH_VALUE (log, start));
86 if (size < 3)
87 /* Not an actual median, but better for our application than
88 choosing either of the two numbers. */
89 return ((XINT (HASH_VALUE (log, start))
90 + XINT (HASH_VALUE (log, start + 1)))
91 / 2);
92 else
94 ptrdiff_t newsize = size / 3;
95 ptrdiff_t start2 = start + newsize;
96 EMACS_INT i1 = approximate_median (log, start, newsize);
97 EMACS_INT i2 = approximate_median (log, start2, newsize);
98 EMACS_INT i3 = approximate_median (log, start2 + newsize,
99 size - 2 * newsize);
100 return (i1 < i2
101 ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
102 : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
106 static void evict_lower_half (log_t *log)
108 ptrdiff_t size = ASIZE (log->key_and_value) / 2;
109 EMACS_INT median = approximate_median (log, 0, size);
110 ptrdiff_t i;
112 for (i = 0; i < size; i++)
113 /* Evict not only values smaller but also values equal to the median,
114 so as to make sure we evict something no matter what. */
115 if (XINT (HASH_VALUE (log, i)) <= median)
117 Lisp_Object key = HASH_KEY (log, i);
118 { /* FIXME: we could make this more efficient. */
119 Lisp_Object tmp;
120 XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr. */
121 Fremhash (key, tmp);
123 eassert (EQ (log->next_free, make_number (i)));
125 int j;
126 eassert (VECTORP (key));
127 for (j = 0; j < ASIZE (key); j++)
128 ASET (key, j, Qnil);
130 set_hash_key_slot (log, i, key);
134 /* Record the current backtrace in LOG. COUNT is the weight of this
135 current backtrace: interrupt counts for CPU, and the allocation
136 size for memory. */
138 static void
139 record_backtrace (log_t *log, EMACS_INT count)
141 Lisp_Object backtrace;
142 ptrdiff_t index;
144 if (!INTEGERP (log->next_free))
145 /* FIXME: transfer the evicted counts to a special entry rather
146 than dropping them on the floor. */
147 evict_lower_half (log);
148 index = XINT (log->next_free);
150 /* Get a "working memory" vector. */
151 backtrace = HASH_KEY (log, index);
152 get_backtrace (backtrace);
154 { /* We basically do a `gethash+puthash' here, except that we have to be
155 careful to avoid memory allocation since we're in a signal
156 handler, and we optimize the code to try and avoid computing the
157 hash+lookup twice. See fns.c:Fputhash for reference. */
158 EMACS_UINT hash;
159 ptrdiff_t j = hash_lookup (log, backtrace, &hash);
160 if (j >= 0)
162 EMACS_INT old_val = XINT (HASH_VALUE (log, j));
163 EMACS_INT new_val = saturated_add (old_val, count);
164 set_hash_value_slot (log, j, make_number (new_val));
166 else
167 { /* BEWARE! hash_put in general can allocate memory.
168 But currently it only does that if log->next_free is nil. */
169 int j;
170 eassert (!NILP (log->next_free));
171 j = hash_put (log, backtrace, make_number (count), hash);
172 /* Let's make sure we've put `backtrace' right where it
173 already was to start with. */
174 eassert (index == j);
176 /* FIXME: If the hash-table is almost full, we should set
177 some global flag so that some Elisp code can offload its
178 data elsewhere, so as to avoid the eviction code.
179 There are 2 ways to do that, AFAICT:
180 - Set a flag checked in QUIT, such that QUIT can then call
181 Fprofiler_cpu_log and stash the full log for later use.
182 - Set a flag check in post-gc-hook, so that Elisp code can call
183 profiler-cpu-log. That gives us more flexibility since that
184 Elisp code can then do all kinds of fun stuff like write
185 the log to disk. Or turn it right away into a call tree.
186 Of course, using Elisp is generally preferable, but it may
187 take longer until we get a chance to run the Elisp code, so
188 there's more risk that the table will get full before we
189 get there. */
194 /* Sampling profiler. */
196 #ifdef PROFILER_CPU_SUPPORT
198 /* The profiler timer and whether it was properly initialized, if
199 POSIX timers are available. */
200 #ifdef HAVE_ITIMERSPEC
201 static timer_t profiler_timer;
202 static bool profiler_timer_ok;
203 #endif
205 /* Status of sampling profiler. */
206 static enum profiler_cpu_running
207 { NOT_RUNNING, TIMER_SETTIME_RUNNING, SETITIMER_RUNNING }
208 profiler_cpu_running;
210 /* Hash-table log of CPU profiler. */
211 static Lisp_Object cpu_log;
213 /* Separate counter for the time spent in the GC. */
214 static EMACS_INT cpu_gc_count;
216 /* The current sampling interval in nanoseconds. */
217 static EMACS_INT current_sampling_interval;
219 /* Signal handler for sampling profiler. */
221 static void
222 handle_profiler_signal (int signal)
224 if (EQ (backtrace_top_function (), Qautomatic_gc))
225 /* Special case the time-count inside GC because the hash-table
226 code is not prepared to be used while the GC is running.
227 More specifically it uses ASIZE at many places where it does
228 not expect the ARRAY_MARK_FLAG to be set. We could try and
229 harden the hash-table code, but it doesn't seem worth the
230 effort. */
231 cpu_gc_count = saturated_add (cpu_gc_count, 1);
232 else
234 EMACS_INT count = 1;
235 #ifdef HAVE_ITIMERSPEC
236 if (profiler_timer_ok)
238 int overruns = timer_getoverrun (profiler_timer);
239 eassert (overruns >= 0);
240 count += overruns;
242 #endif
243 eassert (HASH_TABLE_P (cpu_log));
244 record_backtrace (XHASH_TABLE (cpu_log), count);
248 static void
249 deliver_profiler_signal (int signal)
251 deliver_process_signal (signal, handle_profiler_signal);
254 static enum profiler_cpu_running
255 setup_cpu_timer (Lisp_Object sampling_interval)
257 struct sigaction action;
258 struct itimerval timer;
259 struct timespec interval;
260 int billion = 1000000000;
262 if (! RANGED_INTEGERP (1, sampling_interval,
263 (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX / billion
264 ? ((EMACS_INT) TYPE_MAXIMUM (time_t) * billion
265 + (billion - 1))
266 : EMACS_INT_MAX)))
267 return NOT_RUNNING;
269 current_sampling_interval = XINT (sampling_interval);
270 interval = make_timespec (current_sampling_interval / billion,
271 current_sampling_interval % billion);
272 emacs_sigaction_init (&action, deliver_profiler_signal);
273 sigaction (SIGPROF, &action, 0);
275 #ifdef HAVE_ITIMERSPEC
276 if (! profiler_timer_ok)
278 /* System clocks to try, in decreasing order of desirability. */
279 static clockid_t const system_clock[] = {
280 #ifdef CLOCK_THREAD_CPUTIME_ID
281 CLOCK_THREAD_CPUTIME_ID,
282 #endif
283 #ifdef CLOCK_PROCESS_CPUTIME_ID
284 CLOCK_PROCESS_CPUTIME_ID,
285 #endif
286 #ifdef CLOCK_MONOTONIC
287 CLOCK_MONOTONIC,
288 #endif
289 CLOCK_REALTIME
291 int i;
292 struct sigevent sigev;
293 sigev.sigev_value.sival_ptr = &profiler_timer;
294 sigev.sigev_signo = SIGPROF;
295 sigev.sigev_notify = SIGEV_SIGNAL;
297 for (i = 0; i < sizeof system_clock / sizeof *system_clock; i++)
298 if (timer_create (system_clock[i], &sigev, &profiler_timer) == 0)
300 profiler_timer_ok = 1;
301 break;
305 if (profiler_timer_ok)
307 struct itimerspec ispec;
308 ispec.it_value = ispec.it_interval = interval;
309 if (timer_settime (profiler_timer, 0, &ispec, 0) == 0)
310 return TIMER_SETTIME_RUNNING;
312 #endif
314 #ifdef HAVE_SETITIMER
315 timer.it_value = timer.it_interval = make_timeval (interval);
316 if (setitimer (ITIMER_PROF, &timer, 0) == 0)
317 return SETITIMER_RUNNING;
318 #endif
320 return NOT_RUNNING;
323 DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start,
324 1, 1, 0,
325 doc: /* Start or restart the cpu profiler.
326 It takes call-stack samples each SAMPLING-INTERVAL nanoseconds, approximately.
327 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
328 (Lisp_Object sampling_interval)
330 if (profiler_cpu_running)
331 error ("CPU profiler is already running");
333 if (NILP (cpu_log))
335 cpu_gc_count = 0;
336 cpu_log = make_log (profiler_log_size,
337 profiler_max_stack_depth);
340 profiler_cpu_running = setup_cpu_timer (sampling_interval);
341 if (! profiler_cpu_running)
342 error ("Invalid sampling interval");
344 return Qt;
347 DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop,
348 0, 0, 0,
349 doc: /* Stop the cpu profiler. The profiler log is not affected.
350 Return non-nil if the profiler was running. */)
351 (void)
353 switch (profiler_cpu_running)
355 case NOT_RUNNING:
356 return Qnil;
358 #ifdef HAVE_ITIMERSPEC
359 case TIMER_SETTIME_RUNNING:
361 struct itimerspec disable;
362 memset (&disable, 0, sizeof disable);
363 timer_settime (profiler_timer, 0, &disable, 0);
365 break;
366 #endif
368 #ifdef HAVE_SETITIMER
369 case SETITIMER_RUNNING:
371 struct itimerval disable;
372 memset (&disable, 0, sizeof disable);
373 setitimer (ITIMER_PROF, &disable, 0);
375 break;
376 #endif
379 signal (SIGPROF, SIG_IGN);
380 profiler_cpu_running = NOT_RUNNING;
381 return Qt;
384 DEFUN ("profiler-cpu-running-p",
385 Fprofiler_cpu_running_p, Sprofiler_cpu_running_p,
386 0, 0, 0,
387 doc: /* Return non-nil iff cpu profiler is running. */)
388 (void)
390 return profiler_cpu_running ? Qt : Qnil;
393 DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log,
394 0, 0, 0,
395 doc: /* Return the current cpu profiler log.
396 The log is a hash-table mapping backtraces to counters which represent
397 the amount of time spent at those points. Every backtrace is a vector
398 of functions, where the last few elements may be nil.
399 Before returning, a new log is allocated for future samples. */)
400 (void)
402 Lisp_Object result = cpu_log;
403 /* Here we're making the log visible to Elisp, so it's not safe any
404 more for our use afterwards since we can't rely on its special
405 pre-allocated keys anymore. So we have to allocate a new one. */
406 cpu_log = (profiler_cpu_running
407 ? make_log (profiler_log_size, profiler_max_stack_depth)
408 : Qnil);
409 Fputhash (Fmake_vector (make_number (1), Qautomatic_gc),
410 make_number (cpu_gc_count),
411 result);
412 cpu_gc_count = 0;
413 return result;
415 #endif /* PROFILER_CPU_SUPPORT */
417 /* Memory profiler. */
419 /* True if memory profiler is running. */
420 bool profiler_memory_running;
422 static Lisp_Object memory_log;
424 DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start,
425 0, 0, 0,
426 doc: /* Start/restart the memory profiler.
427 The memory profiler will take samples of the call-stack whenever a new
428 allocation takes place. Note that most small allocations only trigger
429 the profiler occasionally.
430 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
431 (void)
433 if (profiler_memory_running)
434 error ("Memory profiler is already running");
436 if (NILP (memory_log))
437 memory_log = make_log (profiler_log_size,
438 profiler_max_stack_depth);
440 profiler_memory_running = true;
442 return Qt;
445 DEFUN ("profiler-memory-stop",
446 Fprofiler_memory_stop, Sprofiler_memory_stop,
447 0, 0, 0,
448 doc: /* Stop the memory profiler. The profiler log is not affected.
449 Return non-nil if the profiler was running. */)
450 (void)
452 if (!profiler_memory_running)
453 return Qnil;
454 profiler_memory_running = false;
455 return Qt;
458 DEFUN ("profiler-memory-running-p",
459 Fprofiler_memory_running_p, Sprofiler_memory_running_p,
460 0, 0, 0,
461 doc: /* Return non-nil if memory profiler is running. */)
462 (void)
464 return profiler_memory_running ? Qt : Qnil;
467 DEFUN ("profiler-memory-log",
468 Fprofiler_memory_log, Sprofiler_memory_log,
469 0, 0, 0,
470 doc: /* Return the current memory profiler log.
471 The log is a hash-table mapping backtraces to counters which represent
472 the amount of memory allocated at those points. Every backtrace is a vector
473 of functions, where the last few elements may be nil.
474 Before returning, a new log is allocated for future samples. */)
475 (void)
477 Lisp_Object result = memory_log;
478 /* Here we're making the log visible to Elisp , so it's not safe any
479 more for our use afterwards since we can't rely on its special
480 pre-allocated keys anymore. So we have to allocate a new one. */
481 memory_log = (profiler_memory_running
482 ? make_log (profiler_log_size, profiler_max_stack_depth)
483 : Qnil);
484 return result;
488 /* Signals and probes. */
490 /* Record that the current backtrace allocated SIZE bytes. */
491 void
492 malloc_probe (size_t size)
494 eassert (HASH_TABLE_P (memory_log));
495 record_backtrace (XHASH_TABLE (memory_log), min (size, MOST_POSITIVE_FIXNUM));
498 DEFUN ("function-equal", Ffunction_equal, Sfunction_equal, 2, 2, 0,
499 doc: /* Return non-nil if F1 and F2 come from the same source.
500 Used to determine if different closures are just different instances of
501 the same lambda expression, or are really unrelated function. */)
502 (Lisp_Object f1, Lisp_Object f2)
504 bool res;
505 if (EQ (f1, f2))
506 res = true;
507 else if (COMPILEDP (f1) && COMPILEDP (f2))
508 res = EQ (AREF (f1, COMPILED_BYTECODE), AREF (f2, COMPILED_BYTECODE));
509 else if (CONSP (f1) && CONSP (f2) && CONSP (XCDR (f1)) && CONSP (XCDR (f2))
510 && EQ (Qclosure, XCAR (f1))
511 && EQ (Qclosure, XCAR (f2)))
512 res = EQ (XCDR (XCDR (f1)), XCDR (XCDR (f2)));
513 else
514 res = false;
515 return res ? Qt : Qnil;
518 static bool
519 cmpfn_profiler (struct hash_table_test *t,
520 Lisp_Object bt1, Lisp_Object bt2)
522 if (VECTORP (bt1) && VECTORP (bt2))
524 ptrdiff_t i, l = ASIZE (bt1);
525 if (l != ASIZE (bt2))
526 return false;
527 for (i = 0; i < l; i++)
528 if (NILP (Ffunction_equal (AREF (bt1, i), AREF (bt2, i))))
529 return false;
530 return true;
532 else
533 return EQ (bt1, bt2);
536 static EMACS_UINT
537 hashfn_profiler (struct hash_table_test *ht, Lisp_Object bt)
539 if (VECTORP (bt))
541 EMACS_UINT hash = 0;
542 ptrdiff_t i, l = ASIZE (bt);
543 for (i = 0; i < l; i++)
545 Lisp_Object f = AREF (bt, i);
546 EMACS_UINT hash1
547 = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE))
548 : (CONSP (f) && CONSP (XCDR (f)) && EQ (Qclosure, XCAR (f)))
549 ? XHASH (XCDR (XCDR (f))) : XHASH (f));
550 hash = sxhash_combine (hash, hash1);
552 return SXHASH_REDUCE (hash);
554 else
555 return XHASH (bt);
558 void
559 syms_of_profiler (void)
561 DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
562 doc: /* Number of elements from the call-stack recorded in the log. */);
563 profiler_max_stack_depth = 16;
564 DEFVAR_INT ("profiler-log-size", profiler_log_size,
565 doc: /* Number of distinct call-stacks that can be recorded in a profiler log.
566 If the log gets full, some of the least-seen call-stacks will be evicted
567 to make room for new entries. */);
568 profiler_log_size = 10000;
570 DEFSYM (Qprofiler_backtrace_equal, "profiler-backtrace-equal");
572 hashtest_profiler.name = Qprofiler_backtrace_equal;
573 hashtest_profiler.user_hash_function = Qnil;
574 hashtest_profiler.user_cmp_function = Qnil;
575 hashtest_profiler.cmpfn = cmpfn_profiler;
576 hashtest_profiler.hashfn = hashfn_profiler;
578 defsubr (&Sfunction_equal);
580 #ifdef PROFILER_CPU_SUPPORT
581 profiler_cpu_running = NOT_RUNNING;
582 cpu_log = Qnil;
583 staticpro (&cpu_log);
584 defsubr (&Sprofiler_cpu_start);
585 defsubr (&Sprofiler_cpu_stop);
586 defsubr (&Sprofiler_cpu_running_p);
587 defsubr (&Sprofiler_cpu_log);
588 #endif
589 profiler_memory_running = false;
590 memory_log = Qnil;
591 staticpro (&memory_log);
592 defsubr (&Sprofiler_memory_start);
593 defsubr (&Sprofiler_memory_stop);
594 defsubr (&Sprofiler_memory_running_p);
595 defsubr (&Sprofiler_memory_log);