Fix problems caught with --enable-gcc-warnings
[emacs.git] / src / profiler.c
blobefdb1d9fe14f74d0bfe8202cf1876af028581fef
1 /* Profiler implementation.
3 Copyright (C) 2012-2015 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 #include <config.h>
21 #include "lisp.h"
22 #include "syssignal.h"
23 #include "systime.h"
25 /* Return A + B, but return the maximum fixnum if the result would overflow.
26 Assume A and B are nonnegative and in fixnum range. */
28 static EMACS_INT
29 saturated_add (EMACS_INT a, EMACS_INT b)
31 return min (a + b, MOST_POSITIVE_FIXNUM);
34 /* Logs. */
36 typedef struct Lisp_Hash_Table log_t;
38 static struct hash_table_test hashtest_profiler;
40 static Lisp_Object
41 make_log (EMACS_INT heap_size, EMACS_INT max_stack_depth)
43 /* We use a standard Elisp hash-table object, but we use it in
44 a special way. This is OK as long as the object is not exposed
45 to Elisp, i.e. until it is returned by *-profiler-log, after which
46 it can't be used any more. */
47 Lisp_Object log = make_hash_table (hashtest_profiler,
48 make_number (heap_size),
49 make_float (DEFAULT_REHASH_SIZE),
50 make_float (DEFAULT_REHASH_THRESHOLD),
51 Qnil);
52 struct Lisp_Hash_Table *h = XHASH_TABLE (log);
54 /* What is special about our hash-tables is that the keys are pre-filled
55 with the vectors we'll put in them. */
56 ptrdiff_t i = ASIZE (h->key_and_value) >> 1;
57 while (i > 0)
58 set_hash_key_slot (h, --i,
59 Fmake_vector (make_number (max_stack_depth), Qnil));
60 return log;
63 /* Evict the least used half of the hash_table.
65 When the table is full, we have to evict someone.
66 The easiest and most efficient is to evict the value we're about to add
67 (i.e. once the table is full, stop sampling).
69 We could also pick the element with the lowest count and evict it,
70 but finding it is O(N) and for that amount of work we get very
71 little in return: for the next sample, this latest sample will have
72 count==1 and will hence be a prime candidate for eviction :-(
74 So instead, we take O(N) time to eliminate more or less half of the
75 entries (the half with the lowest counts). So we get an amortized
76 cost of O(1) and we get O(N) time for a new entry to grow larger
77 than the other least counts before a new round of eviction. */
79 static EMACS_INT approximate_median (log_t *log,
80 ptrdiff_t start, ptrdiff_t size)
82 eassert (size > 0);
83 if (size < 2)
84 return XINT (HASH_VALUE (log, start));
85 if (size < 3)
86 /* Not an actual median, but better for our application than
87 choosing either of the two numbers. */
88 return ((XINT (HASH_VALUE (log, start))
89 + XINT (HASH_VALUE (log, start + 1)))
90 / 2);
91 else
93 ptrdiff_t newsize = size / 3;
94 ptrdiff_t start2 = start + newsize;
95 EMACS_INT i1 = approximate_median (log, start, newsize);
96 EMACS_INT i2 = approximate_median (log, start2, newsize);
97 EMACS_INT i3 = approximate_median (log, start2 + newsize,
98 size - 2 * newsize);
99 return (i1 < i2
100 ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
101 : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
105 static void evict_lower_half (log_t *log)
107 ptrdiff_t size = ASIZE (log->key_and_value) / 2;
108 EMACS_INT median = approximate_median (log, 0, size);
109 ptrdiff_t i;
111 for (i = 0; i < size; i++)
112 /* Evict not only values smaller but also values equal to the median,
113 so as to make sure we evict something no matter what. */
114 if (XINT (HASH_VALUE (log, i)) <= median)
116 Lisp_Object key = HASH_KEY (log, i);
117 { /* FIXME: we could make this more efficient. */
118 Lisp_Object tmp;
119 XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr. */
120 Fremhash (key, tmp);
122 eassert (EQ (log->next_free, make_number (i)));
124 eassert (VECTORP (key));
125 for (ptrdiff_t j = 0; j < ASIZE (key); j++)
126 ASET (key, j, Qnil);
128 set_hash_key_slot (log, i, key);
132 /* Record the current backtrace in LOG. COUNT is the weight of this
133 current backtrace: interrupt counts for CPU, and the allocation
134 size for memory. */
136 static void
137 record_backtrace (log_t *log, EMACS_INT count)
139 Lisp_Object backtrace;
140 ptrdiff_t index;
142 if (!INTEGERP (log->next_free))
143 /* FIXME: transfer the evicted counts to a special entry rather
144 than dropping them on the floor. */
145 evict_lower_half (log);
146 index = XINT (log->next_free);
148 /* Get a "working memory" vector. */
149 backtrace = HASH_KEY (log, index);
150 get_backtrace (backtrace);
152 { /* We basically do a `gethash+puthash' here, except that we have to be
153 careful to avoid memory allocation since we're in a signal
154 handler, and we optimize the code to try and avoid computing the
155 hash+lookup twice. See fns.c:Fputhash for reference. */
156 EMACS_UINT hash;
157 ptrdiff_t j = hash_lookup (log, backtrace, &hash);
158 if (j >= 0)
160 EMACS_INT old_val = XINT (HASH_VALUE (log, j));
161 EMACS_INT new_val = saturated_add (old_val, count);
162 set_hash_value_slot (log, j, make_number (new_val));
164 else
165 { /* BEWARE! hash_put in general can allocate memory.
166 But currently it only does that if log->next_free is nil. */
167 eassert (!NILP (log->next_free));
168 ptrdiff_t j = hash_put (log, backtrace, make_number (count), hash);
169 /* Let's make sure we've put `backtrace' right where it
170 already was to start with. */
171 eassert (index == j);
173 /* FIXME: If the hash-table is almost full, we should set
174 some global flag so that some Elisp code can offload its
175 data elsewhere, so as to avoid the eviction code.
176 There are 2 ways to do that, AFAICT:
177 - Set a flag checked in QUIT, such that QUIT can then call
178 Fprofiler_cpu_log and stash the full log for later use.
179 - Set a flag check in post-gc-hook, so that Elisp code can call
180 profiler-cpu-log. That gives us more flexibility since that
181 Elisp code can then do all kinds of fun stuff like write
182 the log to disk. Or turn it right away into a call tree.
183 Of course, using Elisp is generally preferable, but it may
184 take longer until we get a chance to run the Elisp code, so
185 there's more risk that the table will get full before we
186 get there. */
191 /* Sampling profiler. */
193 #ifdef PROFILER_CPU_SUPPORT
195 /* The profiler timer and whether it was properly initialized, if
196 POSIX timers are available. */
197 #ifdef HAVE_ITIMERSPEC
198 static timer_t profiler_timer;
199 static bool profiler_timer_ok;
200 #endif
202 /* Status of sampling profiler. */
203 static enum profiler_cpu_running
204 { NOT_RUNNING, TIMER_SETTIME_RUNNING, SETITIMER_RUNNING }
205 profiler_cpu_running;
207 /* Hash-table log of CPU profiler. */
208 static Lisp_Object cpu_log;
210 /* Separate counter for the time spent in the GC. */
211 static EMACS_INT cpu_gc_count;
213 /* The current sampling interval in nanoseconds. */
214 static EMACS_INT current_sampling_interval;
216 /* Signal handler for sampling profiler. */
218 /* timer_getoverrun is not implemented on Cygwin, but the following
219 seems to be good enough for profiling. */
220 #ifdef CYGWIN
221 #define timer_getoverrun(x) 0
222 #endif
224 static void
225 handle_profiler_signal (int signal)
227 if (EQ (backtrace_top_function (), Qautomatic_gc))
228 /* Special case the time-count inside GC because the hash-table
229 code is not prepared to be used while the GC is running.
230 More specifically it uses ASIZE at many places where it does
231 not expect the ARRAY_MARK_FLAG to be set. We could try and
232 harden the hash-table code, but it doesn't seem worth the
233 effort. */
234 cpu_gc_count = saturated_add (cpu_gc_count, 1);
235 else
237 EMACS_INT count = 1;
238 #ifdef HAVE_ITIMERSPEC
239 if (profiler_timer_ok)
241 int overruns = timer_getoverrun (profiler_timer);
242 eassert (overruns >= 0);
243 count += overruns;
245 #endif
246 eassert (HASH_TABLE_P (cpu_log));
247 record_backtrace (XHASH_TABLE (cpu_log), count);
251 static void
252 deliver_profiler_signal (int signal)
254 deliver_process_signal (signal, handle_profiler_signal);
257 static int
258 setup_cpu_timer (Lisp_Object sampling_interval)
260 struct sigaction action;
261 struct itimerval timer;
262 struct timespec interval;
263 int billion = 1000000000;
265 if (! RANGED_INTEGERP (1, sampling_interval,
266 (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX / billion
267 ? ((EMACS_INT) TYPE_MAXIMUM (time_t) * billion
268 + (billion - 1))
269 : EMACS_INT_MAX)))
270 return -1;
272 current_sampling_interval = XINT (sampling_interval);
273 interval = make_timespec (current_sampling_interval / billion,
274 current_sampling_interval % billion);
275 emacs_sigaction_init (&action, deliver_profiler_signal);
276 sigaction (SIGPROF, &action, 0);
278 #ifdef HAVE_ITIMERSPEC
279 if (! profiler_timer_ok)
281 /* System clocks to try, in decreasing order of desirability. */
282 static clockid_t const system_clock[] = {
283 #ifdef CLOCK_THREAD_CPUTIME_ID
284 CLOCK_THREAD_CPUTIME_ID,
285 #endif
286 #ifdef CLOCK_PROCESS_CPUTIME_ID
287 CLOCK_PROCESS_CPUTIME_ID,
288 #endif
289 #ifdef CLOCK_MONOTONIC
290 CLOCK_MONOTONIC,
291 #endif
292 CLOCK_REALTIME
294 int i;
295 struct sigevent sigev;
296 sigev.sigev_value.sival_ptr = &profiler_timer;
297 sigev.sigev_signo = SIGPROF;
298 sigev.sigev_notify = SIGEV_SIGNAL;
300 for (i = 0; i < ARRAYELTS (system_clock); i++)
301 if (timer_create (system_clock[i], &sigev, &profiler_timer) == 0)
303 profiler_timer_ok = 1;
304 break;
308 if (profiler_timer_ok)
310 struct itimerspec ispec;
311 ispec.it_value = ispec.it_interval = interval;
312 if (timer_settime (profiler_timer, 0, &ispec, 0) == 0)
313 return TIMER_SETTIME_RUNNING;
315 #endif
317 #ifdef HAVE_SETITIMER
318 timer.it_value = timer.it_interval = make_timeval (interval);
319 if (setitimer (ITIMER_PROF, &timer, 0) == 0)
320 return SETITIMER_RUNNING;
321 #endif
323 return NOT_RUNNING;
326 DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start,
327 1, 1, 0,
328 doc: /* Start or restart the cpu profiler.
329 It takes call-stack samples each SAMPLING-INTERVAL nanoseconds, approximately.
330 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
331 (Lisp_Object sampling_interval)
333 if (profiler_cpu_running)
334 error ("CPU profiler is already running");
336 if (NILP (cpu_log))
338 cpu_gc_count = 0;
339 cpu_log = make_log (profiler_log_size,
340 profiler_max_stack_depth);
343 int status = setup_cpu_timer (sampling_interval);
344 if (status == -1)
346 profiler_cpu_running = NOT_RUNNING;
347 error ("Invalid sampling interval");
349 else
351 profiler_cpu_running = status;
352 if (! profiler_cpu_running)
353 error ("Unable to start profiler timer");
356 return Qt;
359 DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop,
360 0, 0, 0,
361 doc: /* Stop the cpu profiler. The profiler log is not affected.
362 Return non-nil if the profiler was running. */)
363 (void)
365 switch (profiler_cpu_running)
367 case NOT_RUNNING:
368 return Qnil;
370 #ifdef HAVE_ITIMERSPEC
371 case TIMER_SETTIME_RUNNING:
373 struct itimerspec disable;
374 memset (&disable, 0, sizeof disable);
375 timer_settime (profiler_timer, 0, &disable, 0);
377 break;
378 #endif
380 #ifdef HAVE_SETITIMER
381 case SETITIMER_RUNNING:
383 struct itimerval disable;
384 memset (&disable, 0, sizeof disable);
385 setitimer (ITIMER_PROF, &disable, 0);
387 break;
388 #endif
391 signal (SIGPROF, SIG_IGN);
392 profiler_cpu_running = NOT_RUNNING;
393 return Qt;
396 DEFUN ("profiler-cpu-running-p",
397 Fprofiler_cpu_running_p, Sprofiler_cpu_running_p,
398 0, 0, 0,
399 doc: /* Return non-nil if cpu profiler is running. */)
400 (void)
402 return profiler_cpu_running ? Qt : Qnil;
405 DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log,
406 0, 0, 0,
407 doc: /* Return the current cpu profiler log.
408 The log is a hash-table mapping backtraces to counters which represent
409 the amount of time spent at those points. Every backtrace is a vector
410 of functions, where the last few elements may be nil.
411 Before returning, a new log is allocated for future samples. */)
412 (void)
414 Lisp_Object result = cpu_log;
415 /* Here we're making the log visible to Elisp, so it's not safe any
416 more for our use afterwards since we can't rely on its special
417 pre-allocated keys anymore. So we have to allocate a new one. */
418 cpu_log = (profiler_cpu_running
419 ? make_log (profiler_log_size, profiler_max_stack_depth)
420 : Qnil);
421 Fputhash (Fmake_vector (make_number (1), Qautomatic_gc),
422 make_number (cpu_gc_count),
423 result);
424 cpu_gc_count = 0;
425 return result;
427 #endif /* PROFILER_CPU_SUPPORT */
429 /* Memory profiler. */
431 /* True if memory profiler is running. */
432 bool profiler_memory_running;
434 static Lisp_Object memory_log;
436 DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start,
437 0, 0, 0,
438 doc: /* Start/restart the memory profiler.
439 The memory profiler will take samples of the call-stack whenever a new
440 allocation takes place. Note that most small allocations only trigger
441 the profiler occasionally.
442 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
443 (void)
445 if (profiler_memory_running)
446 error ("Memory profiler is already running");
448 if (NILP (memory_log))
449 memory_log = make_log (profiler_log_size,
450 profiler_max_stack_depth);
452 profiler_memory_running = true;
454 return Qt;
457 DEFUN ("profiler-memory-stop",
458 Fprofiler_memory_stop, Sprofiler_memory_stop,
459 0, 0, 0,
460 doc: /* Stop the memory profiler. The profiler log is not affected.
461 Return non-nil if the profiler was running. */)
462 (void)
464 if (!profiler_memory_running)
465 return Qnil;
466 profiler_memory_running = false;
467 return Qt;
470 DEFUN ("profiler-memory-running-p",
471 Fprofiler_memory_running_p, Sprofiler_memory_running_p,
472 0, 0, 0,
473 doc: /* Return non-nil if memory profiler is running. */)
474 (void)
476 return profiler_memory_running ? Qt : Qnil;
479 DEFUN ("profiler-memory-log",
480 Fprofiler_memory_log, Sprofiler_memory_log,
481 0, 0, 0,
482 doc: /* Return the current memory profiler log.
483 The log is a hash-table mapping backtraces to counters which represent
484 the amount of memory allocated at those points. Every backtrace is a vector
485 of functions, where the last few elements may be nil.
486 Before returning, a new log is allocated for future samples. */)
487 (void)
489 Lisp_Object result = memory_log;
490 /* Here we're making the log visible to Elisp , so it's not safe any
491 more for our use afterwards since we can't rely on its special
492 pre-allocated keys anymore. So we have to allocate a new one. */
493 memory_log = (profiler_memory_running
494 ? make_log (profiler_log_size, profiler_max_stack_depth)
495 : Qnil);
496 return result;
500 /* Signals and probes. */
502 /* Record that the current backtrace allocated SIZE bytes. */
503 void
504 malloc_probe (size_t size)
506 eassert (HASH_TABLE_P (memory_log));
507 record_backtrace (XHASH_TABLE (memory_log), min (size, MOST_POSITIVE_FIXNUM));
510 DEFUN ("function-equal", Ffunction_equal, Sfunction_equal, 2, 2, 0,
511 doc: /* Return non-nil if F1 and F2 come from the same source.
512 Used to determine if different closures are just different instances of
513 the same lambda expression, or are really unrelated function. */)
514 (Lisp_Object f1, Lisp_Object f2)
516 bool res;
517 if (EQ (f1, f2))
518 res = true;
519 else if (COMPILEDP (f1) && COMPILEDP (f2))
520 res = EQ (AREF (f1, COMPILED_BYTECODE), AREF (f2, COMPILED_BYTECODE));
521 else if (CONSP (f1) && CONSP (f2) && CONSP (XCDR (f1)) && CONSP (XCDR (f2))
522 && EQ (Qclosure, XCAR (f1))
523 && EQ (Qclosure, XCAR (f2)))
524 res = EQ (XCDR (XCDR (f1)), XCDR (XCDR (f2)));
525 else
526 res = false;
527 return res ? Qt : Qnil;
530 static bool
531 cmpfn_profiler (struct hash_table_test *t,
532 Lisp_Object bt1, Lisp_Object bt2)
534 if (VECTORP (bt1) && VECTORP (bt2))
536 ptrdiff_t i, l = ASIZE (bt1);
537 if (l != ASIZE (bt2))
538 return false;
539 for (i = 0; i < l; i++)
540 if (NILP (Ffunction_equal (AREF (bt1, i), AREF (bt2, i))))
541 return false;
542 return true;
544 else
545 return EQ (bt1, bt2);
548 static EMACS_UINT
549 hashfn_profiler (struct hash_table_test *ht, Lisp_Object bt)
551 if (VECTORP (bt))
553 EMACS_UINT hash = 0;
554 ptrdiff_t i, l = ASIZE (bt);
555 for (i = 0; i < l; i++)
557 Lisp_Object f = AREF (bt, i);
558 EMACS_UINT hash1
559 = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE))
560 : (CONSP (f) && CONSP (XCDR (f)) && EQ (Qclosure, XCAR (f)))
561 ? XHASH (XCDR (XCDR (f))) : XHASH (f));
562 hash = sxhash_combine (hash, hash1);
564 return SXHASH_REDUCE (hash);
566 else
567 return XHASH (bt);
570 void
571 syms_of_profiler (void)
573 DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
574 doc: /* Number of elements from the call-stack recorded in the log. */);
575 profiler_max_stack_depth = 16;
576 DEFVAR_INT ("profiler-log-size", profiler_log_size,
577 doc: /* Number of distinct call-stacks that can be recorded in a profiler log.
578 If the log gets full, some of the least-seen call-stacks will be evicted
579 to make room for new entries. */);
580 profiler_log_size = 10000;
582 DEFSYM (Qprofiler_backtrace_equal, "profiler-backtrace-equal");
584 hashtest_profiler.name = Qprofiler_backtrace_equal;
585 hashtest_profiler.user_hash_function = Qnil;
586 hashtest_profiler.user_cmp_function = Qnil;
587 hashtest_profiler.cmpfn = cmpfn_profiler;
588 hashtest_profiler.hashfn = hashfn_profiler;
590 defsubr (&Sfunction_equal);
592 #ifdef PROFILER_CPU_SUPPORT
593 profiler_cpu_running = NOT_RUNNING;
594 cpu_log = Qnil;
595 staticpro (&cpu_log);
596 defsubr (&Sprofiler_cpu_start);
597 defsubr (&Sprofiler_cpu_stop);
598 defsubr (&Sprofiler_cpu_running_p);
599 defsubr (&Sprofiler_cpu_log);
600 #endif
601 profiler_memory_running = false;
602 memory_log = Qnil;
603 staticpro (&memory_log);
604 defsubr (&Sprofiler_memory_start);
605 defsubr (&Sprofiler_memory_stop);
606 defsubr (&Sprofiler_memory_running_p);
607 defsubr (&Sprofiler_memory_log);