/* Profiler implementation.

Copyright (C) 2012-2017 Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
#include <config.h>
#include "lisp.h"
#include "syssignal.h"
#include "systime.h"
/* Return A + B, but return the maximum fixnum if the result would overflow.
   Assume A and B are nonnegative and in fixnum range.  */

static EMACS_INT
saturated_add (EMACS_INT a, EMACS_INT b)
{
  return min (a + b, MOST_POSITIVE_FIXNUM);
}
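
/* For illustration: saturated_add (MOST_POSITIVE_FIXNUM, 1) returns
   MOST_POSITIVE_FIXNUM instead of wrapping around.  A + B itself
   cannot overflow EMACS_INT here, since both arguments are in fixnum
   range and fixnums are narrower than EMACS_INT.  */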
/* Logs.  */

typedef struct Lisp_Hash_Table log_t;

static struct hash_table_test hashtest_profiler;
static Lisp_Object
make_log (EMACS_INT heap_size, EMACS_INT max_stack_depth)
{
  /* We use a standard Elisp hash-table object, but we use it in
     a special way.  This is OK as long as the object is not exposed
     to Elisp, i.e. until it is returned by *-profiler-log, after which
     it can't be used any more.  */
  Lisp_Object log = make_hash_table (hashtest_profiler, heap_size,
                                     DEFAULT_REHASH_SIZE,
                                     DEFAULT_REHASH_THRESHOLD,
                                     Qnil, false);
  struct Lisp_Hash_Table *h = XHASH_TABLE (log);

  /* What is special about our hash-tables is that the keys are pre-filled
     with the vectors we'll put in them.  */
  ptrdiff_t i = ASIZE (h->key_and_value) >> 1;
  while (i > 0)
    set_hash_key_slot (h, --i,
                       Fmake_vector (make_number (max_stack_depth), Qnil));
  return log;
}
/* Evict the least used half of the hash_table.

   When the table is full, we have to evict someone.
   The easiest and most efficient is to evict the value we're about to add
   (i.e. once the table is full, stop sampling).

   We could also pick the element with the lowest count and evict it,
   but finding it is O(N) and for that amount of work we get very
   little in return: for the next sample, this latest sample will have
   count==1 and will hence be a prime candidate for eviction :-(

   So instead, we take O(N) time to eliminate more or less half of the
   entries (the half with the lowest counts).  So we get an amortized
   cost of O(1) and we get O(N) time for a new entry to grow larger
   than the other least counts before a new round of eviction.  */
static EMACS_INT approximate_median (log_t *log,
                                     ptrdiff_t start, ptrdiff_t size)
{
  eassert (size > 0);
  if (size < 2)
    return XINT (HASH_VALUE (log, start));
  if (size < 3)
    /* Not an actual median, but better for our application than
       choosing either of the two numbers.  */
    return ((XINT (HASH_VALUE (log, start))
             + XINT (HASH_VALUE (log, start + 1)))
            / 2);
  else
    {
      ptrdiff_t newsize = size / 3;
      ptrdiff_t start2 = start + newsize;
      EMACS_INT i1 = approximate_median (log, start, newsize);
      EMACS_INT i2 = approximate_median (log, start2, newsize);
      EMACS_INT i3 = approximate_median (log, start2 + newsize,
                                         size - 2 * newsize);
      return (i1 < i2
              ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
              : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
    }
}
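
/* A worked example with hypothetical counts: for the six values
   5 1 9 2 7 3, the recursion averages each third pairwise (giving
   3, 5 and 5) and returns the median of those, 5.  The true median
   lies between 3 and 5, so the approximation is close enough for
   eviction purposes, at O(N) total cost.  */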
static void evict_lower_half (log_t *log)
{
  ptrdiff_t size = ASIZE (log->key_and_value) / 2;
  EMACS_INT median = approximate_median (log, 0, size);
  ptrdiff_t i;

  for (i = 0; i < size; i++)
    /* Evict not only values smaller but also values equal to the median,
       so as to make sure we evict something no matter what.  */
    if (XINT (HASH_VALUE (log, i)) <= median)
      {
        Lisp_Object key = HASH_KEY (log, i);
        { /* FIXME: we could make this more efficient.  */
          Lisp_Object tmp;
          XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr.  */
          Fremhash (key, tmp);
        }
        eassert (log->next_free == i);

        eassert (VECTORP (key));
        for (ptrdiff_t j = 0; j < ASIZE (key); j++)
          ASET (key, j, Qnil);

        set_hash_key_slot (log, i, key);
      }
}
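
/* Note that each evicted key vector is nilled out and reinstalled in
   its key slot above, so the table keeps its full set of pre-allocated
   backtrace vectors and record_backtrace never has to allocate from
   the signal handler.  */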
/* Record the current backtrace in LOG.  COUNT is the weight of this
   current backtrace: interrupt counts for CPU, and the allocation
   size for memory.  */

static void
record_backtrace (log_t *log, EMACS_INT count)
{
  Lisp_Object backtrace;
  ptrdiff_t index;

  if (log->next_free < 0)
    /* FIXME: transfer the evicted counts to a special entry rather
       than dropping them on the floor.  */
    evict_lower_half (log);
  index = log->next_free;

  /* Get a "working memory" vector.  */
  backtrace = HASH_KEY (log, index);
  get_backtrace (backtrace);

  { /* We basically do a `gethash+puthash' here, except that we have to be
       careful to avoid memory allocation since we're in a signal
       handler, and we optimize the code to try and avoid computing the
       hash+lookup twice.  See fns.c:Fputhash for reference.  */
    EMACS_UINT hash;
    ptrdiff_t j = hash_lookup (log, backtrace, &hash);
    if (j >= 0)
      {
        EMACS_INT old_val = XINT (HASH_VALUE (log, j));
        EMACS_INT new_val = saturated_add (old_val, count);
        set_hash_value_slot (log, j, make_number (new_val));
      }
    else
      { /* BEWARE!  hash_put in general can allocate memory.
           But currently it only does that if log->next_free is -1.  */
        eassert (0 <= log->next_free);
        ptrdiff_t j = hash_put (log, backtrace, make_number (count), hash);
        /* Let's make sure we've put `backtrace' right where it
           already was to start with.  */
        eassert (index == j);

        /* FIXME: If the hash-table is almost full, we should set
           some global flag so that some Elisp code can offload its
           data elsewhere, so as to avoid the eviction code.
           There are 2 ways to do that, AFAICT:
           - Set a flag checked in maybe_quit, such that maybe_quit can then
             call Fprofiler_cpu_log and stash the full log for later use.
           - Set a flag checked in post-gc-hook, so that Elisp code can call
             profiler-cpu-log.  That gives us more flexibility since that
             Elisp code can then do all kinds of fun stuff like write
             the log to disk.  Or turn it right away into a call tree.
           Of course, using Elisp is generally preferable, but it may
           take longer until we get a chance to run the Elisp code, so
           there's more risk that the table will get full before we
           get there.  */
      }
  }
}
/* Sampling profiler.  */

#ifdef PROFILER_CPU_SUPPORT

/* The profiler timer and whether it was properly initialized, if
   POSIX timers are available.  */
#ifdef HAVE_ITIMERSPEC
static timer_t profiler_timer;
static bool profiler_timer_ok;
#endif

/* Status of sampling profiler.  */
static enum profiler_cpu_running
  {
    NOT_RUNNING,
#ifdef HAVE_ITIMERSPEC
    TIMER_SETTIME_RUNNING,
#endif
    SETITIMER_RUNNING
  }
  profiler_cpu_running;

/* Hash-table log of CPU profiler.  */
static Lisp_Object cpu_log;

/* Separate counter for the time spent in the GC.  */
static EMACS_INT cpu_gc_count;

/* The current sampling interval in nanoseconds.  */
static EMACS_INT current_sampling_interval;
/* Signal handler for sampling profiler.  */

/* timer_getoverrun is not implemented on Cygwin, but the following
   seems to be good enough for profiling.  */
#ifdef CYGWIN
#define timer_getoverrun(x) 0
#endif

static void
handle_profiler_signal (int signal)
{
  if (EQ (backtrace_top_function (), QAutomatic_GC))
    /* Special case the time-count inside GC because the hash-table
       code is not prepared to be used while the GC is running.
       More specifically it uses ASIZE at many places where it does
       not expect the ARRAY_MARK_FLAG to be set.  We could try and
       harden the hash-table code, but it doesn't seem worth the
       effort.  */
    cpu_gc_count = saturated_add (cpu_gc_count, 1);
  else
    {
      EMACS_INT count = 1;
#ifdef HAVE_ITIMERSPEC
      if (profiler_timer_ok)
        {
          int overruns = timer_getoverrun (profiler_timer);
          eassert (overruns >= 0);
          count += overruns;
        }
#endif
      eassert (HASH_TABLE_P (cpu_log));
      record_backtrace (XHASH_TABLE (cpu_log), count);
    }
}
static void
deliver_profiler_signal (int signal)
{
  deliver_process_signal (signal, handle_profiler_signal);
}
static int
setup_cpu_timer (Lisp_Object sampling_interval)
{
  struct sigaction action;
  struct itimerval timer;
  struct timespec interval;
  int billion = 1000000000;

  if (! RANGED_INTEGERP (1, sampling_interval,
                         (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX / billion
                          ? ((EMACS_INT) TYPE_MAXIMUM (time_t) * billion
                             + (billion - 1))
                          : EMACS_INT_MAX)))
    return -1;

  current_sampling_interval = XINT (sampling_interval);
  interval = make_timespec (current_sampling_interval / billion,
                            current_sampling_interval % billion);
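
  /* For example (hypothetical argument): a SAMPLING-INTERVAL of
     1500000000 splits into 1 s + 500000000 ns above.  The upper bound
     in the range check is TYPE_MAXIMUM (time_t) * 10^9 + (10^9 - 1),
     the largest interval whose seconds part still fits in time_t.  */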
  emacs_sigaction_init (&action, deliver_profiler_signal);
  sigaction (SIGPROF, &action, 0);

#ifdef HAVE_ITIMERSPEC
  if (! profiler_timer_ok)
    {
      /* System clocks to try, in decreasing order of desirability.  */
      static clockid_t const system_clock[] = {
#ifdef CLOCK_THREAD_CPUTIME_ID
        CLOCK_THREAD_CPUTIME_ID,
#endif
#ifdef CLOCK_PROCESS_CPUTIME_ID
        CLOCK_PROCESS_CPUTIME_ID,
#endif
#ifdef CLOCK_MONOTONIC
        CLOCK_MONOTONIC,
#endif
        CLOCK_REALTIME
      };
      int i;
      struct sigevent sigev;
      sigev.sigev_value.sival_ptr = &profiler_timer;
      sigev.sigev_signo = SIGPROF;
      sigev.sigev_notify = SIGEV_SIGNAL;

      for (i = 0; i < ARRAYELTS (system_clock); i++)
        if (timer_create (system_clock[i], &sigev, &profiler_timer) == 0)
          {
            profiler_timer_ok = 1;
            break;
          }
    }

  if (profiler_timer_ok)
    {
      struct itimerspec ispec;
      ispec.it_value = ispec.it_interval = interval;
      if (timer_settime (profiler_timer, 0, &ispec, 0) == 0)
        return TIMER_SETTIME_RUNNING;
    }
#endif

#ifdef HAVE_SETITIMER
  timer.it_value = timer.it_interval = make_timeval (interval);
  if (setitimer (ITIMER_PROF, &timer, 0) == 0)
    return SETITIMER_RUNNING;
#endif

  return NOT_RUNNING;
}
330 DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start,
331 1, 1, 0,
332 doc: /* Start or restart the cpu profiler.
333 It takes call-stack samples each SAMPLING-INTERVAL nanoseconds, approximately.
334 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
335 (Lisp_Object sampling_interval)
337 if (profiler_cpu_running)
338 error ("CPU profiler is already running");
340 if (NILP (cpu_log))
342 cpu_gc_count = 0;
343 cpu_log = make_log (profiler_log_size,
344 profiler_max_stack_depth);
347 int status = setup_cpu_timer (sampling_interval);
348 if (status == -1)
350 profiler_cpu_running = NOT_RUNNING;
351 error ("Invalid sampling interval");
353 else
355 profiler_cpu_running = status;
356 if (! profiler_cpu_running)
357 error ("Unable to start profiler timer");
360 return Qt;
363 DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop,
364 0, 0, 0,
365 doc: /* Stop the cpu profiler. The profiler log is not affected.
366 Return non-nil if the profiler was running. */)
367 (void)
369 switch (profiler_cpu_running)
371 case NOT_RUNNING:
372 return Qnil;
374 #ifdef HAVE_ITIMERSPEC
375 case TIMER_SETTIME_RUNNING:
377 struct itimerspec disable;
378 memset (&disable, 0, sizeof disable);
379 timer_settime (profiler_timer, 0, &disable, 0);
381 break;
382 #endif
384 #ifdef HAVE_SETITIMER
385 case SETITIMER_RUNNING:
387 struct itimerval disable;
388 memset (&disable, 0, sizeof disable);
389 setitimer (ITIMER_PROF, &disable, 0);
391 break;
392 #endif
395 signal (SIGPROF, SIG_IGN);
396 profiler_cpu_running = NOT_RUNNING;
397 return Qt;
400 DEFUN ("profiler-cpu-running-p",
401 Fprofiler_cpu_running_p, Sprofiler_cpu_running_p,
402 0, 0, 0,
403 doc: /* Return non-nil if cpu profiler is running. */)
404 (void)
406 return profiler_cpu_running ? Qt : Qnil;
409 DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log,
410 0, 0, 0,
411 doc: /* Return the current cpu profiler log.
412 The log is a hash-table mapping backtraces to counters which represent
413 the amount of time spent at those points. Every backtrace is a vector
414 of functions, where the last few elements may be nil.
415 Before returning, a new log is allocated for future samples. */)
416 (void)
418 Lisp_Object result = cpu_log;
419 /* Here we're making the log visible to Elisp, so it's not safe any
420 more for our use afterwards since we can't rely on its special
421 pre-allocated keys anymore. So we have to allocate a new one. */
422 cpu_log = (profiler_cpu_running
423 ? make_log (profiler_log_size, profiler_max_stack_depth)
424 : Qnil);
425 Fputhash (Fmake_vector (make_number (1), QAutomatic_GC),
426 make_number (cpu_gc_count),
427 result);
428 cpu_gc_count = 0;
429 return result;
431 #endif /* PROFILER_CPU_SUPPORT */
/* Memory profiler.  */

/* True if memory profiler is running.  */
bool profiler_memory_running;

static Lisp_Object memory_log;

DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start,
       0, 0, 0,
       doc: /* Start/restart the memory profiler.
The memory profiler will take samples of the call-stack whenever a new
allocation takes place.  Note that most small allocations only trigger
the profiler occasionally.
See also `profiler-log-size' and `profiler-max-stack-depth'.  */)
  (void)
{
  if (profiler_memory_running)
    error ("Memory profiler is already running");

  if (NILP (memory_log))
    memory_log = make_log (profiler_log_size,
                           profiler_max_stack_depth);

  profiler_memory_running = true;

  return Qt;
}
461 DEFUN ("profiler-memory-stop",
462 Fprofiler_memory_stop, Sprofiler_memory_stop,
463 0, 0, 0,
464 doc: /* Stop the memory profiler. The profiler log is not affected.
465 Return non-nil if the profiler was running. */)
466 (void)
468 if (!profiler_memory_running)
469 return Qnil;
470 profiler_memory_running = false;
471 return Qt;
474 DEFUN ("profiler-memory-running-p",
475 Fprofiler_memory_running_p, Sprofiler_memory_running_p,
476 0, 0, 0,
477 doc: /* Return non-nil if memory profiler is running. */)
478 (void)
480 return profiler_memory_running ? Qt : Qnil;
483 DEFUN ("profiler-memory-log",
484 Fprofiler_memory_log, Sprofiler_memory_log,
485 0, 0, 0,
486 doc: /* Return the current memory profiler log.
487 The log is a hash-table mapping backtraces to counters which represent
488 the amount of memory allocated at those points. Every backtrace is a vector
489 of functions, where the last few elements may be nil.
490 Before returning, a new log is allocated for future samples. */)
491 (void)
493 Lisp_Object result = memory_log;
494 /* Here we're making the log visible to Elisp , so it's not safe any
495 more for our use afterwards since we can't rely on its special
496 pre-allocated keys anymore. So we have to allocate a new one. */
497 memory_log = (profiler_memory_running
498 ? make_log (profiler_log_size, profiler_max_stack_depth)
499 : Qnil);
500 return result;
/* Signals and probes.  */

/* Record that the current backtrace allocated SIZE bytes.  */
void
malloc_probe (size_t size)
{
  eassert (HASH_TABLE_P (memory_log));
  record_backtrace (XHASH_TABLE (memory_log), min (size, MOST_POSITIVE_FIXNUM));
}
514 DEFUN ("function-equal", Ffunction_equal, Sfunction_equal, 2, 2, 0,
515 doc: /* Return non-nil if F1 and F2 come from the same source.
516 Used to determine if different closures are just different instances of
517 the same lambda expression, or are really unrelated function. */)
518 (Lisp_Object f1, Lisp_Object f2)
520 bool res;
521 if (EQ (f1, f2))
522 res = true;
523 else if (COMPILEDP (f1) && COMPILEDP (f2))
524 res = EQ (AREF (f1, COMPILED_BYTECODE), AREF (f2, COMPILED_BYTECODE));
525 else if (CONSP (f1) && CONSP (f2) && CONSP (XCDR (f1)) && CONSP (XCDR (f2))
526 && EQ (Qclosure, XCAR (f1))
527 && EQ (Qclosure, XCAR (f2)))
528 res = EQ (XCDR (XCDR (f1)), XCDR (XCDR (f2)));
529 else
530 res = false;
531 return res ? Qt : Qnil;
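
/* For illustration (hypothetical Elisp): two interpreted closures
   obtained by evaluating the same (lambda ...) form twice share the
   (ARGS . BODY) tail compared above, so (function-equal c1 c2) => t
   even though (eq c1 c2) => nil.  */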
static bool
cmpfn_profiler (struct hash_table_test *t,
                Lisp_Object bt1, Lisp_Object bt2)
{
  if (VECTORP (bt1) && VECTORP (bt2))
    {
      ptrdiff_t i, l = ASIZE (bt1);
      if (l != ASIZE (bt2))
        return false;
      for (i = 0; i < l; i++)
        if (NILP (Ffunction_equal (AREF (bt1, i), AREF (bt2, i))))
          return false;
      return true;
    }
  else
    return EQ (bt1, bt2);
}
static EMACS_UINT
hashfn_profiler (struct hash_table_test *ht, Lisp_Object bt)
{
  if (VECTORP (bt))
    {
      EMACS_UINT hash = 0;
      ptrdiff_t i, l = ASIZE (bt);
      for (i = 0; i < l; i++)
        {
          Lisp_Object f = AREF (bt, i);
          EMACS_UINT hash1
            = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE))
               : (CONSP (f) && CONSP (XCDR (f)) && EQ (Qclosure, XCAR (f)))
               ? XHASH (XCDR (XCDR (f))) : XHASH (f));
          hash = sxhash_combine (hash, hash1);
        }
      return SXHASH_REDUCE (hash);
    }
  else
    return XHASH (bt);
}
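
/* Note that hashfn_profiler mirrors cmpfn_profiler: frames that
   compare equal under `function-equal' (same bytecode object, or same
   closure body) hash on that same shared component, preserving the
   hash-table invariant that equal keys have equal hashes.  */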
void
syms_of_profiler (void)
{
  DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
              doc: /* Number of elements from the call-stack recorded in the log.  */);
  profiler_max_stack_depth = 16;
  DEFVAR_INT ("profiler-log-size", profiler_log_size,
              doc: /* Number of distinct call-stacks that can be recorded in a profiler log.
If the log gets full, some of the least-seen call-stacks will be evicted
to make room for new entries.  */);
  profiler_log_size = 10000;

  DEFSYM (Qprofiler_backtrace_equal, "profiler-backtrace-equal");

  hashtest_profiler.name = Qprofiler_backtrace_equal;
  hashtest_profiler.user_hash_function = Qnil;
  hashtest_profiler.user_cmp_function = Qnil;
  hashtest_profiler.cmpfn = cmpfn_profiler;
  hashtest_profiler.hashfn = hashfn_profiler;

  defsubr (&Sfunction_equal);

#ifdef PROFILER_CPU_SUPPORT
  profiler_cpu_running = NOT_RUNNING;
  cpu_log = Qnil;
  staticpro (&cpu_log);
  defsubr (&Sprofiler_cpu_start);
  defsubr (&Sprofiler_cpu_stop);
  defsubr (&Sprofiler_cpu_running_p);
  defsubr (&Sprofiler_cpu_log);
#endif
  profiler_memory_running = false;
  memory_log = Qnil;
  staticpro (&memory_log);
  defsubr (&Sprofiler_memory_start);
  defsubr (&Sprofiler_memory_stop);
  defsubr (&Sprofiler_memory_running_p);
  defsubr (&Sprofiler_memory_log);
}