/* Profiler implementation.

Copyright (C) 2012 Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
#include <config.h>
#include "lisp.h"
#include "syssignal.h"
#include "systime.h"
/* Return A + B, but return the maximum fixnum if the result would overflow.
   Assume A and B are nonnegative and in fixnum range.  */

static EMACS_INT
saturated_add (EMACS_INT a, EMACS_INT b)
{
  return min (a + b, MOST_POSITIVE_FIXNUM);
}
/* Logs.  */

typedef struct Lisp_Hash_Table log_t;

static Lisp_Object Qprofiler_backtrace_equal;
static struct hash_table_test hashtest_profiler;
static Lisp_Object
make_log (int heap_size, int max_stack_depth)
{
  /* We use a standard Elisp hash-table object, but we use it in
     a special way.  This is OK as long as the object is not exposed
     to Elisp, i.e. until it is returned by *-profiler-log, after which
     it can't be used any more.  */
  Lisp_Object log = make_hash_table (hashtest_profiler,
                                     make_number (heap_size),
                                     make_float (DEFAULT_REHASH_SIZE),
                                     make_float (DEFAULT_REHASH_THRESHOLD),
                                     Qnil);
  struct Lisp_Hash_Table *h = XHASH_TABLE (log);

  /* What is special about our hash-tables is that the keys are pre-filled
     with the vectors we'll put in them.  */
  int i = ASIZE (h->key_and_value) / 2;
  while (0 < i)
    set_hash_key_slot (h, --i,
                       Fmake_vector (make_number (max_stack_depth), Qnil));
  return log;
}
/* Evict the least used half of the hash_table.

   When the table is full, we have to evict someone.
   The easiest and most efficient is to evict the value we're about to add
   (i.e. once the table is full, stop sampling).

   We could also pick the element with the lowest count and evict it,
   but finding it is O(N) and for that amount of work we get very
   little in return: for the next sample, this latest sample will have
   count==1 and will hence be a prime candidate for eviction :-(

   So instead, we take O(N) time to eliminate more or less half of the
   entries (the half with the lowest counts).  So we get an amortized
   cost of O(1) and we get O(N) time for a new entry to grow larger
   than the other least counts before a new round of eviction.  */
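
/* A quick illustrative calculation (not from the original source):
   with the default profiler-log-size of 10000, each eviction pass
   does O(N) work but frees roughly half the slots, so on the order
   of 5000 new samples are admitted before the next pass -- amortized
   constant work per sample, as claimed above.  */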
static EMACS_INT approximate_median (log_t *log,
                                     ptrdiff_t start, ptrdiff_t size)
{
  eassert (size > 0);
  if (size < 2)
    return XINT (HASH_VALUE (log, start));
  if (size < 3)
    /* Not an actual median, but better for our application than
       choosing either of the two numbers.  */
    return ((XINT (HASH_VALUE (log, start))
             + XINT (HASH_VALUE (log, start + 1)))
            / 2);
  else
    {
      ptrdiff_t newsize = size / 3;
      ptrdiff_t start2 = start + newsize;
      EMACS_INT i1 = approximate_median (log, start, newsize);
      EMACS_INT i2 = approximate_median (log, start2, newsize);
      EMACS_INT i3 = approximate_median (log, start2 + newsize,
                                         size - 2 * newsize);
      return (i1 < i2
              ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
              : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
    }
}
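
/* Illustrative example (not in the original source): for the values
   1 2 3 4 5 6 7 8 9 stored in slots START..START+8, the recursion
   computes the approximate medians of the three thirds (2, 5 and 8)
   and returns their median, 5 -- here the exact median.  On skewed
   data the result is only approximate, which suffices for picking an
   eviction threshold in evict_lower_half.  */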
static void evict_lower_half (log_t *log)
{
  ptrdiff_t size = ASIZE (log->key_and_value) / 2;
  EMACS_INT median = approximate_median (log, 0, size);
  ptrdiff_t i;

  for (i = 0; i < size; i++)
    /* Evict not only values smaller but also values equal to the median,
       so as to make sure we evict something no matter what.  */
    if (XINT (HASH_VALUE (log, i)) <= median)
      {
        Lisp_Object key = HASH_KEY (log, i);
        { /* FIXME: we could make this more efficient.  */
          Lisp_Object tmp;
          XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr.  */
          Fremhash (key, tmp);
        }
        eassert (EQ (log->next_free, make_number (i)));
        {
          int j;
          eassert (VECTORP (key));
          for (j = 0; j < ASIZE (key); j++)
            ASET (key, j, Qnil);
        }
        set_hash_key_slot (log, i, key);
      }
}
/* Record the current backtrace in LOG.  COUNT is the weight of this
   current backtrace: interrupt counts for CPU, and the allocation
   size for memory.  */

static void
record_backtrace (log_t *log, EMACS_INT count)
{
  struct backtrace *backlist = backtrace_list;
  Lisp_Object backtrace;
  ptrdiff_t index, i = 0;
  ptrdiff_t asize;

  if (!INTEGERP (log->next_free))
    /* FIXME: transfer the evicted counts to a special entry rather
       than dropping them on the floor.  */
    evict_lower_half (log);
  index = XINT (log->next_free);

  /* Get a "working memory" vector.  */
  backtrace = HASH_KEY (log, index);
  asize = ASIZE (backtrace);

  /* Copy the backtrace contents into working memory.  */
  for (; i < asize && backlist; i++, backlist = backlist->next)
    /* FIXME: For closures we should ignore the environment.  */
    ASET (backtrace, i, backlist->function);

  /* Make sure that unused space of working memory is filled with nil.  */
  for (; i < asize; i++)
    ASET (backtrace, i, Qnil);

  { /* We basically do a `gethash+puthash' here, except that we have to be
       careful to avoid memory allocation since we're in a signal
       handler, and we optimize the code to try and avoid computing the
       hash+lookup twice.  See fns.c:Fputhash for reference.  */
    EMACS_UINT hash;
    ptrdiff_t j = hash_lookup (log, backtrace, &hash);
    if (j >= 0)
      {
        EMACS_INT old_val = XINT (HASH_VALUE (log, j));
        EMACS_INT new_val = saturated_add (old_val, count);
        set_hash_value_slot (log, j, make_number (new_val));
      }
    else
      { /* BEWARE!  hash_put in general can allocate memory.
           But currently it only does that if log->next_free is nil.  */
        int j;
        eassert (!NILP (log->next_free));
        j = hash_put (log, backtrace, make_number (count), hash);
        /* Let's make sure we've put `backtrace' right where it
           already was to start with.  */
        eassert (index == j);

        /* FIXME: If the hash-table is almost full, we should set
           some global flag so that some Elisp code can offload its
           data elsewhere, so as to avoid the eviction code.
           There are 2 ways to do that, AFAICT:
           - Set a flag checked in QUIT, such that QUIT can then call
             Fprofiler_cpu_log and stash the full log for later use.
           - Set a flag checked in post-gc-hook, so that Elisp code can call
             profiler-cpu-log.  That gives us more flexibility since that
             Elisp code can then do all kinds of fun stuff like write
             the log to disk.  Or turn it right away into a call tree.
           Of course, using Elisp is generally preferable, but it may
           take longer until we get a chance to run the Elisp code, so
           there's more risk that the table will get full before we
           get there.  */
      }
  }
}
/* Sampling profiler.  */

#ifdef PROFILER_CPU_SUPPORT

/* The profiler timer and whether it was properly initialized, if
   POSIX timers are available.  */
#ifdef HAVE_ITIMERSPEC
static timer_t profiler_timer;
static bool profiler_timer_ok;
#endif

/* Status of sampling profiler.  */
static enum profiler_cpu_running
  { NOT_RUNNING, TIMER_SETTIME_RUNNING, SETITIMER_RUNNING }
  profiler_cpu_running;

/* Hash-table log of CPU profiler.  */
static Lisp_Object cpu_log;

/* Separate counter for the time spent in the GC.  */
static EMACS_INT cpu_gc_count;

/* The current sampling interval in nanoseconds.  */
static EMACS_INT current_sampling_interval;
/* Signal handler for sampling profiler.  */

static void
handle_profiler_signal (int signal)
{
  if (backtrace_list && EQ (backtrace_list->function, Qautomatic_gc))
    /* Special case the time-count inside GC because the hash-table
       code is not prepared to be used while the GC is running.
       More specifically it uses ASIZE at many places where it does
       not expect the ARRAY_MARK_FLAG to be set.  We could try and
       harden the hash-table code, but it doesn't seem worth the
       effort.  */
    cpu_gc_count = saturated_add (cpu_gc_count, 1);
  else
    {
      EMACS_INT count = 1;
#ifdef HAVE_ITIMERSPEC
      if (profiler_timer_ok)
        {
          int overruns = timer_getoverrun (profiler_timer);
          eassert (0 <= overruns);
          count += overruns;
        }
#endif
      eassert (HASH_TABLE_P (cpu_log));
      record_backtrace (XHASH_TABLE (cpu_log), count);
    }
}
static void
deliver_profiler_signal (int signal)
{
  deliver_process_signal (signal, handle_profiler_signal);
}
static enum profiler_cpu_running
setup_cpu_timer (Lisp_Object sampling_interval)
{
  struct sigaction action;
  struct itimerval timer;
  struct timespec interval;
  int billion = 1000000000;

  if (! RANGED_INTEGERP (1, sampling_interval,
                         (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX / billion
                          ? ((EMACS_INT) TYPE_MAXIMUM (time_t) * billion
                             + (billion - 1))
                          : EMACS_INT_MAX)))
    return NOT_RUNNING;

  current_sampling_interval = XINT (sampling_interval);
  interval = make_emacs_time (current_sampling_interval / billion,
                              current_sampling_interval % billion);
  emacs_sigaction_init (&action, deliver_profiler_signal);
  sigaction (SIGPROF, &action, 0);

#ifdef HAVE_ITIMERSPEC
  if (! profiler_timer_ok)
    {
      /* System clocks to try, in decreasing order of desirability.  */
      static clockid_t const system_clock[] = {
#ifdef CLOCK_THREAD_CPUTIME_ID
        CLOCK_THREAD_CPUTIME_ID,
#endif
#ifdef CLOCK_PROCESS_CPUTIME_ID
        CLOCK_PROCESS_CPUTIME_ID,
#endif
#ifdef CLOCK_MONOTONIC
        CLOCK_MONOTONIC,
#endif
        CLOCK_REALTIME
      };
      int i;
      struct sigevent sigev;
      sigev.sigev_value.sival_ptr = &profiler_timer;
      sigev.sigev_signo = SIGPROF;
      sigev.sigev_notify = SIGEV_SIGNAL;

      for (i = 0; i < sizeof system_clock / sizeof *system_clock; i++)
        if (timer_create (system_clock[i], &sigev, &profiler_timer) == 0)
          {
            profiler_timer_ok = 1;
            break;
          }
    }

  if (profiler_timer_ok)
    {
      struct itimerspec ispec;
      ispec.it_value = ispec.it_interval = interval;
      if (timer_settime (profiler_timer, 0, &ispec, 0) == 0)
        return TIMER_SETTIME_RUNNING;
    }
#endif

#ifdef HAVE_SETITIMER
  timer.it_value = timer.it_interval = make_timeval (interval);
  if (setitimer (ITIMER_PROF, &timer, 0) == 0)
    return SETITIMER_RUNNING;
#endif

  return NOT_RUNNING;
}
DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start,
       1, 1, 0,
       doc: /* Start or restart the cpu profiler.
It takes call-stack samples each SAMPLING-INTERVAL nanoseconds, approximately.
See also `profiler-log-size' and `profiler-max-stack-depth'.  */)
  (Lisp_Object sampling_interval)
{
  if (profiler_cpu_running)
    error ("CPU profiler is already running");

  if (NILP (cpu_log))
    {
      cpu_gc_count = 0;
      cpu_log = make_log (profiler_log_size,
                          profiler_max_stack_depth);
    }

  profiler_cpu_running = setup_cpu_timer (sampling_interval);
  if (! profiler_cpu_running)
    error ("Invalid sampling interval");

  return Qt;
}
DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop,
       0, 0, 0,
       doc: /* Stop the cpu profiler.  The profiler log is not affected.
Return non-nil if the profiler was running.  */)
  (void)
{
  switch (profiler_cpu_running)
    {
    case NOT_RUNNING:
      return Qnil;

#ifdef HAVE_ITIMERSPEC
    case TIMER_SETTIME_RUNNING:
      {
        struct itimerspec disable;
        memset (&disable, 0, sizeof disable);
        timer_settime (profiler_timer, 0, &disable, 0);
      }
      break;
#endif

#ifdef HAVE_SETITIMER
    case SETITIMER_RUNNING:
      {
        struct itimerval disable;
        memset (&disable, 0, sizeof disable);
        setitimer (ITIMER_PROF, &disable, 0);
      }
      break;
#endif
    }

  signal (SIGPROF, SIG_IGN);
  profiler_cpu_running = NOT_RUNNING;
  return Qt;
}
DEFUN ("profiler-cpu-running-p",
       Fprofiler_cpu_running_p, Sprofiler_cpu_running_p,
       0, 0, 0,
       doc: /* Return non-nil iff cpu profiler is running.  */)
  (void)
{
  return profiler_cpu_running ? Qt : Qnil;
}
DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log,
       0, 0, 0,
       doc: /* Return the current cpu profiler log.
The log is a hash-table mapping backtraces to counters which represent
the amount of time spent at those points.  Every backtrace is a vector
of functions, where the last few elements may be nil.
Before returning, a new log is allocated for future samples.  */)
  (void)
{
  Lisp_Object result = cpu_log;
  /* Here we're making the log visible to Elisp, so it's not safe any
     more for our use afterwards since we can't rely on its special
     pre-allocated keys anymore.  So we have to allocate a new one.  */
  cpu_log = (profiler_cpu_running
             ? make_log (profiler_log_size, profiler_max_stack_depth)
             : Qnil);
  Fputhash (Fmake_vector (make_number (1), Qautomatic_gc),
            make_number (cpu_gc_count),
            result);
  cpu_gc_count = 0;
  return result;
}
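
/* Illustrative shape of a returned log entry (hypothetical function
   names): a key [helper-fn my-command nil ... nil], innermost frame
   first, mapped to the value 42 means 42 samples were taken while
   helper-fn, called from my-command, was executing.  */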
#endif /* PROFILER_CPU_SUPPORT */
/* Memory profiler.  */

/* True if memory profiler is running.  */
bool profiler_memory_running;

static Lisp_Object memory_log;
DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start,
       0, 0, 0,
       doc: /* Start/restart the memory profiler.
The memory profiler will take samples of the call-stack whenever a new
allocation takes place.  Note that most small allocations only trigger
the profiler occasionally.
See also `profiler-log-size' and `profiler-max-stack-depth'.  */)
  (void)
{
  if (profiler_memory_running)
    error ("Memory profiler is already running");

  if (NILP (memory_log))
    memory_log = make_log (profiler_log_size,
                           profiler_max_stack_depth);

  profiler_memory_running = true;

  return Qt;
}
DEFUN ("profiler-memory-stop",
       Fprofiler_memory_stop, Sprofiler_memory_stop,
       0, 0, 0,
       doc: /* Stop the memory profiler.  The profiler log is not affected.
Return non-nil if the profiler was running.  */)
  (void)
{
  if (!profiler_memory_running)
    return Qnil;
  profiler_memory_running = false;
  return Qt;
}
DEFUN ("profiler-memory-running-p",
       Fprofiler_memory_running_p, Sprofiler_memory_running_p,
       0, 0, 0,
       doc: /* Return non-nil if memory profiler is running.  */)
  (void)
{
  return profiler_memory_running ? Qt : Qnil;
}
DEFUN ("profiler-memory-log",
       Fprofiler_memory_log, Sprofiler_memory_log,
       0, 0, 0,
       doc: /* Return the current memory profiler log.
The log is a hash-table mapping backtraces to counters which represent
the amount of memory allocated at those points.  Every backtrace is a vector
of functions, where the last few elements may be nil.
Before returning, a new log is allocated for future samples.  */)
  (void)
{
  Lisp_Object result = memory_log;
  /* Here we're making the log visible to Elisp, so it's not safe any
     more for our use afterwards since we can't rely on its special
     pre-allocated keys anymore.  So we have to allocate a new one.  */
  memory_log = (profiler_memory_running
                ? make_log (profiler_log_size, profiler_max_stack_depth)
                : Qnil);
  return result;
}
/* Signals and probes.  */

/* Record that the current backtrace allocated SIZE bytes.  */
void
malloc_probe (size_t size)
{
  eassert (HASH_TABLE_P (memory_log));
  record_backtrace (XHASH_TABLE (memory_log), min (size, MOST_POSITIVE_FIXNUM));
}
DEFUN ("function-equal", Ffunction_equal, Sfunction_equal, 2, 2, 0,
       doc: /* Return non-nil if F1 and F2 come from the same source.
Used to determine if different closures are just different instances of
the same lambda expression, or are really unrelated functions.  */)
  (Lisp_Object f1, Lisp_Object f2)
{
  bool res;
  if (EQ (f1, f2))
    res = true;
  else if (COMPILEDP (f1) && COMPILEDP (f2))
    res = EQ (AREF (f1, COMPILED_BYTECODE), AREF (f2, COMPILED_BYTECODE));
  else if (CONSP (f1) && CONSP (f2) && CONSP (XCDR (f1)) && CONSP (XCDR (f2))
           && EQ (Qclosure, XCAR (f1))
           && EQ (Qclosure, XCAR (f2)))
    res = EQ (XCDR (XCDR (f1)), XCDR (XCDR (f2)));
  else
    res = false;
  return res ? Qt : Qnil;
}
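
/* A hypothetical Elisp illustration (not part of this file): two
   interpreted closures created from the same lambda form share their
   body conses, so they compare as function-equal even though the
   captured environments differ:

     (defun make-adder (n) (lambda (x) (+ x n)))
     (function-equal (make-adder 1) (make-adder 2)) => t  */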
static bool
cmpfn_profiler (struct hash_table_test *t,
                Lisp_Object bt1, Lisp_Object bt2)
{
  if (VECTORP (bt1) && VECTORP (bt2))
    {
      ptrdiff_t i, l = ASIZE (bt1);
      if (l != ASIZE (bt2))
        return false;
      for (i = 0; i < l; i++)
        if (NILP (Ffunction_equal (AREF (bt1, i), AREF (bt2, i))))
          return false;
      return true;
    }
  else
    return EQ (bt1, bt2);
}
static EMACS_UINT
hashfn_profiler (struct hash_table_test *ht, Lisp_Object bt)
{
  if (VECTORP (bt))
    {
      EMACS_UINT hash = 0;
      ptrdiff_t i, l = ASIZE (bt);
      for (i = 0; i < l; i++)
        {
          Lisp_Object f = AREF (bt, i);
          EMACS_UINT hash1
            = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE))
               : (CONSP (f) && CONSP (XCDR (f)) && EQ (Qclosure, XCAR (f)))
               ? XHASH (XCDR (XCDR (f))) : XHASH (f));
          hash = sxhash_combine (hash, hash1);
        }
      return (hash & INTMASK);
    }
  else
    return XHASH (bt);
}
void
syms_of_profiler (void)
{
  DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
              doc: /* Number of elements from the call-stack recorded in the log.  */);
  profiler_max_stack_depth = 16;
  DEFVAR_INT ("profiler-log-size", profiler_log_size,
              doc: /* Number of distinct call-stacks that can be recorded in a profiler log.
If the log gets full, some of the least-seen call-stacks will be evicted
to make room for new entries.  */);
  profiler_log_size = 10000;

  DEFSYM (Qprofiler_backtrace_equal, "profiler-backtrace-equal");
  {
    struct hash_table_test test
      = { Qprofiler_backtrace_equal, Qnil, Qnil,
          cmpfn_profiler, hashfn_profiler };
    hashtest_profiler = test;
  }

  defsubr (&Sfunction_equal);

#ifdef PROFILER_CPU_SUPPORT
  profiler_cpu_running = NOT_RUNNING;
  cpu_log = Qnil;
  staticpro (&cpu_log);
  defsubr (&Sprofiler_cpu_start);
  defsubr (&Sprofiler_cpu_stop);
  defsubr (&Sprofiler_cpu_running_p);
  defsubr (&Sprofiler_cpu_log);
#endif
  profiler_memory_running = false;
  memory_log = Qnil;
  staticpro (&memory_log);
  defsubr (&Sprofiler_memory_start);
  defsubr (&Sprofiler_memory_stop);
  defsubr (&Sprofiler_memory_running_p);
  defsubr (&Sprofiler_memory_log);
}