In Tramp. check ssh Control* options only when needed
[emacs.git] / src / profiler.c
blob1b49afe0331d57cfecf86aa95650051c54a65c91
1 /* Profiler implementation.
3 Copyright (C) 2012-2015 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 #include <config.h>
21 #include "lisp.h"
22 #include "syssignal.h"
23 #include "systime.h"
25 /* Return A + B, but return the maximum fixnum if the result would overflow.
26 Assume A and B are nonnegative and in fixnum range. */
28 static EMACS_INT
29 saturated_add (EMACS_INT a, EMACS_INT b)
31 return min (a + b, MOST_POSITIVE_FIXNUM);
34 /* Logs. */
36 typedef struct Lisp_Hash_Table log_t;
38 static struct hash_table_test hashtest_profiler;
40 static Lisp_Object
41 make_log (int heap_size, int max_stack_depth)
43 /* We use a standard Elisp hash-table object, but we use it in
44 a special way. This is OK as long as the object is not exposed
45 to Elisp, i.e. until it is returned by *-profiler-log, after which
46 it can't be used any more. */
47 Lisp_Object log = make_hash_table (hashtest_profiler,
48 make_number (heap_size),
49 make_float (DEFAULT_REHASH_SIZE),
50 make_float (DEFAULT_REHASH_THRESHOLD),
51 Qnil);
52 struct Lisp_Hash_Table *h = XHASH_TABLE (log);
54 /* What is special about our hash-tables is that the keys are pre-filled
55 with the vectors we'll put in them. */
56 int i = ASIZE (h->key_and_value) / 2;
57 while (i > 0)
58 set_hash_key_slot (h, --i,
59 Fmake_vector (make_number (max_stack_depth), Qnil));
60 return log;
63 /* Evict the least used half of the hash_table.
65 When the table is full, we have to evict someone.
66 The easiest and most efficient is to evict the value we're about to add
67 (i.e. once the table is full, stop sampling).
69 We could also pick the element with the lowest count and evict it,
70 but finding it is O(N) and for that amount of work we get very
71 little in return: for the next sample, this latest sample will have
72 count==1 and will hence be a prime candidate for eviction :-(
74 So instead, we take O(N) time to eliminate more or less half of the
75 entries (the half with the lowest counts). So we get an amortized
76 cost of O(1) and we get O(N) time for a new entry to grow larger
77 than the other least counts before a new round of eviction. */
79 static EMACS_INT approximate_median (log_t *log,
80 ptrdiff_t start, ptrdiff_t size)
82 eassert (size > 0);
83 if (size < 2)
84 return XINT (HASH_VALUE (log, start));
85 if (size < 3)
86 /* Not an actual median, but better for our application than
87 choosing either of the two numbers. */
88 return ((XINT (HASH_VALUE (log, start))
89 + XINT (HASH_VALUE (log, start + 1)))
90 / 2);
91 else
93 ptrdiff_t newsize = size / 3;
94 ptrdiff_t start2 = start + newsize;
95 EMACS_INT i1 = approximate_median (log, start, newsize);
96 EMACS_INT i2 = approximate_median (log, start2, newsize);
97 EMACS_INT i3 = approximate_median (log, start2 + newsize,
98 size - 2 * newsize);
99 return (i1 < i2
100 ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
101 : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
105 static void evict_lower_half (log_t *log)
107 ptrdiff_t size = ASIZE (log->key_and_value) / 2;
108 EMACS_INT median = approximate_median (log, 0, size);
109 ptrdiff_t i;
111 for (i = 0; i < size; i++)
112 /* Evict not only values smaller but also values equal to the median,
113 so as to make sure we evict something no matter what. */
114 if (XINT (HASH_VALUE (log, i)) <= median)
116 Lisp_Object key = HASH_KEY (log, i);
117 { /* FIXME: we could make this more efficient. */
118 Lisp_Object tmp;
119 XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr. */
120 Fremhash (key, tmp);
122 eassert (EQ (log->next_free, make_number (i)));
124 int j;
125 eassert (VECTORP (key));
126 for (j = 0; j < ASIZE (key); j++)
127 ASET (key, j, Qnil);
129 set_hash_key_slot (log, i, key);
133 /* Record the current backtrace in LOG. COUNT is the weight of this
134 current backtrace: interrupt counts for CPU, and the allocation
135 size for memory. */
137 static void
138 record_backtrace (log_t *log, EMACS_INT count)
140 Lisp_Object backtrace;
141 ptrdiff_t index;
143 if (!INTEGERP (log->next_free))
144 /* FIXME: transfer the evicted counts to a special entry rather
145 than dropping them on the floor. */
146 evict_lower_half (log);
147 index = XINT (log->next_free);
149 /* Get a "working memory" vector. */
150 backtrace = HASH_KEY (log, index);
151 get_backtrace (backtrace);
153 { /* We basically do a `gethash+puthash' here, except that we have to be
154 careful to avoid memory allocation since we're in a signal
155 handler, and we optimize the code to try and avoid computing the
156 hash+lookup twice. See fns.c:Fputhash for reference. */
157 EMACS_UINT hash;
158 ptrdiff_t j = hash_lookup (log, backtrace, &hash);
159 if (j >= 0)
161 EMACS_INT old_val = XINT (HASH_VALUE (log, j));
162 EMACS_INT new_val = saturated_add (old_val, count);
163 set_hash_value_slot (log, j, make_number (new_val));
165 else
166 { /* BEWARE! hash_put in general can allocate memory.
167 But currently it only does that if log->next_free is nil. */
168 int j;
169 eassert (!NILP (log->next_free));
170 j = hash_put (log, backtrace, make_number (count), hash);
171 /* Let's make sure we've put `backtrace' right where it
172 already was to start with. */
173 eassert (index == j);
175 /* FIXME: If the hash-table is almost full, we should set
176 some global flag so that some Elisp code can offload its
177 data elsewhere, so as to avoid the eviction code.
178 There are 2 ways to do that, AFAICT:
179 - Set a flag checked in QUIT, such that QUIT can then call
180 Fprofiler_cpu_log and stash the full log for later use.
181 - Set a flag check in post-gc-hook, so that Elisp code can call
182 profiler-cpu-log. That gives us more flexibility since that
183 Elisp code can then do all kinds of fun stuff like write
184 the log to disk. Or turn it right away into a call tree.
185 Of course, using Elisp is generally preferable, but it may
186 take longer until we get a chance to run the Elisp code, so
187 there's more risk that the table will get full before we
188 get there. */
193 /* Sampling profiler. */
195 #ifdef PROFILER_CPU_SUPPORT
197 /* The profiler timer and whether it was properly initialized, if
198 POSIX timers are available. */
199 #ifdef HAVE_ITIMERSPEC
200 static timer_t profiler_timer;
201 static bool profiler_timer_ok;
202 #endif
204 /* Status of sampling profiler. */
205 static enum profiler_cpu_running
206 { NOT_RUNNING, TIMER_SETTIME_RUNNING, SETITIMER_RUNNING }
207 profiler_cpu_running;
209 /* Hash-table log of CPU profiler. */
210 static Lisp_Object cpu_log;
212 /* Separate counter for the time spent in the GC. */
213 static EMACS_INT cpu_gc_count;
215 /* The current sampling interval in nanoseconds. */
216 static EMACS_INT current_sampling_interval;
218 /* Signal handler for sampling profiler. */
220 static void
221 handle_profiler_signal (int signal)
223 if (EQ (backtrace_top_function (), Qautomatic_gc))
224 /* Special case the time-count inside GC because the hash-table
225 code is not prepared to be used while the GC is running.
226 More specifically it uses ASIZE at many places where it does
227 not expect the ARRAY_MARK_FLAG to be set. We could try and
228 harden the hash-table code, but it doesn't seem worth the
229 effort. */
230 cpu_gc_count = saturated_add (cpu_gc_count, 1);
231 else
233 EMACS_INT count = 1;
234 #ifdef HAVE_ITIMERSPEC
235 if (profiler_timer_ok)
237 int overruns = timer_getoverrun (profiler_timer);
238 eassert (overruns >= 0);
239 count += overruns;
241 #endif
242 eassert (HASH_TABLE_P (cpu_log));
243 record_backtrace (XHASH_TABLE (cpu_log), count);
247 static void
248 deliver_profiler_signal (int signal)
250 deliver_process_signal (signal, handle_profiler_signal);
253 static enum profiler_cpu_running
254 setup_cpu_timer (Lisp_Object sampling_interval)
256 struct sigaction action;
257 struct itimerval timer;
258 struct timespec interval;
259 int billion = 1000000000;
261 if (! RANGED_INTEGERP (1, sampling_interval,
262 (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX / billion
263 ? ((EMACS_INT) TYPE_MAXIMUM (time_t) * billion
264 + (billion - 1))
265 : EMACS_INT_MAX)))
266 return NOT_RUNNING;
268 current_sampling_interval = XINT (sampling_interval);
269 interval = make_timespec (current_sampling_interval / billion,
270 current_sampling_interval % billion);
271 emacs_sigaction_init (&action, deliver_profiler_signal);
272 sigaction (SIGPROF, &action, 0);
274 #ifdef HAVE_ITIMERSPEC
275 if (! profiler_timer_ok)
277 /* System clocks to try, in decreasing order of desirability. */
278 static clockid_t const system_clock[] = {
279 #ifdef CLOCK_THREAD_CPUTIME_ID
280 CLOCK_THREAD_CPUTIME_ID,
281 #endif
282 #ifdef CLOCK_PROCESS_CPUTIME_ID
283 CLOCK_PROCESS_CPUTIME_ID,
284 #endif
285 #ifdef CLOCK_MONOTONIC
286 CLOCK_MONOTONIC,
287 #endif
288 CLOCK_REALTIME
290 int i;
291 struct sigevent sigev;
292 sigev.sigev_value.sival_ptr = &profiler_timer;
293 sigev.sigev_signo = SIGPROF;
294 sigev.sigev_notify = SIGEV_SIGNAL;
296 for (i = 0; i < ARRAYELTS (system_clock); i++)
297 if (timer_create (system_clock[i], &sigev, &profiler_timer) == 0)
299 profiler_timer_ok = 1;
300 break;
304 if (profiler_timer_ok)
306 struct itimerspec ispec;
307 ispec.it_value = ispec.it_interval = interval;
308 if (timer_settime (profiler_timer, 0, &ispec, 0) == 0)
309 return TIMER_SETTIME_RUNNING;
311 #endif
313 #ifdef HAVE_SETITIMER
314 timer.it_value = timer.it_interval = make_timeval (interval);
315 if (setitimer (ITIMER_PROF, &timer, 0) == 0)
316 return SETITIMER_RUNNING;
317 #endif
319 return NOT_RUNNING;
322 DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start,
323 1, 1, 0,
324 doc: /* Start or restart the cpu profiler.
325 It takes call-stack samples each SAMPLING-INTERVAL nanoseconds, approximately.
326 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
327 (Lisp_Object sampling_interval)
329 if (profiler_cpu_running)
330 error ("CPU profiler is already running");
332 if (NILP (cpu_log))
334 cpu_gc_count = 0;
335 cpu_log = make_log (profiler_log_size,
336 profiler_max_stack_depth);
339 profiler_cpu_running = setup_cpu_timer (sampling_interval);
340 if (! profiler_cpu_running)
341 error ("Invalid sampling interval");
343 return Qt;
346 DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop,
347 0, 0, 0,
348 doc: /* Stop the cpu profiler. The profiler log is not affected.
349 Return non-nil if the profiler was running. */)
350 (void)
352 switch (profiler_cpu_running)
354 case NOT_RUNNING:
355 return Qnil;
357 #ifdef HAVE_ITIMERSPEC
358 case TIMER_SETTIME_RUNNING:
360 struct itimerspec disable;
361 memset (&disable, 0, sizeof disable);
362 timer_settime (profiler_timer, 0, &disable, 0);
364 break;
365 #endif
367 #ifdef HAVE_SETITIMER
368 case SETITIMER_RUNNING:
370 struct itimerval disable;
371 memset (&disable, 0, sizeof disable);
372 setitimer (ITIMER_PROF, &disable, 0);
374 break;
375 #endif
378 signal (SIGPROF, SIG_IGN);
379 profiler_cpu_running = NOT_RUNNING;
380 return Qt;
383 DEFUN ("profiler-cpu-running-p",
384 Fprofiler_cpu_running_p, Sprofiler_cpu_running_p,
385 0, 0, 0,
386 doc: /* Return non-nil if cpu profiler is running. */)
387 (void)
389 return profiler_cpu_running ? Qt : Qnil;
392 DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log,
393 0, 0, 0,
394 doc: /* Return the current cpu profiler log.
395 The log is a hash-table mapping backtraces to counters which represent
396 the amount of time spent at those points. Every backtrace is a vector
397 of functions, where the last few elements may be nil.
398 Before returning, a new log is allocated for future samples. */)
399 (void)
401 Lisp_Object result = cpu_log;
402 /* Here we're making the log visible to Elisp, so it's not safe any
403 more for our use afterwards since we can't rely on its special
404 pre-allocated keys anymore. So we have to allocate a new one. */
405 cpu_log = (profiler_cpu_running
406 ? make_log (profiler_log_size, profiler_max_stack_depth)
407 : Qnil);
408 Fputhash (Fmake_vector (make_number (1), Qautomatic_gc),
409 make_number (cpu_gc_count),
410 result);
411 cpu_gc_count = 0;
412 return result;
414 #endif /* PROFILER_CPU_SUPPORT */
416 /* Memory profiler. */
418 /* True if memory profiler is running. */
419 bool profiler_memory_running;
421 static Lisp_Object memory_log;
423 DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start,
424 0, 0, 0,
425 doc: /* Start/restart the memory profiler.
426 The memory profiler will take samples of the call-stack whenever a new
427 allocation takes place. Note that most small allocations only trigger
428 the profiler occasionally.
429 See also `profiler-log-size' and `profiler-max-stack-depth'. */)
430 (void)
432 if (profiler_memory_running)
433 error ("Memory profiler is already running");
435 if (NILP (memory_log))
436 memory_log = make_log (profiler_log_size,
437 profiler_max_stack_depth);
439 profiler_memory_running = true;
441 return Qt;
444 DEFUN ("profiler-memory-stop",
445 Fprofiler_memory_stop, Sprofiler_memory_stop,
446 0, 0, 0,
447 doc: /* Stop the memory profiler. The profiler log is not affected.
448 Return non-nil if the profiler was running. */)
449 (void)
451 if (!profiler_memory_running)
452 return Qnil;
453 profiler_memory_running = false;
454 return Qt;
457 DEFUN ("profiler-memory-running-p",
458 Fprofiler_memory_running_p, Sprofiler_memory_running_p,
459 0, 0, 0,
460 doc: /* Return non-nil if memory profiler is running. */)
461 (void)
463 return profiler_memory_running ? Qt : Qnil;
466 DEFUN ("profiler-memory-log",
467 Fprofiler_memory_log, Sprofiler_memory_log,
468 0, 0, 0,
469 doc: /* Return the current memory profiler log.
470 The log is a hash-table mapping backtraces to counters which represent
471 the amount of memory allocated at those points. Every backtrace is a vector
472 of functions, where the last few elements may be nil.
473 Before returning, a new log is allocated for future samples. */)
474 (void)
476 Lisp_Object result = memory_log;
477 /* Here we're making the log visible to Elisp , so it's not safe any
478 more for our use afterwards since we can't rely on its special
479 pre-allocated keys anymore. So we have to allocate a new one. */
480 memory_log = (profiler_memory_running
481 ? make_log (profiler_log_size, profiler_max_stack_depth)
482 : Qnil);
483 return result;
487 /* Signals and probes. */
489 /* Record that the current backtrace allocated SIZE bytes. */
490 void
491 malloc_probe (size_t size)
493 eassert (HASH_TABLE_P (memory_log));
494 record_backtrace (XHASH_TABLE (memory_log), min (size, MOST_POSITIVE_FIXNUM));
497 DEFUN ("function-equal", Ffunction_equal, Sfunction_equal, 2, 2, 0,
498 doc: /* Return non-nil if F1 and F2 come from the same source.
499 Used to determine if different closures are just different instances of
500 the same lambda expression, or are really unrelated function. */)
501 (Lisp_Object f1, Lisp_Object f2)
503 bool res;
504 if (EQ (f1, f2))
505 res = true;
506 else if (COMPILEDP (f1) && COMPILEDP (f2))
507 res = EQ (AREF (f1, COMPILED_BYTECODE), AREF (f2, COMPILED_BYTECODE));
508 else if (CONSP (f1) && CONSP (f2) && CONSP (XCDR (f1)) && CONSP (XCDR (f2))
509 && EQ (Qclosure, XCAR (f1))
510 && EQ (Qclosure, XCAR (f2)))
511 res = EQ (XCDR (XCDR (f1)), XCDR (XCDR (f2)));
512 else
513 res = false;
514 return res ? Qt : Qnil;
517 static bool
518 cmpfn_profiler (struct hash_table_test *t,
519 Lisp_Object bt1, Lisp_Object bt2)
521 if (VECTORP (bt1) && VECTORP (bt2))
523 ptrdiff_t i, l = ASIZE (bt1);
524 if (l != ASIZE (bt2))
525 return false;
526 for (i = 0; i < l; i++)
527 if (NILP (Ffunction_equal (AREF (bt1, i), AREF (bt2, i))))
528 return false;
529 return true;
531 else
532 return EQ (bt1, bt2);
535 static EMACS_UINT
536 hashfn_profiler (struct hash_table_test *ht, Lisp_Object bt)
538 if (VECTORP (bt))
540 EMACS_UINT hash = 0;
541 ptrdiff_t i, l = ASIZE (bt);
542 for (i = 0; i < l; i++)
544 Lisp_Object f = AREF (bt, i);
545 EMACS_UINT hash1
546 = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE))
547 : (CONSP (f) && CONSP (XCDR (f)) && EQ (Qclosure, XCAR (f)))
548 ? XHASH (XCDR (XCDR (f))) : XHASH (f));
549 hash = sxhash_combine (hash, hash1);
551 return SXHASH_REDUCE (hash);
553 else
554 return XHASH (bt);
557 void
558 syms_of_profiler (void)
560 DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
561 doc: /* Number of elements from the call-stack recorded in the log. */);
562 profiler_max_stack_depth = 16;
563 DEFVAR_INT ("profiler-log-size", profiler_log_size,
564 doc: /* Number of distinct call-stacks that can be recorded in a profiler log.
565 If the log gets full, some of the least-seen call-stacks will be evicted
566 to make room for new entries. */);
567 profiler_log_size = 10000;
569 DEFSYM (Qprofiler_backtrace_equal, "profiler-backtrace-equal");
571 hashtest_profiler.name = Qprofiler_backtrace_equal;
572 hashtest_profiler.user_hash_function = Qnil;
573 hashtest_profiler.user_cmp_function = Qnil;
574 hashtest_profiler.cmpfn = cmpfn_profiler;
575 hashtest_profiler.hashfn = hashfn_profiler;
577 defsubr (&Sfunction_equal);
579 #ifdef PROFILER_CPU_SUPPORT
580 profiler_cpu_running = NOT_RUNNING;
581 cpu_log = Qnil;
582 staticpro (&cpu_log);
583 defsubr (&Sprofiler_cpu_start);
584 defsubr (&Sprofiler_cpu_stop);
585 defsubr (&Sprofiler_cpu_running_p);
586 defsubr (&Sprofiler_cpu_log);
587 #endif
588 profiler_memory_running = false;
589 memory_log = Qnil;
590 staticpro (&memory_log);
591 defsubr (&Sprofiler_memory_start);
592 defsubr (&Sprofiler_memory_stop);
593 defsubr (&Sprofiler_memory_running_p);
594 defsubr (&Sprofiler_memory_log);