src/profiler.c
/* Profiler implementation.

Copyright (C) 2012 Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include <stdio.h>
#include <limits.h>
#include <sys/time.h>
#include <signal.h>
#include <setjmp.h>
#include "lisp.h"

/* Logs.  */

typedef struct Lisp_Hash_Table log_t;

static Lisp_Object
make_log (int heap_size, int max_stack_depth)
{
  /* We use a standard Elisp hash-table object, but we use it in
     a special way.  This is OK as long as the object is not exposed
     to Elisp, i.e. until it is returned by *-profiler-log, after which
     it can't be used any more.  */
  Lisp_Object log = make_hash_table (Qequal, make_number (heap_size),
                                     make_float (DEFAULT_REHASH_SIZE),
                                     make_float (DEFAULT_REHASH_THRESHOLD),
                                     Qnil, Qnil, Qnil);
  struct Lisp_Hash_Table *h = XHASH_TABLE (log);

  /* What is special about our hash-tables is that the keys are pre-filled
     with the vectors we'll put in them.  */
  int i = ASIZE (h->key_and_value) / 2;
  while (0 < i)
    set_hash_key_slot (h, --i,
                       Fmake_vector (make_number (max_stack_depth), Qnil));
  return log;
}
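/* For example, with the defaults set in syms_of_profiler below
   (profiler-log-size 10000, profiler-max-stack-depth 16), each key slot
   starts out holding its own 16-element vector of nils, which
   record_backtrace can later fill in place without having to allocate
   memory from inside a signal handler.  */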
/* Evict the least used half of the hash-table.

   When the table is full, we have to evict someone.
   The easiest and most efficient option is to evict the value we're
   about to add (i.e. once the table is full, stop sampling).

   We could also pick the element with the lowest count and evict it,
   but finding it is O(N) and for that amount of work we get very
   little in return: for the next sample, this latest sample will have
   count==1 and will hence be a prime candidate for eviction :-(

   So instead, we take O(N) time to eliminate more or less half of the
   entries (the half with the lowest counts).  So we get an amortized
   cost of O(1) and we get O(N) time for a new entry to grow larger
   than the other least counts before a new round of eviction.
   (A standalone sketch of both routines follows evict_lower_half
   below.)  */

static EMACS_INT approximate_median (log_t *log,
                                     ptrdiff_t start, ptrdiff_t size)
{
  eassert (size > 0);
  if (size < 2)
    return XINT (HASH_VALUE (log, start));
  if (size < 3)
    /* Not an actual median, but better for our application than
       choosing either of the two numbers.  */
    return ((XINT (HASH_VALUE (log, start))
             + XINT (HASH_VALUE (log, start + 1)))
            / 2);
  else
    {
      ptrdiff_t newsize = size / 3;
      ptrdiff_t start2 = start + newsize;
      EMACS_INT i1 = approximate_median (log, start, newsize);
      EMACS_INT i2 = approximate_median (log, start2, newsize);
      EMACS_INT i3 = approximate_median (log, start2 + newsize,
                                         size - 2 * newsize);
      /* Return the median of the three sub-medians.  */
      return (i1 < i2
              ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
              : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
    }
}

static void evict_lower_half (log_t *log)
{
  ptrdiff_t size = ASIZE (log->key_and_value) / 2;
  EMACS_INT median = approximate_median (log, 0, size);
  ptrdiff_t i;

  for (i = 0; i < size; i++)
    /* Evict not only values smaller but also values equal to the median,
       so as to make sure we evict something no matter what.  */
    if (XINT (HASH_VALUE (log, i)) <= median)
      {
        Lisp_Object key = HASH_KEY (log, i);
        { /* FIXME: we could make this more efficient.  */
          Lisp_Object tmp;
          XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr.  */
          Fremhash (key, tmp);
        }
        eassert (EQ (log->next_free, make_number (i)));
        {
          int j;
          eassert (VECTORP (key));
          /* Re-fill the key's vector with nil so it can be reused as
             working memory for a future backtrace.  */
          for (j = 0; j < ASIZE (key); j++)
            ASET (key, j, Qnil);
        }
        set_hash_key_slot (log, i, key);
      }
}
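/* A minimal standalone sketch of the two routines above, using a plain
   array of ints in place of the hash-table's value slots, to make the
   amortization concrete.  It is guarded by "#if 0" and is not part of
   the build; all names in it are illustrative only.  */
#if 0
#include <stdio.h>

static int
demo_median (const int *v, int start, int size)
{
  int newsize, start2, i1, i2, i3;

  if (size < 2)
    return v[start];
  if (size < 3)
    return (v[start] + v[start + 1]) / 2;
  newsize = size / 3;
  start2 = start + newsize;
  i1 = demo_median (v, start, newsize);
  i2 = demo_median (v, start2, newsize);
  i3 = demo_median (v, start2 + newsize, size - 2 * newsize);
  return (i1 < i2
          ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
          : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
}

int
main (void)
{
  int counts[9] = { 5, 1, 8, 2, 9, 3, 7, 4, 6 };
  int median = demo_median (counts, 0, 9);
  int kept = 0, i;

  /* "Evict" every entry <= the approximate median: roughly half the
     slots become free again for new entries.  */
  for (i = 0; i < 9; i++)
    if (counts[i] <= median)
      counts[i] = 0;
    else
      kept++;
  /* Prints: approximate median 5, kept 4 of 9 entries.  */
  printf ("approximate median %d, kept %d of 9 entries\n", median, kept);
  return 0;
}
#endif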
/* Record the current backtrace in LOG.  COUNT is the weight of this
   backtrace: for the CPU profiler it is the sampling interval in
   milliseconds, and for the memory profiler it is the size of the
   allocation that triggered the probe.  */

static void
record_backtrace (log_t *log, size_t count)
{
  struct backtrace *backlist = backtrace_list;
  Lisp_Object backtrace;
  ptrdiff_t index, i = 0;
  ptrdiff_t asize;

  if (!INTEGERP (log->next_free))
    /* FIXME: transfer the evicted counts to a special entry rather
       than dropping them on the floor.  */
    evict_lower_half (log);
  index = XINT (log->next_free);

  /* Get a "working memory" vector.  */
  backtrace = HASH_KEY (log, index);
  asize = ASIZE (backtrace);

  /* Copy the backtrace contents into working memory.  */
  for (; i < asize && backlist; i++, backlist = backlist->next)
    /* FIXME: For closures we should ignore the environment.  */
    ASET (backtrace, i, *backlist->function);

  /* Make sure that unused space of working memory is filled with nil.  */
  for (; i < asize; i++)
    ASET (backtrace, i, Qnil);

  { /* We basically do a `gethash+puthash' here, except that we have to be
       careful to avoid memory allocation since we're in a signal
       handler, and we optimize the code to try and avoid computing the
       hash+lookup twice.  See fns.c:Fputhash for reference.  */
    EMACS_UINT hash;
    ptrdiff_t j = hash_lookup (log, backtrace, &hash);
    if (j >= 0)
      set_hash_value_slot (log, j,
                           make_number (count + XINT (HASH_VALUE (log, j))));
    else
      { /* BEWARE!  hash_put in general can allocate memory.
           But currently it only does that if log->next_free is nil.  */
        int j;
        eassert (!NILP (log->next_free));
        j = hash_put (log, backtrace, make_number (count), hash);
        /* Let's make sure we've put `backtrace' right where it
           already was to start with.  */
        eassert (index == j);

        /* FIXME: If the hash-table is almost full, we should set
           some global flag so that some Elisp code can offload its
           data elsewhere, so as to avoid the eviction code.
           There are 2 ways to do that, AFAICT:
           - Set a flag checked in QUIT, such that QUIT can then call
             Fprofiler_cpu_log and stash the full log for later use.
           - Set a flag checked in post-gc-hook, so that Elisp code can call
             profiler-cpu-log.  That gives us more flexibility since that
             Elisp code can then do all kinds of fun stuff like write
             the log to disk.  Or turn it right away into a call tree.
           Of course, using Elisp is generally preferable, but it may
           take longer until we get a chance to run the Elisp code, so
           there's more risk that the table will get full before we
           get there.  */
      }
  }
}
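/* Illustrative example (not from an actual run): with a maximum stack
   depth of 4, a sample taken while `h' is executing, called from `g',
   itself called from `f', is stored under the key [h g f nil]; a later
   identical sample just adds its COUNT to that key's value, so the
   log's size stays bounded by the number of distinct backtraces.  */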
/* Sample profiler.  */

#if defined SIGPROF && defined HAVE_SETITIMER
#define PROFILER_CPU_SUPPORT

/* True if sampling profiler is running.  */
static bool profiler_cpu_running;

static Lisp_Object cpu_log;
/* Separate counter for the time spent in the GC.  */
static EMACS_INT cpu_gc_count;

/* The current sample interval in milliseconds.  */
static int current_sample_interval;

/* Signal handler for sample profiler.  */

static void
sigprof_handler (int signal, siginfo_t *info, void *ctx)
{
  eassert (HASH_TABLE_P (cpu_log));
  if (backtrace_list && EQ (*backtrace_list->function, Qautomatic_gc))
    /* Special case the time-count inside GC because the hash-table
       code is not prepared to be used while the GC is running.
       More specifically it uses ASIZE at many places where it does
       not expect the ARRAY_MARK_FLAG to be set.  We could try and
       harden the hash-table code, but it doesn't seem worth the
       effort.  */
    cpu_gc_count += current_sample_interval;
  else
    record_backtrace (XHASH_TABLE (cpu_log), current_sample_interval);
}

DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start,
       1, 1, 0,
       doc: /* Start or restart the cpu profiler.
The cpu profiler will take call-stack samples each SAMPLE-INTERVAL (expressed in milliseconds).
See also `profiler-log-size' and `profiler-max-stack-depth'.  */)
  (Lisp_Object sample_interval)
{
  struct sigaction sa;
  struct itimerval timer;

  if (profiler_cpu_running)
    error ("Sample profiler is already running");

  if (NILP (cpu_log))
    {
      cpu_gc_count = 0;
      cpu_log = make_log (profiler_log_size,
                          profiler_max_stack_depth);
    }

  current_sample_interval = XINT (sample_interval);

  sa.sa_sigaction = sigprof_handler;
  sa.sa_flags = SA_RESTART | SA_SIGINFO;
  sigemptyset (&sa.sa_mask);
  sigaction (SIGPROF, &sa, 0);

  timer.it_interval.tv_sec = 0;
  timer.it_interval.tv_usec = current_sample_interval * 1000;
  timer.it_value = timer.it_interval;
  setitimer (ITIMER_PROF, &timer, 0);

  profiler_cpu_running = true;

  return Qt;
}
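/* A minimal standalone sketch of the SIGPROF + setitimer mechanism used
   above, assuming a POSIX system; it merely counts ticks where the real
   handler records a backtrace.  Guarded by "#if 0", not part of the
   build; all names in it are illustrative only.  */
#if 0
#include <signal.h>
#include <stdio.h>
#include <sys/time.h>

static volatile sig_atomic_t ticks;

static void
demo_handler (int signal, siginfo_t *info, void *ctx)
{
  ticks++;  /* The real handler calls record_backtrace here.  */
}

int
main (void)
{
  struct sigaction sa;
  struct itimerval timer;
  volatile unsigned long n = 0;
  int sample_interval = 10;  /* Milliseconds, as in profiler-cpu-start.  */

  sa.sa_sigaction = demo_handler;
  sa.sa_flags = SA_RESTART | SA_SIGINFO;
  sigemptyset (&sa.sa_mask);
  sigaction (SIGPROF, &sa, 0);

  timer.it_interval.tv_sec = 0;
  timer.it_interval.tv_usec = sample_interval * 1000;  /* ms -> us.  */
  timer.it_value = timer.it_interval;
  setitimer (ITIMER_PROF, &timer, 0);

  /* Burn CPU time; ITIMER_PROF only ticks while the process runs.  */
  while (ticks < 100)
    n++;
  printf ("got %d samples after %lu iterations\n", (int) ticks, n);
  return 0;
}
#endif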
265 DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop,
266 0, 0, 0,
267 doc: /* Stop the cpu profiler. The profiler log is not affected.
268 Return non-nil if the profiler was running. */)
269 (void)
271 if (!profiler_cpu_running)
272 return Qnil;
273 profiler_cpu_running = false;
275 setitimer (ITIMER_PROF, 0, 0);
277 return Qt;
280 DEFUN ("profiler-cpu-running-p",
281 Fprofiler_cpu_running_p, Sprofiler_cpu_running_p,
282 0, 0, 0,
283 doc: /* Return non-nil iff cpu profiler is running. */)
284 (void)
286 return profiler_cpu_running ? Qt : Qnil;
289 DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log,
290 0, 0, 0,
291 doc: /* Return the current cpu profiler log.
292 The log is a hash-table mapping backtraces to counters which represent
293 the amount of time spent at those points. Every backtrace is a vector
294 of functions, where the last few elements may be nil.
295 Before returning, a new log is allocated for future samples. */)
296 (void)
298 Lisp_Object result = cpu_log;
299 /* Here we're making the log visible to Elisp , so it's not safe any
300 more for our use afterwards since we can't rely on its special
301 pre-allocated keys anymore. So we have to allocate a new one. */
302 cpu_log = (profiler_cpu_running
303 ? make_log (profiler_log_size, profiler_max_stack_depth)
304 : Qnil);
305 Fputhash (Fmake_vector (make_number (1), Qautomatic_gc),
306 make_number (cpu_gc_count),
307 result);
308 cpu_gc_count = 0;
309 return result;
311 #endif /* not defined PROFILER_CPU_SUPPORT */
/* Memory profiler.  */

/* True if memory profiler is running.  */
bool profiler_memory_running;

static Lisp_Object memory_log;

DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start,
       0, 0, 0,
       doc: /* Start/restart the memory profiler.
The memory profiler will take samples of the call-stack whenever a new
allocation takes place.  Note that most small allocations only trigger
the profiler occasionally.
See also `profiler-log-size' and `profiler-max-stack-depth'.  */)
  (void)
{
  if (profiler_memory_running)
    error ("Memory profiler is already running");

  if (NILP (memory_log))
    memory_log = make_log (profiler_log_size,
                           profiler_max_stack_depth);

  profiler_memory_running = true;

  return Qt;
}

DEFUN ("profiler-memory-stop",
       Fprofiler_memory_stop, Sprofiler_memory_stop,
       0, 0, 0,
       doc: /* Stop the memory profiler.  The profiler log is not affected.
Return non-nil if the profiler was running.  */)
  (void)
{
  if (!profiler_memory_running)
    return Qnil;
  profiler_memory_running = false;
  return Qt;
}

DEFUN ("profiler-memory-running-p",
       Fprofiler_memory_running_p, Sprofiler_memory_running_p,
       0, 0, 0,
       doc: /* Return non-nil if memory profiler is running.  */)
  (void)
{
  return profiler_memory_running ? Qt : Qnil;
}

DEFUN ("profiler-memory-log",
       Fprofiler_memory_log, Sprofiler_memory_log,
       0, 0, 0,
       doc: /* Return the current memory profiler log.
The log is a hash-table mapping backtraces to counters which represent
the amount of memory allocated at those points.  Every backtrace is a vector
of functions, where the last few elements may be nil.
Before returning, a new log is allocated for future samples.  */)
  (void)
{
  Lisp_Object result = memory_log;
  /* Here we're making the log visible to Elisp, so it's not safe any
     more for our use afterwards since we can't rely on its special
     pre-allocated keys anymore.  So we have to allocate a new one.  */
  memory_log = (profiler_memory_running
                ? make_log (profiler_log_size, profiler_max_stack_depth)
                : Qnil);
  return result;
}

/* Signals and probes.  */

/* Record that the current backtrace allocated SIZE bytes.  */
void
malloc_probe (size_t size)
{
  eassert (HASH_TABLE_P (memory_log));
  record_backtrace (XHASH_TABLE (memory_log), size);
}
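/* Since record_backtrace adds its COUNT argument to a key's existing
   value, the memory log's counters accumulate bytes rather than
   milliseconds: e.g., two probes of 32 and 96 bytes from the same
   backtrace leave a value of 128 on that key.  (Illustrative numbers.)  */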
void
syms_of_profiler (void)
{
  DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
              doc: /* Number of elements from the call-stack recorded in the log.  */);
  profiler_max_stack_depth = 16;
  DEFVAR_INT ("profiler-log-size", profiler_log_size,
              doc: /* Number of distinct call-stacks that can be recorded in a profiler log.
If the log gets full, some of the least-seen call-stacks will be evicted
to make room for new entries.  */);
  profiler_log_size = 10000;

#ifdef PROFILER_CPU_SUPPORT
  profiler_cpu_running = false;
  cpu_log = Qnil;
  staticpro (&cpu_log);
  defsubr (&Sprofiler_cpu_start);
  defsubr (&Sprofiler_cpu_stop);
  defsubr (&Sprofiler_cpu_running_p);
  defsubr (&Sprofiler_cpu_log);
#endif
  profiler_memory_running = false;
  memory_log = Qnil;
  staticpro (&memory_log);
  defsubr (&Sprofiler_memory_start);
  defsubr (&Sprofiler_memory_stop);
  defsubr (&Sprofiler_memory_running_p);
  defsubr (&Sprofiler_memory_log);
}