no bug - Bumping Firefox l10n changesets r=release a=l10n-bump DONTBUILD CLOSED TREE
[gecko.git] / tools / jprof / stub / libmalloc.cpp
blob3003543a93cea608a7cbdc1966e4aef5205b35f0
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 // vim:cindent:sw=4:et:ts=8:
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // The linux glibc hides part of sigaction if _POSIX_SOURCE is defined
8 #if defined(linux)
9 # undef _POSIX_SOURCE
10 # undef _SVID_SOURCE
11 # ifndef _GNU_SOURCE
12 # define _GNU_SOURCE
13 # endif
14 #endif
16 #include <errno.h>
17 #if defined(linux)
18 # include <linux/rtc.h>
19 # include <pthread.h>
20 #endif
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <signal.h>
26 #include <sys/time.h>
27 #include <sys/types.h>
28 #include <sys/ioctl.h>
29 #include <sys/stat.h>
30 #include <sys/syscall.h>
31 #include <ucontext.h>
32 #include <execinfo.h>
34 #include "libmalloc.h"
35 #include "jprof.h"
36 #include <string.h>
37 #include <errno.h>
38 #include <dlfcn.h>
40 #ifdef NTO
41 # include <sys/link.h>
42 extern r_debug _r_debug;
43 #else
44 # include <link.h>
45 #endif
// Select the glibc backtrace() based stack walker in CrawlStack() rather
// than the hand-rolled frame-pointer walk.
#define USE_GLIBC_BACKTRACE 1

// To debug, use #define JPROF_STATIC
#define JPROF_STATIC static

// Descriptor of the profile log; stays -1 until setupProfilingStuff() opens it.
static int gLogFD = -1;
// Thread that ran setupProfilingStuff(); StackHook() compares against it.
static pthread_t main_thread;

// True when JPROF_ISCHILD was already present in the environment at setup.
static bool gIsChild = false;
// Id appended to the log/map file names when gIsChild is set.
static int gFilenamePID;

// Defined further down in this file.
static void startSignalCounter(unsigned long millisec);
static int enableRTCSignals(bool enable);
60 //----------------------------------------------------------------------
61 // replace use of atexit()
static void DumpAddressMap();

// Guard object whose only job is to call DumpAddressMap() from its
// destructor; construction does nothing.
struct JprofShutdown {
  JprofShutdown() = default;

  ~JprofShutdown() {
    DumpAddressMap();
  }
};
70 static void RegisterJprofShutdown() {
71 // This instanciates the dummy class above, and will trigger the class
72 // destructor when libxul is unloaded. This is equivalent to atexit(),
73 // but gracefully handles dlclose().
74 static JprofShutdown t;
77 #if defined(i386) || defined(_i386) || defined(__x86_64__)
78 JPROF_STATIC void CrawlStack(malloc_log_entry* me, void* stack_top,
79 void* top_instr_ptr) {
80 # if USE_GLIBC_BACKTRACE
81 // This probably works on more than x86! But we need a way to get the
82 // top instruction pointer, which is kindof arch-specific
83 void* array[500];
84 int cnt, i;
85 u_long numpcs = 0;
87 // This is from glibc. A more generic version might use
88 // libunwind and/or CaptureStackBackTrace() on Windows
89 cnt = backtrace(&array[0], sizeof(array) / sizeof(array[0]));
91 // StackHook->JprofLog->CrawlStack
92 // Then we have sigaction, which replaced top_instr_ptr
93 array[3] = top_instr_ptr;
94 for (i = 3; i < cnt; i++) {
95 me->pcs[numpcs++] = (char*)array[i];
97 me->numpcs = numpcs;
99 # else
100 // original code - this breaks on many platforms
101 void** bp;
102 # if defined(__i386)
103 __asm__("movl %%ebp, %0" : "=g"(bp));
104 # elif defined(__x86_64__)
105 __asm__("movq %%rbp, %0" : "=g"(bp));
106 # else
107 // It would be nice if this worked uniformly, but at least on i386 and
108 // x86_64, it stopped working with gcc 4.1, because it points to the
109 // end of the saved registers instead of the start.
110 bp = __builtin_frame_address(0);
111 # endif
112 u_long numpcs = 0;
113 bool tracing = false;
115 me->pcs[numpcs++] = (char*)top_instr_ptr;
117 while (numpcs < MAX_STACK_CRAWL) {
118 void** nextbp = (void**)*bp++;
119 void* pc = *bp;
120 if (nextbp < bp) {
121 break;
123 if (tracing) {
124 // Skip the signal handling.
125 me->pcs[numpcs++] = (char*)pc;
126 } else if (pc == top_instr_ptr) {
127 tracing = true;
129 bp = nextbp;
131 me->numpcs = numpcs;
132 # endif
134 #endif
136 //----------------------------------------------------------------------
// RTC interrupt rate parsed from JP_RTC_HZ; non-zero selects RTC profiling.
static int rtcHz;
// Descriptor for /dev/rtc, or -1 while it is not open.
static int rtcFD = -1;
// True when JP_CIRCULAR asked for samples to go to the in-memory ring.
static bool circular = false;
#if defined(linux) || defined(NTO)
// Stops the sampling source, then writes one record per loaded object that
// has a non-empty name (a malloc_map_entry followed by the raw name bytes)
// to the map file.  Child processes get "<M_MAPFILE>-<id>" instead of
// M_MAPFILE.  Nothing is written if the file cannot be opened.
static void DumpAddressMap() {
  // Turn off the timer so we don't get interrupts during shutdown
  bool rtcStopped = false;
#  if defined(linux)
  if (rtcHz) {
    enableRTCSignals(false);
    rtcStopped = true;
  }
#  endif
  if (!rtcStopped) {
    startSignalCounter(0);
  }

  char mapPath[2048];
  if (gIsChild) {
    snprintf(mapPath, sizeof(mapPath), "%s-%d", M_MAPFILE, gFilenamePID);
  } else {
    snprintf(mapPath, sizeof(mapPath), "%s", M_MAPFILE);
  }

  const int mapFd = open(mapPath, O_CREAT | O_WRONLY | O_TRUNC, 0666);
  if (mapFd < 0) {
    return;
  }

  malloc_map_entry record;
  for (link_map* lib = _r_debug.r_map; lib != nullptr; lib = lib->l_next) {
    // Objects without a name are not recorded.
    if (!lib->l_name || !*lib->l_name) {
      continue;
    }
    record.nameLen = strlen(lib->l_name);
    record.address = lib->l_addr;
    write(mapFd, &record, sizeof(record));
    write(mapFd, lib->l_name, record.nameLen);
  }
  close(mapFd);
}
#endif
182 static bool was_paused = true;
184 JPROF_STATIC void JprofBufferDump();
185 JPROF_STATIC void JprofBufferClear();
187 static void ClearProfilingHook(int signum) {
188 if (circular) {
189 JprofBufferClear();
190 puts("Jprof: cleared circular buffer.");
194 static void EndProfilingHook(int signum) {
195 if (circular) JprofBufferDump();
197 DumpAddressMap();
198 was_paused = true;
199 puts("Jprof: profiling paused.");
202 //----------------------------------------------------------------------
203 // proper usage would be a template, including the function to find the
204 // size of an entry, or include a size header explicitly to each entry.
// `linux` is only predefined in GNU dialects; `__linux__` is predefined on
// every Linux target, so accept either.
#if defined(linux) || defined(__linux__)
#  define DUMB_LOCK() pthread_mutex_lock(&mutex)
#  define DUMB_UNLOCK() pthread_mutex_unlock(&mutex)
#else
#  define DUMB_LOCK() FIXME()
#  define DUMB_UNLOCK() FIXME()
#endif

// Byte-oriented ring buffer holding variable-sized records.
//
// Records are appended at `tail` and discarded from `head`; the caller is
// responsible for knowing the size of the record at `head`.  One byte is
// always kept free so that head == tail unambiguously means "empty".
// `head` and `tail` always point inside [buffer, buffer + buffer_size).
// The mutex is recursive, so lock()/unlock() may bracket calls to the other
// members.
class DumbCircularBuffer {
 public:
  // Allocates a ring of init_buffer_size bytes.  If the allocation fails the
  // ring has zero capacity and every insert() is rejected.
  DumbCircularBuffer(size_t init_buffer_size) {
    used = 0;
    buffer = (unsigned char*)malloc(init_buffer_size);
    buffer_size = buffer ? init_buffer_size : 0;
    head = tail = buffer;

#if defined(linux) || defined(__linux__)
    pthread_mutexattr_t mAttr;
    // The attribute object must be initialised before it is configured.
    pthread_mutexattr_init(&mAttr);
    pthread_mutexattr_settype(&mAttr, PTHREAD_MUTEX_RECURSIVE_NP);
    pthread_mutex_init(&mutex, &mAttr);
    pthread_mutexattr_destroy(&mAttr);
#endif
  }

  ~DumbCircularBuffer() {
    free(buffer);
#if defined(linux) || defined(__linux__)
    pthread_mutex_destroy(&mutex);
#endif
  }

  // Discards all records.
  void clear() {
    DUMB_LOCK();
    head = tail;
    used = 0;
    DUMB_UNLOCK();
  }

  // True when no bytes are stored.  Not synchronised.
  bool empty() { return head == tail; }

  // Number of bytes that insert() can currently accept.
  size_t space_available() {
    size_t result;
    DUMB_LOCK();
    if (buffer_size == 0) {
      result = 0;
    } else if (tail >= head) {
      // Includes the empty case (tail == head): everything but the one
      // reserved byte is free.
      result = buffer_size - (tail - head) - 1;
    } else {
      result = head - tail - 1;
    }
    DUMB_UNLOCK();
    return result;
  }

  // Discards the oldest record, which the caller states is `size` bytes.
  void drop(size_t size) {
    // assumes correctness!
    DUMB_LOCK();
    head += size;
    if (head >= &buffer[buffer_size]) head -= buffer_size;
    used--;
    DUMB_UNLOCK();
  }

  // Appends `size` bytes from `data` as one record, wrapping around the end
  // of the storage if needed.  Returns false, leaving the ring untouched,
  // when fewer than `size` bytes are free.
  bool insert(void* data, size_t size) {
    // can fail if not enough space in the entire buffer
    DUMB_LOCK();
    if (space_available() < size) {
      DUMB_UNLOCK();  // do not leave the mutex held on the failure path
      return false;
    }

    unsigned char* const end = &buffer[buffer_size];
    size_t max_without_wrap = end - tail;
    size_t initial = size > max_without_wrap ? max_without_wrap : size;
#if DEBUG_CIRCULAR
    fprintf(stderr, "insert(%u): max_without_wrap %zu, size %zu, initial %zu\n",
            used, max_without_wrap, size, initial);
#endif
    memcpy(tail, data, initial);
    tail += initial;
    // Never leave tail one-past-the-end: after the ring drains, head wraps
    // to `buffer` and must compare equal to tail.
    if (tail == end) tail = buffer;
    data = ((char*)data) + initial;
    size -= initial;
    if (size != 0) {
#if DEBUG_CIRCULAR
      fprintf(stderr, "wrapping by %zu bytes\n", size);
#endif
      memcpy(buffer, data, size);
      tail = &(((unsigned char*)buffer)[size]);
    }

    used++;
    DUMB_UNLOCK();

    return true;
  }

  // for external access to the buffer (saving)
  void lock() { DUMB_LOCK(); }

  void unlock() { DUMB_UNLOCK(); }

  // XXX These really shouldn't be public...
  unsigned char* head;   // oldest stored byte
  unsigned char* tail;   // where the next byte will be written
  unsigned int used;     // number of records (not bytes) stored
  unsigned char* buffer; // start of the storage
  size_t buffer_size;    // size of the storage in bytes

 private:
  pthread_mutex_t mutex;
};
309 class DumbCircularBuffer* JprofBuffer;
311 JPROF_STATIC void JprofBufferInit(size_t size) {
312 JprofBuffer = new DumbCircularBuffer(size);
315 JPROF_STATIC void JprofBufferClear() {
316 fprintf(stderr, "Told to clear JPROF circular buffer\n");
317 JprofBuffer->clear();
320 JPROF_STATIC size_t JprofEntrySizeof(malloc_log_entry* me) {
321 return offsetof(malloc_log_entry, pcs) + me->numpcs * sizeof(char*);
324 JPROF_STATIC void JprofBufferAppend(malloc_log_entry* me) {
325 size_t size = JprofEntrySizeof(me);
327 do {
328 while (JprofBuffer->space_available() < size && JprofBuffer->used > 0) {
329 #if DEBUG_CIRCULAR
330 fprintf(
331 stderr,
332 "dropping entry: %d in use, %d free, need %d, size_to_free = %d\n",
333 JprofBuffer->used, JprofBuffer->space_available(), size,
334 JprofEntrySizeof((malloc_log_entry*)JprofBuffer->head));
335 #endif
336 JprofBuffer->drop(JprofEntrySizeof((malloc_log_entry*)JprofBuffer->head));
338 if (JprofBuffer->space_available() < size) return;
340 } while (!JprofBuffer->insert(me, size));
343 JPROF_STATIC void JprofBufferDump() {
344 JprofBuffer->lock();
345 #if DEBUG_CIRCULAR
346 fprintf(
347 stderr, "dumping JP_CIRCULAR buffer, %d of %d bytes\n",
348 JprofBuffer->tail > JprofBuffer->head
349 ? JprofBuffer->tail - JprofBuffer->head
350 : JprofBuffer->buffer_size + JprofBuffer->tail - JprofBuffer->head,
351 JprofBuffer->buffer_size);
352 #endif
353 if (JprofBuffer->tail >= JprofBuffer->head) {
354 write(gLogFD, JprofBuffer->head, JprofBuffer->tail - JprofBuffer->head);
355 } else {
356 write(gLogFD, JprofBuffer->head,
357 &(JprofBuffer->buffer[JprofBuffer->buffer_size]) - JprofBuffer->head);
358 write(gLogFD, JprofBuffer->buffer, JprofBuffer->tail - JprofBuffer->buffer);
360 JprofBuffer->clear();
361 JprofBuffer->unlock();
364 //----------------------------------------------------------------------
366 JPROF_STATIC void JprofLog(u_long aTime, void* stack_top, void* top_instr_ptr) {
367 // Static is simply to make debugging tolerable
368 static malloc_log_entry me;
370 me.delTime = aTime;
371 me.thread = syscall(SYS_gettid); // gettid();
372 if (was_paused) {
373 me.flags = JP_FIRST_AFTER_PAUSE;
374 was_paused = 0;
375 } else {
376 me.flags = 0;
379 CrawlStack(&me, stack_top, top_instr_ptr);
381 #ifndef NTO
382 if (circular) {
383 JprofBufferAppend(&me);
384 } else {
385 write(gLogFD, &me, JprofEntrySizeof(&me));
387 #else
388 printf("Neutrino is missing the pcs member of malloc_log_entry!! \n");
389 #endif
// Non-zero selects the wall-clock timer (ITIMER_REAL) instead of the
// profiling timer (ITIMER_PROF).
static int realTime;

/* Arms the sampling timer to fire once, `millisec` milliseconds from now;
 * passing 0 disarms it.  The timer is deliberately not programmed to
 * repeat, even though it is capable of doing so: this keeps further
 * interrupts from arriving while the handler is still running.  StackHook()
 * re-arms it after each sample.
 */
static void startSignalCounter(unsigned long millisec) {
  struct itimerval oneShot;

  oneShot.it_value.tv_sec = millisec / 1000;
  oneShot.it_value.tv_usec = (millisec % 1000) * 1000;
  // A zero interval means "do not reload after expiry".
  oneShot.it_interval.tv_sec = 0;
  oneShot.it_interval.tv_usec = 0;

  const int whichTimer = realTime ? ITIMER_REAL : ITIMER_PROF;
  setitimer(whichTimer, &oneShot, nullptr);
}

// Sampling period in milliseconds (overridden by JP_PERIOD / JP_RTC_HZ).
static long timerMilliSec = 50;
416 #if defined(linux)
417 static int setupRTCSignals(int hz, struct sigaction* sap) {
418 /* global */ rtcFD = open("/dev/rtc", O_RDONLY);
419 if (rtcFD < 0) {
420 perror("JPROF_RTC setup: open(\"/dev/rtc\", O_RDONLY)");
421 return 0;
424 if (sigaction(SIGIO, sap, nullptr) == -1) {
425 perror("JPROF_RTC setup: sigaction(SIGIO)");
426 return 0;
429 if (ioctl(rtcFD, RTC_IRQP_SET, hz) == -1) {
430 perror("JPROF_RTC setup: ioctl(/dev/rtc, RTC_IRQP_SET, $JPROF_RTC_HZ)");
431 return 0;
434 if (ioctl(rtcFD, RTC_PIE_ON, 0) == -1) {
435 perror("JPROF_RTC setup: ioctl(/dev/rtc, RTC_PIE_ON)");
436 return 0;
439 if (fcntl(rtcFD, F_SETSIG, 0) == -1) {
440 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETSIG, 0)");
441 return 0;
444 if (fcntl(rtcFD, F_SETOWN, getpid()) == -1) {
445 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETOWN, getpid())");
446 return 0;
449 return 1;
452 static int enableRTCSignals(bool enable) {
453 static bool enabled = false;
454 if (enabled == enable) {
455 return 0;
457 enabled = enable;
459 int flags = fcntl(rtcFD, F_GETFL);
460 if (flags < 0) {
461 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_GETFL)");
462 return 0;
465 if (enable) {
466 flags |= FASYNC;
467 } else {
468 flags &= ~FASYNC;
471 if (fcntl(rtcFD, F_SETFL, flags) == -1) {
472 if (enable) {
473 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags | FASYNC)");
474 } else {
475 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags & ~FASYNC)");
477 return 0;
480 return 1;
482 #endif
484 JPROF_STATIC void StackHook(int signum, siginfo_t* info, void* ucontext) {
485 static struct timeval tFirst;
486 static int first = 1;
487 size_t millisec = 0;
489 #if defined(linux)
490 if (rtcHz && pthread_self() != main_thread) {
491 // Only collect stack data on the main thread, for now.
492 return;
494 #endif
496 if (first && !(first = 0)) {
497 puts("Jprof: received first signal");
498 #if defined(linux)
499 if (rtcHz) {
500 enableRTCSignals(true);
501 } else
502 #endif
504 gettimeofday(&tFirst, 0);
505 millisec = 0;
507 } else {
508 #if defined(linux)
509 if (rtcHz) {
510 enableRTCSignals(true);
511 } else
512 #endif
514 struct timeval tNow;
515 gettimeofday(&tNow, 0);
516 double usec = 1e6 * (tNow.tv_sec - tFirst.tv_sec);
517 usec += (tNow.tv_usec - tFirst.tv_usec);
518 millisec = static_cast<size_t>(usec * 1e-3);
522 gregset_t& gregs = ((ucontext_t*)ucontext)->uc_mcontext.gregs;
523 #ifdef __x86_64__
524 JprofLog(millisec, (void*)gregs[REG_RSP], (void*)gregs[REG_RIP]);
525 #else
526 JprofLog(millisec, (void*)gregs[REG_ESP], (void*)gregs[REG_EIP]);
527 #endif
529 if (!rtcHz) startSignalCounter(timerMilliSec);
532 NS_EXPORT_(void) setupProfilingStuff(void) {
533 static int gFirstTime = 1;
534 char filename[2048]; // XXX fix
536 if (gFirstTime && !(gFirstTime = 0)) {
537 int startTimer = 1;
538 int doNotStart = 1;
539 int firstDelay = 0;
540 int append = O_TRUNC;
541 char* tst = getenv("JPROF_FLAGS");
543 /* Options from JPROF_FLAGS environment variable:
544 * JP_DEFER -> Wait for a SIGPROF (or SIGALRM, if JP_REALTIME
545 * is set) from userland before starting
546 * to generate them internally
547 * JP_START -> Install the signal handler
548 * JP_PERIOD -> Time between profiler ticks
549 * JP_FIRST -> Extra delay before starting
550 * JP_REALTIME -> Take stack traces in intervals of real time
551 * rather than time used by the process (and the
552 * system for the process). This is useful for
553 * finding time spent by the X server.
554 * JP_APPEND -> Append to jprof-log rather than overwriting it.
555 * This is somewhat risky since it depends on the
556 * address map staying constant across multiple runs.
557 * JP_FILENAME -> base filename to use when saving logs. Note that
558 * this does not affect the mapfile.
559 * JP_CIRCULAR -> use a circular buffer of size N, write/clear on SIGUSR1
561 * JPROF_ISCHILD is set if this is not the first process.
564 circular = false;
566 if (tst) {
567 if (strstr(tst, "JP_DEFER")) {
568 doNotStart = 0;
569 startTimer = 0;
571 if (strstr(tst, "JP_START")) doNotStart = 0;
572 if (strstr(tst, "JP_REALTIME")) realTime = 1;
573 if (strstr(tst, "JP_APPEND")) append = O_APPEND;
575 char* delay = strstr(tst, "JP_PERIOD=");
576 if (delay) {
577 double tmp = strtod(delay + strlen("JP_PERIOD="), nullptr);
578 if (tmp >= 1e-3) {
579 timerMilliSec = static_cast<unsigned long>(1000 * tmp);
580 } else {
581 fprintf(stderr, "JP_PERIOD of %g less than 0.001 (1ms), using 1ms\n",
582 tmp);
583 timerMilliSec = 1;
587 char* circular_op = strstr(tst, "JP_CIRCULAR=");
588 if (circular_op) {
589 size_t size = atol(circular_op + strlen("JP_CIRCULAR="));
590 if (size < 1000) {
591 fprintf(stderr, "JP_CIRCULAR of %lu less than 1000, using 10000\n",
592 (unsigned long)size);
593 size = 10000;
595 JprofBufferInit(size);
596 fprintf(stderr, "JP_CIRCULAR buffer of %lu bytes\n",
597 (unsigned long)size);
598 circular = true;
601 char* first = strstr(tst, "JP_FIRST=");
602 if (first) {
603 firstDelay = atol(first + strlen("JP_FIRST="));
606 char* rtc = strstr(tst, "JP_RTC_HZ=");
607 if (rtc) {
608 #if defined(linux)
609 rtcHz = atol(rtc + strlen("JP_RTC_HZ="));
610 timerMilliSec = 0; /* This makes JP_FIRST work right. */
611 realTime = 1; /* It's the _R_TC and all. ;) */
613 # define IS_POWER_OF_TWO(x) (((x) & ((x)-1)) == 0)
615 if (!IS_POWER_OF_TWO(rtcHz) || rtcHz < 2) {
616 fprintf(stderr,
617 "JP_RTC_HZ must be power of two and >= 2, "
618 "but %d was provided; using default of 2048\n",
619 rtcHz);
620 rtcHz = 2048;
622 #else
623 fputs(
624 "JP_RTC_HZ found, but RTC profiling only supported on "
625 "Linux!\n",
626 stderr);
628 #endif
630 const char* f = strstr(tst, "JP_FILENAME=");
631 if (f)
632 f = f + strlen("JP_FILENAME=");
633 else
634 f = M_LOGFILE;
636 char* is_child = getenv("JPROF_ISCHILD");
637 if (!is_child) setenv("JPROF_ISCHILD", "", 0);
638 gIsChild = !!is_child;
640 gFilenamePID = syscall(SYS_gettid); // gettid();
641 if (is_child)
642 snprintf(filename, sizeof(filename), "%s-%d", f, gFilenamePID);
643 else
644 snprintf(filename, sizeof(filename), "%s", f);
646 // XXX FIX! inherit current capture state!
649 if (!doNotStart) {
650 if (gLogFD < 0) {
651 gLogFD = open(filename, O_CREAT | O_WRONLY | append, 0666);
652 if (gLogFD < 0) {
653 fprintf(stderr, "Unable to create " M_LOGFILE);
654 perror(":");
655 } else {
656 struct sigaction action;
657 sigset_t mset;
659 // Dump out the address map when we terminate
660 RegisterJprofShutdown();
662 main_thread = pthread_self();
663 // fprintf(stderr,"jprof: main_thread = %u\n",
664 // (unsigned int)main_thread);
666 // FIX! probably should block these against each other
667 // Very unlikely.
668 sigemptyset(&mset);
669 action.sa_handler = nullptr;
670 action.sa_sigaction = StackHook;
671 action.sa_mask = mset;
672 action.sa_flags = SA_RESTART | SA_SIGINFO;
673 #if defined(linux)
674 if (rtcHz) {
675 if (!setupRTCSignals(rtcHz, &action)) {
676 fputs(
677 "jprof: Error initializing RTC, NOT "
678 "profiling\n",
679 stderr);
680 return;
684 if (!rtcHz || firstDelay != 0)
685 #endif
687 if (realTime) {
688 sigaction(SIGALRM, &action, nullptr);
691 // enable PROF in all cases to simplify JP_DEFER/pause/restart
692 sigaction(SIGPROF, &action, nullptr);
694 // make it so a SIGUSR1 will stop the profiling
695 // Note: It currently does not close the logfile.
696 // This could be configurable (so that it could
697 // later be reopened).
699 struct sigaction stop_action;
700 stop_action.sa_handler = EndProfilingHook;
701 stop_action.sa_mask = mset;
702 stop_action.sa_flags = SA_RESTART;
703 sigaction(SIGUSR1, &stop_action, nullptr);
705 // make it so a SIGUSR2 will clear the circular buffer
707 stop_action.sa_handler = ClearProfilingHook;
708 stop_action.sa_mask = mset;
709 stop_action.sa_flags = SA_RESTART;
710 sigaction(SIGUSR2, &stop_action, nullptr);
712 printf(
713 "Jprof: Initialized signal handler and set "
714 "timer for %lu %s, %d s "
715 "initial delay\n",
716 rtcHz ? rtcHz : timerMilliSec, rtcHz ? "Hz" : "ms", firstDelay);
718 if (startTimer) {
719 #if defined(linux)
720 /* If we have an initial delay we can just use
721 startSignalCounter to set up a timer to fire the
722 first stackHook after that delay. When that happens
723 we'll go and switch to RTC profiling. */
724 if (rtcHz && firstDelay == 0) {
725 puts("Jprof: enabled RTC signals");
726 enableRTCSignals(true);
727 } else
728 #endif
730 puts("Jprof: started timer");
731 startSignalCounter(firstDelay * 1000 + timerMilliSec);
737 } else {
738 printf("setupProfilingStuff() called multiple times\n");