1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 // vim:cindent:sw=4:et:ts=8:
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // The linux glibc hides part of sigaction if _POSIX_SOURCE is defined
18 # include <linux/rtc.h>
27 #include <sys/types.h>
28 #include <sys/ioctl.h>
30 #include <sys/syscall.h>
34 #include "libmalloc.h"
41 # include <sys/link.h>
42 extern r_debug _r_debug
;
47 #define USE_GLIBC_BACKTRACE 1
48 // To debug, use #define JPROF_STATIC
49 #define JPROF_STATIC static
51 static int gLogFD
= -1;
52 static pthread_t main_thread
;
54 static bool gIsChild
= false;
55 static int gFilenamePID
;
57 static void startSignalCounter(unsigned long millisec
);
58 static int enableRTCSignals(bool enable
);
60 //----------------------------------------------------------------------
61 // replace use of atexit()
63 static void DumpAddressMap();
// Helper type whose only visible job is its destructor: destroying an
// instance calls DumpAddressMap().
65 struct JprofShutdown
{
// Runs DumpAddressMap() when the object is destroyed.
67 ~JprofShutdown() { DumpAddressMap(); }
// Arranges for DumpAddressMap() to run at teardown by creating a
// function-local static JprofShutdown object, whose destructor makes the call.
70 static void RegisterJprofShutdown() {
71 // This instantiates the dummy class above, and will trigger the class
72 // destructor when libxul is unloaded. This is equivalent to atexit(),
73 // but gracefully handles dlclose().
74 static JprofShutdown t
;
77 #if defined(i386) || defined(_i386) || defined(__x86_64__)
78 JPROF_STATIC
void CrawlStack(malloc_log_entry
* me
, void* stack_top
,
79 void* top_instr_ptr
) {
80 # if USE_GLIBC_BACKTRACE
81 // This probably works on more than x86! But we need a way to get the
82 // top instruction pointer, which is kind of arch-specific
87 // This is from glibc. A more generic version might use
88 // libunwind and/or CaptureStackBackTrace() on Windows
89 cnt
= backtrace(&array
[0], sizeof(array
) / sizeof(array
[0]));
91 // StackHook->JprofLog->CrawlStack
92 // Then we have sigaction, which replaced top_instr_ptr
93 array
[3] = top_instr_ptr
;
94 for (i
= 3; i
< cnt
; i
++) {
95 me
->pcs
[numpcs
++] = (char*)array
[i
];
100 // original code - this breaks on many platforms
103 __asm__("movl %%ebp, %0" : "=g"(bp
));
104 # elif defined(__x86_64__)
105 __asm__("movq %%rbp, %0" : "=g"(bp
));
107 // It would be nice if this worked uniformly, but at least on i386 and
108 // x86_64, it stopped working with gcc 4.1, because it points to the
109 // end of the saved registers instead of the start.
110 bp
= __builtin_frame_address(0);
113 bool tracing
= false;
115 me
->pcs
[numpcs
++] = (char*)top_instr_ptr
;
117 while (numpcs
< MAX_STACK_CRAWL
) {
118 void** nextbp
= (void**)*bp
++;
124 // Skip the signal handling.
125 me
->pcs
[numpcs
++] = (char*)pc
;
126 } else if (pc
== top_instr_ptr
) {
136 //----------------------------------------------------------------------
139 static int rtcFD
= -1;
140 static bool circular
= false;
142 #if defined(linux) || defined(NTO)
143 static void DumpAddressMap() {
144 // Turn off the timer so we don't get interrupts during shutdown
147 enableRTCSignals(false);
151 startSignalCounter(0);
156 snprintf(filename
, sizeof(filename
), "%s-%d", M_MAPFILE
, gFilenamePID
);
158 snprintf(filename
, sizeof(filename
), "%s", M_MAPFILE
);
160 int mfd
= open(filename
, O_CREAT
| O_WRONLY
| O_TRUNC
, 0666);
162 malloc_map_entry mme
;
163 link_map
* map
= _r_debug
.r_map
;
164 while (nullptr != map
) {
165 if (map
->l_name
&& *map
->l_name
) {
166 mme
.nameLen
= strlen(map
->l_name
);
167 mme
.address
= map
->l_addr
;
168 write(mfd
, &mme
, sizeof(mme
));
169 write(mfd
, map
->l_name
, mme
.nameLen
);
171 write(1, map
->l_name
, mme
.nameLen
);
182 static bool was_paused
= true;
184 JPROF_STATIC
void JprofBufferDump();
185 JPROF_STATIC
void JprofBufferClear();
// Signal handler taking the signal number; setupProfilingStuff() installs it
// as the SIGUSR2 action. The visible part reports on stdout that the circular
// buffer was cleared.
// NOTE(review): the lines between the signature and the puts() call are not
// visible in this view — confirm what they do before relying on this summary.
187 static void ClearProfilingHook(int signum
) {
190 puts("Jprof: cleared circular buffer.");
// Signal handler taking the signal number; setupProfilingStuff() installs it
// as the SIGUSR1 action. When `circular` is set it calls JprofBufferDump(),
// and it reports on stdout that profiling is paused.
// NOTE(review): lines between these statements are not visible in this view.
194 static void EndProfilingHook(int signum
) {
// Flush the circular buffer only when circular mode is active.
195 if (circular
) JprofBufferDump();
199 puts("Jprof: profiling paused.");
202 //----------------------------------------------------------------------
203 // proper usage would be a template, including the function to find the
204 // size of an entry, or include a size header explicitly to each entry.
206 # define DUMB_LOCK() pthread_mutex_lock(&mutex);
207 # define DUMB_UNLOCK() pthread_mutex_unlock(&mutex);
209 # define DUMB_LOCK() FIXME()
210 # define DUMB_UNLOCK() FIXME()
213 class DumbCircularBuffer
{
215 DumbCircularBuffer(size_t init_buffer_size
) {
217 buffer_size
= init_buffer_size
;
218 buffer
= (unsigned char*)malloc(buffer_size
);
219 head
= tail
= buffer
;
222 pthread_mutexattr_t mAttr
;
223 pthread_mutexattr_settype(&mAttr
, PTHREAD_MUTEX_RECURSIVE_NP
);
224 pthread_mutex_init(&mutex
, &mAttr
);
225 pthread_mutexattr_destroy(&mAttr
);
228 ~DumbCircularBuffer() {
231 pthread_mutex_destroy(&mutex
);
242 bool empty() { return head
== tail
; }
244 size_t space_available() {
248 result
= buffer_size
- (tail
- head
) - 1;
250 result
= head
- tail
- 1;
255 void drop(size_t size
) {
256 // assumes correctness!
259 if (head
>= &buffer
[buffer_size
]) head
-= buffer_size
;
264 bool insert(void* data
, size_t size
) {
265 // can fail if not enough space in the entire buffer
267 if (space_available() < size
) return false;
269 size_t max_without_wrap
= &buffer
[buffer_size
] - tail
;
270 size_t initial
= size
> max_without_wrap
? max_without_wrap
: size
;
272 fprintf(stderr
, "insert(%d): max_without_wrap %d, size %d, initial %d\n",
273 used
, max_without_wrap
, size
, initial
);
275 memcpy(tail
, data
, initial
);
277 data
= ((char*)data
) + initial
;
281 fprintf(stderr
, "wrapping by %d bytes\n", size
);
283 memcpy(buffer
, data
, size
);
284 tail
= &(((unsigned char*)buffer
)[size
]);
293 // for external access to the buffer (saving)
// Acquires the buffer's lock by expanding the DUMB_LOCK() macro.
294 void lock() { DUMB_LOCK(); }
// Releases the buffer's lock by expanding the DUMB_UNLOCK() macro.
296 void unlock() { DUMB_UNLOCK(); }
298 // XXX These really shouldn't be public...
302 unsigned char* buffer
;
306 pthread_mutex_t mutex
;
309 class DumbCircularBuffer
* JprofBuffer
;
// Creates the global circular buffer: allocates a new DumbCircularBuffer
// constructed with `size` and stores it in JprofBuffer.
311 JPROF_STATIC
void JprofBufferInit(size_t size
) {
312 JprofBuffer
= new DumbCircularBuffer(size
);
// Logs a notice to stderr and then calls clear() on the global JprofBuffer.
315 JPROF_STATIC
void JprofBufferClear() {
316 fprintf(stderr
, "Told to clear JPROF circular buffer\n");
317 JprofBuffer
->clear();
// Returns the size in bytes of one log entry: the offset of the `pcs` member
// within malloc_log_entry plus one char* slot for each of me->numpcs entries.
320 JPROF_STATIC
size_t JprofEntrySizeof(malloc_log_entry
* me
) {
321 return offsetof(malloc_log_entry
, pcs
) + me
->numpcs
* sizeof(char*);
// Appends one log entry to the global circular buffer. While the buffer has
// less free space than the entry needs and still reports data in use, the
// entry at the head is dropped; if space is still insufficient the new entry
// is abandoned.
// NOTE(review): several lines of this function (including the start of the
// diagnostic call and the opening of the retry loop) are not visible here.
324 JPROF_STATIC
void JprofBufferAppend(malloc_log_entry
* me
) {
325 size_t size
= JprofEntrySizeof(me
);
// Evict from the head until the entry fits or nothing is left in use.
328 while (JprofBuffer
->space_available() < size
&& JprofBuffer
->used
> 0) {
// NOTE(review): this format string uses %d for space_available(), size and
// JprofEntrySizeof(), all of which are size_t; %zu would match those types.
332 "dropping entry: %d in use, %d free, need %d, size_to_free = %d\n",
333 JprofBuffer
->used
, JprofBuffer
->space_available(), size
,
334 JprofEntrySizeof((malloc_log_entry
*)JprofBuffer
->head
));
// Drop exactly one entry: the bytes at head are read as a malloc_log_entry
// to find out how long that entry is.
336 JprofBuffer
->drop(JprofEntrySizeof((malloc_log_entry
*)JprofBuffer
->head
));
// Give up on this entry when it still does not fit.
338 if (JprofBuffer
->space_available() < size
) return;
// Repeat until insert() reports success.
340 } while (!JprofBuffer
->insert(me
, size
));
// Writes the data currently held between head and tail of the global circular
// buffer to gLogFD, then calls clear() and unlock() on the buffer.
// NOTE(review): the matching lock() call and the start of the diagnostic
// fprintf are not visible in this view.
// NOTE(review): the results of the write() calls are ignored, so a short or
// failed write goes unnoticed.
343 JPROF_STATIC
void JprofBufferDump() {
// NOTE(review): the two %d specifiers are paired with a pointer difference
// and with buffer_size (assigned from a size_t in the constructor); these do
// not match int on LP64 targets.
347 stderr
, "dumping JP_CIRCULAR buffer, %d of %d bytes\n",
348 JprofBuffer
->tail
> JprofBuffer
->head
349 ? JprofBuffer
->tail
- JprofBuffer
->head
350 : JprofBuffer
->buffer_size
+ JprofBuffer
->tail
- JprofBuffer
->head
,
351 JprofBuffer
->buffer_size
);
// Contiguous case: everything lies between head and tail.
353 if (JprofBuffer
->tail
>= JprofBuffer
->head
) {
354 write(gLogFD
, JprofBuffer
->head
, JprofBuffer
->tail
- JprofBuffer
->head
);
// Wrapped case: first head up to the end of the storage, then the start of
// the storage up to tail.
356 write(gLogFD
, JprofBuffer
->head
,
357 &(JprofBuffer
->buffer
[JprofBuffer
->buffer_size
]) - JprofBuffer
->head
);
358 write(gLogFD
, JprofBuffer
->buffer
, JprofBuffer
->tail
- JprofBuffer
->buffer
);
// After the dump, clear() is called on the buffer and the lock is released.
360 JprofBuffer
->clear();
361 JprofBuffer
->unlock();
364 //----------------------------------------------------------------------
366 JPROF_STATIC
void JprofLog(u_long aTime
, void* stack_top
, void* top_instr_ptr
) {
367 // Static is simply to make debugging tolerable
368 static malloc_log_entry me
;
371 me
.thread
= syscall(SYS_gettid
); // gettid();
373 me
.flags
= JP_FIRST_AFTER_PAUSE
;
379 CrawlStack(&me
, stack_top
, top_instr_ptr
);
383 JprofBufferAppend(&me
);
385 write(gLogFD
, &me
, JprofEntrySizeof(&me
));
388 printf("Neutrino is missing the pcs member of malloc_log_entry!! \n");
394 /* Let's interrupt at 10 Hz. This is so my log files don't get too large.
395 * This can be changed to a faster value later. This timer is not
396 * programmed to reset, even though it is capable of doing so. This is
397 * to keep from getting interrupts from inside of the handler.
// Arms an interval timer that expires once after `millisec` milliseconds.
// it_interval is zeroed, so the timer does not reload itself; it_value is
// `millisec` split into whole seconds and microseconds. DumpAddressMap()
// passes 0 here to turn the timer off.
// NOTE(review): the lines that choose between the ITIMER_REAL and ITIMER_PROF
// calls are not visible in this view.
399 static void startSignalCounter(unsigned long millisec
) {
400 struct itimerval tvalue
;
// No reload interval: the timer is single-shot.
402 tvalue
.it_interval
.tv_sec
= 0;
403 tvalue
.it_interval
.tv_usec
= 0;
// Convert milliseconds to a seconds + microseconds pair.
404 tvalue
.it_value
.tv_sec
= millisec
/ 1000;
405 tvalue
.it_value
.tv_usec
= (millisec
% 1000) * 1000;
408 setitimer(ITIMER_REAL
, &tvalue
, nullptr);
410 setitimer(ITIMER_PROF
, &tvalue
, nullptr);
414 static long timerMilliSec
= 50;
// Opens /dev/rtc read-only into the global rtcFD and configures it for
// signal-driven profiling, in this order:
//   - installs *sap as the SIGIO action,
//   - passes `hz` to the RTC_IRQP_SET ioctl,
//   - issues the RTC_PIE_ON ioctl,
//   - calls fcntl F_SETSIG with 0 and F_SETOWN with this process's pid.
// Each failing step is reported through perror().
// NOTE(review): the return statements are not visible in this view; the
// caller in setupProfilingStuff() treats a zero result as failure.
417 static int setupRTCSignals(int hz
, struct sigaction
* sap
) {
418 /* global */ rtcFD
= open("/dev/rtc", O_RDONLY
);
420 perror("JPROF_RTC setup: open(\"/dev/rtc\", O_RDONLY)");
424 if (sigaction(SIGIO
, sap
, nullptr) == -1) {
425 perror("JPROF_RTC setup: sigaction(SIGIO)");
429 if (ioctl(rtcFD
, RTC_IRQP_SET
, hz
) == -1) {
430 perror("JPROF_RTC setup: ioctl(/dev/rtc, RTC_IRQP_SET, $JPROF_RTC_HZ)");
434 if (ioctl(rtcFD
, RTC_PIE_ON
, 0) == -1) {
435 perror("JPROF_RTC setup: ioctl(/dev/rtc, RTC_PIE_ON)");
439 if (fcntl(rtcFD
, F_SETSIG
, 0) == -1) {
440 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETSIG, 0)");
444 if (fcntl(rtcFD
, F_SETOWN
, getpid()) == -1) {
445 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETOWN, getpid())");
// Switches signal delivery from the RTC descriptor on or off. It reads
// rtcFD's file status flags with F_GETFL and writes a modified set back with
// F_SETFL, reporting failures through perror().
// NOTE(review): the lines that modify `flags` are not visible here; the error
// messages suggest FASYNC is set when enabling and cleared when disabling —
// confirm against the full source. The return statements are also not visible.
452 static int enableRTCSignals(bool enable
) {
// Remembers the state across calls; the branch below handles a request that
// matches the current state (its body is not visible here).
453 static bool enabled
= false;
454 if (enabled
== enable
) {
459 int flags
= fcntl(rtcFD
, F_GETFL
);
461 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_GETFL)");
471 if (fcntl(rtcFD
, F_SETFL
, flags
) == -1) {
473 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags | FASYNC)");
475 perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags & ~FASYNC)");
484 JPROF_STATIC
void StackHook(int signum
, siginfo_t
* info
, void* ucontext
) {
485 static struct timeval tFirst
;
486 static int first
= 1;
490 if (rtcHz
&& pthread_self() != main_thread
) {
491 // Only collect stack data on the main thread, for now.
496 if (first
&& !(first
= 0)) {
497 puts("Jprof: received first signal");
500 enableRTCSignals(true);
504 gettimeofday(&tFirst
, 0);
510 enableRTCSignals(true);
515 gettimeofday(&tNow
, 0);
516 double usec
= 1e6
* (tNow
.tv_sec
- tFirst
.tv_sec
);
517 usec
+= (tNow
.tv_usec
- tFirst
.tv_usec
);
518 millisec
= static_cast<size_t>(usec
* 1e-3);
522 gregset_t
& gregs
= ((ucontext_t
*)ucontext
)->uc_mcontext
.gregs
;
524 JprofLog(millisec
, (void*)gregs
[REG_RSP
], (void*)gregs
[REG_RIP
]);
526 JprofLog(millisec
, (void*)gregs
[REG_ESP
], (void*)gregs
[REG_EIP
]);
529 if (!rtcHz
) startSignalCounter(timerMilliSec
);
532 NS_EXPORT_(void) setupProfilingStuff(void) {
533 static int gFirstTime
= 1;
534 char filename
[2048]; // XXX fix
536 if (gFirstTime
&& !(gFirstTime
= 0)) {
540 int append
= O_TRUNC
;
541 char* tst
= getenv("JPROF_FLAGS");
543 /* Options from JPROF_FLAGS environment variable:
544 * JP_DEFER -> Wait for a SIGPROF (or SIGALRM, if JP_REALTIME
545 * is set) from userland before starting
546 * to generate them internally
547 * JP_START -> Install the signal handler
548 * JP_PERIOD -> Time between profiler ticks
549 * JP_FIRST -> Extra delay before starting
550 * JP_REALTIME -> Take stack traces in intervals of real time
551 * rather than time used by the process (and the
552 * system for the process). This is useful for
553 * finding time spent by the X server.
554 * JP_APPEND -> Append to jprof-log rather than overwriting it.
555 * This is somewhat risky since it depends on the
556 * address map staying constant across multiple runs.
557 * JP_FILENAME -> base filename to use when saving logs. Note that
558 * this does not affect the mapfile.
559 * JP_CIRCULAR -> use a circular buffer of size N, write/clear on SIGUSR1
561 * JPROF_ISCHILD is set if this is not the first process.
567 if (strstr(tst
, "JP_DEFER")) {
571 if (strstr(tst
, "JP_START")) doNotStart
= 0;
572 if (strstr(tst
, "JP_REALTIME")) realTime
= 1;
573 if (strstr(tst
, "JP_APPEND")) append
= O_APPEND
;
575 char* delay
= strstr(tst
, "JP_PERIOD=");
577 double tmp
= strtod(delay
+ strlen("JP_PERIOD="), nullptr);
579 timerMilliSec
= static_cast<unsigned long>(1000 * tmp
);
581 fprintf(stderr
, "JP_PERIOD of %g less than 0.001 (1ms), using 1ms\n",
587 char* circular_op
= strstr(tst
, "JP_CIRCULAR=");
589 size_t size
= atol(circular_op
+ strlen("JP_CIRCULAR="));
591 fprintf(stderr
, "JP_CIRCULAR of %lu less than 1000, using 10000\n",
592 (unsigned long)size
);
595 JprofBufferInit(size
);
596 fprintf(stderr
, "JP_CIRCULAR buffer of %lu bytes\n",
597 (unsigned long)size
);
601 char* first
= strstr(tst
, "JP_FIRST=");
603 firstDelay
= atol(first
+ strlen("JP_FIRST="));
606 char* rtc
= strstr(tst
, "JP_RTC_HZ=");
609 rtcHz
= atol(rtc
+ strlen("JP_RTC_HZ="));
610 timerMilliSec
= 0; /* This makes JP_FIRST work right. */
611 realTime
= 1; /* It's the _R_TC and all. ;) */
613 # define IS_POWER_OF_TWO(x) (((x) & ((x)-1)) == 0)
615 if (!IS_POWER_OF_TWO(rtcHz
) || rtcHz
< 2) {
617 "JP_RTC_HZ must be power of two and >= 2, "
618 "but %d was provided; using default of 2048\n",
624 "JP_RTC_HZ found, but RTC profiling only supported on "
630 const char* f
= strstr(tst
, "JP_FILENAME=");
632 f
= f
+ strlen("JP_FILENAME=");
636 char* is_child
= getenv("JPROF_ISCHILD");
637 if (!is_child
) setenv("JPROF_ISCHILD", "", 0);
638 gIsChild
= !!is_child
;
640 gFilenamePID
= syscall(SYS_gettid
); // gettid();
642 snprintf(filename
, sizeof(filename
), "%s-%d", f
, gFilenamePID
);
644 snprintf(filename
, sizeof(filename
), "%s", f
);
646 // XXX FIX! inherit current capture state!
651 gLogFD
= open(filename
, O_CREAT
| O_WRONLY
| append
, 0666);
653 fprintf(stderr
, "Unable to create " M_LOGFILE
);
656 struct sigaction action
;
659 // Dump out the address map when we terminate
660 RegisterJprofShutdown();
662 main_thread
= pthread_self();
663 // fprintf(stderr,"jprof: main_thread = %u\n",
664 // (unsigned int)main_thread);
666 // FIX! probably should block these against each other
669 action
.sa_handler
= nullptr;
670 action
.sa_sigaction
= StackHook
;
671 action
.sa_mask
= mset
;
672 action
.sa_flags
= SA_RESTART
| SA_SIGINFO
;
675 if (!setupRTCSignals(rtcHz
, &action
)) {
677 "jprof: Error initializing RTC, NOT "
684 if (!rtcHz
|| firstDelay
!= 0)
688 sigaction(SIGALRM
, &action
, nullptr);
691 // enable PROF in all cases to simplify JP_DEFER/pause/restart
692 sigaction(SIGPROF
, &action
, nullptr);
694 // make it so a SIGUSR1 will stop the profiling
695 // Note: It currently does not close the logfile.
696 // This could be configurable (so that it could
697 // later be reopened).
699 struct sigaction stop_action
;
700 stop_action
.sa_handler
= EndProfilingHook
;
701 stop_action
.sa_mask
= mset
;
702 stop_action
.sa_flags
= SA_RESTART
;
703 sigaction(SIGUSR1
, &stop_action
, nullptr);
705 // make it so a SIGUSR2 will clear the circular buffer
707 stop_action
.sa_handler
= ClearProfilingHook
;
708 stop_action
.sa_mask
= mset
;
709 stop_action
.sa_flags
= SA_RESTART
;
710 sigaction(SIGUSR2
, &stop_action
, nullptr);
713 "Jprof: Initialized signal handler and set "
714 "timer for %lu %s, %d s "
716 rtcHz
? rtcHz
: timerMilliSec
, rtcHz
? "Hz" : "ms", firstDelay
);
720 /* If we have an initial delay we can just use
721 startSignalCounter to set up a timer to fire the
722 first stackHook after that delay. When that happens
723 we'll go and switch to RTC profiling. */
724 if (rtcHz
&& firstDelay
== 0) {
725 puts("Jprof: enabled RTC signals");
726 enableRTCSignals(true);
730 puts("Jprof: started timer");
731 startSignalCounter(firstDelay
* 1000 + timerMilliSec
);
738 printf("setupProfilingStuff() called multiple times\n");