1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Bucket-count constants. Both values are prime, as the trailing comments
// state. NOTE(review): the code that consumes them is not visible in this
// extract - confirm where they are used before changing either value.
33 static const u_int DefaultBuckets
= 10007; // arbitrary, but prime
34 static const u_int MaxBuckets
= 1000003; // arbitrary, but prime
36 //----------------------------------------------------------------------
// Program entry point.
//
// Passes the command line to l->initialize, then loops over the
// l->numLogFiles log files named in argv starting at l->logFileIndex.
// When an output directory is set or more than one log was given, an output
// file name is formatted into a fixed 2048-byte buffer, the file is opened
// for writing, and the resulting FILE* is stored in l->outputfd.
// l->open is only called for a log when l->outputfd is non-null.
//
// NOTE(review): the creation of l, the branch that picks between the two
// name formats, and the handling of the single-log case are on lines that
// are not visible in this extract.
38 int main(int argc
, char** argv
) {
41 l
->initialize(argc
, argv
);
44 for (int i
= 0; i
< l
->numLogFiles
; i
++) {
45 if (l
->output_dir
|| l
->numLogFiles
> 1) {
46 char name
[2048]; // XXX fix
// Name format used together with an output directory: <dir>/<log>.html
48 snprintf(name
, sizeof(name
), "%s/%s.html", l
->output_dir
,
49 argv
[l
->logFileIndex
+ i
]);
// Name format without a directory prefix: <log>.html
51 snprintf(name
, sizeof(name
), "%s.html", argv
[l
->logFileIndex
+ i
]);
53 fprintf(stderr
, "opening %s\n", name
);
54 l
->outputfd
= fopen(name
, "w");
55 // if an error we won't process the file
57 if (l
->outputfd
) { // paranoia
58 l
->open(argv
[l
->logFileIndex
+ i
]);
// A handle other than stderr is reset to nullptr after the log is processed.
// NOTE(review): the statement between the test and the reset is not visible.
60 if (l
->outputfd
!= stderr
) {
62 l
->outputfd
= nullptr;
// Produces a copy of the string in with its < and > characters substituted.
//
//   in - NUL-terminated input string (measured with strlen, scanned with
//        strpbrk).
//
// The output buffer is allocated with new char[]. Its size is the input
// length plus three extra bytes per < or > found, plus one more byte
// (presumably for the terminating NUL - the copy loop is only partly
// visible in this extract).
// NOTE(review): the return statement is not visible; callers presumably
// receive the new[] buffer and are responsible for delete[] - confirm.
70 char* htmlify(const char* in
) {
76 // Count the number of '<' and '>' in the input.
77 while ((p
= strpbrk(p
, "<>"))) {
82 // Knowing the number of '<' and '>', we can calculate the space
83 // needed for the output string.
84 newlen
= strlen(in
) + n
* 3 + 1;
85 out
= new char[newlen
];
87 // Copy the input to the output, with substitutions.
94 } else if (*p
== '>') {
108 applicationName
= nullptr;
119 firstLogEntry
= lastLogEntry
= 0;
124 numExternalSymbols
= 0;
125 lowestSymbolAddr
= 0;
126 highestSymbolAddr
= 0;
130 collect_last
= false;
// Emits the command-line usage text.
//
// The first group of string literals is the one-line synopsis; its %s is
// filled with applicationName. The second group is the per-option help.
// NOTE(review): the output calls themselves and whatever follows the help
// text (for example an exit) are on lines not visible in this extract.
137 void leaky::usageError() {
139 "Usage: %s [-v] [-t] [-e exclude] [-i include] [-s stackdepth] "
140 "[--last] [--all] [--start n [--end m]] [--cleo] [--output-dir dir] "
141 "prog log [log2 ...]\n",
142 (char*)applicationName
);
146 "\t-t | --threads: split threads\n"
147 "\t--only-thread n: only profile thread N\n"
148 "\t-i include-id: stack must include specified id\n"
149 "\t-e exclude-id: stack must NOT include specified id\n"
150 "\t-s stackdepth: Limit depth looked at from captured stack frames\n"
151 "\t--last: only profile the last capture section\n"
152 "\t--start n [--end m]: profile n to m (or end) capture sections\n"
153 "\t--cleo: format output for 'cleopatra' display\n"
154 "\t--output-dir dir: write output files to dir\n"
155 "\tIf there's one log, output goes to stdout unless --output-dir is set\n"
156 "\tIf there are more than one log, output files will be named with .html "
// Long-option table handed to getopt_long in leaky::initialize.
// Each entry is {name, has_arg, flag, val}: has_arg is 1 when the option
// takes an argument, flag is always nullptr here so getopt_long returns val,
// and val is the short option character the parser switches on.
// The all-zero entry terminates the table.
161 static struct option longopts
[] = {
162 {"threads", 0, nullptr, 't'}, {"only-thread", 1, nullptr, 'T'},
163 {"last", 0, nullptr, 'l'}, {"start", 1, nullptr, 'x'},
164 {"end", 1, nullptr, 'n'}, {"cleo", 0, nullptr, 'c'},
165 {"output-dir", 1, nullptr, 'd'}, {nullptr, 0, nullptr, 0},
// Parses the command line and records the program and log file arguments.
//
//   argc, argv - the arguments received by main.
//
// Visible effects:
//   * applicationName is derived from argv[0]: the last slash is located
//     with strrchr, and argv[0] itself is used when there is no slash.
//   * Options are read with getopt_long using the short option string below
//     together with the longopts table.
//   * Option arguments are stored in output_dir, exclusions, includes,
//     stackDepth, collect_start, collect_end and onlyThread. Numeric
//     arguments are converted with atoi.
//   * After option parsing at least two positional arguments must remain;
//     the first becomes progFile and the rest are the log files
//     (logFileIndex is the index of the first one, numLogFiles the count).
//
// NOTE(review): most case labels of the option switch are not visible in
// this extract, so the mapping from option letter to the assignments below
// is inferred from adjacency only.
168 void leaky::initialize(int argc
, char** argv
) {
169 applicationName
= argv
[0];
170 applicationName
= strrchr(applicationName
, '/');
171 if (!applicationName
) {
172 applicationName
= argv
[0];
182 output_dir
= nullptr;
185 // XXX tons of cruft here left over from tracemalloc
186 // XXX The -- options shouldn't need short versions, or they should be
// NOTE(review): in the short option string the letter d is not followed by
// a colon, while the output-dir long option maps to d and takes an
// argument; the letter c used by the cleo long option is absent from the
// string. Confirm the short forms behave as intended.
188 while (((arg
= getopt_long(argc
, argv
, "adEe:gh:i:r:Rs:tT:qvx:ln:", longopts
,
189 &longindex
)) != -1)) {
193 fprintf(stderr
, "error: unknown option %c\n", optopt
);
198 case 'A': // not implemented
205 output_dir
= optarg
; // reference to an argv pointer
210 exclusions
.add(optarg
);
214 case 'r': // not implemented
216 if (!includes
.IsEmpty()) {
221 includes
.add(optarg
);
222 if (!roots
.IsEmpty()) {
229 stackDepth
= atoi(optarg
);
// Stack depths below 2 get special handling; the body is not visible here.
230 if (stackDepth
< 2) {
236 collect_start
= atoi(optarg
);
240 collect_end
= atoi(optarg
);
256 onlyThread
= atoi(optarg
);
// Requires a program file plus at least one log file after the options.
260 if (errflg
|| ((argc
- optind
) < 2)) {
263 progFile
= argv
[optind
++];
264 logFileIndex
= optind
;
265 numLogFiles
= argc
- optind
;
266 if (!quiet
) fprintf(stderr
, "numlogfiles = %d\n", numLogFiles
);
// Maps the file behind descriptor fd into memory.
//
//   fd    - open file descriptor; its size is obtained with fstat.
//   flags - passed to mmap as the protection argument.
//   sz    - out-parameter; NOTE(review): the store through sz is not visible
//           in this extract, presumably it receives the file size.
//
// The mapping is private (MAP_PRIVATE), starts at file offset 0 and lets the
// kernel choose the address.
// NOTE(review): the length argument is sb.st_size cast to int, which would
// truncate sizes that do not fit in an int - confirm whether large log
// files need to be supported.
269 static void* mapFile(int fd
, u_int flags
, off_t
* sz
) {
271 if (fstat(fd
, &sb
) < 0) {
275 void* base
= mmap(0, (int)sb
.st_size
, flags
, MAP_PRIVATE
, fd
, 0);
// Reads the load map file M_MAPFILE.
//
// The file is read as a sequence of records: a fixed-size malloc_map_entry
// followed by mme.nameLen bytes of name. A short read of either part ends
// the loop. For every complete record the name is NUL-terminated, logged to
// stderr together with the address, and copied into a newly allocated
// LoadMapEntry (address copied, name duplicated with strdup).
//
// NOTE(review): the declaration of the name buffer and the code that links
// each LoadMapEntry into a list are not visible in this extract. nameLen
// comes from the file; confirm it is bounded by the size of name before the
// read and the terminating store.
284 void leaky::LoadMap() {
285 malloc_map_entry mme
;
289 // all files use the same map
290 int fd
= ::open(M_MAPFILE
, O_RDONLY
);
292 perror("open: " M_MAPFILE
);
// Fixed-size record header first, then the variable-length name.
296 int nb
= read(fd
, &mme
, sizeof(mme
));
297 if (nb
!= sizeof(mme
)) break;
298 nb
= read(fd
, name
, mme
.nameLen
);
299 if (nb
!= (int)mme
.nameLen
) break;
300 name
[mme
.nameLen
] = 0;
302 fprintf(stderr
, "%s @ %lx\n", name
, mme
.address
);
305 LoadMapEntry
* lme
= new LoadMapEntry
;
306 lme
->address
= mme
.address
;
307 lme
->name
= strdup(name
);
// Processes one log file and writes its report to outputfd.
//
//   logFile - path of the log; opened read-only with ::open.
//
// Visible steps:
//   1. setupSymbols(progFile) loads the symbol table.
//   2. A previously opened mappedLogFile is closed, the new log is opened
//      and mapped with mapFile(PROT_READ). firstLogEntry points at the start
//      of the mapping and lastLogEntry at start + size.
//   3. One pass over the entries tracks capture sections (entries flagged
//      JP_FIRST_AFTER_PAUSE, compared against collect_start / collect_end)
//      and collects the distinct thread ids into threadArray.
//   4. The HTML heading and the list of threads are written, analyze is
//      called for each recorded thread that passes the onlyThread filter,
//      and the closing tags are written unless cleo output is selected.
//
// Entries are variable length: the next entry starts right after the last
// pc of the current one, at &lep->pcs[lep->numpcs].
//
// NOTE(review): many statements of this function (error handling, section
// counting, the conditions guarding the HTML output) are on lines that are
// not visible in this extract.
315 void leaky::open(char* logFile
) {
316 int threadArray
[100]; // should auto-expand
317 int last_thread
= -1;
320 bool collecting
= false;
324 setupSymbols(progFile
);
326 // open up the log file
// A zero descriptor value is treated as no log currently open.
327 if (mappedLogFile
) ::close(mappedLogFile
);
329 mappedLogFile
= ::open(logFile
, O_RDONLY
);
330 if (mappedLogFile
< 0) {
335 firstLogEntry
= (malloc_log_entry
*)mapFile(mappedLogFile
, PROT_READ
, &size
);
336 lastLogEntry
= (malloc_log_entry
*)((char*)firstLogEntry
+ size
);
338 if (!collect_last
|| collect_start
< 0) {
342 // First, restrict it to the capture sections specified (all, last, start/end)
343 // This loop walks through all the call stacks we recorded
344 for (malloc_log_entry
* lep
= firstLogEntry
; lep
< lastLogEntry
;
345 lep
= reinterpret_cast<malloc_log_entry
*>(&lep
->pcs
[lep
->numpcs
])) {
346 if (lep
->flags
& JP_FIRST_AFTER_PAUSE
) {
353 if (collect_start
== section
) {
357 if (collect_end
== section
) {
362 fprintf(stderr
, "New section %d: first=%p, last=%p, collecting=%d\n",
363 section
, (void*)firstLogEntry
, (void*)lastLogEntry
, collecting
);
366 // Capture thread info at the same time
368 // Find all the threads captured
370 // pthread/linux docs say the signal can be delivered to any thread in
371 // the process. In practice, it appears in Linux that it's always
372 // delivered to the thread that called setitimer(), and each thread can
373 // have a separate itimer. There's a support library for gprof that
374 // overlays pthread_create() to set timers in any threads you spawn.
375 if (showThreads
&& collecting
) {
// last_thread is compared first so the linear search below only runs when
// the thread id differs from last_thread.
376 if (lep
->thread
!= last_thread
) {
378 for (i
= 0; i
< numThreads
; i
++) {
379 if (lep
->thread
== threadArray
[i
]) break;
// Store a thread id only if it was not found and the fixed-size array
// still has room; ids beyond the capacity are not stored.
381 if (i
== numThreads
&&
382 i
< (int)(sizeof(threadArray
) / sizeof(threadArray
[0]))) {
383 threadArray
[i
] = lep
->thread
;
385 if (!quiet
) fprintf(stderr
, "new thread %d\n", lep
->thread
);
392 "Done collecting: sections %d: first=%p, last=%p, numThreads=%d\n",
393 section
, (void*)firstLogEntry
, (void*)lastLogEntry
, numThreads
);
397 "<html><head><title>Jprof Profile Report</title></head><body>\n");
398 fprintf(outputfd
, "<h1><center>Jprof Profile Report</center></h1>\n");
402 fprintf(stderr
, "Num threads %d\n", numThreads
);
405 fprintf(outputfd
, "<hr>Threads:<p><pre>\n");
// Thread index: one link per thread, with a line break after every tenth.
406 for (int i
= 0; i
< numThreads
; i
++) {
407 fprintf(outputfd
, " <a href=\"#thread_%d\">%d</a> ", threadArray
[i
],
409 if ((i
+ 1) % 10 == 0) fprintf(outputfd
, "<br>\n");
411 fprintf(outputfd
, "</pre>");
// onlyThread == 0 means no filter; otherwise only the matching thread is
// analyzed.
414 for (int i
= 0; i
< numThreads
; i
++) {
415 if (!onlyThread
|| onlyThread
== threadArray
[i
]) analyze(threadArray
[i
]);
421 if (!cleo
) fprintf(outputfd
, "</pre></body></html>\n");
424 //----------------------------------------------------------------------
// qsort comparison callback for an array of Symbol pointers (it is passed
// to qsort in leaky::setupSymbols with an element size of sizeof(Symbol*)).
//
//   a, b - pointers to the two array elements, each a Symbol const*.
//
// Orders by the address member: when the addresses differ the result is 1
// if the first is greater and -1 otherwise.
// NOTE(review): the result for equal addresses is on a line not visible in
// this extract.
426 static int symbolOrder(void const* a
, void const* b
) {
427 Symbol
const** ap
= (Symbol
const**)a
;
428 Symbol
const** bp
= (Symbol
const**)b
;
429 return (*ap
)->address
== (*bp
)->address
431 : ((*ap
)->address
> (*bp
)->address
? 1 : -1);
// Calls ReadSymbols(name, address) for load map entries, starting at
// loadMap and continuing while the current entry is non-null.
// NOTE(review): the statement that advances lme to the following entry is
// not visible in this extract.
434 void leaky::ReadSharedLibrarySymbols() {
435 LoadMapEntry
* lme
= loadMap
;
436 while (nullptr != lme
) {
437 ReadSymbols(lme
->name
, lme
->address
);
// Loads and sorts the symbol table.
//
//   fileName - program file passed to ReadSymbols with a base address of 0.
//
// The work is guarded by usefulSymbols == 0. Symbols are read from the
// program and then from the shared libraries, the total is reported on
// stderr, externalSymbols is sorted by address with qsort and symbolOrder,
// and the lowest and highest symbol addresses are recorded.
// NOTE(review): the reads of externalSymbols[0] and
// externalSymbols[usefulSymbols - 1] assume at least one symbol was loaded;
// confirm the lines not shown guard the empty case.
442 void leaky::setupSymbols(const char* fileName
) {
443 if (usefulSymbols
== 0) {
446 // Read in symbols from the program
447 ReadSymbols(fileName
, 0);
449 // Read in symbols from the .so's
450 ReadSharedLibrarySymbols();
453 fprintf(stderr
, "A total of %d symbols were loaded\n", usefulSymbols
);
457 qsort(externalSymbols
, usefulSymbols
, sizeof(Symbol
*), symbolOrder
);
458 lowestSymbolAddr
= externalSymbols
[0]->address
;
459 highestSymbolAddr
= externalSymbols
[usefulSymbols
- 1]->address
;
463 // Binary search the table, looking for a symbol that covers this
// Binary search of externalSymbols for the entry covering addr.
//
//   addr - address to look up.
//
// The search window is [base, limit] with midPoint = (base + limit) / 2.
// When addr is below the midpoint symbol address the window moves down;
// otherwise addr is compared with the address of the following symbol
// (sp + 1).
//
// NOTE(review): the declaration of base, the return statements and any
// bounds checks are on lines not visible in this extract. limit and
// midPoint are unsigned, so usefulSymbols - 1 and midPoint - 1 wrap around
// when the operand is 0 - confirm both cases are guarded, and that sp + 1 is
// never read past the end pointer.
465 int leaky::findSymbolIndex(u_long addr
) {
467 u_int limit
= usefulSymbols
- 1;
468 Symbol
** end
= &externalSymbols
[limit
];
469 while (base
<= limit
) {
470 u_int midPoint
= (base
+ limit
) >> 1;
471 Symbol
** sp
= &externalSymbols
[midPoint
];
472 if (addr
< (*sp
)->address
) {
476 limit
= midPoint
- 1;
479 if (addr
< (*(sp
+ 1))->address
) {
// Looks up the Symbol for addr via findSymbolIndex and returns the
// matching externalSymbols entry.
// NOTE(review): the handling of a failed lookup sits between the two
// visible statements and is not shown; callers in this file test the result
// for null, so a null return on failure is presumably implemented there.
491 Symbol
* leaky::findSymbol(u_long addr
) {
492 int idx
= findSymbolIndex(addr
);
497 return externalSymbols
[idx
];
501 //----------------------------------------------------------------------
// Tests a log entry against the exclusions list.
//
//   lep - log entry whose pcs[0 .. numpcs) frames are examined.
//
// An empty exclusions list is handled up front. Otherwise every frame is
// resolved with findSymbol and the entry matches as soon as a resolved
// symbol name is contained in exclusions. Frames without a symbol are
// ignored.
// NOTE(review): the return statements are not visible in this extract.
503 bool leaky::excluded(malloc_log_entry
* lep
) {
504 if (exclusions
.IsEmpty()) {
508 char** pcp
= &lep
->pcs
[0];
509 u_int n
= lep
->numpcs
;
510 for (u_int i
= 0; i
< n
; i
++, pcp
++) {
511 Symbol
* sp
= findSymbol((u_long
)*pcp
);
512 if (sp
&& exclusions
.contains(sp
->name
)) {
// Tests a log entry against the includes list.
//
//   lep - log entry whose pcs[0 .. numpcs) frames are examined.
//
// An empty includes list is handled up front. Otherwise every frame is
// resolved with findSymbol and the entry matches as soon as a resolved
// symbol name is contained in includes. Frames without a symbol are
// ignored.
// NOTE(review): the return statements are not visible in this extract.
519 bool leaky::included(malloc_log_entry
* lep
) {
520 if (includes
.IsEmpty()) {
524 char** pcp
= &lep
->pcs
[0];
525 u_int n
= lep
->numpcs
;
526 for (u_int i
= 0; i
< n
; i
++, pcp
++) {
527 Symbol
* sp
= findSymbol((u_long
)*pcp
);
528 if (sp
&& includes
.contains(sp
->name
)) {
535 //----------------------------------------------------------------------
// Writes the stack of one log entry to out.
//
//   out - destination stream.
//   lep - log entry; at most stackDepth of its numpcs frames are printed,
//         starting at pcs[0].
//
// Each frame address is resolved with findSymbol. Two output forms are
// visible: the symbol name followed by the address in square brackets, and
// the bare address in angle brackets.
// NOTE(review): the condition choosing between the two forms is not visible
// in this extract; presumably the second form is used when no symbol is
// found.
537 void leaky::displayStackTrace(FILE* out
, malloc_log_entry
* lep
) {
538 char** pcp
= &lep
->pcs
[0];
539 u_int n
= (lep
->numpcs
< stackDepth
) ? lep
->numpcs
: stackDepth
;
540 for (u_int i
= 0; i
< n
; i
++, pcp
++) {
541 u_long addr
= (u_long
)*pcp
;
542 Symbol
* sp
= findSymbol(addr
);
544 fputs(sp
->name
, out
);
546 fprintf(out
, "[%p]", (char*)addr
);
549 fprintf(out
, "<%p>", (char*)addr
);
// Prints one log entry: its delTime value followed by a tab, then its stack
// trace via displayStackTrace.
// NOTE(review): the delTime value is written with printf (stdout) while the
// stack trace goes to outputfd - confirm the split destination is intended.
556 void leaky::dumpEntryToLog(malloc_log_entry
* lep
) {
557 printf("%ld\t", lep
->delTime
);
559 displayStackTrace(outputfd
, lep
);
// Writes the HTML report for one thread to fp.
//
//   fp         - destination stream.
//   countArray - per-symbol counts indexed like externalSymbols; sort key
//                and Count column of the hierarchical section.
//   count      - initial value of totalTimerHits, the percentage
//                denominator of the hierarchical section; later compared
//                with the recomputed total of timerHit values.
//   thread     - used in the thread heading below; its declaration is on a
//                signature line not visible in this extract.
//
// A ranking table of symbol indices is Shell-sorted twice, each time in
// descending order: first by countArray for the hierarchical profile, then
// by the timerHit of each symbol for the flat profile. Symbol names are
// passed through htmlify before they are printed.
//
// NOTE(review): no delete[] for rankingTable or for the buffers returned by
// htmlify is visible in this extract - confirm they are released.
562 void leaky::generateReportHTML(FILE* fp
, int* countArray
, int count
,
564 fprintf(fp
, "<center>");
566 fprintf(fp
, "<hr><A NAME=thread_%d><b>Thread: %d</b></A><p>", thread
,
571 "<A href=#flat_%d>flat</A><b> | </b><A href=#hier_%d>hierarchical</A>",
573 fprintf(fp
, "</center><P><P><P>\n");
575 int totalTimerHits
= count
;
576 int* rankingTable
= new int[usefulSymbols
];
// Start from the identity permutation: rankingTable[k] == k.
578 for (int cnt
= usefulSymbols
; --cnt
>= 0; rankingTable
[cnt
] = cnt
)
581 // Drat. I would use ::qsort() but I would need a global variable and my
582 // intro-pascal professor threatened to flunk anyone who used globals.
583 // She damaged me for life :-) (That was 1986. See how much influence
584 // she had. I don't remember her name but I always feel guilty about globals)
586 // Shell Sort. 581130733 is the max 31 bit value of h = 3h+1
588 for (mx
= usefulSymbols
/ 9, h
= 581130733; h
> 0; h
/= 3) {
590 for (i
= h
- 1; i
< usefulSymbols
; i
++) {
591 int j
, tmp
= rankingTable
[i
], val
= countArray
[tmp
];
// Shift entries with a smaller count towards the end (descending order).
592 for (j
= i
; (j
>= h
) && (countArray
[rankingTable
[j
- h
]] < val
);
594 rankingTable
[j
] = rankingTable
[j
- h
];
596 rankingTable
[j
] = tmp
;
601 // Ok, We are sorted now. Let's go through the table until we get to
602 // functions that were never called. Right now we don't do much inside
603 // this loop. Later we can get callers and callees into it like gprof
606 "<h2><A NAME=hier_%d></A><center><a "
607 "href=\"http://searchfox.org/mozilla-central/source/tools/jprof/"
608 "README.html#hier\">Hierarchical Profile</a></center></h2><hr>\n",
610 fprintf(fp
, "<pre>\n");
611 fprintf(fp
, "%6s %6s %4s %s\n", "index", "Count", "Hits",
// Hierarchical section: stops at the first symbol whose count is zero.
// For each symbol the cntP report is printed, then the symbol line, then
// the cntC report.
614 for (i
= 0; i
< usefulSymbols
&& countArray
[rankingTable
[i
]] > 0; i
++) {
615 Symbol
** sp
= &externalSymbols
[rankingTable
[i
]];
617 (*sp
)->cntP
.printReport(fp
, this, rankingTable
[i
], totalTimerHits
);
619 char* symname
= htmlify((*sp
)->name
);
// Percentages are computed in integer tenths of a percent and then divided
// by 10.0; a padding space is emitted when the value is below 10.0.
621 "%6d %6d (%3.1f%%)%s <a name=%d>%8d (%3.1f%%)</a>%s <b>%s</b>\n",
622 rankingTable
[i
], (*sp
)->timerHit
,
623 ((*sp
)->timerHit
* 1000 / totalTimerHits
) / 10.0,
624 ((*sp
)->timerHit
* 1000 / totalTimerHits
) / 10.0 >= 10.0 ? "" : " ",
625 rankingTable
[i
], countArray
[rankingTable
[i
]],
626 (countArray
[rankingTable
[i
]] * 1000 / totalTimerHits
) / 10.0,
627 (countArray
[rankingTable
[i
]] * 1000 / totalTimerHits
) / 10.0 >= 10.0
633 (*sp
)->cntC
.printReport(fp
, this, rankingTable
[i
], totalTimerHits
);
635 fprintf(fp
, "<hr>\n");
637 fprintf(fp
, "</pre>\n");
639 // OK, Now we want to print the flat profile. To do this we resort on
642 // Cut-N-Paste Shell sort from above. The Ranking Table has already been
643 // populated, so we do not have to reinitialize it.
644 for (mx
= usefulSymbols
/ 9, h
= 581130733; h
> 0; h
/= 3) {
646 for (i
= h
- 1; i
< usefulSymbols
; i
++) {
647 int j
, tmp
= rankingTable
[i
], val
= externalSymbols
[tmp
]->timerHit
;
649 (j
>= h
) && (externalSymbols
[rankingTable
[j
- h
]]->timerHit
< val
);
651 rankingTable
[j
] = rankingTable
[j
- h
];
653 rankingTable
[j
] = tmp
;
658 // Pre-count up total counter hits, to get a percentage.
659 // I wanted the total before walking the list, if this
660 // double-pass over externalSymbols gets slow we can
661 // do single-pass and print this out after the loop finishes.
// NOTE(review): the loop header that initialises totalTimerHits for this
// summation is not visible in this extract.
664 i
< usefulSymbols
&& externalSymbols
[rankingTable
[i
]]->timerHit
> 0;
666 Symbol
** sp
= &externalSymbols
[rankingTable
[i
]];
667 totalTimerHits
+= (*sp
)->timerHit
;
// totalTimerHits is a divisor below, so it is forced to be non-zero.
669 if (totalTimerHits
== 0) totalTimerHits
= 1;
671 if (totalTimerHits
!= count
)
672 fprintf(stderr
, "Hit count mismatch: count=%d; totalTimerHits=%d", count
,
676 "<h2><A NAME=flat_%d></A><center><a "
677 "href=\"http://searchfox.org/mozilla-central/source/tools/jprof/"
678 "README.html#flat\">Flat Profile</a></center></h2><br>\n",
680 fprintf(fp
, "<pre>\n");
682 fprintf(fp
, "Total hit count: %d\n", totalTimerHits
);
683 fprintf(fp
, "Count %%Total Function Name\n");
684 // Now loop for as long as we have timer hits
686 i
< usefulSymbols
&& externalSymbols
[rankingTable
[i
]]->timerHit
> 0;
688 Symbol
** sp
= &externalSymbols
[rankingTable
[i
]];
690 char* symname
= htmlify((*sp
)->name
);
691 fprintf(fp
, "<a href=\"#%d\">%3d %-2.1f %s</a>\n", rankingTable
[i
],
693 ((float)(*sp
)->timerHit
/ (float)totalTimerHits
) * 100.0, symname
);
// Builds the profile data for one thread and, unless cleo output is
// selected, passes it to generateReportHTML.
//
//   thread - thread id to keep; 0 disables the thread filter (see the
//            thread != 0 test in the entry loop).
//
// Visible steps:
//   1. Allocate countArray and flagArray with one int per useful symbol,
//      zero countArray, reset timerHit and call regClear on every symbol,
//      and fill flagArray with -1.
//   2. Walk the log entries from firstLogEntry to lastLogEntry, skipping
//      entries of other threads and excluded entries (further conditions of
//      that test are not visible in this extract).
//   3. For each kept entry, walk at most stackDepth frames from index n - 1
//      down to 0, resolving each with findSymbolIndex, registering
//      parent/child links and finally incrementing timerHit of the last
//      resolved symbol.
//
// NOTE(review): no delete[] for countArray or flagArray is visible in this
// extract - confirm they are released.
698 void leaky::analyze(int thread
) {
699 int* countArray
= new int[usefulSymbols
];
700 int* flagArray
= new int[usefulSymbols
];
702 // Zero our function call counter
703 memset(countArray
, 0, sizeof(countArray
[0]) * usefulSymbols
);
706 for (int i
= 0; i
< usefulSymbols
; i
++) {
707 externalSymbols
[i
]->timerHit
= 0;
708 externalSymbols
[i
]->regClear();
711 // The flag array is used to prevent counting symbols multiple times
712 // if functions are called recursively. In order to keep from having
713 // to zero it on each pass through the loop, we mark it with the value
714 // of stacks on each trip through the loop. This means we can determine
715 // if we have seen this symbol for this stack trace w/o having to reset
716 // from the prior stacktrace.
717 memset(flagArray
, -1, sizeof(flagArray
[0]) * usefulSymbols
);
719 if (cleo
) fprintf(outputfd
, "m-Start\n");
721 // This loop walks through all the call stacks we recorded
722 // --last, --start and --end can restrict it, as can excludes/includes
724 for (malloc_log_entry
* lep
= firstLogEntry
; lep
< lastLogEntry
;
725 lep
= reinterpret_cast<malloc_log_entry
*>(&lep
->pcs
[lep
->numpcs
])) {
726 if ((thread
!= 0 && lep
->thread
!= thread
) || excluded(lep
) ||
731 ++stacks
; // How many stack frames did we collect
// n is the number of frames considered; pcp starts at the last of them.
733 u_int n
= (lep
->numpcs
< stackDepth
) ? lep
->numpcs
: stackDepth
;
734 char** pcp
= &lep
->pcs
[n
- 1];
735 int idx
= -1, parrentIdx
= -1; // Init idx incase n==0
737 // This loop walks through every symbol in the call stack. By walking it
738 // backwards we know who called the function when we get there.
// NOTE(review): this first walk prints one line per frame to outputfd; it
// presumably belongs to the cleo output mode - the guarding condition is
// not visible in this extract.
740 for (int i
= n
- 1; i
>= 0; --i
, --pcp
) {
741 idx
= findSymbolIndex(reinterpret_cast<u_long
>(*pcp
));
744 // Skip over bogus __restore_rt frames that realtime profiling
746 if (i
> 0 && !strcmp(externalSymbols
[idx
]->name
, "__restore_rt")) {
749 idx
= findSymbolIndex(reinterpret_cast<u_long
>(*pcp
));
754 Symbol
** sp
= &externalSymbols
[idx
];
755 char* symname
= htmlify((*sp
)->name
);
756 fprintf(outputfd
, "%c-%s\n", type
, symname
);
759 // else can't find symbol - ignore
763 // This loop walks through every symbol in the call stack. By walking it
764 // backwards we know who called the function when we get there.
765 for (int i
= n
- 1; i
>= 0; --i
, --pcp
) {
766 idx
= findSymbolIndex(reinterpret_cast<u_long
>(*pcp
));
769 // Skip over bogus __restore_rt frames that realtime profiling
771 if (i
> 0 && !strcmp(externalSymbols
[idx
]->name
, "__restore_rt")) {
774 idx
= findSymbolIndex(reinterpret_cast<u_long
>(*pcp
));
780 // If we have not seen this symbol before count it and mark it as seen
// The assignment inside the condition marks the symbol with the current
// stack number; the trailing || true keeps the test true whatever value the
// assignment yields.
781 if (flagArray
[idx
] != stacks
&& ((flagArray
[idx
] = stacks
) || true)) {
785 // We know who we are and we know who our parrent is. Count this
786 if (parrentIdx
>= 0) {
787 externalSymbols
[parrentIdx
]->regChild(idx
);
788 externalSymbols
[idx
]->regParrent(parrentIdx
);
790 // inside if() so an unknown in the middle of a stack won't break
796 // idx should be the function that we were in when we received the signal.
798 ++externalSymbols
[idx
]->timerHit
;
802 if (!cleo
) generateReportHTML(outputfd
, countArray
, stacks
, thread
);
// Prints the entries of this FunctionCount to fp.
//
//   fp     - destination stream.
//   lk     - used to map a symbol index to its name via indexToName.
//   parent - symbol index of the owner; an entry whose index equals parent
//            is tagged with the text (self).
//   total  - denominator for the percentage column.
//
// Each printed line links to the symbol index and shows the count, the
// count as a percentage of total, and the htmlified symbol name; a padding
// space is added when the percentage is below 10.0.
//
// The surrounding do/while lowers tmax to nmax on every round and stops
// once that value is no longer positive; the visible else-branch selects
// counts strictly between nmax and tmax.
// NOTE(review): this looks like a repeated selection that prints entries in
// descending count order, but the branch that prints and the updates of
// nmax are not fully visible in this extract - confirm. No delete[] for
// symname is visible either.
805 void FunctionCount::printReport(FILE* fp
, leaky
* lk
, int parent
, int total
) {
807 " <A href=\"#%d\">%8d (%3.1f%%)%s %s</A>%s\n";
// tmax starts as an all-ones unsigned value shifted right by one bit.
809 int nmax
, tmax
= ((~0U) >> 1);
813 for (int j
= getSize(); --j
>= 0;) {
814 int cnt
= getCount(j
);
816 int idx
= getIndex(j
);
817 char* symname
= htmlify(lk
->indexToName(idx
));
818 fprintf(fp
, fmt
, idx
, getCount(j
), getCount(j
) * 100.0 / total
,
819 getCount(j
) * 100.0 / total
>= 10.0 ? "" : " ", symname
,
820 parent
== idx
? " (self)" : "");
822 } else if (cnt
< tmax
&& cnt
> nmax
) {
826 } while ((tmax
= nmax
) > 0);