2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/tools/tc-print/tc-print.h"
27 #include "hphp/runtime/vm/repo.h"
28 #include "hphp/runtime/base/preg.h"
29 #include "hphp/runtime/base/program-functions.h"
31 #include "hphp/tools/tc-print/perf-events.h"
32 #include "hphp/tools/tc-print/offline-trans-data.h"
33 #include "hphp/tools/tc-print/offline-x86-code.h"
34 #include "hphp/tools/tc-print/mappers.h"
35 #include "hphp/tools/tc-print/repo-wrapper.h"
38 using namespace HPHP::jit
;
40 #define MAX_SYM_LEN 10240
42 std::string
dumpDir("/tmp");
43 std::string configFile
;
44 std::string profFileName
;
45 uint32_t nTopTrans
= 0;
46 uint32_t nTopFuncs
= 0;
47 bool creationOrder
= false;
48 bool transCFG
= false;
49 bool collectBCStats
= false;
50 bool inclusiveStats
= false;
51 bool verboseStats
= false;
52 folly::Optional
<MD5
> md5Filter
;
53 PerfEventType sortBy
= SPECIAL_PROF_COUNTERS
;
54 bool sortByDensity
= false;
55 double helpersMinPercentage
= 0;
56 ExtOpcode filterByOpcode
= 0;
57 std::string kindFilter
= "all";
58 uint32_t selectedFuncId
= INVALID_ID
;
60 TCA maxAddr
= (TCA
)-1;
61 uint32_t annotationsVerbosity
= 2;
63 std::vector
<uint32_t> transPrintOrder
;
66 OfflineTransData
* g_transData
;
67 OfflineX86Code
* transCode
;
69 char errMsgBuff
[MAX_SYM_LEN
];
70 const char* kListKeyword
= "list";
72 PerfEventsMap
<TCA
> tcaPerfEvents
;
73 PerfEventsMap
<TransID
> transPerfEvents
;
75 #define NTRANS (g_transData->getNumTrans())
76 #define NFUNCS (g_transData->getNumFuncs())
77 #define TREC(TID) (g_transData->getTransRec(TID))
79 void warnTooFew(const std::string
& name
,
83 "Requested top %u %s, but there are only %u available.\n",
90 std::string
toString(T value
) {
97 printf("Usage: tc-print [OPTIONS]\n"
99 " -c <FILE> : uses the given config file\n"
100 " -D : used along with -t, this option sorts the top "
101 "translations by density (count / size) of the selected perf event\n"
102 " -d <DIRECTORY> : looks for dump file in <DIRECTORY> "
104 " -f <FUNC_ID> : prints the translations for the given "
105 "<FUNC_ID>, sorted by start offset\n"
106 " -g <FUNC_ID> : prints the CFG among the translations for the "
108 " -p <FILE> : uses raw profile data from <FILE>\n"
109 " -s : prints all translations sorted by creation "
111 " -u <MD5> : prints all translations from the specified "
113 " -t <NUMBER> : prints top <NUMBER> translations according to "
115 " -k <TRANS_KIND> : used with -t, filters only translations of the "
116 "given kind, e.g. TransLive (default: all)\n"
117 " -a <ADDR> : used with -t, filters only events at addresses "
119 " -A <ADDR> : used with -t, filters only events at addresses "
121 " -T <NUMBER> : prints top <NUMBER> functions according to "
123 " -e <EVENT_TYPE> : sorts by the specified perf event. Pass '%s' "
124 "to get a list of valid event types.\n"
125 " -b : prints bytecode stats\n"
126 " -B <OPCODE> : used in conjunction with -e, prints the top "
127 "bytecode translationc event type. Pass '%s' to get a "
128 "list of valid opcodes.\n"
129 " -i : reports inclusive stats by including helpers "
130 "(perf data must include call graph information)\n"
131 " -n <level> : level of verbosity for annotations. Use 0 for "
132 "no annotations, 1 - for inline, 2 - to print all annotations "
133 "including from a file (default: 2).\n"
134 " -v <PERCENTAGE> : sets the minimum percentage to <PERCENTAGE> "
135 "when printing the top helpers (implies -i). The lower the percentage,"
136 " the more helpers that will show up.\n"
137 " -h : prints help message\n",
142 void printValidBytecodes() {
143 printf("<OPCODE>:\n");
144 auto validOpcodes
= getValidOpcodeNames();
145 for (size_t i
= 0; i
< validOpcodes
.size(); i
++) {
146 printf(" * %s\n", validOpcodes
[i
].first
.c_str());
150 void printValidEventTypes() {
151 printf("<EVENT_TYPE>:\n");
152 for (size_t i
= 0; i
< NUM_EVENT_TYPES
; i
++) {
153 printf(" * %s\n", eventTypeToCommandLineArgument((PerfEventType
)i
));
157 void parseOptions(int argc
, char *argv
[]) {
160 while ((c
= getopt (argc
, argv
, "hc:Dd:f:g:ip:st:u:T:o:e:bB:v:k:a:A:n:"))
164 if (sscanf(optarg
, "%p", &maxAddr
) != 1) {
170 if (sscanf(optarg
, "%p", &minAddr
) != 1) {
185 creationOrder
= true;
186 if (sscanf(optarg
, "%u", &selectedFuncId
) != 1) {
193 if (sscanf(optarg
, "%u", &selectedFuncId
) != 1) {
199 profFileName
= optarg
;
202 creationOrder
= true;
205 if (sscanf(optarg
, "%u", &nTopTrans
) != 1) {
211 if (strlen(optarg
) == 32) {
212 md5Filter
= MD5(optarg
);
219 if (sscanf(optarg
, "%u", &nTopFuncs
) != 1) {
228 sortByDensity
= true;
231 if (!strcmp(optarg
, kListKeyword
)) {
232 printValidEventTypes();
235 sortBy
= commandLineArgumentToEventType(optarg
);
236 if (sortBy
== NUM_EVENT_TYPES
) {
242 collectBCStats
= true;
245 if (!strcmp(optarg
, kListKeyword
)) {
246 printValidBytecodes();
249 filterByOpcode
= stringToExtOpcode(optarg
);
250 if (!filterByOpcode
) {
256 inclusiveStats
= true;
259 if (sscanf(optarg
, "%u", &annotationsVerbosity
) != 1) {
267 inclusiveStats
= true;
268 if (sscanf(optarg
, "%lf", &helpersMinPercentage
) != 1) {
274 if (optopt
== 'd' || optopt
== 'c' || optopt
== 'p' || optopt
== 't') {
275 fprintf (stderr
, "Error: -%c expects an argument\n\n", optopt
);
285 for (uint32_t tid
= 0; tid
< NTRANS
; tid
++) {
286 if (TREC(tid
)->isValid() &&
287 (selectedFuncId
== INVALID_ID
||
288 selectedFuncId
== TREC(tid
)->src
.funcID())) {
289 transPrintOrder
.push_back(tid
);
294 void loadPerfEvents() {
297 profFile
= fopen(profFileName
.c_str(), "rt");
300 error("Error opening file " + profFileName
);
303 char program
[MAX_SYM_LEN
];
304 char eventCaption
[MAX_SYM_LEN
];
305 char line
[2*MAX_SYM_LEN
];
307 uint32_t tcSamples
[NUM_EVENT_TYPES
];
308 uint32_t hhvmSamples
[NUM_EVENT_TYPES
];
309 size_t numEntries
= 0;
310 PerfEventType eventType
= NUM_EVENT_TYPES
;
311 // samplesPerKind[event][kind]
312 uint32_t samplesPerKind
[NUM_EVENT_TYPES
][NumTransKinds
];
313 uint32_t samplesPerTCRegion
[NUM_EVENT_TYPES
][TCRCount
];
315 memset(tcSamples
, 0, sizeof(tcSamples
));
316 memset(hhvmSamples
, 0, sizeof(hhvmSamples
));
317 memset(samplesPerKind
, 0, sizeof(samplesPerKind
));
318 memset(samplesPerTCRegion
, 0, sizeof(samplesPerTCRegion
));
320 while (fgets(line
, 2*MAX_SYM_LEN
, profFile
) != nullptr) {
321 always_assert(sscanf(line
, "%s %s %lu", program
, eventCaption
, &numEntries
)
323 always_assert(numEntries
);
325 std::vector
<std::pair
<TCA
,std::string
>> entries
;
327 for (size_t i
= 0; i
< numEntries
; i
++) {
328 fscanf(profFile
, "%p %s\n", &addr
, line
);
329 entries
.push_back(std::pair
<TCA
,std::string
>(addr
, line
));
332 if (strncmp(program
, "hhvm", 4) == 0) {
333 eventType
= perfScriptOutputToEventType(eventCaption
);
334 if (eventType
== NUM_EVENT_TYPES
) {
337 "loadProfData: invalid event caption '%s'",
342 hhvmSamples
[eventType
]++;
345 addr
= entries
[0].first
;
347 if (inclusiveStats
) {
348 for (size_t i
= 0; i
< entries
.size(); i
++) {
349 if (g_transData
->isAddrInSomeTrans(entries
[i
].first
)) {
350 addr
= entries
[i
].first
;
357 if (!(minAddr
<= addr
&& addr
<= maxAddr
)) continue;
358 if (!g_transData
->isAddrInSomeTrans(addr
)) continue;
359 TransID transId
= g_transData
->getTransContaining(addr
);
360 always_assert(transId
!= INVALID_ID
);
361 tcSamples
[eventType
]++;
363 const TransRec
* trec
= g_transData
->getTransRec(transId
);
364 TransKind kind
= trec
->kind
;
365 samplesPerKind
[eventType
][static_cast<uint32_t>(kind
)]++;
366 TCRegion region
= transCode
->findTCRegionContaining(addr
);
367 always_assert(region
!= TCRCount
);
368 samplesPerTCRegion
[eventType
][region
]++;
370 std::vector
<std::string
> stackTrace
;
372 for (size_t i
= 0; i
< selIdx
; i
++) {
374 if (!strcmp(entries
[i
].second
.c_str(), "[unknown]")) {
376 // Append the address to disambiguate.
377 entries
[i
].second
+= std::string("@")
378 + toString((void*)entries
[i
].first
);
381 stackTrace
.push_back(entries
[i
].second
);
383 reverse(stackTrace
.begin(), stackTrace
.end());
387 tcaPerfEvents
.addEvent(addr
, (PerfEvent
){eventType
, 1}, stackTrace
);
391 AddrToTransMapper
transMapper(g_transData
);
392 transPerfEvents
= tcaPerfEvents
.mapTo(transMapper
);
394 printf("# Number of hhvm samples read (%% in TC) from file %s\n",
395 profFileName
.c_str());
397 for (size_t i
= 0; i
< NUM_EVENT_TYPES
; i
++) {
398 if (!hhvmSamples
[i
]) continue;
400 printf("# %-19s TOTAL: %10u (%u in TC = %5.2lf%%)\n",
401 eventTypeToCommandLineArgument((PerfEventType
)i
),
404 100.0 * tcSamples
[i
] / hhvmSamples
[i
]);
406 for (size_t j
= 0; j
< NumTransKinds
; ++j
) {
407 auto ct
= samplesPerKind
[i
][j
];
409 std::string kind
= show(static_cast<TransKind
>(j
));
410 printf("# %26s: %-8u (%5.2lf%%)\n",
411 kind
.c_str(), ct
, 100.0 * ct
/ tcSamples
[i
]);
417 // print per-TCRegion information
420 printf("# TCRegion ");
421 for (size_t i
= 0; i
< NUM_EVENT_TYPES
; i
++) {
422 printf("%17s ", eventTypeToCommandLineArgument((PerfEventType
)i
));
426 // HW events for each region
427 for (size_t i
= 0 ; i
< TCRCount
; i
++) {
428 printf("# %8s ", tcRegionToString(static_cast<TCRegion
>(i
)).c_str());
429 for (size_t j
= 0; j
< NUM_EVENT_TYPES
; j
++) {
430 auto ct
= samplesPerTCRegion
[j
][i
];
431 printf("%8u (%5.2lf%%) ", ct
, ct
? (100.0 * ct
/ tcSamples
[j
]) : 0);
440 void loadProfData() {
441 if (!profFileName
.empty()) {
445 // The prof-counters are collected independently.
446 for (TransID tid
= 0; tid
< NTRANS
; tid
++) {
447 if (!TREC(tid
)->isValid()) continue;
449 PerfEvent profCounters
;
450 profCounters
.type
= SPECIAL_PROF_COUNTERS
;
451 profCounters
.count
= g_transData
->getTransCounter(tid
);
452 transPerfEvents
.addEvent(tid
, profCounters
);
456 // Prints the metadata, bytecode, and disassembly for the given translation
457 void printTrans(TransID transId
) {
458 always_assert(transId
< NTRANS
);
460 printf("\n====================\n");
461 g_transData
->printTransRec(transId
, transPerfEvents
);
463 const TransRec
* tRec
= TREC(transId
);
464 if (!tRec
->isValid()) return;
466 if (!tRec
->blocks
.empty()) {
467 printf("----------\nbytecode:\n----------\n");
468 const Func
* curFunc
= nullptr;
469 for (auto& block
: tRec
->blocks
) {
470 auto unit
= g_repo
->getUnit(block
.md5
);
472 std::cout
<< folly::format(
473 "<<< couldn't find unit {} to print bytecode range [{},{}) >>>\n",
474 block
.md5
, block
.bcStart
, block
.bcPast
);
478 auto newFunc
= unit
->getFunc(block
.bcStart
);
479 always_assert(newFunc
);
480 if (newFunc
!= curFunc
) {
482 newFunc
->prettyPrint(std::cout
, Func::PrintOpts().noFpi().noMetadata());
487 std::cout
, Unit::PrintOpts().range(block
.bcStart
, block
.bcPast
)
492 printf("----------\nx64: main\n----------\n");
493 transCode
->printDisasm(tRec
->aStart
, tRec
->aLen
,
494 tRec
->bcMapping
, tcaPerfEvents
);
496 printf("----------\nx64: cold\n----------\n");
497 // Sometimes acoldStart is the same as afrozenStart. Avoid printing the code
498 // twice in such cases.
499 if (tRec
->acoldStart
!= tRec
->afrozenStart
) {
500 transCode
->printDisasm(tRec
->acoldStart
, tRec
->acoldLen
,
501 tRec
->bcMapping
, tcaPerfEvents
);
504 printf("----------\nx64: frozen\n----------\n");
505 transCode
->printDisasm(tRec
->afrozenStart
, tRec
->afrozenLen
,
506 tRec
->bcMapping
, tcaPerfEvents
);
508 printf("----------\n");
512 void printCFGOutArcs(TransID transId
) {
513 std::vector
<TCA
> jmpTargets
;
515 TCA fallThru
= transCode
->getTransJmpTargets(
516 g_transData
->getTransRec(transId
), &jmpTargets
);
518 auto const srcFuncId
= TREC(transId
)->src
.funcID();
520 for (size_t i
= 0; i
< jmpTargets
.size(); i
++) {
521 TransID targetId
= g_transData
->getTransStartingAt(jmpTargets
[i
]);
522 if (targetId
!= INVALID_ID
&&
523 // filter jumps to prologues of other funcs
524 TREC(targetId
)->src
.funcID() == srcFuncId
&&
525 TREC(targetId
)->kind
!= TransKind::Anchor
) {
527 bool retrans
= (TREC(transId
)->src
.offset() ==
528 TREC(targetId
)->src
.offset());
531 color
= "darkorange";
532 } else if (jmpTargets
[i
] == fallThru
) {
537 printf("t%u -> t%u [color=%s] ;\n", transId
, targetId
, color
);
544 std::vector
<TransID
> inodes
;
546 printf("digraph CFG {\n");
548 uint64_t maxProfCount
= g_transData
->findFuncTrans(selectedFuncId
, &inodes
);
551 for (uint32_t i
= 0; i
< inodes
.size(); i
++) {
552 auto tid
= inodes
[i
];
553 uint64_t profCount
= g_transData
->getTransCounter(tid
);
554 uint32_t bcStart
= TREC(tid
)->src
.offset();
555 uint32_t bcStop
= TREC(tid
)->bcPast();
556 uint32_t coldness
= 255 - (255 * profCount
/ maxProfCount
);
557 const auto kind
= TREC(tid
)->kind
;
558 bool isPrologue
= kind
== TransKind::LivePrologue
||
559 kind
== TransKind::OptPrologue
;
560 const char* shape
= "box";
561 switch (TREC(tid
)->kind
) {
562 case TransKind::Optimize
: shape
= "oval"; break;
563 case TransKind::Profile
: shape
= "hexagon"; break;
564 case TransKind::LivePrologue
:
565 case TransKind::ProfPrologue
:
566 case TransKind::OptPrologue
: shape
= "invtrapezium"; break;
567 default: shape
= "box";
569 printf("t%u [shape=%s,label=\"T: %u\\np: %" PRIu64
"\\nbc: [0x%x-0x%x)\","
570 "style=filled,fillcolor=\"#ff%02x%02x\"%s];\n", tid
, shape
, tid
,
571 profCount
, bcStart
, bcStop
, coldness
, coldness
,
572 (isPrologue
? ",color=blue" : ""));
576 for (uint32_t i
= 0; i
< inodes
.size(); i
++) {
577 uint32_t tid
= inodes
[i
];
578 printCFGOutArcs(tid
);
584 void printTopFuncs() {
585 if (!nTopFuncs
) return;
586 TransToFuncMapper
funcMapper(g_transData
);
587 PerfEventsMap
<FuncId
> funcPerfEvents
= transPerfEvents
.mapTo(funcMapper
);
588 funcPerfEvents
.printEventsSummary(sortBy
,
592 helpersMinPercentage
);
597 const PerfEventsMap
<TransID
>& transPerfEvents
;
598 const PerfEventType etype
;
601 CompTrans(const PerfEventsMap
<TransID
>& _transPerfEvents
,
602 PerfEventType _etype
) :
603 transPerfEvents(_transPerfEvents
), etype(_etype
) {}
605 bool operator()(TransID t1
, TransID t2
) const {
606 const auto count1
= transPerfEvents
.getEventCount(t1
, etype
);
607 const auto count2
= transPerfEvents
.getEventCount(t2
, etype
);
609 const auto size1
= TREC(t1
)->aLen
;
610 const auto size2
= TREC(t2
)->aLen
;
611 return count1
* size2
> count2
* size1
;
613 return count1
> count2
;
617 void printTopTrans() {
618 if (!nTopTrans
) return;
620 // The summary currently includes all translations, so it's misleading
621 // if we're filtering a specific kind of translations or address range.
622 // It also doesn't sort by density, so don't print it if sortByDensity is set.
623 if (kindFilter
== "all" && minAddr
== 0 && maxAddr
== (TCA
)-1 &&
625 transPerfEvents
.printEventsSummary(sortBy
,
629 helpersMinPercentage
);
632 // Sort and print the top translations.
633 std::vector
<TransID
> transIds
;
635 for (TransID t
= 0; t
< NTRANS
; t
++) {
636 if (TREC(t
)->isValid() &&
637 (kindFilter
== "all" || kindFilter
== show(TREC(t
)->kind
).c_str()) &&
638 ((minAddr
<= TREC(t
)->aStart
&& TREC(t
)->aStart
<= maxAddr
) ||
639 (minAddr
<= TREC(t
)->acoldStart
&& TREC(t
)->acoldStart
<= maxAddr
))) {
640 transIds
.push_back(t
);
644 CompTrans
compTrans(transPerfEvents
, sortBy
);
645 sort(transIds
.begin(), transIds
.end(), compTrans
);
647 size_t nPrint
= nTopTrans
;
648 if (transIds
.size() < nTopTrans
) {
649 fprintf(stderr
, "Warning: too few translations selected (%lu)\n",
651 nPrint
= transIds
.size();
653 for (size_t i
= 0; i
< nPrint
; i
++) printTrans(transIds
[i
]);
656 void printBytecodeStats(const OfflineTransData
* tdata
,
657 const PerfEventsMap
<TCA
>& events
,
658 PerfEventType etype
) {
660 if (!g_repo
) error("printBytecodeStats: null repo");
661 if (!tdata
) error("printBytecodeStats: null g_transData");
663 AddrToBcMapper
bcMapper(tdata
);
664 PerfEventsMap
<ExtOpcode
> bcPerfEvents
= events
.mapTo(bcMapper
);
666 std::map
<ExtOpcode
,std::string
> opcodeToName
;
667 PerfEventsMap
<ExtOpcode
>::const_iterator it
;
669 for (it
= bcPerfEvents
.begin(); it
!= bcPerfEvents
.end(); it
++) {
670 opcodeToName
[it
->first
] = extOpcodeToString(it
->first
);
673 bcPerfEvents
.printEventsSummary(etype
,
675 PerfEventsMap
<ExtOpcode
>::kAllEntries
,
677 helpersMinPercentage
,
681 void printTopBytecodes(const OfflineTransData
* tdata
,
682 OfflineX86Code
* x86code
,
683 const PerfEventsMap
<TCA
>& samples
,
685 ExtOpcode filterBy
) {
687 always_assert(etype
< NUM_EVENT_TYPES
);
689 AddrToTransFragmentMapper
mapper(tdata
, filterBy
);
690 PerfEventsMap
<TransFragment
> tfragPerfEvents
= samples
.mapTo(mapper
);
692 std::vector
<std::pair
<uint64_t, TransFragment
> > ranking
;
693 PerfEventsMap
<TransFragment
>::const_iterator it
;
695 for (it
= tfragPerfEvents
.begin(); it
!= tfragPerfEvents
.end(); it
++) {
696 ranking
.push_back(std::make_pair(it
->second
[etype
], it
->first
));
699 sort(ranking
.rbegin(), ranking
.rend());
701 for (size_t i
= 0; i
< ranking
.size(); i
++) {
702 const TransFragment
& tfrag
= ranking
[i
].second
;
703 const TransRec
* trec
= tdata
->getTransRec(tfrag
.tid
);
705 Unit
* unit
= g_repo
->getUnit(trec
->md5
);
708 printf("\n====================\n");
710 printf(" FuncID = %u\n", trec
->src
.funcID());
711 printf(" TransID = %u\n", tfrag
.tid
);
712 tfragPerfEvents
.printEventsHeader(tfrag
);
715 printf("----------\nx64: main\n----------\n");
716 x86code
->printDisasm(tfrag
.aStart
,
721 printf("----------\nx64: cold\n----------\n");
722 x86code
->printDisasm(tfrag
.acoldStart
,
727 printf("----------\nx64: frozen\n----------\n");
728 x86code
->printDisasm(tfrag
.afrozenStart
,
735 int main(int argc
, char *argv
[]) {
738 parseOptions(argc
, argv
);
740 g_transData
= new OfflineTransData(dumpDir
);
741 transCode
= new OfflineX86Code(dumpDir
,
742 g_transData
->getHotBase(),
743 g_transData
->getMainBase(),
744 g_transData
->getProfBase(),
745 g_transData
->getColdBase(),
746 g_transData
->getFrozenBase());
747 g_repo
= new RepoWrapper(g_transData
->getRepoSchema(), configFile
);
751 g_transData
->setAnnotationsVerbosity(annotationsVerbosity
);
754 if (nTopFuncs
> NFUNCS
) {
755 warnTooFew("functions", nTopFuncs
, NFUNCS
);
759 } else if (nTopTrans
) {
760 if (nTopTrans
> NTRANS
) {
761 warnTooFew("translations", nTopTrans
, NTRANS
);
765 } else if (transCFG
) {
767 } else if (creationOrder
) {
768 // Print translations (all or for a given funcId) in the order
769 // they were created.
771 for (uint32_t i
=0; i
< transPrintOrder
.size(); i
++) {
772 printTrans(transPrintOrder
[i
]);
774 } else if (collectBCStats
) {
775 printBytecodeStats(g_transData
, tcaPerfEvents
, sortBy
);
776 } else if (filterByOpcode
) {
777 printTopBytecodes(g_transData
,
783 // Print all translations in original order, filtered by unit if desired.
784 for (uint32_t t
= 0; t
< NTRANS
; t
++) {
786 if (!tRec
->isValid()) continue;
787 if (tRec
->kind
== TransKind::Anchor
) continue;
788 if (md5Filter
&& tRec
->md5
!= *md5Filter
) continue;