2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/vm/jit/mc-generator.h"
17 #include "hphp/runtime/vm/jit/vtune-jit.h"
19 #include "folly/MapUtil.h"
32 #include <unordered_set>
34 #include <sys/ucontext.h>
38 #define RIP_REGISTER(v) (v).mc_rip
39 #elif defined(__APPLE__)
40 #define RIP_REGISTER(v) (v)->__ss.__rip
41 #elif defined(__x86_64__)
42 #define RIP_REGISTER(v) (v).gregs[REG_RIP]
43 #elif defined(__AARCH64EL__)
44 #define RIP_REGISTER(v) (v).pc
46 #error How is rip accessed on this architecture?
49 #include <boost/bind.hpp>
50 #include <boost/utility/typed_in_place_factory.hpp>
51 #include <boost/range/adaptors.hpp>
52 #include <boost/scoped_ptr.hpp>
58 #include "folly/Format.h"
59 #include "folly/String.h"
61 #include "hphp/util/abi-cxx.h"
62 #include "hphp/util/asm-x64.h"
63 #include "hphp/util/bitops.h"
64 #include "hphp/util/debug.h"
65 #include "hphp/util/disasm.h"
66 #include "hphp/util/maphuge.h"
67 #include "hphp/util/rank.h"
68 #include "hphp/util/ringbuffer.h"
69 #include "hphp/util/timer.h"
70 #include "hphp/util/trace.h"
71 #include "hphp/util/meta.h"
72 #include "hphp/util/process.h"
73 #include "hphp/util/repo-schema.h"
74 #include "hphp/util/cycles.h"
76 #include "hphp/vixl/a64/decoder-a64.h"
77 #include "hphp/vixl/a64/disasm-a64.h"
78 #include "hphp/vixl/a64/macro-assembler-a64.h"
79 #include "hphp/vixl/a64/simulator-a64.h"
81 #include "hphp/runtime/vm/jit/abi-arm.h"
82 #include "hphp/runtime/vm/jit/arch.h"
83 #include "hphp/runtime/vm/jit/unique-stubs-arm.h"
84 #include "hphp/runtime/vm/jit/unique-stubs-x64.h"
85 #include "hphp/runtime/vm/bytecode.h"
86 #include "hphp/runtime/vm/php-debug.h"
87 #include "hphp/runtime/vm/runtime.h"
88 #include "hphp/runtime/base/complex-types.h"
89 #include "hphp/runtime/base/execution-context.h"
90 #include "hphp/runtime/base/runtime-option.h"
91 #include "hphp/runtime/base/runtime-option-guard.h"
92 #include "hphp/runtime/base/strings.h"
93 #include "hphp/runtime/server/source-root-info.h"
94 #include "hphp/runtime/base/zend-string.h"
95 #include "hphp/runtime/ext/ext_closure.h"
96 #include "hphp/runtime/ext/ext_continuation.h"
97 #include "hphp/runtime/ext/ext_function.h"
98 #include "hphp/runtime/vm/debug/debug.h"
99 #include "hphp/runtime/base/stats.h"
100 #include "hphp/runtime/vm/pendq.h"
101 #include "hphp/runtime/vm/srckey.h"
102 #include "hphp/runtime/vm/treadmill.h"
103 #include "hphp/runtime/vm/repo.h"
104 #include "hphp/runtime/vm/type-profile.h"
105 #include "hphp/runtime/vm/member-operations.h"
106 #include "hphp/runtime/vm/jit/abi-x64.h"
107 #include "hphp/runtime/vm/jit/check.h"
108 #include "hphp/runtime/vm/jit/hhbc-translator.h"
109 #include "hphp/runtime/vm/jit/ir-translator.h"
110 #include "hphp/runtime/vm/jit/normalized-instruction.h"
111 #include "hphp/runtime/vm/jit/opt.h"
112 #include "hphp/runtime/vm/jit/print.h"
113 #include "hphp/runtime/vm/jit/region-selection.h"
114 #include "hphp/runtime/vm/jit/srcdb.h"
115 #include "hphp/runtime/base/rds.h"
116 #include "hphp/runtime/vm/jit/tracelet.h"
117 #include "hphp/runtime/vm/jit/translator-inline.h"
118 #include "hphp/runtime/vm/jit/unwind-arm.h"
119 #include "hphp/runtime/vm/jit/unwind-x64.h"
120 #include "hphp/runtime/vm/jit/code-gen-helpers-arm.h"
121 #include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"
122 #include "hphp/runtime/vm/jit/code-gen.h"
123 #include "hphp/runtime/vm/jit/service-requests-x64.h"
124 #include "hphp/runtime/vm/jit/jump-smash.h"
125 #include "hphp/runtime/vm/jit/func-prologues.h"
126 #include "hphp/runtime/vm/jit/func-prologues-x64.h"
127 #include "hphp/runtime/vm/jit/func-prologues-arm.h"
128 #include "hphp/runtime/vm/jit/debug-guards.h"
129 #include "hphp/runtime/vm/jit/timer.h"
130 #include "hphp/runtime/vm/unwind.h"
132 #include "hphp/runtime/vm/jit/mc-generator-internal.h"
140 using namespace Trace
;
143 #define TRANS_PERF_COUNTERS \
153 static const char* const kInstrCountMCGName
= "instr_mcg";
154 static const char* const kInstrCountIRName
= "instr_hhir";
156 #define TPC(n) "jit_" #n,
157 static const char* const kPerfCounterNames
[] = {
164 #define TPC(n) tpc_ ## n,
165 enum TransPerfCounter
{
// Per-thread storage for the translation perf counters: one int64_t slot for
// every index below tpc_num_counters.
170 static __thread
int64_t s_perfCounters
[tpc_num_counters
];
// INC_TPC(n) pre-increments the calling thread's slot for counter tpc_<n>.
// Note that the expansion already ends in ';'.
171 #define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
173 // The global MCGenerator object.
176 // Register dirtiness: thread-private.
// Each thread's copy is initialised to VMRegState::CLEAN.
177 __thread VMRegState tl_regState
= VMRegState::CLEAN
;
// Picks the C++ routine used to release a value of the given DataType and
// returns it wrapped in a CppCall.
// The visible arms return the `release` member function of StringData,
// ArrayData, ObjectData, ResourceData and RefData respectively. Only the
// BitwiseKindOfString label is visible here; the labels guarding the other
// arms (and any default handling) are not shown in this view.
179 CppCall
MCGenerator::getDtorCall(DataType type
) {
181 case BitwiseKindOfString
:
182 return CppCall(getMethodPtr(&StringData::release
));
184 return CppCall(getMethodPtr(&ArrayData::release
));
186 return CppCall(getMethodPtr(&ObjectData::release
));
188 return CppCall(getMethodPtr(&ResourceData::release
));
190 return CppCall(getMethodPtr(&RefData::release
));
// Decides whether the translation about to be made for `sk` should be a
// profiling translation (retranslate() uses the result to choose between
// TransProfile and TransLive).
// Returns false right away when sk's function does not want PGO
// (shouldPGO()) or when profData() already marks that function as optimized.
// The bodies of the last two conditionals and the final return are not
// visible in this view.
197 bool MCGenerator::profileSrcKey(const SrcKey
& sk
) const {
198 if (!sk
.func()->shouldPGO()) return false;
200 if (m_tx
.profData()->optimized(sk
.getFuncId())) return false;
202 // If we've hit EvalJitProfileRequests, then don't emit profiling
203 // translations that would trigger an optimizing retranslation.
204 // This limits the duration of profiling. For
205 // non-retranslate-triggering SrcKeys, whose profiling translations
206 // only increment a counter, it's OK to emit them past the
207 // EvalJitProfileRequests threshold as long as we're already
208 // profiling this function (next check below) but haven't
209 // retranslated this function yet (checked above).
// A SrcKey counts as retranslation-triggering when its offset is an entry
// point of its function (Func::isEntry).
210 bool triggersRetrans
= sk
.func()->isEntry(sk
.offset());
211 if (triggersRetrans
&&
212 requestCount() > RuntimeOption::EvalJitProfileRequests
) {
216 // For translations that don't trigger a retranslation, only emit
217 // them if we've already generated a retranslation-triggering
218 // translation for its function.
219 if (!triggersRetrans
&&
220 !m_tx
.profData()->profiling(sk
.getFuncId())) {
// Decides whether the prologue for sk's function should be emitted as a
// profiling prologue ("proflogue").
// Returns false when the function does not want PGO or is already marked
// optimized. Otherwise returns true if the function is currently being
// profiled, or if requestCount() has not yet exceeded
// EvalJitProfileRequests.
227 bool MCGenerator::profilePrologue(const SrcKey
& sk
) const {
228 if (!sk
.func()->shouldPGO()) return false;
230 if (m_tx
.profData()->optimized(sk
.getFuncId())) return false;
232 // Proflogues don't trigger retranslation, so only emit them if
233 // we've already generated a retranslation-triggering translation
234 // for its function or if we're about to generate one (which
235 // depends on requestCount(), see profileSrcKey()).
236 return m_tx
.profData()->profiling(sk
.getFuncId()) ||
237 requestCount() <= RuntimeOption::EvalJitProfileRequests
;
241 * Invalidate the SrcDB entries for func's SrcKeys that have any
242 * Profile translation.
// Walks every profiling translation ID that profData() has recorded for
// `func` and calls invalidateSrcKey() on the SrcKey that translation was
// made for. Asserts that EvalJitPGO is enabled.
244 void MCGenerator::invalidateFuncProfSrcKeys(const Func
* func
) {
245 assert(RuntimeOption::EvalJitPGO
);
246 FuncId funcId
= func
->getFuncId();
247 for (auto tid
: m_tx
.profData()->funcProfTransIDs(funcId
)) {
248 invalidateSrcKey(m_tx
.profData()->transSrcKey(tid
));
// Produces a further translation for args.m_sk and returns its address.
// Visible steps: look up the SrcRec for the key, try to take its lock, bail
// out when a debugger is attached and the key is blacklisted, acquire the
// global write lease (returning nullptr when that fails or translation is
// disabled), choose TransProfile or TransLive via profileSrcKey(), and hand
// off to translate().
// NOTE(review): several lines of this function are not visible here (e.g.
// the condition guarding the early `return sr->getTopTranslation()` and the
// scope in which the SrcRec lock is released) - confirm against the full
// file before relying on the exact ordering.
252 TCA
MCGenerator::retranslate(const TranslArgs
& args
) {
// `sr` is dereferenced below without a visible null check; presumably the
// SrcRec is expected to exist already - TODO confirm.
253 SrcRec
* sr
= m_tx
.getSrcDB().find(args
.m_sk
);
255 bool locked
= sr
->tryLock();
// The lock is only released if tryLock() actually obtained it.
257 if (locked
) sr
->freeLock();
259 if (isDebuggerAttachedProcess() && m_tx
.isSrcKeyInBL(args
.m_sk
)) {
260 // We are about to translate something known to be blacklisted by
261 // debugger, exit early
262 SKTRACE(1, args
.m_sk
, "retranslate abort due to debugger\n");
265 LeaseHolder
writer(Translator::WriteLease());
266 if (!writer
|| !shouldTranslate()) return nullptr;
268 // Even though we knew above that we were going to skip
269 // doing another translation, we wait until we get the
270 // write lease, to avoid spinning through the tracelet
271 // guards again and again while another thread is writing
273 return sr
->getTopTranslation();
275 SKTRACE(1, args
.m_sk
, "retranslate\n");
// The translation mode is set for the duration of translate() and reset to
// TransInvalid on scope exit.
277 m_tx
.setMode(profileSrcKey(args
.m_sk
) ? TransProfile
: TransLive
);
278 SCOPE_EXIT
{ m_tx
.setMode(TransInvalid
); };
280 return translate(args
);
// Retranslates, in optimizing mode, the function that the profiling
// translation `transId` belongs to. `align` is forwarded to the TranslArgs
// built for each region.
// Returns nullptr when the write lease cannot be taken, translation is
// disabled, a debugger is attached, or the function was already marked
// optimized. The final return value is not visible in this view; the last
// visible condition tracks the translation whose SrcKey equals that of
// `transId`.
283 TCA
MCGenerator::retranslateOpt(TransID transId
, bool align
) {
284 LeaseHolder
writer(Translator::WriteLease());
285 if (!writer
|| !shouldTranslate()) return nullptr;
286 if (isDebuggerAttachedProcess()) return nullptr;
288 TRACE(1, "retranslateOpt: transId = %u\n", transId
);
// Whatever mode is set below, it is reset to TransInvalid on the way out.
290 SCOPE_EXIT
{ m_tx
.setMode(TransInvalid
); };
292 always_assert(m_tx
.profData()->transRegion(transId
) != nullptr);
294 Func
* func
= m_tx
.profData()->transFunc(transId
);
295 FuncId funcId
= func
->getFuncId();
296 const SrcKey
& sk
= m_tx
.profData()->transSrcKey(transId
);
// Mark the function optimized up front so this path runs at most once per
// function.
298 if (m_tx
.profData()->optimized(funcId
)) return nullptr;
299 m_tx
.profData()->setOptimized(funcId
);
// Only a function without DV funclets may have the region starting at
// func->base() installed as its body (see the setFuncBody() call below).
301 bool setFuncBody
= func
->getDVFunclets().size() == 0;
303 func
->setFuncBody(m_tx
.uniqueStubs
.funcBodyHelperThunk
);
305 // Invalidate SrcDB's entries for all func's SrcKeys.
306 invalidateFuncProfSrcKeys(func
);
308 // Regenerate the prologues and DV funclets before the actual function body.
309 TCA start
= regeneratePrologues(func
, sk
);
311 // Regionize func and translate all its regions.
312 std::vector
<RegionDescPtr
> regions
;
313 regionizeFunc(func
, this, regions
);
315 for (auto region
: regions
) {
// The mode is re-set on every iteration; translate() resets it to
// TransInvalid on exit.
316 m_tx
.setMode(TransOptimize
);
317 always_assert(region
->blocks
.size() > 0);
318 SrcKey regionSk
= region
->blocks
[0]->start();
319 auto translArgs
= TranslArgs(regionSk
, align
).region(region
);
320 if (setFuncBody
&& regionSk
.offset() == func
->base()) {
321 translArgs
.setFuncBody();
324 TCA regionStart
= translate(translArgs
);
325 if (start
== nullptr && regionSk
== sk
) {
335 * Find or create a translation for sk. Returns TCA of "best" current
336 * translation. May return NULL if it is currently impossible to create
// Looks up the current top translation for the SrcKey being requested and,
// when the lookup does not short-circuit, falls through to
// createTranslation(args).
// NOTE(review): the declaration of `sk`, the return statements of the
// pseudo-main and "found" branches, and the return type line are not visible
// in this view.
340 MCGenerator::getTranslation(const TranslArgs
& args
) {
342 sk
.func()->validate();
344 "getTranslation: curUnit %s funcId %x offset %d\n",
345 sk
.unit()->filepath()->data(),
348 SKTRACE(2, sk
, "   funcId: %x \n", sk
.func()->getFuncId());
350 if (Translator::liveFrameIsPseudoMain()) {
351 SKTRACE(2, sk
, "punting on pseudoMain\n");
354 if (const SrcRec
* sr
= m_tx
.getSrcDB().find(sk
)) {
355 TCA tca
= sr
->getTopTranslation();
357 SKTRACE(2, sk
, "getTranslation: found %p\n", tca
);
361 return createTranslation(args
);
// Returns how many translations the SrcRec for `sk` currently holds, when a
// SrcRec exists. The value returned when no SrcRec is found is not visible
// in this view.
365 MCGenerator::numTranslations(SrcKey sk
) const {
366 if (const SrcRec
* sr
= m_tx
.getSrcDB().find(sk
)) {
367 return sr
->translations().size();
// Fills `ctx` with type information taken from the live VM state.
// First, one liveTypes entry is appended per local of the current frame's
// function. Then two callbacks are handed to a stack walk (the call itself
// is not visible in this view): one receiving ActRecs, which appends to
// ctx.preLiveARs, and one receiving TypedValues, which appends a Stack
// location to ctx.liveTypes.
372 static void populateLiveContext(RegionContext
& ctx
) {
373 typedef RegionDesc::Location L
;
375 const ActRec
* const fp
{g_context
->getFP()};
376 const TypedValue
* const sp
{g_context
->getStack().top()};
378 for (uint32_t i
= 0; i
< fp
->m_func
->numLocals(); ++i
) {
379 ctx
.liveTypes
.push_back(
380 { L::Local
{i
}, liveTVType(frame_local(fp
, i
)) }
// Running offset used for the Stack locations built below; it is advanced
// by kNumActRecCells in the ActRec callback.
384 uint32_t stackOff
= 0;
386 fp
, sp
, ctx
.bcOffset
,
387 [&](const ActRec
* ar
) {
388 // TODO(#2466980): when it's a Cls, we should pass the Class* in
390 auto const objOrCls
=
391 ar
->hasThis() ? Type::Obj
.specialize(ar
->getThis()->getVMClass()) :
392 ar
->hasClass() ? Type::Cls
395 ctx
.preLiveARs
.push_back(
401 FTRACE(2, "added prelive ActRec {}\n", show(ctx
.preLiveARs
.back()));
403 stackOff
+= kNumActRecCells
;
405 [&](const TypedValue
* tv
) {
406 ctx
.liveTypes
.push_back(
407 { L::Stack
{stackOff
, ctx
.spOffset
- stackOff
}, liveTVType(tv
) }
410 FTRACE(2, "added live type {}\n", show(ctx
.liveTypes
.back()));
// Creates the first translation for a SrcKey that has none yet.
// Under the write lease it emits a REQ_RETRANSLATE service request into the
// stubs area, installs that as the SrcRec's anchor translation, optionally
// records the anchor for debugging/profiling, and then calls
// retranslate(args) to generate the real translation.
// Returns nullptr when translation is disabled or the write lease cannot be
// obtained.
// NOTE(review): the declaration of `sk` is not visible in this view.
416 MCGenerator::createTranslation(const TranslArgs
& args
) {
417 if (!shouldTranslate()) return nullptr;
420 * Try to become the writer. We delay this until we *know* we will have
421 * a need to create new translations, instead of just trying to win the
422 * lottery at the dawn of time. Hopefully lots of requests won't require
423 * any new translation.
426 LeaseHolder
writer(Translator::WriteLease());
427 if (!writer
) return nullptr;
429 if (SrcRec
* sr
= m_tx
.getSrcDB().find(sk
)) {
430 TCA tca
= sr
->getTopTranslation();
432 // Handle extremely unlikely race; someone may have just already
433 // added the first instance of this SrcRec while we did a
434 // non-blocking wait on the write lease.
437 // Since we are holding the write lease, we know that sk is properly
438 // initialized, except that it has no translations (due to
439 // replaceOldTranslations)
440 return retranslate(args
);
444 // We put retranslate requests at the end of our slab to more frequently
445 // allow conditional jump fall-throughs
// Remember both frontiers so the amount of code emitted for the anchor can
// be measured afterwards.
446 TCA astart
= code
.main().frontier();
447 TCA stubstart
= code
.stubs().frontier();
448 TCA req
= emitServiceReq(code
.stubs(), REQ_RETRANSLATE
, sk
.offset());
449 SKTRACE(1, sk
, "inserting anchor translation for (%p,%d) at %p\n",
450 sk
.unit(), sk
.offset(), req
);
451 SrcRec
* sr
= m_tx
.getSrcRec(sk
);
452 sr
->setFuncInfo(sk
.func());
453 sr
->setAnchorTranslation(req
);
455 size_t asize
= code
.main().frontier() - astart
;
456 size_t stubsize
= code
.stubs().frontier() - stubstart
;
// The anchor is only recorded as a translation when stub code was actually
// emitted and EvalDumpTCAnchors is set.
458 if (stubsize
&& RuntimeOption::EvalDumpTCAnchors
) {
459 TransRec
tr(sk
, sk
.unit()->md5(), TransAnchor
,
460 astart
, asize
, stubstart
, stubsize
);
461 m_tx
.addTranslation(tr
);
462 if (RuntimeOption::EvalJitUseVtuneAPI
) {
463 reportTraceletToVtune(sk
.unit(), sk
.func(), tr
);
466 if (m_tx
.profData()) {
467 m_tx
.profData()->addTransNonProf(TransAnchor
, sk
);
469 assert(!m_tx
.isTransDBEnabled() ||
470 m_tx
.getTransRec(stubstart
)->kind
== TransAnchor
);
473 return retranslate(args
);
// Read-only lookup: returns the top translation of the SrcRec for `sk` when
// one exists. Unlike getTranslation(), nothing is created here. The value
// returned when no SrcRec is found is not visible in this view.
477 MCGenerator::lookupTranslation(SrcKey sk
) const {
478 if (SrcRec
* sr
= m_tx
.getSrcDB().find(sk
)) {
479 return sr
->getTopTranslation();
// Generates machine code for args.m_sk in the translation mode currently set
// on m_tx (the mode must not be TransInvalid on entry and is reset to
// TransInvalid on exit).
// Visible steps: sanity-check VM register alignment, disable the JIT when
// the global translation limit is reached, select the code area (profile /
// hot), optionally perform the EvalJitDryRuns double translation, and
// install the new start address as the function body when requested.
// NOTE(review): the normal (non-dry-run) translateWork call, the
// declaration of `dryArgs`, and the return statement are not visible here.
485 MCGenerator::translate(const TranslArgs
& args
) {
// vmsp()/vmfp() must be aligned to sizeof(Cell).
488 assert(((uintptr_t)vmsp() & (sizeof(Cell
) - 1)) == 0);
489 assert(((uintptr_t)vmfp() & (sizeof(Cell
) - 1)) == 0);
490 assert(m_tx
.mode() != TransInvalid
);
491 SCOPE_EXIT
{ m_tx
.setMode(TransInvalid
); };
493 if (!args
.m_interp
) {
// Hitting the global limit turns the JIT off process-wide and refreshes
// this thread's request-injection data.
494 if (m_numHHIRTrans
== RuntimeOption::EvalJitGlobalTranslationLimit
) {
495 RuntimeOption::EvalJit
= false;
496 ThreadInfo::s_threadInfo
->m_reqInjectionData
.updateJit();
501 Func
* func
= const_cast<Func
*>(args
.m_sk
.func());
502 CodeCache::Selector
asmSel(CodeCache::Selector::Args(code
)
503 .profile(m_tx
.mode() == TransProfile
)
504 .hot(func
->attrs() & AttrHot
));
507 X64::moveToAlign(code
.main(), kNonFallthroughAlign
);
510 TCA start
= code
.main().frontier();
512 if (RuntimeOption::EvalJitDryRuns
&&
513 (m_tx
.mode() == TransLive
|| m_tx
.mode() == TransProfile
)) {
// Dry-run mode only supports the "tracelet" selector or the empty selector.
514 auto const useRegion
=
515 RuntimeOption::EvalJitRegionSelector
== "tracelet";
516 always_assert(useRegion
||
517 RuntimeOption::EvalJitRegionSelector
== "");
521 dryArgs
.dryRun(!useRegion
);
523 // First, run translateWork with the tracelet region selector. If
524 // useRegion == false, the generated code will be thrown away at the end.
525 OPTION_GUARD(EvalJitRegionSelector
, "tracelet");
526 OPTION_GUARD(EvalHHIRRelaxGuards
, true);
527 OPTION_GUARD(EvalHHBCRelaxGuards
, false);
528 translateWork(dryArgs
);
531 dryArgs
.dryRun(useRegion
);
533 // Now translate with analyze(), throwing away the generated code if
534 // useRegion == true.
535 OPTION_GUARD(EvalJitRegionSelector
, "");
536 OPTION_GUARD(EvalHHIRRelaxGuards
, false);
537 OPTION_GUARD(EvalHHBCRelaxGuards
, true);
538 translateWork(dryArgs
);
544 if (args
.m_setFuncBody
) {
545 func
->setFuncBody(start
);
547 SKTRACE(1, args
.m_sk
, "translate moved head from %p to %p\n",
548 getTopTranslation(args
.m_sk
), start
);
// Returns the code address to use as `func`'s body, creating it if the
// function body is still the funcBodyHelperThunk placeholder.
// The cached body is checked once without the write lease and once more
// after acquiring it. Visible creation paths are an architecture-specific
// emitCallArrayPrologue (X64 and ARM variants; the selection between them is
// not visible here) and a getTranslation() for the SrcKey at func->base().
// Returns nullptr when the write lease cannot be obtained.
553 MCGenerator::getCallArrayPrologue(Func
* func
) {
554 TCA tca
= func
->getFuncBody();
555 if (tca
!= m_tx
.uniqueStubs
.funcBodyHelperThunk
) return tca
;
557 DVFuncletsVec dvs
= func
->getDVFunclets();
560 LeaseHolder
writer(Translator::WriteLease());
561 if (!writer
) return nullptr;
// Re-read under the lease: the body may have been set since the check above.
562 tca
= func
->getFuncBody();
563 if (tca
!= m_tx
.uniqueStubs
.funcBodyHelperThunk
) return tca
;
566 tca
= X64::emitCallArrayPrologue(func
, dvs
);
569 tca
= ARM::emitCallArrayPrologue(func
, dvs
);
572 func
->setFuncBody(tca
);
574 SrcKey
sk(func
, func
->base(), false);
575 tca
= mcg
->getTranslation(TranslArgs(sk
, false).setFuncBody());
// Iterates over the `numPrologues` entries of `prologues` and, for each
// entry that is not the fcallHelperThunk placeholder and that has a guard
// for `func`, smashes that guard via the architecture-specific
// funcPrologueSmashGuard (X64 and ARM variants are visible; the selection
// between them is not).
// NOTE(review): the remaining parameter(s) of the signature (which declare
// `func`) and the condition around the lease acquisition are not visible in
// this view.
582 MCGenerator::smashPrologueGuards(TCA
* prologues
, int numPrologues
,
// The lease holder is declared DEBUG_ONLY; the comment below explains that
// the lease is only needed because debug builds mprotect the cache.
584 DEBUG_ONLY
std::unique_ptr
<LeaseHolder
> writer
;
585 for (int i
= 0; i
< numPrologues
; i
++) {
586 if (prologues
[i
] != m_tx
.uniqueStubs
.fcallHelperThunk
587 && funcPrologueHasGuard(prologues
[i
], func
)) {
590 * Unit's are sometimes created racily, in which case all
591 * but the first are destroyed immediately. In that case,
592 * the Funcs of the destroyed Units never need their
593 * prologues smashing, and it would be a lock rank violation
594 * to take the write lease here.
595 * In all other cases, Funcs are destroyed via a delayed path
596 * (treadmill) and the rank violation isn't an issue.
598 * Also note that we only need the write lease because we
599 * mprotect the translation cache in debug builds.
602 writer
.reset(new LeaseHolder(Translator::WriteLease(),
603 LeaseAcquire::BLOCKING
));
608 X64::funcPrologueSmashGuard(prologues
[i
], func
);
611 ARM::funcPrologueSmashGuard(prologues
[i
], func
);
621 * Given a callee and a number of args, match up to the callee's
622 * argument expectations and dispatch.
624 * Call/return hand-shaking is a bit funny initially. At translation time,
625 * we don't necessarily know what function we're calling. For instance,
629 * Will lead to a set of basic blocks like:
636 * The fcallc labelled "b2" above is not statically bindable in our
639 * We decouple the call work into a per-callsite portion, responsible
640 * for recording the return address, and a per-(callee, numArgs) portion,
641 * responsible for fixing up arguments and dispatching to remaining
642 * code. We call the per-callee portion a "prologue."
644 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
645 * we're running "between" basic blocks, with all VM registers sync'ed.
646 * However, we're also called in the middle of basic blocks, when dropping
647 * entries into func->m_prologues. So don't go around using the
648 * translation-time values of vmfp()/vmsp(), since they have an
649 * unpredictable relationship to the source.
// Loads func's cached prologue for slot `paramIdx` into the out-parameter
// `prologue`. When that value is something other than the fcallHelperThunk
// placeholder, it is traced and asserted to be a valid code address.
// The return statements are not visible in this view; getFuncPrologue()
// treats a true result as "a real prologue was found in `prologue`".
652 MCGenerator::checkCachedPrologue(const Func
* func
, int paramIdx
,
653 TCA
& prologue
) const {
654 prologue
= (TCA
)func
->getPrologue(paramIdx
);
655 if (prologue
!= m_tx
.uniqueStubs
.fcallHelperThunk
) {
656 TRACE(1, "cached prologue %s(%d) -> cached %p\n",
657 func
->fullName()->data(), paramIdx
, prologue
);
658 assert(isValidCodeAddress(prologue
));
// Switches this thread's VM register state from DIRTY to CLEAN (asserting it
// was DIRTY) and sets vmpc() to the bytecode address of `pcOff` within ar's
// unit.
// NOTE(review): the statements that consume `ar` and `sp` for the frame and
// stack pointers are not visible in this view.
664 static void interp_set_regs(ActRec
* ar
, Cell
* sp
, Offset pcOff
) {
665 assert(tl_regState
== VMRegState::DIRTY
);
666 tl_regState
= VMRegState::CLEAN
;
669 vmpc() = ar
->unit()->at(pcOff
);
// Returns the prologue of `func` for a call passing `nPassed` arguments,
// generating and caching it when it does not exist yet.
// The prologue slot is nPassed when nPassed <= numNonVariadicParams(), and
// numNonVariadicParams() + 1 otherwise.
// Visible flow: check the cached prologue without the lease; for cloned
// closures use the translation of the function-body SrcKey as the prologue;
// otherwise take the write lease (returning nullptr on failure or when
// translation is disabled), re-check the cache, pick TransProflogue or
// TransPrologue, emit the prologue, cache it with setPrologue(), and record
// the translation.
// NOTE(review): declarations of `prologue` and `start`, the else-branch
// structure around the mode selection, and the return statements are not
// visible in this view.
673 MCGenerator::getFuncPrologue(Func
* func
, int nPassed
, ActRec
* ar
) {
675 TRACE(1, "funcPrologue %s(%d)\n", func
->fullName()->data(), nPassed
);
676 int const numParams
= func
->numNonVariadicParams();
677 int paramIndex
= nPassed
<= numParams
? nPassed
: numParams
+ 1;
679 bool const funcIsMagic
= func
->isMagic();
681 // Do a quick test before grabbing the write lease
683 if (checkCachedPrologue(func
, paramIndex
, prologue
)) return prologue
;
685 Offset entry
= func
->getEntryForNumArgs(nPassed
);
686 SrcKey
funcBody(func
, entry
, false);
688 if (func
->isClonedClosure()) {
// VM registers are made CLEAN for the getTranslation() call and flagged
// DIRTY again right after it.
690 interp_set_regs(ar
, (Cell
*)ar
- func
->numSlotsInFrame(), entry
);
691 TCA tca
= getTranslation(TranslArgs(funcBody
, false));
692 tl_regState
= VMRegState::DIRTY
;
695 func
->setPrologue(paramIndex
, tca
);
700 LeaseHolder
writer(Translator::WriteLease());
701 if (!writer
|| !shouldTranslate()) return nullptr;
703 // Double check the prologue array now that we have the write lease
704 // in case another thread snuck in and set the prologue already.
705 if (checkCachedPrologue(func
, paramIndex
, prologue
)) return prologue
;
707 // We're coming from a BIND_CALL service request, so enable
708 // profiling if we haven't optimized the function entry yet.
709 assert(m_tx
.mode() == TransInvalid
|| m_tx
.mode() == TransPrologue
);
710 if (m_tx
.mode() == TransInvalid
&& profilePrologue(funcBody
)) {
711 m_tx
.setMode(TransProflogue
);
713 m_tx
.setMode(TransPrologue
);
715 SCOPE_EXIT
{ m_tx
.setMode(TransInvalid
); };
717 CodeCache::Selector
asmSel(CodeCache::Selector::Args(code
)
718 .profile(m_tx
.mode() == TransProflogue
)
719 .hot(func
->attrs() & AttrHot
));
721 // If we're close to a cache line boundary, just burn some space to
722 // try to keep the func and its body on fewer total lines.
723 if (((uintptr_t)code
.main().frontier() & kX64CacheLineMask
) >= 32) {
724 X64::moveToAlign(code
.main(), kX64CacheLineSize
);
727 // Careful: this isn't necessarily the real entry point. For funcIsMagic
728 // prologues, this is just a possible prologue.
729 TCA aStart
= code
.main().frontier();
731 TCA stubStart
= code
.stubs().frontier();
// The lambda emits the prologue and yields the SrcKey that is recorded as
// the translation's key below.
733 auto const skFuncBody
= [&] {
737 ? X64::emitMagicFuncPrologue(func
, nPassed
, start
)
738 : X64::emitFuncPrologue(func
, nPassed
, start
);
740 return ARM::emitFuncPrologue(
741 code
.main(), code
.stubs(), func
, funcIsMagic
, nPassed
, start
, aStart
747 assert(funcPrologueHasGuard(start
, func
));
748 TRACE(2, "funcPrologue mcg %p %s(%d) setting prologue %p\n",
749 this, func
->fullName()->data(), nPassed
, start
);
750 assert(isValidCodeAddress(start
));
751 func
->setPrologue(paramIndex
, start
);
753 assert(m_tx
.mode() == TransPrologue
|| m_tx
.mode() == TransProflogue
);
// Record the translation, sized from the frontiers captured before emission.
754 TransRec
tr(skFuncBody
, func
->unit()->md5(),
755 m_tx
.mode(), aStart
, code
.main().frontier() - aStart
,
756 stubStart
, code
.stubs().frontier() - stubStart
);
757 m_tx
.addTranslation(tr
);
758 if (RuntimeOption::EvalJitUseVtuneAPI
) {
759 reportTraceletToVtune(func
->unit(), func
, tr
);
762 if (m_tx
.profData()) {
763 m_tx
.profData()->addTransPrologue(m_tx
.mode(), skFuncBody
, paramIndex
);
766 recordGdbTranslation(skFuncBody
, func
,
769 recordBCInstr(OpFuncPrologue
, code
.main(), start
);
775 * Given the proflogueTransId for a TransProflogue translation,
776 * regenerate the prologue (as a TransPrologue). Returns the starting
777 * address for the translation corresponding to triggerSk, if such
778 * translation is generated; otherwise returns nullptr.
// Regenerates, as a TransPrologue, the prologue that profiling translation
// `prologueTransId` was created for, repoints the recorded callers of the
// old prologue at the new one, and - when the argument count has a DV
// funclet with a recorded profiling translation - retranslates that funclet
// in TransOptimize mode.
// Returns the start of the funclet translation when its SrcKey equals
// triggerSk and the translation succeeded; nullptr otherwise.
// NOTE(review): the second parameter line (declaring `triggerSk`) is not
// visible in this view.
780 TCA
MCGenerator::regeneratePrologue(TransID prologueTransId
,
782 Func
* func
= m_tx
.profData()->transFunc(prologueTransId
);
783 int nArgs
= m_tx
.profData()->prologueArgs(prologueTransId
);
785 // Regenerate the prologue.
// The cached slot is reset before calling getFuncPrologue(), and the mode is
// preset to TransPrologue (getFuncPrologue() asserts the mode is either
// TransInvalid or TransPrologue).
786 func
->resetPrologue(nArgs
);
787 m_tx
.setMode(TransPrologue
);
788 SCOPE_EXIT
{ m_tx
.setMode(TransInvalid
); };
789 TCA start
= getFuncPrologue(func
, nArgs
);
790 func
->setPrologue(nArgs
, start
);
792 // Smash callers of the old prologue with the address of the new one.
793 PrologueCallersRec
* pcr
=
794 m_tx
.profData()->prologueCallers(prologueTransId
);
795 for (TCA toSmash
: pcr
->mainCallers()) {
796 smashCall(toSmash
, start
);
798 // If the prologue has a guard, then smash its guard-callers as well.
799 if (funcPrologueHasGuard(start
, func
)) {
800 TCA guard
= funcPrologueToGuard(start
, func
);
801 for (TCA toSmash
: pcr
->guardCallers()) {
802 smashCall(toSmash
, guard
);
805 pcr
->clearAllCallers();
807 // If this prologue has a DV funclet, then generate a translation
808 // for the DV funclet right after the prologue.  However, skip
809 // cloned closures because their prologues are actually the DV
811 TCA triggerSkStart
= nullptr;
812 if (nArgs
< func
->numNonVariadicParams() && !func
->isClonedClosure()) {
813 auto paramInfo
= func
->params()[nArgs
];
814 if (paramInfo
.hasDefaultValue()) {
815 m_tx
.setMode(TransOptimize
);
816 SrcKey
funcletSK(func
, paramInfo
.funcletOff(), false);
817 TransID funcletTransId
= m_tx
.profData()->dvFuncletTransId(func
, nArgs
);
// Only funclets that have a recorded profiling translation are redone.
818 if (funcletTransId
!= InvalidID
) {
819 invalidateSrcKey(funcletSK
);
820 TCA dvStart
= translate(TranslArgs(funcletSK
, false).
821 transId(funcletTransId
));
822 if (dvStart
&& !triggerSkStart
&& funcletSK
== triggerSk
) {
823 triggerSkStart
= dvStart
;
825 // Flag that this translation has been retranslated, so that
826 // it's not retranslated again along with the function body.
827 m_tx
.profData()->setOptimized(funcletSK
);
832 return triggerSkStart
;
836 * Regenerate all prologues of func that were previously generated.
837 * The prologues are sorted in ascending order of profile counters.
838 * For prologues with corresponding DV funclets, their corresponding
839 * DV funclet will be regenerated right after them. The idea is to
840 * generate the function body right after calling this function, so
841 * that all prologues are placed right before it, and with the hottest
842 * prologues closer to it.
844 * Returns the starting address for the translation corresponding to
845 * triggerSk, if such translation is generated; otherwise returns
// Regenerates every prologue of `func` for which profData() has a profiling
// translation, in the order produced by the sort below, and remembers the
// first non-null address returned by regeneratePrologue() (i.e. the
// translation matching triggerSk).
// The return statement is not visible in this view.
848 TCA
MCGenerator::regeneratePrologues(Func
* func
, SrcKey triggerSk
) {
849 TCA triggerStart
= nullptr;
850 std::vector
<TransID
> prologTransIDs
;
// Slots 0 .. numNonVariadicParams() + 1 inclusive are probed, matching the
// slot numbering used by getFuncPrologue().
852 auto const limit
= func
->numNonVariadicParams() + 1;
853 for (int nArgs
= 0; nArgs
<= limit
; nArgs
++) {
854 TransID tid
= m_tx
.profData()->prologueTransId(func
, nArgs
);
855 if (tid
!= InvalidID
) {
856 prologTransIDs
.push_back(tid
);
860 std::sort(prologTransIDs
.begin(), prologTransIDs
.end(),
861 [&](TransID t1
, TransID t2
) -> bool {
862 // This will sort in ascending order. Note that transCounters start
863 // at JitPGOThreshold and count down.
// Hence a larger remaining counter sorts first.
864 return m_tx
.profData()->transCounter(t1
) >
865 m_tx
.profData()->transCounter(t2
);
868 for (TransID tid
: prologTransIDs
) {
869 TCA start
= regeneratePrologue(tid
, triggerSk
);
870 if (triggerStart
== nullptr && start
!= nullptr) {
871 triggerStart
= start
;
881 * Runtime service handler that patches a jmp to the translation of
882 * u:dest from toSmash.
// Service-request handler: obtains a translation for destSk and registers
// the branch at `toSmash` as an incoming branch of that SrcRec, choosing the
// IncomingBranch kind from `req` (address slot for REQ_BIND_ADDR, jcc for
// REQ_BIND_JCC / REQ_BIND_SIDE_EXIT, jmp otherwise).
// Returns nullptr when no translation is available or the top translation
// was invalidated while waiting for the write lease; returns the translation
// without binding when the write lease cannot be taken.
// NOTE(review): the early returns inside each branch and the writes to
// `smashed` are not visible in this view.
885 MCGenerator::bindJmp(TCA toSmash
, SrcKey destSk
,
886 ServiceRequest req
, bool& smashed
) {
887 TCA tDest
= getTranslation(TranslArgs(destSk
, false));
888 if (!tDest
) return nullptr;
889 LeaseHolder
writer(Translator::WriteLease());
890 if (!writer
) return tDest
;
891 SrcRec
* sr
= m_tx
.getSrcRec(destSk
);
892 // The top translation may have changed while we waited for the
893 // write lease, so read it again.  If it was replaced with a new
894 // translation, then bind to the new one.  If it was invalidated,
895 // then don't bind the jump.
896 tDest
= sr
->getTopTranslation();
897 if (tDest
== nullptr) return nullptr;
899 if (req
== REQ_BIND_ADDR
) {
// For REQ_BIND_ADDR, toSmash is the address of a TCA-sized slot rather than
// of a branch instruction.
900 auto addr
= reinterpret_cast<TCA
*>(toSmash
);
901 if (*addr
== tDest
) {
905 sr
->chainFrom(IncomingBranch::addr(addr
));
906 } else if (req
== REQ_BIND_JCC
|| req
== REQ_BIND_SIDE_EXIT
) {
907 auto jt
= jccTarget(toSmash
);
913 sr
->chainFrom(IncomingBranch::jccFrom(toSmash
));
915 assert(!jccTarget(toSmash
));
916 if (!jmpTarget(toSmash
) || jmpTarget(toSmash
) == tDest
) {
920 sr
->chainFrom(IncomingBranch::jmpFrom(toSmash
));
927 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
929 * 1: j<CC> stubJmpccFirst
932 * Our "taken" argument tells us whether the branch at 1: was taken or
933 * not; and therefore which of offTaken and offNotTaken to continue executing.
934 * If we did take the branch, we now rewrite the code so that the branch is
935 * straightened. This predicts that subsequent executions will go the same way
936 * as the first execution.
938 * jn<CC> stubJmpccSecond:offNotTaken
939 * nop5 ; fallthru, or jmp if there's already a translation.
942 * If we did not take the branch, we leave the sense of the condition
943 * intact, while patching it up to go to the unexplored code:
945 * j<CC> stubJmpccSecond:offTaken
// First-execution handler for a conditional jump at the end of a tracelet.
// Depending on `taken`, one of offTaken / offNotTaken is explored now and
// the other is deferred behind a REQ_BIND_JMPCC_SECOND ephemeral service
// request; the code at `toSmash` is then rewritten and the fall-through jmp
// is chained to the explored destination's SrcRec.
// Returns nullptr when the write lease cannot be taken.
// NOTE(review): the remaining parameters (`cc`, `taken`, `smashed`), the
// declaration of `tDest`, the instruction rewriting itself and the return
// statements are not visible in this view.
950 MCGenerator::bindJmpccFirst(TCA toSmash
,
951 Offset offTaken
, Offset offNotTaken
,
955 const Func
* f
= liveFunc();
956 LeaseHolder
writer(Translator::WriteLease());
957 if (!writer
) return nullptr;
958 Offset offWillExplore
= taken
? offTaken
: offNotTaken
;
959 Offset offWillDefer
= taken
? offNotTaken
: offTaken
;
960 SrcKey
dest(f
, offWillExplore
, liveResumed());
961 TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
963 offWillExplore
, offWillDefer
, cc
, taken
);
965 // We want the branch to point to whichever side has not been explored
971 auto& cb
= code
.blockFor(toSmash
);
973 // It's not clear where chainFrom should go to if cb is the stubs block
974 assert(&cb
!= &code
.stubs());
978 // can we just directly fall through?
979 // a jmp + jz takes 5 + 6 = 11 bytes
// Fall-through is possible only when the jcc+jmp pair sits at the very end
// of its code block and the destination has no SrcRec yet.
980 bool fallThru
= toSmash
+ kJmpccLen
+ kJmpLen
== cb
.frontier() &&
981 !m_tx
.getSrcDB().find(dest
);
984 tDest
= getTranslation(TranslArgs(dest
, !fallThru
));
989 if (jmpTarget(toSmash
+ kJmpccLen
) != jccTarget(toSmash
)) {
990 // someone else already smashed this one. Ideally we would
991 // just re-execute from toSmash - except the flags will have
996 TCA stub
= emitEphemeralServiceReq(code
.stubs(), getFreeStub(),
997 REQ_BIND_JMPCC_SECOND
, toSmash
,
1001 assert(Translator::WriteLease().amOwner());
1003 * Roll over the jcc and the jmp/fallthru. E.g., from:
1005 *     toSmash:    jcc   <jmpccFirstStub>
1006 *     toSmash+6:  jmp   <jmpccFirstStub>
1007 *     toSmash+11: <probably the new translation == tdest>
1011 *     toSmash:    j[n]z <jmpccSecondStub>
1013 *     toSmash+11: newHotness
1015 CodeCursor
cg(cb
, toSmash
);
1017 m_tx
.getSrcRec(dest
)->chainFrom(IncomingBranch::jmpFrom(cb
.frontier()));
1018 TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc
, taken
);
1022 // smashes a jcc to point to a new destination
// Second-stage handler for a conditional jump: obtains a translation for
// bytecode offset `off` in the live function and registers the jcc at
// `toSmash` as an incoming branch of that destination's SrcRec.
// NOTE(review): the handling of a failed lease acquisition, the body of the
// "already points at branch" check, the writes to `smashed` and the return
// statements are not visible in this view.
1024 MCGenerator::bindJmpccSecond(TCA toSmash
, const Offset off
,
1025 ConditionCode cc
, bool& smashed
) {
1026 const Func
* f
= liveFunc();
1027 SrcKey
dest(f
, off
, liveResumed());
1028 TCA branch
= getTranslation(TranslArgs(dest
, true));
1030 LeaseHolder
writer(Translator::WriteLease());
// Nothing to re-point if the jcc already targets the translation.
1032 if (branch
== jccTarget(toSmash
)) {
1037 SrcRec
* destRec
= m_tx
.getSrcRec(dest
);
1038 destRec
->chainFrom(IncomingBranch::jccFrom(toSmash
));
// For every entry of resolvedDeps, asks the IR translator to assert that the
// location (entry key) has the runtime type stored in the entry's value
// (value->rtt).
// NOTE(review): `dep` is taken by value, so each map entry is copied per
// iteration; `const auto&` would avoid that - confirm the copy is not
// intentional.
1045 void MCGenerator::emitResolvedDeps(const ChangeMap
& resolvedDeps
) {
1046 for (const auto dep
: resolvedDeps
) {
1047 m_tx
.irTrans()->assertType(dep
.first
, dep
.second
->rtt
);
// Emits reffiness guards for the pushed ActRecs recorded in refDeps: one
// guardRefs() call on the HHBC translator per entry of refDeps.m_arMap,
// keyed by the entry's ActRec delta. Nothing is emitted when refDeps is
// empty (the body of that early-out is not visible here).
// NOTE(review): the remaining parameter(s) of the signature and the other
// arguments passed to guardRefs() are not visible in this view.
1052 MCGenerator::checkRefs(SrcKey sk
,
1053 const RefDeps
& refDeps
,
1055 if (refDeps
.size() == 0) {
1059 // Set up guards for each pushed ActRec that we've made reffiness
1060 // assumptions about
1061 for (RefDeps::ArMap::const_iterator it
= refDeps
.m_arMap
.begin();
1062 it
!= refDeps
.m_arMap
.end(); ++it
) {
1063 // Be careful! The actual Func might have fewer refs than the number
1064 // of args we're passing. To forestall this, we always prepare at
1065 // least 64 bits in the Func, and always fill out the refBitVec
1066 // to a multiple of 64 bits
1068 int entryArDelta
= it
->first
;
1070 m_tx
.irTrans()->hhbcTrans().guardRefs(entryArDelta
,
// Deferred work item that frees an ephemeral request stub.
// It remembers the stub address at construction. When fired, it asks the
// global MCGenerator to free the stub; if freeRequestStub() does not return
// true, a fresh trigger for the same stub is enqueued on the Treadmill so
// the attempt is retried later.
// NOTE(review): the member declaration for m_stub and the signature of the
// firing method are not visible in this view.
1077 class FreeRequestStubTrigger
{
1080 explicit FreeRequestStubTrigger(TCA stub
) : m_stub(stub
) {
1081 TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub
);
1084 TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub
);
1085 if (mcg
->freeRequestStub(m_stub
) != true) {
1086 // If we can't free the stub, enqueue again to retry.
1087 TRACE(3, "FreeStubTrigger: write lease failed, requeueing %p\n", m_stub
);
1088 Treadmill::enqueue(FreeRequestStubTrigger(m_stub
));
// DepthGuard: scope guard that tracks, per thread, how deeply guarded scopes
// are nested. The constructor increments the thread-local m_depth and the
// destructor decrements it, tracing on both; depthOne() reports whether the
// current nesting level is exactly 1.
// A second, stateless definition follows whose depthOne() always returns
// false. NOTE(review): the preprocessor conditional selecting between the
// two definitions is not visible in this view.
1097 static __thread
int m_depth
;
1098 DepthGuard()  { m_depth
++; TRACE(2, "DepthGuard: %d {\n", m_depth
); }
1099 ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth
); m_depth
--; }
1101 bool depthOne() const { return m_depth
== 1; }
// Out-of-class definition of the thread-local counter.
1103 __thread
int DepthGuard::m_depth
;
1107 struct DepthGuard
{ bool depthOne() const { return false; } };
// CALLEE_SAVED_BARRIER() expands to an empty `asm volatile` statement whose
// only content is a clobber list: rbx and r12-r15 on x86-64, x19-x28 on
// AArch64. Any other architecture is rejected with #error.
1112 * enterTCHelper does not save callee-saved registers except %rbp. This means
1113 * when we call it from C++, we have to tell gcc to clobber all the other
1114 * callee-saved registers.
1116 #if defined(__x86_64__)
1117 # define CALLEE_SAVED_BARRIER() \
1118 asm volatile("" : : : "rbx", "r12", "r13", "r14", "r15")
1119 #elif defined(__AARCH64EL__)
1120 # define CALLEE_SAVED_BARRIER() \
1121 asm volatile("" : : : "x19", "x20", "x21", "x22", "x23", "x24", "x25", \
1122 "x26", "x27", "x28")
1124 # error What are the callee-saved registers on your system?
// Declaration of the assembly entry stub plus the compile-time checks it
// relies on: the X64 VM register assignments (rVmSp == rbx, rVmFp == rbp,
// rVmTl == r12, rStashedAR == r15) and the numeric value of REQ_BIND_CALL
// (0x1) must match what the hand-written assembly assumes.
1128 * enterTCHelper is a handwritten assembly function that transfers control in
1129 * and out of the TC.
1131 static_assert(X64::rVmSp
== rbx
&&
1132 X64::rVmFp
== rbp
&&
1133 X64::rVmTl
== r12
&&
1134 X64::rStashedAR
== r15
,
1135 "__enterTCHelper needs to be modified to use the correct ABI");
1136 static_assert(REQ_BIND_CALL
== 0x1,
1137 "Update assembly test for REQ_BIND_CALL in __enterTCHelper");
// NOTE(review): only the first and last parameters of the declaration are
// visible in this excerpt.
1138 extern "C" void enterTCHelper(Cell
* vm_sp
,
1143 void* targetCacheBase
);
// Prepares a vixl simulator for running translated code:
//   - loads rGContextReg, rVmFp, rVmSp, rVmTl and rStashedAR from
//     g_context, vmfp(), vmsp(), RDS::tl_base and saved_rStashedAr;
//   - lowers the simulated sp by kReservedRSPTotalSpace and records the
//     result as spOnEntry;
//   - reserves a further 16 bytes and stores the link register there.
// NOTE(review): the return statement is not visible here; enterTC stores
// the result in a variable named spOnEntry and later asserts
// sim.sp() == spOnEntry, so presumably spOnEntry is returned -- confirm.
1146 * A partial equivalent of enterTCHelper, used to set up the ARM simulator.
1148 uintptr_t setupSimRegsAndStack(vixl::Simulator
& sim
,
1149 uintptr_t saved_rStashedAr
) {
1150 sim
. set_xreg(ARM::rGContextReg
.code(), g_context
.getNoCheck());
1151 sim
. set_xreg(ARM::rVmFp
.code(), vmfp());
1152 sim
. set_xreg(ARM::rVmSp
.code(), vmsp());
1153 sim
. set_xreg(ARM::rVmTl
.code(), RDS::tl_base
);
1154 sim
. set_xreg(ARM::rStashedAR
.code(), saved_rStashedAr
);
1156 // Leave space for register spilling and MInstrState.
1157 sim
. set_sp(sim
.sp() - kReservedRSPTotalSpace
);
1158 assert(sim
.is_on_stack(reinterpret_cast<void*>(sim
.sp())));
// Captured before the link register is pushed below.
1160 auto spOnEntry
= sim
.sp();
1162 // Push the link register onto the stack. The link register is
1163 // technically caller-saved; what this means in practice is that
1164 // non-leaf functions push it at the very beginning and pop it just
1165 // before returning (as opposed to just saving it around calls).
1166 sim
. set_sp(sim
.sp() - 16);
1167 *reinterpret_cast<uint64_t*>(sim
.sp()) = sim
.lr();
// Fields exchanged between enterTC and the translation cache.
// requestNum: set by enterTC before entry (REQ_BIND_CALL or -1) and read
// back afterwards to decide between REQ_EXIT and handleServiceRequest().
1174 uintptr_t requestNum
;
1177 // Some TC registers need to be preserved across service requests.
1178 uintptr_t saved_rStashedAr
;
1180 // Stub addresses are passed back to allow us to recycle used stubs.
// Main loop for running translated code.
//
// Entry setup:
//   - start != nullptr: `data` (if non-null) is carried in
//     info.saved_rStashedAr and the request number is primed to
//     REQ_BIND_CALL; otherwise the request number is -1.
//   - start == nullptr: `data` is read as a SrcKey and its translation is
//     looked up with getTranslation().
//
// Loop body, as visible here:
//   - when there is no translation to run, basic blocks are interpreted
//     with dispatchBB() and the lookup is retried; a null PC after
//     interpretation clears m_fp and returns;
//   - the TC is entered through enterTCHelper() bracketed by
//     CALLEE_SAVED_BARRIER(), or through the vixl simulator, which
//     mirrors the helper's register/stack handshake;
//   - on return the register state is marked CLEAN, and anything other
//     than REQ_EXIT goes to handleServiceRequest(); a false result from
//     it ends this nesting of the VM.
1186 MCGenerator::enterTC(TCA start
, void* data
) {
1195 if (LIKELY(start
!= nullptr)) {
1196 info
.requestNum
= data
? REQ_BIND_CALL
: -1;
1197 info
.saved_rStashedAr
= (uintptr_t)data
;
1199 info
.requestNum
= -1;
1200 info
.saved_rStashedAr
= 0;
1201 sk
= *(SrcKey
*)data
;
1202 start
= getTranslation(TranslArgs(sk
, true));
// The VM stack and frame pointers must be Cell-aligned (16 bytes).
1205 assert(sizeof(Cell
) == 16);
1206 assert(((uintptr_t)vmsp() & (sizeof(Cell
) - 1)) == 0);
1207 assert(((uintptr_t)vmfp() & (sizeof(Cell
) - 1)) == 0);
1209 Translator::WriteLease().gremlinUnlock();
1210 // Keep dispatching until we end up somewhere the translator
1211 // recognizes, or we luck out and the leaseholder exits.
1213 TRACE(2, "enterTC forwarding BB to interpreter\n");
1214 g_context
->m_pc
= sk
.unit()->at(sk
.offset());
1216 g_context
->dispatchBB();
1217 PC newPc
= g_context
->getPC();
1218 if (!newPc
) { g_context
->m_fp
= 0; return; }
1219 sk
= SrcKey(liveFunc(), newPc
, liveResumed());
1220 start
= getTranslation(TranslArgs(sk
, true));
1222 assert(start
== m_tx
.uniqueStubs
.funcBodyHelperThunk
||
1223 isValidCodeAddress(start
) ||
1224 (start
== m_tx
.uniqueStubs
.fcallHelperThunk
&&
1225 info
.saved_rStashedAr
== (uintptr_t)data
));
1226 assert(!Translator::WriteLease().amOwner());
1227 const Func
* func
= (vmfp() ? (ActRec
*)vmfp() : (ActRec
*)data
)->m_func
;
1231 TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start
,
1232 vmfp(), func
->name()->data(), vmsp());
1233 tl_regState
= VMRegState::DIRTY
;
1235 if (Trace::moduleEnabledRelease(Trace::ringbuffer
, 1)) {
1236 auto skData
= sk
.valid() ? sk
.toAtomicInt() : uint64_t(-1LL);
1237 Trace::ringbufferEntry(RBTypeEnterTC
, skData
, (uint64_t)start
);
1242 // We have to force C++ to spill anything that might be in a
1243 // callee-saved register (aside from rbp). enterTCHelper does not save
1245 CALLEE_SAVED_BARRIER();
1246 enterTCHelper(vmsp(), vmfp(), start
, &info
, vmFirstAR(),
1248 CALLEE_SAVED_BARRIER();
1252 // This is a pseudo-copy of the logic in enterTCHelper: it sets up the
1253 // simulator's registers and stack, runs the translation, and gets the
1254 // necessary information out of the registers when it's done.
1256 vixl::PrintDisassembler
disasm(std::cout
);
1257 vixl::Decoder decoder
;
1258 if (getenv("ARM_DISASM")) {
1259 decoder
.AppendVisitor(&disasm
);
1261 vixl::Simulator
sim(&decoder
, std::cout
);
1263 Stats::inc(Stats::vixl_SimulatedInstr
, sim
.instr_count());
1264 Stats::inc(Stats::vixl_SimulatedLoad
, sim
.load_count());
1265 Stats::inc(Stats::vixl_SimulatedStore
, sim
.store_count());
1268 sim
.set_exception_hook(ARM::simulatorExceptionHook
);
1270 g_context
->m_activeSims
.push_back(&sim
);
1271 SCOPE_EXIT
{ g_context
->m_activeSims
.pop_back(); };
1273 DEBUG_ONLY
auto spOnEntry
=
1274 setupSimRegsAndStack(sim
, info
.saved_rStashedAr
);
1276 // The handshake is different in the case of REQ_BIND_CALL. The code
1277 // we're jumping to expects to find a return address in x30, and a saved
1278 // return address on the stack.
1279 if (info
.requestNum
== REQ_BIND_CALL
) {
1280 // Put the call's return address in the link register.
1281 auto* ar
= reinterpret_cast<ActRec
*>(info
.saved_rStashedAr
);
1282 sim
.set_lr(ar
->m_savedRip
);
1286 sim
.RunFrom(vixl::Instruction::Cast(start
));
1289 assert(sim
.sp() == spOnEntry
);
// Copy the service request number, its arguments, the stashed ActRec and
// the stub address out of the simulated registers.
1291 info
.requestNum
= sim
.xreg(0);
1292 info
.args
[0] = sim
.xreg(1);
1293 info
.args
[1] = sim
.xreg(2);
1294 info
.args
[2] = sim
.xreg(3);
1295 info
.args
[3] = sim
.xreg(4);
1296 info
.args
[4] = sim
.xreg(5);
1297 info
.saved_rStashedAr
= sim
.xreg(ARM::rStashedAR
.code());
1299 info
.stubAddr
= reinterpret_cast<TCA
>(sim
.xreg(ARM::rAsm
.code()));
1304 assert(g_context
->m_stack
.isValidAddress((uintptr_t)vmsp()));
1306 tl_regState
= VMRegState::CLEAN
; // Careful: pc isn't sync'ed yet.
1307 TRACE(1, "enterTC: %p fp%p sp%p } return\n", start
,
1311 // Debugging code: cede the write lease half the time.
1312 if (RuntimeOption::EvalJitStressLease
) {
1313 if (d
.depthOne() && (rand() % 2) == 0) {
1314 Translator::WriteLease().gremlinLock();
1317 // Ensure that each case either returns, or drives start to a valid
1319 start
= TCA(0xbee5face);
1322 TRACE(2, "enterTC: request(%s) args: %" PRIxPTR
" %" PRIxPTR
" %"
1323 PRIxPTR
" %" PRIxPTR
" %" PRIxPTR
"\n",
1324 serviceReqName(info
.requestNum
),
1325 info
.args
[0], info
.args
[1], info
.args
[2], info
.args
[3],
// REQ_EXIT is annotated as the expected outcome; every other request is
// delegated to handleServiceRequest().
1328 if (LIKELY(info
.requestNum
== REQ_EXIT
)) {
1332 if (!handleServiceRequest(info
, start
, sk
)) return;
// Dispatches one service request coming back from the translation cache.
// info.requestNum is cast to ServiceRequest (it must not be REQ_EXIT) and
// info.args supplies the per-request arguments. Cases visible in this
// excerpt: REQ_BIND_CALL, REQ_BIND_SIDE_EXIT (sharing a body with labels
// not visible here), REQ_BIND_JMPCC_FIRST, REQ_BIND_JMPCC_SECOND,
// REQ_RETRANSLATE_OPT, REQ_RETRANSLATE, REQ_INTERPRET,
// REQ_POST_INTERP_RET and REQ_STACK_OVERFLOW.
// After the switch, if a branch was smashed and the request carried a
// stub address, the stub is queued on the Treadmill for freeing.
1337 * The contract is that each case will set sk to the place where
1338 * execution should resume, and optionally set start to the hardware
1339 * translation of the resumption point (or otherwise set it to null).
1340 * Returns false if we need to halt this nesting of the VM.
1342 * start and sk might be subtly different; i.e., there are cases where
1343 * start != NULL && start != getTranslation(sk). For instance,
1344 * REQ_BIND_CALL has not finished executing the OpCall when it gets
1345 * here, and has even done some work on its behalf. sk == OpFCall,
1346 * while start == the point in the TC that's "half-way through" the
1347 * Call instruction. If we punt to the interpreter, the interpreter
1348 * will redo some of the work that the translator has already done.
1350 bool MCGenerator::handleServiceRequest(TReqInfo
& info
,
1353 const ServiceRequest requestNum
=
1354 static_cast<ServiceRequest
>(info
.requestNum
);
1355 auto* const args
= info
.args
;
1356 assert(requestNum
!= REQ_EXIT
);
1357 INC_TPC(service_req
);
// Set by the bind* helpers when they actually patch a branch; consulted
// after the switch to decide whether the request stub can be recycled.
1359 bool smashed
= false;
1360 switch (requestNum
) {
1361 case REQ_BIND_CALL
: {
1362 ReqBindCall
* req
= reinterpret_cast<ReqBindCall
*>(args
[0]);
1363 ActRec
* calleeFrame
= reinterpret_cast<ActRec
*>(args
[1]);
1364 TCA toSmash
= req
->m_toSmash
;
1365 Func
*func
= const_cast<Func
*>(calleeFrame
->m_func
);
1366 int nArgs
= req
->m_nArgs
;
1367 bool isImmutable
= req
->m_isImmutable
;
1368 TRACE(2, "enterTC: bindCall %s, ActRec %p\n",
1369 func
->fullName()->data(), calleeFrame
);
1370 TCA dest
= getFuncPrologue(func
, nArgs
);
1371 TRACE(2, "enterTC: bindCall -> %p\n", dest
);
1373 // We don't know we're calling the right function, so adjust
1374 // dest to point to the dynamic check of ar->m_func.
1375 dest
= funcPrologueToGuard(dest
, func
);
1377 TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
1378 func
->fullName()->data(), dest
);
1381 LeaseHolder
writer(Translator::WriteLease());
1383 // Someone else may have changed the func prologue while we
1384 // waited for the write lease, so read it again.
1385 dest
= getFuncPrologue(func
, nArgs
);
1387 if (!isImmutable
) dest
= funcPrologueToGuard(dest
, func
);
1389 if (callTarget(toSmash
) != dest
) {
1390 TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash
, dest
);
1391 smashCall(toSmash
, dest
);
1393 // For functions to be PGO'ed, if their current prologues
1394 // are still profiling ones (living in code.prof()), then
1395 // save toSmash as a caller to the prologue, so that it can
1396 // later be smashed to call a new prologue when it's generated.
1397 int calleeNumParams
= func
->numNonVariadicParams();
1398 int calledPrologNumArgs
= (nArgs
<= calleeNumParams
?
1399 nArgs
: calleeNumParams
+ 1);
1400 if (code
.prof().contains(dest
)) {
1402 m_tx
.profData()->addPrologueMainCaller(func
, calledPrologNumArgs
,
1405 m_tx
.profData()->addPrologueGuardCaller(func
, calledPrologNumArgs
,
1411 // sk: stale, but doesn't matter since we have a valid dest TCA.
1413 // We need translator help; we're not at the callee yet, so
1414 // roll back. The prelude has done some work already, but it
1415 // should be safe to redo.
1416 TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
1418 sk
= req
->m_sourceInstr
;
1420 // EnterTCHelper pushes the return ip onto the stack when the
1421 // requestNum is REQ_BIND_CALL, but if start is NULL, it will
1422 // interpret in doFCall, so we clear out the requestNum in this
1423 // case to prevent enterTCHelper from pushing the return ip
1425 info
.requestNum
= ~REQ_BIND_CALL
;
1430 case REQ_BIND_SIDE_EXIT
:
1435 TCA toSmash
= (TCA
)args
[0];
1436 Offset off
= args
[1];
1437 sk
= SrcKey(liveFunc(), off
, liveResumed());
1438 if (requestNum
== REQ_BIND_SIDE_EXIT
) {
1439 SKTRACE(3, sk
, "side exit taken!\n");
1441 start
= bindJmp(toSmash
, sk
, requestNum
, smashed
);
1444 case REQ_BIND_JMPCC_FIRST
: {
1445 TCA toSmash
= (TCA
)args
[0];
1446 Offset offTaken
= (Offset
)args
[1];
1447 Offset offNotTaken
= (Offset
)args
[2];
1448 ConditionCode cc
= ConditionCode(args
[3]);
1449 bool taken
= int64_t(args
[4]) & 1;
1450 start
= bindJmpccFirst(toSmash
, offTaken
, offNotTaken
,
1451 taken
, cc
, smashed
);
1452 // SrcKey: we basically need to emulate the fail
1453 sk
= SrcKey(liveFunc(), taken
? offTaken
: offNotTaken
, liveResumed());
1456 case REQ_BIND_JMPCC_SECOND
: {
1457 TCA toSmash
= (TCA
)args
[0];
1458 Offset off
= (Offset
)args
[1];
1459 ConditionCode cc
= ConditionCode(args
[2]);
1460 start
= bindJmpccSecond(toSmash
, off
, cc
, smashed
);
1461 sk
= SrcKey(liveFunc(), off
, liveResumed());
1464 case REQ_RETRANSLATE_OPT
: {
1465 auto ai
= (SrcKey::AtomicInt
)args
[0];
1466 TransID transId
= (TransID
)args
[1];
1467 sk
= SrcKey::fromAtomicInt(ai
);
1468 start
= retranslateOpt(transId
, false);
1469 SKTRACE(2, sk
, "retranslated-OPT: transId = %d start: @%p\n", transId
,
1474 case REQ_RETRANSLATE
: {
1475 INC_TPC(retranslate
);
1476 sk
= SrcKey(liveFunc(), (Offset
)args
[0], liveResumed());
1477 start
= retranslate(TranslArgs(sk
, true));
1478 SKTRACE(2, sk
, "retranslated @%p\n", start
);
1481 case REQ_INTERPRET
: {
1482 Offset off
= args
[0];
1483 int numInstrs
= args
[1];
1484 g_context
->m_pc
= liveUnit()->at(off
);
1486 * We know the compilation unit has not changed; basic blocks do
1487 * not span files. I claim even exceptions do not violate this
1490 assert(numInstrs
>= 0);
1491 SKTRACE(5, SrcKey(liveFunc(), off
, liveResumed()), "interp: enter\n");
1493 s_perfCounters
[tpc_interp_instr
] += numInstrs
;
1494 g_context
->dispatchN(numInstrs
);
1496 // numInstrs == 0 means it wants to dispatch until BB ends
1498 g_context
->dispatchBB();
1500 PC newPc
= g_context
->getPC();
1501 if (!newPc
) { g_context
->m_fp
= 0; return false; }
1502 SrcKey
newSk(liveFunc(), newPc
, liveResumed());
1503 SKTRACE(5, newSk
, "interp: exit\n");
1505 start
= getTranslation(TranslArgs(newSk
, true));
1508 case REQ_POST_INTERP_RET
: {
1509 // This is only responsible for the control-flow aspect of the Ret:
1510 // getting to the destination's translation, if any.
1511 ActRec
* ar
= (ActRec
*)args
[0];
1512 ActRec
* caller
= (ActRec
*)args
[1];
1513 assert((Cell
*) caller
== vmfp());
1514 Unit
* destUnit
= caller
->m_func
->unit();
1515 // Set PC so logging code in getTranslation doesn't get confused.
1516 vmpc() = destUnit
->at(caller
->m_func
->base() + ar
->m_soff
);
1517 SrcKey
dest(caller
->func(), vmpc(), caller
->resumed());
1519 start
= getTranslation(TranslArgs(dest
, true));
1520 TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
1521 ar
->m_func
->fullName()->data(),
1522 caller
->m_func
->fullName()->data());
1526 if (UNLIKELY(vmpc() == 0)) {
1527 g_context
->m_fp
= 0;
1530 SrcKey
dest(liveFunc(), vmpc(), liveResumed());
1532 start
= getTranslation(TranslArgs(dest
, true));
1535 case REQ_STACK_OVERFLOW
:
1536 if (((ActRec
*)info
.saved_rStashedAr
)->m_savedRbp
== (uintptr_t)vmfp()) {
1538 * The normal case - we were called via FCall, or FCallArray.
1539 * We need to construct the pc of the fcall from the return
1540 * address (which will be after the fcall). Because fcall is
1541 * a variable length instruction, and because we sometimes
1542 * delete instructions from the instruction stream, we
1543 * need to use fpi regions to find the fcall.
1545 const FPIEnt
* fe
= liveFunc()->findPrecedingFPI(
1546 liveUnit()->offsetOf(vmpc()));
1547 vmpc() = liveUnit()->at(fe
->m_fcallOff
);
1548 assert(isFCallStar(*reinterpret_cast<const Op
*>(vmpc())));
1549 raise_error("Stack overflow");
1553 * We were called via re-entry
1554 * Leak the params and the actrec, and tell the unwinder
1555 * that there's nothing left to do in this "entry".
1557 vmsp() = (Cell
*)((ActRec
*)info
.saved_rStashedAr
+ 1);
1558 throw VMReenterStackOverflow();
// Recycle the request stub once the branch that referenced it was smashed.
1565 if (smashed
&& info
.stubAddr
) {
1566 Treadmill::enqueue(FreeRequestStubTrigger(info
.stubAddr
));
// Takes the head node off the free list: m_list advances to the node's
// m_next and the node's m_freed marker is overwritten with ~kStubFree so
// it no longer reads as freed.
// NOTE(review): the empty-list check and the return statement are not
// visible in this excerpt.
1573 * Support for the stub freelist.
1575 TCA
FreeStubList::maybePop() {
1576 StubNode
* ret
= m_list
;
1578 TRACE(1, "alloc stub %p\n", ret
);
1579 m_list
= ret
->m_next
;
1580 ret
->m_freed
= ~kStubFree
;
// Releases a stub to the free list. The stub memory is reinterpreted as
// a StubNode; a node whose m_freed already equals kStubFree is reported
// as a duplicate free, otherwise the node is marked with kStubFree.
// NOTE(review): the statements that link the node into m_list are not
// visible in this excerpt.
1585 void FreeStubList::push(TCA stub
) {
1587 * A freed stub may be released by Treadmill more than once if multiple
1588 * threads execute the service request before it is freed. We detect
1589 * duplicates by marking freed stubs
1591 StubNode
* n
= (StubNode
*)stub
;
1592 if (n
->m_freed
== kStubFree
) {
1593 TRACE(1, "already freed stub %p\n", stub
);
1596 n
->m_freed
= kStubFree
;
1598 TRACE(1, "free stub %p (-> %p)\n", stub
, m_list
);
// Returns a service-request stub to m_freeStubs. Requires the translator
// write lease: when the LeaseHolder could not be acquired the function
// returns false so the caller can retry. The stub must lie inside
// code.stubs().
1603 MCGenerator::freeRequestStub(TCA stub
) {
1604 LeaseHolder
writer(Translator::WriteLease());
1606 * If we can't acquire the write lock, the caller
1607 * (FreeRequestStubTrigger) retries
1609 if (!writer
) return false;
1610 assert(code
.stubs().contains(stub
));
1611 m_freeStubs
.push(stub
);
// Picks the address for a new service-request stub. It first tries
// m_freeStubs.maybePop(); the reuse path bumps Stats::Astubs_Reused and
// asserts that the new list head is null or inside code.stubs(). The
// other path takes the current frontier of code.stubs() and bumps
// Stats::Astubs_New.
// NOTE(review): the branch condition separating the two paths is not
// visible in this excerpt.
1615 TCA
MCGenerator::getFreeStub() {
1616 TCA ret
= m_freeStubs
.maybePop();
1618 Stats::inc(Stats::Astubs_Reused
);
1619 assert(m_freeStubs
.m_list
== nullptr ||
1620 code
.stubs().contains(TCA(m_freeStubs
.m_list
)));
1621 TRACE(1, "recycle stub %p\n", ret
);
1623 ret
= code
.stubs().frontier();
1624 Stats::inc(Stats::Astubs_New
);
1625 TRACE(1, "alloc new stub %p\n", ret
);
// X-macro expansion, used twice:
//  1. O(...) defines one interpOne##opcode(ar, sp, pcOff) per opcode. Each
//     calls interp_set_regs(), traces, asserts that the op at vmpc() is
//     Op::opcode, updates the InterpOne statistics (and subtracts one from
//     Stats::Instr_TC), and finally marks tl_regState DIRTY.
//  2. O(...) is redefined to emit `(void*)(interpOne ## opcode),` so the
//     same opcode list fills the interpOneEntryPoints table.
// No comments are added inside the macro body because every line there is
// joined by a trailing backslash.
1630 #define O(opcode, imm, pusph, pop, flags) \
1632 * The interpOne methods saves m_pc, m_fp, and m_sp ExecutionContext,
1633 * calls into the interpreter, and then return a pointer to the
1634 * current ExecutionContext.
1637 interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
1638 interp_set_regs(ar, sp, pcOff); \
1639 SKTRACE(5, SrcKey(liveFunc(), vmpc(), liveResumed()), "%40s %p %p\n", \
1640 "interpOne" #opcode " before (fp,sp)", \
1642 assert(*reinterpret_cast<const Op*>(vmpc()) == Op::opcode); \
1643 auto const ec = g_context.getNoCheck(); \
1644 Stats::inc(Stats::Instr_InterpOne ## opcode); \
1645 if (Trace::moduleEnabled(Trace::interpOne, 1)) { \
1646 static const StringData* cat = makeStaticString("interpOne"); \
1647 static const StringData* name = makeStaticString(#opcode); \
1648 Stats::incStatGrouped(cat, name, 1); \
1650 INC_TPC(interp_one) \
1651 /* Correct for over-counting in TC-stats. */ \
1652 Stats::inc(Stats::Instr_TC, -1); \
1655 * Only set regstate back to dirty if an exception is not
1656 * propagating. If an exception is throwing, regstate for this call
1657 * is actually still correct, and we don't have information in the
1658 * fixup map for interpOne calls anyway.
1660 tl_regState = VMRegState::DIRTY; \
1667 void* interpOneEntryPoints
[] = {
1668 #define O(opcode, imm, pusph, pop, flags) \
1669 (void*)(interpOne ## opcode),
// Walks the native frame chain starting at the current frame pointer,
// following m_savedRbp links, and tests each frame's saved return address
// (m_savedRip) with isValidCodeAddress().
// NOTE(review): what is returned on a match, and when the chain ends, is
// not visible in this excerpt.
1674 TCA
MCGenerator::getTranslatedCaller() const {
1675 DECLARE_FRAME_POINTER(fp
);
1676 ActRec
* framePtr
= fp
; // can't directly mutate the register-mapped one
1677 for (; framePtr
; framePtr
= (ActRec
*)framePtr
->m_savedRbp
) {
1678 TCA rip
= (TCA
)framePtr
->m_savedRip
;
1679 if (isValidCodeAddress(rip
)) {
// Synchronizes the VM registers: requires tl_regState to be DIRTY on
// entry, runs m_fixupMap.fixup() on the current execution context, then
// marks the state CLEAN and counts the sync in Stats::TC_Sync.
1687 MCGenerator::syncWork() {
1688 assert(tl_regState
== VMRegState::DIRTY
);
1689 m_fixupMap
.fixup(g_context
.getNoCheck());
1690 tl_regState
= VMRegState::CLEAN
;
1691 Stats::inc(Stats::TC_Sync
);
// Emits a trampoline for helperAddr into code.trampolines():
//   - when stats are enabled, a per-trampoline counter increment is
//     emitted and the helper's name is published in Stats::helperNames;
//   - a jmp to helperAddr is emitted;
//   - the helper -> trampoline mapping is stored in m_trampolineMap, the
//     code is recorded as OpNativeTrampoline, and optionally reported to
//     VTune.
1695 MCGenerator::emitNativeTrampoline(TCA helperAddr
) {
1696 auto& trampolines
= code
.trampolines();
1697 if (!trampolines
.canEmit(kExpectedPerTrampolineSize
)) {
1698 // not enough space to emit a trampoline, so just return the
1699 // helper address and emitCall will emit the right sequence
1700 // to call it indirectly
// NOTE(review): despite the comment above, the visible code reaches
// always_assert(false) right after the trace, so running out of space
// aborts before any fallback can be returned.
1701 TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr
);
1702 always_assert(false);
1706 uint32_t index
= m_numNativeTrampolines
++;
1707 TCA trampAddr
= trampolines
.frontier();
1708 if (Stats::enabled()) {
1709 emitIncStat(trampolines
, &Stats::tl_helper_counters
[0], index
);
1710 auto name
= getNativeFunctionName(helperAddr
);
1711 const size_t limit
= 50;
1712 if (name
.size() > limit
) {
1716 // The duped string lives until process death intentionally.
1717 Stats::helperNames
[index
].store(strdup(name
.c_str()),
1718 std::memory_order_release
);
1721 Asm a
{ trampolines
};
1722 a
. jmp (helperAddr
);
1725 m_trampolineMap
[helperAddr
] = trampAddr
;
1726 recordBCInstr(OpNativeTrampoline
, trampolines
, trampAddr
);
1727 if (RuntimeOption::EvalJitUseVtuneAPI
) {
1728 reportTrampolineToVtune(trampAddr
, trampolines
.frontier() - trampAddr
);
// Looks up the trampoline for helperAddr. Trampolines are bypassed when
// EvalJitTrampolines is off and stats are disabled; otherwise
// m_trampolineMap is consulted (folly::get_default) and a new trampoline
// is emitted on a miss.
// NOTE(review): the early-return values are not visible in this excerpt.
1735 MCGenerator::getNativeTrampoline(TCA helperAddr
) {
1736 if (!RuntimeOption::EvalJitTrampolines
&& !Stats::enabled()) {
1739 auto const trampAddr
= (TCA
)folly::get_default(m_trampolineMap
, helperAddr
);
1743 return emitNativeTrampoline(helperAddr
);
// Tests whether srcRec already holds exactly
// RuntimeOption::EvalJitMaxTranslations translations. In debug builds
// with mcg tracing at level 2 it also dumps each translation: its
// address, whether it is the top translation, whether it is an anchor,
// and otherwise its guard dependencies.
// NOTE(review): the return statements are not visible in this excerpt.
1747 MCGenerator::reachedTranslationLimit(SrcKey sk
,
1748 const SrcRec
& srcRec
) const {
1749 if (srcRec
.translations().size() == RuntimeOption::EvalJitMaxTranslations
) {
1751 if (debug
&& Trace::moduleEnabled(Trace::mcg
, 2)) {
1752 const auto& tns
= srcRec
.translations();
1753 TRACE(1, "Too many (%zd) translations: %s, BC offset %d\n",
1754 tns
.size(), sk
.unit()->filepath()->data(),
1756 SKTRACE(2, sk
, "{\n");
1757 TCA topTrans
= srcRec
.getTopTranslation();
1758 for (size_t i
= 0; i
< tns
.size(); ++i
) {
1759 const TransRec
* rec
= m_tx
.getTransRec(tns
[i
]);
1761 SKTRACE(2, sk
, "%zd %p\n", i
, tns
[i
]);
1762 if (tns
[i
] == topTrans
) {
1763 SKTRACE(2, sk
, "%zd: *Top*\n", i
);
1765 if (rec
->kind
== TransAnchor
) {
1766 SKTRACE(2, sk
, "%zd: Anchor\n", i
);
1768 SKTRACE(2, sk
, "%zd: guards {\n", i
);
1769 for (unsigned j
= 0; j
< rec
->dependencies
.size(); ++j
) {
1770 TRACE(2, rec
->dependencies
[j
]);
1772 SKTRACE(2, sk
, "%zd } guards\n", i
);
1775 SKTRACE(2, sk
, "} /* Too many translations */\n");
// Emits the guards at the head of a tracelet translation:
//   - optional TraceletGuard_enter / TraceletGuard_execute stat counters
//     (stats tracing level 2) bracket the guards;
//   - a ring-buffer marker is emitted for sk;
//   - every entry of `dependencies` gets a checkType(); only the outer
//     type is checked unless the translator is in TransProfile mode;
//   - checkRefs() adds the reffiness guards.
1784 MCGenerator::emitGuardChecks(SrcKey sk
,
1785 const ChangeMap
& dependencies
,
1786 const RefDeps
& refDeps
,
1788 if (Trace::moduleEnabled(Trace::stats
, 2)) {
1789 emitIncStat(code
.main(), Stats::TraceletGuard_enter
);
1792 m_tx
.irTrans()->hhbcTrans().emitRB(RBTypeTraceletGuards
, sk
);
1793 bool checkOuterTypeOnly
= m_tx
.mode() != TransProfile
;
1794 for (auto const& dep
: dependencies
) {
1795 m_tx
.irTrans()->checkType(dep
.first
, dep
.second
->rtt
, checkOuterTypeOnly
);
1798 checkRefs(sk
, refDeps
, fail
);
1800 if (Trace::moduleEnabled(Trace::stats
, 2)) {
1801 emitIncStat(code
.main(), Stats::TraceletGuard_execute
);
// Trace-only dump (level 3) of a tracelet about to be translated: source
// file, line and function; type preconditions (m_dependencies);
// reffiness preconditions (m_refDeps); postconditions (m_changes); and
// the bytecode instructions up to the first one that breaks the tracelet.
// With mcg tracing at level 5 it additionally prints the VM stack,
// pointing vmpc() at the tracelet start for prettyStack().
1806 void dumpTranslationInfo(const Tracelet
& t
, TCA postGuards
) {
1810 DEBUG_ONLY
auto unit
= sk
.unit();
1812 TRACE(3, "----------------------------------------------\n");
1813 TRACE(3, " Translating from file %s:%d %s at %p:\n",
1814 unit
->filepath()->data(),
1815 unit
->getLineNumber(sk
.offset()),
1816 sk
.func()->name()->data(),
1818 TRACE(3, " preconds:\n");
1819 TRACE(3, " types:\n");
1820 for (DepMap::const_iterator i
= t
.m_dependencies
.begin();
1821 i
!= t
.m_dependencies
.end(); ++i
) {
1822 TRACE(3, " %-5s\n", i
->second
->pretty().c_str());
1824 if (t
.m_refDeps
.size() != 0) {
1825 TRACE(3, " refs:\n");
1826 for (RefDeps::ArMap::const_iterator i
= t
.m_refDeps
.m_arMap
.begin();
1827 i
!= t
.m_refDeps
.m_arMap
.end();
1829 TRACE(3, " (ActRec %" PRId64
" : %-5s)\n", i
->first
,
1830 i
->second
.pretty().c_str());
1833 TRACE(3, " postconds:\n");
1834 for (ChangeMap::const_iterator i
= t
.m_changes
.begin();
1835 i
!= t
.m_changes
.end(); ++i
) {
1836 TRACE(3, " %-5s\n", i
->second
->pretty().c_str());
1838 for (auto ni
= t
.m_instrStream
.first
; ni
; ni
= ni
->next
) {
1839 TRACE(3, " %6d: %s\n", ni
->source
.offset(),
1840 instrToString((Op
*)ni
->pc()).c_str());
1841 if (ni
->breaksTracelet
) break;
1843 TRACE(3, "----------------------------------------------\n");
1844 if (Trace::moduleEnabled(Trace::mcg
, 5)) {
1845 // prettyStack() expects to use vmpc(). Leave it in the state we
1846 // found it since this code is debug-only, and we don't want behavior
1847 // to vary across the optimized/debug builds.
1849 vmpc() = unit
->at(sk
.offset());
1850 TRACE(3, g_context
->prettyStack(std::string(" mcg ")));
1852 TRACE(3, "----------------------------------------------\n");
// Produces one translation for args.m_sk and publishes it.
//
// Outline, as visible in this excerpt:
//   1. Snapshot the main/stubs frontiers and set up UndoMarkers for main,
//      stubs and data so a failed attempt can be rolled back
//      (resetState / assertCleanState).
//   2. Unless interpretation was requested or the translation limit is
//      reached, pick a region: args.m_region or selectHotRegion() in
//      TransOptimize mode, otherwise analyze a Tracelet and call
//      selectRegion().
//   3. Translate the region in a retry loop. ControlFlowFailedExc turns
//      off bytecode control flow and retries; any other std::exception
//      counts as Failure.
//   4. With no region, or on Failure, fall back to translateTracelet()
//      (an Optimize translation may be retried as Live when sk is the
//      live PC, else the mode becomes TransInterp).
//   5. If nothing succeeded (transKind stays TransInterp), emit a jump to
//      a REQ_INTERPRET service request instead.
//   6. Flush pending fixups and catch traces, record the TransRec, report
//      to VTune/gdb, update PGO profile data, and finally call
//      srcRec.newTranslation(start) to make the code reachable.
1857 MCGenerator::translateWork(const TranslArgs
& args
) {
1858 Timer
_t(Timer::translate
);
1859 auto sk
= args
.m_sk
;
1860 std::unique_ptr
<Tracelet
> tp
;
1862 SKTRACE(1, sk
, "translateWork\n");
1863 assert(m_tx
.getSrcDB().find(sk
));
1865 TCA start
= code
.main().frontier();
1866 TCA stubStart
= code
.stubs().frontier();
1867 SrcRec
& srcRec
= *m_tx
.getSrcRec(sk
);
1868 TransKind transKind
= TransInterp
;
1869 UndoMarker
undoA(code
.main());
1870 UndoMarker
undoAstubs(code
.stubs());
1871 UndoMarker
undoGlobalData(code
.data());
// Rolls back everything a failed translation attempt may have emitted.
1873 auto resetState
= [&] {
1876 undoGlobalData
.undo();
1877 m_fixupMap
.clearPendingFixups();
1878 m_pendingCatchTraces
.clear();
1880 srcRec
.clearInProgressTailJumps();
// Debug check that the code blocks and pending state match the snapshot.
1883 auto assertCleanState
= [&] {
1884 assert(code
.main().frontier() == start
);
1885 assert(code
.stubs().frontier() == stubStart
);
1886 assert(m_fixupMap
.pendingFixupsEmpty());
1887 assert(m_pendingCatchTraces
.empty());
1888 assert(m_bcMap
.empty());
1889 assert(srcRec
.inProgressTailJumps().empty());
1892 PostConditions pconds
;
1893 RegionDescPtr region
;
1894 if (!args
.m_interp
&& !reachedTranslationLimit(sk
, srcRec
)) {
1895 // Attempt to create a region at this SrcKey
1896 if (m_tx
.mode() == TransOptimize
) {
1897 assert(RuntimeOption::EvalJitPGO
);
1898 region
= args
.m_region
;
1900 assert(region
->blocks
.size() > 0);
1902 TransID transId
= args
.m_transId
;
1903 assert(transId
!= InvalidID
);
1904 region
= selectHotRegion(transId
, this);
1906 if (region
&& region
->blocks
.size() == 0) region
= nullptr;
1909 assert(m_tx
.mode() == TransProfile
|| m_tx
.mode() == TransLive
);
1910 tp
= m_tx
.analyze(sk
);
1911 // TODO(#4150507): use sk.resumed() instead of liveResumed()?
1912 RegionContext rContext
{ sk
.func(), sk
.offset(), liveSpOff(),
1914 FTRACE(2, "populating live context for region\n");
1915 populateLiveContext(rContext
);
1916 region
= selectRegion(rContext
, tp
.get(), m_tx
.mode());
1918 if (RuntimeOption::EvalJitCompareRegions
&&
1919 RuntimeOption::EvalJitRegionSelector
== "tracelet") {
1920 // Re-analyze with guard relaxation on
1921 OPTION_GUARD(EvalHHBCRelaxGuards
, 1);
1922 OPTION_GUARD(EvalHHIRRelaxGuards
, 0);
1923 auto legacyRegion
= selectTraceletLegacy(rContext
.spOffset
,
1926 Trace::ftraceRelease("{:-^60}\nCouldn't select tracelet region "
1927 "for:\n{}", "", show(*legacyRegion
));
1929 diffRegions(*region
, *legacyRegion
);
1934 Translator::TranslateResult result
= Translator::Retry
;
1935 Translator::RegionBlacklist regionInterps
;
1936 Offset initSpOffset
= region
? region
->blocks
[0]->initialSpOffset()
1938 bool bcControlFlow
= RuntimeOption::EvalHHIRBytecodeControlFlow
;
1940 while (result
== Translator::Retry
) {
1941 // TODO(#4150507): use sk.resumed() instead of liveResumed()?
1942 m_tx
.traceStart(sk
.offset(), initSpOffset
, liveResumed(), sk
.func());
1944 // Try translating a region if we have one, then fall back to using the
1949 result
= m_tx
.translateRegion(*region
, bcControlFlow
, regionInterps
);
1951 // If we're profiling, grab the postconditions so we can
1952 // use them in region selection whenever we decide to retranslate.
1953 if (m_tx
.mode() == TransProfile
&& result
== Translator::Success
&&
1954 RuntimeOption::EvalJitPGOUsePostConditions
) {
1955 pconds
= m_tx
.irTrans()->hhbcTrans().irBuilder().getKnownTypes();
1958 FTRACE(2, "translateRegion finished with result {}\n",
1959 Translator::translateResultName(result
));
1960 } catch (ControlFlowFailedExc
& cfe
) {
1961 FTRACE(2, "translateRegion with control flow failed: '{}'\n",
1963 always_assert(bcControlFlow
&&
1964 "control flow translation failed, but control flow not enabled");
1965 bcControlFlow
= false;
1966 result
= Translator::Retry
;
1967 } catch (const std::exception
& e
) {
1968 FTRACE(1, "translateRegion failed with '{}'\n", e
.what());
1969 result
= Translator::Failure
;
1971 if (result
== Translator::Failure
) {
1973 // TODO(#4150507): use sk.resumed() instead of liveResumed()?
1974 m_tx
.traceStart(sk
.offset(), liveSpOff(), liveResumed(), sk
.func());
1978 if (!region
|| result
== Translator::Failure
) {
1979 // If the region translator failed for an Optimize
1980 // translation, it's OK to do a Live translation for the
1981 // function entry. We lazily create the tracelet here in this
1983 if (m_tx
.mode() == TransOptimize
) {
1984 if (sk
.getFuncId() == liveFunc()->getFuncId() &&
1985 liveUnit()->contains(vmpc()) &&
1986 sk
.offset() == liveUnit()->offsetOf(vmpc())) {
1987 m_tx
.setMode(TransLive
);
1988 tp
= m_tx
.analyze(sk
);
1990 m_tx
.setMode(TransInterp
);
1995 FTRACE(1, "trying translateTracelet\n");
1997 result
= translateTracelet(*tp
);
1999 // If we're profiling, grab the postconditions so we can
2000 // use them in region selection whenever we decide to
2002 if (m_tx
.mode() == TransProfile
&& result
== Translator::Success
&&
2003 RuntimeOption::EvalJitPGOUsePostConditions
) {
2004 pconds
= m_tx
.irTrans()->hhbcTrans().irBuilder().getKnownTypes();
2008 if (result
!= Translator::Success
) {
2009 // Translation failed. Free resources for this trace, rollback the
2010 // translation cache frontiers, and discard any pending fixups.
2016 if (result
== Translator::Success
) {
2017 assert(m_tx
.mode() == TransLive
||
2018 m_tx
.mode() == TransProfile
||
2019 m_tx
.mode() == TransOptimize
);
2020 transKind
= m_tx
.mode();
2024 if (args
.m_dryRun
) {
// No translation was produced: emit a stub that asks the interpreter to
// run the instructions instead.
2029 if (transKind
== TransInterp
) {
2031 auto interpOps
= tp
? tp
->m_numOpcodes
: 1;
2032 FTRACE(1, "emitting {}-instr interp request for failed translation\n",
2036 Asm a
{ code
.main() };
2037 // Add a counter for the translation if requested
2038 if (RuntimeOption::EvalJitTransCounters
) {
2039 X64::emitTransCounterInc(a
);
2041 a
. jmp(emitServiceReq(code
.stubs(), REQ_INTERPRET
,
2042 sk
.offset(), interpOps
));
2046 if (RuntimeOption::EvalJitTransCounters
) {
2047 vixl::MacroAssembler a
{ code
.main() };
2048 ARM::emitTransCounterInc(a
);
2050 // This jump won't be smashed, but a far jump on ARM requires the same
2054 emitServiceReq(code
.stubs(), REQ_INTERPRET
,
2055 sk
.offset(), interpOps
),
2064 m_fixupMap
.processPendingFixups();
2065 processPendingCatchTraces();
2067 TransRec
tr(sk
, sk
.unit()->md5(), transKind
, tp
.get(), start
,
2068 code
.main().frontier() - start
, stubStart
,
2069 code
.stubs().frontier() - stubStart
,
2071 m_tx
.addTranslation(tr
);
2072 if (RuntimeOption::EvalJitUseVtuneAPI
) {
2073 reportTraceletToVtune(sk
.unit(), sk
.func(), tr
);
2077 recordGdbTranslation(sk
, sk
.func(), code
.main(), start
,
2079 recordGdbTranslation(sk
, sk
.func(), code
.stubs(), stubStart
,
2081 if (RuntimeOption::EvalJitPGO
) {
2082 if (transKind
== TransProfile
) {
2085 region
= selectTraceletLegacy(liveSpOff(), *tp
);
2087 m_tx
.profData()->addTransProfile(region
, pconds
);
2089 m_tx
.profData()->addTransNonProf(transKind
, sk
);
2092 // SrcRec::newTranslation() makes this code reachable. Do this last;
2093 // otherwise there's some chance of hitting in the reader threads whose
2094 // metadata is not yet visible.
2095 TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n",
2096 start
, sk
.getFuncId(), sk
.offset());
2097 srcRec
.newTranslation(start
);
2098 TRACE(1, "mcg: %zd-byte tracelet\n", code
.main().frontier() - start
);
2099 if (Trace::moduleEnabledRelease(Trace::tcspace
, 1)) {
2100 Trace::traceRelease("%s", getUsage().c_str());
// Translate the given Tracelet by emitting guards, generating HHIR for each
// instruction in its instruction stream, and then generating machine code.
//
// Visible return values:
//   Translator::Failure - returned up front when EvalJitRegionSelector is
//                         non-empty, and by the final return that follows the
//                         catch handlers at the bottom of the function.
//   Translator::Retry   - returned when IR generation (FailedIRGen) or code
//                         generation (FailedCodeGen) fails on an identifiable
//                         bytecode instruction; the trace messages say that
//                         instruction will be interpOne'd on the retry.
//   Translator::Success - returned after code has been generated.
//
// NOTE(review): the embedded line numbers show that some lines of this
// function (e.g. the `try` openers and several closing braces) are absent
// from this listing; confirm the exact nesting against the complete file.
2104 Translator::TranslateResult
2105 MCGenerator::translateTracelet(Tracelet
& t
) {
2106 if (RuntimeOption::EvalJitRegionSelector
!= "") {
2107 // In order to properly simulate a post-Tracelet world, refuse to translate
2108 // Tracelets when a region selector is active.
2109 return Translator::Failure
;
2112 Timer
_t(Timer::translateTracelet
);
2114 FTRACE(2, "attempting to translate tracelet:\n{}\n", t
.toString());
2115 assert(!Translator::liveFrameIsPseudoMain());
2116 const SrcKey
&sk
= t
.m_sk
;
2117 SrcRec
& srcRec
= *m_tx
.getSrcRec(sk
);
2118 HhbcTranslator
& ht
= m_tx
.irTrans()->hhbcTrans();
// Set when a profiling translation starts at a function entry; consumed
// after a successful translation to call setProfiling() for the function.
2119 bool profilingFunc
= false;
2121 assert(srcRec
.inProgressTailJumps().size() == 0);
// Emit the tracelet's resolved dependencies and its guard checks before
// anything belonging to the body.
2123 emitResolvedDeps(t
.m_resolvedDeps
);
2125 emitGuardChecks(sk
, t
.m_dependencies
, t
.m_refDeps
, srcRec
);
2127 dumpTranslationInfo(t
, code
.main().frontier());
2129 // after guards, add a counter for the translation if requested
2130 if (RuntimeOption::EvalJitTransCounters
) {
2131 ht
.emitIncTransCounter();
// In TransProfile mode, a tracelet starting at a function entry calls
// emitCheckCold with the current profiling translation ID and sets
// profilingFunc.
2134 if (m_tx
.mode() == TransProfile
) {
2135 if (t
.func()->isEntry(sk
.offset())) {
2136 ht
.emitCheckCold(m_tx
.profData()->curTransID());
2137 profilingFunc
= true;
2139 ht
.emitIncProfCounter(m_tx
.profData()->curTransID());
2143 ht
.emitRB(RBTypeTraceletBody
, t
.m_sk
);
2144 emitIncStat(code
.main(), Stats::Instr_TC
, t
.m_numOpcodes
);
2147 // Profiling on function entry.
2148 if (t
.m_sk
.offset() == t
.func()->base()) {
2149 ht
.profileFunctionEntry("Normal");
2153 * Profiling on the shapes of tracelets that are whole functions.
2154 * (These are the things we might consider trying to support
// The shape profiling below only runs when stats are enabled and the
// HHVM_STATS_FUNCSHAPE environment variable is set; the check is evaluated
// once because `enabled` is a static.
2158 static const bool enabled
= Stats::enabledAny() &&
2159 getenv("HHVM_STATS_FUNCSHAPE");
2160 if (!enabled
) return;
2161 if (t
.m_sk
.offset() != t
.func()->base()) return;
2162 if (auto last
= t
.m_instrStream
.last
) {
2163 if (last
->op() != OpRetC
&& last
->op() != OpRetV
&&
2164 last
->op() != OpCreateCont
&& last
->op() != OpAsyncSuspend
) {
2168 ht
.profileSmallFunctionShape(traceletShape(t
));
2171 Timer
irGenTimer(Timer::translateTracelet_irGeneration
);
2172 Unit::MetaHandle metaHand
;
2173 // Translate each instruction in the tracelet
2174 for (auto* ni
= t
.m_instrStream
.first
; ni
&& !ht
.hasExit();
2176 ht
.setBcOff(ni
->source
.offset(),
2177 ni
->breaksTracelet
&& !ht
.isInlining());
2178 readMetaData(metaHand
, *ni
, m_tx
.irTrans()->hhbcTrans(),
2179 m_tx
.mode() == TransProfile
, MetaMode::Legacy
);
2182 SKTRACE(1, ni
->source
, "HHIR: translateInstr\n");
2183 assert(!(m_tx
.mode() ==
2184 TransProfile
&& ni
->outputPredicted
&& ni
->next
));
2185 m_tx
.irTrans()->translateInstr(*ni
);
// IR generation failed for this instruction. It must not already be flagged
// for interpretation (always_assert below); report Retry to the caller.
2186 } catch (FailedIRGen
& fcg
) {
2187 always_assert(!ni
->interp
);
2189 FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
2190 "after failing to generate ir: {} \n\n",
2191 m_tx
.getCurrentTransID(), ni
->toString(), fcg
.what());
2192 return Translator::Retry
;
2194 assert(ni
->source
.offset() >= t
.func()->base());
2195 // We sometimes leave the tail of a truncated tracelet in place to aid
2196 // analysis, but breaksTracelet is authoritative.
2197 if (ni
->breaksTracelet
|| m_tx
.irTrans()->hhbcTrans().hasExit()) break;
2204 TRACE(1, "HHIR: SUCCEEDED to generate code for Translation %d\n\n\n",
2205 m_tx
.getCurrentTransID());
// Mark the function as being profiled only once translation has succeeded.
2206 if (profilingFunc
) m_tx
.profData()->setProfiling(t
.func()->getFuncId());
2207 return Translator::Success
;
2208 } catch (FailedCodeGen
& fcg
) {
2209 // Code-gen failed. Search for the bytecode instruction that caused the
2210 // problem, flag it to be interpreted, and retranslate the tracelet.
2211 SrcKey sk
{fcg
.vmFunc
, fcg
.bcOff
, fcg
.resumed
};
// Walk the instruction stream for the instruction whose source SrcKey equals
// the one reported by the failure.
2213 for (auto ni
= t
.m_instrStream
.first
; ni
; ni
= ni
->next
) {
2214 if (ni
->source
== sk
) {
2218 std::ostringstream oss
;
2219 oss
<< folly::format("code generation failed with {}\n",
2221 print(oss
, m_tx
.irTrans()->hhbcTrans().unit());
2226 FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
2227 "after failing to code-gen \n\n",
2228 m_tx
.getCurrentTransID(), ni
->toString(), fcg
.what());
2229 return Translator::Retry
;
// The handlers below only emit trace output / extra logging in the lines
// visible here.
2234 } catch (FailedCodeGen
& fcg
) {
2235 TRACE(1, "HHIR: FAILED to generate code for Translation %d "
2236 "@ %s:%d (%s)\n", m_tx
.getCurrentTransID(),
2237 fcg
.file
, fcg
.line
, fcg
.func
);
2238 // HHIR:TODO Remove extra TRACE and adjust tools
2239 TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
2240 fcg
.file
, fcg
.line
, fcg
.func
);
2241 } catch (FailedIRGen
& x
) {
2242 TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
2243 x
.file
, x
.line
, x
.func
);
2244 } catch (const FailedAssertion
& fa
) {
2246 StackTraceNoHeap::AddExtraLogging(
2247 "Assertion failure",
2248 folly::format("{}\n\nActive Unit:\n{}\n",
2249 fa
.summary
, ht
.unit().toString()).str());
2251 } catch (const FailedTraceGen
& e
) {
2252 FTRACE(1, "HHIR: FAILED to translate whole unit: {}\n",
2255 return Translator::Failure
;
// Take the HHIR unit held by the current HhbcTranslator through the rest of
// the pipeline: dump and CFG-check it after initial translation, optimize it,
// dump and CFG-check it again, allocate registers, and generate machine code
// into code.main() and code.stubs().
2258 void MCGenerator::traceCodeGen() {
2259 HhbcTranslator
& ht
= m_tx
.irTrans()->hhbcTrans();
2260 auto& unit
= ht
.unit();
// finishPass(msg, level): dump the unit at the given trace level, labelled
// with msg, then assert that checkCfg(unit) holds.
2262 auto finishPass
= [&](const char* msg
, int level
) {
2263 dumpTrace(level
, unit
, msg
, nullptr, nullptr, ht
.irBuilder().guards());
2264 assert(checkCfg(unit
));
2267 finishPass(" after initial translation ", kIRLevel
);
2269 optimize(unit
, ht
.irBuilder(), m_tx
.mode());
2270 finishPass(" after optimizing ", kOptLevel
);
2272 auto regs
= allocateRegs(unit
);
2273 assert(checkRegisters(unit
, regs
)); // calls checkCfg internally.
// Calls recordBCInstr for OpTraceletGuard with the current main frontier as
// the address, then emits code for the unit using the register allocation.
2275 recordBCInstr(OpTraceletGuard
, code
.main(), code
.main().frontier());
2276 genCode(code
.main(), code
.stubs(), unit
, &m_bcMap
, this, regs
);
// Constructor. The visible initializers set m_numNativeTrampolines to 0 and
// construct m_catchTraceMap with the argument 128. The visible body logs
// startup, registers an unwind region from code.base() and code.codeSize(),
// and prints a hint when printir tracing is enabled but EvalJit is off.
// NOTE(review): parts of the initializer list and body are absent from this
// listing, including whatever uses the static `profileUp` flag.
2281 MCGenerator::MCGenerator()
2282 : m_numNativeTrampolines(0)
2284 , m_catchTraceMap(128)
2286 TRACE(1, "MCGenerator@%p startup\n", this);
2289 m_unwindRegistrar
= register_unwind_region(code
.base(), code
.codeSize());
2291 static bool profileUp
= false;
// printir output would never appear without the jit, so tell the user.
2297 if (Trace::moduleEnabledRelease(Trace::printir
) &&
2298 !RuntimeOption::EvalJit
) {
2299 Trace::traceRelease("TRACE=printir is set but the jit isn't on. "
2300 "Did you mean to run with -vEval.Jit=1?\n");
// Emit the unique stubs and store the result in m_tx.uniqueStubs, with a
// CodeCache::Selector configured with hot(true) alive for the duration.
// NOTE(review): both an X64 and an ARM assignment are visible; the condition
// that chooses between them is on lines absent from this listing.
2304 void MCGenerator::initUniqueStubs() {
2305 // Put the following stubs into ahot, rather than a.
2306 CodeCache::Selector
asmSel(CodeCache::Selector::Args(code
).hot(true));
2309 m_tx
.uniqueStubs
= X64::emitUniqueStubs();
2312 m_tx
.uniqueStubs
= ARM::emitUniqueStubs();
// Queue an (ip, block) pair on m_pendingCatchTraces. The pair is not put in
// m_catchTraceMap here; processPendingCatchTraces() does that later.
//   ip    - address logged as "afterCall" in the trace message.
//   block - address of the catch block to associate with ip.
2317 void MCGenerator::registerCatchBlock(CTCA ip
, TCA block
) {
2318 FTRACE(1, "registerCatchBlock: afterCall: {} block: {}\n", ip
, block
);
2319 m_pendingCatchTraces
.emplace_back(ip
, block
);
// Insert every queued (ip, block) pair from m_pendingCatchTraces into
// m_catchTraceMap, then empty the pending list.
2322 void MCGenerator::processPendingCatchTraces() {
2323 for (auto const& pair
: m_pendingCatchTraces
) {
2324 m_catchTraceMap
.insert(pair
.first
, pair
.second
);
2326 m_pendingCatchTraces
.clear();
// Look up ip in m_catchTraceMap and return the stored TCA when an entry
// exists.
// NOTE(review): the return for the not-found case is on a line absent from
// this listing; given the folly::Optional return type it is presumably an
// empty optional - confirm.
2329 folly::Optional
<TCA
> MCGenerator::getCatchTrace(CTCA ip
) const {
2330 TCA
* found
= m_catchTraceMap
.find(ip
);
2331 if (found
) return *found
;
// Per-request setup: mark the VM register state CLEAN, initialize the request
// timers, reset the high-level translator state, notify the Treadmill that a
// request is starting, and zero the s_perfCounters array.
// NOTE(review): some numbered lines of this function are absent from this
// listing, so additional setup steps may exist.
2335 void MCGenerator::requestInit() {
2336 tl_regState
= VMRegState::CLEAN
;
2337 Timer::RequestInit();
2339 m_tx
.requestResetHighLevelTranslator();
2340 Treadmill::startRequest();
2341 memset(&s_perfCounters
, 0, sizeof(s_perfCounters
));
// Per-request teardown: drop the translator write lease if this thread owns
// it, trace the lease's kept/grabbed hint statistics, notify the Treadmill
// and the timers that the request finished, and, when mcgstats tracing is
// enabled at level 1, print every perf counter for the request URL.
2345 void MCGenerator::requestExit() {
2346 if (Translator::WriteLease().amOwner()) {
2347 Translator::WriteLease().drop();
2349 TRACE_MOD(txlease
, 2, "%" PRIx64
" write lease stats: %15" PRId64
2350 " kept, %15" PRId64
" grabbed\n",
2351 Process::GetThreadIdForTrace(), Translator::WriteLease().m_hintKept
,
2352 Translator::WriteLease().m_hintGrabbed
);
2354 Treadmill::finishRequest();
2357 Timer::RequestExit();
// Dump name/value pairs for all tpc_num_counters counters.
2359 if (Trace::moduleEnabledRelease(Trace::mcgstats
, 1)) {
2360 Trace::traceRelease("MCGenerator perf counters for %s:\n",
2361 g_context
->getRequestUrl(50).c_str());
2362 for (int i
= 0; i
< tpc_num_counters
; i
++) {
2363 Trace::traceRelease("%-20s %10" PRId64
"\n",
2364 kPerfCounterNames
[i
], s_perfCounters
[i
]);
2366 Trace::traceRelease("\n");
// Compare `event` against each entry of kPerfCounterNames using strcmp.
// NOTE(review): the return type and the return statements are on lines absent
// from this listing; presumably this reports whether a name matched - confirm.
2371 MCGenerator::isPseudoEvent(const char* event
) {
2372 for (auto name
: kPerfCounterNames
) {
2373 if (!strcmp(event
, name
)) {
// Fill `ret` with counter values keyed by name:
//   - every s_perfCounters entry, multiplied by 1000, under its
//     kPerfCounterNames name;
//   - when EnableInstructionCounts is set, summed instruction counters
//     (see doCounts);
//   - for every Timer counter with a non-zero total or count, its total under
//     the key "jit_time_" + name.
2381 MCGenerator::getPerfCounters(Array
& ret
) {
2382 for (int i
= 0; i
< tpc_num_counters
; i
++) {
2383 // Until Perflab can automatically scale the values we give it to
2384 // an appropriate range, we have to fudge these numbers so they
2385 // look more like reasonable hardware counter values.
2386 ret
.set(String::FromCStr(kPerfCounterNames
[i
]),
2387 s_perfCounters
[i
] * 1000);
// doCounts(begin, name): add up Stats::tl_counters starting at index `begin`
// in steps of STATS_PER_OPCODE while below Instr_InterpOneHighInvalid, and
// store the sum in `ret` under `name`.
2390 if (RuntimeOption::EnableInstructionCounts
) {
2391 auto doCounts
= [&](unsigned begin
, const char* const name
) {
2393 for (; begin
< Stats::Instr_InterpOneHighInvalid
;
2394 begin
+= STATS_PER_OPCODE
) {
2395 count
+= Stats::tl_counters
[Stats::StatCounter(begin
)];
2397 ret
.set(String::FromCStr(name
), count
);
2400 doCounts(Stats::Instr_TranslLowInvalid
+ STATS_PER_OPCODE
,
2401 kInstrCountMCGName
);
2402 doCounts(Stats::Instr_TranslIRPostLowInvalid
+ STATS_PER_OPCODE
,
// Skip timers that never ran (both total and count are zero).
2406 for (auto const& pair
: Timer::Counters()) {
2407 if (pair
.second
.total
== 0 && pair
.second
.count
== 0) continue;
2409 ret
.set(String("jit_time_") + pair
.first
, pair
.second
.total
);
// Destructor. NOTE(review): its body is on lines absent from this listing.
2413 MCGenerator::~MCGenerator() {
// Build a Debug::TCRange that starts at `addr` and ends at the current
// frontier of `cb`, passing `isAstubs` through as the range's third argument.
// Asserts that `addr` lies inside `cb`.
2416 static Debug::TCRange
rangeFrom(const CodeBlock
& cb
, const TCA addr
,
2418 assert(cb
.contains(addr
));
2419 return Debug::TCRange(addr
, cb
.frontier(), isAstubs
);
// Record debug info for bytecode op `op` covering the code from `addr` up to
// the current frontier of `cb`. Nothing is recorded when `addr` already equals
// the frontier (i.e. the range would be empty). The range's last argument is
// whether `cb` is the stubs code block.
2422 void MCGenerator::recordBCInstr(uint32_t op
,
2423 const CodeBlock
& cb
,
2425 if (addr
!= cb
.frontier()) {
2426 m_debugInfo
.recordBCInstr(Debug::TCRange(addr
, cb
.frontier(),
2427 &cb
== &code
.stubs()), op
);
// Record debug information for the code emitted into `cb` from `start` up to
// its current frontier. Does nothing when that range is empty. The caller must
// hold the translator write lease (asserted).
//   - Unless EvalJitNoGdb is set, the range is recorded as a tracelet in
//     m_debugInfo.
//   - When EvalPerfPidMap is set, the range is also recorded in the perf map.
2431 void MCGenerator::recordGdbTranslation(SrcKey sk
,
2432 const Func
* srcFunc
,
2433 const CodeBlock
& cb
,
2437 if (start
!= cb
.frontier()) {
2438 assert(Translator::WriteLease().amOwner());
2439 if (!RuntimeOption::EvalJitNoGdb
) {
2440 m_debugInfo
.recordTracelet(rangeFrom(cb
, start
, &cb
== &code
.stubs()),
2442 reinterpret_cast<const Op
*>(
2444 srcFunc
->unit()->at(sk
.offset()) : nullptr
2448 if (RuntimeOption::EvalPerfPidMap
) {
2449 m_debugInfo
.recordPerfMap(rangeFrom(cb
, start
, &cb
== &code
.stubs()),
2450 srcFunc
, exit
, inPrologue
);
// Unless EvalJitNoGdb is set, record the code in `cb` from `start` to its
// current frontier as a stub in m_debugInfo. `name` is the stub's name
// parameter; its use is on a line absent from this listing.
2455 void MCGenerator::recordGdbStub(const CodeBlock
& cb
,
2456 const TCA start
, const char* name
) {
2457 if (!RuntimeOption::EvalJitNoGdb
) {
2458 m_debugInfo
.recordStub(rangeFrom(cb
, start
, &cb
== &code
.stubs()),
// Build a human-readable usage report with one "mcg: N bytes (P%) in NAME"
// row per code block, plus rows for the data block, RDS and persistent RDS.
// RDS capacity is taken as 3/4 of EvalJitTargetCacheSize and persistent RDS
// capacity as the remaining 1/4.
2463 std::string
MCGenerator::getUsage() {
2465 size_t totalBlockSize
= 0;
2466 size_t totalBlockCapacity
= 0;
// addRow(name, used, capacity): add used/capacity to the running totals and
// append one formatted row. The percentage is reported as 0 when capacity is
// 0, which avoids dividing by zero.
2468 auto addRow
= [&](const std::string
& name
, size_t used
, size_t capacity
) {
2469 totalBlockSize
+= used
;
2470 totalBlockCapacity
+= capacity
;
2471 auto percent
= capacity
? 100 * used
/ capacity
: 0;
2472 usage
+= folly::format("mcg: {:9} bytes ({}%) in {}\n",
2473 used
, percent
, name
).str();
2475 code
.forEachBlock([&](const char* name
, const CodeBlock
& a
) {
2476 addRow(std::string("code.") + name
, a
.used(), a
.capacity());
2478 addRow("data", code
.data().used(), code
.data().capacity());
2479 addRow("RDS", RDS::usedBytes(),
2480 RuntimeOption::EvalJitTargetCacheSize
* 3 / 4);
2481 addRow("persistentRDS", RDS::usedPersistentBytes(),
2482 RuntimeOption::EvalJitTargetCacheSize
/ 4);
// NOTE(review): the "data", "RDS" and "persistentRDS" rows above already went
// through addRow, which folds them into totalBlockSize/totalBlockCapacity.
// The sums below add data and RDS usage/capacity again, so they appear to be
// counted twice. The call receiving these arguments is on a line absent from
// this listing - confirm against the full file before changing.
2484 totalBlockSize
+ code
.data().used() +
2485 RDS::usedBytes() + RDS::usedPersistentBytes(),
2486 totalBlockCapacity
+ code
.data().capacity() +
2487 RuntimeOption::EvalJitTargetCacheSize
);
// Build a string with one "NAME: BASE" line per code block, where BASE is the
// block's base address.
2492 std::string
MCGenerator::getTCAddrs() {
2494 code
.forEachBlock([&](const char* name
, const CodeBlock
& a
) {
2495 addrs
+= folly::format("{}: {}\n", name
, a
.base()).str();
// Add debugger guards to the existing translations that belong to `unit`.
// Acquires the translator write lease, walks the whole SrcDB, and calls
// addDbgGuardImpl for every SrcRec that (a) belongs to a still-valid function,
// (b) has the same unit MD5 as `unit`, (c) has no debugger guard yet, and
// (d) whose SrcKey is reported by m_tx.isSrcKeyInBL(). The lease is dropped
// afterwards and the time spent holding it is traced at mcg level 5.
// NOTE(review): the handling of a failed acquire() and the return values are
// on lines absent from this listing.
2500 bool MCGenerator::addDbgGuards(const Unit
* unit
) {
2502 // It grabs the write lease and iterates through the whole SrcDB...
2503 bool locked
= Translator::WriteLease().acquire(true);
2507 struct timespec tsBegin
, tsEnd
;
2508 HPHP::Timer::GetMonotonicTime(tsBegin
);
2509 // Doc says even find _could_ invalidate iterator, in practice it should
2510 // be very rare, so go with it now.
2511 for (SrcDB::const_iterator it
= m_tx
.getSrcDB().begin();
2512 it
!= m_tx
.getSrcDB().end(); ++it
) {
2513 SrcKey
const sk
= SrcKey::fromAtomicInt(it
->first
);
2514 // We may have a SrcKey to a deleted function. NB: this may miss a
2515 // race with deleting a Func. See task #2826313.
2516 if (!Func::isFuncIdValid(sk
.getFuncId())) continue;
2517 SrcRec
* sr
= it
->second
;
2518 if (sr
->unitMd5() == unit
->md5() &&
2519 !sr
->hasDebuggerGuard() &&
2520 m_tx
.isSrcKeyInBL(sk
)) {
2521 addDbgGuardImpl(sk
, sr
);
2524 Translator::WriteLease().drop();
2525 HPHP::Timer::GetMonotonicTime(tsEnd
);
2526 int64_t elapsed
= gettime_diff_us(tsBegin
, tsEnd
);
2527 if (Trace::moduleEnabledRelease(Trace::mcg
, 5)) {
2528 Trace::traceRelease("addDbgGuards got lease for %" PRId64
" us\n", elapsed
);
// Add a debugger guard for the single SrcKey (func, offset, resumed).
// The SrcDB is first consulted without the lease: an existing SrcRec that
// already has a debugger guard, and the "no translation yet" case, are handled
// there. Otherwise the write lease is acquired, the SrcRec is looked up again
// and, if present, addDbgGuardImpl is called before the lease is dropped.
// NOTE(review): the return statements and the handling of a failed acquire()
// are on lines absent from this listing.
2533 bool MCGenerator::addDbgGuard(const Func
* func
, Offset offset
, bool resumed
) {
2534 SrcKey
sk(func
, offset
, resumed
);
2536 if (SrcRec
* sr
= m_tx
.getSrcDB().find(sk
)) {
2537 if (sr
->hasDebuggerGuard()) {
2541 // no translation yet
2546 if (!m_tx
.isSrcKeyInBL(sk
)) {
2547 TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
2551 bool locked
= Translator::WriteLease().acquire(true);
// Look the SrcRec up again now that the lease has been requested.
2556 if (SrcRec
* sr
= m_tx
.getSrcDB().find(sk
)) {
2557 addDbgGuardImpl(sk
, sr
);
2560 Translator::WriteLease().drop();
// Dump the raw contents of the translation cache to files whose names are
// `filename` plus a suffix:
//   _a                 - bytes from the trampolines base up to the main
//                        frontier
//   _aprof             - the used part of the prof block
//   _astub             - the used part of the stubs block
//   _helpers_addrs.txt - one "trampoline helper name" line per entry of
//                        m_trampolineMap
// Returns false as soon as any of the files cannot be opened.
2564 bool MCGenerator::dumpTCCode(const char* filename
) {
// OPEN_FILE(F, SUFFIX): declare FILE* F opened for binary writing on
// filename + SUFFIX, return false from the enclosing function if fopen fails,
// and close the file when the enclosing scope exits.
2565 #define OPEN_FILE(F, SUFFIX) \
2566 std::string F ## name = std::string(filename).append(SUFFIX); \
2567 FILE* F = fopen(F ## name .c_str(),"wb"); \
2568 if (F == nullptr) return false; \
2569 SCOPE_EXIT{ fclose(F); };
2571 OPEN_FILE(aFile
, "_a");
2572 OPEN_FILE(aprofFile
, "_aprof");
2573 OPEN_FILE(astubFile
, "_astub");
2574 OPEN_FILE(helperAddrFile
, "_helpers_addrs.txt");
2578 // dump starting from the trampolines; this assumes CodeCache places
2579 // trampolines before the translation cache
2580 size_t count
= code
.main().frontier() - code
.trampolines().base();
// Each write counts as successful only if fwrite reports all `count` bytes.
2581 bool result
= (fwrite(code
.trampolines().base(), 1, count
, aFile
) == count
);
2583 count
= code
.prof().used();
2584 result
= (fwrite(code
.prof().base(), 1, count
, aprofFile
) == count
);
2587 count
= code
.stubs().used();
2588 result
= (fwrite(code
.stubs().base(), 1, count
, astubFile
) == count
);
// m_trampolineMap maps a helper address (first) to its trampoline address
// (second); the helper's native function name is printed alongside both.
2591 for (auto const& pair
: m_trampolineMap
) {
2592 void* helperAddr
= pair
.first
;
2593 void* trampAddr
= pair
.second
;
2594 auto functionName
= getNativeFunctionName(helperAddr
);
2595 fprintf(helperAddrFile
,"%10p %10p %s\n",
2596 trampAddr
, helperAddr
,
2597 functionName
.c_str());
2603 // Returns true on success
// Dump translation cache metadata (dumpTCData) and code (dumpTCCode, using
// the "/tmp/tc_dump" file name prefix). Unless `ignoreLease` is set, the
// write lease is acquired first - returning false if that fails - and
// dropped again afterwards.
2604 bool MCGenerator::dumpTC(bool ignoreLease
) {
2605 if (!ignoreLease
&& !Translator::WriteLease().acquire(true)) return false;
2606 bool success
= dumpTCData();
2608 success
= dumpTCCode("/tmp/tc_dump");
2610 if (!ignoreLease
) Translator::WriteLease().drop();
2614 // Returns true on success
// Free-function entry point for dumping the translation cache: evaluates to
// false when the global `mcg` is null, otherwise forwards to mcg->dumpTC().
2615 bool tc_dump(void) {
2616 return mcg
&& mcg
->dumpTC();
2619 // Returns true on success
// Write translation cache metadata to the gzip file /tmp/tc_data.txt.gz: a
// header with the repo schema and the base/frontier addresses of the code
// blocks, the total number of translations, and then the printed record of
// every translation together with its counter value.
// Returns false when the file cannot be opened.
// NOTE(review): the bodies of the error branches are on lines absent from
// this listing; check that they close tcDataFile before returning.
2620 bool MCGenerator::dumpTCData() {
2621 gzFile tcDataFile
= gzopen("/tmp/tc_data.txt.gz", "w");
2622 if (!tcDataFile
) return false;
2624 if (!gzprintf(tcDataFile
,
2625 "repo_schema = %s\n"
2629 "aprof.frontier = %p\n"
2630 "astubs.base = %p\n"
2631 "astubs.frontier = %p\n\n",
2633 code
.trampolines().base(), code
.main().frontier(),
2634 code
.prof().base(), code
.prof().frontier(),
2635 code
.stubs().base(), code
.stubs().frontier())) {
2639 if (!gzprintf(tcDataFile
, "total_translations = %zu\n\n",
2640 m_tx
.getCurrentTransID())) {
// One record per translation ID in [0, current translation ID).
2644 for (TransID t
= 0; t
< m_tx
.getCurrentTransID(); t
++) {
2645 if (gzputs(tcDataFile
,
2646 m_tx
.getTransRec(t
)->print(m_tx
.getTransCounter(t
)).c_str()) ==
2652 gzclose(tcDataFile
);
// Invalidate the existing translations for `sk` by calling
// replaceOldTranslations() on its SrcRec.
// Preconditions (asserted): either the repo is not authoritative or
// EvalJitPGO is enabled, and the caller owns the translator write lease.
2656 void MCGenerator::invalidateSrcKey(SrcKey sk
) {
2657 assert(!RuntimeOption::RepoAuthoritative
|| RuntimeOption::EvalJitPGO
);
2658 assert(Translator::WriteLease().amOwner());
2660 * Reroute existing translations for SrcKey to an as-yet indeterminate
2663 SrcRec
* sr
= m_tx
.getSrcDB().find(sk
);
2666 * Since previous translations aren't reachable from here, we know we
2667 * just created some garbage in the TC. We currently have no mechanism
2670 sr
->replaceOldTranslations();
// Associate the jump at address `jmp` with the current profiling translation
// ID by storing it in m_jmpToTransID. Does nothing unless the translator is
// in TransProfile mode.
2673 void MCGenerator::setJmpTransID(TCA jmp
) {
2674 if (m_tx
.mode() != TransProfile
) return;
2676 TransID transId
= m_tx
.profData()->curTransID();
2677 FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp
, transId
);
2678 m_jmpToTransID
[jmp
] = transId
;
2682 emitIncStat(CodeBlock
& cb
, uint64_t* tl_table
, uint index
, int n
, bool force
) {
2683 if (!force
&& !Stats::enabled()) return;
2684 intptr_t disp
= uintptr_t(&tl_table
[index
]) - tlsBase();
2686 if (arch() == Arch::X64
) {
2687 X64Assembler a
{ cb
};
2690 // addq $n, [%fs:disp]
2691 a
. fs().addq(n
, baseless(disp
));
2693 } else if (arch() == Arch::ARM
) {
2696 vixl::MacroAssembler a
{ cb
};
2698 a
. Mrs (rAsm2
, vixl::TPIDR_EL0
);
2699 a
. Ldr (rAsm
, rAsm2
[disp
]);
2700 a
. Add (rAsm
, rAsm
, n
);
2701 a
. Str (rAsm
, rAsm2
[disp
]);