/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/vtune-jit.h"

#include <unordered_set>

#include <folly/Format.h>
#include <folly/MapUtil.h>
#include <folly/Optional.h>
#include <folly/String.h>

#include "hphp/util/abi-cxx.h"
#include "hphp/util/asm-x64.h"
#include "hphp/util/bitops.h"
#include "hphp/util/cycles.h"
#include "hphp/util/debug.h"
#include "hphp/util/disasm.h"
#include "hphp/util/maphuge.h"
#include "hphp/util/meta.h"
#include "hphp/util/process.h"
#include "hphp/util/rank.h"
#include "hphp/util/repo-schema.h"
#include "hphp/util/ringbuffer.h"
#include "hphp/util/timer.h"
#include "hphp/util/trace.h"

#include "hphp/runtime/base/arch.h"
#include "hphp/runtime/base/execution-context.h"
#include "hphp/runtime/base/rds.h"
#include "hphp/runtime/base/runtime-option-guard.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/stats.h"
#include "hphp/runtime/base/strings.h"
#include "hphp/runtime/base/zend-string.h"
#include "hphp/runtime/ext/ext_closure.h"
#include "hphp/runtime/ext/ext_generator.h"
#include "hphp/runtime/ext/std/ext_std_function.h"
#include "hphp/runtime/server/source-root-info.h"
#include "hphp/runtime/vm/bytecode.h"
#include "hphp/runtime/vm/debug/debug.h"
#include "hphp/runtime/vm/func.h"
#include "hphp/runtime/vm/jit/back-end-x64.h" // XXX Layering violation.
#include "hphp/runtime/vm/jit/check.h"
#include "hphp/runtime/vm/jit/code-gen.h"
#include "hphp/runtime/vm/jit/debug-guards.h"
#include "hphp/runtime/vm/jit/inlining-decider.h"
#include "hphp/runtime/vm/jit/irgen.h"
#include "hphp/runtime/vm/jit/normalized-instruction.h"
#include "hphp/runtime/vm/jit/opt.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/prof-data.h"
#include "hphp/runtime/vm/jit/region-selection.h"
#include "hphp/runtime/vm/jit/service-requests-inline.h"
#include "hphp/runtime/vm/jit/srcdb.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/translate-region.h"
#include "hphp/runtime/vm/jit/translator-inline.h"
#include "hphp/runtime/vm/jit/vasm-emit.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/member-operations.h"
#include "hphp/runtime/vm/php-debug.h"
#include "hphp/runtime/vm/repo.h"
#include "hphp/runtime/vm/runtime.h"
#include "hphp/runtime/vm/srckey.h"
#include "hphp/runtime/vm/treadmill.h"
#include "hphp/runtime/vm/type-profile.h"
#include "hphp/runtime/vm/unwind.h"

#include "hphp/runtime/vm/jit/mc-generator-internal.h"
namespace HPHP { namespace jit {

using namespace Trace;
#define TRANS_PERF_COUNTERS \

#define TPC(n) "jit_" #n,
static const char* const kPerfCounterNames[] = {
  TRANS_PERF_COUNTERS
};
#undef TPC

__thread int64_t s_perfCounters[tpc_num_counters];

static __thread size_t s_initialTCSize;
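// Note: TRANS_PERF_COUNTERS is an X-macro list. Instantiating it with the
// TPC definition above yields the "jit_"-prefixed name strings in
// kPerfCounterNames; a parallel enum-style instantiation (presumably in the
// header) yields the tpc_* enumerators, ending in tpc_num_counters, that
// index s_perfCounters.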
// The global MCGenerator object.
MCGenerator* mcg;

CppCall MCGenerator::getDtorCall(DataType type) {
  switch (type) {
    case BitwiseKindOfString:
      return CppCall::method(&StringData::release);
    case KindOfArray:
      return CppCall::method(&ArrayData::release);
    case KindOfObject:
      return CppCall::method(&ObjectData::release);
    case KindOfResource:
      return CppCall::method(&ResourceData::release);
    case KindOfRef:
      return CppCall::method(&RefData::release);
    default:
      not_reached();
  }
}
///////////////////////////////////////////////////////////////////////////////

bool shouldPGOFunc(const Func& func) {
  if (!RuntimeOption::EvalJitPGO) return false;

  // JITing pseudo-mains requires extra checks that blow up the IR. PGO
  // can significantly increase the size of the regions, so disable it for
  // pseudo-mains (so regions will be just tracelets).
  if (func.isPseudoMain()) return false;

  // Non-cloned closures simply contain prologues that redispatch to
  // cloned closures. They don't contain a translation for the
  // function entry, which is what triggers an Optimize retranslation.
  // So don't generate profiling translations for them -- there's not
  // much to do with PGO anyway here, since they just have prologues.
  if (func.isClosureBody() && !func.isClonedClosure()) return false;

  if (!RuntimeOption::EvalJitPGOHotOnly) return true;
  return func.attrs() & AttrHot;
}
bool MCGenerator::profileSrcKey(SrcKey sk) const {
  if (!shouldPGOFunc(*sk.func())) return false;
  if (m_tx.profData()->optimized(sk.getFuncId())) return false;
  if (m_tx.profData()->profiling(sk.getFuncId())) return true;

  // Don't start profiling new functions if the size of either main or
  // prof is already above Eval.JitAMaxUsage.
  auto tcUsage = std::max(code.mainUsed(), code.profUsed());
  if (tcUsage >= RuntimeOption::EvalJitAMaxUsage) {
    return false;
  }

  return requestCount() <= RuntimeOption::EvalJitProfileRequests;
}
/*
 * Invalidate the SrcDB entries for func's SrcKeys that have any
 * Profile translation.
 */
void MCGenerator::invalidateFuncProfSrcKeys(const Func* func) {
  assert(RuntimeOption::EvalJitPGO);
  FuncId funcId = func->getFuncId();
  for (auto tid : m_tx.profData()->funcProfTransIDs(funcId)) {
    invalidateSrcKey(m_tx.profData()->transSrcKey(tid));
  }
}
TCA
MCGenerator::retranslate(const TranslArgs& args) {
  auto sr = m_tx.getSrcDB().find(args.sk);

  bool locked = sr->tryLock();
  SCOPE_EXIT {
    if (locked) sr->freeLock();
  };
  if (isDebuggerAttachedProcess() && m_tx.isSrcKeyInBL(args.sk)) {
    // We are about to translate something known to be blacklisted by
    // the debugger; exit early.
    SKTRACE(1, args.sk, "retranslate abort due to debugger\n");
    return nullptr;
  }
  LeaseHolder writer(Translator::WriteLease());
  if (!writer || !shouldTranslate(args.sk.func())) return nullptr;
  if (!locked) {
    // Even though we knew above that we were going to skip
    // doing another translation, we wait until we get the
    // write lease, to avoid spinning through the tracelet
    // guards again and again while another thread is writing.
    return sr->getTopTranslation();
  }
  if (sr->translations().size() > RuntimeOption::EvalJitMaxTranslations) {
    always_assert(sr->translations().size() ==
                  RuntimeOption::EvalJitMaxTranslations + 1);
    return sr->getTopTranslation();
  }
  SKTRACE(1, args.sk, "retranslate\n");

  m_tx.setMode(profileSrcKey(args.sk) ? TransKind::Profile : TransKind::Live);
  SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };

  return translate(args);
}
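// Typical control flow: enterTC hits a service request stub, the request is
// routed to handleServiceRequest (e.g. REQ_RETRANSLATE), which calls
// retranslate() above; retranslate() takes the write lease, picks Profile or
// Live mode via profileSrcKey(), and defers to translate()/translateWork().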
TCA
MCGenerator::retranslateOpt(TransID transId, bool align) {
  LeaseHolder writer(Translator::WriteLease());
  if (!writer) return nullptr;
  if (isDebuggerAttachedProcess()) return nullptr;

  TRACE(1, "retranslateOpt: transId = %u\n", transId);

  SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };

  if (!m_tx.profData()->hasTransRec(transId)) return nullptr;

  always_assert(m_tx.profData()->transRegion(transId) != nullptr);

  auto func   = m_tx.profData()->transFunc(transId);
  auto funcId = func->getFuncId();
  auto sk     = m_tx.profData()->transSrcKey(transId);

  if (m_tx.profData()->optimized(funcId)) return nullptr;
  m_tx.profData()->setOptimized(funcId);

  bool setFuncBody = func->getDVFunclets().size() == 0;

  func->setFuncBody(m_tx.uniqueStubs.funcBodyHelperThunk);

  // Invalidate SrcDB's entries for all func's SrcKeys.
  invalidateFuncProfSrcKeys(func);

  // Regenerate the prologues and DV funclets before the actual function body.
  TCA start = regeneratePrologues(func, sk);

  // Regionize func and translate all its regions.
  std::vector<RegionDescPtr> regions;
  regionizeFunc(func, this, regions);

  for (auto region : regions) {
    m_tx.setMode(TransKind::Optimize);
    always_assert(!region->empty());
    auto regionSk = region->start();
    auto translArgs = TranslArgs{regionSk, align};
    translArgs.region = region;

    if (setFuncBody && regionSk.offset() == func->base()) {
      translArgs.setFuncBody = true;
      setFuncBody = false;
    }

    auto regionStart = translate(translArgs);
    if (start == nullptr && regionSk == sk) {
      start = regionStart;
    }

    // Cloned closures' prologue tables point to the corresponding
    // main/DV entry point. So update the prologue table when
    // retranslating their entries.
    if (func->isClonedClosure() && func->isEntry(regionSk.offset()) &&
        regionStart) {
      int entryNumParams = func->getEntryNumParams(regionSk.offset());
      func->setPrologue(entryNumParams, regionStart);
    }
  }

  m_tx.profData()->freeFuncData(funcId);

  return start;
}
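// Overall PGO flow: once a function is deemed worth optimizing, the steps
// above run exactly once per FuncId (guarded by profData()->optimized()):
// throw away the profiling translations, regenerate prologues and DV
// funclets, then emit one Optimize translation per region produced by
// regionizeFunc().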
static bool liveFrameIsPseudoMain() {
  ActRec* ar = (ActRec*)vmfp();
  return ar->hasVarEnv() && ar->getVarEnv()->isGlobalScope();
}
/*
 * Find or create a translation for sk. Returns TCA of "best" current
 * translation. May return NULL if it is currently impossible to create
 * a translation.
 */
TCA
MCGenerator::getTranslation(const TranslArgs& args) {
  auto sk = args.sk;
  sk.func()->validate();
  SKTRACE(2, sk,
          "getTranslation: curUnit %s funcId %x offset %d\n",
          sk.unit()->filepath()->data(),
          sk.getFuncId(),
          sk.offset());
  SKTRACE(2, sk, "  funcId: %x \n", sk.func()->getFuncId());

  if (liveFrameIsPseudoMain() && !RuntimeOption::EvalJitPseudomain) {
    SKTRACE(2, sk, "punting on pseudoMain\n");
    return nullptr;
  }
  if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
    TCA tca = sr->getTopTranslation();
    if (tca) {
      SKTRACE(2, sk, "getTranslation: found %p\n", tca);
      return tca;
    }
  }
  return createTranslation(args);
}
int
MCGenerator::numTranslations(SrcKey sk) const {
  if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
    return sr->translations().size();
  }
  return 0;
}
static const StaticString
  s_php_errormsg("php_errormsg"),
  s_http_response_header("http_response_header");
bool MCGenerator::shouldTranslateNoSizeLimit(const Func* func) const {
  // If we've hit Eval.JitGlobalTranslationLimit, then we stop translating.
  if (m_numTrans >= RuntimeOption::EvalJitGlobalTranslationLimit) {
    return false;
  }

  /*
   * We don't support JIT compiling functions that use some super-dynamic PHP
   * variables.
   */
  if (func->lookupVarId(s_php_errormsg.get()) != -1 ||
      func->lookupVarId(s_http_response_header.get()) != -1) {
    return false;
  }

  return true;
}
bool MCGenerator::shouldTranslate(const Func* func) const {
  if (!shouldTranslateNoSizeLimit(func)) return false;
  // Otherwise, follow the Eval.JitAMaxUsage limit. However, we do
  // allow Optimize translations past that limit.
  return code.mainUsed() < RuntimeOption::EvalJitAMaxUsage ||
         m_tx.mode() == TransKind::Optimize;
}
static void populateLiveContext(RegionContext& ctx) {
  typedef RegionDesc::Location L;

  const ActRec*     const fp {vmfp()};
  const TypedValue* const sp {vmsp()};

  for (uint32_t i = 0; i < fp->m_func->numLocals(); ++i) {
    ctx.liveTypes.push_back(
      { L::Local{i}, liveTVType(frame_local(fp, i)) }
    );
  }

  int32_t stackOff = 0;
  visitStackElems(
    fp, sp, ctx.bcOffset,
    [&](const ActRec* ar) {
      // TODO(#2466980): when it's a Cls, we should pass the Class* in
      // the type.
      auto const objOrCls =
        ar->hasThis()  ? Type::SubObj(ar->getThis()->getVMClass()) :
        ar->hasClass() ? Type::Cls
                       : Type::Nullptr;

      ctx.preLiveARs.push_back({
        stackOff,
        ar->m_func,
        objOrCls
      });
      FTRACE(2, "added prelive ActRec {}\n", show(ctx.preLiveARs.back()));
      stackOff += kNumActRecCells;
    },
    [&](const TypedValue* tv) {
      ctx.liveTypes.push_back(
        { L::Stack{ctx.spOffset - stackOff}, liveTVType(tv) }
      );
      stackOff++;
      FTRACE(2, "added live type {}\n", show(ctx.liveTypes.back()));
    }
  );
}
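// The RegionContext assembled here (live local types, pre-live ActRecs, and
// live stack types) is what selectRegion() consumes in translateWork() when
// forming a Live or Profile region.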
TCA
MCGenerator::createTranslation(const TranslArgs& args) {
  if (!shouldTranslate(args.sk.func())) return nullptr;

  /*
   * Try to become the writer. We delay this until we *know* we will have
   * a need to create new translations, instead of just trying to win the
   * lottery at the dawn of time. Hopefully lots of requests won't require
   * any new translation.
   */
  LeaseHolder writer(Translator::WriteLease());
  if (!writer || !shouldTranslate(args.sk.func())) return nullptr;

  auto sk = args.sk;
  if (auto sr = m_tx.getSrcDB().find(sk)) {
    TCA tca = sr->getTopTranslation();
    if (tca) {
      // Handle extremely unlikely race; someone may have just already
      // added the first instance of this SrcRec while we did a
      // non-blocking wait on the write lease.
      return tca;
    }
    // Since we are holding the write lease, we know that sk is properly
    // initialized, except that it has no translations (due to
    // replaceOldTranslations).
    return retranslate(args);
  }

  // We put retranslate requests at the end of our slab to more frequently
  // allow conditional jump fall-throughs.
  TCA astart          = code.main().frontier();
  TCA realColdStart   = code.realCold().frontier();
  TCA realFrozenStart = code.realFrozen().frontier();
  TCA req = emitServiceReq(code.cold(), REQ_RETRANSLATE,
                           sk.offset(), TransFlags().packed);
  SKTRACE(1, sk, "inserting anchor translation for (%p,%d) at %p\n",
          sk.unit(), sk.offset(), req);
  SrcRec* sr = m_tx.getSrcRec(sk);
  sr->setFuncInfo(sk.func());
  sr->setAnchorTranslation(req);

  size_t asize          = code.main().frontier() - astart;
  size_t realColdSize   = code.realCold().frontier() - realColdStart;
  size_t realFrozenSize = code.realFrozen().frontier() - realFrozenStart;

  if (realColdSize && RuntimeOption::EvalDumpTCAnchors) {
    TransRec tr(sk, TransKind::Anchor,
                astart, asize, realColdStart, realColdSize,
                realFrozenStart, realFrozenSize);
    m_tx.addTranslation(tr);
    if (RuntimeOption::EvalJitUseVtuneAPI) {
      reportTraceletToVtune(sk.unit(), sk.func(), tr);
    }

    if (m_tx.profData()) {
      m_tx.profData()->addTransNonProf(TransKind::Anchor, sk);
    }
    assert(!m_tx.isTransDBEnabled() ||
           m_tx.getTransRec(realColdStart)->kind == TransKind::Anchor);
  }

  return retranslate(args);
}
TCA
MCGenerator::lookupTranslation(SrcKey sk) const {
  if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
    return sr->getTopTranslation();
  }
  return nullptr;
}
TCA
MCGenerator::translate(const TranslArgs& args) {
  assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
  assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
  assert(m_tx.mode() != TransKind::Invalid);
  SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };

  if (!shouldTranslate(args.sk.func())) return nullptr;

  auto func = const_cast<Func*>(args.sk.func());
  CodeCache::Selector cbSel(CodeCache::Selector::Args(code)
                            .profile(m_tx.mode() == TransKind::Profile)
                            .hot(RuntimeOption::EvalHotFuncCount &&
                                 (func->attrs() & AttrHot) && m_tx.useAHot()));

  auto start = translateWork(args);

  if (args.setFuncBody) {
    func->setFuncBody(start);
  }
  SKTRACE(1, args.sk, "translate moved head from %p to %p\n",
          getTopTranslation(args.sk), start);

  return start;
}
TCA
MCGenerator::getCallArrayPrologue(Func* func) {
  TCA tca = func->getFuncBody();
  if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;

  DVFuncletsVec dvs = func->getDVFunclets();

  if (dvs.size()) {
    LeaseHolder writer(Translator::WriteLease());
    if (!writer) return nullptr;
    tca = func->getFuncBody();
    if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
    tca = backEnd().emitCallArrayPrologue(func, dvs);
    func->setFuncBody(tca);
  } else {
    SrcKey sk(func, func->base(), false);
    auto args = TranslArgs{sk, false};
    args.setFuncBody = true;
    tca = mcg->getTranslation(args);
  }

  return tca;
}
void
MCGenerator::smashPrologueGuards(TCA* prologues, int numPrologues,
                                 const Func* func) {
  for (int i = 0; i < numPrologues; i++) {
    if (prologues[i] != m_tx.uniqueStubs.fcallHelperThunk
        && backEnd().funcPrologueHasGuard(prologues[i], func)) {
      backEnd().funcPrologueSmashGuard(prologues[i], func);
    }
  }
}
/*
 * Given a callee and a number of args, match up to the callee's
 * argument expectations and dispatch.
 *
 * Call/return hand-shaking is a bit funny initially. At translation time,
 * we don't necessarily know what function we're calling. For instance, a
 * nested call like f(g()) leads to a set of basic blocks in which the
 * outer fcall is not statically bindable in our execution model.
 *
 * We decouple the call work into a per-callsite portion, responsible
 * for recording the return address, and a per-(callee, numArgs) portion,
 * responsible for fixing up arguments and dispatching to remaining
 * code. We call the per-callee portion a "prologue."
 *
 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
 * we're running "between" basic blocks, with all VM registers sync'ed.
 * However, we're also called in the middle of basic blocks, when dropping
 * entries into func->m_prologues. So don't go around using the
 * translation-time values of vmfp()/vmsp(), since they have an
 * unpredictable relationship to the source.
 */
bool
MCGenerator::checkCachedPrologue(const Func* func, int paramIdx,
                                 TCA& prologue) const {
  prologue = (TCA)func->getPrologue(paramIdx);
  if (prologue != m_tx.uniqueStubs.fcallHelperThunk) {
    TRACE(1, "cached prologue %s(%d) -> cached %p\n",
          func->fullName()->data(), paramIdx, prologue);
    assert(isValidCodeAddress(prologue));
    return true;
  }
  return false;
}
TCA
MCGenerator::getFuncPrologue(Func* func, int nPassed, ActRec* ar,
                             bool forRegeneratePrologue) {
  TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
  int const numParams = func->numNonVariadicParams();
  int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;

  bool const funcIsMagic = func->isMagic();

  // Do a quick test before grabbing the write lease.
  TCA prologue;
  if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;

  Offset entry = func->getEntryForNumArgs(nPassed);
  SrcKey funcBody(func, entry, false);

  if (func->isClonedClosure()) {
    interp_set_regs(ar, (Cell*)ar - func->numSlotsInFrame(), entry);
    auto tca = getTranslation(TranslArgs{funcBody, false});
    tl_regState = VMRegState::DIRTY;
    if (tca) {
      func->setPrologue(paramIndex, tca);
    }
    return tca;
  }

  LeaseHolder writer(Translator::WriteLease());
  if (!writer) return nullptr;

  // If we're regenerating a prologue, we want to check shouldTranslate()
  // but ignore the code size limits. We still want to respect the global
  // translation limit and other restrictions, though.
  if (forRegeneratePrologue) {
    if (!shouldTranslateNoSizeLimit(func)) return nullptr;
  } else {
    if (!shouldTranslate(func)) return nullptr;
  }

  // Double check the prologue array now that we have the write lease
  // in case another thread snuck in and set the prologue already.
  if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;

  // We're coming from a BIND_CALL service request, so enable
  // profiling if we haven't optimized the function entry yet.
  assert(m_tx.mode() == TransKind::Invalid ||
         m_tx.mode() == TransKind::Prologue);
  if (m_tx.mode() == TransKind::Invalid && profileSrcKey(funcBody)) {
    m_tx.setMode(TransKind::Proflogue);
  } else {
    m_tx.setMode(TransKind::Prologue);
  }
  SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };

  CodeCache::Selector cbSel(CodeCache::Selector::Args(code)
                            .profile(m_tx.mode() == TransKind::Proflogue)
                            .hot(RuntimeOption::EvalHotFuncCount &&
                                 (func->attrs() & AttrHot) && m_tx.useAHot()));

  assert(m_fixups.empty());
  // If we're close to a cache line boundary, just burn some space to
  // try to keep the func and its body on fewer total lines.
  if (((uintptr_t)code.main().frontier() & backEnd().cacheLineMask()) >=
      (backEnd().cacheLineSize() / 2)) {
    backEnd().moveToAlign(code.main(), MoveToAlignFlags::kCacheLineAlign);
  }
  m_fixups.m_alignFixups.emplace(
    code.main().frontier(), std::make_pair(backEnd().cacheLineSize() / 2, 0));

  // Careful: this isn't necessarily the real entry point. For funcIsMagic
  // prologues, this is just a possible prologue.
  TCA aStart = code.main().frontier();
  TCA start  = aStart;
  TCA realColdStart   = mcg->code.realCold().frontier();
  TCA realFrozenStart = mcg->code.realFrozen().frontier();

  auto const skFuncBody = backEnd().emitFuncPrologue(
    code.main(), code.cold(), func, funcIsMagic, nPassed, start, aStart);
  m_fixups.process(nullptr);

  assert(backEnd().funcPrologueHasGuard(start, func));
  TRACE(2, "funcPrologue mcg %p %s(%d) setting prologue %p\n",
        this, func->fullName()->data(), nPassed, start);
  assert(isValidCodeAddress(start));
  func->setPrologue(paramIndex, start);

  assert(m_tx.mode() == TransKind::Prologue ||
         m_tx.mode() == TransKind::Proflogue);
  TransRec tr(skFuncBody,
              m_tx.mode(),
              aStart,          code.main().frontier()       - aStart,
              realColdStart,   code.realCold().frontier()   - realColdStart,
              realFrozenStart, code.realFrozen().frontier() - realFrozenStart);
  m_tx.addTranslation(tr);
  if (RuntimeOption::EvalJitUseVtuneAPI) {
    reportTraceletToVtune(func->unit(), func, tr);
  }

  if (m_tx.profData()) {
    m_tx.profData()->addTransPrologue(m_tx.mode(), skFuncBody, paramIndex);
  }

  recordGdbTranslation(skFuncBody, func,
                       code.main(), aStart, false, true);
  recordBCInstr(OpFuncPrologue, aStart, code.main().frontier(), false);

  assert(m_numTrans <= RuntimeOption::EvalJitGlobalTranslationLimit);

  return start;
}
/*
 * Given the proflogueTransId for a TransProflogue translation,
 * regenerate the prologue (as a TransPrologue). Returns the starting
 * address for the translation corresponding to triggerSk, if such
 * translation is generated; otherwise returns nullptr.
 */
TCA
MCGenerator::regeneratePrologue(TransID prologueTransId, SrcKey triggerSk) {
  Func* func = m_tx.profData()->transFunc(prologueTransId);
  int  nArgs = m_tx.profData()->prologueArgs(prologueTransId);

  // Regenerate the prologue.
  func->resetPrologue(nArgs);
  m_tx.setMode(TransKind::Prologue);
  SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };
  auto const start = getFuncPrologue(
    func,
    nArgs,
    nullptr /* ActRec */,
    true /* regeneratePrologue */
  );
  if (!start) return nullptr;

  func->setPrologue(nArgs, start);

  // Smash callers of the old prologue with the address of the new one.
  PrologueCallersRec* pcr =
    m_tx.profData()->prologueCallers(prologueTransId);
  for (TCA toSmash : pcr->mainCallers()) {
    backEnd().smashCall(toSmash, start);
  }
  // If the prologue has a guard, then smash its guard-callers as well.
  if (backEnd().funcPrologueHasGuard(start, func)) {
    TCA guard = backEnd().funcPrologueToGuard(start, func);
    for (TCA toSmash : pcr->guardCallers()) {
      backEnd().smashCall(toSmash, guard);
    }
  }
  pcr->clearAllCallers();

  // If this prologue has a DV funclet, then generate a translation
  // for the DV funclet right after the prologue. However, skip
  // cloned closures because their prologues are actually the DV
  // funclets.
  TCA triggerSkStart = nullptr;
  if (nArgs < func->numNonVariadicParams() && !func->isClonedClosure()) {
    auto paramInfo = func->params()[nArgs];
    if (paramInfo.hasDefaultValue()) {
      m_tx.setMode(TransKind::Optimize);
      SrcKey funcletSK(func, paramInfo.funcletOff, false);
      auto funcletTransId = m_tx.profData()->dvFuncletTransId(func, nArgs);
      if (funcletTransId != kInvalidTransID) {
        invalidateSrcKey(funcletSK);
        auto args = TranslArgs{funcletSK, false};
        args.transId = funcletTransId;
        auto dvStart = translate(args);
        if (dvStart && !triggerSkStart && funcletSK == triggerSk) {
          triggerSkStart = dvStart;
        }
        // Flag that this translation has been retranslated, so that
        // it's not retranslated again along with the function body.
        m_tx.profData()->setOptimized(funcletSK);
      }
    }
  }

  return triggerSkStart;
}
/*
 * Regenerate all prologues of func that were previously generated.
 * The prologues are sorted in ascending order of profile counters.
 * For prologues with corresponding DV funclets, their corresponding
 * DV funclet will be regenerated right after them. The idea is to
 * generate the function body right after calling this function, so
 * that all prologues are placed right before it, and with the hottest
 * prologues closer to it.
 *
 * Returns the starting address for the translation corresponding to
 * triggerSk, if such translation is generated; otherwise returns
 * nullptr.
 */
TCA
MCGenerator::regeneratePrologues(Func* func, SrcKey triggerSk) {
  TCA triggerStart = nullptr;
  std::vector<TransID> prologTransIDs;

  for (int nArgs = 0; nArgs < func->numPrologues(); nArgs++) {
    TransID tid = m_tx.profData()->prologueTransId(func, nArgs);
    if (tid != kInvalidTransID) {
      prologTransIDs.push_back(tid);
    }
  }

  std::sort(prologTransIDs.begin(), prologTransIDs.end(),
            [&](TransID t1, TransID t2) -> bool {
              // This sorts in ascending order of hotness. Note that
              // transCounters start at JitPGOThreshold and count down, so a
              // larger remaining counter means a colder prologue.
              return m_tx.profData()->transCounter(t1) >
                     m_tx.profData()->transCounter(t2);
            });

  for (TransID tid : prologTransIDs) {
    TCA start = regeneratePrologue(tid, triggerSk);
    if (triggerStart == nullptr && start != nullptr) {
      triggerStart = start;
    }
  }

  return triggerStart;
}
/*
 * Runtime service handler that patches a jmp to the translation of
 * u:dest from toSmash.
 */
TCA
MCGenerator::bindJmp(TCA toSmash, SrcKey destSk, ServiceRequest req,
                     TransFlags trflags, bool& smashed) {
  auto args = TranslArgs{destSk, false};
  args.flags = trflags;
  auto tDest = getTranslation(args);
  if (!tDest) return nullptr;

  LeaseHolder writer(Translator::WriteLease());
  if (!writer) return tDest;

  SrcRec* sr = m_tx.getSrcRec(destSk);
  // The top translation may have changed while we waited for the
  // write lease, so read it again. If it was replaced with a new
  // translation, then bind to the new one. If it was invalidated,
  // then don't bind the jump.
  tDest = sr->getTopTranslation();
  if (tDest == nullptr) return nullptr;

  if (req == REQ_BIND_ADDR) {
    auto addr = reinterpret_cast<TCA*>(toSmash);
    if (*addr == tDest) {
      // Already smashed.
      return tDest;
    }
    sr->chainFrom(IncomingBranch::addr(addr));
  } else {
    DecodedInstruction di(toSmash);
    if (di.isBranch() && !di.isJmp()) {
      auto jt = backEnd().jccTarget(toSmash);
      if (jt == tDest) {
        // Already smashed.
        return tDest;
      }
      sr->chainFrom(IncomingBranch::jccFrom(toSmash));
    } else {
      assert(!backEnd().jccTarget(toSmash));
      if (!backEnd().jmpTarget(toSmash)
          || backEnd().jmpTarget(toSmash) == tDest) {
        // Already smashed, or not a smashable jmp.
        return tDest;
      }
      sr->chainFrom(IncomingBranch::jmpFrom(toSmash));
    }
  }
  smashed = true;
  return tDest;
}
/*
 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
 *
 *   1:         j<CC> stubJmpccFirst
 *              jmp   stubJmpccFirst
 *
 * Our "taken" argument tells us whether the branch at 1: was taken or
 * not; and therefore which of offTaken and offNotTaken to continue executing.
 * If we did take the branch, we now rewrite the code so that the branch is
 * straightened. This predicts that subsequent executions will go the same way
 * as the first execution.
 *
 *              jn<CC> stubJmpccSecond:offNotTaken
 *              nop5   ; fallthru, or jmp if there's already a translation.
 *
 * If we did not take the branch, we leave the sense of the condition
 * intact, while patching it up to go to the unexplored code:
 *
 *              j<CC> stubJmpccSecond:offTaken
 *              nop5
 */
TCA
MCGenerator::bindJmpccFirst(TCA toSmash,
                            SrcKey skTaken, SrcKey skNotTaken,
                            bool taken,
                            bool& smashed) {
  LeaseHolder writer(Translator::WriteLease());
  if (!writer) return nullptr;
  auto skWillExplore = taken ? skTaken : skNotTaken;
  auto skWillDefer   = taken ? skNotTaken : skTaken;
  auto dest = skWillExplore;
  auto cc = backEnd().jccCondCode(toSmash);
  TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
        "taken %d\n",
        skWillExplore.offset(), skWillDefer.offset(), cc, taken);

  // We want the branch to point to whichever side has not been explored yet.
  if (taken) cc = ccNegate(cc);

  auto& cb = code.blockFor(toSmash);

  // It's not clear where the IncomingBranch should go if cb is code.frozen().
  assert(&cb != &code.frozen());

  // XXX Use of kJmp*Len here is a layering violation.

  // Can we just directly fall through?
  // A jmp + jz takes 5 + 6 = 11 bytes.
  bool fallThru = toSmash + kJmpccLen + kJmpLen == cb.frontier() &&
                  !m_tx.getSrcDB().find(dest);

  auto tDest = getTranslation(TranslArgs{dest, !fallThru});
  if (!tDest) return nullptr;

  if (backEnd().jmpTarget(toSmash + kJmpccLen)
      != backEnd().jccTarget(toSmash)) {
    // Someone else already smashed this one. Ideally we would
    // just re-execute from toSmash -- except the flags will have
    // been trashed.
    return tDest;
  }

  TCA stub = emitEphemeralServiceReq(code.frozen(),
                                     getFreeStub(code.frozen(),
                                                 &mcg->cgFixups()),
                                     REQ_BIND_JMPCC_SECOND,
                                     RipRelative(toSmash),
                                     skWillDefer.toAtomicInt(),
                                     cc,
                                     TransFlags{}.packed);

  mcg->cgFixups().process(nullptr);
  smashed = true;
  assert(Translator::WriteLease().amOwner());
  /*
   * Roll over the jcc and the jmp/fallthru. E.g., from:
   *
   *     toSmash:    jcc   <jmpccFirstStub>
   *     toSmash+6:  jmp   <jmpccFirstStub>
   *     toSmash+11: <probably the new translation == tdest>
   *
   * to:
   *
   *     toSmash:    j[n]z <jmpccSecondStub>
   *     toSmash+6:  nop5
   *     toSmash+11: newHotness
   */
  CodeCursor cg(cb, toSmash);
  m_tx.getSrcRec(dest)->chainFrom(IncomingBranch::jmpFrom(cb.frontier()));
  TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
  return tDest;
}
TCA
MCGenerator::bindCall(ActRec* calleeFrame,
                      bool isImmutable,
                      SrcKey& sk,
                      ServiceRequest& req) {
  TCA toSmash = backEnd().smashableCallFromReturn((TCA)calleeFrame->m_savedRip);
  Func* func = const_cast<Func*>(calleeFrame->m_func);
  int nArgs = calleeFrame->numArgs();
  TRACE(2, "bindCall %s, ActRec %p\n",
        func->fullName()->data(), calleeFrame);
  TCA start = getFuncPrologue(func, nArgs);
  TRACE(2, "bindCall -> %p\n", start);
  if (!isImmutable) {
    // We don't know we're calling the right function, so adjust start to point
    // to the dynamic check of ar->m_func.
    start = backEnd().funcPrologueToGuard(start, func);
  } else {
    TRACE(2, "bindCall immutably %s -> %p\n",
          func->fullName()->data(), start);
  }

  if (start) {
    LeaseHolder writer(Translator::WriteLease());
    if (writer) {
      // Someone else may have changed the func prologue while we waited for
      // the write lease, so read it again.
      start = getFuncPrologue(func, nArgs);
      if (!isImmutable) start = backEnd().funcPrologueToGuard(start, func);

      if (start && backEnd().callTarget(toSmash) != start) {
        assert(backEnd().callTarget(toSmash));
        TRACE(2, "bindCall smash %p -> %p\n",
              toSmash, start);
        backEnd().smashCall(toSmash, start);
        // For functions to be PGO'ed, if their current prologues are still
        // profiling ones (living in code.prof()), then save toSmash as a
        // caller to the prologue, so that it can later be smashed to call a
        // new prologue when it's generated.
        int calleeNumParams = func->numNonVariadicParams();
        int calledPrologNumArgs = (nArgs <= calleeNumParams ?
                                   nArgs : calleeNumParams + 1);
        if (code.prof().contains(start)) {
          if (isImmutable) {
            m_tx.profData()->addPrologueMainCaller(
              func, calledPrologNumArgs, toSmash);
          } else {
            m_tx.profData()->addPrologueGuardCaller(
              func, calledPrologNumArgs, toSmash);
          }
        }
      }
    }
    // sk: stale, but doesn't matter since we have a valid start TCA.
  } else {
    // We need translator help; we're not at the callee yet, so roll back. The
    // prelude has done some work already, but it should be safe to redo.
    TRACE(2, "bindCall rollback smash %p -> %p\n",
          toSmash, start);
    const FPIEnt* fe = liveFunc()->findPrecedingFPI(
      liveFunc()->base() + calleeFrame->m_soff);
    sk = SrcKey{liveFunc(), fe->m_fcallOff, vmfp()->resumed()};

    // We're going to have to interpret the FCall, so make sure handleSRHelper
    // doesn't think we're coming back from a REQ_BIND_CALL when we finally
    // make it back to the TC.
    req = REQ_RETRANSLATE;
  }

  return start;
}
class FreeRequestStubTrigger {
  TCA m_stub;
 public:
  explicit FreeRequestStubTrigger(TCA stub) : m_stub(stub) {
    TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub);
  }
  void operator()() {
    TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub);
    if (mcg->freeRequestStub(m_stub) != true) {
      // If we can't free the stub, enqueue again to retry.
      TRACE(3, "FreeStubTrigger: write lease failed, requeueing %p\n", m_stub);
      Treadmill::enqueue(FreeRequestStubTrigger(m_stub));
    }
  }
};
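// Stubs are reclaimed through the Treadmill so that no request that might
// still be executing the stub can observe it being recycled; when the write
// lease can't be acquired, the trigger simply re-enqueues itself for a later
// treadmill round.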
#ifdef DEBUG
struct DepthGuard {
  static __thread int m_depth;
  DepthGuard()  { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
  ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }

  bool depthOne() const { return m_depth == 1; }
};
__thread int DepthGuard::m_depth;
#else
struct DepthGuard { bool depthOne() const { return false; } };
#endif
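// DepthGuard tracks nested VM entries in a thread-local counter; the trivial
// definition above is the fallback for builds without the counter. Its
// depthOne() is used below so the JitStressLease debugging code only cedes
// the write lease at the outermost TC entry.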
void
MCGenerator::enterTC(TCA start, ActRec* stashedAR) {
  assert(isValidCodeAddress(start));
  assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
  assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);

  Translator::WriteLease().gremlinUnlock();
  assert(!Translator::WriteLease().amOwner());

  if (Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
    auto skData = SrcKey{liveFunc(), vmpc(), liveResumed()}.toAtomicInt();
    Trace::ringbufferEntry(RBTypeEnterTC, skData, (uint64_t)start);
  }

  DepthGuard d;
  tl_regState = VMRegState::DIRTY;
  backEnd().enterTCHelper(start, stashedAR);
  tl_regState = VMRegState::CLEAN;
  assert(isValidVMStackAddress(vmsp()));

  // Debugging code: cede the write lease half the time.
  if (RuntimeOption::EvalJitStressLease) {
    if (d.depthOne() && (rand() % 2) == 0) {
      Translator::WriteLease().gremlinLock();
    }
  }
}
TCA
MCGenerator::handleServiceRequest(ServiceReqInfo& info) {
  assert_native_stack_aligned();
  tl_regState = VMRegState::CLEAN; // partially a lie: vmpc() isn't synced

  auto callToExit = [&] {
    tl_regState = VMRegState::DIRTY;
    return m_tx.uniqueStubs.callToExit;
  };

  TCA start = nullptr;
  SrcKey sk;
  auto smashed = false;

  // If start is still nullptr at the end of this switch, we will enter the
  // interpreter at sk.
  switch (info.req) {
    case REQ_BIND_CALL: {
      auto calleeFrame = info.stashedAR;
      auto isImmutable = info.args[0].boolVal;
      start = bindCall(calleeFrame, isImmutable, sk, info.req);
      break;
    }

    case REQ_BIND_ADDR: {
      auto const toSmash = info.args[0].tca;
      sk = SrcKey::fromAtomicInt(info.args[1].sk);
      auto const trflags = info.args[2].trflags;
      start = bindJmp(toSmash, sk, info.req, trflags, smashed);
      break;
    }

    case REQ_BIND_JMPCC_FIRST: {
      auto toSmash    = info.args[0].tca;
      auto skTaken    = SrcKey::fromAtomicInt(info.args[1].sk);
      auto skNotTaken = SrcKey::fromAtomicInt(info.args[2].sk);
      auto taken      = info.args[3].boolVal;
      sk = taken ? skTaken : skNotTaken;
      start = bindJmpccFirst(toSmash, skTaken, skNotTaken, taken, smashed);
      break;
    }

    case REQ_RETRANSLATE: {
      INC_TPC(retranslate);
      sk = SrcKey{liveFunc(), info.args[0].offset, liveResumed()};
      auto trflags = info.args[1].trflags;
      auto args = TranslArgs{sk, true};
      args.flags = trflags;
      start = retranslate(args);
      SKTRACE(2, sk, "retranslated @%p\n", start);
      break;
    }

    case REQ_RETRANSLATE_OPT: {
      sk = SrcKey::fromAtomicInt(info.args[0].sk);
      auto transID = info.args[1].transID;
      start = retranslateOpt(transID, false);
      SKTRACE(2, sk, "retranslated-OPT: transId = %d start: @%p\n", transID,
              start);
      break;
    }

    case REQ_INTERPRET: {
      // Leave start as nullptr and let the dispatchBB() happen down below.
      sk = SrcKey{liveFunc(), info.args[0].offset, liveResumed()};
      break;
    }

    case REQ_POST_INTERP_RET: {
      // This is only responsible for the control-flow aspect of the Ret:
      // getting to the destination's translation, if any.
      auto ar = info.args[0].ar;
      auto caller = info.args[1].ar;
      assert(caller == vmfp());
      Unit* destUnit = caller->func()->unit();
      // Set PC so logging code in getTranslation doesn't get confused.
      vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
      sk = SrcKey{caller->func(), vmpc(), caller->resumed()};
      start = getTranslation(TranslArgs{sk, true});
      TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
            ar->m_func->fullName()->data(),
            caller->m_func->fullName()->data());
      break;
    }

    case REQ_RESUME: {
      if (UNLIKELY(vmpc() == 0)) return callToExit();
      sk = SrcKey{liveFunc(), vmpc(), liveResumed()};
      start = getTranslation(TranslArgs{sk, true});
      break;
    }

    case REQ_STACK_OVERFLOW: {
      if (info.stashedAR->m_sfp == vmfp()) {
        /*
         * The normal case - we were called via FCall, or FCallArray. We need
         * to construct the pc of the fcall from the return address (which will
         * be after the fcall). Because fcall is a variable length instruction,
         * and because we sometimes delete instructions from the instruction
         * stream, we need to use fpi regions to find the fcall.
         */
        const FPIEnt* fe = liveFunc()->findPrecedingFPI(
          liveUnit()->offsetOf(vmpc()));
        vmpc() = liveUnit()->at(fe->m_fcallOff);
        assert(isFCallStar(*reinterpret_cast<const Op*>(vmpc())));
        raise_error("Stack overflow");
      } else {
        /*
         * We were called via re-entry. Leak the params and the actrec, and
         * tell the unwinder that there's nothing left to do in this "entry".
         */
        vmsp() = reinterpret_cast<Cell*>(info.stashedAR + 1);
        throw VMReenterStackOverflow();
      }
    }
  }

  if (smashed && info.stub) {
    Treadmill::enqueue(FreeRequestStubTrigger(info.stub));
  }

  // If we don't have a starting address, interpret basic blocks until we end
  // up somewhere with a translation (which we may have created, if the lease
  // holder dropped it).
  if (!start) {
    vmpc() = sk.unit()->at(sk.offset());
    while (!start) {
      g_context->dispatchBB();
      if (!vmpc()) return callToExit();
      sk = SrcKey{liveFunc(), vmpc(), liveResumed()};
      start = getTranslation(TranslArgs{sk, true});
    }
  }

  if (Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
    auto skData = sk.valid() ? sk.toAtomicInt() : uint64_t(-1LL);
    Trace::ringbufferEntry(RBTypeResumeTC, skData, (uint64_t)start);
  }

  tl_regState = VMRegState::DIRTY;
  return start;
}
/*
 * Support for the stub freelist.
 */
TCA FreeStubList::maybePop() {
  StubNode* ret = m_list;
  if (ret) {
    TRACE(1, "alloc stub %p\n", ret);
    m_list = ret->m_next;
    ret->m_freed = ~kStubFree;
  }
  return (TCA)ret;
}
void FreeStubList::push(TCA stub) {
  /*
   * A freed stub may be released by Treadmill more than once if multiple
   * threads execute the service request before it is freed. We detect
   * duplicates by marking freed stubs.
   */
  StubNode* n = reinterpret_cast<StubNode*>(stub);
  if (n->m_freed == kStubFree) {
    TRACE(1, "already freed stub %p\n", stub);
    return;
  }
  n->m_freed = kStubFree;
  n->m_next = m_list;
  TRACE(1, "free stub %p (-> %p)\n", stub, m_list);
  m_list = n;
}
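// The freelist is intrusive: a freed stub's own bytes in code.frozen() are
// reinterpreted as a StubNode, whose m_freed sentinel (kStubFree) catches
// double-frees from multiple treadmill triggers racing on the same stub.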
bool
MCGenerator::freeRequestStub(TCA stub) {
  LeaseHolder writer(Translator::WriteLease());
  /*
   * If we can't acquire the write lock, the caller
   * (FreeRequestStubTrigger) retries.
   */
  if (!writer) return false;
  assert(code.frozen().contains(stub));
  m_freeStubs.push(stub);
  return true;
}
TCA
MCGenerator::getFreeStub(CodeBlock& frozen, CodeGenFixups* fixups) {
  TCA ret = m_freeStubs.maybePop();
  if (ret) {
    Stats::inc(Stats::Astub_Reused);
    always_assert(m_freeStubs.m_list == nullptr ||
                  code.isValidCodeAddress(TCA(m_freeStubs.m_list)));
    TRACE(1, "recycle stub %p\n", ret);
  } else {
    ret = frozen.frontier();
    Stats::inc(Stats::Astub_New);
    TRACE(1, "alloc new stub %p\n", ret);
  }

  if (fixups) {
    fixups->m_reusedStubs.emplace_back(ret);
  }
  return ret;
}
TCA
MCGenerator::getTranslatedCaller() const {
  DECLARE_FRAME_POINTER(fp);
  ActRec* framePtr = fp;  // can't directly mutate the register-mapped one
  for (; framePtr; framePtr = framePtr->m_sfp) {
    TCA rip = (TCA)framePtr->m_savedRip;
    if (isValidCodeAddress(rip)) {
      return rip;
    }
  }
  return nullptr;
}
void
MCGenerator::syncWork() {
  assert(tl_regState == VMRegState::DIRTY);
  m_fixupMap.fixup(g_context.getNoCheck());
  tl_regState = VMRegState::CLEAN;
  Stats::inc(Stats::TC_Sync);
}
// Get the address of the literal val in the global data section.
// If it's not there, add it to the map in m_fixups, which will
// be committed to m_literals when m_fixups.process() is called.
const uint64_t*
MCGenerator::allocLiteral(uint64_t val) {
  auto it = m_literals.find(val);
  if (it != m_literals.end()) {
    assert(*it->second == val);
    return it->second;
  }
  auto& pending = m_fixups.m_literals;
  it = pending.find(val);
  if (it != pending.end()) {
    assert(*it->second == val);
    return it->second;
  }
  auto addr = allocData<uint64_t>(sizeof(uint64_t), 1);
  *addr = val;
  return pending[val] = addr;
}
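// Literals are thus interned: repeated requests for the same 64-bit value
// share a single slot in the global data section, so emitted code can
// reference it through one stable address (e.g. rip-relative on x64).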
bool
MCGenerator::reachedTranslationLimit(SrcKey sk,
                                     const SrcRec& srcRec) const {
  if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) {
    if (debug && Trace::moduleEnabled(Trace::mcg, 2)) {
      const auto& tns = srcRec.translations();
      TRACE(1, "Too many (%zd) translations: %s, BC offset %d\n",
            tns.size(), sk.unit()->filepath()->data(),
            sk.offset());
      SKTRACE(2, sk, "{\n");
      TCA topTrans = srcRec.getTopTranslation();
      for (size_t i = 0; i < tns.size(); ++i) {
        const TransRec* rec = m_tx.getTransRec(tns[i]);
        SKTRACE(2, sk, "%zd %p\n", i, tns[i]);
        if (tns[i] == topTrans) {
          SKTRACE(2, sk, "%zd: *Top*\n", i);
        }
        if (rec->kind == TransKind::Anchor) {
          SKTRACE(2, sk, "%zd: Anchor\n", i);
        } else {
          SKTRACE(2, sk, "%zd: guards {\n", i);
          for (unsigned j = 0; j < rec->guards.size(); ++j) {
            FTRACE(2, "{}\n", rec->guards[j]);
          }
          SKTRACE(2, sk, "%zd } guards\n", i);
        }
      }
      SKTRACE(2, sk, "} /* Too many translations */\n");
    }
    return true;
  }
  return false;
}
void
MCGenerator::recordSyncPoint(CodeAddress frontier, Offset pcOff, Offset spOff) {
  m_fixups.m_pendingFixups.push_back(
    PendingFixup(frontier, Fixup(pcOff, spOff)));
}
/*
 * Equivalent to container.clear(), but guarantees to free
 * any memory associated with the container (eg clear
 * doesn't affect std::vector's capacity).
 */
template <typename T> void ClearContainer(T& container) {
  T().swap(container);
}
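// For example, for a vector whose capacity should actually be released:
//   std::vector<int> v;
//   v.reserve(4096);
//   ClearContainer(v);  // swap with a default-constructed temporary;
//                       // v is now empty and its storage is freed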
void
CodeGenFixups::process_only(
  GrowableVector<IncomingBranch>* inProgressTailBranches) {
  for (uint i = 0; i < m_pendingFixups.size(); i++) {
    TCA tca = m_pendingFixups[i].m_tca;
    assert(mcg->isValidCodeAddress(tca));
    mcg->fixupMap().recordFixup(tca, m_pendingFixups[i].m_fixup);
  }
  ClearContainer(m_pendingFixups);

  for (auto const& pair : m_pendingCatchTraces) {
    mcg->catchTraceMap().insert(pair.first, pair.second);
  }
  ClearContainer(m_pendingCatchTraces);

  for (auto const& elm : m_pendingJmpTransIDs) {
    mcg->getJmpToTransIDMap().insert(elm);
  }
  ClearContainer(m_pendingJmpTransIDs);

  mcg->literals().insert(m_literals.begin(), m_literals.end());
  ClearContainer(m_literals);

  if (inProgressTailBranches) {
    m_inProgressTailJumps.swap(*inProgressTailBranches);
  } else {
    assert(m_inProgressTailJumps.empty());
  }
}
void CodeGenFixups::clear() {
  ClearContainer(m_pendingFixups);
  ClearContainer(m_pendingCatchTraces);
  ClearContainer(m_pendingJmpTransIDs);
  ClearContainer(m_reusedStubs);
  ClearContainer(m_addressImmediates);
  ClearContainer(m_codePointers);
  ClearContainer(m_bcMap);
  ClearContainer(m_alignFixups);
  ClearContainer(m_inProgressTailJumps);
  ClearContainer(m_literals);
}
bool CodeGenFixups::empty() const {
  return
    m_pendingFixups.empty() &&
    m_pendingCatchTraces.empty() &&
    m_pendingJmpTransIDs.empty() &&
    m_reusedStubs.empty() &&
    m_addressImmediates.empty() &&
    m_codePointers.empty() &&
    m_bcMap.empty() &&
    m_alignFixups.empty() &&
    m_inProgressTailJumps.empty() &&
    m_literals.empty();
}
TCA
MCGenerator::translateWork(const TranslArgs& args) {
  Timer _t(Timer::translate);
  auto sk = args.sk;

  SKTRACE(1, sk, "translateWork\n");
  assert(m_tx.getSrcDB().find(sk));

  if (args.align) {
    mcg->backEnd().moveToAlign(code.main(),
                               MoveToAlignFlags::kNonFallthroughAlign);
  }

  TCA        start = code.main().frontier();
  TCA        coldStart = code.cold().frontier();
  TCA        realColdStart = code.realCold().frontier();
  TCA DEBUG_ONLY frozenStart = code.frozen().frontier();
  TCA        realFrozenStart = code.realFrozen().frontier();
  SrcRec&    srcRec = *m_tx.getSrcRec(sk);
  TransKind  transKindToRecord = TransKind::Interp;
  UndoMarker undoA(code.main());
  UndoMarker undoAcold(code.cold());
  UndoMarker undoAfrozen(code.frozen());
  UndoMarker undoGlobalData(code.data());

  auto const useLLVM =
    // HHIRBytecodeControlFlow causes vmsp stack manipulations we can't handle
    // right now: t4810319
    !RuntimeOption::EvalHHIRBytecodeControlFlow &&
    (RuntimeOption::EvalJitLLVM > 1 ||
     (RuntimeOption::EvalJitLLVM && m_tx.mode() == TransKind::Optimize));

  auto resetState = [&] {
    undoA.undo();
    undoAcold.undo();
    undoAfrozen.undo();
    undoGlobalData.undo();
    m_fixups.clear();
  };

  auto assertCleanState = [&] {
    assert(code.main().frontier() == start);
    assert(code.frozen().frontier() == frozenStart);
    assert(m_fixups.empty());
  };

  PostConditions pconds;
  RegionDescPtr region;
  if (!reachedTranslationLimit(sk, srcRec)) {
    // Attempt to create a region at this SrcKey.
    if (m_tx.mode() == TransKind::Optimize) {
      assert(RuntimeOption::EvalJitPGO);
      region = args.region;
      if (region) {
        assert(!region->empty());
      } else {
        assert(isValidTransID(args.transId));
        region = selectHotRegion(args.transId, this);
      }
      if (region && region->empty()) region = nullptr;
    } else {
      assert(m_tx.mode() == TransKind::Profile ||
             m_tx.mode() == TransKind::Live);
      RegionContext rContext { sk.func(), sk.offset(), liveSpOff(),
                               sk.resumed() };
      FTRACE(2, "populating live context for region\n");
      populateLiveContext(rContext);
      region = selectRegion(rContext, m_tx.mode());
    }

    auto result = TranslateResult::Retry;
    auto regionInterps = RegionBlacklist{};
    auto const initSpOffset = region ? region->entry()->initialSpOffset()
                                     : liveSpOff();

    while (region && result == TranslateResult::Retry) {
      auto const transContext = TransContext {
        RuntimeOption::EvalJitPGO
          ? m_tx.profData()->curTransID()
          : kInvalidTransID,
        sk, initSpOffset
      };

      HTS hhbcTrans { transContext };
      FTRACE(1, "{}{:-^40}{}\n",
             color(ANSI_COLOR_BLACK, ANSI_BGCOLOR_GREEN),
             " HHIR during translation ",
             color(ANSI_COLOR_END));

      try {
        assertCleanState();
        result = translateRegion(hhbcTrans, *region, regionInterps, args.flags,
                                 pconds);
        FTRACE(2, "translateRegion finished with result {}\n", show(result));
      } catch (const std::exception& e) {
        FTRACE(1, "translateRegion failed with '{}'\n", e.what());
        result = TranslateResult::Failure;
      }

      if (result != TranslateResult::Success) {
        // Translation failed or will be retried. Free resources for this
        // trace, rollback the translation cache frontiers, and discard any
        // pending fixups.
        resetState();
      }

      if (result == TranslateResult::Failure) {
        // If the region translator failed for an Optimize translation, it's OK
        // to do a Live translation for the function entry. Otherwise, fall
        // back to the interpreter.
        if (m_tx.mode() == TransKind::Optimize) {
          if (sk.getFuncId() == liveFunc()->getFuncId() &&
              liveUnit()->contains(vmpc()) &&
              sk.offset() == liveUnit()->offsetOf(vmpc()) &&
              sk.resumed() == liveResumed()) {
            m_tx.setMode(TransKind::Live);
            RegionContext rContext { sk.func(), sk.offset(), liveSpOff(),
                                     sk.resumed() };
            FTRACE(2, "populating live context for region after failed optimize"
                   "\n");
            populateLiveContext(rContext);
            region = selectRegion(rContext, m_tx.mode());
            result = TranslateResult::Retry;
          } else {
            region = nullptr;
          }
        }
      }
    }

    if (!region) m_tx.setMode(TransKind::Interp);

    if (result == TranslateResult::Success) {
      assert(m_tx.mode() == TransKind::Live ||
             m_tx.mode() == TransKind::Profile ||
             m_tx.mode() == TransKind::Optimize);
      transKindToRecord = m_tx.mode();
    }
  }

  if (transKindToRecord == TransKind::Interp) {
    assertCleanState();
    FTRACE(1, "emitting dispatchBB interp request for failed translation\n");
    backEnd().emitInterpReq(code.main(), code.cold(), sk);
  }

  m_fixups.m_alignFixups.emplace(
    start, std::make_pair(backEnd().cacheLineSize() - 1, 0));

  if (RuntimeOption::EvalProfileBC) {
    auto* unit = sk.unit();
    TransBCMapping prev{};
    for (auto& cur : m_fixups.m_bcMap) {
      if (!cur.aStart) continue;
      if (prev.aStart) {
        if (prev.bcStart < unit->bclen()) {
          recordBCInstr(unit->entry()[prev.bcStart],
                        prev.aStart, cur.aStart, false);
        }
      } else {
        recordBCInstr(OpTraceletGuard, start, cur.aStart, false);
      }
      prev = cur;
    }
  }

  recordGdbTranslation(sk, sk.func(), code.main(), start,
                       false, false);
  recordGdbTranslation(sk, sk.func(), code.cold(), coldStart,
                       false, false);
  if (RuntimeOption::EvalJitPGO) {
    if (transKindToRecord == TransKind::Profile) {
      always_assert(region);
      m_tx.profData()->addTransProfile(region, pconds);
    } else {
      m_tx.profData()->addTransNonProf(transKindToRecord, sk);
    }
  }

  TransRec tr(sk, transKindToRecord,
              start,           code.main().frontier()       - start,
              realColdStart,   code.realCold().frontier()   - realColdStart,
              realFrozenStart, code.realFrozen().frontier() - realFrozenStart,
              region, m_fixups.m_bcMap,
              useLLVM);
  m_tx.addTranslation(tr);
  if (RuntimeOption::EvalJitUseVtuneAPI) {
    reportTraceletToVtune(sk.unit(), sk.func(), tr);
  }

  GrowableVector<IncomingBranch> inProgressTailBranches;
  m_fixups.process(&inProgressTailBranches);

  // SrcRec::newTranslation() makes this code reachable. Do this last;
  // otherwise there's some chance of hitting in the reader threads whose
  // metadata is not yet visible.
  TRACE(1, "newTranslation: %p  sk: (func %d, bcOff %d)\n",
        start, sk.getFuncId(), sk.offset());
  srcRec.newTranslation(start, inProgressTailBranches);

  TRACE(1, "mcg: %zd-byte tracelet\n", code.main().frontier() - start);
  if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
    Trace::traceRelease("%s", getUsageString().c_str());
  }

  return start;
}
void MCGenerator::traceCodeGen(HTS& hts) {
  auto& unit = hts.unit;

  auto finishPass = [&](const char* msg, int level) {
    printUnit(level, unit, msg, nullptr, hts.irb->guards());
    assert(checkCfg(unit));
  };

  finishPass(" after initial translation ", kIRLevel);

  always_assert_flog(
    IMPLIES(cfgHasLoop(unit), RuntimeOption::EvalJitLoops),
    "IRUnit has loop but Eval.JitLoops=0"
  );

  optimize(unit, *hts.irb, m_tx.mode());
  finishPass(" after optimizing ", kOptLevel);

  always_assert(this == mcg);
  genCode(unit);

  assert(m_numTrans <= RuntimeOption::EvalJitGlobalTranslationLimit);
}
MCGenerator::MCGenerator()
  : m_backEnd(newBackEnd())
  , m_catchTraceMap(128)
{
  TRACE(1, "MCGenerator@%p startup\n", this);

  m_unwindRegistrar = register_unwind_region(code.base(), code.codeSize());

  static bool profileUp = false;
  if (!profileUp) {
    profileInit();
    profileUp = true;
  }

  if (Trace::moduleEnabledRelease(Trace::printir) &&
      !RuntimeOption::EvalJit) {
    Trace::traceRelease("TRACE=printir is set but the jit isn't on. "
                        "Did you mean to run with -vEval.Jit=1?\n");
  }
  if (Trace::moduleEnabledRelease(Trace::llvm, 1) ||
      RuntimeOption::EvalJitLLVMCounters) {
    g_bytecodesVasm.bind();
    g_bytecodesLLVM.bind();
  }
}
void MCGenerator::initUniqueStubs() {
  // Put the following stubs into ahot, rather than a.
  CodeCache::Selector cbSel(CodeCache::Selector::Args(code).
                            hot(m_tx.useAHot()));
  m_tx.uniqueStubs = backEnd().emitUniqueStubs();
  m_fixups.process(nullptr); // in case we generated literals
}
void MCGenerator::registerCatchBlock(CTCA ip, TCA block) {
  FTRACE(1, "registerCatchBlock: afterCall: {} block: {}\n", ip, block);
  m_fixups.m_pendingCatchTraces.emplace_back(ip, block);
}
folly::Optional<TCA> MCGenerator::getCatchTrace(CTCA ip) const {
  TCA* found = m_catchTraceMap.find(ip);
  if (found) return *found;
  return folly::none;
}
void MCGenerator::codeEmittedThisRequest(size_t& requestEntry,
                                         size_t& now) const {
  requestEntry = s_initialTCSize;
  now = code.totalUsed();
}
void MCGenerator::requestInit() {
  tl_regState = VMRegState::CLEAN;
  Timer::RequestInit();
  memset(&s_perfCounters, 0, sizeof(s_perfCounters));
  s_initialTCSize = code.totalUsed();
}
void MCGenerator::requestExit() {
  always_assert(!Translator::WriteLease().amOwner());
  TRACE_MOD(txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
            " kept, %15" PRId64 " grabbed\n",
            Process::GetThreadIdForTrace(), Translator::WriteLease().m_hintKept,
            Translator::WriteLease().m_hintGrabbed);
  Timer::RequestExit();

  if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
    Trace::traceRelease("MCGenerator perf counters for %s:\n",
                        g_context->getRequestUrl(50).c_str());
    for (int i = 0; i < tpc_num_counters; i++) {
      Trace::traceRelease("%-20s %10" PRId64 "\n",
                          kPerfCounterNames[i], s_perfCounters[i]);
    }
    Trace::traceRelease("\n");
  }

  if (Trace::moduleEnabledRelease(Trace::llvm, 1)) {
    auto llvm = *g_bytecodesLLVM;
    auto total = llvm + *g_bytecodesVasm;
    Trace::ftraceRelease(
      "{:9} / {:9} bytecodes ({:6.2f}%) handled by LLVM backend for {}\n",
      llvm, total, llvm * 100.0 / total, g_context->getRequestUrl(50)
    );
  }
}
bool
MCGenerator::isPseudoEvent(const char* event) {
  for (auto name : kPerfCounterNames) {
    if (!strcmp(event, name)) {
      return true;
    }
  }
  return false;
}
void
MCGenerator::getPerfCounters(Array& ret) {
  for (int i = 0; i < tpc_num_counters; i++) {
    // Until Perflab can automatically scale the values we give it to
    // an appropriate range, we have to fudge these numbers so they
    // look more like reasonable hardware counter values.
    ret.set(String::FromCStr(kPerfCounterNames[i]),
            s_perfCounters[i] * 1000);
  }

  for (auto const& pair : Timer::Counters()) {
    if (pair.second.total == 0 && pair.second.count == 0) continue;

    ret.set(String("jit_time_") + pair.first, pair.second.total);
  }

  if (RuntimeOption::EvalJitLLVMCounters) {
    ret.set(String("jit_instr_vasm"), *g_bytecodesVasm);
    ret.set(String("jit_instr_llvm"), *g_bytecodesLLVM);
  }
}
MCGenerator::~MCGenerator() {
}
static Debug::TCRange rangeFrom(const CodeBlock& cb, const TCA addr,
                                bool isAcold) {
  assert(cb.contains(addr));
  return Debug::TCRange(addr, cb.frontier(), isAcold);
}
void MCGenerator::recordBCInstr(uint32_t op,
                                const TCA addr,
                                const TCA end,
                                bool cold) {
  m_debugInfo.recordBCInstr(
    Debug::TCRange(addr, end, cold), op);
}
void MCGenerator::recordGdbTranslation(SrcKey sk,
                                       const Func* srcFunc,
                                       const CodeBlock& cb,
                                       const TCA start,
                                       bool exit,
                                       bool inPrologue) {
  if (start != cb.frontier()) {
    assert(Translator::WriteLease().amOwner());
    if (!RuntimeOption::EvalJitNoGdb) {
      m_debugInfo.recordTracelet(rangeFrom(cb, start, &cb == &code.cold()),
                                 srcFunc,
                                 reinterpret_cast<const Op*>(
                                   srcFunc->unit() ?
                                     srcFunc->unit()->at(sk.offset()) : nullptr
                                 ),
                                 exit, inPrologue);
    }
    if (RuntimeOption::EvalPerfPidMap) {
      m_debugInfo.recordPerfMap(rangeFrom(cb, start, &cb == &code.cold()),
                                srcFunc, exit, inPrologue);
    }
  }
}
void MCGenerator::recordGdbStub(const CodeBlock& cb,
                                const TCA start, const char* name) {
  if (!RuntimeOption::EvalJitNoGdb) {
    m_debugInfo.recordStub(rangeFrom(cb, start, &cb == &code.cold()),
                           name);
  }
}
std::vector<UsageInfo> MCGenerator::getUsageInfo() {
  std::vector<UsageInfo> tcUsageInfo;

  code.forEachBlock([&](const char* name, const CodeBlock& a) {
    tcUsageInfo.emplace_back(UsageInfo{std::string("code.") + name,
                                       a.used(),
                                       a.capacity(),
                                       true});
  });
  tcUsageInfo.emplace_back(UsageInfo{
    "data",
    code.data().used(),
    code.data().capacity(),
    true});
  tcUsageInfo.emplace_back(UsageInfo{
    "RDS",
    rds::usedBytes(),
    RuntimeOption::EvalJitTargetCacheSize * 3 / 4,
    false});
  tcUsageInfo.emplace_back(UsageInfo{
    "RDSLocal",
    rds::usedLocalBytes(),
    RuntimeOption::EvalJitTargetCacheSize * 3 / 4,
    false});
  tcUsageInfo.emplace_back(UsageInfo{
    "persistentRDS",
    rds::usedPersistentBytes(),
    RuntimeOption::EvalJitTargetCacheSize / 4,
    false});
  tcUsageInfo.emplace_back(UsageInfo{
    "cloned-closures",
    Func::s_totalClonedClosures,
    100000 /* dummy value -- there isn't really a capacity for this */});
  return tcUsageInfo;
}
std::string MCGenerator::getUsageString() {
  std::string usage;
  size_t totalBlockSize = 0;
  size_t totalBlockCapacity = 0;
  auto addRow = [&](UsageInfo blockUsageInfo) {
    auto percent = blockUsageInfo.m_capacity ?
      100 * blockUsageInfo.m_used / blockUsageInfo.m_capacity : 0;
    usage += folly::format("mcg: {:9} bytes ({}%) in {}\n",
                           blockUsageInfo.m_used,
                           percent,
                           blockUsageInfo.m_name).str();
    if (blockUsageInfo.m_global) {
      totalBlockSize += blockUsageInfo.m_used;
      totalBlockCapacity += blockUsageInfo.m_capacity;
    }
  };
  auto tcUsageInfo = getUsageInfo();
  for_each(tcUsageInfo.begin(), tcUsageInfo.end(), addRow);
  addRow(UsageInfo{"total", totalBlockSize, totalBlockCapacity, false});
  return usage;
}
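
// Sample of the string this builds (the sizes are illustrative only; the
// format comes from the folly::format call above):
//
//   mcg:   4194304 bytes (12%) in code.main
//   mcg:    524288 bytes (3%) in code.cold
//   ...
//   mcg:  13631488 bytes (8%) in total
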
std::string MCGenerator::getTCAddrs() {
  std::string addrs;
  code.forEachBlock([&](const char* name, const CodeBlock& a) {
    addrs += folly::format("{}: {}\n", name, a.base()).str();
  });
  return addrs;
}

bool MCGenerator::addDbgGuards(const Unit* unit) {
  // This grabs the write lease and iterates through the whole SrcDB...
  struct timespec tsBegin, tsEnd;
  {
    BlockingLeaseHolder writer(Translator::WriteLease());
    if (!writer) {
      return false;
    }
    assert(mcg->cgFixups().empty());
    HPHP::Timer::GetMonotonicTime(tsBegin);
    // Docs say that even find() _could_ invalidate the iterator; in
    // practice that should be very rare, so go with it for now.
    for (SrcDB::const_iterator it = m_tx.getSrcDB().begin();
         it != m_tx.getSrcDB().end(); ++it) {
      SrcKey const sk = SrcKey::fromAtomicInt(it->first);
      // We may have a SrcKey to a deleted function. NB: this may miss a
      // race with deleting a Func. See task #2826313.
      if (!Func::isFuncIdValid(sk.getFuncId())) continue;
      SrcRec* sr = it->second;
      if (sr->unitMd5() == unit->md5() &&
          !sr->hasDebuggerGuard() &&
          m_tx.isSrcKeyInBL(sk)) {
        addDbgGuardImpl(sk, sr);
      }
    }
    mcg->cgFixups().process(nullptr);
  }
  HPHP::Timer::GetMonotonicTime(tsEnd);
  int64_t elapsed = gettime_diff_us(tsBegin, tsEnd);
  if (Trace::moduleEnabledRelease(Trace::mcg, 5)) {
    Trace::traceRelease("addDbgGuards got lease for %" PRId64 " us\n",
                        elapsed);
  }
  return true;
}
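
// Usage sketch (hypothetical debugger hook; `unit` is assumed to be the
// unit that just gained a breakpoint):
//
//   if (!mcg->addDbgGuards(unit)) {
//     // Write lease unavailable; no guards were added.
//   }
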
bool MCGenerator::addDbgGuard(const Func* func, Offset offset, bool resumed) {
  SrcKey sk(func, offset, resumed);
  {
    if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
      if (sr->hasDebuggerGuard()) {
        return true;
      }
    } else {
      // no translation yet
      return true;
    }
  }
  if (debug) {
    if (!m_tx.isSrcKeyInBL(sk)) {
      TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
      return false;
    }
  }
  BlockingLeaseHolder writer(Translator::WriteLease());
  if (!writer) {
    return false;
  }
  assert(mcg->cgFixups().empty());
  {
    if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
      addDbgGuardImpl(sk, sr);
    }
  }
  mcg->cgFixups().process(nullptr);
  return true;
}

bool MCGenerator::dumpTCCode(const char* filename) {
#define OPEN_FILE(F, SUFFIX)                                    \
  std::string F ## name = std::string(filename).append(SUFFIX); \
  FILE* F = fopen(F ## name .c_str(), "wb");                    \
  if (F == nullptr) return false;                               \
  SCOPE_EXIT{ fclose(F); };
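
  // For instance, OPEN_FILE(aFile, "_a") expands (token-pasting F ## name
  // to `aFilename`) into:
  //
  //   std::string aFilename = std::string(filename).append("_a");
  //   FILE* aFile = fopen(aFilename.c_str(), "wb");
  //   if (aFile == nullptr) return false;
  //   SCOPE_EXIT{ fclose(aFile); };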

  OPEN_FILE(ahotFile,       "_ahot");
  OPEN_FILE(aFile,          "_a");
  OPEN_FILE(aprofFile,      "_aprof");
  OPEN_FILE(acoldFile,      "_acold");
  OPEN_FILE(afrozenFile,    "_afrozen");
  OPEN_FILE(helperAddrFile, "_helpers_addrs.txt");

#undef OPEN_FILE

  // Dump starting from the hot region.
  size_t count = code.hot().used();
  bool result = (fwrite(code.hot().base(), 1, count, ahotFile) == count);
  if (result) {
    count = code.main().used();
    result = (fwrite(code.main().base(), 1, count, aFile) == count);
  }
  if (result) {
    count = code.prof().used();
    result = (fwrite(code.prof().base(), 1, count, aprofFile) == count);
  }
  if (result) {
    count = code.cold().used();
    result = (fwrite(code.cold().base(), 1, count, acoldFile) == count);
  }
  if (result) {
    count = code.frozen().used();
    result = (fwrite(code.frozen().base(), 1, count, afrozenFile) == count);
  }
  return result;
}

// Returns true on success.
bool MCGenerator::dumpTC(bool ignoreLease) {
  folly::Optional<BlockingLeaseHolder> writer;
  if (!ignoreLease) {
    writer.emplace(Translator::WriteLease());
    if (!*writer) return false;
  }
  bool success = dumpTCData();
  if (success) {
    success = dumpTCCode("/tmp/tc_dump");
  }
  return success;
}

// Returns true on success.
bool tc_dump(void) {
  return mcg && mcg->dumpTC();
}

// Returns true on success.
bool MCGenerator::dumpTCData() {
  gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
  if (!tcDataFile) return false;

  if (!gzprintf(tcDataFile,
                "repo_schema      = %s\n"
                "ahot.base        = %p\n"
                "ahot.frontier    = %p\n"
                "a.base           = %p\n"
                "a.frontier       = %p\n"
                "aprof.base       = %p\n"
                "aprof.frontier   = %p\n"
                "acold.base       = %p\n"
                "acold.frontier   = %p\n"
                "afrozen.base     = %p\n"
                "afrozen.frontier = %p\n\n",
                kRepoSchemaId,
                code.hot().base(), code.hot().frontier(),
                code.main().base(), code.main().frontier(),
                code.prof().base(), code.prof().frontier(),
                code.cold().base(), code.cold().frontier(),
                code.frozen().base(), code.frozen().frontier())) {
    return false;
  }

  if (!gzprintf(tcDataFile, "total_translations = %zu\n\n",
                m_tx.getCurrentTransID())) {
    return false;
  }

  for (TransID t = 0; t < m_tx.getCurrentTransID(); t++) {
    if (gzputs(tcDataFile,
               m_tx.getTransRec(t)->print(m_tx.getTransCounter(t)).c_str()) ==
        -1) {
      return false;
    }
  }

  gzclose(tcDataFile);
  return true;
}
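
// The dump is plain gzipped text, so e.g. `zcat /tmp/tc_data.txt.gz` shows
// the region bases/frontiers followed by one printed record per translation.
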
void MCGenerator::invalidateSrcKey(SrcKey sk) {
  assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO);
  assert(Translator::WriteLease().amOwner());
  /*
   * Reroute existing translations for SrcKey to an as-yet indeterminate
   * new one.
   */
  SrcRec* sr = m_tx.getSrcDB().find(sk);
  assert(sr);
  /*
   * Since previous translations aren't reachable from here, we know we
   * just created some garbage in the TC. We currently have no mechanism
   * to reclaim this.
   */
  sr->replaceOldTranslations();
}

void MCGenerator::setJmpTransID(TCA jmp) {
  if (m_tx.mode() != TransKind::Profile) return;

  TransID transId = m_tx.profData()->curTransID();
  FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp, transId);
  m_fixups.m_pendingJmpTransIDs.emplace_back(jmp, transId);
}

void RelocationInfo::recordRange(TCA start, TCA end,
                                 TCA destStart, TCA destEnd) {
  m_srcRanges.emplace_back(start, end);
  m_dstRanges.emplace_back(destStart, destEnd);
  m_adjustedAddresses[start].second = destStart;
  m_adjustedAddresses[end].first = destEnd;
}

void RelocationInfo::recordAddress(TCA src, TCA dest, int range) {
  m_adjustedAddresses.emplace(src, std::make_pair(dest, dest + range));
}

TCA RelocationInfo::adjustedAddressAfter(TCA addr) const {
  auto it = m_adjustedAddresses.find(addr);
  if (it == m_adjustedAddresses.end()) return nullptr;

  return it->second.second;
}

TCA RelocationInfo::adjustedAddressBefore(TCA addr) const {
  auto it = m_adjustedAddresses.find(addr);
  if (it == m_adjustedAddresses.end()) return nullptr;

  return it->second.first;
}
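
// A small sketch of the map's semantics (`src` and `dest` are hypothetical
// TCAs): after
//
//   RelocationInfo rel;
//   rel.recordAddress(src, dest, 4);
//
// the entry for `src` is {dest, dest + 4}, so adjustedAddressBefore(src)
// yields dest (where the relocated code begins) and
// adjustedAddressAfter(src) yields dest + 4 (just past it).
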
void RelocationInfo::rewind(TCA start, TCA end) {
  if (m_srcRanges.size() && m_srcRanges.back().first == start) {
    assert(m_dstRanges.size() == m_srcRanges.size());
    assert(m_srcRanges.back().second == end);
    m_srcRanges.pop_back();
    m_dstRanges.pop_back();
  }
  auto it = m_adjustedAddresses.lower_bound(start);
  if (it == m_adjustedAddresses.end()) return;
  if (it->first == start) {
    // If it->second.first is set, start is also the end
    // of an existing region. Don't erase it in that case.
    if (it->second.first) {
      it++->second.second = 0;
    } else {
      m_adjustedAddresses.erase(it++);
    }
  }
  while (it != m_adjustedAddresses.end() && it->first < end) {
    m_adjustedAddresses.erase(it++);
  }
  if (it == m_adjustedAddresses.end()) return;
  if (it->first == end) {
    // Similar to start above, end could be the start of an
    // existing region. Don't erase it in that case.
    if (it->second.second) {
      it++->second.first = 0;
    } else {
      m_adjustedAddresses.erase(it++);
    }
  }
}

void emitIncStat(CodeBlock& cb, uint64_t* tl_table, uint index, int n,
                 bool force) {
  if (!force && !Stats::enabled()) return;
  intptr_t disp = uintptr_t(&tl_table[index]) - tlsBase();

  mcg->backEnd().emitIncStat(cb, disp, n);
}

void emitIncStat(Vout& v, Stats::StatCounter stat, int n, bool force) {
  if (!force && !Stats::enabled()) return;
  intptr_t disp = uintptr_t(&Stats::tl_counters[stat]) - tlsBase();
  v << addqim{n, Vptr{baseless(disp), Vptr::FS}, v.makeReg()};
}
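
// Usage sketch from codegen (the counter name is hypothetical; any
// Stats::StatCounter enumerator works the same way):
//
//   emitIncStat(v, Stats::SomeCounter, 1, false);
//
// This emits a single add-immediate to the counter's %fs-relative slot
// rather than a call, so it is cheap enough to sprinkle through generated
// code.
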
// Generic vasm service-request generator. Target-specific details are
// hidden by the svcreq{} instruction.
void emitServiceReq(Vout& v, TCA stub_block,
                    ServiceRequest req, const ServiceReqArgVec& argv) {
  TRACE(3, "Emit Service Req %s(", serviceReqName(req));
  VregList args;
  for (auto& argInfo : argv) {
    switch (argInfo.m_kind) {
      case ServiceReqArgInfo::Immediate: {
        TRACE(3, "%" PRIx64 ", ", argInfo.m_imm);
        args.push_back(v.cns(argInfo.m_imm));
        break;
      }
      default: {
        always_assert(false);
        break;
      }
    }
  }
  v << svcreq{req, v.makeTuple(args), stub_block};
}
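
// Usage sketch (assumes the packServiceReqArgs helper from
// service-requests-inline.h; the request and argument are illustrative):
//
//   ServiceReqArgVec argv;
//   packServiceReqArgs(argv, uint64_t{offset});
//   emitServiceReq(v, stubStart, REQ_RETRANSLATE, argv);
//
// The svcreq{} vasm instruction hides how the request number and its
// arguments are materialized for the target architecture.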