Collect post-conditions at IR-gen time
hphp/runtime/vm/jit/mc-generator.cpp
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 */
17 #include "hphp/runtime/vm/jit/mc-generator.h"
18 #include "hphp/runtime/vm/jit/vtune-jit.h"
20 #include <cinttypes>
21 #include <assert.h>
22 #include <stdarg.h>
23 #include <stdint.h>
24 #include <stdio.h>
25 #include <sys/mman.h>
26 #include <unistd.h>
27 #include <unwind.h>
29 #include <algorithm>
30 #include <exception>
31 #include <memory>
32 #include <queue>
33 #include <string>
34 #include <strstream>
35 #include <unordered_set>
36 #include <vector>
38 #include <folly/Format.h>
39 #include <folly/MapUtil.h>
40 #include <folly/Optional.h>
41 #include <folly/String.h>
43 #include "hphp/util/abi-cxx.h"
44 #include "hphp/util/asm-x64.h"
45 #include "hphp/util/bitops.h"
46 #include "hphp/util/cycles.h"
47 #include "hphp/util/debug.h"
48 #include "hphp/util/disasm.h"
49 #include "hphp/util/maphuge.h"
50 #include "hphp/util/meta.h"
51 #include "hphp/util/process.h"
52 #include "hphp/util/rank.h"
53 #include "hphp/util/repo-schema.h"
54 #include "hphp/util/ringbuffer.h"
55 #include "hphp/util/timer.h"
56 #include "hphp/util/trace.h"
58 #include "hphp/runtime/base/arch.h"
59 #include "hphp/runtime/base/execution-context.h"
60 #include "hphp/runtime/base/rds.h"
61 #include "hphp/runtime/base/runtime-option-guard.h"
62 #include "hphp/runtime/base/runtime-option.h"
63 #include "hphp/runtime/base/stats.h"
64 #include "hphp/runtime/base/strings.h"
65 #include "hphp/runtime/base/zend-string.h"
66 #include "hphp/runtime/ext/ext_closure.h"
67 #include "hphp/runtime/ext/ext_generator.h"
68 #include "hphp/runtime/ext/std/ext_std_function.h"
69 #include "hphp/runtime/server/source-root-info.h"
70 #include "hphp/runtime/vm/bytecode.h"
71 #include "hphp/runtime/vm/debug/debug.h"
72 #include "hphp/runtime/vm/func.h"
73 #include "hphp/runtime/vm/jit/back-end-x64.h" // XXX Layering violation.
74 #include "hphp/runtime/vm/jit/check.h"
75 #include "hphp/runtime/vm/jit/code-gen.h"
76 #include "hphp/runtime/vm/jit/debug-guards.h"
77 #include "hphp/runtime/vm/jit/inlining-decider.h"
78 #include "hphp/runtime/vm/jit/irgen.h"
79 #include "hphp/runtime/vm/jit/normalized-instruction.h"
80 #include "hphp/runtime/vm/jit/opt.h"
81 #include "hphp/runtime/vm/jit/print.h"
82 #include "hphp/runtime/vm/jit/prof-data.h"
83 #include "hphp/runtime/vm/jit/region-selection.h"
84 #include "hphp/runtime/vm/jit/service-requests-inline.h"
85 #include "hphp/runtime/vm/jit/srcdb.h"
86 #include "hphp/runtime/vm/jit/timer.h"
87 #include "hphp/runtime/vm/jit/translate-region.h"
88 #include "hphp/runtime/vm/jit/translator-inline.h"
89 #include "hphp/runtime/vm/jit/vasm-emit.h"
90 #include "hphp/runtime/vm/jit/vasm-instr.h"
91 #include "hphp/runtime/vm/member-operations.h"
92 #include "hphp/runtime/vm/php-debug.h"
93 #include "hphp/runtime/vm/repo.h"
94 #include "hphp/runtime/vm/runtime.h"
95 #include "hphp/runtime/vm/srckey.h"
96 #include "hphp/runtime/vm/treadmill.h"
97 #include "hphp/runtime/vm/type-profile.h"
98 #include "hphp/runtime/vm/unwind.h"
100 #include "hphp/runtime/vm/jit/mc-generator-internal.h"
102 namespace HPHP { namespace jit {
104 TRACE_SET_MOD(mcg);
106 using namespace reg;
107 using namespace Trace;
108 using std::max;
110 #define TRANS_PERF_COUNTERS \
111 TPC(translate) \
112 TPC(retranslate) \
113 TPC(interp_bb) \
114 TPC(interp_instr) \
115 TPC(interp_one) \
116 TPC(max_trans) \
117 TPC(enter_tc) \
118 TPC(service_req)
120 #define TPC(n) "jit_" #n,
121 static const char* const kPerfCounterNames[] = {
122 TRANS_PERF_COUNTERS
123 };
124 #undef TPC
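// Illustrative sketch, not part of the original file: with TPC(n) defined
// as "jit_" #n "," above, TRANS_PERF_COUNTERS expands to one string literal
// per counter, e.g.:
//
//   static const char* const kPerfCounterNames[] = {
//     "jit_translate",
//     "jit_retranslate",
//     // ... one entry per TPC(...) line ...
//   };
//
// Presumably a matching TPC definition elsewhere generates the tpc_* enum
// (tpc_num_counters) that sizes s_perfCounters, keeping names and counter
// slots in sync.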
126 __thread int64_t s_perfCounters[tpc_num_counters];
127 static __thread size_t s_initialTCSize;
129 // The global MCGenerator object.
130 MCGenerator* mcg;
132 CppCall MCGenerator::getDtorCall(DataType type) {
133 switch (type) {
134 case KindOfString:
135 return CppCall::method(&StringData::release);
136 case KindOfArray:
137 return CppCall::method(&ArrayData::release);
138 case KindOfObject:
139 return CppCall::method(&ObjectData::release);
140 case KindOfResource:
141 return CppCall::method(&ResourceData::release);
142 case KindOfRef:
143 return CppCall::method(&RefData::release);
144 DT_UNCOUNTED_CASE:
145 case KindOfClass:
146 break;
147 }
148 not_reached();
149 }
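// Illustrative usage (hypothetical snippet, not in the original file): a
// caller emitting a destructor for a value whose DataType is known at JIT
// time might do:
//
//   auto const call = mcg->getDtorCall(KindOfString);
//   // `call` now wraps &StringData::release, ready to be lowered into a
//   // machine-level method call on the value's data pointer.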
151 ///////////////////////////////////////////////////////////////////////////////
153 bool shouldPGOFunc(const Func& func) {
154 if (!RuntimeOption::EvalJitPGO) return false;
156 // JITing pseudo-mains requires extra checks that blow up the IR. PGO
157 // can significantly increase the size of the regions, so disable it for
158 // pseudo-mains (so regions will be just tracelets).
159 if (func.isPseudoMain()) return false;
161 // Non-cloned closures simply contain prologues that redispatch to
162 // cloned closures. They don't contain a translation for the
163 // function entry, which is what triggers an Optimize retranslation.
164 // So don't generate profiling translations for them -- there's not
165 // much to do with PGO anyway here, since they just have prologues.
166 if (func.isClosureBody() && !func.isClonedClosure()) return false;
168 if (!RuntimeOption::EvalJitPGOHotOnly) return true;
169 return func.attrs() & AttrHot;
170 }
172 bool MCGenerator::profileSrcKey(SrcKey sk) const {
173 if (!shouldPGOFunc(*sk.func())) return false;
174 if (m_tx.profData()->optimized(sk.getFuncId())) return false;
175 if (m_tx.profData()->profiling(sk.getFuncId())) return true;
177 // Don't start profiling new functions if the size of either main or
178 // prof is already above Eval.JitAMaxUsage.
179 auto tcUsage = std::max(code.mainUsed(), code.profUsed());
180 if (tcUsage >= RuntimeOption::EvalJitAMaxUsage) {
181 return false;
182 }
184 return requestCount() <= RuntimeOption::EvalJitProfileRequests;
185 }
187 /*
188 * Invalidate the SrcDB entries for func's SrcKeys that have any
189 * Profile translation.
190 */
191 void MCGenerator::invalidateFuncProfSrcKeys(const Func* func) {
192 assert(RuntimeOption::EvalJitPGO);
193 FuncId funcId = func->getFuncId();
194 for (auto tid : m_tx.profData()->funcProfTransIDs(funcId)) {
195 invalidateSrcKey(m_tx.profData()->transSrcKey(tid));
196 }
197 }
199 TCA MCGenerator::retranslate(const TranslArgs& args) {
200 auto sr = m_tx.getSrcDB().find(args.sk);
201 always_assert(sr);
202 bool locked = sr->tryLock();
203 SCOPE_EXIT {
204 if (locked) sr->freeLock();
206 if (isDebuggerAttachedProcess() && m_tx.isSrcKeyInBL(args.sk)) {
207 // We are about to translate something known to be blacklisted by
208 // the debugger; exit early.
209 SKTRACE(1, args.sk, "retranslate abort due to debugger\n");
210 return nullptr;
212 LeaseHolder writer(Translator::WriteLease());
213 if (!writer || !shouldTranslate(args.sk.func())) return nullptr;
214 if (!locked) {
215 // Even though we knew above that we were going to skip
216 // doing another translation, we wait until we get the
217 // write lease, to avoid spinning through the tracelet
218 // guards again and again while another thread is writing
219 // to it.
220 return sr->getTopTranslation();
222 if (sr->translations().size() > RuntimeOption::EvalJitMaxTranslations) {
223 always_assert(sr->translations().size() ==
224 RuntimeOption::EvalJitMaxTranslations + 1);
225 return sr->getTopTranslation();
227 SKTRACE(1, args.sk, "retranslate\n");
229 m_tx.setMode(profileSrcKey(args.sk) ? TransKind::Profile : TransKind::Live);
230 SCOPE_EXIT{ m_tx.setMode(TransKind::Invalid); };
232 return translate(args);
235 TCA MCGenerator::retranslateOpt(TransID transId, bool align) {
236 LeaseHolder writer(Translator::WriteLease());
237 if (!writer) return nullptr;
238 if (isDebuggerAttachedProcess()) return nullptr;
240 TRACE(1, "retranslateOpt: transId = %u\n", transId);
242 SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };
244 if (!m_tx.profData()->hasTransRec(transId)) return nullptr;
246 always_assert(m_tx.profData()->transRegion(transId) != nullptr);
248 auto func = m_tx.profData()->transFunc(transId);
249 auto funcId = func->getFuncId();
250 auto sk = m_tx.profData()->transSrcKey(transId);
252 if (m_tx.profData()->optimized(funcId)) return nullptr;
253 m_tx.profData()->setOptimized(funcId);
255 bool setFuncBody = func->getDVFunclets().size() == 0;
257 func->setFuncBody(m_tx.uniqueStubs.funcBodyHelperThunk);
259 // Invalidate SrcDB's entries for all func's SrcKeys.
260 invalidateFuncProfSrcKeys(func);
262 // Regenerate the prologues and DV funclets before the actual function body.
263 TCA start = regeneratePrologues(func, sk);
265 // Regionize func and translate all its regions.
266 std::vector<RegionDescPtr> regions;
267 regionizeFunc(func, this, regions);
269 for (auto region : regions) {
270 m_tx.setMode(TransKind::Optimize);
271 always_assert(!region->empty());
272 auto regionSk = region->start();
273 auto translArgs = TranslArgs{regionSk, align};
274 translArgs.region = region;
276 if (setFuncBody && regionSk.offset() == func->base()) {
277 translArgs.setFuncBody = true;
278 setFuncBody = false;
280 auto regionStart = translate(translArgs);
281 if (start == nullptr && regionSk == sk) {
282 start = regionStart;
284 // Cloned closures' prologue tables point to the corresponding
285 // main/DV entry point. So update the prologue table when
286 // retranslating their entries.
287 if (func->isClonedClosure() && func->isEntry(regionSk.offset()) &&
288 regionStart) {
289 int entryNumParams = func->getEntryNumParams(regionSk.offset());
290 func->setPrologue(entryNumParams, regionStart);
294 m_tx.profData()->freeFuncData(funcId);
296 return start;
297 }
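// Summary sketch of the flow above (added commentary, not original): a
// REQ_RETRANSLATE_OPT for `transId` marks the function optimized exactly
// once, drops its profiling translations from the SrcDB, regenerates
// prologues and DV funclets, then emits one TransKind::Optimize translation
// per selected region; `start` is the entry point for the region whose
// SrcKey matches the requesting `sk`, if any.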
299 static bool liveFrameIsPseudoMain() {
300 ActRec* ar = (ActRec*)vmfp();
301 return ar->hasVarEnv() && ar->getVarEnv()->isGlobalScope();
302 }
304 /*
305 * Find or create a translation for sk. Returns TCA of "best" current
306 * translation. May return NULL if it is currently impossible to create
307 * a translation.
308 */
309 TCA
310 MCGenerator::getTranslation(const TranslArgs& args) {
311 auto sk = args.sk;
312 sk.func()->validate();
313 SKTRACE(2, sk,
314 "getTranslation: curUnit %s funcId %x offset %d\n",
315 sk.unit()->filepath()->data(),
316 sk.getFuncId(),
317 sk.offset());
318 SKTRACE(2, sk, " funcId: %x \n", sk.func()->getFuncId());
320 if (liveFrameIsPseudoMain() && !RuntimeOption::EvalJitPseudomain) {
321 SKTRACE(2, sk, "punting on pseudoMain\n");
322 return nullptr;
324 if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
325 TCA tca = sr->getTopTranslation();
326 if (tca) {
327 SKTRACE(2, sk, "getTranslation: found %p\n", tca);
328 return tca;
331 return createTranslation(args);
335 MCGenerator::numTranslations(SrcKey sk) const {
336 if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
337 return sr->translations().size();
339 return 0;
342 const StaticString
343 s_php_errormsg("php_errormsg"),
344 s_http_response_header("http_response_header");
346 bool MCGenerator::shouldTranslateNoSizeLimit(const Func* func) const {
347 // If we've hit Eval.JitGlobalTranslationLimit, then we stop translating.
348 if (m_numTrans >= RuntimeOption::EvalJitGlobalTranslationLimit) {
349 return false;
350 }
352 /*
353 * We don't support JIT compiling functions that use some super-dynamic php
354 * variables.
355 */
356 if (func->lookupVarId(s_php_errormsg.get()) != -1 ||
357 func->lookupVarId(s_http_response_header.get()) != -1) {
358 return false;
361 return true;
364 bool MCGenerator::shouldTranslate(const Func* func) const {
365 if (!shouldTranslateNoSizeLimit(func)) return false;
366 // Otherwise, follow the Eval.JitAMaxUsage limit. However, we do
367 // allow Optimize translations past that limit.
368 return code.mainUsed() < RuntimeOption::EvalJitAMaxUsage ||
369 m_tx.mode() == TransKind::Optimize;
373 static void populateLiveContext(RegionContext& ctx) {
374 typedef RegionDesc::Location L;
376 const ActRec* const fp {vmfp()};
377 const TypedValue* const sp {vmsp()};
379 for (uint32_t i = 0; i < fp->m_func->numLocals(); ++i) {
380 ctx.liveTypes.push_back(
381 { L::Local{i}, liveTVType(frame_local(fp, i)) }
382 );
383 }
385 int32_t stackOff = 0;
386 visitStackElems(
387 fp, sp, ctx.bcOffset,
388 [&](const ActRec* ar) {
389 // TODO(#2466980): when it's a Cls, we should pass the Class* in
390 // the Type.
391 auto const objOrCls =
392 ar->hasThis() ? Type::SubObj(ar->getThis()->getVMClass()) :
393 ar->hasClass() ? Type::Cls
394 : Type::Nullptr;
396 ctx.preLiveARs.push_back({
397 stackOff,
398 ar->m_func,
399 objOrCls
400 });
401 FTRACE(2, "added prelive ActRec {}\n", show(ctx.preLiveARs.back()));
402 stackOff += kNumActRecCells;
403 },
404 [&](const TypedValue* tv) {
405 ctx.liveTypes.push_back(
406 { L::Stack{ctx.spOffset - stackOff}, liveTVType(tv) }
407 );
408 stackOff++;
409 FTRACE(2, "added live type {}\n", show(ctx.liveTypes.back()));
410 }
411 );
412 }
414 TCA
415 MCGenerator::createTranslation(const TranslArgs& args) {
416 if (!shouldTranslate(args.sk.func())) return nullptr;
418 /*
419 * Try to become the writer. We delay this until we *know* we will have
420 * a need to create new translations, instead of just trying to win the
421 * lottery at the dawn of time. Hopefully lots of requests won't require
422 * any new translation.
423 */
424 auto sk = args.sk;
425 LeaseHolder writer(Translator::WriteLease());
426 if (!writer || !shouldTranslate(args.sk.func())) return nullptr;
428 if (auto sr = m_tx.getSrcDB().find(sk)) {
429 TCA tca = sr->getTopTranslation();
430 if (tca) {
431 // Handle extremely unlikely race; someone may have just already
432 // added the first instance of this SrcRec while we did a
433 // non-blocking wait on the write lease.
434 return tca;
435 } else {
436 // Since we are holding the write lease, we know that sk is properly
437 // initialized, except that it has no translations (due to
438 // replaceOldTranslations)
439 return retranslate(args);
443 // We put retranslate requests at the end of our slab to more frequently
444 // allow conditional jump fall-throughs
445 TCA astart = code.main().frontier();
446 TCA realColdStart = code.realCold().frontier();
447 TCA realFrozenStart = code.realFrozen().frontier();
448 TCA req = emitServiceReq(code.cold(), REQ_RETRANSLATE,
449 sk.offset(), TransFlags().packed);
450 SKTRACE(1, sk, "inserting anchor translation for (%p,%d) at %p\n",
451 sk.unit(), sk.offset(), req);
452 SrcRec* sr = m_tx.getSrcRec(sk);
453 sr->setFuncInfo(sk.func());
454 sr->setAnchorTranslation(req);
456 size_t asize = code.main().frontier() - astart;
457 size_t realColdSize = code.realCold().frontier() - realColdStart;
458 size_t realFrozenSize = code.realFrozen().frontier() - realFrozenStart;
459 assert(asize == 0);
460 if (realColdSize && RuntimeOption::EvalDumpTCAnchors) {
461 TransRec tr(sk,
462 TransKind::Anchor,
463 astart, asize, realColdStart, realColdSize,
464 realFrozenStart, realFrozenSize);
465 m_tx.addTranslation(tr);
466 if (RuntimeOption::EvalJitUseVtuneAPI) {
467 reportTraceletToVtune(sk.unit(), sk.func(), tr);
470 if (m_tx.profData()) {
471 m_tx.profData()->addTransNonProf(TransKind::Anchor, sk);
473 assert(!m_tx.isTransDBEnabled() ||
474 m_tx.getTransRec(realColdStart)->kind == TransKind::Anchor);
477 return retranslate(args);
481 MCGenerator::lookupTranslation(SrcKey sk) const {
482 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
483 return sr->getTopTranslation();
485 return nullptr;
489 MCGenerator::translate(const TranslArgs& args) {
490 INC_TPC(translate);
492 assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
493 assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
494 assert(m_tx.mode() != TransKind::Invalid);
495 SCOPE_EXIT{ m_tx.setMode(TransKind::Invalid); };
497 if (!shouldTranslate(args.sk.func())) return nullptr;
499 auto func = const_cast<Func*>(args.sk.func());
500 CodeCache::Selector cbSel(CodeCache::Selector::Args(code)
501 .profile(m_tx.mode() == TransKind::Profile)
502 .hot(RuntimeOption::EvalHotFuncCount &&
503 (func->attrs() & AttrHot) && m_tx.useAHot()));
505 auto start = translateWork(args);
507 if (args.setFuncBody) {
508 func->setFuncBody(start);
510 SKTRACE(1, args.sk, "translate moved head from %p to %p\n",
511 getTopTranslation(args.sk), start);
513 return start;
517 MCGenerator::getCallArrayPrologue(Func* func) {
518 TCA tca = func->getFuncBody();
519 if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
521 DVFuncletsVec dvs = func->getDVFunclets();
523 if (dvs.size()) {
524 LeaseHolder writer(Translator::WriteLease());
525 if (!writer) return nullptr;
526 tca = func->getFuncBody();
527 if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
528 tca = backEnd().emitCallArrayPrologue(func, dvs);
529 func->setFuncBody(tca);
530 } else {
531 SrcKey sk(func, func->base(), false);
532 auto args = TranslArgs{sk, false};
533 args.setFuncBody = true;
534 tca = mcg->getTranslation(args);
537 return tca;
540 void
541 MCGenerator::smashPrologueGuards(TCA* prologues, int numPrologues,
542 const Func* func) {
543 for (int i = 0; i < numPrologues; i++) {
544 if (prologues[i] != m_tx.uniqueStubs.fcallHelperThunk
545 && backEnd().funcPrologueHasGuard(prologues[i], func)) {
546 backEnd().funcPrologueSmashGuard(prologues[i], func);
547 }
548 }
549 }
551 /*
552 * funcPrologue --
554 * Given a callee and a number of args, match up to the callee's
555 * argument expectations and dispatch.
557 * Call/return hand-shaking is a bit funny initially. At translation time,
558 * we don't necessarily know what function we're calling. For instance,
560 * f(g());
562 * Will lead to a set of basic blocks like:
564 * b1: pushfuncd "f"
565 * pushfuncd "g"
566 * fcall
567 * b2: fcall
569 * The fcall labelled "b2" above is not statically bindable in our
570 * execution model.
572 * We decouple the call work into a per-callsite portion, responsible
573 * for recording the return address, and a per-(callee, numArgs) portion,
574 * responsible for fixing up arguments and dispatching to remaining
575 * code. We call the per-callee portion a "prologue."
577 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
578 * we're running "between" basic blocks, with all VM registers sync'ed.
579 * However, we're also called in the middle of basic blocks, when dropping
580 * entries into func->m_prologues. So don't go around using the
581 * translation-time values of vmfp()/vmsp(), since they have an
582 * unpredictable relationship to the source.
583 */
584 bool
585 MCGenerator::checkCachedPrologue(const Func* func, int paramIdx,
586 TCA& prologue) const {
587 prologue = (TCA)func->getPrologue(paramIdx);
588 if (prologue != m_tx.uniqueStubs.fcallHelperThunk) {
589 TRACE(1, "cached prologue %s(%d) -> cached %p\n",
590 func->fullName()->data(), paramIdx, prologue);
591 assert(isValidCodeAddress(prologue));
592 return true;
594 return false;
598 MCGenerator::getFuncPrologue(Func* func, int nPassed, ActRec* ar,
599 bool forRegeneratePrologue) {
600 func->validate();
601 TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
602 int const numParams = func->numNonVariadicParams();
603 int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
605 bool const funcIsMagic = func->isMagic();
607 // Do a quick test before grabbing the write lease
608 TCA prologue;
609 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
611 Offset entry = func->getEntryForNumArgs(nPassed);
612 SrcKey funcBody(func, entry, false);
614 if (func->isClonedClosure()) {
615 assert(ar);
616 interp_set_regs(ar, (Cell*)ar - func->numSlotsInFrame(), entry);
617 auto tca = getTranslation(TranslArgs{funcBody, false});
618 tl_regState = VMRegState::DIRTY;
619 if (tca) {
620 // racy, but ok...
621 func->setPrologue(paramIndex, tca);
623 return tca;
626 LeaseHolder writer(Translator::WriteLease());
627 if (!writer) return nullptr;
629 // If we're regenerating a prologue, we want to check shouldTranslate()
630 // but ignore the code size limits. We still want to respect the global
631 // translation limit and other restrictions, though.
632 if (forRegeneratePrologue) {
633 if (!shouldTranslateNoSizeLimit(func)) return nullptr;
634 } else {
635 if (!shouldTranslate(func)) return nullptr;
638 // Double check the prologue array now that we have the write lease
639 // in case another thread snuck in and set the prologue already.
640 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
642 // We're coming from a BIND_CALL service request, so enable
643 // profiling if we haven't optimized the function entry yet.
644 assert(m_tx.mode() == TransKind::Invalid ||
645 m_tx.mode() == TransKind::Prologue);
646 if (m_tx.mode() == TransKind::Invalid && profileSrcKey(funcBody)) {
647 m_tx.setMode(TransKind::Proflogue);
648 } else {
649 m_tx.setMode(TransKind::Prologue);
651 SCOPE_EXIT{ m_tx.setMode(TransKind::Invalid); };
653 CodeCache::Selector cbSel(CodeCache::Selector::Args(code)
654 .profile(m_tx.mode() == TransKind::Proflogue)
655 .hot(RuntimeOption::EvalHotFuncCount &&
656 (func->attrs() & AttrHot) && m_tx.useAHot()));
658 assert(m_fixups.empty());
659 // If we're close to a cache line boundary, just burn some space to
660 // try to keep the func and its body on fewer total lines.
661 if (((uintptr_t)code.main().frontier() & backEnd().cacheLineMask()) >=
662 (backEnd().cacheLineSize() / 2)) {
663 backEnd().moveToAlign(code.main(), MoveToAlignFlags::kCacheLineAlign);
665 m_fixups.m_alignFixups.emplace(
666 code.main().frontier(), std::make_pair(backEnd().cacheLineSize() / 2, 0));
668 // Careful: this isn't necessarily the real entry point. For funcIsMagic
669 // prologues, this is just a possible prologue.
670 TCA aStart = code.main().frontier();
671 TCA start = aStart;
672 TCA realColdStart = mcg->code.realCold().frontier();
673 TCA realFrozenStart = mcg->code.realFrozen().frontier();
675 auto const skFuncBody = backEnd().emitFuncPrologue(
676 code.main(), code.cold(), func, funcIsMagic, nPassed, start, aStart);
677 m_fixups.process(nullptr);
679 assert(backEnd().funcPrologueHasGuard(start, func));
680 TRACE(2, "funcPrologue mcg %p %s(%d) setting prologue %p\n",
681 this, func->fullName()->data(), nPassed, start);
682 assert(isValidCodeAddress(start));
683 func->setPrologue(paramIndex, start);
685 assert(m_tx.mode() == TransKind::Prologue ||
686 m_tx.mode() == TransKind::Proflogue);
687 TransRec tr(skFuncBody,
688 m_tx.mode(),
689 aStart, code.main().frontier() - aStart,
690 realColdStart, code.realCold().frontier() - realColdStart,
691 realFrozenStart, code.realFrozen().frontier() - realFrozenStart);
692 m_tx.addTranslation(tr);
693 if (RuntimeOption::EvalJitUseVtuneAPI) {
694 reportTraceletToVtune(func->unit(), func, tr);
697 if (m_tx.profData()) {
698 m_tx.profData()->addTransPrologue(m_tx.mode(), skFuncBody, paramIndex);
701 recordGdbTranslation(skFuncBody, func,
702 code.main(), aStart,
703 false, true);
704 recordBCInstr(OpFuncPrologue, aStart, code.main().frontier(), false);
706 m_numTrans++;
707 assert(m_numTrans <= RuntimeOption::EvalJitGlobalTranslationLimit);
709 return start;
710 }
712 /*
713 * Given the proflogueTransId for a TransProflogue translation,
714 * regenerate the prologue (as a TransPrologue). Returns the starting
715 * address for the translation corresponding to triggerSk, if such
716 * translation is generated; otherwise returns nullptr.
717 */
718 TCA MCGenerator::regeneratePrologue(TransID prologueTransId, SrcKey triggerSk) {
719 Func* func = m_tx.profData()->transFunc(prologueTransId);
720 int nArgs = m_tx.profData()->prologueArgs(prologueTransId);
722 // Regenerate the prologue.
723 func->resetPrologue(nArgs);
724 m_tx.setMode(TransKind::Prologue);
725 SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };
726 auto const start = getFuncPrologue(
727 func,
728 nArgs,
729 nullptr /* ActRec */,
730 true /* regeneratePrologue */
732 if (!start) return nullptr;
734 func->setPrologue(nArgs, start);
736 // Smash callers of the old prologue with the address of the new one.
737 PrologueCallersRec* pcr =
738 m_tx.profData()->prologueCallers(prologueTransId);
739 for (TCA toSmash : pcr->mainCallers()) {
740 backEnd().smashCall(toSmash, start);
742 // If the prologue has a guard, then smash its guard-callers as well.
743 if (backEnd().funcPrologueHasGuard(start, func)) {
744 TCA guard = backEnd().funcPrologueToGuard(start, func);
745 for (TCA toSmash : pcr->guardCallers()) {
746 backEnd().smashCall(toSmash, guard);
749 pcr->clearAllCallers();
751 // If this prologue has a DV funclet, then generate a translation
752 // for the DV funclet right after the prologue. However, skip
753 // cloned closures because their prologues are actually the DV
754 // funclets already.
755 TCA triggerSkStart = nullptr;
756 if (nArgs < func->numNonVariadicParams() && !func->isClonedClosure()) {
757 auto paramInfo = func->params()[nArgs];
758 if (paramInfo.hasDefaultValue()) {
759 m_tx.setMode(TransKind::Optimize);
760 SrcKey funcletSK(func, paramInfo.funcletOff, false);
761 auto funcletTransId = m_tx.profData()->dvFuncletTransId(func, nArgs);
762 if (funcletTransId != kInvalidTransID) {
763 invalidateSrcKey(funcletSK);
764 auto args = TranslArgs{funcletSK, false};
765 args.transId = funcletTransId;
766 auto dvStart = translate(args);
767 if (dvStart && !triggerSkStart && funcletSK == triggerSk) {
768 triggerSkStart = dvStart;
770 // Flag that this translation has been retranslated, so that
771 // it's not retranslated again along with the function body.
772 m_tx.profData()->setOptimized(funcletSK);
773 }
774 }
775 }
777 return triggerSkStart;
778 }
780 /*
781 * Regenerate all prologues of func that were previously generated.
782 * The prologues are sorted in ascending order of profile counters.
783 * For prologues with corresponding DV funclets, their corresponding
784 * DV funclet will be regenerated right after them. The idea is to
785 * generate the function body right after calling this function, so
786 * that all prologues are placed right before it, and with the hottest
787 * prologues closer to it.
789 * Returns the starting address for the translation corresponding to
790 * triggerSk, if such translation is generated; otherwise returns
791 * nullptr.
792 */
793 TCA MCGenerator::regeneratePrologues(Func* func, SrcKey triggerSk) {
794 TCA triggerStart = nullptr;
795 std::vector<TransID> prologTransIDs;
797 for (int nArgs = 0; nArgs < func->numPrologues(); nArgs++) {
798 TransID tid = m_tx.profData()->prologueTransId(func, nArgs);
799 if (tid != kInvalidTransID) {
800 prologTransIDs.push_back(tid);
804 std::sort(prologTransIDs.begin(), prologTransIDs.end(),
805 [&](TransID t1, TransID t2) -> bool {
806 // This will sort in ascending order. Note that transCounters start
807 // at JitPGOThreshold and count down.
808 return m_tx.profData()->transCounter(t1) >
809 m_tx.profData()->transCounter(t2);
810 });
812 for (TransID tid : prologTransIDs) {
813 TCA start = regeneratePrologue(tid, triggerSk);
814 if (triggerStart == nullptr && start != nullptr) {
815 triggerStart = start;
816 }
817 }
819 return triggerStart;
820 }
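// Worked example of the comparator above (assuming JitPGOThreshold = 100):
// a prologue entered 90 times has transCounter 100 - 90 = 10, while one
// entered 5 times has transCounter 95. Since 95 > 10, the cold prologue
// sorts first and the hottest one is regenerated last, ending up closest
// to the function body that is emitted right after this call.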
822 /*
823 * bindJmp --
825 * Runtime service handler that patches a jmp to the translation of
826 * u:dest from toSmash.
827 */
828 TCA
829 MCGenerator::bindJmp(TCA toSmash, SrcKey destSk, ServiceRequest req,
830 TransFlags trflags, bool& smashed) {
831 auto args = TranslArgs{destSk, false};
832 args.flags = trflags;
833 auto tDest = getTranslation(args);
834 if (!tDest) return nullptr;
836 LeaseHolder writer(Translator::WriteLease());
837 if (!writer) return tDest;
839 SrcRec* sr = m_tx.getSrcRec(destSk);
840 // The top translation may have changed while we waited for the
841 // write lease, so read it again. If it was replaced with a new
842 // translation, then bind to the new one. If it was invalidated,
843 // then don't bind the jump.
844 tDest = sr->getTopTranslation();
845 if (tDest == nullptr) return nullptr;
847 if (req == REQ_BIND_ADDR) {
848 auto addr = reinterpret_cast<TCA*>(toSmash);
849 if (*addr == tDest) {
850 // Already smashed
851 return tDest;
853 sr->chainFrom(IncomingBranch::addr(addr));
854 } else {
855 DecodedInstruction di(toSmash);
856 if (di.isBranch() && !di.isJmp()) {
857 auto jt = backEnd().jccTarget(toSmash);
858 assert(jt);
859 if (jt == tDest) {
860 // Already smashed
861 return tDest;
863 sr->chainFrom(IncomingBranch::jccFrom(toSmash));
864 } else {
865 assert(!backEnd().jccTarget(toSmash));
866 if (!backEnd().jmpTarget(toSmash)
867 || backEnd().jmpTarget(toSmash) == tDest) {
868 // Already smashed
869 return tDest;
871 sr->chainFrom(IncomingBranch::jmpFrom(toSmash));
874 smashed = true;
875 return tDest;
878 /*
879 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
881 * 1: j<CC> stubJmpccFirst
882 * jmp stubJmpccFirst
884 * Our "taken" argument tells us whether the branch at 1: was taken or
885 * not; and therefore which of offTaken and offNotTaken to continue executing.
886 * If we did take the branch, we now rewrite the code so that the branch is
887 * straightened. This predicts that subsequent executions will go the same way
888 * as the first execution.
890 * jn<CC> stubJmpccSecond:offNotTaken
891 * nop5 ; fallthru, or jmp if there's already a translation.
892 * offTaken:
894 * If we did not take the branch, we leave the sense of the condition
895 * intact, while patching it up to go to the unexplored code:
897 * j<CC> stubJmpccSecond:offTaken
898 * nop5
899 * offNotTaken:
900 */
901 TCA
902 MCGenerator::bindJmpccFirst(TCA toSmash,
903 SrcKey skTaken, SrcKey skNotTaken,
904 bool taken,
905 bool& smashed) {
906 LeaseHolder writer(Translator::WriteLease());
907 if (!writer) return nullptr;
908 auto skWillExplore = taken ? skTaken : skNotTaken;
909 auto skWillDefer = taken ? skNotTaken : skTaken;
910 auto dest = skWillExplore;
911 auto cc = backEnd().jccCondCode(toSmash);
912 TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
913 "taken %d\n",
914 skWillExplore.offset(), skWillDefer.offset(), cc, taken);
916 // We want the branch to point to whichever side has not been explored yet.
917 if (taken) cc = ccNegate(cc);
919 auto& cb = code.blockFor(toSmash);
920 Asm as { cb };
921 // It's not clear where the IncomingBranch should go if cb is code.frozen().
922 assert(&cb != &code.frozen());
924 // XXX Use of kJmp*Len here is a layering violation.
925 using namespace x64;
927 // can we just directly fall through?
928 // a jmp + jz takes 5 + 6 = 11 bytes
929 bool fallThru = toSmash + kJmpccLen + kJmpLen == cb.frontier() &&
930 !m_tx.getSrcDB().find(dest);
932 auto tDest = getTranslation(TranslArgs{dest, !fallThru});
933 if (!tDest) {
934 return 0;
937 if (backEnd().jmpTarget(toSmash + kJmpccLen)
938 != backEnd().jccTarget(toSmash)) {
939 // someone else already smashed this one. Ideally we would
940 // just re-execute from toSmash - except the flags will have
941 // been trashed.
942 return tDest;
945 TCA stub = emitEphemeralServiceReq(code.frozen(),
946 getFreeStub(code.frozen(),
947 &mcg->cgFixups()),
948 REQ_BIND_JMP,
949 RipRelative(toSmash),
950 skWillDefer.toAtomicInt(),
951 TransFlags{}.packed);
953 mcg->cgFixups().process(nullptr);
954 smashed = true;
955 assert(Translator::WriteLease().amOwner());
956 /*
957 * Roll over the jcc and the jmp/fallthru. E.g., from:
959 * toSmash: jcc <jmpccFirstStub>
960 * toSmash+6: jmp <jmpccFirstStub>
961 * toSmash+11: <probably the new translation == tdest>
963 * to:
965 * toSmash: j[n]z <jmpccSecondStub>
966 * toSmash+6: nop5
967 toSmash+11: newHotness
968 */
969 CodeCursor cg(cb, toSmash);
970 as.jcc(cc, stub);
971 m_tx.getSrcRec(dest)->chainFrom(IncomingBranch::jmpFrom(cb.frontier()));
972 TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
973 return tDest;
976 TCA MCGenerator::bindCall(ActRec* calleeFrame,
977 bool isImmutable,
978 SrcKey& sk,
979 ServiceRequest& req) {
980 TCA toSmash = backEnd().smashableCallFromReturn((TCA)calleeFrame->m_savedRip);
981 Func *func = const_cast<Func*>(calleeFrame->m_func);
982 int nArgs = calleeFrame->numArgs();
983 TRACE(2, "bindCall %s, ActRec %p\n",
984 func->fullName()->data(), calleeFrame);
985 TCA start = getFuncPrologue(func, nArgs);
986 TRACE(2, "bindCall -> %p\n", start);
987 if (!isImmutable) {
988 // We don't know we're calling the right function, so adjust start to point
989 // to the dynamic check of ar->m_func.
990 start = backEnd().funcPrologueToGuard(start, func);
991 } else {
992 TRACE(2, "bindCall immutably %s -> %p\n",
993 func->fullName()->data(), start);
995 if (start) {
996 LeaseHolder writer(Translator::WriteLease());
997 if (writer) {
998 // Someone else may have changed the func prologue while we waited for
999 // the write lease, so read it again.
1000 start = getFuncPrologue(func, nArgs);
1001 if (!isImmutable) start = backEnd().funcPrologueToGuard(start, func);
1003 if (start && backEnd().callTarget(toSmash) != start) {
1004 assert(backEnd().callTarget(toSmash));
1005 TRACE(2, "bindCall smash %p -> %p\n",
1006 toSmash, start);
1007 backEnd().smashCall(toSmash, start);
1008 // For functions to be PGO'ed, if their current prologues are still
1009 // profiling ones (living in code.prof()), then save toSmash as a
1010 // caller to the prologue, so that it can later be smashed to call a
1011 // new prologue when it's generated.
1012 int calleeNumParams = func->numNonVariadicParams();
1013 int calledPrologNumArgs = (nArgs <= calleeNumParams ?
1014 nArgs : calleeNumParams + 1);
1015 if (code.prof().contains(start)) {
1016 if (isImmutable) {
1017 m_tx.profData()->addPrologueMainCaller(
1018 func, calledPrologNumArgs, toSmash);
1019 } else {
1020 m_tx.profData()->addPrologueGuardCaller(
1021 func, calledPrologNumArgs, toSmash);
1026 // sk: stale, but doesn't matter since we have a valid start TCA.
1027 } else {
1028 // We need translator help; we're not at the callee yet, so roll back. The
1029 // prelude has done some work already, but it should be safe to redo.
1030 TRACE(2, "bindCall rollback smash %p -> %p\n",
1031 toSmash, start);
1033 const FPIEnt* fe = liveFunc()->findPrecedingFPI(
1034 liveFunc()->base() + calleeFrame->m_soff);
1036 sk = SrcKey{liveFunc(), fe->m_fcallOff, vmfp()->resumed()};
1038 // We're going to have to interpret the FCall, so make sure handleSRHelper
1039 // doesn't think we're coming back from a REQ_BIND_CALL when we finally
1040 // make it back to the TC.
1041 req = REQ_BIND_JMP;
1044 return start;
1047 namespace {
1048 class FreeRequestStubTrigger {
1049 TCA m_stub;
1050 public:
1051 explicit FreeRequestStubTrigger(TCA stub) : m_stub(stub) {
1052 TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub);
1054 void operator()() {
1055 TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub);
1056 if (mcg->freeRequestStub(m_stub) != true) {
1057 // If we can't free the stub, enqueue again to retry.
1058 TRACE(3, "FreeStubTrigger: write lease failed, requeueing %p\n", m_stub);
1059 Treadmill::enqueue(FreeRequestStubTrigger(m_stub));
1060 }
1061 }
1062 };
1063 }
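// Added note (my understanding of the mechanism, not original commentary):
// Treadmill::enqueue defers the functor until every request that was in
// flight at enqueue time has finished, so a stub is only recycled once no
// thread can still be executing it; a failed free (no write lease) simply
// re-enqueues itself for another round trip.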
1065 #ifdef DEBUG
1067 struct DepthGuard {
1068 static __thread int m_depth;
1069 DepthGuard() { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
1070 ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }
1072 bool depthOne() const { return m_depth == 1; }
1073 };
1074 __thread int DepthGuard::m_depth;
1076 #else
1078 struct DepthGuard { bool depthOne() const { return false; } };
1080 #endif
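// DepthGuard is a debug-only RAII nesting counter: depthOne() is true only
// in the outermost enterTC invocation, which is where the write-lease
// "gremlin" stress test below is allowed to take the lease.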
1082 void
1083 MCGenerator::enterTC(TCA start, ActRec* stashedAR) {
1084 if (debug) {
1085 fflush(stdout);
1086 fflush(stderr);
1088 DepthGuard d;
1090 assert(isValidCodeAddress(start));
1091 assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
1092 assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
1094 Translator::WriteLease().gremlinUnlock();
1095 assert(!Translator::WriteLease().amOwner());
1097 INC_TPC(enter_tc);
1098 if (Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
1099 auto skData = SrcKey{liveFunc(), vmpc(), liveResumed()}.toAtomicInt();
1100 Trace::ringbufferEntry(RBTypeEnterTC, skData, (uint64_t)start);
1103 tl_regState = VMRegState::DIRTY;
1104 backEnd().enterTCHelper(start, stashedAR);
1105 tl_regState = VMRegState::CLEAN;
1106 assert(isValidVMStackAddress(vmsp()));
1108 if (debug) {
1109 // Debugging code: cede the write lease half the time.
1110 if (RuntimeOption::EvalJitStressLease) {
1111 if (d.depthOne() && (rand() % 2) == 0) {
1112 Translator::WriteLease().gremlinLock();
1117 vmfp() = nullptr;
1120 TCA MCGenerator::handleServiceRequest(ServiceReqInfo& info) {
1121 assert_native_stack_aligned();
1122 tl_regState = VMRegState::CLEAN; // partially a lie: vmpc() isn't synced
1124 auto callToExit = [&] {
1125 tl_regState = VMRegState::DIRTY;
1126 return m_tx.uniqueStubs.callToExit;
1129 TCA start = nullptr;
1130 SrcKey sk;
1131 auto smashed = false;
1133 // If start is still nullptr at the end of this switch, we will enter the
1134 // interpreter at sk.
1135 switch (info.req) {
1136 case REQ_BIND_CALL: {
1137 auto calleeFrame = info.stashedAR;
1138 auto isImmutable = info.args[0].boolVal;
1139 start = bindCall(calleeFrame, isImmutable, sk, info.req);
1140 break;
1143 case REQ_BIND_JMP:
1144 case REQ_BIND_ADDR: {
1145 auto const toSmash = info.args[0].tca;
1146 sk = SrcKey::fromAtomicInt(info.args[1].sk);
1147 auto const trflags = info.args[2].trflags;
1148 start = bindJmp(toSmash, sk, info.req, trflags, smashed);
1149 break;
1152 case REQ_BIND_JMPCC_FIRST: {
1153 auto toSmash = info.args[0].tca;
1154 auto skTaken = SrcKey::fromAtomicInt(info.args[1].sk);
1155 auto skNotTaken = SrcKey::fromAtomicInt(info.args[2].sk);
1156 auto taken = info.args[3].boolVal;
1157 sk = taken ? skTaken : skNotTaken;
1158 start = bindJmpccFirst(toSmash, skTaken, skNotTaken, taken, smashed);
1159 break;
1162 case REQ_RETRANSLATE: {
1163 INC_TPC(retranslate);
1164 sk = SrcKey{liveFunc(), info.args[0].offset, liveResumed()};
1165 auto trflags = info.args[1].trflags;
1166 auto args = TranslArgs{sk, true};
1167 args.flags = trflags;
1168 start = retranslate(args);
1169 SKTRACE(2, sk, "retranslated @%p\n", start);
1170 break;
1173 case REQ_RETRANSLATE_OPT: {
1174 sk = SrcKey::fromAtomicInt(info.args[0].sk);
1175 auto transID = info.args[1].transID;
1176 start = retranslateOpt(transID, false);
1177 SKTRACE(2, sk, "retranslated-OPT: transId = %d start: @%p\n", transID,
1178 start);
1179 break;
1182 case REQ_INTERPRET:
1183 // Leave start as nullptr and let the dispatchBB() happen down below.
1184 sk = SrcKey{liveFunc(), info.args[0].offset, liveResumed()};
1185 break;
1187 case REQ_POST_INTERP_RET: {
1188 // This is only responsible for the control-flow aspect of the Ret:
1189 // getting to the destination's translation, if any.
1190 auto ar = info.args[0].ar;
1191 auto caller = info.args[1].ar;
1192 assert(caller == vmfp());
1193 Unit* destUnit = caller->func()->unit();
1194 // Set PC so logging code in getTranslation doesn't get confused.
1195 vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
1196 sk = SrcKey{caller->func(), vmpc(), caller->resumed()};
1197 start = getTranslation(TranslArgs{sk, true});
1198 TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
1199 ar->m_func->fullName()->data(),
1200 caller->m_func->fullName()->data());
1201 break;
1204 case REQ_RESUME: {
1205 if (UNLIKELY(vmpc() == 0)) return callToExit();
1206 sk = SrcKey{liveFunc(), vmpc(), liveResumed()};
1207 start = getTranslation(TranslArgs{sk, true});
1208 break;
1211 case REQ_STACK_OVERFLOW: {
1212 if (info.stashedAR->m_sfp == vmfp()) {
1213 /*
1214 * The normal case - we were called via FCall, or FCallArray. We need
1215 * to construct the pc of the fcall from the return address (which will
1216 * be after the fcall). Because fcall is a variable length instruction,
1217 * and because we sometimes delete instructions from the instruction
1218 * stream, we need to use fpi regions to find the fcall.
1219 */
1220 const FPIEnt* fe = liveFunc()->findPrecedingFPI(
1221 liveUnit()->offsetOf(vmpc()));
1222 vmpc() = liveUnit()->at(fe->m_fcallOff);
1223 assert(isFCallStar(*reinterpret_cast<const Op*>(vmpc())));
1224 raise_error("Stack overflow");
1225 } else {
1226 /*
1227 * We were called via re-entry. Leak the params and the actrec, and
1228 * tell the unwinder that there's nothing left to do in this "entry".
1229 */
1230 vmsp() = reinterpret_cast<Cell*>(info.stashedAR + 1);
1231 throw VMReenterStackOverflow();
1233 not_reached();
1237 if (smashed && info.stub) {
1238 Treadmill::enqueue(FreeRequestStubTrigger(info.stub));
1241 // If we don't have a starting address, interpret basic blocks until we end
1242 // up somewhere with a translation (which we may have created, if the lease
1243 // holder dropped it).
1244 while (!start) {
1245 vmpc() = sk.unit()->at(sk.offset());
1246 INC_TPC(interp_bb);
1247 HPHP::dispatchBB();
1248 if (!vmpc()) return callToExit();
1249 sk = SrcKey{liveFunc(), vmpc(), liveResumed()};
1250 start = getTranslation(TranslArgs{sk, true});
1253 if (Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
1254 auto skData = sk.valid() ? sk.toAtomicInt() : uint64_t(-1LL);
1255 Trace::ringbufferEntry(RBTypeResumeTC, skData, (uint64_t)start);
1258 tl_regState = VMRegState::DIRTY;
1259 return start;
1260 }
1262 /*
1263 * Support for the stub freelist.
1264 */
1265 TCA FreeStubList::maybePop() {
1266 StubNode* ret = m_list;
1267 if (ret) {
1268 TRACE(1, "alloc stub %p\n", ret);
1269 m_list = ret->m_next;
1270 ret->m_freed = ~kStubFree;
1271 }
1272 return (TCA)ret;
1273 }
1275 void FreeStubList::push(TCA stub) {
1276 /*
1277 * A freed stub may be released by Treadmill more than once if multiple
1278 * threads execute the service request before it is freed. We detect
1279 * duplicates by marking freed stubs.
1280 */
1281 StubNode* n = reinterpret_cast<StubNode*>(stub);
1282 if (n->m_freed == kStubFree) {
1283 TRACE(1, "already freed stub %p\n", stub);
1284 return;
1285 }
1286 n->m_freed = kStubFree;
1287 n->m_next = m_list;
1288 TRACE(1, "free stub %p (-> %p)\n", stub, m_list);
1289 m_list = n;
1290 }
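// Sketch of the intrusive layout assumed above (StubNode is presumably
// declared in the corresponding header; field names follow the uses here):
//
//   struct StubNode {
//     StubNode* m_next;
//     uint64_t  m_freed;  // == kStubFree while the stub sits on the list
//   };
//
// A freed stub's own bytes store the list node, so tracking free stubs
// needs no extra allocation.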
1292 bool
1293 MCGenerator::freeRequestStub(TCA stub) {
1294 LeaseHolder writer(Translator::WriteLease());
1295 /*
1296 * If we can't acquire the write lock, the caller
1297 * (FreeRequestStubTrigger) retries.
1298 */
1299 if (!writer) return false;
1300 assert(code.frozen().contains(stub));
1301 m_freeStubs.push(stub);
1302 return true;
1305 TCA MCGenerator::getFreeStub(CodeBlock& frozen, CodeGenFixups* fixups) {
1306 TCA ret = m_freeStubs.maybePop();
1307 if (ret) {
1308 Stats::inc(Stats::Astub_Reused);
1309 always_assert(m_freeStubs.m_list == nullptr ||
1310 code.isValidCodeAddress(TCA(m_freeStubs.m_list)));
1311 TRACE(1, "recycle stub %p\n", ret);
1312 } else {
1313 ret = frozen.frontier();
1314 Stats::inc(Stats::Astub_New);
1315 TRACE(1, "alloc new stub %p\n", ret);
1317 if (fixups) {
1318 fixups->m_reusedStubs.emplace_back(ret);
1320 return ret;
1323 TCA MCGenerator::getTranslatedCaller() const {
1324 DECLARE_FRAME_POINTER(fp);
1325 ActRec* framePtr = fp; // can't directly mutate the register-mapped one
1326 for (; framePtr; framePtr = framePtr->m_sfp) {
1327 TCA rip = (TCA)framePtr->m_savedRip;
1328 if (isValidCodeAddress(rip)) {
1329 return rip;
1332 return nullptr;
1335 void
1336 MCGenerator::syncWork() {
1337 assert(tl_regState == VMRegState::DIRTY);
1338 m_fixupMap.fixup(g_context.getNoCheck());
1339 tl_regState = VMRegState::CLEAN;
1340 Stats::inc(Stats::TC_Sync);
1343 // Get the address of the literal val in the global data section.
1344 // If it's not there, add it to the map in m_fixups, which will
1345 // be committed to m_literals when m_fixups.process() is called.
1346 const uint64_t*
1347 MCGenerator::allocLiteral(uint64_t val) {
1348 auto it = m_literals.find(val);
1349 if (it != m_literals.end()) {
1350 assert(*it->second == val);
1351 return it->second;
1353 auto& pending = m_fixups.m_literals;
1354 it = pending.find(val);
1355 if (it != pending.end()) {
1356 assert(*it->second == val);
1357 return it->second;
1359 auto addr = allocData<uint64_t>(sizeof(uint64_t), 1);
1360 *addr = val;
1361 return pending[val] = addr;
1362 }
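// Illustrative use (hypothetical): rather than materializing an 8-byte
// immediate at every site, a code generator can share one data-section
// slot per distinct value:
//
//   auto const lit = mcg->allocLiteral(0x123456789abcdef0ULL);
//   // emit a RIP-relative load from `lit` wherever the constant is needed.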
1364 bool
1365 MCGenerator::reachedTranslationLimit(SrcKey sk,
1366 const SrcRec& srcRec) const {
1367 if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) {
1368 INC_TPC(max_trans);
1369 if (debug && Trace::moduleEnabled(Trace::mcg, 2)) {
1370 const auto& tns = srcRec.translations();
1371 TRACE(1, "Too many (%zd) translations: %s, BC offset %d\n",
1372 tns.size(), sk.unit()->filepath()->data(),
1373 sk.offset());
1374 SKTRACE(2, sk, "{\n");
1375 TCA topTrans = srcRec.getTopTranslation();
1376 for (size_t i = 0; i < tns.size(); ++i) {
1377 const TransRec* rec = m_tx.getTransRec(tns[i]);
1378 assert(rec);
1379 SKTRACE(2, sk, "%zd %p\n", i, tns[i]);
1380 if (tns[i] == topTrans) {
1381 SKTRACE(2, sk, "%zd: *Top*\n", i);
1383 if (rec->kind == TransKind::Anchor) {
1384 SKTRACE(2, sk, "%zd: Anchor\n", i);
1385 } else {
1386 SKTRACE(2, sk, "%zd: guards {\n", i);
1387 for (unsigned j = 0; j < rec->guards.size(); ++j) {
1388 FTRACE(2, "{}\n", rec->guards[j]);
1390 SKTRACE(2, sk, "%zd } guards\n", i);
1393 SKTRACE(2, sk, "} /* Too many translations */\n");
1395 return true;
1398 return false;
1401 void
1402 MCGenerator::recordSyncPoint(CodeAddress frontier, Offset pcOff, Offset spOff) {
1403 m_fixups.m_pendingFixups.push_back(
1404 PendingFixup(frontier, Fixup(pcOff, spOff)));
1405 }
1407 /*
1408 * Equivalent to container.clear(), but guarantees to free
1409 * any memory associated with the container (eg clear
1410 * doesn't affect std::vector's capacity).
1411 */
1412 template <typename T> void ClearContainer(T& container) {
1413 T().swap(container);
1414 }
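// Usage note (added): the swap-with-a-temporary idiom above is the standard
// way to force a container to release its storage. For example, given a
// std::vector<int> v with a large capacity():
//
//   ClearContainer(v);  // v is now empty and, in practice, capacity() == 0
//
// whereas v.clear() alone may keep the backing allocation around.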
1416 void
1417 CodeGenFixups::process_only(
1418 GrowableVector<IncomingBranch>* inProgressTailBranches) {
1419 for (uint i = 0; i < m_pendingFixups.size(); i++) {
1420 TCA tca = m_pendingFixups[i].m_tca;
1421 assert(mcg->isValidCodeAddress(tca));
1422 mcg->fixupMap().recordFixup(tca, m_pendingFixups[i].m_fixup);
1424 ClearContainer(m_pendingFixups);
1426 for (auto const& pair : m_pendingCatchTraces) {
1427 mcg->catchTraceMap().insert(pair.first, pair.second);
1429 ClearContainer(m_pendingCatchTraces);
1431 for (auto const& elm : m_pendingJmpTransIDs) {
1432 mcg->getJmpToTransIDMap().insert(elm);
1434 ClearContainer(m_pendingJmpTransIDs);
1436 mcg->literals().insert(m_literals.begin(), m_literals.end());
1437 ClearContainer(m_literals);
1439 if (inProgressTailBranches) {
1440 m_inProgressTailJumps.swap(*inProgressTailBranches);
1442 assert(m_inProgressTailJumps.empty());
1445 void CodeGenFixups::clear() {
1446 ClearContainer(m_pendingFixups);
1447 ClearContainer(m_pendingCatchTraces);
1448 ClearContainer(m_pendingJmpTransIDs);
1449 ClearContainer(m_reusedStubs);
1450 ClearContainer(m_addressImmediates);
1451 ClearContainer(m_codePointers);
1452 ClearContainer(m_bcMap);
1453 ClearContainer(m_alignFixups);
1454 ClearContainer(m_inProgressTailJumps);
1455 ClearContainer(m_literals);
1458 bool CodeGenFixups::empty() const {
1459 return
1460 m_pendingFixups.empty() &&
1461 m_pendingCatchTraces.empty() &&
1462 m_pendingJmpTransIDs.empty() &&
1463 m_reusedStubs.empty() &&
1464 m_addressImmediates.empty() &&
1465 m_codePointers.empty() &&
1466 m_bcMap.empty() &&
1467 m_alignFixups.empty() &&
1468 m_inProgressTailJumps.empty() &&
1469 m_literals.empty();
1470 }
1472 TCA
1473 MCGenerator::translateWork(const TranslArgs& args) {
1474 Timer _t(Timer::translate);
1475 auto sk = args.sk;
1477 SKTRACE(1, sk, "translateWork\n");
1478 assert(m_tx.getSrcDB().find(sk));
1480 if (args.align) {
1481 mcg->backEnd().moveToAlign(code.main(),
1482 MoveToAlignFlags::kNonFallthroughAlign);
1485 TCA start = code.main().frontier();
1486 TCA coldStart = code.cold().frontier();
1487 TCA realColdStart = code.realCold().frontier();
1488 TCA DEBUG_ONLY frozenStart = code.frozen().frontier();
1489 TCA realFrozenStart = code.realFrozen().frontier();
1490 SrcRec& srcRec = *m_tx.getSrcRec(sk);
1491 TransKind transKindToRecord = TransKind::Interp;
1492 UndoMarker undoA(code.main());
1493 UndoMarker undoAcold(code.cold());
1494 UndoMarker undoAfrozen(code.frozen());
1495 UndoMarker undoGlobalData(code.data());
1497 setUseLLVM(
1498 // HHIRBytecodeControlFlow causes vmsp stack manipulations we can't handle
1499 // right now: t4810319
1500 !RuntimeOption::EvalHHIRBytecodeControlFlow &&
1501 (RuntimeOption::EvalJitLLVM > 1 ||
1502 (RuntimeOption::EvalJitLLVM && m_tx.mode() == TransKind::Optimize))
1504 SCOPE_EXIT {
1505 setUseLLVM(false);
1508 auto resetState = [&] {
1509 undoA.undo();
1510 undoAcold.undo();
1511 undoAfrozen.undo();
1512 undoGlobalData.undo();
1513 m_fixups.clear();
1516 auto assertCleanState = [&] {
1517 assert(code.main().frontier() == start);
1518 assert(code.frozen().frontier() == frozenStart);
1519 assert(m_fixups.empty());
1522 PostConditions pconds;
1523 RegionDescPtr region;
1524 if (!reachedTranslationLimit(sk, srcRec)) {
1525 // Attempt to create a region at this SrcKey
1526 if (m_tx.mode() == TransKind::Optimize) {
1527 assert(RuntimeOption::EvalJitPGO);
1528 region = args.region;
1529 if (region) {
1530 assert(!region->empty());
1531 } else {
1532 assert(isValidTransID(args.transId));
1533 region = selectHotRegion(args.transId, this);
1534 assert(region);
1535 if (region && region->empty()) region = nullptr;
1537 } else {
1538 assert(m_tx.mode() == TransKind::Profile ||
1539 m_tx.mode() == TransKind::Live);
1540 RegionContext rContext { sk.func(), sk.offset(), liveSpOff(),
1541 sk.resumed() };
1542 FTRACE(2, "populating live context for region\n");
1543 populateLiveContext(rContext);
1544 region = selectRegion(rContext, m_tx.mode());
1547 auto result = TranslateResult::Retry;
1548 auto regionInterps = RegionBlacklist{};
1549 auto const initSpOffset = region ? region->entry()->initialSpOffset()
1550 : liveSpOff();
1552 while (region && result == TranslateResult::Retry) {
1553 auto const transContext = TransContext {
1554 RuntimeOption::EvalJitPGO
1555 ? m_tx.profData()->curTransID()
1556 : kInvalidTransID,
1557 sk.offset(),
1558 initSpOffset,
1559 sk.resumed(),
1560 sk.func(),
1561 region.get()
1564 HTS hhbcTrans { transContext };
1565 FTRACE(1, "{}{:-^40}{}\n",
1566 color(ANSI_COLOR_BLACK, ANSI_BGCOLOR_GREEN),
1567 " HHIR during translation ",
1568 color(ANSI_COLOR_END));
1570 try {
1571 assertCleanState();
1572 result = translateRegion(hhbcTrans, *region, regionInterps, args.flags,
1573 pconds);
1574 FTRACE(2, "translateRegion finished with result {}\n", show(result));
1575 } catch (const std::exception& e) {
1576 FTRACE(1, "translateRegion failed with '{}'\n", e.what());
1577 result = TranslateResult::Failure;
1580 if (result != TranslateResult::Success) {
1581 // Translation failed or will be retried. Free resources for this
1582 // trace, rollback the translation cache frontiers, and discard any
1583 // pending fixups.
1584 resetState();
1587 if (result == TranslateResult::Failure) {
1588 // If the region translator failed for an Optimize translation, it's OK
1589 // to do a Live translation for the function entry. Otherwise, fall
1590 // back to Interp.
1591 if (m_tx.mode() == TransKind::Optimize) {
1592 if (sk.getFuncId() == liveFunc()->getFuncId() &&
1593 liveUnit()->contains(vmpc()) &&
1594 sk.offset() == liveUnit()->offsetOf(vmpc()) &&
1595 sk.resumed() == liveResumed()) {
1596 m_tx.setMode(TransKind::Live);
1597 RegionContext rContext { sk.func(), sk.offset(), liveSpOff(),
1598 sk.resumed() };
1599 FTRACE(2, "populating live context for region after failed "
1600 "optimize translation\n");
1601 populateLiveContext(rContext);
1602 region = selectRegion(rContext, m_tx.mode());
1603 } else {
1604 region.reset();
1610 if (!region) m_tx.setMode(TransKind::Interp);
1612 if (result == TranslateResult::Success) {
1613 assert(m_tx.mode() == TransKind::Live ||
1614 m_tx.mode() == TransKind::Profile ||
1615 m_tx.mode() == TransKind::Optimize);
1616 transKindToRecord = m_tx.mode();
1620 if (args.dryRun) {
1621 resetState();
1622 return start;
1625 if (transKindToRecord == TransKind::Interp) {
1626 assertCleanState();
1627 FTRACE(1, "emitting dispatchBB interp request for failed translation\n");
1628 backEnd().emitInterpReq(code.main(), code.cold(), sk);
1629 // Fall through.
1632 if (args.align) {
1633 m_fixups.m_alignFixups.emplace(
1634 start, std::make_pair(backEnd().cacheLineSize() - 1, 0));
1637 if (RuntimeOption::EvalProfileBC) {
1638 auto* unit = sk.unit();
1639 TransBCMapping prev{};
1640 for (auto& cur : m_fixups.m_bcMap) {
1641 if (!cur.aStart) continue;
1642 if (prev.aStart) {
1643 if (prev.bcStart < unit->bclen()) {
1644 recordBCInstr(unit->entry()[prev.bcStart],
1645 prev.aStart, cur.aStart, false);
1647 } else {
1648 recordBCInstr(OpTraceletGuard, start, cur.aStart, false);
1650 prev = cur;
1654 recordGdbTranslation(sk, sk.func(), code.main(), start,
1655 false, false);
1656 recordGdbTranslation(sk, sk.func(), code.cold(), coldStart,
1657 false, false);
1658 if (RuntimeOption::EvalJitPGO) {
1659 if (transKindToRecord == TransKind::Profile) {
1660 always_assert(region);
1661 m_tx.profData()->addTransProfile(region, pconds);
1662 } else {
1663 m_tx.profData()->addTransNonProf(transKindToRecord, sk);
1667 TransRec tr(sk, transKindToRecord,
1668 start, code.main().frontier() - start,
1669 realColdStart, code.realCold().frontier() - realColdStart,
1670 realFrozenStart, code.realFrozen().frontier() - realFrozenStart,
1671 region, m_fixups.m_bcMap,
1672 useLLVM());
1673 m_tx.addTranslation(tr);
1674 if (RuntimeOption::EvalJitUseVtuneAPI) {
1675 reportTraceletToVtune(sk.unit(), sk.func(), tr);
1678 GrowableVector<IncomingBranch> inProgressTailBranches;
1679 m_fixups.process(&inProgressTailBranches);
1681 // SrcRec::newTranslation() makes this code reachable. Do this last;
1682 // otherwise there's some chance of hitting in the reader threads whose
1683 // metadata is not yet visible.
1684 TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n",
1685 start, sk.getFuncId(), sk.offset());
1686 srcRec.newTranslation(start, inProgressTailBranches);
1688 TRACE(1, "mcg: %zd-byte tracelet\n", code.main().frontier() - start);
1689 if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
1690 Trace::traceRelease("%s", getUsageString().c_str());
1693 return start;
1696 void MCGenerator::traceCodeGen(HTS& hts) {
1697 auto& unit = hts.unit;
1699 auto finishPass = [&](const char* msg, int level) {
1700 printUnit(level, unit, msg, nullptr, hts.irb->guards());
1701 assert(checkCfg(unit));
1704 finishPass(" after initial translation ", kIRLevel);
1706 always_assert_flog(
1707 IMPLIES(cfgHasLoop(unit), RuntimeOption::EvalJitLoops),
1708 "IRUnit has loop but Eval.JitLoops=0"
1711 optimize(unit, *hts.irb, m_tx.mode());
1712 finishPass(" after optimizing ", kOptLevel);
1714 always_assert(this == mcg);
1715 genCode(unit);
1717 m_numTrans++;
1718 assert(m_numTrans <= RuntimeOption::EvalJitGlobalTranslationLimit);
1721 MCGenerator::MCGenerator()
1722 : m_backEnd(newBackEnd())
1723 , m_numTrans(0)
1724 , m_catchTraceMap(128)
1726 TRACE(1, "MCGenerator@%p startup\n", this);
1727 mcg = this;
1729 m_unwindRegistrar = register_unwind_region(code.base(), code.codeSize());
1731 static bool profileUp = false;
1732 if (!profileUp) {
1733 profileInit();
1734 profileUp = true;
1737 if (Trace::moduleEnabledRelease(Trace::printir) &&
1738 !RuntimeOption::EvalJit) {
1739 Trace::traceRelease("TRACE=printir is set but the jit isn't on. "
1740 "Did you mean to run with -vEval.Jit=1?\n");
1742 if (Trace::moduleEnabledRelease(Trace::llvm, 1) ||
1743 RuntimeOption::EvalJitLLVMCounters) {
1744 g_bytecodesVasm.bind();
1745 g_bytecodesLLVM.bind();
1749 void MCGenerator::initUniqueStubs() {
1750 // Put the following stubs into ahot, rather than a.
1751 CodeCache::Selector cbSel(CodeCache::Selector::Args(code).
1752 hot(m_tx.useAHot()));
1753 m_tx.uniqueStubs = backEnd().emitUniqueStubs();
1754 m_fixups.process(nullptr); // in case we generated literals
1757 void MCGenerator::registerCatchBlock(CTCA ip, TCA block) {
1758 FTRACE(1, "registerCatchBlock: afterCall: {} block: {}\n", ip, block);
1759 m_fixups.m_pendingCatchTraces.emplace_back(ip, block);
1762 folly::Optional<TCA> MCGenerator::getCatchTrace(CTCA ip) const {
1763 TCA* found = m_catchTraceMap.find(ip);
1764 if (found) return *found;
1765 return folly::none;
1768 void MCGenerator::codeEmittedThisRequest(size_t& requestEntry,
1769 size_t& now) const {
1770 requestEntry = s_initialTCSize;
1771 now = code.totalUsed();
1774 void MCGenerator::requestInit() {
1775 tl_regState = VMRegState::CLEAN;
1776 Timer::RequestInit();
1777 memset(&s_perfCounters, 0, sizeof(s_perfCounters));
1778 Stats::init();
1779 s_initialTCSize = code.totalUsed();
1782 void MCGenerator::requestExit() {
1783 always_assert(!Translator::WriteLease().amOwner());
1784 TRACE_MOD(txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
1785 " kept, %15" PRId64 " grabbed\n",
1786 Process::GetThreadIdForTrace(), Translator::WriteLease().m_hintKept,
1787 Translator::WriteLease().m_hintGrabbed);
1788 Stats::dump();
1789 Stats::clear();
1790 Timer::RequestExit();
1792 if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
1793 Trace::traceRelease("MCGenerator perf counters for %s:\n",
1794 g_context->getRequestUrl(50).c_str());
1795 for (int i = 0; i < tpc_num_counters; i++) {
1796 Trace::traceRelease("%-20s %10" PRId64 "\n",
1797 kPerfCounterNames[i], s_perfCounters[i]);
1799 Trace::traceRelease("\n");
1802 if (Trace::moduleEnabledRelease(Trace::llvm, 1)) {
1803 auto llvm = *g_bytecodesLLVM;
1804 auto total = llvm + *g_bytecodesVasm;
1805 Trace::ftraceRelease(
1806 "{:9} / {:9} bytecodes ({:6.2f}%) handled by LLVM backend for {}\n",
1807 llvm, total, llvm * 100.0 / total, g_context->getRequestUrl(50)
1812 bool
1813 MCGenerator::isPseudoEvent(const char* event) {
1814 for (auto name : kPerfCounterNames) {
1815 if (!strcmp(event, name)) {
1816 return true;
1819 return false;
1822 void
1823 MCGenerator::getPerfCounters(Array& ret) {
1824 for (int i = 0; i < tpc_num_counters; i++) {
1825 // Until Perflab can automatically scale the values we give it to
1826 // an appropriate range, we have to fudge these numbers so they
1827 // look more like reasonable hardware counter values.
1828 ret.set(String::FromCStr(kPerfCounterNames[i]),
1829 s_perfCounters[i] * 1000);
1832 for (auto const& pair : Timer::Counters()) {
1833 if (pair.second.total == 0 && pair.second.count == 0) continue;
1835 ret.set(String("jit_time_") + pair.first, pair.second.total);
1838 if (RuntimeOption::EvalJitLLVMCounters) {
1839 ret.set(String("jit_instr_vasm"), *g_bytecodesVasm);
1840 ret.set(String("jit_instr_llvm"), *g_bytecodesLLVM);
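// The array produced above thus carries three kinds of entries, derived
// directly from the code (shapes illustrative):
//
//   kPerfCounterNames[i]              => s_perfCounters[i] * 1000
//   "jit_time_" + <timer name>        => cumulative Timer total
//   "jit_instr_vasm" / "jit_instr_llvm" => per-backend bytecode counts
//                                          (only with Eval.JitLLVMCounters)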
1844 MCGenerator::~MCGenerator() {
1847 static Debug::TCRange rangeFrom(const CodeBlock& cb, const TCA addr,
1848 bool isAcold) {
1849 assert(cb.contains(addr));
1850 return Debug::TCRange(addr, cb.frontier(), isAcold);
1853 void MCGenerator::recordBCInstr(uint32_t op,
1854 const TCA addr,
1855 const TCA end,
1856 bool cold) {
1857 if (addr != end) {
1858 m_debugInfo.recordBCInstr(
1859 Debug::TCRange(addr, end, cold), op);
1863 void MCGenerator::recordGdbTranslation(SrcKey sk,
1864 const Func* srcFunc,
1865 const CodeBlock& cb,
1866 const TCA start,
1867 bool exit,
1868 bool inPrologue) {
1869 if (start != cb.frontier()) {
1870 assert(Translator::WriteLease().amOwner());
1871 if (!RuntimeOption::EvalJitNoGdb) {
1872 m_debugInfo.recordTracelet(rangeFrom(cb, start, &cb == &code.cold()),
1873 srcFunc,
1874 reinterpret_cast<const Op*>(
1875 srcFunc->unit() ?
1876 srcFunc->unit()->at(sk.offset()) : nullptr
1878 exit, inPrologue);
1880 if (RuntimeOption::EvalPerfPidMap) {
1881 m_debugInfo.recordPerfMap(rangeFrom(cb, start, &cb == &code.cold()),
1882 srcFunc, exit, inPrologue);
1887 void MCGenerator::recordGdbStub(const CodeBlock& cb,
1888 const TCA start, const char* name) {
1889 if (!RuntimeOption::EvalJitNoGdb) {
1890 m_debugInfo.recordStub(rangeFrom(cb, start, &cb == &code.cold()),
1891 name);
1895 std::vector<UsageInfo> MCGenerator::getUsageInfo() {
1896 std::vector<UsageInfo> tcUsageInfo;
1897 code.forEachBlock([&](const char* name, const CodeBlock& a) {
1898 tcUsageInfo.emplace_back(UsageInfo{std::string("code.") + name,
1899 a.used(),
1900 a.capacity(),
1901 true});
1903 tcUsageInfo.emplace_back(UsageInfo{
1904 "data",
1905 code.data().used(),
1906 code.data().capacity(),
1907 true});
1908 tcUsageInfo.emplace_back(UsageInfo{
1909 "RDS",
1910 rds::usedBytes(),
1911 RuntimeOption::EvalJitTargetCacheSize * 3 / 4,
1912 false});
1913 tcUsageInfo.emplace_back(UsageInfo{
1914 "RDSLocal",
1915 rds::usedLocalBytes(),
1916 RuntimeOption::EvalJitTargetCacheSize * 3 / 4,
1917 false});
1918 tcUsageInfo.emplace_back(UsageInfo{
1919 "persistentRDS",
1920 rds::usedPersistentBytes(),
1921 RuntimeOption::EvalJitTargetCacheSize / 4,
1922 false});
1923 tcUsageInfo.emplace_back(UsageInfo{
1924 "cloned-closures",
1925 Func::s_totalClonedClosures,
1926 100000 /* dummy value -- there isn't really a capacity for this */});
1927 return tcUsageInfo;
1930 std::string MCGenerator::getUsageString() {
1931 std::string usage;
1932 size_t totalBlockSize = 0;
1933 size_t totalBlockCapacity = 0;
1934 auto addRow = [&](UsageInfo blockUsageInfo) {
1935 auto percent = blockUsageInfo.m_capacity ?
1936 100 * blockUsageInfo.m_used / blockUsageInfo.m_capacity : 0;
1937 usage += folly::format("mcg: {:9} bytes ({}%) in {}\n",
1938 blockUsageInfo.m_used,
1939 percent,
1940 blockUsageInfo.m_name).str();
1941 if (blockUsageInfo.m_global) {
1942 totalBlockSize += blockUsageInfo.m_used;
1943 totalBlockCapacity += blockUsageInfo.m_capacity;
1946 auto tcUsageInfo = getUsageInfo();
1947 for_each(tcUsageInfo.begin(), tcUsageInfo.end(), addRow);
1948 addRow(UsageInfo{"total", totalBlockSize, totalBlockCapacity, false});
1949 return usage;
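// A representative line of the report built above (numbers are made up):
//
//   mcg:    123456 bytes (12%) in code.main
//
// A synthetic "total" row summing the global blocks is appended last.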
1952 std::string MCGenerator::getTCAddrs() {
1953 std::string addrs;
1954 code.forEachBlock([&](const char* name, const CodeBlock& a) {
1955 addrs += folly::format("{}: {}\n", name, a.base()).str();
1957 return addrs;
1960 bool MCGenerator::addDbgGuards(const Unit* unit) {
1961 // TODO: refactor. This grabs the write lease and iterates through the
1962 // whole SrcDB...
1963 struct timespec tsBegin, tsEnd;
1965 BlockingLeaseHolder writer(Translator::WriteLease());
1966 if (!writer) {
1967 return false;
1969 assert(mcg->cgFixups().empty());
1970 HPHP::Timer::GetMonotonicTime(tsBegin);
1971 // The docs say even find() _could_ invalidate the iterator; in practice
1972 // it should be very rare, so go with it for now.
1973 for (SrcDB::const_iterator it = m_tx.getSrcDB().begin();
1974 it != m_tx.getSrcDB().end(); ++it) {
1975 SrcKey const sk = SrcKey::fromAtomicInt(it->first);
1976 // We may have a SrcKey to a deleted function. NB: this may miss a
1977 // race with deleting a Func. See task #2826313.
1978 if (!Func::isFuncIdValid(sk.getFuncId())) continue;
1979 SrcRec* sr = it->second;
1980 if (sr->unitMd5() == unit->md5() &&
1981 !sr->hasDebuggerGuard() &&
1982 m_tx.isSrcKeyInBL(sk)) {
1983 addDbgGuardImpl(sk, sr);
1986 mcg->cgFixups().process(nullptr);
1988 HPHP::Timer::GetMonotonicTime(tsEnd);
1989 int64_t elapsed = gettime_diff_us(tsBegin, tsEnd);
1990 if (Trace::moduleEnabledRelease(Trace::mcg, 5)) {
1991 Trace::traceRelease("addDbgGuards got lease for %" PRId64 " us\n", elapsed);
1993 return true;
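// Install a debugger guard for a single bytecode location. Returns true
// if the guard is already in place or there is no translation yet to
// guard; returns false if the SrcKey isn't blacklisted (debug builds) or
// the write lease can't be acquired.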
1996 bool MCGenerator::addDbgGuard(const Func* func, Offset offset, bool resumed) {
1997 SrcKey sk(func, offset, resumed);
1999 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
2000 if (sr->hasDebuggerGuard()) {
2001 return true;
2003 } else {
2004 // no translation yet
2005 return true;
2008 if (debug) {
2009 if (!m_tx.isSrcKeyInBL(sk)) {
2010 TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
2011 return false;
2014 BlockingLeaseHolder writer(Translator::WriteLease());
2015 if (!writer) {
2016 return false;
2018 assert(mcg->cgFixups().empty());
2020 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
2021 addDbgGuardImpl(sk, sr);
2024 mcg->cgFixups().process(nullptr);
2025 return true;
2028 bool MCGenerator::dumpTCCode(const char* filename) {
2029 #define OPEN_FILE(F, SUFFIX) \
2030 std::string F ## name = std::string(filename).append(SUFFIX); \
2031 FILE* F = fopen(F ## name .c_str(),"wb"); \
2032 if (F == nullptr) return false; \
2033 SCOPE_EXIT{ fclose(F); };
2035 OPEN_FILE(ahotFile, "_ahot");
2036 OPEN_FILE(aFile, "_a");
2037 OPEN_FILE(aprofFile, "_aprof");
2038 OPEN_FILE(acoldFile, "_acold");
2039 OPEN_FILE(afrozenFile, "_afrozen");
2040 OPEN_FILE(helperAddrFile, "_helpers_addrs.txt");
2042 #undef OPEN_FILE
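// For reference, OPEN_FILE(aFile, "_a") above expands to roughly:
//
//   std::string aFilename = std::string(filename).append("_a");
//   FILE* aFile = fopen(aFilename.c_str(), "wb");
//   if (aFile == nullptr) return false;
//   SCOPE_EXIT { fclose(aFile); };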
2044 // Dump each code block in turn, starting from the hot region.
2045 size_t count = code.hot().used();
2046 bool result = (fwrite(code.hot().base(), 1, count, ahotFile) == count);
2047 if (result) {
2048 count = code.main().used();
2049 result = (fwrite(code.main().base(), 1, count, aFile) == count);
2051 if (result) {
2052 count = code.prof().used();
2053 result = (fwrite(code.prof().base(), 1, count, aprofFile) == count);
2055 if (result) {
2056 count = code.cold().used();
2057 result = (fwrite(code.cold().base(), 1, count, acoldFile) == count);
2059 if (result) {
2060 count = code.frozen().used();
2061 result = (fwrite(code.frozen().base(), 1, count, afrozenFile) == count);
2063 return result;
2066 // Returns true on success
2067 bool MCGenerator::dumpTC(bool ignoreLease) {
2068 folly::Optional<BlockingLeaseHolder> writer;
2069 if (!ignoreLease) {
2070 writer.emplace(Translator::WriteLease());
2071 if (!*writer) return false;
2073 bool success = dumpTCData();
2074 if (success) {
2075 success = dumpTCCode("/tmp/tc_dump");
2077 return success;
2080 // Returns true on success
2081 bool tc_dump(void) {
2082 return mcg && mcg->dumpTC();
2085 // Returns true on success
2086 bool MCGenerator::dumpTCData() {
2087 gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
2088 if (!tcDataFile) return false;
2090 if (!gzprintf(tcDataFile,
2091 "repo_schema = %s\n"
2092 "ahot.base = %p\n"
2093 "ahot.frontier = %p\n"
2094 "a.base = %p\n"
2095 "a.frontier = %p\n"
2096 "aprof.base = %p\n"
2097 "aprof.frontier = %p\n"
2098 "acold.base = %p\n"
2099 "acold.frontier = %p\n"
2100 "afrozen.base = %p\n"
2101 "afrozen.frontier = %p\n\n",
2102 kRepoSchemaId,
2103 code.hot().base(), code.hot().frontier(),
2104 code.main().base(), code.main().frontier(),
2105 code.prof().base(), code.prof().frontier(),
2106 code.cold().base(), code.cold().frontier(),
2107 code.frozen().base(), code.frozen().frontier())) {
2108 return false;
2111 if (!gzprintf(tcDataFile, "total_translations = %zu\n\n",
2112 m_tx.getCurrentTransID())) {
2113 return false;
2116 for (TransID t = 0; t < m_tx.getCurrentTransID(); t++) {
2117 if (gzputs(tcDataFile,
2118 m_tx.getTransRec(t)->print(m_tx.getTransCounter(t)).c_str()) ==
2119 -1) {
2120 return false;
2124 gzclose(tcDataFile);
2125 return true;
2128 void MCGenerator::invalidateSrcKey(SrcKey sk) {
2129 assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO);
2130 assert(Translator::WriteLease().amOwner());
2131 /*
2132 * Reroute existing translations for SrcKey to an as-yet indeterminate
2133 * new one.
2134 */
2135 SrcRec* sr = m_tx.getSrcDB().find(sk);
2136 assert(sr);
2137 /*
2138 * Since previous translations aren't reachable from here, we know we
2139 * just created some garbage in the TC. We currently have no mechanism
2140 * to reclaim this.
2141 */
2142 sr->replaceOldTranslations();
2145 void MCGenerator::setJmpTransID(TCA jmp) {
2146 if (m_tx.mode() != TransKind::Profile) return;
2148 TransID transId = m_tx.profData()->curTransID();
2149 FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp, transId);
2150 m_fixups.m_pendingJmpTransIDs.emplace_back(jmp, transId);
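// RelocationInfo maps addresses in the original code to addresses in the
// relocated code. Each entry stores a pair of targets because one address
// can be both the end of one recorded range and the start of the next,
// and the two views may land at different relocated addresses (worked
// example after adjustedAddressBefore() below).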
2153 void RelocationInfo::recordRange(TCA start, TCA end,
2154 TCA destStart, TCA destEnd) {
2155 m_srcRanges.emplace_back(start, end);
2156 m_dstRanges.emplace_back(destStart, destEnd);
2157 m_adjustedAddresses[start].second = destStart;
2158 m_adjustedAddresses[end].first = destEnd;
2161 void RelocationInfo::recordAddress(TCA src, TCA dest, int range) {
2162 m_adjustedAddresses.emplace(src, std::make_pair(dest, dest + range));
2165 TCA RelocationInfo::adjustedAddressAfter(TCA addr) const {
2166 auto it = m_adjustedAddresses.find(addr);
2167 if (it == m_adjustedAddresses.end()) return nullptr;
2169 return it->second.second;
2172 TCA RelocationInfo::adjustedAddressBefore(TCA addr) const {
2173 auto it = m_adjustedAddresses.find(addr);
2174 if (it == m_adjustedAddresses.end()) return nullptr;
2176 return it->second.first;
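// Worked example for the pair convention (hypothetical addresses): after
// recordRange(0x100, 0x120, 0x500, 0x530),
//
//   adjustedAddressAfter(0x100)  == 0x500   // 0x100 as a range start
//   adjustedAddressBefore(0x120) == 0x530   // 0x120 as a range end
//
// whereas adjustedAddressBefore(0x100) and adjustedAddressAfter(0x120)
// remain nullptr unless an adjacent range records them.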
2179 void RelocationInfo::rewind(TCA start, TCA end) {
2180 if (m_srcRanges.size() && m_srcRanges.back().first == start) {
2181 assert(m_dstRanges.size() == m_srcRanges.size());
2182 assert(m_srcRanges.back().second == end);
2183 m_srcRanges.pop_back();
2184 m_dstRanges.pop_back();
2186 auto it = m_adjustedAddresses.lower_bound(start);
2187 if (it == m_adjustedAddresses.end()) return;
2188 if (it->first == start) {
2189 // If it->second.first is set, start is also the end
2190 // of an existing region; don't erase it in that case.
2191 if (it->second.first) {
2192 it++->second.second = 0;
2193 } else {
2194 m_adjustedAddresses.erase(it++);
2197 while (it != m_adjustedAddresses.end() && it->first < end) {
2198 m_adjustedAddresses.erase(it++);
2200 if (it == m_adjustedAddresses.end()) return;
2201 if (it->first == end) {
2202 // Similar to start above, end could be the start of an
2203 // existing region.
2204 if (it->second.second) {
2205 it++->second.first = 0;
2206 } else {
2207 m_adjustedAddresses.erase(it++);
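// Both emitIncStat() overloads below bump a thread-local Stats counter
// from generated code: they compute the counter's displacement from the
// thread-local-storage base and emit an FS-segment-relative memory add
// (via the back end in the first overload, directly as vasm in the
// second).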
2212 void
2213 emitIncStat(CodeBlock& cb, uint64_t* tl_table, uint index, int n, bool force) {
2214 if (!force && !Stats::enabled()) return;
2215 intptr_t disp = uintptr_t(&tl_table[index]) - tlsBase();
2217 mcg->backEnd().emitIncStat(cb, disp, n);
2220 void emitIncStat(Vout& v, Stats::StatCounter stat, int n, bool force) {
2221 if (!force && !Stats::enabled()) return;
2222 intptr_t disp = uintptr_t(&Stats::tl_counters[stat]) - tlsBase();
2223 v << addqim{n, Vptr{baseless(disp), Vptr::FS}, v.makeReg()};
2226 // Generic vasm service-request generator. Target-specific details
2227 // are hidden by the svcreq{} instruction.
2228 void emitServiceReq(Vout& v, TCA stub_block,
2229 ServiceRequest req, const ServiceReqArgVec& argv) {
2230 TRACE(3, "Emit Service Req %s(", serviceReqName(req));
2231 VregList args;
2232 for (auto& argInfo : argv) {
2233 switch (argInfo.m_kind) {
2234 case ServiceReqArgInfo::Immediate: {
2235 TRACE(3, "%" PRIx64 ", ", argInfo.m_imm);
2236 args.push_back(v.cns(argInfo.m_imm));
2237 break;
2239 default: {
2240 always_assert(false);
2241 break;
2245 v << svcreq{req, v.makeTuple(args), stub_block};
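// A minimal usage sketch, assuming the packServiceReqArgs() helper from
// service-requests-inline.h and using REQ_RETRANSLATE purely as an
// illustration:
//
//   ServiceReqArgVec argv;
//   packServiceReqArgs(argv, bcOffset);
//   emitServiceReq(v, stub_block, REQ_RETRANSLATE, argv);
//
// Note that only immediate arguments are handled here; any other
// ServiceReqArgInfo kind hits the always_assert above.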
2248 } // HPHP::jit
2250 } // HPHP