Remove JIT:: in some places where it's unnecessary
[hiphop-php.git] / hphp / runtime / vm / jit / mc-generator.cpp
blob 2fd69d6ccaa0947568e2679b7926fd38522b609e
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/vtune-jit.h"

#include "folly/MapUtil.h"

#include <cinttypes>
#include <stdint.h>
#include <assert.h>
#include <unistd.h>
#include <sys/mman.h>
#include <strstream>
#include <stdio.h>
#include <stdarg.h>
#include <string>
#include <queue>
#include <unwind.h>
#include <unordered_set>
#ifdef __FreeBSD__
#include <sys/ucontext.h>
#endif
#ifdef __FreeBSD__
#define RIP_REGISTER(v) (v).mc_rip
#elif defined(__APPLE__)
#define RIP_REGISTER(v) (v)->__ss.__rip
#elif defined(__x86_64__)
#define RIP_REGISTER(v) (v).gregs[REG_RIP]
#elif defined(__AARCH64EL__)
#define RIP_REGISTER(v) (v).pc
#else
#error How is rip accessed on this architecture?
#endif
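// Illustrative sketch (not part of this file): on x86_64 Linux the macro
// above would typically be applied to the mcontext of a ucontext_t handed
// to an SA_SIGINFO signal handler. The handler below is hypothetical.
//
//   static void segvHandler(int sig, siginfo_t* info, void* ctx) {
//     auto* uc = static_cast<ucontext_t*>(ctx);
//     uintptr_t faultingIP = (uintptr_t)RIP_REGISTER(uc->uc_mcontext);
//     // e.g. check whether faultingIP lies inside the translation cache
//   }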
#include <boost/bind.hpp>
#include <boost/utility/typed_in_place_factory.hpp>
#include <boost/range/adaptors.hpp>
#include <boost/scoped_ptr.hpp>
#include <algorithm>
#include <exception>
#include <memory>
#include <vector>

#include "folly/Format.h"
#include "folly/String.h"

#include "hphp/util/abi-cxx.h"
#include "hphp/util/asm-x64.h"
#include "hphp/util/bitops.h"
#include "hphp/util/debug.h"
#include "hphp/util/disasm.h"
#include "hphp/util/maphuge.h"
#include "hphp/util/rank.h"
#include "hphp/util/ringbuffer.h"
#include "hphp/util/timer.h"
#include "hphp/util/trace.h"
#include "hphp/util/meta.h"
#include "hphp/util/process.h"
#include "hphp/util/repo-schema.h"
#include "hphp/util/cycles.h"

#include "hphp/vixl/a64/decoder-a64.h"
#include "hphp/vixl/a64/disasm-a64.h"
#include "hphp/vixl/a64/macro-assembler-a64.h"
#include "hphp/vixl/a64/simulator-a64.h"

#include "hphp/runtime/vm/jit/abi-arm.h"
#include "hphp/runtime/vm/jit/arch.h"
#include "hphp/runtime/vm/jit/unique-stubs-arm.h"
#include "hphp/runtime/vm/jit/unique-stubs-x64.h"
#include "hphp/runtime/vm/bytecode.h"
#include "hphp/runtime/vm/php-debug.h"
#include "hphp/runtime/vm/runtime.h"
#include "hphp/runtime/base/complex-types.h"
#include "hphp/runtime/base/execution-context.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/runtime-option-guard.h"
#include "hphp/runtime/base/strings.h"
#include "hphp/runtime/server/source-root-info.h"
#include "hphp/runtime/base/zend-string.h"
#include "hphp/runtime/ext/ext_closure.h"
#include "hphp/runtime/ext/ext_continuation.h"
#include "hphp/runtime/ext/ext_function.h"
#include "hphp/runtime/vm/debug/debug.h"
#include "hphp/runtime/base/stats.h"
#include "hphp/runtime/vm/pendq.h"
#include "hphp/runtime/vm/srckey.h"
#include "hphp/runtime/vm/treadmill.h"
#include "hphp/runtime/vm/repo.h"
#include "hphp/runtime/vm/type-profile.h"
#include "hphp/runtime/vm/member-operations.h"
#include "hphp/runtime/vm/jit/abi-x64.h"
#include "hphp/runtime/vm/jit/check.h"
#include "hphp/runtime/vm/jit/hhbc-translator.h"
#include "hphp/runtime/vm/jit/ir-translator.h"
#include "hphp/runtime/vm/jit/normalized-instruction.h"
#include "hphp/runtime/vm/jit/opt.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/region-selection.h"
#include "hphp/runtime/vm/jit/srcdb.h"
#include "hphp/runtime/base/rds.h"
#include "hphp/runtime/vm/jit/tracelet.h"
#include "hphp/runtime/vm/jit/translator-inline.h"
#include "hphp/runtime/vm/jit/unwind-arm.h"
#include "hphp/runtime/vm/jit/unwind-x64.h"
#include "hphp/runtime/vm/jit/code-gen-helpers-arm.h"
#include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"
#include "hphp/runtime/vm/jit/code-gen.h"
#include "hphp/runtime/vm/jit/service-requests-x64.h"
#include "hphp/runtime/vm/jit/jump-smash.h"
#include "hphp/runtime/vm/jit/func-prologues.h"
#include "hphp/runtime/vm/jit/func-prologues-x64.h"
#include "hphp/runtime/vm/jit/func-prologues-arm.h"
#include "hphp/runtime/vm/jit/debug-guards.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/unwind.h"

#include "hphp/runtime/vm/jit/mc-generator-internal.h"
namespace HPHP {
namespace JIT {

TRACE_SET_MOD(mcg);

using namespace reg;
using namespace Trace;
using std::max;
#define TRANS_PERF_COUNTERS \
  TPC(translate) \
  TPC(retranslate) \
  TPC(interp_bb) \
  TPC(interp_instr) \
  TPC(interp_one) \
  TPC(max_trans) \
  TPC(enter_tc) \
  TPC(service_req)
static const char* const kInstrCountMCGName = "instr_mcg";
static const char* const kInstrCountIRName = "instr_hhir";

#define TPC(n) "jit_" #n,
static const char* const kPerfCounterNames[] = {
  TRANS_PERF_COUNTERS
  kInstrCountMCGName,
  kInstrCountIRName,
};
#undef TPC

#define TPC(n) tpc_ ## n,
enum TransPerfCounter {
  TRANS_PERF_COUNTERS
  tpc_num_counters
};
#undef TPC
static __thread int64_t s_perfCounters[tpc_num_counters];
#define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
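// A minimal sketch of how the X-macro machinery above expands (illustrative
// only). Because both the name table and the enum are generated from the
// same TRANS_PERF_COUNTERS list, they stay in sync by construction:
//
//   kPerfCounterNames[tpc_translate]   == "jit_translate"
//   kPerfCounterNames[tpc_retranslate] == "jit_retranslate"
//   ...
//   INC_TPC(translate);  // expands to ++s_perfCounters[tpc_translate];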
// The global MCGenerator object.
MCGenerator* mcg;

// Register dirtiness: thread-private.
__thread VMRegState tl_regState = VMRegState::CLEAN;

CppCall MCGenerator::getDtorCall(DataType type) {
  switch (type) {
    case BitwiseKindOfString:
      return CppCall(getMethodPtr(&StringData::release));
    case KindOfArray:
      return CppCall(getMethodPtr(&ArrayData::release));
    case KindOfObject:
      return CppCall(getMethodPtr(&ObjectData::release));
    case KindOfResource:
      return CppCall(getMethodPtr(&ResourceData::release));
    case KindOfRef:
      return CppCall(getMethodPtr(&RefData::release));
    default:
      assert(false);
      NOT_REACHED();
  }
}
bool MCGenerator::profileSrcKey(const SrcKey& sk) const {
  if (!sk.func()->shouldPGO()) return false;

  if (m_tx.profData()->optimized(sk.getFuncId())) return false;

  // If we've hit EvalJitProfileRequests, then don't emit profiling
  // translations that would trigger an optimizing retranslation.
  // This limits the duration of profiling. For
  // non-retranslate-triggering SrcKeys, whose profiling translations
  // only increment a counter, it's OK to emit them past the
  // EvalJitProfileRequests threshold as long as we're already
  // profiling this function (next check below) but haven't
  // retranslated this function yet (checked above).
  bool triggersRetrans = sk.func()->isEntry(sk.offset());
  if (triggersRetrans &&
      requestCount() > RuntimeOption::EvalJitProfileRequests) {
    return false;
  }

  // For translations that don't trigger a retranslation, only emit
  // them if we've already generated a retranslation-triggering
  // translation for their function.
  if (!triggersRetrans &&
      !m_tx.profData()->profiling(sk.getFuncId())) {
    return false;
  }

  return true;
}
bool MCGenerator::profilePrologue(const SrcKey& sk) const {
  if (!sk.func()->shouldPGO()) return false;

  if (m_tx.profData()->optimized(sk.getFuncId())) return false;

  // Proflogues don't trigger retranslation, so only emit them if
  // we've already generated a retranslation-triggering translation
  // for their function, or if we're about to generate one (which
  // depends on requestCount(); see profileSrcKey()).
  return m_tx.profData()->profiling(sk.getFuncId()) ||
         requestCount() <= RuntimeOption::EvalJitProfileRequests;
}
/*
 * Invalidate the SrcDB entries for func's SrcKeys that have any
 * Profile translation.
 */
void MCGenerator::invalidateFuncProfSrcKeys(const Func* func) {
  assert(RuntimeOption::EvalJitPGO);
  FuncId funcId = func->getFuncId();
  for (auto tid : m_tx.profData()->funcProfTransIDs(funcId)) {
    invalidateSrcKey(m_tx.profData()->transSrcKey(tid));
  }
}
TCA MCGenerator::retranslate(const TranslArgs& args) {
  SrcRec* sr = m_tx.getSrcDB().find(args.m_sk);

  bool locked = sr->tryLock();
  SCOPE_EXIT {
    if (locked) sr->freeLock();
  };
  if (isDebuggerAttachedProcess() && m_tx.isSrcKeyInBL(args.m_sk)) {
    // We are about to translate something known to be blacklisted by
    // the debugger; exit early.
    SKTRACE(1, args.m_sk, "retranslate abort due to debugger\n");
    return nullptr;
  }
  LeaseHolder writer(Translator::WriteLease());
  if (!writer || !shouldTranslate()) return nullptr;
  if (!locked) {
    // Even though we knew above that we were going to skip
    // doing another translation, we wait until we get the
    // write lease, to avoid spinning through the tracelet
    // guards again and again while another thread is writing
    // to it.
    return sr->getTopTranslation();
  }
  SKTRACE(1, args.m_sk, "retranslate\n");

  m_tx.setMode(profileSrcKey(args.m_sk) ? TransProfile : TransLive);
  SCOPE_EXIT { m_tx.setMode(TransInvalid); };

  return translate(args);
}
TCA MCGenerator::retranslateOpt(TransID transId, bool align) {
  LeaseHolder writer(Translator::WriteLease());
  if (!writer || !shouldTranslate()) return nullptr;
  if (isDebuggerAttachedProcess()) return nullptr;

  TRACE(1, "retranslateOpt: transId = %u\n", transId);

  SCOPE_EXIT { m_tx.setMode(TransInvalid); };

  always_assert(m_tx.profData()->transRegion(transId) != nullptr);

  Func* func = m_tx.profData()->transFunc(transId);
  FuncId funcId = func->getFuncId();
  const SrcKey& sk = m_tx.profData()->transSrcKey(transId);

  if (m_tx.profData()->optimized(funcId)) return nullptr;
  m_tx.profData()->setOptimized(funcId);

  bool setFuncBody = func->getDVFunclets().size() == 0;

  func->setFuncBody(m_tx.uniqueStubs.funcBodyHelperThunk);

  // Invalidate the SrcDB's entries for all of func's SrcKeys.
  invalidateFuncProfSrcKeys(func);

  // Regenerate the prologues and DV funclets before the actual function body.
  TCA start = regeneratePrologues(func, sk);

  // Regionize func and translate all of its regions.
  std::vector<RegionDescPtr> regions;
  regionizeFunc(func, this, regions);

  for (auto region : regions) {
    m_tx.setMode(TransOptimize);
    always_assert(region->blocks.size() > 0);
    SrcKey regionSk = region->blocks[0]->start();
    auto translArgs = TranslArgs(regionSk, align).region(region);
    if (setFuncBody && regionSk.offset() == func->base()) {
      translArgs.setFuncBody();
      setFuncBody = false;
    }
    TCA regionStart = translate(translArgs);
    if (start == nullptr && regionSk == sk) {
      assert(regionStart);
      start = regionStart;
    }
  }
  assert(start);
  return start;
}
/*
 * Find or create a translation for sk. Returns TCA of "best" current
 * translation. May return NULL if it is currently impossible to create
 * a translation.
 */
TCA
MCGenerator::getTranslation(const TranslArgs& args) {
  auto sk = args.m_sk;
  sk.func()->validate();
  SKTRACE(2, sk,
          "getTranslation: curUnit %s funcId %x offset %d\n",
          sk.unit()->filepath()->data(),
          sk.getFuncId(),
          sk.offset());
  SKTRACE(2, sk, "  funcId: %x \n", sk.func()->getFuncId());

  if (Translator::liveFrameIsPseudoMain()) {
    SKTRACE(2, sk, "punting on pseudoMain\n");
    return nullptr;
  }
  if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
    TCA tca = sr->getTopTranslation();
    if (tca) {
      SKTRACE(2, sk, "getTranslation: found %p\n", tca);
      return tca;
    }
  }
  return createTranslation(args);
}
int
MCGenerator::numTranslations(SrcKey sk) const {
  if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
    return sr->translations().size();
  }
  return 0;
}
static void populateLiveContext(RegionContext& ctx) {
  typedef RegionDesc::Location L;

  const ActRec*     const fp {g_context->getFP()};
  const TypedValue* const sp {g_context->getStack().top()};

  for (uint32_t i = 0; i < fp->m_func->numLocals(); ++i) {
    ctx.liveTypes.push_back(
      { L::Local{i}, liveTVType(frame_local(fp, i)) }
    );
  }

  uint32_t stackOff = 0;
  visitStackElems(
    fp, sp, ctx.bcOffset,
    [&](const ActRec* ar) {
      // TODO(#2466980): when it's a Cls, we should pass the Class* in
      // the Type.
      auto const objOrCls =
        ar->hasThis()  ? Type::Obj.specialize(ar->getThis()->getVMClass()) :
        ar->hasClass() ? Type::Cls
                       : Type::Nullptr;

      ctx.preLiveARs.push_back(
        { stackOff,
          ar->m_func,
          objOrCls
        }
      );
      FTRACE(2, "added prelive ActRec {}\n", show(ctx.preLiveARs.back()));

      stackOff += kNumActRecCells;
    },
    [&](const TypedValue* tv) {
      ctx.liveTypes.push_back(
        { L::Stack{stackOff, ctx.spOffset - stackOff}, liveTVType(tv) }
      );
      stackOff++;
      FTRACE(2, "added live type {}\n", show(ctx.liveTypes.back()));
    }
  );
}
TCA
MCGenerator::createTranslation(const TranslArgs& args) {
  if (!shouldTranslate()) return nullptr;

  /*
   * Try to become the writer. We delay this until we *know* we will have
   * a need to create new translations, instead of just trying to win the
   * lottery at the dawn of time. Hopefully lots of requests won't require
   * any new translation.
   */
  auto sk = args.m_sk;
  LeaseHolder writer(Translator::WriteLease());
  if (!writer) return nullptr;

  if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
    TCA tca = sr->getTopTranslation();
    if (tca) {
      // Handle extremely unlikely race; someone may have just already
      // added the first instance of this SrcRec while we did a
      // non-blocking wait on the write lease.
      return tca;
    } else {
      // Since we are holding the write lease, we know that sk is properly
      // initialized, except that it has no translations (due to
      // replaceOldTranslations).
      return retranslate(args);
    }
  }

  // We put retranslate requests at the end of our slab to more frequently
  // allow conditional jump fall-throughs.
  TCA astart = code.main().frontier();
  TCA stubstart = code.stubs().frontier();
  TCA req = emitServiceReq(code.stubs(), REQ_RETRANSLATE, sk.offset());
  SKTRACE(1, sk, "inserting anchor translation for (%p,%d) at %p\n",
          sk.unit(), sk.offset(), req);
  SrcRec* sr = m_tx.getSrcRec(sk);
  sr->setFuncInfo(sk.func());
  sr->setAnchorTranslation(req);

  size_t asize = code.main().frontier() - astart;
  size_t stubsize = code.stubs().frontier() - stubstart;
  assert(asize == 0);
  if (stubsize && RuntimeOption::EvalDumpTCAnchors) {
    TransRec tr(sk, sk.unit()->md5(), TransAnchor,
                astart, asize, stubstart, stubsize);
    m_tx.addTranslation(tr);
    if (RuntimeOption::EvalJitUseVtuneAPI) {
      reportTraceletToVtune(sk.unit(), sk.func(), tr);
    }

    if (m_tx.profData()) {
      m_tx.profData()->addTransNonProf(TransAnchor, sk);
    }
    assert(!m_tx.isTransDBEnabled() ||
           m_tx.getTransRec(stubstart)->kind == TransAnchor);
  }

  return retranslate(args);
}
TCA
MCGenerator::lookupTranslation(SrcKey sk) const {
  if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
    return sr->getTopTranslation();
  }
  return nullptr;
}
TCA
MCGenerator::translate(const TranslArgs& args) {
  INC_TPC(translate);

  assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
  assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
  assert(m_tx.mode() != TransInvalid);
  SCOPE_EXIT { m_tx.setMode(TransInvalid); };

  if (!args.m_interp) {
    if (m_numHHIRTrans == RuntimeOption::EvalJitGlobalTranslationLimit) {
      RuntimeOption::EvalJit = false;
      ThreadInfo::s_threadInfo->m_reqInjectionData.updateJit();
      return nullptr;
    }
  }

  Func* func = const_cast<Func*>(args.m_sk.func());
  CodeCache::Selector asmSel(CodeCache::Selector::Args(code)
                             .profile(m_tx.mode() == TransProfile)
                             .hot(func->attrs() & AttrHot));

  if (args.m_align) {
    X64::moveToAlign(code.main(), kNonFallthroughAlign);
  }

  TCA start = code.main().frontier();

  if (RuntimeOption::EvalJitDryRuns &&
      (m_tx.mode() == TransLive || m_tx.mode() == TransProfile)) {
    auto const useRegion =
      RuntimeOption::EvalJitRegionSelector == "tracelet";
    always_assert(useRegion ||
                  RuntimeOption::EvalJitRegionSelector == "");

    auto dryArgs = args;

    dryArgs.dryRun(!useRegion);
    {
      // First, run translateWork with the tracelet region selector. If
      // useRegion == false, the generated code will be thrown away at the end.
      OPTION_GUARD(EvalJitRegionSelector, "tracelet");
      OPTION_GUARD(EvalHHIRRelaxGuards, true);
      OPTION_GUARD(EvalHHBCRelaxGuards, false);
      translateWork(dryArgs);
    }

    dryArgs.dryRun(useRegion);
    {
      // Now translate with analyze(), throwing away the generated code if
      // useRegion == true.
      OPTION_GUARD(EvalJitRegionSelector, "");
      OPTION_GUARD(EvalHHIRRelaxGuards, false);
      OPTION_GUARD(EvalHHBCRelaxGuards, true);
      translateWork(dryArgs);
    }
  } else {
    translateWork(args);
  }

  if (args.m_setFuncBody) {
    func->setFuncBody(start);
  }
  SKTRACE(1, args.m_sk, "translate moved head from %p to %p\n",
          getTopTranslation(args.m_sk), start);
  return start;
}
TCA
MCGenerator::getCallArrayPrologue(Func* func) {
  TCA tca = func->getFuncBody();
  if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;

  DVFuncletsVec dvs = func->getDVFunclets();

  if (dvs.size()) {
    LeaseHolder writer(Translator::WriteLease());
    if (!writer) return nullptr;
    tca = func->getFuncBody();
    if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
    switch (arch()) {
      case Arch::X64:
        tca = X64::emitCallArrayPrologue(func, dvs);
        break;
      case Arch::ARM:
        tca = ARM::emitCallArrayPrologue(func, dvs);
        break;
    }
    func->setFuncBody(tca);
  } else {
    SrcKey sk(func, func->base(), false);
    tca = mcg->getTranslation(TranslArgs(sk, false).setFuncBody());
  }

  return tca;
}
void
MCGenerator::smashPrologueGuards(TCA* prologues, int numPrologues,
                                 const Func* func) {
  DEBUG_ONLY std::unique_ptr<LeaseHolder> writer;
  for (int i = 0; i < numPrologues; i++) {
    if (prologues[i] != m_tx.uniqueStubs.fcallHelperThunk
        && funcPrologueHasGuard(prologues[i], func)) {
      if (debug) {
        /*
         * Units are sometimes created racily, in which case all
         * but the first are destroyed immediately. In that case,
         * the Funcs of the destroyed Units never need their
         * prologues smashed, and it would be a lock rank violation
         * to take the write lease here.
         * In all other cases, Funcs are destroyed via a delayed path
         * (treadmill) and the rank violation isn't an issue.
         *
         * Also note that we only need the write lease because we
         * mprotect the translation cache in debug builds.
         */
        if (!writer) {
          writer.reset(new LeaseHolder(Translator::WriteLease(),
                                       LeaseAcquire::BLOCKING));
        }
      }
      switch (arch()) {
        case Arch::X64:
          X64::funcPrologueSmashGuard(prologues[i], func);
          break;
        case Arch::ARM:
          ARM::funcPrologueSmashGuard(prologues[i], func);
          break;
      }
    }
  }
}
/*
 * funcPrologue --
 *
 * Given a callee and a number of args, match up to the callee's
 * argument expectations and dispatch.
 *
 * Call/return hand-shaking is a bit funny initially. At translation time,
 * we don't necessarily know what function we're calling. For instance,
 *
 *   f(g());
 *
 * will lead to a set of basic blocks like:
 *
 *   b1: pushfuncd "f"
 *       pushfuncd "g"
 *       fcall
 *   b2: fcall
 *
 * The fcall labelled "b2" above is not statically bindable in our
 * execution model.
 *
 * We decouple the call work into a per-callsite portion, responsible
 * for recording the return address, and a per-(callee, numArgs) portion,
 * responsible for fixing up arguments and dispatching to remaining
 * code. We call the per-callee portion a "prologue."
 *
 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
 * we're running "between" basic blocks, with all VM registers sync'ed.
 * However, we're also called in the middle of basic blocks, when dropping
 * entries into func->m_prologues. So don't go around using the
 * translation-time values of vmfp()/vmsp(), since they have an
 * unpredictable relationship to the source.
 */
bool
MCGenerator::checkCachedPrologue(const Func* func, int paramIdx,
                                 TCA& prologue) const {
  prologue = (TCA)func->getPrologue(paramIdx);
  if (prologue != m_tx.uniqueStubs.fcallHelperThunk) {
    TRACE(1, "cached prologue %s(%d) -> cached %p\n",
          func->fullName()->data(), paramIdx, prologue);
    assert(isValidCodeAddress(prologue));
    return true;
  }
  return false;
}
static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
  assert(tl_regState == VMRegState::DIRTY);
  tl_regState = VMRegState::CLEAN;
  vmfp() = (Cell*)ar;
  vmsp() = sp;
  vmpc() = ar->unit()->at(pcOff);
}
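// Illustrative sketch of the tl_regState protocol around interp_set_regs
// (hypothetical caller, mirroring the isClonedClosure path below): the VM
// registers are DIRTY while executing in the TC, get synced into the
// ExecutionContext before running C++/interpreter code, and are marked
// DIRTY again just before control returns to translated code.
//
//   interp_set_regs(ar, sp, pcOff);   // DIRTY -> CLEAN; vmfp/vmsp/vmpc synced
//   /* ... run code that reads vmfp()/vmsp()/vmpc() ... */
//   tl_regState = VMRegState::DIRTY;  // about to re-enter the TC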
TCA
MCGenerator::getFuncPrologue(Func* func, int nPassed, ActRec* ar) {
  func->validate();
  TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
  int const numParams = func->numNonVariadicParams();
  int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;

  bool const funcIsMagic = func->isMagic();

  // Do a quick test before grabbing the write lease.
  TCA prologue;
  if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;

  Offset entry = func->getEntryForNumArgs(nPassed);
  SrcKey funcBody(func, entry, false);

  if (func->isClonedClosure()) {
    assert(ar);
    interp_set_regs(ar, (Cell*)ar - func->numSlotsInFrame(), entry);
    TCA tca = getTranslation(TranslArgs(funcBody, false));
    tl_regState = VMRegState::DIRTY;
    if (tca) {
      // racy, but ok...
      func->setPrologue(paramIndex, tca);
    }
    return tca;
  }

  LeaseHolder writer(Translator::WriteLease());
  if (!writer || !shouldTranslate()) return nullptr;

  // Double check the prologue array now that we have the write lease
  // in case another thread snuck in and set the prologue already.
  if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;

  // We're coming from a BIND_CALL service request, so enable
  // profiling if we haven't optimized the function entry yet.
  assert(m_tx.mode() == TransInvalid || m_tx.mode() == TransPrologue);
  if (m_tx.mode() == TransInvalid && profilePrologue(funcBody)) {
    m_tx.setMode(TransProflogue);
  } else {
    m_tx.setMode(TransPrologue);
  }
  SCOPE_EXIT { m_tx.setMode(TransInvalid); };

  CodeCache::Selector asmSel(CodeCache::Selector::Args(code)
                             .profile(m_tx.mode() == TransProflogue)
                             .hot(func->attrs() & AttrHot));

  // If we're close to a cache line boundary, just burn some space to
  // try to keep the func and its body on fewer total lines.
  if (((uintptr_t)code.main().frontier() & kX64CacheLineMask) >= 32) {
    X64::moveToAlign(code.main(), kX64CacheLineSize);
  }

  // Careful: this isn't necessarily the real entry point. For funcIsMagic
  // prologues, this is just a possible prologue.
  TCA aStart    = code.main().frontier();
  TCA start     = aStart;
  TCA stubStart = code.stubs().frontier();

  auto const skFuncBody = [&] {
    switch (arch()) {
      case Arch::X64:
        return funcIsMagic
          ? X64::emitMagicFuncPrologue(func, nPassed, start)
          : X64::emitFuncPrologue(func, nPassed, start);
      case Arch::ARM:
        return ARM::emitFuncPrologue(
          code.main(), code.stubs(), func, funcIsMagic, nPassed, start, aStart
        );
    }
    not_reached();
  }();

  assert(funcPrologueHasGuard(start, func));
  TRACE(2, "funcPrologue mcg %p %s(%d) setting prologue %p\n",
        this, func->fullName()->data(), nPassed, start);
  assert(isValidCodeAddress(start));
  func->setPrologue(paramIndex, start);

  assert(m_tx.mode() == TransPrologue || m_tx.mode() == TransProflogue);
  TransRec tr(skFuncBody, func->unit()->md5(),
              m_tx.mode(), aStart, code.main().frontier() - aStart,
              stubStart, code.stubs().frontier() - stubStart);
  m_tx.addTranslation(tr);
  if (RuntimeOption::EvalJitUseVtuneAPI) {
    reportTraceletToVtune(func->unit(), func, tr);
  }

  if (m_tx.profData()) {
    m_tx.profData()->addTransPrologue(m_tx.mode(), skFuncBody, paramIndex);
  }

  recordGdbTranslation(skFuncBody, func,
                       code.main(), aStart,
                       false, true);
  recordBCInstr(OpFuncPrologue, code.main(), start);

  return start;
}
/*
 * Given the proflogueTransId for a TransProflogue translation,
 * regenerate the prologue (as a TransPrologue). Returns the starting
 * address for the translation corresponding to triggerSk, if such a
 * translation is generated; otherwise returns nullptr.
 */
TCA MCGenerator::regeneratePrologue(TransID prologueTransId,
                                    SrcKey triggerSk) {
  Func* func = m_tx.profData()->transFunc(prologueTransId);
  int nArgs = m_tx.profData()->prologueArgs(prologueTransId);

  // Regenerate the prologue.
  func->resetPrologue(nArgs);
  m_tx.setMode(TransPrologue);
  SCOPE_EXIT { m_tx.setMode(TransInvalid); };
  TCA start = getFuncPrologue(func, nArgs);
  func->setPrologue(nArgs, start);

  // Smash callers of the old prologue with the address of the new one.
  PrologueCallersRec* pcr =
    m_tx.profData()->prologueCallers(prologueTransId);
  for (TCA toSmash : pcr->mainCallers()) {
    smashCall(toSmash, start);
  }
  // If the prologue has a guard, then smash its guard-callers as well.
  if (funcPrologueHasGuard(start, func)) {
    TCA guard = funcPrologueToGuard(start, func);
    for (TCA toSmash : pcr->guardCallers()) {
      smashCall(toSmash, guard);
    }
  }
  pcr->clearAllCallers();

  // If this prologue has a DV funclet, then generate a translation
  // for the DV funclet right after the prologue. However, skip
  // cloned closures because their prologues are actually the DV
  // funclets already.
  TCA triggerSkStart = nullptr;
  if (nArgs < func->numNonVariadicParams() && !func->isClonedClosure()) {
    auto paramInfo = func->params()[nArgs];
    if (paramInfo.hasDefaultValue()) {
      m_tx.setMode(TransOptimize);
      SrcKey funcletSK(func, paramInfo.funcletOff(), false);
      TransID funcletTransId = m_tx.profData()->dvFuncletTransId(func, nArgs);
      if (funcletTransId != InvalidID) {
        invalidateSrcKey(funcletSK);
        TCA dvStart = translate(TranslArgs(funcletSK, false).
                                transId(funcletTransId));
        if (dvStart && !triggerSkStart && funcletSK == triggerSk) {
          triggerSkStart = dvStart;
        }
        // Flag that this translation has been retranslated, so that
        // it's not retranslated again along with the function body.
        m_tx.profData()->setOptimized(funcletSK);
      }
    }
  }

  return triggerSkStart;
}
/*
 * Regenerate all prologues of func that were previously generated.
 * The prologues are sorted in ascending order of profile counters.
 * For prologues with corresponding DV funclets, their corresponding
 * DV funclet will be regenerated right after them. The idea is to
 * generate the function body right after calling this function, so
 * that all prologues are placed right before it, and with the hottest
 * prologues closer to it.
 *
 * Returns the starting address for the translation corresponding to
 * triggerSk, if such a translation is generated; otherwise returns
 * nullptr.
 */
TCA MCGenerator::regeneratePrologues(Func* func, SrcKey triggerSk) {
  TCA triggerStart = nullptr;
  std::vector<TransID> prologTransIDs;

  auto const limit = func->numNonVariadicParams() + 1;
  for (int nArgs = 0; nArgs <= limit; nArgs++) {
    TransID tid = m_tx.profData()->prologueTransId(func, nArgs);
    if (tid != InvalidID) {
      prologTransIDs.push_back(tid);
    }
  }

  std::sort(prologTransIDs.begin(), prologTransIDs.end(),
            [&](TransID t1, TransID t2) -> bool {
              // This will sort in ascending order. Note that transCounters
              // start at JitPGOThreshold and count down.
              return m_tx.profData()->transCounter(t1) >
                     m_tx.profData()->transCounter(t2);
            });

  for (TransID tid : prologTransIDs) {
    TCA start = regeneratePrologue(tid, triggerSk);
    if (triggerStart == nullptr && start != nullptr) {
      triggerStart = start;
    }
  }

  return triggerStart;
}
/*
 * bindJmp --
 *
 *   Runtime service handler that patches a jmp to the translation of
 *   u:dest from toSmash.
 */
TCA
MCGenerator::bindJmp(TCA toSmash, SrcKey destSk,
                     ServiceRequest req, bool& smashed) {
  TCA tDest = getTranslation(TranslArgs(destSk, false));
  if (!tDest) return nullptr;
  LeaseHolder writer(Translator::WriteLease());
  if (!writer) return tDest;
  SrcRec* sr = m_tx.getSrcRec(destSk);
  // The top translation may have changed while we waited for the
  // write lease, so read it again. If it was replaced with a new
  // translation, then bind to the new one. If it was invalidated,
  // then don't bind the jump.
  tDest = sr->getTopTranslation();
  if (tDest == nullptr) return nullptr;

  if (req == REQ_BIND_ADDR) {
    auto addr = reinterpret_cast<TCA*>(toSmash);
    if (*addr == tDest) {
      // Already smashed.
      return tDest;
    }
    sr->chainFrom(IncomingBranch::addr(addr));
  } else if (req == REQ_BIND_JCC || req == REQ_BIND_SIDE_EXIT) {
    auto jt = jccTarget(toSmash);
    assert(jt);
    if (jt == tDest) {
      // Already smashed.
      return tDest;
    }
    sr->chainFrom(IncomingBranch::jccFrom(toSmash));
  } else {
    assert(!jccTarget(toSmash));
    if (!jmpTarget(toSmash) || jmpTarget(toSmash) == tDest) {
      // Already smashed.
      return tDest;
    }
    sr->chainFrom(IncomingBranch::jmpFrom(toSmash));
  }
  smashed = true;
  return tDest;
}
/*
 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
 *
 *   1:         j<CC> stubJmpccFirst
 *              jmp   stubJmpccFirst
 *
 * Our "taken" argument tells us whether the branch at 1: was taken or
 * not; and therefore which of offTaken and offNotTaken to continue executing.
 * If we did take the branch, we now rewrite the code so that the branch is
 * straightened. This predicts that subsequent executions will go the same way
 * as the first execution.
 *
 *              jn<CC> stubJmpccSecond:offNotTaken
 *              nop5   ; fallthru, or jmp if there's already a translation.
 * offTaken:
 *
 * If we did not take the branch, we leave the sense of the condition
 * intact, while patching it up to go to the unexplored code:
 *
 *              j<CC> stubJmpccSecond:offTaken
 *              nop5
 * offNotTaken:
 */
TCA
MCGenerator::bindJmpccFirst(TCA toSmash,
                            Offset offTaken, Offset offNotTaken,
                            bool taken,
                            ConditionCode cc,
                            bool& smashed) {
  const Func* f = liveFunc();
  LeaseHolder writer(Translator::WriteLease());
  if (!writer) return nullptr;
  Offset offWillExplore = taken ? offTaken : offNotTaken;
  Offset offWillDefer = taken ? offNotTaken : offTaken;
  SrcKey dest(f, offWillExplore, liveResumed());
  TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
        "taken %d\n",
        offWillExplore, offWillDefer, cc, taken);

  // We want the branch to point to whichever side has not been explored
  // yet.
  if (taken) {
    cc = ccNegate(cc);
  }

  auto& cb = code.blockFor(toSmash);
  Asm as { cb };
  // It's not clear where chainFrom should go to if as is astubs.
  assert(&cb != &code.stubs());

  using namespace X64;

  // Can we just directly fall through?
  // A jmp + jz takes 5 + 6 = 11 bytes.
  bool fallThru = toSmash + kJmpccLen + kJmpLen == cb.frontier() &&
    !m_tx.getSrcDB().find(dest);

  TCA tDest;
  tDest = getTranslation(TranslArgs(dest, !fallThru));
  if (!tDest) {
    return 0;
  }

  if (jmpTarget(toSmash + kJmpccLen) != jccTarget(toSmash)) {
    // Someone else already smashed this one. Ideally we would
    // just re-execute from toSmash - except the flags will have
    // been trashed.
    return tDest;
  }

  TCA stub = emitEphemeralServiceReq(code.stubs(), getFreeStub(),
                                     REQ_BIND_JMPCC_SECOND, toSmash,
                                     offWillDefer, cc);

  smashed = true;
  assert(Translator::WriteLease().amOwner());
  /*
   * Roll over the jcc and the jmp/fallthru. E.g., from:
   *
   *     toSmash:    jcc   <jmpccFirstStub>
   *     toSmash+6:  jmp   <jmpccFirstStub>
   *     toSmash+11: <probably the new translation == tdest>
   *
   * to:
   *
   *     toSmash:    j[n]z <jmpccSecondStub>
   *     toSmash+6:  nop5
   *     toSmash+11: newHotness
   */
  CodeCursor cg(cb, toSmash);
  as.jcc(cc, stub);
  m_tx.getSrcRec(dest)->chainFrom(IncomingBranch::jmpFrom(cb.frontier()));
  TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
  return tDest;
}
// Smashes a jcc to point to a new destination.
TCA
MCGenerator::bindJmpccSecond(TCA toSmash, const Offset off,
                             ConditionCode cc, bool& smashed) {
  const Func* f = liveFunc();
  SrcKey dest(f, off, liveResumed());
  TCA branch = getTranslation(TranslArgs(dest, true));
  if (branch) {
    LeaseHolder writer(Translator::WriteLease());
    if (writer) {
      if (branch == jccTarget(toSmash)) {
        // Already smashed.
        return branch;
      } else {
        smashed = true;
        SrcRec* destRec = m_tx.getSrcRec(dest);
        destRec->chainFrom(IncomingBranch::jccFrom(toSmash));
      }
    }
  }
  return branch;
}
void MCGenerator::emitResolvedDeps(const ChangeMap& resolvedDeps) {
  for (const auto dep : resolvedDeps) {
    m_tx.irTrans()->assertType(dep.first, dep.second->rtt);
  }
}
void
MCGenerator::checkRefs(SrcKey sk,
                       const RefDeps& refDeps,
                       SrcRec& fail) {
  if (refDeps.size() == 0) {
    return;
  }

  // Set up guards for each pushed ActRec that we've made reffiness
  // assumptions about.
  for (RefDeps::ArMap::const_iterator it = refDeps.m_arMap.begin();
       it != refDeps.m_arMap.end(); ++it) {
    // Be careful! The actual Func might have fewer refs than the number
    // of args we're passing. To forestall this, we always prepare at
    // least 64 bits in the Func, and always fill out the refBitVec
    // to a multiple of 64 bits.

    int entryArDelta = it->first;

    m_tx.irTrans()->hhbcTrans().guardRefs(entryArDelta,
                                          it->second.m_mask,
                                          it->second.m_vals);
  }
}
namespace {
class FreeRequestStubTrigger {
  TCA m_stub;
 public:
  explicit FreeRequestStubTrigger(TCA stub) : m_stub(stub) {
    TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub);
  }
  void operator()() {
    TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub);
    if (mcg->freeRequestStub(m_stub) != true) {
      // If we can't free the stub, enqueue again to retry.
      TRACE(3, "FreeStubTrigger: write lease failed, requeueing %p\n", m_stub);
      Treadmill::enqueue(FreeRequestStubTrigger(m_stub));
    }
  }
};
}

#ifdef DEBUG

struct DepthGuard {
  static __thread int m_depth;
  DepthGuard()  { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
  ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }

  bool depthOne() const { return m_depth == 1; }
};
__thread int DepthGuard::m_depth;

#else

struct DepthGuard { bool depthOne() const { return false; } };

#endif
/*
 * enterTCHelper does not save callee-saved registers except %rbp. This means
 * when we call it from C++, we have to tell gcc to clobber all the other
 * callee-saved registers.
 */
#if defined(__x86_64__)
#  define CALLEE_SAVED_BARRIER() \
  asm volatile("" : : : "rbx", "r12", "r13", "r14", "r15")
#elif defined(__AARCH64EL__)
#  define CALLEE_SAVED_BARRIER() \
  asm volatile("" : : : "x19", "x20", "x21", "x22", "x23", "x24", "x25", \
               "x26", "x27", "x28")
#else
#  error What are the callee-saved registers on your system?
#endif
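// Usage sketch (illustrative): the empty asm statement with a clobber list
// forces the compiler to spill and reload the named callee-saved registers
// around the call, since enterTCHelper won't preserve them itself:
//
//   CALLEE_SAVED_BARRIER();
//   enterTCHelper(...);      // may trash rbx/r12-r15 on x86_64
//   CALLEE_SAVED_BARRIER();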
/*
 * enterTCHelper is a handwritten assembly function that transfers control in
 * and out of the TC.
 */
static_assert(X64::rVmSp == rbx &&
              X64::rVmFp == rbp &&
              X64::rVmTl == r12 &&
              X64::rStashedAR == r15,
              "__enterTCHelper needs to be modified to use the correct ABI");
static_assert(REQ_BIND_CALL == 0x1,
              "Update assembly test for REQ_BIND_CALL in __enterTCHelper");
extern "C" void enterTCHelper(Cell* vm_sp,
                              Cell* vm_fp,
                              TCA start,
                              TReqInfo* infoPtr,
                              ActRec* firstAR,
                              void* targetCacheBase);
/*
 * A partial equivalent of enterTCHelper, used to set up the ARM simulator.
 */
uintptr_t setupSimRegsAndStack(vixl::Simulator& sim,
                               uintptr_t saved_rStashedAr) {
  sim.set_xreg(ARM::rGContextReg.code(), g_context.getNoCheck());
  sim.set_xreg(ARM::rVmFp.code(), vmfp());
  sim.set_xreg(ARM::rVmSp.code(), vmsp());
  sim.set_xreg(ARM::rVmTl.code(), RDS::tl_base);
  sim.set_xreg(ARM::rStashedAR.code(), saved_rStashedAr);

  // Leave space for register spilling and MInstrState.
  sim.set_sp(sim.sp() - kReservedRSPTotalSpace);
  assert(sim.is_on_stack(reinterpret_cast<void*>(sim.sp())));

  auto spOnEntry = sim.sp();

  // Push the link register onto the stack. The link register is
  // technically caller-saved; what this means in practice is that
  // non-leaf functions push it at the very beginning and pop it just
  // before returning (as opposed to just saving it around calls).
  sim.set_sp(sim.sp() - 16);
  *reinterpret_cast<uint64_t*>(sim.sp()) = sim.lr();

  return spOnEntry;
}
struct TReqInfo {
  uintptr_t requestNum;
  uintptr_t args[5];

  // Some TC registers need to be preserved across service requests.
  uintptr_t saved_rStashedAr;

  // Stub addresses are passed back to allow us to recycle used stubs.
  TCA stubAddr;
};
void
MCGenerator::enterTC(TCA start, void* data) {
  if (debug) {
    fflush(stdout);
    fflush(stderr);
  }
  DepthGuard d;
  TReqInfo info;
  SrcKey sk;

  if (LIKELY(start != nullptr)) {
    info.requestNum = data ? REQ_BIND_CALL : -1;
    info.saved_rStashedAr = (uintptr_t)data;
  } else {
    info.requestNum = -1;
    info.saved_rStashedAr = 0;
    sk = *(SrcKey*)data;
    start = getTranslation(TranslArgs(sk, true));
  }
  for (;;) {
    assert(sizeof(Cell) == 16);
    assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
    assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);

    Translator::WriteLease().gremlinUnlock();
    // Keep dispatching until we end up somewhere the translator
    // recognizes, or we luck out and the leaseholder exits.
    while (!start) {
      TRACE(2, "enterTC forwarding BB to interpreter\n");
      g_context->m_pc = sk.unit()->at(sk.offset());
      INC_TPC(interp_bb);
      g_context->dispatchBB();
      PC newPc = g_context->getPC();
      if (!newPc) { g_context->m_fp = 0; return; }
      sk = SrcKey(liveFunc(), newPc, liveResumed());
      start = getTranslation(TranslArgs(sk, true));
    }
    assert(start == m_tx.uniqueStubs.funcBodyHelperThunk ||
           isValidCodeAddress(start) ||
           (start == m_tx.uniqueStubs.fcallHelperThunk &&
            info.saved_rStashedAr == (uintptr_t)data));
    assert(!Translator::WriteLease().amOwner());
    const Func* func = (vmfp() ? (ActRec*)vmfp() : (ActRec*)data)->m_func;
    func->validate();
    INC_TPC(enter_tc);

    TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
          vmfp(), func->name()->data(), vmsp());
    tl_regState = VMRegState::DIRTY;

    if (Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
      auto skData = sk.valid() ? sk.toAtomicInt() : uint64_t(-1LL);
      Trace::ringbufferEntry(RBTypeEnterTC, skData, (uint64_t)start);
    }

    switch (arch()) {
      case Arch::X64: {
        // We have to force C++ to spill anything that might be in a
        // callee-saved register (aside from rbp). enterTCHelper does not
        // save them.
        CALLEE_SAVED_BARRIER();
        enterTCHelper(vmsp(), vmfp(), start, &info, vmFirstAR(),
                      RDS::tl_base);
        CALLEE_SAVED_BARRIER();
        break;
      }
      case Arch::ARM: {
        // This is a pseudo-copy of the logic in enterTCHelper: it sets up the
        // simulator's registers and stack, runs the translation, and gets the
        // necessary information out of the registers when it's done.

        vixl::PrintDisassembler disasm(std::cout);
        vixl::Decoder decoder;
        if (getenv("ARM_DISASM")) {
          decoder.AppendVisitor(&disasm);
        }
        vixl::Simulator sim(&decoder, std::cout);
        SCOPE_EXIT {
          Stats::inc(Stats::vixl_SimulatedInstr, sim.instr_count());
          Stats::inc(Stats::vixl_SimulatedLoad, sim.load_count());
          Stats::inc(Stats::vixl_SimulatedStore, sim.store_count());
        };

        sim.set_exception_hook(ARM::simulatorExceptionHook);

        g_context->m_activeSims.push_back(&sim);
        SCOPE_EXIT { g_context->m_activeSims.pop_back(); };

        DEBUG_ONLY auto spOnEntry =
          setupSimRegsAndStack(sim, info.saved_rStashedAr);

        // The handshake is different in the case of REQ_BIND_CALL. The code
        // we're jumping to expects to find a return address in x30, and a
        // saved return address on the stack.
        if (info.requestNum == REQ_BIND_CALL) {
          // Put the call's return address in the link register.
          auto* ar = reinterpret_cast<ActRec*>(info.saved_rStashedAr);
          sim.set_lr(ar->m_savedRip);
        }

        std::cout.flush();
        sim.RunFrom(vixl::Instruction::Cast(start));
        std::cout.flush();

        assert(sim.sp() == spOnEntry);

        info.requestNum = sim.xreg(0);
        info.args[0] = sim.xreg(1);
        info.args[1] = sim.xreg(2);
        info.args[2] = sim.xreg(3);
        info.args[3] = sim.xreg(4);
        info.args[4] = sim.xreg(5);
        info.saved_rStashedAr = sim.xreg(ARM::rStashedAR.code());

        info.stubAddr = reinterpret_cast<TCA>(sim.xreg(ARM::rAsm.code()));
        break;
      }
    }

    assert(g_context->m_stack.isValidAddress((uintptr_t)vmsp()));

    tl_regState = VMRegState::CLEAN; // Careful: pc isn't sync'ed yet.
    TRACE(1, "enterTC: %p fp%p sp%p } return\n", start,
          vmfp(), vmsp());

    if (debug) {
      // Debugging code: cede the write lease half the time.
      if (RuntimeOption::EvalJitStressLease) {
        if (d.depthOne() && (rand() % 2) == 0) {
          Translator::WriteLease().gremlinLock();
        }
      }
      // Ensure that each case either returns, or drives start to a valid
      // value.
      start = TCA(0xbee5face);
    }

    TRACE(2, "enterTC: request(%s) args: %" PRIxPTR " %" PRIxPTR " %"
          PRIxPTR " %" PRIxPTR " %" PRIxPTR "\n",
          serviceReqName(info.requestNum),
          info.args[0], info.args[1], info.args[2], info.args[3],
          info.args[4]);

    if (LIKELY(info.requestNum == REQ_EXIT)) {
      vmfp() = nullptr;
      return;
    }
    if (!handleServiceRequest(info, start, sk)) return;
  }
}
/*
 * The contract is that each case will set sk to the place where
 * execution should resume, and optionally set start to the hardware
 * translation of the resumption point (or otherwise set it to null).
 * Returns false if we need to halt this nesting of the VM.
 *
 * start and sk might be subtly different; i.e., there are cases where
 * start != NULL && start != getTranslation(sk). For instance,
 * REQ_BIND_CALL has not finished executing the OpCall when it gets
 * here, and has even done some work on its behalf. sk == OpFCall,
 * while start == the point in the TC that's "half-way through" the
 * Call instruction. If we punt to the interpreter, the interpreter
 * will redo some of the work that the translator has already done.
 */
bool MCGenerator::handleServiceRequest(TReqInfo& info,
                                       TCA& start,
                                       SrcKey& sk) {
  const ServiceRequest requestNum =
    static_cast<ServiceRequest>(info.requestNum);
  auto* const args = info.args;
  assert(requestNum != REQ_EXIT);
  INC_TPC(service_req);

  bool smashed = false;
  switch (requestNum) {
  case REQ_BIND_CALL: {
    ReqBindCall* req = reinterpret_cast<ReqBindCall*>(args[0]);
    ActRec* calleeFrame = reinterpret_cast<ActRec*>(args[1]);
    TCA toSmash = req->m_toSmash;
    Func* func = const_cast<Func*>(calleeFrame->m_func);
    int nArgs = req->m_nArgs;
    bool isImmutable = req->m_isImmutable;
    TRACE(2, "enterTC: bindCall %s, ActRec %p\n",
          func->fullName()->data(), calleeFrame);
    TCA dest = getFuncPrologue(func, nArgs);
    TRACE(2, "enterTC: bindCall -> %p\n", dest);
    if (!isImmutable) {
      // We don't know we're calling the right function, so adjust
      // dest to point to the dynamic check of ar->m_func.
      dest = funcPrologueToGuard(dest, func);
    } else {
      TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
            func->fullName()->data(), dest);
    }
    if (dest) {
      LeaseHolder writer(Translator::WriteLease());
      if (writer) {
        // Someone else may have changed the func prologue while we
        // waited for the write lease, so read it again.
        dest = getFuncPrologue(func, nArgs);
        assert(dest);
        if (!isImmutable) dest = funcPrologueToGuard(dest, func);

        if (callTarget(toSmash) != dest) {
          TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
          smashCall(toSmash, dest);
          smashed = true;
          // For functions to be PGO'ed, if their current prologues
          // are still profiling ones (living in code.prof()), then
          // save toSmash as a caller to the prologue, so that it can
          // later be smashed to call a new prologue when it's generated.
          int calleeNumParams = func->numNonVariadicParams();
          int calledPrologNumArgs = (nArgs <= calleeNumParams ?
                                     nArgs : calleeNumParams + 1);
          if (code.prof().contains(dest)) {
            if (isImmutable) {
              m_tx.profData()->addPrologueMainCaller(func,
                                                     calledPrologNumArgs,
                                                     toSmash);
            } else {
              m_tx.profData()->addPrologueGuardCaller(func,
                                                      calledPrologNumArgs,
                                                      toSmash);
            }
          }
        }
      }
      // sk: stale, but doesn't matter since we have a valid dest TCA.
    } else {
      // We need translator help; we're not at the callee yet, so
      // roll back. The prelude has done some work already, but it
      // should be safe to redo.
      TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
            toSmash, dest);
      sk = req->m_sourceInstr;

      // EnterTCHelper pushes the return ip onto the stack when the
      // requestNum is REQ_BIND_CALL, but if start is NULL, it will
      // interpret in doFCall, so we clear out the requestNum in this
      // case to prevent enterTCHelper from pushing the return ip
      // onto the stack.
      info.requestNum = ~REQ_BIND_CALL;
    }
    start = dest;
  } break;

  case REQ_BIND_SIDE_EXIT:
  case REQ_BIND_JMP:
  case REQ_BIND_JCC:
  case REQ_BIND_ADDR:
  {
    TCA toSmash = (TCA)args[0];
    Offset off = args[1];
    sk = SrcKey(liveFunc(), off, liveResumed());
    if (requestNum == REQ_BIND_SIDE_EXIT) {
      SKTRACE(3, sk, "side exit taken!\n");
    }
    start = bindJmp(toSmash, sk, requestNum, smashed);
  } break;

  case REQ_BIND_JMPCC_FIRST: {
    TCA toSmash = (TCA)args[0];
    Offset offTaken = (Offset)args[1];
    Offset offNotTaken = (Offset)args[2];
    ConditionCode cc = ConditionCode(args[3]);
    bool taken = int64_t(args[4]) & 1;
    start = bindJmpccFirst(toSmash, offTaken, offNotTaken,
                           taken, cc, smashed);
    // SrcKey: we basically need to emulate the fail
    sk = SrcKey(liveFunc(), taken ? offTaken : offNotTaken, liveResumed());
  } break;

  case REQ_BIND_JMPCC_SECOND: {
    TCA toSmash = (TCA)args[0];
    Offset off = (Offset)args[1];
    ConditionCode cc = ConditionCode(args[2]);
    start = bindJmpccSecond(toSmash, off, cc, smashed);
    sk = SrcKey(liveFunc(), off, liveResumed());
  } break;

  case REQ_RETRANSLATE_OPT: {
    auto ai = (SrcKey::AtomicInt)args[0];
    TransID transId = (TransID)args[1];
    sk = SrcKey::fromAtomicInt(ai);
    start = retranslateOpt(transId, false);
    SKTRACE(2, sk, "retranslated-OPT: transId = %d  start: @%p\n", transId,
            start);
    break;
  }

  case REQ_RETRANSLATE: {
    INC_TPC(retranslate);
    sk = SrcKey(liveFunc(), (Offset)args[0], liveResumed());
    start = retranslate(TranslArgs(sk, true));
    SKTRACE(2, sk, "retranslated @%p\n", start);
  } break;

  case REQ_INTERPRET: {
    Offset off = args[0];
    int numInstrs = args[1];
    g_context->m_pc = liveUnit()->at(off);
    /*
     * We know the compilation unit has not changed; basic blocks do
     * not span files. I claim even exceptions do not violate this
     * axiom.
     */
    assert(numInstrs >= 0);
    SKTRACE(5, SrcKey(liveFunc(), off, liveResumed()), "interp: enter\n");
    if (numInstrs) {
      s_perfCounters[tpc_interp_instr] += numInstrs;
      g_context->dispatchN(numInstrs);
    } else {
      // numInstrs == 0 means it wants to dispatch until the BB ends
      INC_TPC(interp_bb);
      g_context->dispatchBB();
    }
    PC newPc = g_context->getPC();
    if (!newPc) { g_context->m_fp = 0; return false; }
    SrcKey newSk(liveFunc(), newPc, liveResumed());
    SKTRACE(5, newSk, "interp: exit\n");
    sk = newSk;
    start = getTranslation(TranslArgs(newSk, true));
  } break;

  case REQ_POST_INTERP_RET: {
    // This is only responsible for the control-flow aspect of the Ret:
    // getting to the destination's translation, if any.
    ActRec* ar = (ActRec*)args[0];
    ActRec* caller = (ActRec*)args[1];
    assert((Cell*) caller == vmfp());
    Unit* destUnit = caller->m_func->unit();
    // Set PC so logging code in getTranslation doesn't get confused.
    vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
    SrcKey dest(caller->func(), vmpc(), caller->resumed());
    sk = dest;
    start = getTranslation(TranslArgs(dest, true));
    TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
          ar->m_func->fullName()->data(),
          caller->m_func->fullName()->data());
  } break;

  case REQ_RESUME: {
    if (UNLIKELY(vmpc() == 0)) {
      g_context->m_fp = 0;
      return false;
    }
    SrcKey dest(liveFunc(), vmpc(), liveResumed());
    sk = dest;
    start = getTranslation(TranslArgs(dest, true));
  } break;

  case REQ_STACK_OVERFLOW:
    if (((ActRec*)info.saved_rStashedAr)->m_savedRbp == (uintptr_t)vmfp()) {
      /*
       * The normal case - we were called via FCall, or FCallArray.
       * We need to construct the pc of the fcall from the return
       * address (which will be after the fcall). Because fcall is
       * a variable length instruction, and because we sometimes
       * delete instructions from the instruction stream, we
       * need to use fpi regions to find the fcall.
       */
      const FPIEnt* fe = liveFunc()->findPrecedingFPI(
        liveUnit()->offsetOf(vmpc()));
      vmpc() = liveUnit()->at(fe->m_fcallOff);
      assert(isFCallStar(*reinterpret_cast<const Op*>(vmpc())));
      raise_error("Stack overflow");
      NOT_REACHED();
    } else {
      /*
       * We were called via re-entry.
       * Leak the params and the ActRec, and tell the unwinder
       * that there's nothing left to do in this "entry".
       */
      vmsp() = (Cell*)((ActRec*)info.saved_rStashedAr + 1);
      throw VMReenterStackOverflow();
    }

  case REQ_EXIT:
    not_reached();
  }

  if (smashed && info.stubAddr) {
    Treadmill::enqueue(FreeRequestStubTrigger(info.stubAddr));
  }

  return true;
}
/*
 * Support for the stub freelist.
 */
TCA FreeStubList::maybePop() {
  StubNode* ret = m_list;
  if (ret) {
    TRACE(1, "alloc stub %p\n", ret);
    m_list = ret->m_next;
    ret->m_freed = ~kStubFree;
  }
  return (TCA)ret;
}

void FreeStubList::push(TCA stub) {
  /*
   * A freed stub may be released by Treadmill more than once if multiple
   * threads execute the service request before it is freed. We detect
   * duplicates by marking freed stubs.
   */
  StubNode* n = (StubNode*)stub;
  if (n->m_freed == kStubFree) {
    TRACE(1, "already freed stub %p\n", stub);
    return;
  }
  n->m_freed = kStubFree;
  n->m_next = m_list;
  TRACE(1, "free stub %p (-> %p)\n", stub, m_list);
  m_list = n;
}
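/*
 * Lifecycle sketch (illustrative only): a consumed service-request stub is
 * handed to the Treadmill, which invokes freeRequestStub() once no request
 * can still be executing it; getFreeStub() then prefers recycling it over
 * bumping the stubs frontier:
 *
 *   Treadmill::enqueue(FreeRequestStubTrigger(stubAddr)); // after smashing
 *   ...
 *   TCA stub = mcg->getFreeStub();  // may return the recycled stubAddr
 */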
bool
MCGenerator::freeRequestStub(TCA stub) {
  LeaseHolder writer(Translator::WriteLease());
  /*
   * If we can't acquire the write lock, the caller
   * (FreeRequestStubTrigger) retries.
   */
  if (!writer) return false;
  assert(code.stubs().contains(stub));
  m_freeStubs.push(stub);
  return true;
}
TCA MCGenerator::getFreeStub() {
  TCA ret = m_freeStubs.maybePop();
  if (ret) {
    Stats::inc(Stats::Astubs_Reused);
    assert(m_freeStubs.m_list == nullptr ||
           code.stubs().contains(TCA(m_freeStubs.m_list)));
    TRACE(1, "recycle stub %p\n", ret);
  } else {
    ret = code.stubs().frontier();
    Stats::inc(Stats::Astubs_New);
    TRACE(1, "alloc new stub %p\n", ret);
  }
  return ret;
}
#define O(opcode, imm, push, pop, flags) \
  /* \
   * Each interpOne method saves m_pc, m_fp, and m_sp in the \
   * ExecutionContext, calls into the interpreter, and then returns a \
   * pointer to the current ExecutionContext. \
   */ \
  ExecutionContext* \
  interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
    interp_set_regs(ar, sp, pcOff); \
    SKTRACE(5, SrcKey(liveFunc(), vmpc(), liveResumed()), "%40s %p %p\n", \
            "interpOne" #opcode " before (fp,sp)", \
            vmfp(), vmsp()); \
    assert(*reinterpret_cast<const Op*>(vmpc()) == Op::opcode); \
    auto const ec = g_context.getNoCheck(); \
    Stats::inc(Stats::Instr_InterpOne ## opcode); \
    if (Trace::moduleEnabled(Trace::interpOne, 1)) { \
      static const StringData* cat = makeStaticString("interpOne"); \
      static const StringData* name = makeStaticString(#opcode); \
      Stats::incStatGrouped(cat, name, 1); \
    } \
    INC_TPC(interp_one) \
    /* Correct for over-counting in TC-stats. */ \
    Stats::inc(Stats::Instr_TC, -1); \
    ec->op##opcode(); \
    /* \
     * Only set regstate back to dirty if an exception is not \
     * propagating. If an exception is throwing, regstate for this call \
     * is actually still correct, and we don't have information in the \
     * fixup map for interpOne calls anyway. \
     */ \
    tl_regState = VMRegState::DIRTY; \
    return ec; \
  }

OPCODES
#undef O
void* interpOneEntryPoints[] = {
#define O(opcode, imm, push, pop, flags) \
  (void*)(interpOne ## opcode),
  OPCODES
#undef O
};
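// Illustrative sketch: the table above is indexed by opcode, so a caller
// that has synced the VM registers can dispatch to the matching interpOne
// helper. The typedef and call below are hypothetical, not code from this
// file.
//
//   using InterpOneFn = ExecutionContext* (*)(ActRec*, Cell*, Offset);
//   auto fn = reinterpret_cast<InterpOneFn>(
//     interpOneEntryPoints[static_cast<size_t>(op)]);
//   fn(ar, sp, pcOff);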
TCA MCGenerator::getTranslatedCaller() const {
  DECLARE_FRAME_POINTER(fp);
  ActRec* framePtr = fp;  // can't directly mutate the register-mapped one
  for (; framePtr; framePtr = (ActRec*)framePtr->m_savedRbp) {
    TCA rip = (TCA)framePtr->m_savedRip;
    if (isValidCodeAddress(rip)) {
      return rip;
    }
  }
  return nullptr;
}
void
MCGenerator::syncWork() {
  assert(tl_regState == VMRegState::DIRTY);
  m_fixupMap.fixup(g_context.getNoCheck());
  tl_regState = VMRegState::CLEAN;
  Stats::inc(Stats::TC_Sync);
}
TCA
MCGenerator::emitNativeTrampoline(TCA helperAddr) {
  auto& trampolines = code.trampolines();
  if (!trampolines.canEmit(kExpectedPerTrampolineSize)) {
    // Not enough space to emit a trampoline, so just return the
    // helper address, and emitCall will then emit the right sequence
    // to call it indirectly.
    TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
    always_assert(false);
    return helperAddr;
  }

  uint32_t index = m_numNativeTrampolines++;
  TCA trampAddr = trampolines.frontier();
  if (Stats::enabled()) {
    emitIncStat(trampolines, &Stats::tl_helper_counters[0], index);
    auto name = getNativeFunctionName(helperAddr);
    const size_t limit = 50;
    if (name.size() > limit) {
      name[limit] = '\0';
    }

    // The duped string lives until process death intentionally.
    Stats::helperNames[index].store(strdup(name.c_str()),
                                    std::memory_order_release);
  }

  Asm a { trampolines };
  a.jmp(helperAddr);
  a.ud2();

  m_trampolineMap[helperAddr] = trampAddr;
  recordBCInstr(OpNativeTrampoline, trampolines, trampAddr);
  if (RuntimeOption::EvalJitUseVtuneAPI) {
    reportTrampolineToVtune(trampAddr, trampolines.frontier() - trampAddr);
  }

  return trampAddr;
}
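// Illustrative note: a trampoline is just "jmp helperAddr; ud2" placed in
// code.trampolines(), near the TC. A call site can then reach a helper that
// lies outside near-call range by calling the trampoline instead; e.g.
// (hypothetical caller):
//
//   TCA target = mcg->getNativeTrampoline(helperAddr);
//   a.call(target);  // target is within near-call reach of the TC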
TCA
MCGenerator::getNativeTrampoline(TCA helperAddr) {
  if (!RuntimeOption::EvalJitTrampolines && !Stats::enabled()) {
    return helperAddr;
  }
  auto const trampAddr = (TCA)folly::get_default(m_trampolineMap, helperAddr);
  if (trampAddr) {
    return trampAddr;
  }
  return emitNativeTrampoline(helperAddr);
}
bool
MCGenerator::reachedTranslationLimit(SrcKey sk,
                                     const SrcRec& srcRec) const {
  if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) {
    INC_TPC(max_trans);
    if (debug && Trace::moduleEnabled(Trace::mcg, 2)) {
      const auto& tns = srcRec.translations();
      TRACE(1, "Too many (%zd) translations: %s, BC offset %d\n",
            tns.size(), sk.unit()->filepath()->data(),
            sk.offset());
      SKTRACE(2, sk, "{\n");
      TCA topTrans = srcRec.getTopTranslation();
      for (size_t i = 0; i < tns.size(); ++i) {
        const TransRec* rec = m_tx.getTransRec(tns[i]);
        assert(rec);
        SKTRACE(2, sk, "%zd %p\n", i, tns[i]);
        if (tns[i] == topTrans) {
          SKTRACE(2, sk, "%zd: *Top*\n", i);
        }
        if (rec->kind == TransAnchor) {
          SKTRACE(2, sk, "%zd: Anchor\n", i);
        } else {
          SKTRACE(2, sk, "%zd: guards {\n", i);
          for (unsigned j = 0; j < rec->dependencies.size(); ++j) {
            TRACE(2, rec->dependencies[j]);
          }
          SKTRACE(2, sk, "%zd } guards\n", i);
        }
      }
      SKTRACE(2, sk, "} /* Too many translations */\n");
    }
    return true;
  }
  return false;
}
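
// Once a SrcKey reaches EvalJitMaxTranslations, translateWork (below) stops
// retranslating and emits a TransInterp stub instead, so the translation
// cache cannot grow without bound for a single source location.
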
void
MCGenerator::emitGuardChecks(SrcKey sk,
                             const ChangeMap& dependencies,
                             const RefDeps& refDeps,
                             SrcRec& fail) {
  if (Trace::moduleEnabled(Trace::stats, 2)) {
    emitIncStat(code.main(), Stats::TraceletGuard_enter);
  }

  m_tx.irTrans()->hhbcTrans().emitRB(RBTypeTraceletGuards, sk);
  bool checkOuterTypeOnly = m_tx.mode() != TransProfile;
  for (auto const& dep : dependencies) {
    m_tx.irTrans()->checkType(dep.first, dep.second->rtt, checkOuterTypeOnly);
  }

  checkRefs(sk, refDeps, fail);

  if (Trace::moduleEnabled(Trace::stats, 2)) {
    emitIncStat(code.main(), Stats::TraceletGuard_execute);
  }
}

void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
  if (!debug) return;

  SrcKey sk = t.m_sk;
  DEBUG_ONLY auto unit = sk.unit();

  TRACE(3, "----------------------------------------------\n");
  TRACE(3, "  Translating from file %s:%d %s at %p:\n",
        unit->filepath()->data(),
        unit->getLineNumber(sk.offset()),
        sk.func()->name()->data(),
        postGuards);
  TRACE(3, "  preconds:\n");
  TRACE(3, "    types:\n");
  for (DepMap::const_iterator i = t.m_dependencies.begin();
       i != t.m_dependencies.end(); ++i) {
    TRACE(3, "      %-5s\n", i->second->pretty().c_str());
  }
  if (t.m_refDeps.size() != 0) {
    TRACE(3, "    refs:\n");
    for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
         i != t.m_refDeps.m_arMap.end();
         ++i) {
      TRACE(3, "      (ActRec %" PRId64 " : %-5s)\n", i->first,
            i->second.pretty().c_str());
    }
  }
  TRACE(3, "  postconds:\n");
  for (ChangeMap::const_iterator i = t.m_changes.begin();
       i != t.m_changes.end(); ++i) {
    TRACE(3, "    %-5s\n", i->second->pretty().c_str());
  }
  for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
    TRACE(3, "  %6d: %s\n", ni->source.offset(),
          instrToString((Op*)ni->pc()).c_str());
    if (ni->breaksTracelet) break;
  }
  TRACE(3, "----------------------------------------------\n");
  if (Trace::moduleEnabled(Trace::mcg, 5)) {
    // prettyStack() expects to use vmpc(). Leave it in the state we
    // found it since this code is debug-only, and we don't want behavior
    // to vary across the optimized/debug builds.
    PC oldPC = vmpc();
    vmpc() = unit->at(sk.offset());
    TRACE(3, g_context->prettyStack(std::string("  mcg  ")));
    vmpc() = oldPC;
    TRACE(3, "----------------------------------------------\n");
  }
}

void
MCGenerator::translateWork(const TranslArgs& args) {
  Timer _t(Timer::translate);
  auto sk = args.m_sk;
  std::unique_ptr<Tracelet> tp;

  SKTRACE(1, sk, "translateWork\n");
  assert(m_tx.getSrcDB().find(sk));

  TCA        start = code.main().frontier();
  TCA        stubStart = code.stubs().frontier();
  SrcRec&    srcRec = *m_tx.getSrcRec(sk);
  TransKind  transKind = TransInterp;
  UndoMarker undoA(code.main());
  UndoMarker undoAstubs(code.stubs());
  UndoMarker undoGlobalData(code.data());

  auto resetState = [&] {
    undoA.undo();
    undoAstubs.undo();
    undoGlobalData.undo();
    m_fixupMap.clearPendingFixups();
    m_pendingCatchTraces.clear();
    m_bcMap.clear();
    srcRec.clearInProgressTailJumps();
  };

  auto assertCleanState = [&] {
    assert(code.main().frontier() == start);
    assert(code.stubs().frontier() == stubStart);
    assert(m_fixupMap.pendingFixupsEmpty());
    assert(m_pendingCatchTraces.empty());
    assert(m_bcMap.empty());
    assert(srcRec.inProgressTailJumps().empty());
  };

  PostConditions pconds;
  RegionDescPtr region;
  if (!args.m_interp && !reachedTranslationLimit(sk, srcRec)) {
    // Attempt to create a region at this SrcKey
    if (m_tx.mode() == TransOptimize) {
      assert(RuntimeOption::EvalJitPGO);
      region = args.m_region;
      if (region) {
        assert(region->blocks.size() > 0);
      } else {
        TransID transId = args.m_transId;
        assert(transId != InvalidID);
        region = selectHotRegion(transId, this);
        assert(region);
        if (region && region->blocks.size() == 0) region = nullptr;
      }
    } else {
      assert(m_tx.mode() == TransProfile || m_tx.mode() == TransLive);
      tp = m_tx.analyze(sk);
      // TODO(#4150507): use sk.resumed() instead of liveResumed()?
      RegionContext rContext { sk.func(), sk.offset(), liveSpOff(),
                               liveResumed() };
      FTRACE(2, "populating live context for region\n");
      populateLiveContext(rContext);
      region = selectRegion(rContext, tp.get(), m_tx.mode());

      if (RuntimeOption::EvalJitCompareRegions &&
          RuntimeOption::EvalJitRegionSelector == "tracelet") {
        // Re-analyze with guard relaxation on
        OPTION_GUARD(EvalHHBCRelaxGuards, 1);
        OPTION_GUARD(EvalHHIRRelaxGuards, 0);
        auto legacyRegion = selectTraceletLegacy(rContext.spOffset,
                                                 *m_tx.analyze(sk));
        if (!region) {
          Trace::ftraceRelease("{:-^60}\nCouldn't select tracelet region "
                               "for:\n{}", "", show(*legacyRegion));
        } else {
          diffRegions(*region, *legacyRegion);
        }
      }
    }

    Translator::TranslateResult result = Translator::Retry;
    Translator::RegionBlacklist regionInterps;
    Offset initSpOffset = region ? region->blocks[0]->initialSpOffset()
                                 : liveSpOff();
    bool bcControlFlow = RuntimeOption::EvalHHIRBytecodeControlFlow;

    while (result == Translator::Retry) {
      // TODO(#4150507): use sk.resumed() instead of liveResumed()?
      m_tx.traceStart(sk.offset(), initSpOffset, liveResumed(), sk.func());

      // Try translating a region if we have one, then fall back to using the
      // Tracelet.
      if (region) {
        try {
          assertCleanState();
          result = m_tx.translateRegion(*region, bcControlFlow, regionInterps);

          // If we're profiling, grab the postconditions so we can
          // use them in region selection whenever we decide to retranslate.
          if (m_tx.mode() == TransProfile && result == Translator::Success &&
              RuntimeOption::EvalJitPGOUsePostConditions) {
            pconds = m_tx.irTrans()->hhbcTrans().irBuilder().getKnownTypes();
          }

          FTRACE(2, "translateRegion finished with result {}\n",
                 Translator::translateResultName(result));
        } catch (ControlFlowFailedExc& cfe) {
          FTRACE(2, "translateRegion with control flow failed: '{}'\n",
                 cfe.what());
          always_assert(bcControlFlow &&
                        "control flow translation failed, but control flow not enabled");
          bcControlFlow = false;
          result = Translator::Retry;
        } catch (const std::exception& e) {
          FTRACE(1, "translateRegion failed with '{}'\n", e.what());
          result = Translator::Failure;
        }
        if (result == Translator::Failure) {
          m_tx.traceFree();
          // TODO(#4150507): use sk.resumed() instead of liveResumed()?
          m_tx.traceStart(sk.offset(), liveSpOff(), liveResumed(), sk.func());
          resetState();
        }
      }
      if (!region || result == Translator::Failure) {
        // If the region translator failed for an Optimize
        // translation, it's OK to do a Live translation for the
        // function entry. We lazily create the tracelet here in this
        // case.
        if (m_tx.mode() == TransOptimize) {
          if (sk.getFuncId() == liveFunc()->getFuncId() &&
              liveUnit()->contains(vmpc()) &&
              sk.offset() == liveUnit()->offsetOf(vmpc())) {
            m_tx.setMode(TransLive);
            tp = m_tx.analyze(sk);
          } else {
            m_tx.setMode(TransInterp);
            m_tx.traceFree();
            break;
          }
        }
        FTRACE(1, "trying translateTracelet\n");
        assertCleanState();
        result = translateTracelet(*tp);

        // If we're profiling, grab the postconditions so we can
        // use them in region selection whenever we decide to
        // retranslate.
        if (m_tx.mode() == TransProfile && result == Translator::Success &&
            RuntimeOption::EvalJitPGOUsePostConditions) {
          pconds = m_tx.irTrans()->hhbcTrans().irBuilder().getKnownTypes();
        }
      }

      if (result != Translator::Success) {
        // Translation failed. Free resources for this trace, rollback the
        // translation cache frontiers, and discard any pending fixups.
        resetState();
      }
      m_tx.traceFree();
    }

    if (result == Translator::Success) {
      assert(m_tx.mode() == TransLive ||
             m_tx.mode() == TransProfile ||
             m_tx.mode() == TransOptimize);
      transKind = m_tx.mode();
    }
  }

  if (args.m_dryRun) {
    resetState();
    return;
  }

  if (transKind == TransInterp) {
    assertCleanState();
    auto interpOps = tp ? tp->m_numOpcodes : 1;
    FTRACE(1, "emitting {}-instr interp request for failed translation\n",
           interpOps);
    switch (arch()) {
      case Arch::X64: {
        Asm a { code.main() };
        // Add a counter for the translation if requested
        if (RuntimeOption::EvalJitTransCounters) {
          X64::emitTransCounterInc(a);
        }
        a.    jmp(emitServiceReq(code.stubs(), REQ_INTERPRET,
                                 sk.offset(), interpOps));
        break;
      }
      case Arch::ARM: {
        if (RuntimeOption::EvalJitTransCounters) {
          vixl::MacroAssembler a { code.main() };
          ARM::emitTransCounterInc(a);
        }
        // This jump won't be smashed, but a far jump on ARM requires the same
        // code sequence.
        emitSmashableJump(
          code.main(),
          emitServiceReq(code.stubs(), REQ_INTERPRET,
                         sk.offset(), interpOps),
          CC_None
        );
        break;
      }
    }
    // Fall through.
  }

  m_fixupMap.processPendingFixups();
  processPendingCatchTraces();

  TransRec tr(sk, sk.unit()->md5(), transKind, tp.get(), start,
              code.main().frontier() - start, stubStart,
              code.stubs().frontier() - stubStart,
              m_bcMap);
  m_tx.addTranslation(tr);
  if (RuntimeOption::EvalJitUseVtuneAPI) {
    reportTraceletToVtune(sk.unit(), sk.func(), tr);
  }

  m_bcMap.clear();

  recordGdbTranslation(sk, sk.func(), code.main(), start,
                       false, false);
  recordGdbTranslation(sk, sk.func(), code.stubs(), stubStart,
                       false, false);
  if (RuntimeOption::EvalJitPGO) {
    if (transKind == TransProfile) {
      if (!region) {
        assert(tp);
        region = selectTraceletLegacy(liveSpOff(), *tp);
      }
      m_tx.profData()->addTransProfile(region, pconds);
    } else {
      m_tx.profData()->addTransNonProf(transKind, sk);
    }
  }
  // SrcRec::newTranslation() makes this code reachable. Do this last;
  // otherwise there's some chance of reader threads hitting a translation
  // whose metadata is not yet visible.
  TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n",
        start, sk.getFuncId(), sk.offset());
  srcRec.newTranslation(start);
  TRACE(1, "mcg: %zd-byte tracelet\n", code.main().frontier() - start);
  if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
    Trace::traceRelease("%s", getUsage().c_str());
  }
}
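
/*
 * The retry loop above, in sketch form (a paraphrase, not code):
 *
 *   while (result == Retry) {
 *     traceStart(...);
 *     if (region) result = translateRegion(...);   // exceptions -> Retry/Failure
 *     if (!region || result == Failure) result = translateTracelet(*tp);
 *     if (result != Success) resetState();         // roll back TC frontiers
 *     traceFree();
 *   }
 *   // Anything but Success ends up as a REQ_INTERPRET service request.
 */
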
Translator::TranslateResult
MCGenerator::translateTracelet(Tracelet& t) {
  if (RuntimeOption::EvalJitRegionSelector != "") {
    // In order to properly simulate a post-Tracelet world, refuse to
    // translate Tracelets when a region selector is active.
    return Translator::Failure;
  }

  Timer _t(Timer::translateTracelet);

  FTRACE(2, "attempting to translate tracelet:\n{}\n", t.toString());
  assert(!Translator::liveFrameIsPseudoMain());
  const SrcKey& sk = t.m_sk;
  SrcRec& srcRec = *m_tx.getSrcRec(sk);
  HhbcTranslator& ht = m_tx.irTrans()->hhbcTrans();
  bool profilingFunc = false;

  assert(srcRec.inProgressTailJumps().size() == 0);
  try {
    emitResolvedDeps(t.m_resolvedDeps);
    {
      emitGuardChecks(sk, t.m_dependencies, t.m_refDeps, srcRec);

      dumpTranslationInfo(t, code.main().frontier());

      // After guards, add a counter for the translation if requested.
      if (RuntimeOption::EvalJitTransCounters) {
        ht.emitIncTransCounter();
      }

      if (m_tx.mode() == TransProfile) {
        if (t.func()->isEntry(sk.offset())) {
          ht.emitCheckCold(m_tx.profData()->curTransID());
          profilingFunc = true;
        } else {
          ht.emitIncProfCounter(m_tx.profData()->curTransID());
        }
      }

      ht.emitRB(RBTypeTraceletBody, t.m_sk);
      emitIncStat(code.main(), Stats::Instr_TC, t.m_numOpcodes);
    }

    // Profiling on function entry.
    if (t.m_sk.offset() == t.func()->base()) {
      ht.profileFunctionEntry("Normal");
    }

    /*
     * Profiling on the shapes of tracelets that are whole functions.
     * (These are the things we might consider trying to support
     * inlining.)
     */
    [&]{
      static const bool enabled = Stats::enabledAny() &&
                                  getenv("HHVM_STATS_FUNCSHAPE");
      if (!enabled) return;
      if (t.m_sk.offset() != t.func()->base()) return;
      if (auto last = t.m_instrStream.last) {
        if (last->op() != OpRetC && last->op() != OpRetV &&
            last->op() != OpCreateCont && last->op() != OpAsyncSuspend) {
          return;
        }
      }
      ht.profileSmallFunctionShape(traceletShape(t));
    }();

    Timer irGenTimer(Timer::translateTracelet_irGeneration);
    Unit::MetaHandle metaHand;
    // Translate each instruction in the tracelet
    for (auto* ni = t.m_instrStream.first; ni && !ht.hasExit();
         ni = ni->next) {
      ht.setBcOff(ni->source.offset(),
                  ni->breaksTracelet && !ht.isInlining());
      readMetaData(metaHand, *ni, m_tx.irTrans()->hhbcTrans(),
                   m_tx.mode() == TransProfile, MetaMode::Legacy);

      try {
        SKTRACE(1, ni->source, "HHIR: translateInstr\n");
        assert(!(m_tx.mode() ==
                 TransProfile && ni->outputPredicted && ni->next));
        m_tx.irTrans()->translateInstr(*ni);
      } catch (FailedIRGen& fcg) {
        always_assert(!ni->interp);
        ni->interp = true;
        FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
               "after failing to generate ir: {} \n\n",
               m_tx.getCurrentTransID(), ni->toString(), fcg.what());
        return Translator::Retry;
      }
      assert(ni->source.offset() >= t.func()->base());
      // We sometimes leave the tail of a truncated tracelet in place to aid
      // analysis, but breaksTracelet is authoritative.
      if (ni->breaksTracelet || m_tx.irTrans()->hhbcTrans().hasExit()) break;
    }
    m_tx.traceEnd();
    irGenTimer.end();

    try {
      traceCodeGen();
      TRACE(1, "HHIR: SUCCEEDED to generate code for Translation %d\n\n\n",
            m_tx.getCurrentTransID());
      if (profilingFunc) m_tx.profData()->setProfiling(t.func()->getFuncId());
      return Translator::Success;
    } catch (FailedCodeGen& fcg) {
      // Code-gen failed. Search for the bytecode instruction that caused the
      // problem, flag it to be interpreted, and retranslate the tracelet.
      SrcKey sk{fcg.vmFunc, fcg.bcOff, fcg.resumed};

      for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
        if (ni->source == sk) {
          always_assert_log(
            !ni->interp,
            [&] {
              std::ostringstream oss;
              oss << folly::format("code generation failed with {}\n",
                                   fcg.what());
              print(oss, m_tx.irTrans()->hhbcTrans().unit());
              return oss.str();
            });

          ni->interp = true;
          FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
                 "after failing to code-gen \n\n",
                 m_tx.getCurrentTransID(), ni->toString(), fcg.what());
          return Translator::Retry;
        }
      }
      throw fcg;
    }
  } catch (FailedCodeGen& fcg) {
    TRACE(1, "HHIR: FAILED to generate code for Translation %d "
          "@ %s:%d (%s)\n", m_tx.getCurrentTransID(),
          fcg.file, fcg.line, fcg.func);
    // HHIR:TODO Remove extra TRACE and adjust tools
    TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
          fcg.file, fcg.line, fcg.func);
  } catch (FailedIRGen& x) {
    TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
          x.file, x.line, x.func);
  } catch (const FailedAssertion& fa) {
    fa.print();
    StackTraceNoHeap::AddExtraLogging(
      "Assertion failure",
      folly::format("{}\n\nActive Unit:\n{}\n",
                    fa.summary, ht.unit().toString()).str());
    abort();
  } catch (const FailedTraceGen& e) {
    FTRACE(1, "HHIR: FAILED to translate whole unit: {}\n",
           e.what());
  }
  return Translator::Failure;
}
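
// The Retry results above are per-instruction: the offending
// NormalizedInstruction is flagged with ni->interp = true, and the retry
// loop in translateWork calls back in so that instruction is punted to
// interpOne on the next attempt.
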
void MCGenerator::traceCodeGen() {
  HhbcTranslator& ht = m_tx.irTrans()->hhbcTrans();
  auto& unit = ht.unit();

  auto finishPass = [&](const char* msg, int level) {
    dumpTrace(level, unit, msg, nullptr, nullptr, ht.irBuilder().guards());
    assert(checkCfg(unit));
  };

  finishPass(" after initial translation ", kIRLevel);

  optimize(unit, ht.irBuilder(), m_tx.mode());
  finishPass(" after optimizing ", kOptLevel);

  auto regs = allocateRegs(unit);
  assert(checkRegisters(unit, regs)); // calls checkCfg internally

  recordBCInstr(OpTraceletGuard, code.main(), code.main().frontier());
  genCode(code.main(), code.stubs(), unit, &m_bcMap, this, regs);

  m_numHHIRTrans++;
}
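
// Pipeline summary: HHIR was generated by the caller; this function then
// optimizes the unit, allocates registers, and lowers to machine code via
// genCode, validating the unit with checkCfg/checkRegisters between passes
// in debug builds.
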
MCGenerator::MCGenerator()
  : m_numNativeTrampolines(0)
  , m_numHHIRTrans(0)
  , m_catchTraceMap(128)
{
  TRACE(1, "MCGenerator@%p startup\n", this);
  mcg = this;

  m_unwindRegistrar = register_unwind_region(code.base(), code.codeSize());

  static bool profileUp = false;
  if (!profileUp) {
    profileInit();
    profileUp = true;
  }

  if (Trace::moduleEnabledRelease(Trace::printir) &&
      !RuntimeOption::EvalJit) {
    Trace::traceRelease("TRACE=printir is set but the jit isn't on. "
                        "Did you mean to run with -vEval.Jit=1?\n");
  }
}

void MCGenerator::initUniqueStubs() {
  // Put the following stubs into ahot, rather than a.
  CodeCache::Selector asmSel(CodeCache::Selector::Args(code).hot(true));
  switch (arch()) {
    case Arch::X64:
      m_tx.uniqueStubs = X64::emitUniqueStubs();
      break;
    case Arch::ARM:
      m_tx.uniqueStubs = ARM::emitUniqueStubs();
      break;
  }
}

void MCGenerator::registerCatchBlock(CTCA ip, TCA block) {
  FTRACE(1, "registerCatchBlock: afterCall: {} block: {}\n", ip, block);
  m_pendingCatchTraces.emplace_back(ip, block);
}

void MCGenerator::processPendingCatchTraces() {
  for (auto const& pair : m_pendingCatchTraces) {
    m_catchTraceMap.insert(pair.first, pair.second);
  }
  m_pendingCatchTraces.clear();
}

folly::Optional<TCA> MCGenerator::getCatchTrace(CTCA ip) const {
  TCA* found = m_catchTraceMap.find(ip);
  if (found) return *found;
  return folly::none;
}
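
/*
 * Usage sketch for the unwinder (the variable `rip` is hypothetical; the
 * real lookup lives in the unwind personality code):
 *
 *   if (auto const block = mcg->getCatchTrace(CTCA(rip))) {
 *     // resume unwinding in the translator-emitted catch block at *block
 *   }
 */
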
void MCGenerator::requestInit() {
  tl_regState = VMRegState::CLEAN;
  Timer::RequestInit();
  PendQ::drain();
  m_tx.requestResetHighLevelTranslator();
  Treadmill::startRequest();
  memset(&s_perfCounters, 0, sizeof(s_perfCounters));
  Stats::init();
}

void MCGenerator::requestExit() {
  if (Translator::WriteLease().amOwner()) {
    Translator::WriteLease().drop();
  }
  TRACE_MOD(txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
            " kept, %15" PRId64 " grabbed\n",
            Process::GetThreadIdForTrace(), Translator::WriteLease().m_hintKept,
            Translator::WriteLease().m_hintGrabbed);
  PendQ::drain();
  Treadmill::finishRequest();
  Stats::dump();
  Stats::clear();
  Timer::RequestExit();

  if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
    Trace::traceRelease("MCGenerator perf counters for %s:\n",
                        g_context->getRequestUrl(50).c_str());
    for (int i = 0; i < tpc_num_counters; i++) {
      Trace::traceRelease("%-20s %10" PRId64 "\n",
                          kPerfCounterNames[i], s_perfCounters[i]);
    }
    Trace::traceRelease("\n");
  }
}

bool
MCGenerator::isPseudoEvent(const char* event) {
  for (auto name : kPerfCounterNames) {
    if (!strcmp(event, name)) {
      return true;
    }
  }
  return false;
}

void
MCGenerator::getPerfCounters(Array& ret) {
  for (int i = 0; i < tpc_num_counters; i++) {
    // Until Perflab can automatically scale the values we give it to
    // an appropriate range, we have to fudge these numbers so they
    // look more like reasonable hardware counter values.
    ret.set(String::FromCStr(kPerfCounterNames[i]),
            s_perfCounters[i] * 1000);
  }

  if (RuntimeOption::EnableInstructionCounts) {
    auto doCounts = [&](unsigned begin, const char* const name) {
      int64_t count = 0;
      for (; begin < Stats::Instr_InterpOneHighInvalid;
           begin += STATS_PER_OPCODE) {
        count += Stats::tl_counters[Stats::StatCounter(begin)];
      }
      ret.set(String::FromCStr(name), count);
    };

    doCounts(Stats::Instr_TranslLowInvalid + STATS_PER_OPCODE,
             kInstrCountMCGName);
    doCounts(Stats::Instr_TranslIRPostLowInvalid + STATS_PER_OPCODE,
             kInstrCountIRName);
  }

  for (auto const& pair : Timer::Counters()) {
    if (pair.second.total == 0 && pair.second.count == 0) continue;

    ret.set(String("jit_time_") + pair.first, pair.second.total);
  }
}

MCGenerator::~MCGenerator() {
}

static Debug::TCRange rangeFrom(const CodeBlock& cb, const TCA addr,
                                bool isAstubs) {
  assert(cb.contains(addr));
  return Debug::TCRange(addr, cb.frontier(), isAstubs);
}

void MCGenerator::recordBCInstr(uint32_t op,
                                const CodeBlock& cb,
                                const TCA addr) {
  if (addr != cb.frontier()) {
    m_debugInfo.recordBCInstr(Debug::TCRange(addr, cb.frontier(),
                                             &cb == &code.stubs()), op);
  }
}

void MCGenerator::recordGdbTranslation(SrcKey sk,
                                       const Func* srcFunc,
                                       const CodeBlock& cb,
                                       const TCA start,
                                       bool exit,
                                       bool inPrologue) {
  if (start != cb.frontier()) {
    assert(Translator::WriteLease().amOwner());
    if (!RuntimeOption::EvalJitNoGdb) {
      m_debugInfo.recordTracelet(rangeFrom(cb, start, &cb == &code.stubs()),
                                 srcFunc,
                                 reinterpret_cast<const Op*>(
                                   srcFunc->unit() ?
                                     srcFunc->unit()->at(sk.offset()) : nullptr
                                 ),
                                 exit, inPrologue);
    }
    if (RuntimeOption::EvalPerfPidMap) {
      m_debugInfo.recordPerfMap(rangeFrom(cb, start, &cb == &code.stubs()),
                                srcFunc, exit, inPrologue);
    }
  }
}

void MCGenerator::recordGdbStub(const CodeBlock& cb,
                                const TCA start, const char* name) {
  if (!RuntimeOption::EvalJitNoGdb) {
    m_debugInfo.recordStub(rangeFrom(cb, start, &cb == &code.stubs()),
                           name);
  }
}

std::string MCGenerator::getUsage() {
  std::string usage;
  size_t totalBlockSize = 0;
  size_t totalBlockCapacity = 0;

  auto addRow = [&](const std::string& name, size_t used, size_t capacity) {
    totalBlockSize += used;
    totalBlockCapacity += capacity;
    auto percent = capacity ? 100 * used / capacity : 0;
    usage += folly::format("mcg: {:9} bytes ({}%) in {}\n",
                           used, percent, name).str();
  };
  code.forEachBlock([&](const char* name, const CodeBlock& a) {
    addRow(std::string("code.") + name, a.used(), a.capacity());
  });
  addRow("data", code.data().used(), code.data().capacity());
  addRow("RDS", RDS::usedBytes(),
         RuntimeOption::EvalJitTargetCacheSize * 3 / 4);
  addRow("persistentRDS", RDS::usedPersistentBytes(),
         RuntimeOption::EvalJitTargetCacheSize / 4);
  addRow("total",
         totalBlockSize + code.data().used() +
         RDS::usedBytes() + RDS::usedPersistentBytes(),
         totalBlockCapacity + code.data().capacity() +
         RuntimeOption::EvalJitTargetCacheSize);

  return usage;
}
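
/*
 * Each addRow() call appends one line, so the returned string looks like
 * this (values hypothetical):
 *
 *   mcg:   1234567 bytes (12%) in code.main
 *   mcg:     65536 bytes (3%) in code.stubs
 *   mcg:      4096 bytes (1%) in data
 */
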
std::string MCGenerator::getTCAddrs() {
  std::string addrs;
  code.forEachBlock([&](const char* name, const CodeBlock& a) {
    addrs += folly::format("{}: {}\n", name, a.base()).str();
  });
  return addrs;
}

bool MCGenerator::addDbgGuards(const Unit* unit) {
  // TODO refactor: this grabs the write lease and iterates through the
  // whole SrcDB...
  bool locked = Translator::WriteLease().acquire(true);
  if (!locked) {
    return false;
  }
  struct timespec tsBegin, tsEnd;
  HPHP::Timer::GetMonotonicTime(tsBegin);
  // The docs say that even find() _could_ invalidate iterators; in
  // practice that should be very rare, so go with it for now.
  for (SrcDB::const_iterator it = m_tx.getSrcDB().begin();
       it != m_tx.getSrcDB().end(); ++it) {
    SrcKey const sk = SrcKey::fromAtomicInt(it->first);
    // We may have a SrcKey to a deleted function. NB: this may miss a
    // race with deleting a Func. See task #2826313.
    if (!Func::isFuncIdValid(sk.getFuncId())) continue;
    SrcRec* sr = it->second;
    if (sr->unitMd5() == unit->md5() &&
        !sr->hasDebuggerGuard() &&
        m_tx.isSrcKeyInBL(sk)) {
      addDbgGuardImpl(sk, sr);
    }
  }
  Translator::WriteLease().drop();
  HPHP::Timer::GetMonotonicTime(tsEnd);
  int64_t elapsed = gettime_diff_us(tsBegin, tsEnd);
  if (Trace::moduleEnabledRelease(Trace::mcg, 5)) {
    Trace::traceRelease("addDbgGuards got lease for %" PRId64 " us\n", elapsed);
  }
  return true;
}

bool MCGenerator::addDbgGuard(const Func* func, Offset offset, bool resumed) {
  SrcKey sk(func, offset, resumed);

  if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
    if (sr->hasDebuggerGuard()) {
      return true;
    }
  } else {
    // no translation yet
    return true;
  }

  if (debug) {
    if (!m_tx.isSrcKeyInBL(sk)) {
      TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
      return false;
    }
  }
  bool locked = Translator::WriteLease().acquire(true);
  if (!locked) {
    return false;
  }

  if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
    addDbgGuardImpl(sk, sr);
  }

  Translator::WriteLease().drop();
  return true;
}

bool MCGenerator::dumpTCCode(const char* filename) {
#define OPEN_FILE(F, SUFFIX)                                    \
  std::string F ## name = std::string(filename).append(SUFFIX); \
  FILE* F = fopen(F ## name .c_str(), "wb");                    \
  if (F == nullptr) return false;                               \
  SCOPE_EXIT{ fclose(F); };

  OPEN_FILE(aFile,          "_a");
  OPEN_FILE(aprofFile,      "_aprof");
  OPEN_FILE(astubFile,      "_astub");
  OPEN_FILE(helperAddrFile, "_helpers_addrs.txt");

#undef OPEN_FILE

  // dump starting from the trampolines; this assumes CodeCache places
  // trampolines before the translation cache
  size_t count = code.main().frontier() - code.trampolines().base();
  bool result = (fwrite(code.trampolines().base(), 1, count, aFile) == count);
  if (result) {
    count = code.prof().used();
    result = (fwrite(code.prof().base(), 1, count, aprofFile) == count);
  }
  if (result) {
    count = code.stubs().used();
    result = (fwrite(code.stubs().base(), 1, count, astubFile) == count);
  }
  if (result) {
    for (auto const& pair : m_trampolineMap) {
      void* helperAddr = pair.first;
      void* trampAddr = pair.second;
      auto functionName = getNativeFunctionName(helperAddr);
      fprintf(helperAddrFile, "%10p %10p %s\n",
              trampAddr, helperAddr,
              functionName.c_str());
    }
  }
  return result;
}

// Returns true on success
bool MCGenerator::dumpTC(bool ignoreLease) {
  if (!ignoreLease && !Translator::WriteLease().acquire(true)) return false;
  bool success = dumpTCData();
  if (success) {
    success = dumpTCCode("/tmp/tc_dump");
  }
  if (!ignoreLease) Translator::WriteLease().drop();
  return success;
}

// Returns true on success
bool tc_dump(void) {
  return mcg && mcg->dumpTC();
}

// Returns true on success
bool MCGenerator::dumpTCData() {
  gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
  if (!tcDataFile) return false;

  if (!gzprintf(tcDataFile,
                "repo_schema     = %s\n"
                "a.base          = %p\n"
                "a.frontier      = %p\n"
                "aprof.base      = %p\n"
                "aprof.frontier  = %p\n"
                "astubs.base     = %p\n"
                "astubs.frontier = %p\n\n",
                kRepoSchemaId,
                code.trampolines().base(), code.main().frontier(),
                code.prof().base(), code.prof().frontier(),
                code.stubs().base(), code.stubs().frontier())) {
    return false;
  }

  if (!gzprintf(tcDataFile, "total_translations = %zu\n\n",
                m_tx.getCurrentTransID())) {
    return false;
  }

  for (TransID t = 0; t < m_tx.getCurrentTransID(); t++) {
    if (gzputs(tcDataFile,
               m_tx.getTransRec(t)->print(m_tx.getTransCounter(t)).c_str()) ==
        -1) {
      return false;
    }
  }

  gzclose(tcDataFile);
  return true;
}
void MCGenerator::invalidateSrcKey(SrcKey sk) {
  assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO);
  assert(Translator::WriteLease().amOwner());
  /*
   * Reroute existing translations for SrcKey to an as-yet indeterminate
   * new one.
   */
  SrcRec* sr = m_tx.getSrcDB().find(sk);
  assert(sr);
  /*
   * Since previous translations aren't reachable from here, we know we
   * just created some garbage in the TC. We currently have no mechanism
   * to reclaim this.
   */
  sr->replaceOldTranslations();
}

void MCGenerator::setJmpTransID(TCA jmp) {
  if (m_tx.mode() != TransProfile) return;

  TransID transId = m_tx.profData()->curTransID();
  FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp, transId);
  m_jmpToTransID[jmp] = transId;
}

void
emitIncStat(CodeBlock& cb, uint64_t* tl_table, uint index, int n, bool force) {
  if (!force && !Stats::enabled()) return;
  intptr_t disp = uintptr_t(&tl_table[index]) - tlsBase();

  if (arch() == Arch::X64) {
    X64Assembler a { cb };

    a.    pushf ();
    // addq $n, [%fs:disp]
    a.    fs().addq(n, baseless(disp));
    a.    popf ();
  } else if (arch() == Arch::ARM) {
    using ARM::rAsm;
    using ARM::rAsm2;
    vixl::MacroAssembler a { cb };

    a.    Mrs   (rAsm2, vixl::TPIDR_EL0);
    a.    Ldr   (rAsm, rAsm2[disp]);
    a.    Add   (rAsm, rAsm, n);
    a.    Str   (rAsm, rAsm2[disp]);
  } else {
    not_implemented();
  }
}
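
/*
 * On x64 the sequence emitted above is, in assembly terms:
 *
 *   pushf
 *   addq $n, %fs:disp    ; bump the thread-local counter
 *   popf
 *
 * pushf/popf preserve the condition codes, since the increment may be
 * emitted in the middle of a flags-live instruction sequence.
 */
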
} // HPHP::JIT

} // HPHP