hphp/runtime/vm/jit/translator-x64.cpp
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/vm/jit/translator-x64.h"
18 #include <cinttypes>
19 #include <stdint.h>
20 #include <assert.h>
21 #include <unistd.h>
22 #include <sys/mman.h>
23 #include <strstream>
24 #include <stdio.h>
25 #include <stdarg.h>
26 #include <string>
27 #include <queue>
28 #include <unwind.h>
29 #include <unordered_set>
30 #include <signal.h>
31 #ifdef __FreeBSD__
32 #include <sys/ucontext.h>
33 #endif
35 #ifdef __FreeBSD__
36 #define RIP_REGISTER(v) (v).mc_rip
37 #elif defined(__APPLE__)
38 #define RIP_REGISTER(v) (v)->__ss.__rip
39 #elif defined(__x86_64__)
40 #define RIP_REGISTER(v) (v).gregs[REG_RIP]
41 #elif defined(__AARCH64EL__)
42 #define RIP_REGISTER(v) (v).pc
43 #else
44 #error How is rip accessed on this architecture?
45 #endif
47 #include <boost/bind.hpp>
48 #include <boost/optional.hpp>
49 #include <boost/utility/typed_in_place_factory.hpp>
50 #include <boost/range/adaptors.hpp>
51 #include <boost/scoped_ptr.hpp>
53 #include "folly/Format.h"
55 #include "hphp/util/asm-x64.h"
56 #include "hphp/util/bitops.h"
57 #include "hphp/util/debug.h"
58 #include "hphp/util/disasm.h"
59 #include "hphp/util/maphuge.h"
60 #include "hphp/util/rank.h"
61 #include "hphp/util/ringbuffer.h"
62 #include "hphp/util/timer.h"
63 #include "hphp/util/trace.h"
64 #include "hphp/util/meta.h"
65 #include "hphp/util/process.h"
66 #include "hphp/util/util.h"
67 #include "hphp/util/repo_schema.h"
68 #include "hphp/util/cycles.h"
70 #include "hphp/runtime/vm/bytecode.h"
71 #include "hphp/runtime/vm/php-debug.h"
72 #include "hphp/runtime/vm/runtime.h"
73 #include "hphp/runtime/base/complex_types.h"
74 #include "hphp/runtime/base/execution_context.h"
75 #include "hphp/runtime/base/runtime_option.h"
76 #include "hphp/runtime/base/strings.h"
78 #include "hphp/runtime/server/source_root_info.h"
79 #include "hphp/runtime/base/zend-string.h"
80 #include "hphp/runtime/ext/ext_closure.h"
81 #include "hphp/runtime/ext/ext_continuation.h"
82 #include "hphp/runtime/ext/ext_function.h"
83 #include "hphp/runtime/vm/debug/debug.h"
84 #include "hphp/runtime/base/stats.h"
85 #include "hphp/runtime/vm/pendq.h"
86 #include "hphp/runtime/vm/treadmill.h"
87 #include "hphp/runtime/vm/repo.h"
88 #include "hphp/runtime/vm/type-profile.h"
89 #include "hphp/runtime/vm/member-operations.h"
90 #include "hphp/runtime/vm/jit/abi-x64.h"
91 #include "hphp/runtime/vm/jit/check.h"
92 #include "hphp/runtime/vm/jit/code-gen.h"
93 #include "hphp/runtime/vm/jit/hhbc-translator.h"
94 #include "hphp/runtime/vm/jit/ir-translator.h"
95 #include "hphp/runtime/vm/jit/normalized-instruction.h"
96 #include "hphp/runtime/vm/jit/opt.h"
97 #include "hphp/runtime/vm/jit/print.h"
98 #include "hphp/runtime/vm/jit/region-selection.h"
99 #include "hphp/runtime/vm/jit/srcdb.h"
100 #include "hphp/runtime/vm/jit/target-cache.h"
101 #include "hphp/runtime/vm/jit/tracelet.h"
102 #include "hphp/runtime/vm/jit/translator-inline.h"
103 #include "hphp/runtime/vm/jit/unwind-x64.h"
104 #include "hphp/runtime/vm/jit/x64-util.h"
106 #include "hphp/runtime/vm/jit/translator-x64-internal.h"
108 namespace HPHP {
109 namespace Transl {
111 using namespace reg;
112 using namespace Util;
113 using namespace Trace;
114 using std::max;
116 #define TRANS_PERF_COUNTERS \
117 TPC(translate) \
118 TPC(retranslate) \
119 TPC(interp_bb) \
120 TPC(interp_instr) \
121 TPC(interp_one) \
122 TPC(max_trans) \
123 TPC(enter_tc) \
124 TPC(service_req)
126 static const char* const kInstrCountTx64Name = "instr_tx64";
127 static const char* const kInstrCountIRName = "instr_hhir";
129 #define TPC(n) "trans_" #n,
130 static const char* const kPerfCounterNames[] = {
131 TRANS_PERF_COUNTERS
132 kInstrCountTx64Name,
133 kInstrCountIRName,
135 #undef TPC
137 #define TPC(n) tpc_ ## n,
138 enum TransPerfCounter {
139 TRANS_PERF_COUNTERS
140 tpc_num_counters
142 #undef TPC
143 static __thread int64_t s_perfCounters[tpc_num_counters];
144 #define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
146 // nextTx64: Global shared state. The tx64 that should be used for
147 // new requests going forward.
148 TranslatorX64* volatile nextTx64;
149 // tx64: Thread-local state. The tx64 we're using for the current request.
150 __thread TranslatorX64* tx64;
152 // Register dirtiness: thread-private.
153 __thread VMRegState tl_regState = VMRegState::CLEAN;
155 static StaticString s___call(LITSTR_INIT("__call"));
156 static StaticString s___callStatic(LITSTR_INIT("__callStatic"));
158 // Initialize at most this many locals inline in function body prologue; more
159 // than this, and emitting a loop is more compact. To be precise, the actual
160 // crossover point in terms of code size is 6; 9 was determined by experiment to
161 // be the optimal point in certain benchmarks. #microoptimization
162 static const int kLocalsToInitializeInline = 9;
164 // An intentionally funny-looking-in-core-dumps constant for uninitialized
165 // instruction pointers.
166 static const uint64_t kUninitializedRIP = 0xba5eba11acc01ade;
168 // stubBlock --
169 // Used to emit a bunch of outlined code that is unconditionally jumped to.
170 template <typename L>
171 void stubBlock(X64Assembler& hot, X64Assembler& cold, const L& body) {
172 hot. jmp(cold.frontier());
173 guardDiamond(cold, body);
174 cold. jmp(hot.frontier());
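// Rough layout sketch (illustrative only): the hot assembler gets just the
// forward jmp; whatever guardDiamond(cold, body) emits lands in the cold
// assembler, which then jumps back to the instruction following the hot jmp:
//
//   hot:  jmp cold_start          cold_start:  <outlined code ...>
//         <hot code continues>                 jmp <hot code continues>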
177 static bool
178 typeCanBeStatic(DataType t) {
179 return t != KindOfObject && t != KindOfResource && t != KindOfRef;
182 // IfCountNotStatic --
183 // Emits if (%reg->_count != RefCountStaticValue) { ... }.
184 //   May omit the check entirely when the type is known to never
185 //   be static (objects, resources, and refs).
186 struct IfCountNotStatic {
187 typedef CondBlock<FAST_REFCOUNT_OFFSET,
188 RefCountStaticValue,
189 CC_Z,
190 field_type(RefData, m_count)> NonStaticCondBlock;
191 NonStaticCondBlock *m_cb; // might be null
192 IfCountNotStatic(X64Assembler& a,
193 PhysReg reg,
194 DataType t = KindOfInvalid) {
195 // Objects, resources, and refs (variants) cannot be static
196 if (typeCanBeStatic(t)) {
197 m_cb = new NonStaticCondBlock(a, reg);
198 } else {
199 m_cb = nullptr;
203 ~IfCountNotStatic() {
204 delete m_cb;
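// Typical use, as in emitIncRef() below -- the guarded operation runs only
// when the count is not the static sentinel (or unconditionally when the
// type can never be static and no check is emitted):
//
//   { IfCountNotStatic ins(a, base, dtype);
//     a.incl(base[FAST_REFCOUNT_OFFSET]);
//   } // destructor closes the conditional block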
208 // Segfault handler: figure out if it's an intentional segfault
209 // (timeout exception) and if so, act appropriately. Otherwise, pass
210 // the signal on.
211 void TranslatorX64::SEGVHandler(int signum, siginfo_t *info, void *ctx) {
212 TranslatorX64 *self = Get();
213 void *surprisePage =
214 ThreadInfo::s_threadInfo->m_reqInjectionData.surprisePage;
215 if (info->si_addr == surprisePage) {
216 ucontext_t *ucontext = (ucontext_t*)ctx;
217 TCA rip = (TCA)RIP_REGISTER(ucontext->uc_mcontext);
218 SignalStubMap::const_accessor a;
219 if (!self->m_segvStubs.find(a, rip)) {
220 NOT_REACHED();
222 TCA astubsCall = a->second;
224 // When this handler returns, "call" the astubs code for this
225 // surprise check.
226 RIP_REGISTER(ucontext->uc_mcontext) = (uintptr_t)astubsCall;
228 // We've processed this event; reset the page in case execution
229 // continues normally.
230 g_vmContext->m_stack.unprotect();
231 } else {
232 sig_t handler = (sig_t)self->m_segvChain;
233 if (handler == SIG_DFL || handler == SIG_IGN) {
234 signal(signum, handler);
235 raise(signum);
236 } else {
237 self->m_segvChain(signum, info, ctx);
242 // Logical register move: ensures the value in src will be in dest
243 // after execution, but might do so in strange ways. Do not count on
244 // being able to smash dest to a different register in the future, e.g.
245 void
246 emitMovRegReg(X64Assembler& a, PhysReg src, PhysReg dest) {
247 SpaceRecorder("_RegMove", a);
248 if (src != dest) {
249 a. movq (src, dest);
253 void
254 emitLea(X64Assembler& a, PhysReg base, int disp, PhysReg dest) {
255 if (!disp) {
256 emitMovRegReg(a, base, dest);
257 return;
259 a. lea (base[disp], dest);
262 static void UNUSED tc_debug_print(const char* message,
263 uintptr_t r1,
264 uintptr_t r2,
265 uintptr_t r3,
266 ActRec* fp) {
267 TRACE(1, "*********************** %s: %p %p %p (for : %s)\n",
268 message, (void*)r1, (void*)r2, (void*)r3,
269 fp->m_func ? fp->m_func->fullName()->data() : "[?]");
272 // Debugging utility for translations: prints a message, followed
273 // by the values of up to three registers.
274 void TranslatorX64::emitDebugPrint(Asm& a,
275 const char* message,
276 PhysReg r1,
277 PhysReg r2,
278 PhysReg r3) {
279 boost::optional<PhysRegSaver> aSaver;
280 boost::optional<PhysRegSaverStub> astubsSaver;
282 if (&a == &this->a) {
283 aSaver = boost::in_place<PhysRegSaver>(boost::ref(a), kAllX64Regs);
284 } else {
285 astubsSaver = boost::in_place<PhysRegSaverStub>(boost::ref(a),
286 kAllX64Regs);
289 a. mov_imm64_reg (uintptr_t(message), argNumToRegName[0]);
290 a. mov_reg64_reg64(r1, argNumToRegName[1]);
291 a. mov_reg64_reg64(r2, argNumToRegName[2]);
292 a. mov_reg64_reg64(r3, argNumToRegName[3]);
293 a. mov_reg64_reg64(rVmFp, argNumToRegName[4]);
294 a. call((TCA)tc_debug_print);
297 void
298 TranslatorX64::emitRB(X64Assembler& a,
299 Trace::RingBufferType t,
300 SrcKey sk, RegSet toSave) {
301 if (!Trace::moduleEnabledRelease(Trace::tx64, 3)) {
302 return;
304 PhysRegSaver rs(a, toSave | kSpecialCrossTraceRegs);
305 int arg = 0;
306 a. emitImmReg(t, argNumToRegName[arg++]);
307 a. emitImmReg(sk.getFuncId(), argNumToRegName[arg++]);
308 a. emitImmReg(sk.offset(), argNumToRegName[arg++]);
309 a. call((TCA)ringbufferEntry);
312 void
313 TranslatorX64::emitRB(X64Assembler& a,
314 Trace::RingBufferType t,
315 const char* msg,
316 RegSet toSave) {
317 if (!Trace::moduleEnabledRelease(Trace::tx64, 3)) {
318 return;
320 PhysRegSaver save(a, toSave | kSpecialCrossTraceRegs);
321 int arg = 0;
322 a. emitImmReg((uintptr_t)msg, argNumToRegName[arg++]);
323 a. emitImmReg(strlen(msg), argNumToRegName[arg++]);
324 a. emitImmReg(t, argNumToRegName[arg++]);
325 a. call((TCA)ringbufferMsg);
328 void
329 TranslatorX64::emitCall(X64Assembler& a, TCA dest) {
330 if (a.jmpDeltaFits(dest) && !Stats::enabled()) {
331 a. call(dest);
332 } else {
333 a. call(getNativeTrampoline(dest));
337 void
338 TranslatorX64::emitCall(X64Assembler& a, CppCall call) {
339 if (call.isDirect()) {
340 return emitCall(a, (TCA)call.getAddress());
342 // Virtual call.
343 // Load method's address from proper offset off of object in rdi,
344 // using rax as scratch.
345 a.loadq(*rdi, rax);
346 a.call(rax[call.getOffset()]);
349 static void emitGetGContext(X64Assembler& a, PhysReg dest) {
350 emitTLSLoad<ExecutionContext>(a, g_context, dest);
353 void
354 TranslatorX64::emitEagerSyncPoint(X64Assembler& a, const Opcode* pc,
355 const Offset spDiff) {
356 static COff spOff = offsetof(VMExecutionContext, m_stack) +
357 Stack::topOfStackOffset();
358 static COff fpOff = offsetof(VMExecutionContext, m_fp);
359 static COff pcOff = offsetof(VMExecutionContext, m_pc);
361 /* we can't use rAsm because the pc store uses it as a
362 temporary */
363 Reg64 rEC = reg::rdi;
365 a. push(rEC);
366 emitGetGContext(a, rEC);
367 a. storeq(rVmFp, rEC[fpOff]);
368 if (spDiff) {
369 a. lea(rVmSp[spDiff], rAsm);
370 a. storeq(rAsm, rEC[spOff]);
371 } else {
372 a. storeq(rVmSp, rEC[spOff]);
374 a. storeq(pc, rEC[pcOff]);
375 a. pop(rEC);
378 void
379 TranslatorX64::recordSyncPoint(X64Assembler& a, Offset pcOff, Offset spOff) {
380 m_pendingFixups.push_back(PendingFixup(a.frontier(), Fixup(pcOff, spOff)));
383 void
384 TranslatorX64::recordIndirectFixup(CTCA addr, int dwordsPushed) {
385 m_fixupMap.recordIndirectFixup(
386 a.frontier(), IndirectFixup((2 + dwordsPushed) * 8));
389 void
390 TranslatorX64::emitIncRef(PhysReg base, DataType dtype) {
391 emitIncRef(a, base, dtype);
394 void
395 TranslatorX64::emitIncRef(X64Assembler &a, PhysReg base, DataType dtype) {
396 if (!IS_REFCOUNTED_TYPE(dtype) && dtype != KindOfInvalid) {
397 return;
399 SpaceRecorder sr("_IncRef", a);
400 static_assert(sizeof(RefCount) == sizeof(int32_t), "");
401 { // if !static then
402 IfCountNotStatic ins(a, base, dtype);
404 * The optimization guide cautions against using inc; while it is
405 * compact, it does not update the carry flag, creating a partial
406 * flags-register dependency for any downstream flags-dependent code.
408 a. incl(base[FAST_REFCOUNT_OFFSET]);
409 } // endif
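// Sketch of the emitted sequence (the compare/branch comes from the
// CondBlock underlying IfCountNotStatic; exact encoding may differ):
//   cmpl  $RefCountStaticValue, FAST_REFCOUNT_OFFSET(%base)
//   je    skip
//   incl  FAST_REFCOUNT_OFFSET(%base)
// skip: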
412 void
413 TranslatorX64::emitIncRefGenericRegSafe(PhysReg base,
414 int disp,
415 PhysReg tmpReg) {
416 { // if RC
417 IfRefCounted irc(a, base, disp);
418 a. load_reg64_disp_reg64(base, disp + TVOFF(m_data),
419 tmpReg);
420 { // if !static
421 IfCountNotStatic ins(a, tmpReg);
422 a. incl(tmpReg[FAST_REFCOUNT_OFFSET]);
423 } // endif
424 } // endif
427 // emitEagerVMRegSave --
428 // Inline. Saves regs in-place in the TC. This is an unusual need;
429 // you probably want to lazily save these regs via recordCall and
430 // its ilk.
432 // SaveFP uses rVmFp, as usual. SavePC requires the caller to have
433 // placed the PC offset of the instruction about to be executed in
434 // rdi.
435 enum RegSaveFlags {
436 SaveFP = 1,
437 SavePC = 2
440 static TCA
441 emitEagerVMRegSave(X64Assembler& a,
442 int flags /* :: RegSaveFlags */) {
443 TCA start = a.frontier();
444 bool saveFP = bool(flags & SaveFP);
445 bool savePC = bool(flags & SavePC);
446 assert((flags & ~(SavePC | SaveFP)) == 0);
448 Reg64 pcReg = rdi;
449 PhysReg rEC = rAsm;
450 assert(!kSpecialCrossTraceRegs.contains(rdi));
452 emitGetGContext(a, rEC);
454 static COff spOff = offsetof(VMExecutionContext, m_stack) +
455 Stack::topOfStackOffset();
456 static COff fpOff = offsetof(VMExecutionContext, m_fp) - spOff;
457 static COff pcOff = offsetof(VMExecutionContext, m_pc) - spOff;
459 assert(spOff != 0);
460 a. addq (spOff, r64(rEC));
461 a. storeq (rVmSp, *rEC);
462 if (savePC) {
463 // We're going to temporarily abuse rVmSp to hold the current unit.
464 Reg64 rBC = rVmSp;
465 a. push (rBC);
466 // m_fp -> m_func -> m_unit -> m_bc + pcReg
467 a. loadq (rVmFp[AROFF(m_func)], rBC);
468 a. loadq (rBC[Func::unitOff()], rBC);
469 a. loadq (rBC[Unit::bcOff()], rBC);
470 a. addq (rBC, pcReg);
471 a. storeq (pcReg, rEC[pcOff]);
472 a. pop (rBC);
474 if (saveFP) {
475 a. storeq (rVmFp, rEC[fpOff]);
477 return start;
480 CppCall TranslatorX64::getDtorCall(DataType type) {
481 switch (type) {
482 case BitwiseKindOfString:
483 return CppCall(getMethodPtr(&StringData::release));
484 case KindOfArray:
485 return CppCall(getMethodPtr(&ArrayData::release));
486 case KindOfObject:
487 return CppCall(getMethodPtr(&ObjectData::release));
488 case KindOfResource:
489 return CppCall(getMethodPtr(&ResourceData::release));
490 case KindOfRef:
491 return CppCall(getMethodPtr(&RefData::release));
492 default:
493 assert(false);
494 NOT_REACHED();
499 * callDestructor/jumpDestructor --
501 * Emit a call or jump to the appropriate destructor for a dynamically
502 * typed value.
504 * No registers are saved; most translated code should be using
505 * emitDecRefGeneric{Reg,} instead of this.
507 * Inputs:
509 * - typeReg is destroyed and may not be argNumToRegName[0].
510 * - argNumToRegName[0] should contain the m_data for this value.
511 * - scratch is destroyed.
514 static IndexedMemoryRef lookupDestructor(X64Assembler& a,
515 PhysReg typeReg,
516 PhysReg scratch) {
517 assert(typeReg != r32(argNumToRegName[0]));
518 assert(scratch != argNumToRegName[0]);
520 static_assert((BitwiseKindOfString >> kShiftDataTypeToDestrIndex == 1) &&
521 (KindOfArray >> kShiftDataTypeToDestrIndex == 2) &&
522 (KindOfObject >> kShiftDataTypeToDestrIndex == 3) &&
523 (KindOfResource >> kShiftDataTypeToDestrIndex == 4) &&
524 (KindOfRef >> kShiftDataTypeToDestrIndex == 5),
525 "lookup of destructors depends on KindOf* values");
527 a. shrl (kShiftDataTypeToDestrIndex, r32(typeReg));
528 a. movq (&g_destructors, scratch);
529 return scratch[typeReg*8];
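// Worked example of the lookup: for a KindOfArray value the shrl turns the
// type tag into index 2 (per the static_assert above), so with scratch
// holding &g_destructors the returned operand addresses g_destructors[2]
// (the array destructor; compare getDtorCall() above).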
532 static void callDestructor(X64Assembler& a,
533 PhysReg typeReg,
534 PhysReg scratch) {
535 a. call (lookupDestructor(a, typeReg, scratch));
538 static void jumpDestructor(X64Assembler& a,
539 PhysReg typeReg,
540 PhysReg scratch) {
541 a. jmp (lookupDestructor(a, typeReg, scratch));
544 void TranslatorX64::emitGenericDecRefHelpers() {
545 Label release;
547 // m_dtorGenericStub just takes a pointer to the TypedValue in rdi.
548 moveToAlign(a, kNonFallthroughAlign);
549 m_irPopRHelper = a.frontier();
550 // popR: Move top-of-stack pointer to rdi
551 emitMovRegReg(a, rVmSp, rdi);
552 // fall through
553 m_dtorGenericStub = a.frontier();
554 emitLoadTVType(a, rdi[TVOFF(m_type)], r32(rAsm));
555 a. loadq (rdi[TVOFF(m_data)], rdi);
556 // Fall through to the regs stub.
559 * Custom calling convention: m_type goes in rAsm, m_data in
560 * rdi. We don't ever store program locations in rAsm, so the
561 * caller didn't need to spill anything. The assembler sometimes
562 * uses rAsm, but we know the stub won't need to and it makes it
563 * possible to share the code for both decref helpers.
565 m_dtorGenericStubRegs = a.frontier();
566 a. cmpl (RefCountStaticValue, rdi[FAST_REFCOUNT_OFFSET]);
567 jccBlock<CC_Z>(a, [&] {
568 a. decl (rdi[FAST_REFCOUNT_OFFSET]);
569 release.jcc8(a, CC_Z);
571 a. ret ();
573 asm_label(a, release);
575 PhysRegSaver prs(a, kGPCallerSaved - RegSet(rdi));
576 callDestructor(a, rAsm, rax);
577 recordIndirectFixup(a.frontier(), prs.rspTotalAdjustmentRegs());
579 a. ret ();
581 TRACE(1, "HOTSTUB: generic dtor start: %lx\n",
582 uintptr_t(m_irPopRHelper));
583 TRACE(1, "HOTSTUB: genericDtorStub: %lx\n", uintptr_t(m_dtorGenericStub));
584 TRACE(1, "HOTSTUB: genericDtorStubRegs: %lx\n",
585 uintptr_t(m_dtorGenericStubRegs));
586 TRACE(1, "HOTSTUB: total dtor generic stubs %zu bytes\n",
587 size_t(a.frontier() - m_dtorGenericStub));
590 bool TranslatorX64::profileSrcKey(const SrcKey& sk) const {
591 if (!RuntimeOption::EvalJitPGO) return false;
593 if (RuntimeOption::EvalJitPGOHotOnly && !(sk.func()->attrs() & AttrHot)) {
594 return false;
597 if (profData()->optimized(sk)) return false;
599 // The TCA of closure bodies is stored in the func's prologue
600 // tables. So, to support retranslating them, we need to reset the
601 // prologue tables and the prologue cache appropriately.
602 // (test/quick/floatcmp.php exposes this problem)
603 if (sk.func()->isClosureBody()) return false;
605 return true;
608 TCA TranslatorX64::retranslate(const TranslArgs& args) {
609 if (isDebuggerAttachedProcess() && isSrcKeyInBL(args.m_sk)) {
610 // We are about to translate something known to be blacklisted by
611 // the debugger; exit early.
612 SKTRACE(1, args.m_sk, "retranslate abort due to debugger\n");
613 return nullptr;
615 LeaseHolder writer(s_writeLease);
616 if (!writer) return nullptr;
617 SKTRACE(1, args.m_sk, "retranslate\n");
618 if (m_mode == TransInvalid) {
619 m_mode = profileSrcKey(args.m_sk) ? TransProfile : TransLive;
621 return translate(args);
624 // The only use comes from HHIR's cgExitTrace(), case TraceExitType::SlowNoProgress.
625 TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk,
626 bool align,
627 TCA toSmash) {
628 if (isDebuggerAttachedProcess() && isSrcKeyInBL(sk)) {
629 // We are about to translate something known to be blacklisted by
630 // the debugger; exit early.
631 SKTRACE(1, sk, "retranslateAndPatchNoIR abort due to debugger\n");
632 return nullptr;
634 LeaseHolder writer(s_writeLease);
635 if (!writer) return nullptr;
636 SKTRACE(1, sk, "retranslateAndPatchNoIR\n");
637 SrcRec* srcRec = getSrcRec(sk);
638 if (srcRec->translations().size() ==
639 RuntimeOption::EvalJitMaxTranslations + 1) {
640 // we've gone over the translation limit and already have an anchor
641 // translation that will interpret, so just return NULL and force
642 // interpretation of this BB.
643 return nullptr;
645 m_mode = TransLive;
646 TCA start = translate(TranslArgs(sk, align).interp(true));
647 if (start != nullptr) {
648 smashJmp(getAsmFor(toSmash), toSmash, start);
650 return start;
653 TCA TranslatorX64::retranslateOpt(TransID transId, bool align) {
654 LeaseHolder writer(s_writeLease);
655 if (!writer) return nullptr;
657 TRACE(1, "retranslateOpt: transId = %u\n", transId);
659 Func* func = nullptr;
660 if (m_profData->transBlock(transId) == nullptr) {
661 // This can happen for profiling translations that have some
662 // feature not supported by translateRegion yet. For such translations,
663 // we don't have a Func* (since it's grabbed from the Block).
664 // Anyway, in this case, the region translator resorts to generating a
665 // TransLive translation, corresponding to the current live VM context.
666 func = const_cast<Func*>(liveFunc());
667 } else {
668 func = m_profData->transFunc(transId);
671 // We may get here multiple times because different translations of
672 // the same SrcKey hit the optimization threshold. Only the first
673 // time around do we want to invalidate the existing translations.
674 const SrcKey& sk = m_profData->transSrcKey(transId);
675 bool alreadyOptimized = m_profData->optimized(sk);
676 m_profData->setOptimized(sk);
678 bool setFuncBody = (!alreadyOptimized &&
679 func->base() == sk.offset() &&
680 func->getDVFunclets().size() == 0);
682 if (!alreadyOptimized) {
683 if (setFuncBody) func->setFuncBody((TCA)funcBodyHelperThunk);
684 invalidateSrcKey(sk);
685 } else {
686 // Bail if we already reached the maximum number of translations per SrcKey.
687 // Note that this can only happen with multi-threading.
688 SrcRec* srcRec = getSrcRec(sk);
689 assert(srcRec);
690 size_t nTrans = srcRec->translations().size();
691 if (nTrans >= RuntimeOption::EvalJitMaxTranslations + 1) return nullptr;
694 m_mode = TransOptimize;
695 auto translArgs = TranslArgs(sk, align).transId(transId);
696 if (setFuncBody) translArgs.setFuncBody();
698 return retranslate(translArgs);
702 * Satisfy an alignment constraint. If we're in a reachable section
703 * of code, bridge the gap with nops. Otherwise, int3's.
705 void
706 TranslatorX64::moveToAlign(X64Assembler &aa,
707 const size_t align /* =kJmpTargetAlign */,
708 bool unreachable /* =true */) {
709 using namespace HPHP::Util;
710 SpaceRecorder sr("_Align", aa);
711 assert(isPowerOfTwo(align));
712 size_t leftInBlock = align - ((align - 1) & uintptr_t(aa.frontier()));
713 if (leftInBlock == align) return;
714 if (unreachable) {
715 if (leftInBlock > 2) {
716 aa.ud2();
717 leftInBlock -= 2;
719 if (leftInBlock > 0) {
720 aa.emitInt3s(leftInBlock);
722 return;
724 aa.emitNop(leftInBlock);
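// Example, assuming align == 16: with the frontier at an address ending in
// 0x9, leftInBlock is 7. In unreachable code that becomes a 2-byte ud2
// followed by five int3 bytes; in reachable code it is emitNop(7).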
728 * Req machinery. We sometimes emit code that is unable to proceed
729 * without translator assistance; e.g., a basic block whose successor is
730 * unknown. We leave one of these request arg blobs in m_data, and point
731 * to it at callout-time.
734 // REQ_BIND_CALL
735 struct ReqBindCall {
736 SrcKey m_sourceInstr;
737 TCA m_toSmash;
738 int m_nArgs;
739 bool m_isImmutable; // call was to known func.
740 } m_bindCall;
742 // ID to name mapping for tracing.
743 static inline const char*
744 reqName(int req) {
745 static const char* reqNames[] = {
746 #define REQ(nm) #nm,
747 SERVICE_REQUESTS
748 #undef REQ
750 return reqNames[req];
754 * Find or create a translation for sk. Returns TCA of "best" current
755 * translation. May return NULL if it is currently impossible to create
756 * a translation.
759 TranslatorX64::getTranslation(const TranslArgs& args) {
760 auto sk = args.m_sk;
761 sk.func()->validate();
762 SKTRACE(2, sk,
763 "getTranslation: curUnit %s funcId %x offset %d\n",
764 sk.unit()->filepath()->data(),
765 sk.getFuncId(),
766 sk.offset());
767 SKTRACE(2, sk, " funcId: %x \n", sk.func()->getFuncId());
769 if (Translator::liveFrameIsPseudoMain()) {
770 SKTRACE(2, sk, "punting on pseudoMain\n");
771 return nullptr;
773 if (const SrcRec* sr = m_srcDB.find(sk)) {
774 TCA tca = sr->getTopTranslation();
775 if (tca) {
776 SKTRACE(2, sk, "getTranslation: found %p\n", tca);
777 return tca;
780 return createTranslation(args);
784 TranslatorX64::numTranslations(SrcKey sk) const {
785 if (const SrcRec* sr = m_srcDB.find(sk)) {
786 return sr->translations().size();
788 return 0;
791 static void populateLiveContext(JIT::RegionContext& ctx) {
792 typedef JIT::RegionDesc::Location L;
794 const ActRec* const fp {g_vmContext->getFP()};
795 const TypedValue* const sp {g_vmContext->getStack().top()};
797 for (uint32_t i = 0; i < fp->m_func->numLocals(); ++i) {
798 ctx.liveTypes.push_back(
799 { L::Local{i}, JIT::liveTVType(frame_local(fp, i)) }
803 uint32_t stackOff = 0;
804 visitStackElems(
805 fp, sp, ctx.bcOffset,
806 [&](const ActRec* ar) {
807 // TODO(#2466980): when it's a Cls, we should pass the Class* in
808 // the Type.
809 using JIT::Type;
810 auto const objOrCls =
811 ar->hasThis() ? Type::Obj.specialize(ar->getThis()->getVMClass()) :
812 ar->hasClass() ? Type::Cls
813 : Type::Nullptr;
815 ctx.preLiveARs.push_back(
816 { stackOff,
817 ar->m_func,
818 objOrCls
821 FTRACE(2, "added prelive ActRec {}\n", show(ctx.preLiveARs.back()));
823 stackOff += kNumActRecCells;
825 [&](const TypedValue* tv) {
826 ctx.liveTypes.push_back(
827 { L::Stack{stackOff++}, JIT::liveTVType(tv) }
829 FTRACE(2, "added live type {}\n", show(ctx.liveTypes.back()));
835 TranslatorX64::createTranslation(const TranslArgs& args) {
837 * Try to become the writer. We delay this until we *know* we will have
838 * a need to create new translations, instead of just trying to win the
839 * lottery at the dawn of time. Hopefully lots of requests won't require
840 * any new translation.
842 auto retransl = [&] {
843 return retranslate(args);
845 auto sk = args.m_sk;
846 LeaseHolder writer(s_writeLease);
847 if (!writer) return nullptr;
849 if (SrcRec* sr = m_srcDB.find(sk)) {
850 TCA tca = sr->getTopTranslation();
851 if (tca) {
852 // Handle extremely unlikely race; someone may have just already
853 // added the first instance of this SrcRec while we did a
854 // non-blocking wait on the write lease.
855 return tca;
856 } else {
857 // Since we are holding the write lease, we know that sk is properly
858 // initialized, except that it has no translations (due to
859 // replaceOldTranslations)
860 return retransl();
864 // We put retranslate requests at the end of our slab to more frequently
865 // allow conditional jump fall-throughs
866 TCA astart = a.frontier();
867 TCA stubstart = astubs.frontier();
868 TCA req = emitServiceReq(REQ_RETRANSLATE, sk.offset());
869 SKTRACE(1, sk, "inserting anchor translation for (%p,%d) at %p\n",
870 sk.unit(), sk.offset(), req);
871 SrcRec* sr = m_srcDB.insert(sk);
872 sr->setFuncInfo(sk.func());
873 sr->setAnchorTranslation(req);
875 size_t asize = a.frontier() - astart;
876 size_t stubsize = astubs.frontier() - stubstart;
877 assert(asize == 0);
878 if (stubsize && RuntimeOption::EvalDumpTCAnchors) {
879 addTranslation(TransRec(sk, sk.unit()->md5(), TransAnchor,
880 astart, asize, stubstart, stubsize));
881 if (m_profData) {
882 m_profData->addTransAnchor(sk);
884 assert(!isTransDBEnabled() || getTransRec(stubstart)->kind == TransAnchor);
887 return retransl();
891 TranslatorX64::lookupTranslation(SrcKey sk) const {
892 if (SrcRec* sr = m_srcDB.find(sk)) {
893 return sr->getTopTranslation();
895 return nullptr;
899 TranslatorX64::translate(const TranslArgs& args) {
900 INC_TPC(translate);
901 assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
902 assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
903 assert(m_mode != TransInvalid);
904 SCOPE_EXIT{ m_mode = TransInvalid; };
906 if (!args.m_interp) {
907 if (m_numHHIRTrans == RuntimeOption::EvalJitGlobalTranslationLimit) {
908 RuntimeOption::EvalJit = false;
909 ThreadInfo::s_threadInfo->m_reqInjectionData.updateJit();
910 return nullptr;
914 Func* func = const_cast<Func*>(args.m_sk.func());
915 AsmSelector asmSel(AsmSelector::Args(this).profile(m_mode == TransProfile)
916 .hot(func->attrs() & AttrHot));
918 if (args.m_align) {
919 moveToAlign(a, kNonFallthroughAlign);
922 TCA start = a.frontier();
924 translateWork(args);
926 if (args.m_setFuncBody) {
927 func->setFuncBody(start);
929 SKTRACE(1, args.m_sk, "translate moved head from %p to %p\n",
930 getTopTranslation(args.m_sk), start);
931 return start;
935 * Returns true if the given current frontier can have an nBytes-long
936 * instruction written without any risk of cache-tearing.
938 bool isSmashable(Address frontier, int nBytes, int offset /* = 0 */) {
939 assert(nBytes <= int(kX64CacheLineSize));
940 uintptr_t iFrontier = uintptr_t(frontier) + offset;
941 uintptr_t lastByte = uintptr_t(frontier) + nBytes - 1;
942 return (iFrontier & ~kX64CacheLineMask) == (lastByte & ~kX64CacheLineMask);
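// Worked example, assuming the usual 64-byte x64 cache line: a 5-byte jmp
// whose first byte sits at an address ending in 0x3e spans 0x..3e-0x..42,
// crossing into the next line, so this returns false and the caller should
// pad first (see prepareForSmash below).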
946 * Call before emitting a test-jcc sequence. Inserts a nop gap such that after
947 * writing a testBytes-long instruction, the frontier will be smashable.
949 void prepareForTestAndSmash(Asm& a, int testBytes, TestAndSmashFlags flags) {
950 switch (flags) {
951 case TestAndSmashFlags::kAlignJcc:
952 prepareForSmash(a, testBytes + kJmpccLen, testBytes);
953 assert(isSmashable(a.frontier() + testBytes, kJmpccLen));
954 break;
955 case TestAndSmashFlags::kAlignJccImmediate:
956 prepareForSmash(a,
957 testBytes + kJmpccLen,
958 testBytes + kJmpccLen - kJmpImmBytes);
959 assert(isSmashable(a.frontier() + testBytes, kJmpccLen,
960 kJmpccLen - kJmpImmBytes));
961 break;
962 case TestAndSmashFlags::kAlignJccAndJmp:
963 // Ensure that the entire jcc, and the entire jmp are smashable
964 // (but we don't need them both to be in the same cache line)
965 prepareForSmash(a, testBytes + kJmpccLen, testBytes);
966 prepareForSmash(a, testBytes + kJmpccLen + kJmpLen, testBytes + kJmpccLen);
967 assert(isSmashable(a.frontier() + testBytes, kJmpccLen));
968 assert(isSmashable(a.frontier() + testBytes + kJmpccLen, kJmpLen));
969 break;
973 void prepareForSmash(X64Assembler& a, int nBytes, int offset /* = 0 */) {
974 if (!isSmashable(a.frontier(), nBytes, offset)) {
975 int gapSize = (~(uintptr_t(a.frontier()) + offset) &
976 kX64CacheLineMask) + 1;
977 a.emitNop(gapSize);
978 assert(isSmashable(a.frontier(), nBytes, offset));
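// Continuing the example above: with the frontier ending in 0x3e and
// offset 0, gapSize is (~0x3e & 0x3f) + 1 == 2, so two bytes of nop move
// the frontier to the next cache-line boundary, where the 5-byte jmp fits.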
982 void
983 TranslatorX64::smash(X64Assembler &a, TCA src, TCA dest, bool isCall) {
984 assert(canWrite());
985 TRACE(2, "smash: %p -> %p\n", src, dest);
989 * We are about to smash reachable code in the translation cache. A
990 * hardware thread might be executing the very instruction we're
991 * modifying. This is safe because:
993 * 1. We align smashable instructions so that they reside on a single
994 * cache line;
996 * 2. We modify the instruction with a single processor store; and
998 * 3. The smashed region contains only a single instruction in the
999 * original instruction stream (see jmp() -> emitJ32() -> bytes() in
1000 * the assembler).
1002 CodeCursor cg(a, src);
1003 assert(isSmashable(a.frontier(), kJmpLen));
1004 if (dest > src && dest - src <= kJmpLen) {
1005 assert(!isCall);
1006 a. emitNop(dest - src);
1007 } else if (!isCall) {
1008 a. jmp(dest);
1009 } else {
1010 a. call(dest);
1014 void TranslatorX64::protectCode() {
1015 mprotect(tx64->ahot.base(),
1016 tx64->astubs.base() - tx64->ahot.base() +
1017 tx64->astubs.capacity(), PROT_READ | PROT_EXEC);
1021 void TranslatorX64::unprotectCode() {
1022 mprotect(tx64->ahot.base(),
1023 tx64->astubs.base() - tx64->ahot.base() +
1024 tx64->astubs.capacity(),
1025 PROT_READ | PROT_WRITE | PROT_EXEC);
1028 void
1029 TranslatorX64::emitStackCheck(int funcDepth, Offset pc) {
1030 funcDepth += kStackCheckPadding * sizeof(Cell);
1032 uint64_t stackMask = cellsToBytes(RuntimeOption::EvalVMStackElms) - 1;
1033 a. mov_reg64_reg64(rVmSp, rAsm); // copy to destroy
1034 a. and_imm64_reg64(stackMask, rAsm);
1035 a. sub_imm64_reg64(funcDepth + Stack::sSurprisePageSize, rAsm);
1036 assert(m_stackOverflowHelper);
1037 a. jl(m_stackOverflowHelper); // Unlikely branch to failure.
1038 // Success.
1041 // Tests the surprise flags for the current thread. Should be used
1042 // before a jnz to surprise handling code.
1043 void
1044 TranslatorX64::emitTestSurpriseFlags(Asm& a) {
1045 static_assert(RequestInjectionData::LastFlag < (1 << 8),
1046 "Translator assumes RequestInjectionFlags fit in one byte");
1047 a. testb((int8_t)0xff, rVmTl[TargetCache::kConditionFlagsOff]);
1050 void
1051 TranslatorX64::emitCheckSurpriseFlagsEnter(bool inTracelet, Fixup fixup) {
1052 emitTestSurpriseFlags(a);
1054 UnlikelyIfBlock ifTracer(CC_NZ, a, astubs);
1055 if (false) { // typecheck
1056 const ActRec* ar = nullptr;
1057 functionEnterHelper(ar);
1059 astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
1060 emitCall(astubs, (TCA)&functionEnterHelper);
1061 if (inTracelet) {
1062 recordSyncPoint(astubs, fixup.m_pcOffset, fixup.m_spOffset);
1063 } else {
1064 // If we're being called while generating a func prologue, we
1065 // have to record the fixup directly in the fixup map instead of
1066 // going through m_pendingFixups like normal.
1067 m_fixupMap.recordFixup(astubs.frontier(), fixup);
1072 void
1073 TranslatorX64::setArgInActRec(ActRec* ar, int argNum, uint64_t datum,
1074 DataType t) {
1075 TypedValue* tv =
1076 (TypedValue*)(uintptr_t(ar) - (argNum+1) * sizeof(TypedValue));
1077 tv->m_data.num = datum;
1078 tv->m_type = t;
1082 TranslatorX64::shuffleArgsForMagicCall(ActRec* ar) {
1083 if (!ar->hasInvName()) {
1084 return 0;
1086 const Func* f UNUSED = ar->m_func;
1087 f->validate();
1088 assert(f->name()->isame(s___call.get())
1089 || f->name()->isame(s___callStatic.get()));
1090 assert(f->numParams() == 2);
1091 TRACE(1, "shuffleArgsForMagicCall: ar %p\n", ar);
1092 assert(ar->hasInvName());
1093 StringData* invName = ar->getInvName();
1094 assert(invName);
1095 ar->setVarEnv(nullptr);
1096 int nargs = ar->numArgs();
1097 // We need to make an array containing all the arguments passed by the
1098 // caller and put it where the second argument is
1099 HphpArray* argArray = ArrayData::Make(nargs);
1100 argArray->incRefCount();
1101 for (int i = 0; i < nargs; ++i) {
1102 TypedValue* tv =
1103 (TypedValue*)(uintptr_t(ar) - (i+1) * sizeof(TypedValue));
1104 argArray->nvAppend(tv);
1105 tvRefcountedDecRef(tv);
1107 // Put invName in the slot for first argument
1108 setArgInActRec(ar, 0, uint64_t(invName), BitwiseKindOfString);
1109 // Put argArray in the slot for second argument
1110 setArgInActRec(ar, 1, uint64_t(argArray), KindOfArray);
1111 // Fix up ActRec's numArgs
1112 ar->initNumArgs(2);
1113 return 1;
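// Net effect on the ActRec's argument slots, sketched for a call that
// passed three args to an undefined method "foo":
//   before: [arg0][arg1][arg2]               numArgs == 3, invName == "foo"
//   after:  ["foo"][array(arg0, arg1, arg2)] numArgs == 2
// which matches the __call($name, $args) signature asserted above.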
1117 * The standard VMRegAnchor treatment won't work for some cases called
1118 * during function preludes.
1120 * The fp sync machinery is fundamentally based on the notion that
1121 * instruction pointers in the TC are uniquely associated with source
1122 * HHBC instructions, and that source HHBC instructions are in turn
1123 * uniquely associated with SP->FP deltas.
1125 * trimExtraArgs is called from the prologue of the callee.
1126 * The prologue is 1) still in the caller frame for now,
1127 * and 2) shared across multiple call sites. 1 means that we have the
1128 * fp from the caller's frame, and 2 means that this fp is not enough
1129 * to figure out sp.
1131 * However, the prologue passes us the callee actRec, whose predecessor
1132 * has to be the caller. So we can sync sp and fp by ourselves here.
1133 * Geronimo!
1135 static void sync_regstate_to_caller(ActRec* preLive) {
1136 assert(tl_regState == VMRegState::DIRTY);
1137 VMExecutionContext* ec = g_vmContext;
1138 ec->m_stack.top() = (TypedValue*)preLive - preLive->numArgs();
1139 ActRec* fp = preLive == ec->m_firstAR ?
1140 ec->m_nestedVMs.back().m_savedState.fp : (ActRec*)preLive->m_savedRbp;
1141 ec->m_fp = fp;
1142 ec->m_pc = fp->m_func->unit()->at(fp->m_func->base() + preLive->m_soff);
1143 tl_regState = VMRegState::CLEAN;
1146 void
1147 TranslatorX64::trimExtraArgs(ActRec* ar) {
1148 assert(!ar->hasInvName());
1150 sync_regstate_to_caller(ar);
1151 const Func* f = ar->m_func;
1152 int numParams = f->numParams();
1153 int numArgs = ar->numArgs();
1154 assert(numArgs > numParams);
1155 int numExtra = numArgs - numParams;
1157 TRACE(1, "trimExtraArgs: %d args, function %s takes only %d, ar %p\n",
1158 numArgs, f->name()->data(), numParams, ar);
1160 if (f->attrs() & AttrMayUseVV) {
1161 assert(!ar->hasExtraArgs());
1162 ar->setExtraArgs(ExtraArgs::allocateCopy(
1163 (TypedValue*)(uintptr_t(ar) - numArgs * sizeof(TypedValue)),
1164 numArgs - numParams));
1165 } else {
1166 // Function is not marked as "MayUseVV", so discard the extra arguments
1167 TypedValue* tv = (TypedValue*)(uintptr_t(ar) - numArgs*sizeof(TypedValue));
1168 for (int i = 0; i < numExtra; ++i) {
1169 tvRefcountedDecRef(tv);
1170 ++tv;
1172 ar->setNumArgs(numParams);
1175 // Only go back to dirty in a non-exception case. (Same reason as
1176 // above.)
1177 tl_regState = VMRegState::DIRTY;
1181 TranslatorX64::emitCallArrayPrologue(const Func* func,
1182 const DVFuncletsVec& dvs) {
1183 TCA start = a.frontier();
1184 if (dvs.size() == 1) {
1185 a. cmp_imm32_disp_reg32(dvs[0].first,
1186 AROFF(m_numArgsAndCtorFlag), rVmFp);
1187 emitBindJcc(a, CC_LE, SrcKey(func, dvs[0].second));
1188 emitBindJmp(a, SrcKey(func, func->base()));
1189 } else {
1190 a. load_reg64_disp_reg32(rVmFp, AROFF(m_numArgsAndCtorFlag), rax);
1191 for (unsigned i = 0; i < dvs.size(); i++) {
1192 a. cmp_imm32_reg32(dvs[i].first, rax);
1193 emitBindJcc(a, CC_LE, SrcKey(func, dvs[i].second));
1195 emitBindJmp(a, SrcKey(func, func->base()));
1197 return start;
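// Sketch of the emitted dispatch for dvs == {{1, off1}, {2, off2}} (the
// multi-entry case): load numArgs into eax, then for each entry
// "cmp $n, %eax; jle <bind-jcc to offN>", finally falling through to a
// bound jump at func->base().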
1201 TranslatorX64::getCallArrayPrologue(Func* func) {
1202 TCA tca = func->getFuncBody();
1203 if (tca != (TCA)funcBodyHelperThunk) return tca;
1205 DVFuncletsVec dvs = func->getDVFunclets();
1207 if (dvs.size()) {
1208 LeaseHolder writer(s_writeLease);
1209 if (!writer) return nullptr;
1210 tca = func->getFuncBody();
1211 if (tca != (TCA)funcBodyHelperThunk) return tca;
1212 tca = emitCallArrayPrologue(func, dvs);
1213 func->setFuncBody(tca);
1214 } else {
1215 SrcKey sk(func, func->base());
1216 tca = tx64->getTranslation(TranslArgs(sk, false).setFuncBody());
1219 return tca;
1223 TranslatorX64::emitPrologueRedispatch(X64Assembler& a) {
1224 TCA retval;
1225 moveToAlign(a);
1226 retval = a.frontier();
1227 TRACE(1, "HOTSTUB: emitPrologueRedispatch: %lx\n", uintptr_t(a.frontier()));
1229 // We're in the wrong func prologue.
1231 assert(kScratchCrossTraceRegs.contains(rax));
1232 assert(kScratchCrossTraceRegs.contains(rdx));
1233 assert(kScratchCrossTraceRegs.contains(rcx));
1235 // Get the called func in rax
1236 a. load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
1237 // Get the number of passed parameters in rdx
1238 a. load_reg64_disp_reg32(rStashedAR, AROFF(m_numArgsAndCtorFlag), rdx);
1239 a. and_imm32_reg32(0x7fffffff, rdx);
1240 // Get the number of declared parameters in rcx
1241 a. load_reg64_disp_reg32(rax, Func::numParamsOff(), rcx);
1243 // If we didn't pass too many args, directly dereference
1244 // func->m_prologues.
1245 a. cmp_reg32_reg32(rdx, rcx);
1246 TCA bToFixedProloguesCheck = a.frontier();
1247 a. jcc8(CC_L, bToFixedProloguesCheck);
1249 // cmp $kNumFixedPrologues, %rdx
1250 // jl numParamsCheck
1251 TCA actualDispatch = a.frontier();
1253 // rcx: prologueIdx
1254 // rax = func->prologues[numParams]
1255 // jmp rax
1256 a. loadq (rax[rdx*8 + Func::prologueTableOff()], rax);
1257 a. jmp (rax);
1258 a. ud2 ();
1260 // Hmm, more parameters passed than the function expected. Did we pass
1261 // kNumFixedPrologues or more? If not, %rdx is still a perfectly
1262 // legitimate index into the func prologue table.
1263 // numParamsCheck:
1264 // cmp $kNumFixedPrologues, %rcx
1265 // jl dispatch
1266 a.patchJcc8(bToFixedProloguesCheck, a.frontier()); // numParamsCheck:
1267 a. cmp_imm32_reg32(kNumFixedPrologues, rdx);
1268 a. jcc8(CC_L, actualDispatch);
1270 // Too many gosh-darned parameters passed. Go to numExpected + 1, which
1271 // is always a "too many params" entry point.
1273 // mov %rdx, %rcx
1274 // add $1, %rcx
1275 // jmp dispatch
1276 a. load_reg64_disp_index_reg64(rax,
1277 // %rcx + 1
1278 Func::prologueTableOff() + sizeof(TCA),
1279 rcx,
1280 rax);
1281 a. jmp(rax);
1282 a. ud2();
1283 return retval;
1286 // The funcGuard gets skipped and patched by other code, so we have some
1287 // magic offsets.
1288 static const int kFuncMovImm = 6; // Offset to the immediate for 8 byte Func*
1289 static const int kFuncCmpImm = 4; // Offset to the immediate for 4 byte Func*
1290 static const int kFuncGuardLen = 23;
1291 static const int kFuncGuardShortLen = 14;
1293 template<typename T>
1294 static T*
1295 funcPrologueToGuardImm(TCA prologue) {
1296 assert(sizeof(T) == 4 || sizeof(T) == 8);
1297 T* retval = (T*)(prologue - (sizeof(T) == 8 ?
1298 kFuncGuardLen - kFuncMovImm :
1299 kFuncGuardShortLen - kFuncCmpImm));
1300 // We padded these so the immediate would fit inside a cache line
1301 assert(((uintptr_t(retval) ^ (uintptr_t(retval + 1) - 1)) &
1302 ~(kX64CacheLineSize - 1)) == 0);
1304 return retval;
1307 static inline bool
1308 funcPrologueHasGuard(TCA prologue, const Func* func) {
1309 intptr_t iptr = uintptr_t(func);
1310 if (deltaFits(iptr, sz::dword)) {
1311 return *funcPrologueToGuardImm<int32_t>(prologue) == iptr;
1313 return *funcPrologueToGuardImm<int64_t>(prologue) == iptr;
1316 static TCA
1317 funcPrologueToGuard(TCA prologue, const Func* func) {
1318 if (!prologue || prologue == (TCA)fcallHelperThunk) return prologue;
1319 return prologue -
1320 (deltaFits(uintptr_t(func), sz::dword) ?
1321 kFuncGuardShortLen :
1322 kFuncGuardLen);
1325 static inline void
1326 funcPrologueSmashGuard(TCA prologue, const Func* func) {
1327 intptr_t iptr = uintptr_t(func);
1328 if (deltaFits(iptr, sz::dword)) {
1329 *funcPrologueToGuardImm<int32_t>(prologue) = 0;
1330 return;
1332 *funcPrologueToGuardImm<int64_t>(prologue) = 0;
1335 void
1336 TranslatorX64::smashPrologueGuards(TCA* prologues, int numPrologues,
1337 const Func* func) {
1338 DEBUG_ONLY std::unique_ptr<LeaseHolder> writer;
1339 for (int i = 0; i < numPrologues; i++) {
1340 if (prologues[i] != (TCA)fcallHelperThunk
1341 && funcPrologueHasGuard(prologues[i], func)) {
1342 if (debug) {
1344 * Units are sometimes created racily, in which case all
1345 * but the first are destroyed immediately. In that case,
1346 * the Funcs of the destroyed Units never need their
1347 * prologues smashing, and it would be a lock rank violation
1348 * to take the write lease here.
1349 * In all other cases, Funcs are destroyed via a delayed path
1350 * (treadmill) and the rank violation isn't an issue.
1352 * Also note that we only need the write lease because we
1353 * mprotect the translation cache in debug builds.
1355 if (!writer) writer.reset(new LeaseHolder(s_writeLease));
1357 funcPrologueSmashGuard(prologues[i], func);
1363 TranslatorX64::emitFuncGuard(X64Assembler& a, const Func* func) {
1364 assert(kScratchCrossTraceRegs.contains(rax));
1365 assert(kScratchCrossTraceRegs.contains(rdx));
1367 const int kAlign = kX64CacheLineSize;
1368 const int kAlignMask = kAlign - 1;
1369 int loBits = uintptr_t(a.frontier()) & kAlignMask;
1370 int delta, size;
1372 // Ensure the immediate is safely smashable
1373 // the immediate must not cross a qword boundary,
1374 if (!deltaFits((intptr_t)func, sz::dword)) {
1375 size = 8;
1376 delta = loBits + kFuncMovImm;
1377 } else {
1378 size = 4;
1379 delta = loBits + kFuncCmpImm;
1382 delta = (delta + size - 1) & kAlignMask;
1383 if (delta < size - 1) {
1384 a.emitNop(size - 1 - delta);
1387 TCA aStart DEBUG_ONLY = a.frontier();
1388 if (!deltaFits((intptr_t)func, sz::dword)) {
1389 a. load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
1391 Although func doesn't fit in a signed 32-bit immediate, it may still
1392 fit in an unsigned one. Rather than deal with yet another case
1393 (which only happens when we disable jemalloc) just force it to
1394 be an 8-byte immediate, and patch it up afterwards.
1396 a. mov_imm64_reg(0xdeadbeeffeedface, rdx);
1397 assert(((uint64_t*)a.frontier())[-1] == 0xdeadbeeffeedface);
1398 ((uint64_t*)a.frontier())[-1] = uintptr_t(func);
1399 a. cmp_reg64_reg64(rax, rdx);
1400 } else {
1401 a. cmp_imm32_disp_reg32(uint64_t(func), AROFF(m_func), rStashedAR);
1404 assert(m_funcPrologueRedispatch);
1406 a. jnz(m_funcPrologueRedispatch);
1407 assert(funcPrologueToGuard(a.frontier(), func) == aStart);
1408 assert(funcPrologueHasGuard(a.frontier(), func));
1409 return a.frontier();
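// The two guard shapes emitted above, matching the kFuncGuard* constants:
//   short, 14 bytes (Func* fits in 32 bits):
//     cmp  $func, AROFF(m_func)(%rStashedAR);  jnz funcPrologueRedispatch
//   long, 23 bytes (64-bit Func*):
//     mov  AROFF(m_func)(%rStashedAR), %rax
//     mov  $func, %rdx;  cmp %rax, %rdx;  jnz funcPrologueRedispatch
// funcPrologueToGuard()/funcPrologueHasGuard() depend on these exact sizes.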
1413 * funcPrologue --
1415 * Given a callee and a number of args, match up to the callee's
1416 * argument expectations and dispatch.
1418 * Call/return hand-shaking is a bit funny initially. At translation time,
1419 * we don't necessarily know what function we're calling. For instance,
1421 * f(g());
1423 * Will lead to a set of basic blocks like:
1425 * b1: pushfuncd "f"
1426 * pushfuncd "g"
1427 * fcall
1428 * b2: fcall
1430 * The fcall labelled "b2" above is not statically bindable in our
1431 * execution model.
1433 * We decouple the call work into a per-callsite portion, responsible
1434 * for recording the return address, and a per-(callee, numArgs) portion,
1435 * responsible for fixing up arguments and dispatching to remaining
1436 * code. We call the per-callee portion a "prologue."
1438 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
1439 * we're running "between" basic blocks, with all VM registers sync'ed.
1440 * However, we're also called in the middle of basic blocks, when dropping
1441 * entries into func->m_prologues. So don't go around using the
1442 * translation-time values of vmfp()/vmsp(), since they have an
1443 * unpredictable relationship to the source.
1445 bool
1446 TranslatorX64::checkCachedPrologue(const Func* func, int paramIdx,
1447 TCA& prologue) const {
1448 prologue = (TCA)func->getPrologue(paramIdx);
1449 if (prologue != (TCA)fcallHelperThunk && !s_replaceInFlight) {
1450 TRACE(1, "cached prologue %s(%d) -> cached %p\n",
1451 func->fullName()->data(), paramIdx, prologue);
1452 assert(isValidCodeAddress(prologue));
1453 return true;
1455 return false;
1458 // pops the return address pushed by fcall and stores it into the actrec
1459 void
1460 TranslatorX64::emitPopRetIntoActRec(Asm& a) {
1461 a. pop (rStashedAR[AROFF(m_savedRip)]);
1464 static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
1465 assert(tl_regState == VMRegState::DIRTY);
1466 tl_regState = VMRegState::CLEAN;
1467 vmfp() = (Cell*)ar;
1468 vmsp() = sp;
1469 vmpc() = ar->unit()->at(pcOff);
1473 TranslatorX64::funcPrologue(Func* func, int nPassed, ActRec* ar) {
1474 func->validate();
1475 TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
1476 int numParams = func->numParams();
1477 int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
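// Example: a function declaring 3 params called with 5 args gets
// paramIndex == 4 (numParams + 1), the shared "too many args" prologue
// slot; calls passing 0..3 args each get their own slot.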
1479 bool funcIsMagic = func->isMagic();
1481 // Do a quick test before grabbing the write lease
1482 TCA prologue;
1483 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1484 if (func->isClonedClosure()) {
1485 assert(ar);
1486 const Func::ParamInfoVec& paramInfo = func->params();
1487 Offset entry = func->base();
1488 for (int i = nPassed; i < numParams; ++i) {
1489 const Func::ParamInfo& pi = paramInfo[i];
1490 if (pi.hasDefaultValue()) {
1491 entry = pi.funcletOff();
1492 break;
1495 interp_set_regs(ar, (Cell*)ar - func->numSlotsInFrame(), entry);
1496 SrcKey funcBody(func, entry);
1497 TCA tca = getTranslation(TranslArgs(funcBody, false));
1498 tl_regState = VMRegState::DIRTY;
1499 if (tca) {
1500 // racy, but ok...
1501 func->setPrologue(paramIndex, tca);
1503 return tca;
1506 // If the translator is getting replaced out from under us, refuse to
1507 // provide a prologue; we don't know whether this request is running on the
1508 // old or new context.
1509 LeaseHolder writer(s_writeLease);
1510 if (!writer || s_replaceInFlight) return nullptr;
1511 // Double check the prologue array now that we have the write lease
1512 // in case another thread snuck in and set the prologue already.
1513 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1515 AsmSelector asmSel(AsmSelector::Args(this).hot(func->attrs() & AttrHot));
1517 SpaceRecorder sr("_FuncPrologue", a);
1518 // If we're close to a cache line boundary, just burn some space to
1519 // try to keep the func and its body on fewer total lines.
1520 if (((uintptr_t)a.frontier() & kX64CacheLineMask) >= 32) {
1521 moveToAlign(a, kX64CacheLineSize);
1523 // Careful: this isn't necessarily the real entry point. For funcIsMagic
1524 // prologues, this is just a possible prologue.
1525 TCA aStart = a.frontier();
1526 TCA start = aStart;
1527 TCA stubStart = astubs.frontier();
1529 // Guard: we're in the right callee. This happens in magicStart for
1530 // magic callees.
1531 if (!funcIsMagic) {
1532 start = aStart = emitFuncGuard(a, func);
1535 emitRB(a, RBTypeFuncPrologueTry, func->fullName()->data());
1537 // NB: We have most of the register file to play with, since we know
1538 // we're between BB's. So, we hardcode some registers here rather
1539 // than using the scratch allocator.
1540 TRACE(2, "funcPrologue: user function: %s\n", func->name()->data());
1542 // Add a counter for the translation if requested
1543 if (RuntimeOption::EvalJitTransCounters) {
1544 emitTransCounterInc(a);
1547 if (!funcIsMagic) {
1548 emitPopRetIntoActRec(a);
1549 // entry point for magic methods comes later
1550 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1553 * Guard: we have enough stack space to complete this
1554 * function. We omit overflow checks if it is a leaf function
1555 * that can't use more than kStackCheckLeafPadding cells.
1557 auto const needStackCheck =
1558 !(func->attrs() & AttrPhpLeafFn) ||
1559 func->maxStackCells() >= kStackCheckLeafPadding;
1560 if (needStackCheck) {
1561 emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
1565 SrcKey skFuncBody = emitPrologue(func, nPassed);
1567 if (funcIsMagic) {
1568 // entry point for magic methods is here
1569 TCA magicStart = emitFuncGuard(a, func);
1570 emitPopRetIntoActRec(a);
1571 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1572 // Guard: we have enough stack space to complete this function.
1573 emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
1574 assert(numParams == 2);
1575 // Special __call prologue
1576 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1577 emitCall(a, TCA(TranslatorX64::shuffleArgsForMagicCall));
1578 if (memory_profiling) {
1579 m_fixupMap.recordFixup(
1580 a.frontier(),
1581 Fixup(skFuncBody.offset() - func->base(), func->numSlotsInFrame())
1584 // if shuffleArgs returns 0, that means this was not a magic call
1585 // and we should proceed to a prologue specialized for nPassed;
1586 // otherwise, proceed to a prologue specialized for nPassed==numParams (2).
1587 if (nPassed == 2) {
1588 a.jmp(start);
1589 } else {
1590 a.test_reg64_reg64(rax, rax);
1591 // z ==> not a magic call, go to prologue for nPassed
1592 if (deltaFits(start - (a.frontier() + kJcc8Len), sz::byte)) {
1593 a.jcc8(CC_Z, start);
1594 } else {
1595 a.jcc(CC_Z, start);
1597 // this was a magic call
1598 // nPassed == 2
1599 // Fix up hardware stack pointer
1600 nPassed = 2;
1601 emitLea(a, rStashedAR, -cellsToBytes(nPassed), rVmSp);
1602 // Optimization TODO: Reuse the prologue for args == 2
1603 emitPrologue(func, nPassed);
1605 start = magicStart;
1607 assert(funcPrologueHasGuard(start, func));
1608 TRACE(2, "funcPrologue tx64 %p %s(%d) setting prologue %p\n",
1609 this, func->fullName()->data(), nPassed, start);
1610 assert(isValidCodeAddress(start));
1611 func->setPrologue(paramIndex, start);
1613 addTranslation(TransRec(skFuncBody, func->unit()->md5(),
1614 TransPrologue, aStart, a.frontier() - aStart,
1615 stubStart, astubs.frontier() - stubStart));
1617 if (m_profData) {
1618 m_profData->addTransPrologue(skFuncBody);
1621 recordGdbTranslation(skFuncBody, func,
1622 a, aStart,
1623 false, true);
1624 recordBCInstr(OpFuncPrologue, a, start);
1626 return start;
1629 static void raiseMissingArgument(const char* name, int expected, int got) {
1630 if (expected == 1) {
1631 raise_warning(Strings::MISSING_ARGUMENT, name, got);
1632 } else {
1633 raise_warning(Strings::MISSING_ARGUMENTS, name, expected, got);
1637 SrcKey
1638 TranslatorX64::emitPrologue(Func* func, int nPassed) {
1639 int numParams = func->numParams();
1640 const Func::ParamInfoVec& paramInfo = func->params();
1642 Offset dvInitializer = InvalidAbsoluteOffset;
1644 assert(IMPLIES(func->isGenerator(), nPassed == numParams));
1645 if (nPassed > numParams) {
1646 // Too many args; a weird case, so just call out. Stash ar
1647 // somewhere callee-saved.
1648 if (false) { // typecheck
1649 TranslatorX64::trimExtraArgs((ActRec*)nullptr);
1651 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1652 emitCall(a, TCA(TranslatorX64::trimExtraArgs));
1653 // We'll fix rVmSp below.
1654 } else if (nPassed < numParams) {
1655 // Figure out which, if any, default value initializer to go to
1656 for (int i = nPassed; i < numParams; ++i) {
1657 const Func::ParamInfo& pi = paramInfo[i];
1658 if (pi.hasDefaultValue()) {
1659 dvInitializer = pi.funcletOff();
1660 break;
1663 TRACE(1, "Only have %d of %d args; getting dvFunclet\n",
1664 nPassed, numParams);
1665 a. emitImmReg(nPassed, rax);
1666 // do { *(--rVmSp) = NULL; nPassed++; } while (nPassed < numParams);
1667 // This should be an unusual case, so optimize for code density
1668 // rather than execution speed; i.e., don't unroll the loop.
1669 TCA loopTop = a.frontier();
1670 a. sub_imm32_reg64(sizeof(Cell), rVmSp);
1671 a. incl(eax);
1672 emitStoreUninitNull(a, 0, rVmSp);
1673 a. cmp_imm32_reg32(numParams, rax);
1674 a. jcc8(CC_L, loopTop);
1677 // Entry point for numParams == nPassed is here.
1678 // Args are kosher. Frame linkage: set fp = ar.
1679 a. mov_reg64_reg64(rStashedAR, rVmFp);
1681 int numLocals = numParams;
1682 if (func->isClosureBody()) {
1683 int numUseVars = func->cls()->numDeclProperties();
1685 emitLea(a, rVmFp, -cellsToBytes(numParams), rVmSp);
1687 PhysReg rClosure = rcx;
1688 a. loadq(rVmFp[AROFF(m_this)], rClosure);
1690 // Swap in the $this or late bound class
1691 a. loadq(rClosure[c_Closure::ctxOffset()], rAsm);
1692 a. storeq(rAsm, rVmFp[AROFF(m_this)]);
1694 if (!(func->attrs() & AttrStatic)) {
1695 a.shrq(1, rAsm);
1696 JccBlock<CC_BE> ifRealThis(a);
1697 a.shlq(1, rAsm);
1698 emitIncRef(rAsm, KindOfObject);
1701 // Put in the correct context
1702 a. loadq(rClosure[c_Closure::funcOffset()], rAsm);
1703 a. storeq(rAsm, rVmFp[AROFF(m_func)]);
1705 // Copy in all the use vars
1706 int baseUVOffset = sizeof(ObjectData) + func->cls()->builtinPropSize();
1707 for (int i = 0; i < numUseVars + 1; i++) {
1708 int spOffset = -cellsToBytes(i+1);
1710 if (i == 0) {
1711 // The closure is the first local.
1712 // We don't incref because it used to be $this
1713 // and now it is a local, so they cancel out
1714 emitStoreTypedValue(a, KindOfObject, rClosure, spOffset, rVmSp);
1715 continue;
1718 int uvOffset = baseUVOffset + cellsToBytes(i-1);
1720 emitCopyTo(a, rClosure, uvOffset, rVmSp, spOffset, rAsm);
1721 emitIncRefGenericRegSafe(rVmSp, spOffset, rAsm);
1724 numLocals += numUseVars + 1;
1727 // We're in the callee frame; initialize locals. Unroll the loop all
1728 // the way if there are a modest number of locals to update;
1729 // otherwise, do it in a compact loop. If we're in a generator body,
1730 // named locals will be initialized by UnpackCont so we can leave
1731 // them alone here.
1732 int numUninitLocals = func->numLocals() - numLocals;
1733 assert(numUninitLocals >= 0);
1734 if (numUninitLocals > 0 && !func->isGenerator()) {
1735 SpaceRecorder sr("_InitializeLocals", a);
1737 // If there are too many locals, then emitting a loop to initialize locals
1738 // is more compact than emitting a slew of movs inline.
1739 if (numUninitLocals > kLocalsToInitializeInline) {
1740 PhysReg loopReg = rcx;
1742 // rVmFp + rcx points to the count/type fields of the TypedValue we're
1743 // about to write to.
1744 int loopStart = -func->numLocals() * sizeof(TypedValue) + TVOFF(m_type);
1745 int loopEnd = -numLocals * sizeof(TypedValue) + TVOFF(m_type);
1747 a. emitImmReg(loopStart, loopReg);
1748 a. emitImmReg(KindOfUninit, rdx);
1750 TCA topOfLoop = a.frontier();
1751 // do {
1752 // rVmFp[loopReg].m_type = KindOfUninit;
1753 // } while(++loopReg != loopEnd);
1755 emitStoreTVType(a, edx, rVmFp[loopReg]);
1756 a. addq (sizeof(Cell), loopReg);
1757 a. cmpq (loopEnd, loopReg);
1758 a. jcc8 (CC_NE, topOfLoop);
1759 } else {
1760 PhysReg base;
1761 int disp, k;
1762 static_assert(KindOfUninit == 0, "");
1763 if (numParams < func->numLocals()) {
1764 a.xorl (eax, eax);
1766 for (k = numLocals; k < func->numLocals(); ++k) {
1767 locToRegDisp(Location(Location::Local, k), &base, &disp, func);
1768 emitStoreTVType(a, eax, base[disp + TVOFF(m_type)]);
1773 const Opcode* destPC = func->unit()->entry() + func->base();
1774 if (dvInitializer != InvalidAbsoluteOffset) {
1775 // dispatch to funclet.
1776 destPC = func->unit()->entry() + dvInitializer;
1778 SrcKey funcBody(func, destPC);
1780 // Move rVmSp to the right place: just past all locals
1781 int frameCells = func->numSlotsInFrame();
1782 if (func->isGenerator()) {
1783 frameCells = 1;
1784 } else {
1785 emitLea(a, rVmFp, -cellsToBytes(frameCells), rVmSp);
1788 Fixup fixup(funcBody.offset() - func->base(), frameCells);
1790 // Emit warnings for any missing arguments
1791 if (!func->info()) {
1792 for (int i = nPassed; i < numParams; ++i) {
1793 if (paramInfo[i].funcletOff() == InvalidAbsoluteOffset) {
1794 a. emitImmReg((intptr_t)func->name()->data(), argNumToRegName[0]);
1795 a. emitImmReg(numParams, argNumToRegName[1]);
1796 a. emitImmReg(i, argNumToRegName[2]);
1797 emitCall(a, (TCA)raiseMissingArgument);
1798 m_fixupMap.recordFixup(a.frontier(), fixup);
1803 // Check surprise flags in the same place as the interpreter: after
1804 // setting up the callee's frame but before executing any of its
1805 // code
1806 emitCheckSurpriseFlagsEnter(false, fixup);
1808 if (func->isClosureBody() && func->cls()) {
1809 int entry = nPassed <= numParams ? nPassed : numParams + 1;
1810 // Relying on rStashedAR == rVmFp here
1811 a. loadq (rStashedAR[AROFF(m_func)], rax);
1812 a. loadq (rax[Func::prologueTableOff() + sizeof(TCA)*entry], rax);
1813 a. jmp (rax);
1814 } else {
1815 emitBindJmp(funcBody);
1817 return funcBody;
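/*
 * Illustrative-only sketch of what the prologue emitted above does for the
 * nPassed < numParams case, written as the moral equivalent in C++ (nothing
 * here is generated or compiled; the comments map back to the emitted
 * instructions):
 *
 *   int n = nPassed;                  // emitImmReg(nPassed, rax)
 *   do {
 *     --vmsp;                         // sub_imm32_reg64(sizeof(Cell), rVmSp)
 *     ++n;                            // incl(eax)
 *     vmsp->m_type = KindOfUninit;    // emitStoreUninitNull
 *   } while (n < numParams);          // cmp_imm32_reg32 + jcc8(CC_L, loopTop)
 *
 * Control then joins the numParams == nPassed entry point, which sets up
 * frame linkage and finally binds a jump either to the function base or to
 * the selected default-value funclet.
 */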
1820 static bool
1821 isNativeImplCall(const Func* funcd, int numArgs) {
1822 return funcd && funcd->info() && numArgs == funcd->numParams();
1825 int32_t // returns the amount by which rVmSp should be adjusted
1826 TranslatorX64::emitBindCall(SrcKey srcKey, const Func* funcd, int numArgs) {
1827 // If this is a call to a builtin and we don't need any argument
1828 // munging, we can skip the prologue system and do it inline.
1829 if (isNativeImplCall(funcd, numArgs)) {
1830 StoreImmPatcher patchIP(a, (uint64_t)a.frontier(), reg::rax,
1831 cellsToBytes(numArgs) + AROFF(m_savedRip),
1832 rVmSp);
1833 assert(funcd->numLocals() == funcd->numParams());
1834 assert(funcd->numIterators() == 0);
1835 emitLea(a, rVmSp, cellsToBytes(numArgs), rVmFp);
1836 emitCheckSurpriseFlagsEnter(true, Fixup(0, numArgs));
1837 // rVmSp is already correctly adjusted, because there are no locals
1838 // other than the arguments passed.
1839 auto retval = emitNativeImpl(funcd, false /* don't jump to return */);
1840 patchIP.patch(uint64_t(a.frontier()));
1841 return retval;
1843 if (debug) {
1844 a. storeq (kUninitializedRIP,
1845 rVmSp[cellsToBytes(numArgs) + AROFF(m_savedRip)]);
1847 // Stash callee's rVmFp into rStashedAR for the callee's prologue
1848 emitLea(a, rVmSp, cellsToBytes(numArgs), rStashedAR);
1849 emitBindCallHelper(srcKey, funcd, numArgs);
1850 return 0;
1853 void
1854 TranslatorX64::emitBindCallHelper(SrcKey srcKey,
1855 const Func* funcd,
1856 int numArgs) {
1857 // Whatever prologue we're branching to will check at runtime that we
1858 // went to the right Func*, correcting if necessary. We treat the first
1859 // Func we encounter as a decent prediction. Make space to burn in a
1860 // TCA.
1861 ReqBindCall* req = m_globalData.alloc<ReqBindCall>();
1862 prepareForSmash(a, kCallLen);
1863 TCA toSmash = a.frontier();
1864 a. call(astubs.frontier());
1866 astubs. mov_reg64_reg64(rStashedAR, serviceReqArgRegs[1]);
1867 emitPopRetIntoActRec(astubs);
1868 emitServiceReq(REQ_BIND_CALL, req);
1870 TRACE(1, "will bind static call: tca %p, this %p, funcd %p, astubs %p\n",
1871 toSmash, this, funcd, astubs.frontier());
1872 req->m_toSmash = toSmash;
1873 req->m_nArgs = numArgs;
1874 req->m_sourceInstr = srcKey;
1875 req->m_isImmutable = (bool)funcd;
1877 return;
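/*
 * Rough picture of the code emitBindCallHelper lays down (illustrative only;
 * exact byte counts depend on kCallLen and the service-request encoding):
 *
 *   a:        call <stub in astubs>     ; smashable, padded by prepareForSmash
 *   astubs:   copy rStashedAR into serviceReqArgRegs[1]
 *             pop the return address into the ActRec's m_savedRip
 *             <service request REQ_BIND_CALL, req>
 *
 * The first execution falls into the stub and reaches handleServiceRequest's
 * REQ_BIND_CALL case, which finds or emits the callee prologue and then
 * smashes the call at toSmash to target it directly, so the stub is only
 * paid for once per call site.
 */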
1881 * NativeImpl is a special operation in the sense that it must be the
1882 * only opcode in a function body, and also functions as the return.
1884 * If emitSavedRIPReturn is false, it returns the amount by which
1885 * rVmSp should be adjusted; otherwise, it emits code to perform
1886 * the adjustment itself (this allows us to combine updates to rVmSp).
1888 int32_t TranslatorX64::emitNativeImpl(const Func* func,
1889 bool emitSavedRIPReturn) {
1890 BuiltinFunction builtinFuncPtr = func->builtinFuncPtr();
1891 if (false) { // typecheck
1892 ActRec* ar = nullptr;
1893 builtinFuncPtr(ar);
1896 TRACE(2, "calling builtin preClass %p func %p\n", func->preClass(),
1897 builtinFuncPtr);
1899 * Call the native implementation. This will free the locals for us in the
1900 * normal case. In the case where an exception is thrown, the VM unwinder
1901 * will handle it for us.
1903 a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
1904 if (eagerRecord(func)) {
1905 emitEagerSyncPoint(a, func->getEntry(), 0);
1907 emitCall(a, (TCA)builtinFuncPtr);
1910 * We're sometimes calling this while curFunc() isn't really the
1911 * builtin---make sure to properly record the sync point as if we
1912 * are inside the builtin.
1914 * The assumption here is that for builtins, the generated func
1915 * contains only a single opcode (NativeImpl), and there are no
1916 * non-argument locals.
1918 assert(func->numIterators() == 0 && func->isBuiltin());
1919 assert(func->numLocals() == func->numParams());
1920 assert(toOp(*func->getEntry()) == OpNativeImpl);
1921 assert(instrLen((Op*)func->getEntry()) == func->past() - func->base());
1922 Offset pcOffset = 0; // NativeImpl is the only instruction in the func
1923 Offset stackOff = func->numLocals(); // Builtin stubs have no
1924 // non-arg locals
1925 recordSyncPoint(a, pcOffset, stackOff);
1927 if (emitSavedRIPReturn) {
1928 // push the return address to get ready to ret.
1929 a. push (rVmFp[AROFF(m_savedRip)]);
1933 * The native implementation already put the return value on the
1934 * stack for us, and handled cleaning up the arguments. We have to
1935 * update the frame pointer and the stack pointer, and load the
1936 * return value into the return register so the trace we are
1937 * returning to has it where it expects.
1939 * TODO(#1273094): we should probably modify the actual builtins to
1940 * return values via registers (rax:edx) using the C ABI and do a
1941 * reg-to-reg move.
1943 int nLocalCells = func->numSlotsInFrame();
1944 if (emitSavedRIPReturn) {
1945 a. add_imm64_reg64(sizeof(ActRec) + cellsToBytes(nLocalCells-1), rVmSp);
1947 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
1949 emitRB(a, RBTypeFuncExit, func->fullName()->data());
1950 if (emitSavedRIPReturn) {
1951 a. ret();
1952 translator_not_reached(a);
1953 return 0;
1955 return sizeof(ActRec) + cellsToBytes(nLocalCells-1);
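/*
 * Informal summary of emitNativeImpl's two modes, as emitted above:
 *
 *   emitSavedRIPReturn == true:  the stub restores rVmFp, adjusts rVmSp by
 *     sizeof(ActRec) + cellsToBytes(nLocalCells - 1), and rets through the
 *     saved RIP; the function then returns 0.
 *   emitSavedRIPReturn == false: rVmFp is still restored, but no rVmSp
 *     adjustment or ret is emitted; the byte delta is returned instead so
 *     the caller (emitBindCall) can fold it into its own stack-pointer
 *     update.
 */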
1959 * bindJmp --
1961 * Runtime service handler that patches a jmp to the translation of
1962 * u:dest from toSmash.
1965 TranslatorX64::bindJmp(TCA toSmash, SrcKey destSk,
1966 ServiceRequest req, bool& smashed) {
1967 TCA tDest = getTranslation(
1968 TranslArgs(destSk, false).interp(req == REQ_BIND_JMP_NO_IR)
1969 .src(toSmash));
1970 if (!tDest) return nullptr;
1971 LeaseHolder writer(s_writeLease);
1972 if (!writer) return tDest;
1973 smashed = true;
1974 SrcRec* sr = getSrcRec(destSk);
1975 if (req == REQ_BIND_ADDR) {
1976 sr->chainFrom(IncomingBranch::addr(reinterpret_cast<TCA*>(toSmash)));
1977 } else if (req == REQ_BIND_JCC) {
1978 sr->chainFrom(IncomingBranch::jccFrom(toSmash));
1979 } else {
1980 sr->chainFrom(IncomingBranch::jmpFrom(toSmash));
1982 return tDest;
1986 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
1988 * 1: j<CC> stubJmpccFirst
1989 * jmp stubJmpccFirst
1991 * Our "taken" argument tells us whether the branch at 1: was taken or
1992 * not, and therefore which of offTaken and offNotTaken to continue executing.
1993 * If we did take the branch, we now rewrite the code so that the branch is
1994 * straightened. This predicts that subsequent executions will go the same way
1995 * as the first execution.
1997 * jn<CC> stubJmpccSecond:offNotTaken
1998 * nop5 ; fallthru, or jmp if there's already a translation.
1999 * offTaken:
2001 * If we did not take the branch, we leave the sense of the condition
2002 * intact, while patching it up to go to the unexplored code:
2004 * j<CC> stubJmpccSecond:offTaken
2005 * nop5
2006 * offNotTaken:
2009 TranslatorX64::bindJmpccFirst(TCA toSmash,
2010 Offset offTaken, Offset offNotTaken,
2011 bool taken,
2012 ConditionCode cc,
2013 bool& smashed) {
2014 const Func* f = liveFunc();
2015 LeaseHolder writer(s_writeLease);
2016 if (!writer) return nullptr;
2017 Offset offWillExplore = taken ? offTaken : offNotTaken;
2018 Offset offWillDefer = taken ? offNotTaken : offTaken;
2019 SrcKey dest(f, offWillExplore);
2020 TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
2021 "taken %d\n",
2022 offWillExplore, offWillDefer, cc, taken);
2024 // We want the branch to point to whichever side has not been explored
2025 // yet.
2026 if (taken) cc = ccNegate(cc);
2027 TCA stub = emitServiceReq(REQ_BIND_JMPCC_SECOND, toSmash, offWillDefer, cc);
2029 Asm& as = getAsmFor(toSmash);
2030 // It's not clear where chainFrom should go if as is astubs.
2031 assert(&as != &astubs);
2033 // can we just directly fall through?
2034 // a jmp + jz takes 5 + 6 = 11 bytes
2035 bool fallThru = toSmash + kJmpccLen + kJmpLen == as.frontier() &&
2036 !m_srcDB.find(dest);
2038 TCA tDest;
2039 tDest = getTranslation(TranslArgs(dest, !fallThru).src(toSmash));
2040 if (!tDest) {
2041 return 0;
2043 smashed = true;
2044 assert(s_writeLease.amOwner());
2046 * Roll over the jcc and the jmp/fallthru. E.g., from:
2048 * toSmash: jcc <jmpccFirstStub>
2049 * toSmash+6: jmp <jmpccFirstStub>
2050 * toSmash+11: <probably the new translation == tdest>
2052 * to:
2054 * toSmash: j[n]z <jmpccSecondStub>
2055 * toSmash+6: nop5
2056 * toSmash+11: newHotness
2058 CodeCursor cg(as, toSmash);
2059 as.jcc(cc, stub);
2060 getSrcRec(dest)->chainFrom(IncomingBranch::jmpFrom(as.frontier()));
2061 TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
2062 return tDest;
2065 // smashes a jcc to point to a new destination
2067 TranslatorX64::bindJmpccSecond(TCA toSmash, const Offset off,
2068 ConditionCode cc, bool& smashed) {
2069 const Func* f = liveFunc();
2070 SrcKey dest(f, off);
2071 TCA branch = getTranslation(TranslArgs(dest, true).src(toSmash));
2072 LeaseHolder writer(s_writeLease, LeaseAcquire::NO_ACQUIRE);
2073 if (branch && writer.acquire()) {
2074 smashed = true;
2075 SrcRec* destRec = getSrcRec(dest);
2076 destRec->chainFrom(IncomingBranch::jccFrom(toSmash));
2078 return branch;
2081 static void emitJmpOrJcc(X64Assembler& a, ConditionCode cc, TCA addr) {
2082 if (cc == CC_None) {
2083 a. jmp(addr);
2084 } else {
2085 a. jcc((ConditionCode)cc, addr);
2090 * emitBindJ --
2092 * Emit code to lazily branch (optionally on condition cc) to the
2093 * srckey in next.
2094 * Assumes current basic block is closed (outputs synced, etc.).
2096 void
2097 TranslatorX64::emitBindJ(X64Assembler& _a, ConditionCode cc,
2098 SrcKey dest, ServiceRequest req) {
2099 prepareForSmash(_a, cc == CC_None ? (int)kJmpLen : kJmpccLen);
2100 TCA toSmash = _a.frontier();
2101 if (&_a == &astubs) {
2102 emitJmpOrJcc(_a, cc, toSmash);
2105 setJmpTransID(toSmash);
2107 TCA sr = emitServiceReq(SRFlags::None, req,
2108 toSmash, dest.offset());
2110 if (&_a == &astubs) {
2111 CodeCursor cursor(_a, toSmash);
2112 emitJmpOrJcc(_a, cc, sr);
2113 } else {
2114 emitJmpOrJcc(_a, cc, sr);
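/*
 * Shape of what emitBindJ leaves behind (informal sketch):
 *
 *   toSmash:  jmp/jcc <service-request stub>   ; kJmpLen or kJmpccLen bytes,
 *                                              ; padded by prepareForSmash
 *
 * The stub raises req (REQ_BIND_JMP and friends) with toSmash and the
 * destination offset as arguments; bindJmp() later obtains a translation for
 * dest, chains the branch onto the destination SrcRec, and smashes the
 * jmp/jcc to point straight at the translated code.
 */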
2118 void
2119 TranslatorX64::emitBindJcc(X64Assembler& _a, ConditionCode cc,
2120 SrcKey dest,
2121 ServiceRequest req /* = REQ_BIND_JCC */) {
2122 emitBindJ(_a, cc, dest, req);
2125 void
2126 TranslatorX64::emitBindJmp(X64Assembler& _a,
2127 SrcKey dest,
2128 ServiceRequest req /* = REQ_BIND_JMP */) {
2129 emitBindJ(_a, CC_None, dest, req);
2132 void
2133 TranslatorX64::emitBindJmp(SrcKey dest) {
2134 emitBindJmp(a, dest);
2137 void TranslatorX64::emitResolvedDeps(const ChangeMap& resolvedDeps) {
2138 for (const auto dep : resolvedDeps) {
2139 m_irTrans->assertType(dep.first, dep.second->rtt);
2143 void
2144 TranslatorX64::emitFallbackJmp(SrcRec& dest, ConditionCode cc /* = CC_NZ */) {
2145 emitFallbackJmp(a, dest, cc);
2148 void
2149 TranslatorX64::emitFallbackJmp(Asm& as, SrcRec& dest,
2150 ConditionCode cc /* = CC_NZ */) {
2151 prepareForSmash(as, kJmpccLen);
2152 dest.emitFallbackJump(as.frontier(), cc);
2155 void
2156 TranslatorX64::emitFallbackUncondJmp(Asm& as, SrcRec& dest) {
2157 prepareForSmash(as, kJmpLen);
2158 dest.emitFallbackJump(as.frontier());
2161 void
2162 TranslatorX64::emitFallbackCondJmp(Asm& as, SrcRec& dest, ConditionCode cc) {
2163 prepareForSmash(as, kJmpccLen);
2164 dest.emitFallbackJump(as.frontier(), cc);
2167 void TranslatorX64::emitReqRetransNoIR(Asm& as, const SrcKey& sk) {
2168 prepareForSmash(as, kJmpLen);
2169 TCA toSmash = as.frontier();
2170 if (&as == &astubs) {
2171 as.jmp(toSmash);
2174 TCA sr = emitServiceReq(REQ_RETRANSLATE_NO_IR,
2175 toSmash, sk.offset());
2177 if (&as == &astubs) {
2178 CodeCursor cc(as, toSmash);
2179 as.jmp(sr);
2180 } else {
2181 as.jmp(sr);
2185 void TranslatorX64::emitReqRetransOpt(Asm& as, const SrcKey& sk,
2186 TransID transId) {
2187 emitServiceReq(REQ_RETRANSLATE_OPT,
2188 sk.getFuncId(), sk.offset(), transId);
2191 void
2192 TranslatorX64::checkRefs(X64Assembler& a,
2193 SrcKey sk,
2194 const RefDeps& refDeps,
2195 SrcRec& fail) {
2196 if (refDeps.size() == 0) {
2197 return;
2200 // Set up guards for each pushed ActRec that we've made reffiness
2201 // assumptions about
2202 for (RefDeps::ArMap::const_iterator it = refDeps.m_arMap.begin();
2203 it != refDeps.m_arMap.end(); ++it) {
2204 // Be careful! The actual Func might have fewer refs than the number
2205 // of args we're passing. To forestall this, we always prepare at
2206 // least 64 bits in the Func, and always fill out the refBitVec
2207 // to a multiple of 64 bits
2209 int entryArDelta = it->first;
2211 m_irTrans->hhbcTrans().guardRefs(entryArDelta,
2212 it->second.m_mask,
2213 it->second.m_vals);
2218 * emitRetFromInterpretedFrame --
2220 * When the interpreter pushes a call frame, there is necessarily no
2221 * machine RIP available to return to. This helper fishes out the
2222 * destination from the frame and redirects execution to it via enterTC.
2225 TranslatorX64::emitRetFromInterpretedFrame() {
2226 int32_t arBase = sizeof(ActRec) - sizeof(Cell);
2227 moveToAlign(astubs);
2228 TCA stub = astubs.frontier();
2229 // Marshall our own args by hand here.
2230 astubs. lea (rVmSp[-arBase], serviceReqArgRegs[0]);
2231 astubs. movq (rVmFp, serviceReqArgRegs[1]);
2232 emitServiceReq(SRFlags::JmpInsteadOfRet, REQ_POST_INTERP_RET);
2233 return stub;
2237 * Same as above, except has different logic for fetching the AR we are trying
2238 * to return from, because generators have ARs in different places.
2241 TranslatorX64::emitRetFromInterpretedGeneratorFrame() {
2242 // We have to get the Continuation object from the current AR's $this, then
2243 // find where its embedded AR is.
2244 moveToAlign(astubs);
2245 TCA stub = astubs.frontier();
2247 PhysReg rContAR = serviceReqArgRegs[0];
2248 astubs. loadq (rVmFp[AROFF(m_this)], rContAR);
2249 astubs. loadq (rContAR[CONTOFF(m_arPtr)], rContAR);
2250 astubs. movq (rVmFp, serviceReqArgRegs[1]);
2251 emitServiceReq(SRFlags::JmpInsteadOfRet, REQ_POST_INTERP_RET);
2252 return stub;
2255 class FreeRequestStubTrigger : public Treadmill::WorkItem {
2256 TCA m_stub;
2257 public:
2258 explicit FreeRequestStubTrigger(TCA stub) : m_stub(stub) {
2259 TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub);
2261 virtual void operator()() {
2262 TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub);
2263 if (TranslatorX64::Get()->freeRequestStub(m_stub) != true) {
2264 // If we can't free the stub, enqueue again to retry.
2265 TRACE(3, "FreeStubTrigger: write lease failed, requeueing %p\n", m_stub);
2266 enqueue(new FreeRequestStubTrigger(m_stub));
2271 #ifdef DEBUG
2273 struct DepthGuard {
2274 static __thread int m_depth;
2275 DepthGuard() { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
2276 ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }
2278 bool depthOne() const { return m_depth == 1; }
2280 __thread int DepthGuard::m_depth;
2282 #else
2284 struct DepthGuard { bool depthOne() const { return false; } };
2286 #endif
2289 * enterTCHelper does not save callee-saved registers except %rbp. This means
2290 * when we call it from C++, we have to tell gcc to clobber all the other
2291 * callee-saved registers.
2293 #if defined(__x86_64__)
2294 # define CALLEE_SAVED_BARRIER() \
2295 asm volatile("" : : : "rbx", "r12", "r13", "r14", "r15")
2296 #elif defined(__AARCH64EL__)
2297 # define CALLEE_SAVED_BARRIER() \
2298 asm volatile("" : : : "x19", "x20", "x21", "x22", "x23", "x24", "x25", \
2299 "x26", "x27", "x28")
2300 #else
2301 # error What are the callee-saved registers on your system?
2302 #endif
2305 * enterTCHelper is a handwritten assembly function that transfers control in
2306 * and out of the TC.
2308 static_assert(rVmSp == rbx &&
2309 rVmFp == rbp &&
2310 rVmTl == r12 &&
2311 rStashedAR == r15,
2312 "__enterTCHelper needs to be modified to use the correct ABI");
2313 static_assert(kReservedRSPScratchSpace == 0x280,
2314 "enterTCHelper needs to be updated for changes to "
2315 "kReservedRSPScratchSpace");
2316 static_assert(REQ_BIND_CALL == 0x1,
2317 "Update assembly test for REQ_BIND_CALL in __enterTCHelper");
2318 extern "C" void enterTCHelper(Cell* vm_sp,
2319 Cell* vm_fp,
2320 TCA start,
2321 TReqInfo* infoPtr,
2322 ActRec* firstAR,
2323 void* targetCacheBase);
2326 struct TReqInfo {
2327 uintptr_t requestNum;
2328 uintptr_t args[5];
2330 // Some TC registers need to be preserved across service requests.
2331 uintptr_t saved_rStashedAr;
2333 // Stub addresses are passed back to allow us to recycle used stubs.
2334 TCA stubAddr;
2338 void
2339 TranslatorX64::enterTC(TCA start, void* data) {
2340 using namespace TargetCache;
2342 if (debug) {
2343 fflush(stdout);
2344 fflush(stderr);
2346 DepthGuard d;
2347 TReqInfo info;
2348 SrcKey sk;
2350 if (LIKELY(start != nullptr)) {
2351 info.requestNum = data ? REQ_BIND_CALL : -1;
2352 info.saved_rStashedAr = (uintptr_t)data;
2353 } else {
2354 info.requestNum = -1;
2355 info.saved_rStashedAr = 0;
2356 sk = *(SrcKey*)data;
2357 start = getTranslation(TranslArgs(sk, true));
2359 for (;;) {
2360 assert(sizeof(Cell) == 16);
2361 assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
2362 assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
2364 s_writeLease.gremlinUnlock();
2365 // Keep dispatching until we end up somewhere the translator
2366 // recognizes, or we luck out and the leaseholder exits.
2367 while (!start) {
2368 TRACE(2, "enterTC forwarding BB to interpreter\n");
2369 g_vmContext->m_pc = sk.unit()->at(sk.offset());
2370 INC_TPC(interp_bb);
2371 g_vmContext->dispatchBB();
2372 PC newPc = g_vmContext->getPC();
2373 if (!newPc) { g_vmContext->m_fp = 0; return; }
2374 sk = SrcKey(liveFunc(), newPc);
2375 start = getTranslation(TranslArgs(sk, true));
2377 assert(start == (TCA)HPHP::Transl::funcBodyHelperThunk ||
2378 isValidCodeAddress(start) ||
2379 (start == (TCA)HPHP::Transl::fcallHelperThunk &&
2380 info.saved_rStashedAr == (uintptr_t)data));
2381 assert(!s_writeLease.amOwner());
2382 const Func* func = (vmfp() ? (ActRec*)vmfp() : (ActRec*)data)->m_func;
2383 func->validate();
2384 INC_TPC(enter_tc);
2386 TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
2387 vmfp(), func->name()->data(), vmsp());
2388 tl_regState = VMRegState::DIRTY;
2390 // We have to force C++ to spill anything that might be in a callee-saved
2391 // register (aside from rbp). enterTCHelper does not save them.
2392 CALLEE_SAVED_BARRIER();
2393 enterTCHelper(vmsp(), vmfp(), start, &info, vmFirstAR(),
2394 tl_targetCaches);
2395 CALLEE_SAVED_BARRIER();
2396 assert(g_vmContext->m_stack.isValidAddress((uintptr_t)vmsp()));
2398 tl_regState = VMRegState::CLEAN; // Careful: pc isn't sync'ed yet.
2399 TRACE(1, "enterTC: %p fp%p sp%p } return\n", start,
2400 vmfp(), vmsp());
2402 if (debug) {
2403 // Debugging code: cede the write lease half the time.
2404 if (RuntimeOption::EvalJitStressLease) {
2405 if (d.depthOne() == 1 && (rand() % 2) == 0) {
2406 s_writeLease.gremlinLock();
2409 // Ensure that each case either returns, or drives start to a valid
2410 // value.
2411 start = TCA(0xbee5face);
2414 TRACE(2, "enterTC: request(%s) args: %" PRIxPTR " %" PRIxPTR " %"
2415 PRIxPTR " %" PRIxPTR " %" PRIxPTR "\n",
2416 reqName(info.requestNum),
2417 info.args[0], info.args[1], info.args[2], info.args[3],
2418 info.args[4]);
2420 if (LIKELY(info.requestNum == REQ_EXIT)) {
2421 vmfp() = nullptr;
2422 return;
2424 if (!handleServiceRequest(info, start, sk)) return;
2429 * The contract is that each case will set sk to the place where
2430 * execution should resume, and optionally set start to the hardware
2431 * translation of the resumption point (or otherwise set it to null).
2432 * Returns false if we need to halt this nesting of the VM.
2434 * start and sk might be subtly different; i.e., there are cases where
2435 * start != NULL && start != getTranslation(sk). For instance,
2436 * REQ_BIND_CALL has not finished executing the OpCall when it gets
2437 * here, and has even done some work on its behalf. sk == OpFCall,
2438 * while start == the point in the TC that's "half-way through" the
2439 * Call instruction. If we punt to the interpreter, the interpreter
2440 * will redo some of the work that the translator has already done.
2442 bool TranslatorX64::handleServiceRequest(TReqInfo& info,
2443 TCA& start,
2444 SrcKey& sk) {
2445 const uintptr_t& requestNum = info.requestNum;
2446 auto* const args = info.args;
2447 assert(requestNum != REQ_EXIT);
2448 INC_TPC(service_req);
2450 bool smashed = false;
2451 switch (requestNum) {
2452 case REQ_BIND_CALL: {
2453 ReqBindCall* req = (ReqBindCall*)args[0];
2454 ActRec* calleeFrame = (ActRec*)args[1];
2455 TCA toSmash = req->m_toSmash;
2456 Func *func = const_cast<Func*>(calleeFrame->m_func);
2457 int nArgs = req->m_nArgs;
2458 bool isImmutable = req->m_isImmutable;
2459 TCA dest = tx64->funcPrologue(func, nArgs);
2460 TRACE(2, "enterTC: bindCall %s -> %p\n", func->name()->data(), dest);
2461 if (!isImmutable) {
2462 // We don't know we're calling the right function, so adjust
2463 // dest to point to the dynamic check of ar->m_func.
2464 dest = funcPrologueToGuard(dest, func);
2465 } else {
2466 TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
2467 func->fullName()->data(), dest);
2469 LeaseHolder writer(s_writeLease, LeaseAcquire::NO_ACQUIRE);
2470 if (dest && writer.acquire()) {
2471 TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
2472 smashCall(tx64->getAsmFor(toSmash), toSmash, dest);
2473 smashed = true;
2474 // sk: stale, but doesn't matter since we have a valid dest TCA.
2475 } else {
2476 // We need translator help; we're not at the callee yet, so
2477 // roll back. The prelude has done some work already, but it
2478 // should be safe to redo.
2479 TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
2480 toSmash, dest);
2481 sk = req->m_sourceInstr;
2483 start = dest;
2484 if (!start) {
2485 // EnterTCHelper pushes the return ip onto the stack when the
2486 // requestNum is REQ_BIND_CALL, but if start is NULL, it will
2487 // interpret in doFCall, so we clear out the requestNum in this
2488 // case to prevent enterTCHelper from pushing the return ip
2489 // onto the stack.
2490 info.requestNum = ~REQ_BIND_CALL;
2492 } break;
2494 case REQ_BIND_SIDE_EXIT:
2495 case REQ_BIND_JMP:
2496 case REQ_BIND_JCC:
2497 case REQ_BIND_JMP_NO_IR:
2498 case REQ_BIND_ADDR:
2500 TCA toSmash = (TCA)args[0];
2501 Offset off = args[1];
2502 sk = SrcKey(liveFunc(), off);
2503 if (requestNum == REQ_BIND_SIDE_EXIT) {
2504 SKTRACE(3, sk, "side exit taken!\n");
2506 start = bindJmp(toSmash, sk, (ServiceRequest)requestNum, smashed);
2507 } break;
2509 case REQ_BIND_JMPCC_FIRST: {
2510 TCA toSmash = (TCA)args[0];
2511 Offset offTaken = (Offset)args[1];
2512 Offset offNotTaken = (Offset)args[2];
2513 ConditionCode cc = ConditionCode(args[3]);
2514 bool taken = int64_t(args[4]) & 1;
2515 start = bindJmpccFirst(toSmash, offTaken, offNotTaken,
2516 taken, cc, smashed);
2517 // SrcKey: we basically need to emulate the fail
2518 sk = SrcKey(liveFunc(), taken ? offTaken : offNotTaken);
2519 } break;
2521 case REQ_BIND_JMPCC_SECOND: {
2522 TCA toSmash = (TCA)args[0];
2523 Offset off = (Offset)args[1];
2524 ConditionCode cc = ConditionCode(args[2]);
2525 start = bindJmpccSecond(toSmash, off, cc, smashed);
2526 sk = SrcKey(liveFunc(), off);
2527 } break;
2529 case REQ_RETRANSLATE_NO_IR: {
2530 TCA toSmash = (TCA)args[0];
2531 sk = SrcKey(liveFunc(), (Offset)args[1]);
2532 start = retranslateAndPatchNoIR(sk, true, toSmash);
2533 SKTRACE(1, sk, "retranslated (without IR) @%p\n", start);
2534 } break;
2536 case REQ_RETRANSLATE_OPT: {
2537 FuncId funcId = (FuncId) args[0];
2538 Offset offset = (Offset) args[1];
2539 TransID transId = (TransID)args[2];
2540 sk = SrcKey(funcId, offset);
2541 start = retranslateOpt(transId, false);
2542 SKTRACE(2, sk, "retranslated-OPT: transId = %d start: @%p\n", transId,
2543 start);
2544 break;
2547 case REQ_RETRANSLATE: {
2548 INC_TPC(retranslate);
2549 sk = SrcKey(liveFunc(), (Offset)args[0]);
2550 start = retranslate(TranslArgs(sk, true));
2551 SKTRACE(2, sk, "retranslated @%p\n", start);
2552 } break;
2554 case REQ_INTERPRET: {
2555 Offset off = args[0];
2556 int numInstrs = args[1];
2557 g_vmContext->m_pc = liveUnit()->at(off);
2559 * We know the compilation unit has not changed; basic blocks do
2560 * not span files. I claim even exceptions do not violate this
2561 * axiom.
2563 assert(numInstrs >= 0);
2564 SKTRACE(5, SrcKey(liveFunc(), off), "interp: enter\n");
2565 if (numInstrs) {
2566 s_perfCounters[tpc_interp_instr] += numInstrs;
2567 g_vmContext->dispatchN(numInstrs);
2568 } else {
2569 // numInstrs == 0 means it wants to dispatch until BB ends
2570 INC_TPC(interp_bb);
2571 g_vmContext->dispatchBB();
2573 PC newPc = g_vmContext->getPC();
2574 if (!newPc) { g_vmContext->m_fp = 0; return false; }
2575 SrcKey newSk(liveFunc(), newPc);
2576 SKTRACE(5, newSk, "interp: exit\n");
2577 sk = newSk;
2578 start = getTranslation(TranslArgs(newSk, true));
2579 } break;
2581 case REQ_POST_INTERP_RET: {
2582 // This is only responsible for the control-flow aspect of the Ret:
2583 // getting to the destination's translation, if any.
2584 ActRec* ar = (ActRec*)args[0];
2585 ActRec* caller = (ActRec*)args[1];
2586 assert((Cell*) caller == vmfp());
2587 Unit* destUnit = caller->m_func->unit();
2588 // Set PC so logging code in getTranslation doesn't get confused.
2589 vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
2590 SrcKey dest(caller->m_func, vmpc());
2591 sk = dest;
2592 start = getTranslation(TranslArgs(dest, true));
2593 TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
2594 ar->m_func->fullName()->data(),
2595 caller->m_func->fullName()->data());
2596 } break;
2598 case REQ_RESUME: {
2599 if (UNLIKELY(vmpc() == 0)) {
2600 g_vmContext->m_fp = 0;
2601 return false;
2603 SrcKey dest(liveFunc(), vmpc());
2604 sk = dest;
2605 start = getTranslation(TranslArgs(dest, true));
2606 } break;
2608 case REQ_STACK_OVERFLOW: {
2610 * we need to construct the pc of the fcall from the return
2611 * address (which will be after the fcall). Because fcall is
2612 * a variable length instruction, and because we sometimes
2613 * delete instructions from the instruction stream, we
2614 * need to use fpi regions to find the fcall.
2616 const FPIEnt* fe = liveFunc()->findPrecedingFPI(
2617 liveUnit()->offsetOf(vmpc()));
2618 vmpc() = liveUnit()->at(fe->m_fcallOff);
2619 assert(isFCallStar(toOp(*vmpc())));
2620 raise_error("Stack overflow");
2621 NOT_REACHED();
2625 if (smashed && info.stubAddr) {
2626 Treadmill::WorkItem::enqueue(new FreeRequestStubTrigger(info.stubAddr));
2629 return true;
2633 * Support for the stub freelist.
2635 TCA FreeStubList::maybePop() {
2636 StubNode* ret = m_list;
2637 if (ret) {
2638 TRACE(1, "alloc stub %p\n", ret);
2639 m_list = ret->m_next;
2640 ret->m_freed = ~kStubFree;
2642 return (TCA)ret;
2645 void FreeStubList::push(TCA stub) {
2647 * A freed stub may be released by Treadmill more than once if multiple
2648 * threads execute the service request before it is freed. We detect
2649 * duplicates by marking freed stubs
2651 StubNode* n = (StubNode *)stub;
2652 if (n->m_freed == kStubFree) {
2653 TRACE(1, "already freed stub %p\n", stub);
2654 return;
2656 n->m_freed = kStubFree;
2657 n->m_next = m_list;
2658 TRACE(1, "free stub %p (-> %p)\n", stub, m_list);
2659 m_list = n;
2662 bool
2663 TranslatorX64::freeRequestStub(TCA stub) {
2664 LeaseHolder writer(s_writeLease);
2666 * If we can't acquire the write lock, the caller
2667 * (FreeRequestStubTrigger) retries
2669 if (!writer) return false;
2670 assert(astubs.contains(stub));
2671 m_freeStubs.push(stub);
2672 return true;
2675 TCA TranslatorX64::getFreeStub() {
2676 TCA ret = m_freeStubs.maybePop();
2677 if (ret) {
2678 Stats::inc(Stats::Astubs_Reused);
2679 assert(m_freeStubs.m_list == nullptr ||
2680 astubs.contains(TCA(m_freeStubs.m_list)));
2681 TRACE(1, "recycle stub %p\n", ret);
2682 } else {
2683 ret = astubs.frontier();
2684 Stats::inc(Stats::Astubs_New);
2685 TRACE(1, "alloc new stub %p\n", ret);
2687 return ret;
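/*
 * Putting the pieces above together, the life cycle of a recyclable
 * service-request stub looks roughly like this (informal summary):
 *
 *   getFreeStub()           -> reuse a node from m_freeStubs, or hand out a
 *                              fresh kMaxStubSpace-sized region of astubs
 *   <stub runs, request handled, the branch that used it gets smashed>
 *   FreeRequestStubTrigger  -> enqueued on the Treadmill by enterTC after the
 *                              smash; the treadmill delays firing until
 *                              in-flight requests have drained
 *   freeRequestStub()       -> under the write lease, push the stub back onto
 *                              m_freeStubs for the next getFreeStub()
 *
 * The kStubFree marker in FreeStubList::push() is what makes a duplicate
 * free of the same stub harmless.
 */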
2691 * emitServiceReqWork --
2693 * Call a translator service co-routine. The code emitted here
2694 * reenters the enterTC loop, invoking the requested service. Control
2695 * will be returned non-locally to the next logical instruction in
2696 * the TC.
2698 * Return value is a destination; we emit the bulky service
2699 * request code into astubs.
2701 * Returns a continuation that will run after the arguments have been
2702 * emitted. This is gross, but is a partial workaround for the inability
2703 * to capture argument packs in the version of gcc we're using.
2706 TranslatorX64::emitServiceReqWork(SRFlags flags, ServiceRequest req,
2707 const TranslatorX64::ServiceReqArgVec& argv) {
2709 * Some requests can be recycled after they've fired once. Since this is
2710 * a special situation, we enumerate them here rather than forcing every
2711 * call site to choose.
2713 auto requestIsEphemeral = [](ServiceRequest req) {
2714 switch (req) {
2715 case REQ_BIND_JMPCC_SECOND:
2716 case REQ_BIND_JMPCC_FIRST:
2717 case REQ_BIND_JMP:
2718 return true;
2719 default:
2720 return false;
2723 static const std::unordered_set<ServiceRequest> ephemeralReqs {
2724 REQ_BIND_JMPCC_SECOND,
2725 REQ_BIND_JMPCC_FIRST,
2726 REQ_BIND_JMP
2729 const bool emitInA = flags & SRFlags::EmitInA;
2730 const bool align = (flags & SRFlags::Align) && !emitInA;
2731 const bool persist = !requestIsEphemeral(req);
2732 Asm& as = emitInA ? a : astubs;
2733 TCA start = emitInA ? a.frontier() :
2734 persist ? astubs.frontier() :
2735 getFreeStub();
2737 * Remember previous state of the code cache.
2739 boost::optional<CodeCursor> maybeCc = boost::none;
2740 if (start != as.frontier()) {
2741 maybeCc = boost::in_place<CodeCursor>(boost::ref(as), start);
2744 /* max space for moving to align, saving VM regs plus emitting args */
2745 static const int
2746 kVMRegSpace = 0x14,
2747 kMovSize = 0xa,
2748 kNumServiceRegs = sizeof(serviceReqArgRegs) / sizeof(PhysReg),
2749 kMaxStubSpace = kJmpTargetAlign - 1 + kVMRegSpace +
2750 kNumServiceRegs * kMovSize;
2751 if (align) {
2752 moveToAlign(as);
2754 TCA retval = as.frontier();
2755 TRACE(3, "Emit Service Req @%p %s(", start, reqName(req));
2757 * Move args into appropriate regs. Eager VMReg save may bash flags,
2758 * so set the CondCode arguments first.
2760 for (int i = 0; i < argv.size(); ++i) {
2761 assert(i < kNumServiceReqArgRegs);
2762 auto reg = serviceReqArgRegs[i];
2763 const auto& argInfo = argv[i];
2764 switch(argv[i].m_kind) {
2765 case ServiceReqArgInfo::Immediate: {
2766 TRACE(3, "%" PRIx64 ", ", argInfo.m_imm);
2767 as. emitImmReg(argInfo.m_imm, reg);
2768 } break;
2769 case ServiceReqArgInfo::CondCode: {
2770 // Already set before VM reg save.
2771 DEBUG_ONLY TCA start = as.frontier();
2772 as. setcc(argInfo.m_cc, rbyte(reg));
2773 assert(start - as.frontier() <= kMovSize);
2774 TRACE(3, "cc(%x), ", argInfo.m_cc);
2775 } break;
2776 default: not_reached();
2779 emitEagerVMRegSave(as, SaveFP);
2780 if (persist) {
2781 as. emitImmReg(0, rAsm);
2782 } else {
2783 as. emitImmReg((uint64_t)start, rAsm);
2785 TRACE(3, ")\n");
2786 as. emitImmReg(req, rdi);
2789 * Weird hand-shaking with enterTC: reverse-call a service routine.
2791 * In the case of some special stubs (m_callToExit, m_retHelper), we
2792 * have already unbalanced the return stack by doing a ret to
2793 * something other than enterTCHelper. In that case
2794 * SRJmpInsteadOfRet indicates to fake the return.
2796 if (flags & SRFlags::JmpInsteadOfRet) {
2797 as. pop(rax);
2798 as. jmp(rax);
2799 } else {
2800 as. ret();
2802 recordBCInstr(OpServiceRequest, as, retval);
2803 translator_not_reached(as);
2804 if (!persist) {
2806 * Recycled stubs need to be uniformly sized. Make space for the
2807 * largest possible service request.
2809 assert(as.frontier() - start <= kMaxStubSpace);
2810 as.emitNop(start + kMaxStubSpace - as.frontier());
2811 assert(as.frontier() - start == kMaxStubSpace);
2813 return retval;
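/*
 * For reference, a stub produced by emitServiceReqWork ends up looking
 * roughly like this (illustrative; the exact encodings and padding vary):
 *
 *   <optional alignment nops>
 *   serviceReqArgRegs[i] <- arg i        ; immediates, or setcc for CondCode
 *   <eager VM reg save (SaveFP)>
 *   rAsm <- stub start, or 0 if the stub is persistent (not recyclable)
 *   rdi  <- req
 *   ret                                  ; or pop+jmp for JmpInsteadOfRet
 *   <nop padding up to kMaxStubSpace for recyclable stubs>
 *
 * The ret lands back in enterTCHelper, which packages the request number and
 * argument registers into the TReqInfo that enterTC/handleServiceRequest read.
 */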
2817 TranslatorX64::emitTransCounterInc(X64Assembler& a) {
2818 TCA start = a.frontier();
2819 if (!isTransDBEnabled()) return start;
2821 a. movq (getTransCounterAddr(), rAsm);
2822 a. lock ();
2823 a. incq (*rAsm);
2825 return start;
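// Descriptive note: the lock-prefixed incq emitted above is an atomic
// increment of the 64-bit counter at getTransCounterAddr(), so concurrent
// threads executing the same translation cannot lose counts.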
2828 void
2829 TranslatorX64::getInputsIntoXMMRegs(const NormalizedInstruction& ni,
2830 PhysReg lr, PhysReg rr,
2831 RegXMM lxmm,
2832 RegXMM rxmm) {
2833 const DynLocation& l = *ni.inputs[0];
2834 const DynLocation& r = *ni.inputs[1];
2835 // Get the values into their appropriate xmm locations
2836 auto intoXmm = [&](const DynLocation& l, PhysReg src, RegXMM xmm) {
2837 if (l.isInt()) {
2838 // cvtsi2sd doesn't modify the high bits of its target, which can
2839 // cause false dependencies to prevent register renaming from kicking
2840 // in. Break the dependency chain by zeroing out the destination reg.
2841 a. pxor_xmm_xmm(xmm, xmm);
2842 a. cvtsi2sd_reg64_xmm(src, xmm);
2843 } else {
2844 a. mov_reg64_xmm(src, xmm);
2847 intoXmm(l, lr, lxmm);
2848 intoXmm(r, rr, rxmm);
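/*
 * The pxor-before-cvtsi2sd idiom above is the usual way to break the false
 * dependency cvtsi2sd has on the old contents of its destination register.
 * In intrinsics form the same pattern would read roughly as follows
 * (hypothetical illustration only, not code used by this file):
 *
 *   int64_t i = ...;                 // integer input
 *   __m128d x = _mm_setzero_pd();    // pxor xmm, xmm: kill the old value
 *   x = _mm_cvtsi64_sd(x, i);        // cvtsi2sd: int64 -> double in low lane
 *
 * Without the zeroing, out-of-order execution must wait for whatever last
 * wrote the xmm register even though its high lane is never read here.
 */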
2851 #define O(opcode, imm, push, pop, flags) \
2853 * Each interpOne##opcode method saves m_pc, m_fp, and m_sp in the
2854 * ExecutionContext, calls into the interpreter, and then returns a
2855 * pointer to the current ExecutionContext.
2856 */ \
2857 VMExecutionContext* \
2858 interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
2859 interp_set_regs(ar, sp, pcOff); \
2860 SKTRACE(5, SrcKey(liveFunc(), vmpc()), "%40s %p %p\n", \
2861 "interpOne" #opcode " before (fp,sp)", \
2862 vmfp(), vmsp()); \
2863 assert(toOp(*vmpc()) == Op::opcode); \
2864 VMExecutionContext* ec = g_vmContext; \
2865 Stats::inc(Stats::Instr_InterpOne ## opcode); \
2866 if (Trace::moduleEnabled(Trace::interpOne, 1)) { \
2867 static const StringData* cat = StringData::GetStaticString("interpOne"); \
2868 static const StringData* name = StringData::GetStaticString(#opcode); \
2869 Stats::incStatGrouped(cat, name, 1); \
2871 INC_TPC(interp_one) \
2872 /* Correct for over-counting in TC-stats. */ \
2873 Stats::inc(Stats::Instr_TC, -1); \
2874 ec->op##opcode(); \
2876 * Only set regstate back to dirty if an exception is not
2877 * propagating. If an exception is throwing, regstate for this call
2878 * is actually still correct, and we don't have information in the
2879 * fixup map for interpOne calls anyway.
2880 */ \
2881 tl_regState = VMRegState::DIRTY; \
2882 return ec; \
2885 OPCODES
2886 #undef O
2888 void* interpOneEntryPoints[] = {
2889 #define O(opcode, imm, push, pop, flags) \
2890 (void*)(interpOne ## opcode),
2891 OPCODES
2892 #undef O
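/*
 * interpOneEntryPoints is laid out in bytecode-opcode order, so the intended
 * use is an indexed indirect call; a minimal sketch (assuming the Op
 * enumerators are dense and zero-based, as the table construction implies):
 *
 *   typedef VMExecutionContext* (*InterpOneFn)(ActRec*, Cell*, Offset);
 *   auto f = reinterpret_cast<InterpOneFn>(interpOneEntryPoints[size_t(op)]);
 *   f(ar, sp, pcOff);
 */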
2895 void TranslatorX64::fixupWork(VMExecutionContext* ec,
2896 ActRec* rbp) const {
2897 assert(RuntimeOption::EvalJit);
2899 TRACE_SET_MOD(fixup);
2900 TRACE(1, "fixup(begin):\n");
2902 auto isVMFrame = [] (ActRec* ar) {
2903 assert(ar);
2904 bool ret = uintptr_t(ar) - Util::s_stackLimit >= Util::s_stackSize;
2905 assert(!ret ||
2906 (ar >= g_vmContext->m_stack.getStackLowAddress() &&
2907 ar < g_vmContext->m_stack.getStackHighAddress()) ||
2908 ar->m_func->isGenerator());
2909 return ret;
2912 auto* nextRbp = rbp;
2913 rbp = 0;
2914 do {
2915 auto* prevRbp = rbp;
2916 rbp = nextRbp;
2917 assert(rbp && "Missing fixup for native call");
2918 nextRbp = reinterpret_cast<ActRec*>(rbp->m_savedRbp);
2919 TRACE(2, "considering frame %p, %p\n", rbp, (void*)rbp->m_savedRip);
2921 if (isVMFrame(nextRbp)) {
2922 TRACE(2, "fixup checking vm frame %s\n",
2923 nextRbp->m_func->name()->data());
2924 FixupMap::VMRegs regs;
2925 if (m_fixupMap.getFrameRegs(rbp, prevRbp, &regs)) {
2926 TRACE(2, "fixup(end): func %s fp %p sp %p pc %p\n",
2927 regs.m_fp->m_func->name()->data(),
2928 regs.m_fp, regs.m_sp, regs.m_pc);
2929 ec->m_fp = const_cast<ActRec*>(regs.m_fp);
2930 ec->m_pc = regs.m_pc;
2931 vmsp() = regs.m_sp;
2932 return;
2935 } while (rbp && rbp != nextRbp);
2937 // OK, we've exhausted the entire actRec chain. We are only
2938 // invoking ::fixup() from contexts that were known to be called out
2939 // of the TC, so this cannot happen.
2940 NOT_REACHED();
2943 void TranslatorX64::fixup(VMExecutionContext* ec) const {
2944 // Start looking for fixup entries at the current (C++) frame. This
2945 // will walk the frames upward until we find a TC frame.
2946 DECLARE_FRAME_POINTER(framePtr);
2947 fixupWork(ec, framePtr);
2950 TCA TranslatorX64::getTranslatedCaller() const {
2951 DECLARE_FRAME_POINTER(fp);
2952 ActRec* framePtr = fp; // can't directly mutate the register-mapped one
2953 for (; framePtr; framePtr = (ActRec*)framePtr->m_savedRbp) {
2954 TCA rip = (TCA)framePtr->m_savedRip;
2955 if (isValidCodeAddress(rip)) {
2956 return rip;
2959 return nullptr;
2962 void
2963 TranslatorX64::syncWork() {
2964 assert(tl_regState == VMRegState::DIRTY);
2965 fixup(g_vmContext);
2966 tl_regState = VMRegState::CLEAN;
2967 Stats::inc(Stats::TC_Sync);
2970 // could be static but used in hopt/codegen.cpp
2971 void raiseUndefVariable(StringData* nm) {
2972 raise_notice(Strings::UNDEFINED_VARIABLE, nm->data());
2973 // FIXME: do we need to decref the string if an exception is propagating?
2974 decRefStr(nm);
2977 // This intentionally excludes Int/Int, which is handled separately
2978 // from cases involving the FPU.
2979 bool
2980 mathEquivTypes(RuntimeType lt, RuntimeType rt) {
2981 return (lt.isDouble() && rt.isDouble()) ||
2982 (lt.isInt() && rt.isDouble()) ||
2983 (lt.isDouble() && rt.isInt());
2986 /* This is somewhat hacky. It decides which helpers/builtins should
2987 * use eager vmreganchor based on profile information. Using eager
2988 * vmreganchor for all helper calls is a perf regression. */
2989 bool TranslatorX64::eagerRecord(const Func* func) {
2990 const char* list[] = {
2991 "func_get_args",
2992 "get_called_class",
2993 "func_num_args",
2994 "array_filter",
2995 "array_map",
2998 for (int i = 0; i < sizeof(list)/sizeof(list[0]); i++) {
2999 if (!strcmp(func->name()->data(), list[i])) {
3000 return true;
3003 if (func->cls() && !strcmp(func->cls()->name()->data(), "WaitHandle")
3004 && !strcmp(func->name()->data(), "join")) {
3005 return true;
3007 return false;
3010 ObjectData*
3011 HOT_FUNC_VM
3012 newInstanceHelper(Class* cls, int numArgs, ActRec* ar, ActRec* prevAr) {
3013 const Func* f = cls->getCtor();
3014 ObjectData* ret = nullptr;
3015 if (UNLIKELY(!(f->attrs() & AttrPublic))) {
3016 VMRegAnchor _;
3017 UNUSED MethodLookup::LookupResult res =
3018 g_vmContext->lookupCtorMethod(f, cls, true /*raise*/);
3019 assert(res == MethodLookup::LookupResult::MethodFoundWithThis);
3021 // Don't start pushing the AR until newInstance returns; it may reenter.
3022 ret = newInstance(cls);
3023 f->validate();
3024 ar->m_func = f;
3025 ar->initNumArgs(numArgs, true /*fromCtor*/);
3026 // Count stack and this.
3027 ret->incRefCount();
3028 ret->incRefCount();
3029 ar->setThis(ret);
3030 ar->setVarEnv(nullptr);
3031 arSetSfp(ar, prevAr);
3032 TRACE(2, "newInstanceHelper: AR %p: f %p, savedRbp %#" PRIx64
3033 ", savedRip %#" PRIx64 ", this %p\n",
3034 ar, ar->m_func, ar->m_savedRbp, ar->m_savedRip, ar->m_this);
3035 return ret;
3039 TranslatorX64::emitNativeTrampoline(TCA helperAddr) {
3040 auto& a = atrampolines;
3042 if (!a.canEmit(m_trampolineSize)) {
3043 // not enough space to emit a trampoline, so just return the
3044 // helper address and emitCall will emit the right sequence
3045 // to call it indirectly.
3046 TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
3047 always_assert(false);
3048 return helperAddr;
3050 uint32_t index = m_numNativeTrampolines++;
3051 TCA trampAddr = a.frontier();
3052 if (Stats::enabled()) {
3053 Stats::emitInc(a, &Stats::tl_helper_counters[0], index);
3054 char* name = Util::getNativeFunctionName(helperAddr);
3055 const size_t limit = 50;
3056 if (strlen(name) > limit) {
3057 name[limit] = '\0';
3059 Stats::helperNames[index] = name;
3063 * For stubs that take arguments in rAsm, we need to make sure
3064 * we're not damaging its contents here. (If !jmpDeltaFits, the jmp
3065 * opcode will need to movabs the address into rAsm before
3066 * jumping.)
3068 auto DEBUG_ONLY stubUsingRScratch = [&](TCA tca) {
3069 return tca == m_dtorGenericStubRegs;
3072 assert(IMPLIES(stubUsingRScratch(helperAddr), a.jmpDeltaFits(helperAddr)));
3073 a. jmp (helperAddr);
3074 a. ud2 ();
3076 trampolineMap[helperAddr] = trampAddr;
3077 if (m_trampolineSize == 0) {
3078 m_trampolineSize = a.frontier() - trampAddr;
3079 assert(m_trampolineSize >= kMinPerTrampolineSize);
3081 recordBCInstr(OpNativeTrampoline, a, trampAddr);
3082 return trampAddr;
3086 TranslatorX64::getNativeTrampoline(TCA helperAddr) {
3087 if (!RuntimeOption::EvalJitTrampolines && !Stats::enabled()) {
3088 return helperAddr;
3090 TCA trampAddr = (TCA)mapGet<PointerMap>(trampolineMap, helperAddr);
3091 if (trampAddr) {
3092 return trampAddr;
3094 return emitNativeTrampoline(helperAddr);
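/*
 * Informal note on the trampolines above: atrampolines sits adjacent to the
 * translation cache, so a trampoline (one near jmp to the real helper,
 * followed by ud2) gives emitCall a target that is reachable with a rel32
 * call even when the helper itself lives far from the TC. When Stats are
 * enabled, each trampoline also bumps a per-helper counter on every call.
 */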
3097 static void defClsHelper(PreClass *preClass) {
3098 assert(tl_regState == VMRegState::DIRTY);
3099 tl_regState = VMRegState::CLEAN;
3100 Unit::defClass(preClass);
3103 * m_defClsHelper sync'd the registers for us already. This means
3104 * if an exception propagates we want to leave things as
3105 * VMRegState::CLEAN, since we're still in sync. Only set it to dirty
3106 * if we are actually returning to run in the TC again.
3108 tl_regState = VMRegState::DIRTY;
3111 template <typename T>
3112 static int64_t switchBoundsCheck(T v, int64_t base, int64_t nTargets) {
3113 // I'm relying on gcc to be smart enough to optimize away the next
3114 // two lines when T is int64.
3115 if (int64_t(v) == v) {
3116 int64_t ival = v;
3117 if (ival >= base && ival < (base + nTargets)) {
3118 return ival - base;
3121 return nTargets + 1;
3124 int64_t switchDoubleHelper(int64_t val, int64_t base, int64_t nTargets) {
3125 union {
3126 int64_t intbits;
3127 double dblval;
3128 } u;
3129 u.intbits = val;
3130 return switchBoundsCheck(u.dblval, base, nTargets);
3133 int64_t switchStringHelper(StringData* s, int64_t base, int64_t nTargets) {
3134 int64_t ival;
3135 double dval;
3136 switch (s->isNumericWithVal(ival, dval, 1)) {
3137 case KindOfNull:
3138 ival = switchBoundsCheck(0, base, nTargets);
3139 break;
3141 case KindOfDouble:
3142 ival = switchBoundsCheck(dval, base, nTargets);
3143 break;
3145 case KindOfInt64:
3146 ival = switchBoundsCheck(ival, base, nTargets);
3147 break;
3149 default:
3150 not_reached();
3152 decRefStr(s);
3153 return ival;
3156 int64_t switchObjHelper(ObjectData* o, int64_t base, int64_t nTargets) {
3157 int64_t ival = o->o_toInt64();
3158 decRefObj(o);
3159 return switchBoundsCheck(ival, base, nTargets);
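/*
 * Worked examples for the switch helpers above, taking base = 0 and
 * nTargets = 10 (informal, just tracing the code):
 *
 *   switchDoubleHelper(bits of 3.0, 0, 10) -> 3    3.0 survives the int64_t
 *                                                  round-trip, and 3 is in range
 *   switchDoubleHelper(bits of 3.5, 0, 10) -> 11   nTargets + 1, the
 *                                                  out-of-range result
 *   switchStringHelper("7", 0, 10)         -> 7    numeric string
 *   switchStringHelper("foo", 0, 10)       -> 0    non-numeric strings are
 *                                                  bounds-checked as 0
 *   switchObjHelper(obj, 0, 10)            -> bounds check on obj->o_toInt64()
 */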
3162 bool
3163 TranslatorX64::reachedTranslationLimit(SrcKey sk,
3164 const SrcRec& srcRec) const {
3165 if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) {
3166 INC_TPC(max_trans);
3167 if (debug && Trace::moduleEnabled(Trace::tx64, 2)) {
3168 const vector<TCA>& tns = srcRec.translations();
3169 TRACE(1, "Too many (%zd) translations: %s, BC offset %d\n",
3170 tns.size(), sk.unit()->filepath()->data(),
3171 sk.offset());
3172 SKTRACE(2, sk, "{\n");
3173 TCA topTrans = srcRec.getTopTranslation();
3174 for (size_t i = 0; i < tns.size(); ++i) {
3175 const TransRec* rec = getTransRec(tns[i]);
3176 assert(rec);
3177 SKTRACE(2, sk, "%zd %p\n", i, tns[i]);
3178 if (tns[i] == topTrans) {
3179 SKTRACE(2, sk, "%zd: *Top*\n", i);
3181 if (rec->kind == TransAnchor) {
3182 SKTRACE(2, sk, "%zd: Anchor\n", i);
3183 } else {
3184 SKTRACE(2, sk, "%zd: guards {\n", i);
3185 for (unsigned j = 0; j < rec->dependencies.size(); ++j) {
3186 TRACE(2, rec->dependencies[j]);
3188 SKTRACE(2, sk, "%zd } guards\n", i);
3191 SKTRACE(2, sk, "} /* Too many translations */\n");
3193 return true;
3196 return false;
3199 void
3200 TranslatorX64::emitGuardChecks(X64Assembler& a,
3201 SrcKey sk,
3202 const ChangeMap& dependencies,
3203 const RefDeps& refDeps,
3204 SrcRec& fail) {
3205 if (Trace::moduleEnabled(Trace::stats, 2)) {
3206 Stats::emitInc(a, Stats::TraceletGuard_enter);
3209 emitRB(a, RBTypeTraceletGuards, sk);
3210 for (auto const& dep : dependencies) {
3211 m_irTrans->checkType(dep.first, dep.second->rtt);
3214 checkRefs(a, sk, refDeps, fail);
3216 if (Trace::moduleEnabled(Trace::stats, 2)) {
3217 Stats::emitInc(a, Stats::TraceletGuard_execute);
3222 void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
3223 if (!debug) return;
3225 SrcKey sk = t.m_sk;
3226 DEBUG_ONLY auto unit = sk.unit();
3228 TRACE(3, "----------------------------------------------\n");
3229 TRACE(3, " Translating from file %s:%d %s at %p:\n",
3230 unit->filepath()->data(),
3231 unit->getLineNumber(sk.offset()),
3232 sk.func()->name()->data(),
3233 postGuards);
3234 TRACE(3, " preconds:\n");
3235 TRACE(3, " types:\n");
3236 for (DepMap::const_iterator i = t.m_dependencies.begin();
3237 i != t.m_dependencies.end(); ++i) {
3238 TRACE(3, " %-5s\n", i->second->pretty().c_str());
3240 if (t.m_refDeps.size() != 0) {
3241 TRACE(3, " refs:\n");
3242 for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
3243 i != t.m_refDeps.m_arMap.end();
3244 ++i) {
3245 TRACE(3, " (ActRec %" PRId64 " : %-5s)\n", i->first,
3246 i->second.pretty().c_str());
3249 TRACE(3, " postconds:\n");
3250 for (ChangeMap::const_iterator i = t.m_changes.begin();
3251 i != t.m_changes.end(); ++i) {
3252 TRACE(3, " %-5s\n", i->second->pretty().c_str());
3254 for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
3255 TRACE(3, " %6d: %s\n", ni->source.offset(),
3256 instrToString((Op*)ni->pc()).c_str());
3257 if (ni->breaksTracelet) break;
3259 TRACE(3, "----------------------------------------------\n");
3260 if (Trace::moduleEnabled(Trace::tx64, 5)) {
3261 // prettyStack() expects to use vmpc(). Leave it in the state we
3262 // found it since this code is debug-only, and we don't want behavior
3263 // to vary across the optimized/debug builds.
3264 PC oldPC = vmpc();
3265 vmpc() = unit->at(sk.offset());
3266 TRACE(3, g_vmContext->prettyStack(string(" tx64 ")));
3267 vmpc() = oldPC;
3268 TRACE(3, "----------------------------------------------\n");
3272 void
3273 TranslatorX64::translateWork(const TranslArgs& args) {
3274 auto sk = args.m_sk;
3275 std::unique_ptr<Tracelet> tp = analyze(sk);
3276 Tracelet& t = *tp;
3278 SKTRACE(1, sk, "translateWork\n");
3279 assert(m_srcDB.find(sk));
3281 TCA start = a.frontier();
3282 TCA stubStart = astubs.frontier();
3283 TCA counterStart = 0;
3284 uint8_t counterLen = 0;
3285 SrcRec& srcRec = *getSrcRec(sk);
3286 TransKind transKind = TransInterp;
3287 UndoMarker undoA(a);
3288 UndoMarker undoAstubs(astubs);
3290 auto resetState = [&] {
3291 undoA.undo();
3292 undoAstubs.undo();
3293 m_pendingFixups.clear();
3294 m_bcMap.clear();
3295 srcRec.clearInProgressTailJumps();
3298 auto assertCleanState = [&] {
3299 assert(a.frontier() == start);
3300 assert(astubs.frontier() == stubStart);
3301 assert(m_pendingFixups.empty());
3302 assert(m_bcMap.empty());
3303 assert(srcRec.inProgressTailJumps().empty());
3306 if (!args.m_interp && !reachedTranslationLimit(sk, srcRec)) {
3307 // Attempt to create a region at this SrcKey
3308 JIT::RegionDescPtr region;
3309 if (RuntimeOption::EvalJitPGO) {
3310 if (m_mode == TransOptimize) {
3311 TransID transId = args.m_transId;
3312 assert(transId != InvalidID);
3313 region = JIT::selectHotRegion(transId, this);
3314 if (region && region->blocks.size() == 0) region = nullptr;
3315 } else {
3316 // We always go through the tracelet translator in this case
3318 } else {
3319 JIT::RegionContext rContext { sk.func(), sk.offset(), liveSpOff() };
3320 FTRACE(2, "populating live context for region\n");
3321 populateLiveContext(rContext);
3322 region = JIT::selectRegion(rContext, &t);
3325 TranslateResult result = Retry;
3326 RegionBlacklist regionInterps;
3327 while (result == Retry) {
3328 traceStart(sk.offset());
3330 // Try translating a region if we have one, then fall back to using the
3331 // Tracelet.
3332 if (region) {
3333 try {
3334 assertCleanState();
3335 result = translateRegion(*region, regionInterps);
3336 FTRACE(2, "translateRegion finished with result {}\n",
3337 translateResultName(result));
3338 } catch (const std::exception& e) {
3339 FTRACE(1, "translateRegion failed with '{}'\n", e.what());
3340 result = Failure;
3342 if (result == Failure) {
3343 traceFree();
3344 traceStart(sk.offset());
3345 resetState();
3348 if (!region || result == Failure) {
3349 FTRACE(1, "trying irTranslateTracelet\n");
3350 assertCleanState();
3351 if (m_mode == TransOptimize) {
3352 m_mode = TransLive;
3354 result = translateTracelet(t);
3357 if (result != Success) {
3358 // Translation failed. Free resources for this trace, rollback the
3359 // translation cache frontiers, and discard any pending fixups.
3360 resetState();
3362 traceFree();
3365 if (result == Success) {
3366 assert(m_mode == TransLive ||
3367 m_mode == TransProfile ||
3368 m_mode == TransOptimize);
3369 transKind = m_mode;
3373 if (transKind == TransInterp) {
3374 assertCleanState();
3375 TRACE(1,
3376 "emitting %d-instr interp request for failed translation\n",
3377 int(t.m_numOpcodes));
3378 // Add a counter for the translation if requested
3379 if (RuntimeOption::EvalJitTransCounters) {
3380 emitTransCounterInc(a);
3382 a. jmp(emitServiceReq(REQ_INTERPRET,
3383 t.m_sk.offset(), t.m_numOpcodes));
3384 // Fall through.
3387 for (uint i = 0; i < m_pendingFixups.size(); i++) {
3388 TCA tca = m_pendingFixups[i].m_tca;
3389 assert(isValidCodeAddress(tca));
3390 m_fixupMap.recordFixup(tca, m_pendingFixups[i].m_fixup);
3392 m_pendingFixups.clear();
3394 addTranslation(TransRec(sk, sk.unit()->md5(), transKind, t, start,
3395 a.frontier() - start, stubStart,
3396 astubs.frontier() - stubStart,
3397 counterStart, counterLen,
3398 m_bcMap));
3399 m_bcMap.clear();
3401 recordGdbTranslation(sk, sk.func(), a, start,
3402 false, false);
3403 recordGdbTranslation(sk, sk.func(), astubs, stubStart,
3404 false, false);
3405 if (RuntimeOption::EvalJitPGO) {
3406 m_profData->addTrans(t, transKind);
3408 // SrcRec::newTranslation() makes this code reachable. Do this last;
3409 // otherwise there's some chance of reader threads hitting code whose
3410 // metadata is not yet visible.
3411 TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n",
3412 start, sk.getFuncId(), sk.offset());
3413 srcRec.newTranslation(start);
3414 TRACE(1, "tx64: %zd-byte tracelet\n", a.frontier() - start);
3415 if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
3416 Trace::traceRelease("%s", getUsage().c_str());
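/*
 * Control-flow recap of translateWork above (informal):
 *
 *   - try a region translation (a PGO hot region under TransOptimize, or the
 *     selectRegion result otherwise); on Failure, reset state and fall back
 *     to translateTracelet(t); on Retry, loop and try again
 *   - if nothing succeeded, transKind stays TransInterp and we only emit an
 *     optional counter plus a jump to a REQ_INTERPRET service request
 *   - finally record fixups, the TransRec/bcMap, and GDB info, and only then
 *     publish the code via srcRec.newTranslation(start)
 */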
3420 TranslatorX64::TranslateResult
3421 TranslatorX64::translateTracelet(Tracelet& t) {
3422 FTRACE(2, "attempting to translate tracelet:\n{}\n", t.toString());
3423 assert(!Translator::liveFrameIsPseudoMain());
3424 const SrcKey &sk = t.m_sk;
3425 SrcRec& srcRec = *getSrcRec(sk);
3426 HhbcTranslator& ht = m_irTrans->hhbcTrans();
3428 assert(srcRec.inProgressTailJumps().size() == 0);
3429 try {
3430 emitResolvedDeps(t.m_resolvedDeps);
3431 emitGuardChecks(a, sk, t.m_dependencies, t.m_refDeps, srcRec);
3433 dumpTranslationInfo(t, a.frontier());
3435 // after guards, add a counter for the translation if requested
3436 if (RuntimeOption::EvalJitTransCounters) {
3437 ht.emitIncTransCounter();
3440 if (m_mode == TransProfile) {
3441 ht.emitCheckCold(m_profData->curTransID());
3444 emitRB(a, RBTypeTraceletBody, t.m_sk);
3445 Stats::emitInc(a, Stats::Instr_TC, t.m_numOpcodes);
3447 // Profiling on function entry.
3448 if (t.m_sk.offset() == t.func()->base()) {
3449 ht.profileFunctionEntry("Normal");
3453 * Profiling on the shapes of tracelets that are whole functions.
3454 * (These are the things we might consider trying to support
3455 * inlining.)
3457 [&]{
3458 static const bool enabled = Stats::enabledAny() &&
3459 getenv("HHVM_STATS_FUNCSHAPE");
3460 if (!enabled) return;
3461 if (t.m_sk.offset() != t.func()->base()) return;
3462 if (auto last = t.m_instrStream.last) {
3463 if (last->op() != OpRetC && last->op() != OpRetV) {
3464 return;
3467 ht.profileSmallFunctionShape(traceletShape(t));
3468 }();
3470 Unit::MetaHandle metaHand;
3471 // Translate each instruction in the tracelet
3472 for (auto* ni = t.m_instrStream.first; ni && !ht.hasExit();
3473 ni = ni->next) {
3474 readMetaData(metaHand, *ni, m_irTrans->hhbcTrans(), MetaMode::Legacy);
3476 try {
3477 SKTRACE(1, ni->source, "HHIR: translateInstr\n");
3478 assert(!(m_mode == TransProfile && ni->outputPredicted && ni->next));
3479 m_irTrans->translateInstr(*ni);
3480 } catch (JIT::FailedIRGen& fcg) {
3481 always_assert(!ni->interp);
3482 ni->interp = true;
3483 FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
3484 "after failing to generate ir: {} \n\n",
3485 getCurrentTransID(), ni->toString(), fcg.what());
3486 return Retry;
3488 assert(ni->source.offset() >= t.func()->base());
3489 // We sometimes leave the tail of a truncated tracelet in place to aid
3490 // analysis, but breaksTracelet is authoritative.
3491 if (ni->breaksTracelet || m_irTrans->hhbcTrans().hasExit()) break;
3493 traceEnd();
3495 try {
3496 traceCodeGen();
3497 TRACE(1, "HHIR: SUCCEEDED to generate code for Translation %d\n\n\n",
3498 getCurrentTransID());
3499 return Success;
3500 } catch (JIT::FailedCodeGen& fcg) {
3501 // Code-gen failed. Search for the bytecode instruction that caused the
3502 // problem, flag it to be interpreted, and retranslate the tracelet.
3503 for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
3504 if (ni->source.offset() == fcg.bcOff) {
3505 always_assert(!ni->interp);
3506 ni->interp = true;
3507 FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
3508 "after failing to code-gen \n\n",
3509 getCurrentTransID(), ni->toString(), fcg.what());
3510 return Retry;
3513 throw fcg;
3515 } catch (JIT::FailedCodeGen& fcg) {
3516 TRACE(1, "HHIR: FAILED to generate code for Translation %d "
3517 "@ %s:%d (%s)\n", getCurrentTransID(),
3518 fcg.file, fcg.line, fcg.func);
3519 // HHIR:TODO Remove extra TRACE and adjust tools
3520 TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
3521 fcg.file, fcg.line, fcg.func);
3522 } catch (JIT::FailedIRGen& x) {
3523 TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
3524 x.file, x.line, x.func);
3525 } catch (const FailedAssertion& fa) {
3526 fa.print();
3527 StackTraceNoHeap::AddExtraLogging(
3528 "Assertion failure",
3529 folly::format("{}\n\nActive Trace:\n{}\n",
3530 fa.summary, ht.trace()->toString()).str());
3531 abort();
3532 } catch (const std::exception& e) {
3533 FTRACE(1, "HHIR: FAILED with exception: {}\n", e.what());
3534 assert(0);
3536 return Failure;
3539 void TranslatorX64::traceCodeGen() {
3540 using namespace JIT;
3542 HhbcTranslator& ht = m_irTrans->hhbcTrans();
3543 HPHP::JIT::IRTrace* trace = ht.trace();
3544 auto finishPass = [&](const char* msg, int level,
3545 const RegAllocInfo* regs,
3546 const LifetimeInfo* lifetime) {
3547 dumpTrace(level, trace, msg, regs, lifetime);
3548 assert(checkCfg(trace, ht.irFactory()));
3549 };
3551 finishPass(" after initial translation ", kIRLevel, nullptr, nullptr);
3552 optimizeTrace(trace, ht.traceBuilder());
3553 finishPass(" after optimizing ", kOptLevel, nullptr, nullptr);
3555 auto* factory = &ht.irFactory();
3556 recordBCInstr(OpTraceletGuard, a, a.frontier());
3557 if (dumpIREnabled() || RuntimeOption::EvalJitCompareHHIR) {
3558 LifetimeInfo lifetime(factory);
3559 RegAllocInfo regs = allocRegsForTrace(trace, factory, &lifetime);
3560 finishPass(" after reg alloc ", kRegAllocLevel, &regs, &lifetime);
3561 assert(checkRegisters(trace, *factory, regs));
3562 AsmInfo ai(factory);
3563 genCodeForTrace(trace, a, astubs, factory, &m_bcMap, this, regs,
3564 &lifetime, &ai);
3565 if (RuntimeOption::EvalJitCompareHHIR) {
3566 std::ostringstream out;
3567 dumpTraceImpl(trace, out, &regs, &lifetime, &ai);
3568 } else {
3569 dumpTrace(kCodeGenLevel, trace, " after code gen ", &regs,
3570 &lifetime, &ai);
3571 }
3572 } else {
3573 RegAllocInfo regs = allocRegsForTrace(trace, factory);
3574 finishPass(" after reg alloc ", kRegAllocLevel, nullptr, nullptr);
3575 assert(checkRegisters(trace, *factory, regs));
3576 genCodeForTrace(trace, a, astubs, factory, &m_bcMap, this, regs);
3577 }
3579 m_numHHIRTrans++;
3580 }
3582 /*
3583 * Defines functions called by emitGenericReturn, and
3584 * cgGenericRetDecRefs.
3585 */
3586 void TranslatorX64::emitFreeLocalsHelpers() {
3587 Label doRelease;
3588 Label release;
3589 Label loopHead;
3591 /*
3592 * Note: the IR currently requires that we preserve r13/r14 across
3593 * calls to these free locals helpers.
3594 */
3595 static_assert(rVmSp == rbx, "");
3596 auto const rIter = rbx;
3597 auto const rFinished = r15;
3598 auto const rType = esi;
3599 auto const rData = rdi;
3601 moveToAlign(a, kNonFallthroughAlign);
3603 TRACE(1, "HOTSTUB: freeLocalsHelpers starts %lx\n", uintptr_t(a.frontier()));
3605 asm_label(a, release);
3606 a. loadq (rIter[TVOFF(m_data)], rData);
3607 a. cmpl (RefCountStaticValue, rData[FAST_REFCOUNT_OFFSET]);
3608 jccBlock<CC_Z>(a, [&] {
3609 a. decl (rData[FAST_REFCOUNT_OFFSET]);
3610 a. jz8 (doRelease);
3611 });
3612 a. ret ();
3613 asm_label(a, doRelease);
3614 jumpDestructor(a, PhysReg(rType), rax);
3616 moveToAlign(a, kJmpTargetAlign);
3617 m_freeManyLocalsHelper = a.frontier();
3618 a. lea (rVmFp[-cellsToBytes(kNumFreeLocalsHelpers)], rFinished);
3620 auto emitDecLocal = [&] {
3621 Label skipDecRef;
3623 emitLoadTVType(a, rIter[TVOFF(m_type)], rType);
3624 emitCmpTVType(a, KindOfRefCountThreshold, rType);
3625 a. jle8 (skipDecRef);
3626 a. call (release);
3627 recordIndirectFixup(a.frontier(), 0);
3628 asm_label(a, skipDecRef);
3629 };
3631 // Loop for the first few locals, but unroll the final
3632 // kNumFreeLocalsHelpers.
3633 asm_label(a, loopHead);
3634 emitDecLocal();
3635 a. addq (sizeof(TypedValue), rIter);
3636 a. cmpq (rIter, rFinished);
3637 a. jnz8 (loopHead);
3639 for (int i = 0; i < kNumFreeLocalsHelpers; ++i) {
3640 m_freeLocalsHelpers[kNumFreeLocalsHelpers - i - 1] = a.frontier();
3641 TRACE(1, "HOTSTUB: m_freeLocalsHelpers[%d] = %p\n",
3642 kNumFreeLocalsHelpers - i - 1, a.frontier());
3643 emitDecLocal();
3644 if (i != kNumFreeLocalsHelpers - 1) {
3645 a.addq (sizeof(TypedValue), rIter);
3646 }
3647 }
3649 a. addq (AROFF(m_r) + sizeof(TypedValue), rVmSp);
3650 a. ret (8);
3652 TRACE(1, "STUB freeLocals helpers: %zu bytes\n",
3653 size_t(a.frontier() - m_freeManyLocalsHelper));
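// Rough C++ equivalent of the stub emitted above (illustrative only; the
// hypothetical decRefAndRelease() stands in for the release/jumpDestructor
// path, and the real code keeps the iterator and end pointer in registers):
//
//   void freeManyLocals(TypedValue* iter, TypedValue* finished) {
//     do {                                                     // loopHead
//       if (iter->m_type > KindOfRefCountThreshold) decRefAndRelease(iter);
//       ++iter;
//     } while (iter != finished);
//     // ...followed by kNumFreeLocalsHelpers unrolled copies of the body,
//     // one entry point per m_freeLocalsHelpers[] slot.
//   }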
3656 TranslatorX64::TranslatorX64()
3657 : m_numNativeTrampolines(0),
3658 m_trampolineSize(0),
3659 m_defClsHelper(0),
3660 m_funcPrologueRedispatch(0),
3661 m_numHHIRTrans(0),
3662 m_catchTraceMap(128)
3663 {
3664 static const size_t kRoundUp = 2 << 20;
3665 const size_t kAHotSize = RuntimeOption::VMTranslAHotSize;
3666 const size_t kAProfSize = RuntimeOption::EvalJitPGO ?
3667 RuntimeOption::VMTranslAProfSize : 0;
3668 const size_t kASize = RuntimeOption::VMTranslASize;
3669 const size_t kAStubsSize = RuntimeOption::VMTranslAStubsSize;
3670 const size_t kGDataSize = RuntimeOption::VMTranslGDataSize;
3671 m_totalSize = kAHotSize + kASize + kAStubsSize + kAProfSize +
3672 kTrampolinesBlockSize + kGDataSize;
3674 TRACE(1, "TranslatorX64@%p startup\n", this);
3675 tx64 = this;
3677 if ((kAHotSize < (2 << 20)) ||
3678 (kASize < (10 << 20)) ||
3679 (kAStubsSize < (10 << 20)) ||
3680 (kGDataSize < (2 << 20))) {
3681 fprintf(stderr, "Allocation sizes ASize, AStubsSize, and GlobalDataSize "
3682 "are too small.\n");
3683 exit(1);
3684 }
3686 if (m_totalSize > (2ul << 30)) {
3687 fprintf(stderr,"Combined size of ASize, AStubSize, and GlobalDataSize "
3688 "must be < 2GiB to support 32-bit relative addresses\n");
3689 exit(1);
3690 }
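// Illustrative reasoning for the 2GiB cap above: an x86-64 rel32 jump/call
// reaches +/- 2^31 bytes, so any two addresses inside a contiguous region
// smaller than 2GiB are always mutually reachable with a 4-byte
// displacement.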
3692 static bool profileUp = false;
3693 if (!profileUp) {
3694 profileInit();
3695 profileUp = true;
3696 }
3698 auto enhugen = [&](void* base, int numMB) {
3699 if (RuntimeOption::EvalMapTCHuge) {
3700 assert((uintptr_t(base) & (kRoundUp - 1)) == 0);
3701 hintHuge(base, numMB << 20);
3702 }
3703 };
3705 // We want to ensure that the block for "a", "astubs",
3706 // "atrampolines", and "m_globalData" are nearby so that we can
3707 // short jump/point between them. Thus we allocate one slab and
3708 // divide it between "a", "astubs", and "atrampolines".
3710 // Using sbrk to ensure it's in the bottom 2G, so we avoid
3711 // the need for trampolines, and get to use shorter
3712 // instructions for tc addresses.
3713 const size_t allocationSize = m_totalSize + kRoundUp - 1;
3714 uint8_t *base = (uint8_t*)sbrk(allocationSize);
3715 if (base == (uint8_t*)-1) {
3716 base = (uint8_t*)low_malloc(allocationSize);
3717 if (!base) {
3718 base = (uint8_t*)malloc(allocationSize);
3719 }
3720 if (!base) {
3721 fprintf(stderr, "could not allocate %zd bytes for translation cache\n",
3722 allocationSize);
3723 exit(1);
3724 }
3725 }
3726 assert(base);
3727 tcStart = base;
3728 base += -(uint64_t)base & (kRoundUp - 1);
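// Worked example of the round-up above (illustrative): with
// kRoundUp == 2 << 20 == 0x200000 (2MB), a base of 0x7a1230 gains
// (-0x7a1230 & 0x1fffff) == 0x05edd0 and lands on 0x800000, the next
// 2MB boundary that enhugen's alignment assert expects.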
3729 enhugen(base, RuntimeOption::EvalTCNumHugeHotMB);
3730 TRACE(1, "init atrampolines @%p\n", base);
3731 atrampolines.init(base, kTrampolinesBlockSize);
3732 base += kTrampolinesBlockSize;
3734 m_unwindRegistrar = register_unwind_region(base, m_totalSize);
3735 TRACE(1, "init ahot @%p\n", base);
3736 ahot.init(base, kAHotSize);
3737 base += kAHotSize;
3738 TRACE(1, "init a @%p\n", base);
3739 a.init(base, kASize);
3740 aStart = base;
3741 base += kASize;
3742 TRACE(1, "init aprof @%p\n", base);
3743 aprof.init(base, kAProfSize);
3744 base += kAProfSize;
3745 base += -(uint64_t)base & (kRoundUp - 1);
3746 TRACE(1, "init astubs @%p\n", base);
3747 astubs.init(base, kAStubsSize);
3748 enhugen(base, RuntimeOption::EvalTCNumHugeColdMB);
3749 base += kAStubsSize;
3750 TRACE(1, "init gdata @%p\n", base);
3751 m_globalData.init(base, kGDataSize);
3753 // put the stubs into ahot, rather than a
3754 AsmSelector asmSel(AsmSelector::Args(this).hot(true));
3756 // Emit some special helpers that are shared across translations.
3758 // Emit a byte of padding. This is a kind of hacky way to
3759 // avoid hitting an assert in recordGdbStub when we call
3760 // it with m_callToExit - 1 as the start address.
3761 astubs.emitNop(1);
3763 // Call to exit with whatever value the program leaves on
3764 // the return stack.
3765 m_callToExit = emitServiceReq(SRFlags::Align | SRFlags::JmpInsteadOfRet,
3766 REQ_EXIT);
3768 /*
3769 * Helpers for returning from a function where the ActRec was pushed
3770 * by the interpreter.
3771 */
3772 m_retHelper = emitRetFromInterpretedFrame();
3773 m_genRetHelper = emitRetFromInterpretedGeneratorFrame();
3775 /*
3776 * Returning from a function where the ActRec was pushed by an
3777 * inlined call. This is separate from m_retHelper just for
3778 * debuggability---it does the same thing.
3779 */
3780 m_retInlHelper = emitRetFromInterpretedFrame();
3781 FTRACE(1, "retInlHelper: {}\n", (void*)m_retInlHelper);
3783 moveToAlign(astubs);
3784 m_resumeHelperRet = astubs.frontier();
3785 emitPopRetIntoActRec(astubs);
3786 m_resumeHelper = astubs.frontier();
3787 emitGetGContext(astubs, rax);
3788 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
3789 rVmFp);
3790 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
3791 Stack::topOfStackOffset(), rVmSp);
3792 emitServiceReq(REQ_RESUME);
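// Roughly, the resume helper above does the following before raising
// REQ_RESUME (illustrative C++ only; the field accesses are spelled via
// offsetof/topOfStackOffset in the real stub, and top() is a hypothetical
// accessor standing in for the stack-top field):
//
//   rVmFp = g_vmContext->m_fp;
//   rVmSp = g_vmContext->m_stack.top();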
3794 // Helper for DefCls, in astubs.
3795 {
3796 auto& a = astubs;
3797 if (false) {
3798 PreClass *preClass = 0;
3799 defClsHelper(preClass);
3800 }
3801 m_defClsHelper = TCA(a.frontier());
3802 PhysReg rEC = argNumToRegName[2];
3803 emitGetGContext(a, rEC);
3804 a. storeq (rVmFp, rEC[offsetof(VMExecutionContext, m_fp)]);
3805 a. storeq (argNumToRegName[1],
3806 rEC[offsetof(VMExecutionContext, m_pc)]);
3807 a. storeq (rax, rEC[offsetof(VMExecutionContext, m_stack) +
3808 Stack::topOfStackOffset()]);
3809 a. jmp (TCA(defClsHelper));
3810 }
3812 // The decRef helpers for when we bring the count down to zero. The caller
3813 // must bring the value into rdi. These can be burned in for all time, and
3814 // for all translations.
3815 typedef void* vp;
3817 TCA strDtor, arrDtor, objDtor, resDtor, refDtor;
3818 strDtor = emitUnaryStub(astubs, CppCall(getMethodPtr(&StringData::release)));
3819 arrDtor = emitUnaryStub(astubs,
3820 CppCall(getVTableOffset(&HphpArray::release)));
3821 objDtor = emitUnaryStub(astubs, CppCall(getMethodPtr(&ObjectData::release)));
3822 resDtor = emitUnaryStub(astubs,
3823 CppCall(getMethodPtr(&ResourceData::release)));
3824 refDtor = emitUnaryStub(astubs, CppCall(vp(getMethodPtr(&RefData::release))));
3826 m_dtorStubs[0] = nullptr;
3827 m_dtorStubs[typeToDestrIndex(BitwiseKindOfString)] = strDtor;
3828 m_dtorStubs[typeToDestrIndex(KindOfArray)] = arrDtor;
3829 m_dtorStubs[typeToDestrIndex(KindOfObject)] = objDtor;
3830 m_dtorStubs[typeToDestrIndex(KindOfResource)] = resDtor;
3831 m_dtorStubs[typeToDestrIndex(KindOfRef)] = refDtor;
3833 // Hot helper stubs in A:
3834 emitGenericDecRefHelpers();
3835 emitFreeLocalsHelpers();
3836 m_funcPrologueRedispatch = emitPrologueRedispatch(a);
3837 TRACE(1, "HOTSTUB: all stubs finished: %lx\n",
3838 uintptr_t(a.frontier()));
3840 if (trustSigSegv) {
3841 // Install SIGSEGV handler for timeout exceptions
3842 struct sigaction sa;
3843 struct sigaction old_sa;
3844 sa.sa_sigaction = &TranslatorX64::SEGVHandler;
3845 sa.sa_flags = SA_SIGINFO;
3846 sigemptyset(&sa.sa_mask);
3847 if (sigaction(SIGSEGV, &sa, &old_sa) != 0) {
3848 throw std::runtime_error(
3849 std::string("Failed to install SIGSEGV handler: ") +
3850 strerror(errno));
3851 }
3852 m_segvChain = old_sa.sa_flags & SA_SIGINFO ?
3853 old_sa.sa_sigaction : (sigaction_t)old_sa.sa_handler;
3854 }
3856 moveToAlign(astubs);
3857 m_stackOverflowHelper = astubs.frontier();
3858 // We are called from emitStackCheck, with the new stack frame in
3859 // rStashedAR. Get the caller's PC into rdi and save it off.
3860 astubs. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rax);
3861 astubs. load_reg64_disp_reg32(rStashedAR, AROFF(m_soff), rdi);
3862 astubs. load_reg64_disp_reg64(rax, Func::sharedOffset(), rax);
3863 astubs. load_reg64_disp_reg32(rax, Func::sharedBaseOffset(), rax);
3864 astubs. add_reg32_reg32(rax, rdi);
3865 emitEagerVMRegSave(astubs, SaveFP | SavePC);
3866 emitServiceReq(REQ_STACK_OVERFLOW);
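// Net effect of the loads above (illustrative summary, not in the original):
//
//   rdi == callerFunc->base() + stashedAR->m_soff   // caller's bytecode PC
//
// which, given the SavePC flag, is presumably what emitEagerVMRegSave
// records along with the frame pointer before REQ_STACK_OVERFLOW is raised.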
3867 }
3869 // Do gdb-specific initialization. This has to happen after
3870 // the TranslatorX64 constructor is called, because gdb initialization
3871 // calls back into TranslatorX64::Get().
3872 void TranslatorX64::initGdb() {
3873 // On a backtrace, gdb tries to locate the calling frame at address
3874 // returnRIP-1. However, for the first VM frame, there is no code at
3875 // returnRIP-1, since the AR was set up manually. For this frame,
3876 // record the tracelet address as starting from callToExit-1, so gdb
3877 // does not barf
3878 recordGdbStub(astubs, m_callToExit - 1, "HHVM::callToExit");
3880 recordBCInstr(OpRetFromInterp, astubs, m_retHelper);
3881 recordGdbStub(astubs, m_retHelper - 1, "HHVM::retHelper");
3882 recordBCInstr(OpResumeHelper, astubs, m_resumeHelper);
3883 recordBCInstr(OpDefClsHelper, astubs, m_defClsHelper);
3884 recordBCInstr(OpDtorStub, astubs,
3885 m_dtorStubs[typeToDestrIndex(BitwiseKindOfString)]);
3886 recordGdbStub(astubs, m_dtorStubs[typeToDestrIndex(BitwiseKindOfString)],
3887 "HHVM::destructorStub");
3890 TranslatorX64*
3891 TranslatorX64::Get() {
3893 * Called from outrageously early, pre-main code, and will
3894 * allocate the first translator space.
3896 if (!nextTx64) {
3897 nextTx64 = new TranslatorX64();
3898 nextTx64->initGdb();
3900 if (!tx64) {
3901 tx64 = nextTx64;
3903 assert(tx64);
3904 return tx64;
3907 template<int Arity>
3908 TCA TranslatorX64::emitNAryStub(X64Assembler& a, CppCall c) {
3909 static_assert(Arity < kNumRegisterArgs, "");
3911 // The callNAryStub has already saved these regs on a.
3912 RegSet alreadySaved;
3913 for (size_t i = 0; i < Arity; ++i) {
3914 alreadySaved |= RegSet(argNumToRegName[i]);
3915 }
3917 /*
3918 * We've made a call instruction, and pushed Arity args on the
3919 * stack. So the stack address will be odd coming into the stub if
3920 * Arity + 1 (for the call) is odd. We need to correct for this
3921 * when saving other registers below to keep SSE-friendly alignment
3922 * of the stack.
3923 */
3924 const int Parity = (Arity + 1) % 2;
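// Worked instances of the parity above (illustrative):
//   Arity == 1 (emitUnaryStub):  Arity + 1 == 2 slots, even -> Parity == 0
//   Arity == 2 (hypothetical):   Arity + 1 == 3 slots, odd  -> Parity == 1
// so PhysRegSaverParity below can keep the 16-byte (SSE-friendly) stack
// alignment across the emitCall.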
3926 // These dtor stubs are meant to be called with the call
3927 // instruction, unlike most translator code.
3928 moveToAlign(a);
3929 TCA start = a.frontier();
3930 /*
3931 * Preserve most caller-saved regs. The calling code has already
3932 * preserved regs in `alreadySaved'; we push the rest of the caller
3933 * saved regs and rbp. It should take 9 qwords in total, and the
3934 * incoming call instruction made it 10. This is an even number of
3935 * pushes, so we preserve the SSE-friendliness of our execution
3936 * environment (without real intervention from PhysRegSaverParity).
3938 * Note that we don't need to clean all registers because the only
3939 * reason we could need those locations written back is if stack
3940 * unwinding were to happen. These stubs can re-enter due to user
3941 * destructors, but exceptions are not allowed to propagate out of
3942 * those, so it's not a problem.
3943 */
3944 a. push (rbp); // {
3945 a. movq (rsp, rbp);
3946 {
3947 RegSet s = kGPCallerSaved - alreadySaved;
3948 PhysRegSaverParity rs(Parity, a, s);
3949 emitCall(a, c);
3950 }
3951 a. pop (rbp); // }
3952 a. ret ();
3953 return start;
3954 }
3956 TCA TranslatorX64::emitUnaryStub(X64Assembler& a, CppCall c) {
3957 return emitNAryStub<1>(a, c);
3958 }
3960 void TranslatorX64::registerCatchTrace(CTCA ip, TCA trace) {
3961 FTRACE(1, "registerCatchTrace: afterCall: {} trace: {}\n", ip, trace);
3962 m_catchTraceMap.insert(ip, trace);
3963 }
3965 TCA TranslatorX64::getCatchTrace(CTCA ip) const {
3966 TCA* found = m_catchTraceMap.find(ip);
3967 return found ? *found : nullptr;
3968 }
3970 void
3971 TranslatorX64::requestInit() {
3972 TRACE(1, "in requestInit(%" PRId64 ")\n", g_vmContext->m_currentThreadIdx);
3973 tl_regState = VMRegState::CLEAN;
3974 PendQ::drain();
3975 requestResetHighLevelTranslator();
3976 Treadmill::startRequest(g_vmContext->m_currentThreadIdx);
3977 memset(&s_perfCounters, 0, sizeof(s_perfCounters));
3978 Stats::init();
3979 }
3981 void
3982 TranslatorX64::requestExit() {
3983 if (s_writeLease.amOwner()) {
3984 s_writeLease.drop();
3985 }
3986 TRACE_MOD(txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
3987 " kept, %15" PRId64 " grabbed\n",
3988 Process::GetThreadIdForTrace(), s_writeLease.m_hintKept,
3989 s_writeLease.m_hintGrabbed);
3990 PendQ::drain();
3991 Treadmill::finishRequest(g_vmContext->m_currentThreadIdx);
3992 TRACE(1, "done requestExit(%" PRId64 ")\n", g_vmContext->m_currentThreadIdx);
3993 Stats::dump();
3994 Stats::clear();
3996 if (Trace::moduleEnabledRelease(Trace::tx64stats, 1)) {
3997 Trace::traceRelease("TranslatorX64 perf counters for %s:\n",
3998 g_context->getRequestUrl(50).c_str());
3999 for (int i = 0; i < tpc_num_counters; i++) {
4000 Trace::traceRelease("%-20s %10" PRId64 "\n",
4001 kPerfCounterNames[i], s_perfCounters[i]);
4002 }
4003 Trace::traceRelease("\n");
4004 }
4005 }
4007 bool
4008 TranslatorX64::isPseudoEvent(const char* event) {
4009 for (auto name : kPerfCounterNames) {
4010 if (!strcmp(event, name)) {
4011 return true;
4012 }
4013 }
4014 return false;
4015 }
4017 void
4018 TranslatorX64::getPerfCounters(Array& ret) {
4019 for (int i = 0; i < tpc_num_counters; i++) {
4020 // Until Perflab can automatically scale the values we give it to
4021 // an appropriate range, we have to fudge these numbers so they
4022 // look more like reasonable hardware counter values.
4023 ret.set(String::FromCStr(kPerfCounterNames[i]),
4024 s_perfCounters[i] * 1000);
4025 }
4027 if (RuntimeOption::EnableInstructionCounts) {
4028 auto doCounts = [&](unsigned begin, const char* const name) {
4029 int64_t count = 0;
4030 for (; begin < Stats::Instr_InterpOneHighInvalid;
4031 begin += STATS_PER_OPCODE) {
4032 count += Stats::tl_counters[Stats::StatCounter(begin)];
4033 }
4034 ret.set(String::FromCStr(name), count);
4035 };
4037 doCounts(Stats::Instr_TranslLowInvalid + STATS_PER_OPCODE,
4038 kInstrCountTx64Name);
4039 doCounts(Stats::Instr_TranslIRPostLowInvalid + STATS_PER_OPCODE,
4040 kInstrCountIRName);
4041 }
4042 }
4044 TranslatorX64::~TranslatorX64() {
4045 freeSlab(atrampolines.base(), m_totalSize);
4046 }
4048 static Debug::TCRange rangeFrom(const X64Assembler& a, const TCA addr,
4049 bool isAstubs) {
4050 assert(a.contains(addr));
4051 return Debug::TCRange(addr, a.frontier(), isAstubs);
4052 }
4054 void TranslatorX64::recordBCInstr(uint32_t op,
4055 const X64Assembler& a,
4056 const TCA addr) {
4057 if (addr != a.frontier()) {
4058 m_debugInfo.recordBCInstr(Debug::TCRange(addr, a.frontier(),
4059 &a == &astubs ? true : false), op);
4060 }
4061 }
4063 void TranslatorX64::recordGdbTranslation(SrcKey sk,
4064 const Func* srcFunc,
4065 const X64Assembler& a,
4066 const TCA start,
4067 bool exit,
4068 bool inPrologue) {
4069 if (start != a.frontier()) {
4070 assert(s_writeLease.amOwner());
4071 if (!RuntimeOption::EvalJitNoGdb) {
4072 m_debugInfo.recordTracelet(rangeFrom(a, start,
4073 &a == &astubs ? true : false),
4074 srcFunc,
4075 srcFunc->unit() ?
4076 srcFunc->unit()->at(sk.offset()) : nullptr,
4077 exit, inPrologue);
4078 }
4079 if (RuntimeOption::EvalPerfPidMap) {
4080 m_debugInfo.recordPerfMap(rangeFrom(a, start,
4081 &a == &astubs ? true : false),
4082 srcFunc, exit, inPrologue);
4083 }
4084 }
4085 }
4087 void TranslatorX64::recordGdbStub(const X64Assembler& a,
4088 const TCA start, const char* name) {
4089 if (!RuntimeOption::EvalJitNoGdb) {
4090 m_debugInfo.recordStub(rangeFrom(a, start, &a == &astubs ? true : false),
4091 name);
4092 }
4093 }
4095 size_t TranslatorX64::getCodeSize() {
4096 return a.used();
4097 }
4099 size_t TranslatorX64::getStubSize() {
4100 return astubs.used();
4101 }
4103 size_t TranslatorX64::getTargetCacheSize() {
4104 return TargetCache::s_frontier;
4105 }
4107 std::string TranslatorX64::getUsage() {
4108 std::string usage;
4109 size_t aHotUsage = ahot.used();
4110 size_t aProfUsage = aprof.used();
4111 size_t aUsage = a.used();
4112 size_t stubsUsage = astubs.used();
4113 size_t dataUsage = m_globalData.frontier - m_globalData.base;
4114 size_t tcUsage = TargetCache::s_frontier;
4115 size_t persistentUsage =
4116 TargetCache::s_persistent_frontier - TargetCache::s_persistent_start;
4117 Util::string_printf(
4118 usage,
4119 "tx64: %9zd bytes (%zd%%) in ahot.code\n"
4120 "tx64: %9zd bytes (%zd%%) in a.code\n"
4121 "tx64: %9zd bytes (%zd%%) in aprof.code\n"
4122 "tx64: %9zd bytes (%zd%%) in astubs.code\n"
4123 "tx64: %9zd bytes (%zd%%) in m_globalData\n"
4124 "tx64: %9zd bytes (%zd%%) in targetCache\n"
4125 "tx64: %9zd bytes (%zd%%) in persistentCache\n",
4126 aHotUsage, 100 * aHotUsage / ahot.capacity(),
4127 aUsage, 100 * aUsage / a.capacity(),
4128 aProfUsage, 100 * aProfUsage / aprof.capacity(),
4129 stubsUsage, 100 * stubsUsage / astubs.capacity(),
4130 dataUsage, 100 * dataUsage / m_globalData.size,
4131 tcUsage,
4132 400 * tcUsage / RuntimeOption::EvalJitTargetCacheSize / 3,
4133 persistentUsage,
4134 400 * persistentUsage / RuntimeOption::EvalJitTargetCacheSize);
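// Note on the last two percentages (illustrative derivation, not in the
// original): they appear to assume the non-persistent target cache gets
// 3/4 of EvalJitTargetCacheSize and the persistent cache the remaining
// 1/4, since
//   100 * tcUsage / (3/4 * size)         == 400 * tcUsage / size / 3
//   100 * persistentUsage / (1/4 * size) == 400 * persistentUsage / size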
4135 return usage;
4136 }
4138 bool TranslatorX64::addDbgGuards(const Unit* unit) {
4139 // TODO refactor
4140 // It grabs the write lease and iterates through the whole SrcDB...
4141 bool locked = s_writeLease.acquire(true);
4142 if (!locked) {
4143 return false;
4144 }
4145 struct timespec tsBegin, tsEnd;
4146 Timer::GetMonotonicTime(tsBegin);
4147 // Doc says even find() _could_ invalidate the iterator; in practice it should
4148 // be very rare, so go with it for now.
4149 for (SrcDB::iterator it = m_srcDB.begin(); it != m_srcDB.end(); ++it) {
4150 SrcKey const sk = SrcKey::fromAtomicInt(it->first);
4151 SrcRec& sr = *it->second;
4152 if (sr.unitMd5() == unit->md5() &&
4153 !sr.hasDebuggerGuard() &&
4154 isSrcKeyInBL(sk)) {
4155 addDbgGuardImpl(sk, sr);
4156 }
4157 }
4158 s_writeLease.drop();
4159 Timer::GetMonotonicTime(tsEnd);
4160 int64_t elapsed = gettime_diff_us(tsBegin, tsEnd);
4161 if (Trace::moduleEnabledRelease(Trace::tx64, 5)) {
4162 Trace::traceRelease("addDbgGuards got lease for %" PRId64 " us\n", elapsed);
4163 }
4164 return true;
4165 }
4167 bool TranslatorX64::addDbgGuard(const Func* func, Offset offset) {
4168 SrcKey sk(func, offset);
4170 if (SrcRec* sr = m_srcDB.find(sk)) {
4171 if (sr->hasDebuggerGuard()) {
4172 return true;
4173 }
4174 } else {
4175 // no translation yet
4176 return true;
4177 }
4179 if (debug) {
4180 if (!isSrcKeyInBL(sk)) {
4181 TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
4182 return false;
4183 }
4184 }
4185 bool locked = s_writeLease.acquire(true);
4186 if (!locked) {
4187 return false;
4188 }
4190 if (SrcRec* sr = m_srcDB.find(sk)) {
4191 addDbgGuardImpl(sk, *sr);
4192 }
4194 s_writeLease.drop();
4195 return true;
4196 }
4198 void TranslatorX64::addDbgGuardImpl(SrcKey sk, SrcRec& srcRec) {
4199 TCA dbgGuard = a.frontier();
4200 // Emit the checks for debugger attach
4201 emitTLSLoad<ThreadInfo>(a, ThreadInfo::s_threadInfo, rAsm);
4202 static COff dbgOff = offsetof(ThreadInfo, m_reqInjectionData) +
4203 RequestInjectionData::debuggerReadOnlyOffset();
4204 a. load_reg64_disp_reg32(rAsm, dbgOff, rAsm);
4205 a. testb((int8_t)0xff, rbyte(rAsm));
4206 // Branch to a special REQ_INTERPRET if attached
4208 TCA fallback = emitServiceReq(REQ_INTERPRET, sk.offset(), 0);
4209 a. jnz(fallback);
4211 // Emit a jump to the actual code
4212 TCA realCode = srcRec.getTopTranslation();
4213 prepareForSmash(a, kJmpLen);
4214 TCA dbgBranchGuardSrc = a.frontier();
4215 a. jmp(realCode);
4216 // Add it to srcRec
4217 srcRec.addDebuggerGuard(dbgGuard, dbgBranchGuardSrc);
4218 }
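// Shape of the guard emitted above (illustrative pseudo-assembly; the
// REQ_INTERPRET spelling and the attach-flag name are not literal):
//
//   dbgGuard:
//     load   ThreadInfo::s_threadInfo->m_reqInjectionData.<debugger flag>
//     testb  flag, flag
//     jnz    <service request: REQ_INTERPRET at sk.offset()>
//   dbgBranchGuardSrc:
//     jmp    <srcRec.getTopTranslation()>        // smashable jump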
4220 bool TranslatorX64::dumpTCCode(const char* filename) {
4221 #define OPEN_FILE(F, SUFFIX) \
4222 string F ## name = string(filename).append(SUFFIX); \
4223 FILE* F = fopen(F ## name .c_str(),"wb"); \
4224 if (F == nullptr) return false; \
4225 SCOPE_EXIT{ fclose(F); };
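// For reference (illustrative expansion, not in the original):
// OPEN_FILE(aFile, "_a") produces roughly
//
//   string aFilename = string(filename).append("_a");
//   FILE* aFile = fopen(aFilename.c_str(), "wb");
//   if (aFile == nullptr) return false;
//   SCOPE_EXIT{ fclose(aFile); };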
4227 OPEN_FILE(aFile, "_a");
4228 OPEN_FILE(aprofFile, "_aprof");
4229 OPEN_FILE(astubFile, "_astub");
4230 OPEN_FILE(helperAddrFile, "_helpers_addrs.txt");
4232 #undef OPEN_FILE
4234 // dump starting from the trampolines; this assumes processInit() places
4235 // trampolines before the translation cache
4236 size_t count = a.frontier() - atrampolines.base();
4237 bool result = (fwrite(atrampolines.base(), 1, count, aFile) == count);
4238 if (result) {
4239 count = aprof.used();
4240 result = (fwrite(aprof.base(), 1, count, aprofFile) == count);
4242 if (result) {
4243 count = astubs.used();
4244 result = (fwrite(astubs.base(), 1, count, astubFile) == count);
4246 if (result) {
4247 for(PointerMap::iterator iter = trampolineMap.begin();
4248 iter != trampolineMap.end();
4249 iter++) {
4250 void* helperAddr = iter->first;
4251 void* trampAddr = iter->second;
4252 char* functionName = Util::getNativeFunctionName(helperAddr);
4253 fprintf(helperAddrFile,"%10p %10p %s\n",
4254 trampAddr, helperAddr,
4255 functionName);
4256 free(functionName);
4257 }
4258 }
4259 return result;
4260 }
4262 // Returns true on success
4263 bool TranslatorX64::dumpTC(bool ignoreLease) {
4264 if (!ignoreLease && !s_writeLease.acquire(true)) return false;
4265 bool success = dumpTCData();
4266 if (success) {
4267 success = dumpTCCode("/tmp/tc_dump");
4268 }
4269 if (!ignoreLease) s_writeLease.drop();
4270 return success;
4271 }
4273 // Returns true on success
4274 bool tc_dump(void) {
4275 return TranslatorX64::Get() && TranslatorX64::Get()->dumpTC();
4276 }
4278 // Returns true on success
4279 bool TranslatorX64::dumpTCData() {
4280 gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
4281 if (!tcDataFile) return false;
4283 if (!gzprintf(tcDataFile,
4284 "repo_schema = %s\n"
4285 "a.base = %p\n"
4286 "a.frontier = %p\n"
4287 "aprof.base = %p\n"
4288 "aprof.frontier = %p\n"
4289 "astubs.base = %p\n"
4290 "astubs.frontier = %p\n\n",
4291 kRepoSchemaId,
4292 atrampolines.base(), a.frontier(),
4293 aprof.base(), aprof.frontier(),
4294 astubs.base(), astubs.frontier())) {
4295 return false;
4296 }
4298 if (!gzprintf(tcDataFile, "total_translations = %zu\n\n",
4299 m_translations.size())) {
4300 return false;
4301 }
4303 for (size_t t = 0; t < m_translations.size(); t++) {
4304 if (gzputs(tcDataFile,
4305 m_translations[t].print(getTransCounter(t)).c_str()) == -1) {
4306 return false;
4307 }
4308 }
4310 gzclose(tcDataFile);
4311 return true;
4312 }
4314 void TranslatorX64::invalidateSrcKey(SrcKey sk) {
4315 assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO);
4316 assert(s_writeLease.amOwner());
4317 /*
4318 * Reroute existing translations for SrcKey to an as-yet indeterminate
4319 * new one.
4320 */
4321 SrcRec* sr = m_srcDB.find(sk);
4322 assert(sr);
4323 /*
4324 * Since previous translations aren't reachable from here, we know we
4325 * just created some garbage in the TC. We currently have no mechanism
4326 * to reclaim this.
4327 */
4328 sr->replaceOldTranslations();
4329 }
4331 void TranslatorX64::setJmpTransID(TCA jmp) {
4332 if (m_mode != TransProfile) return;
4334 TransID transId = m_profData->curTransID();
4335 FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp, transId);
4336 m_jmpToTransID[jmp] = transId;
4337 }
4339 TranslatorX64::AsmSelector::AsmSelector(const Args& args)
4340 : m_tx(args.getTranslator())
4341 , m_select(args.getSelection()) {
4343 // If an assembler other than 'a' has already been selected, then just
4344 // keep that selection.
4345 if (m_tx->a.base() != m_tx->aStart) {
4346 m_select = AsmSelection::Default;
4347 }
4349 swap();
4350 }
4352 /*
4353 * Swap 'a' with 'ahot' or 'aprof'.
4354 * Note that, although we don't write to either tx->ahot or tx->aprof directly,
4355 * we still need to make sure that all assembler code areas are available
4356 * in a, astubs, aprof, and ahot, for example when we call asmChoose(addr, ...).
4357 */
4358 void TranslatorX64::AsmSelector::swap() {
4359 switch (m_select) {
4360 case AsmSelection::Profile: std::swap(m_tx->a, m_tx->aprof); break;
4361 case AsmSelection::Hot : std::swap(m_tx->a, m_tx->ahot) ; break;
4362 case AsmSelection::Default: break; // nothing to do
4363 }
4364 }
4366 TranslatorX64::AsmSelector::~AsmSelector() {
4367 swap();
4368 }
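// Usage sketch for the RAII pair above (illustrative; mirrors the call in
// the TranslatorX64 constructor):
//
//   {
//     AsmSelector asmSel(AsmSelector::Args(this).hot(true));
//     // code emitted through `a` now lands in ahot (if room is left)...
//   }  // ~AsmSelector swaps the assemblers back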
4370 TranslatorX64::AsmSelector::Args::Args(TranslatorX64* tx)
4371 : m_tx(tx)
4372 , m_select(AsmSelection::Default) {
4373 assert(m_tx != nullptr);
4374 }
4376 static const int kMaxTranslationBytes = 8192;
4378 TranslatorX64::AsmSelector::Args&
4379 TranslatorX64::AsmSelector::Args::hot(bool isHot) {
4380 // Profile has precedence over Hot.
4381 if (m_select == AsmSelection::Profile) return *this;
4383 // Make sure there's enough room left in ahot.
4384 if (isHot && m_tx->ahot.available() > kMaxTranslationBytes) {
4385 m_select = AsmSelection::Hot;
4386 } else {
4387 m_select = AsmSelection::Default;
4388 }
4389 return *this;
4390 }
4392 TranslatorX64::AsmSelector::Args&
4393 TranslatorX64::AsmSelector::Args::profile(bool isProf) {
4394 if (isProf) {
4395 m_select = AsmSelection::Profile;
4396 } else if (m_select == AsmSelection::Profile) {
4397 m_select = AsmSelection::Default;
4398 }
4399 return *this;
4400 }
4402 TranslatorX64::AsmSelection
4403 TranslatorX64::AsmSelector::Args::getSelection() const {
4404 return m_select;
4405 }
4407 TranslatorX64*
4408 TranslatorX64::AsmSelector::Args::getTranslator() const {
4409 return m_tx;
4410 }
4412 } // HPHP::Transl
4414 } // HPHP