Clean up a few inc/decrefs
[hiphop-php.git] / src / runtime / vm / translator / translator-x64.cpp
blob 17296cb2d7d4f9bbab3535b32470fedbd65b4578
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 */
16 #include <stdint.h>
17 #include <assert.h>
18 #include <unistd.h>
19 #include <sys/mman.h>
20 #include <strstream>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <strings.h>
24 #include <string>
25 #include <queue>
26 #include <zlib.h>
27 #include <unwind.h>
29 #ifdef __FreeBSD__
30 # include <ucontext.h>
31 typedef __sighandler_t *sighandler_t;
32 # define RIP_REGISTER(v) (v).mc_rip
33 #else
34 # define RIP_REGISTER(v) (v).gregs[REG_RIP]
35 #endif
37 #include <boost/bind.hpp>
38 #include <boost/optional.hpp>
39 #include <boost/utility/typed_in_place_factory.hpp>
40 #include <boost/scoped_ptr.hpp>
42 #include <util/pathtrack.h>
43 #include <util/trace.h>
44 #include <util/bitops.h>
45 #include <util/debug.h>
46 #include <util/ringbuffer.h>
47 #include <util/rank.h>
48 #include <util/timer.h>
49 #include <util/maphuge.h>
51 #include <runtime/base/tv_macros.h>
52 #include <runtime/vm/bytecode.h>
53 #include <runtime/vm/php_debug.h>
54 #include <runtime/vm/runtime.h>
55 #include <runtime/base/complex_types.h>
56 #include <runtime/base/execution_context.h>
57 #include <runtime/base/strings.h>
58 #include <runtime/base/zend/zend_string.h>
59 #include <runtime/base/runtime_option.h>
60 #include <runtime/base/server/source_root_info.h>
61 #include <runtime/ext/ext_continuation.h>
62 #include <runtime/vm/debug/debug.h>
63 #include <runtime/vm/translator/targetcache.h>
64 #include <runtime/vm/translator/log.h>
65 #include <runtime/vm/translator/translator-deps.h>
66 #include <runtime/vm/translator/translator-inline.h>
67 #include <runtime/vm/translator/translator-x64.h>
68 #include <runtime/vm/translator/asm-x64.h>
69 #include <runtime/vm/translator/srcdb.h>
70 #include <runtime/vm/translator/x64-util.h>
71 #include <runtime/vm/translator/unwind-x64.h>
72 #include <runtime/vm/pendq.h>
73 #include <runtime/vm/treadmill.h>
74 #include <runtime/vm/stats.h>
77 #include <runtime/vm/repo.h>
78 #include <runtime/vm/type-profile.h>
79 #include <runtime/vm/member_operations.h>
80 #include <runtime/vm/translator/abi-x64.h>
81 #include <runtime/eval/runtime/file_repository.h>
82 #include <runtime/vm/translator/hopt/ir.h>
83 #include <runtime/vm/translator/hopt/linearscan.h>
84 #include <runtime/vm/translator/hopt/opt.h>
85 #include <runtime/vm/translator/hopt/codegen.h>
87 #include <runtime/vm/translator/translator-x64-internal.h>
89 namespace HPHP {
90 namespace VM {
91 namespace Transl {
93 using namespace reg;
94 using namespace Util;
95 using namespace Trace;
96 using std::max;
98 #define TRANS_PERF_COUNTERS \
99 TPC(translate) \
100 TPC(retranslate) \
101 TPC(interp_bb) \
102 TPC(interp_instr) \
103 TPC(interp_one) \
104 TPC(max_trans) \
105 TPC(enter_tc) \
106 TPC(service_req)
108 #define TPC(n) "trans_" #n,
109 static const char* const kPerfCounterNames[] = {
110 TRANS_PERF_COUNTERS
112 #undef TPC
114 #define TPC(n) tpc_ ## n,
115 enum TransPerfCounter {
116 TRANS_PERF_COUNTERS
117 tpc_num_counters
119 #undef TPC
120 static __thread int64 s_perfCounters[tpc_num_counters];
121 #define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
123 #define NULLCASE() \
124 case KindOfUninit: case KindOfNull
126 #define STRINGCASE() \
127 case BitwiseKindOfString: case KindOfStaticString
129 // nextTx64: Global shared state. The tx64 that should be used for
130 // new requests going forward.
131 TranslatorX64* volatile nextTx64;
132 // tx64: Thread-local state. The tx64 we're using for the current request.
133 __thread TranslatorX64* tx64;
135 // Register dirtiness: thread-private.
136 __thread VMRegState tl_regState = REGSTATE_CLEAN;
138 static StaticString s___call(LITSTR_INIT("__call"));
139 static StaticString s___callStatic(LITSTR_INIT("__callStatic"));
141 // Initialize at most this many locals inline in function body prologue; more
142 // than this, and emitting a loop is more compact. To be precise, the actual
143 // crossover point in terms of code size is 6; 9 was determined by experiment to
144 // be the optimal point in certain benchmarks. #microoptimization
145 static const int kLocalsToInitializeInline = 9;
147 // An intentionally funny-looking-in-core-dumps constant for uninitialized
148 // instruction pointers.
149 static const uint64_t kUninitializedRIP = 0xba5eba11acc01ade;
151 static int
152 localOffset(int loc) {
153 PhysReg base;
154 int offset;
155 locToRegDisp(Location(Location::Local, loc), &base, &offset);
156 ASSERT(base == rVmFp);
157 return offset;
160 // Return the SrcKey for the operation that should follow the supplied
161 // NormalizedInstruction. (This might not be the next SrcKey in the
162 // unit if we merged some instructions or otherwise modified them
163 // during analysis.)
164 SrcKey nextSrcKey(const Tracelet& t, const NormalizedInstruction& i) {
165 return i.next ? i.next->source : t.m_nextSk;
168 // JccBlock --
169 // A raw condition-code block; assumes whatever comparison or ALU op
170 // that sets the Jcc has already executed.
171 template <int Jcc>
172 struct JccBlock {
173 mutable X64Assembler* m_a;
174 TCA m_jcc8;
175 mutable DiamondGuard* m_dg;
177 explicit JccBlock(X64Assembler& a)
178 : m_a(&a),
179 m_jcc8(a.code.frontier),
180 m_dg(new DiamondGuard(a)) {
181 a. jcc8(Jcc, m_a->code.frontier);
184 ~JccBlock() {
185 if (m_a) {
186 delete m_dg;
187 m_a->patchJcc8(m_jcc8, m_a->code.frontier);
191 private:
192 JccBlock(const JccBlock&);
193 JccBlock& operator=(const JccBlock&);
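// Example (sketch, in the same spirit as the IfElseBlock usage comment
// below): the template parameter is the condition that *skips* the block,
// so the body is emitted for the opposite case.
//
//   a.   test_reg64_reg64(rax, rax);
//   {
//     JccBlock<CC_Z> ifNonZero(a);   // jcc8(CC_Z) jumps over the block
//     // ... code emitted here runs only when rax != 0 ...
//   } // destructor patches the jcc8 to land here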
196 // IfElseBlock: like CondBlock, but with an else clause.
197 // a. test_reg_reg(rax, rax);
198 // { IfElseBlock<CC_Z> ifRax(a);
199 // // Code executed for rax != 0
200 // ifRax.Else();
201 // // Code executed for rax == 0
202 // }
204 template <int Jcc>
205 class IfElseBlock : boost::noncopyable {
206 X64Assembler& m_a;
207 TCA m_jcc8;
208 TCA m_jmp8;
209 public:
210 explicit IfElseBlock(X64Assembler& a) :
211 m_a(a), m_jcc8(a.code.frontier), m_jmp8(NULL) {
212 tx64->m_regMap.freeze();
213 m_a.jcc8(Jcc, m_a.code.frontier); // 1f
215 void Else() {
216 ASSERT(m_jmp8 == NULL);
217 m_jmp8 = m_a.code.frontier;
218 m_a.jmp8(m_jmp8); // 2f
219 // 1:
220 m_a.patchJcc8(m_jcc8, m_a.code.frontier);
222 ~IfElseBlock() {
223 ASSERT(m_jmp8 != NULL);
224 // 2:
225 m_a.patchJmp8(m_jmp8, m_a.code.frontier);
226 tx64->m_regMap.defrost();
230 static bool
231 typeCanBeStatic(DataType t) {
232 return t != KindOfObject && t != KindOfRef;
235 // IfCountNotStatic --
236 // Emits if (%reg->_count != RefCountStaticValue) { ... }.
237 // May short-circuit this check if the type is known to be
238 // static already.
239 struct IfCountNotStatic {
240 typedef CondBlock<TVOFF(_count),
241 RefCountStaticValue,
242 CC_Z> NonStaticCondBlock;
243 NonStaticCondBlock *m_cb; // might be null
244 IfCountNotStatic(X64Assembler& a,
245 PhysReg reg,
246 DataType t = KindOfInvalid) {
247 // Objects and variants cannot be static
248 if (typeCanBeStatic(t)) {
249 m_cb = new NonStaticCondBlock(a, reg);
250 } else {
251 m_cb = NULL;
255 ~IfCountNotStatic() {
256 delete m_cb;
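// Example (sketch): emitIncRef below wraps its refcount bump in this guard:
//
//   { // if (%base->_count != RefCountStaticValue)
//     IfCountNotStatic ins(a, base, dtype);
//     a.   add_imm32_disp_reg32(1, TVOFF(_count), base);
//   } // endif -- the check is omitted entirely when dtype can never be
//             // static (objects and refs), and the body is emitted unguarded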
260 // Segfault handler: figure out if it's an intentional segfault
261 // (timeout exception) and if so, act appropriately. Otherwise, pass
262 // the signal on.
263 void TranslatorX64::SEGVHandler(int signum, siginfo_t *info, void *ctx) {
264 TranslatorX64 *self = Get();
265 void *surprisePage =
266 ThreadInfo::s_threadInfo->m_reqInjectionData.surprisePage;
267 if (info->si_addr == surprisePage) {
268 ucontext_t *ucontext = (ucontext_t*)ctx;
269 TCA rip = (TCA)RIP_REGISTER(ucontext->uc_mcontext);
270 SignalStubMap::const_accessor a;
271 if (!self->m_segvStubs.find(a, rip)) {
272 NOT_REACHED();
274 TCA astubsCall = a->second;
276 // When this handler returns, "call" the astubs code for this
277 // surprise check.
278 RIP_REGISTER(ucontext->uc_mcontext) = (uintptr_t)astubsCall;
280 // We've processed this event; reset the page in case execution
281 // continues normally.
282 g_vmContext->m_stack.unprotect();
283 } else {
284 sighandler_t handler = (sighandler_t)self->m_segvChain;
285 if (handler == SIG_DFL || handler == SIG_IGN) {
286 signal(signum, handler);
287 raise(signum);
288 } else {
289 self->m_segvChain(signum, info, ctx);
295 * Copy a heap cell from memory to the stack.
297 * Use emitCopyToStack when you can safely change the state of the
298 * register map. When using emitCopyToStackRegSafe, you'll need to
299 * invalidate the stack location manually at an appropriate time.
302 void
303 TranslatorX64::emitCopyToStackRegSafe(X64Assembler& a,
304 const NormalizedInstruction& ni,
305 PhysReg src,
306 int off,
307 PhysReg tmpReg) {
308 ASSERT(off % sizeof(Cell) == 0);
309 emitCopyTo(a, src, 0, rVmSp, vstackOffset(ni, off), tmpReg);
312 void
313 TranslatorX64::emitCopyToStack(X64Assembler& a,
314 const NormalizedInstruction& ni,
315 PhysReg src,
316 int off) {
317 ScratchReg scratch(m_regMap);
319 FreezeRegs freeze(m_regMap);
320 emitCopyToStackRegSafe(a, ni, src, off, *scratch);
322 // Forget whatever we thought we knew about the stack.
323 m_regMap.invalidate(ni.outStack->location);
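// Sketch of the "RegSafe" contract described above: a caller that uses
// emitCopyToStackRegSafe directly supplies its own scratch register and is
// responsible for doing the equivalent of
//   m_regMap.invalidate(ni.outStack->location);
// itself, at whatever point its register state allows.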
327 * Emit code that does the same thing as tvSet().
329 * The `oldType' and `oldData' registers are used for temporary
330 * storage and unconditionally destroyed.
331 * `toPtr' will be destroyed iff the cell we're storing to is
332 * KindOfRef.
333 * The variant check will not be performed if toOffset is nonzero, so
334 * only pass a nonzero offset if you know the destination is not
335 * KindOfRef.
336 * `from' will not be modified.
338 void TranslatorX64::emitTvSetRegSafe(const NormalizedInstruction& i,
339 PhysReg from,
340 DataType fromType,
341 PhysReg toPtr,
342 int toOffset,
343 PhysReg oldType,
344 PhysReg oldData,
345 bool incRefFrom) {
346 ASSERT(!i.isNative());
347 ASSERT(!i.isSimple());
348 ASSERT(fromType != KindOfRef);
350 if (toOffset == 0) {
351 emitDerefIfVariant(a, toPtr);
353 a. load_reg64_disp_reg32(toPtr, toOffset + TVOFF(m_type), oldType);
354 a. load_reg64_disp_reg64(toPtr, toOffset + TVOFF(m_data), oldData);
355 emitStoreTypedValue(a, fromType, from, toOffset, toPtr);
356 if (incRefFrom) {
357 emitIncRef(from, fromType);
359 emitDecRefGenericReg(oldData, oldType);
362 void TranslatorX64::emitTvSet(const NormalizedInstruction& i,
363 PhysReg from,
364 DataType fromType,
365 PhysReg toPtr,
366 int toOffset,
367 bool incRefFrom) {
368 ScratchReg oldType(m_regMap);
369 ScratchReg oldData(m_regMap);
370 emitTvSetRegSafe(i, from, fromType, toPtr, toOffset,
371 *oldType, *oldData, incRefFrom);
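// Roughly the C-level operation the emitted code performs (cf. tvSet());
// a sketch, not the literal instruction sequence:
//
//   if (toOffset == 0) derefIfVariant(toPtr);        // may smash toPtr
//   oldType = type at (toPtr + toOffset);
//   oldData = data at (toPtr + toOffset);
//   store { fromType, from } at (toPtr + toOffset);
//   if (incRefFrom) incRef(from, fromType);
//   decRefGeneric(oldData, oldType);                  // may re-enter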
374 // Logical register move: ensures the value in src will be in dest
375 // after execution, but might do so in strange ways. Do not count on
376 // being able to smash dest to a different register in the future, e.g.
377 void
378 TranslatorX64::emitMovRegReg(X64Assembler& a, PhysReg src, PhysReg dest) {
379 SpaceRecorder("_RegMove", a);
380 if (src != dest) {
381 a. mov_reg64_reg64(src, dest);
385 void
386 TranslatorX64::emitMovRegReg(PhysReg src, PhysReg dest) {
387 emitMovRegReg(a, src, dest);
391 * emitPushAR --
393 * Push an activation record. Will return to the next instruction emitted by
394 * the invoker. Called on behalf of FPushFuncD and FPushFunc. If func is
395 * unknown, we will leave it to the caller to fill in m_func.
397 void
398 TranslatorX64::emitPushAR(const NormalizedInstruction& i, const Func* func,
399 const int bytesPopped /* = 0 */,
400 bool isCtor /* = false */,
401 bool clearThis /* = true */,
402 uintptr_t varEnvInvName /* = 0 */) {
403 if (func && phpBreakpointEnabled(func->name()->data())) {
404 translator_debug_break(a);
406 ASSERT(sizeof(Cell) < sizeof(ActRec));
407 // We are about to push an ActRec onto the stack. The stack grows down,
408 // so the offset of the beginning of the ActRec from the top of stack
409 // is -sizeof(ActRec).
410 int numArgs = i.imm[0].u_IVA;
411 int startOfActRec = bytesPopped - sizeof(ActRec);
412 size_t funcOff = startOfActRec + AROFF(m_func);
413 size_t thisOff = startOfActRec + AROFF(m_this);
414 size_t nargsOff = startOfActRec + AROFF(m_numArgsAndCtorFlag);
415 size_t varenvOff = startOfActRec + AROFF(m_varEnv);
416 size_t savedRbpOff = startOfActRec + AROFF(m_savedRbp);
418 BOOST_STATIC_ASSERT((
419 sizeof(((ActRec*)NULL)->m_numArgsAndCtorFlag) == sizeof(int32_t)
422 * rVmSp might not be up-to-date here, so we use emitVStackStore and
423  * emitVStackStoreImm, which know how to compute where the top of the
424  * stack currently is.
426 if (func) {
427 emitVStackStoreImm(a, i, (uintptr_t)func, funcOff);
428 if (clearThis) {
429 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
432 emitVStackStoreImm(a, i, ActRec::encodeNumArgs(numArgs, isCtor),
433 nargsOff, sz::dword);
434 emitVStackStoreImm(a, i, varEnvInvName, varenvOff, sz::qword, &m_regMap);
435 emitVStackStore(a, i, rVmFp, savedRbpOff, sz::qword);
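// Sketch of the stores above: the new ActRec begins at
// (stack top after bytesPopped) - sizeof(ActRec), and we fill in
//
//   m_savedRbp           <- rVmFp
//   m_func               <- func                      (when statically known)
//   m_this               <- 0                          (when clearThis)
//   m_numArgsAndCtorFlag <- encodeNumArgs(numArgs, isCtor)
//   m_varEnv             <- varEnvInvName
//
// The return address (m_savedRip) is recorded per call site, not here.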
438 template<int StackParity>
439 class PhysRegSaverParity {
440 protected:
441 X64Assembler& a;
442 RegSet s;
443 int numElts;
444 public:
445 PhysRegSaverParity(X64Assembler& a_, RegSet s_) : a(a_), s(s_) {
446 RegSet sCopy = s;
447 numElts = 0;
448 PhysReg reg;
449 while (sCopy.findFirst(reg)) {
450 a. pushr(reg);
451 sCopy.remove(reg);
452 numElts++;
454 if ((numElts & 1) == StackParity) {
455 // Maintain stack evenness for SIMD compatibility.
456 a. sub_imm32_reg64(8, rsp);
460 ~PhysRegSaverParity() {
461 if ((numElts & 1) == StackParity) {
462 // See above; stack parity.
463 a. add_imm32_reg64(8, rsp);
465 RegSet sCopy = s;
466 PhysReg reg;
467 while (sCopy.findLast(reg)) {
468 a. popr(reg);
469 sCopy.remove(reg);
474 // In shared stubs, we've already made the stack odd by calling
475 // from a to astubs. Calls from a are on an even rsp.
476 typedef PhysRegSaverParity<0> PhysRegSaverStub;
477 typedef PhysRegSaverParity<1> PhysRegSaver;
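// Example (sketch): bracketing a call to a C helper from the main
// translation cache (even rsp, hence PhysRegSaver); `some_helper' is a
// hypothetical stand-in:
//
//   {
//     PhysRegSaver saver(a, toSave | kSpecialCrossTraceRegs);
//     emitImmReg(a, someArg, argNumToRegName[0]);
//     a.   call((TCA)some_helper);
//   } // registers popped and parity padding undone here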
479 void
480 TranslatorX64::emitCallSaveRegs() {
481 ASSERT(!m_regMap.frozen());
482 m_regMap.cleanRegs(kCallerSaved);
485 static void UNUSED tc_debug_print(const char* message,
486 uintptr_t r1,
487 uintptr_t r2,
488 uintptr_t r3,
489 ActRec* fp) {
490 TRACE(1, "*********************** %s: %p %p %p (for : %s)\n",
491 message, (void*)r1, (void*)r2, (void*)r3,
492 fp->m_func ? fp->m_func->fullName()->data() : "[?]");
495 // Utility for debugging translations that will print a message,
496 // followed by the value of up to three registers.
497 void TranslatorX64::emitDebugPrint(Asm& a,
498 const char* message,
499 PhysReg r1,
500 PhysReg r2,
501 PhysReg r3) {
502 boost::optional<PhysRegSaver> aSaver;
503 boost::optional<PhysRegSaverStub> astubsSaver;
505 if (&a == &this->a) {
506 aSaver = boost::in_place<PhysRegSaver>(boost::ref(a), kAllX64Regs);
507 } else {
508 astubsSaver = boost::in_place<PhysRegSaverStub>(boost::ref(a),
509 kAllX64Regs);
512 a. mov_imm64_reg (uintptr_t(message), argNumToRegName[0]);
513 a. mov_reg64_reg64(r1, argNumToRegName[1]);
514 a. mov_reg64_reg64(r2, argNumToRegName[2]);
515 a. mov_reg64_reg64(r3, argNumToRegName[3]);
516 a. mov_reg64_reg64(rVmFp, argNumToRegName[4]);
517 a. call((TCA)tc_debug_print);
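// Example (hypothetical call site): dump a message plus three registers
// while debugging a translation:
//   emitDebugPrint(a, "after pushAR", rax, rdx, rcx);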
520 void ArgManager::cleanLocs() {
521 for (size_t i = 0; i < m_args.size(); ++i) {
522 // We only need to clean locations we are passing the address of.
523 // (ArgLoc passes the value in the register mapped for a given
524 // location, not the address of the location itself, so it doesn't
525 // need cleaning here.)
526 if (m_args[i].m_kind != ArgContent::ArgLocAddr) continue;
527 m_tx64.m_regMap.cleanLoc(*m_args[i].m_loc);
531 void ArgManager::computeUsed(std::map<PhysReg, size_t> &used,
532 std::vector<PhysReg> &actual) {
533 size_t n = m_args.size();
534 for (size_t i = 0; i < n; i++) {
535 PhysReg reg = InvalidReg;
536 if (m_args[i].m_kind == ArgContent::ArgReg ||
537 m_args[i].m_kind == ArgContent::ArgRegPlus) {
538 reg = m_args[i].m_reg;
539 } else if (m_args[i].m_kind == ArgContent::ArgLoc ||
540 m_args[i].m_kind == ArgContent::ArgDeref) {
541 reg = m_tx64.getReg(*m_args[i].m_loc);
542 } else {
543 continue;
545 TRACE(6, "ArgManager: arg %zd incoming reg r%d\n", i, reg);
546 used[reg] = i;
547 actual[i] = reg;
551 void
552 TranslatorX64::emitRB(X64Assembler& a,
553 RingBufferType t,
554 SrcKey sk, RegSet toSave) {
555 if (!Trace::moduleEnabledRelease(Trace::tx64, 5)) {
556 return;
558 PhysRegSaver rs(a, toSave | kSpecialCrossTraceRegs);
559 int arg = 0;
560 emitImmReg(a, t, argNumToRegName[arg++]);
561 emitImmReg(a, sk.m_funcId, argNumToRegName[arg++]);
562 emitImmReg(a, sk.m_offset, argNumToRegName[arg++]);
563 a. call((TCA)ringbufferEntry);
566 void
567 TranslatorX64::emitRB(X64Assembler& a,
568 RingBufferType t,
569 const char* msg,
570 RegSet toSave) {
571 if (!Trace::moduleEnabledRelease(Trace::tx64, 5)) {
572 return;
574 PhysRegSaver save(a, toSave | kSpecialCrossTraceRegs);
575 int arg = 0;
576 emitImmReg(a, (uintptr_t)msg, argNumToRegName[arg++]);
577 emitImmReg(a, strlen(msg), argNumToRegName[arg++]);
578 emitImmReg(a, t, argNumToRegName[arg++]);
579 a. call((TCA)ringbufferMsg);
583 * allocate the input registers for i, trying to
584 * match inputs to call arguments.
585 * if args[j] == ArgDontAllocate, the arg is skipped
586 * if args[j] == ArgAnyReg, it will be allocated as normal
587 * otherwise, args[j] should be a positional call argument,
588 * and allocInputsForCall will attempt to allocate it to
589 * argNumToRegName[args[j]].
591 void
592 TranslatorX64::allocInputsForCall(const NormalizedInstruction& i,
593 const int* args) {
594 RegSet blackList;
595 int arg;
597 * If any of the inputs is already in an argument
598 * register, blacklist it. ArgManager already takes
599 * care of shuffling registers efficiently
601 for (arg = i.inputs.size(); arg--; ) {
602 if (args[arg] != ArgDontAllocate &&
603 m_regMap.hasReg(i.inputs[arg]->location)) {
604 blackList |= RegSet(getReg(i.inputs[arg]->location));
607 bool hasAnyReg = false;
608 for (arg = i.inputs.size(); arg--; ) {
609 if (args[arg] != ArgAnyReg) {
610 if (args[arg] != ArgDontAllocate &&
611 !m_regMap.hasReg(i.inputs[arg]->location)) {
612 PhysReg target = argNumToRegName[args[arg]];
613 if (!blackList.contains(target)) {
614 m_regMap.cleanRegs(RegSet(target));
615 m_regMap.smashRegs(RegSet(target));
616 } else {
617 target = InvalidReg;
619 m_regMap.allocInputReg(i, arg, target);
621 } else {
622 hasAnyReg = true;
625 if (hasAnyReg) {
626 for (arg = i.inputs.size(); arg--; ) {
627 if (args[arg] == ArgAnyReg) {
628 m_regMap.allocInputReg(i, arg);
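// Example (sketch, hypothetical helper): to pass input 0 in the second call
// argument and let input 1 be allocated anywhere:
//
//   const int args[] = { 1, ArgAnyReg };
//   allocInputsForCall(i, args);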
634 void ArgManager::shuffleRegisters(std::map<PhysReg, size_t> &used,
635 std::vector<PhysReg> &actual) {
636 size_t n = m_args.size();
637 for (size_t i = 0; i < n; i++) {
638 if (actual[i] == InvalidReg)
639 continue;
641 if (!mapContains(used, argNumToRegName[i])) {
642 // There's no conflict, so just copy
643 TRACE(6, "ArgManager: arg %zd reg available, copying from r%d to r%d\n",
644 i, actual[i], argNumToRegName[i]);
645 // Do copy and data structure update here, because this way
646 // we can reuse the register in actual[i] later without problems.
647 m_tx64.emitMovRegReg(m_a, actual[i], argNumToRegName[i]);
648 used.erase(actual[i]);
649 actual[i] = argNumToRegName[i];
650 } else {
651 size_t j = used[argNumToRegName[i]];
652 if (actual[j] != actual[i]) {
653 // The register is used by some other value, so we must swap the two
654 // registers.
655 ASSERT(j > i);
656 ASSERT(actual[j] != InvalidReg);
657 PhysReg ri = actual[i],
658 rj = actual[j];
659 TRACE(6, "ArgManager: arg %zd register used by arg %zd, "
660 "swapping r%d with r%d\n", i, j, ri, rj);
662 // Clean the registers first
663 RegSet regs = RegSet(ri) | RegSet(rj);
664 m_tx64.m_regMap.cleanRegs(regs);
666 // Emit the actual swap
667 m_tx64.m_regMap.swapRegisters(ri, rj);
668 m_a. xchg_reg64_reg64(ri, rj);
670 // Update the data structure for later steps
671 for (size_t k = 0; k < n; k++) {
672 if (actual[k] == ri) {
673 actual[k] = rj;
674 } else if (actual[k] == rj) {
675 actual[k] = ri;
678 used[ri] = j;
679 used[rj] = i;
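// Worked example (sketch): suppose arg 0 currently lives in rsi and arg 1 in
// rdi, while the ABI wants arg 0 in rdi (argNumToRegName[0]) and arg 1 in
// rsi. The plain-copy path can't be used because rdi is marked used by arg 1,
// so we take the swap path: clean both registers, swapRegisters(rsi, rdi) in
// the map, emit xchg %rsi, %rdi, and patch actual[]/used[] so later
// iterations see the post-swap assignment.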
685 void ArgManager::emitValues(std::vector<PhysReg> &actual) {
686 for (size_t i = 0; i < m_args.size(); i++) {
687 switch(m_args[i].m_kind) {
688 case ArgContent::ArgLoc:
689 case ArgContent::ArgDeref:
690 case ArgContent::ArgReg:
691 TRACE(6, "ArgManager: copying arg %zd from r%d to r%d\n",
692 i, actual[i], argNumToRegName[i]);
693 m_tx64.emitMovRegReg(m_a, actual[i], argNumToRegName[i]);
694 // Emit dereference if needed
695 if (m_args[i].m_kind == ArgContent::ArgDeref) {
696 emitDeref(m_a, argNumToRegName[i], argNumToRegName[i]);
698 break;
700 // For any of these cases, the register should already be available.
701 // If it was used previously by an input value, shuffleRegisters
702 // should have moved it to the proper register from argNumToRegName.
703 case ArgContent::ArgImm:
704 emitImmReg(m_a, m_args[i].m_imm, argNumToRegName[i]);
705 break;
707 case ArgContent::ArgRegPlus:
708 if (m_args[i].m_imm) {
709 m_a. add_imm32_reg64(m_args[i].m_imm, argNumToRegName[i]);
711 break;
713 case ArgContent::ArgLocAddr:
715 PhysReg base;
716 int disp;
717 locToRegDisp(*m_args[i].m_loc, &base, &disp);
718 m_a. lea_reg64_disp_reg64(base, disp, argNumToRegName[i]);
720 break;
722 default:
723 // Should never happen
724 ASSERT(false);
729 void
730 TranslatorX64::emitCall(X64Assembler& a, TCA dest, bool killRegs) {
731 if (a.jmpDeltaFits(dest)) {
732 a. call(dest);
733 } else {
734 a. call(getNativeTrampoline(dest));
736 if (killRegs) {
737 // All caller-saved regs are now suspect.
738 m_regMap.smashRegs(kCallerSaved);
742 void
743 TranslatorX64::recordSyncPoint(X64Assembler& a, Offset pcOff, Offset spOff) {
744 m_pendingFixups.push_back(PendingFixup(a.code.frontier,
745 Fixup(pcOff, spOff)));
748 void
749 TranslatorX64::recordCall(Asm& a, const NormalizedInstruction& i) {
750 recordCallImpl<false>(a, i);
753 void
754 TranslatorX64::recordCall(const NormalizedInstruction& i) {
755 recordCall(a, i);
758 template <bool reentrant>
759 void
760 TranslatorX64::recordCallImpl(X64Assembler& a,
761 const NormalizedInstruction& i,
762 bool advance /* = false */) {
763 SrcKey sk = i.source;
764 Offset stackOff = i.stackOff + (vmfp() - vmsp());
765 if (advance) {
766 sk.advance(curUnit());
767 stackOff += getStackDelta(i);
769 ASSERT(i.checkedInputs ||
770 (reentrant && !i.isSimple()) ||
771 (!reentrant && !i.isNative()));
772 Offset pcOff = sk.offset() - curFunc()->base();
773 SKTRACE(2, sk, "record%sCall pcOff %d\n",
774 reentrant ? "Reentrant" : "", int(pcOff));
775 recordSyncPoint(a, pcOff, stackOff);
776 SKTRACE(2, sk, "record%sCall stackOff %d\n",
777 reentrant ? "Reentrant" : "", int(stackOff));
780 * Right now we assume call sites that need to record sync points
781 * may also throw exceptions. We record information about dirty
782 * callee-saved registers so we can spill their contents during
783 * unwinding. See unwind-x64.cpp.
785 if (!m_pendingUnwindRegInfo.empty()) {
786 if (Trace::moduleLevel(Trace::tunwind) >= 2) {
787 sk.trace("recordCallImpl has dirty callee-saved regs\n");
788 TRACE_MOD(Trace::tunwind, 2,
789 "CTCA: %p saving dirty callee regs:\n",
790 a.code.frontier);
791 for (int i = 0; i < UnwindRegInfo::kMaxCalleeSaved; ++i) {
792 if (m_pendingUnwindRegInfo.m_regs[i].dirty) {
793 TRACE_MOD(Trace::tunwind, 2, " %s\n",
794 m_pendingUnwindRegInfo.m_regs[i].pretty().c_str());
798 m_unwindRegMap.insert(a.code.frontier, m_pendingUnwindRegInfo);
799 m_pendingUnwindRegInfo.clear();
803 void TranslatorX64::prepareCallSaveRegs() {
804 emitCallSaveRegs(); // Clean caller-saved regs.
805 m_pendingUnwindRegInfo.clear();
807 RegSet rset = kCalleeSaved;
808 PhysReg reg;
809 while (rset.findFirst(reg)) {
810 rset.remove(reg);
811 if (!m_regMap.regIsDirty(reg)) continue;
812 const RegInfo* ri = m_regMap.getInfo(reg);
813 ASSERT(ri->m_cont.m_kind == RegContent::Loc);
815 // If the register is dirty, we'll record this so that we can
816 // restore it during stack unwinding if an exception is thrown.
817 m_pendingUnwindRegInfo.add(reg, ri->m_type, ri->m_cont.m_loc);
821 void
822 TranslatorX64::emitIncRef(PhysReg base, DataType dtype) {
823 if (!IS_REFCOUNTED_TYPE(dtype) && dtype != KindOfInvalid) {
824 return;
826 ASSERT(m_regMap.getInfo(base));
827 SpaceRecorder sr("_IncRef", a);
828 ASSERT(sizeof(((Cell*)NULL)->_count) == sizeof(int32_t));
829 { // if !static then
830 IfCountNotStatic ins(a, base, dtype);
832 * The optimization guide cautions against using inc; while it is
833    * compact, it does not modify the carry flag, creating a partial-flags
834    * dependency for any downstream flags-dependent code.
836 a. add_imm32_disp_reg32(1, TVOFF(_count), base);
837 } // endif
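// (Concretely, the add_imm32 above assembles to `addl $1, _count(%base)'
// rather than `incl _count(%base)'.)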
840 void
841 TranslatorX64::emitIncRefGenericRegSafe(PhysReg base,
842 int disp,
843 PhysReg tmpReg) {
844 ASSERT(m_regMap.getInfo(base));
845 { // if RC
846 IfRefCounted irc(a, base, disp);
847 a. load_reg64_disp_reg64(base, disp + TVOFF(m_data),
848 tmpReg);
849 { // if !static
850 IfCountNotStatic ins(a, tmpReg);
851 a. add_imm32_disp_reg32(1, TVOFF(_count), tmpReg);
852 } // endif
853 } // endif
856 void TranslatorX64::emitIncRefGeneric(PhysReg base, int disp) {
857 ScratchReg tmpReg(m_regMap);
858 emitIncRefGenericRegSafe(base, disp, *tmpReg);
861 static void emitGetGContext(X64Assembler& a, PhysReg dest) {
862 emitTLSLoad<ExecutionContext>(a, g_context, dest);
865 // emitEagerVMRegSave --
866 // Inline. Saves regs in-place in the TC. This is an unusual need;
867 // you probably want to lazily save these regs via recordCall and
868 // its ilk.
870 // SaveFP uses rVmFp, as usual. SavePC requires the caller to have
871 // placed the PC offset of the instruction about to be executed in
872 // rdi.
873 enum RegSaveFlags {
874 SaveFP = 1,
875 SavePC = 2
878 static TCA
879 emitEagerVMRegSave(X64Assembler& a,
880 int flags /* :: RegSaveFlags */) {
881 TCA start = a.code.frontier;
882 bool saveFP = bool(flags & SaveFP);
883 bool savePC = bool(flags & SavePC);
884 ASSERT((flags & ~(SavePC | SaveFP)) == 0);
886 PhysReg pcReg = rdi;
887 PhysReg rEC = rScratch;
888 ASSERT(!kSpecialCrossTraceRegs.contains(rdi));
890 emitGetGContext(a, rEC);
892 static COff spOff = offsetof(VMExecutionContext, m_stack) +
893 Stack::topOfStackOffset();
894 static COff fpOff = offsetof(VMExecutionContext, m_fp) - spOff;
895 static COff pcOff = offsetof(VMExecutionContext, m_pc) - spOff;
897 ASSERT(spOff != 0);
898 // Instruction selection note: this is an lea, but add is more
899 // compact and we can afford the flags bash.
900 a. add_imm32_reg64(spOff, rEC);
901 a. store_reg64_disp_reg64 (rVmSp, 0, rEC);
902 if (savePC) {
903 // We're going to temporarily abuse rVmSp to hold the current unit.
904 PhysReg rBC = rVmSp;
905 a. pushr(rBC);
906 // m_fp -> m_func -> m_unit -> m_bc + pcReg
907 a. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rBC);
908 a. load_reg64_disp_reg64(rBC, Func::unitOff(), rBC);
909 a. load_reg64_disp_reg64(rBC, Unit::bcOff(), rBC);
910 a. add_reg64_reg64(rBC, pcReg);
911 a. store_reg64_disp_reg64(pcReg, pcOff, rEC);
912 a. popr(rBC);
914 if (saveFP) {
915 a. store_reg64_disp_reg64 (rVmFp, fpOff, rEC);
917 return start;
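// Example (sketch): emitting an eager-save sequence that persists both fp
// and pc. Per the SavePC contract above, whatever jumps to the returned TCA
// must already have the PC offset in rdi:
//
//   TCA eagerSave = emitEagerVMRegSave(a, SaveFP | SavePC);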
921 * emitDecRef --
923 * Decrement a value's refcount and call the release helper if
924 * appropriate. emitDecRef requires that the caller knows the
925 * type at translation time.
927 void TranslatorX64::emitDecRef(Asm& a,
928 const NormalizedInstruction& i,
929 PhysReg rDatum,
930 DataType type) {
931 ASSERT(type != KindOfInvalid);
932 if (!IS_REFCOUNTED_TYPE(type)) {
933 return;
936 ASSERT(!i.isNative());
937 ASSERT(!i.isSimple() || !typeReentersOnRelease(type));
938 SpaceRecorder sr("_DecRef", a);
939 { // if !static
940 IfCountNotStatic ins(a, rDatum, type);
941 a. sub_imm32_disp_reg32(1, TVOFF(_count), rDatum);
943 ASSERT(type >= 0 && type < MaxNumDataTypes);
944 if (&a == &this->astubs) {
945 JccBlock<CC_NZ> ifZero(a);
946 callUnaryStub(a, i, m_dtorStubs[type], rDatum);
947 } else {
948 UnlikelyIfBlock<CC_Z> ifZero(this->a, astubs);
949 callUnaryStub(astubs, i, m_dtorStubs[type], rDatum);
951 } // endif
954 void TranslatorX64::emitDecRef(const NormalizedInstruction& i,
955 PhysReg rDatum,
956 DataType type) {
957 emitDecRef(a, i, rDatum, type);
960 void TranslatorX64::emitDecRefInput(Asm& a,
961 const NormalizedInstruction& i,
962 int input) {
963 DynLocation* value = i.inputs[input];
964 if (IS_REFCOUNTED_TYPE(value->outerType())) {
965 m_regMap.allocInputReg(i, input);
966 PhysReg rValue = getReg(value->location);
967 emitDecRef(a, i, rValue, value->outerType());
972 * emitDecRefGeneric --
974 * Decrement a value's refcount and call the release helper if
975 * appropriate. emitDecRefGeneric should only be used when the type
976 * is not known at translation time.
978 * emitDecRefGeneric operates on the memory location given by
979 * srcReg+disp, so the caller is responsible for ensuring that the
980 * memory location is up to date and not enregistered.
982 void TranslatorX64::emitDecRefGeneric(const NormalizedInstruction& i,
983 PhysReg srcReg, int disp /* = 0 */) {
984 SpaceRecorder sr("_DecRefGeneric", a);
986 * The full, inlined generic dec ref looks like:
988 * TypedValue* d = srcReg + disp;
989 * if (IS_REFCOUNTED_TYPE(d->m_type) && // a)
990 * d->_count != kStaticCount && // b)
991 * d->_count-- == 0) && // c)
992 * GenericDestroy(d); // d)
994 * We originally inlined *all* of a-d, and have experimented with sharing
995 * them all, too. At this writing (05-12-2012), inlining a) and outlining
996 * b-d seems to strike the right balance between compactness and not
997 * doing too much work in the common case where it is not refcounted.
1000 IfRefCounted irc(a, srcReg, disp);
1001 callUnaryReentrantStub(a, i, m_dtorGenericStub, srcReg, disp);
1005 // Same as emitDecRefGeneric, except for when we have the type in a
1006 // register as well. Same inlining/outlining choices as
1007 // emitDecRefGeneric above.
1008 void TranslatorX64::emitDecRefGenericReg(PhysReg rData, PhysReg rType) {
1009 SpaceRecorder sr("_DecRefGeneric", a);
1010 a. cmp_imm32_reg32(KindOfRefCountThreshold, rType);
1012 JccBlock<CC_BE> ifRefCounted(a);
1013 callBinaryStub(a, *m_curNI, m_dtorGenericStubRegs, rData, rType);
1018 * genericRefCountStub --
1020 * Shared code to decRef the TypedValue* of unknown, but refcounted, type
1021 * in rdi. Tightly coupled with emitDecRefGeneric.
1023 TCA TranslatorX64::genericRefCountStub(X64Assembler& a) {
1024 moveToAlign(a);
1025 FreezeRegs brr(m_regMap);
1026 TCA retval = a.code.frontier;
1028 // Note we make a real frame here: this is necessary so that the
1029 // fixup map can chase back to the caller of this stub if it needs
1030 // to sync regs.
1031 a. pushr(rbp); // {
1032 a. mov_reg64_reg64(rsp, rbp);
1034 PhysRegSaverStub prs(a, RegSet(rsi));
1035 // We already know the type was refcounted if we got here.
1036 a. load_reg64_disp_reg64(rdi, TVOFF(m_data), rsi);
1037 { // if !static
1038 IfCountNotStatic ins(a, rsi, KindOfInvalid);
1039 a. sub_imm32_disp_reg32(1, TVOFF(_count), rsi);
1040 { // if zero
1041 JccBlock<CC_NZ> ifZero(a);
1042 RegSet s = kCallerSaved - (RegSet(rdi) | RegSet(rsi));
1043 PhysRegSaver prs(a, s);
1044 a.call(TCA(tv_release_generic));
1045 } // endif
1046 } // endif
1048 a. popr(rbp); // }
1049 a. ret();
1050 return retval;
1053 TCA TranslatorX64::genericRefCountStubRegs(X64Assembler& a) {
1054 const PhysReg rData = argNumToRegName[0];
1055 const PhysReg rType = argNumToRegName[1];
1057 moveToAlign(a);
1058 TCA retval = a.code.frontier;
1059 FreezeRegs brr(m_regMap);
1061 // The frame here is needed for the same reason as in
1062 // genericRefCountStub.
1063 a. pushr(rbp); // {
1064 a. mov_reg64_reg64(rsp, rbp);
1066 IfCountNotStatic ins(a, rData, KindOfInvalid);
1067 a. sub_imm32_disp_reg32(1, TVOFF(_count), rData);
1069 JccBlock<CC_NZ> ifZero(a);
1070 // The arguments are already in the right registers.
1071 RegSet s = kCallerSaved - (RegSet(rData) | RegSet(rType));
1072 PhysRegSaverParity<1> saver(a, s);
1073 if (false) { // typecheck
1074 RefData* vp = NULL; DataType dt = KindOfUninit;
1075 (void)tv_release_typed(vp, dt);
1077 a.call(TCA(tv_release_typed));
1080 a. popr(rbp); // }
1081 a. ret();
1082 return retval;
1086 * Translation call targets. It is a lot easier, and a bit more
1087 * portable, to use C linkage from assembly.
1089 TCA TranslatorX64::retranslate(SrcKey sk, bool align, bool useHHIR) {
1090 if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk)) {
1091 // We are about to translate something known to be blacklisted by
1092 // the debugger; exit early.
1093 SKTRACE(1, sk, "retranslate abort due to debugger\n");
1094 return NULL;
1096 LeaseHolder writer(s_writeLease);
1097 if (!writer) return NULL;
1098 SKTRACE(1, sk, "retranslate\n");
1099 return translate(&sk, align, useHHIR);
1102 // Only use comes from HHIR's cgExitTrace() case TraceExitType::SlowNoProgress
1103 TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk,
1104 bool align,
1105 TCA toSmash) {
1106 if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk)) {
1107 // We are about to translate something known to be blacklisted by
1108 // the debugger; exit early.
1109 SKTRACE(1, sk, "retranslateAndPatchNoIR abort due to debugger\n");
1110 return NULL;
1112 LeaseHolder writer(s_writeLease);
1113 if (!writer) return NULL;
1114 SKTRACE(1, sk, "retranslateAndPatchNoIR\n");
1115 SrcRec* srcRec = getSrcRec(sk);
1116 if (srcRec->translations().size() == SrcRec::kMaxTranslations + 1) {
1117 // we've gone over the translation limit and already have an anchor
1118 // translation that will interpret, so just return NULL and force
1119 // interpretation of this BB.
1120 return NULL;
1122 TCA start = translate(&sk, align, false);
1123 if (start != NULL) {
1124 smash(getAsmFor(toSmash), toSmash, start);
1126 return start;
1130 * Satisfy an alignment constraint. If we're in a reachable section
1131 * of code, bridge the gap with nops. Otherwise, int3's.
1133 void
1134 TranslatorX64::moveToAlign(X64Assembler &aa,
1135 const size_t align /* =kJmpTargetAlign */,
1136 bool unreachable /* =true */) {
1137 using namespace HPHP::Util;
1138 SpaceRecorder sr("_Align", aa);
1139 ASSERT(isPowerOfTwo(align));
1140 size_t leftInBlock = align - ((align - 1) & uintptr_t(aa.code.frontier));
1141 if (leftInBlock == align) return;
1142 if (unreachable) {
1143 aa.emitInt3s(leftInBlock);
1144 return;
1146 aa.emitNop(leftInBlock);
1150 * Req machinery. We sometimes emit code that is unable to proceed
1151 * without translator assistance; e.g., a basic block whose successor is
1152 * unknown. We leave one of these request arg blobs in m_data, and point
1153 * to it at callout-time.
1156 // REQ_BIND_CALL
1157 struct ReqBindCall {
1158 SrcKey m_sourceInstr;
1159 TCA m_toSmash;
1160 int m_nArgs;
1161 bool m_isImmutable; // call was to known func.
1162 } m_bindCall;
1164 // ID to name mapping for tracing.
1165 static inline const char*
1166 reqName(int req) {
1167 static const char* reqNames[] = {
1168 #define REQ(nm) #nm,
1169 SERVICE_REQUESTS
1170 #undef REQ
1172 return reqNames[req];
1176 * Find or create a translation for sk. Returns TCA of "best" current
1177 * translation. May return NULL if it is currently impossible to create
1178 * a translation.
1181 TranslatorX64::getTranslation(const SrcKey *sk, bool align,
1182 bool forceNoHHIR /* = false */) {
1183 curFunc()->validate();
1184 SKTRACE(2, *sk, "getTranslation: curUnit %s funcId %llx offset %d\n",
1185 curUnit()->filepath()->data(),
1186 sk->m_funcId,
1187 sk->offset());
1188 SKTRACE(2, *sk, " funcId: %llx\n",
1189 curFunc()->getFuncId());
1191 if (curFrame()->hasVarEnv() && curFrame()->getVarEnv()->isGlobalScope()) {
1192 SKTRACE(2, *sk, "punting on pseudoMain\n");
1193 return NULL;
1195 if (const SrcRec* sr = m_srcDB.find(*sk)) {
1196 TCA tca = sr->getTopTranslation();
1197 if (tca) {
1198 SKTRACE(2, *sk, "getTranslation: found %p\n", tca);
1199 return tca;
1205 * Try to become the writer. We delay this until we *know* we will have
1206 * a need to create new translations, instead of just trying to win the
1207 * lottery at the dawn of time. Hopefully lots of requests won't require
1208 * any new translation.
1210 LeaseHolder writer(s_writeLease);
1211 if (!writer) return NULL;
1212 if (SrcRec* sr = m_srcDB.find(*sk)) {
1213 TCA tca = sr->getTopTranslation();
1214 if (tca) {
1215 // Handle extremely unlikely race; someone may have just already
1216 // added the first instance of this SrcRec while we did a
1217 // non-blocking wait on the write lease.
1218 return tca;
1219 } else {
1220 // Since we are holding the write lease, we know that sk is properly
1221 // initialized, except that it has no translations (due to
1222 // replaceOldTranslations)
1223 return retranslate(*sk, align,
1224 RuntimeOption::EvalJitUseIR && !forceNoHHIR);
1228 // We put retranslate requests at the end of our slab to more frequently
1229 // allow conditional jump fall-throughs
1231 TCA start = emitServiceReq(false, REQ_RETRANSLATE, 1, uint64_t(sk->offset()));
1232 SKTRACE(1, *sk, "inserting anchor translation for (%p,%d) at %p\n",
1233 curUnit(), sk->offset(), start);
1234 SrcRec* sr = m_srcDB.insert(*sk);
1235 sr->setFuncInfo(curFunc());
1236 sr->setAnchorTranslation(start);
1238 addTranslation(TransRec(*sk, curUnit()->md5(), TransAnchor, 0, 0, start,
1239 astubs.code.frontier - start));
1241 ASSERT(getTransRec(start)->kind == TransAnchor);
1243 return retranslate(*sk, align, RuntimeOption::EvalJitUseIR && !forceNoHHIR);
1247 TranslatorX64::translate(const SrcKey *sk, bool align, bool useHHIR) {
1248 INC_TPC(translate);
1249 ASSERT(vmfp() >= vmsp());
1250 ASSERT(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
1251 ASSERT(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
1253 if (useHHIR) {
1254 if (m_numHHIRTrans == RuntimeOption::EvalMaxHHIRTrans) {
1255 useHHIR = false;
1256 m_useHHIR = false;
1257 RuntimeOption::EvalJitUseIR = false;
1259 hhirTraceStart(sk->offset());
1260 } else {
1261 ASSERT(m_useHHIR == false);
1264 Tracelet tlet;
1265 analyze(sk, tlet);
1267 if (align) {
1268 moveToAlign(a);
1271 TCA start = a.code.frontier;
1272 translateTracelet(tlet);
1273 SKTRACE(1, *sk, "translate moved head from %p to %p\n",
1274 getTopTranslation(*sk), start);
1275 if (Trace::moduleEnabledRelease(tcdump, 1)) {
1276 static __thread int n;
1277 if (++n % 10000 == 0) {
1278 std::ofstream f("cfg.dot", std::ios_base::trunc);
1279 drawCFG(f);
1280 f.close();
1283 return start;
1287 * Returns true if a's current frontier can have an nBytes-long
1288 * instruction written without any risk of cache-tearing.
1290 bool
1291 TranslatorX64::isSmashable(X64Assembler& a, int nBytes) {
1292 ASSERT(nBytes <= int(kX64CacheLineSize));
1293 static const uint64 kCacheMask = ~(uint64(kX64CacheLineSize) - 1);
1294 uintptr_t iFrontier = uintptr_t(a.code.frontier);
1295 uintptr_t lastByte = iFrontier + nBytes - 1;
1296 return (iFrontier & kCacheMask) == (lastByte & kCacheMask);
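// Worked example (assuming 64-byte cache lines): a 5-byte jmp whose first
// byte sits at ...0x3e occupies ...0x3e..0x42 and straddles the ...0x40 line
// boundary, so isSmashable() is false and prepareForSmash() pads with nops up
// to the next line before the instruction is emitted.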
1299 void
1300 TranslatorX64::prepareForSmash(X64Assembler& a, int nBytes) {
1301 if (UNLIKELY(!isSmashable(a, nBytes))) {
1302 moveToAlign(a, kX64CacheLineSize, false);
1304 ASSERT(isSmashable(a, nBytes));
1307 void
1308 TranslatorX64::prepareForSmash(int nBytes) {
1309 prepareForSmash(a, nBytes);
1312 void
1313 TranslatorX64::smash(X64Assembler &a, TCA src, TCA dest) {
1314 ASSERT(canWrite());
1315 TRACE(2, "smash: %p -> %p\n", src, dest);
1319 * We are about to smash reachable code in the translation cache. A
1320 * hardware thread might be executing the very instruction we're
1321 * modifying. This is safe because:
1323 * 1. We align smashable instructions so that they reside on a single
1324 * cache line;
1326 * 2. We modify the instruction with a single processor store; and
1328 * 3. The smashed region contains only a single instruction in the
1329  *    original instruction stream (see jmp() -> emitJ32() -> bytes() in
1330  *    the assembler).
1332 CodeCursor cg(a, src);
1333 ASSERT(isSmashable(a, kJmpLen));
1334 if (dest > src && dest - src <= 7) {
1335 a. emitNop(dest - src);
1336 } else {
1337 a. jmp(dest);
1341 void TranslatorX64::protectCode() {
1342 mprotect(tx64->a.code.base, tx64->a.code.size, PROT_READ | PROT_EXEC);
1346 void TranslatorX64::unprotectCode() {
1347 mprotect(tx64->a.code.base, tx64->a.code.size,
1348 PROT_READ | PROT_WRITE | PROT_EXEC);
1351 void
1352 TranslatorX64::emitStackCheck(int funcDepth, Offset pc) {
1353 uint64_t stackMask = cellsToBytes(RuntimeOption::EvalVMStackElms) - 1;
1354 a. mov_reg64_reg64(rVmSp, rScratch); // copy to destroy
1355 a. and_imm64_reg64(stackMask, rScratch);
1356 a. sub_imm64_reg64(funcDepth + Stack::sSurprisePageSize, rScratch);
1357 ASSERT(m_stackOverflowHelper);
1358 a. jl(m_stackOverflowHelper); // Unlikely branch to failure.
1359 // Success.
1362 // Tests the surprise flags for the current thread. Should be used
1363 // before a jnz to surprise handling code.
1364 void
1365 TranslatorX64::emitTestSurpriseFlags() {
1366 CT_ASSERT(sizeof(((RequestInjectionData*)0)->conditionFlags) == 8);
1367 a.test_imm64_disp_reg64(-1, TargetCache::kConditionFlagsOff, rVmTl);
1370 void
1371 TranslatorX64::emitCheckSurpriseFlagsEnter(bool inTracelet, Offset pcOff,
1372 Offset stackOff) {
1373 emitTestSurpriseFlags();
1375 UnlikelyIfBlock<CC_NZ> ifTracer(a, astubs);
1376 if (false) { // typecheck
1377 const ActRec* ar = NULL;
1378 EventHook::FunctionEnter(ar, 0);
1380 astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
1381 CT_ASSERT(EventHook::NormalFunc == 0);
1382 astubs.xor_reg32_reg32(argNumToRegName[1], argNumToRegName[1]);
1383 emitCall(astubs, (TCA)&EventHook::FunctionEnter);
1384 if (inTracelet) {
1385 recordSyncPoint(astubs, pcOff, stackOff);
1386 } else {
1387 // If we're being called while generating a func prologue, we
1388 // have to record the fixup directly in the fixup map instead of
1389 // going through m_pendingFixups like normal.
1390 m_fixupMap.recordFixup(astubs.code.frontier, Fixup(pcOff, stackOff));
1395 void
1396 TranslatorX64::setArgInActRec(ActRec* ar, int argNum, uint64_t datum,
1397 DataType t) {
1398 TypedValue* tv =
1399 (TypedValue*)(uintptr_t(ar) - (argNum+1) * sizeof(TypedValue));
1400 tv->m_data.num = datum;
1401 tv->m_type = t;
1405 TranslatorX64::shuffleArgsForMagicCall(ActRec* ar) {
1406 if (!ar->hasInvName()) {
1407 return 0;
1409 const Func* f UNUSED = ar->m_func;
1410 f->validate();
1411 ASSERT(f->name()->isame(s___call.get())
1412 || f->name()->isame(s___callStatic.get()));
1413 ASSERT(f->numParams() == 2);
1414 TRACE(1, "shuffleArgsForMagicCall: ar %p\n", ar);
1415 ASSERT(ar->hasInvName());
1416 StringData* invName = ar->getInvName();
1417 ASSERT(invName);
1418 ar->setVarEnv(NULL);
1419 int nargs = ar->numArgs();
1420 // We need to make an array containing all the arguments passed by the
1421 // caller and put it where the second argument is
1422 HphpArray* argArray = NEW(HphpArray)(nargs);
1423 argArray->incRefCount();
1424 for (int i = 0; i < nargs; ++i) {
1425 TypedValue* tv =
1426 (TypedValue*)(uintptr_t(ar) - (i+1) * sizeof(TypedValue));
1427 argArray->nvAppend(tv, false);
1428 tvRefcountedDecRef(tv);
1430 // Put invName in the slot for first argument
1431 setArgInActRec(ar, 0, uint64_t(invName), BitwiseKindOfString);
1432 // Put argArray in the slot for second argument
1433 setArgInActRec(ar, 1, uint64_t(argArray), KindOfArray);
1434 // Fix up ActRec's numArgs
1435 ar->initNumArgs(2);
1436 return 1;
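// Sketch of the transformation above for a magic dispatch of $obj->foo(1,2,3):
//   before: arg slots hold 1, 2, 3; ar->numArgs() == 3; invName == "foo"
//   after:  arg slot 0 holds the method name "foo", arg slot 1 holds
//           array(1, 2, 3), ar->numArgs() == 2, and the varEnv/invName
//           union has been cleared.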
1440 * The standard VMRegAnchor treatment won't work for some cases called
1441 * during function preludes.
1443 * The fp sync machinery is fundamentally based on the notion that
1444 * instruction pointers in the TC are uniquely associated with source
1445 * HHBC instructions, and that source HHBC instructions are in turn
1446 * uniquely associated with SP->FP deltas.
1448 * run_intercept_helper/trimExtraArgs is called from the prologue of
1449 * the callee. The prologue is 1) still in the caller frame for now,
1450 * and 2) shared across multiple call sites. 1 means that we have the
1451 * fp from the caller's frame, and 2 means that this fp is not enough
1452 * to figure out sp.
1454 * However, the prologue passes us the callee actRec, whose predecessor
1455 * has to be the caller. So we can sync sp and fp by ourselves here.
1456 * Geronimo!
1458 static void sync_regstate_to_caller(ActRec* preLive) {
1459 ASSERT(tl_regState == REGSTATE_DIRTY);
1460 vmfp() = (TypedValue*)preLive->m_savedRbp;
1461 vmsp() = (TypedValue*)preLive - preLive->numArgs();
1462 if (ActRec* fp = g_vmContext->m_fp) {
1463 if (fp->m_func && fp->m_func->unit()) {
1464 vmpc() = fp->m_func->unit()->at(fp->m_func->base() + preLive->m_soff);
1467 tl_regState = REGSTATE_CLEAN;
1470 static uint64 run_intercept_helper(ActRec* ar, Variant* ihandler) {
1471 sync_regstate_to_caller(ar);
1472 bool ret = run_intercept_handler<true>(ar, ihandler);
1474 * Restore tl_regState manually in the no-exception case only. (The
1475 * VM regs are clean here---we only need to set them dirty if we are
1476 * stopping to execute in the TC again, which we won't be doing if
1477 * an exception is propagating.)
1479 tl_regState = REGSTATE_DIRTY;
1480 return ret;
1483 void
1484 TranslatorX64::trimExtraArgs(ActRec* ar) {
1485 ASSERT(!ar->hasInvName());
1487 sync_regstate_to_caller(ar);
1488 const Func* f = ar->m_func;
1489 int numParams = f->numParams();
1490 int numArgs = ar->numArgs();
1491 ASSERT(numArgs > numParams);
1492 int numExtra = numArgs - numParams;
1494 TRACE(1, "trimExtraArgs: %d args, function %s takes only %d, ar %p\n",
1495 numArgs, f->name()->data(), numParams, ar);
1497 if (f->attrs() & AttrMayUseVV) {
1498 ASSERT(!ar->hasExtraArgs());
1499 ar->setExtraArgs(ExtraArgs::allocateCopy(
1500 (TypedValue*)(uintptr_t(ar) - numArgs * sizeof(TypedValue)),
1501 numArgs - numParams));
1502 } else {
1503 // Function is not marked as "MayUseVV", so discard the extra arguments
1504 TypedValue* tv = (TypedValue*)(uintptr_t(ar) - numArgs*sizeof(TypedValue));
1505 for (int i = 0; i < numExtra; ++i) {
1506 tvRefcountedDecRef(tv);
1507 ++tv;
1509 ar->setNumArgs(numParams);
1512 // Only go back to dirty in a non-exception case. (Same reason as
1513 // above.)
1514 tl_regState = REGSTATE_DIRTY;
1518 TranslatorX64::getInterceptHelper() {
1519 if (false) { // typecheck
1520 Variant *h = get_intercept_handler(CStrRef((StringData*)NULL),
1521 (char*)NULL);
1522 bool c UNUSED = run_intercept_helper((ActRec*)NULL, h);
1524 if (!m_interceptHelper) {
1525 m_interceptHelper = TCA(astubs.code.frontier);
1526 astubs. load_reg64_disp_reg64(rStashedAR, AROFF(m_func),
1527 rax);
1528 astubs. lea_reg64_disp_reg64(rax, Func::fullNameOff(),
1529 argNumToRegName[0]);
1531 astubs. lea_reg64_disp_reg64(rax, Func::maybeInterceptedOff(),
1532 argNumToRegName[1]);
1534 astubs. call(TCA(get_intercept_handler));
1535 astubs. test_reg64_reg64(rax, rax);
1537 JccBlock<CC_NZ> ifNotIntercepted(astubs);
1538 astubs. ret();
1541 // we might re-enter, so align the stack
1542 astubs. sub_imm32_reg64(8, rsp);
1543 // Copy the old rbp into the savedRbp pointer.
1544 astubs. store_reg64_disp_reg64(rbp, 0, rStashedAR);
1546 PhysReg rSavedRip = r13; // XXX ideally don't hardcode r13 ... but
1547 // we need callee-saved and don't have
1548 // any scratch ones.
1550 // Fish out the saved rip. We may need to jump there, and the helper will
1551 // have wiped out the ActRec.
1552 astubs. load_reg64_disp_reg64(rStashedAR, AROFF(m_savedRip),
1553 rSavedRip);
1554 astubs. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1555 astubs. mov_reg64_reg64(rax, argNumToRegName[1]);
1556 astubs. call(TCA(run_intercept_helper));
1558 // Normally we'd like to recordReentrantCall here, but the vmreg sync'ing
1559 // for run_intercept_handler is a special little snowflake. See
1560 // run_intercept_handler for details.
1561 astubs. test_reg64_reg64(rax, rax);
1563 // If the helper returned false, don't execute this function. The helper
1564 // will have cleaned up the interceptee's arguments and AR, and pushed
1565 // the handler's return value; we now need to get out.
1567 // We don't need to touch rVmFp; it's still pointing to the caller of
1568 // the interceptee. We need to adjust rVmSp. Then we need to jump to the
1569 // saved rip from the interceptee's ActRec.
1570 JccBlock<CC_NZ> ifDontEnterFunction(astubs);
1571 astubs. add_imm32_reg64(16, rsp);
1572 astubs. lea_reg64_disp_reg64(rStashedAR, AROFF(m_r), rVmSp);
1573 astubs. jmp_reg(rSavedRip);
1575 astubs. add_imm32_reg64(8, rsp);
1576 astubs. ret();
1578 return m_interceptHelper;
1582 TranslatorX64::emitPrologueRedispatch(X64Assembler& a) {
1583 TCA retval;
1584 moveToAlign(a);
1585 retval = a.code.frontier;
1586 // We're in the wrong func prologue. By convention with emitFuncGuard,
1587 // rax contains the function we need to enter.
1589 ASSERT(kScratchCrossTraceRegs.contains(rax));
1590 ASSERT(kScratchCrossTraceRegs.contains(rdx));
1591 ASSERT(kScratchCrossTraceRegs.contains(rcx));
1593 // We don't know how many params we were invoked with. Infer it from
1594 // the stack and rStashedAR rather than reading it from the actrec.
1596 // mov %r15, %rdx
1597 // ld m_numParams(%rax), %ecx #ecx: targetFunc->numParams
1598 // sub %rbx, %rdx #edx: n_args
1599 // shr $4, rdx
1600 a. mov_reg64_reg64(rStashedAR, rdx);
1601 a. load_reg64_disp_reg32(rax, Func::numParamsOff(), rcx);
1602 a. sub_reg64_reg64(rVmSp, rdx);
1603 BOOST_STATIC_ASSERT(sizeof(TypedValue) == 16);
1604 a. shr_imm32_reg32(4, rdx); // rdx: numPassed
1606 // If we didn't pass too many args, directly dereference
1607 // func->m_prologues.
1608 a. cmp_reg32_reg32(rdx, rcx);
1609 TCA bToFixedProloguesCheck = a.code.frontier;
1610 a. jcc8(CC_L, bToFixedProloguesCheck);
1612 // cmp $kNumFixedPrologues, %rdx
1613 // jl numParamsCheck
1614 TCA actualDispatch = a.code.frontier;
1616 // rcx: prologueIdx
1617 // rax = func->prologues[numParams]
1618 // jmp rax
1619 a. load_reg64_disp_index_reg64(rax,
1620 Func::prologueTableOff(),
1621 rdx,
1622 rax);
1623 a. jmp_reg(rax);
1624 a. ud2();
1626 // Hmm, more parameters passed than the function expected. Did we pass
1627 // kNumFixedPrologues or more? If not, %rdx is still a perfectly
1628 // legitimate index into the func prologue table.
1629 // numParamsCheck:
1630 // cmp $kNumFixedPrologues, %rcx
1631 // jl dispatch
1632 a.patchJcc8(bToFixedProloguesCheck, a.code.frontier); // numParamsCheck:
1633 a. cmp_imm32_reg32(kNumFixedPrologues, rdx);
1634 a. jcc8(CC_L, actualDispatch);
1636 // Too many gosh-darned parameters passed. Go to numExpected + 1, which
1637 // is always a "too many params" entry point.
1639 // mov %rdx, %rcx
1640 // add $1, %rcx
1641 // jmp dispatch
1642 a. load_reg64_disp_index_reg64(rax,
1643 // %rcx + 1
1644 Func::prologueTableOff() + sizeof(TCA),
1645 rcx,
1646 rax);
1647 a. jmp_reg(rax);
1648 a. ud2();
1649 return retval;
1652 // The funcGuard gets skipped and patched by other code, so we have some
1653 // magic offsets.
1654 static const int kFuncMovImm = 6; // Offset to the immediate for expected func
1655 static const int kFuncGuardLen = 23;
1657 template<typename T>
1658 static T*
1659 funcGuardToFuncImm(TCA funcGuard) {
1660 T* retval = (T*)(funcGuard + kFuncMovImm + (2 - sizeof(T)/4));
1661 // We padded these so the immediate fits inside an aligned 8-byte region,
1662 // so xor-ing the address of its first byte with the address of its last
1663 // byte should be non-zero only in the bottom 3 bits.
1664 ASSERT(((uintptr_t(retval) ^ (uintptr_t(retval + 1) - 1)) & ~7) == 0);
1665 return retval;
1668 static inline bool
1669 funcGuardIsForFunc(TCA funcGuard, const Func* func) {
1670 intptr_t iptr = uintptr_t(func);
1671 if (deltaFits(iptr, sz::dword)) {
1672 return *funcGuardToFuncImm<int32_t>(funcGuard) == iptr;
1674 return *funcGuardToFuncImm<int64_t>(funcGuard) == iptr;
1677 static void
1678 disableFuncGuard(TCA funcGuard, Func* func) {
1679 ASSERT(funcGuardIsForFunc(funcGuard, func));
1680 if (deltaFits((intptr_t)func, sz::dword)) {
1681 *funcGuardToFuncImm<int32_t>(funcGuard) = 0;
1682 } else {
1683 *funcGuardToFuncImm<int64_t>(funcGuard) = 0;
1685 ASSERT(!funcGuardIsForFunc(funcGuard, func));
1689 TranslatorX64::emitFuncGuard(X64Assembler& a, const Func* func) {
1690 ASSERT(kScratchCrossTraceRegs.contains(rax));
1691 ASSERT(kScratchCrossTraceRegs.contains(rdx));
1693 // Ensure the immediate is safely smashable; the immediate needs
1694 // to be at a qword boundary, so we need to start the movImm at
1695 // (kAlign - kFuncMovImm) % 8.
1696 static const int kAlign = 8;
1697 static const int kAlignMask = kAlign - 1;
1698 int loBits = uintptr_t(a.code.frontier) & kAlignMask;
1699 a.emitNop(((kAlign - kFuncMovImm) - loBits) & kAlignMask);
1700 ASSERT((uintptr_t(a.code.frontier) & kAlignMask) == kAlign - kFuncMovImm);
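  // Worked example: kAlign - kFuncMovImm == 2, so after the padding above the
  // guard begins at an address == 2 (mod 8). The 4-byte load of m_func plus
  // the 2-byte rex+opcode of the mov_imm64 below then land the expected-Func
  // immediate entirely within one aligned qword (exactly on the boundary for
  // the 8-byte form), which is what lets funcGuardToFuncImm /
  // disableFuncGuard smash it with a single store.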
1701 TCA aStart = a.code.frontier;
1702 a. load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
1703 ASSERT((a.code.frontier - aStart) ==
1704 (kFuncMovImm - 2 /* rex + movimmOpcode */));
1705 a. mov_imm64_reg(uint64_t(func), rdx);
1706 a. cmp_reg64_reg64(rax, rdx);
1708 if (!m_funcPrologueRedispatch) {
1709 m_funcPrologueRedispatch = emitPrologueRedispatch(astubs);
1711 a. jnz(m_funcPrologueRedispatch);
1712 ASSERT(a.code.frontier - aStart <= kFuncGuardLen);
1713 a.emitNop(kFuncGuardLen - (a.code.frontier - aStart));
1714 ASSERT(a.code.frontier - aStart == kFuncGuardLen);
1715 return aStart;
1719 skipFuncCheck(TCA dest) {
1720 if (!dest || dest == (TCA)fcallHelperThunk) return dest;
1721 return dest + kFuncGuardLen;
1725 * funcPrologue --
1727 * Given a callee and a number of args, match up to the callee's
1728 * argument expectations and dispatch.
1730 * Call/return hand-shaking is a bit funny initially. At translation time,
1731 * we don't necessarily know what function we're calling. For instance,
1733 * f(g());
1735 * Will lead to a set of basic blocks like:
1737 * b1: pushfuncd "f"
1738 * pushfuncd "g"
1739 * fcall
1740 * b2: fcall
1742  * The fcall labelled "b2" above is not statically bindable in our
1743 * execution model.
1745 * We decouple the call work into a per-callsite portion, responsible
1746 * for recording the return address, and a per-(callee, numArgs) portion,
1747 * responsible for fixing up arguments and dispatching to remaining
1748 * code. We call the per-callee portion a "prologue."
1750 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
1751 * we're running "between" basic blocks, with all VM registers sync'ed.
1752 * However, we're also called in the middle of basic blocks, when dropping
1753 * entries into func->m_prologues. So don't go around using the
1754 * translation-time values of vmfp()/vmsp(), since they have an
1755 * unpredictable relationship to the source.
1757 bool
1758 TranslatorX64::checkCachedPrologue(const Func* func, int paramIdx,
1759 TCA& prologue) const {
1760 prologue = (TCA)func->getPrologue(paramIdx);
1761 if (prologue != (TCA)fcallHelperThunk && !s_replaceInFlight) {
1762 TRACE(1, "cached prologue %s(%d) -> cached %p\n",
1763 func->fullName()->data(), paramIdx, prologue);
1764 ASSERT(isValidCodeAddress(prologue));
1765 return true;
1767 return false;
1771 TranslatorX64::funcPrologue(Func* func, int nPassed) {
1772 func->validate();
1773 TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
1774 int numParams = func->numParams();
1775 int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
1777 bool funcIsMagic = func->isMagic();
1779 // Do a quick test before grabbing the write lease
1780 TCA prologue;
1781 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1783 // If the translator is getting replaced out from under us, refuse to
1784 // provide a prologue; we don't know whether this request is running on the
1785 // old or new context.
1786 LeaseHolder writer(s_writeLease);
1787 if (!writer || s_replaceInFlight) return NULL;
1788 // Double check the prologue array now that we have the write lease
1789 // in case another thread snuck in and set the prologue already.
1790 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1792 SpaceRecorder sr("_FuncPrologue", a);
1793 // Careful: this isn't necessarily the real entry point. For funcIsMagic
1794 // prologues, this is just a possible prologue.
1795 TCA aStart = a.code.frontier;
1796 TCA start = aStart;
1797 TCA stubStart = astubs.code.frontier;
1799 // Guard: we're in the right callee. This happens in magicStart for
1800 // magic callees.
1801 if (!funcIsMagic) {
1802 start = aStart = emitFuncGuard(a, func);
1805 emitRB(a, RBTypeFuncPrologueTry, func->fullName()->data());
1806   // Guard: we have enough stack space to complete this function.
1807 emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
1809 // NB: We have most of the register file to play with, since we know
1810 // we're between BB's. So, we hardcode some registers here rather
1811 // than using the scratch allocator.
1812 TRACE(2, "funcPrologue: user function: %s\n", func->name()->data());
1814 // Add a counter for the translation if requested
1815 if (RuntimeOption::EvalJitTransCounters) {
1816 emitTransCounterInc(a);
1819 if (!funcIsMagic) {
1820 // entry point for magic methods comes later
1821 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1824 SrcKey skFuncBody = emitPrologue(func, nPassed);
1826 if (funcIsMagic) {
1827     // the entry point for magic methods is here
1828 TCA magicStart = emitFuncGuard(a, func);
1829 ASSERT(numParams == 2);
1830 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1831 // Special __call prologue
1832 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1833 emitCall(a, TCA(TranslatorX64::shuffleArgsForMagicCall));
1834 // if shuffleArgs returns 0, that means this was not a magic call
1835 // and we should proceed to a prologue specialized for nPassed;
1836 // otherwise, proceed to a prologue specialized for nPassed==numParams (2).
1837 if (nPassed == 2) {
1838 a.jmp(start);
1839 } else {
1840 a.test_reg64_reg64(rax, rax);
1841 // z ==> not a magic call, go to prologue for nPassed
1842 if (deltaFits(start - (a.code.frontier + kJcc8Len), sz::byte)) {
1843 a.jcc8(CC_Z, start);
1844 } else {
1845 a.jcc(CC_Z, start);
1847 // this was a magic call
1848 // nPassed == 2
1849 // Fix up hardware stack pointer
1850 nPassed = 2;
1851 a. lea_reg64_disp_reg64(rStashedAR, -cellsToBytes(nPassed), rVmSp);
1852 // Optimization TODO: Reuse the prologue for args == 2
1853 emitPrologue(func, nPassed);
1855 start = magicStart;
1857 ASSERT(funcGuardIsForFunc(start, func));
1858 TRACE(2, "funcPrologue tx64 %p %s(%d) setting prologue %p\n",
1859 this, func->fullName()->data(), nPassed, start);
1860 ASSERT(isValidCodeAddress(start));
1861 func->setPrologue(paramIndex, start);
1863 addTranslation(TransRec(skFuncBody, func->unit()->md5(),
1864 TransProlog, aStart, a.code.frontier - aStart,
1865 stubStart, astubs.code.frontier - stubStart));
1867 recordGdbTranslation(skFuncBody, func->unit(),
1868 a, aStart,
1869 false, true);
1870 recordBCInstr(OpFuncPrologue, a, start);
1872 return start;
1875 static TCA callAndResume(ActRec *ar) {
1876 VMRegAnchor _(ar,true);
1877 g_vmContext->doFCall<true>(ar, g_vmContext->m_pc);
1878 return Translator::Get()->getResumeHelper();
1881 extern "C"
1882 TCA fcallHelper(ActRec* ar) {
1883 TCA tca =
1884 Translator::Get()->funcPrologue((Func*)ar->m_func, ar->numArgs());
1885 if (tca) {
1886 return tca;
1888 return callAndResume(ar);
1892 TranslatorX64::emitInterceptPrologue(Func* func, TCA next) {
1893 TCA start = a.code.frontier;
1894 a.mov_imm64_reg((uintptr_t)&func->maybeIntercepted(), rax);
1895 a.cmp_imm8_disp_reg8(0, 0, rax);
1896 TCA jcc8PatchAddr = NULL;
1897 if (next == NULL) {
1898 jcc8PatchAddr = a.code.frontier;
1899 a.jcc8(CC_E, jcc8PatchAddr);
1900 } else {
1901 a.jcc(CC_E, next);
1903 // Prologues are not really sites for function entry yet; we can get
1904 // here via an optimistic bindCall. Check that the func is as expected.
1906 a. mov_imm64_reg(uint64_t(func), rax);
1907 a. cmp_reg64_disp_reg64(rax, AROFF(m_func), rStashedAR);
1909 JccBlock<CC_NZ> skip(a);
1910 a.call(getInterceptHelper());
1912 if (jcc8PatchAddr != NULL) {
1913 a.patchJcc8(jcc8PatchAddr, a.code.frontier);
1915 return start;
1918 void
1919 TranslatorX64::interceptPrologues(Func* func) {
1920 if (!RuntimeOption::EvalJitEnableRenameFunction &&
1921 !(func->attrs() & AttrDynamicInvoke)) {
1922 return;
1924 if (func->maybeIntercepted() == -1) {
1925 return;
1927 func->maybeIntercepted() = -1;
1928 ASSERT(s_writeLease.amOwner());
1929 int maxNumPrologues = func->numPrologues();
1930 for (int i = 0; i < maxNumPrologues; i++) {
1931 TCA prologue = func->getPrologue(i);
1932 if (prologue == (unsigned char*)fcallHelperThunk)
1933 continue;
1934 ASSERT(funcGuardIsForFunc(prologue, func));
1935 // There might already be calls hard-coded to this via FCall.
1936     // Blow away the immediate comparison, so that we always use the Func*'s
1937 // prologue table. We use 0 (== NULL on our architecture) as the bit
1938 // pattern for an impossible Func.
1940 // Note that we're modifying reachable code.
1941 disableFuncGuard(prologue, func);
1942 ASSERT(funcGuardIsForFunc(prologue, NULL));
1944 // There's a prologue already generated; redirect it to first
1945 // call the intercept helper. First, reset it (leaking the old
1946 // prologue), so funcPrologue will re-emit it.
1947 func->setPrologue(i, (TCA)fcallHelperThunk);
1948 TCA addr = funcPrologue(func, i);
1949 ASSERT(funcGuardIsForFunc(addr, func));
1950 ASSERT(addr);
1951 func->setPrologue(i, addr);
1952 TRACE(1, "interceptPrologues %s prologue[%d]=%p\n",
1953 func->fullName()->data(), i, (void*)addr);
1957 SrcKey
1958 TranslatorX64::emitPrologue(Func* func, int nPassed) {
1959 int numParams = func->numParams();
1960 ASSERT(IMPLIES(func->maybeIntercepted() == -1,
1961 m_interceptsEnabled));
1962 if (m_interceptsEnabled &&
1963 !func->isPseudoMain() &&
1964 (RuntimeOption::EvalJitEnableRenameFunction ||
1965 func->attrs() & AttrDynamicInvoke)) {
1966 emitInterceptPrologue(func);
1969 Offset dvInitializer = InvalidAbsoluteOffset;
1971 if (nPassed > numParams) {
1972     // Too many args; a weird case, so just call out. Stash ar
1973 // somewhere callee-saved.
1974 if (false) { // typecheck
1975 TranslatorX64::trimExtraArgs((ActRec*)NULL);
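    // (The dead call above exists only so the compiler type-checks
    // trimExtraArgs' signature; emitCall below takes a raw TCA and gets no
    // such checking.)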
1977 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1978 emitCall(a, TCA(TranslatorX64::trimExtraArgs));
1979 // We'll fix rVmSp below.
1980 } else if (nPassed < numParams) {
1981 // Figure out which, if any, default value initializer to go to
1982 for (int i = nPassed; i < numParams; ++i) {
1983 const Func::ParamInfo& pi = func->params()[i];
1984 if (pi.hasDefaultValue()) {
1985 dvInitializer = pi.funcletOff();
1986 break;
1989 TRACE(1, "Only have %d of %d args; getting dvFunclet\n",
1990 nPassed, numParams);
1991 emitImmReg(a, nPassed, rax);
1992 // do { *(--rVmSp) = NULL; nPassed++; } while (nPassed < numParams);
1993 // This should be an unusual case, so optimize for code density
1994 // rather than execution speed; i.e., don't unroll the loop.
1995 TCA loopTop = a.code.frontier;
1996 a. sub_imm32_reg64(sizeof(Cell), rVmSp);
1997 a. add_imm32_reg32(1, rax);
1998 // XXX "missing argument" warnings need to go here
1999 emitStoreUninitNull(a, 0, rVmSp);
2000 a. cmp_imm32_reg32(numParams, rax);
2001 a. jcc8(CC_L, loopTop);
2004 // Entry point for numParams == nPassed is here.
2005 // Args are kosher. Frame linkage: set fp = ar.
2006 a. mov_reg64_reg64(rStashedAR, rVmFp);
2008 // We're in the callee frame; initialize locals. Unroll the loop all
2009 // the way if there are a modest number of locals to update;
2010 // otherwise, do it in a compact loop. If we're in a generator body,
2011 // named locals will be initialized by UnpackCont so we can leave
2012 // them alone here.
2013 int uninitLimit = func->isGenerator() ? func->numNamedLocals() : numParams;
2014 int numUninitLocals = func->numLocals() - uninitLimit;
2015 ASSERT(numUninitLocals >= 0);
2016 if (numUninitLocals > 0) {
2017 SpaceRecorder sr("_InitializeLocals", a);
2019     // If there are too many locals, emitting a loop to initialize them
2020     // is more compact than emitting a slew of movs inline.
2021 if (numUninitLocals > kLocalsToInitializeInline) {
2022 PhysReg loopReg = rcx;
2024 // rVmFp + rcx points to the count/type fields of the TypedValue we're
2025 // about to write to.
2026 int loopStart = -func->numLocals() * sizeof(TypedValue)
2027 + TVOFF(_count);
2028 int loopEnd = -uninitLimit * sizeof(TypedValue)
2029 + TVOFF(_count);
2031 emitImmReg(a, loopStart, loopReg);
2032 emitImmReg(a, 0, rdx);
2034 TCA topOfLoop = a.code.frontier;
2035 // do {
2036 // rVmFp[rcx].m_type = KindOfUninit;
2037 // } while(++rcx != loopEnd);
2039 // mov %rdx, 0x0(%rVmFp, %rcx, 1)
2040 a. emitRM(instr_mov, rVmFp, loopReg, 1, 0, rdx);
2041 a. add_imm32_reg64(sizeof(Cell), loopReg);
2042 a. cmp_imm32_reg64(loopEnd, loopReg);
2043 a. jcc8(CC_NE, topOfLoop);
2044 } else {
2045 PhysReg base;
2046 int disp, k;
2047 for (k = uninitLimit; k < func->numLocals(); ++k) {
2048 locToRegDisp(Location(Location::Local, k), &base, &disp);
2049 emitStoreUninitNull(a, disp, base);
2054 // Move rVmSp to the right place: just past all locals
2055 int frameCells = func->numSlotsInFrame();
2056 a. lea_reg64_disp_reg64(rVmFp, -cellsToBytes(frameCells), rVmSp);
2057 const Opcode* destPC = func->unit()->entry() + func->base();
2058 if (dvInitializer != InvalidAbsoluteOffset) {
2059 // dispatch to funclet.
2060 destPC = func->unit()->entry() + dvInitializer;
2062 SrcKey funcBody(func, destPC);
2064 // Check surprise flags in the same place as the interpreter: after
2065 // setting up the callee's frame but before executing any of its
2066 // code
2067 emitCheckSurpriseFlagsEnter(false, funcBody.m_offset - func->base(),
2068 frameCells);
2070 emitBindJmp(funcBody);
2071 return funcBody;
2074 void
2075 TranslatorX64::emitBindCall(const Tracelet& t,
2076 const NormalizedInstruction &ni,
2077 Offset atCall, Offset afterCall) {
2078 int numArgs = ni.imm[0].u_IVA;
2080 // If this is a call to a builtin and we don't need any argument
2081 // munging, we can skip the prologue system and do it inline.
2082 if (ni.funcd && ni.funcd->isBuiltin() &&
2083 numArgs == ni.funcd->numParams()) {
2084 ASSERT(ni.funcd->numLocals() == ni.funcd->numParams());
2085 ASSERT(ni.funcd->numIterators() == 0);
2086 a. lea_reg64_disp_reg64(rVmSp, cellsToBytes(numArgs), rVmFp);
2087 emitCheckSurpriseFlagsEnter(true, 0, numArgs);
2088 // rVmSp is already correctly adjusted, because there's no locals
2089 // other than the arguments passed.
2090 return emitNativeImpl(ni.funcd, false /* don't jump to return */);
2093 // Stash callee's rVmFp into rStashedAR for the callee's prologue
2094 a. lea_reg64_disp_reg64(rVmSp, cellsToBytes(numArgs), rStashedAR);
2095 emitBindCallHelper(rStashedAR, ni.source, ni.funcd, numArgs, (bool)ni.funcd);
2096 return;
2099 void
2100 TranslatorX64::emitBindCallHelper(register_name_t stashedAR,
2101 SrcKey srcKey,
2102 const Func* funcd,
2103 int numArgs,
2104 bool isImmutable) {
2105 // Whatever prologue we're branching to will check at runtime that we
2106 // went to the right Func*, correcting if necessary. We treat the first
2107 // Func we encounter as a decent prediction. Make space to burn in a
2108 // TCA.
2109 ReqBindCall* req = m_globalData.alloc<ReqBindCall>();
2110 a. mov_reg64_reg64(rStashedAR, serviceReqArgRegs[1]);
2111 prepareForSmash(kJmpLen);
2112 TCA toSmash = a.code.frontier;
2113 a. jmp(emitServiceReq(false, REQ_BIND_CALL, 1ull, req));
2115 TRACE(1, "will bind static call: tca %p, this %p, funcd %p\n",
2116 toSmash, this, funcd);
2117 req->m_toSmash = toSmash;
2118 req->m_nArgs = numArgs;
2119 req->m_sourceInstr = srcKey;
2120 req->m_isImmutable = isImmutable;
2122 return;
2125 // for documentation see bindJmpccFirst below
2126 void
2127 TranslatorX64::emitCondJmp(const SrcKey &skTaken, const SrcKey &skNotTaken,
2128 ConditionCode cc) {
2129 // should be true for SrcKeys generated via OpJmpZ/OpJmpNZ
2130 ASSERT(skTaken.m_funcId == skNotTaken.m_funcId);
2132 // reserve space for a smashable jnz/jmp pair; both initially point
2133 // to our stub
2134 prepareForSmash(kJmpLen + kJmpccLen);
2135 TCA old = a.code.frontier;
2137 moveToAlign(astubs);
2138 TCA stub = astubs.code.frontier;
2140 // begin code for the stub
2142   // We need to be careful here, as we are passing an extra parameter to
2143 // REQ_BIND_JMPCC_FIRST. However we can't pass this parameter via
2144 // emitServiceReq because that only supports constants/immediates, so
2145 // compute the last argument via setcc.
2146 astubs.setcc(cc, serviceReqArgRegs[4]);
2147 emitServiceReq(false /* align */, REQ_BIND_JMPCC_FIRST, 4ull,
2148 old,
2149 uint64_t(skTaken.offset()),
2150 uint64_t(skNotTaken.offset()),
2151 uint64_t(cc));
2153 a.jcc(cc, stub); // MUST use 4-byte immediate form
2154 a.jmp(stub); // MUST use 4-byte immediate form
2157 static void skToName(const SrcKey& sk, char* name) {
2158 sprintf(name, "sk_%08lx_%05d",
2159 long(sk.m_funcId), sk.offset());
2162 static void skToClusterName(const SrcKey& sk, char* name) {
2163 sprintf(name, "skCluster_%08lx_%05d",
2164 long(sk.m_funcId), sk.offset());
2167 static void translToName(const TCA tca, char* name) {
2168 sprintf(name, "tc_%p", tca);
2171 void TranslatorX64::drawCFG(std::ofstream& out) const {
2172 if (!isTransDBEnabled()) return;
2173 const char* indent = " ";
2174 static int genCount;
2175 int numSrcKeys = 0;
2176 int numTranslations = 0;
2177 out << "digraph srcdb" << genCount++ <<" {\n";
2178 out << indent << "size = \"8,11\";\n";
2179 out << indent << "ratio = fill;\n";
2180 for (SrcDB::const_iterator entry = m_srcDB.begin();
2181 entry != m_srcDB.end(); ++entry) {
2182 const SrcKey sk = SrcKey::fromAtomicInt(entry->first);
2183 // 1 subgraph per srcKey.
2184 char name[64];
2185 skToClusterName(sk, name);
2186 numSrcKeys++;
2187 out << indent << "subgraph " << name << "{\n";
2188     const char* indent = " ";
2189 skToName(sk, name);
2190 out << indent << name << "[shape=box];\n";
2191 const vector<TCA>& transls = entry->second->translations();
2192 for (vector<TCA>::const_iterator t = transls.begin(); t != transls.end();
2193 ++t) {
2194 out << indent << "// Translations: " << transls.size() << "\n";
2195 char transname[64];
2196 translToName(*t, transname);
2197 numTranslations++;
2198 out << indent << transname << "[fontsize=11.0];\n";
2199 out << indent << name << " -> " << transname << ";\n";
2201 // And, all translations on the same line
2202 out << indent << "{ rank = same; ";
2203 out << name << " ";
2204 for (vector<TCA>::const_iterator t = transls.begin(); t != transls.end();
2205 ++t) {
2206 char transname[64];
2207 translToName(*t, transname);
2208 out << transname << " ";
2210 out << indent << "}\n"; // subgraph
2211 out << indent << "}\n";
2214 // OK! Those were all the nodes. Now edges. While edges are physically
2215 // from translation to translation, they're virtually from srcKey to
2216 // srcKey, and that is how the db represents them.
2217 for (SrcDB::const_iterator entry = m_srcDB.begin(); entry != m_srcDB.end();
2218 ++entry) {
2219 char destName[64];
2220 skToName(SrcKey::fromAtomicInt(entry->first), destName);
2221 const vector<IncomingBranch>& ibs = entry->second->incomingBranches();
2222 out << indent << "// incoming branches to " << destName << "\n";
2223 for (vector<IncomingBranch>::const_iterator ib = ibs.begin();
2224 ib != ibs.end(); ++ib) {
2225 // Find the start of the translation that contains this branch
2226 const char *branchTypeToColorStr[] = {
2227 "black", // JMP
2228 "green", // JZ
2229 "red", // JNZ
2231 TransDB::const_iterator lowerTCA = m_transDB.lower_bound(ib->m_src);
2232 ASSERT(lowerTCA != m_transDB.end());
2233 char srcName[64];
2234 const TransRec* transRec = this->getTransRec(lowerTCA->second);
2235 skToName(transRec->src, srcName);
2236 out << indent << srcName << " -> " << destName << "[ color = " <<
2237 branchTypeToColorStr[ib->m_type] << "];\n";
2240 out << indent << "// " << numSrcKeys << " srckeys, " << numTranslations <<
2241 " tracelets\n";
2242 out << "}\n\n";
2246 * bindJmp --
2248 * Runtime service handler that patches a jmp to the translation of
2249 * u:dest from toSmash.
2252 TranslatorX64::bindJmp(TCA toSmash, SrcKey destSk, bool isAddr,
2253 bool forceNoHHIR /* = false */) {
2254 TCA tDest = getTranslation(&destSk, false, forceNoHHIR);
2255 if (!tDest) return NULL;
2256 LeaseHolder writer(s_writeLease);
2257 if (!writer) return tDest;
2258 SrcRec* sr = getSrcRec(destSk);
2259 if (isAddr) {
2260 sr->chainFrom(a, IncomingBranch((TCA*)toSmash));
2261 } else {
2262 sr->chainFrom(getAsmFor(toSmash), IncomingBranch(toSmash));
2264 return tDest;
2268 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
2270 * 1: j<CC> stubJmpccFirst
2271 * jmp stubJmpccFirst
2273 * Our "taken" argument tells us whether the branch at 1: was taken or
2274 * not; and therefore which of offTaken and offNotTaken to continue executing.
2275 * If we did take the branch, we now rewrite the code so that the branch is
2276 * straightened. This predicts that subsequent executions will go the same way
2277 * as the first execution.
2279 * jn<CC> stubJmpccSecond:offNotTaken
2280 * nop5 ; fallthru, or jmp if there's already a translation.
2281 * offTaken:
2283 * If we did not take the branch, we leave the sense of the condition
2284 * intact, while patching it up to go to the unexplored code:
2286 * j<CC> stubJmpccSecond:offTaken
2287 * nop5
2288 * offNotTaken:
2291 TranslatorX64::bindJmpccFirst(TCA toSmash,
2292 Offset offTaken, Offset offNotTaken,
2293 bool taken,
2294 ConditionCode cc) {
2295 const Func* f = curFunc();
2296 LeaseHolder writer(s_writeLease);
2297 if (!writer) return NULL;
2298 Offset offWillExplore = taken ? offTaken : offNotTaken;
2299 Offset offWillDefer = taken ? offNotTaken : offTaken;
2300 SrcKey dest(f, offWillExplore);
2301 TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
2302 "taken %d\n",
2303 offWillExplore, offWillDefer, cc, taken);
2305 // We want the branch to point to whichever side has not been explored
2306 // yet.
2307 if (taken) cc = ccNegate(cc);
2308 TCA stub =
2309 emitServiceReq(false, REQ_BIND_JMPCC_SECOND, 3,
2310 toSmash, uint64_t(offWillDefer), uint64_t(cc));
2312 Asm &as = getAsmFor(toSmash);
2313   // It's not clear where chainFrom should go if as is astubs
2314 ASSERT(&as == &a);
2316 // can we just directly fall through?
2317 // a jmp + jz takes 5 + 6 = 11 bytes
2318 bool fallThru = toSmash + kJmpccLen + kJmpLen == as.code.frontier &&
2319 !m_srcDB.find(dest);
2321 TCA tDest;
2322 tDest = getTranslation(&dest, !fallThru /* align */);
2323 if (!tDest) {
2324 return 0;
2326 ASSERT(s_writeLease.amOwner());
2328 * Roll over the jcc and the jmp/fallthru. E.g., from:
2330 * toSmash: jcc <jmpccFirstStub>
2331 * toSmash+6: jmp <jmpccFirstStub>
2332 * toSmash+11: <probably the new translation == tdest>
2334 * to:
2336 * toSmash: j[n]z <jmpccSecondStub>
2337 * toSmash+6: nop5
2338 * toSmash+11: newHotness
2340 CodeCursor cg(as, toSmash);
2341 a.jcc(cc, stub);
2342 getSrcRec(dest)->chainFrom(as, IncomingBranch(as.code.frontier));
2343 TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
2344 return tDest;
2347 // smashes a jcc to point to a new destination
2349 TranslatorX64::bindJmpccSecond(TCA toSmash, const Offset off,
2350 ConditionCode cc) {
2351 const Func* f = curFunc();
2352 SrcKey dest(f, off);
2353 TCA branch = getTranslation(&dest, true);
2354 LeaseHolder writer(s_writeLease, NO_ACQUIRE);
2355 if (branch && writer.acquire()) {
2356 SrcRec* destRec = getSrcRec(dest);
2357 destRec->chainFrom(getAsmFor(toSmash), IncomingBranch(cc, toSmash));
2359 return branch;
2363 * emitBindJmp --
2365 * Emit code to lazily branch to the srckey in next. Assumes current
2366 * basic block is closed (outputs synced, etc.).
2368 void
2369 TranslatorX64::emitBindJmp(X64Assembler& _a, const SrcKey& dest,
2370 ServiceRequest req /* = REQ_BIND_JMP */) {
2371 prepareForSmash(_a, kJmpLen);
2372 TCA toSmash = _a.code.frontier;
2373 if (&_a == &astubs) {
2374 _a. jmp(toSmash);
2377 TCA sr = emitServiceReq(false, req, 2,
2378 toSmash, uint64_t(dest.offset()));
2380 if (&_a == &astubs) {
2381 CodeCursor cc(_a, toSmash);
2382 _a. jmp(sr);
2383 } else {
2384 _a. jmp(sr);
2388 void
2389 TranslatorX64::emitBindJmp(const SrcKey& dest) {
2390 emitBindJmp(a, dest);
2393 void
2394 TranslatorX64::emitStringCheck(X64Assembler& _a,
2395 PhysReg base, int offset, PhysReg tmp) {
2396 // Treat KindOfString and KindOfStaticString identically; they
2397 // are bitwise identical. This is a port of our IS_STRING_TYPE
2398 // macro to assembly, and will have to change in sync with it.
2399 static_assert(IS_STRING_TYPE(7) && IS_STRING_TYPE(6),
2400 "Assembly version of IS_STRING_TYPE needs to be updated");
2401 _a. load_reg64_disp_reg32(base, offset, tmp);
2402 _a. and_imm32_reg32((signed char)(0xfe), tmp); // use 1-byte immediate
2403 _a. cmp_imm32_reg32(6, tmp);
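  // A rough C equivalent of the three instructions above, assuming the two
  // string tags really are 6 and 7 as the static_assert implies:
  //
  //   bool isString = ((type & 0xfe) == 6);  // mask the low bit, then compare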
2406 void
2407 TranslatorX64::emitTypeCheck(X64Assembler& _a, DataType dt,
2408 PhysReg base, int offset,
2409 PhysReg tmp/*= InvalidReg*/) {
2410 offset += TVOFF(m_type);
2411 if (IS_STRING_TYPE(dt)) {
2412 LazyScratchReg scr(m_regMap);
2413 if (tmp == InvalidReg) {
2414 scr.alloc();
2415 tmp = *scr;
2417 emitStringCheck(_a, base, offset, tmp);
2418 } else {
2419 _a. cmp_imm32_disp_reg32(dt, offset, base);
2423 void
2424 TranslatorX64::checkType(X64Assembler& a,
2425 const Location& l,
2426 const RuntimeType& rtt,
2427 SrcRec& fail) {
2428 // We can get invalid inputs as a side effect of reading invalid
2429 // items out of BBs we truncate; they don't need guards.
2430 if (rtt.isVagueValue()) return;
2432 if (m_useHHIR) {
2433 irCheckType(a, l, rtt, fail);
2434 return;
2437 PhysReg base;
2438 int disp = 0;
2439 SpaceRecorder sr("_CheckType", a);
2441 TRACE(1, Trace::prettyNode("Precond", DynLocation(l, rtt)) + "\n");
2443 locToRegDisp(l, &base, &disp);
2444 TRACE(2, "TypeCheck: %d(%%r%d)\n", disp, base);
2445 // Negative offsets from RSP are not yet allocated; they had
2446 // better not be inputs to the tracelet.
2447 ASSERT(l.space != Location::Stack || disp >= 0);
2448 if (Trace::moduleEnabled(Trace::stats, 2)) {
2449 Stats::emitInc(a, Stats::TraceletGuard_branch);
2451 if (rtt.isIter()) {
2452 a. cmp_imm32_disp_reg32(rtt.typeCheckValue(),
2453 disp + rtt.typeCheckOffset(),
2454 base);
2455 } else {
2456 emitTypeCheck(a, rtt.typeCheckValue(), base, disp, rax);
2458 emitFallbackJmp(fail);
2461 void
2462 TranslatorX64::emitFallbackJmp(SrcRec& dest) {
2463 prepareForSmash(kJmpccLen);
2464 dest.emitFallbackJump(a, IncomingBranch(CC_NZ, a.code.frontier));
2467 void
2468 TranslatorX64::emitFallbackJmp(Asm& as, SrcRec& dest) {
2469 prepareForSmash(as, kJmpccLen);
2470 dest.emitFallbackJump(as, IncomingBranch(CC_NZ, as.code.frontier));
2473 void
2474 TranslatorX64::emitFallbackUncondJmp(Asm& as, SrcRec& dest) {
2475 prepareForSmash(as, kJmpLen);
2476 dest.emitFallbackJump(as, IncomingBranch(as.code.frontier));
2479 void TranslatorX64::emitReqRetransNoIR(Asm& as, SrcKey& sk) {
2480 prepareForSmash(as, kJmpLen);
2481 TCA toSmash = as.code.frontier;
2482 if (&as == &astubs) {
2483 as.jmp(toSmash);
2486 TCA sr = emitServiceReq(REQ_RETRANSLATE_NO_IR, 2,
2487 toSmash, sk.offset());
2489 if (&as == &astubs) {
2490 CodeCursor cc(as, toSmash);
2491 as.jmp(sr);
2492 } else {
2493 as.jmp(sr);
2497 uint64_t TranslatorX64::packBitVec(const vector<bool>& bits, unsigned i) {
2498 uint64_t retval = 0;
2499 ASSERT(i % 64 == 0);
2500 ASSERT(i < bits.size());
2501 while (i < bits.size()) {
2502     retval |= uint64_t(bits[i]) << (i % 64);
2503 if ((++i % 64) == 0) {
2504 break;
2507 return retval;
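// Usage sketch (illustrative only): checkRefs below packs each 64-bit block of
// its mask/value vectors this way, with i stepping by 64; bit j of the result
// corresponds to bits[i + j]. 'deps' here is a hypothetical per-ActRec entry:
//
//   uint64_t mask  = packBitVec(deps.m_mask, i);
//   uint64_t value = packBitVec(deps.m_vals, i);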
2510 void
2511 TranslatorX64::checkRefs(X64Assembler& a,
2512 const SrcKey& sk,
2513 const RefDeps& refDeps,
2514 SrcRec& fail) {
2515 if (refDeps.size() == 0) {
2516 return;
2520 * We're still between BB's, so we're not using the real register
2521 * allocator.
2523 RegSet unusedRegs = kScratchCrossTraceRegs;
2524 DumbScratchReg rNumParams(unusedRegs);
2525 DumbScratchReg rMask(unusedRegs);
2526 DumbScratchReg rBits(unusedRegs);
2527 DumbScratchReg rExpectedBits(unusedRegs);
2528 DumbScratchReg rBitsValue(unusedRegs);
2529 DumbScratchReg rFunc(unusedRegs);
2531 // Set up guards for each pushed ActRec that we've made reffiness
2532 // assumptions about
2533 for (RefDeps::ArMap::const_iterator it = refDeps.m_arMap.begin();
2534 it != refDeps.m_arMap.end(); ++it) {
2535 // Be careful! The actual Func might have fewer refs than the number
2536 // of args we're passing. To forestall this, we're going to have to
2537 // keep checking i against the number of params. We consider invocations
2538 // with too many arguments to have passed their checks.
2539 int entryArDelta = it->first;
2541 if (m_useHHIR) {
2542 m_hhbcTrans->guardRefs(entryArDelta,
2543 it->second.m_mask,
2544 it->second.m_vals);
2545 continue;
2548 int32_t funcOff = cellsToBytes(entryArDelta) + AROFF(m_func);
2549 a. load_reg64_disp_reg64(rVmSp, funcOff, *rFunc); // rFunc <- Func*
2550 a. load_reg64_disp_reg32(*rFunc, Func::numParamsOff(),
2551 *rNumParams);
2552 a. load_reg64_disp_reg64(*rFunc, Func::refBitVecOff(),
2553 *rBits); // rBits <- m_refBitVec
2555 for (unsigned i = 0; i < it->second.m_mask.size(); i += 64) {
2556 ASSERT(i < it->second.m_vals.size());
2557 uint64_t mask = packBitVec(it->second.m_mask, i);
2558 if (mask == 0) {
2559 continue;
2561 uint64_t value = packBitVec(it->second.m_vals, i);
2563 emitImmReg(a, mask, *rMask);
2564 emitImmReg(a, value, *rExpectedBits);
2567 * Before trying to load this block off the bit vector, make
2568 * sure it actually exists. It's ok to index past numArgs
2569 * within one of these words, because the remaining bits will be
2570 * set to zero (or one in the case of the variadic by ref
2571 * builtins).
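     *
     * Roughly (illustrative C, not what is emitted) the guard below checks:
     *
     *   if (i + 1 <= func->numParams()) {
     *     ok = ((refBitVec[i / 64] & mask) == expectedBits);
     *   } else if (!(func->attrs() & AttrVariadicByRef)) {
     *     ok = (expectedBits == 0);      // remaining args must be by value
     *   } else {
     *     ok = (expectedBits == mask);   // remaining args must be by ref
     *   }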
2573 if (Trace::moduleEnabled(Trace::stats, 2)) {
2574 Stats::emitInc(a, Stats::TraceletGuard_branch);
2576 a. cmp_imm32_reg32(i + 1, *rNumParams);
2578 IfElseBlock<CC_L> ifFewEnoughArgs(a);
2580 // Load the appropriate qword off of the top actRec's func*.
2581 SKTRACE(2, sk, "reffiness mask %lx value %lx, ar @%d\n",
2582 mask, value, entryArDelta);
2583 a. load_reg64_disp_reg64(*rBits, sizeof(uint64) * (i / 64),
2584 *rBitsValue); // rBitsValue <- rBits[i / 64]
2585 a. and_reg64_reg64(*rMask, *rBitsValue); // rBitsValue &= rMask
2586 a. cmp_reg64_reg64(*rBitsValue, *rExpectedBits);
2587 emitFallbackJmp(fail);
2589 ifFewEnoughArgs.Else();
2591 a. test_imm32_disp_reg32(AttrVariadicByRef,
2592 Func::attrsOff(),
2593 *rFunc);
2595 IfElseBlock<CC_NZ> ifNotWeirdBuiltin(a);
2597 // Other than these builtins, we need to have all by value
2598 // args in this case.
2599 a. test_reg64_reg64(*rExpectedBits, *rExpectedBits);
2600 emitFallbackJmp(fail);
2602 ifNotWeirdBuiltin.Else();
2604 // If it is one of the weird builtins that has reffiness for
2605 // additional args, we have to make sure our expectation is
2606 // that these additional args are by ref.
2607 a. cmp_imm32_reg64((signed int)(-1ull & mask), *rExpectedBits);
2608 emitFallbackJmp(fail);
2616 * emitRetFromInterpretedFrame --
2618 * When the interpreter pushes a call frame, there is necessarily no
2619 * machine RIP available to return to. This helper fishes out the
2620 * destination from the frame and redirects execution to it via enterTC.
2623 TranslatorX64::emitRetFromInterpretedFrame() {
2624 int32_t arBase = sizeof(ActRec) - sizeof(Cell);
2625 moveToAlign(astubs);
2626 TCA stub = astubs.code.frontier;
2627   // Marshal our own args by hand here.
2628 astubs. lea_reg64_disp_reg64(rVmSp, -arBase, serviceReqArgRegs[0]);
2629 astubs. mov_reg64_reg64(rVmFp, serviceReqArgRegs[1]);
2630 (void) emitServiceReq(false, REQ_POST_INTERP_RET, 0ull);
2631 return stub;
2635 * fcallHelperThunk
2636 * Note: Assumes rStashedAR is r15
2638 static_assert(rStashedAR == r15,
2639 "__fcallHelperThunk needs to be modified for ABI changes");
2640 asm (
2641 ".byte 0\n"
2642 ".align 16\n"
2643 ".globl __fcallHelperThunk\n"
2644 "__fcallHelperThunk:\n"
2645 #ifdef HHVM
2646 "mov %r15, %rdi\n"
2647 "call fcallHelper\n"
2648 "jmp *%rax\n"
2649 #endif
2650 "ud2\n"
2654 * enterTCHelper
2656 * This helper routine is written in x64 assembly to take care of the details
2657 * when transferring control between jitted code and the translator.
2658 * rdi: Cell* vm_sp
2659 * rsi: Cell* vm_fp
2660 * rdx: unsigned char* start
2661 * rcx: TReqInfo* infoPtr
2662 * r8: ActRec* firstAR
2663 * r9: uint8_t* targetCacheBase
2665 * Note: enterTCHelper does not save callee-saved registers except
2666 * %rbp. This means when we call it from C++, we have to tell gcc to
2667 * clobber all the other callee-saved registers.
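 *
 *   (enterTC below does exactly this: it brackets the call with
 *    asm volatile("" : : : "rbx","r12","r13","r14","r15").)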
2669 static_assert(rVmSp == rbx &&
2670 rVmFp == rbp &&
2671 rVmTl == r12 &&
2672 rStashedAR == r15,
2673 "__enterTCHelper needs to be modified to use the correct ABI");
2674 asm (
2675 ".byte 0\n"
2676 ".align 16\n"
2677 "__enterTCHelper:\n"
2678 // Prologue
2679 ".cfi_startproc\n"
2680 "push %rbp\n"
2681 ".cfi_adjust_cfa_offset 8\n" // offset to previous frame relative to %rsp
2682 ".cfi_offset rbp, -16\n" // Where to find previous value of rbp
2684 // Set firstAR->m_savedRbp to point to this frame.
2685 "mov %rsp, (%r8)\n"
2687 // Save infoPtr
2688 "push %rcx\n"
2689 ".cfi_adjust_cfa_offset 8\n"
2691 // Set up special registers used for translated code.
2692 "mov %rdi, %rbx\n" // rVmSp
2693 "mov %r9, %r12\n" // rVmTl
2694 "mov %rsi, %rbp\n" // rVmFp
2695 "mov 0x30(%rcx), %r15\n" // rStashedAR saved across service requests
2697 // The translated code we are about to enter does not follow the
2698 // standard prologue of pushing rbp at entry, so we are purposely 8
2699 // bytes short of 16-byte alignment before this call instruction so
2700 // that the return address being pushed will make the native stack
2701 // 16-byte aligned.
2702 // Pre-allocate LinearScan::NumPreAllocatedSpillLocs * 8 bytes for
2703 // spill locations.
2704 // This value must be consistent with LinearScan::NumPreAllocatedSpillLocs.
2705 "sub $0x80, %rsp\n"
2706 // May need cfi_adjust_cfa_offset annotations: Task #1747813
2707 "call *%rdx\n"
2708 "add $0x80, %rsp\n"
2710 // Restore infoPtr into %rbx
2711 "pop %rbx\n"
2712 ".cfi_adjust_cfa_offset -8\n"
2714 // Copy the values passed from jitted code into *infoPtr
2715 "mov %rdi, 0x0(%rbx)\n"
2716 "mov %rsi, 0x8(%rbx)\n"
2717 "mov %rdx, 0x10(%rbx)\n"
2718 "mov %rcx, 0x18(%rbx)\n"
2719 "mov %r8, 0x20(%rbx)\n"
2720 "mov %r9, 0x28(%rbx)\n"
2722 // Service request "callee-saved". (Returnee-saved?)
2723 "mov %r15, 0x30(%rbx)\n"
2725 // Epilogue
2726 "pop %rbp\n"
2727 ".cfi_restore rbp\n"
2728 ".cfi_adjust_cfa_offset -8\n"
2729 "ret\n"
2730 ".cfi_endproc\n"
2733 struct TReqInfo {
2734 uintptr_t requestNum;
2735 uintptr_t args[5];
2737 // Some TC registers need to be preserved across service requests.
2738 uintptr_t saved_rStashedAr;
2741 void enterTCHelper(Cell* vm_sp,
2742 Cell* vm_fp,
2743 TCA start,
2744 TReqInfo* infoPtr,
2745 ActRec* firstAR,
2746 uint8_t* targetCacheBase) asm ("__enterTCHelper");
2748 struct DepthGuard {
2749 static __thread int m_depth;
2750 DepthGuard() { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
2751 ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }
2753 __thread int DepthGuard::m_depth;
2754 void
2755 TranslatorX64::enterTC(SrcKey sk) {
2756 using namespace TargetCache;
2757 TCA start = getTranslation(&sk, true);
2759 DepthGuard d;
2760 TReqInfo info;
2761 const uintptr_t& requestNum = info.requestNum;
2762 uintptr_t* args = info.args;
2763 for (;;) {
2764 ASSERT(vmfp() >= vmsp() - 1);
2765 ASSERT(sizeof(Cell) == 16);
2766 ASSERT(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
2767 ASSERT(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
2769 TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
2770 vmfp(), ((ActRec*)vmfp())->m_func->name()->data(), vmsp());
2771 s_writeLease.gremlinUnlock();
2772 // Keep dispatching until we end up somewhere the translator
2773 // recognizes, or we luck out and the leaseholder exits.
2774 while (!start) {
2775 TRACE(2, "enterTC forwarding BB to interpreter\n");
2776 g_vmContext->m_pc = curUnit()->at(sk.offset());
2777 INC_TPC(interp_bb);
2778 g_vmContext->dispatchBB();
2779 sk = SrcKey(curFunc(), g_vmContext->getPC());
2780 start = getTranslation(&sk, true);
2782 ASSERT(start);
2783 ASSERT(isValidCodeAddress(start));
2784 tl_regState = REGSTATE_DIRTY;
2785 ASSERT(!s_writeLease.amOwner());
2786 curFunc()->validate();
2787 INC_TPC(enter_tc);
2789 // The asm volatile here is to force C++ to spill anything that
2790 // might be in a callee-saved register (aside from rbp).
2791 // enterTCHelper does not preserve these registers.
2792 asm volatile("" : : : "rbx","r12","r13","r14","r15");
2793 enterTCHelper(vmsp(), vmfp(), start, &info, vmFirstAR(),
2794 tl_targetCaches.base);
2795 asm volatile("" : : : "rbx","r12","r13","r14","r15");
2797 tl_regState = REGSTATE_CLEAN; // Careful: pc isn't sync'ed yet.
2798 // Debugging code: cede the write lease half the time.
2799 if (debug && (RuntimeOption::EvalJitStressLease)) {
2800 if (d.m_depth == 1 && (rand() % 2) == 0) {
2801 s_writeLease.gremlinLock();
2805 TRACE(4, "enterTC: %p fp%p sp%p } return\n", start,
2806 vmfp(), vmsp());
2807 TRACE(4, "enterTC: request(%s) args: %lx %lx %lx %lx %lx\n",
2808 reqName(requestNum),
2809 args[0], args[1], args[2], args[3], args[4]);
2810 ASSERT(vmfp() >= vmsp() - 1 || requestNum == REQ_EXIT);
2812 if (debug) {
2813 // Ensure that each case either returns, or drives start to a valid
2814 // value.
2815 start = TCA(0xbee5face);
2818 // The contract is that each case will either exit, by returning, or
2819 // set sk to the place where execution should resume, and optionally
2820 // set start to the hardware translation of the resumption point.
2822 // start and sk might be subtly different; i.e., there are cases where
2823 // start != NULL && start != getTranslation(sk). For instance,
2824 // REQ_BIND_CALL has not finished executing the OpCall when it gets
2825 // here, and has even done some work on its behalf. sk == OpFCall,
2826 // while start == the point in the TC that's "half-way through" the
2827 // Call instruction. If we punt to the interpreter, the interpreter
2828 // will redo some of the work that the translator has already done.
2829 INC_TPC(service_req);
2830 switch (requestNum) {
2831 case REQ_EXIT: {
2832 // fp is not valid anymore
2833 vmfp() = NULL;
2834 return;
2837 case REQ_BIND_CALL: {
2838 ReqBindCall* req = (ReqBindCall*)args[0];
2839 ActRec* calleeFrame = (ActRec*)args[1];
2840 TCA toSmash = req->m_toSmash;
2841 Func *func = const_cast<Func*>(calleeFrame->m_func);
2842 int nArgs = req->m_nArgs;
2843 bool isImmutable = req->m_isImmutable;
2844 TCA dest = tx64->funcPrologue(func, nArgs);
2845 TRACE(2, "enterTC: bindCall %s -> %p\n", func->name()->data(), dest);
2846 if (isImmutable) {
2847 // If we *know* we're calling the right function, don't bother
2848 // with the dynamic check of ar->m_func.
2849 dest = skipFuncCheck(dest);
2850 TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
2851 func->name()->data(), dest);
2853 LeaseHolder writer(s_writeLease, NO_ACQUIRE);
2854 if (dest && writer.acquire()) {
2855 TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
2856 smash(tx64->getAsmFor(toSmash), toSmash, dest);
2857 // sk: stale, but doesn't matter since we have a valid dest TCA.
2858 } else {
2859 // We need translator help; we're not at the callee yet, so
2860 // roll back. The prelude has done some work already, but it
2861 // should be safe to redo.
2862 TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
2863 toSmash, dest);
2864 sk = req->m_sourceInstr;
2866 start = dest;
2867 } break;
2869 case REQ_BIND_SIDE_EXIT:
2870 case REQ_BIND_JMP:
2871 case REQ_BIND_JMP_NO_IR:
2872 case REQ_BIND_ADDR: {
2873 TCA toSmash = (TCA)args[0];
2874 Offset off = args[1];
2875 sk = SrcKey(curFunc(), off);
2876 if (requestNum == REQ_BIND_SIDE_EXIT) {
2877 SKTRACE(3, sk, "side exit taken!\n");
2879 start = bindJmp(toSmash, sk, requestNum == REQ_BIND_ADDR,
2880 requestNum == REQ_BIND_JMP_NO_IR);
2881 } break;
2883 case REQ_BIND_JMPCC_FIRST: {
2884 TCA toSmash = (TCA)args[0];
2885 Offset offTaken = (Offset)args[1];
2886 Offset offNotTaken = (Offset)args[2];
2887 ConditionCode cc = ConditionCode(args[3]);
2888 bool taken = int64(args[4]) & 1;
2889 start = bindJmpccFirst(toSmash, offTaken, offNotTaken, taken, cc);
2890 // SrcKey: we basically need to emulate the fail
2891 sk = SrcKey(curFunc(), taken ? offTaken : offNotTaken);
2892 } break;
2894 case REQ_BIND_JMPCC_SECOND: {
2895 TCA toSmash = (TCA)args[0];
2896 Offset off = (Offset)args[1];
2897 ConditionCode cc = ConditionCode(args[2]);
2898 start = bindJmpccSecond(toSmash, off, cc);
2899 sk = SrcKey(curFunc(), off);
2900 } break;
2902 case REQ_BIND_REQUIRE: {
2903 ReqLitStaticArgs* rlsa = (ReqLitStaticArgs*)args[0];
2904 sk = SrcKey((Func*)args[1], (Offset)args[2]);
2905 start = getTranslation(&sk, true);
2906 if (start) {
2907 LeaseHolder writer(s_writeLease);
2908 if (writer) {
2909 SrcRec* sr = getSrcRec(sk);
2910 sr->chainFrom(a, IncomingBranch(&rlsa->m_pseudoMain));
2913 } break;
2915 case REQ_RETRANSLATE_NO_IR: {
2916 TCA toSmash = (TCA)args[0];
2917 sk = SrcKey(curFunc(), (Offset)args[1]);
2918 start = retranslateAndPatchNoIR(sk, true, toSmash);
2919 SKTRACE(2, sk, "retranslated (without IR) @%p\n", start);
2920 } break;
2922 case REQ_RETRANSLATE: {
2923 INC_TPC(retranslate);
2924 sk = SrcKey(curFunc(), (Offset)args[0]);
2925 start = retranslate(sk, true, RuntimeOption::EvalJitUseIR);
2926 SKTRACE(2, sk, "retranslated @%p\n", start);
2927 } break;
2929 case REQ_INTERPRET: {
2930 Offset off = args[0];
2931 int numInstrs = args[1];
2932 g_vmContext->m_pc = curUnit()->at(off);
2934 * We know the compilation unit has not changed; basic blocks do
2935 * not span files. I claim even exceptions do not violate this
2936 * axiom.
2938 ASSERT(numInstrs >= 0);
2939 ONTRACE(5, SrcKey(curFunc(), off).trace("interp: enter\n"));
2940 if (numInstrs) {
2941 s_perfCounters[tpc_interp_instr] += numInstrs;
2942 g_vmContext->dispatchN(numInstrs);
2943 } else {
2944 // numInstrs == 0 means it wants to dispatch until BB ends
2945 INC_TPC(interp_bb);
2946 g_vmContext->dispatchBB();
2948 SrcKey newSk(curFunc(), g_vmContext->getPC());
2949 SKTRACE(5, newSk, "interp: exit\n");
2950 sk = newSk;
2951 start = getTranslation(&newSk, true);
2952 } break;
2954 case REQ_POST_INTERP_RET: {
2955 // This is only responsible for the control-flow aspect of the Ret:
2956 // getting to the destination's translation, if any.
2957 ActRec* ar = (ActRec*)args[0];
2958 ActRec* caller = (ActRec*)args[1];
2959 ASSERT((Cell*) ar < vmsp()); // ar is already logically popped
2960 ASSERT((Cell*) caller > vmsp()); // caller is now active
2961 ASSERT((Cell*) caller == vmfp());
2962 Unit* destUnit = caller->m_func->unit();
2963 // Set PC so logging code in getTranslation doesn't get confused.
2964 vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
2965 SrcKey dest(caller->m_func, vmpc());
2966 sk = dest;
2967 start = getTranslation(&dest, true);
2968 TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
2969 ar->m_func->fullName()->data(),
2970 caller->m_func->fullName()->data());
2971 } break;
2973 case REQ_RESUME: {
2974 SrcKey dest(curFunc(), vmpc());
2975 sk = dest;
2976 start = getTranslation(&dest, true);
2977 } break;
2979 case REQ_STACK_OVERFLOW: {
2981 * we need to construct the pc of the fcall from the return
2982 * address (which will be after the fcall). Because fcall is
2983 * a variable length instruction, and because we sometimes
2984 * delete instructions from the instruction stream, we
2985 * need to use fpi regions to find the fcall.
2987 const FPIEnt* fe = curFunc()->findPrecedingFPI(
2988 curUnit()->offsetOf(vmpc()));
2989 vmpc() = curUnit()->at(fe->m_fcallOff);
2990 ASSERT(isFCallStar(*vmpc()));
2991 raise_error("Stack overflow");
2992 NOT_REACHED();
2996 NOT_REACHED();
2999 void TranslatorX64::resume(SrcKey sk) {
3000 enterTC(sk);
3004 * emitServiceReq --
3006  *   Call a translator service co-routine. The code emitted here
3007 * reenters the enterTC loop, invoking the requested service. Control
3008 * will be returned non-locally to the next logical instruction in
3009 * the TC.
3011 * Return value is a destination; we emit the bulky service
3012 * request code into astubs.
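 *
 *   Usage sketch (illustrative; mirrors callers elsewhere in this file):
 *
 *     TCA stub = emitServiceReq(REQ_RETRANSLATE, 1, uint64_t(sk.offset()));
 *     a.  jmp(stub);  // control leaves the TC and re-enters enterTC's loop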
3016 TranslatorX64::emitServiceReqVA(bool align, ServiceRequest req, int numArgs,
3017 va_list args) {
3018 if (align) {
3019 moveToAlign(astubs);
3021 TCA retval = astubs.code.frontier;
3022 emitEagerVMRegSave(astubs, SaveFP);
3024 * Move args into appropriate regs.
3026 TRACE(3, "Emit Service Req %s(", reqName(req));
3027 for (int i = 0; i < numArgs; i++) {
3028 uint64_t argVal = va_arg(args, uint64_t);
3029 TRACE(3, "%p,", (void*)argVal);
3030 emitImmReg(astubs, argVal, serviceReqArgRegs[i]);
3032 TRACE(3, ")\n");
3033 emitImmReg(astubs, req, rdi);
3035 * Weird hand-shaking with enterTC: reverse-call a service routine.
3037 astubs. ret();
3038 recordBCInstr(OpServiceRequest, astubs, retval);
3039 translator_not_reached(astubs);
3040 return retval;
3044 TranslatorX64::emitServiceReq(ServiceRequest req, int numArgs, ...) {
3045 va_list args;
3046 va_start(args, numArgs);
3047 TCA retval = emitServiceReqVA(true, req, numArgs, args);
3048 va_end(args);
3049 return retval;
3053 TranslatorX64::emitServiceReq(bool align, ServiceRequest req, int numArgs,
3054 ...) {
3055 va_list args;
3056 va_start(args, numArgs);
3057 TCA retval = emitServiceReqVA(align, req, numArgs, args);
3058 va_end(args);
3059 return retval;
3063 TranslatorX64::emitTransCounterInc(X64Assembler& a) {
3064 TCA start = a.code.frontier;
3065 if (!isTransDBEnabled()) return start;
3066 uint64* counterAddr = getTransCounterAddr();
3068 a.mov_imm64_reg((uint64)counterAddr, rScratch);
3069 a.emitLockPrefix();
3070 a.inc_mem64(rScratch, 0);
3072 return start;
3075 void
3076 TranslatorX64::spillTo(DataType type, PhysReg reg, bool writeType,
3077 PhysReg base, int disp) {
3078 X64Assembler& a = *m_spillFillCode;
3079 // Zero out the count at the same time as writing the type.
3080 SpaceRecorder sr("_Spill", a);
3082 Stats::emitInc(a, Stats::Tx64_Spill);
3083 emitStoreTypedValue(a, type, reg, disp, base, writeType);
3086 void
3087 TranslatorX64::spill(const Location& loc, DataType type,
3088 PhysReg reg, bool writeType) {
3089 PhysReg base;
3090 int disp;
3091 locToRegDisp(loc, &base, &disp);
3092 spillTo(type, reg, writeType, base, disp);
3093 TRACE(2, "%s: (%s, %lld) -> v: %d(r%d) type%d\n",
3094 __func__,
3095 loc.spaceName(), loc.offset, int(disp + TVOFF(m_data)), base, type);
3098 void
3099 TranslatorX64::fill(const Location& loc, PhysReg reg) {
3100 SpaceRecorder sr("_Fill", *m_spillFillCode);
3101 PhysReg base;
3102 int disp;
3103 locToRegDisp(loc, &base, &disp);
3104 TRACE(2, "fill: (%s, %lld) -> reg %d\n",
3105 loc.spaceName(), loc.offset, reg);
3106 m_spillFillCode->load_reg64_disp_reg64(base, disp + TVOFF(m_data), reg);
3109 void TranslatorX64::fillByMov(PhysReg src, PhysReg dst) {
3110 SpaceRecorder sr("_FillMov", *m_spillFillCode);
3111 ASSERT(src != dst);
3112 m_spillFillCode->mov_reg64_reg64(src, dst);
3115 void
3116 TranslatorX64::loadImm(int64 immVal, PhysReg reg) {
3117 SpaceRecorder sr("_FillImm", *m_spillFillCode);
3118 TRACE(2, "loadImm: 0x%llx -> reg %d\n", immVal, reg);
3119 emitImmReg(*m_spillFillCode, immVal, reg);
3122 void
3123 TranslatorX64::poison(PhysReg dest) {
3124 static const bool poison = false;
3125 if (poison) {
3126 emitImmReg(*m_spillFillCode, 0xbadf00d105e5babe, dest);
3131 * Spill all dirty registers, mark all registers as 'free' in the
3132 * register file, and update rVmSp to point to the top of stack at
3133 * the end of the tracelet.
3135 void
3136 TranslatorX64::syncOutputs(const Tracelet& t) {
3137 syncOutputs(t.m_stackChange);
3141 * Same as above, except that it sets rVmSp to point to the top of
3142 * stack at the beginning of the specified instruction.
3144 void
3145 TranslatorX64::syncOutputs(const NormalizedInstruction& i) {
3146 syncOutputs(i.stackOff);
3149 void
3150 TranslatorX64::syncOutputs(int stackOff) {
3151 SpaceRecorder sr("_SyncOuts", a);
3152 TCA start = a.code.frontier;
3153 // Mark all stack locations above the top of stack as dead
3154 m_regMap.scrubStackEntries(stackOff);
3155 // Spill all dirty registers
3156 m_regMap.cleanAll();
3157 if (stackOff != 0) {
3158 TRACE(1, "syncOutputs: rVmSp + %d\n", stackOff);
3159 // t.stackChange is in negative Cells, not bytes.
3160 a. add_imm32_reg64(-cellsToBytes(stackOff), rVmSp);
3162 // All registers have been smashed for realz, yo
3163 m_regMap.smashRegs(kAllRegs);
3164 recordBCInstr(OpSyncOutputs, a, start);
3168 * getBinaryStackInputs --
3170 * Helper for a common pattern of instruction, where two items are popped
3171 * and one is pushed. The second item on the stack at the beginning of
3172 * the instruction is both a source and destination.
3174 static void
3175 getBinaryStackInputs(const RegAlloc& regmap, const NormalizedInstruction& i,
3176 PhysReg& rsrc, PhysReg& rsrcdest) {
3177 ASSERT(i.inputs.size() == 2);
3178 ASSERT(i.outStack && !i.outLocal);
3179 rsrcdest = regmap.getReg(i.outStack->location);
3180 rsrc = regmap.getReg(i.inputs[0]->location);
3181 ASSERT(regmap.getReg(i.inputs[1]->location) == rsrcdest);
3184 // emitBox --
3185 // Leave a boxed version of input in RAX. Destroys the register
3186 // mapping.
3187 void
3188 TranslatorX64::emitBox(DataType t, PhysReg rSrc) {
3189 if (false) { // typecheck
3190 RefData* retval = tvBoxHelper(KindOfArray, 0xdeadbeef01ul);
3191 (void)retval;
3193 // tvBoxHelper will set the refcount of the inner cell to 1
3194 // for us. Because the inner cell now holds a reference to the
3195 // original value, we don't need to perform a decRef.
3196 EMIT_CALL(a, tvBoxHelper, IMM(t), R(rSrc));
3199 // emitUnboxTopOfStack --
3200 // Unbox the known-to-be Variant on top of stack in place.
3201 void
3202 TranslatorX64::emitUnboxTopOfStack(const NormalizedInstruction& i) {
3203 const vector<DynLocation*>& inputs = i.inputs;
3205 ASSERT(inputs.size() == 1);
3206 ASSERT(i.outStack && !i.outLocal);
3207 ASSERT(inputs[0]->isStack());
3208 ASSERT(i.outStack && i.outStack->location == inputs[0]->location);
3209 DataType outType = inputs[0]->rtt.innerType();
3210 ASSERT(outType != KindOfInvalid);
3211 ASSERT(outType == i.outStack->outerType());
3212 PhysReg rSrc = getReg(inputs[0]->location);
3213 // Detach the register rSrc from the input location. We must
3214 // do this dance because the input and output location are the
3215 // same and we want to have separate registers for the input
3216 // and output.
3217 m_regMap.invalidate(inputs[0]->location);
3218 m_regMap.bind(rSrc, Location(), KindOfInvalid, RegInfo::SCRATCH);
3219 // This call to allocOutputRegs will allocate a new register
3220 // for the output location
3221 m_regMap.allocOutputRegs(i);
3222 PhysReg rDest = getReg(i.outStack->location);
3223 emitDeref(a, rSrc, rDest);
3224 emitIncRef(rDest, outType);
3225 // decRef the var on the evaluation stack
3226 emitDecRef(i, rSrc, KindOfRef);
3227 m_regMap.freeScratchReg(rSrc);
3230 // setOpOpToOpcodeOp --
3231 // The SetOp opcode space has nothing to do with the bytecode opcode
3232 // space. Reasonable people like it that way, so translate them here.
3233 static Opcode
3234 setOpOpToOpcodeOp(SetOpOp soo) {
3235 switch(soo) {
3236 #define SETOP_OP(_soo, _bc) case SetOp##_soo: return _bc;
3237 SETOP_OPS
3238 #undef SETOP_OP
3239 default: ASSERT(false);
3241 return -1;
3244 void
3245 TranslatorX64::binaryIntegerArith(const NormalizedInstruction& i,
3246 Opcode op,
3247 PhysReg srcReg,
3248 PhysReg srcDestReg) {
3249 switch (op) {
3250 #define CASEIMM(OpBc, x64op) \
3251 case OpBc: { \
3252 if (i.hasConstImm) { \
3253 a. x64op ## _imm64_reg64(i.constImm.u_I64A, srcDestReg); \
3254 } else { \
3255 a. x64op ## _reg64_reg64(srcReg, srcDestReg); \
3256 } } break;
3257 #define CASE(OpBc, x64op) \
3258 case OpBc: { \
3259 a. x64op ## _reg64_reg64(srcReg, srcDestReg); \
3260 } break;
3261 CASEIMM(OpAdd, add)
3262 CASEIMM(OpSub, sub)
3263 CASEIMM(OpBitAnd, and)
3264 CASEIMM(OpBitOr, or)
3265 CASEIMM(OpBitXor, xor)
3266 CASE(OpMul, imul)
3267 #undef CASE
3268 #undef CASEIMM
3270 default: {
3271 not_reached();
3276 void
3277 TranslatorX64::binaryArithCell(const NormalizedInstruction &i,
3278 Opcode op, const DynLocation& in1,
3279 const DynLocation& inout) {
3280 ASSERT(in1.rtt.isInt());
3281 ASSERT(inout.rtt.isInt());
3282 ASSERT(in1.outerType() != KindOfRef);
3283 ASSERT(in1.isStack());
3284 ASSERT(inout.outerType() != KindOfRef);
3285 ASSERT(inout.isStack());
3286 m_regMap.allocOutputRegs(i);
3287 PhysReg srcReg = m_regMap.getReg(in1.location);
3288 PhysReg srcDestReg = m_regMap.getReg(inout.location);
3289 binaryIntegerArith(i, op, srcReg, srcDestReg);
3292 void
3293 TranslatorX64::binaryArithLocal(const NormalizedInstruction &i,
3294 Opcode op,
3295 const DynLocation& in1,
3296 const DynLocation& in2,
3297 const DynLocation& out) {
3298 // The caller must guarantee that these conditions hold
3299 ASSERT(in1.rtt.isInt());
3300 ASSERT(in2.rtt.isInt());
3301 ASSERT(in1.outerType() != KindOfRef);
3302 ASSERT(in1.isStack());
3303 ASSERT(in2.isLocal());
3304 ASSERT(out.isStack());
3306 PhysReg srcReg = m_regMap.getReg(in1.location);
3307 PhysReg outReg = m_regMap.getReg(out.location);
3308 PhysReg localReg = m_regMap.getReg(in2.location);
3309 if (in2.outerType() != KindOfRef) {
3310 // The local is not a var, so we can operate directly on the
3311 // local's register. We will need to update outReg after the
3312 // operation.
3313 binaryIntegerArith(i, op, srcReg, localReg);
3314 // We operated directly on the local's register, so we need to update
3315 // outReg
3316 emitMovRegReg(localReg, outReg);
3317 } else {
3318 // The local is a var, so we have to read its value into outReg
3319 // on operate on that. We will need to write the result back
3320 // to the local after the operation.
3321 emitDeref(a, localReg, outReg);
3322 binaryIntegerArith(i, op, srcReg, outReg);
3323 // We operated on outReg, so we need to write the result back to the
3324 // local
3325 a. store_reg64_disp_reg64(outReg, 0, localReg);
3329 static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
3330 ASSERT(tl_regState == REGSTATE_DIRTY);
3331 tl_regState = REGSTATE_CLEAN;
3332 vmfp() = (Cell*)ar;
3333 vmsp() = sp;
3334 vmpc() = curUnit()->at(pcOff);
3335 ASSERT(vmsp() <= vmfp());
3338 #define O(opcode, imm, pusph, pop, flags) \
3340  * The interpOne methods save m_pc, m_fp, and m_sp into the ExecutionContext,
3341  * call into the interpreter, and then return a pointer to the
3342 * current ExecutionContext.
3343 */ \
3344 VMExecutionContext* \
3345 interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
3346 interp_set_regs(ar, sp, pcOff); \
3347 SKTRACE(5, SrcKey(curFunc(), vmpc()), "%40s %p %p\n", \
3348 "interpOne" #opcode " before (fp,sp)", \
3349 vmfp(), vmsp()); \
3350 ASSERT(*vmpc() == Op ## opcode); \
3351 VMExecutionContext* ec = g_vmContext; \
3352 Stats::inc(Stats::Instr_InterpOne ## opcode); \
3353 INC_TPC(interp_one) \
3354 /* Correct for over-counting in TC-stats. */ \
3355 Stats::inc(Stats::Instr_TC, -1); \
3356 ec->op##opcode(); \
3358 * Only set regstate back to dirty if an exception is not
3359 * propagating. If an exception is throwing, regstate for this call
3360 * is actually still correct, and we don't have information in the
3361 * fixup map for interpOne calls anyway.
3362 */ \
3363 tl_regState = REGSTATE_DIRTY; \
3364 return ec; \
3367 OPCODES
3368 #undef O
3370 void* interpOneEntryPoints[] = {
3371 #define O(opcode, imm, pusph, pop, flags) \
3372 (void*)(interpOne ## opcode),
3373 OPCODES
3374 #undef O
3377 void TranslatorX64::fixupWork(VMExecutionContext* ec, ActRec* rbp) const {
3378 ASSERT(RuntimeOption::EvalJit);
3379 ActRec* nextAr = rbp;
3380 do {
3381 rbp = nextAr;
3382 FixupMap::VMRegs regs;
3383 TRACE(10, "considering frame %p, %p\n", rbp, (void*)rbp->m_savedRip);
3384 if (g_vmContext->m_stack.isValidAddress(rbp->m_savedRbp) &&
3385 m_fixupMap.getFrameRegs(rbp, &regs)) {
3386 TRACE(10, "fixup func %s fp %p sp %p pc %p\n",
3387 regs.m_fp->m_func->name()->data(),
3388 regs.m_fp, regs.m_sp, regs.m_pc);
3389 ec->m_fp = const_cast<ActRec*>(regs.m_fp);
3390 ec->m_pc = regs.m_pc;
3391 vmsp() = regs.m_sp;
3392 return;
3394 nextAr = (ActRec*)rbp->m_savedRbp;
3395 } while (rbp && rbp != nextAr);
3396 // OK, we've exhausted the entire actRec chain.
3397 // We are only invoking ::fixup() from contexts that were known
3398 // to be called out of the TC, so this cannot happen.
3399 NOT_REACHED();
3402 void TranslatorX64::fixup(VMExecutionContext* ec) const {
3403 // Start looking for fixup entries at the current (C++) frame. This
3404 // will walk the frames upward until we find a TC frame.
3405 ActRec* rbp;
3406 asm volatile("mov %%rbp, %0" : "=r"(rbp));
3407 fixupWork(ec, rbp);
3410 void
3411 TranslatorX64::syncWork() {
3412 ASSERT(tl_regState == REGSTATE_DIRTY);
3413 fixup(g_vmContext);
3414 tl_regState = REGSTATE_CLEAN;
3415 Stats::inc(Stats::TC_Sync);
3418 void
3419 TranslatorX64::emitInterpOne(const Tracelet& t,
3420 const NormalizedInstruction& ni) {
3421 // Write any dirty values to memory
3422 m_regMap.cleanAll();
3423 // Call into the appropriate interpOne method. Note that this call will
3424 // preserve the callee-saved registers including rVmFp and rVmSp.
3425 if (false) { /* typecheck */
3426 UNUSED VMExecutionContext* ec = interpOnePopC((ActRec*)vmfp(), vmsp(), 0);
3428 void* func = interpOneEntryPoints[ni.op()];
3429 TRACE(3, "ip %p of unit %p -> interpOne @%p\n", ni.pc(), ni.unit(), func);
3430 EMIT_CALL(a, func,
3431 R(rVmFp),
3432 RPLUS(rVmSp, -int32_t(cellsToBytes(ni.stackOff))),
3433 IMM(ni.source.offset()));
3434 // The interpreter may have written to memory, so we need to invalidate
3435 // all locations
3436 m_regMap.reset();
3437 // The interpOne method returned a pointer to the current
3438 // ExecutionContext in rax, so we can read the 'm_*' fields
3439 // by adding the appropriate offset to rax and dereferencing.
3441 // If this instruction ends the tracelet, we have some extra work to do.
3442 if (ni.breaksBB) {
3443 // Read the 'm_fp' and 'm_stack.m_top' fields into the rVmFp and
3444 // rVmSp registers.
3445 a. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
3446 rVmFp);
3447 a. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
3448 Stack::topOfStackOffset(), rVmSp);
3449 if (opcodeChangesPC(ni.op())) {
3450 // If interpreting this instruction can potentially set PC to point
3451         // to something other than the next instruction in the bytecode,
3452 // we need to emit a service request to figure out where to go next
3453 TCA stubDest = emitServiceReq(REQ_RESUME, 0ull);
3454 a. jmp(stubDest);
3455 } else {
3456 // If this instruction always advances PC to the next instruction in
3457 // the bytecode, then we know what SrcKey to bind to
3458 emitBindJmp(nextSrcKey(t, ni));
3463 bool isContextFixed() {
3464 // Translations for pseudomains don't have a fixed context class
3465 return !curFunc()->isPseudoMain();
3468 // could be static but used in hopt/codegen.cpp
3469 void raiseUndefVariable(StringData* nm) {
3470 raise_notice(Strings::UNDEFINED_VARIABLE, nm->data());
3471 // FIXME: do we need to decref the string if an exception is
3472 // propagating?
3473 if (nm->decRefCount() == 0) { nm->release(); }
3476 static TXFlags
3477 planBinaryArithOp(const NormalizedInstruction& i) {
3478 ASSERT(i.inputs.size() == 2);
3479 return nativePlan(i.inputs[0]->isInt() && i.inputs[1]->isInt());
3482 void
3483 TranslatorX64::analyzeBinaryArithOp(Tracelet& t, NormalizedInstruction& i) {
3484 i.m_txFlags = planBinaryArithOp(i);
3487 void
3488 TranslatorX64::translateBinaryArithOp(const Tracelet& t,
3489 const NormalizedInstruction& i) {
3490 const Opcode op = i.op();
3491 ASSERT(op == OpSub || op == OpMul || op == OpBitAnd ||
3492 op == OpBitOr || op == OpBitXor);
3493 ASSERT(planBinaryArithOp(i));
3494 ASSERT(i.inputs.size() == 2);
3496 binaryArithCell(i, op, *i.inputs[0], *i.outStack);
3499 static inline bool sameDataTypes(DataType t1, DataType t2) {
3500 return TypeConstraint::equivDataTypes(t1, t2);
3503 static TXFlags
3504 planSameOp_SameTypes(const NormalizedInstruction& i) {
3505 ASSERT(i.inputs.size() == 2);
3506 const RuntimeType& left = i.inputs[0]->rtt;
3507 const RuntimeType& right = i.inputs[1]->rtt;
3508 DataType leftType = left.outerType();
3509 DataType rightType = right.outerType();
3510 return nativePlan(sameDataTypes(leftType, rightType) &&
3511 (left.isNull() || leftType == KindOfBoolean ||
3512 left.isInt() || left.isString()));
3515 static TXFlags
3516 planSameOp_DifferentTypes(const NormalizedInstruction& i) {
3517 ASSERT(i.inputs.size() == 2);
3518 DataType leftType = i.inputs[0]->outerType();
3519 DataType rightType = i.inputs[1]->outerType();
3520 if (!sameDataTypes(leftType, rightType)) {
3521 if (IS_REFCOUNTED_TYPE(leftType) || IS_REFCOUNTED_TYPE(rightType)) {
3522 // For dissimilar datatypes, we might call out to handle a refcount.
3523 return Supported;
3525 return Native;
3527 return Interp;
3530 void
3531 TranslatorX64::analyzeSameOp(Tracelet& t, NormalizedInstruction& i) {
3532 ASSERT(!(planSameOp_SameTypes(i) && planSameOp_DifferentTypes(i)));
3533 i.m_txFlags = TXFlags(planSameOp_SameTypes(i) | planSameOp_DifferentTypes(i));
3534 i.manuallyAllocInputs = true;
3537 void
3538 TranslatorX64::translateSameOp(const Tracelet& t,
3539 const NormalizedInstruction& i) {
3540 const Opcode op = i.op();
3541 ASSERT(op == OpSame || op == OpNSame);
3542 const vector<DynLocation*>& inputs = i.inputs;
3543 bool instrNeg = (op == OpNSame);
3544 ASSERT(inputs.size() == 2);
3545 ASSERT(i.outStack && !i.outLocal);
3546 DataType leftType = i.inputs[0]->outerType();
3547 DataType rightType DEBUG_ONLY = i.inputs[1]->outerType();
3548 ASSERT(leftType != KindOfRef);
3549 ASSERT(rightType != KindOfRef);
3551 if (planSameOp_DifferentTypes(i)) {
3552 // Some easy cases: when the valueTypes do not match,
3553 // NSame -> true and Same -> false.
3554 SKTRACE(1, i.source, "different types %d %d\n",
3555 leftType, rightType);
3556 emitDecRefInput(a, i, 0);
3557 emitDecRefInput(a, i, 1);
3558 m_regMap.allocOutputRegs(i);
3559 emitImmReg(a, instrNeg, getReg(i.outStack->location));
3560 return; // Done
3563 ASSERT(planSameOp_SameTypes(i));
3565 if (IS_NULL_TYPE(leftType)) {
3566 m_regMap.allocOutputRegs(i);
3567 // null === null is always true
3568 SKTRACE(2, i.source, "straightening null/null comparison\n");
3569 emitImmReg(a, !instrNeg, getReg(i.outStack->location));
3570 return; // Done
3572 if (IS_STRING_TYPE(leftType)) {
3573 int args[2];
3574 args[0] = 0;
3575 args[1] = 1;
3576 allocInputsForCall(i, args);
3577 EMIT_CALL(a, same_str_str,
3578 V(inputs[0]->location),
3579 V(inputs[1]->location));
3580 if (instrNeg) {
3581 a. xor_imm32_reg32(1, rax);
3583 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3584 RegInfo::DIRTY);
3585 return; // Done
3587 m_regMap.allocInputRegs(i);
3588 PhysReg src, srcdest;
3589 getBinaryStackInputs(m_regMap, i, src, srcdest);
3590 m_regMap.allocOutputRegs(i);
3591 ASSERT(getReg(i.outStack->location) == srcdest);
3592 a. cmp_reg64_reg64(src, srcdest);
3593 if (op == OpSame) {
3594 a. sete(srcdest);
3595 } else {
3596 a. setne(srcdest);
3598 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
3601 static bool
3602 trivialEquivType(const RuntimeType& rtt) {
3603 DataType t = rtt.valueType();
3604 return t == KindOfUninit || t == KindOfNull || t == KindOfBoolean ||
3605 rtt.isInt() || rtt.isString();
3608 static void
3609 emitConvertToBool(X64Assembler &a, PhysReg src, PhysReg dest, bool instrNeg) {
3610 a. test_reg64_reg64(src, src);
3611 if (instrNeg) {
3612 a. setz(dest);
3613 } else {
3614 a. setnz(dest);
3616 a. mov_reg8_reg64_unsigned(dest, dest);
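  // Net effect: dest ends up holding exactly 0 or 1, i.e. (src != 0), or
  // (src == 0) when instrNeg: test sets ZF from src, set[n]z writes a
  // single byte, and the unsigned 8->64 move clears the upper bits.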
3619 void
3620 TranslatorX64::analyzeEqOp(Tracelet& t, NormalizedInstruction& i) {
3621 ASSERT(i.inputs.size() == 2);
3622 RuntimeType &lt = i.inputs[0]->rtt;
3623 RuntimeType &rt = i.inputs[1]->rtt;
3624 i.m_txFlags = nativePlan(trivialEquivType(lt) &&
3625 trivialEquivType(rt));
3626 if (i.isNative() &&
3627 IS_NULL_TYPE(lt.outerType()) &&
3628 IS_NULL_TYPE(rt.outerType())) {
3629 i.manuallyAllocInputs = true;
3633 void
3634 TranslatorX64::translateEqOp(const Tracelet& t,
3635 const NormalizedInstruction& i) {
3636 const Opcode op = i.op();
3637 ASSERT(op == OpEq || op == OpNeq);
3638 ASSERT(i.isNative());
3639 const vector<DynLocation*>& inputs = i.inputs;
3640 bool instrNeg = (op == OpNeq);
3641 ASSERT(inputs.size() == 2);
3642 ASSERT(i.outStack && !i.outLocal);
3643 DataType leftType = i.inputs[0]->outerType();
3644 DataType rightType = i.inputs[1]->outerType();
3645 ASSERT(leftType != KindOfRef);
3646 ASSERT(rightType != KindOfRef);
3648 ConditionCode ccBranch = CC_E;
3649 if (instrNeg) ccBranch = ccNegate(ccBranch);
3651 // Inputless case.
3652 if (IS_NULL_TYPE(leftType) && IS_NULL_TYPE(rightType)) {
3653 ASSERT(i.manuallyAllocInputs);
3654 // null == null is always true
3655 bool result = !instrNeg;
3656 SKTRACE(2, i.source, "straightening null/null comparison\n");
3657 if (i.changesPC) {
3658 fuseBranchAfterStaticBool(t, i, result);
3659 } else {
3660 m_regMap.allocOutputRegs(i);
3661 emitImmReg(a, result, getReg(i.outStack->location));
3663 return; // Done
3666 if (IS_STRING_TYPE(leftType) || IS_STRING_TYPE(rightType)) {
3667 void* fptr = NULL;
3668 bool leftIsString = false;
3669 bool eqNullStr = false;
3670 switch (leftType) {
3671 STRINGCASE(): {
3672 leftIsString = true;
3673 switch (rightType) {
3674 STRINGCASE(): fptr = (void*)eq_str_str; break;
3675 case KindOfInt64: fptr = (void*)eq_int_str; break;
3676 case KindOfBoolean: fptr = (void*)eq_bool_str; break;
3677 NULLCASE(): fptr = (void*)eq_null_str; eqNullStr = true; break;
3678 default: ASSERT(false); break;
3680 } break;
3681 case KindOfInt64: fptr = (void*)eq_int_str; break;
3682 case KindOfBoolean: fptr = (void*)eq_bool_str; break;
3683 NULLCASE(): fptr = (void*)eq_null_str; eqNullStr = true; break;
3684 default: ASSERT(false); break;
3686 if (eqNullStr) {
3687 ASSERT(fptr == (void*)eq_null_str);
3688 EMIT_CALL(a, fptr,
3689 V(inputs[leftIsString ? 0 : 1]->location));
3690 } else {
3691 ASSERT(fptr != NULL);
3692 EMIT_CALL(a, fptr,
3693 V(inputs[leftIsString ? 1 : 0]->location),
3694 V(inputs[leftIsString ? 0 : 1]->location));
3696 if (i.changesPC) {
3697 fuseBranchSync(t, i);
3698 a. test_imm32_reg32(1, rax);
3699 fuseBranchAfterBool(t, i, ccNegate(ccBranch));
3700 return;
3702 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3703 RegInfo::DIRTY);
3704 if (instrNeg) {
3705 a. xor_imm32_reg32(1, rax);
3707 return;
3710 m_regMap.allocOutputRegs(i);
3711 PhysReg src, srcdest;
3712 getBinaryStackInputs(m_regMap, i, src, srcdest);
3713 ASSERT(getReg(i.outStack->location) == srcdest);
3714 if (i.changesPC) {
3715 fuseBranchSync(t, i);
3717 if (IS_NULL_TYPE(leftType) || IS_NULL_TYPE(rightType)) {
3718 if (IS_NULL_TYPE(leftType)) {
3719 a. test_reg64_reg64(srcdest, srcdest);
3720 } else {
3721 ASSERT(IS_NULL_TYPE(rightType));
3722 a. test_reg64_reg64(src, src);
3724 } else if (leftType == KindOfBoolean ||
3725 rightType == KindOfBoolean) {
3726 // OK to destroy src and srcdest in-place; their stack locations are
3727 // blown away by this instruction.
3728 if (leftType != KindOfBoolean)
3729 emitConvertToBool(a, src, src, false);
3730 if (rightType != KindOfBoolean)
3731 emitConvertToBool(a, srcdest, srcdest, false);
3732 a. cmp_reg64_reg64(src, srcdest);
3733 } else {
3734 a. cmp_reg64_reg64(src, srcdest);
3736 if (i.changesPC) {
3737 fuseBranchAfterBool(t, i, ccBranch);
3738 return;
3740 if (instrNeg) {
3741 a. setnz (srcdest);
3742 } else {
3743 a. setz (srcdest);
3745 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
3748 void
3749 TranslatorX64::analyzeLtGtOp(Tracelet& t, NormalizedInstruction& i) {
3750 ASSERT(i.inputs.size() == 2);
3751 const RuntimeType& left = i.inputs[0]->rtt;
3752 DataType leftType = left.outerType();
3753 DataType rightType = i.inputs[1]->outerType();
3754 i.m_txFlags = nativePlan(sameDataTypes(leftType, rightType) &&
3755 (left.isNull() ||
3756 leftType == KindOfBoolean ||
3757 left.isInt()));
3758 if (i.isNative() && IS_NULL_TYPE(left.outerType())) {
3759 // No inputs. w00t.
3760 i.manuallyAllocInputs = true;
3764 void
3765 TranslatorX64::translateLtGtOp(const Tracelet& t,
3766 const NormalizedInstruction& i) {
3767 const Opcode op = i.op();
3768 ASSERT(op == OpLt || op == OpLte || op == OpGt || op == OpGte);
3769 ASSERT(i.inputs.size() == 2);
3770 ASSERT(i.outStack && !i.outLocal);
3771 ASSERT(i.inputs[0]->outerType() != KindOfRef);
3772 ASSERT(i.inputs[1]->outerType() != KindOfRef);
3773 ASSERT(i.isNative());
3775 bool fEquals = (op == OpLte || op == OpGte);
3776 bool fLessThan = (op == OpLt || op == OpLte);
3778 m_regMap.allocOutputRegs(i);
3779 if (IS_NULL_TYPE(i.inputs[0]->outerType())) {
3780 ASSERT(IS_NULL_TYPE(i.inputs[1]->outerType()));
3781 // null < null is always false, null <= null is always true
3782 SKTRACE(2, i.source, "straightening null/null comparison\n");
3783 PhysReg rOut = getReg(i.outStack->location);
3784 bool resultIsTrue = (op == OpLte || op == OpGte);
3785 if (i.changesPC) {
3786 fuseBranchAfterStaticBool(t, i, resultIsTrue);
3787 } else {
3788 emitImmReg(a, resultIsTrue, rOut);
3790 return;
3792 PhysReg src, srcdest;
3793 getBinaryStackInputs(m_regMap, i, src, srcdest);
3794 ASSERT(getReg(i.outStack->location) == srcdest);
3795 if (i.changesPC) {
3796 fuseBranchSync(t, i);
3798 a. cmp_reg64_reg64(src, srcdest);
3799 static const ConditionCode opToCc[2][2] = {
3800 // !fEquals fEquals
3801 { CC_G, CC_GE }, // !fLessThan
3802 { CC_L, CC_LE }, // fLessThan
3804 ConditionCode cc = opToCc[fLessThan][fEquals];
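  // For example, OpLt is fLessThan && !fEquals, giving opToCc[1][0] == CC_L,
  // while OpGte is !fLessThan && fEquals, giving opToCc[0][1] == CC_GE.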
3805 if (i.changesPC) {
3806 // Fuse the coming branch.
3807 fuseBranchAfterBool(t, i, cc);
3808 return;
3810 a. setcc(cc, srcdest);
3811 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
3814 static TXFlags
3815 planUnaryBooleanOp(const NormalizedInstruction& i) {
3816 ASSERT(i.inputs.size() == 1);
3817 RuntimeType& rtt = i.inputs[0]->rtt;
3818 DataType inType = rtt.valueType();
3819 if (inType == KindOfArray) {
3820 return Supported;
3822 if (rtt.isString()) {
3823 return Simple;
3825 return nativePlan(rtt.isNull() ||
3826 inType == KindOfBoolean || rtt.isInt());
3829 void
3830 TranslatorX64::analyzeUnaryBooleanOp(Tracelet& t, NormalizedInstruction& i) {
3831 i.m_txFlags = planUnaryBooleanOp(i);
3834 void
3835 TranslatorX64::translateUnaryBooleanOp(const Tracelet& t,
3836 const NormalizedInstruction& i) {
3837 const Opcode op = i.op();
3838 ASSERT(op == OpCastBool || op == OpEmptyL);
3839 const vector<DynLocation*>& inputs = i.inputs;
3840 ASSERT(inputs.size() == 1);
3841 ASSERT(i.outStack && !i.outLocal);
3842 bool instrNeg = (op == OpEmptyL);
3843 DataType inType = inputs[0]->valueType();
3844 const Location& inLoc = inputs[0]->location;
3845 bool boxedForm = (inputs[0]->outerType() == KindOfRef);
3848 switch (inType) {
3849 NULLCASE(): {
3850 m_regMap.allocOutputRegs(i);
3851 PhysReg outReg = getReg(i.outStack->location);
3852 emitImmReg(a, instrNeg, outReg);
3853 ASSERT(i.isNative());
3854 } break;
3855 case KindOfBoolean: {
3856 if (op == OpCastBool) {
3857 // Casting bool to bool is a nop. CastBool's input must be
3858 // a cell on the stack as per the bytecode specification.
3859 ASSERT(inputs[0]->isStack());
3860 ASSERT(inputs[0]->outerType() != KindOfRef);
3861 ASSERT(inputs[0]->location.space == Location::Stack);
3862 ASSERT(i.isNative());
3863 break;
3865 m_regMap.allocOutputRegs(i);
3866 PhysReg reg = getReg(inLoc);
3867 PhysReg outReg = getReg(i.outStack->location);
3868 if (boxedForm) {
3869 emitDeref(a, reg, outReg);
3870 } else {
3871 emitMovRegReg(reg, outReg);
3873 if (instrNeg) {
3874 a. xor_imm32_reg32(1, outReg);
3876 } break;
3877 case KindOfInt64: {
3878 m_regMap.allocOutputRegs(i);
3879 PhysReg reg = getReg(inLoc);
3880 PhysReg outReg = getReg(i.outStack->location);
3881 ScratchReg scratch(m_regMap);
3882 if (boxedForm) {
3883 emitDeref(a, reg, *scratch);
3884 emitConvertToBool(a, *scratch, outReg, instrNeg);
3885 } else {
3886 emitConvertToBool(a, reg, outReg, instrNeg);
3888 } break;
3889 STRINGCASE():
3890 case KindOfArray: {
3891 bool doDecRef = (inputs[0]->isStack());
3892 void* fptr = IS_STRING_TYPE(inType) ?
3893 (doDecRef ? (void*)str_to_bool : (void*)str0_to_bool) :
3894 (doDecRef ? (void*)arr_to_bool : (void*)arr0_to_bool);
3895 if (boxedForm) {
3896 EMIT_CALL(a, fptr, DEREF(inLoc));
3897 } else {
3898 EMIT_CALL(a, fptr, V(inLoc));
3900 if (!IS_STRING_TYPE(inType)) {
3901 recordReentrantCall(i);
3903 if (instrNeg) {
3904 a. xor_imm32_reg32(1, rax);
3906 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3907 RegInfo::DIRTY);
3908 } break;
3909 default: {
3910 ASSERT(false);
3911 } break;
3915 void
3916 TranslatorX64::analyzeBranchOp(Tracelet& t, NormalizedInstruction& i) {
3917 i.m_txFlags = Supported;
3920 // Helper for decoding dests of branch-like instructions at the end of
3921 // a basic block.
3922 static void branchDests(const Tracelet& t,
3923 const NormalizedInstruction& i,
3924 SrcKey* outTaken, SrcKey* outNotTaken,
3925 int immIdx = 0) {
3926 *outNotTaken = nextSrcKey(t, i);
3927 int dest = i.imm[immIdx].u_BA;
3928 *outTaken = SrcKey(curFunc(), i.offset() + dest);
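  // E.g. for a JmpZ whose BA immediate is (say) +24, *outTaken names the
  // bytecode 24 bytes past this instruction's offset, while *outNotTaken is
  // simply the instruction that follows it in the tracelet.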
3931 void TranslatorX64::branchWithFlagsSet(const Tracelet& t,
3932 const NormalizedInstruction& i,
3933 ConditionCode cc) {
3934 ASSERT(i.op() == OpJmpNZ || i.op() == OpJmpZ);
3935 // not_taken
3936 SrcKey taken, notTaken;
3937 branchDests(t, i, &taken, &notTaken);
3938 TRACE(3, "branchWithFlagsSet %d %d cc%02x jmp%sz\n",
3939 taken.offset(), notTaken.offset(), cc,
3940 i.isJmpNZ() ? "n" : "");
3941 emitCondJmp(taken, notTaken, cc);
3944 void TranslatorX64::fuseBranchAfterStaticBool(const Tracelet& t,
3945 const NormalizedInstruction& i,
3946 bool resultIsTrue) {
3947 ASSERT(i.breaksBB);
3948 ASSERT(i.next);
3949 NormalizedInstruction &nexti = *i.next;
3950 fuseBranchSync(t, i);
3951 bool isTaken = (resultIsTrue == nexti.isJmpNZ());
3952 SrcKey taken, notTaken;
3953 branchDests(t, nexti, &taken, &notTaken);
3954 if (isTaken) {
3955 emitBindJmp(taken);
3956 } else {
3957 emitBindJmp(notTaken);
3961 void TranslatorX64::fuseBranchSync(const Tracelet& t,
3962 const NormalizedInstruction& i) {
3963 // Don't bother sync'ing the output of this instruction.
3964 m_regMap.scrubStackEntries(i.outStack->location.offset);
3965 syncOutputs(t);
3968 void TranslatorX64::fuseBranchAfterBool(const Tracelet& t,
3969 const NormalizedInstruction& i,
3970 ConditionCode cc) {
3971 ASSERT(i.breaksBB);
3972 ASSERT(i.next);
3973 NormalizedInstruction &nexti = *i.next;
3974 if (!i.next->isJmpNZ()) cc = ccNegate(cc);
3975 branchWithFlagsSet(t, nexti, cc);
3978 void
3979 TranslatorX64::translateBranchOp(const Tracelet& t,
3980 const NormalizedInstruction& i) {
3981 DEBUG_ONLY const Opcode op = i.op();
3982 ASSERT(op == OpJmpZ || op == OpJmpNZ);
3984 bool isZ = !i.isJmpNZ();
3985 ASSERT(i.inputs.size() == 1);
3986 ASSERT(!i.outStack && !i.outLocal);
3987 m_regMap.allocOutputRegs(i);
3988 const DynLocation& in = *i.inputs[0];
3989 const RuntimeType& rtt = in.rtt;
3990 const Location& inLoc = in.location;
3991 DataType inputType = rtt.outerType();
3992 PhysReg src = getReg(inLoc);
3994 * Careful. We're operating with intimate knowledge of the
3995 * constraints of the register allocator from here out.
3997 if (rtt.isString() || inputType == KindOfArray) {
3998 // str_to_bool and arr_to_bool will decRef for us
3999 void* fptr = IS_STRING_TYPE(inputType) ? (void*)str_to_bool :
4000 (void*)arr_to_bool;
4001 EMIT_CALL(a, fptr, V(inLoc));
4002 src = rax;
4003 ScratchReg sr(m_regMap, rax);
4004 syncOutputs(t);
4005 } else if (inputType != KindOfUninit &&
4006 inputType != KindOfNull &&
4007 inputType != KindOfBoolean &&
4008 !rtt.isInt()) {
4009 // input might be in-flight
4010 m_regMap.cleanLoc(inLoc);
4011 // Cast to a bool.
4012 if (false) {
4013 TypedValue *tv = NULL;
4014 int64 ret = tv_to_bool(tv);
4015 if (ret) {
4016 printf("zoot");
4019 TRACE(2, Trace::prettyNode("tv_to_bool", inLoc) + string("\n"));
4020 // tv_to_bool will decRef for us if appropriate
4021 EMIT_CALL(a, tv_to_bool, A(inLoc));
4022 recordReentrantCall(i);
4023 src = rax;
4024 ScratchReg sr(m_regMap, rax);
4025 syncOutputs(t);
4026 } else {
4027 syncOutputs(t);
4030 // not_taken
4031 SrcKey taken, notTaken;
4032 branchDests(t, i, &taken, &notTaken);
4034 // Since null always evaluates to false, we can emit an
4035 // unconditional jump. OpJmpNZ will never take the branch
4036 // while OpJmpZ will always take the branch.
4037 if (IS_NULL_TYPE(inputType)) {
4038 TRACE(1, "branch on Null -> always Z\n");
4039 emitBindJmp(isZ ? taken : notTaken);
4040 return;
4042 a. test_reg64_reg64(src, src);
4043 branchWithFlagsSet(t, i, isZ ? CC_Z : CC_NZ);
4046 void
4047 TranslatorX64::analyzeCGetL(Tracelet& t, NormalizedInstruction& i) {
4048 ASSERT(i.inputs.size() == 1);
4049 const RuntimeType& type = i.inputs[0]->rtt;
4050 i.m_txFlags = type.isUninit() ? Supported : Native;
4053 void
4054 TranslatorX64::translateCGetL(const Tracelet& t,
4055 const NormalizedInstruction& i) {
4056 const DEBUG_ONLY Opcode op = i.op();
4057 ASSERT(op == OpFPassL || OpCGetL);
4058 const vector<DynLocation*>& inputs = i.inputs;
4059 ASSERT(inputs.size() == 1);
4060 ASSERT(inputs[0]->isLocal());
4061 DataType outType = i.inputs[0]->valueType();
4062 ASSERT(outType != KindOfInvalid);
4064 // Check for use of an undefined local.
4065 if (inputs[0]->rtt.isUninit()) {
4066 ASSERT(!i.outStack || i.outStack->outerType() == KindOfNull);
4067 outType = KindOfNull;
4068 ASSERT(inputs[0]->location.offset < curFunc()->numLocals());
4069 const StringData* name = local_name(inputs[0]->location);
4070 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
4071 recordReentrantCall(i);
4072 if (i.outStack) {
4073 m_regMap.allocOutputRegs(i);
4075 return;
4079 * we can merge a CGetL with a following InstanceOfD
4080 * to avoid the incRef/decRef on the result (see
4081 * analyzeSecondPass).
4083 * outStack will be clear in that case.
4085 if (!i.outStack) return;
4086 ASSERT(outType == i.outStack->outerType());
4087 m_regMap.allocOutputRegs(i);
4088 if (IS_NULL_TYPE(outType)) return;
4089 PhysReg dest = getReg(i.outStack->location);
4091 if (i.manuallyAllocInputs && !m_regMap.hasReg(inputs[0]->location)) {
4092 fill(inputs[0]->location, dest);
4093 } else {
4094 PhysReg localReg = getReg(inputs[0]->location);
4095 emitMovRegReg(localReg, dest);
4097 if (inputs[0]->isVariant()) {
4098 emitDeref(a, dest, dest);
4100 ASSERT(outType != KindOfStaticString);
4101 emitIncRef(dest, outType);
4104 void
4105 TranslatorX64::analyzeCGetL2(Tracelet& t,
4106 NormalizedInstruction& ni) {
4107 const int locIdx = 1;
4108 ASSERT(ni.inputs.size() == 2);
4109 ni.m_txFlags = ni.inputs[locIdx]->rtt.isUninit() ? Supported : Native;
4112 void
4113 TranslatorX64::translateCGetL2(const Tracelet& t,
4114 const NormalizedInstruction& ni) {
4115 const int stackIdx = 0;
4116 const int locIdx = 1;
4118 // Note: even if it's an undefined local we need to move a few
4119 // values around to have outputs end up in the right place.
4120 const bool undefinedLocal = ni.inputs[locIdx]->rtt.isUninit();
4122 if (undefinedLocal) {
4123 ASSERT(ni.outStack->valueType() == KindOfNull);
4124 ASSERT(ni.inputs[locIdx]->location.offset < curFunc()->numLocals());
4125 const StringData* name = local_name(ni.inputs[locIdx]->location);
4127 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
4128 recordReentrantCall(ni);
4130 m_regMap.allocInputRegs(ni);
4133 m_regMap.allocOutputRegs(ni);
4134 const PhysReg stackIn = getReg(ni.inputs[stackIdx]->location);
4135 const PhysReg localIn = getReg(ni.inputs[locIdx]->location);
4136 const PhysReg stackOut = getReg(ni.outStack2->location);
4137 ASSERT(ni.inputs[stackIdx]->location.isStack());
4138 ASSERT(ni.inputs[locIdx]->location.isLocal());
4141 * These registers overlap a bit, so we can swap a few bindings to
4142 * avoid a move.
4144 ASSERT(stackIn == getReg(ni.outStack->location) && localIn != stackOut);
4145 m_regMap.swapRegisters(stackIn, stackOut);
4146 const PhysReg cellOut = getReg(ni.outStack->location);
4147 ASSERT(cellOut != stackIn);
4148 if (ni.inputs[locIdx]->isVariant()) {
4149 emitDeref(a, localIn, cellOut);
4150 } else if (!undefinedLocal) {
4151 emitMovRegReg(localIn, cellOut);
4153 emitIncRef(cellOut, ni.inputs[locIdx]->valueType());
4156 void
4157 TranslatorX64::analyzeVGetL(Tracelet& t,
4158 NormalizedInstruction& i) {
4159 i.m_txFlags = Native;
4162 void
4163 TranslatorX64::translateVGetL(const Tracelet& t,
4164 const NormalizedInstruction& i) {
4165 const DEBUG_ONLY Opcode op = i.op();
4166 ASSERT(op == OpVGetL || op == OpFPassL);
4167 const vector<DynLocation*>& inputs = i.inputs;
4168 ASSERT(inputs.size() == 1);
4169 ASSERT(i.outStack);
4170 ASSERT(inputs[0]->isLocal());
4171 ASSERT(i.outStack->rtt.outerType() == KindOfRef);
4173 PhysReg localReg = getReg(inputs[0]->location);
4174 PhysReg dest;
4175 if (inputs[0]->rtt.outerType() != KindOfRef) {
4176 emitBox(inputs[0]->rtt.outerType(), localReg);
4177 m_regMap.bind(rax, inputs[0]->location, KindOfRef,
4178 RegInfo::DIRTY);
4179 m_regMap.allocOutputRegs(i);
4180 dest = getReg(i.outStack->location);
4181 emitMovRegReg(rax, dest);
4182 } else {
4183 m_regMap.allocOutputRegs(i);
4184 dest = getReg(i.outStack->location);
4185 emitMovRegReg(localReg, dest);
4187 emitIncRef(dest, KindOfRef);
4190 void
4191 TranslatorX64::analyzeAssignToLocalOp(Tracelet& t,
4192 NormalizedInstruction& ni) {
4193 const int locIdx = 1;
4194 ni.m_txFlags = planHingesOnRefcounting(ni.inputs[locIdx]->outerType());
4197 void
4198 TranslatorX64::translateAssignToLocalOp(const Tracelet& t,
4199 const NormalizedInstruction& ni) {
4200 const int rhsIdx = 0;
4201 const int locIdx = 1;
4202 const Opcode op = ni.op();
4203 ASSERT(op == OpSetL || op == OpBindL);
4204 ASSERT(ni.inputs.size() == 2);
4205 ASSERT((op == OpBindL) ==
4206 (ni.inputs[rhsIdx]->outerType() == KindOfRef));
4208 ASSERT(!ni.outStack || ni.inputs[locIdx]->location != ni.outStack->location);
4209 ASSERT(ni.outLocal);
4210 ASSERT(ni.inputs[locIdx]->location == ni.outLocal->location);
4211 ASSERT(ni.inputs[rhsIdx]->isStack());
4213 m_regMap.allocOutputRegs(ni);
4214 const PhysReg rhsReg = getReg(ni.inputs[rhsIdx]->location);
4215 const PhysReg localReg = getReg(ni.outLocal->location);
4216 const DataType oldLocalType = ni.inputs[locIdx]->outerType();
4217 const DataType rhsType = ni.inputs[rhsIdx]->outerType();
4218 ASSERT(localReg != rhsReg);
4220 LazyScratchReg oldLocalReg(m_regMap);
4221 DataType decRefType;
4223 // For SetL, when the local is boxed, we need to change the
4224 // type/value of the inner cell. If we're doing BindL, we don't
4225 // want to affect the old inner cell in any case (except to decref
4226 // it).
4227 const bool affectInnerCell = op == OpSetL &&
4228 oldLocalType == KindOfRef;
4229 if (affectInnerCell) {
4230 ASSERT(rhsType != KindOfRef);
4232 oldLocalReg.alloc();
4233 emitDeref(a, localReg, *oldLocalReg);
4234 emitStoreTypedValue(a, rhsType, rhsReg, 0, localReg);
4235 decRefType = ni.inputs[locIdx]->rtt.innerType();
4236 } else {
4238 * Instead of emitting a mov, just swap the locations these two
4239 * registers are mapped to.
4241 * TODO: this might not be the best idea now that the register
4242 * allocator has some awareness about what is a local. (Maybe we
4243 * should just xchg_reg64_reg64.)
4245 m_regMap.swapRegisters(rhsReg, localReg);
4246 decRefType = oldLocalType;
4249 // If we're giving stack output, it's important to incref before
4250 // calling a possible destructor, since the destructor could have
4251 // access to the local if it is a var.
4252 if (ni.outStack) {
4253 emitIncRef(rhsReg, rhsType);
4254 } else {
4255 SKTRACE(3, ni.source, "hoisting Pop* into current instr\n");
4258 emitDecRef(ni, oldLocalReg.isAllocated() ? *oldLocalReg : localReg,
4259 decRefType);
4261 if (ni.outStack && !IS_NULL_TYPE(ni.outStack->outerType())) {
4262 PhysReg stackReg = getReg(ni.outStack->location);
4263 emitMovRegReg(rhsReg, stackReg);
4267 static void
4268 planPop(NormalizedInstruction& i) {
4269 if (i.prev && i.prev->outputPredicted) {
4270 i.prev->outputPredicted = false;
4271 i.inputs[0]->rtt = RuntimeType(KindOfInvalid);
4273 DataType type = i.inputs[0]->outerType();
4274 i.m_txFlags =
4275 (type == KindOfInvalid || IS_REFCOUNTED_TYPE(type)) ? Supported : Native;
4276 i.manuallyAllocInputs = true;
4279 void TranslatorX64::analyzePopC(Tracelet& t, NormalizedInstruction& i) {
4280 planPop(i);
4283 void TranslatorX64::analyzePopV(Tracelet& t, NormalizedInstruction& i) {
4284 planPop(i);
4287 void TranslatorX64::analyzePopR(Tracelet& t, NormalizedInstruction& i) {
4288 planPop(i);
4291 void
4292 TranslatorX64::translatePopC(const Tracelet& t,
4293 const NormalizedInstruction& i) {
4294 ASSERT(i.inputs.size() == 1);
4295 ASSERT(!i.outStack && !i.outLocal);
4296 if (i.inputs[0]->rtt.isVagueValue()) {
4297 PhysReg base;
4298 int disp;
4299 locToRegDisp(i.inputs[0]->location, &base, &disp);
4300 emitDecRefGeneric(i, base, disp);
4301 } else {
4302 emitDecRefInput(a, i, 0);
4306 void
4307 TranslatorX64::translatePopV(const Tracelet& t,
4308 const NormalizedInstruction& i) {
4309 ASSERT(i.inputs[0]->rtt.isVagueValue() ||
4310 i.inputs[0]->isVariant());
4311 translatePopC(t, i);
4314 void
4315 TranslatorX64::translatePopR(const Tracelet& t,
4316 const NormalizedInstruction& i) {
4317 translatePopC(t, i);
4320 void
4321 TranslatorX64::translateUnboxR(const Tracelet& t,
4322 const NormalizedInstruction& i) {
4323 ASSERT(!i.inputs[0]->rtt.isVagueValue());
4325 // If the value on top of the stack is a var, unbox it and
4326 // leave it on the top of the stack.
4327 if (i.inputs[0]->isVariant()) {
4328 emitUnboxTopOfStack(i);
4332 void
4333 TranslatorX64::translateNull(const Tracelet& t,
4334 const NormalizedInstruction& i) {
4335 ASSERT(i.inputs.size() == 0);
4336 ASSERT(!i.outLocal);
4337 if (i.outStack) {
4338 ASSERT(i.outStack->outerType() == KindOfNull);
4340 // We have to mark the output register as dirty to ensure that
4341 // the type gets spilled at the end of the tracelet
4342 m_regMap.allocOutputRegs(i);
4344 /* nop */
4347 void
4348 TranslatorX64::translateTrue(const Tracelet& t,
4349 const NormalizedInstruction& i) {
4350 ASSERT(i.inputs.size() == 0);
4351 ASSERT(!i.outLocal);
4352 if (i.outStack) {
4353 m_regMap.allocOutputRegs(i);
4354 PhysReg rdest = getReg(i.outStack->location);
4355 emitImmReg(a, 1, rdest);
4359 void
4360 TranslatorX64::translateFalse(const Tracelet& t,
4361 const NormalizedInstruction& i) {
4362 ASSERT(i.inputs.size() == 0);
4363 ASSERT(!i.outLocal);
4364 if (i.outStack) {
4365 m_regMap.allocOutputRegs(i);
4366 PhysReg dest = getReg(i.outStack->location);
4367 emitImmReg(a, false, dest);
4371 void
4372 TranslatorX64::translateInt(const Tracelet& t,
4373 const NormalizedInstruction& i) {
4374 ASSERT(i.inputs.size() == 0);
4375 ASSERT(!i.outLocal);
4376 if (i.outStack) {
4377 ASSERT(i.outStack->isInt());
4378 m_regMap.allocOutputRegs(i);
4379 PhysReg dest = getReg(i.outStack->location);
4380 uint64_t srcImm = i.imm[0].u_I64A;
4381 emitImmReg(a, srcImm, dest);
4385 void
4386 TranslatorX64::translateString(const Tracelet& t,
4387 const NormalizedInstruction& i) {
4388 ASSERT(i.inputs.size() == 0);
4389 ASSERT(!i.outLocal);
4390 if (!i.outStack) return;
4391 ASSERT(Translator::typeIsString(i.outStack->outerType()));
4392 m_regMap.allocOutputRegs(i);
4393 PhysReg dest = getReg(i.outStack->location);
4394 uint64_t srcImm = (uintptr_t)curUnit()->lookupLitstrId(i.imm[0].u_SA);
4395 // XXX: can simplify the lookup here by just fishing it out of the
4396 // output's valueString().
4397 // We are guaranteed that the string is static, so we do not need to
4398 // increment the refcount
4399 ASSERT(((StringData*)srcImm)->isStatic());
4400 SKTRACE(2, i.source, "Litstr %d -> %p \"%s\"\n",
4401 i.imm[0].u_SA, (StringData*)srcImm,
4402 Util::escapeStringForCPP(((StringData*)srcImm)->data()).c_str());
4403 emitImmReg(a, srcImm, dest);
4406 void
4407 TranslatorX64::translateArray(const Tracelet& t,
4408 const NormalizedInstruction& i) {
4409 ASSERT(i.inputs.size() == 0);
4410 ASSERT(!i.outLocal);
4411 if (i.outStack) {
4412 ASSERT(i.outStack->outerType() == KindOfArray);
4413 m_regMap.allocOutputRegs(i);
4414 ArrayData* ad = curUnit()->lookupArrayId(i.imm[0].u_AA);
4415 PhysReg r = getReg(i.outStack->location);
4416 emitImmReg(a, uint64(ad), r);
4417 // We are guaranteed that the array is static, so we do not need to
4418 // increment the refcount
4419 ASSERT(ad->isStatic());
4423 void
4424 TranslatorX64::translateNewArray(const Tracelet& t,
4425 const NormalizedInstruction& i) {
4426 ASSERT(i.inputs.size() == 0);
4427 ASSERT(i.outStack && !i.outLocal);
4428 ASSERT(i.outStack->outerType() == KindOfArray);
4429 m_regMap.allocOutputRegs(i);
4430 PhysReg r = getReg(i.outStack->location);
4431 emitImmReg(a, uint64(HphpArray::GetStaticEmptyArray()), r);
4432 // We are guaranteed that the new array is static, so we do not need to
4433 // increment the refcount
4434 ASSERT(HphpArray::GetStaticEmptyArray()->isStatic());
4437 void
4438 TranslatorX64::analyzeNop(Tracelet& t, NormalizedInstruction& i) {
4439 i.m_txFlags = Native;
4442 void
4443 TranslatorX64::translateNop(const Tracelet& t,
4444 const NormalizedInstruction& i) {
4447 void
4448 TranslatorX64::analyzeAddElemC(Tracelet& t, NormalizedInstruction& i) {
4449 i.m_txFlags = supportedPlan(i.inputs[2]->outerType() == KindOfArray &&
4450 (i.inputs[1]->isInt() ||
4451 i.inputs[1]->isString()));
4454 void
4455 TranslatorX64::translateAddElemC(const Tracelet& t,
4456 const NormalizedInstruction& i) {
4457 ASSERT(i.outStack && !i.outLocal);
4458 ASSERT(i.inputs.size() >= 3);
4459 const DynLocation& arr = *i.inputs[2];
4460 const DynLocation& key = *i.inputs[1];
4461 const DynLocation& val = *i.inputs[0];
4462 ASSERT(!arr.isVariant()); // not handling variants.
4463 ASSERT(!key.isVariant());
4464 ASSERT(!val.isVariant());
4466 const Location& arrLoc = arr.location;
4467 const Location& keyLoc = key.location;
4468 const Location& valLoc = val.location;
4470 ASSERT(arrLoc.isStack());
4471 ASSERT(keyLoc.isStack());
4472 ASSERT(arrLoc.isStack());
4474 // If either the key or the rhs is not Int64, we will need to pass the
4475 // rhs by address, so we need to sync it back to memory
4476 if (!key.rtt.isInt() || !val.rtt.isInt()) {
4477 m_regMap.cleanLoc(valLoc);
4480 // The array_setm helpers will decRef any old value that is
4481 // overwritten if appropriate. If copy-on-write occurs, it will also
4482 // incRef the new array and decRef the old array for us. Finally,
4483 // some of the array_setm helpers will decRef the key if it is a
4484 // string (for cases where the key is not a local), while others do
4485 // not (for cases where the key is a local).
4486 void* fptr;
4487 if (key.rtt.isInt() && val.rtt.isInt()) {
4488 if (false) { // type-check
4489 TypedValue* cell = NULL;
4490 ArrayData* arr = NULL;
4491 ArrayData* ret = array_setm_ik1_iv(cell, arr, 12, 3);
4492 printf("%p", ret); // use ret
4494 // If the rhs is Int64, we can use a specialized helper
4495 EMIT_CALL(a, array_setm_ik1_iv,
4496 IMM(0),
4497 V(arrLoc),
4498 V(keyLoc),
4499 V(valLoc));
4500 recordReentrantCall(i);
4501 } else if (key.rtt.isInt() || key.rtt.isString()) {
4502 if (false) { // type-check
4503 TypedValue* cell = NULL;
4504 TypedValue* rhs = NULL;
4505 StringData* strkey = NULL;
4506 ArrayData* arr = NULL;
4507 ArrayData* ret;
4508 ret = array_setm_ik1_v0(cell, arr, 12, rhs);
4509 printf("%p", ret); // use ret
4510 ret = array_setm_sk1_v0(cell, arr, strkey, rhs);
4511 printf("%p", ret); // use ret
4513 // Otherwise, we pass the rhs by address
4514 fptr = key.rtt.isString() ? (void*)array_setm_sk1_v0 :
4515 (void*)array_setm_ik1_v0;
4516 EMIT_CALL(a, fptr,
4517 IMM(0),
4518 V(arrLoc),
4519 V(keyLoc),
4520 A(valLoc));
4521 recordReentrantCall(i);
4522 } else {
4523 ASSERT(false);
4525 // The array value may have changed, so we need to invalidate any
4526 // register we have associated with arrLoc
4527 m_regMap.invalidate(arrLoc);
4528 // The array_setm helper returns the up-to-date array pointer in rax.
4529 // Therefore, we can bind rax to arrLoc and mark it as dirty.
4530 m_regMap.bind(rax, arrLoc, KindOfArray, RegInfo::DIRTY);
4533 void
4534 TranslatorX64::analyzeAddNewElemC(Tracelet& t, NormalizedInstruction& i) {
4535 ASSERT(i.inputs.size() == 2);
4536 i.m_txFlags = supportedPlan(i.inputs[1]->outerType() == KindOfArray);
4539 void
4540 TranslatorX64::translateAddNewElemC(const Tracelet& t,
4541 const NormalizedInstruction& i) {
4542 ASSERT(i.inputs.size() == 2);
4543 ASSERT(i.outStack && !i.outLocal);
4544 ASSERT(i.inputs[0]->outerType() != KindOfRef);
4545 ASSERT(i.inputs[1]->outerType() != KindOfRef);
4546 ASSERT(i.inputs[0]->isStack());
4547 ASSERT(i.inputs[1]->isStack());
4549 Location arrLoc = i.inputs[1]->location;
4550 Location valLoc = i.inputs[0]->location;
4552 // We pass the rhs by address, so we need to sync it back to memory
4553 m_regMap.cleanLoc(valLoc);
4555 // The array_setm helpers will decRef any old value that is
4556 // overwritten if appropriate. If copy-on-write occurs, it will also
4557 // incRef the new array and decRef the old array for us. Finally,
4558 // some of the array_setm helpers will decRef the key if it is a
4559 // string (for cases where the key is not a local), while others do
4560 // not (for cases where the key is a local).
4561 if (false) { // type-check
4562 TypedValue* cell = NULL;
4563 TypedValue* rhs = NULL;
4564 ArrayData* arr = NULL;
4565 ArrayData* ret;
4566 ret = array_setm_wk1_v0(cell, arr, rhs);
4567 printf("%p", ret); // use ret
4569 EMIT_CALL(a, array_setm_wk1_v0,
4570 IMM(0),
4571 V(arrLoc),
4572 A(valLoc));
4573 recordReentrantCall(i);
4574 // The array value may have changed, so we need to invalidate any
4575 // register we have associated with arrLoc
4576 m_regMap.invalidate(arrLoc);
4577 // The array_setm helper returns the up-to-date array pointer in rax.
4578 // Therefore, we can bind rax to arrLoc and mark it as dirty.
4579 m_regMap.bind(rax, arrLoc, KindOfArray, RegInfo::DIRTY);
4582 static void undefCns(const StringData* nm) {
4583 VMRegAnchor _;
4584 TypedValue *cns = g_vmContext->getCns(const_cast<StringData*>(nm));
4585 if (!cns) {
4586 raise_notice(Strings::UNDEFINED_CONSTANT, nm->data(), nm->data());
4587 g_vmContext->getStack().pushStringNoRc(const_cast<StringData*>(nm));
4588 } else {
4589 Cell* c1 = g_vmContext->getStack().allocC();
4590 TV_READ_CELL(cns, c1);
4594 void TranslatorX64::emitSideExit(Asm& a, const NormalizedInstruction& i,
4595 bool next) {
4596 const NormalizedInstruction& dest = next ? *i.next : i;
4598 SKTRACE(3, i.source, "sideexit check %p\n", a.code.frontier);
4599 // NB: if next == true, we are assuming here that stack elements
4600 // spit out by this instruction are already clean and sync'd back to
4601 // the top slot of the stack.
4602 m_regMap.scrubStackEntries(dest.stackOff);
4603 m_regMap.cleanAll();
4604 emitRB(a, RBTypeSideExit, i.source);
4605 int stackDisp = dest.stackOff;
4606 if (stackDisp != 0) {
4607 SKTRACE(3, i.source, "stack bump %d => %x\n", stackDisp,
4608 -cellsToBytes(stackDisp));
4609 a. add_imm32_reg64(-cellsToBytes(stackDisp), rVmSp);
4611 emitBindJmp(a, dest.source, REQ_BIND_SIDE_EXIT);
4614 void
4615 TranslatorX64::translateCns(const Tracelet& t,
4616 const NormalizedInstruction& i) {
4617 ASSERT(i.inputs.size() == 0);
4618 ASSERT(i.outStack && !i.outLocal);
4620 // OK to burn "name" into TC: it was merged into the static string
4621 // table, so as long as this code is reachable, so should the string
4622 // be.
4623 DataType outType = i.outStack->valueType();
4624 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4625 const TypedValue* tv = g_vmContext->getCns(name, true, false);
4626 bool checkDefined = false;
4627 if (outType != KindOfInvalid && tv == NULL &&
4628 !RuntimeOption::RepoAuthoritative) {
4629 PreConstDepMap::accessor acc;
4630 tv = findUniquePreConst(acc, name);
4631 if (tv != NULL) {
4632 checkDefined = true;
4633 acc->second.srcKeys.insert(t.m_sk);
4634 Stats::emitInc(a, Stats::Tx64_CnsFast);
4635 } else {
4636 // We had a unique value while analyzing but don't anymore. This
4637 // should be rare so just punt to keep things simple.
4638 punt();
4641 using namespace TargetCache;
4642 if (tv && tvIsStatic(tv)) {
4643 m_regMap.allocOutputRegs(i);
4644 if (checkDefined) {
4645 size_t bit = allocCnsBit(name);
4646 uint32 mask;
4647 CacheHandle ch = bitOffToHandleAndMask(bit, mask);
4648 // The 'test' instruction takes a signed immediate and the mask is
4649 // unsigned, but everything works out okay because the immediate is
4650 // the same size as the other operand. However, we have to sign-extend
4651 // the mask to 64 bits to make the assembler happy.
4652 int64_t imm = (int64_t)(int32)mask;
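  // E.g. a mask of 0x80000000 becomes the immediate 0xffffffff80000000;
  // only the low 32 bits take part in the 32-bit test, so the sign
  // extension is purely to satisfy the assembler's operand checks.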
4653 a.test_imm32_disp_reg32(imm, ch, rVmTl);
4655 // If we get to the optimistic translation and the constant
4656 // isn't defined, our tracelet is ruined because the type may
4657 // not be what we expect. If we were expecting KindOfString we
4658 // could theoretically keep going here since that's the type
4659 // of an undefined constant expression, but it should be rare
4660 // enough that it's not worth the complexity.
4661 UnlikelyIfBlock<CC_Z> ifZero(a, astubs);
4662 Stats::emitInc(astubs, Stats::Tx64_CnsFast, -1);
4663 emitSideExit(astubs, i, false);
4666 // Its type and value are known at compile-time.
4667 ASSERT(tv->m_type == outType ||
4668 (IS_STRING_TYPE(tv->m_type) && IS_STRING_TYPE(outType)));
4669 PhysReg r = getReg(i.outStack->location);
4670 a. mov_imm64_reg(tv->m_data.num, r);
4671 // tv is static; no need to incref
4672 return;
4675 Stats::emitInc(a, Stats::Tx64_CnsSlow);
4676 CacheHandle ch = allocConstant(name);
4677 TRACE(2, "Cns: %s -> ch %ld\n", name->data(), ch);
4678 // Load the constant out of the thread-private tl_targetCaches.
4679 ScratchReg cns(m_regMap);
4680 a. lea_reg64_disp_reg64(rVmTl, ch, *cns);
4681 a. cmp_imm32_disp_reg32(0, TVOFF(m_type), *cns);
4682 DiamondReturn astubsRet;
4683 int stackDest = 0 - int(sizeof(Cell)); // popped - pushed
4685 // It's tempting to dedup these, but not obvious we really can;
4686 // at least stackDest and tmp are specific to the translation
4687 // context.
4688 UnlikelyIfBlock<CC_Z> ifb(a, astubs, &astubsRet);
4689 EMIT_CALL(astubs, undefCns, IMM((uintptr_t)name));
4690 recordReentrantStubCall(i);
4691 m_regMap.invalidate(i.outStack->location);
4694 // Bitwise copy to output area.
4695 emitCopyToStack(a, i, *cns, stackDest);
4696 m_regMap.invalidate(i.outStack->location);
4699 void
4700 TranslatorX64::analyzeDefCns(Tracelet& t,
4701 NormalizedInstruction& i) {
4702 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4703 /* don't bother to translate if it names a builtin constant */
4704 i.m_txFlags = supportedPlan(!g_vmContext->getCns(name, true, false));
4707 typedef void (*defCnsHelper_func_t)(TargetCache::CacheHandle ch, Variant *inout,
4708 StringData *name, size_t bit);
4709 template<bool setBit>
4710 static void defCnsHelper(TargetCache::CacheHandle ch, Variant *inout,
4711 StringData *name, size_t bit) {
4712 using namespace TargetCache;
4713 TypedValue *tv = (TypedValue*)handleToPtr(ch);
4714 if (LIKELY(tv->m_type == KindOfUninit &&
4715 inout->isAllowedAsConstantValue())) {
4716 inout->setEvalScalar();
4717 if (LIKELY(g_vmContext->insertCns(name, (TypedValue*)inout))) {
4718 tvDup((TypedValue*)inout, tv);
4719 *inout = true;
4720 if (setBit) {
4721 DEBUG_ONLY bool alreadyDefined = testAndSetBit(bit);
4722 ASSERT(!alreadyDefined);
4724 return;
4726 tv = (TypedValue*)&false_varNR;
4729 if (tv->m_type != KindOfUninit) {
4730 raise_warning(Strings::CONSTANT_ALREADY_DEFINED, name->data());
4731 } else {
4732 ASSERT(!inout->isAllowedAsConstantValue());
4733 raise_warning(Strings::CONSTANTS_MUST_BE_SCALAR);
4735 *inout = false;
4738 void
4739 TranslatorX64::translateDefCns(const Tracelet& t,
4740 const NormalizedInstruction& i) {
4741 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4743 if (false) {
4744 TargetCache::CacheHandle ch = 0;
4745 size_t bit = 0;
4746 Variant *inout = 0;
4747 StringData *name = 0;
4748 defCnsHelper<true>(ch, inout, name, bit);
4749 defCnsHelper<false>(ch, inout, name, bit);
4752 using namespace TargetCache;
4753 CacheHandle ch = allocConstant(name);
4754 TRACE(2, "DefCns: %s -> ch %ld\n", name->data(), ch);
4756 m_regMap.cleanLoc(i.inputs[0]->location);
4757 if (RuntimeOption::RepoAuthoritative) {
4758 EMIT_CALL(a, (defCnsHelper_func_t)defCnsHelper<false>,
4759 IMM(ch), A(i.inputs[0]->location),
4760 IMM((uint64)name));
4761 } else {
4762 EMIT_CALL(a, (defCnsHelper_func_t)defCnsHelper<true>,
4763 IMM(ch), A(i.inputs[0]->location),
4764 IMM((uint64)name), IMM(allocCnsBit(name)));
4766 recordReentrantCall(i);
4767 m_regMap.invalidate(i.outStack->location);
4770 void
4771 TranslatorX64::translateClsCnsD(const Tracelet& t,
4772 const NormalizedInstruction& i) {
4773 using namespace TargetCache;
4774 const NamedEntityPair& namedEntityPair =
4775 curUnit()->lookupNamedEntityPairId(i.imm[1].u_SA);
4776 ASSERT(namedEntityPair.second);
4777 const StringData *clsName = namedEntityPair.first;
4778 ASSERT(clsName->isStatic());
4779 StringData* cnsName = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4780 ASSERT(cnsName->isStatic());
4781 StringData* fullName = StringData::GetStaticString(
4782 Util::toLower(clsName->data()) + "::" + cnsName->data());
4784 Stats::emitInc(a, Stats::TgtCache_ClsCnsHit);
4785 CacheHandle ch = allocClassConstant(fullName);
4786 ScratchReg cns(m_regMap);
4787 a.lea_reg64_disp_reg64(rVmTl, ch, *cns);
4788 a.cmp_imm32_disp_reg32(0, TVOFF(m_type), *cns);
4790 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
4792 if (false) { // typecheck
4793 TypedValue* tv = NULL;
4794 UNUSED TypedValue* ret =
4795 TargetCache::lookupClassConstant(tv, namedEntityPair.second,
4796 namedEntityPair.first, cnsName);
4799 EMIT_CALL(astubs, TCA(TargetCache::lookupClassConstant),
4800 R(*cns),
4801 IMM(uintptr_t(namedEntityPair.second)),
4802 IMM(uintptr_t(namedEntityPair.first)),
4803 IMM(uintptr_t(cnsName)));
4804 recordReentrantStubCall(i);
4805 // DiamondGuard will restore cns's SCRATCH state but not its
4806 // contents. lookupClassConstant returns the value we want.
4807 emitMovRegReg(astubs, rax, *cns);
4809 int stackDest = 0 - int(sizeof(Cell)); // 0 popped - 1 pushed
4810 emitCopyToStack(a, i, *cns, stackDest);
4813 void
4814 TranslatorX64::analyzeConcat(Tracelet& t, NormalizedInstruction& i) {
4815 ASSERT(i.inputs.size() == 2);
4816 const RuntimeType& r = i.inputs[0]->rtt;
4817 const RuntimeType& l = i.inputs[1]->rtt;
4818 // The concat translation isn't reentrant; objects that override
4819 // __toString() can cause reentry.
4820 i.m_txFlags = simplePlan(r.valueType() != KindOfObject &&
4821 l.valueType() != KindOfObject);
4824 void
4825 TranslatorX64::translateConcat(const Tracelet& t,
4826 const NormalizedInstruction& i) {
4827 ASSERT(i.inputs.size() == 2);
4828 const DynLocation& r = *i.inputs[0];
4829 const DynLocation& l = *i.inputs[1];
4830 // We have specialized helpers for concatenating two strings, a
4831 // string and an int, and an int and a string.
4832 void* fptr = NULL;
4833 if (l.rtt.isString() && r.rtt.isString()) {
4834 fptr = (void*)concat_ss;
4835 } else if (l.rtt.isString() && r.rtt.isInt()) {
4836 fptr = (void*)concat_si;
4837 } else if (l.rtt.isInt() && r.rtt.isString()) {
4838 fptr = (void*)concat_is;
4840 if (fptr) {
4841 // If we have a specialized helper, use it
4842 if (false) { // type check
4843 StringData* v1 = NULL;
4844 StringData* v2 = NULL;
4845 StringData* retval = concat_ss(v1, v2);
4846 printf("%p", retval); // use retval
4849 // The concat helper will decRef the inputs and incRef the output
4850 // for us if appropriate
4851 EMIT_CALL(a, fptr,
4852 V(l.location),
4853 V(r.location));
4854 ASSERT(i.outStack->rtt.isString());
4855 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4856 RegInfo::DIRTY);
4858 } else {
4859 // Otherwise, use the generic concat helper
4860 if (false) { // type check
4861 uint64_t v1 = 0, v2 = 0;
4862 DataType t1 = KindOfUninit, t2 = KindOfUninit;
4863 StringData *retval = concat(t1, v1, t2, v2);
4864 printf("%p", retval); // use retval
4866 // concat will decRef the two inputs and incRef the output
4867 // for us if appropriate
4868 EMIT_CALL(a, concat,
4869 IMM(l.valueType()), V(l.location),
4870 IMM(r.valueType()), V(r.location));
4871 ASSERT(i.outStack->isString());
4872 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4873 RegInfo::DIRTY);
4877 TXFlags
4878 planInstrAdd_Int(const NormalizedInstruction& i) {
4879 ASSERT(i.inputs.size() == 2);
4880 return nativePlan(i.inputs[0]->isInt() && i.inputs[1]->isInt());
4883 TXFlags
4884 planInstrAdd_Array(const NormalizedInstruction& i) {
4885 ASSERT(i.inputs.size() == 2);
4886 return supportedPlan(i.inputs[0]->valueType() == KindOfArray &&
4887 i.inputs[1]->valueType() == KindOfArray);
4890 void
4891 TranslatorX64::analyzeAdd(Tracelet& t, NormalizedInstruction& i) {
4892 i.m_txFlags = TXFlags(planInstrAdd_Int(i) | planInstrAdd_Array(i));
4895 void
4896 TranslatorX64::translateAdd(const Tracelet& t,
4897 const NormalizedInstruction& i) {
4898 ASSERT(i.inputs.size() == 2);
4900 if (planInstrAdd_Array(i)) {
4901 // Handle adding two arrays
4902 ASSERT(i.outStack->outerType() == KindOfArray);
4903 if (false) { // type check
4904 ArrayData* v = NULL;
4905 v = array_add(v, v);
4907 // The array_add helper will decRef the inputs and incRef the output
4908 // for us if appropriate
4909 EMIT_CALL(a, array_add,
4910 V(i.inputs[1]->location),
4911 V(i.inputs[0]->location));
4912 recordReentrantCall(i);
4913 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4914 RegInfo::DIRTY);
4915 return;
4918 ASSERT(planInstrAdd_Int(i));
4919 binaryArithCell(i, OpAdd, *i.inputs[0], *i.outStack);
4922 void
4923 TranslatorX64::analyzeXor(Tracelet& t, NormalizedInstruction& i) {
4924 i.m_txFlags = nativePlan((i.inputs[0]->outerType() == KindOfBoolean ||
4925 i.inputs[0]->isInt()) &&
4926 (i.inputs[1]->outerType() == KindOfBoolean ||
4927 i.inputs[1]->isInt()));
4930 static inline void
4931 emitIntToCCBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch,
4932 int CC) {
4934 * test %srcdest, %srcdest
4935 * set<CC> %scratchL
4936 * movzbq %scratchL, %srcdest
4938 a. test_reg64_reg64(srcdest, srcdest);
4939 a. setcc (CC, scratch);
4940 a. mov_reg8_reg64_unsigned(scratch, srcdest);
4943 static inline void
4944 emitIntToBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch) {
4945 emitIntToCCBool(a, srcdest, scratch, CC_NZ);
4948 static inline void
4949 emitIntToNegBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch) {
4950 emitIntToCCBool(a, srcdest, scratch, CC_Z);
4953 void
4954 TranslatorX64::translateXor(const Tracelet& t,
4955 const NormalizedInstruction& i) {
4956 PhysReg src, srcdest;
4957 getBinaryStackInputs(m_regMap, i, src, srcdest);
4958 m_regMap.allocOutputRegs(i);
4959 ScratchReg scr(m_regMap);
4960 if (i.inputs[0]->isInt()) {
4961 emitIntToBool(a, src, *scr);
4963 if (i.inputs[1]->isInt()) {
4964 emitIntToBool(a, srcdest, *scr);
4966 a. xor_reg64_reg64(src, srcdest);
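  // E.g. for inputs int(7) and bool(true): the int operand is normalized to
  // exactly 1 by emitIntToBool, so the xor computes 1 ^ 1 == 0, which is the
  // logical-xor result PHP expects for those operands.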
4969 void
4970 TranslatorX64::analyzeNot(Tracelet& t, NormalizedInstruction& i) {
4971 ASSERT(i.inputs.size() == 1);
4972 i.m_txFlags = nativePlan(i.inputs[0]->isInt() ||
4973 i.inputs[0]->outerType() == KindOfBoolean);
4976 void
4977 TranslatorX64::translateNot(const Tracelet& t,
4978 const NormalizedInstruction& i) {
4979 ASSERT(i.isNative());
4980 ASSERT(i.outStack && !i.outLocal);
4981 ASSERT(!i.inputs[0]->isVariant());
4982 m_regMap.allocOutputRegs(i);
4983 PhysReg srcdest = m_regMap.getReg(i.outStack->location);
4984 ScratchReg scr(m_regMap);
4985 emitIntToNegBool(a, srcdest, *scr);
4988 void
4989 TranslatorX64::analyzeBitNot(Tracelet& t, NormalizedInstruction& i) {
4990 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
4993 void
4994 TranslatorX64::translateBitNot(const Tracelet& t,
4995 const NormalizedInstruction& i) {
4996 ASSERT(i.outStack && !i.outLocal);
4997 m_regMap.allocOutputRegs(i);
4998 PhysReg srcdest = m_regMap.getReg(i.outStack->location);
4999 a. not_reg64(srcdest);
5002 void
5003 TranslatorX64::analyzeCastInt(Tracelet& t, NormalizedInstruction& i) {
5004 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
5007 void
5008 TranslatorX64::translateCastInt(const Tracelet& t,
5009 const NormalizedInstruction& i) {
5010 ASSERT(i.inputs.size() == 1);
5011 ASSERT(i.outStack && !i.outLocal);
5013 /* nop */
5016 void
5017 TranslatorX64::analyzeCastString(Tracelet& t, NormalizedInstruction& i) {
5018 i.m_txFlags =
5019 i.inputs[0]->isArray() || i.inputs[0]->isObject() ? Supported :
5020 i.inputs[0]->isInt() ? Simple :
5021 Native;
5022 i.funcd = NULL;
5025 static void toStringError(StringData *cls) {
5026 raise_error("Method __toString() must return a string value");
5029 static const StringData* stringDataFromInt(int64 n) {
5030 StringData* s = buildStringData(n);
5031 s->incRefCount();
5032 return s;
5035 static const StringData* stringDataFromDouble(int64 n) {
5036 StringData* s = buildStringData(*(double*)&n);
5037 s->incRefCount();
5038 return s;
5041 void TranslatorX64::toStringHelper(ObjectData *obj) {
5042 // caller must set r15 to the new ActRec
5043 static_assert(rStashedAR == r15 &&
5044 rVmFp == rbp,
5045 "toStringHelper needs to be updated for ABI changes");
5046 register ActRec *ar asm("r15");
5047 register ActRec *rbp asm("rbp");
5049 const Class* cls = obj->getVMClass();
5050 const Func* toString = cls->getToString();
5051 if (!toString) {
5052 // the unwinder will restore rVmSp to
5053 // &ar->m_r, so we'd better make sure it's
5054 // got a valid TypedValue there.
5055 TV_WRITE_UNINIT(&ar->m_r);
5056 std::string msg = cls->preClass()->name()->data();
5057 msg += "::__toString() was not defined";
5058 throw BadTypeConversionException(msg.c_str());
5060 // ar->m_savedRbp set by caller
5061 ar->m_savedRip = rbp->m_savedRip;
5062 ar->m_func = toString;
5063 // ar->m_soff set by caller
5064 ar->initNumArgs(0);
5065 ar->setThis(obj);
5066 ar->setVarEnv(0);
5067 // Point the return address of this C++ function at the prologue to
5068 // execute.
5069 rbp->m_savedRip = (uint64_t)toString->getPrologue(0);
5072 void
5073 TranslatorX64::translateCastString(const Tracelet& t,
5074 const NormalizedInstruction& i) {
5075 ASSERT(i.inputs.size() == 1);
5076 ASSERT(i.outStack && !i.outLocal);
5078 if (i.inputs[0]->isNull()) {
5079 m_regMap.allocOutputRegs(i);
5080 PhysReg dest = m_regMap.getReg(i.outStack->location);
5081 a. mov_imm64_reg((uint64)empty_string.get(), dest);
5082 } else if (i.inputs[0]->isBoolean()) {
5083 static StringData* s_1 = StringData::GetStaticString("1");
5084 m_regMap.allocOutputRegs(i);
5085 PhysReg dest = m_regMap.getReg(i.outStack->location);
5086 a. cmp_imm32_reg64(0, dest);
5087 a. mov_imm64_reg((uint64)empty_string.get(), dest);
5088 ScratchReg scratch(m_regMap);
5089 a. mov_imm64_reg((intptr_t)s_1, *scratch);
5090 a. cmov_reg64_reg64(CC_NZ, *scratch, dest);
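    // The cmp above tests the input boolean, which at this point should
    // still be live in dest; the imm64 moves leave the flags untouched, so
    // the cmov selects "1" when the input was true and keeps empty_string
    // otherwise.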
5091 } else if (i.inputs[0]->isInt()) {
5092 EMIT_CALL(a, stringDataFromInt, V(i.inputs[0]->location));
5093 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5094 RegInfo::DIRTY);
5095 } else if (i.inputs[0]->isDouble()) {
5096 EMIT_CALL(a, stringDataFromDouble, V(i.inputs[0]->location));
5097 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5098 RegInfo::DIRTY);
5099 } else if (i.inputs[0]->isString()) {
5100 // nop
5101 } else if (i.inputs[0]->isArray()) {
5102 static StringData* s_array = StringData::GetStaticString("Array");
5103 m_regMap.allocOutputRegs(i);
5104 PhysReg dest = m_regMap.getReg(i.outStack->location);
5105 emitDecRef(i, dest, KindOfArray);
5106 a. mov_imm64_reg((uint64)s_array, dest);
5107 } else if (i.inputs[0]->isObject()) {
5108 m_regMap.scrubStackEntries(i.stackOff - 1);
5109 m_regMap.cleanAll();
5110 int delta = i.stackOff + kNumActRecCells - 1;
5111 if (delta) {
5112 a. add_imm64_reg64(-cellsToBytes(delta), rVmSp);
5114 a. store_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmSp);
5115 a. store_imm32_disp_reg(nextSrcKey(t, i).offset() - curFunc()->base(),
5116 AROFF(m_soff), rVmSp);
5117 PhysReg obj = m_regMap.getReg(i.inputs[0]->location);
5118 if (obj != argNumToRegName[0]) {
5119 a. mov_reg64_reg64(obj, argNumToRegName[0]);
5121 m_regMap.smashRegs(kAllRegs);
5122 a. mov_reg64_reg64(rVmSp, rStashedAR);
5123 EMIT_CALL(a, TCA(toStringHelper));
5124 recordReentrantCall(i);
5125 if (i.stackOff != 0) {
5126 a. add_imm64_reg64(cellsToBytes(i.stackOff), rVmSp);
5129 PhysReg base;
5130 int disp;
5131 locToRegDisp(i.outStack->location, &base, &disp);
5132 ScratchReg scratch(m_regMap);
5133 emitStringCheck(a, base, disp + TVOFF(m_type), *scratch);
5135 UnlikelyIfBlock<CC_NZ> ifNotString(a, astubs);
5136 EMIT_CALL(astubs, toStringError, IMM(0));
5137 recordReentrantStubCall(i);
5139 } else {
5140 NOT_REACHED();
5144 void
5145 TranslatorX64::analyzePrint(Tracelet& t, NormalizedInstruction& i) {
5146 ASSERT(i.inputs.size() == 1);
5147 const RuntimeType& rtt = i.inputs[0]->rtt;
5148 DataType type = rtt.outerType();
5149 i.m_txFlags = simplePlan(
5150 type == KindOfUninit ||
5151 type == KindOfNull ||
5152 type == KindOfBoolean ||
5153 rtt.isInt() ||
5154 rtt.isString());
5157 void
5158 TranslatorX64::translatePrint(const Tracelet& t,
5159 const NormalizedInstruction& i) {
5160 const vector<DynLocation*>& inputs = i.inputs;
5161 ASSERT(inputs.size() == 1);
5162 ASSERT(!i.outLocal);
5163 ASSERT(!i.outStack || i.outStack->isInt());
5164 Location loc = inputs[0]->location;
5165 DataType type = inputs[0]->outerType();
5166 switch (type) {
5167 STRINGCASE(): EMIT_CALL(a, print_string, V(loc)); break;
5168 case KindOfInt64: EMIT_CALL(a, print_int, V(loc)); break;
5169 case KindOfBoolean: EMIT_CALL(a, print_boolean, V(loc)); break;
5170 NULLCASE(): /* do nothing */ break;
5171 default: {
5172 // Translation is only supported for Null, Boolean, Int, and String
5173 ASSERT(false);
5174 break;
5177 m_regMap.allocOutputRegs(i);
5178 if (i.outStack) {
5179 PhysReg outReg = getReg(i.outStack->location);
5180 emitImmReg(a, 1, outReg);
5184 void
5185 TranslatorX64::translateJmp(const Tracelet& t,
5186 const NormalizedInstruction& i) {
5187 ASSERT(!i.outStack && !i.outLocal);
5188 syncOutputs(t);
5190 // Check the surprise page on all backwards jumps
5191 if (i.imm[0].u_BA < 0) {
5192 if (trustSigSegv) {
5193 const uint64_t stackMask =
5194 ~(cellsToBytes(RuntimeOption::EvalVMStackElms) - 1);
5195 a.mov_reg64_reg64(rVmSp, rScratch);
5196 a.and_imm64_reg64(stackMask, rScratch);
5197 TCA surpriseLoad = a.code.frontier;
5198 a.load_reg64_disp_reg64(rScratch, 0, rScratch);
5200 if (!m_segvStubs.insert(SignalStubMap::value_type(surpriseLoad,
5201 astubs.code.frontier)))
5202 NOT_REACHED();
5204 * Note that it is safe not to register unwind information here,
5205 * because we just called syncOutputs so all registers are
5206 * already clean.
5208 astubs.call((TCA)&EventHook::CheckSurprise);
5209 recordStubCall(i);
5210 astubs.jmp(a.code.frontier);
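      // Roughly: instead of an explicit surprise-flag test, we load from the
      // base of the VM stack region; presumably that page is made
      // inaccessible when a surprise is pending, so the load faults and
      // m_segvStubs lets the signal handler map the faulting instruction
      // back to this stub, which calls CheckSurprise and resumes.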
5211 } else {
5212 emitTestSurpriseFlags();
5214 UnlikelyIfBlock<CC_NZ> ifSurprise(a, astubs);
5215 astubs.call((TCA)&EventHook::CheckSurprise);
5216 recordStubCall(i);
5220 SrcKey sk(curFunc(), i.offset() + i.imm[0].u_BA);
5221 emitBindJmp(sk);
5224 void
5225 TranslatorX64::analyzeSwitch(Tracelet& t,
5226 NormalizedInstruction& i) {
5227 RuntimeType& rtt = i.inputs[0]->rtt;
5228 ASSERT(rtt.outerType() != KindOfRef);
5229 switch (rtt.outerType()) {
5230 NULLCASE():
5231 case KindOfBoolean:
5232 case KindOfInt64:
5233 i.m_txFlags = Native;
5234 break;
5236 case KindOfDouble:
5237 i.m_txFlags = Simple;
5238 break;
5240 STRINGCASE():
5241 case KindOfObject:
5242 case KindOfArray:
5243 i.m_txFlags = Supported;
5244 break;
5246 default:
5247 not_reached();
5251 template <typename T>
5252 static int64 switchBoundsCheck(T v, int64 base, int64 nTargets) {
5253 // I'm relying on gcc to be smart enough to optimize away the next
5254 // two lines when T is int64.
5255 if (int64(v) == v) {
5256 int64 ival = v;
5257 if (ival >= base && ival < (base + nTargets)) {
5258 return ival - base;
5261 return nTargets + 1;
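  // E.g. switchBoundsCheck(5.0, 0, 10) yields 5, while switchBoundsCheck(5.7,
  // 0, 10) fails the int64(v) == v test and yields nTargets + 1, the
  // out-of-range slot that the switch translation wires up to the default
  // target.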
5264 static int64 switchDoubleHelper(int64 val, int64 base, int64 nTargets) {
5265 union {
5266 int64 intbits;
5267 double dblval;
5268 } u;
5269 u.intbits = val;
5270 return switchBoundsCheck(u.dblval, base, nTargets);
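  // The int64 val presumably carries the double's raw bit pattern (the
  // cell's m_data word), which the union reinterprets as a double before
  // bounds-checking.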
5273 static int64 switchStringHelper(StringData* s, int64 base, int64 nTargets) {
5274 int64 ival;
5275 double dval;
5276 switch (s->isNumericWithVal(ival, dval, 1)) {
5277 case KindOfNull:
5278 ival = switchBoundsCheck(0, base, nTargets);
5279 break;
5281 case KindOfDouble:
5282 ival = switchBoundsCheck(dval, base, nTargets);
5283 break;
5285 case KindOfInt64:
5286 ival = switchBoundsCheck(ival, base, nTargets);
5287 break;
5289 default:
5290 not_reached();
5292 if (s->decRefCount() == 0) {
5293 s->release();
5295 return ival;
5298 static int64 switchObjHelper(ObjectData* o, int64 base, int64 nTargets) {
5299 int64 ival = o->o_toInt64();
5300 if (o->decRefCount() == 0) {
5301 o->release();
5303 return switchBoundsCheck(ival, base, nTargets);
5306 void
5307 TranslatorX64::translateSwitch(const Tracelet& t,
5308 const NormalizedInstruction& i) {
5309 int64 base = i.imm[1].u_I64A;
5310 bool bounded = i.imm[2].u_IVA;
5311 const ImmVector& iv = i.immVec;
5312 int nTargets = bounded ? iv.size() - 2 : iv.size();
5313 int jmptabSize = nTargets;
5314 ASSERT(nTargets > 0);
5315 PhysReg valReg = getReg(i.inputs[0]->location);
5316 DataType inType = i.inputs[0]->outerType();
5317 ASSERT(IMPLIES(inType != KindOfInt64, bounded));
5318 ASSERT(IMPLIES(bounded, iv.size() > 2));
5319 syncOutputs(t); // this will mark valReg as FREE but it still has
5320 // its old value
5322 SrcKey defaultSk(curFunc(), i.offset() + iv.vec32()[iv.size() - 1]);
5323 SrcKey zeroSk(curFunc(), 0);
5324 if (0 >= base && 0 < (base + nTargets)) {
5325 zeroSk.m_offset = i.offset() + iv.vec32()[0 - base];
5326 } else {
5327 zeroSk.m_offset = defaultSk.m_offset;
5330 switch (i.inputs[0]->outerType()) {
5331 NULLCASE(): {
5332 emitBindJmp(zeroSk);
5333 return;
5336 case KindOfBoolean: {
5337 SrcKey nonzeroSk(curFunc(), i.offset() + iv.vec32()[iv.size() - 2]);
5338 a.test_reg64_reg64(valReg, valReg);
5339 emitCondJmp(nonzeroSk, zeroSk, CC_NZ);
5340 return;
5343 case KindOfInt64:
5344 // No special treatment needed
5345 break;
5347 case KindOfDouble:
5348 STRINGCASE():
5349 case KindOfObject: {
5350      // The switch(Double|String|Obj)Helper functions do bounds-checking for us, so
5350      // The switch(Double|String|Obj)Helper functions do bounds-checking for us,
5351 // we need to make sure the default case is in the jump table,
5352 // and don't emit our own bounds-checking code
5353 jmptabSize = iv.size();
5354 bounded = false;
5355 if (false) {
5356 StringData* s = NULL;
5357 ObjectData* o = NULL;
5358 switchDoubleHelper(0.0, 0, 0);
5359 switchStringHelper(s, 0, 0);
5360 switchObjHelper(o, 0, 0);
5362 EMIT_CALL(a,
5363 inType == KindOfDouble ? (TCA)switchDoubleHelper :
5364 (IS_STRING_TYPE(inType) ? (TCA)switchStringHelper :
5365 (TCA)switchObjHelper),
5366 R(valReg), IMM(base), IMM(nTargets));
5367 recordCall(i);
5368 valReg = rax;
5369 break;
5372 case KindOfArray:
5373 emitDecRef(a, i, valReg, KindOfArray);
5374 emitBindJmp(defaultSk);
5375 return;
5377 default:
5378 not_reached();
5381 if (bounded) {
5382 if (base) {
5383 a.sub_imm64_reg64(base, valReg);
5385 a.cmp_imm64_reg64(nTargets, valReg);
5386 prepareForSmash(a, kJmpccLen);
5387 TCA defaultStub =
5388 emitServiceReq(REQ_BIND_JMPCC_SECOND, 3,
5389 a.code.frontier, defaultSk.m_offset, CC_AE);
5390 // Unsigned comparison: check for < 0 and >= nTargets at the same time
5391 a.jae(defaultStub);
5394 TCA* jmptab = m_globalData.alloc<TCA>(sizeof(TCA), jmptabSize);
5395 TCA afterLea = a.code.frontier + kLeaRipLen;
5396 ptrdiff_t diff = (TCA)jmptab - afterLea;
5397 ASSERT(deltaFits(diff, sz::dword));
5398 a.lea_rip_disp_reg64(diff, rScratch);
5399 ASSERT(a.code.frontier == afterLea);
5400 a.jmp_reg64_index_displ(rScratch, valReg, 0);
5402 for (int idx = 0; idx < jmptabSize; ++idx) {
5403 SrcKey sk(curFunc(), i.offset() + iv.vec32()[idx]);
5404 jmptab[idx] = emitServiceReq(false, REQ_BIND_ADDR, 2ull,
5405 &jmptab[idx], uint64_t(sk.offset()));
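// (Added note: the rip-relative lea above leaves &jmptab[0] in rScratch, so
// the indexed jmp dispatches through jmptab[valReg]. Each slot starts out
// pointing at a REQ_BIND_ADDR service request, which is expected to rebind
// the slot to the real translation once the target has been generated.)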
5409 void
5410 TranslatorX64::analyzeRetC(Tracelet& t,
5411 NormalizedInstruction& i) {
5412 i.manuallyAllocInputs = true;
5413 i.m_txFlags = Supported;
5416 void
5417 TranslatorX64::analyzeRetV(Tracelet& t,
5418 NormalizedInstruction& i) {
5419 analyzeRetC(t, i);
5422 static TypedValue getGroupedRetTV(const NormalizedInstruction& i) {
5423 TypedValue tv;
5424 TV_WRITE_UNINIT(&tv);
5425 tv.m_data.num = 0; // to keep the compiler happy
5426 if (!i.grouped) return tv;
5429 * We suppressed the write of the (literal) return value
5430 * to the stack. Figure out what it was.
5432 NormalizedInstruction* prev = i.prev;
5433 ASSERT(!prev->outStack);
5434 switch (prev->op()) {
5435 case OpNull:
5436 tv.m_type = KindOfNull;
5437 break;
5438 case OpTrue:
5439 case OpFalse:
5440 tv.m_type = KindOfBoolean;
5441 tv.m_data.num = prev->op() == OpTrue;
5442 break;
5443 case OpInt:
5444 tv.m_type = KindOfInt64;
5445 tv.m_data.num = prev->imm[0].u_I64A;
5446 break;
5447 case OpDouble:
5448 tv.m_type = KindOfDouble;
5449 tv.m_data.dbl = prev->imm[0].u_DA;
5450 break;
5451 case OpString:
5452 tv.m_type = BitwiseKindOfString;
5453 tv.m_data.pstr = curUnit()->lookupLitstrId(prev->imm[0].u_SA);
5454 break;
5455 case OpArray:
5456 tv.m_type = KindOfArray;
5457 tv.m_data.parr = curUnit()->lookupArrayId(prev->imm[0].u_AA);
5458 break;
5459 default:
5460 not_reached();
5462 return tv;
5465 // translateRetC --
5467 // Return to caller with the current activation record replaced with the
5468 // top-of-stack return value. Call with outputs sync'ed, so the code
5469 // we're emitting runs "in between" basic blocks.
5470 void
5471 TranslatorX64::translateRetC(const Tracelet& t,
5472 const NormalizedInstruction& i) {
5473 if (i.skipSync) ASSERT(i.grouped);
5474 if (i.grouped) ASSERT(freeLocalsInline());
5477 * This method chooses one of two ways to generate machine code for RetC
5478 * depending on whether we are generating a specialized return (where we
5479 * free the locals inline when possible) or a generic return (where we call
5480 * a helper function to free locals).
5482 * For the specialized return, we emit the following flow:
5484 * Check if varenv is NULL
5485 * If it's not NULL, branch to label 2
5486 * Free each local variable
5487 * 1:
5488 * Teleport the return value to appropriate memory location
5489 * Restore the old values for rVmFp and rVmSp, and
5490 * unconditionally transfer control back to the caller
5491 * 2:
5492 * Call the frame_free_locals helper
5493 * Jump to label 1
5495 * For a generic return, we emit the following flow:
5497 * Call the frame_free_locals helper
5498 * Teleport the return value to appropriate memory location
5499 * Restore the old values for rVmFp and rVmSp, and
5500 * unconditionally transfer control back to the caller
5503 int stackAdjustment = t.m_stackChange;
5504 if (i.skipSync) {
5505 SKTRACE(2, i.source, "i.skipSync\n");
5508 * getting here means there was nothing to do between
5509 * a previous reqXXX and this ret. Any spill code we generate
5510 * here would be broken (because the rbx is wrong), so
5511 * verify that we don't generate anything...
5513 TCA s DEBUG_ONLY = a.code.frontier;
5514 syncOutputs(0);
5515 ASSERT(s == a.code.frontier);
5516 stackAdjustment = 0;
5517 } else {
5519 * no need to syncOutputs here... we're going to update
5520 * rbx at the end of this function anyway, and we may want
5521 * to use enregistered locals on the fast path below
5523 m_regMap.scrubStackEntries(t.m_stackChange);
5524 m_regMap.cleanAll(); // TODO(#1339331): don't.
5527 bool noThis = !curFunc()->isPseudoMain() &&
5528 (!curFunc()->isMethod() || curFunc()->isStatic());
5529 bool mayUseVV = (curFunc()->attrs() & AttrMayUseVV);
5531 const TypedValue groupedRetTV = getGroupedRetTV(i);
5534 * figure out where to put the return value, and where to get it from
5536 ASSERT(i.stackOff == t.m_stackChange);
5537 const Location retValSrcLoc(Location::Stack, stackAdjustment - 1);
5539 const Func *callee = curFunc();
5540 ASSERT(callee);
5541 int nLocalCells =
5542 callee == NULL ? 0 : // This happens for returns from pseudo-main.
5543 callee->numSlotsInFrame();
5544 int retvalSrcBase = cellsToBytes(-stackAdjustment);
5546 ASSERT(cellsToBytes(locPhysicalOffset(retValSrcLoc)) == retvalSrcBase);
5549 * The (1 + nLocalCells) skips 1 slot for the return value.
5551 int retvalDestDisp = cellsToBytes(1 + nLocalCells - stackAdjustment) +
5552 AROFF(m_r);
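// (Added worked example, using the sizeof(Cell) == 16 asserted below: for a
// frame with nLocalCells == 2 and stackAdjustment == 1, the return value is
// read from retvalSrcBase == cellsToBytes(-1) == -16 bytes off rVmSp and
// written to retvalDestDisp == cellsToBytes(1 + 2 - 1) + AROFF(m_r)
// == 32 + AROFF(m_r) bytes off rVmSp, i.e. into the ActRec's m_r slot.)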
5554 if (freeLocalsInline()) {
5555 SKTRACE(2, i.source, "emitting specialized inline return\n");
5557 // Emit specialized code inline to clean up the locals
5558 ASSERT(curFunc()->numLocals() == (int)i.inputs.size());
5560 ScratchReg rTmp(m_regMap);
5563 * If this function can possibly use variadic arguments or shared
5564 * variable environment, we need to check for it and go to a
5565 * generic return if so.
5567 boost::scoped_ptr<DiamondReturn> mayUseVVRet;
5568 if (mayUseVV) {
5569 SKTRACE(2, i.source, "emitting mayUseVV in UnlikelyIf\n");
5571 mayUseVVRet.reset(new DiamondReturn);
5572 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rTmp);
5573 a. test_reg64_reg64(*rTmp, *rTmp);
5575 UnlikelyIfBlock<CC_NZ> varEnvCheck(a, astubs, mayUseVVRet.get());
5577 m_regMap.cleanAll();
5578 if (i.grouped) {
5579 emitStoreImm(astubs, groupedRetTV.m_type,
5580 rVmSp, retvalSrcBase + TVOFF(m_type), sz::dword);
5581 if (groupedRetTV.m_type != KindOfNull) {
5582 emitStoreImm(astubs, groupedRetTV.m_data.num,
5583 rVmSp, retvalSrcBase, sz::qword);
5586 emitFrameRelease(astubs, i, noThis);
5590 for (unsigned int k = 0; k < i.inputs.size(); ++k) {
5591 // RetC's inputs should all be locals
5592 ASSERT(i.inputs[k]->location.space == Location::Local);
5593 DataType t = i.inputs[k]->outerType();
5594 if (IS_REFCOUNTED_TYPE(t)) {
5595 PhysReg reg = m_regMap.allocReg(i.inputs[k]->location, t,
5596 RegInfo::CLEAN);
5597 emitDecRef(i, reg, t);
5601    // If this is an instance method called on an object or if it is a
5602    // pseudo-main, we need to decRef $this (if there is one)
5603 if (curFunc()->isMethod() && !curFunc()->isStatic()) {
5604 // This assert is weaker than it looks; it only checks the invocation
5605 // we happen to be translating for. The runtime "assert" is the
5606 // unconditional dereference of m_this we emit; if the frame has
5607 // neither this nor a class, then m_this will be null and we'll
5608 // SEGV.
5609 ASSERT(curFrame()->hasThis() || curFrame()->hasClass());
5610 // m_this and m_cls share a slot in the ActRec, so we check the
5611 // lowest bit (0 -> m_this, 1 -> m_cls)
5612 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rTmp);
5613 if (i.guardedThis) {
5614 emitDecRef(i, *rTmp, KindOfObject);
5615 } else {
5616 a. test_imm32_reg64(1, *rTmp);
5618 JccBlock<CC_NZ> ifZero(a);
5619 emitDecRef(i, *rTmp, KindOfObject); // this. decref it.
5622 } else if (curFunc()->isPseudoMain()) {
5623 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rTmp);
5624 a. shr_imm32_reg64(1, *rTmp); // sets c (from bit 0) and z
5625 FreezeRegs ice(m_regMap);
5627 // tests for Not Zero and Not Carry
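      // (Added note: shr leaves the m_this/m_cls tag bit in the carry flag
      // and sets the zero flag when the slot was null; CC_NBE therefore
      // fires only for a real, non-null $this, which the stub shifts back
      // into a pointer before decreffing it.)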
5628 UnlikelyIfBlock<CC_NBE> ifRealThis(a, astubs);
5629 astubs. shl_imm32_reg64(1, *rTmp);
5630 emitDecRef(astubs, i, *rTmp, KindOfObject);
5634 // Register map is officially out of commission now.
5635 m_regMap.scrubLoc(retValSrcLoc);
5636 m_regMap.smashRegs(kAllRegs);
5638 emitTestSurpriseFlags();
5640 UnlikelyIfBlock<CC_NZ> ifTracer(a, astubs);
5641 if (i.grouped) {
5642 emitStoreImm(astubs, groupedRetTV.m_type,
5643 rVmSp, retvalSrcBase + TVOFF(m_type), sz::dword);
5644 if (groupedRetTV.m_type != KindOfNull) {
5645 emitStoreImm(astubs, groupedRetTV.m_data.num,
5646 rVmSp, retvalSrcBase, sz::qword);
5649 astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5650 emitCall(astubs, (TCA)&EventHook::FunctionExit, true);
5651 recordReentrantStubCall(i);
5654 // The register map on the main line better be empty (everything
5655 // smashed) or some of the above DiamondReturns might generate
5656 // reconciliation code.
5657 ASSERT(m_regMap.empty());
5658 } else {
5659 SKTRACE(2, i.source, "emitting generic return\n");
5661 if (i.grouped) {
5663 * What a pain: EventHook::onFunctionExit needs access
5664 * to the return value - so we'd better not have suppressed
5665 * writing it to the stack...
5667 ASSERT(false);
5668 not_reached();
5670 m_regMap.cleanAll();
5671 m_regMap.smashRegs(kAllRegs);
5672 // If we are doing the generic return flow, we emit a call to
5673 // frame_free_locals here
5674 ASSERT(i.inputs.size() == 0);
5675 emitFrameRelease(a, i, noThis);
5679 * We're officially between tracelets now, and the normal register
5680 * allocator is not being used.
5682 ASSERT(m_regMap.empty());
5683 RegSet scratchRegs = kScratchCrossTraceRegs;
5684 DumbScratchReg rRetAddr(scratchRegs);
5686 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRip), *rRetAddr);
5687 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
5690 * Having gotten everything we care about out of the current frame
5691 * pointer, smash the return address type and value over it. We don't
5692 * care about reference counts: as long as this runs to completion, we're
5693 * refcount-neutral.
5695 if (i.grouped) {
5696 emitStoreImm(a, groupedRetTV.m_type,
5697 rVmSp, retvalDestDisp + TVOFF(m_type), sz::dword);
5698 if (groupedRetTV.m_type != KindOfNull) {
5699 emitStoreImm(a, groupedRetTV.m_data.num,
5700 rVmSp, retvalDestDisp, sz::qword);
5702 } else {
5703 ASSERT(sizeof(Cell) == 16);
5704 a. load_reg64_disp_reg64 (rVmSp, retvalSrcBase, rScratch);
5705 a. store_reg64_disp_reg64(rScratch, retvalDestDisp, rVmSp);
5706 a. load_reg64_disp_reg64 (rVmSp, retvalSrcBase + 8, rScratch);
5707 a. store_reg64_disp_reg64(rScratch, retvalDestDisp + 8, rVmSp);
5711 * Now update the principal hardware registers.
5713 * Stack pointer has to skip over all the locals as well as the
5714 * activation record.
5716 a. add_imm64_reg64(sizeof(ActRec) +
5717 cellsToBytes(nLocalCells - stackAdjustment), rVmSp);
5718 emitRB(a, RBTypeFuncExit, curFunc()->fullName()->data(), RegSet(*rRetAddr));
5719 a. jmp_reg (*rRetAddr);
5720 translator_not_reached(a);
5723 void
5724 TranslatorX64::translateRetV(const Tracelet& t,
5725 const NormalizedInstruction& i) {
5726 translateRetC(t, i);
5730 * NativeImpl is a special operation in the sense that it must be the
5731 * only opcode in a function body, and also functions as the return.
5733 * This function runs between tracelets and does not use m_regMap.
5735 void TranslatorX64::emitNativeImpl(const Func* func,
5736 bool emitSavedRIPReturn) {
5737 BuiltinFunction builtinFuncPtr = func->builtinFuncPtr();
5738 if (false) { // typecheck
5739 ActRec* ar = NULL;
5740 builtinFuncPtr(ar);
5743 TRACE(2, "calling builtin preClass %p func %p\n", func->preClass(),
5744 builtinFuncPtr);
5746 * Call the native implementation. This will free the locals for us in the
5747 * normal case. In the case where an exception is thrown, the VM unwinder
5748 * will handle it for us.
5750 a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5751 emitCall(a, (TCA)builtinFuncPtr, false /* smash regs */);
5754 * We're sometimes calling this while curFunc() isn't really the
5755 * builtin---make sure to properly record the sync point as if we
5756 * are inside the builtin.
5758 * The assumption here is that for builtins, the generated func
5759 * contains only a single opcode (NativeImpl), and there are no
5760 * non-argument locals.
5762 ASSERT(func->numIterators() == 0 && func->isBuiltin());
5763 ASSERT(func->numLocals() == func->numParams());
5764 ASSERT(*func->getEntry() == OpNativeImpl);
5765 ASSERT(instrLen(func->getEntry()) == func->past() - func->base());
5766 Offset pcOffset = 0; // NativeImpl is the only instruction in the func
5767 Offset stackOff = func->numLocals(); // Builtin stubs have no
5768 // non-arg locals
5769 recordSyncPoint(a, pcOffset, stackOff);
5771 RegSet unusedRegs = kScratchCrossTraceRegs;
5772 DumbScratchReg rRetAddr(unusedRegs);
5774 RegSet saveDuringEmitRB;
5775 if (emitSavedRIPReturn) {
5776 // Get the return address from the ActRec
5777 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRip), *rRetAddr);
5778 saveDuringEmitRB |= RegSet(*rRetAddr);
5782 * The native implementation already put the return value on the
5783 * stack for us, and handled cleaning up the arguments. We have to
5784 * update the frame pointer and the stack pointer, and load the
5785 * return value into the return register so the trace we are
5786 * returning to has it where it expects.
5788 * TODO(#1273094): we should probably modify the actual builtins to
5789 * return values via registers (rax:edx) using the C ABI and do a
5790 * reg-to-reg move.
5792 int nLocalCells = func->numSlotsInFrame();
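// (Added note: the nLocalCells - 1 below skips the frame's local slots but
// leaves room for one cell -- presumably the slot where the builtin left its
// return value -- so the adjusted rVmSp accounts for the value already on
// the stack.)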
5793 a. add_imm64_reg64(sizeof(ActRec) + cellsToBytes(nLocalCells-1), rVmSp);
5794 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
5796 emitRB(a, RBTypeFuncExit, func->fullName()->data(), saveDuringEmitRB);
5797 if (emitSavedRIPReturn) {
5798 a. jmp_reg (*rRetAddr);
5799 translator_not_reached(a);
5803 void
5804 TranslatorX64::translateNativeImpl(const Tracelet& t,
5805 const NormalizedInstruction& ni) {
5807 * We assume that NativeImpl is the only instruction in the trace,
5808 * and the only instruction for the implementation of the function.
5810 ASSERT(ni.stackOff == 0);
5811 ASSERT(m_regMap.empty());
5812 emitNativeImpl(curFunc(), true);
5815 // Warning: smashes rsi and rdi, and can't handle unclean registers.
5816 // Used between functions.
5817 void
5818 TranslatorX64::emitFrameRelease(X64Assembler& a,
5819 const NormalizedInstruction& i,
5820 bool noThis /*= false*/) {
5821 if (false) { // typecheck
5822 frame_free_locals(curFrame(), 0);
5824 a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5825 int numLocals = curFunc()->numLocals();
5826 emitImmReg(a, numLocals, argNumToRegName[1]);
5827 if (noThis) {
5828 emitCall(a, (TCA)frame_free_locals_no_this);
5829 } else {
5830 emitCall(a, (TCA)frame_free_locals);
5832 recordReentrantCall(a, i);
5835 // emitClsLocalIndex --
5836 // emitStringToClass --
5837 // emitStringToKnownClass --
5838 // emitObjToClass --
5839 // emitClsAndPals --
5840 // Helpers for AGetC/AGetL.
5842 const int kEmitClsLocalIdx = 0;
5845 * Determine if the class is defined, and fatal if not.
5846  * If reg is not noreg, return the Class* in it.
5847  * If we can statically prove that the class is defined,
5848  * all checks are omitted (e.g. it's a parent of the current,
5849 * fixed, context).
5851 void
5852 TranslatorX64::emitKnownClassCheck(const NormalizedInstruction& i,
5853 const StringData* clsName,
5854 register_name_t reg) {
5855 using namespace TargetCache;
5856 ASSERT(clsName);
5857 Class* klass = Unit::lookupClass(clsName);
5858 bool guarded = false;
5859 if (klass) {
5860 guarded = i.guardedCls;
5861 if (!guarded && isContextFixed()) {
5862 Class *ctx = curFunc()->cls();
5863 if (ctx && ctx->classof(klass)) {
5864 guarded = true;
5868 if (guarded) {
5869 if (reg != reg::noreg) {
5870 emitImmReg(a, (uint64_t)klass, reg);
5872 } else {
5873 Stats::emitInc(a, Stats::TgtCache_KnownClsHit);
5874 CacheHandle ch = allocKnownClass(clsName);
5875 if (reg == reg::noreg) {
5876 a. cmp_imm32_disp_reg32(0, ch, rVmTl);
5877 } else {
5878 a. load_reg64_disp_reg64(rVmTl, ch, reg);
5879 a. test_reg64_reg64(reg, reg);
5882 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
5883 ScratchReg clsPtr(m_regMap);
5884 astubs. lea_reg64_disp_reg64(rVmTl, ch, *clsPtr);
5885 if (false) { // typecheck
5886 Class** cache = NULL;
5887 UNUSED Class* ret =
5888 TargetCache::lookupKnownClass<false>(cache, clsName, true);
5890 // We're only passing two arguments to lookupKnownClass because
5891 // the third is ignored in the checkOnly == false case
5892 EMIT_CALL(astubs, ((TargetCache::lookupKnownClass_func_t)
5893 TargetCache::lookupKnownClass<false>),
5894 R(*clsPtr), IMM((uintptr_t)clsName));
5895 recordReentrantStubCall(i);
5896 if (reg != reg::noreg) {
5897 emitMovRegReg(astubs, rax, reg);
5903 void
5904 TranslatorX64::emitStringToKnownClass(const NormalizedInstruction& i,
5905 const StringData* clsName) {
5906 ScratchReg cls(m_regMap);
5907 emitKnownClassCheck(i, clsName, *cls);
5908 m_regMap.bindScratch(cls, i.outStack->location, KindOfClass, RegInfo::DIRTY);
5911 void
5912 TranslatorX64::emitStringToClass(const NormalizedInstruction& i) {
5913 using namespace TargetCache;
5914 if (!i.inputs[kEmitClsLocalIdx]->rtt.valueString()) {
5915 // Handle the case where we don't know the name of the class
5916 // at translation time
5917 const Location& in = i.inputs[kEmitClsLocalIdx]->location;
5918 const Location& out = i.outStack->location;
5919 CacheHandle ch = ClassCache::alloc();
5920 if (false) {
5921 StringData *name = NULL;
5922 const UNUSED Class* cls = ClassCache::lookup(ch, name);
5924 TRACE(1, "ClassCache @ %d\n", int(ch));
5925 if (i.inputs[kEmitClsLocalIdx]->rtt.isVariant()) {
5926 EMIT_CALL(a, ClassCache::lookup,
5927 IMM(ch),
5928 DEREF(in));
5929 } else {
5930 EMIT_CALL(a, ClassCache::lookup,
5931 IMM(ch),
5932 V(in));
5934 recordReentrantCall(i);
5935 m_regMap.bind(rax, out, KindOfClass, RegInfo::DIRTY);
5936 return;
5938 // We know the name of the class at translation time; use the
5939 // target cache associated with the name of the class
5940 const StringData* clsName = i.inputs[kEmitClsLocalIdx]->rtt.valueString();
5941 emitStringToKnownClass(i, clsName);
5944 void
5945 TranslatorX64::emitObjToClass(const NormalizedInstruction& i) {
5946 m_regMap.allocOutputRegs(i);
5947 const Location& in = i.inputs[kEmitClsLocalIdx]->location;
5948 const Location& out = i.outStack->location;
5949 PhysReg src = getReg(in);
5950 ScratchReg tmp(m_regMap);
5951 if (i.inputs[kEmitClsLocalIdx]->rtt.isVariant()) {
5952 emitDeref(a, src, *tmp);
5953 src = *tmp;
5955 ASSERT(i.outStack->valueType() == KindOfClass);
5956 a. load_reg64_disp_reg64(src, ObjectData::getVMClassOffset(), getReg(out));
5959 void
5960 TranslatorX64::emitClsAndPals(const NormalizedInstruction& ni) {
5961 if (ni.inputs[kEmitClsLocalIdx]->isString()) {
5962 emitStringToClass(ni);
5963 } else {
5964 emitObjToClass(ni);
5968 void
5969 TranslatorX64::analyzeAGetC(Tracelet& t, NormalizedInstruction& i) {
5970 ASSERT(i.inputs.size() == 1);
5971 ASSERT(i.outStack && !i.outLocal);
5972 ASSERT(i.outStack->valueType() == KindOfClass);
5973 const RuntimeType& rtt = i.inputs[0]->rtt;
5974 ASSERT(!rtt.isVariant());
5975 i.m_txFlags = supportedPlan(rtt.isString() ||
5976 rtt.valueType() == KindOfObject);
5977 if (rtt.isString() && rtt.valueString()) i.manuallyAllocInputs = true;
5980 void TranslatorX64::translateAGetC(const Tracelet& t,
5981 const NormalizedInstruction& ni) {
5982 if (ni.outStack) {
5983 emitClsAndPals(ni);
5987 void TranslatorX64::analyzeAGetL(Tracelet& t,
5988 NormalizedInstruction& ni) {
5989 ASSERT(ni.inputs.size() == 1);
5990 ASSERT(ni.inputs[0]->isLocal());
5991 const RuntimeType& rtt = ni.inputs[0]->rtt;
5992 ni.m_txFlags = supportedPlan(rtt.isString() ||
5993 rtt.valueType() == KindOfObject);
5996 void TranslatorX64::translateAGetL(const Tracelet& t,
5997 const NormalizedInstruction& ni) {
5998 emitClsAndPals(ni);
6001 void TranslatorX64::translateSelf(const Tracelet& t,
6002 const NormalizedInstruction& i) {
6003 m_regMap.allocOutputRegs(i);
6004 PhysReg tmp = getReg(i.outStack->location);
6005 ASSERT(isContextFixed() && curFunc()->cls());
6006 emitImmReg(a, (int64_t)curFunc()->cls(), tmp);
6009 void TranslatorX64::translateParent(const Tracelet& t,
6010 const NormalizedInstruction& i) {
6011 m_regMap.allocOutputRegs(i);
6012 PhysReg tmp = getReg(i.outStack->location);
6013 ASSERT(isContextFixed() && curFunc()->cls() && curFunc()->cls()->parent());
6014 emitImmReg(a, (int64_t)curFunc()->cls()->parent(), tmp);
6017 void TranslatorX64::analyzeSelf(Tracelet& t,NormalizedInstruction& i) {
6018 Class* clss = curClass();
6019 if (clss == NULL) {
6020 i.m_txFlags = Interp;
6021 return;
6023 i.m_txFlags = Supported;
6026 void TranslatorX64::analyzeParent(Tracelet& t,NormalizedInstruction& i) {
6027 Class* clss = curClass();
6028 if (clss == NULL) {
6029 i.m_txFlags = Interp;
6030 return;
6032 if (clss->parent() == NULL) {
6033 // clss has no parent; interpret to throw fatal
6034 i.m_txFlags = Interp;
6035 return;
6037 i.m_txFlags = Supported;
6040 void TranslatorX64::translateDup(const Tracelet& t,
6041 const NormalizedInstruction& ni) {
6042 ASSERT(ni.inputs.size() == 1);
6043 ASSERT(ni.outStack);
6044 ASSERT(!ni.inputs[0]->rtt.isVariant());
6045 m_regMap.allocOutputRegs(ni);
6046 PhysReg outR = getReg(ni.outStack->location);
6047 emitMovRegReg(a, getReg(ni.inputs[0]->location), outR);
6048 emitIncRef(outR, ni.inputs[0]->outerType());
6051 typedef std::map<int, int> ParamMap;
6053 * mapContParams determines if every named local in origFunc has a
6054 * corresponding named local in genFunc. If this step succeeds and
6055 * there's no VarEnv at runtime, the continuation's variables can be
6056 * filled completely inline in the TC (assuming there aren't too
6057 * many).
6059 bool TranslatorX64::mapContParams(ParamMap& map,
6060 const Func* origFunc, const Func* genFunc) {
6061 const StringData* const* varNames = origFunc->localNames();
6062 for (Id i = 0; i < origFunc->numNamedLocals(); ++i) {
6063 Id id = genFunc->lookupVarId(varNames[i]);
6064 if (id != kInvalidId) {
6065 map[i] = id;
6066 } else {
6067 return false;
6070 return true;
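// (Added illustrative example, with hypothetical names: if origFunc declares
// locals $x and $y and the generator body genFunc also declares both, the map
// comes back as {0 -> genFunc's id for $x, 1 -> genFunc's id for $y} and the
// continuation's locals can be filled inline; if genFunc lacks any of them,
// mapContParams returns false and translateCreateCont falls back to the
// emitCallFillCont slow path.)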
6073 void TranslatorX64::emitCallFillCont(X64Assembler& a,
6074 const Func* orig,
6075 const Func* gen) {
6076 if (false) {
6077 ActRec* fp = NULL;
6078 c_Continuation *cont = NULL;
6079 cont =
6080 VMExecutionContext::fillContinuationVars(fp, orig, gen, cont);
6082 EMIT_CALL(a,
6083 VMExecutionContext::fillContinuationVars,
6084 R(rVmFp),
6085 IMM((intptr_t)orig),
6086 IMM((intptr_t)gen),
6087 R(rax));
6090 void TranslatorX64::translateCreateCont(const Tracelet& t,
6091 const NormalizedInstruction& i) {
6092 bool getArgs = i.imm[0].u_IVA;
6093 const StringData* genName = curUnit()->lookupLitstrId(i.imm[1].u_SA);
6094 const Func* origFunc = curFunc();
6095 const Func* genFunc = origFunc->getGeneratorBody(genName);
6097 if (false) {
6098 ActRec* fp = NULL;
6099 UNUSED c_Continuation* cont =
6100 VMExecutionContext::createContinuation<true>(fp, getArgs, origFunc,
6101 genFunc);
6102 VMExecutionContext::createContinuation<false>(fp, getArgs, origFunc,
6103 genFunc);
6106 // Even callee-saved regs need to be clean, because
6107 // createContinuation will read all locals.
6108 m_regMap.cleanAll();
6109 auto helper = origFunc->isNonClosureMethod() ?
6110 VMExecutionContext::createContinuation<true> :
6111 VMExecutionContext::createContinuation<false>;
6112 EMIT_CALL(a,
6113 (TCA)helper,
6114 R(rVmFp),
6115 IMM(getArgs),
6116 IMM((intptr_t)origFunc),
6117 IMM((intptr_t)genFunc));
6118 ScratchReg holdRax(m_regMap, rax);
6120 int origLocals = origFunc->numNamedLocals();
6121 int genLocals = genFunc->numNamedLocals() - 1;
6122 ContParamMap params;
6123 if (origLocals <= kMaxInlineContLocals &&
6124 mapContParams(params, origFunc, genFunc)) {
6125 ScratchReg rScratch(m_regMap);
6126 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rScratch);
6127 a. test_reg64_reg64(*rScratch, *rScratch);
6128 DiamondReturn astubsRet;
6130 UnlikelyIfBlock<CC_NZ> ifVarEnv(a, astubs, &astubsRet);
6131 Stats::emitInc(astubs, Stats::Tx64_ContCreateSlow);
6132 emitCallFillCont(astubs, origFunc, genFunc);
6134 // fillContinuationVars returned the continuation in rax and
6135 // DiamondGuard marked rax as scratch again, so it's safe to keep
6136 // using it
6137 Stats::emitInc(a, Stats::Tx64_ContCreateFast);
6138 static const StringData* thisStr = StringData::GetStaticString("this");
6139 Id thisId = kInvalidId;
6140 bool fillThis = origFunc->isNonClosureMethod() && !origFunc->isStatic() &&
6141 ((thisId = genFunc->lookupVarId(thisStr)) != kInvalidId) &&
6142 (origFunc->lookupVarId(thisStr) == kInvalidId);
6143 ScratchReg rDest(m_regMap);
6144 if (origLocals > 0 || fillThis) {
6145 a.lea_reg64_disp_reg64(rax,
6146 c_Continuation::localsOffset(),
6147 *rDest);
6149 for (int i = 0; i < origLocals; ++i) {
6150 ASSERT(mapContains(params, i));
6151 int destOff = cellsToBytes(genLocals - params[i]);
6152 emitCopyTo(a, rVmFp, localOffset(i), *rDest, destOff, *rScratch);
6153 emitIncRefGenericRegSafe(*rDest, destOff, *rScratch);
6156 // Deal with a potential $this local in the generator body
6157 if (fillThis) {
6158 ASSERT(thisId != kInvalidId);
6159 a.load_reg64_disp_reg64(rax, CONTOFF(m_obj), *rScratch);
6160 a.test_reg64_reg64(*rScratch, *rScratch);
6162 JccBlock<CC_Z> ifObj(a);
6163 const int thisOff = cellsToBytes(genLocals - thisId);
6164 // We don't have to check for a static refcount since we
6165 // know it's an Object
6166 a.add_imm32_disp_reg32(1, TVOFF(_count), *rScratch);
6167 a.store_reg64_disp_reg64(*rScratch, thisOff + TVOFF(m_data), *rDest);
6168 a.store_imm32_disp_reg(KindOfObject, thisOff + TVOFF(m_type), *rDest);
6171 } else {
6172 Stats::emitInc(a, Stats::Tx64_ContCreateSlow);
6173 emitCallFillCont(a, origFunc, genFunc);
6175 m_regMap.bindScratch(holdRax, i.outStack->location, KindOfObject,
6176 RegInfo::DIRTY);
6179 void TranslatorX64::emitCallUnpack(X64Assembler& a,
6180 const NormalizedInstruction& i,
6181 int nCopy) {
6182 const int contIdx = 0;
6184 if (false) {
6185 c_Continuation* cont = NULL;
6186 TypedValue* dest = NULL;
6187 VMExecutionContext::unpackContinuation(cont, dest);
6189 EMIT_CALL(a,
6190 VMExecutionContext::unpackContinuation,
6191 V(i.inputs[contIdx]->location),
6192 A(Location(Location::Local, nCopy)));
6193 recordCall(a, i);
6196 void TranslatorX64::translateUnpackCont(const Tracelet& t,
6197 const NormalizedInstruction& i) {
6198 const int contIdx = 0;
6199 ASSERT(curFrame()->m_varEnv == NULL);
6200 ASSERT(i.inputs.size() == 1);
6201 ASSERT(i.inputs[contIdx]->location == Location(Location::Local, 0));
6202 ASSERT(i.outStack->outerType() == KindOfInt64);
6203 int nCopy = curFunc()->numNamedLocals() - 1;
6205 for (int loc = 1; loc <= nCopy; ++loc) {
6206 // We're at the beginning of the function. The only local in a
6207 // register should be local 0, our input
6208 ASSERT(!m_regMap.hasReg(Location(Location::Local, loc)));
6210 if (nCopy > kMaxInlineContLocals) {
6211 Stats::emitInc(a, Stats::Tx64_ContUnpackSlow);
6212 emitCallUnpack(a, i, nCopy);
6213 m_regMap.bind(rax, i.outStack->location, KindOfInt64,
6214 RegInfo::DIRTY);
6215 return;
6218 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6219 ScratchReg rLabel(m_regMap);
6221 a. test_imm32_disp_reg32(0x1, CONTOFF(m_hasExtraVars), rCont);
6222 DiamondReturn astubsRet;
6224 UnlikelyIfBlock<CC_NZ> hasVars(a, astubs, &astubsRet);
6225 Stats::emitInc(astubs, Stats::Tx64_ContUnpackSlow);
6226 emitCallUnpack(astubs, i, nCopy);
6227 emitMovRegReg(astubs, rax, *rLabel);
6229 Stats::emitInc(a, Stats::Tx64_ContUnpackFast);
6231 a. load_reg64_disp_reg64(rCont, CONTOFF(m_label), *rLabel);
6232 ScratchReg rScratch(m_regMap);
6233 ScratchReg rSrc(m_regMap);
6234 ScratchReg rZero(m_regMap);
6235 if (nCopy > 0) {
6236 a. lea_reg64_disp_reg64(rCont,
6237 c_Continuation::localsOffset(),
6238 *rSrc);
6239 emitImmReg(a, 0, *rZero);
6241 for (int srcOff = 0, destOff = localOffset(nCopy);
6242 srcOff < (int)cellsToBytes(nCopy);
6243 srcOff += sizeof(Cell), destOff += sizeof(Cell)) {
6244 emitCopyTo(a, *rSrc, srcOff, rVmFp, destOff, *rScratch);
6245 a. store_reg32_disp_reg64(*rZero, srcOff + TVOFF(m_type), *rSrc);
6248 m_regMap.bindScratch(rLabel, i.outStack->location, KindOfInt64,
6249 RegInfo::DIRTY);
6252 void TranslatorX64::emitCallPack(X64Assembler& a,
6253 const NormalizedInstruction& i,
6254 int nCopy) {
6255 const int valIdx = 0;
6256 const int contIdx = 1;
6258 // packContinuation is going to read values directly from the stack
6259 // so we have to clean everything.
6260 m_regMap.cleanAll();
6261 if (false) {
6262 c_Continuation* cont = NULL;
6263 TypedValue* tv = NULL;
6264 ActRec* fp = NULL;
6265 int label = 0;
6266 VMExecutionContext::packContinuation(cont, fp, tv, label);
6268 EMIT_CALL(a,
6269 VMExecutionContext::packContinuation,
6270 V(i.inputs[contIdx]->location),
6271 R(rVmFp),
6272 A(i.inputs[valIdx]->location),
6273 IMM(i.imm[0].u_IVA));
6274 recordCall(a, i);
6275 m_regMap.invalidateLocals(1, nCopy);
6278 void TranslatorX64::translatePackCont(const Tracelet& t,
6279 const NormalizedInstruction& i) {
6280 const int valIdx = 0;
6281 const int contIdx = 1;
6283 int nCopy = curFunc()->numNamedLocals() - 1;
6284 if (nCopy > kMaxInlineContLocals) {
6285 Stats::emitInc(a, Stats::Tx64_ContPackSlow);
6286 emitCallPack(a, i, nCopy);
6287 emitDecRefInput(a, i, valIdx);
6288 return;
6291 ScratchReg rScratch(m_regMap);
6292 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rScratch);
6293 a. test_reg64_reg64(*rScratch, *rScratch);
6294 DiamondReturn astubsRet;
6296 // TODO: Task #1132976: We can probably prove that this is impossible in
6297 // most cases using information from hphpc
6298 UnlikelyIfBlock<CC_NZ> varEnv(a, astubs, &astubsRet);
6299 Stats::emitInc(astubs, Stats::Tx64_ContPackSlow);
6300 emitCallPack(astubs, i, nCopy);
6301 emitDecRefInput(astubs, i, valIdx);
6303 Stats::emitInc(a, Stats::Tx64_ContPackFast);
6305 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6306 ScratchReg rDest(m_regMap);
6307 ScratchReg rZero(m_regMap);
6308 if (nCopy > 0) {
6309 a. lea_reg64_disp_reg64(rCont,
6310 c_Continuation::localsOffset(),
6311 *rDest);
6312 emitImmReg(a, 0, *rZero);
6314 for (int idx = nCopy, destOff = 0, srcOff = localOffset(nCopy);
6315 idx > 0;
6316 --idx, destOff += sizeof(Cell), srcOff += sizeof(Cell)) {
6317 Location loc(Location::Local, idx);
6318 if (m_regMap.hasReg(loc)) {
6319 PhysReg reg = getReg(loc);
6320 spillTo(m_regMap.getInfo(reg)->m_type, reg, true, *rDest, destOff);
6321 } else {
6322 emitCopyTo(a, rVmFp, srcOff, *rDest, destOff, *rScratch);
6324 m_regMap.invalidate(loc);
6325 a. store_reg32_disp_reg64(*rZero, srcOff + TVOFF(m_type), rVmFp);
6328 // We're moving our reference to the value from the stack to the
6329 // continuation object, so we don't have to incRef or decRef
6330 Location valLoc = i.inputs[valIdx]->location;
6331 emitTvSet(i, getReg(valLoc), i.inputs[valIdx]->outerType(), rCont,
6332 CONTOFF(m_value), false);
6334 emitImmReg(a, i.imm[0].u_IVA, *rScratch);
6335 a. store_reg64_disp_reg64(*rScratch, CONTOFF(m_label), rCont);
6338 static void continuationRaiseHelper(c_Continuation* cont) {
6339 cont->t_raised();
6340 not_reached();
6343 void TranslatorX64::emitContRaiseCheck(X64Assembler& a,
6344 const NormalizedInstruction& i) {
6345 const int contIdx = 0;
6346 ASSERT(i.inputs[contIdx]->location == Location(Location::Local, 0));
6347 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6348 a. test_imm32_disp_reg32(0x1, CONTOFF(m_should_throw), rCont);
6350 UnlikelyIfBlock<CC_NZ> ifThrow(a, astubs);
6351 if (false) {
6352 c_Continuation* c = NULL;
6353 continuationRaiseHelper(c);
6355 EMIT_CALL(astubs,
6356 continuationRaiseHelper,
6357 R(rCont));
6358 recordReentrantStubCall(i);
6359 translator_not_reached(astubs);
6363 void TranslatorX64::translateContReceive(const Tracelet& t,
6364 const NormalizedInstruction& i) {
6365 const int contIdx = 0;
6366 emitContRaiseCheck(a, i);
6367 ScratchReg rScratch(m_regMap);
6368 a. lea_reg64_disp_reg64(getReg(i.inputs[contIdx]->location),
6369 CONTOFF(m_received), *rScratch);
6370 emitIncRefGeneric(*rScratch, 0);
6371 emitCopyToStack(a, i, *rScratch, -1 * (int)sizeof(Cell));
6374 void TranslatorX64::translateContRaised(const Tracelet& t,
6375 const NormalizedInstruction& i) {
6376 emitContRaiseCheck(a, i);
6379 void TranslatorX64::translateContDone(const Tracelet& t,
6380 const NormalizedInstruction& i) {
6381 const int contIdx = 0;
6382 a. store_imm8_disp_reg(0x1, CONTOFF(m_done),
6383 getReg(i.inputs[contIdx]->location));
6386 static void contPreNextThrowHelper(c_Continuation* c) {
6387 c->preNext();
6388 not_reached();
6391 void TranslatorX64::emitContPreNext(const NormalizedInstruction& i,
6392 ScratchReg& rCont) {
6393 const Offset doneOffset = CONTOFF(m_done);
6394 CT_ASSERT((doneOffset + 1) == CONTOFF(m_running));
6395 // Check m_done and m_running at the same time
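  // (Added note: the CT_ASSERT above guarantees m_running sits in the byte
  // right after m_done, so the single 0x0101 test below checks the low byte
  // of each field at once -- if either flag is set, the result is non-zero
  // and we take the throwing slow path.)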
6396 a. test_imm32_disp_reg32(0x0101, doneOffset, *rCont);
6398 UnlikelyIfBlock<CC_NZ> ifThrow(a, astubs);
6399 EMIT_CALL(astubs, contPreNextThrowHelper, R(*rCont));
6400 recordReentrantStubCall(i);
6401 translator_not_reached(astubs);
6404 // ++m_index
6405 a. add_imm64_disp_reg64(0x1, CONTOFF(m_index), *rCont);
6406 // m_running = true
6407 a. store_imm8_disp_reg(0x1, CONTOFF(m_running), *rCont);
6410 void TranslatorX64::translateContNext(const Tracelet& t,
6411 const NormalizedInstruction& i) {
6412 ScratchReg rCont(m_regMap);
6413 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6414 emitContPreNext(i, rCont);
6416 // m_received.setNull()
6417 emitTvSet(i, reg::noreg, KindOfNull, *rCont, CONTOFF(m_received), false);
6420 static void contNextCheckThrowHelper(c_Continuation* cont) {
6421 cont->startedCheck();
6422 not_reached();
6425 void TranslatorX64::emitContStartedCheck(const NormalizedInstruction& i,
6426 ScratchReg& rCont) {
6427 // if (m_index < 0)
6428 a. cmp_imm64_disp_reg64(0, CONTOFF(m_index), *rCont);
6430 UnlikelyIfBlock<CC_L> whoops(a, astubs);
6431 EMIT_CALL(astubs, contNextCheckThrowHelper, *rCont);
6432 recordReentrantStubCall(i);
6433 translator_not_reached(astubs);
6437 template<bool raise>
6438 void TranslatorX64::translateContSendImpl(const NormalizedInstruction& i) {
6439 const int valIdx = 0;
6440 ASSERT(i.inputs[valIdx]->location == Location(Location::Local, 0));
6442 ScratchReg rCont(m_regMap);
6443 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6444 emitContStartedCheck(i, rCont);
6445 emitContPreNext(i, rCont);
6447 // m_received = value
6448 PhysReg valReg = getReg(i.inputs[valIdx]->location);
6449 DataType valType = i.inputs[valIdx]->outerType();
6450 emitTvSet(i, valReg, valType, *rCont, CONTOFF(m_received), true);
6452 // m_should_throw = true (maybe)
6453 if (raise) {
6454 a. store_imm8_disp_reg(0x1, CONTOFF(m_should_throw), *rCont);
6458 void TranslatorX64::translateContSend(const Tracelet& t,
6459 const NormalizedInstruction& i) {
6460 translateContSendImpl<false>(i);
6463 void TranslatorX64::translateContRaise(const Tracelet& t,
6464 const NormalizedInstruction& i) {
6465 translateContSendImpl<true>(i);
6468 void TranslatorX64::translateContValid(const Tracelet& t,
6469 const NormalizedInstruction& i) {
6470 ScratchReg rCont(m_regMap);
6471 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6473 m_regMap.allocOutputRegs(i);
6474 PhysReg validReg = getReg(i.outStack->location);
6475 // !m_done
6476 a. loadzxb_reg64_disp_reg64(*rCont, CONTOFF(m_done), validReg);
6477 a. xor_imm32_reg64(0x1, validReg);
6480 void TranslatorX64::translateContCurrent(const Tracelet& t,
6481 const NormalizedInstruction& i) {
6482 ScratchReg rCont(m_regMap);
6483 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6484 emitContStartedCheck(i, rCont);
6486 a. lea_reg64_disp_reg64(*rCont, CONTOFF(m_value), *rCont);
6487 emitIncRefGeneric(*rCont, 0);
6488 emitCopyToStack(a, i, *rCont, -1 * (int)sizeof(Cell));
6491 void TranslatorX64::translateContStopped(const Tracelet& t,
6492 const NormalizedInstruction& i) {
6493 ScratchReg rCont(m_regMap);
6494 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6495 a. store_imm8_disp_reg(0x0, CONTOFF(m_running), *rCont);
6498 void TranslatorX64::translateContHandle(const Tracelet& t,
6499 const NormalizedInstruction& i) {
6500 // Always interpreted
6501 not_reached();
6504 static void analyzeClassExistsImpl(NormalizedInstruction& i) {
6505 const int nameIdx = 1;
6506 const int autoIdx = 0;
6507 ASSERT(!i.inputs[nameIdx]->isVariant() && !i.inputs[autoIdx]->isVariant());
6508 i.m_txFlags = supportedPlan(i.inputs[nameIdx]->isString() &&
6509 i.inputs[autoIdx]->isBoolean());
6510 i.fuseBranch = (i.m_txFlags & Supported) &&
6511 i.inputs[nameIdx]->rtt.valueString() &&
6512 i.inputs[autoIdx]->rtt.valueBoolean() != RuntimeType::UnknownBool;
6515 void TranslatorX64::analyzeClassExists(Tracelet& t,
6516 NormalizedInstruction& i) {
6517 analyzeClassExistsImpl(i);
6520 void TranslatorX64::analyzeInterfaceExists(Tracelet& t,
6521 NormalizedInstruction& i) {
6522 analyzeClassExistsImpl(i);
6525 void TranslatorX64::analyzeTraitExists(Tracelet& t,
6526 NormalizedInstruction& i) {
6527 analyzeClassExistsImpl(i);
6530 static int64 classExistsSlow(const StringData* name, bool autoload,
6531 Attr typeAttr) {
6532 bool ret = Unit::classExists(name, autoload, typeAttr);
6533 // XXX: do we need to decref this during an exception?
6534 if (name->decRefCount() == 0) {
6535 const_cast<StringData*>(name)->release();
6537 return ret;
6540 void TranslatorX64::translateClassExistsImpl(const Tracelet& t,
6541 const NormalizedInstruction& i,
6542 Attr typeAttr) {
6543 const int nameIdx = 1;
6544 const int autoIdx = 0;
6545 const StringData* name = i.inputs[nameIdx]->rtt.valueString();
6546 ASSERT(IMPLIES(name, name->isStatic()));
6547 const int autoload = i.inputs[autoIdx]->rtt.valueBoolean();
6549 ScratchReg scratch(m_regMap);
6550 if (name != NULL && autoload != RuntimeType::UnknownBool) {
6551 ASSERT(i.fuseBranch);
6552 const Attr attrNotClass = Attr(AttrTrait | AttrInterface);
6553 const bool isClass = typeAttr == AttrNone;
6554 using namespace TargetCache;
6555 Stats::emitInc(a, Stats::Tx64_ClassExistsFast);
6556 CacheHandle ch = allocKnownClass(name);
6559 DiamondReturn astubsRet;
6560 a. load_reg64_disp_reg64(rVmTl, ch, *scratch);
6561 a. test_reg64_reg64(*scratch, *scratch);
6562 if (autoload) {
6563 UnlikelyIfBlock<CC_Z> ifNull(a, astubs, &astubsRet);
6564 if (false) {
6565 Class** c = NULL;
6566 UNUSED Class* ret = lookupKnownClass<true>(c, name, false);
6568 Stats::emitInc(astubs, Stats::TgtCache_ClassExistsMiss);
6569 // If the class exists after autoloading, the helper will
6570 // return the Class's flags. Otherwise, it will return a set
6571 // of flags such that our flag check at the join point below
6572 // will fail.
6573 EMIT_CALL(astubs, (lookupKnownClass_func_t)lookupKnownClass<true>,
6574 RPLUS(rVmTl, ch),
6575 IMM((uintptr_t)name),
6576 IMM(isClass));
6577 recordReentrantStubCall(i);
6578 emitMovRegReg(astubs, rax, *scratch);
6579 } else {
6580 UnlikelyIfBlock<CC_Z> ifNull(a, astubs, &astubsRet);
6581 // This isn't really a traditional slow path, count as a hit
6582 Stats::emitInc(astubs, Stats::TgtCache_ClassExistsHit);
6583 // Provide flags so the check back in a fails
6584 emitImmReg(astubs, isClass ? attrNotClass : AttrNone, *scratch);
6586 // If we don't take the slow/NULL path, load the Class's attrs
6587 // into *scratch to prepare for the flag check.
6588 Stats::emitInc(a, Stats::TgtCache_ClassExistsHit);
6589 a. load_reg64_disp_reg64(*scratch, Class::preClassOff(),
6590 *scratch);
6591 a. load_reg64_disp_reg32(*scratch, PreClass::attrsOffset(),
6592 *scratch);
6595 if (i.changesPC) {
6596 fuseBranchSync(t, i);
6598 a. test_imm32_reg32(isClass ? attrNotClass : typeAttr, *scratch);
6599 ConditionCode cc = isClass ? CC_Z : CC_NZ;
6600 if (i.changesPC) {
6601 fuseBranchAfterBool(t, i, cc);
6602 } else {
6603 a. setcc(cc, *scratch);
6604 a. mov_reg8_reg64_unsigned(*scratch, *scratch);
6605 m_regMap.bindScratch(scratch, i.outStack->location, KindOfBoolean,
6606 RegInfo::DIRTY);
6608 } else {
6609 ASSERT(!i.fuseBranch);
6610 Stats::emitInc(a, Stats::Tx64_ClassExistsSlow);
6611 if (false) {
6612 UNUSED bool ret = false;
6613 ret = classExistsSlow(name, ret, typeAttr);
6615 EMIT_CALL(a, classExistsSlow,
6616 V(i.inputs[nameIdx]->location),
6617 V(i.inputs[autoIdx]->location),
6618 IMM(typeAttr));
6619 recordReentrantCall(i);
6620 // Our helper decrefs the string
6621 m_regMap.bind(rax, i.outStack->location, KindOfBoolean, RegInfo::DIRTY);
6625 void TranslatorX64::translateClassExists(const Tracelet& t,
6626 const NormalizedInstruction& i) {
6627 translateClassExistsImpl(t, i, AttrNone);
6630 void TranslatorX64::translateInterfaceExists(const Tracelet& t,
6631 const NormalizedInstruction& i) {
6632 translateClassExistsImpl(t, i, AttrInterface);
6635 void TranslatorX64::translateTraitExists(const Tracelet& t,
6636 const NormalizedInstruction& i) {
6637 translateClassExistsImpl(t, i, AttrTrait);
6640 // Helper function for static property access. This function emits code
6641 // which leaves a pointer to the static property for clsInput::$propInput in
6642 // register scr. We destroy scr early on, yet do not consume inputs until
6643 // later, so scr must not alias an input register. This also handles
6644 // the decref for the case where prop is not a static string.
6645 void TranslatorX64::emitStaticPropInlineLookup(const NormalizedInstruction& i,
6646 const DynLocation& clsInput,
6647 const DynLocation& propInput,
6648 PhysReg scr) {
6649 const Class* cls = clsInput.rtt.valueClass();
6650 const StringData* propName = propInput.rtt.valueString();
6651 using namespace TargetCache;
6652 CacheHandle ch;
6654 ASSERT(cls && propName);
6655 // Use the uniquely known cls / prop to generate a single cache per prop
6656 const StringData* clsName = cls->preClass()->name();
6657 string sds(Util::toLower(clsName->data()) + ":" +
6658 string(propName->data(), propName->size()));
6659 StringData sd(sds.c_str(), sds.size(), AttachLiteral);
6660 ch = SPropCache::alloc(&sd);
6661 SKTRACE(1, i.source, "SPropInlineLookup %s %d\n", sd.data(), int(ch));
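  // (Added illustrative example, hypothetical names: for a lookup of C::$p
  // the key built above is the lowercased class name joined with the property
  // name, i.e. "c:p", so every translation of this class/prop pair shares a
  // single SPropCache handle.)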
6663 Stats::emitInc(a, Stats::TgtCache_SPropHit);
6665 // For the simple case of statically known class and prop name, we inline
6666 // the target cache lookup, and outline the miss case.
6667 // Load the TV pointer out of the thread-private tl_targetCaches.
6668 BOOST_STATIC_ASSERT((offsetof(SPropCache, m_tv) == 0));
6669 a. load_reg64_disp_reg64(rVmTl, ch, scr);
6670 a. test_reg64_reg64(scr, scr);
6672 // Call the slow path.
6674 UnlikelyIfBlock<CC_Z> shucks(a, astubs);
6676 // Precondition for this lookup - we don't need to pass the preClass,
6677 // as we only translate in class lookups.
6678 ASSERT(cls == curFunc()->cls());
6679 if (false) { // typecheck
6680 StringData *data = NULL;
6681 SPropCache::lookup(ch, cls, data);
6684 EMIT_CALL(astubs, (TCA)SPropCache::lookup,
6685 IMM(ch), V(clsInput.location), IMM(uint64_t(propName)));
6686 recordReentrantStubCall(i);
6687 emitMovRegReg(astubs, rax, scr);
6689 // We're consuming the name as input, but it is static, no decref needed
6690 ASSERT(propInput.rtt.valueString()->isStatic());
6691 // astubs. jmp(a.code.frontier); -- implicit
6695 void TranslatorX64::analyzeCGetS(Tracelet& t, NormalizedInstruction& i) {
6696 ASSERT(i.inputs.size() == 2);
6697 ASSERT(i.inputs[0]->valueType() == KindOfClass);
6698 ASSERT(i.outStack);
6699 const Class* cls = i.inputs[0]->rtt.valueClass();
6700 const StringData* propName = i.inputs[1]->rtt.valueString();
6701 i.m_txFlags = supportedPlan(cls && propName && isContextFixed() &&
6702 curFunc()->cls() == cls);
6705 void TranslatorX64::translateCGetS(const Tracelet& t,
6706 const NormalizedInstruction& i) {
6707 const int kClassIdx = 0;
6708 const int kPropIdx = 1;
6710 ScratchReg sprop(m_regMap);
6711 emitStaticPropInlineLookup(i, *i.inputs[kClassIdx],
6712 *i.inputs[kPropIdx], *sprop);
6713 emitDerefIfVariant(a, *sprop);
6714 emitIncRefGeneric(*sprop, 0);
6715 // Finally copy the thing to the stack
6716 int stackDest = 2 * sizeof(Cell) - sizeof(Cell); // popped - pushed
6717 emitCopyToStack(a, i, *sprop, stackDest);
6720 void TranslatorX64::analyzeSetS(Tracelet& t, NormalizedInstruction& i) {
6721 ASSERT(i.inputs.size() == 3);
6722 ASSERT(i.inputs[1]->valueType() == KindOfClass);
6723 ASSERT(i.outStack);
6724 const Class* cls = i.inputs[1]->rtt.valueClass();
6725 const StringData* propName = i.inputs[2]->rtt.valueString();
6726 // XXX Need to check isContextFixed
6727 // Might be able to broaden this: if cls is an ancestor of the current context,
6728 // the context is Fixed, and the property is not private
6729 // Also if the m_hoistable in cls is set to AlwaysHoistable, defined in
6730 // the same unit as context, and the property is public
6731 i.m_txFlags = supportedPlan(cls && propName && isContextFixed() &&
6732 curFunc()->cls() == cls);
6735 void TranslatorX64::translateSetS(const Tracelet& t,
6736 const NormalizedInstruction& i) {
6737 const int kClassIdx = 1;
6739 ScratchReg sprop(m_regMap);
6740 const RuntimeType& rhsType = i.inputs[0]->rtt;
6741 emitStaticPropInlineLookup(i, *i.inputs[kClassIdx], *i.inputs[2], *sprop);
6743 ASSERT(m_regMap.getInfo(*sprop)->m_state == RegInfo::SCRATCH);
6744 ASSERT(!rhsType.isVariant());
6746 m_regMap.allocOutputRegs(i);
6747 PhysReg rhsReg = getReg(i.inputs[0]->location);
6748 PhysReg outReg = getReg(i.outStack->location);
6749 emitTvSet(i, rhsReg, rhsType.outerType(), *sprop);
6750 ASSERT(i.inputs[2]->location == i.outStack->location);
6751 emitMovRegReg(rhsReg, outReg);
6754 void TranslatorX64::analyzeSetG(Tracelet& t, NormalizedInstruction& i) {
6755 ASSERT(i.inputs.size() == 2);
6756 i.m_txFlags = supportedPlan(
6757 i.inputs[1]->isString() &&
6758 !i.inputs[0]->isVariant()
6760 if (i.m_txFlags) i.manuallyAllocInputs = true;
6763 void TranslatorX64::translateSetG(const Tracelet& t,
6764 const NormalizedInstruction& i) {
6765 ASSERT(i.outStack && !i.outLocal);
6766 ASSERT(i.inputs.size() == 2);
6767 ASSERT(i.inputs[1]->isString());
6768 ASSERT(i.inputs[1]->location == i.outStack->location);
6770 const DataType type = i.inputs[0]->rtt.outerType();
6773 * Grab the global from the target cache; rax will get a pointer to
6774 * the TypedValue in the globals array, maybe newly created as a
6775 * null.
6777 emitGetGlobal(i, 1, true /* allowCreate */);
6778 ScratchReg raxSaver(m_regMap, rax);
6779 m_regMap.allocInputReg(i, 0);
6780 PhysReg src = getReg(i.inputs[0]->location);
6781 m_regMap.allocOutputRegs(i);
6782 PhysReg out = getReg(i.outStack->location);
6784 emitTvSet(i, src, type, rax);
6785 emitMovRegReg(src, out);
6788 static TypedValue* lookupGlobal(StringData* name) {
6789 VarEnv* ve = g_vmContext->m_globalVarEnv;
6790 TypedValue* r = ve->lookup(name);
6791 // If the global didn't exist, we need to leave name un-decref'd for
6792 // the caller to raise warnings.
6793 if (r) {
6794 LITSTR_DECREF(name);
6795 if (r->m_type == KindOfRef) r = r->m_data.pref->tv();
6797 return r;
6800 static TypedValue* lookupAddGlobal(StringData* name) {
6801 VarEnv* ve = g_vmContext->m_globalVarEnv;
6802 TypedValue* r = ve->lookupAdd(name);
6803 if (r->m_type == KindOfRef) r = r->m_data.pref->tv();
6804 LITSTR_DECREF(name);
6805 return r;
6809 * Look up a global in the TargetCache with the name
6810  * i.inputs[nameIdx]. If `allowCreate' is true, the global is also created. If
6811 * we don't create the global, the input name is not decref'd yet.
6813 void
6814 TranslatorX64::emitGetGlobal(const NormalizedInstruction& i, int nameIdx,
6815 bool allowCreate) {
6816 using namespace TargetCache;
6817 ASSERT(i.inputs.size() > size_t(nameIdx));
6818 ASSERT(i.inputs[nameIdx]->isString());
6820 const StringData *maybeName = i.inputs[nameIdx]->rtt.valueString();
6821 if (!maybeName) {
6822 m_regMap.allocInputReg(i, nameIdx, argNumToRegName[0]);
6823 // Always do a lookup when there's no statically-known name.
6824 // There's not much we can really cache here right now anyway.
6825 EMIT_CALL(a, allowCreate ? lookupAddGlobal : lookupGlobal,
6826 V(i.inputs[nameIdx]->location));
6827 recordCall(i);
6828 return;
6831 CacheHandle ch = GlobalCache::alloc(maybeName);
6832 if (false) { // typecheck
6833 StringData* UNUSED key = NULL;
6834 TypedValue* UNUSED glob = GlobalCache::lookup(ch, key);
6835 TypedValue* UNUSED glob2 = GlobalCache::lookupCreate(ch, key);
6837 SKTRACE(1, i.source, "ch %d\n", ch);
6838 EMIT_CALL(a, allowCreate ? GlobalCache::lookupCreate
6839 : GlobalCache::lookup,
6840 IMM(ch),
6841 IMM((uint64_t)maybeName));
6842 recordCall(i);
6845 static bool
6846 isSupportedInstrCGetG(const NormalizedInstruction& i) {
6847 ASSERT(i.inputs.size() == 1);
6848 return (i.inputs[0]->rtt.isString());
6851 void
6852 TranslatorX64::analyzeCGetG(Tracelet& t, NormalizedInstruction& i) {
6853 i.m_txFlags = simplePlan(isSupportedInstrCGetG(i));
6854 if (i.m_txFlags) i.manuallyAllocInputs = true;
6857 void
6858 TranslatorX64::translateCGetG(const Tracelet& t,
6859 const NormalizedInstruction& i) {
6860 ASSERT(i.outStack && !i.outLocal);
6861 ASSERT(i.inputs.size() == 1);
6862 ASSERT(i.inputs[0]->isString());
6864 emitGetGlobal(i, 0, false /* allowCreate */);
6865 ScratchReg raxHolder(m_regMap, rax);
6867 // If non-null, rax now points to the in-memory location of the
6868 // object of unknown type. lookup() has already decref'd the name.
6869 a. test_reg64_reg64(rax, rax);
6870 DiamondReturn astubsRet;
6872 UnlikelyIfBlock<CC_Z> ifNotRax(a, astubs, &astubsRet);
6873 if (!i.inputs[0]->rtt.valueString()) {
6874 m_regMap.allocInputReg(i, 0);
6875 PhysReg reg = getReg(i.inputs[0]->location);
6876 emitDecRef(astubs, i, reg, BitwiseKindOfString);
6878    // TODO: if (MoreWarnings) raise an undefined variable warning.
6879 // (Note: when changing this remember to change the Simple flag to
6880 // Supported in analyze.)
6881 emitStoreNull(astubs, vstackOffset(i, 0), rVmSp);
6882 m_regMap.invalidate(i.outStack->location);
6885 emitCopyToStack(a, i, rax, 0);
6886 emitIncRefGeneric(rax, 0);
6887 m_regMap.invalidate(i.outStack->location);
6890 void TranslatorX64::analyzeFPassL(Tracelet& t,
6891 NormalizedInstruction& ni) {
6892 if (ni.preppedByRef) {
6893 analyzeVGetL(t, ni);
6894 } else {
6895 analyzeCGetL(t, ni);
6899 void TranslatorX64::translateFPassL(const Tracelet& t,
6900 const NormalizedInstruction& ni) {
6901 if (ni.preppedByRef) {
6902 translateVGetL(t, ni);
6903 } else {
6904 translateCGetL(t, ni);
6908 void TranslatorX64::analyzeFPassS(Tracelet& t,
6909 NormalizedInstruction& ni) {
6910 if (ni.preppedByRef) {
6911 // We need a VGetS translation.
6912 ni.m_txFlags = Interp;
6913 } else {
6914 analyzeCGetS(t, ni);
6918 void TranslatorX64::translateFPassS(const Tracelet& t,
6919 const NormalizedInstruction& ni) {
6920 if (ni.preppedByRef) {
6921 ASSERT(false);
6922 } else {
6923 translateCGetS(t, ni);
6927 void TranslatorX64::analyzeFPassG(Tracelet& t,
6928 NormalizedInstruction& ni) {
6929 if (ni.preppedByRef) {
6930 analyzeVGetG(t, ni);
6931 } else {
6932 analyzeCGetG(t, ni);
6936 void TranslatorX64::translateFPassG(const Tracelet& t,
6937 const NormalizedInstruction& ni) {
6938 if (ni.preppedByRef) {
6939 translateVGetG(t, ni);
6940 } else {
6941 translateCGetG(t, ni);
6945 void TranslatorX64::analyzeCheckTypeOp(Tracelet& t,
6946 NormalizedInstruction& ni) {
6947 ASSERT(ni.inputs.size() == 1);
6949 if (ni.op() == OpIsObjectL || ni.op() == OpIsObjectC) {
6950 // is_object is weird because it's supposed to return false for
6951 // things where ObjectData::isResource() is true. For now we only
6952 // translate when it is not an object.
6953 if (ni.inputs[0]->valueType() == KindOfObject) {
6954 ni.m_txFlags = Interp;
6955 return;
6959 if (ni.inputs[0]->isLocal()) {
6960 ni.manuallyAllocInputs = true;
6961 if (ni.op() != OpIssetL && ni.inputs[0]->rtt.isUninit()) {
6962 ni.m_txFlags = Supported;
6963 } else {
6964 ni.m_txFlags = Native;
6966 return;
6969 ni.m_txFlags = planHingesOnRefcounting(ni.inputs[0]->valueType());
6972 static bool checkTypeHelper(Opcode op, DataType dt) {
6973 switch (op) {
6974 case OpIssetL: return !IS_NULL_TYPE(dt);
6975 case OpIsNullL: case OpIsNullC: return IS_NULL_TYPE(dt);
6976 case OpIsStringL: case OpIsStringC: return IS_STRING_TYPE(dt);
6977 case OpIsArrayL: case OpIsArrayC: return IS_ARRAY_TYPE(dt);
6978 case OpIsIntL: case OpIsIntC: return IS_INT_TYPE(dt);
6979 case OpIsBoolL: case OpIsBoolC: return IS_BOOL_TYPE(dt);
6980 case OpIsDoubleL: case OpIsDoubleC: return IS_DOUBLE_TYPE(dt);
6982 case OpIsObjectL: case OpIsObjectC:
6983 // Note: this is because we refused to translate if it was
6984 // actually an object for now. (We'd need to emit some kind of
6985 // call to ObjectData::isResource or something.)
6986 return 0;
6988 ASSERT(false);
6989 NOT_REACHED();
6992 void
6993 TranslatorX64::translateCheckTypeOp(const Tracelet& t,
6994 const NormalizedInstruction& ni) {
6995 ASSERT(ni.inputs.size() == 1);
6996 ASSERT(ni.outStack);
6998 const DataType dt = ni.inputs[0]->valueType();
6999 const bool isLocalOp = ni.inputs[0]->isLocal();
7000 const bool isType =
7001 checkTypeHelper(ni.op(), ni.inputs[0]->valueType()) != ni.invertCond;
7002 const bool doUninit = isLocalOp &&
7003 ni.op() != OpIssetL &&
7004 ni.inputs[0]->rtt.isUninit();
7006 if (!isLocalOp) {
7007 emitDecRef(ni, getReg(ni.inputs[0]->location), dt);
7009 if (doUninit) {
7010 const StringData* name = local_name(ni.inputs[0]->location);
7011 ASSERT(name->isStatic());
7012 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
7013 recordReentrantCall(ni);
7015 m_regMap.allocOutputRegs(ni);
7016 if (ni.changesPC) {
7017 // Don't bother driving an output reg. Just take the branch
7018 // where it leads.
7019 Stats::emitInc(a, Stats::Tx64_FusedTypeCheck);
7020 fuseBranchAfterStaticBool(t, ni, isType);
7021 return;
7023 Stats::emitInc(a, Stats::Tx64_UnfusedTypeCheck);
7024 emitImmReg(a, isType, getReg(ni.outStack->location));
7027 static void badArray() {
7028 throw_bad_type_exception("array_key_exists expects an array or an object; "
7029 "false returned.");
7032 static void badKey() {
7033 raise_warning("Array key should be either a string or an integer");
7036 static inline int64 ak_exist_string_helper(StringData* key, ArrayData* arr) {
7037 int64 n;
7038 if (key->isStrictlyInteger(n)) {
7039 return arr->exists(n);
7041 return arr->exists(StrNR(key));
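// (Added note: isStrictlyInteger() mirrors PHP's array-key normalization --
// a key like "123" probes the integer index 123, while anything that is not
// a canonical integer string is looked up as a string key.)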
7044 static int64 ak_exist_string(StringData* key, ArrayData* arr) {
7045 int64 res = ak_exist_string_helper(key, arr);
7046 if (arr->decRefCount() == 0) {
7047 arr->release();
7049 if (key->decRefCount() == 0) {
7050 key->release();
7052 return res;
7055 static int64 ak_exist_int(int64 key, ArrayData* arr) {
7056 bool res = arr->exists(key);
7057 if (arr->decRefCount() == 0) {
7058 arr->release();
7060 return res;
7063 static int64 ak_exist_string_obj(StringData* key, ObjectData* obj) {
7064 CArrRef arr = obj->o_toArray();
7065 int64 res = ak_exist_string_helper(key, arr.get());
7066 if (obj->decRefCount() == 0) {
7067 obj->release();
7069 if (key->decRefCount() == 0) {
7070 key->release();
7072 return res;
7075 static int64 ak_exist_int_obj(int64 key, ObjectData* obj) {
7076 CArrRef arr = obj->o_toArray();
7077 bool res = arr.get()->exists(key);
7078 if (obj->decRefCount() == 0) {
7079 obj->release();
7081 return res;
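// AKExists (e.g. array_key_exists('k', $arr)) is translated as Simple when
// the container is known to be an array or object and the key is an int,
// string, or null; any other input combination may raise, so it is marked
// Supported (reentrant) instead.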
7084 void
7085 TranslatorX64::analyzeAKExists(Tracelet& t, NormalizedInstruction& i) {
7086 const int keyIx = 1;
7087 const int arrIx = 0;
7089 const DataType dta = i.inputs[arrIx]->valueType();
7090 const DataType dtk = i.inputs[keyIx]->valueType();
7092 bool reentrant = (dta != KindOfArray && dta != KindOfObject) ||
7093 (!IS_STRING_TYPE(dtk) && dtk != KindOfInt64 && dtk != KindOfNull);
7095 i.m_txFlags = reentrant ? Supported : Simple;
7096 i.manuallyAllocInputs = true;
7099 void
7100 TranslatorX64::translateAKExists(const Tracelet& t,
7101 const NormalizedInstruction& ni) {
7102 ASSERT(ni.inputs.size() == 2);
7103 ASSERT(ni.outStack);
7105 const int keyIx = 1;
7106 const int arrIx = 0;
7108 const DataType dta = ni.inputs[arrIx]->valueType();
7109 const DataType dtk = ni.inputs[keyIx]->valueType();
7110 TCA string_func = (TCA)ak_exist_string;
7111 TCA int_func = (TCA)ak_exist_int;
7113 int result = -1;
7114 int args[2];
7115 args[keyIx] = 0;
7116 args[arrIx] = 1;
7117 switch (dta) {
7118 case KindOfObject:
7119 string_func = (TCA)ak_exist_string_obj;
7120 int_func = (TCA)ak_exist_int_obj;
7121 case KindOfArray:
7122 switch (dtk) {
7123 case BitwiseKindOfString:
7124 case KindOfStaticString:
7125 case KindOfInt64: {
7126 allocInputsForCall(ni, args);
7127 PhysReg rk = getReg(ni.inputs[keyIx]->location);
7128 PhysReg ra = getReg(ni.inputs[arrIx]->location);
7129 m_regMap.scrubStackEntries(ni.outStack->location.offset);
7130 EMIT_CALL(a, dtk == KindOfInt64 ? int_func : string_func,
7131 R(rk), R(ra));
7132 recordCall(ni);
7133 break;
7135 case KindOfNull:
7136 if (dta == KindOfArray) {
7137 args[keyIx] = ArgDontAllocate;
7138 allocInputsForCall(ni, args);
7139 PhysReg ra = getReg(ni.inputs[arrIx]->location);
7140 m_regMap.scrubStackEntries(ni.outStack->location.offset);
7141 EMIT_CALL(a, string_func,
7142 IMM((uint64_t)empty_string.get()), R(ra));
7143 recordCall(ni);
7144 } else {
7145 result = ni.invertCond;
7147 break;
7148 default:
7149 EMIT_CALL(a, badKey);
7150 recordReentrantCall(ni);
7151 result = ni.invertCond;
7152 break;
7154 break;
7155 default:
7156 EMIT_CALL(a, badArray);
7157 recordReentrantCall(ni);
7158 result = ni.invertCond;
7159 break;
7162 if (result >= 0) {
7163 if (ni.changesPC) {
7164 fuseBranchAfterStaticBool(t, ni, result);
7165 return;
7166 } else {
7167 m_regMap.allocOutputRegs(ni);
7168 emitImmReg(a, result, getReg(ni.outStack->location));
7170 } else {
7171 ScratchReg res(m_regMap, rax);
7172 if (ni.changesPC) {
7173 fuseBranchSync(t, ni);
7174 a. test_reg64_reg64(*res, *res);
7175 fuseBranchAfterBool(t, ni, ni.invertCond ? CC_Z : CC_NZ);
7176 } else {
7177 if (ni.invertCond) {
7178 a. xor_imm32_reg64(1, *res);
7180 m_regMap.bindScratch(res, ni.outStack->location, KindOfBoolean,
7181 RegInfo::DIRTY);
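// SetOpL ($x op= $y) only gets a native translation when both operands are
// ints and the operator is simple arithmetic or bitwise; everything else is
// left to the generic path.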
7186 void
7187 TranslatorX64::analyzeSetOpL(Tracelet& t, NormalizedInstruction& i) {
7188 ASSERT(i.inputs.size() == 2);
7189 const SetOpOp subOp = SetOpOp(i.imm[1].u_OA);
7190 Opcode arithOp = setOpOpToOpcodeOp(subOp);
7191 i.m_txFlags = nativePlan(i.inputs[0]->isInt() &&
7192 i.inputs[1]->isInt() &&
7193 (arithOp == OpAdd || arithOp == OpSub ||
7194 arithOp == OpMul ||
7195 arithOp == OpBitAnd || arithOp == OpBitOr ||
7196 arithOp == OpBitXor));
7199 void
7200 TranslatorX64::translateSetOpL(const Tracelet& t,
7201 const NormalizedInstruction& i) {
7202 const vector<DynLocation*>& inputs = i.inputs;
7203 ASSERT(inputs.size() >= 2);
7204 ASSERT(i.outStack && i.outLocal);
7205 const int valIdx = 0;
7206 const int localIdx = 1;
7207 ASSERT(inputs[localIdx]->isLocal());
7208 ASSERT(inputs[valIdx]->isStack());
7209 ASSERT(inputs[valIdx]->outerType() != KindOfRef);
7211 const SetOpOp subOp = SetOpOp(i.imm[1].u_OA);
7212 Opcode arithOp = setOpOpToOpcodeOp(subOp);
7213 m_regMap.allocOutputRegs(i);
7214 binaryArithLocal(i, arithOp, *inputs[valIdx], *inputs[localIdx],
7215 *i.outStack);
7218 void
7219 TranslatorX64::analyzeIncDecL(Tracelet& t, NormalizedInstruction& i) {
7220 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
7223 void
7224 TranslatorX64::translateIncDecL(const Tracelet& t,
7225 const NormalizedInstruction& i) {
7226 const vector<DynLocation*>& inputs = i.inputs;
7227 ASSERT(inputs.size() == 1);
7228 ASSERT(i.outLocal);
7229 ASSERT(inputs[0]->isLocal());
7230 const IncDecOp oplet = IncDecOp(i.imm[1].u_OA);
7231 ASSERT(oplet == PreInc || oplet == PostInc || oplet == PreDec ||
7232 oplet == PostDec);
7233 ASSERT(inputs[0]->isInt() && (!i.outStack || i.outStack->isInt()));
7234 bool post = (oplet == PostInc || oplet == PostDec);
7235 bool pre = !post;
7236 bool inc = (oplet == PostInc || oplet == PreInc);
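// Post forms ($a++, $a--) copy the old value into the output cell before the
// local is updated; pre forms (++$a, --$a) copy the updated value afterwards.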
7238 m_regMap.allocOutputRegs(i);
7239 PhysReg localVal = getReg(inputs[0]->location);
7240 if (i.outStack && post) { // $a++, $a--
7241 PhysReg output = getReg(i.outStack->location);
7242 emitMovRegReg(localVal, output);
7244 if (inc) {
7245 a. add_imm32_reg64(1, localVal);
7246 } else {
7247 a. sub_imm32_reg64(1, localVal);
7249 if (i.outStack && pre) { // --$a, ++$a
7250 PhysReg output = getReg(i.outStack->location);
7251 emitMovRegReg(localVal, output);
7255 void
7256 TranslatorX64::translateUnsetL(const Tracelet& t,
7257 const NormalizedInstruction& i) {
7258 ASSERT(i.inputs.size() == 1);
7259 ASSERT(!i.outStack && i.outLocal);
7260 const int locIdx = 0;
7261 const DynLocation& localDl = *i.inputs[locIdx];
7262 ASSERT(localDl.isLocal());
7264 // We have to mark the output register as dirty to ensure that
7265 // the type gets spilled at the end of the tracelet.
7266 m_regMap.allocOutputRegs(i);
7268 DataType type = localDl.outerType();
7269 // decRef the value that currently lives in the local if appropriate.
7270 emitDecRef(i, getReg(localDl.location), type);
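// The Req* (include/require) instructions are only translated when the
// filename is a static string that resolves to a known PhpFile at translation
// time, and only under RepoAuthoritative or ServerStatCache, so the file
// reference can be burned into the TC and dropped when the file changes.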
7274 void
7275 TranslatorX64::analyzeReqLit(Tracelet& t, NormalizedInstruction& i,
7276 InclOpFlags flags) {
7277 ASSERT(i.inputs.size() == 1);
7278 Eval::PhpFile* efile = g_vmContext->lookupIncludeRoot(
7279 (StringData*)i.inputs[0]->rtt.valueString(),
7280 flags, NULL);
7281 i.m_txFlags = supportedPlan(i.inputs[0]->isString() &&
7282 i.inputs[0]->rtt.valueString() != NULL &&
7283 efile &&
7284 (RuntimeOption::RepoAuthoritative ||
7285 RuntimeOption::ServerStatCache));
7286 if (efile && efile->unit()->getMainReturn()->m_type != KindOfUninit) {
7287 i.outStack->rtt = RuntimeType(efile->unit()->getMainReturn()->m_type);
7290 // We don't need the reference lookupIncludeRoot made for us.
7291 if (efile) efile->decRef();
7292 i.manuallyAllocInputs = true;
7295 void
7296 TranslatorX64::analyzeReqDoc(Tracelet& t, NormalizedInstruction& i) {
7297 analyzeReqLit(t, i, InclOpDocRoot);
7300 void
7301 TranslatorX64::analyzeReqMod(Tracelet& t, NormalizedInstruction& i) {
7302 analyzeReqLit(t, i, InclOpDocRoot | InclOpLocal);
7305 void
7306 TranslatorX64::analyzeReqSrc(Tracelet& t, NormalizedInstruction& i) {
7307 analyzeReqLit(t, i, InclOpRelative | InclOpLocal);
7310 void
7311 TranslatorX64::translateReqLit(const Tracelet& t,
7312 const NormalizedInstruction& i,
7313 InclOpFlags flags) {
7314 bool local = flags & InclOpLocal;
7315 StringData *s = const_cast<StringData*>(i.inputs[0]->rtt.valueString());
7316 HPHP::Eval::PhpFile* efile =
7317 g_vmContext->lookupIncludeRoot(s, flags, NULL);
7319 /* lookupIncludeRoot increments the refcount for us. This reference is
7320 * going to be burned into the translation cache. We will remove it only
7321 * when the file changes (via invalidateFile), and we're sure that no
7322 * outstanding requests are using the old code (via the Treadmill
7323 * module). */
7325 TRACE(1, "lookupIncludeRoot: %s -> %p c %d\n", s->data(), efile,
7326 efile->getRef());
7328 /* Remember that this tracelet (not just this instruction) now depends on the
7329 * contents of the required file. */
7331 m_srcDB.recordDependency(efile, t.m_sk);
7332 Unit *unit = efile->unit();
7333 Func *func = unit->getMain();
7335 const Offset after = nextSrcKey(t, i).offset();
7336 TRACE(1, "requireHelper: efile %p offset %d%s\n", efile, after,
7337 i.skipSync ? " [skipsync]" : "");
7339 if (i.skipSync) {
7341 /* getting here means there was nothing to do between
7342 * the previous req and this one. Any spill code we generate
7343 * here would be broken (because the rbx is wrong), so
7344 * verify that we don't generate anything... */
7346 TCA s DEBUG_ONLY = a.code.frontier;
7347 syncOutputs(0);
7348 ASSERT(s == a.code.frontier);
7349 } else {
7350 syncOutputs(i);
7352 ReqLitStaticArgs* args = m_globalData.alloc<ReqLitStaticArgs>();
7353 emitImmReg(a, (uint64_t)args, argNumToRegName[0]);
7354 emitCall(a, (TCA)reqLitHelper, true);
7356 args->m_efile = efile;
7357 args->m_pseudoMain = emitServiceReq(false, REQ_BIND_REQUIRE, 3,
7358 uint64_t(args),
7359 uint64_t(func), uint64_t(func->base()));
7360 args->m_pcOff = after;
7361 args->m_local = local;
7363 if (i.breaksBB) {
7364 SrcKey fallThru(curFunc(), after);
7365 emitBindJmp(fallThru);
7366 } else {
7368 /* When we get here, rVmSp points to the actual top of stack,
7369 * but the rest of this tracelet assumes that rVmSp is set to
7370 * the top of the stack at the beginning of the tracelet, so we
7371 * have to fix it up here. */
7374 if (!i.outStack) {
7375 /* as a special case, if we're followed by a pop, and
7376 we return a non-refcounted type, and then followed
7377 by another require, we can avoid the add here and the sub
7378 in the following require */
7380 } else {
7381 int delta = i.stackOff + getStackDelta(i);
7382 if (delta != 0) {
7383 // i.stackOff is in negative Cells, not bytes.
7384 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
7390 void
7391 TranslatorX64::translateReqDoc(const Tracelet& t,
7392 const NormalizedInstruction& i) {
7393 translateReqLit(t, i, InclOpDocRoot);
7396 void
7397 TranslatorX64::translateReqMod(const Tracelet& t,
7398 const NormalizedInstruction& i) {
7399 translateReqLit(t, i, InclOpDocRoot | InclOpLocal);
7402 void
7403 TranslatorX64::translateReqSrc(const Tracelet& t,
7404 const NormalizedInstruction& i) {
7405 translateReqLit(t, i, InclOpRelative | InclOpLocal);
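// Native trampolines: a per-helper stub emitted in the trampoline region
// (mov imm64 of the helper address into rax, jmp rax, then ud2), with the
// helper -> stub mapping cached in trampolineMap. When Stats are enabled the
// stub also bumps a per-helper counter indexed by trampoline number.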
7408 TCA
7409 TranslatorX64::emitNativeTrampoline(TCA helperAddr) {
7410 if (!atrampolines.code.canEmit(m_trampolineSize)) {
7411 // not enough space to emit a trampoline, so just return the
7412 // helper address and emitCall will emit the right sequence
7413 // to call it indirectly
7414 TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
7415 ASSERT(false);
7416 return helperAddr;
7418 uint32_t index = m_numNativeTrampolines++;
7419 TCA trampAddr = atrampolines.code.frontier;
7420 if (Stats::enabled()) {
7421 Stats::emitInc(atrampolines, &Stats::tl_helper_counters[0], index);
7422 Stats::helperNames[index] = Util::getNativeFunctionName(helperAddr);
7424 atrampolines.mov_imm64_reg((int64_t)helperAddr, reg::rax);
7425 atrampolines.jmp_reg(reg::rax);
7426 atrampolines.ud2();
7427 trampolineMap[helperAddr] = trampAddr;
7428 if (m_trampolineSize == 0) {
7429 m_trampolineSize = atrampolines.code.frontier - trampAddr;
7430 ASSERT(m_trampolineSize >= kMinPerTrampolineSize);
7432 recordBCInstr(OpNativeTrampoline, atrampolines, trampAddr);
7433 return trampAddr;
7436 TCA
7437 TranslatorX64::getNativeTrampoline(TCA helperAddr) {
7438 if (!RuntimeOption::EvalJitTrampolines && !Stats::enabled()) {
7439 return helperAddr;
7441 TCA trampAddr = (TCA)mapGet<PointerMap>(trampolineMap, helperAddr);
7442 if (trampAddr) {
7443 return trampAddr;
7445 return emitNativeTrampoline(helperAddr);
7447 void TranslatorX64::analyzeDefCls(Tracelet& t,
7448 NormalizedInstruction& i) {
7449 i.m_txFlags = Supported;
7452 static void defClsHelper(PreClass *preClass) {
7453 ASSERT(tl_regState == REGSTATE_DIRTY);
7454 tl_regState = REGSTATE_CLEAN;
7455 Unit::defClass(preClass);
7458 /* m_defClsHelper sync'd the registers for us already. This means
7459 * if an exception propagates we want to leave things as
7460 * REGSTATE_CLEAN, since we're still in sync. Only set it to dirty
7461 * if we are actually returning to run in the TC again. */
7463 tl_regState = REGSTATE_DIRTY;
7466 void TranslatorX64::translateDefCls(const Tracelet& t,
7467 const NormalizedInstruction& i) {
7468 int cid = i.imm[0].u_IVA;
7469 const Opcode* after = curUnit()->at(i.source.offset());
7470 PreClass* c = curFunc()->unit()->lookupPreClassId(cid);
7472 ASSERT(m_defClsHelper);
7475 /* compute the corrected stack ptr as a pseudo-param to m_defClsHelper
7476 which it will store in g_vmContext, in case of fatals, or __autoload */
7478 m_regMap.cleanReg(rax);
7479 m_regMap.smashReg(rax);
7480 ScratchReg offset(m_regMap, rax);
7481 a. lea_reg64_disp_reg64(rVmSp, -cellsToBytes(i.stackOff), rax);
7483 EMIT_CALL(a, m_defClsHelper, IMM((uint64)c), IMM((uint64)after));
7486 void TranslatorX64::analyzeDefFunc(Tracelet& t,
7487 NormalizedInstruction& i) {
7488 i.m_txFlags = Supported;
7491 void defFuncHelper(Func *f) {
7492 f->setCached();
7495 void TranslatorX64::translateDefFunc(const Tracelet& t,
7496 const NormalizedInstruction& i) {
7497 int fid = i.imm[0].u_IVA;
7498 Func* f = curFunc()->unit()->lookupFuncId(fid);
7500 EMIT_CALL(a, defFuncHelper, IMM((uint64)f));
7501 recordReentrantCall(i);
7504 void
7505 TranslatorX64::analyzeFPushFunc(Tracelet& t, NormalizedInstruction& i) {
7506 ASSERT(i.inputs.size() >= 1);
7507 // The input might be an object implementing __invoke()
7508 i.m_txFlags = simplePlan(i.inputs[0]->isString());
7511 void
7512 TranslatorX64::translateFPushFunc(const Tracelet& t,
7513 const NormalizedInstruction& i) {
7514 using namespace TargetCache;
7515 CacheHandle ch = FuncCache::alloc();
7516 ASSERT(i.inputs.size() == 1);
7517 Location& inLoc = i.inputs[0]->location;
7519 m_regMap.allocOutputRegs(i);
7520 m_regMap.scrubStackRange(i.stackOff - 1,
7521 i.stackOff - 1 + kNumActRecCells);
7522 // Popped one cell, pushed an actrec
7523 int startOfActRec = int(sizeof(Cell)) - int(sizeof(ActRec));
7524 size_t funcOff = AROFF(m_func) + startOfActRec;
7525 size_t thisOff = AROFF(m_this) + startOfActRec;
7526 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
7527 emitPushAR(i, NULL, sizeof(Cell) /* bytesPopped */);
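// The dead "if (false)" blocks scattered through this file never execute;
// they exist only so the compiler type-checks the helper signature against
// the arguments that the following EMIT_CALL passes through a raw address.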
7528 if (false) { // typecheck
7529 StringData sd("foo");
7530 const UNUSED Func* f = FuncCache::lookup(ch, &sd);
7532 SKTRACE(1, i.source, "ch %d\n", ch);
7533 EMIT_CALL(a, FuncCache::lookup, IMM(ch), V(inLoc));
7534 recordCall(i);
7535 emitVStackStore(a, i, rax, funcOff, sz::qword);
7538 void
7539 TranslatorX64::analyzeFPushClsMethodD(Tracelet& t, NormalizedInstruction& i) {
7540 i.m_txFlags = supportedPlan(isContextFixed());
7543 void
7544 TranslatorX64::translateFPushClsMethodD(const Tracelet& t,
7545 const NormalizedInstruction& i) {
7546 using namespace TargetCache;
7547 const StringData* meth = curUnit()->lookupLitstrId(i.imm[1].u_SA);
7548 const NamedEntityPair& np = curUnit()->lookupNamedEntityPairId(i.imm[2].u_SA);
7549 const StringData* cls = np.first;
7550 ASSERT(meth && meth->isStatic() &&
7551 cls && cls->isStatic());
7552 ASSERT(i.inputs.size() == 0);
7554 const Class* baseClass = Unit::lookupClass(np.second);
7555 bool magicCall = false;
7556 const Func* func = lookupImmutableMethod(baseClass, meth, magicCall,
7557 true /* staticLookup */);
7559 m_regMap.scrubStackRange(i.stackOff,
7560 i.stackOff + kNumActRecCells);
7562 int startOfActRec = -int(sizeof(ActRec));
7563 SKTRACE(2, i.source, "FPushClsMethodD %s :: %s\n",
7564 cls->data(), meth->data());
7566 size_t clsOff = AROFF(m_cls) + startOfActRec;
7567 if (func) {
7568 emitKnownClassCheck(i, cls, reg::noreg);
7569 Stats::emitInc(a, Stats::TgtCache_StaticMethodBypass);
7570 emitPushAR(i, func, 0 /*bytesPopped*/,
7571 false /* isCtor */, false /* clearThis */,
7572 magicCall ? uintptr_t(meth) | 1 : 0 /* varEnvInvName */);
7574 setupActRecClsForStaticCall(i, func, baseClass, clsOff, false);
7575 } else {
7576 Stats::emitInc(a, Stats::TgtCache_StaticMethodHit);
7577 CacheHandle ch = StaticMethodCache::alloc(cls, meth, getContextName());
7578 ScratchReg rFunc(m_regMap);
7579 a. load_reg64_disp_reg64(rVmTl, ch, *rFunc);
7580 a. test_reg64_reg64(*rFunc, *rFunc);
7581 // Unconditionally set rCls; if we miss, the miss path will
7582 // clean it up for us. Careful! Flags are live. The fill path
7583 // has already |'ed in the necessary 1.
7584 ScratchReg rCls(m_regMap);
7585 a. load_reg64_disp_reg64(rVmTl,
7586 ch + offsetof(StaticMethodCache, m_cls),
7587 *rCls);
7588 emitVStackStore(a, i, *rCls, clsOff);
7589 TCA stubsSkipRet;
7591 UnlikelyIfBlock<CC_Z> miss(a, astubs);
7592 if (false) { // typecheck
7593 const UNUSED Func* f = StaticMethodCache::lookup(ch, np.second,
7594 cls, meth);
7596 EMIT_CALL(astubs,
7597 StaticMethodCache::lookup,
7598 IMM(ch),
7599 IMM(int64(np.second)),
7600 IMM(int64(cls)),
7601 IMM(int64(meth)));
7602 recordReentrantStubCall(i);
7603 emitMovRegReg(astubs, rax, *rFunc);
7604 // NULL return means our work is done; see also
7605 // translateFPushClsMethodF.
7606 miss.reconcileEarly();
7607 astubs.test_reg64_reg64(*rFunc, *rFunc);
7608 stubsSkipRet = astubs.code.frontier;
7609 astubs.jz(a.code.frontier); // 1f to be patched later
7613 FreezeRegs ice(m_regMap);
7614 emitPushAR(i, NULL);
7615 size_t funcOff = AROFF(m_func) + startOfActRec;
7616 emitVStackStore(a, i, *rFunc, funcOff, sz::qword);
7618 // 1:
7619 astubs.patchJcc(stubsSkipRet, a.code.frontier);
7623 void
7624 TranslatorX64::analyzeFPushClsMethodF(Tracelet& t,
7625 NormalizedInstruction& i) {
7626 ASSERT(i.inputs[0]->valueType() == KindOfClass);
7627 i.m_txFlags = supportedPlan(
7628 i.inputs[1]->rtt.valueString() != NULL && // We know the method name
7629 i.inputs[0]->valueType() == KindOfClass &&
7630 i.inputs[0]->rtt.valueClass() != NULL && // We know the class name
7631 isContextFixed()
7635 void
7636 TranslatorX64::translateFPushClsMethodF(const Tracelet& t,
7637 const NormalizedInstruction& i) {
7638 using namespace TargetCache;
7639 ASSERT(isContextFixed());
7640 ASSERT(!curFunc()->isPseudoMain());
7641 ASSERT(curFunc()->cls() != NULL); // self:: and parent:: should only
7642 // appear in methods
7643 DynLocation* clsLoc = i.inputs[0];
7644 DynLocation* nameLoc = i.inputs[1];
7645 const StringData* name = nameLoc->rtt.valueString();
7646 ASSERT(name && name->isStatic());
7648 // Even though we know the Class* at compile time, it's not
7649 // guaranteed to be the same between requests. The name, however, is
7650 // fixed, so we can use that.
7651 const Class* cls = clsLoc->rtt.valueClass();
7652 ASSERT(cls);
7653 bool magicCall = false;
7654 const Func* func = lookupImmutableMethod(cls, name, magicCall,
7655 true /* staticLookup */);
7657 const int bytesPopped = 2 * sizeof(Cell); // [A C] popped
7658 const int startOfActRec = -int(sizeof(ActRec)) + bytesPopped;
7659 const Offset clsOff = startOfActRec + AROFF(m_cls);
7661 UNUSED ActRec* fp = curFrame();
7662 ASSERT(!fp->hasThis() || fp->getThis()->instanceof(cls));
7663 if (func) {
7664 Stats::emitInc(a, Stats::TgtCache_StaticMethodFBypass);
7665 emitPushAR(i, func, bytesPopped,
7666 false /* isCtor */, false /* clearThis */,
7667 magicCall ? uintptr_t(name) | 1 : 0 /* varEnvInvName */);
7669 setupActRecClsForStaticCall(i, func, cls, clsOff, true);
7670 m_regMap.scrubStackRange(i.stackOff - 2,
7671 i.stackOff - 2 + kNumActRecCells);
7672 } else {
7673 const StringData* clsName = cls->name();
7674 CacheHandle ch = StaticMethodFCache::alloc(clsName, name, getContextName());
7676 Stats::emitInc(a, Stats::TgtCache_StaticMethodFHit);
7677 TCA stubsSkipRet;
7678 ScratchReg rFunc(m_regMap);
7679 a. load_reg64_disp_reg64(rVmTl, ch, *rFunc);
7680 a. test_reg64_reg64(*rFunc, *rFunc);
7682 UnlikelyIfBlock<CC_Z> miss(a, astubs);
7683 if (false) { // typecheck
7684 const UNUSED Func* f = StaticMethodFCache::lookup(ch, cls, name);
7686 EMIT_CALL(astubs,
7687 StaticMethodFCache::lookup,
7688 IMM(ch),
7689 V(clsLoc->location),
7690 V(nameLoc->location));
7691 recordReentrantStubCall(i);
7692 emitMovRegReg(astubs, rax, *rFunc);
7693 // if rax == NULL, the helper interpreted the entire
7694 // instruction for us. Skip over the rest of the emitted code in
7695 // a, but we don't want to skip the branch spill/fill code.
7696 miss.reconcileEarly();
7697 astubs.test_reg64_reg64(*rFunc, *rFunc);
7698 stubsSkipRet = astubs.code.frontier;
7699 astubs.jz(a.code.frontier); // to be patched later
7702 const Offset funcOff = startOfActRec + AROFF(m_func);
7703 m_regMap.scrubStackRange(i.stackOff - 2,
7704 i.stackOff - 2 + kNumActRecCells);
7706 FreezeRegs ice(m_regMap);
7707 emitPushAR(i, NULL, bytesPopped);
7708 emitVStackStore(a, i, *rFunc, funcOff);
7710 // We know we're in a method so we don't have to worry about
7711 // rVmFp->m_cls being NULL. We just have to figure out if it's a
7712 // Class* or $this, and whether or not we should pass along $this or
7713 // its class.
7714 PhysReg rCls = *rFunc; // no need to allocate another scratch
7715 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
7716 a. test_imm32_reg64(1, rCls);
7718 JccBlock<CC_NZ> ifThis(a);
7719 // rCls is holding $this. Should we pass it to the callee?
7720 a. cmp_imm32_disp_reg32(1, ch + offsetof(StaticMethodFCache, m_static),
7721 rVmTl);
7723 IfElseBlock<CC_NE> ifStatic(a);
7724 // We're calling a static method. Load (this->m_cls | 0x1) into rCls.
7725 a.load_reg64_disp_reg64(rCls, ObjectData::getVMClassOffset(), rCls);
7726 a.or_imm32_reg64(1, rCls);
7728 ifStatic.Else();
7729 // We're calling an instance method. incRef $this.
7730 emitIncRef(rCls, KindOfObject);
7733 emitVStackStore(a, i, rCls, clsOff);
7736 astubs.patchJcc(stubsSkipRet, a.code.frontier);
7737 // No need to decref our inputs: one was KindOfClass and the other's
7738 // a static string.
7742 void
7743 TranslatorX64::analyzeFPushObjMethodD(Tracelet& t,
7744 NormalizedInstruction &i) {
7745 DynLocation* objLoc = i.inputs[0];
7746 i.m_txFlags = supportedPlan(objLoc->valueType() == KindOfObject &&
7747 isContextFixed());
7750 void
7751 TranslatorX64::translateFPushObjMethodD(const Tracelet &t,
7752 const NormalizedInstruction& i) {
7753 ASSERT(i.inputs.size() == 1);
7754 Location& objLoc = i.inputs[0]->location;
7755 ASSERT(i.inputs[0]->valueType() == KindOfObject);
7756 int id = i.imm[1].u_IVA;
7757 const StringData* name = curUnit()->lookupLitstrId(id);
7759 const Class* baseClass = i.inputs[0]->rtt.valueClass();
7760 bool magicCall = false;
7761 const Func* func = lookupImmutableMethod(baseClass, name, magicCall,
7762 false /* staticLookup */);
7763 m_regMap.scrubStackRange(i.stackOff - 1,
7764 i.stackOff - 1 + kNumActRecCells);
7765 // Popped one cell, pushed an actrec
7766 int startOfActRec = int(sizeof(Cell)) - int(sizeof(ActRec));
7767 size_t thisOff = AROFF(m_this) + startOfActRec;
7768 size_t funcOff = AROFF(m_func) + startOfActRec;
7769 emitPushAR(i, func, sizeof(Cell) /*bytesPopped*/,
7770 false /* isCtor */, false /* clearThis */,
7771 func && magicCall ? uintptr_t(name) | 1 : 0 /* varEnvInvName */);
7773 if (!func) {
7774 if (baseClass && !(baseClass->attrs() & AttrInterface)) {
7775 MethodLookup::LookupResult res =
7776 g_vmContext->lookupObjMethod(func, baseClass, name, false);
7777 if ((res == MethodLookup::MethodFoundWithThis ||
7778 res == MethodLookup::MethodFoundNoThis) &&
7779 !func->isAbstract()) {
7781 /* if we found the func in baseClass, then either:
7782 * - it's private, and this is always going to be the
7783 * called function, or
7784 * - any derived class must have a func that matches in
7785 * staticness, and is at least as accessible (and in
7786 * particular, you can't override a public/protected
7787 * method with a private method) */
7789 if (func->attrs() & AttrPrivate) {
7790 emitVStackStoreImm(a, i, uintptr_t(func), funcOff, sz::qword);
7791 } else {
7792 Offset methodsOff = Class::getMethodsOffset();
7793 Offset vecOff = methodsOff + Class::MethodMap::vecOff();
7794 ScratchReg scratch(m_regMap);
7795 // get the object's class into *scratch
7796 a. load_reg64_disp_reg64(getReg(objLoc),
7797 ObjectData::getVMClassOffset(),
7798 *scratch);
7799 if (res == MethodLookup::MethodFoundNoThis) {
7800 emitDecRef(a, i, getReg(objLoc), KindOfObject);
7801 a. lea_reg64_disp_reg64(*scratch, 1, getReg(objLoc));
7803 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
7805 // get the method vector into *scratch
7806 a. load_reg64_disp_reg64(*scratch, vecOff, *scratch);
7807 // get the func
7808 a. load_reg64_disp_reg64(*scratch,
7809 func->methodSlot() * sizeof(Func*),
7810 *scratch);
7811 emitVStackStore(a, i, *scratch, funcOff, sz::qword);
7812 Stats::emitInc(a, Stats::TgtCache_MethodFast);
7813 return;
7815 } else {
7816 func = NULL;
7821 if (func) {
7822 if (func->attrs() & AttrStatic) {
7823 if (func->attrs() & AttrPrivate) {
7824 emitVStackStoreImm(a, i, uintptr_t(curFunc()->cls()) | 1,
7825 thisOff, sz::qword);
7826 } else {
7827 ScratchReg scratch(m_regMap);
7828 a. load_reg64_disp_reg64(getReg(objLoc),
7829 ObjectData::getVMClassOffset(),
7830 *scratch);
7831 a. or_imm32_reg64(1, *scratch);
7832 emitVStackStore(a, i, *scratch, thisOff, sz::qword);
7834 emitDecRef(a, i, getReg(objLoc), KindOfObject);
7835 } else {
7836 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
7838 Stats::emitInc(a, Stats::TgtCache_MethodBypass);
7839 } else {
7840 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
7841 using namespace TargetCache;
7842 CacheHandle ch = MethodCache::alloc();
7843 if (false) { // typecheck
7844 ActRec* ar = NULL;
7845 MethodCache::lookup(ch, ar, name);
7847 int arOff = vstackOffset(i, startOfActRec);
7848 SKTRACE(1, i.source, "ch %d\n", ch);
7849 EMIT_CALL(a, MethodCache::lookup, IMM(ch),
7850 RPLUS(rVmSp, arOff), IMM(uint64_t(name)));
7851 recordReentrantCall(i);
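// getKnownClass: fast path reads the Class* straight from the per-request
// target cache slot; on a miss it falls back to lookupKnownClass, which may
// autoload (and syncs VM registers itself), then asserts the slot is filled.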
7855 static inline ALWAYS_INLINE Class* getKnownClass(Class** classCache,
7856 const StringData* clsName) {
7857 Class* cls = *classCache;
7858 if (UNLIKELY(cls == NULL)) {
7859 // lookupKnownClass does its own VMRegAnchor'ing.
7860 cls = TargetCache::lookupKnownClass<false>(classCache, clsName, true);
7861 ASSERT(*classCache && *classCache == cls);
7863 ASSERT(cls);
7864 return cls;
7867 static Instance*
7868 HOT_FUNC_VM
7869 newInstanceHelperNoCtor(Class** classCache, const StringData* clsName) {
7870 Class* cls = getKnownClass(classCache, clsName);
7871 Instance* ret = newInstance(cls);
7872 ret->incRefCount();
7873 return ret;
7876 Instance*
7877 HOT_FUNC_VM
7878 newInstanceHelper(Class* cls, int numArgs, ActRec* ar, ActRec* prevAr) {
7879 const Func* f = cls->getCtor();
7880 Instance* ret = NULL;
7881 if (UNLIKELY(!(f->attrs() & AttrPublic))) {
7882 VMRegAnchor _;
7883 UNUSED MethodLookup::LookupResult res =
7884 g_vmContext->lookupCtorMethod(f, cls, true /*raise*/);
7885 ASSERT(res == MethodLookup::MethodFoundWithThis);
7887 // Don't start pushing the AR until newInstance returns; it may reenter.
7888 ret = newInstance(cls);
7889 f->validate();
7890 ar->m_func = f;
7891 ar->initNumArgs(numArgs, true /*fromCtor*/);
7892 // Count two references: one for the new object on the stack, one for ar->m_this.
7893 ret->incRefCount();
7894 ret->incRefCount();
7895 ar->setThis(ret);
7896 ar->setVarEnv(NULL);
7897 arSetSfp(ar, prevAr);
7898 TRACE(2, "newInstanceHelper: AR %p: f %p, savedRbp %#lx, savedRip %#lx"
7899 " this %p\n",
7900 ar, ar->m_func, ar->m_savedRbp, ar->m_savedRip, ar->m_this);
7901 return ret;
7904 void TranslatorX64::translateFPushCtor(const Tracelet& t,
7905 const NormalizedInstruction& i) {
7906 int numArgs = i.imm[0].u_IVA;
7907 int arOff = vstackOffset(i, -int(sizeof(ActRec)));
7908 m_regMap.scrubStackRange(i.stackOff, i.stackOff + kNumActRecCells);
7909 EMIT_CALL(a, newInstanceHelper,
7910 V(i.inputs[0]->location),
7911 IMM(numArgs),
7912 RPLUS(rVmSp, arOff),
7913 R(rVmFp));
7914 recordReentrantCall(i);
7916 m_regMap.bind(rax, i.outStack->location, KindOfObject, RegInfo::DIRTY);
7919 Instance*
7920 HOT_FUNC_VM
7921 newInstanceHelperCached(Class** classCache,
7922 const StringData* clsName, int numArgs,
7923 ActRec* ar, ActRec* prevAr) {
7924 Class* cls = getKnownClass(classCache, clsName);
7925 return newInstanceHelper(cls, numArgs, ar, prevAr);
7928 void TranslatorX64::translateFPushCtorD(const Tracelet& t,
7929 const NormalizedInstruction& i) {
7930 using namespace TargetCache;
7931 int numArgs = i.imm[0].u_IVA;
7932 const StringData* clsName = curUnit()->lookupLitstrId(i.imm[1].u_SA);
7933 CacheHandle classCh = allocKnownClass(clsName);
7934 ScratchReg scr(m_regMap);
7935 a. lea_reg64_disp_reg64(rVmTl, classCh, *scr);
7936 // We first push the new object, then the actrec. Since we're going to
7937 // need to call out, and possibly reenter in the course of all this,
7938 // null out the object on the stack, in case we unwind before we're
7939 // ready.
7940 int arOff = vstackOffset(i, -int(sizeof(ActRec)) - cellsToBytes(1));
7941 m_regMap.scrubStackRange(i.stackOff, i.stackOff + kNumActRecCells + 1);
7942 if (i.noCtor) {
7943 EMIT_CALL(a, newInstanceHelperNoCtor,
7944 R(*scr),
7945 IMM(uintptr_t(clsName)));
7946 } else {
7947 EMIT_CALL(a, newInstanceHelperCached,
7948 R(*scr),
7949 IMM(uintptr_t(clsName)),
7950 IMM(numArgs),
7951 RPLUS(rVmSp, arOff), // ActRec
7952 R(rVmFp)); // prevAR
7954 recordReentrantCall(i);
7955 // The callee takes care of initializing the actRec, and returns the new
7956 // object.
7957 m_regMap.bind(rax, i.outStack->location, KindOfObject, RegInfo::DIRTY);
7960 static void fatalNullThis() { raise_error(Strings::FATAL_NULL_THIS); }
7962 void
7963 TranslatorX64::translateThis(const Tracelet &t,
7964 const NormalizedInstruction &i) {
7965 ASSERT(i.outStack && !i.outLocal);
7966 ASSERT(curFunc()->isPseudoMain() || curFunc()->cls());
7967 m_regMap.allocOutputRegs(i);
7968 PhysReg out = getReg(i.outStack->location);
7969 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), out);
7971 if (!i.guardedThis) {
7972 if (curFunc()->cls() == NULL) { // Non-class
7973 a. test_reg64_reg64(out, out);
7974 a. jz(astubs.code.frontier); // jz if_null
7977 a. test_imm32_reg64(1, out);
7979 UnlikelyIfBlock<CC_NZ> ifThisNull(a, astubs);
7980 // if_null:
7981 EMIT_CALL(astubs, fatalNullThis);
7982 recordReentrantStubCall(i);
7985 emitIncRef(out, KindOfObject);
7988 void
7989 TranslatorX64::translateInitThisLoc(const Tracelet& t,
7990 const NormalizedInstruction& i) {
7991 ASSERT(i.outLocal && !i.outStack);
7992 ASSERT(curFunc()->isPseudoMain() || curFunc()->cls());
7994 PhysReg base;
7995 int offset;
7996 locToRegDisp(i.outLocal->location, &base, &offset);
7997 ASSERT(base == rVmFp);
7999 ScratchReg thiz(m_regMap);
8000 a.load_reg64_disp_reg64(rVmFp, AROFF(m_this), *thiz);
8001 if (curFunc()->cls() == NULL) {
8002 // If we're in a pseudomain, m_this could be NULL
8003 a.test_reg64_reg64(*thiz, *thiz);
8004 a.jz(astubs.code.frontier); // jz if_null
8006 // Ok, it's not NULL but it might be a Class which should be treated
8007 // equivalently
8008 a.test_imm32_reg64(1, *thiz);
8009 a.jnz(astubs.code.frontier); // jnz if_null
8011 // We have a valid $this!
8012 a.store_imm32_disp_reg(KindOfObject, offset + TVOFF(m_type), base);
8013 a.store_reg64_disp_reg64(*thiz, offset + TVOFF(m_data), base);
8014 emitIncRef(*thiz, KindOfObject);
8016 // if_null:
8017 emitStoreUninitNull(astubs, offset, base);
8018 astubs.jmp(a.code.frontier);
8020 m_regMap.invalidate(i.outLocal->location);
8023 void
8024 TranslatorX64::analyzeFPushFuncD(Tracelet& t, NormalizedInstruction& i) {
8025 Id funcId = i.imm[1].u_SA;
8026 const NamedEntityPair nep = curUnit()->lookupNamedEntityPairId(funcId);
8027 const Func* func = Unit::lookupFunc(nep.second, nep.first);
8028 i.m_txFlags = supportedPlan(func != NULL);
8031 void
8032 TranslatorX64::translateFPushFuncD(const Tracelet& t,
8033 const NormalizedInstruction& i) {
8034 ASSERT(i.inputs.size() == 0);
8035 ASSERT(!i.outStack && !i.outLocal);
8036 Id funcId = i.imm[1].u_SA;
8037 const NamedEntityPair& nep = curUnit()->lookupNamedEntityPairId(funcId);
8038 const StringData* name = nep.first;
8039 const Func* func = Unit::lookupFunc(nep.second, name);
8041 // Translation is only supported if function lookup succeeds
8042 func->validate();
8043 if (Trace::enabled && !func) {
8044 TRACE(1, "Attempt to invoke undefined function %s\n", name->data());
8047 // Inform the register allocator that we just annihilated a range of
8048 // possibly-dirty stack entries.
8049 m_regMap.scrubStackRange(i.stackOff,
8050 i.stackOff + kNumActRecCells);
8052 size_t thisOff = AROFF(m_this) - sizeof(ActRec);
8053 bool funcCanChange = !func->isNameBindingImmutable(curUnit());
8054 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
8055 emitPushAR(i, funcCanChange ? NULL : func, 0, false, false);
8056 if (funcCanChange) {
8057 // Look it up in a FuncCache.
8058 using namespace TargetCache;
8059 CacheHandle ch = FixedFuncCache::alloc(name);
8060 size_t funcOff = AROFF(m_func) - sizeof(ActRec);
8061 size_t funcCacheOff = ch + offsetof(FixedFuncCache, m_func);
8063 SKTRACE(1, i.source, "ch %d\n", ch);
8065 Stats::emitInc(a, Stats::TgtCache_FuncDHit);
8066 ScratchReg scratch(m_regMap);
8067 a.load_reg64_disp_reg64(rVmTl, funcCacheOff, *scratch);
8068 a.test_reg64_reg64(*scratch, *scratch);
8070 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8072 if (false) { // typecheck
8073 StringData sd("foo");
8074 FixedFuncCache::lookupFailed(&sd);
8077 EMIT_CALL(astubs, TCA(FixedFuncCache::lookupFailed),
8078 IMM(uintptr_t(name)));
8079 recordReentrantStubCall(i);
8080 emitMovRegReg(astubs, rax, *scratch);
8082 emitVStackStore(a, i, *scratch, funcOff, sz::qword);
8086 void
8087 TranslatorX64::translateFPushContFunc(const Tracelet& t,
8088 const NormalizedInstruction& i) {
8089 ASSERT(curFrame()->hasThis());
8090 Class* genClass = curFrame()->getThis()->getVMClass();
8091 ASSERT(genClass == SystemLib::s_MethodContinuationClass ||
8092 genClass == SystemLib::s_FunctionContinuationClass);
8093 bool isMethod = genClass == SystemLib::s_MethodContinuationClass;
8094 size_t thisOff = AROFF(m_this) - sizeof(ActRec);
8095 size_t funcOff = AROFF(m_func) - sizeof(ActRec);
8096 m_regMap.scrubStackRange(i.stackOff,
8097 i.stackOff + kNumActRecCells);
8098 emitPushAR(i, NULL, 0, false, false);
8099 ScratchReg rCont(m_regMap);
8100 ScratchReg rScratch(m_regMap);
8101 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
8103 // Store the func
8104 a.load_reg64_disp_reg64(*rCont, CONTOFF(m_vmFunc), *rScratch);
8105 emitVStackStore(a, i, *rScratch, funcOff, sz::qword);
8107 if (isMethod) {
8108 // Store m_this
8109 a. load_reg64_disp_reg64(*rCont, CONTOFF(m_obj), *rScratch);
8110 a. test_reg64_reg64(*rScratch, *rScratch);
8112 IfElseBlock<CC_Z> ifThis(a);
8113 emitVStackStore(a, i, *rScratch, thisOff, sz::qword);
8114 emitIncRef(*rScratch, KindOfObject);
8116 ifThis.Else();
8117 a.load_reg64_disp_reg64(*rCont, CONTOFF(m_vmCalledClass), *rScratch);
8118 // m_vmCalledClass already has its low bit set
8119 emitVStackStore(a, i, *rScratch, thisOff, sz::qword);
8121 } else {
8122 emitVStackStoreImm(a, i, 0, thisOff, sz::qword);
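// findCuf: resolve a call_user_func-style callable ("func", "Cls::meth", or
// array($cls, $meth)) to a Func* at translation time. cls and invName (for
// magic calls) are filled in as needed, and forward is set when the caller's
// late static binding class must be forwarded (FPushCufF, self::, parent::).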
8126 const Func*
8127 TranslatorX64::findCuf(const NormalizedInstruction& ni,
8128 Class*& cls, StringData*& invName, bool& forward) {
8129 forward = (ni.op() == OpFPushCufF);
8130 cls = NULL;
8131 invName = NULL;
8133 DynLocation* callable = ni.inputs[ni.op() == OpFPushCufSafe ? 1 : 0];
8135 const StringData* str =
8136 callable->isString() ? callable->rtt.valueString() : NULL;
8137 const ArrayData* arr =
8138 callable->isArray() ? callable->rtt.valueArray() : NULL;
8140 StringData* sclass = NULL;
8141 StringData* sname = NULL;
8142 if (str) {
8143 Func* f = HPHP::VM::Unit::lookupFunc(str);
8144 if (f) return f;
8145 String name(const_cast<StringData*>(str));
8146 int pos = name.find("::");
8147 if (pos <= 0 || pos + 2 >= name.size() ||
8148 name.find("::", pos + 2) != String::npos) {
8149 return NULL;
8151 sclass = StringData::GetStaticString(name.substr(0, pos).get());
8152 sname = StringData::GetStaticString(name.substr(pos + 2).get());
8153 } else if (arr) {
8154 if (arr->size() != 2) return NULL;
8155 CVarRef e0 = arr->get(0LL, false);
8156 CVarRef e1 = arr->get(1LL, false);
8157 if (!e0.isString() || !e1.isString()) return NULL;
8158 sclass = e0.getStringData();
8159 sname = e1.getStringData();
8160 String name(sname);
8161 if (name.find("::") != String::npos) return NULL;
8162 } else {
8163 return NULL;
8166 if (!isContextFixed()) return NULL;
8168 Class* ctx = curFunc()->cls();
8170 if (sclass->isame(s_self.get())) {
8171 if (!ctx) return NULL;
8172 cls = ctx;
8173 forward = true;
8174 } else if (sclass->isame(s_parent.get())) {
8175 if (!ctx || !ctx->parent()) return NULL;
8176 cls = ctx->parent();
8177 forward = true;
8178 } else if (sclass->isame(s_static.get())) {
8179 return NULL;
8180 } else {
8181 cls = VM::Unit::lookupClass(sclass);
8182 if (!cls) return NULL;
8185 bool magicCall = false;
8186 const Func* f = lookupImmutableMethod(cls, sname, magicCall, true);
8187 if (!f || (forward && !ctx->classof(f->cls()))) {
8189 /* To preserve the invariant that the lsb class
8190 * is an instance of the context class, we require
8191 * that f's class is an instance of the context class.
8192 * This is conservative, but without it, we would need
8193 * a runtime check to decide whether or not to forward
8194 * the lsb class. */
8196 return NULL;
8198 if (magicCall) invName = sname;
8199 return f;
8202 void
8203 TranslatorX64::analyzeFPushCufOp(Tracelet& t,
8204 NormalizedInstruction& ni) {
8205 Class* cls = NULL;
8206 StringData* invName = NULL;
8207 bool forward = false;
8208 const Func* func = findCuf(ni, cls, invName, forward);
8209 ni.m_txFlags = supportedPlan(func != NULL);
8210 ni.manuallyAllocInputs = true;
8213 void
8214 TranslatorX64::setupActRecClsForStaticCall(const NormalizedInstruction &i,
8215 const Func* func, const Class* cls,
8216 size_t clsOff, bool forward) {
8217 if (forward) {
8218 ScratchReg rClsScratch(m_regMap);
8219 PhysReg rCls = *rClsScratch;
8220 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
8221 if (!(curFunc()->attrs() & AttrStatic)) {
8222 ASSERT(curFunc()->cls() &&
8223 curFunc()->cls()->classof(cls));
8224 /* the context is non-static, so we have to deal
8225 with passing in $this or getClass($this) */
8226 a. test_imm32_reg64(1, rCls);
8228 JccBlock<CC_NZ> ifThis(a);
8229 // rCls is holding a real $this.
8230 if (func->attrs() & AttrStatic) {
8231 // but we're a static method, so pass getClass($this)|1
8232 a.load_reg64_disp_reg64(rCls, ObjectData::getVMClassOffset(), rCls);
8233 a.or_imm32_reg64(1, rCls);
8234 } else {
8235 // We should pass $this to the callee
8236 emitIncRef(rCls, KindOfObject);
8240 emitVStackStore(a, i, rCls, clsOff);
8241 } else {
8242 if (!(func->attrs() & AttrStatic) &&
8243 !(curFunc()->attrs() & AttrStatic) &&
8244 curFunc()->cls() &&
8245 curFunc()->cls()->classof(cls)) {
8246 /* might be a non-static call */
8247 ScratchReg rClsScratch(m_regMap);
8248 PhysReg rCls = *rClsScratch;
8249 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
8250 a. test_imm32_reg64(1, rCls);
8252 IfElseBlock<CC_NZ> ifThis(a);
8253 // rCls is holding $this. We should pass it to the callee
8254 emitIncRef(rCls, KindOfObject);
8255 emitVStackStore(a, i, rCls, clsOff);
8256 ifThis.Else();
8257 emitVStackStoreImm(a, i, uintptr_t(cls)|1, clsOff);
8259 } else {
8260 emitVStackStoreImm(a, i, uintptr_t(cls)|1, clsOff);
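// checkClass: astubs-side fallback when the target class is not yet defined
// in this request. It runs the autoloader; if the class still is not in the
// target cache, it rewrites the prepared ActRec to call the null function
// (dropping any $this) and reports failure to the caller.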
8265 template <bool warn>
8266 int64 checkClass(TargetCache::CacheHandle ch, StringData* clsName,
8267 ActRec *ar) {
8268 VMRegAnchor _;
8269 AutoloadHandler::s_instance->invokeHandler(clsName->data());
8270 if (*(Class**)TargetCache::handleToPtr(ch)) return true;
8271 ar->m_func = SystemLib::GetNullFunction();
8272 if (ar->hasThis()) {
8273 // cannot hit zero, we just inc'ed it
8274 ar->getThis()->decRefCount();
8276 ar->setThis(0);
8277 return false;
8280 static void warnMissingFunc(StringData* name) {
8281 throw_invalid_argument("function: method '%s' not found", name->data());
8284 void
8285 TranslatorX64::translateFPushCufOp(const Tracelet& t,
8286 const NormalizedInstruction& ni) {
8287 Class* cls = NULL;
8288 StringData* invName = NULL;
8289 bool forward = false;
8290 const Func* func = findCuf(ni, cls, invName, forward);
8291 ASSERT(func);
8293 int numPopped = ni.op() == OpFPushCufSafe ? 0 : 1;
8294 m_regMap.scrubStackRange(ni.stackOff - numPopped,
8295 ni.stackOff - numPopped + kNumActRecCells);
8297 int startOfActRec = int(numPopped * sizeof(Cell)) - int(sizeof(ActRec));
8299 emitPushAR(ni, cls ? func : NULL, numPopped * sizeof(Cell),
8300 false /* isCtor */, false /* clearThis */,
8301 invName ? uintptr_t(invName) | 1 : 0 /* varEnvInvName */);
8303 bool safe = (ni.op() == OpFPushCufSafe);
8304 size_t clsOff = AROFF(m_cls) + startOfActRec;
8305 size_t funcOff = AROFF(m_func) + startOfActRec;
8306 LazyScratchReg flag(m_regMap);
8307 if (safe) {
8308 flag.alloc();
8309 emitImmReg(a, true, *flag);
8311 if (cls) {
8312 setupActRecClsForStaticCall(ni, func, cls, clsOff, forward);
8313 TargetCache::CacheHandle ch = cls->m_cachedOffset;
8314 a. cmp_imm32_disp_reg32(0, ch, rVmTl);
8316 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8317 if (false) {
8318 checkClass<false>(0, NULL, NULL);
8319 checkClass<true>(0, NULL, NULL);
8321 EMIT_CALL(astubs, TCA(safe ? checkClass<false> : checkClass<true>),
8322 IMM(ch), IMM(uintptr_t(cls->name())),
8323 RPLUS(rVmSp, vstackOffset(ni, startOfActRec)));
8324 recordReentrantStubCall(ni, true);
8325 if (safe) {
8326 astubs. mov_reg64_reg64(rax, *flag);
8329 } else {
8330 ScratchReg funcReg(m_regMap);
8331 TargetCache::CacheHandle ch = func->getCachedOffset();
8332 a. load_reg64_disp_reg64(rVmTl, ch, *funcReg);
8333 emitVStackStore(a, ni, *funcReg, funcOff);
8334 emitVStackStoreImm(a, ni, 0, clsOff, sz::qword, &m_regMap);
8335 a. test_reg64_reg64(*funcReg, *funcReg);
8337 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8338 emitVStackStoreImm(astubs, ni,
8339 uintptr_t(SystemLib::GetNullFunction()), funcOff);
8340 if (safe) {
8341 emitImmReg(astubs, false, *flag);
8342 } else {
8343 EMIT_CALL(astubs, TCA(warnMissingFunc), IMM(uintptr_t(func->name())));
8344 recordReentrantStubCall(ni, true);
8349 if (safe) {
8350 DynLocation* outFlag = ni.outStack2;
8351 DynLocation* outDef = ni.outStack;
8353 DynLocation* inDef = ni.inputs[0];
8354 if (!m_regMap.hasReg(inDef->location)) {
8355 m_regMap.scrubStackRange(ni.stackOff - 2, ni.stackOff - 2);
8356 PhysReg base1, base2;
8357 int disp1, disp2;
8358 locToRegDisp(inDef->location, &base1, &disp1);
8359 locToRegDisp(outDef->location, &base2, &disp2);
8360 ScratchReg tmp(m_regMap);
8361 a. load_reg64_disp_reg64(base1, TVOFF(m_data) + disp1, *tmp);
8362 a. store_reg64_disp_reg64(*tmp, TVOFF(m_data) + disp2, base2);
8363 if (!inDef->rtt.isVagueValue()) {
8364 a. store_imm32_disp_reg(inDef->outerType(),
8365 TVOFF(m_type) + disp2, base2);
8366 } else {
8367 a. load_reg64_disp_reg32(base1, TVOFF(m_type) + disp1, *tmp);
8368 a. store_reg32_disp_reg64(*tmp, TVOFF(m_type) + disp2, base2);
8370 } else {
8371 PhysReg reg = m_regMap.getReg(inDef->location);
8372 m_regMap.scrubStackRange(ni.stackOff - 1, ni.stackOff - 1);
8373 m_regMap.bind(reg, outDef->location, inDef->rtt.outerType(),
8374 RegInfo::DIRTY);
8376 m_regMap.bindScratch(flag, outFlag->location, KindOfBoolean,
8377 RegInfo::DIRTY);
8381 void
8382 TranslatorX64::analyzeFPassCOp(Tracelet& t, NormalizedInstruction& i) {
8383 i.m_txFlags = nativePlan(!i.preppedByRef);
8386 void
8387 TranslatorX64::translateFPassCOp(const Tracelet& t,
8388 const NormalizedInstruction& i) {
8389 ASSERT(i.inputs.size() == 0);
8390 ASSERT(!i.outStack && !i.outLocal);
8391 ASSERT(!i.preppedByRef);
8394 void
8395 TranslatorX64::translateFPassR(const Tracelet& t,
8396 const NormalizedInstruction& i) {
8398 /* Like FPassC, FPassR is able to cheat on boxing if the current
8399 * parameter is pass by reference but we have a cell: the box would refer
8400 * to exactly one datum (the value currently on the stack).
8401 *
8402 * However, if the callee wants a cell and we have a variant we must
8403 * unbox; otherwise we might accidentally make callee changes to its
8404 * parameter globally visible. */
8406 ASSERT(!i.inputs[0]->rtt.isVagueValue());
8408 ASSERT(i.inputs.size() == 1);
8409 const RuntimeType& inRtt = i.inputs[0]->rtt;
8410 if (inRtt.isVariant() && !i.preppedByRef) {
8411 emitUnboxTopOfStack(i);
8415 void
8416 TranslatorX64::translateFCall(const Tracelet& t,
8417 const NormalizedInstruction& i) {
8418 int numArgs = i.imm[0].u_IVA;
8419 const Opcode* atCall = i.pc();
8420 const Opcode* after = curUnit()->at(nextSrcKey(t, i).offset());
8421 const Func* srcFunc = curFunc();
8423 // Sync all dirty registers and adjust rVmSp to point to the
8424 // top of stack at the beginning of the current instruction
8425 syncOutputs(i);
8427 // We are "between" tracelets and don't use the register map
8428 // anymore. (Note that the currently executing trace may actually
8429 // continue past the FCall, but it will have to resume with a fresh
8430 // register map.)
8431 RegSet scratchRegs = kScratchCrossTraceRegs;
8432 DumbScratchReg retIPReg(scratchRegs);
8434 // Caller-specific fields: return addresses and the frame pointer
8435 // offset.
8436 ASSERT(sizeof(Cell) == 1 << 4);
8437 // Record the hardware return address. This will be patched up below; 2
8438 // is a magic number dependent on assembler implementation.
8439 MovImmPatcher retIP(a, (uint64_t)a.code.frontier, *retIPReg);
8440 a. store_reg64_disp_reg64 (*retIPReg,
8441 cellsToBytes(numArgs) + AROFF(m_savedRip),
8442 rVmSp);
8444 // The kooky offset here a) gets us to the current ActRec,
8445 // and b) accesses m_soff.
8446 int32 callOffsetInUnit = srcFunc->unit()->offsetOf(after - srcFunc->base());
8447 a. store_imm32_disp_reg(callOffsetInUnit,
8448 cellsToBytes(numArgs) + AROFF(m_soff),
8449 rVmSp);
8451 emitBindCall(t, i,
8452 curUnit()->offsetOf(atCall),
8453 curUnit()->offsetOf(after)); // ...
8454 retIP.patch(uint64(a.code.frontier));
8456 if (i.breaksBB) {
8457 SrcKey fallThru(curFunc(), after);
8458 emitBindJmp(fallThru);
8459 } else {
8461 /* Before returning, the callee restored rVmSp to point to the
8462 * current top of stack but the rest of this tracelet assumes that
8463 * rVmSp is set to the top of the stack at the beginning of the
8464 * tracelet, so we have to fix it up here.
8465 *
8466 * TODO: in the case of an inlined NativeImpl, we're essentially
8467 * emitting two adds to rVmSp in a row, which we can combine ... */
8469 int delta = i.stackOff + getStackDelta(i);
8470 if (delta != 0) {
8471 // i.stackOff is in negative Cells, not bytes.
8472 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
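// staticLocHelper: find (or lazily create) the named entry in the function's
// static-locals map, seeding it from the initializer value on the stack and
// boxing it if it isn't already a ref. With UseTC the resulting TypedValue*
// is also published in the target cache so later executions skip the helper.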
8477 template <bool UseTC>
8478 static TypedValue*
8479 staticLocHelper(StringData* name, ActRec* fp, TypedValue* sp,
8480 TargetCache::CacheHandle ch) {
8481 if (UseTC) {
8482 Stats::inc(Stats::TgtCache_StaticMiss);
8483 Stats::inc(Stats::TgtCache_StaticHit, -1);
8485 HphpArray* map = get_static_locals(fp);
8486 TypedValue* retval = map->nvGet(name); // Local to num
8487 if (!retval) {
8488 // Read the initial value off the stack.
8489 TypedValue tv = *sp;
8490 map->nvSet(name, &tv, false);
8491 retval = map->nvGet(name);
8493 ASSERT(retval);
8494 if (retval->m_type != KindOfRef) {
8495 tvBox(retval);
8497 ASSERT(retval->m_type == KindOfRef);
8498 if (UseTC) {
8499 TypedValue** chTv = (TypedValue**)TargetCache::handleToPtr(ch);
8500 ASSERT(*chTv == NULL);
8501 return (*chTv = retval);
8502 } else {
8503 return retval;
8507 void
8508 TranslatorX64::emitCallStaticLocHelper(X64Assembler& as,
8509 const NormalizedInstruction& i,
8510 ScratchReg& output,
8511 TargetCache::CacheHandle ch) {
8512 // The helper is going to read the value from memory, so record it. We
8513 // could also pass type/value as parameters, but this is hopefully a
8514 // rare path.
8515 m_regMap.cleanLoc(i.inputs[0]->location);
8516 if (false) { // typecheck
8517 StringData* sd = NULL;
8518 ActRec* fp = NULL;
8519 TypedValue* sp = NULL;
8520 sp = staticLocHelper<true>(sd, fp, sp, ch);
8521 sp = staticLocHelper<false>(sd, fp, sp, ch);
8523 const StringData* name = curFunc()->unit()->lookupLitstrId(i.imm[1].u_SA);
8524 ASSERT(name->isStatic());
8525 if (ch) {
8526 EMIT_CALL(as, (TCA)staticLocHelper<true>, IMM(uintptr_t(name)), R(rVmFp),
8527 RPLUS(rVmSp, -cellsToBytes(i.stackOff)), IMM(ch));
8528 } else {
8529 EMIT_CALL(as, (TCA)staticLocHelper<false>, IMM(uintptr_t(name)), R(rVmFp),
8530 RPLUS(rVmSp, -cellsToBytes(i.stackOff)));
8532 recordCall(as, i);
8533 emitMovRegReg(as, rax, *output);
8536 void
8537 TranslatorX64::translateStaticLocInit(const Tracelet& t,
8538 const NormalizedInstruction& i) {
8539 using namespace TargetCache;
8540 ScratchReg output(m_regMap);
8541 const Location& outLoc = i.outLocal->location;
8543 // Closures and generators from closures don't satisfy the "one
8544 // static per source location" rule that the inline fastpath
8545 // requires
8546 if (!curFunc()->isClosureBody() &&
8547 !curFunc()->isGeneratorFromClosure()) {
8548 // Miss path explicitly decrements.
8549 Stats::emitInc(a, Stats::TgtCache_StaticHit);
8550 Stats::emitInc(a, Stats::Tx64_StaticLocFast);
8552 CacheHandle ch = allocStatic();
8553 ASSERT(ch);
8554 a. load_reg64_disp_reg64(rVmTl, ch, *output);
8555 a. test_reg64_reg64(*output, *output);
8557 UnlikelyIfBlock<CC_Z> fooey(a, astubs);
8558 emitCallStaticLocHelper(astubs, i, output, ch);
8560 } else {
8561 Stats::emitInc(a, Stats::Tx64_StaticLocSlow);
8562 emitCallStaticLocHelper(a, i, output, 0);
8564 // Now we've got the outer variant in *output. Get the address of the
8565 // inner cell, since that's the enregistered representation of a variant.
8566 emitDeref(a, *output, *output);
8567 emitIncRef(*output, KindOfRef);
8568 // Turn output into the local we just initialized.
8569 m_regMap.bindScratch(output, outLoc, KindOfRef, RegInfo::DIRTY);
8572 void
8573 TranslatorX64::analyzeVerifyParamType(Tracelet& t, NormalizedInstruction& i) {
8574 int param = i.imm[0].u_IVA;
8575 const TypeConstraint& tc = curFunc()->params()[param].typeConstraint();
8576 if (!tc.isObject()) {
8577 // We are actually using the translation-time value of this local as a
8578 // prediction; if the param check failed at compile-time, we predict it
8579 // will continue failing.
8580 bool compileTimeCheck = tc.check(frame_local(curFrame(), param), curFunc());
8581 i.m_txFlags = nativePlan(compileTimeCheck);
8582 i.manuallyAllocInputs = true;
8583 } else {
8584 bool trace = i.inputs[0]->isObject() ||
8585 (i.inputs[0]->isNull() && tc.nullable());
8586 i.m_txFlags = supportedPlan(trace);
8590 static void
8591 VerifyParamTypeFail(int paramNum) {
8592 VMRegAnchor _;
8593 const ActRec* ar = curFrame();
8594 const Func* func = ar->m_func;
8595 const TypeConstraint& tc = func->params()[paramNum].typeConstraint();
8596 ASSERT(tc.isObject());
8597 TypedValue* tv = frame_local(ar, paramNum);
8598 TRACE(3, "%s Obj %s, needs type %s\n",
8599 __func__,
8600 tv->m_data.pobj->getVMClass()->name()->data(),
8601 tc.typeName()->data());
8602 tc.verifyFail(func, paramNum, tv);
8605 // check the class hierarchy; returns nonzero iff cls is an instance of constraint
8606 static uint64_t
8607 VerifyParamTypeSlow(const Class* cls, const Class* constraint) {
8608 Stats::inc(Stats::Tx64_VerifyParamTypeSlow);
8609 Stats::inc(Stats::Tx64_VerifyParamTypeFast, -1);
8611 // ensure C++ returns a 0 or 1 with upper bits zeroed
8612 return static_cast<uint64_t>(constraint && cls->classof(constraint));
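// For object typehints we compare the value's Class* against the hint's
// Class* (loaded from the target cache, or computed for self/parent). Only
// when the pointers differ do we call VerifyParamTypeSlow to walk the
// hierarchy, and only a genuine mismatch takes the astubs failure path.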
8615 void
8616 TranslatorX64::translateVerifyParamType(const Tracelet& t,
8617 const NormalizedInstruction& i) {
8618 Stats::emitInc(a, Stats::Tx64_VerifyParamTypeFast);
8620 int param = i.imm[0].u_IVA;
8621 const TypeConstraint& tc = curFunc()->params()[param].typeConstraint();
8622 // not quite a nop. The guards should have verified that the m_type field
8623 // is compatible, but for objects we need to go one step further and
8624 // ensure that we're dealing with the right class.
8625 // NULL inputs only get traced when constraint is nullable.
8626 ASSERT(i.inputs.size() == 1);
8627 if (!i.inputs[0]->isObject()) return; // nop.
8629 // Get the input's class from ObjectData->m_cls
8630 const Location& in = i.inputs[0]->location;
8631 PhysReg src = getReg(in);
8632 ScratchReg inCls(m_regMap);
8633 if (i.inputs[0]->rtt.isVariant()) {
8634 emitDeref(a, src, *inCls);
8635 a. load_reg64_disp_reg64(*inCls, ObjectData::getVMClassOffset(), *inCls);
8636 } else {
8637 a. load_reg64_disp_reg64(src, ObjectData::getVMClassOffset(), *inCls);
8640 ScratchReg cls(m_regMap);
8641 // Constraint may not be in the class hierarchy of the method being traced;
8642 // look up the class handle and emit code to put the Class* into a reg.
8643 if (!tc.isSelf() && !tc.isParent()) {
8644 const StringData* clsName = tc.typeName();
8645 using namespace TargetCache;
8646 CacheHandle ch = allocKnownClass(clsName);
8647 a. load_reg64_disp_reg64(rVmTl, ch, *cls);
8648 } else {
8649 const Class *constraint = NULL;
8650 if (tc.isSelf()) {
8651 tc.selfToClass(curFunc(), &constraint);
8652 } else if (tc.isParent()) {
8653 tc.parentToClass(curFunc(), &constraint);
8655 emitImmReg(a, uintptr_t(constraint), *cls);
8657 // Compare this class to the incoming object's class. If the typehint's class
8658 // is not present, it cannot be an instance: fail.
8659 a. cmp_reg64_reg64(*inCls, *cls);
8662 JccBlock<CC_Z> subclassCheck(a);
8663 // Call helper since ObjectData::instanceof is a member function
8664 if (false) {
8665 Class* cls = NULL;
8666 Class* constraint = NULL;
8667 VerifyParamTypeSlow(cls, constraint);
8669 EMIT_CALL(a, VerifyParamTypeSlow, R(*inCls), R(*cls));
8670 // Pin the return value, check if a match or take slow path
8671 m_regMap.bind(rax, Location(), KindOfInvalid, RegInfo::SCRATCH);
8672 a. test_reg64_reg64(rax, rax);
8673 m_regMap.freeScratchReg(rax);
8675 // Put the failure path into astubs
8677 UnlikelyIfBlock<CC_Z> fail(a, astubs);
8678 if (false) { // typecheck
8679 VerifyParamTypeFail(param);
8681 EMIT_CALL(astubs, VerifyParamTypeFail, IMM(param));
8682 recordReentrantStubCall(i);
8687 void
8688 TranslatorX64::analyzeInstanceOfD(Tracelet& t, NormalizedInstruction& i) {
8689 ASSERT(i.inputs.size() == 1);
8690 ASSERT(i.outStack && !i.outLocal);
8691 i.m_txFlags = planHingesOnRefcounting(i.inputs[0]->outerType());
8694 // check the class hierarchy; returns nonzero iff cls is an instance of constraint
8695 static uint64_t
8696 InstanceOfDSlow(const Class* cls, const Class* constraint) {
8697 Stats::inc(Stats::Tx64_InstanceOfDSlow);
8698 Stats::inc(Stats::Tx64_InstanceOfDFast, -1);
8700 // ensure C++ returns a 0 or 1 with upper bits zeroed
8701 return static_cast<uint64_t>(constraint && cls->classof(constraint));
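// InstanceOfD follows the same pattern: non-objects are immediately false,
// a pointer-equal Class* is immediately true, and only the unequal case
// calls InstanceOfDSlow in astubs for the full classof() walk.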
8704 void
8705 TranslatorX64::translateInstanceOfD(const Tracelet& t,
8706 const NormalizedInstruction& i) {
8707 Stats::emitInc(a, Stats::Tx64_InstanceOfDFast);
8708 ASSERT(i.inputs.size() == 1);
8709 ASSERT(i.outStack && !i.outLocal);
8711 DynLocation* input0 = i.inputs[0];
8712 bool input0IsLoc = input0->isLocal();
8713 DataType type = input0->valueType();
8714 PhysReg srcReg = getReg(input0->location);
8715 ScratchReg result(m_regMap);
8717 if (type != KindOfObject) {
8718 // All non-object inputs are not instances
8719 if (!input0IsLoc) {
8720 ASSERT(!input0->isVariant());
8721 emitDecRef(i, srcReg, type);
8723 emitImmReg(a, false, *result);
8725 } else {
8726 // Get the input's class from ObjectData->m_cls
8727 ScratchReg inCls(m_regMap);
8728 if (input0->rtt.isVariant()) {
8729 ASSERT(input0IsLoc);
8730 emitDeref(a, srcReg, *inCls);
8731 a. load_reg64_disp_reg64(*inCls, ObjectData::getVMClassOffset(), *inCls);
8732 } else {
8733 a. load_reg64_disp_reg64(srcReg, ObjectData::getVMClassOffset(), *inCls);
8735 if (!input0IsLoc) {
8736 emitDecRef(i, srcReg, type);
8739 // Set result to true for now. If we take the slow path, use its return value.
8740 emitImmReg(a, true, *result);
8741 ScratchReg cls(m_regMap);
8742 // Constraint may not be in the class hierarchy of the method being traced;
8743 // look up the class handle and emit code to put the Class* into a reg.
8744 using namespace TargetCache;
8745 int param = i.imm[0].u_SA;
8746 const StringData* clsName = curUnit()->lookupLitstrId(param);
8747 CacheHandle ch = allocKnownClass(clsName);
8748 a. load_reg64_disp_reg64(rVmTl, ch, *cls);
8749 // Compare this class to the incoming object's class. If the typehint's
8750     // class is not present, it cannot be an instance: fail
8751 a. cmp_reg64_reg64(*inCls, *cls);
8754 UnlikelyIfBlock<CC_NZ> subclassCheck(a, astubs);
8755 // Call helper since ObjectData::instanceof is a member function
8756 if (false) {
8757 Class* cls = NULL;
8758 Class* constraint = NULL;
8759 InstanceOfDSlow(cls, constraint);
8761 EMIT_CALL(astubs, InstanceOfDSlow, R(*inCls), R(*cls));
8762 astubs. mov_reg32_reg32(rax, *result);
8765 // Bind result and destination
8766 m_regMap.bindScratch(result, i.outStack->location, i.outStack->outerType(),
8767 RegInfo::DIRTY);
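// Roughly (a sketch, not the literal emitted code), the fast/slow split above is:
//
//   Class* c = obj->m_cls;                               // ObjectData::getVMClassOffset()
//   Class* d = *(Class**)(rVmTl + allocKnownClass(clsName));
//   uint64_t res = (c == d) ? 1 : InstanceOfDSlow(c, d); // slow path lives in astubs;
//                                                        // it also handles d == NULL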
8770 void
8771 TranslatorX64::analyzeIterInit(Tracelet& t, NormalizedInstruction& ni) {
8772 ni.m_txFlags = supportedPlan(ni.inputs[0]->valueType() == KindOfArray ||
8773 ni.inputs[0]->valueType() == KindOfObject);
8776 void
8777 TranslatorX64::translateIterInit(const Tracelet& t,
8778 const NormalizedInstruction& ni) {
8779 ASSERT(ni.inputs.size() == 1);
8780 ASSERT(!ni.outStack && !ni.outLocal);
8781 DynLocation* in = ni.inputs[0];
8782 ASSERT(in->outerType() != KindOfRef);
8783 SKTRACE(1, ni.source, "IterInit: committed to translation\n");
8784 PhysReg src = getReg(in->location);
8785 SrcKey taken, notTaken;
8786 branchDests(t, ni, &taken, &notTaken, 1 /* immIdx */);
8787 Location iterLoc(Location::Iter, ni.imm[0].u_IVA);
8788 switch (in->valueType()) {
8789 case KindOfArray: {
8790 if (false) { // typecheck
8791 Iter *dest = NULL;
8792 HphpArray *arr = NULL;
8793 new_iter_array(dest, arr);
8795 EMIT_RCALL(a, ni, new_iter_array, A(iterLoc), R(src));
8796 break;
8798 case KindOfObject: {
8799 if (false) { // typecheck
8800 Iter *dest = NULL;
8801 ObjectData *obj = NULL;
8802 Class *ctx = NULL;
8803 new_iter_object(dest, obj, ctx);
8805 bool ctxFixed = isContextFixed();
8806 PREP_CTX(ctxFixed, argNumToRegName[2]);
8807 EMIT_RCALL(a, ni, new_iter_object, A(iterLoc), R(src), CTX(ctxFixed));
8808 break;
8810 default: not_reached();
8812 syncOutputs(t); // Ends BB
8813 // If a new iterator is created, new_iter_* will not adjust the refcount of
8814 // the input. If a new iterator is not created, new_iter_* will decRef the
8815 // input for us. new_iter_* returns 0 if an iterator was not created,
8816 // otherwise it returns 1.
8817 a. test_reg64_reg64(rax, rax);
8818 emitCondJmp(taken, notTaken, CC_Z);
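// Sketch of the contract with new_iter_*: it returns 1 and leaves the input's
// refcount alone when an iterator is created, and returns 0 (decRef'ing the
// input itself) when there is nothing to iterate. The CC_Z branch above is the
// bytecode's taken offset, i.e. the empty-collection case:
//
//   if (!new_iter_array(&iter, arr)) goto taken;   // nothing to iterate
//   goto notTaken;                                 // enter the iteration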
8821 void
8822 TranslatorX64::analyzeIterValueC(Tracelet& t, NormalizedInstruction& i) {
8823 i.m_txFlags = supportedPlan(
8824 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
8825 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
8828 void
8829 TranslatorX64::translateIterValueC(const Tracelet& t,
8830 const NormalizedInstruction& i) {
8831 ASSERT(i.inputs.size() == 1);
8832 ASSERT(i.inputs[0]->rtt.isIter());
8834 Location outLoc;
8835 Iter::Type iterType = i.inputs[0]->rtt.iterType();
8836 typedef void (*IterValueC)(Iter*, TypedValue*);
8837 IterValueC ivc;
8838 if (i.outStack) {
8839 outLoc = i.outStack->location;
8840 ivc = (iterType == Iter::TypeArray)
8841 ? iter_value_cell_array : iter_value_cell_iterator;
8842 } else {
8843 outLoc = i.outLocal->location;
8844 ivc = (iterType == Iter::TypeArray)
8845 ? iter_value_cell_local_array : iter_value_cell_local_iterator;
8847 EMIT_RCALL(a, i, ivc, A(i.inputs[0]->location), A(outLoc));
8848 m_regMap.invalidate(outLoc);
8851 void
8852 TranslatorX64::analyzeIterKey(Tracelet& t, NormalizedInstruction& i) {
8853 i.m_txFlags = supportedPlan(
8854 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
8855 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
8858 void
8859 TranslatorX64::translateIterKey(const Tracelet& t,
8860 const NormalizedInstruction& i) {
8861 ASSERT(i.inputs.size() == 1);
8862 ASSERT(i.inputs[0]->rtt.isIter());
8864 Location outLoc;
8865 Iter::Type iterType = i.inputs[0]->rtt.iterType();
8866 typedef void (*IterKey)(Iter*, TypedValue*);
8867 IterKey ik;
8868 if (i.outStack) {
8869 outLoc = i.outStack->location;
8870 ik = (iterType == Iter::TypeArray)
8871 ? iter_key_cell_array : iter_key_cell_iterator;
8872 } else {
8873 outLoc = i.outLocal->location;
8874 ik = (iterType == Iter::TypeArray)
8875 ? iter_key_cell_local_array : iter_key_cell_local_iterator;
8877 EMIT_RCALL(a, i, ik, A(i.inputs[0]->location), A(outLoc));
8878 m_regMap.invalidate(outLoc);
8881 void
8882 TranslatorX64::analyzeIterNext(Tracelet& t, NormalizedInstruction& i) {
8883 ASSERT(i.inputs.size() == 1);
8884 i.m_txFlags = supportedPlan(
8885 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
8886 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
8889 void
8890 TranslatorX64::translateIterNext(const Tracelet& t,
8891 const NormalizedInstruction& i) {
8892 ASSERT(i.inputs.size() == 1);
8893 ASSERT(!i.outStack && !i.outLocal);
8894 ASSERT(i.inputs[0]->rtt.isIter());
8896 if (false) { // type check
8897 Iter* it = NULL;
8898 int64 ret = iter_next_array(it);
8899 if (ret) printf("\n");
8901 m_regMap.cleanAll(); // input might be in-flight
8902 // If the iterator reaches the end, iter_next_array will handle
8903 // freeing the iterator and it will decRef the array
8904 EMIT_CALL(a, iter_next_array, A(i.inputs[0]->location));
8905 recordReentrantCall(a, i);
8906   // RAX is now a scratch register with no program meaning...
8907 m_regMap.bind(rax, Location(), KindOfInvalid, RegInfo::SCRATCH);
8909 // syncOutputs before we handle the branch.
8910 syncOutputs(t);
8911 SrcKey taken, notTaken;
8912 branchDests(t, i, &taken, &notTaken, 1 /* destImmIdx */);
8914 a. test_reg64_reg64(rax, rax);
8915 emitCondJmp(taken, notTaken, CC_NZ);
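// Sketch of the generated control flow (iter_next_array frees the iterator and
// decRefs the array when the end is reached):
//
//   if (iter_next_array(&iter)) goto taken;   // more elements: keep iterating
//   goto notTaken;                            // exhausted: fall out of the loop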
8918 // PSEUDOINSTR_DISPATCH is a switch() fragment that routes opcodes to their
8919 // shared handlers, as per the PSEUDOINSTRS macro.
8920 #define PSEUDOINSTR_DISPATCH(func) \
8921 case OpBitAnd: \
8922 case OpBitOr: \
8923 case OpBitXor: \
8924 case OpSub: \
8925 case OpMul: \
8926 func(BinaryArithOp, t, i) \
8927 case OpSame: \
8928 case OpNSame: \
8929 func(SameOp, t, i) \
8930 case OpEq: \
8931 case OpNeq: \
8932 func(EqOp, t, i) \
8933 case OpLt: \
8934 case OpLte: \
8935 case OpGt: \
8936 case OpGte: \
8937 func(LtGtOp, t, i) \
8938 case OpEmptyL: \
8939 case OpCastBool: \
8940 func(UnaryBooleanOp, t, i) \
8941 case OpJmpZ: \
8942 case OpJmpNZ: \
8943 func(BranchOp, t, i) \
8944 case OpSetL: \
8945 case OpBindL: \
8946 func(AssignToLocalOp, t, i) \
8947 case OpFPassC: \
8948 case OpFPassCW: \
8949 case OpFPassCE: \
8950 func(FPassCOp, t, i) \
8951 case OpFPushCuf: \
8952 case OpFPushCufF: \
8953 case OpFPushCufSafe: \
8954 func(FPushCufOp, t, i) \
8955 case OpIssetL: \
8956 case OpIsNullL: \
8957 case OpIsStringL: \
8958 case OpIsArrayL: \
8959 case OpIsIntL: \
8960 case OpIsObjectL: \
8961 case OpIsBoolL: \
8962 case OpIsDoubleL: \
8963 case OpIsNullC: \
8964 case OpIsStringC: \
8965 case OpIsArrayC: \
8966 case OpIsIntC: \
8967 case OpIsObjectC: \
8968 case OpIsBoolC: \
8969 case OpIsDoubleC: \
8970 func(CheckTypeOp, t, i)
8972 void
8973 TranslatorX64::analyzeInstr(Tracelet& t,
8974 NormalizedInstruction& i) {
8975 const Opcode op = i.op();
8976 switch (op) {
8977 #define CASE(iNm) \
8978 case Op ## iNm: { \
8979 analyze ## iNm(t, i); \
8980 } break;
8981 #define ANALYZE(a, b, c) analyze ## a(b, c); break;
8982 INSTRS
8983 PSEUDOINSTR_DISPATCH(ANALYZE)
8985 #undef ANALYZE
8986 #undef CASE
8987 default: {
8988 ASSERT(i.m_txFlags == Interp);
8991 SKTRACE(1, i.source, "translation plan: %x\n", i.m_txFlags);
8994 bool
8995 TranslatorX64::dontGuardAnyInputs(Opcode op) {
8996 switch (op) {
8997 #define CASE(iNm) case Op ## iNm:
8998 #define NOOP(a, b, c)
8999 INSTRS
9000 PSEUDOINSTR_DISPATCH(NOOP)
9001 return false;
9003 return true;
9004 #undef NOOP
9005 #undef CASE
9009 void TranslatorX64::emitOneGuard(const Tracelet& t,
9010 const NormalizedInstruction& i,
9011 PhysReg reg, int disp, DataType type,
9012 TCA &sideExit) {
9013 bool isFirstInstr = (&i == t.m_instrStream.first);
9014 emitTypeCheck(a, type, reg, disp);
9015 if (isFirstInstr) {
9016 SrcRec& srcRec = *getSrcRec(t.m_sk);
9017 // If it's the first instruction, we haven't made any forward
9018 // progress yet, so this is really a tracelet-level guard rather
9019 // than a side exit. If we tried to "side exit", we'd come right
9020 // back to this check!
9022 // We need to record this as a fallback branch.
9023 emitFallbackJmp(srcRec);
9024 } else if (!sideExit) {
9025 UnlikelyIfBlock<CC_NZ> branchToSideExit(a, astubs);
9026 sideExit = astubs.code.frontier;
9027 emitSideExit(astubs, i, false /*next*/);
9028 } else {
9029 a. jnz(sideExit);
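// In pseudo-code, emitOneGuard lays out its checks like this (illustrative only):
//
//   cmp type(reg + disp), expected
//   first instruction:        jnz -> fallback jmp (retranslate this tracelet)
//   later, no side exit yet:  jnz -> new side-exit stub emitted in astubs
//   later, subsequent guards: jnz -> the side-exit stub created above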
9033 // Emit necessary guards for variants and pseudo-main locals before instr i.
9034 // For HHIR, this only inserts guards for pseudo-main locals. Variants are
9035 // guarded in a different way.
9036 void
9037 TranslatorX64::emitVariantGuards(const Tracelet& t,
9038 const NormalizedInstruction& i) {
9039 bool pseudoMain = Translator::liveFrameIsPseudoMain();
9040 bool isFirstInstr = (&i == t.m_instrStream.first);
9041 TCA sideExit = NULL;
9042 const NormalizedInstruction *base = &i;
9043 while (base->grouped) {
9044 base = base->prev;
9045 ASSERT(base);
9047 for (size_t in = 0; in < i.inputs.size(); ++in) {
9048 DynLocation* input = i.inputs[in];
9049 if (!input->isValue()) continue;
9050 bool isRef = input->isVariant() &&
9051 !i.ignoreInnerType &&
9052 input->rtt.innerType() != KindOfInvalid;
9053 bool modifiableLocal = pseudoMain && input->isLocal() &&
9054 !input->rtt.isVagueValue();
9056 if (!modifiableLocal && !isRef) continue;
9058 SKTRACE(1, i.source, "guarding %s: (%s:%d) :: %d!\n",
9059 modifiableLocal ? "pseudoMain local" : "variant inner",
9060 input->location.spaceName(),
9061 input->location.offset,
9062 input->rtt.valueType());
9063 // TODO task 1122807: don't check the inner type if we've already
9064 // checked it and have executed no possibly-aliasing instructions in
9065 // the meanwhile.
9066 if (modifiableLocal) {
9067 if (m_useHHIR) {
9068 RuntimeType& rtt = input->rtt;
9069 JIT::Type::Tag type = JIT::Type::fromDataType(rtt.outerType(),
9070 rtt.innerType());
9071 if (isFirstInstr) {
9072 m_hhbcTrans->guardTypeLocal(input->location.offset, type);
9073 } else {
9074 m_hhbcTrans->checkTypeLocal(input->location.offset, type);
9076 } else {
9077 PhysReg reg;
9078 int disp;
9079 locToRegDisp(input->location, &reg, &disp);
9080 emitOneGuard(t, *base, reg, disp,
9081 input->rtt.outerType(), sideExit);
9084 if (isRef && !m_useHHIR) {
9085 m_regMap.allocInputReg(i, in);
9086 emitOneGuard(t, *base, getReg(input->location), 0,
9087 input->rtt.innerType(), sideExit);
9092 NormalizedInstruction::OutputUse
9093 NormalizedInstruction::outputIsUsed(DynLocation* output) const {
9094 for (NormalizedInstruction* succ = next;
9095 succ; succ = succ->next) {
9096 for (size_t i = 0; i < succ->inputs.size(); ++i) {
9097 if (succ->inputs[i] == output) {
9098 if (succ->inputWasInferred(i)) {
9099 return OutputInferred;
9101 if (Translator::Get()->dontGuardAnyInputs(succ->op())) {
9102         /* the consumer doesn't care about its inputs
9103 but we may still have inferred something about
9104 its outputs that a later instruction may depend on
9106 if (!outputDependsOnInput(succ->op()) ||
9107 !(succ->outStack && !succ->outStack->rtt.isVagueValue() &&
9108 succ->outputIsUsed(succ->outStack) != OutputUsed) ||
9109 !(succ->outLocal && !succ->outLocal->rtt.isVagueValue() &&
9110               succ->outputIsUsed(succ->outLocal) != OutputUsed)) {
9111 return OutputDoesntCare;
9114 return OutputUsed;
9118 return OutputUnused;
9121 void
9122 TranslatorX64::emitPredictionGuards(const NormalizedInstruction& i) {
9123 if (!i.outputPredicted || i.breaksBB) return;
9124 NormalizedInstruction::OutputUse u = i.outputIsUsed(i.outStack);
9126 if (m_useHHIR) {
9127 if (u == NormalizedInstruction::OutputUsed ||
9128 u == NormalizedInstruction::OutputInferred) {
9129 JIT::Type::Tag jitType = JIT::Type::fromDataType(i.outStack->outerType(),
9130 i.outStack->valueType());
9131 if (u == NormalizedInstruction::OutputInferred) {
9132 TRACE(1, "HHIR: emitPredictionGuards: output inferred to be %s\n",
9133 JIT::Type::Strings[jitType]);
9134 m_hhbcTrans->assertTypeStack(0, jitType);
9135 } else {
9136 TRACE(1, "HHIR: emitPredictionGuards: output predicted to be %s\n",
9137 JIT::Type::Strings[jitType]);
9138 m_hhbcTrans->checkTypeStack(0, jitType, i.next->offset());
9141 return;
9144 switch (u) {
9145 case NormalizedInstruction::OutputUsed:
9146 break;
9147 case NormalizedInstruction::OutputUnused:
9148 return;
9149 case NormalizedInstruction::OutputInferred:
9150 Stats::emitInc(a, Stats::TC_TypePredOverridden);
9151 return;
9152 case NormalizedInstruction::OutputDoesntCare:
9153 Stats::emitInc(a, Stats::TC_TypePredUnneeded);
9154 return;
9157 ASSERT(i.outStack);
9158 PhysReg base;
9159 int disp;
9160 locToRegDisp(i.outStack->location, &base, &disp);
9161 ASSERT(base == rVmSp);
9162 TRACE(1, "PREDGUARD: %p dt %d offset %d voffset %lld\n",
9163 a.code.frontier, i.outStack->outerType(), disp,
9164 i.outStack->location.offset);
9165 emitTypeCheck(a, i.outStack->outerType(), rVmSp, disp);
9167 UnlikelyIfBlock<CC_NZ> branchToSideExit(a, astubs);
9168 Stats::emitInc(astubs, Stats::TC_TypePredMiss);
9169 emitSideExit(astubs, i, true);
9171 Stats::emitInc(a, Stats::TC_TypePredHit);
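// For the non-HHIR path above, the emitted prediction guard is morally:
//
//   if (stackSlot.m_type != predictedType) {   // emitTypeCheck on rVmSp + disp
//     Stats::TC_TypePredMiss++;                // counted in astubs
//     sideExit();                              // resume after this instruction
//   }
//   Stats::TC_TypePredHit++;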
9174 static void failedTypePred() {
9175 raise_error("A type prediction was incorrect");
9178 void
9179 TranslatorX64::translateInstrWork(const Tracelet& t,
9180 const NormalizedInstruction& i) {
9181 const Opcode op = i.op();
9182 switch (op) {
9183 #define CASE(iNm) \
9184 case Op ## iNm: \
9185 translate ## iNm(t, i); \
9186 break;
9187 #define TRANSLATE(a, b, c) translate ## a(b, c); break;
9188 INSTRS
9189 PSEUDOINSTR_DISPATCH(TRANSLATE)
9190 #undef TRANSLATE
9191 #undef CASE
9192 default:
9193 ASSERT(false);
9197 void
9198 TranslatorX64::translateInstr(const Tracelet& t,
9199 const NormalizedInstruction& i) {
9201 * translateInstr() translates an individual instruction in a tracelet,
9202 * either by directly emitting machine code for that instruction or by
9203 * emitting a call to the interpreter.
9205 * If the instruction ends the current tracelet, we must emit machine code
9206 * to transfer control to some target that will continue to make forward
9207 * progress. This target may be the beginning of another tracelet, or it may
9208 * be a translator service request. Before transferring control, a tracelet
9209 * must ensure the following invariants hold:
9210 * 1) The machine registers rVmFp and rVmSp are in sync with vmfp()
9211 * and vmsp().
9212 * 2) All "dirty" values are synced in memory. This includes the
9213 * evaluation stack, locals, globals, statics, and any other program
9214 * accessible locations. This also means that all refcounts must be
9215 * up to date.
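 *
 * A minimal sketch of those end-of-tracelet duties, expressed with the same
 * helpers this function uses below (illustrative ordering, not literal code):
 *
 *   syncOutputs(t);          // (2) spill dirty values and sync rVmSp/rVmFp (1)
 *   emitBindJmp(t.m_nextSk); // chain to the next tracelet or a service request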
9217 ASSERT(!m_useHHIR);
9218 ASSERT(!i.outStack || i.outStack->isStack());
9219 ASSERT(!i.outLocal || i.outLocal->isLocal());
9220 const char *opNames[] = {
9221 #define O(name, imm, push, pop, flags) \
9222 #name,
9223 OPCODES
9224 #undef O
9226 SpaceRecorder sr(opNames[i.op()], a);
9227 SKTRACE(1, i.source, "translate %#lx\n", long(a.code.frontier));
9228 const Opcode op = i.op();
9230 TCA start = a.code.frontier;
9231 TCA astart = astubs.code.frontier;
9233 m_regMap.bumpEpoch();
9234 // Allocate the input regs upfront unless instructed otherwise
9235 // or the instruction is interpreted
9236 if (!i.manuallyAllocInputs && i.m_txFlags) {
9237 m_regMap.allocInputRegs(i);
9240 if (debug) {
9241 for (unsigned j = 0; j < i.inputs.size(); j++) {
9242 if (i.inputWasInferred(j)) {
9243 DynLocation* dl = i.inputs[j];
9244 ASSERT(dl->rtt.isValue() &&
9245 !dl->rtt.isVagueValue() &&
9246 dl->outerType() != KindOfInvalid);
9247 PhysReg base;
9248 int disp;
9249 locToRegDisp(dl->location, &base, &disp);
9250 emitTypeCheck(a, dl->rtt.typeCheckValue(), base, disp);
9252 UnlikelyIfBlock<CC_NZ> typePredFailed(a, astubs);
9253 EMIT_CALL(astubs, failedTypePred);
9254 recordReentrantStubCall(i);
9260 if (!i.grouped) {
9261 emitVariantGuards(t, i);
9262 const NormalizedInstruction* n = &i;
9263 while (n->next && n->next->grouped) {
9264 n = n->next;
9265 emitVariantGuards(t, *n);
9269 // Allocate the input regs upfront unless instructed otherwise
9270 // or the instruction is interpreted
9271 if (!i.manuallyAllocInputs && i.m_txFlags) {
9272 m_regMap.allocInputRegs(i);
9275 if (i.m_txFlags == Interp || RuntimeOption::EvalThreadingJit) {
9276 // If the problem is local to this instruction, just call out to
9277 // the interpreter. emitInterpOne will perform end-of-tracelet duties
9278 // if this instruction ends the tracelet.
9279 SKTRACE(1, i.source, "Interp\n");
9280 emitInterpOne(t, i);
9281 } else {
9282 // Actually translate the instruction's body.
9283 Stats::emitIncTranslOp(a, op);
9285 translateInstrWork(t, i);
9288 // Invalidate locations that are no longer live
9289 for (unsigned k = 0; k < i.deadLocs.size(); ++k) {
9290 const Location& l = i.deadLocs[k];
9291 m_regMap.invalidate(l);
9294 emitPredictionGuards(i);
9295 recordBCInstr(op, a, start);
9296 recordBCInstr(op + Op_count, astubs, astart);
9298 if (i.breaksBB && !i.changesPC) {
9299 // If this instruction's opcode always ends the tracelet then the
9300 // instruction case is responsible for performing end-of-tracelet
9301 // duties. Otherwise, we handle ending the tracelet here.
9302 syncOutputs(t);
9303 emitBindJmp(t.m_nextSk);
9307 bool
9308 TranslatorX64::checkTranslationLimit(const SrcKey& sk,
9309 const SrcRec& srcRec) const {
9310 if (srcRec.translations().size() == SrcRec::kMaxTranslations) {
9311 INC_TPC(max_trans);
9312 if (debug && Trace::moduleEnabled(Trace::tx64, 2)) {
9313 const vector<TCA>& tns = srcRec.translations();
9314 TRACE(1, "Too many (%ld) translations: %s, BC offset %d\n",
9315 tns.size(), curUnit()->filepath()->data(),
9316 sk.offset());
9317 SKTRACE(2, sk, "{\n", tns.size());
9318 TCA topTrans = srcRec.getTopTranslation();
9319 for (size_t i = 0; i < tns.size(); ++i) {
9320 const TransRec* rec = getTransRec(tns[i]);
9321 ASSERT(rec);
9322 SKTRACE(2, sk, "%d %p\n", i, tns[i]);
9323 if (tns[i] == topTrans) {
9324 SKTRACE(2, sk, "%d: *Top*\n", i);
9326 if (rec->kind == TransAnchor) {
9327 SKTRACE(2, sk, "%d: Anchor\n", i);
9328 } else {
9329 SKTRACE(2, sk, "%d: guards {\n", i);
9330 for (unsigned j = 0; j < rec->dependencies.size(); ++j) {
9331 TRACE(2, rec->dependencies[j]);
9333 SKTRACE(2, sk, "%d } guards\n", i);
9336 SKTRACE(2, sk, "} /* Too many translations */\n");
9338 return true;
9341 return false;
9344 void
9345 TranslatorX64::emitGuardChecks(X64Assembler& a,
9346 const SrcKey& sk,
9347 const ChangeMap& dependencies,
9348 const RefDeps& refDeps,
9349 SrcRec& fail) {
9350 if (Trace::moduleEnabled(Trace::stats, 2)) {
9351 Stats::emitInc(a, Stats::TraceletGuard_enter);
9354 bool pseudoMain = Translator::liveFrameIsPseudoMain();
9356 emitRB(a, RBTypeTraceletGuards, sk);
9357 for (DepMap::const_iterator dep = dependencies.begin();
9358 dep != dependencies.end();
9359 ++dep) {
9360 if (!pseudoMain || !dep->second->isLocal() || !dep->second->isValue()) {
9361 checkType(a, dep->first, dep->second->rtt, fail);
9362 } else {
9363 TRACE(3, "Skipping tracelet guard for %s %d\n",
9364 dep->second->location.pretty().c_str(),
9365 (int)dep->second->rtt.outerType());
9369 checkRefs(a, sk, refDeps, fail);
9371 if (Trace::moduleEnabled(Trace::stats, 2)) {
9372 Stats::emitInc(a, Stats::TraceletGuard_execute);
9377 void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
9378 if (!debug) return;
9380 const SrcKey& sk = t.m_sk;
9382 TRACE(3, "----------------------------------------------\n");
9383 TRACE(3, " Translating from file %s:%d %s at %p:\n",
9384 curUnit()->filepath()->data(),
9385 curUnit()->getLineNumber(sk.offset()),
9386 curFunc()->name()->data(),
9387 postGuards);
9388 TRACE(3, " preconds:\n");
9389 TRACE(3, " types:\n");
9390 for (DepMap::const_iterator i = t.m_dependencies.begin();
9391 i != t.m_dependencies.end(); ++i) {
9392 TRACE(3, " %-5s\n", i->second->pretty().c_str());
9394 if (t.m_refDeps.size() != 0) {
9395 TRACE(3, " refs:\n");
9396 for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
9397 i != t.m_refDeps.m_arMap.end();
9398 ++i) {
9399 TRACE(3, " (ActRec %lld : %-5s)\n", i->first,
9400 i->second.pretty().c_str());
9403 TRACE(3, " postconds:\n");
9404 for (ChangeMap::const_iterator i = t.m_changes.begin();
9405 i != t.m_changes.end(); ++i) {
9406 TRACE(3, " %-5s\n", i->second->pretty().c_str());
9408 for (SrcKey traceKey(t.m_sk);
9409 traceKey != t.m_nextSk;
9410 traceKey.advance(curUnit())) {
9411 string s = instrToString(
9412 curUnit()->at(traceKey.offset()), curUnit());
9413 TRACE(3, " %6d: %s\n", traceKey.offset(), s.c_str());
9415 TRACE(3, "----------------------------------------------\n");
9416 if (Trace::moduleEnabled(Trace::tx64, 5)) {
9417 // prettyStack() expects to use vmpc(). Leave it in the state we
9418 // found it since this code is debug-only, and we don't want behavior
9419 // to vary across the optimized/debug builds.
9420 PC oldPC = vmpc();
9421 vmpc() = curUnit()->at(sk.offset());
9422 TRACE(3, g_vmContext->prettyStack(string(" tx64 ")));
9423 vmpc() = oldPC;
9424 TRACE(3, "----------------------------------------------\n");
9428 namespace {
9429 template<class T> struct Nuller : private boost::noncopyable {
9430 explicit Nuller(const T** p) : p(p) {}
9431 ~Nuller() { *p = 0; }
9432 T const** const p;
9436 void
9437 TranslatorX64::translateTracelet(const Tracelet& t) {
9438 const SrcKey &sk = t.m_sk;
9440 m_curTrace = &t;
9441 Nuller<Tracelet> ctNuller(&m_curTrace);
9443 SKTRACE(1, sk, "translateTracelet\n");
9444 ASSERT(m_srcDB.find(sk));
9445 ASSERT(m_regMap.pristine());
9446 TCA start = a.code.frontier;
9447 TCA stubStart = astubs.code.frontier;
9448 TCA counterStart = 0;
9449 uint8 counterLen = 0;
9450 SrcRec& srcRec = *getSrcRec(sk);
9451 vector<TransBCMapping> bcMapping;
9453 bool hhirSucceeded = irTranslateTracelet(t, start, stubStart);
9454 if (hhirSucceeded) {
9455 m_irAUsage += (a.code.frontier - start);
9456 m_irAstubsUsage += (astubs.code.frontier - stubStart);
9458 if (!hhirSucceeded) {
9459 ASSERT(m_pendingFixups.size() == 0);
9460 try {
9461 if (t.m_analysisFailed || checkTranslationLimit(t.m_sk, srcRec)) {
9462 punt();
9465 emitGuardChecks(a, t.m_sk, t.m_dependencies, t.m_refDeps, srcRec);
9466 dumpTranslationInfo(t, a.code.frontier);
9468 // after guards, add a counter for the translation if requested
9469 if (RuntimeOption::EvalJitTransCounters) {
9470 emitTransCounterInc(a);
9473 emitRB(a, RBTypeTraceletBody, t.m_sk);
9474 Stats::emitInc(a, Stats::Instr_TC, t.m_numOpcodes);
9475 recordBCInstr(OpTraceletGuard, a, start);
9477 // Translate each instruction in the tracelet
9478 for (NormalizedInstruction* ni = t.m_instrStream.first; ni; ni = ni->next) {
9479 if (isTransDBEnabled()) {
9480 bcMapping.push_back((TransBCMapping){ni->offset(),
9481 a.code.frontier,
9482 astubs.code.frontier});
9485 m_curNI = ni;
9486 Nuller<NormalizedInstruction> niNuller(&m_curNI);
9487 translateInstr(t, *ni);
9488 ASSERT(ni->source.offset() >= curFunc()->base());
9489 // We sometimes leave the tail of a truncated tracelet in place to aid
9490 // analysis, but breaksBB is authoritative.
9491 if (ni->breaksBB) break;
9493 } catch (TranslationFailedExc& tfe) {
9494 // The whole translation failed; give up on this BB. Since it is not
9495 // linked into srcDB yet, it is guaranteed not to be reachable.
9496 m_regMap.reset();
9497 // Permanent reset; nothing is reachable yet.
9498 a.code.frontier = start;
9499 astubs.code.frontier = stubStart;
9500 bcMapping.clear();
9501 // Discard any pending fixups.
9502 m_pendingFixups.clear();
9503 TRACE(1, "emitting %d-instr interp request for failed translation @%s:%d\n",
9504 int(t.m_numOpcodes), tfe.m_file, tfe.m_line);
9505 // Add a counter for the translation if requested
9506 if (RuntimeOption::EvalJitTransCounters) {
9507 emitTransCounterInc(a);
9509 a. jmp(
9510 emitServiceReq(REQ_INTERPRET, 2ull, uint64_t(t.m_sk.offset()),
9511 uint64_t(t.m_numOpcodes)));
9512 // Fall through.
9514 } // if (!hhirSucceeded)
9516 for (uint i = 0; i < m_pendingFixups.size(); i++) {
9517 TCA tca = m_pendingFixups[i].m_tca;
9518 ASSERT(isValidCodeAddress(tca));
9519 m_fixupMap.recordFixup(tca, m_pendingFixups[i].m_fixup);
9521 m_pendingFixups.clear();
9523 addTranslation(TransRec(t.m_sk, curUnit()->md5(), t, start,
9524 a.code.frontier - start, stubStart,
9525 astubs.code.frontier - stubStart,
9526 counterStart, counterLen,
9527 bcMapping));
9529 recordGdbTranslation(sk, curUnit(), a, start,
9530 false, false);
9531 recordGdbTranslation(sk, curUnit(), astubs, stubStart,
9532 false, false);
9533 // SrcRec::newTranslation() makes this code reachable. Do this last;
9534   // otherwise there's some chance of reader threads hitting translations whose
9535   // metadata is not yet visible.
9536 TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n", start, sk.m_funcId,
9537 sk.m_offset);
9538 srcRec.newTranslation(a, astubs, start);
9539 m_regMap.reset();
9540 TRACE(1, "tx64: %zd-byte tracelet\n", a.code.frontier - start);
9541 if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
9542 Trace::traceRelease(getUsage().c_str());
9546 static const size_t kASize = 512 << 20;
9547 static const size_t kAStubsSize = 512 << 20;
9548 static const size_t kGDataSize = kASize / 4;
9549 static const size_t kTotalSize = kASize + kAStubsSize +
9550 kTrampolinesBlockSize + kGDataSize;
9551 TranslatorX64::TranslatorX64()
9552 : Translator(),
9553 m_numNativeTrampolines(0),
9554 m_trampolineSize(0),
9555 m_spillFillCode(&a),
9556 m_interceptHelper(0),
9557 m_defClsHelper(0),
9558 m_funcPrologueRedispatch(0),
9559 m_irAUsage(0),
9560 m_irAstubsUsage(0),
9561 m_numHHIRTrans(0),
9562 m_irFactory(NULL),
9563 m_constTable(NULL),
9564 m_traceBuilder(NULL),
9565 m_hhbcTrans(NULL),
9566 m_regMap(kCallerSaved, kCalleeSaved, this),
9567 m_interceptsEnabled(false),
9568 m_unwindRegMap(128),
9569 m_curTrace(0),
9570 m_curNI(0)
9572 TRACE(1, "TranslatorX64@%p startup\n", this);
9573 tx64 = this;
9575 static_assert(kTotalSize < (2ul << 30),
9576 "Combined size of all code/data blocks in TranslatorX64 "
9577 "must be < 2GiB to support 32-bit relative addresses");
9579 static bool profileUp = false;
9580 if (!profileUp) {
9581 profileInit();
9582 profileUp = true;
9585 // We want to ensure that the block for "a", "astubs",
9586 // "atrampolines", and "m_globalData" are nearby so that we can
9587 // short jump/point between them. Thus we allocate one slab and
9588 // divide it between "a", "astubs", and "atrampolines".
9590   // Using sbrk to ensure it's in the bottom 2GB, so we avoid
9591 // the need for trampolines, and get to use shorter
9592 // instructions for tc addresses.
9593 static const size_t kRoundUp = 2 << 20;
9594 uint8_t *base = (uint8_t*)sbrk(kTotalSize + kRoundUp - 1);
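  // Round base up to the next kRoundUp (2MB) boundary; the extra kRoundUp - 1
  // bytes requested from sbrk above leave room for this adjustment.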
9595 base += -(uint64_t)base & (kRoundUp - 1);
9596 if (RuntimeOption::EvalMapTCHuge) {
9597 hintHuge(base, kTotalSize);
9599 atrampolines.init(base, kTrampolinesBlockSize);
9600 base += kTrampolinesBlockSize;
9601 a.init(base, kASize);
9602 m_unwindRegistrar = register_unwind_region(base, kTotalSize);
9603 base += kASize;
9604 astubs.init(base, kAStubsSize);
9605 base += kAStubsSize;
9606 m_globalData.init(base, kGDataSize);
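  // Resulting layout: one contiguous region in the low 2GB (sizes from the
  // constants above):
  //
  //   [ atrampolines | a: 512MB | astubs: 512MB | m_globalData: 128MB ]
  //     kTrampolinesBlockSize    kASize  kAStubsSize  kGDataSize (= kASize / 4)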
9608 // Emit some special helpers that are shared across translations.
9610 // Emit a byte of padding. This is a kind of hacky way to
9611 // avoid hitting an assert in recordGdbStub when we call
9612 // it with m_callToExit - 1 as the start address.
9613 astubs.emitNop(1);
9615 // Call to exit with whatever value the program leaves on
9616 // the return stack.
9617 m_callToExit = emitServiceReq(false, REQ_EXIT, 0ull);
9619 m_retHelper = emitRetFromInterpretedFrame();
9621 moveToAlign(astubs);
9622 m_resumeHelper = astubs.code.frontier;
9623 emitGetGContext(astubs, rax);
9624 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
9625 rVmFp);
9626 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
9627 Stack::topOfStackOffset(), rVmSp);
9628 emitServiceReq(false, REQ_RESUME, 0ull);
9630 // Helper for DefCls
9631 if (false) {
9632 PreClass *preClass = 0;
9633 defClsHelper(preClass);
9635 m_defClsHelper = TCA(a.code.frontier);
9636 PhysReg rEC = argNumToRegName[2];
9637 emitGetGContext(a, rEC);
9638 a. store_reg64_disp_reg64(rVmFp, offsetof(VMExecutionContext, m_fp), rEC);
9639 a. store_reg64_disp_reg64(argNumToRegName[1],
9640 offsetof(VMExecutionContext, m_pc), rEC);
9641 // rax holds the up-to-date top of stack pointer
9642 a. store_reg64_disp_reg64(rax,
9643 offsetof(VMExecutionContext, m_stack) +
9644 Stack::topOfStackOffset(), rEC);
9645 a. jmp((TCA)defClsHelper);
9647 moveToAlign(astubs);
9648 m_stackOverflowHelper = astubs.code.frontier;
9649 // We are called from emitStackCheck, with the new stack frame in
9650 // rStashedAR. Get the caller's PC into rdi and save it off.
9651 astubs. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rax);
9652 astubs. load_reg64_disp_reg32(rStashedAR, AROFF(m_soff), rdi);
9653 astubs. load_reg64_disp_reg64(rax, Func::sharedOffset(), rax);
9654 astubs. load_reg64_disp_reg32(rax, Func::sharedBaseOffset(), rax);
9655 astubs. add_reg32_reg32(rax, rdi);
9657 emitEagerVMRegSave(astubs, SaveFP | SavePC);
9658 emitServiceReq(false, REQ_STACK_OVERFLOW, 0ull);
9660   // The decRef helper for when we bring the count down to zero. The caller
9661   // must bring the value into rdi. These can be burned in for all time, and for all
9662 // translations.
9663 if (false) { // type-check
9664 StringData* str = NULL;
9665 ArrayData* arr = NULL;
9666 ObjectData* obj = NULL;
9667 RefData* ref = NULL;
9668 tv_release_str(str);
9669 tv_release_arr(arr);
9670 tv_release_obj(obj);
9671 tv_release_ref(ref);
9673 typedef void* vp;
9674 m_dtorStubs[BitwiseKindOfString] = emitUnaryStub(a, vp(tv_release_str));
9675 m_dtorStubs[KindOfArray] = emitUnaryStub(a, vp(tv_release_arr));
9676 m_dtorStubs[KindOfObject] = emitUnaryStub(a, vp(tv_release_obj));
9677 m_dtorStubs[KindOfRef] = emitUnaryStub(a, vp(tv_release_ref));
9678 m_dtorGenericStub = genericRefCountStub(a);
9679 m_dtorGenericStubRegs = genericRefCountStubRegs(a);
9681 if (trustSigSegv) {
9682 // Install SIGSEGV handler for timeout exceptions
9683 struct sigaction sa;
9684 struct sigaction old_sa;
9685 sa.sa_sigaction = &TranslatorX64::SEGVHandler;
9686 sa.sa_flags = SA_SIGINFO;
9687 sigemptyset(&sa.sa_mask);
9688 if (sigaction(SIGSEGV, &sa, &old_sa) != 0) {
9689 throw std::runtime_error(
9690 std::string("Failed to install SIGSEGV handler: ") +
9691 strerror(errno));
9693 m_segvChain = old_sa.sa_flags & SA_SIGINFO ?
9694 old_sa.sa_sigaction : (sigaction_t)old_sa.sa_handler;
9698 // Do gdb-specific initialization. This has to happen after
9699 // the TranslatorX64 constructor is called, because gdb initialization
9700 // calls back into TranslatorX64::Get().
9701 void TranslatorX64::initGdb() {
9702 // On a backtrace, gdb tries to locate the calling frame at address
9703 // returnRIP-1. However, for the first VM frame, there is no code at
9704 // returnRIP-1, since the AR was set up manually. For this frame,
9705 // record the tracelet address as starting from callToExit-1, so gdb
9706 // does not barf
9707 recordGdbStub(astubs, m_callToExit - 1, "HHVM::callToExit");
9709 recordBCInstr(OpRetFromInterp, astubs, m_retHelper);
9710 recordGdbStub(astubs, m_retHelper - 1, "HHVM::retHelper");
9711 recordBCInstr(OpResumeHelper, astubs, m_resumeHelper);
9712 recordBCInstr(OpDefClsHelper, a, m_defClsHelper);
9713 recordBCInstr(OpDtorStub, a, m_dtorStubs[BitwiseKindOfString]);
9714 recordGdbStub(a, m_dtorStubs[BitwiseKindOfString],
9715 "HHVM::destructorStub");
9718 TranslatorX64*
9719 TranslatorX64::Get() {
9721 * Called from outrageously early, pre-main code, and will
9722 * allocate the first translator space.
9724 if (!nextTx64) {
9725 nextTx64 = new TranslatorX64();
9726 nextTx64->initGdb();
9728 if (!tx64) {
9729 tx64 = nextTx64;
9731 ASSERT(tx64);
9732 return tx64;
9735 template<int Arity>
9736 TCA TranslatorX64::emitNAryStub(X64Assembler& a, void* fptr) {
9737 BOOST_STATIC_ASSERT((Arity < kNumRegisterArgs));
9739 // The callNAryStub has already saved these regs on a.
9740 RegSet alreadySaved;
9741 for (size_t i = 0; i < Arity; ++i) {
9742 alreadySaved |= RegSet(argNumToRegName[i]);
9746 * We've made a call instruction, and pushed Arity args on the
9747 * stack. So the stack address will be odd coming into the stub if
9748 * Arity + 1 (for the call) is odd. We need to correct for this
9749 * when saving other registers below to keep SSE-friendly alignment
9750 * of the stack.
9752 const int Parity = (Arity + 1) % 2;
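  // For example (hedged): with Arity == 1, the caller's arg push plus the
  // call's return-address push make an even number of qwords, so Parity == 0
  // and no padding is needed; with Arity == 2 that count is odd, Parity == 1,
  // and PhysRegSaverParity below adds one qword of padding to restore
  // SSE-friendly (16-byte) stack alignment.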
9754 // These dtor stubs are meant to be called with the call
9755 // instruction, unlike most translator code.
9756 moveToAlign(a);
9757 TCA start = a.code.frontier;
9759 * Preserve most caller-saved regs. The calling code has already
9760 * preserved regs in `alreadySaved'; we push the rest of the caller
9761 * saved regs and rbp. It should take 9 qwords in total, and the
9762 * incoming call instruction made it 10. This is an even number of
9763 * pushes, so we preserve the SSE-friendliness of our execution
9764 * environment (without real intervention from PhysRegSaverParity).
9766 * Note that we don't need to clean all registers because the only
9767 * reason we could need those locations written back is if stack
9768 * unwinding were to happen. These stubs can re-enter due to user
9769 * destructors, but exceptions are not allowed to propagate out of
9770 * those, so it's not a problem.
9772 a. pushr(rbp); // {
9773 a. mov_reg64_reg64(rsp, rbp);
9775 RegSet s = kCallerSaved - alreadySaved;
9776 PhysRegSaverParity<Parity> rs(a, s);
9777 emitCall(a, TCA(fptr));
9779 a. popr(rbp); // }
9780 a. ret();
9781 return start;
9784 TCA TranslatorX64::emitUnaryStub(X64Assembler& a, void* fptr) {
9785 return emitNAryStub<1>(a, fptr);
9788 TCA TranslatorX64::emitBinaryStub(X64Assembler& a, void* fptr) {
9789 return emitNAryStub<2>(a, fptr);
9793 * Both callUnaryStubImpl and callBinaryStub assume that the stub they
9794 * are calling cannot throw an exception.
9797 template <bool reentrant>
9798 void
9799 TranslatorX64::callUnaryStubImpl(X64Assembler& a,
9800 const NormalizedInstruction& i,
9801 TCA stub, PhysReg arg, int disp/*=0*/) {
9802 // Call the generic dtor stub. They all take one arg.
9803 a. pushr(rdi);
9804 if (arg == rsp) {
9805 // Account for pushing rdi.
9806 disp += 8;
9808 if (disp == 0) {
9809 emitMovRegReg(a, arg, rdi);
9810 } else {
9811 a. lea_reg64_disp_reg64(arg, disp, rdi);
9813 ASSERT(isValidCodeAddress(stub));
9814 emitCall(a, stub);
9815 recordCallImpl<reentrant>(a, i);
9816 a. popr(rdi);
9819 void
9820 TranslatorX64::callBinaryStub(X64Assembler& a, const NormalizedInstruction& i,
9821 TCA stub, PhysReg arg1, PhysReg arg2) {
9822 a. pushr(rdi);
9823 a. pushr(rsi);
9825 // We need to be careful not to clobber our arguments when moving
9826 // them into the appropriate registers. (If we ever need ternary
9827 // stubs, this should probably be converted to use ArgManager.)
9828 if (arg2 == rdi && arg1 == rsi) {
9829 a. xchg_reg64_reg64(rdi, rsi);
9830 } else if (arg2 == rdi) {
9831 emitMovRegReg(a, arg2, rsi);
9832 emitMovRegReg(a, arg1, rdi);
9833 } else {
9834 emitMovRegReg(a, arg1, rdi);
9835 emitMovRegReg(a, arg2, rsi);
9838 ASSERT(isValidCodeAddress(stub));
9839 emitCall(a, stub);
9840 recordReentrantCall(a, i);
9841 a. popr(rsi);
9842 a. popr(rdi);
9845 namespace {
9847 struct DeferredFileInvalidate : public DeferredWorkItem {
9848 Eval::PhpFile* m_f;
9849 DeferredFileInvalidate(Eval::PhpFile* f) : m_f(f) {
9850 TRACE(2, "DeferredFileInvalidate @ %p, m_f %p\n", this, m_f); }
9851 void operator()() {
9852 TRACE(2, "DeferredFileInvalidate: Firing @ %p , m_f %p\n", this, m_f);
9853 tx64->invalidateFileWork(m_f);
9857 struct DeferredPathInvalidate : public DeferredWorkItem {
9858 const std::string m_path;
9859 DeferredPathInvalidate(const std::string& path) : m_path(path) {
9860 ASSERT(m_path.size() >= 1 && m_path[0] == '/');
9862 void operator()() {
9863 String spath(m_path);
9865 * inotify saw this path change. Now poke the file repository;
9866 * it will notice the underlying PhpFile* has changed, and notify
9867 * us via ::invalidateFile.
9869 * We don't actually need to *do* anything with the PhpFile* from
9870 * this lookup; since the path has changed, the file we'll get out is
9871 * going to be some new file, not the old file that needs invalidation.
9873 UNUSED Eval::PhpFile* f =
9874 g_vmContext->lookupPhpFile(spath.get(), "");
9875 // We don't keep around the extra ref.
9876 if (f) f->decRefAndDelete();
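// End-to-end sketch of path invalidation (all pieces appear in this file):
//   invalidatePath(p)        -> PendQ::defer(new DeferredPathInvalidate(p))
//   DeferredPathInvalidate() -> lookupPhpFile() notices the changed file and
//                               notifies us via invalidateFile(f)
//   invalidateFile(f)        -> PendQ::defer(new DeferredFileInvalidate(f))
//   DeferredFileInvalidate() -> invalidateFileWork(f): smash the affected
//                               translations, then enqueue a Treadmill item to
//                               drop the references once no request can still
//                               be running the old code.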
9882 void
9883 TranslatorX64::requestInit() {
9884 TRACE(1, "in requestInit(%ld)\n", g_vmContext->m_currentThreadIdx);
9885 tl_regState = REGSTATE_CLEAN;
9886 PendQ::drain();
9887 requestResetHighLevelTranslator();
9888 Treadmill::startRequest(g_vmContext->m_currentThreadIdx);
9889 memset(&s_perfCounters, 0, sizeof(s_perfCounters));
9892 void
9893 TranslatorX64::requestExit() {
9894 if (s_writeLease.amOwner()) {
9895 s_writeLease.drop();
9897 TRACE_MOD(txlease, 2, "%lx write lease stats: %15lld kept, %15lld grabbed\n",
9898 pthread_self(), s_writeLease.m_hintKept,
9899 s_writeLease.m_hintGrabbed);
9900 PendQ::drain();
9901 Treadmill::finishRequest(g_vmContext->m_currentThreadIdx);
9902 TRACE(1, "done requestExit(%ld)\n", g_vmContext->m_currentThreadIdx);
9903 Stats::dump();
9904 Stats::clear();
9906 if (Trace::moduleEnabledRelease(Trace::tx64stats, 1)) {
9907 Trace::traceRelease("TranslatorX64 perf counters for %s:\n",
9908 g_context->getRequestUrl(50).c_str());
9909 for (int i = 0; i < tpc_num_counters; i++) {
9910 Trace::traceRelease("%-20s %10lld\n",
9911 kPerfCounterNames[i], s_perfCounters[i]);
9913 Trace::traceRelease("\n");
9917 bool
9918 TranslatorX64::isPseudoEvent(const char* event) {
9919 for (int i = 0; i < tpc_num_counters; i++) {
9920 if (!strcmp(event, kPerfCounterNames[i])) {
9921 return true;
9924 return false;
9927 void
9928 TranslatorX64::getPerfCounters(Array& ret) {
9929 for (int i = 0; i < tpc_num_counters; i++) {
9930 // Until Perflab can automatically scale the values we give it to
9931 // an appropriate range, we have to fudge these numbers so they
9932 // look more like reasonable hardware counter values.
9933 ret.set(kPerfCounterNames[i], s_perfCounters[i] * 1000);
9937 TranslatorX64::~TranslatorX64() {
9938 freeSlab(atrampolines.code.base, kTotalSize);
9941 static Debug::TCRange rangeFrom(const X64Assembler& a, const TCA addr,
9942 bool isAstubs) {
9943 ASSERT(a.code.isValidAddress(addr));
9944 return Debug::TCRange(addr, a.code.frontier, isAstubs);
9947 void TranslatorX64::recordBCInstr(uint32_t op,
9948 const X64Assembler& a,
9949 const TCA addr) {
9950 if (addr != a.code.frontier) {
9951 m_debugInfo.recordBCInstr(Debug::TCRange(addr, a.code.frontier,
9952 &a == &astubs ? true : false), op);
9956 void TranslatorX64::recordGdbTranslation(const SrcKey& sk,
9957 const Unit* srcUnit,
9958 const X64Assembler& a,
9959 const TCA start,
9960 bool exit,
9961 bool inPrologue) {
9962 if (start != a.code.frontier && !RuntimeOption::EvalJitNoGdb) {
9963 ASSERT(s_writeLease.amOwner());
9964 m_debugInfo.recordTracelet(rangeFrom(a, start,
9965 &a == &astubs ? true : false),
9966 srcUnit,
9967 srcUnit->at(sk.offset()),
9968 exit, inPrologue);
9972 void TranslatorX64::recordGdbStub(const X64Assembler& a,
9973 const TCA start, const char* name) {
9974 if (!RuntimeOption::EvalJitNoGdb) {
9975 m_debugInfo.recordStub(rangeFrom(a, start, &a == &astubs ? true : false),
9976 name);
9980 void TranslatorX64::defineCns(StringData* name) {
9981 TargetCache::fillConstant(name);
9984 std::string TranslatorX64::getUsage() {
9985 std::string usage;
9986 size_t aUsage = a.code.frontier - a.code.base;
9987 size_t stubsUsage = astubs.code.frontier - astubs.code.base;
9988 size_t tcUsage = TargetCache::s_frontier;
9989 Util::string_printf(usage,
9990 "tx64: %9zd bytes (%ld%%) in a.code\n"
9991 "tx64: %9zd bytes (%ld%%) in astubs.code\n"
9992 "tx64: %9zd bytes (%ld%%) in a.code from ir\n"
9993 "tx64: %9zd bytes (%ld%%) in astubs.code from ir\n"
9994 "tx64: %9zd bytes (%ld%%) in targetCache\n",
9995 aUsage, 100 * aUsage / a.code.size,
9996 stubsUsage, 100 * stubsUsage / astubs.code.size,
9997 m_irAUsage, 100 * m_irAUsage / a.code.size,
9998 m_irAstubsUsage, 100 * m_irAstubsUsage / astubs.code.size,
9999 tcUsage,
10000 100 * tcUsage / TargetCache::tl_targetCaches.size);
10001 return usage;
10004 bool TranslatorX64::addDbgGuards(const Unit* unit) {
10005 // TODO refactor
10006 // It grabs the write lease and iterates through the whole SrcDB...
10007 bool locked = s_writeLease.acquire(true);
10008 if (!locked) {
10009 return false;
10011 struct timespec tsBegin, tsEnd;
10012 gettime(CLOCK_MONOTONIC, &tsBegin);
10013   // The docs say even find() _could_ invalidate the iterator; in practice that
10014   // should be very rare, so go with it for now.
10015 for (SrcDB::iterator it = m_srcDB.begin(); it != m_srcDB.end(); ++it) {
10016 SrcKey const sk = SrcKey::fromAtomicInt(it->first);
10017 SrcRec& sr = *it->second;
10018 if (sr.unitMd5() == unit->md5() &&
10019 !sr.hasDebuggerGuard() &&
10020 isSrcKeyInBL(unit, sk)) {
10021 addDbgGuardImpl(sk, sr);
10024 s_writeLease.drop();
10025 gettime(CLOCK_MONOTONIC, &tsEnd);
10026 int64 elapsed = gettime_diff_us(tsBegin, tsEnd);
10027 if (Trace::moduleEnabledRelease(Trace::tx64, 5)) {
10028 Trace::traceRelease("addDbgGuards got lease for %lld us\n", elapsed);
10030 return true;
10033 bool TranslatorX64::addDbgGuard(const Func* func, Offset offset) {
10034 SrcKey sk(func, offset);
10036 if (SrcRec* sr = m_srcDB.find(sk)) {
10037 if (sr->hasDebuggerGuard()) {
10038 return true;
10040 } else {
10041 // no translation yet
10042 return true;
10045 if (debug) {
10046 if (!isSrcKeyInBL(func->unit(), sk)) {
10047 TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
10048 return false;
10051 bool locked = s_writeLease.acquire(true);
10052 if (!locked) {
10053 return false;
10056 if (SrcRec* sr = m_srcDB.find(sk)) {
10057 addDbgGuardImpl(sk, *sr);
10060 s_writeLease.drop();
10061 return true;
10064 void TranslatorX64::addDbgGuardImpl(const SrcKey& sk, SrcRec& srcRec) {
10065 TCA dbgGuard = a.code.frontier;
10066 // Emit the checks for debugger attach
10067 emitTLSLoad<ThreadInfo>(a, ThreadInfo::s_threadInfo, rScratch);
10068 static COff dbgOff = offsetof(ThreadInfo, m_reqInjectionData) +
10069 offsetof(RequestInjectionData, debugger);
10070 a. load_reg64_disp_reg32(rScratch, dbgOff, rScratch);
10071 a. test_imm32_reg32(0xff, rScratch);
10072 // Branch to a special REQ_INTERPRET if attached
10074 TCA fallback = emitServiceReq(REQ_INTERPRET, 2, uint64_t(sk.offset()), 0);
10075 a. jnz(fallback);
10077 // Emit a jump to the actual code
10078 TCA realCode = srcRec.getTopTranslation();
10079 prepareForSmash(kJmpLen);
10080 TCA dbgBranchGuardSrc = a.code.frontier;
10081 a. jmp(realCode);
10082 // Add it to srcRec
10083 srcRec.addDebuggerGuard(a, astubs, dbgGuard, dbgBranchGuardSrc);
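// The debugger guard emitted above behaves roughly like:
//
//   if (ThreadInfo::s_threadInfo->m_reqInjectionData.debugger) {
//     // service request: interpret from sk.offset() while a debugger is attached
//     REQ_INTERPRET(sk.offset(), 0);
//   }
//   jmp srcRec.getTopTranslation();   // smashable jump to the real translation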
10086 bool TranslatorX64::dumpTCCode(const char* filename) {
10087 string aFilename = string(filename).append("_a");
10088 string astubFilename = string(filename).append("_astub");
10089 FILE* aFile = fopen(aFilename.c_str(),"wb");
10090 if (aFile == NULL)
10091 return false;
10092 FILE* astubFile = fopen(astubFilename.c_str(),"wb");
10093 if (astubFile == NULL) {
10094 fclose(aFile);
10095 return false;
10097 string helperAddrFilename = string(filename).append("_helpers_addrs.txt");
10098 FILE* helperAddrFile = fopen(helperAddrFilename.c_str(),"wb");
10099 if (helperAddrFile == NULL) {
10100 fclose(aFile);
10101 fclose(astubFile);
10102 return false;
10104 // dump starting from the trampolines; this assumes processInit() places
10105 // trampolines before the translation cache
10106 size_t count = a.code.frontier-atrampolines.code.base;
10107 bool result = (fwrite(atrampolines.code.base, 1, count, aFile) == count);
10108 if (result) {
10109 count = astubs.code.frontier - astubs.code.base;
10110 result = (fwrite(astubs.code.base, 1, count, astubFile) == count);
10112 if (result) {
10113 for(PointerMap::iterator iter = trampolineMap.begin();
10114 iter != trampolineMap.end();
10115 iter++) {
10116 void* helperAddr = iter->first;
10117 void* trampAddr = iter->second;
10118 char* functionName = Util::getNativeFunctionName(helperAddr);
10119 fprintf(helperAddrFile,"%10p %10p %s\n",
10120 trampAddr, helperAddr,
10121 functionName);
10122 free(functionName);
10125 fclose(aFile);
10126 fclose(astubFile);
10127 fclose(helperAddrFile);
10128 return result;
10131 // Returns true on success
10132 bool TranslatorX64::dumpTC() {
10133 if (!s_writeLease.acquire(true)) return false;
10134 bool success = dumpTCData();
10135 if (success) {
10136 success = dumpTCCode("/tmp/tc_dump");
10138 s_writeLease.drop();
10139 return success;
10142 // Returns true on success
10143 bool tc_dump(void) {
10144 return TranslatorX64::Get()->dumpTC();
10147 // Returns true on success
10148 bool TranslatorX64::dumpTCData() {
10149 gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
10150 if (!tcDataFile) return false;
10152 if (!gzprintf(tcDataFile,
10153 "repo_schema = %s\n"
10154 "a.base = %p\n"
10155 "a.frontier = %p\n"
10156 "astubs.base = %p\n"
10157 "astubs.frontier = %p\n\n",
10158 Repo::kSchemaId,
10159 atrampolines.code.base, a.code.frontier,
10160 astubs.code.base, astubs.code.frontier)) {
10161 return false;
10164 if (!gzprintf(tcDataFile, "total_translations = %lu\n\n",
10165 m_translations.size())) {
10166 return false;
10169 for (size_t t = 0; t < m_translations.size(); t++) {
10170 if (gzputs(tcDataFile,
10171 m_translations[t].print(getTransCounter(t)).c_str()) == -1) {
10172 return false;
10176 gzclose(tcDataFile);
10177 return true;
10180 #define NATIVE_OP(X) PLAN(X, Native)
10181 #define SUPPORTED_OP(X) PLAN(X, Supported)
10182 #define SIMPLE_OP(X) PLAN(X, Simple)
10183 #define INTERP_OP(X) PLAN(X, Interp)
10185 #define SUPPORTED_OPS() \
10186 NATIVE_OP(Null) \
10187 NATIVE_OP(True) \
10188 NATIVE_OP(False) \
10189 NATIVE_OP(Int) \
10190 NATIVE_OP(String) \
10191 NATIVE_OP(Array) \
10192 NATIVE_OP(NewArray) \
10193 NATIVE_OP(InitThisLoc) \
10194 NATIVE_OP(Dup) \
10195 NATIVE_OP(FPushContFunc) \
10196 NATIVE_OP(ContDone) \
10197 NATIVE_OP(ContValid) \
10198 NATIVE_OP(ContStopped) \
10200 * Invariably call a possibly-reentrant helper.
10201 */ \
10202 SIMPLE_OP(Jmp) \
10203 SIMPLE_OP(FCall) \
10204 SIMPLE_OP(CreateCont) \
10205 SIMPLE_OP(UnpackCont) \
10207 * Translations with a reentrant helper.
10209 * TODO: neither UnboxR nor FPassR can actually call destructors.
10210 */ \
10211 SUPPORTED_OP(UnboxR) \
10212 SUPPORTED_OP(FPassR) \
10213 SUPPORTED_OP(NativeImpl) \
10214 SUPPORTED_OP(UnsetL) \
10215 SUPPORTED_OP(Cns) \
10216 SUPPORTED_OP(ClsCnsD) \
10217 SUPPORTED_OP(This) \
10218 SUPPORTED_OP(PackCont) \
10219 SUPPORTED_OP(ContReceive) \
10220 SUPPORTED_OP(ContRaised) \
10221 SUPPORTED_OP(ContNext) \
10222 SUPPORTED_OP(ContSend) \
10223 SUPPORTED_OP(ContRaise) \
10224 SUPPORTED_OP(ContCurrent) \
10225 SUPPORTED_OP(FPushCtor) \
10226 SUPPORTED_OP(FPushCtorD) \
10227 SUPPORTED_OP(StaticLocInit) \
10229 * Always-interp instructions,
10230 */ \
10231 INTERP_OP(ContHandle)
10233 // Define the trivial analyze methods
10234 #define PLAN(Op, Spt) \
10235 void \
10236 TranslatorX64::analyze ## Op(Tracelet& t, NormalizedInstruction& i) { \
10237 i.m_txFlags = Spt; \
10240 SUPPORTED_OPS()
10242 #undef NATIVE_OP
10243 #undef SUPPORTED_OP
10244 #undef SIMPLE_OP
10245 #undef INTERP_OP
10246 #undef SUPPORTED_OPS
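// For reference, PLAN(This, Supported) above expands to (roughly):
//
//   void TranslatorX64::analyzeThis(Tracelet& t, NormalizedInstruction& i) {
//     i.m_txFlags = Supported;
//   }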
10248 void TranslatorX64::invalidateSrcKey(const SrcKey& sk) {
10249 ASSERT(!RuntimeOption::RepoAuthoritative);
10250 ASSERT(s_writeLease.amOwner());
10252 * Reroute existing translations for SrcKey to an as-yet indeterminate
10253 * new one.
10255 SrcRec* sr = m_srcDB.find(sk);
10256 ASSERT(sr);
10258 * Since previous translations aren't reachable from here, we know we
10259 * just created some garbage in the TC. We currently have no mechanism
10260 * to reclaim this.
10262 sr->replaceOldTranslations(a, astubs);
10265 void TranslatorX64::invalidateFileWork(Eval::PhpFile* f) {
10266 class FileInvalidationTrigger : public Treadmill::WorkItem {
10267 Eval::PhpFile* m_f;
10268 int m_nRefs;
10269 public:
10270 FileInvalidationTrigger(Eval::PhpFile* f, int n) : m_f(f), m_nRefs(n) { }
10271 virtual void operator()() {
10272 if (m_f->decRef(m_nRefs) == 0) {
10273 Eval::FileRepository::onDelete(m_f);
10277 size_t nSmashed = m_srcDB.invalidateCode(f);
10278 if (nSmashed) {
10279 // The srcDB found an entry for this file. The entry's dependency
10280 // on this file was counted as a reference, and the code is no longer
10281 // reachable. We need to wait until the last outstanding request
10282 // drains to know that we can really remove the reference.
10283 Treadmill::WorkItem::enqueue(new FileInvalidationTrigger(f, nSmashed));
10287 bool TranslatorX64::invalidateFile(Eval::PhpFile* f) {
10288 // This is called from high rank, but we'll need the write lease to
10289 // invalidate code.
10290 if (!RuntimeOption::EvalJit) return false;
10291 ASSERT(f != NULL);
10292 PendQ::defer(new DeferredFileInvalidate(f));
10293 return true;
10296 } // HPHP::VM::Transl
10298 static const Trace::Module TRACEMOD = Trace::tx64;
10300 void invalidatePath(const std::string& path) {
10301 TRACE(1, "invalidatePath: abspath %s\n", path.c_str());
10302 PendQ::defer(new DeferredPathInvalidate(path));
10305 } } // HPHP::VM