Supported enregistered string and int keys in the vector translator
[hiphop-php.git] / src / runtime / vm / translator / translator-x64.cpp
blob e33fde2b232366bd9c879b1587a4c9881cd0f432
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 */
16 #include <stdint.h>
17 #include <assert.h>
18 #include <unistd.h>
19 #include <sys/mman.h>
20 #include <strstream>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <strings.h>
24 #include <string>
25 #include <queue>
26 #include <zlib.h>
27 #include <unwind.h>
29 #ifdef __FreeBSD__
30 # include <ucontext.h>
31 typedef __sighandler_t *sighandler_t;
32 # define RIP_REGISTER(v) (v).mc_rip
33 #else
34 # define RIP_REGISTER(v) (v).gregs[REG_RIP]
35 #endif
37 #include <boost/bind.hpp>
38 #include <boost/optional.hpp>
39 #include <boost/utility/typed_in_place_factory.hpp>
40 #include <boost/scoped_ptr.hpp>
42 #include <util/pathtrack.h>
43 #include <util/trace.h>
44 #include <util/bitops.h>
45 #include <util/debug.h>
46 #include <util/ringbuffer.h>
47 #include <util/rank.h>
48 #include <util/timer.h>
49 #include <util/maphuge.h>
51 #include <runtime/base/tv_macros.h>
52 #include <runtime/vm/bytecode.h>
53 #include <runtime/vm/php_debug.h>
54 #include <runtime/vm/runtime.h>
55 #include <runtime/base/complex_types.h>
56 #include <runtime/base/execution_context.h>
57 #include <runtime/base/strings.h>
58 #include <runtime/base/zend/zend_string.h>
59 #include <runtime/base/runtime_option.h>
60 #include <runtime/base/server/source_root_info.h>
61 #include <runtime/ext/ext_continuation.h>
62 #include <runtime/vm/debug/debug.h>
63 #include <runtime/vm/translator/targetcache.h>
64 #include <runtime/vm/translator/log.h>
65 #include <runtime/vm/translator/translator-deps.h>
66 #include <runtime/vm/translator/translator-inline.h>
67 #include <runtime/vm/translator/translator-x64.h>
68 #include <runtime/vm/translator/asm-x64.h>
69 #include <runtime/vm/translator/srcdb.h>
70 #include <runtime/vm/translator/x64-util.h>
71 #include <runtime/vm/translator/unwind-x64.h>
72 #include <runtime/vm/pendq.h>
73 #include <runtime/vm/treadmill.h>
74 #include <runtime/vm/stats.h>
75 #include <runtime/vm/pendq.h>
76 #include <runtime/vm/treadmill.h>
77 #include <runtime/vm/repo.h>
78 #include <runtime/vm/type-profile.h>
79 #include <runtime/vm/member_operations.h>
80 #include <runtime/vm/translator/abi-x64.h>
81 #include <runtime/eval/runtime/file_repository.h>
82 #include <runtime/vm/translator/hopt/ir.h>
83 #include <runtime/vm/translator/hopt/linearscan.h>
84 #include <runtime/vm/translator/hopt/opt.h>
85 #include <runtime/vm/translator/hopt/codegen.h>
87 #include <runtime/vm/translator/translator-x64-internal.h>
89 namespace HPHP {
90 namespace VM {
91 namespace Transl {
93 using namespace reg;
94 using namespace Util;
95 using namespace Trace;
96 using std::max;
98 #define TRANS_PERF_COUNTERS \
99 TPC(translate) \
100 TPC(retranslate) \
101 TPC(interp_bb) \
102 TPC(interp_instr) \
103 TPC(interp_one) \
104 TPC(max_trans) \
105 TPC(enter_tc) \
106 TPC(service_req)
108 #define TPC(n) "trans_" #n,
109 static const char* const kPerfCounterNames[] = {
110 TRANS_PERF_COUNTERS
112 #undef TPC
114 #define TPC(n) tpc_ ## n,
115 enum TransPerfCounter {
116 TRANS_PERF_COUNTERS
117 tpc_num_counters
119 #undef TPC
120 static __thread int64 s_perfCounters[tpc_num_counters];
121 #define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
123 #define NULLCASE() \
124 case KindOfUninit: case KindOfNull
126 #define STRINGCASE() \
127 case BitwiseKindOfString: case KindOfStaticString
129 // nextTx64: Global shared state. The tx64 that should be used for
130 // new requests going forward.
131 TranslatorX64* volatile nextTx64;
132 // tx64: Thread-local state. The tx64 we're using for the current request.
133 __thread TranslatorX64* tx64;
135 // Register dirtiness: thread-private.
136 __thread VMRegState tl_regState = REGSTATE_CLEAN;
138 __thread UnlikelyHitMap* tl_unlikelyHits = nullptr;
139 static StaticString s___call(LITSTR_INIT("__call"));
140 static StaticString s___callStatic(LITSTR_INIT("__callStatic"));
142 // Initialize at most this many locals inline in function body prologue; more
143 // than this, and emitting a loop is more compact. To be precise, the actual
144 // crossover point in terms of code size is 6; 9 was determined by experiment to
145 // be the optimal point in certain benchmarks. #microoptimization
146 static const int kLocalsToInitializeInline = 9;
148 // An intentionally funny-looking-in-core-dumps constant for uninitialized
149 // instruction pointers.
150 static const uint64_t kUninitializedRIP = 0xba5eba11acc01ade;
152 static int
153 localOffset(int loc) {
154 PhysReg base;
155 int offset;
156 locToRegDisp(Location(Location::Local, loc), &base, &offset);
157 ASSERT(base == rVmFp);
158 return offset;
161 // Return the SrcKey for the operation that should follow the supplied
162 // NormalizedInstruction. (This might not be the next SrcKey in the
163 // unit if we merged some instructions or otherwise modified them
164 // during analysis.)
165 SrcKey nextSrcKey(const Tracelet& t, const NormalizedInstruction& i) {
166 return i.next ? i.next->source : t.m_nextSk;
169 // JccBlock --
170 // A raw condition-code block; assumes whatever comparison or ALU op
171 // that sets the Jcc has already executed.
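//
// Illustrative usage (sketch, mirroring the pattern in emitDecRef below):
//
//   a.  sub_imm32_disp_reg32(1, TVOFF(_count), rDatum);
//   {
//     JccBlock<CC_NZ> ifZero(a);   // jumps over the block unless ZF is set
//     // ... emitted only for the _count == 0 case ...
//   }                              // dtor patches the jcc8 to land here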
172 template <int Jcc>
173 struct JccBlock {
174 mutable X64Assembler* m_a;
175 TCA m_jcc8;
176 mutable DiamondGuard* m_dg;
178 explicit JccBlock(X64Assembler& a)
179 : m_a(&a),
180 m_jcc8(a.code.frontier),
181 m_dg(new DiamondGuard(a)) {
182 a. jcc8(Jcc, m_a->code.frontier);
185 ~JccBlock() {
186 if (m_a) {
187 delete m_dg;
188 m_a->patchJcc8(m_jcc8, m_a->code.frontier);
192 private:
193 JccBlock(const JccBlock&);
194 JccBlock& operator=(const JccBlock&);
197 // IfElseBlock: like CondBlock, but with an else clause.
198 // a. test_reg_reg(rax, rax);
199 // { IfElseBlock<CC_Z> ifRax(a);
200 // // Code executed for rax != 0
201 // ifRax.Else();
202 // // Code executed for rax == 0
203 // }
205 template <int Jcc>
206 class IfElseBlock : boost::noncopyable {
207 X64Assembler& m_a;
208 TCA m_jcc8;
209 TCA m_jmp8;
210 public:
211 explicit IfElseBlock(X64Assembler& a) :
212 m_a(a), m_jcc8(a.code.frontier), m_jmp8(NULL) {
213 tx64->m_regMap.freeze();
214 m_a.jcc8(Jcc, m_a.code.frontier); // 1f
216 void Else() {
217 ASSERT(m_jmp8 == NULL);
218 m_jmp8 = m_a.code.frontier;
219 m_a.jmp8(m_jmp8); // 2f
220 // 1:
221 m_a.patchJcc8(m_jcc8, m_a.code.frontier);
223 ~IfElseBlock() {
224 ASSERT(m_jmp8 != NULL);
225 // 2:
226 m_a.patchJmp8(m_jmp8, m_a.code.frontier);
227 tx64->m_regMap.defrost();
231 static bool
232 typeCanBeStatic(DataType t) {
233 return t != KindOfObject && t != KindOfRef;
236 // IfCountNotStatic --
237 // Emits if (%reg->_count != RefCountStaticValue) { ... }.
238 // May short-circuit this check if the type is known to be
239 // static already.
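//
// Illustrative usage (sketch, same shape as emitIncRef below):
//
//   { // if !static then
//     IfCountNotStatic ins(a, rDatum, dtype);
//     a.  add_imm32_disp_reg32(1, TVOFF(_count), rDatum);
//   } // endif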
240 struct IfCountNotStatic {
241 typedef CondBlock<TVOFF(_count),
242 RefCountStaticValue,
243 CC_Z> NonStaticCondBlock;
244 NonStaticCondBlock *m_cb; // might be null
245 IfCountNotStatic(X64Assembler& a,
246 PhysReg reg,
247 DataType t = KindOfInvalid) {
248 // Objects and variants cannot be static
249 if (typeCanBeStatic(t)) {
250 m_cb = new NonStaticCondBlock(a, reg);
251 } else {
252 m_cb = NULL;
256 ~IfCountNotStatic() {
257 delete m_cb;
261 // Segfault handler: figure out if it's an intentional segfault
262 // (timeout exception) and if so, act appropriately. Otherwise, pass
263 // the signal on.
264 void TranslatorX64::SEGVHandler(int signum, siginfo_t *info, void *ctx) {
265 TranslatorX64 *self = Get();
266 void *surprisePage =
267 ThreadInfo::s_threadInfo->m_reqInjectionData.surprisePage;
268 if (info->si_addr == surprisePage) {
269 ucontext_t *ucontext = (ucontext_t*)ctx;
270 TCA rip = (TCA)RIP_REGISTER(ucontext->uc_mcontext);
271 SignalStubMap::const_accessor a;
272 if (!self->m_segvStubs.find(a, rip)) {
273 NOT_REACHED();
275 TCA astubsCall = a->second;
277 // When this handler returns, "call" the astubs code for this
278 // surprise check.
279 RIP_REGISTER(ucontext->uc_mcontext) = (uintptr_t)astubsCall;
281 // We've processed this event; reset the page in case execution
282 // continues normally.
283 g_vmContext->m_stack.unprotect();
284 } else {
285 sighandler_t handler = (sighandler_t)self->m_segvChain;
286 if (handler == SIG_DFL || handler == SIG_IGN) {
287 signal(signum, handler);
288 raise(signum);
289 } else {
290 self->m_segvChain(signum, info, ctx);
296 * Copy a heap cell from memory to the stack.
298 * Use emitCopyToStack when you can safely change the state of the
299 * register map. When using emitCopyToStackRegSafe, you'll need to
300 * invalidate the stack location manually at an appropriate time.
303 void
304 TranslatorX64::emitCopyToStackRegSafe(X64Assembler& a,
305 const NormalizedInstruction& ni,
306 PhysReg src,
307 int off,
308 PhysReg tmpReg) {
309 ASSERT(off % sizeof(Cell) == 0);
310 emitCopyTo(a, src, 0, rVmSp, vstackOffset(ni, off), tmpReg);
313 void
314 TranslatorX64::emitCopyToStack(X64Assembler& a,
315 const NormalizedInstruction& ni,
316 PhysReg src,
317 int off) {
318 ScratchReg scratch(m_regMap);
320 FreezeRegs freeze(m_regMap);
321 emitCopyToStackRegSafe(a, ni, src, off, *scratch);
323 // Forget whatever we thought we knew about the stack.
324 m_regMap.invalidate(ni.outStack->location);
328 * Emit code that does the same thing as tvSet().
330 * The `oldType' and `oldData' registers are used for temporary
331 * storage and unconditionally destroyed.
332 * `toPtr' will be destroyed iff the cell we're storing to is
333 * KindOfRef.
334 * The variant check will not be performed if toOffset is nonzero, so
335 * only pass a nonzero offset if you know the destination is not
336 * KindOfRef.
337 * `from' will not be modified.
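 *
 * Rough C-level sketch of the code emitted below (the helper names here
 * are descriptive only, not real functions):
 *
 *   if (toOffset == 0) derefIfVariant(toPtr);   // follow a KindOfRef
 *   TypedValue* to = (TypedValue*)(toPtr + toOffset);
 *   oldType = to->m_type;  oldData = to->m_data;
 *   storeTypedValue(to, fromType, from);
 *   if (incRefFrom) incRef(from, fromType);
 *   decRefGeneric(oldData, oldType);            // release the old value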
339 void TranslatorX64::emitTvSetRegSafe(const NormalizedInstruction& i,
340 PhysReg from,
341 DataType fromType,
342 PhysReg toPtr,
343 int toOffset,
344 PhysReg oldType,
345 PhysReg oldData,
346 bool incRefFrom) {
347 ASSERT(!i.isNative());
348 ASSERT(!i.isSimple());
349 ASSERT(fromType != KindOfRef);
351 if (toOffset == 0) {
352 emitDerefIfVariant(a, toPtr);
354 a. load_reg64_disp_reg32(toPtr, toOffset + TVOFF(m_type), oldType);
355 a. load_reg64_disp_reg64(toPtr, toOffset + TVOFF(m_data), oldData);
356 emitStoreTypedValue(a, fromType, from, toOffset, toPtr);
357 if (incRefFrom) {
358 emitIncRef(from, fromType);
360 emitDecRefGenericReg(oldData, oldType);
363 void TranslatorX64::emitTvSet(const NormalizedInstruction& i,
364 PhysReg from,
365 DataType fromType,
366 PhysReg toPtr,
367 int toOffset,
368 bool incRefFrom) {
369 ScratchReg oldType(m_regMap);
370 ScratchReg oldData(m_regMap);
371 emitTvSetRegSafe(i, from, fromType, toPtr, toOffset,
372 *oldType, *oldData, incRefFrom);
375 // Logical register move: ensures the value in src will be in dest
376 // after execution, but might do so in strange ways. Do not count on
377 // being able to smash dest to a different register in the future, e.g.
378 void
379 TranslatorX64::emitMovRegReg(X64Assembler& a, PhysReg src, PhysReg dest) {
380 SpaceRecorder("_RegMove", a);
381 if (src != dest) {
382 a. mov_reg64_reg64(src, dest);
386 void
387 TranslatorX64::emitMovRegReg(PhysReg src, PhysReg dest) {
388 emitMovRegReg(a, src, dest);
392 * emitPushAR --
394 * Push an activation record. Will return to the next instruction emitted by
395 * the invoker. Called on behalf of FPushFuncD and FPushFunc. If func is
396 * unknown, we will leave it to the caller to fill in m_func.
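 *
 * Offsets sketch (taken from the AROFF() computations below; no field
 * ordering inside ActRec is implied):
 *
 *   startOfActRec = bytesPopped - sizeof(ActRec)   // stack grows down
 *   funcOff     -> m_func                 (stored only if func is known)
 *   thisOff     -> m_this                 (zeroed when clearThis)
 *   nargsOff    -> m_numArgsAndCtorFlag   (encodeNumArgs(numArgs, isCtor))
 *   varenvOff   -> m_varEnv               (varEnvInvName, usually 0)
 *   savedRbpOff -> m_savedRbp             (current rVmFp)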
398 void
399 TranslatorX64::emitPushAR(const NormalizedInstruction& i, const Func* func,
400 const int bytesPopped /* = 0 */,
401 bool isCtor /* = false */,
402 bool clearThis /* = true */,
403 uintptr_t varEnvInvName /* = 0 */) {
404 if (func && phpBreakpointEnabled(func->name()->data())) {
405 translator_debug_break(a);
407 ASSERT(sizeof(Cell) < sizeof(ActRec));
408 // We are about to push an ActRec onto the stack. The stack grows down,
409 // so the offset of the beginning of the ActRec from the top of stack
410 // is -sizeof(ActRec).
411 int numArgs = i.imm[0].u_IVA;
412 int startOfActRec = bytesPopped - sizeof(ActRec);
413 size_t funcOff = startOfActRec + AROFF(m_func);
414 size_t thisOff = startOfActRec + AROFF(m_this);
415 size_t nargsOff = startOfActRec + AROFF(m_numArgsAndCtorFlag);
416 size_t varenvOff = startOfActRec + AROFF(m_varEnv);
417 size_t savedRbpOff = startOfActRec + AROFF(m_savedRbp);
419 BOOST_STATIC_ASSERT((
420 sizeof(((ActRec*)NULL)->m_numArgsAndCtorFlag) == sizeof(int32_t)
423 * rVmSp might not be up-to-date here, so we use emitVStackStore and
424 emitVStackStoreImm which know how to compute where the top of the
425 * stack currently is.
427 if (func) {
428 emitVStackStoreImm(a, i, (uintptr_t)func, funcOff);
429 if (clearThis) {
430 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
433 emitVStackStoreImm(a, i, ActRec::encodeNumArgs(numArgs, isCtor),
434 nargsOff, sz::dword);
435 emitVStackStoreImm(a, i, varEnvInvName, varenvOff, sz::qword, &m_regMap);
436 emitVStackStore(a, i, rVmFp, savedRbpOff, sz::qword);
439 void
440 TranslatorX64::emitCallSaveRegs() {
441 ASSERT(!m_regMap.frozen());
442 m_regMap.cleanRegs(kCallerSaved);
445 static void UNUSED tc_debug_print(const char* message,
446 uintptr_t r1,
447 uintptr_t r2,
448 uintptr_t r3,
449 ActRec* fp) {
450 TRACE(1, "*********************** %s: %p %p %p (for : %s)\n",
451 message, (void*)r1, (void*)r2, (void*)r3,
452 fp->m_func ? fp->m_func->fullName()->data() : "[?]");
455 // Utility for debugging translations that will print a message,
456 // followed by the value of up to three registers.
457 void TranslatorX64::emitDebugPrint(Asm& a,
458 const char* message,
459 PhysReg r1,
460 PhysReg r2,
461 PhysReg r3) {
462 boost::optional<PhysRegSaver> aSaver;
463 boost::optional<PhysRegSaverStub> astubsSaver;
465 if (&a == &this->a) {
466 aSaver = boost::in_place<PhysRegSaver>(boost::ref(a), kAllX64Regs);
467 } else {
468 astubsSaver = boost::in_place<PhysRegSaverStub>(boost::ref(a),
469 kAllX64Regs);
472 a. mov_imm64_reg (uintptr_t(message), argNumToRegName[0]);
473 a. mov_reg64_reg64(r1, argNumToRegName[1]);
474 a. mov_reg64_reg64(r2, argNumToRegName[2]);
475 a. mov_reg64_reg64(r3, argNumToRegName[3]);
476 a. mov_reg64_reg64(rVmFp, argNumToRegName[4]);
477 a. call((TCA)tc_debug_print);
480 void ArgManager::cleanLocs() {
481 for (size_t i = 0; i < m_args.size(); ++i) {
482 // We only need to clean locations we are passing the address of.
483 // (ArgLoc passes the value in the register mapped for a given
484 // location, not the address of the location itself, so it doesn't
485 // need cleaning here.)
486 if (m_args[i].m_kind != ArgContent::ArgLocAddr) continue;
487 m_tx64.m_regMap.cleanLoc(*m_args[i].m_loc);
491 void ArgManager::computeUsed(std::map<PhysReg, size_t> &used,
492 std::vector<PhysReg> &actual) {
493 size_t n = m_args.size();
494 for (size_t i = 0; i < n; i++) {
495 PhysReg reg = InvalidReg;
496 if (m_args[i].m_kind == ArgContent::ArgReg ||
497 m_args[i].m_kind == ArgContent::ArgRegPlus) {
498 reg = m_args[i].m_reg;
499 } else if (m_args[i].m_kind == ArgContent::ArgLoc ||
500 m_args[i].m_kind == ArgContent::ArgDeref) {
501 reg = m_tx64.getReg(*m_args[i].m_loc);
502 } else {
503 continue;
505 TRACE(6, "ArgManager: arg %zd incoming reg r%d\n", i, reg);
506 used[reg] = i;
507 actual[i] = reg;
511 void
512 TranslatorX64::emitRB(X64Assembler& a,
513 RingBufferType t,
514 SrcKey sk, RegSet toSave) {
515 if (!Trace::moduleEnabledRelease(Trace::tx64, 5)) {
516 return;
518 PhysRegSaver rs(a, toSave | kSpecialCrossTraceRegs);
519 int arg = 0;
520 emitImmReg(a, t, argNumToRegName[arg++]);
521 emitImmReg(a, sk.m_funcId, argNumToRegName[arg++]);
522 emitImmReg(a, sk.m_offset, argNumToRegName[arg++]);
523 a. call((TCA)ringbufferEntry);
526 void
527 TranslatorX64::emitRB(X64Assembler& a,
528 RingBufferType t,
529 const char* msg,
530 RegSet toSave) {
531 if (!Trace::moduleEnabledRelease(Trace::tx64, 5)) {
532 return;
534 PhysRegSaver save(a, toSave | kSpecialCrossTraceRegs);
535 int arg = 0;
536 emitImmReg(a, (uintptr_t)msg, argNumToRegName[arg++]);
537 emitImmReg(a, strlen(msg), argNumToRegName[arg++]);
538 emitImmReg(a, t, argNumToRegName[arg++]);
539 a. call((TCA)ringbufferMsg);
543 * allocate the input registers for i, trying to
544 * match inputs to call arguments.
545 * if args[j] == ArgDontAllocate, the input i.inputs[j] is skipped
546 * if args[j] == ArgAnyReg, it will be allocated as normal
547 * otherwise, args[j] should be a positional call argument,
548 * and allocInputsForCall will attempt to allocate it to
549 * argNumToRegName[args[j]].
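 *
 * Illustrative call (the `args' array here is hypothetical):
 *
 *   static const int args[] = { 1, ArgDontAllocate, ArgAnyReg };
 *   allocInputsForCall(i, args);   // input 0 -> argNumToRegName[1],
 *                                  // input 1 untouched, input 2 anywhere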
551 void
552 TranslatorX64::allocInputsForCall(const NormalizedInstruction& i,
553 const int* args) {
554 RegSet blackList;
555 int arg;
557 * If any of the inputs is already in an argument
558 * register, blacklist it. ArgManager already takes
559 * care of shuffling registers efficiently
561 for (arg = i.inputs.size(); arg--; ) {
562 if (args[arg] != ArgDontAllocate &&
563 m_regMap.hasReg(i.inputs[arg]->location)) {
564 blackList |= RegSet(getReg(i.inputs[arg]->location));
567 bool hasAnyReg = false;
568 for (arg = i.inputs.size(); arg--; ) {
569 if (args[arg] != ArgAnyReg) {
570 if (args[arg] != ArgDontAllocate &&
571 !m_regMap.hasReg(i.inputs[arg]->location)) {
572 PhysReg target = argNumToRegName[args[arg]];
573 if (!blackList.contains(target)) {
574 m_regMap.cleanRegs(RegSet(target));
575 m_regMap.smashRegs(RegSet(target));
576 } else {
577 target = InvalidReg;
579 m_regMap.allocInputReg(i, arg, target);
581 } else {
582 hasAnyReg = true;
585 if (hasAnyReg) {
586 for (arg = i.inputs.size(); arg--; ) {
587 if (args[arg] == ArgAnyReg) {
588 m_regMap.allocInputReg(i, arg);
594 void ArgManager::shuffleRegisters(std::map<PhysReg, size_t> &used,
595 std::vector<PhysReg> &actual) {
596 size_t n = m_args.size();
597 for (size_t i = 0; i < n; i++) {
598 if (actual[i] == InvalidReg)
599 continue;
601 if (!mapContains(used, argNumToRegName[i])) {
602 // There's no conflict, so just copy
603 TRACE(6, "ArgManager: arg %zd reg available, copying from r%d to r%d\n",
604 i, actual[i], argNumToRegName[i]);
605 // Do copy and data structure update here, because this way
606 // we can reuse the register in actual[i] later without problems.
607 m_tx64.emitMovRegReg(m_a, actual[i], argNumToRegName[i]);
608 used.erase(actual[i]);
609 actual[i] = argNumToRegName[i];
610 } else {
611 size_t j = used[argNumToRegName[i]];
612 if (actual[j] != actual[i]) {
613 // The register is used by some other value, so we must swap the two
614 // registers.
615 ASSERT(j > i);
616 ASSERT(actual[j] != InvalidReg);
617 PhysReg ri = actual[i],
618 rj = actual[j];
619 TRACE(6, "ArgManager: arg %zd register used by arg %zd, "
620 "swapping r%d with r%d\n", i, j, ri, rj);
622 // Clean the registers first
623 RegSet regs = RegSet(ri) | RegSet(rj);
624 m_tx64.m_regMap.cleanRegs(regs);
626 // Emit the actual swap
627 m_tx64.m_regMap.swapRegisters(ri, rj);
628 m_a. xchg_reg64_reg64(ri, rj);
630 // Update the data structure for later steps
631 for (size_t k = 0; k < n; k++) {
632 if (actual[k] == ri) {
633 actual[k] = rj;
634 } else if (actual[k] == rj) {
635 actual[k] = ri;
638 used[ri] = j;
639 used[rj] = i;
645 void ArgManager::emitValues(std::vector<PhysReg> &actual) {
646 for (size_t i = 0; i < m_args.size(); i++) {
647 switch(m_args[i].m_kind) {
648 case ArgContent::ArgLoc:
649 case ArgContent::ArgDeref:
650 case ArgContent::ArgReg:
651 TRACE(6, "ArgManager: copying arg %zd from r%d to r%d\n",
652 i, actual[i], argNumToRegName[i]);
653 m_tx64.emitMovRegReg(m_a, actual[i], argNumToRegName[i]);
654 // Emit dereference if needed
655 if (m_args[i].m_kind == ArgContent::ArgDeref) {
656 emitDeref(m_a, argNumToRegName[i], argNumToRegName[i]);
658 break;
660 // For any of these cases, the register should already be available.
661 // If it was used previously by an input value, shuffleRegisters
662 // should have moved it to the proper register from argNumToRegName.
663 case ArgContent::ArgImm:
664 emitImmReg(m_a, m_args[i].m_imm, argNumToRegName[i]);
665 break;
667 case ArgContent::ArgRegPlus:
668 if (m_args[i].m_imm) {
669 m_a. add_imm32_reg64(m_args[i].m_imm, argNumToRegName[i]);
671 break;
673 case ArgContent::ArgLocAddr:
675 PhysReg base;
676 int disp;
677 locToRegDisp(*m_args[i].m_loc, &base, &disp);
678 m_a. lea_reg64_disp_reg64(base, disp, argNumToRegName[i]);
680 break;
682 default:
683 // Should never happen
684 ASSERT(false);
689 void
690 TranslatorX64::emitCall(X64Assembler& a, TCA dest, bool killRegs) {
691 if (a.jmpDeltaFits(dest) && !Stats::enabled()) {
692 a. call(dest);
693 } else {
694 a. call(getNativeTrampoline(dest));
696 if (killRegs) {
697 // All caller-saved regs are now suspect.
698 m_regMap.smashRegs(kCallerSaved);
702 void
703 TranslatorX64::recordSyncPoint(X64Assembler& a, Offset pcOff, Offset spOff) {
704 m_pendingFixups.push_back(PendingFixup(a.code.frontier,
705 Fixup(pcOff, spOff)));
708 void
709 TranslatorX64::recordCall(Asm& a, const NormalizedInstruction& i) {
710 recordCallImpl<false>(a, i);
713 void
714 TranslatorX64::recordCall(const NormalizedInstruction& i) {
715 recordCall(a, i);
718 template <bool reentrant>
719 void
720 TranslatorX64::recordCallImpl(X64Assembler& a,
721 const NormalizedInstruction& i,
722 bool advance /* = false */) {
723 SrcKey sk = i.source;
724 Offset stackOff = i.stackOff + (vmfp() - vmsp());
725 if (advance) {
726 sk.advance(curUnit());
727 stackOff += getStackDelta(i);
729 ASSERT(i.checkedInputs ||
730 (reentrant && !i.isSimple()) ||
731 (!reentrant && !i.isNative()));
732 Offset pcOff = sk.offset() - curFunc()->base();
733 SKTRACE(2, sk, "record%sCall pcOff %d\n",
734 reentrant ? "Reentrant" : "", int(pcOff));
735 recordSyncPoint(a, pcOff, stackOff);
736 SKTRACE(2, sk, "record%sCall stackOff %d\n",
737 reentrant ? "Reentrant" : "", int(stackOff));
740 * Right now we assume call sites that need to record sync points
741 * may also throw exceptions. We record information about dirty
742 * callee-saved registers so we can spill their contents during
743 * unwinding. See unwind-x64.cpp.
745 if (!m_pendingUnwindRegInfo.empty()) {
746 if (Trace::moduleLevel(Trace::tunwind) >= 2) {
747 sk.trace("recordCallImpl has dirty callee-saved regs\n");
748 TRACE_MOD(Trace::tunwind, 2,
749 "CTCA: %p saving dirty callee regs:\n",
750 a.code.frontier);
751 for (int i = 0; i < UnwindRegInfo::kMaxCalleeSaved; ++i) {
752 if (m_pendingUnwindRegInfo.m_regs[i].dirty) {
753 TRACE_MOD(Trace::tunwind, 2, " %s\n",
754 m_pendingUnwindRegInfo.m_regs[i].pretty().c_str());
758 m_unwindRegMap.insert(a.code.frontier, m_pendingUnwindRegInfo);
759 m_pendingUnwindRegInfo.clear();
763 void TranslatorX64::prepareCallSaveRegs() {
764 emitCallSaveRegs(); // Clean caller-saved regs.
765 m_pendingUnwindRegInfo.clear();
767 RegSet rset = kCalleeSaved;
768 PhysReg reg;
769 while (rset.findFirst(reg)) {
770 rset.remove(reg);
771 if (!m_regMap.regIsDirty(reg)) continue;
772 const RegInfo* ri = m_regMap.getInfo(reg);
773 ASSERT(ri->m_cont.m_kind == RegContent::Loc);
775 // If the register is dirty, we'll record this so that we can
776 // restore it during stack unwinding if an exception is thrown.
777 m_pendingUnwindRegInfo.add(reg, ri->m_type, ri->m_cont.m_loc);
781 void
782 TranslatorX64::emitIncRef(PhysReg base, DataType dtype) {
783 if (!IS_REFCOUNTED_TYPE(dtype) && dtype != KindOfInvalid) {
784 return;
786 ASSERT(m_regMap.getInfo(base));
787 SpaceRecorder sr("_IncRef", a);
788 ASSERT(sizeof(((Cell*)NULL)->_count) == sizeof(int32_t));
789 { // if !static then
790 IfCountNotStatic ins(a, base, dtype);
792 * The optimization guide cautions against using inc; while it is
793 * compact, it only writes the low-order 8 bits of eflags, causing a
794 * partial dependency for any downstream flags-dependent code.
796 a. add_imm32_disp_reg32(1, TVOFF(_count), base);
797 } // endif
800 void
801 TranslatorX64::emitIncRefGenericRegSafe(PhysReg base,
802 int disp,
803 PhysReg tmpReg) {
804 ASSERT(m_regMap.getInfo(base));
805 { // if RC
806 IfRefCounted irc(a, base, disp);
807 a. load_reg64_disp_reg64(base, disp + TVOFF(m_data),
808 tmpReg);
809 { // if !static
810 IfCountNotStatic ins(a, tmpReg);
811 a. add_imm32_disp_reg32(1, TVOFF(_count), tmpReg);
812 } // endif
813 } // endif
816 void TranslatorX64::emitIncRefGeneric(PhysReg base, int disp) {
817 ScratchReg tmpReg(m_regMap);
818 emitIncRefGenericRegSafe(base, disp, *tmpReg);
821 static void emitGetGContext(X64Assembler& a, PhysReg dest) {
822 emitTLSLoad<ExecutionContext>(a, g_context, dest);
825 // emitEagerVMRegSave --
826 // Inline. Saves regs in-place in the TC. This is an unusual need;
827 // you probably want to lazily save these regs via recordCall and
828 // its ilk.
830 // SaveFP uses rVmFp, as usual. SavePC requires the caller to have
831 // placed the PC offset of the instruction about to be executed in
832 // rdi.
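//
// Illustrative call site (sketch only; `pcOffset' is a placeholder):
//
//   emitImmReg(a, pcOffset, rdi);            // SavePC reads the offset from rdi
//   emitEagerVMRegSave(a, SaveFP | SavePC);  // fp and pc now visible in memory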
833 enum RegSaveFlags {
834 SaveFP = 1,
835 SavePC = 2
838 static TCA
839 emitEagerVMRegSave(X64Assembler& a,
840 int flags /* :: RegSaveFlags */) {
841 TCA start = a.code.frontier;
842 bool saveFP = bool(flags & SaveFP);
843 bool savePC = bool(flags & SavePC);
844 ASSERT((flags & ~(SavePC | SaveFP)) == 0);
846 PhysReg pcReg = rdi;
847 PhysReg rEC = rScratch;
848 ASSERT(!kSpecialCrossTraceRegs.contains(rdi));
850 emitGetGContext(a, rEC);
852 static COff spOff = offsetof(VMExecutionContext, m_stack) +
853 Stack::topOfStackOffset();
854 static COff fpOff = offsetof(VMExecutionContext, m_fp) - spOff;
855 static COff pcOff = offsetof(VMExecutionContext, m_pc) - spOff;
857 ASSERT(spOff != 0);
858 // Instruction selection note: this is an lea, but add is more
859 // compact and we can afford the flags bash.
860 a. add_imm32_reg64(spOff, rEC);
861 a. store_reg64_disp_reg64 (rVmSp, 0, rEC);
862 if (savePC) {
863 // We're going to temporarily abuse rVmSp to hold the current unit.
864 PhysReg rBC = rVmSp;
865 a. pushr(rBC);
866 // m_fp -> m_func -> m_unit -> m_bc + pcReg
867 a. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rBC);
868 a. load_reg64_disp_reg64(rBC, Func::unitOff(), rBC);
869 a. load_reg64_disp_reg64(rBC, Unit::bcOff(), rBC);
870 a. add_reg64_reg64(rBC, pcReg);
871 a. store_reg64_disp_reg64(pcReg, pcOff, rEC);
872 a. popr(rBC);
874 if (saveFP) {
875 a. store_reg64_disp_reg64 (rVmFp, fpOff, rEC);
877 return start;
881 * emitDecRef --
883 * Decrement a value's refcount and call the release helper if
884 * appropriate. emitDecRef requires that the caller knows the
885 * type at translation time.
887 void TranslatorX64::emitDecRef(Asm& a,
888 const NormalizedInstruction& i,
889 PhysReg rDatum,
890 DataType type) {
891 ASSERT(type != KindOfInvalid);
892 if (!IS_REFCOUNTED_TYPE(type)) {
893 return;
896 ASSERT(!i.isNative());
897 ASSERT(!i.isSimple() || !typeReentersOnRelease(type));
898 SpaceRecorder sr("_DecRef", a);
899 { // if !static
900 IfCountNotStatic ins(a, rDatum, type);
901 a. sub_imm32_disp_reg32(1, TVOFF(_count), rDatum);
903 ASSERT(type >= 0 && type < MaxNumDataTypes);
904 if (&a == &this->astubs) {
905 JccBlock<CC_NZ> ifZero(a);
906 callUnaryStub(a, i, m_dtorStubs[type], rDatum);
907 } else {
908 UnlikelyIfBlock<CC_Z> ifZero(this->a, astubs);
909 callUnaryStub(astubs, i, m_dtorStubs[type], rDatum);
911 } // endif
914 void TranslatorX64::emitDecRef(const NormalizedInstruction& i,
915 PhysReg rDatum,
916 DataType type) {
917 emitDecRef(a, i, rDatum, type);
920 void TranslatorX64::emitDecRefInput(Asm& a,
921 const NormalizedInstruction& i,
922 int input) {
923 DynLocation* value = i.inputs[input];
924 if (IS_REFCOUNTED_TYPE(value->outerType())) {
925 m_regMap.allocInputReg(i, input);
926 PhysReg rValue = getReg(value->location);
927 emitDecRef(a, i, rValue, value->outerType());
932 * emitDecRefGeneric --
934 * Decrement a value's refcount and call the release helper if
935 * appropriate. emitDecRefGeneric should only be used when the type
936 * is not known at translation time.
938 * emitDecRefGeneric operates on the memory location given by
939 * srcReg+disp, so the caller is responsible for ensuring that the
940 * memory location is up to date and not enregistered.
942 void TranslatorX64::emitDecRefGeneric(const NormalizedInstruction& i,
943 PhysReg srcReg, int disp /* = 0 */) {
944 SpaceRecorder sr("_DecRefGeneric", a);
946 * The full, inlined generic dec ref looks like:
948 * TypedValue* d = srcReg + disp;
949 * if (IS_REFCOUNTED_TYPE(d->m_type) && // a)
950 * d->_count != kStaticCount && // b)
951 * --d->_count == 0) // c)
952 * GenericDestroy(d); // d)
954 * We originally inlined *all* of a-d, and have experimented with sharing
955 * them all, too. At this writing (05-12-2012), inlining a) and outlining
956 * b-d seems to strike the right balance between compactness and not
957 * doing too much work in the common case where it is not refcounted.
960 IfRefCounted irc(a, srcReg, disp);
961 callUnaryReentrantStub(a, i, m_dtorGenericStub, srcReg, disp);
965 // Same as emitDecRefGeneric, except for when we have the type in a
966 // register as well. Same inlining/outlining choices as
967 // emitDecRefGeneric above.
968 void TranslatorX64::emitDecRefGenericReg(PhysReg rData, PhysReg rType) {
969 SpaceRecorder sr("_DecRefGeneric", a);
970 a. cmp_imm32_reg32(KindOfRefCountThreshold, rType);
972 JccBlock<CC_BE> ifRefCounted(a);
973 callBinaryStub(a, *m_curNI, m_dtorGenericStubRegs, rData, rType);
978 * genericRefCountStub --
980 * Shared code to decRef the TypedValue* of unknown, but refcounted, type
981 * in rdi. Tightly coupled with emitDecRefGeneric.
983 TCA TranslatorX64::genericRefCountStub(X64Assembler& a) {
984 moveToAlign(a);
985 FreezeRegs brr(m_regMap);
986 TCA retval = a.code.frontier;
988 // Note we make a real frame here: this is necessary so that the
989 // fixup map can chase back to the caller of this stub if it needs
990 // to sync regs.
991 a. pushr(rbp); // {
992 a. mov_reg64_reg64(rsp, rbp);
994 PhysRegSaverStub prs(a, RegSet(rsi));
995 // We already know the type was refcounted if we got here.
996 a. load_reg64_disp_reg64(rdi, TVOFF(m_data), rsi);
997 { // if !static
998 IfCountNotStatic ins(a, rsi, KindOfInvalid);
999 a. sub_imm32_disp_reg32(1, TVOFF(_count), rsi);
1000 { // if zero
1001 JccBlock<CC_NZ> ifZero(a);
1002 RegSet s = kCallerSaved - (RegSet(rdi) | RegSet(rsi));
1003 PhysRegSaver prs(a, s);
1004 a.call(TCA(tv_release_generic));
1005 } // endif
1006 } // endif
1008 a. popr(rbp); // }
1009 a. ret();
1010 return retval;
1013 TCA TranslatorX64::genericRefCountStubRegs(X64Assembler& a) {
1014 const PhysReg rData = argNumToRegName[0];
1015 const PhysReg rType = argNumToRegName[1];
1017 moveToAlign(a);
1018 TCA retval = a.code.frontier;
1019 FreezeRegs brr(m_regMap);
1021 // The frame here is needed for the same reason as in
1022 // genericRefCountStub.
1023 a. pushr(rbp); // {
1024 a. mov_reg64_reg64(rsp, rbp);
1026 IfCountNotStatic ins(a, rData, KindOfInvalid);
1027 a. sub_imm32_disp_reg32(1, TVOFF(_count), rData);
1029 JccBlock<CC_NZ> ifZero(a);
1030 // The arguments are already in the right registers.
1031 RegSet s = kCallerSaved - (RegSet(rData) | RegSet(rType));
1032 PhysRegSaverParity<1> saver(a, s);
1033 if (false) { // typecheck
1034 RefData* vp = NULL; DataType dt = KindOfUninit;
1035 (void)tv_release_typed(vp, dt);
1037 a.call(TCA(tv_release_typed));
1040 a. popr(rbp); // }
1041 a. ret();
1042 return retval;
1046 * Translation call targets. It is a lot easier, and a bit more
1047 * portable, to use C linkage from assembly.
1049 TCA TranslatorX64::retranslate(SrcKey sk, bool align, bool useHHIR) {
1050 if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk)) {
1051 // We are about to translate something known to be blacklisted by
1052 // debugger, exit early
1053 SKTRACE(1, sk, "retranslate abort due to debugger\n");
1054 return NULL;
1056 LeaseHolder writer(s_writeLease);
1057 if (!writer) return NULL;
1058 SKTRACE(1, sk, "retranslate\n");
1059 return translate(&sk, align, useHHIR);
1062 // Only use comes from HHIR's cgExitTrace() case TraceExitType::SlowNoProgress
1063 TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk,
1064 bool align,
1065 TCA toSmash) {
1066 if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk)) {
1067 // We are about to translate something known to be blacklisted by
1068 // debugger, exit early
1069 SKTRACE(1, sk, "retranslateAndPatchNoIR abort due to debugger\n");
1070 return NULL;
1072 LeaseHolder writer(s_writeLease);
1073 if (!writer) return NULL;
1074 SKTRACE(1, sk, "retranslateAndPatchNoIR\n");
1075 SrcRec* srcRec = getSrcRec(sk);
1076 if (srcRec->translations().size() == SrcRec::kMaxTranslations + 1) {
1077 // we've gone over the translation limit and already have an anchor
1078 // translation that will interpret, so just return NULL and force
1079 // interpretation of this BB.
1080 return NULL;
1082 TCA start = translate(&sk, align, false);
1083 if (start != NULL) {
1084 smash(getAsmFor(toSmash), toSmash, start);
1086 return start;
1090 * Satisfy an alignment constraint. If we're in a reachable section
1091 * of code, bridge the gap with nops. Otherwise, int3's.
1093 void
1094 TranslatorX64::moveToAlign(X64Assembler &aa,
1095 const size_t align /* =kJmpTargetAlign */,
1096 bool unreachable /* =true */) {
1097 using namespace HPHP::Util;
1098 SpaceRecorder sr("_Align", aa);
1099 ASSERT(isPowerOfTwo(align));
1100 size_t leftInBlock = align - ((align - 1) & uintptr_t(aa.code.frontier));
1101 if (leftInBlock == align) return;
1102 if (unreachable) {
1103 if (leftInBlock > 2) {
1104 aa.ud2();
1105 leftInBlock -= 2;
1107 if (leftInBlock > 0) {
1108 aa.emitInt3s(leftInBlock);
1110 return;
1112 aa.emitNop(leftInBlock);
1116 * Req machinery. We sometimes emit code that is unable to proceed
1117 * without translator assistance; e.g., a basic block whose successor is
1118 * unknown. We leave one of these request arg blobs in m_data, and point
1119 * to it at callout-time.
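 *
 * For example, getTranslation() below creates its anchor translations as
 * REQ_RETRANSLATE stubs:
 *
 *   TCA start = emitServiceReq(false, REQ_RETRANSLATE, 1,
 *                              uint64_t(sk->offset()));
 *
 * Hitting that stub calls back into the translator, which produces a real
 * translation for sk and resumes execution in the TC.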
1122 // REQ_BIND_CALL
1123 struct ReqBindCall {
1124 SrcKey m_sourceInstr;
1125 TCA m_toSmash;
1126 int m_nArgs;
1127 bool m_isImmutable; // call was to known func.
1128 } m_bindCall;
1130 // ID to name mapping for tracing.
1131 static inline const char*
1132 reqName(int req) {
1133 static const char* reqNames[] = {
1134 #define REQ(nm) #nm,
1135 SERVICE_REQUESTS
1136 #undef REQ
1138 return reqNames[req];
1142 * Find or create a translation for sk. Returns TCA of "best" current
1143 * translation. May return NULL if it is currently impossible to create
1144 * a translation.
1147 TranslatorX64::getTranslation(const SrcKey *sk, bool align,
1148 bool forceNoHHIR /* = false */) {
1149 curFunc()->validate();
1150 SKTRACE(2, *sk, "getTranslation: curUnit %s funcId %llx offset %d\n",
1151 curUnit()->filepath()->data(),
1152 sk->m_funcId,
1153 sk->offset());
1154 SKTRACE(2, *sk, " funcId: %llx\n",
1155 curFunc()->getFuncId());
1157 if (curFrame()->hasVarEnv() && curFrame()->getVarEnv()->isGlobalScope()) {
1158 SKTRACE(2, *sk, "punting on pseudoMain\n");
1159 return NULL;
1161 if (const SrcRec* sr = m_srcDB.find(*sk)) {
1162 TCA tca = sr->getTopTranslation();
1163 if (tca) {
1164 SKTRACE(2, *sk, "getTranslation: found %p\n", tca);
1165 return tca;
1171 * Try to become the writer. We delay this until we *know* we will have
1172 * a need to create new translations, instead of just trying to win the
1173 * lottery at the dawn of time. Hopefully lots of requests won't require
1174 * any new translation.
1176 LeaseHolder writer(s_writeLease);
1177 if (!writer) return NULL;
1178 if (SrcRec* sr = m_srcDB.find(*sk)) {
1179 TCA tca = sr->getTopTranslation();
1180 if (tca) {
1181 // Handle extremely unlikely race; someone may have just already
1182 // added the first instance of this SrcRec while we did a
1183 // non-blocking wait on the write lease.
1184 return tca;
1185 } else {
1186 // Since we are holding the write lease, we know that sk is properly
1187 // initialized, except that it has no translations (due to
1188 // replaceOldTranslations)
1189 return retranslate(*sk, align,
1190 RuntimeOption::EvalJitUseIR && !forceNoHHIR);
1194 // We put retranslate requests at the end of our slab to more frequently
1195 // allow conditional jump fall-throughs
1197 TCA start = emitServiceReq(false, REQ_RETRANSLATE, 1, uint64_t(sk->offset()));
1198 SKTRACE(1, *sk, "inserting anchor translation for (%p,%d) at %p\n",
1199 curUnit(), sk->offset(), start);
1200 SrcRec* sr = m_srcDB.insert(*sk);
1201 sr->setFuncInfo(curFunc());
1202 sr->setAnchorTranslation(start);
1204 addTranslation(TransRec(*sk, curUnit()->md5(), TransAnchor, 0, 0, start,
1205 astubs.code.frontier - start));
1207 ASSERT(getTransRec(start)->kind == TransAnchor);
1209 return retranslate(*sk, align, RuntimeOption::EvalJitUseIR && !forceNoHHIR);
1213 TranslatorX64::translate(const SrcKey *sk, bool align, bool useHHIR) {
1214 INC_TPC(translate);
1215 ASSERT(vmfp() >= vmsp());
1216 ASSERT(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
1217 ASSERT(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
1219 if (useHHIR) {
1220 if (m_numHHIRTrans == RuntimeOption::EvalMaxHHIRTrans) {
1221 useHHIR = false;
1222 m_useHHIR = false;
1223 RuntimeOption::EvalJitUseIR = false;
1224 } else {
1225 hhirTraceStart(sk->offset());
1227 } else {
1228 ASSERT(m_useHHIR == false);
1231 Tracelet tlet;
1232 analyze(sk, tlet);
1234 if (align) {
1235 moveToAlign(a, kNonFallthroughAlign);
1238 TCA start = a.code.frontier;
1239 translateTracelet(tlet);
1240 SKTRACE(1, *sk, "translate moved head from %p to %p\n",
1241 getTopTranslation(*sk), start);
1242 if (Trace::moduleEnabledRelease(tcdump, 1)) {
1243 static __thread int n;
1244 if (++n % 10000 == 0) {
1245 std::ofstream f("cfg.dot", std::ios_base::trunc);
1246 drawCFG(f);
1247 f.close();
1250 return start;
1254 * Returns true if a's current frontier can have an nBytes-long
1255 * instruction written without any risk of cache-tearing.
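 *
 * Worked example (assuming 64-byte cache lines): a 5-byte jmp starting at
 * an address whose low bits are 0x3c would end at 0x40, crossing a line
 * boundary, so it is not smashable; starting at 0x3a it ends at 0x3e and
 * is.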
1257 bool
1258 TranslatorX64::isSmashable(X64Assembler& a, int nBytes) {
1259 ASSERT(nBytes <= int(kX64CacheLineSize));
1260 uintptr_t iFrontier = uintptr_t(a.code.frontier);
1261 uintptr_t lastByte = iFrontier + nBytes - 1;
1262 return (iFrontier & ~kX64CacheLineMask) == (lastByte & ~kX64CacheLineMask);
1265 void
1266 TranslatorX64::prepareForSmash(X64Assembler& a, int nBytes) {
1267 if (!isSmashable(a, nBytes)) {
1268 moveToAlign(a, kX64CacheLineSize, false);
1270 ASSERT(isSmashable(a, nBytes));
1273 void
1274 TranslatorX64::prepareForSmash(int nBytes) {
1275 prepareForSmash(a, nBytes);
1278 void
1279 TranslatorX64::smash(X64Assembler &a, TCA src, TCA dest) {
1280 ASSERT(canWrite());
1281 TRACE(2, "smash: %p -> %p\n", src, dest);
1285 * We are about to smash reachable code in the translation cache. A
1286 * hardware thread might be executing the very instruction we're
1287 * modifying. This is safe because:
1289 * 1. We align smashable instructions so that they reside on a single
1290 * cache line;
1292 * 2. We modify the instruction with a single processor store; and
1294 * 3. The smashed region contains only a single instruction in the
1295 * original instruction stream (see jmp() -> emitJ32() -> bytes() in
1296 * the assembler).
1298 CodeCursor cg(a, src);
1299 ASSERT(isSmashable(a, kJmpLen));
1300 if (dest > src && dest - src <= 7) {
1301 a. emitNop(dest - src);
1302 } else {
1303 a. jmp(dest);
1307 void TranslatorX64::protectCode() {
1308 mprotect(tx64->a.code.base, tx64->a.code.size, PROT_READ | PROT_EXEC);
1312 void TranslatorX64::unprotectCode() {
1313 mprotect(tx64->a.code.base, tx64->a.code.size,
1314 PROT_READ | PROT_WRITE | PROT_EXEC);
1317 void
1318 TranslatorX64::emitStackCheck(int funcDepth, Offset pc) {
1319 uint64_t stackMask = cellsToBytes(RuntimeOption::EvalVMStackElms) - 1;
1320 a. mov_reg64_reg64(rVmSp, rScratch); // copy to destroy
1321 a. and_imm64_reg64(stackMask, rScratch);
1322 a. sub_imm64_reg64(funcDepth + Stack::sSurprisePageSize, rScratch);
1323 ASSERT(m_stackOverflowHelper);
1324 a. jl(m_stackOverflowHelper); // Unlikely branch to failure.
1325 // Success.
1328 // Tests the surprise flags for the current thread. Should be used
1329 // before a jnz to surprise handling code.
1330 void
1331 TranslatorX64::emitTestSurpriseFlags() {
1332 CT_ASSERT(sizeof(((RequestInjectionData*)0)->conditionFlags) == 8);
1333 a.test_imm64_disp_reg64(-1, TargetCache::kConditionFlagsOff, rVmTl);
1336 void
1337 TranslatorX64::emitCheckSurpriseFlagsEnter(bool inTracelet, Offset pcOff,
1338 Offset stackOff) {
1339 emitTestSurpriseFlags();
1341 UnlikelyIfBlock<CC_NZ> ifTracer(a, astubs);
1342 if (false) { // typecheck
1343 const ActRec* ar = NULL;
1344 EventHook::FunctionEnter(ar, 0);
1346 astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
1347 CT_ASSERT(EventHook::NormalFunc == 0);
1348 astubs.xor_reg32_reg32(argNumToRegName[1], argNumToRegName[1]);
1349 emitCall(astubs, (TCA)&EventHook::FunctionEnter);
1350 if (inTracelet) {
1351 recordSyncPoint(astubs, pcOff, stackOff);
1352 } else {
1353 // If we're being called while generating a func prologue, we
1354 // have to record the fixup directly in the fixup map instead of
1355 // going through m_pendingFixups like normal.
1356 m_fixupMap.recordFixup(astubs.code.frontier, Fixup(pcOff, stackOff));
1361 void
1362 TranslatorX64::setArgInActRec(ActRec* ar, int argNum, uint64_t datum,
1363 DataType t) {
1364 TypedValue* tv =
1365 (TypedValue*)(uintptr_t(ar) - (argNum+1) * sizeof(TypedValue));
1366 tv->m_data.num = datum;
1367 tv->m_type = t;
1371 TranslatorX64::shuffleArgsForMagicCall(ActRec* ar) {
1372 if (!ar->hasInvName()) {
1373 return 0;
1375 const Func* f UNUSED = ar->m_func;
1376 f->validate();
1377 ASSERT(f->name()->isame(s___call.get())
1378 || f->name()->isame(s___callStatic.get()));
1379 ASSERT(f->numParams() == 2);
1380 TRACE(1, "shuffleArgsForMagicCall: ar %p\n", ar);
1381 ASSERT(ar->hasInvName());
1382 StringData* invName = ar->getInvName();
1383 ASSERT(invName);
1384 ar->setVarEnv(NULL);
1385 int nargs = ar->numArgs();
1386 // We need to make an array containing all the arguments passed by the
1387 // caller and put it where the second argument is
1388 HphpArray* argArray = NEW(HphpArray)(nargs);
1389 argArray->incRefCount();
1390 for (int i = 0; i < nargs; ++i) {
1391 TypedValue* tv =
1392 (TypedValue*)(uintptr_t(ar) - (i+1) * sizeof(TypedValue));
1393 argArray->nvAppend(tv, false);
1394 tvRefcountedDecRef(tv);
1396 // Put invName in the slot for first argument
1397 setArgInActRec(ar, 0, uint64_t(invName), BitwiseKindOfString);
1398 // Put argArray in the slot for second argument
1399 setArgInActRec(ar, 1, uint64_t(argArray), KindOfArray);
1400 // Fix up ActRec's numArgs
1401 ar->initNumArgs(2);
1402 return 1;
1406 * The standard VMRegAnchor treatment won't work for some cases called
1407 * during function preludes.
1409 * The fp sync machinery is fundamentally based on the notion that
1410 * instruction pointers in the TC are uniquely associated with source
1411 * HHBC instructions, and that source HHBC instructions are in turn
1412 * uniquely associated with SP->FP deltas.
1414 * run_intercept_helper/trimExtraArgs is called from the prologue of
1415 * the callee. The prologue is 1) still in the caller frame for now,
1416 * and 2) shared across multiple call sites. 1 means that we have the
1417 * fp from the caller's frame, and 2 means that this fp is not enough
1418 * to figure out sp.
1420 * However, the prologue passes us the callee actRec, whose predecessor
1421 * has to be the caller. So we can sync sp and fp by ourselves here.
1422 * Geronimo!
1424 static void sync_regstate_to_caller(ActRec* preLive) {
1425 ASSERT(tl_regState == REGSTATE_DIRTY);
1426 vmfp() = (TypedValue*)preLive->m_savedRbp;
1427 vmsp() = (TypedValue*)preLive - preLive->numArgs();
1428 if (ActRec* fp = g_vmContext->m_fp) {
1429 if (fp->m_func && fp->m_func->unit()) {
1430 vmpc() = fp->m_func->unit()->at(fp->m_func->base() + preLive->m_soff);
1433 tl_regState = REGSTATE_CLEAN;
1436 static uint64 run_intercept_helper(ActRec* ar, Variant* ihandler) {
1437 sync_regstate_to_caller(ar);
1438 bool ret = run_intercept_handler<true>(ar, ihandler);
1440 * Restore tl_regState manually in the no-exception case only. (The
1441 * VM regs are clean here---we only need to set them dirty if we are
1442 * stopping to execute in the TC again, which we won't be doing if
1443 * an exception is propagating.)
1445 tl_regState = REGSTATE_DIRTY;
1446 return ret;
1449 void
1450 TranslatorX64::trimExtraArgs(ActRec* ar) {
1451 ASSERT(!ar->hasInvName());
1453 sync_regstate_to_caller(ar);
1454 const Func* f = ar->m_func;
1455 int numParams = f->numParams();
1456 int numArgs = ar->numArgs();
1457 ASSERT(numArgs > numParams);
1458 int numExtra = numArgs - numParams;
1460 TRACE(1, "trimExtraArgs: %d args, function %s takes only %d, ar %p\n",
1461 numArgs, f->name()->data(), numParams, ar);
1463 if (f->attrs() & AttrMayUseVV) {
1464 ASSERT(!ar->hasExtraArgs());
1465 ar->setExtraArgs(ExtraArgs::allocateCopy(
1466 (TypedValue*)(uintptr_t(ar) - numArgs * sizeof(TypedValue)),
1467 numArgs - numParams));
1468 } else {
1469 // Function is not marked as "MayUseVV", so discard the extra arguments
1470 TypedValue* tv = (TypedValue*)(uintptr_t(ar) - numArgs*sizeof(TypedValue));
1471 for (int i = 0; i < numExtra; ++i) {
1472 tvRefcountedDecRef(tv);
1473 ++tv;
1475 ar->setNumArgs(numParams);
1478 // Only go back to dirty in a non-exception case. (Same reason as
1479 // above.)
1480 tl_regState = REGSTATE_DIRTY;
1484 TranslatorX64::getInterceptHelper() {
1485 if (false) { // typecheck
1486 Variant *h = get_intercept_handler(CStrRef((StringData*)NULL),
1487 (char*)NULL);
1488 bool c UNUSED = run_intercept_helper((ActRec*)NULL, h);
1490 if (!m_interceptHelper) {
1491 m_interceptHelper = TCA(astubs.code.frontier);
1492 astubs. load_reg64_disp_reg64(rStashedAR, AROFF(m_func),
1493 rax);
1494 astubs. lea_reg64_disp_reg64(rax, Func::fullNameOff(),
1495 argNumToRegName[0]);
1497 astubs. lea_reg64_disp_reg64(rax, Func::maybeInterceptedOff(),
1498 argNumToRegName[1]);
1500 astubs. call(TCA(get_intercept_handler));
1501 astubs. test_reg64_reg64(rax, rax);
1503 JccBlock<CC_NZ> ifNotIntercepted(astubs);
1504 astubs. ret();
1507 // we might re-enter, so align the stack
1508 astubs. sub_imm32_reg64(8, rsp);
1509 // Copy the old rbp into the savedRbp pointer.
1510 astubs. store_reg64_disp_reg64(rbp, 0, rStashedAR);
1512 PhysReg rSavedRip = r13; // XXX ideally don't hardcode r13 ... but
1513 // we need callee-saved and don't have
1514 // any scratch ones.
1516 // Fish out the saved rip. We may need to jump there, and the helper will
1517 // have wiped out the ActRec.
1518 astubs. load_reg64_disp_reg64(rStashedAR, AROFF(m_savedRip),
1519 rSavedRip);
1520 astubs. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1521 astubs. mov_reg64_reg64(rax, argNumToRegName[1]);
1522 astubs. call(TCA(run_intercept_helper));
1524 // Normally we'd like to recordReentrantCall here, but the vmreg sync'ing
1525 // for run_intercept_handler is a special little snowflake. See
1526 // run_intercept_handler for details.
1527 astubs. test_reg64_reg64(rax, rax);
1529 // If the helper returned false, don't execute this function. The helper
1530 // will have cleaned up the interceptee's arguments and AR, and pushed
1531 // the handler's return value; we now need to get out.
1533 // We don't need to touch rVmFp; it's still pointing to the caller of
1534 // the interceptee. We need to adjust rVmSp. Then we need to jump to the
1535 // saved rip from the interceptee's ActRec.
1536 JccBlock<CC_NZ> ifDontEnterFunction(astubs);
1537 astubs. add_imm32_reg64(16, rsp);
1538 astubs. lea_reg64_disp_reg64(rStashedAR, AROFF(m_r), rVmSp);
1539 astubs. jmp_reg(rSavedRip);
1541 astubs. add_imm32_reg64(8, rsp);
1542 astubs. ret();
1544 return m_interceptHelper;
1548 TranslatorX64::getCallArrayProlog(Func* func) {
1549 TCA tca = func->getFuncBody();
1550 if (tca != (TCA)funcBodyHelperThunk) return tca;
1552 int numParams = func->numParams();
1553 std::vector<std::pair<int,Offset> > dvs;
1554 for (int i = 0; i < numParams; ++i) {
1555 const Func::ParamInfo& pi = func->params()[i];
1556 if (pi.hasDefaultValue()) {
1557 dvs.push_back(std::make_pair(i, pi.funcletOff()));
1560 if (dvs.size()) {
1561 LeaseHolder writer(s_writeLease);
1562 if (!writer) return NULL;
1563 tca = func->getFuncBody();
1564 if (tca != (TCA)funcBodyHelperThunk) return tca;
1565 tca = a.code.frontier;
1566 if (dvs.size() == 1) {
1567 a. cmp_imm32_disp_reg32(dvs[0].first,
1568 AROFF(m_numArgsAndCtorFlag), rVmFp);
1569 emitBindJcc(a, CC_LE, SrcKey(func, dvs[0].second));
1570 emitBindJmp(a, SrcKey(func, func->base()));
1571 } else {
1572 a. load_reg64_disp_reg32(rVmFp, AROFF(m_numArgsAndCtorFlag), rax);
1573 for (unsigned i = 0; i < dvs.size(); i++) {
1574 a. cmp_imm32_reg32(dvs[i].first, rax);
1575 emitBindJcc(a, CC_LE, SrcKey(func, dvs[i].second));
1577 emitBindJmp(a, SrcKey(func, func->base()));
1579 } else {
1580 SrcKey sk(func, func->base());
1581 tca = tx64->getTranslation(&sk, false);
1584 return tca;
1588 TranslatorX64::emitPrologueRedispatch(X64Assembler& a) {
1589 TCA retval;
1590 moveToAlign(a);
1591 retval = a.code.frontier;
1592 // We're in the wrong func prologue. By convention with emitFuncGuard,
1593 // rax contains the function we need to enter.
1595 ASSERT(kScratchCrossTraceRegs.contains(rax));
1596 ASSERT(kScratchCrossTraceRegs.contains(rdx));
1597 ASSERT(kScratchCrossTraceRegs.contains(rcx));
1599 // We don't know how many params we were invoked with. Infer it from
1600 // the stack and rStashedAR rather than reading it from the actrec.
1602 // mov %r15, %rdx
1603 // ld m_numParams(%rax), %ecx #ecx: targetFunc->numParams
1604 // sub %rbx, %rdx #edx: n_args
1605 // shr $4, rdx
1606 a. mov_reg64_reg64(rStashedAR, rdx);
1607 a. load_reg64_disp_reg32(rax, Func::numParamsOff(), rcx);
1608 a. sub_reg64_reg64(rVmSp, rdx);
1609 BOOST_STATIC_ASSERT(sizeof(TypedValue) == 16);
1610 a. shr_imm32_reg32(4, rdx); // rdx: numPassed
1612 // If we didn't pass too many args, directly dereference
1613 // func->m_prologues.
1614 a. cmp_reg32_reg32(rdx, rcx);
1615 TCA bToFixedProloguesCheck = a.code.frontier;
1616 a. jcc8(CC_L, bToFixedProloguesCheck);
1618 // cmp $kNumFixedPrologues, %rdx
1619 // jl numParamsCheck
1620 TCA actualDispatch = a.code.frontier;
1622 // rcx: prologueIdx
1623 // rax = func->prologues[numParams]
1624 // jmp rax
1625 a. load_reg64_disp_index_reg64(rax,
1626 Func::prologueTableOff(),
1627 rdx,
1628 rax);
1629 a. jmp_reg(rax);
1630 a. ud2();
1632 // Hmm, more parameters passed than the function expected. Did we pass
1633 // kNumFixedPrologues or more? If not, %rdx is still a perfectly
1634 // legitimate index into the func prologue table.
1635 // numParamsCheck:
1636 // cmp $kNumFixedPrologues, %rcx
1637 // jl dispatch
1638 a.patchJcc8(bToFixedProloguesCheck, a.code.frontier); // numParamsCheck:
1639 a. cmp_imm32_reg32(kNumFixedPrologues, rdx);
1640 a. jcc8(CC_L, actualDispatch);
1642 // Too many gosh-darned parameters passed. Go to numExpected + 1, which
1643 // is always a "too many params" entry point.
1645 // mov %rdx, %rcx
1646 // add $1, %rcx
1647 // jmp dispatch
1648 a. load_reg64_disp_index_reg64(rax,
1649 // %rcx + 1
1650 Func::prologueTableOff() + sizeof(TCA),
1651 rcx,
1652 rax);
1653 a. jmp_reg(rax);
1654 a. ud2();
1655 return retval;
1658 // The funcGuard gets skipped and patched by other code, so we have some
1659 // magic offsets.
1660 static const int kFuncMovImm = 6; // Offset to the immediate for expected func
1661 static const int kFuncGuardLen = 23;
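// Layout sketch (derived from emitFuncGuard/skipFuncCheck below): the guard
// loads ar->m_func into rax, then emits `mov $func, %rdx; cmp %rax, %rdx;
// jnz redispatch'. The mov's 8-byte immediate starts kFuncMovImm bytes into
// the guard (rex prefix + opcode occupy the two bytes before it), and the
// whole sequence is nop-padded out to kFuncGuardLen bytes so callers can
// skip it with `dest + kFuncGuardLen'.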
1663 template<typename T>
1664 static T*
1665 funcGuardToFuncImm(TCA funcGuard) {
1666 T* retval = (T*)(funcGuard + kFuncMovImm + (2 - sizeof(T)/4));
1667 // We padded these so the immediate would fit inside an aligned 8-byte region,
1668 // so the xor of the address of the first byte with the address of the last
1669 // byte should only be nonzero in the bottom 3 bits.
1670 ASSERT(((uintptr_t(retval) ^ (uintptr_t(retval + 1) - 1)) & ~7) == 0);
1671 return retval;
1674 static inline bool
1675 funcGuardIsForFunc(TCA funcGuard, const Func* func) {
1676 intptr_t iptr = uintptr_t(func);
1677 if (deltaFits(iptr, sz::dword)) {
1678 return *funcGuardToFuncImm<int32_t>(funcGuard) == iptr;
1680 return *funcGuardToFuncImm<int64_t>(funcGuard) == iptr;
1683 static void
1684 disableFuncGuard(TCA funcGuard, Func* func) {
1685 ASSERT(funcGuardIsForFunc(funcGuard, func));
1686 if (deltaFits((intptr_t)func, sz::dword)) {
1687 *funcGuardToFuncImm<int32_t>(funcGuard) = 0;
1688 } else {
1689 *funcGuardToFuncImm<int64_t>(funcGuard) = 0;
1691 ASSERT(!funcGuardIsForFunc(funcGuard, func));
1695 TranslatorX64::emitFuncGuard(X64Assembler& a, const Func* func) {
1696 ASSERT(kScratchCrossTraceRegs.contains(rax));
1697 ASSERT(kScratchCrossTraceRegs.contains(rdx));
1699 // Ensure the immediate is safely smashable; the immediate needs
1700 // to be at a qword boundary, so we need to start the movImm at
1701 // (kAlign - kFuncMovImm) % 8.
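// Worked example: with kAlign == 8 and kFuncMovImm == 6, a frontier whose
// low bits are 5 gets ((8 - 6) - 5) & 7 == 5 bytes of nop, leaving the low
// bits at (5 + 5) & 7 == 2 == kAlign - kFuncMovImm, so the mov's 8-byte
// immediate lands on a qword boundary.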
1702 static const int kAlign = 8;
1703 static const int kAlignMask = kAlign - 1;
1704 int loBits = uintptr_t(a.code.frontier) & kAlignMask;
1705 a.emitNop(((kAlign - kFuncMovImm) - loBits) & kAlignMask);
1706 ASSERT((uintptr_t(a.code.frontier) & kAlignMask) == kAlign - kFuncMovImm);
1707 TCA aStart = a.code.frontier;
1708 a. load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
1709 ASSERT((a.code.frontier - aStart) ==
1710 (kFuncMovImm - 2 /* rex + movimmOpcode */));
1711 a. mov_imm64_reg(uint64_t(func), rdx);
1712 a. cmp_reg64_reg64(rax, rdx);
1714 if (!m_funcPrologueRedispatch) {
1715 m_funcPrologueRedispatch = emitPrologueRedispatch(astubs);
1717 a. jnz(m_funcPrologueRedispatch);
1718 ASSERT(a.code.frontier - aStart <= kFuncGuardLen);
1719 a.emitNop(kFuncGuardLen - (a.code.frontier - aStart));
1720 ASSERT(a.code.frontier - aStart == kFuncGuardLen);
1721 return aStart;
1725 skipFuncCheck(TCA dest) {
1726 if (!dest || dest == (TCA)fcallHelperThunk) return dest;
1727 return dest + kFuncGuardLen;
1731 * funcPrologue --
1733 * Given a callee and a number of args, match up to the callee's
1734 * argument expectations and dispatch.
1736 * Call/return hand-shaking is a bit funny initially. At translation time,
1737 * we don't necessarily know what function we're calling. For instance,
1739 * f(g());
1741 * Will lead to a set of basic blocks like:
1743 * b1: pushfuncd "f"
1744 * pushfuncd "g"
1745 * fcall
1746 * b2: fcall
1748 * The fcall labelled "b2" above is not statically bindable in our
1749 * execution model.
1751 * We decouple the call work into a per-callsite portion, responsible
1752 * for recording the return address, and a per-(callee, numArgs) portion,
1753 * responsible for fixing up arguments and dispatching to remaining
1754 * code. We call the per-callee portion a "prologue."
1756 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
1757 * we're running "between" basic blocks, with all VM registers sync'ed.
1758 * However, we're also called in the middle of basic blocks, when dropping
1759 * entries into func->m_prologues. So don't go around using the
1760 * translation-time values of vmfp()/vmsp(), since they have an
1761 * unpredictable relationship to the source.
1763 bool
1764 TranslatorX64::checkCachedPrologue(const Func* func, int paramIdx,
1765 TCA& prologue) const {
1766 prologue = (TCA)func->getPrologue(paramIdx);
1767 if (prologue != (TCA)fcallHelperThunk && !s_replaceInFlight) {
1768 TRACE(1, "cached prologue %s(%d) -> cached %p\n",
1769 func->fullName()->data(), paramIdx, prologue);
1770 ASSERT(isValidCodeAddress(prologue));
1771 return true;
1773 return false;
1777 TranslatorX64::funcPrologue(Func* func, int nPassed) {
1778 func->validate();
1779 TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
1780 int numParams = func->numParams();
1781 int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
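// E.g. for a function with numParams == 3: nPassed of 0..3 selects prologue
// slots 0..3, and any nPassed > 3 collapses to slot 4, the shared
// "too many args" entry point (see emitPrologueRedispatch above).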
1783 bool funcIsMagic = func->isMagic();
1785 // Do a quick test before grabbing the write lease
1786 TCA prologue;
1787 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1789 // If the translator is getting replaced out from under us, refuse to
1790 // provide a prologue; we don't know whether this request is running on the
1791 // old or new context.
1792 LeaseHolder writer(s_writeLease);
1793 if (!writer || s_replaceInFlight) return NULL;
1794 // Double check the prologue array now that we have the write lease
1795 // in case another thread snuck in and set the prologue already.
1796 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1798 SpaceRecorder sr("_FuncPrologue", a);
1799 // If we're close to a cache line boundary, just burn some space to
1800 // try to keep the func and its body on fewer total lines.
1801 if (((uintptr_t)a.code.frontier & kX64CacheLineMask) >= 32) {
1802 moveToAlign(a, kX64CacheLineSize);
1804 // Careful: this isn't necessarily the real entry point. For funcIsMagic
1805 // prologues, this is just a possible prologue.
1806 TCA aStart = a.code.frontier;
1807 TCA start = aStart;
1808 TCA stubStart = astubs.code.frontier;
1810 // Guard: we're in the right callee. This happens in magicStart for
1811 // magic callees.
1812 if (!funcIsMagic) {
1813 start = aStart = emitFuncGuard(a, func);
1816 emitRB(a, RBTypeFuncPrologueTry, func->fullName()->data());
1817 // Guard: we have enough stack space to complete this function.
1818 emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
1820 // NB: We have most of the register file to play with, since we know
1821 // we're between BB's. So, we hardcode some registers here rather
1822 // than using the scratch allocator.
1823 TRACE(2, "funcPrologue: user function: %s\n", func->name()->data());
1825 // Add a counter for the translation if requested
1826 if (RuntimeOption::EvalJitTransCounters) {
1827 emitTransCounterInc(a);
1830 if (!funcIsMagic) {
1831 // entry point for magic methods comes later
1832 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1835 SrcKey skFuncBody = emitPrologue(func, nPassed);
1837 if (funcIsMagic) {
1838 // the entry point for magic methods is here
1839 TCA magicStart = emitFuncGuard(a, func);
1840 ASSERT(numParams == 2);
1841 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1842 // Special __call prologue
1843 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1844 emitCall(a, TCA(TranslatorX64::shuffleArgsForMagicCall));
1845 // if shuffleArgs returns 0, that means this was not a magic call
1846 // and we should proceed to a prologue specialized for nPassed;
1847 // otherwise, proceed to a prologue specialized for nPassed==numParams (2).
1848 if (nPassed == 2) {
1849 a.jmp(start);
1850 } else {
1851 a.test_reg64_reg64(rax, rax);
1852 // z ==> not a magic call, go to prologue for nPassed
1853 if (deltaFits(start - (a.code.frontier + kJcc8Len), sz::byte)) {
1854 a.jcc8(CC_Z, start);
1855 } else {
1856 a.jcc(CC_Z, start);
1858 // this was a magic call
1859 // nPassed == 2
1860 // Fix up hardware stack pointer
1861 nPassed = 2;
1862 a. lea_reg64_disp_reg64(rStashedAR, -cellsToBytes(nPassed), rVmSp);
1863 // Optimization TODO: Reuse the prologue for args == 2
1864 emitPrologue(func, nPassed);
1866 start = magicStart;
1868 ASSERT(funcGuardIsForFunc(start, func));
1869 TRACE(2, "funcPrologue tx64 %p %s(%d) setting prologue %p\n",
1870 this, func->fullName()->data(), nPassed, start);
1871 ASSERT(isValidCodeAddress(start));
1872 func->setPrologue(paramIndex, start);
1874 addTranslation(TransRec(skFuncBody, func->unit()->md5(),
1875 TransProlog, aStart, a.code.frontier - aStart,
1876 stubStart, astubs.code.frontier - stubStart));
1878 recordGdbTranslation(skFuncBody, func->unit(),
1879 a, aStart,
1880 false, true);
1881 recordBCInstr(OpFuncPrologue, a, start);
1883 return start;
1886 static TCA callAndResume(ActRec *ar) {
1887 VMRegAnchor _(ar, true);
1888 g_vmContext->doFCall<true>(ar, g_vmContext->m_pc);
1889 return Translator::Get()->getResumeHelper();
1892 extern "C"
1893 TCA fcallHelper(ActRec* ar) {
1894 try {
1895 TCA tca =
1896 Translator::Get()->funcPrologue((Func*)ar->m_func, ar->numArgs());
1897 if (tca) {
1898 return tca;
1900 return callAndResume(ar);
1901 } catch (...) {
1903 The return address is set to __fcallHelperThunk,
1904 which has no unwind information. It's "logically"
1905 part of the TC, but the C++ unwinder won't know
1906 that. So point our return address at the called
1907 function's return address (which will be in the
1908 tc).
1909 Note that the registers really are clean - we
1910 just came from callAndResume which cleaned
1911 them for us - so we just have to tell the unwinder
1912 that.
1914 register ActRec* rbp asm("rbp");
1915 tl_regState = REGSTATE_CLEAN;
1916 rbp->m_savedRip = ar->m_savedRip;
1917 throw;
1922 TranslatorX64::emitInterceptPrologue(Func* func, TCA next) {
1923 TCA start = a.code.frontier;
1924 a.mov_imm64_reg((uintptr_t)&func->maybeIntercepted(), rax);
1925 a.cmp_imm8_disp_reg8(0, 0, rax);
1926 TCA jcc8PatchAddr = NULL;
1927 if (next == NULL) {
1928 jcc8PatchAddr = a.code.frontier;
1929 a.jcc8(CC_E, jcc8PatchAddr);
1930 } else {
1931 a.jcc(CC_E, next);
1933 // Prologues are not really sites for function entry yet; we can get
1934 // here via an optimistic bindCall. Check that the func is as expected.
1936 a. mov_imm64_reg(uint64_t(func), rax);
1937 a. cmp_reg64_disp_reg64(rax, AROFF(m_func), rStashedAR);
1939 JccBlock<CC_NZ> skip(a);
1940 a.call(getInterceptHelper());
1942 if (jcc8PatchAddr != NULL) {
1943 a.patchJcc8(jcc8PatchAddr, a.code.frontier);
1945 return start;
1948 void
1949 TranslatorX64::interceptPrologues(Func* func) {
1950 if (!RuntimeOption::EvalJitEnableRenameFunction &&
1951 !(func->attrs() & AttrDynamicInvoke)) {
1952 return;
1954 if (func->maybeIntercepted() == -1) {
1955 return;
1957 func->maybeIntercepted() = -1;
1958 ASSERT(s_writeLease.amOwner());
1959 int maxNumPrologues = func->numPrologues();
1960 for (int i = 0; i < maxNumPrologues; i++) {
1961 TCA prologue = func->getPrologue(i);
1962 if (prologue == (unsigned char*)fcallHelperThunk)
1963 continue;
1964 ASSERT(funcGuardIsForFunc(prologue, func));
1965 // There might already be calls hard-coded to this via FCall.
1966 // Blow away the immediate comparison, so that we always use the Func*'s
1967 // prologue table. We use 0 (== NULL on our architecture) as the bit
1968 // pattern for an impossible Func.
1970 // Note that we're modifying reachable code.
1971 disableFuncGuard(prologue, func);
1972 ASSERT(funcGuardIsForFunc(prologue, NULL));
1974 // There's a prologue already generated; redirect it to first
1975 // call the intercept helper. First, reset it (leaking the old
1976 // prologue), so funcPrologue will re-emit it.
1977 func->setPrologue(i, (TCA)fcallHelperThunk);
1978 TCA addr = funcPrologue(func, i);
1979 ASSERT(funcGuardIsForFunc(addr, func));
1980 ASSERT(addr);
1981 func->setPrologue(i, addr);
1982 TRACE(1, "interceptPrologues %s prologue[%d]=%p\n",
1983 func->fullName()->data(), i, (void*)addr);
1987 SrcKey
1988 TranslatorX64::emitPrologue(Func* func, int nPassed) {
1989 int numParams = func->numParams();
1990 ASSERT(IMPLIES(func->maybeIntercepted() == -1,
1991 m_interceptsEnabled));
1992 if (m_interceptsEnabled &&
1993 !func->isPseudoMain() &&
1994 (RuntimeOption::EvalJitEnableRenameFunction ||
1995 func->attrs() & AttrDynamicInvoke)) {
1996 emitInterceptPrologue(func);
1999 Offset dvInitializer = InvalidAbsoluteOffset;
2001 if (nPassed > numParams) {
2002 // Too many args; a weird case, so just call out. Stash ar
2003 // somewhere callee-saved.
2004 if (false) { // typecheck
2005 TranslatorX64::trimExtraArgs((ActRec*)NULL);
2007 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
2008 emitCall(a, TCA(TranslatorX64::trimExtraArgs));
2009 // We'll fix rVmSp below.
2010 } else if (nPassed < numParams) {
2011 // Figure out which, if any, default value initializer to go to
2012 for (int i = nPassed; i < numParams; ++i) {
2013 const Func::ParamInfo& pi = func->params()[i];
2014 if (pi.hasDefaultValue()) {
2015 dvInitializer = pi.funcletOff();
2016 break;
2019 TRACE(1, "Only have %d of %d args; getting dvFunclet\n",
2020 nPassed, numParams);
2021 emitImmReg(a, nPassed, rax);
2022 // do { *(--rVmSp) = NULL; nPassed++; } while (nPassed < numParams);
2023 // This should be an unusual case, so optimize for code density
2024 // rather than execution speed; i.e., don't unroll the loop.
2025 TCA loopTop = a.code.frontier;
2026 a. sub_imm32_reg64(sizeof(Cell), rVmSp);
2027 a. add_imm32_reg32(1, rax);
2028 // XXX "missing argument" warnings need to go here
2029 emitStoreUninitNull(a, 0, rVmSp);
2030 a. cmp_imm32_reg32(numParams, rax);
2031 a. jcc8(CC_L, loopTop);
2034 // Entry point for numParams == nPassed is here.
2035 // Args are kosher. Frame linkage: set fp = ar.
2036 a. mov_reg64_reg64(rStashedAR, rVmFp);
2038 // We're in the callee frame; initialize locals. Unroll the loop all
2039 // the way if there are a modest number of locals to update;
2040 // otherwise, do it in a compact loop. If we're in a generator body,
2041 // named locals will be initialized by UnpackCont so we can leave
2042 // them alone here.
2043 int uninitLimit = func->isGenerator() ? func->numNamedLocals() : numParams;
2044 int numUninitLocals = func->numLocals() - uninitLimit;
2045 ASSERT(numUninitLocals >= 0);
2046 if (numUninitLocals > 0) {
2047 SpaceRecorder sr("_InitializeLocals", a);
2049 // If there are too many locals, emitting a loop to initialize them
2050 // is more compact than emitting a slew of movs inline.
2051 if (numUninitLocals > kLocalsToInitializeInline) {
2052 PhysReg loopReg = rcx;
2054 // rVmFp + rcx points to the count/type fields of the TypedValue we're
2055 // about to write to.
2056 int loopStart = -func->numLocals() * sizeof(TypedValue)
2057 + TVOFF(_count);
2058 int loopEnd = -uninitLimit * sizeof(TypedValue)
2059 + TVOFF(_count);
2061 emitImmReg(a, loopStart, loopReg);
2062 emitImmReg(a, 0, rdx);
2064 TCA topOfLoop = a.code.frontier;
2065 // do {
2066 // rVmFp[rcx].m_type = KindOfUninit;
2067 // } while(++rcx != loopEnd);
2069 // mov %rdx, 0x0(%rVmFp, %rcx, 1)
2070 a. emitRM(instr_mov, rVmFp, loopReg, 1, 0, rdx);
2071 a. add_imm32_reg64(sizeof(Cell), loopReg);
2072 a. cmp_imm32_reg64(loopEnd, loopReg);
2073 a. jcc8(CC_NE, topOfLoop);
2074 } else {
2075 PhysReg base;
2076 int disp, k;
2077 for (k = uninitLimit; k < func->numLocals(); ++k) {
2078 locToRegDisp(Location(Location::Local, k), &base, &disp);
2079 emitStoreUninitNull(a, disp, base);
2084 // Move rVmSp to the right place: just past all locals
2085 int frameCells = func->numSlotsInFrame();
2086 a. lea_reg64_disp_reg64(rVmFp, -cellsToBytes(frameCells), rVmSp);
2087 const Opcode* destPC = func->unit()->entry() + func->base();
2088 if (dvInitializer != InvalidAbsoluteOffset) {
2089 // dispatch to funclet.
2090 destPC = func->unit()->entry() + dvInitializer;
2092 SrcKey funcBody(func, destPC);
2094 // Check surprise flags in the same place as the interpreter: after
2095 // setting up the callee's frame but before executing any of its
2096 // code
2097 emitCheckSurpriseFlagsEnter(false, funcBody.m_offset - func->base(),
2098 frameCells);
2100 emitBindJmp(funcBody);
2101 return funcBody;
2104 void
2105 TranslatorX64::emitBindCall(const Tracelet& t,
2106 const NormalizedInstruction &ni,
2107 Offset atCall, Offset afterCall) {
2108 int numArgs = ni.imm[0].u_IVA;
2110 // If this is a call to a builtin and we don't need any argument
2111 // munging, we can skip the prologue system and do it inline.
2112 if (ni.funcd && ni.funcd->isBuiltin() &&
2113 numArgs == ni.funcd->numParams()) {
2114 ASSERT(ni.funcd->numLocals() == ni.funcd->numParams());
2115 ASSERT(ni.funcd->numIterators() == 0);
2116 a. lea_reg64_disp_reg64(rVmSp, cellsToBytes(numArgs), rVmFp);
2117 emitCheckSurpriseFlagsEnter(true, 0, numArgs);
2118 // rVmSp is already correctly adjusted, because there are no locals
2119 // other than the arguments passed.
2120 return emitNativeImpl(ni.funcd, false /* don't jump to return */);
2123 // Stash callee's rVmFp into rStashedAR for the callee's prologue
2124 a. lea_reg64_disp_reg64(rVmSp, cellsToBytes(numArgs), rStashedAR);
2125 emitBindCallHelper(rStashedAR, ni.source, ni.funcd, numArgs, (bool)ni.funcd);
2126 return;
2129 void
2130 TranslatorX64::emitBindCallHelper(register_name_t stashedAR,
2131 SrcKey srcKey,
2132 const Func* funcd,
2133 int numArgs,
2134 bool isImmutable) {
2135 // Whatever prologue we're branching to will check at runtime that we
2136 // went to the right Func*, correcting if necessary. We treat the first
2137 // Func we encounter as a decent prediction. Make space to burn in a
2138 // TCA.
2139 ReqBindCall* req = m_globalData.alloc<ReqBindCall>();
2140 a. mov_reg64_reg64(rStashedAR, serviceReqArgRegs[1]);
2141 prepareForSmash(kJmpLen);
2142 TCA toSmash = a.code.frontier;
2143 a. jmp(emitServiceReq(false, REQ_BIND_CALL, 1ull, req));
2145 TRACE(1, "will bind static call: tca %p, this %p, funcd %p\n",
2146 toSmash, this, funcd);
2147 req->m_toSmash = toSmash;
2148 req->m_nArgs = numArgs;
2149 req->m_sourceInstr = srcKey;
2150 req->m_isImmutable = isImmutable;
2152 return;
2155 // for documentation see bindJmpccFirst below
2156 void
2157 TranslatorX64::emitCondJmp(const SrcKey &skTaken, const SrcKey &skNotTaken,
2158 ConditionCode cc) {
2159 // should be true for SrcKeys generated via OpJmpZ/OpJmpNZ
2160 ASSERT(skTaken.m_funcId == skNotTaken.m_funcId);
2162 // reserve space for a smashable jnz/jmp pair; both initially point
2163 // to our stub
2164 prepareForSmash(kJmpLen + kJmpccLen);
2165 TCA old = a.code.frontier;
2167 moveToAlign(astubs);
2168 TCA stub = astubs.code.frontier;
2170 // begin code for the stub
2172 // We need to be careful here, as we are passing an extra parameter to
2173 // REQ_BIND_JMPCC_FIRST. However, we can't pass this parameter via
2174 // emitServiceReq because that only supports constants/immediates, so
2175 // compute the last argument via setcc.
2176 astubs.setcc(cc, serviceReqArgRegs[4]);
2177 emitServiceReq(false /* align */, REQ_BIND_JMPCC_FIRST, 4ull,
2178 old,
2179 uint64_t(skTaken.offset()),
2180 uint64_t(skNotTaken.offset()),
2181 uint64_t(cc));
2183 a.jcc(cc, stub); // MUST use 4-byte immediate form
2184 a.jmp(stub); // MUST use 4-byte immediate form
2187 static void skToName(const SrcKey& sk, char* name) {
2188 sprintf(name, "sk_%08lx_%05d",
2189 long(sk.m_funcId), sk.offset());
2192 static void skToClusterName(const SrcKey& sk, char* name) {
2193 sprintf(name, "skCluster_%08lx_%05d",
2194 long(sk.m_funcId), sk.offset());
2197 static void translToName(const TCA tca, char* name) {
2198 sprintf(name, "tc_%p", tca);
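// drawCFG --
//   Dump the SrcDB as a Graphviz "dot" graph: one cluster per SrcKey, one
//   node per translation, and an edge (colored by branch type) for each
//   incoming branch recorded against the destination SrcKey.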
2201 void TranslatorX64::drawCFG(std::ofstream& out) const {
2202 if (!isTransDBEnabled()) return;
2203 const char* indent = " ";
2204 static int genCount;
2205 int numSrcKeys = 0;
2206 int numTranslations = 0;
2207 out << "digraph srcdb" << genCount++ <<" {\n";
2208 out << indent << "size = \"8,11\";\n";
2209 out << indent << "ratio = fill;\n";
2210 for (SrcDB::const_iterator entry = m_srcDB.begin();
2211 entry != m_srcDB.end(); ++entry) {
2212 const SrcKey sk = SrcKey::fromAtomicInt(entry->first);
2213 // 1 subgraph per srcKey.
2214 char name[64];
2215 skToClusterName(sk, name);
2216 numSrcKeys++;
2217 out << indent << "subgraph " << name << "{\n";
2218 char* indent = " ";
2219 skToName(sk, name);
2220 out << indent << name << "[shape=box];\n";
2221 const vector<TCA>& transls = entry->second->translations();
2222 for (vector<TCA>::const_iterator t = transls.begin(); t != transls.end();
2223 ++t) {
2224 out << indent << "// Translations: " << transls.size() << "\n";
2225 char transname[64];
2226 translToName(*t, transname);
2227 numTranslations++;
2228 out << indent << transname << "[fontsize=11.0];\n";
2229 out << indent << name << " -> " << transname << ";\n";
2231 // And, all translations on the same line
2232 out << indent << "{ rank = same; ";
2233 out << name << " ";
2234 for (vector<TCA>::const_iterator t = transls.begin(); t != transls.end();
2235 ++t) {
2236 char transname[64];
2237 translToName(*t, transname);
2238 out << transname << " ";
2240 out << indent << "}\n"; // subgraph
2241 out << indent << "}\n";
2244 // OK! Those were all the nodes. Now edges. While edges are physically
2245 // from translation to translation, they're virtually from srcKey to
2246 // srcKey, and that is how the db represents them.
2247 for (SrcDB::const_iterator entry = m_srcDB.begin(); entry != m_srcDB.end();
2248 ++entry) {
2249 char destName[64];
2250 skToName(SrcKey::fromAtomicInt(entry->first), destName);
2251 const vector<IncomingBranch>& ibs = entry->second->incomingBranches();
2252 out << indent << "// incoming branches to " << destName << "\n";
2253 for (vector<IncomingBranch>::const_iterator ib = ibs.begin();
2254 ib != ibs.end(); ++ib) {
2255 // Find the start of the translation that contains this branch
2256 const char *branchTypeToColorStr[] = {
2257 "black", // JMP
2258 "green", // JZ
2259 "red", // JNZ
2261 TransDB::const_iterator lowerTCA = m_transDB.lower_bound(ib->m_src);
2262 ASSERT(lowerTCA != m_transDB.end());
2263 char srcName[64];
2264 const TransRec* transRec = this->getTransRec(lowerTCA->second);
2265 skToName(transRec->src, srcName);
2266 out << indent << srcName << " -> " << destName << "[ color = " <<
2267 branchTypeToColorStr[ib->m_type] << "];\n";
2270 out << indent << "// " << numSrcKeys << " srckeys, " << numTranslations <<
2271 " tracelets\n";
2272 out << "}\n\n";
2276 * bindJmp --
2278 * Runtime service handler that patches a jmp to the translation of
2279 * u:dest from toSmash.
2282 TranslatorX64::bindJmp(TCA toSmash, SrcKey destSk, ServiceRequest req) {
2283 TCA tDest = getTranslation(&destSk, false, req == REQ_BIND_JMP_NO_IR);
2284 if (!tDest) return NULL;
2285 LeaseHolder writer(s_writeLease);
2286 if (!writer) return tDest;
2287 SrcRec* sr = getSrcRec(destSk);
2288 if (req == REQ_BIND_ADDR) {
2289 sr->chainFrom(a, IncomingBranch((TCA*)toSmash));
2290 } else if (req == REQ_BIND_JCC) {
2291 sr->chainFrom(getAsmFor(toSmash),
2292 IncomingBranch(IncomingBranch::JCC, toSmash));
2293 } else {
2294 sr->chainFrom(getAsmFor(toSmash), IncomingBranch(toSmash));
2296 return tDest;
2300 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
2302 * 1: j<CC> stubJmpccFirst
2303 * jmp stubJmpccFirst
2305 * Our "taken" argument tells us whether the branch at 1: was taken or
2306 * not; and therefore which of offTaken and offNotTaken to continue executing.
2307 * If we did take the branch, we now rewrite the code so that the branch is
2308 * straightened. This predicts that subsequent executions will go the same way
2309 * as the first execution.
2311 * jn<CC> stubJmpccSecond:offNotTaken
2312 * nop5 ; fallthru, or jmp if there's already a translation.
2313 * offTaken:
2315 * If we did not take the branch, we leave the sense of the condition
2316 * intact, while patching it up to go to the unexplored code:
2318 * j<CC> stubJmpccSecond:offTaken
2319 * nop5
2320 * offNotTaken:
2323 TranslatorX64::bindJmpccFirst(TCA toSmash,
2324 Offset offTaken, Offset offNotTaken,
2325 bool taken,
2326 ConditionCode cc) {
2327 const Func* f = curFunc();
2328 LeaseHolder writer(s_writeLease);
2329 if (!writer) return NULL;
2330 Offset offWillExplore = taken ? offTaken : offNotTaken;
2331 Offset offWillDefer = taken ? offNotTaken : offTaken;
2332 SrcKey dest(f, offWillExplore);
2333 TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
2334 "taken %d\n",
2335 offWillExplore, offWillDefer, cc, taken);
2337 // We want the branch to point to whichever side has not been explored
2338 // yet.
2339 if (taken) cc = ccNegate(cc);
2340 TCA stub =
2341 emitServiceReq(false, REQ_BIND_JMPCC_SECOND, 3,
2342 toSmash, uint64_t(offWillDefer), uint64_t(cc));
2344 Asm &as = getAsmFor(toSmash);
2345 // It's not clear where chainFrom should go to if as is astubs
2346 ASSERT(&as == &a);
2348 // can we just directly fall through?
2349 // a jmp + jz takes 5 + 6 = 11 bytes
2350 bool fallThru = toSmash + kJmpccLen + kJmpLen == as.code.frontier &&
2351 !m_srcDB.find(dest);
2353 TCA tDest;
2354 tDest = getTranslation(&dest, !fallThru /* align */);
2355 if (!tDest) {
2356 return 0;
2358 ASSERT(s_writeLease.amOwner());
2360 * Roll over the jcc and the jmp/fallthru. E.g., from:
2362 * toSmash: jcc <jmpccFirstStub>
2363 * toSmash+6: jmp <jmpccFirstStub>
2364 * toSmash+11: <probably the new translation == tdest>
2366 * to:
2368 * toSmash: j[n]z <jmpccSecondStub>
2369 * toSmash+6: nop5
2370 * toSmash+11: newHotness
2372 CodeCursor cg(as, toSmash);
2373 a.jcc(cc, stub);
2374 getSrcRec(dest)->chainFrom(as, IncomingBranch(as.code.frontier));
2375 TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
2376 return tDest;
2379 // smashes a jcc to point to a new destination
2381 TranslatorX64::bindJmpccSecond(TCA toSmash, const Offset off,
2382 ConditionCode cc) {
2383 const Func* f = curFunc();
2384 SrcKey dest(f, off);
2385 TCA branch = getTranslation(&dest, true);
2386 LeaseHolder writer(s_writeLease, NO_ACQUIRE);
2387 if (branch && writer.acquire()) {
2388 SrcRec* destRec = getSrcRec(dest);
2389 destRec->chainFrom(getAsmFor(toSmash),
2390 IncomingBranch(IncomingBranch::JCC, toSmash));
2392 return branch;
2395 static void emitJmpOrJcc(X64Assembler& a, int cc, TCA addr) {
2396 if (cc < 0) {
2397 a. jmp(addr);
2398 } else {
2399 a. jcc((ConditionCode)cc, addr);
2404 * emitBindJ --
2406 * Emit code to lazily branch (optionally on condition cc) to the
2407 * srckey in dest.
2408 * Assumes current basic block is closed (outputs synced, etc.).
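 *
 * Roughly: we reserve a smashable jmp (or jcc) that initially targets a
 * service-request stub in astubs; the stub raises `req` with (toSmash,
 * dest.offset()), and bindJmp() later smashes the jump to point straight
 * at dest's translation.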
2410 void
2411 TranslatorX64::emitBindJ(X64Assembler& _a, int cc,
2412 const SrcKey& dest, ServiceRequest req) {
2413 prepareForSmash(_a, cc < 0 ? (int)kJmpLen : kJmpccLen);
2414 TCA toSmash = _a.code.frontier;
2415 if (&_a == &astubs) {
2416 emitJmpOrJcc(_a, cc, toSmash);
2419 TCA sr = emitServiceReq(false, req, 2,
2420 toSmash, uint64_t(dest.offset()));
2422 if (&_a == &astubs) {
2423 CodeCursor cursor(_a, toSmash);
2424 emitJmpOrJcc(_a, cc, sr);
2425 } else {
2426 emitJmpOrJcc(_a, cc, sr);
2430 void
2431 TranslatorX64::emitBindJcc(X64Assembler& _a, ConditionCode cc,
2432 const SrcKey& dest,
2433 ServiceRequest req /* = REQ_BIND_JCC */) {
2434 emitBindJ(_a, cc, dest, req);
2437 void
2438 TranslatorX64::emitBindJmp(X64Assembler& _a,
2439 const SrcKey& dest,
2440 ServiceRequest req /* = REQ_BIND_JMP */) {
2441 emitBindJ(_a, -1, dest, req);
2444 void
2445 TranslatorX64::emitBindJmp(const SrcKey& dest) {
2446 emitBindJmp(a, dest);
2449 void
2450 TranslatorX64::emitStringCheck(X64Assembler& _a,
2451 PhysReg base, int offset, PhysReg tmp) {
2452 // Treat KindOfString and KindOfStaticString identically; they
2453 // are bitwise identical. This is a port of our IS_STRING_TYPE
2454 // macro to assembly, and will have to change in sync with it.
2455 static_assert(IS_STRING_TYPE(7) && IS_STRING_TYPE(6),
2456 "Assembly version of IS_STRING_TYPE needs to be updated");
2457 _a. load_reg64_disp_reg32(base, offset, tmp);
2458 _a. and_imm32_reg32((signed char)(0xfe), tmp); // use 1-byte immediate
2459 _a. cmp_imm32_reg32(6, tmp);
2462 void
2463 TranslatorX64::emitTypeCheck(X64Assembler& _a, DataType dt,
2464 PhysReg base, int offset,
2465 PhysReg tmp/*= InvalidReg*/) {
2466 offset += TVOFF(m_type);
2467 if (IS_STRING_TYPE(dt)) {
2468 LazyScratchReg scr(m_regMap);
2469 if (tmp == InvalidReg) {
2470 scr.alloc();
2471 tmp = *scr;
2473 emitStringCheck(_a, base, offset, tmp);
2474 } else {
2475 _a. cmp_imm32_disp_reg32(dt, offset, base);
2479 void
2480 TranslatorX64::checkType(X64Assembler& a,
2481 const Location& l,
2482 const RuntimeType& rtt,
2483 SrcRec& fail) {
2484 // We can get invalid inputs as a side effect of reading invalid
2485 // items out of BBs we truncate; they don't need guards.
2486 if (rtt.isVagueValue() || l.isThis()) return;
2488 if (m_useHHIR) {
2489 irCheckType(a, l, rtt, fail);
2490 return;
2493 PhysReg base;
2494 int disp = 0;
2495 SpaceRecorder sr("_CheckType", a);
2497 TRACE(1, Trace::prettyNode("Precond", DynLocation(l, rtt)) + "\n");
2499 locToRegDisp(l, &base, &disp);
2500 TRACE(2, "TypeCheck: %d(%%r%d)\n", disp, base);
2501 // Negative offsets from RSP are not yet allocated; they had
2502 // better not be inputs to the tracelet.
2503 ASSERT(l.space != Location::Stack || disp >= 0);
2504 if (Trace::moduleEnabled(Trace::stats, 2)) {
2505 Stats::emitInc(a, Stats::TraceletGuard_branch);
2507 if (rtt.isIter()) {
2508 a. cmp_imm32_disp_reg32(rtt.typeCheckValue(),
2509 disp + rtt.typeCheckOffset(),
2510 base);
2511 } else {
2512 emitTypeCheck(a, rtt.typeCheckValue(), base, disp, rax);
2514 emitFallbackJmp(fail);
2517 void
2518 TranslatorX64::emitFallbackJmp(SrcRec& dest) {
2519 prepareForSmash(kJmpccLen);
2520 dest.emitFallbackJump(a, a.code.frontier, CC_NZ);
2523 void
2524 TranslatorX64::emitFallbackJmp(Asm& as, SrcRec& dest) {
2525 prepareForSmash(as, kJmpccLen);
2526 dest.emitFallbackJump(as, as.code.frontier, CC_NZ);
2529 void
2530 TranslatorX64::emitFallbackUncondJmp(Asm& as, SrcRec& dest) {
2531 prepareForSmash(as, kJmpLen);
2532 dest.emitFallbackJump(as, as.code.frontier);
2535 void TranslatorX64::emitReqRetransNoIR(Asm& as, SrcKey& sk) {
2536 prepareForSmash(as, kJmpLen);
2537 TCA toSmash = as.code.frontier;
2538 if (&as == &astubs) {
2539 as.jmp(toSmash);
2542 TCA sr = emitServiceReq(REQ_RETRANSLATE_NO_IR, 2,
2543 toSmash, sk.offset());
2545 if (&as == &astubs) {
2546 CodeCursor cc(as, toSmash);
2547 as.jmp(sr);
2548 } else {
2549 as.jmp(sr);
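// packBitVec --
//   Pack up to 64 entries of `bits`, starting at index i (which must be a
//   multiple of 64), into one machine word; e.g. bits = {true, false, true},
//   i = 0 yields binary 101 == 5.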
2553 uint64_t TranslatorX64::packBitVec(const vector<bool>& bits, unsigned i) {
2554 uint64_t retval = 0;
2555 ASSERT(i % 64 == 0);
2556 ASSERT(i < bits.size());
2557 while (i < bits.size()) {
2558 retval |= uint64_t(bits[i]) << (i % 64);
2559 if ((++i % 64) == 0) {
2560 break;
2563 return retval;
2566 void
2567 TranslatorX64::checkRefs(X64Assembler& a,
2568 const SrcKey& sk,
2569 const RefDeps& refDeps,
2570 SrcRec& fail) {
2571 if (refDeps.size() == 0) {
2572 return;
2576 * We're still between BB's, so we're not using the real register
2577 * allocator.
2579 RegSet unusedRegs = kScratchCrossTraceRegs;
2580 DumbScratchReg rNumParams(unusedRegs);
2581 DumbScratchReg rMask(unusedRegs);
2582 DumbScratchReg rBits(unusedRegs);
2583 DumbScratchReg rExpectedBits(unusedRegs);
2584 DumbScratchReg rBitsValue(unusedRegs);
2585 DumbScratchReg rFunc(unusedRegs);
2587 // Set up guards for each pushed ActRec that we've made reffiness
2588 // assumptions about
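  // Each ArMap entry carries a bit mask (which argument positions we made an
  // assumption about) and the expected values (whether each such argument is
  // passed by reference). The code below loads the callee's refBitVec, masks
  // it, and compares against the expected bits, side-exiting through
  // emitFallbackJmp on any mismatch.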
2589 for (RefDeps::ArMap::const_iterator it = refDeps.m_arMap.begin();
2590 it != refDeps.m_arMap.end(); ++it) {
2591 // Be careful! The actual Func might have fewer refs than the number
2592 // of args we're passing. To forestall this, we're going to have to
2593 // keep checking i against the number of params. We consider invocations
2594 // with too many arguments to have passed their checks.
2595 int entryArDelta = it->first;
2597 if (m_useHHIR) {
2598 m_hhbcTrans->guardRefs(entryArDelta,
2599 it->second.m_mask,
2600 it->second.m_vals);
2601 continue;
2604 int32_t funcOff = cellsToBytes(entryArDelta) + AROFF(m_func);
2605 a. load_reg64_disp_reg64(rVmSp, funcOff, *rFunc); // rFunc <- Func*
2606 a. load_reg64_disp_reg32(*rFunc, Func::numParamsOff(),
2607 *rNumParams);
2608 a. load_reg64_disp_reg64(*rFunc, Func::refBitVecOff(),
2609 *rBits); // rBits <- m_refBitVec
2611 for (unsigned i = 0; i < it->second.m_mask.size(); i += 64) {
2612 ASSERT(i < it->second.m_vals.size());
2613 uint64_t mask = packBitVec(it->second.m_mask, i);
2614 if (mask == 0) {
2615 continue;
2617 uint64_t value = packBitVec(it->second.m_vals, i);
2619 emitImmReg(a, mask, *rMask);
2620 emitImmReg(a, value, *rExpectedBits);
2623 * Before trying to load this block off the bit vector, make
2624 * sure it actually exists. It's ok to index past numArgs
2625 * within one of these words, because the remaining bits will be
2626 * set to zero (or one in the case of the variadic by ref
2627 * builtins).
2629 if (Trace::moduleEnabled(Trace::stats, 2)) {
2630 Stats::emitInc(a, Stats::TraceletGuard_branch);
2632 a. cmp_imm32_reg32(i + 1, *rNumParams);
2634 IfElseBlock<CC_L> ifFewEnoughArgs(a);
2636 // Load the appropriate qword off of the top actRec's func*.
2637 SKTRACE(2, sk, "reffiness mask %lx value %lx, ar @%d\n",
2638 mask, value, entryArDelta);
2639 a. load_reg64_disp_reg64(*rBits, sizeof(uint64) * (i / 64),
2640 *rBitsValue); // rBitsValue <- rBits[i / 64]
2641 a. and_reg64_reg64(*rMask, *rBitsValue); // rBitsValue &= rMask
2642 a. cmp_reg64_reg64(*rBitsValue, *rExpectedBits);
2643 emitFallbackJmp(fail);
2645 ifFewEnoughArgs.Else();
2647 a. test_imm32_disp_reg32(AttrVariadicByRef,
2648 Func::attrsOff(),
2649 *rFunc);
2651 IfElseBlock<CC_NZ> ifNotWeirdBuiltin(a);
2653 // Other than these builtins, we need to have all by value
2654 // args in this case.
2655 a. test_reg64_reg64(*rExpectedBits, *rExpectedBits);
2656 emitFallbackJmp(fail);
2658 ifNotWeirdBuiltin.Else();
2660 // If it is one of the weird builtins that has reffiness for
2661 // additional args, we have to make sure our expectation is
2662 // that these additional args are by ref.
2663 a. cmp_imm32_reg64((signed int)(-1ull & mask), *rExpectedBits);
2664 emitFallbackJmp(fail);
2672 * emitRetFromInterpretedFrame --
2674 * When the interpreter pushes a call frame, there is necessarily no
2675 * machine RIP available to return to. This helper fishes out the
2676 * destination from the frame and redirects execution to it via enterTC.
2679 TranslatorX64::emitRetFromInterpretedFrame() {
2680 int32_t arBase = sizeof(ActRec) - sizeof(Cell);
2681 moveToAlign(astubs);
2682 TCA stub = astubs.code.frontier;
2684 // Marshal our own args by hand here.
2684 astubs. lea_reg64_disp_reg64(rVmSp, -arBase, serviceReqArgRegs[0]);
2685 astubs. mov_reg64_reg64(rVmFp, serviceReqArgRegs[1]);
2686 (void) emitServiceReq(false, REQ_POST_INTERP_RET, 0ull);
2687 return stub;
2691 * fcallHelperThunk
2692 * Note: Assumes rStashedAR is r15
2694 static_assert(rStashedAR == r15,
2695 "__fcallHelperThunk needs to be modified for ABI changes");
2696 asm (
2697 ".byte 0\n"
2698 ".align 16\n"
2699 ".globl __fcallHelperThunk\n"
2700 "__fcallHelperThunk:\n"
2701 #ifdef HHVM
2702 "mov %r15, %rdi\n"
2703 "call fcallHelper\n"
2704 "jmp *%rax\n"
2705 #endif
2706 "ud2\n"
2710 * enterTCHelper
2712 * This helper routine is written in x64 assembly to take care of the details
2713 * when transferring control between jitted code and the translator.
2714 * rdi: Cell* vm_sp
2715 * rsi: Cell* vm_fp
2716 * rdx: unsigned char* start
2717 * rcx: TReqInfo* infoPtr
2718 * r8: ActRec* firstAR
2719 * r9: uint8_t* targetCacheBase
2721 * Note: enterTCHelper does not save callee-saved registers except
2722 * %rbp. This means when we call it from C++, we have to tell gcc to
2723 * clobber all the other callee-saved registers.
2725 static_assert(rVmSp == rbx &&
2726 rVmFp == rbp &&
2727 rVmTl == r12 &&
2728 rStashedAR == r15,
2729 "__enterTCHelper needs to be modified to use the correct ABI");
2730 static_assert(kReservedRSPScratchSpace == 0x80,
2731 "enterTCHelper needs to be updated for changes to "
2732 "kReservedRSPScratchSpace");
2733 asm (
2734 ".byte 0\n"
2735 ".align 16\n"
2736 "__enterTCHelper:\n"
2737 // Prologue
2738 ".cfi_startproc\n"
2739 "push %rbp\n"
2740 ".cfi_adjust_cfa_offset 8\n" // offset to previous frame relative to %rsp
2741 ".cfi_offset rbp, -16\n" // Where to find previous value of rbp
2743 // Set firstAR->m_savedRbp to point to this frame.
2744 "mov %rsp, (%r8)\n"
2746 // Save infoPtr
2747 "push %rcx\n"
2748 ".cfi_adjust_cfa_offset 8\n"
2750 // Set up special registers used for translated code.
2751 "mov %rdi, %rbx\n" // rVmSp
2752 "mov %r9, %r12\n" // rVmTl
2753 "mov %rsi, %rbp\n" // rVmFp
2754 "mov 0x30(%rcx), %r15\n" // rStashedAR saved across service requests
2757 * The translated code we are about to enter does not follow the
2758 * standard prologue of pushing rbp at entry, so we are purposely 8
2759 * bytes short of 16-byte alignment before this call instruction so
2760 * that the return address being pushed will make the native stack
2761 * 16-byte aligned.
2764 "sub $0x80, %rsp\n" // kReservedRSPScratchSpace
2765 // May need cfi_adjust_cfa_offset annotations: Task #1747813
2766 "call *%rdx\n"
2767 "add $0x80, %rsp\n"
2769 // Restore infoPtr into %rbx
2770 "pop %rbx\n"
2771 ".cfi_adjust_cfa_offset -8\n"
2773 // Copy the values passed from jitted code into *infoPtr
2774 "mov %rdi, 0x0(%rbx)\n"
2775 "mov %rsi, 0x8(%rbx)\n"
2776 "mov %rdx, 0x10(%rbx)\n"
2777 "mov %rcx, 0x18(%rbx)\n"
2778 "mov %r8, 0x20(%rbx)\n"
2779 "mov %r9, 0x28(%rbx)\n"
2781 // Service request "callee-saved". (Returnee-saved?)
2782 "mov %r15, 0x30(%rbx)\n"
2784 // Epilogue
2785 "pop %rbp\n"
2786 ".cfi_restore rbp\n"
2787 ".cfi_adjust_cfa_offset -8\n"
2788 "ret\n"
2789 ".cfi_endproc\n"
2792 struct TReqInfo {
2793 uintptr_t requestNum;
2794 uintptr_t args[5];
2796 // Some TC registers need to be preserved across service requests.
2797 uintptr_t saved_rStashedAr;
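  // Field layout matches the hard-coded offsets in __enterTCHelper above:
  // requestNum at 0x0, args[0..4] at 0x8-0x28, and saved_rStashedAr at 0x30
  // (the %r15 value restored on entry and stored back on exit).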
2800 void enterTCHelper(Cell* vm_sp,
2801 Cell* vm_fp,
2802 TCA start,
2803 TReqInfo* infoPtr,
2804 ActRec* firstAR,
2805 void* targetCacheBase) asm ("__enterTCHelper");
2807 struct DepthGuard {
2808 static __thread int m_depth;
2809 DepthGuard() { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
2810 ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }
2812 __thread int DepthGuard::m_depth;
2813 void
2814 TranslatorX64::enterTC(SrcKey sk) {
2815 using namespace TargetCache;
2816 TCA start = getTranslation(&sk, true);
2818 DepthGuard d;
2819 TReqInfo info;
2820 const uintptr_t& requestNum = info.requestNum;
2821 uintptr_t* args = info.args;
2822 for (;;) {
2823 ASSERT(vmfp() >= vmsp() - 1);
2824 ASSERT(sizeof(Cell) == 16);
2825 ASSERT(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
2826 ASSERT(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
2828 TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
2829 vmfp(), ((ActRec*)vmfp())->m_func->name()->data(), vmsp());
2830 s_writeLease.gremlinUnlock();
2831 // Keep dispatching until we end up somewhere the translator
2832 // recognizes, or we luck out and the leaseholder exits.
2833 while (!start) {
2834 TRACE(2, "enterTC forwarding BB to interpreter\n");
2835 g_vmContext->m_pc = curUnit()->at(sk.offset());
2836 INC_TPC(interp_bb);
2837 g_vmContext->dispatchBB();
2838 sk = SrcKey(curFunc(), g_vmContext->getPC());
2839 start = getTranslation(&sk, true);
2841 ASSERT(start);
2842 ASSERT(isValidCodeAddress(start));
2843 tl_regState = REGSTATE_DIRTY;
2844 ASSERT(!s_writeLease.amOwner());
2845 curFunc()->validate();
2846 INC_TPC(enter_tc);
2848 // The asm volatile here is to force C++ to spill anything that
2849 // might be in a callee-saved register (aside from rbp).
2850 // enterTCHelper does not preserve these registers.
2851 asm volatile("" : : : "rbx","r12","r13","r14","r15");
2852 enterTCHelper(vmsp(), vmfp(), start, &info, vmFirstAR(),
2853 tl_targetCaches);
2854 asm volatile("" : : : "rbx","r12","r13","r14","r15");
2856 tl_regState = REGSTATE_CLEAN; // Careful: pc isn't sync'ed yet.
2857 // Debugging code: cede the write lease half the time.
2858 if (debug && (RuntimeOption::EvalJitStressLease)) {
2859 if (d.m_depth == 1 && (rand() % 2) == 0) {
2860 s_writeLease.gremlinLock();
2864 TRACE(4, "enterTC: %p fp%p sp%p } return\n", start,
2865 vmfp(), vmsp());
2866 TRACE(4, "enterTC: request(%s) args: %lx %lx %lx %lx %lx\n",
2867 reqName(requestNum),
2868 args[0], args[1], args[2], args[3], args[4]);
2869 ASSERT(vmfp() >= vmsp() - 1 || requestNum == REQ_EXIT);
2871 if (debug) {
2872 // Ensure that each case either returns, or drives start to a valid
2873 // value.
2874 start = TCA(0xbee5face);
2877 // The contract is that each case will either exit, by returning, or
2878 // set sk to the place where execution should resume, and optionally
2879 // set start to the hardware translation of the resumption point.
2881 // start and sk might be subtly different; i.e., there are cases where
2882 // start != NULL && start != getTranslation(sk). For instance,
2883 // REQ_BIND_CALL has not finished executing the OpCall when it gets
2884 // here, and has even done some work on its behalf. sk == OpFCall,
2885 // while start == the point in the TC that's "half-way through" the
2886 // Call instruction. If we punt to the interpreter, the interpreter
2887 // will redo some of the work that the translator has already done.
2888 INC_TPC(service_req);
2889 switch (requestNum) {
2890 case REQ_EXIT: {
2891 // fp is not valid anymore
2892 vmfp() = NULL;
2893 return;
2896 case REQ_BIND_CALL: {
2897 ReqBindCall* req = (ReqBindCall*)args[0];
2898 ActRec* calleeFrame = (ActRec*)args[1];
2899 TCA toSmash = req->m_toSmash;
2900 Func *func = const_cast<Func*>(calleeFrame->m_func);
2901 int nArgs = req->m_nArgs;
2902 bool isImmutable = req->m_isImmutable;
2903 TCA dest = tx64->funcPrologue(func, nArgs);
2904 TRACE(2, "enterTC: bindCall %s -> %p\n", func->name()->data(), dest);
2905 if (isImmutable) {
2906 // If we *know* we're calling the right function, don't bother
2907 // with the dynamic check of ar->m_func.
2908 dest = skipFuncCheck(dest);
2909 TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
2910 func->name()->data(), dest);
2912 LeaseHolder writer(s_writeLease, NO_ACQUIRE);
2913 if (dest && writer.acquire()) {
2914 TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
2915 smash(tx64->getAsmFor(toSmash), toSmash, dest);
2916 // sk: stale, but doesn't matter since we have a valid dest TCA.
2917 } else {
2918 // We need translator help; we're not at the callee yet, so
2919 // roll back. The prelude has done some work already, but it
2920 // should be safe to redo.
2921 TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
2922 toSmash, dest);
2923 sk = req->m_sourceInstr;
2925 start = dest;
2926 } break;
2928 case REQ_BIND_SIDE_EXIT:
2929 case REQ_BIND_JMP:
2930 case REQ_BIND_JCC:
2931 case REQ_BIND_JMP_NO_IR:
2932 case REQ_BIND_ADDR: {
2933 TCA toSmash = (TCA)args[0];
2934 Offset off = args[1];
2935 sk = SrcKey(curFunc(), off);
2936 if (requestNum == REQ_BIND_SIDE_EXIT) {
2937 SKTRACE(3, sk, "side exit taken!\n");
2939 start = bindJmp(toSmash, sk, (ServiceRequest)requestNum);
2940 } break;
2942 case REQ_BIND_JMPCC_FIRST: {
2943 TCA toSmash = (TCA)args[0];
2944 Offset offTaken = (Offset)args[1];
2945 Offset offNotTaken = (Offset)args[2];
2946 ConditionCode cc = ConditionCode(args[3]);
2947 bool taken = int64(args[4]) & 1;
2948 start = bindJmpccFirst(toSmash, offTaken, offNotTaken, taken, cc);
2949 // SrcKey: we basically need to emulate the fail
2950 sk = SrcKey(curFunc(), taken ? offTaken : offNotTaken);
2951 } break;
2953 case REQ_BIND_JMPCC_SECOND: {
2954 TCA toSmash = (TCA)args[0];
2955 Offset off = (Offset)args[1];
2956 ConditionCode cc = ConditionCode(args[2]);
2957 start = bindJmpccSecond(toSmash, off, cc);
2958 sk = SrcKey(curFunc(), off);
2959 } break;
2961 case REQ_BIND_REQUIRE: {
2962 ReqLitStaticArgs* rlsa = (ReqLitStaticArgs*)args[0];
2963 sk = SrcKey((Func*)args[1], (Offset)args[2]);
2964 start = getTranslation(&sk, true);
2965 if (start) {
2966 LeaseHolder writer(s_writeLease);
2967 if (writer) {
2968 SrcRec* sr = getSrcRec(sk);
2969 sr->chainFrom(a, IncomingBranch(&rlsa->m_pseudoMain));
2972 } break;
2974 case REQ_RETRANSLATE_NO_IR: {
2975 TCA toSmash = (TCA)args[0];
2976 sk = SrcKey(curFunc(), (Offset)args[1]);
2977 start = retranslateAndPatchNoIR(sk, true, toSmash);
2978 SKTRACE(2, sk, "retranslated (without IR) @%p\n", start);
2979 } break;
2981 case REQ_RETRANSLATE: {
2982 INC_TPC(retranslate);
2983 sk = SrcKey(curFunc(), (Offset)args[0]);
2984 start = retranslate(sk, true, RuntimeOption::EvalJitUseIR);
2985 SKTRACE(2, sk, "retranslated @%p\n", start);
2986 } break;
2988 case REQ_INTERPRET: {
2989 Offset off = args[0];
2990 int numInstrs = args[1];
2991 g_vmContext->m_pc = curUnit()->at(off);
2993 * We know the compilation unit has not changed; basic blocks do
2994 * not span files. I claim even exceptions do not violate this
2995 * axiom.
2997 ASSERT(numInstrs >= 0);
2998 ONTRACE(5, SrcKey(curFunc(), off).trace("interp: enter\n"));
2999 if (numInstrs) {
3000 s_perfCounters[tpc_interp_instr] += numInstrs;
3001 g_vmContext->dispatchN(numInstrs);
3002 } else {
3003 // numInstrs == 0 means it wants to dispatch until BB ends
3004 INC_TPC(interp_bb);
3005 g_vmContext->dispatchBB();
3007 SrcKey newSk(curFunc(), g_vmContext->getPC());
3008 SKTRACE(5, newSk, "interp: exit\n");
3009 sk = newSk;
3010 start = getTranslation(&newSk, true);
3011 } break;
3013 case REQ_POST_INTERP_RET: {
3014 // This is only responsible for the control-flow aspect of the Ret:
3015 // getting to the destination's translation, if any.
3016 ActRec* ar = (ActRec*)args[0];
3017 ActRec* caller = (ActRec*)args[1];
3018 ASSERT((Cell*) ar < vmsp()); // ar is already logically popped
3019 ASSERT((Cell*) caller > vmsp()); // caller is now active
3020 ASSERT((Cell*) caller == vmfp());
3021 Unit* destUnit = caller->m_func->unit();
3022 // Set PC so logging code in getTranslation doesn't get confused.
3023 vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
3024 SrcKey dest(caller->m_func, vmpc());
3025 sk = dest;
3026 start = getTranslation(&dest, true);
3027 TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
3028 ar->m_func->fullName()->data(),
3029 caller->m_func->fullName()->data());
3030 } break;
3032 case REQ_RESUME: {
3033 SrcKey dest(curFunc(), vmpc());
3034 sk = dest;
3035 start = getTranslation(&dest, true);
3036 } break;
3038 case REQ_STACK_OVERFLOW: {
3040 * we need to construct the pc of the fcall from the return
3041 * address (which will be after the fcall). Because fcall is
3042 * a variable length instruction, and because we sometimes
3043 * delete instructions from the instruction stream, we
3044 * need to use fpi regions to find the fcall.
3046 const FPIEnt* fe = curFunc()->findPrecedingFPI(
3047 curUnit()->offsetOf(vmpc()));
3048 vmpc() = curUnit()->at(fe->m_fcallOff);
3049 ASSERT(isFCallStar(*vmpc()));
3050 raise_error("Stack overflow");
3051 NOT_REACHED();
3055 NOT_REACHED();
3058 void TranslatorX64::resume(SrcKey sk) {
3059 enterTC(sk);
3063 * emitServiceReq --
3065 * Call a translator service co-routine. The code emitted here
3066 * reenters the enterTC loop, invoking the requested service. Control
3067 * will be returned non-locally to the next logical instruction in
3068 * the TC.
3070 * Return value is a destination; we emit the bulky service
3071 * request code into astubs.
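 *
 * The stub itself is small (illustrative):
 *
 *   <eager VM reg save>
 *   mov $arg_i, serviceReqArgRegs[i]   ; for each argument
 *   mov $req, %rdi
 *   ret        ; back into enterTCHelper -- the "weird hand-shaking" below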
3075 TranslatorX64::emitServiceReqVA(bool align, ServiceRequest req, int numArgs,
3076 va_list args) {
3077 if (align) {
3078 moveToAlign(astubs);
3080 TCA retval = astubs.code.frontier;
3081 emitEagerVMRegSave(astubs, SaveFP);
3083 * Move args into appropriate regs.
3085 TRACE(3, "Emit Service Req %s(", reqName(req));
3086 for (int i = 0; i < numArgs; i++) {
3087 uint64_t argVal = va_arg(args, uint64_t);
3088 TRACE(3, "%p,", (void*)argVal);
3089 emitImmReg(astubs, argVal, serviceReqArgRegs[i]);
3091 TRACE(3, ")\n");
3092 emitImmReg(astubs, req, rdi);
3094 * Weird hand-shaking with enterTC: reverse-call a service routine.
3096 astubs. ret();
3097 recordBCInstr(OpServiceRequest, astubs, retval);
3098 translator_not_reached(astubs);
3099 return retval;
3103 TranslatorX64::emitServiceReq(ServiceRequest req, int numArgs, ...) {
3104 va_list args;
3105 va_start(args, numArgs);
3106 TCA retval = emitServiceReqVA(true, req, numArgs, args);
3107 va_end(args);
3108 return retval;
3112 TranslatorX64::emitServiceReq(bool align, ServiceRequest req, int numArgs,
3113 ...) {
3114 va_list args;
3115 va_start(args, numArgs);
3116 TCA retval = emitServiceReqVA(align, req, numArgs, args);
3117 va_end(args);
3118 return retval;
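// emitTransCounterInc --
//   Bump this translation's counter with a "lock inc" through a scratch
//   register; emits nothing when the TransDB is disabled.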
3122 TranslatorX64::emitTransCounterInc(X64Assembler& a) {
3123 TCA start = a.code.frontier;
3124 if (!isTransDBEnabled()) return start;
3125 uint64* counterAddr = getTransCounterAddr();
3127 a.mov_imm64_reg((uint64)counterAddr, rScratch);
3128 a.emitLockPrefix();
3129 a.inc_mem64(rScratch, 0);
3131 return start;
3134 void
3135 TranslatorX64::spillTo(DataType type, PhysReg reg, bool writeType,
3136 PhysReg base, int disp) {
3137 X64Assembler& a = *m_spillFillCode;
3138 // Zero out the count at the same time as writing the type.
3139 SpaceRecorder sr("_Spill", a);
3141 Stats::emitInc(a, Stats::Tx64_Spill);
3142 emitStoreTypedValue(a, type, reg, disp, base, writeType);
3145 void
3146 TranslatorX64::spill(const Location& loc, DataType type,
3147 PhysReg reg, bool writeType) {
3148 PhysReg base;
3149 int disp;
3150 locToRegDisp(loc, &base, &disp);
3151 spillTo(type, reg, writeType, base, disp);
3152 TRACE(2, "%s: (%s, %lld) -> v: %d(r%d) type%d\n",
3153 __func__,
3154 loc.spaceName(), loc.offset, int(disp + TVOFF(m_data)), base, type);
3157 void
3158 TranslatorX64::fill(const Location& loc, PhysReg reg) {
3159 SpaceRecorder sr("_Fill", *m_spillFillCode);
3160 if (loc.isThis()) {
3161 m_spillFillCode->load_reg64_disp_reg64(rVmFp, AROFF(m_this), reg);
3162 return;
3164 PhysReg base;
3165 int disp;
3166 locToRegDisp(loc, &base, &disp);
3167 TRACE(2, "fill: (%s, %lld) -> reg %d\n",
3168 loc.spaceName(), loc.offset, reg);
3169 m_spillFillCode->load_reg64_disp_reg64(base, disp + TVOFF(m_data), reg);
3172 void TranslatorX64::fillByMov(PhysReg src, PhysReg dst) {
3173 SpaceRecorder sr("_FillMov", *m_spillFillCode);
3174 ASSERT(src != dst);
3175 m_spillFillCode->mov_reg64_reg64(src, dst);
3178 void
3179 TranslatorX64::loadImm(int64 immVal, PhysReg reg) {
3180 SpaceRecorder sr("_FillImm", *m_spillFillCode);
3181 TRACE(2, "loadImm: 0x%llx -> reg %d\n", immVal, reg);
3182 emitImmReg(*m_spillFillCode, immVal, reg);
3185 void
3186 TranslatorX64::poison(PhysReg dest) {
3187 static const bool poison = false;
3188 if (poison) {
3189 emitImmReg(*m_spillFillCode, 0xbadf00d105e5babe, dest);
3194 * Spill all dirty registers, mark all registers as 'free' in the
3195 * register file, and update rVmSp to point to the top of stack at
3196 * the end of the tracelet.
3198 void
3199 TranslatorX64::syncOutputs(const Tracelet& t) {
3200 syncOutputs(t.m_stackChange);
3204 * Same as above, except that it sets rVmSp to point to the top of
3205 * stack at the beginning of the specified instruction.
3207 void
3208 TranslatorX64::syncOutputs(const NormalizedInstruction& i) {
3209 syncOutputs(i.stackOff);
3212 void
3213 TranslatorX64::syncOutputs(int stackOff) {
3214 SpaceRecorder sr("_SyncOuts", a);
3215 TCA start = a.code.frontier;
3216 // Mark all stack locations above the top of stack as dead
3217 m_regMap.scrubStackEntries(stackOff);
3218 // Spill all dirty registers
3219 m_regMap.cleanAll();
3220 if (stackOff != 0) {
3221 TRACE(1, "syncOutputs: rVmSp + %d\n", stackOff);
3222 // t.stackChange is in negative Cells, not bytes.
3223 a. add_imm32_reg64(-cellsToBytes(stackOff), rVmSp);
3225 // All registers have been smashed for realz, yo
3226 m_regMap.smashRegs(kAllRegs);
3227 recordBCInstr(OpSyncOutputs, a, start);
3231 * getBinaryStackInputs --
3233 * Helper for a common pattern of instruction, where two items are popped
3234 * and one is pushed. The second item on the stack at the beginning of
3235 * the instruction is both a source and destination.
3237 static void
3238 getBinaryStackInputs(RegAlloc& regmap, const NormalizedInstruction& i,
3239 PhysReg& rsrc, PhysReg& rsrcdest) {
3240 ASSERT(i.inputs.size() == 2);
3241 ASSERT(i.outStack && !i.outLocal);
3242 rsrcdest = regmap.getReg(i.outStack->location);
3243 rsrc = regmap.getReg(i.inputs[0]->location);
3244 ASSERT(regmap.getReg(i.inputs[1]->location) == rsrcdest);
3247 // emitBox --
3248 // Leave a boxed version of input in RAX. Destroys the register
3249 // mapping.
3250 void
3251 TranslatorX64::emitBox(DataType t, PhysReg rSrc) {
3252 if (false) { // typecheck
3253 RefData* retval = tvBoxHelper(KindOfArray, 0xdeadbeef01ul);
3254 (void)retval;
3256 // tvBoxHelper will set the refcount of the inner cell to 1
3257 // for us. Because the inner cell now holds a reference to the
3258 // original value, we don't need to perform a decRef.
3259 EMIT_CALL(a, tvBoxHelper, IMM(t), R(rSrc));
3262 // emitUnboxTopOfStack --
3263 // Unbox the known-to-be Variant on top of stack in place.
3264 void
3265 TranslatorX64::emitUnboxTopOfStack(const NormalizedInstruction& i) {
3266 const vector<DynLocation*>& inputs = i.inputs;
3268 ASSERT(inputs.size() == 1);
3269 ASSERT(i.outStack && !i.outLocal);
3270 ASSERT(inputs[0]->isStack());
3271 ASSERT(i.outStack && i.outStack->location == inputs[0]->location);
3272 DataType outType = inputs[0]->rtt.innerType();
3273 ASSERT(outType != KindOfInvalid);
3274 ASSERT(outType == i.outStack->outerType());
3275 PhysReg rSrc = getReg(inputs[0]->location);
3276 // Detach the register rSrc from the input location. We must
3277 // do this dance because the input and output location are the
3278 // same and we want to have separate registers for the input
3279 // and output.
3280 m_regMap.invalidate(inputs[0]->location);
3281 ScratchReg rSrcScratch(m_regMap, rSrc);
3282 // This call to allocOutputRegs will allocate a new register
3283 // for the output location
3284 m_regMap.allocOutputRegs(i);
3285 PhysReg rDest = getReg(i.outStack->location);
3286 emitDeref(a, rSrc, rDest);
3287 emitIncRef(rDest, outType);
3288 // decRef the var on the evaluation stack
3289 emitDecRef(i, rSrc, KindOfRef);
3292 // setOpOpToOpcodeOp --
3293 // The SetOp opcode space has nothing to do with the bytecode opcode
3294 // space. Reasonable people like it that way, so translate them here.
3295 static Opcode
3296 setOpOpToOpcodeOp(SetOpOp soo) {
3297 switch(soo) {
3298 #define SETOP_OP(_soo, _bc) case SetOp##_soo: return _bc;
3299 SETOP_OPS
3300 #undef SETOP_OP
3301 default: ASSERT(false);
3303 return -1;
3306 void
3307 TranslatorX64::binaryIntegerArith(const NormalizedInstruction& i,
3308 Opcode op,
3309 PhysReg srcReg,
3310 PhysReg srcDestReg) {
3311 switch (op) {
3312 #define CASEIMM(OpBc, x64op) \
3313 case OpBc: { \
3314 if (i.hasConstImm) { \
3315 a. x64op ## _imm64_reg64(i.constImm.u_I64A, srcDestReg); \
3316 } else { \
3317 a. x64op ## _reg64_reg64(srcReg, srcDestReg); \
3318 } } break;
3319 #define CASE(OpBc, x64op) \
3320 case OpBc: { \
3321 a. x64op ## _reg64_reg64(srcReg, srcDestReg); \
3322 } break;
3323 CASEIMM(OpAdd, add)
3324 CASEIMM(OpSub, sub)
3325 CASEIMM(OpBitAnd, and)
3326 CASEIMM(OpBitOr, or)
3327 CASEIMM(OpBitXor, xor)
3328 CASE(OpMul, imul)
3329 #undef CASE
3330 #undef CASEIMM
3332 default: {
3333 not_reached();
3338 void
3339 TranslatorX64::binaryArithCell(const NormalizedInstruction &i,
3340 Opcode op, const DynLocation& in1,
3341 const DynLocation& inout) {
3342 ASSERT(in1.rtt.isInt());
3343 ASSERT(inout.rtt.isInt());
3344 ASSERT(in1.outerType() != KindOfRef);
3345 ASSERT(in1.isStack());
3346 ASSERT(inout.outerType() != KindOfRef);
3347 ASSERT(inout.isStack());
3348 m_regMap.allocOutputRegs(i);
3349 PhysReg srcReg = m_regMap.getReg(in1.location);
3350 PhysReg srcDestReg = m_regMap.getReg(inout.location);
3351 binaryIntegerArith(i, op, srcReg, srcDestReg);
3354 void
3355 TranslatorX64::binaryArithLocal(const NormalizedInstruction &i,
3356 Opcode op,
3357 const DynLocation& in1,
3358 const DynLocation& in2,
3359 const DynLocation& out) {
3360 // The caller must guarantee that these conditions hold
3361 ASSERT(in1.rtt.isInt());
3362 ASSERT(in2.rtt.isInt());
3363 ASSERT(in1.outerType() != KindOfRef);
3364 ASSERT(in1.isStack());
3365 ASSERT(in2.isLocal());
3366 ASSERT(out.isStack());
3368 PhysReg srcReg = m_regMap.getReg(in1.location);
3369 PhysReg outReg = m_regMap.getReg(out.location);
3370 PhysReg localReg = m_regMap.getReg(in2.location);
3371 if (in2.outerType() != KindOfRef) {
3372 // The local is not a var, so we can operate directly on the
3373 // local's register. We will need to update outReg after the
3374 // operation.
3375 binaryIntegerArith(i, op, srcReg, localReg);
3376 // We operated directly on the local's register, so we need to update
3377 // outReg
3378 emitMovRegReg(localReg, outReg);
3379 } else {
3380 // The local is a var, so we have to read its value into outReg
3381 // and operate on that. We will need to write the result back
3382 // to the local after the operation.
3383 emitDeref(a, localReg, outReg);
3384 binaryIntegerArith(i, op, srcReg, outReg);
3385 // We operated on outReg, so we need to write the result back to the
3386 // local
3387 a. store_reg64_disp_reg64(outReg, 0, localReg);
3391 static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
3392 ASSERT(tl_regState == REGSTATE_DIRTY);
3393 tl_regState = REGSTATE_CLEAN;
3394 vmfp() = (Cell*)ar;
3395 vmsp() = sp;
3396 vmpc() = curUnit()->at(pcOff);
3397 ASSERT(vmsp() <= vmfp());
3400 #define O(opcode, imm, push, pop, flags) \
3402 * The interpOne methods save m_pc, m_fp, and m_sp in the ExecutionContext,
3403 * call into the interpreter, and then return a pointer to the
3404 * current ExecutionContext.
3405 */ \
3406 VMExecutionContext* \
3407 interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
3408 interp_set_regs(ar, sp, pcOff); \
3409 SKTRACE(5, SrcKey(curFunc(), vmpc()), "%40s %p %p\n", \
3410 "interpOne" #opcode " before (fp,sp)", \
3411 vmfp(), vmsp()); \
3412 ASSERT(*vmpc() == Op ## opcode); \
3413 VMExecutionContext* ec = g_vmContext; \
3414 Stats::inc(Stats::Instr_InterpOne ## opcode); \
3415 INC_TPC(interp_one) \
3416 /* Correct for over-counting in TC-stats. */ \
3417 Stats::inc(Stats::Instr_TC, -1); \
3418 ec->op##opcode(); \
3420 * Only set regstate back to dirty if an exception is not
3421 * propagating. If an exception is throwing, regstate for this call
3422 * is actually still correct, and we don't have information in the
3423 * fixup map for interpOne calls anyway.
3424 */ \
3425 tl_regState = REGSTATE_DIRTY; \
3426 return ec; \
3429 OPCODES
3430 #undef O
3432 void* interpOneEntryPoints[] = {
3433 #define O(opcode, imm, push, pop, flags) \
3434 (void*)(interpOne ## opcode),
3435 OPCODES
3436 #undef O
3439 void TranslatorX64::fixupWork(VMExecutionContext* ec, ActRec* rbp) const {
3440 ASSERT(RuntimeOption::EvalJit);
3441 ActRec* nextAr = rbp;
3442 do {
3443 rbp = nextAr;
3444 FixupMap::VMRegs regs;
3445 TRACE(10, "considering frame %p, %p\n", rbp, (void*)rbp->m_savedRip);
3446 if (g_vmContext->m_stack.isValidAddress(rbp->m_savedRbp) &&
3447 m_fixupMap.getFrameRegs(rbp, &regs)) {
3448 TRACE(10, "fixup func %s fp %p sp %p pc %p\n",
3449 regs.m_fp->m_func->name()->data(),
3450 regs.m_fp, regs.m_sp, regs.m_pc);
3451 ec->m_fp = const_cast<ActRec*>(regs.m_fp);
3452 ec->m_pc = regs.m_pc;
3453 vmsp() = regs.m_sp;
3454 return;
3456 nextAr = (ActRec*)rbp->m_savedRbp;
3457 } while (rbp && rbp != nextAr);
3458 // OK, we've exhausted the entire actRec chain.
3459 // We are only invoking ::fixup() from contexts that were known
3460 // to be called out of the TC, so this cannot happen.
3461 NOT_REACHED();
3464 void TranslatorX64::fixup(VMExecutionContext* ec) const {
3465 // Start looking for fixup entries at the current (C++) frame. This
3466 // will walk the frames upward until we find a TC frame.
3467 ActRec* rbp;
3468 asm volatile("mov %%rbp, %0" : "=r"(rbp));
3469 fixupWork(ec, rbp);
3472 void
3473 TranslatorX64::syncWork() {
3474 ASSERT(tl_regState == REGSTATE_DIRTY);
3475 fixup(g_vmContext);
3476 tl_regState = REGSTATE_CLEAN;
3477 Stats::inc(Stats::TC_Sync);
3480 void
3481 TranslatorX64::emitInterpOne(const Tracelet& t,
3482 const NormalizedInstruction& ni) {
3483 // Write any dirty values to memory
3484 m_regMap.cleanAll();
3485 // Call into the appropriate interpOne method. Note that this call will
3486 // preserve the callee-saved registers including rVmFp and rVmSp.
3487 if (false) { /* typecheck */
3488 UNUSED VMExecutionContext* ec = interpOnePopC((ActRec*)vmfp(), vmsp(), 0);
3490 void* func = interpOneEntryPoints[ni.op()];
3491 TRACE(3, "ip %p of unit %p -> interpOne @%p\n", ni.pc(), ni.unit(), func);
3492 EMIT_CALL(a, func,
3493 R(rVmFp),
3494 RPLUS(rVmSp, -int32_t(cellsToBytes(ni.stackOff))),
3495 IMM(ni.source.offset()));
3496 // The interpreter may have written to memory, so we need to invalidate
3497 // all locations
3498 m_regMap.reset();
3499 // The interpOne method returned a pointer to the current
3500 // ExecutionContext in rax, so we can read the 'm_*' fields
3501 // by adding the appropriate offset to rax and dereferencing.
3503 // If this instruction ends the tracelet, we have some extra work to do.
3504 if (ni.breaksTracelet) {
3505 // Read the 'm_fp' and 'm_stack.m_top' fields into the rVmFp and
3506 // rVmSp registers.
3507 a. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
3508 rVmFp);
3509 a. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
3510 Stack::topOfStackOffset(), rVmSp);
3511 if (opcodeChangesPC(ni.op())) {
3512 // If interpreting this instruction can potentially set PC to point
3513 // to something other than the next instruction in the bytecode,
3514 // we need to emit a service request to figure out where to go next.
3515 TCA stubDest = emitServiceReq(REQ_RESUME, 0ull);
3516 a. jmp(stubDest);
3517 } else {
3518 // If this instruction always advances PC to the next instruction in
3519 // the bytecode, then we know what SrcKey to bind to
3520 emitBindJmp(nextSrcKey(t, ni));
3525 // could be static but used in hopt/codegen.cpp
3526 void raiseUndefVariable(StringData* nm) {
3527 raise_notice(Strings::UNDEFINED_VARIABLE, nm->data());
3528 // FIXME: do we need to decref the string if an exception is
3529 // propagating?
3530 if (nm->decRefCount() == 0) { nm->release(); }
3533 static TXFlags
3534 planBinaryArithOp(const NormalizedInstruction& i) {
3535 ASSERT(i.inputs.size() == 2);
3536 return nativePlan(i.inputs[0]->isInt() && i.inputs[1]->isInt());
3539 void
3540 TranslatorX64::analyzeBinaryArithOp(Tracelet& t, NormalizedInstruction& i) {
3541 i.m_txFlags = planBinaryArithOp(i);
3544 void
3545 TranslatorX64::translateBinaryArithOp(const Tracelet& t,
3546 const NormalizedInstruction& i) {
3547 const Opcode op = i.op();
3548 ASSERT(op == OpSub || op == OpMul || op == OpBitAnd ||
3549 op == OpBitOr || op == OpBitXor);
3550 ASSERT(planBinaryArithOp(i));
3551 ASSERT(i.inputs.size() == 2);
3553 binaryArithCell(i, op, *i.inputs[0], *i.outStack);
3556 static inline bool sameDataTypes(DataType t1, DataType t2) {
3557 return TypeConstraint::equivDataTypes(t1, t2);
3560 static TXFlags
3561 planSameOp_SameTypes(const NormalizedInstruction& i) {
3562 ASSERT(i.inputs.size() == 2);
3563 const RuntimeType& left = i.inputs[0]->rtt;
3564 const RuntimeType& right = i.inputs[1]->rtt;
3565 DataType leftType = left.outerType();
3566 DataType rightType = right.outerType();
3567 return nativePlan(sameDataTypes(leftType, rightType) &&
3568 (left.isNull() || leftType == KindOfBoolean ||
3569 left.isInt() || left.isString()));
3572 static TXFlags
3573 planSameOp_DifferentTypes(const NormalizedInstruction& i) {
3574 ASSERT(i.inputs.size() == 2);
3575 DataType leftType = i.inputs[0]->outerType();
3576 DataType rightType = i.inputs[1]->outerType();
3577 if (!sameDataTypes(leftType, rightType)) {
3578 if (IS_REFCOUNTED_TYPE(leftType) || IS_REFCOUNTED_TYPE(rightType)) {
3579 // For dissimilar datatypes, we might call out to handle a refcount.
3580 return Supported;
3582 return Native;
3584 return Interp;
3587 void
3588 TranslatorX64::analyzeSameOp(Tracelet& t, NormalizedInstruction& i) {
3589 ASSERT(!(planSameOp_SameTypes(i) && planSameOp_DifferentTypes(i)));
3590 i.m_txFlags = TXFlags(planSameOp_SameTypes(i) | planSameOp_DifferentTypes(i));
3591 i.manuallyAllocInputs = true;
3594 void
3595 TranslatorX64::translateSameOp(const Tracelet& t,
3596 const NormalizedInstruction& i) {
3597 const Opcode op = i.op();
3598 ASSERT(op == OpSame || op == OpNSame);
3599 const vector<DynLocation*>& inputs = i.inputs;
3600 bool instrNeg = (op == OpNSame);
3601 ASSERT(inputs.size() == 2);
3602 ASSERT(i.outStack && !i.outLocal);
3603 DataType leftType = i.inputs[0]->outerType();
3604 DataType rightType DEBUG_ONLY = i.inputs[1]->outerType();
3605 ASSERT(leftType != KindOfRef);
3606 ASSERT(rightType != KindOfRef);
3608 if (planSameOp_DifferentTypes(i)) {
3609 // Some easy cases: when the valueTypes do not match,
3610 // NSame -> true and Same -> false.
3611 SKTRACE(1, i.source, "different types %d %d\n",
3612 leftType, rightType);
3613 emitDecRefInput(a, i, 0);
3614 emitDecRefInput(a, i, 1);
3615 m_regMap.allocOutputRegs(i);
3616 emitImmReg(a, instrNeg, getReg(i.outStack->location));
3617 return; // Done
3620 ASSERT(planSameOp_SameTypes(i));
3622 if (IS_NULL_TYPE(leftType)) {
3623 m_regMap.allocOutputRegs(i);
3624 // null === null is always true
3625 SKTRACE(2, i.source, "straightening null/null comparison\n");
3626 emitImmReg(a, !instrNeg, getReg(i.outStack->location));
3627 return; // Done
3629 if (IS_STRING_TYPE(leftType)) {
3630 int args[2];
3631 args[0] = 0;
3632 args[1] = 1;
3633 allocInputsForCall(i, args);
3634 EMIT_CALL(a, same_str_str,
3635 V(inputs[0]->location),
3636 V(inputs[1]->location));
3637 if (instrNeg) {
3638 a. xor_imm32_reg32(1, rax);
3640 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3641 RegInfo::DIRTY);
3642 return; // Done
3644 m_regMap.allocInputRegs(i);
3645 PhysReg src, srcdest;
3646 getBinaryStackInputs(m_regMap, i, src, srcdest);
3647 m_regMap.allocOutputRegs(i);
3648 ASSERT(getReg(i.outStack->location) == srcdest);
3649 a. cmp_reg64_reg64(src, srcdest);
3650 if (op == OpSame) {
3651 a. sete(srcdest);
3652 } else {
3653 a. setne(srcdest);
3655 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
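// The cmp/sete (or setne for OpNSame) plus movzbq tail above is the usual
// "materialize a comparison as 0/1" idiom: roughly srcdest = (src == srcdest)
// for OpSame, negated for OpNSame, zero-extended to the full 64-bit register.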
3658 static bool
3659 trivialEquivType(const RuntimeType& rtt) {
3660 DataType t = rtt.valueType();
3661 return t == KindOfUninit || t == KindOfNull || t == KindOfBoolean ||
3662 rtt.isInt() || rtt.isString();
3665 static void
3666 emitConvertToBool(X64Assembler &a, PhysReg src, PhysReg dest, bool instrNeg) {
3667 a. test_reg64_reg64(src, src);
3668 if (instrNeg) {
3669 a. setz(dest);
3670 } else {
3671 a. setnz(dest);
3673 a. mov_reg8_reg64_unsigned(dest, dest);
3676 void
3677 TranslatorX64::analyzeEqOp(Tracelet& t, NormalizedInstruction& i) {
3678 ASSERT(i.inputs.size() == 2);
3679 RuntimeType &lt = i.inputs[0]->rtt;
3680 RuntimeType &rt = i.inputs[1]->rtt;
3681 i.m_txFlags = nativePlan(trivialEquivType(lt) &&
3682 trivialEquivType(rt));
3683 if (i.isNative() &&
3684 IS_NULL_TYPE(lt.outerType()) &&
3685 IS_NULL_TYPE(rt.outerType())) {
3686 i.manuallyAllocInputs = true;
3690 void
3691 TranslatorX64::translateEqOp(const Tracelet& t,
3692 const NormalizedInstruction& i) {
3693 const Opcode op = i.op();
3694 ASSERT(op == OpEq || op == OpNeq);
3695 ASSERT(i.isNative());
3696 const vector<DynLocation*>& inputs = i.inputs;
3697 bool instrNeg = (op == OpNeq);
3698 ASSERT(inputs.size() == 2);
3699 ASSERT(i.outStack && !i.outLocal);
3700 DataType leftType = i.inputs[0]->outerType();
3701 DataType rightType = i.inputs[1]->outerType();
3702 ASSERT(leftType != KindOfRef);
3703 ASSERT(rightType != KindOfRef);
3705 ConditionCode ccBranch = CC_E;
3706 if (instrNeg) ccBranch = ccNegate(ccBranch);
3708 // Inputless case.
3709 if (IS_NULL_TYPE(leftType) && IS_NULL_TYPE(rightType)) {
3710 ASSERT(i.manuallyAllocInputs);
3711 // null == null is always true
3712 bool result = !instrNeg;
3713 SKTRACE(2, i.source, "straightening null/null comparison\n");
3714 if (i.changesPC) {
3715 fuseBranchAfterStaticBool(t, i, result);
3716 } else {
3717 m_regMap.allocOutputRegs(i);
3718 emitImmReg(a, result, getReg(i.outStack->location));
3720 return; // Done
3723 if (IS_STRING_TYPE(leftType) || IS_STRING_TYPE(rightType)) {
3724 void* fptr = NULL;
3725 bool leftIsString = false;
3726 bool eqNullStr = false;
3727 switch (leftType) {
3728 STRINGCASE(): {
3729 leftIsString = true;
3730 switch (rightType) {
3731 STRINGCASE(): fptr = (void*)eq_str_str; break;
3732 case KindOfInt64: fptr = (void*)eq_int_str; break;
3733 case KindOfBoolean: fptr = (void*)eq_bool_str; break;
3734 NULLCASE(): fptr = (void*)eq_null_str; eqNullStr = true; break;
3735 default: ASSERT(false); break;
3737 } break;
3738 case KindOfInt64: fptr = (void*)eq_int_str; break;
3739 case KindOfBoolean: fptr = (void*)eq_bool_str; break;
3740 NULLCASE(): fptr = (void*)eq_null_str; eqNullStr = true; break;
3741 default: ASSERT(false); break;
3743 if (eqNullStr) {
3744 ASSERT(fptr == (void*)eq_null_str);
3745 EMIT_CALL(a, fptr,
3746 V(inputs[leftIsString ? 0 : 1]->location));
3747 } else {
3748 ASSERT(fptr != NULL);
3749 EMIT_CALL(a, fptr,
3750 V(inputs[leftIsString ? 1 : 0]->location),
3751 V(inputs[leftIsString ? 0 : 1]->location));
3753 if (i.changesPC) {
3754 fuseBranchSync(t, i);
3755 a. test_imm32_reg32(1, rax);
3756 fuseBranchAfterBool(t, i, ccNegate(ccBranch));
3757 return;
3759 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3760 RegInfo::DIRTY);
3761 if (instrNeg) {
3762 a. xor_imm32_reg32(1, rax);
3764 return;
3767 m_regMap.allocOutputRegs(i);
3768 PhysReg src, srcdest;
3769 getBinaryStackInputs(m_regMap, i, src, srcdest);
3770 ASSERT(getReg(i.outStack->location) == srcdest);
3771 if (i.changesPC) {
3772 fuseBranchSync(t, i);
3774 if (IS_NULL_TYPE(leftType) || IS_NULL_TYPE(rightType)) {
3775 if (IS_NULL_TYPE(leftType)) {
3776 a. test_reg64_reg64(srcdest, srcdest);
3777 } else {
3778 ASSERT(IS_NULL_TYPE(rightType));
3779 a. test_reg64_reg64(src, src);
3781 } else if (leftType == KindOfBoolean ||
3782 rightType == KindOfBoolean) {
3783 // OK to destroy src and srcdest in-place; their stack locations are
3784 // blown away by this instruction.
3785 if (leftType != KindOfBoolean)
3786 emitConvertToBool(a, src, src, false);
3787 if (rightType != KindOfBoolean)
3788 emitConvertToBool(a, srcdest, srcdest, false);
3789 a. cmp_reg64_reg64(src, srcdest);
3790 } else {
3791 a. cmp_reg64_reg64(src, srcdest);
3793 if (i.changesPC) {
3794 fuseBranchAfterBool(t, i, ccBranch);
3795 return;
3797 if (instrNeg) {
3798 a. setnz (srcdest);
3799 } else {
3800 a. setz (srcdest);
3802 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
3805 void
3806 TranslatorX64::analyzeLtGtOp(Tracelet& t, NormalizedInstruction& i) {
3807 ASSERT(i.inputs.size() == 2);
3808 const RuntimeType& left = i.inputs[0]->rtt;
3809 DataType leftType = left.outerType();
3810 DataType rightType = i.inputs[1]->outerType();
3811 i.m_txFlags = nativePlan(sameDataTypes(leftType, rightType) &&
3812 (left.isNull() ||
3813 leftType == KindOfBoolean ||
3814 left.isInt()));
3815 if (i.isNative() && IS_NULL_TYPE(left.outerType())) {
3816 // No inputs. w00t.
3817 i.manuallyAllocInputs = true;
3821 void
3822 TranslatorX64::translateLtGtOp(const Tracelet& t,
3823 const NormalizedInstruction& i) {
3824 const Opcode op = i.op();
3825 ASSERT(op == OpLt || op == OpLte || op == OpGt || op == OpGte);
3826 ASSERT(i.inputs.size() == 2);
3827 ASSERT(i.outStack && !i.outLocal);
3828 ASSERT(i.inputs[0]->outerType() != KindOfRef);
3829 ASSERT(i.inputs[1]->outerType() != KindOfRef);
3830 ASSERT(i.isNative());
3832 bool fEquals = (op == OpLte || op == OpGte);
3833 bool fLessThan = (op == OpLt || op == OpLte);
3835 m_regMap.allocOutputRegs(i);
3836 if (IS_NULL_TYPE(i.inputs[0]->outerType())) {
3837 ASSERT(IS_NULL_TYPE(i.inputs[1]->outerType()));
3838 // null < null is always false, null <= null is always true
3839 SKTRACE(2, i.source, "straightening null/null comparison\n");
3840 PhysReg rOut = getReg(i.outStack->location);
3841 bool resultIsTrue = (op == OpLte || op == OpGte);
3842 if (i.changesPC) {
3843 fuseBranchAfterStaticBool(t, i, resultIsTrue);
3844 } else {
3845 emitImmReg(a, resultIsTrue, rOut);
3847 return;
3849 PhysReg src, srcdest;
3850 getBinaryStackInputs(m_regMap, i, src, srcdest);
3851 ASSERT(getReg(i.outStack->location) == srcdest);
3852 if (i.changesPC) {
3853 fuseBranchSync(t, i);
3855 a. cmp_reg64_reg64(src, srcdest);
3856 static const ConditionCode opToCc[2][2] = {
3857 // !fEquals fEquals
3858 { CC_G, CC_GE }, // !fLessThan
3859 { CC_L, CC_LE }, // fLessThan
3861 ConditionCode cc = opToCc[fLessThan][fEquals];
3862 if (i.changesPC) {
3863 // Fuse the coming branch.
3864 fuseBranchAfterBool(t, i, cc);
3865 return;
3867 a. setcc(cc, srcdest);
3868 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
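// Worked example for the opToCc table: OpLte has fLessThan and fEquals both
// true, so opToCc[1][1] == CC_LE; OpGt has both false, giving opToCc[0][0]
// == CC_G. A fused branch jumps on that condition directly; otherwise the
// setcc/movzbq tail materializes it as a 0/1 value.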
3871 static TXFlags
3872 planUnaryBooleanOp(const NormalizedInstruction& i) {
3873 ASSERT(i.inputs.size() == 1);
3874 RuntimeType& rtt = i.inputs[0]->rtt;
3875 DataType inType = rtt.valueType();
3876 if (inType == KindOfArray) {
3877 return Supported;
3879 if (rtt.isString()) {
3880 return Simple;
3882 return nativePlan(rtt.isNull() ||
3883 inType == KindOfBoolean || rtt.isInt());
3886 void
3887 TranslatorX64::analyzeUnaryBooleanOp(Tracelet& t, NormalizedInstruction& i) {
3888 i.m_txFlags = planUnaryBooleanOp(i);
3891 void
3892 TranslatorX64::translateUnaryBooleanOp(const Tracelet& t,
3893 const NormalizedInstruction& i) {
3894 const Opcode op = i.op();
3895 ASSERT(op == OpCastBool || op == OpEmptyL);
3896 const vector<DynLocation*>& inputs = i.inputs;
3897 ASSERT(inputs.size() == 1);
3898 ASSERT(i.outStack && !i.outLocal);
3899 bool instrNeg = (op == OpEmptyL);
3900 DataType inType = inputs[0]->valueType();
3901 const Location& inLoc = inputs[0]->location;
3902 bool boxedForm = (inputs[0]->outerType() == KindOfRef);
3905 switch (inType) {
3906 NULLCASE(): {
3907 m_regMap.allocOutputRegs(i);
3908 PhysReg outReg = getReg(i.outStack->location);
3909 emitImmReg(a, instrNeg, outReg);
3910 ASSERT(i.isNative());
3911 } break;
3912 case KindOfBoolean: {
3913 if (op == OpCastBool) {
3914 // Casting bool to bool is a nop. CastBool's input must be
3915 // a cell on the stack as per the bytecode specification.
3916 ASSERT(inputs[0]->isStack());
3917 ASSERT(inputs[0]->outerType() != KindOfRef);
3918 ASSERT(inputs[0]->location.space == Location::Stack);
3919 ASSERT(i.isNative());
3920 break;
3922 m_regMap.allocOutputRegs(i);
3923 PhysReg reg = getReg(inLoc);
3924 PhysReg outReg = getReg(i.outStack->location);
3925 if (boxedForm) {
3926 emitDeref(a, reg, outReg);
3927 } else {
3928 emitMovRegReg(reg, outReg);
3930 if (instrNeg) {
3931 a. xor_imm32_reg32(1, outReg);
3933 } break;
3934 case KindOfInt64: {
3935 m_regMap.allocOutputRegs(i);
3936 PhysReg reg = getReg(inLoc);
3937 PhysReg outReg = getReg(i.outStack->location);
3938 ScratchReg scratch(m_regMap);
3939 if (boxedForm) {
3940 emitDeref(a, reg, *scratch);
3941 emitConvertToBool(a, *scratch, outReg, instrNeg);
3942 } else {
3943 emitConvertToBool(a, reg, outReg, instrNeg);
3945 } break;
3946 STRINGCASE():
3947 case KindOfArray: {
3948 bool doDecRef = (inputs[0]->isStack());
3949 void* fptr = IS_STRING_TYPE(inType) ?
3950 (doDecRef ? (void*)str_to_bool : (void*)str0_to_bool) :
3951 (doDecRef ? (void*)arr_to_bool : (void*)arr0_to_bool);
3952 if (boxedForm) {
3953 EMIT_CALL(a, fptr, DEREF(inLoc));
3954 } else {
3955 EMIT_CALL(a, fptr, V(inLoc));
3957 if (!IS_STRING_TYPE(inType)) {
3958 recordReentrantCall(i);
3960 if (instrNeg) {
3961 a. xor_imm32_reg32(1, rax);
3963 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3964 RegInfo::DIRTY);
3965 } break;
3966 default: {
3967 ASSERT(false);
3968 } break;
3972 void
3973 TranslatorX64::analyzeBranchOp(Tracelet& t, NormalizedInstruction& i) {
3974 i.m_txFlags = Supported;
3977 // Helper for decoding dests of branch-like instructions at the end of
3978 // a basic block.
3979 static void branchDests(const Tracelet& t,
3980 const NormalizedInstruction& i,
3981 SrcKey* outTaken, SrcKey* outNotTaken,
3982 int immIdx = 0) {
3983 *outNotTaken = nextSrcKey(t, i);
3984 int dest = i.imm[immIdx].u_BA;
3985 *outTaken = SrcKey(curFunc(), i.offset() + dest);
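// Example: for a conditional jump at bytecode offset 20 whose u_BA immediate
// is -8, *outTaken ends up at offset 12 (20 + -8) within the current
// function, while *outNotTaken is just the instruction following the jump in
// the tracelet.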
3988 void TranslatorX64::branchWithFlagsSet(const Tracelet& t,
3989 const NormalizedInstruction& i,
3990 ConditionCode cc) {
3991 ASSERT(i.op() == OpJmpNZ || i.op() == OpJmpZ);
3992 // not_taken
3993 SrcKey taken, notTaken;
3994 branchDests(t, i, &taken, &notTaken);
3995 TRACE(3, "branchWithFlagsSet %d %d cc%02x jmp%sz\n",
3996 taken.offset(), notTaken.offset(), cc,
3997 i.isJmpNZ() ? "n" : "");
3998 emitCondJmp(taken, notTaken, cc);
4001 void TranslatorX64::fuseBranchAfterStaticBool(const Tracelet& t,
4002 const NormalizedInstruction& i,
4003 bool resultIsTrue) {
4004 ASSERT(i.breaksTracelet);
4005 ASSERT(i.next);
4006 NormalizedInstruction &nexti = *i.next;
4007 fuseBranchSync(t, i);
4008 bool isTaken = (resultIsTrue == nexti.isJmpNZ());
4009 SrcKey taken, notTaken;
4010 branchDests(t, nexti, &taken, &notTaken);
4011 if (isTaken) {
4012 emitBindJmp(taken);
4013 } else {
4014 emitBindJmp(notTaken);
4018 void TranslatorX64::fuseBranchSync(const Tracelet& t,
4019 const NormalizedInstruction& i) {
4020 // Don't bother sync'ing the output of this instruction.
4021 m_regMap.scrubStackEntries(i.outStack->location.offset);
4022 syncOutputs(t);
4025 void TranslatorX64::fuseBranchAfterBool(const Tracelet& t,
4026 const NormalizedInstruction& i,
4027 ConditionCode cc) {
4028 ASSERT(i.breaksTracelet);
4029 ASSERT(i.next);
4030 NormalizedInstruction &nexti = *i.next;
4031 if (!i.next->isJmpNZ()) cc = ccNegate(cc);
4032 branchWithFlagsSet(t, nexti, cc);
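// Branch fusion in a nutshell: the preceding comparison already left its
// result in the flags, so the following JmpNZ/JmpZ consumes the flags
// directly instead of testing a materialized boolean. When the next
// instruction is JmpZ rather than JmpNZ the condition is negated first,
// e.g. a fused "Eq; JmpZ" branches on CC_NE instead of CC_E.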
4035 void
4036 TranslatorX64::translateBranchOp(const Tracelet& t,
4037 const NormalizedInstruction& i) {
4038 DEBUG_ONLY const Opcode op = i.op();
4039 ASSERT(op == OpJmpZ || op == OpJmpNZ);
4041 bool isZ = !i.isJmpNZ();
4042 ASSERT(i.inputs.size() == 1);
4043 ASSERT(!i.outStack && !i.outLocal);
4044 m_regMap.allocOutputRegs(i);
4045 const DynLocation& in = *i.inputs[0];
4046 const RuntimeType& rtt = in.rtt;
4047 const Location& inLoc = in.location;
4048 DataType inputType = rtt.outerType();
4049 PhysReg src = getReg(inLoc);
4051 * Careful. We're operating with intimate knowledge of the
4052 * constraints of the register allocator from here out.
4054 if (rtt.isString() || inputType == KindOfArray) {
4055 // str_to_bool and arr_to_bool will decRef for us
4056 void* fptr = IS_STRING_TYPE(inputType) ? (void*)str_to_bool :
4057 (void*)arr_to_bool;
4058 EMIT_CALL(a, fptr, V(inLoc));
4059 src = rax;
4060 ScratchReg sr(m_regMap, rax);
4061 syncOutputs(t);
4062 } else if (inputType != KindOfUninit &&
4063 inputType != KindOfNull &&
4064 inputType != KindOfBoolean &&
4065 !rtt.isInt()) {
4066 // input might be in-flight
4067 m_regMap.cleanLoc(inLoc);
4068 // Cast to a bool.
4069 if (false) {
4070 TypedValue *tv = NULL;
4071 int64 ret = tv_to_bool(tv);
4072 if (ret) {
4073 printf("zoot");
4076 TRACE(2, Trace::prettyNode("tv_to_bool", inLoc) + string("\n"));
4077 // tv_to_bool will decRef for us if appropriate
4078 EMIT_CALL(a, tv_to_bool, A(inLoc));
4079 recordReentrantCall(i);
4080 src = rax;
4081 ScratchReg sr(m_regMap, rax);
4082 syncOutputs(t);
4083 } else {
4084 syncOutputs(t);
4087 // not_taken
4088 SrcKey taken, notTaken;
4089 branchDests(t, i, &taken, &notTaken);
4091 // Since null always evaluates to false, we can emit an
4092 // unconditional jump. OpJmpNZ will never take the branch
4093 // while OpJmpZ will always take the branch.
4094 if (IS_NULL_TYPE(inputType)) {
4095 TRACE(1, "branch on Null -> always Z\n");
4096 emitBindJmp(isZ ? taken : notTaken);
4097 return;
4099 a. test_reg64_reg64(src, src);
4100 branchWithFlagsSet(t, i, isZ ? CC_Z : CC_NZ);
4103 void
4104 TranslatorX64::analyzeCGetL(Tracelet& t, NormalizedInstruction& i) {
4105 ASSERT(i.inputs.size() == 1);
4106 const RuntimeType& type = i.inputs[0]->rtt;
4107 i.m_txFlags = type.isUninit() ? Supported : Native;
4110 void
4111 TranslatorX64::translateCGetL(const Tracelet& t,
4112 const NormalizedInstruction& i) {
4113 const DEBUG_ONLY Opcode op = i.op();
4114 ASSERT(op == OpFPassL || op == OpCGetL);
4115 const vector<DynLocation*>& inputs = i.inputs;
4116 ASSERT(inputs.size() == 1);
4117 ASSERT(inputs[0]->isLocal());
4118 DataType outType = i.inputs[0]->valueType();
4119 ASSERT(outType != KindOfInvalid);
4121 // Check for use of an undefined local.
4122 if (inputs[0]->rtt.isUninit()) {
4123 ASSERT(!i.outStack || i.outStack->outerType() == KindOfNull);
4124 outType = KindOfNull;
4125 ASSERT(inputs[0]->location.offset < curFunc()->numLocals());
4126 const StringData* name = local_name(inputs[0]->location);
4127 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
4128 recordReentrantCall(i);
4129 if (i.outStack) {
4130 m_regMap.allocOutputRegs(i);
4132 return;
4136 * we can merge a CGetL with a following InstanceOfD
4137 * to avoid the incRef/decRef on the result (see
4138 * analyzeSecondPass).
4140 * outStack will be clear in that case.
4142 if (!i.outStack) return;
4143 ASSERT(outType == i.outStack->outerType());
4144 m_regMap.allocOutputRegs(i);
4145 if (IS_NULL_TYPE(outType)) return;
4146 PhysReg dest = getReg(i.outStack->location);
4148 if (i.manuallyAllocInputs && !m_regMap.hasReg(inputs[0]->location)) {
4149 fill(inputs[0]->location, dest);
4150 } else {
4151 PhysReg localReg = getReg(inputs[0]->location);
4152 emitMovRegReg(localReg, dest);
4154 if (inputs[0]->isVariant()) {
4155 emitDeref(a, dest, dest);
4157 ASSERT(outType != KindOfStaticString);
4158 emitIncRef(dest, outType);
4161 void
4162 TranslatorX64::analyzeCGetL2(Tracelet& t,
4163 NormalizedInstruction& ni) {
4164 const int locIdx = 1;
4165 ASSERT(ni.inputs.size() == 2);
4166 ni.m_txFlags = ni.inputs[locIdx]->rtt.isUninit() ? Supported : Native;
4169 void
4170 TranslatorX64::translateCGetL2(const Tracelet& t,
4171 const NormalizedInstruction& ni) {
4172 const int stackIdx = 0;
4173 const int locIdx = 1;
4175 // Note: even if it's an undefined local we need to move a few
4176 // values around to have outputs end up in the right place.
4177 const bool undefinedLocal = ni.inputs[locIdx]->rtt.isUninit();
4179 if (undefinedLocal) {
4180 ASSERT(ni.outStack->valueType() == KindOfNull);
4181 ASSERT(ni.inputs[locIdx]->location.offset < curFunc()->numLocals());
4182 const StringData* name = local_name(ni.inputs[locIdx]->location);
4184 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
4185 recordReentrantCall(ni);
4187 m_regMap.allocInputRegs(ni);
4190 m_regMap.allocOutputRegs(ni);
4191 const PhysReg stackIn = getReg(ni.inputs[stackIdx]->location);
4192 const PhysReg localIn = getReg(ni.inputs[locIdx]->location);
4193 const PhysReg stackOut = getReg(ni.outStack2->location);
4194 ASSERT(ni.inputs[stackIdx]->location.isStack());
4195 ASSERT(ni.inputs[locIdx]->location.isLocal());
4198 * These registers overlap a bit, so we can swap a few bindings to
4199 * avoid a move.
4201 ASSERT(stackIn == getReg(ni.outStack->location) && localIn != stackOut);
4202 m_regMap.swapRegisters(stackIn, stackOut);
4203 const PhysReg cellOut = getReg(ni.outStack->location);
4204 ASSERT(cellOut != stackIn);
4205 if (ni.inputs[locIdx]->isVariant()) {
4206 emitDeref(a, localIn, cellOut);
4207 } else if (!undefinedLocal) {
4208 emitMovRegReg(localIn, cellOut);
4210 emitIncRef(cellOut, ni.inputs[locIdx]->valueType());
4213 void
4214 TranslatorX64::analyzeVGetL(Tracelet& t,
4215 NormalizedInstruction& i) {
4216 i.m_txFlags = Native;
4219 void
4220 TranslatorX64::translateVGetL(const Tracelet& t,
4221 const NormalizedInstruction& i) {
4222 const DEBUG_ONLY Opcode op = i.op();
4223 ASSERT(op == OpVGetL || op == OpFPassL);
4224 const vector<DynLocation*>& inputs = i.inputs;
4225 ASSERT(inputs.size() == 1);
4226 ASSERT(i.outStack);
4227 ASSERT(inputs[0]->isLocal());
4228 ASSERT(i.outStack->rtt.outerType() == KindOfRef);
4230 PhysReg localReg = getReg(inputs[0]->location);
4231 PhysReg dest;
4232 if (inputs[0]->rtt.outerType() != KindOfRef) {
4233 emitBox(inputs[0]->rtt.outerType(), localReg);
4234 m_regMap.bind(rax, inputs[0]->location, KindOfRef,
4235 RegInfo::DIRTY);
4236 m_regMap.allocOutputRegs(i);
4237 dest = getReg(i.outStack->location);
4238 emitMovRegReg(rax, dest);
4239 } else {
4240 m_regMap.allocOutputRegs(i);
4241 dest = getReg(i.outStack->location);
4242 emitMovRegReg(localReg, dest);
4244 emitIncRef(dest, KindOfRef);
4247 void
4248 TranslatorX64::analyzeAssignToLocalOp(Tracelet& t,
4249 NormalizedInstruction& ni) {
4250 const int locIdx = 1;
4251 ni.m_txFlags = planHingesOnRefcounting(ni.inputs[locIdx]->outerType());
4254 void
4255 TranslatorX64::translateAssignToLocalOp(const Tracelet& t,
4256 const NormalizedInstruction& ni) {
4257 const int rhsIdx = 0;
4258 const int locIdx = 1;
4259 const Opcode op = ni.op();
4260 ASSERT(op == OpSetL || op == OpBindL);
4261 ASSERT(ni.inputs.size() == 2);
4262 ASSERT((op == OpBindL) ==
4263 (ni.inputs[rhsIdx]->outerType() == KindOfRef));
4265 ASSERT(!ni.outStack || ni.inputs[locIdx]->location != ni.outStack->location);
4266 ASSERT(ni.outLocal);
4267 ASSERT(ni.inputs[locIdx]->location == ni.outLocal->location);
4268 ASSERT(ni.inputs[rhsIdx]->isStack());
4270 m_regMap.allocOutputRegs(ni);
4271 const PhysReg rhsReg = getReg(ni.inputs[rhsIdx]->location);
4272 const PhysReg localReg = getReg(ni.outLocal->location);
4273 const DataType oldLocalType = ni.inputs[locIdx]->outerType();
4274 const DataType rhsType = ni.inputs[rhsIdx]->outerType();
4275 ASSERT(localReg != rhsReg);
4277 LazyScratchReg oldLocalReg(m_regMap);
4278 DataType decRefType;
4280 // For SetL, when the local is boxed, we need to change the
4281 // type/value of the inner cell. If we're doing BindL, we don't
4282 // want to affect the old inner cell in any case (except to decref
4283 // it).
4284 const bool affectInnerCell = op == OpSetL &&
4285 oldLocalType == KindOfRef;
4286 if (affectInnerCell) {
4287 ASSERT(rhsType != KindOfRef);
4289 oldLocalReg.alloc();
4290 emitDeref(a, localReg, *oldLocalReg);
4291 emitStoreTypedValue(a, rhsType, rhsReg, 0, localReg);
4292 decRefType = ni.inputs[locIdx]->rtt.innerType();
4293 } else {
4295 * Instead of emitting a mov, just swap the locations these two
4296 * registers are mapped to.
4298 * TODO: this might not be the best idea now that the register
4299 * allocator has some awareness about what is a local. (Maybe we
4300 * should just xchg_reg64_reg64.)
4302 m_regMap.swapRegisters(rhsReg, localReg);
4303 decRefType = oldLocalType;
4306 // If we're giving stack output, it's important to incref before
4307 // calling a possible destructor, since the destructor could have
4308 // access to the local if it is a var.
4309 if (ni.outStack) {
4310 emitIncRef(rhsReg, rhsType);
4311 } else {
4312 SKTRACE(3, ni.source, "hoisting Pop* into current instr\n");
4315 emitDecRef(ni, oldLocalReg.isAllocated() ? *oldLocalReg : localReg,
4316 decRefType);
4318 if (ni.outStack && !IS_NULL_TYPE(ni.outStack->outerType())) {
4319 PhysReg stackReg = getReg(ni.outStack->location);
4320 emitMovRegReg(rhsReg, stackReg);
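/*
 * Why the incRef of the rhs happens before the decRef of the old value: the
 * decRef may run an arbitrary destructor, and that destructor can still see
 * the local (e.g. through a reference), so the new value must already be
 * protected. A toy sketch of the idiom (Ref is an invented stand-in, not an
 * HHVM type):
 *
 *   void assign(Ref*& slot, Ref* rhs) {
 *     rhs->incRef();        // protect the new value first
 *     Ref* old = slot;
 *     slot = rhs;
 *     old->decRef();        // now safe even if this runs a destructor
 *   }
 */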
4324 static void
4325 planPop(NormalizedInstruction& i) {
4326 if (i.prev && i.prev->outputPredicted) {
4327 i.prev->outputPredicted = false;
4328 i.inputs[0]->rtt = RuntimeType(KindOfInvalid);
4330 DataType type = i.inputs[0]->outerType();
4331 i.m_txFlags =
4332 (type == KindOfInvalid || IS_REFCOUNTED_TYPE(type)) ? Supported : Native;
4333 i.manuallyAllocInputs = true;
4336 void TranslatorX64::analyzePopC(Tracelet& t, NormalizedInstruction& i) {
4337 planPop(i);
4340 void TranslatorX64::analyzePopV(Tracelet& t, NormalizedInstruction& i) {
4341 planPop(i);
4344 void TranslatorX64::analyzePopR(Tracelet& t, NormalizedInstruction& i) {
4345 planPop(i);
4348 void
4349 TranslatorX64::translatePopC(const Tracelet& t,
4350 const NormalizedInstruction& i) {
4351 ASSERT(i.inputs.size() == 1);
4352 ASSERT(!i.outStack && !i.outLocal);
4353 if (i.inputs[0]->rtt.isVagueValue()) {
4354 PhysReg base;
4355 int disp;
4356 locToRegDisp(i.inputs[0]->location, &base, &disp);
4357 emitDecRefGeneric(i, base, disp);
4358 } else {
4359 emitDecRefInput(a, i, 0);
4363 void
4364 TranslatorX64::translatePopV(const Tracelet& t,
4365 const NormalizedInstruction& i) {
4366 ASSERT(i.inputs[0]->rtt.isVagueValue() ||
4367 i.inputs[0]->isVariant());
4368 translatePopC(t, i);
4371 void
4372 TranslatorX64::translatePopR(const Tracelet& t,
4373 const NormalizedInstruction& i) {
4374 translatePopC(t, i);
4377 void
4378 TranslatorX64::translateUnboxR(const Tracelet& t,
4379 const NormalizedInstruction& i) {
4380 ASSERT(!i.inputs[0]->rtt.isVagueValue());
4382 // If the value on the top of a stack is a var, unbox it and
4383 // leave it on the top of the stack.
4384 if (i.inputs[0]->isVariant()) {
4385 emitUnboxTopOfStack(i);
4389 void
4390 TranslatorX64::translateNull(const Tracelet& t,
4391 const NormalizedInstruction& i) {
4392 ASSERT(i.inputs.size() == 0);
4393 ASSERT(!i.outLocal);
4394 if (i.outStack) {
4395 ASSERT(i.outStack->outerType() == KindOfNull);
4397 // We have to mark the output register as dirty to ensure that
4398 // the type gets spilled at the end of the tracelet
4399 m_regMap.allocOutputRegs(i);
4401 /* nop */
4404 void
4405 TranslatorX64::translateTrue(const Tracelet& t,
4406 const NormalizedInstruction& i) {
4407 ASSERT(i.inputs.size() == 0);
4408 ASSERT(!i.outLocal);
4409 if (i.outStack) {
4410 m_regMap.allocOutputRegs(i);
4411 PhysReg rdest = getReg(i.outStack->location);
4412 emitImmReg(a, 1, rdest);
4416 void
4417 TranslatorX64::translateFalse(const Tracelet& t,
4418 const NormalizedInstruction& i) {
4419 ASSERT(i.inputs.size() == 0);
4420 ASSERT(!i.outLocal);
4421 if (i.outStack) {
4422 m_regMap.allocOutputRegs(i);
4423 PhysReg dest = getReg(i.outStack->location);
4424 emitImmReg(a, false, dest);
4428 void
4429 TranslatorX64::translateInt(const Tracelet& t,
4430 const NormalizedInstruction& i) {
4431 ASSERT(i.inputs.size() == 0);
4432 ASSERT(!i.outLocal);
4433 if (i.outStack) {
4434 ASSERT(i.outStack->isInt());
4435 m_regMap.allocOutputRegs(i);
4436 PhysReg dest = getReg(i.outStack->location);
4437 uint64_t srcImm = i.imm[0].u_I64A;
4438 emitImmReg(a, srcImm, dest);
4442 void
4443 TranslatorX64::translateString(const Tracelet& t,
4444 const NormalizedInstruction& i) {
4445 ASSERT(i.inputs.size() == 0);
4446 ASSERT(!i.outLocal);
4447 if (!i.outStack) return;
4448 ASSERT(Translator::typeIsString(i.outStack->outerType()));
4449 m_regMap.allocOutputRegs(i);
4450 PhysReg dest = getReg(i.outStack->location);
4451 uint64_t srcImm = (uintptr_t)curUnit()->lookupLitstrId(i.imm[0].u_SA);
4452 // XXX: can simplify the lookup here by just fishing it out of the
4453 // output's valueString().
4454 // We are guaranteed that the string is static, so we do not need to
4455 // increment the refcount
4456 ASSERT(((StringData*)srcImm)->isStatic());
4457 SKTRACE(2, i.source, "Litstr %d -> %p \"%s\"\n",
4458 i.imm[0].u_SA, (StringData*)srcImm,
4459 Util::escapeStringForCPP(((StringData*)srcImm)->data()).c_str());
4460 emitImmReg(a, srcImm, dest);
4463 void
4464 TranslatorX64::translateArray(const Tracelet& t,
4465 const NormalizedInstruction& i) {
4466 ASSERT(i.inputs.size() == 0);
4467 ASSERT(!i.outLocal);
4468 if (i.outStack) {
4469 ASSERT(i.outStack->outerType() == KindOfArray);
4470 m_regMap.allocOutputRegs(i);
4471 ArrayData* ad = curUnit()->lookupArrayId(i.imm[0].u_AA);
4472 PhysReg r = getReg(i.outStack->location);
4473 emitImmReg(a, uint64(ad), r);
4474 // We are guaranteed that the array is static, so we do not need to
4475 // increment the refcount
4476 ASSERT(ad->isStatic());
4480 ArrayData*
4481 HOT_FUNC_VM
4482 newArrayHelper(int capacity) {
4483 ArrayData *a = NEW(HphpArray)(capacity);
4484 a->incRefCount();
4485 TRACE(2, "newArrayHelper: capacity %d\n", capacity);
4486 return a;
4489 void
4490 TranslatorX64::translateNewArray(const Tracelet& t,
4491 const NormalizedInstruction& i) {
4492 ASSERT(i.inputs.size() == 0);
4493 ASSERT(i.outStack && !i.outLocal);
4494 ASSERT(i.outStack->outerType() == KindOfArray);
4495 int capacity = i.imm[0].u_IVA;
4496 if (capacity == 0) {
4497 m_regMap.allocOutputRegs(i);
4498 PhysReg r = getReg(i.outStack->location);
4499 emitImmReg(a, uint64(HphpArray::GetStaticEmptyArray()), r);
4500 // We are guaranteed that the new array is static, so we do not need to
4501 // increment the refcount
4502 ASSERT(HphpArray::GetStaticEmptyArray()->isStatic());
4503 } else {
4504 // create an empty array with a nonzero capacity
4505 if (false) {
4506 ArrayData* a = newArrayHelper(42);
4507 printf("%p", a); // use ret
4509 EMIT_CALL(a, newArrayHelper, IMM(capacity));
4510 m_regMap.bind(rax, i.outStack->location, KindOfArray, RegInfo::DIRTY);
4514 void TranslatorX64::analyzeNewTuple(Tracelet& t, NormalizedInstruction& i) {
4515 i.m_txFlags = Simple; // the array constructors are not re-entrant.
4516 i.manuallyAllocInputs = true; // all values passed via stack.
4519 ArrayData* newTupleHelper(int n, TypedValue* values) {
4520 HphpArray* a = NEW(HphpArray)(n, values);
4521 a->incRefCount();
4522 TRACE(2, "newTupleHelper: size %d\n", n);
4523 return a;
4526 void TranslatorX64::translateNewTuple(const Tracelet& t,
4527 const NormalizedInstruction& i) {
4528 int arity = i.imm[0].u_IVA;
4529 ASSERT(arity > 0 && i.inputs.size() == unsigned(arity));
4530 ASSERT(i.outStack && !i.outLocal);
4531 for (int j = 0; j < arity; j++) {
4532 ASSERT(i.inputs[j]->outerType() != KindOfRef);
4533 ASSERT(i.inputs[j]->isStack());
4536 // We pass the values by address, so we need to sync them back to memory
4537 for (int j = 0; j < arity; j++) {
4538 m_regMap.cleanLoc(i.inputs[j]->location);
4540 if (false) {
4541 TypedValue* rhs = 0;
4542 ArrayData* ret = newTupleHelper(arity, rhs);
4543 printf("%p", ret); // use ret
4545 EMIT_CALL(a, newTupleHelper, IMM(arity), A(i.inputs[0]->location));
4546 // newTupleHelper returns the up-to-date array pointer in rax.
4547 // Therefore, we can bind rax to the result location and mark it as dirty.
4548 m_regMap.bind(rax, i.inputs[arity-1]->location, KindOfArray, RegInfo::DIRTY);
4551 void
4552 TranslatorX64::analyzeNop(Tracelet& t, NormalizedInstruction& i) {
4553 i.m_txFlags = Native;
4556 void
4557 TranslatorX64::translateNop(const Tracelet& t,
4558 const NormalizedInstruction& i) {
4561 void
4562 TranslatorX64::analyzeAddElemC(Tracelet& t, NormalizedInstruction& i) {
4563 i.m_txFlags = supportedPlan(i.inputs[2]->outerType() == KindOfArray &&
4564 (i.inputs[1]->isInt() ||
4565 i.inputs[1]->isString()));
4568 void
4569 TranslatorX64::translateAddElemC(const Tracelet& t,
4570 const NormalizedInstruction& i) {
4571 ASSERT(i.outStack && !i.outLocal);
4572 ASSERT(i.inputs.size() >= 3);
4573 const DynLocation& arr = *i.inputs[2];
4574 const DynLocation& key = *i.inputs[1];
4575 const DynLocation& val = *i.inputs[0];
4576 ASSERT(!arr.isVariant()); // not handling variants.
4577 ASSERT(!key.isVariant());
4578 ASSERT(!val.isVariant());
4580 const Location& arrLoc = arr.location;
4581 const Location& keyLoc = key.location;
4582 const Location& valLoc = val.location;
4584 ASSERT(arrLoc.isStack());
4585 ASSERT(keyLoc.isStack());
4586 ASSERT(valLoc.isStack());
4588 // If either the key or the rhs is not Int64, we will need to pass the
4589 // rhs by address, so we need to sync it back to memory
4590 if (!key.rtt.isInt() || !val.rtt.isInt()) {
4591 m_regMap.cleanLoc(valLoc);
4594 // The array_setm helpers will decRef any old value that is
4595 // overwritten if appropriate. If copy-on-write occurs, it will also
4596 // incRef the new array and decRef the old array for us. Finally,
4597 // some of the array_setm helpers will decRef the key if it is a
4598 // string (for cases where the key is not a local), while others do
4599 // not (for cases where the key is a local).
4600 void* fptr;
4601 if (key.rtt.isInt() && val.rtt.isInt()) {
4602 if (false) { // type-check
4603 TypedValue* cell = NULL;
4604 ArrayData* arr = NULL;
4605 ArrayData* ret = array_setm_ik1_iv(cell, arr, 12, 3);
4606 printf("%p", ret); // use ret
4608 // If the rhs is Int64, we can use a specialized helper
4609 EMIT_CALL(a, array_setm_ik1_iv,
4610 IMM(0),
4611 V(arrLoc),
4612 V(keyLoc),
4613 V(valLoc));
4614 recordReentrantCall(i);
4615 } else if (key.rtt.isInt() || key.rtt.isString()) {
4616 if (false) { // type-check
4617 TypedValue* cell = NULL;
4618 TypedValue* rhs = NULL;
4619 StringData* strkey = NULL;
4620 ArrayData* arr = NULL;
4621 ArrayData* ret;
4622 ret = array_setm_ik1_v0(cell, arr, 12, rhs);
4623 printf("%p", ret); // use ret
4624 ret = array_setm_sk1_v0(cell, arr, strkey, rhs);
4625 printf("%p", ret); // use ret
4627 // Otherwise, we pass the rhs by address
4628 fptr = key.rtt.isString() ? (void*)array_setm_sk1_v0 :
4629 (void*)array_setm_ik1_v0;
4630 EMIT_CALL(a, fptr,
4631 IMM(0),
4632 V(arrLoc),
4633 V(keyLoc),
4634 A(valLoc));
4635 recordReentrantCall(i);
4636 } else {
4637 ASSERT(false);
4639 // The array value may have changed, so we need to invalidate any
4640 // register we have associated with arrLoc
4641 m_regMap.invalidate(arrLoc);
4642 // The array_setm helper returns the up-to-date array pointer in rax.
4643 // Therefore, we can bind rax to arrLoc and mark it as dirty.
4644 m_regMap.bind(rax, arrLoc, KindOfArray, RegInfo::DIRTY);
4647 void
4648 TranslatorX64::analyzeAddNewElemC(Tracelet& t, NormalizedInstruction& i) {
4649 ASSERT(i.inputs.size() == 2);
4650 i.m_txFlags = supportedPlan(i.inputs[1]->outerType() == KindOfArray);
4653 void
4654 TranslatorX64::translateAddNewElemC(const Tracelet& t,
4655 const NormalizedInstruction& i) {
4656 ASSERT(i.inputs.size() == 2);
4657 ASSERT(i.outStack && !i.outLocal);
4658 ASSERT(i.inputs[0]->outerType() != KindOfRef);
4659 ASSERT(i.inputs[1]->outerType() != KindOfRef);
4660 ASSERT(i.inputs[0]->isStack());
4661 ASSERT(i.inputs[1]->isStack());
4663 Location arrLoc = i.inputs[1]->location;
4664 Location valLoc = i.inputs[0]->location;
4666 // We pass the rhs by address, so we need to sync it back to memory
4667 m_regMap.cleanLoc(valLoc);
4669 // The array_setm helpers will decRef any old value that is
4670 // overwritten if appropriate. If copy-on-write occurs, it will also
4671 // incRef the new array and decRef the old array for us. Finally,
4672 // some of the array_setm helpers will decRef the key if it is a
4673 // string (for cases where the key is not a local), while others do
4674 // not (for cases where the key is a local).
4675 if (false) { // type-check
4676 TypedValue* cell = NULL;
4677 TypedValue* rhs = NULL;
4678 ArrayData* arr = NULL;
4679 ArrayData* ret;
4680 ret = array_setm_wk1_v0(cell, arr, rhs);
4681 printf("%p", ret); // use ret
4683 EMIT_CALL(a, array_setm_wk1_v0,
4684 IMM(0),
4685 V(arrLoc),
4686 A(valLoc));
4687 recordReentrantCall(i);
4688 // The array value may have changed, so we need to invalidate any
4689 // register we have associated with arrLoc
4690 m_regMap.invalidate(arrLoc);
4691 // The array_setm helper returns the up-to-date array pointer in rax.
4692 // Therefore, we can bind rax to arrLoc and mark it as dirty.
4693 m_regMap.bind(rax, arrLoc, KindOfArray, RegInfo::DIRTY);
4696 static void undefCns(const StringData* nm) {
4697 VMRegAnchor _;
4698 TypedValue *cns = g_vmContext->getCns(const_cast<StringData*>(nm));
4699 if (!cns) {
4700 raise_notice(Strings::UNDEFINED_CONSTANT, nm->data(), nm->data());
4701 g_vmContext->getStack().pushStringNoRc(const_cast<StringData*>(nm));
4702 } else {
4703 Cell* c1 = g_vmContext->getStack().allocC();
4704 TV_READ_CELL(cns, c1);
4708 void TranslatorX64::emitSideExit(Asm& a, const NormalizedInstruction& i,
4709 bool next) {
4710 const NormalizedInstruction& dest = next ? *i.next : i;
4712 SKTRACE(3, i.source, "sideexit check %p\n", a.code.frontier);
4713 // NB: if next == true, we are assuming here that stack elements
4714 // spit out by this instruction are already clean and sync'd back to
4715 // the top slot of the stack.
4716 m_regMap.scrubStackEntries(dest.stackOff);
4717 m_regMap.cleanAll();
4718 emitRB(a, RBTypeSideExit, i.source);
4719 int stackDisp = dest.stackOff;
4720 if (stackDisp != 0) {
4721 SKTRACE(3, i.source, "stack bump %d => %x\n", stackDisp,
4722 -cellsToBytes(stackDisp));
4723 a. add_imm32_reg64(-cellsToBytes(stackDisp), rVmSp);
4725 emitBindJmp(a, dest.source, REQ_BIND_SIDE_EXIT);
4728 void
4729 TranslatorX64::translateCns(const Tracelet& t,
4730 const NormalizedInstruction& i) {
4731 ASSERT(i.inputs.size() == 0);
4732 ASSERT(i.outStack && !i.outLocal);
4734 // OK to burn "name" into TC: it was merged into the static string
4735 // table, so as long as this code is reachable, the string should
4736 // be too.
4737 DataType outType = i.outStack->valueType();
4738 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4739 const TypedValue* tv = g_vmContext->getCns(name, true, false);
4740 bool checkDefined = false;
4741 if (outType != KindOfInvalid && tv == NULL &&
4742 !RuntimeOption::RepoAuthoritative) {
4743 PreConstDepMap::accessor acc;
4744 tv = findUniquePreConst(acc, name);
4745 if (tv != NULL) {
4746 checkDefined = true;
4747 acc->second.srcKeys.insert(t.m_sk);
4748 Stats::emitInc(a, Stats::Tx64_CnsFast);
4749 } else {
4750 // We had a unique value while analyzing but don't anymore. This
4751 // should be rare so just punt to keep things simple.
4752 punt();
4755 using namespace TargetCache;
4756 if (tv && tvIsStatic(tv)) {
4757 m_regMap.allocOutputRegs(i);
4758 if (checkDefined) {
4759 size_t bit = allocCnsBit(name);
4760 uint32 mask;
4761 CacheHandle ch = bitOffToHandleAndMask(bit, mask);
4762 // The 'test' instruction takes a signed immediate and the mask is
4763 // unsigned, but everything works out okay because the immediate is
4764 // the same size as the other operand. However, we have to sign-extend
4765 // the mask to 64 bits to make the assembler happy.
4766 int64_t imm = (int64_t)(int32)mask;
4767 a.test_imm32_disp_reg32(imm, ch, rVmTl);
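// e.g. if the constant's bit lands at position 31 of its word, mask is
// 0x80000000u; (int32)mask is INT32_MIN, and sign-extending it to 64 bits
// gives 0xffffffff80000000, whose low 32 bits are what 'test' compares.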
4769 // If we get to the optimistic translation and the constant
4770 // isn't defined, our tracelet is ruined because the type may
4771 // not be what we expect. If we were expecting KindOfString we
4772 // could theoretically keep going here since that's the type
4773 // of an undefined constant expression, but it should be rare
4774 // enough that it's not worth the complexity.
4775 UnlikelyIfBlock<CC_Z> ifZero(a, astubs);
4776 Stats::emitInc(astubs, Stats::Tx64_CnsFast, -1);
4777 emitSideExit(astubs, i, false);
4780 // Its type and value are known at compile-time.
4781 ASSERT(tv->m_type == outType ||
4782 (IS_STRING_TYPE(tv->m_type) && IS_STRING_TYPE(outType)));
4783 PhysReg r = getReg(i.outStack->location);
4784 a. mov_imm64_reg(tv->m_data.num, r);
4785 // tv is static; no need to incref
4786 return;
4789 Stats::emitInc(a, Stats::Tx64_CnsSlow);
4790 CacheHandle ch = allocConstant(name);
4791 TRACE(2, "Cns: %s -> ch %ld\n", name->data(), ch);
4792 // Load the constant out of the thread-private tl_targetCaches.
4793 ScratchReg cns(m_regMap);
4794 a. lea_reg64_disp_reg64(rVmTl, ch, *cns);
4795 a. cmp_imm32_disp_reg32(0, TVOFF(m_type), *cns);
4796 DiamondReturn astubsRet;
4797 int stackDest = 0 - int(sizeof(Cell)); // popped - pushed
4799 // It's tempting to dedup these, but not obvious we really can;
4800 // at least stackDest and tmp are specific to the translation
4801 // context.
4802 UnlikelyIfBlock<CC_Z> ifb(a, astubs, &astubsRet);
4803 EMIT_CALL(astubs, undefCns, IMM((uintptr_t)name));
4804 recordReentrantStubCall(i);
4805 m_regMap.invalidate(i.outStack->location);
4808 // Bitwise copy to output area.
4809 emitCopyToStack(a, i, *cns, stackDest);
4810 m_regMap.invalidate(i.outStack->location);
4813 void
4814 TranslatorX64::analyzeDefCns(Tracelet& t,
4815 NormalizedInstruction& i) {
4816 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4817 /* don't bother to translate if it names a builtin constant */
4818 i.m_txFlags = supportedPlan(!g_vmContext->getCns(name, true, false));
4821 typedef void (*defCnsHelper_func_t)(TargetCache::CacheHandle ch, Variant *inout,
4822 StringData *name, size_t bit);
4823 template<bool setBit>
4824 static void defCnsHelper(TargetCache::CacheHandle ch, Variant *inout,
4825 StringData *name, size_t bit) {
4826 using namespace TargetCache;
4827 TypedValue *tv = (TypedValue*)handleToPtr(ch);
4828 if (LIKELY(tv->m_type == KindOfUninit &&
4829 inout->isAllowedAsConstantValue())) {
4830 inout->setEvalScalar();
4831 if (LIKELY(g_vmContext->insertCns(name, (TypedValue*)inout))) {
4832 tvDup((TypedValue*)inout, tv);
4833 *inout = true;
4834 if (setBit) {
4835 DEBUG_ONLY bool alreadyDefined = testAndSetBit(bit);
4836 ASSERT(!alreadyDefined);
4838 return;
4840 tv = (TypedValue*)&false_varNR;
4843 if (tv->m_type != KindOfUninit) {
4844 raise_warning(Strings::CONSTANT_ALREADY_DEFINED, name->data());
4845 } else {
4846 ASSERT(!inout->isAllowedAsConstantValue());
4847 raise_warning(Strings::CONSTANTS_MUST_BE_SCALAR);
4849 *inout = false;
4852 void
4853 TranslatorX64::translateDefCns(const Tracelet& t,
4854 const NormalizedInstruction& i) {
4855 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4857 if (false) {
4858 TargetCache::CacheHandle ch = 0;
4859 size_t bit = 0;
4860 Variant *inout = 0;
4861 StringData *name = 0;
4862 defCnsHelper<true>(ch, inout, name, bit);
4863 defCnsHelper<false>(ch, inout, name, bit);
4866 using namespace TargetCache;
4867 CacheHandle ch = allocConstant(name);
4868 TRACE(2, "DefCns: %s -> ch %ld\n", name->data(), ch);
4870 m_regMap.cleanLoc(i.inputs[0]->location);
4871 if (RuntimeOption::RepoAuthoritative) {
4872 EMIT_CALL(a, (defCnsHelper_func_t)defCnsHelper<false>,
4873 IMM(ch), A(i.inputs[0]->location),
4874 IMM((uint64)name));
4875 } else {
4876 EMIT_CALL(a, (defCnsHelper_func_t)defCnsHelper<true>,
4877 IMM(ch), A(i.inputs[0]->location),
4878 IMM((uint64)name), IMM(allocCnsBit(name)));
4880 recordReentrantCall(i);
4881 m_regMap.invalidate(i.outStack->location);
4884 void
4885 TranslatorX64::translateClsCnsD(const Tracelet& t,
4886 const NormalizedInstruction& i) {
4887 using namespace TargetCache;
4888 const NamedEntityPair& namedEntityPair =
4889 curUnit()->lookupNamedEntityPairId(i.imm[1].u_SA);
4890 ASSERT(namedEntityPair.second);
4891 const StringData *clsName = namedEntityPair.first;
4892 ASSERT(clsName->isStatic());
4893 StringData* cnsName = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4894 ASSERT(cnsName->isStatic());
4895 StringData* fullName = StringData::GetStaticString(
4896 Util::toLower(clsName->data()) + "::" + cnsName->data());
4898 Stats::emitInc(a, Stats::TgtCache_ClsCnsHit);
4899 CacheHandle ch = allocClassConstant(fullName);
4900 ScratchReg cns(m_regMap);
4901 a.lea_reg64_disp_reg64(rVmTl, ch, *cns);
4902 a.cmp_imm32_disp_reg32(0, TVOFF(m_type), *cns);
4904 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
4906 if (false) { // typecheck
4907 TypedValue* tv = NULL;
4908 UNUSED TypedValue* ret =
4909 TargetCache::lookupClassConstant(tv, namedEntityPair.second,
4910 namedEntityPair.first, cnsName);
4913 EMIT_CALL(astubs, TCA(TargetCache::lookupClassConstant),
4914 R(*cns),
4915 IMM(uintptr_t(namedEntityPair.second)),
4916 IMM(uintptr_t(namedEntityPair.first)),
4917 IMM(uintptr_t(cnsName)));
4918 recordReentrantStubCall(i);
4919 // DiamondGuard will restore cns's SCRATCH state but not its
4920 // contents. lookupClassConstant returns the value we want.
4921 emitMovRegReg(astubs, rax, *cns);
4923 int stackDest = 0 - int(sizeof(Cell)); // 0 popped - 1 pushed
4924 emitCopyToStack(a, i, *cns, stackDest);
4927 void
4928 TranslatorX64::analyzeConcat(Tracelet& t, NormalizedInstruction& i) {
4929 ASSERT(i.inputs.size() == 2);
4930 const RuntimeType& r = i.inputs[0]->rtt;
4931 const RuntimeType& l = i.inputs[1]->rtt;
4932 // The concat translation isn't reentrant, and objects that override
4933 // __toString() can cause reentry, so objects are excluded here.
4934 i.m_txFlags = simplePlan(r.valueType() != KindOfObject &&
4935 l.valueType() != KindOfObject);
4938 void
4939 TranslatorX64::translateConcat(const Tracelet& t,
4940 const NormalizedInstruction& i) {
4941 ASSERT(i.inputs.size() == 2);
4942 const DynLocation& r = *i.inputs[0];
4943 const DynLocation& l = *i.inputs[1];
4944 // We have specialized helpers for concatenating two strings, a
4945 // string and an int, and an int and a string.
4946 void* fptr = NULL;
4947 if (l.rtt.isString() && r.rtt.isString()) {
4948 fptr = (void*)concat_ss;
4949 } else if (l.rtt.isString() && r.rtt.isInt()) {
4950 fptr = (void*)concat_si;
4951 } else if (l.rtt.isInt() && r.rtt.isString()) {
4952 fptr = (void*)concat_is;
4954 if (fptr) {
4955 // If we have a specialized helper, use it
4956 if (false) { // type check
4957 StringData* v1 = NULL;
4958 StringData* v2 = NULL;
4959 StringData* retval = concat_ss(v1, v2);
4960 printf("%p", retval); // use retval
4963 // The concat helper will decRef the inputs and incRef the output
4964 // for us if appropriate
4965 EMIT_CALL(a, fptr,
4966 V(l.location),
4967 V(r.location));
4968 ASSERT(i.outStack->rtt.isString());
4969 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4970 RegInfo::DIRTY);
4972 } else {
4973 // Otherwise, use the generic concat helper
4974 if (false) { // type check
4975 uint64_t v1 = 0, v2 = 0;
4976 DataType t1 = KindOfUninit, t2 = KindOfUninit;
4977 StringData *retval = concat(t1, v1, t2, v2);
4978 printf("%p", retval); // use retval
4980 // concat will decRef the two inputs and incRef the output
4981 // for us if appropriate
4982 EMIT_CALL(a, concat,
4983 IMM(l.valueType()), V(l.location),
4984 IMM(r.valueType()), V(r.location));
4985 ASSERT(i.outStack->isString());
4986 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4987 RegInfo::DIRTY);
4991 TXFlags
4992 planInstrAdd_Int(const NormalizedInstruction& i) {
4993 ASSERT(i.inputs.size() == 2);
4994 return nativePlan(i.inputs[0]->isInt() && i.inputs[1]->isInt());
4997 TXFlags
4998 planInstrAdd_Array(const NormalizedInstruction& i) {
4999 ASSERT(i.inputs.size() == 2);
5000 return supportedPlan(i.inputs[0]->valueType() == KindOfArray &&
5001 i.inputs[1]->valueType() == KindOfArray);
5004 void
5005 TranslatorX64::analyzeAdd(Tracelet& t, NormalizedInstruction& i) {
5006 i.m_txFlags = TXFlags(planInstrAdd_Int(i) | planInstrAdd_Array(i));
5009 void
5010 TranslatorX64::translateAdd(const Tracelet& t,
5011 const NormalizedInstruction& i) {
5012 ASSERT(i.inputs.size() == 2);
5014 if (planInstrAdd_Array(i)) {
5015 // Handle adding two arrays
5016 ASSERT(i.outStack->outerType() == KindOfArray);
5017 if (false) { // type check
5018 ArrayData* v = NULL;
5019 v = array_add(v, v);
5021 // The array_add helper will decRef the inputs and incRef the output
5022 // for us if appropriate
5023 EMIT_CALL(a, array_add,
5024 V(i.inputs[1]->location),
5025 V(i.inputs[0]->location));
5026 recordReentrantCall(i);
5027 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5028 RegInfo::DIRTY);
5029 return;
5032 ASSERT(planInstrAdd_Int(i));
5033 binaryArithCell(i, OpAdd, *i.inputs[0], *i.outStack);
5036 void
5037 TranslatorX64::analyzeXor(Tracelet& t, NormalizedInstruction& i) {
5038 i.m_txFlags = nativePlan((i.inputs[0]->outerType() == KindOfBoolean ||
5039 i.inputs[0]->isInt()) &&
5040 (i.inputs[1]->outerType() == KindOfBoolean ||
5041 i.inputs[1]->isInt()));
5044 static inline void
5045 emitIntToCCBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch,
5046 int CC) {
5048 * test %srcdest, %srcdest
5049 * set<CC> %scratchL
5050 * movzbq %scratchL, %srcdest
5052 a. test_reg64_reg64(srcdest, srcdest);
5053 a. setcc (CC, scratch);
5054 a. mov_reg8_reg64_unsigned(scratch, srcdest);
5057 static inline void
5058 emitIntToBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch) {
5059 emitIntToCCBool(a, srcdest, scratch, CC_NZ);
5062 static inline void
5063 emitIntToNegBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch) {
5064 emitIntToCCBool(a, srcdest, scratch, CC_Z);
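// In C terms: after emitIntToBool, srcdest == (srcdest != 0); after
// emitIntToNegBool, srcdest == (srcdest == 0); both leave a zero-extended
// 0/1 value in the full 64-bit register.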
5067 void
5068 TranslatorX64::translateXor(const Tracelet& t,
5069 const NormalizedInstruction& i) {
5070 PhysReg src, srcdest;
5071 getBinaryStackInputs(m_regMap, i, src, srcdest);
5072 m_regMap.allocOutputRegs(i);
5073 ScratchReg scr(m_regMap);
5074 if (i.inputs[0]->isInt()) {
5075 emitIntToBool(a, src, *scr);
5077 if (i.inputs[1]->isInt()) {
5078 emitIntToBool(a, srcdest, *scr);
5080 a. xor_reg64_reg64(src, srcdest);
5083 void
5084 TranslatorX64::analyzeNot(Tracelet& t, NormalizedInstruction& i) {
5085 ASSERT(i.inputs.size() == 1);
5086 i.m_txFlags = nativePlan(i.inputs[0]->isInt() ||
5087 i.inputs[0]->outerType() == KindOfBoolean);
5090 void
5091 TranslatorX64::translateNot(const Tracelet& t,
5092 const NormalizedInstruction& i) {
5093 ASSERT(i.isNative());
5094 ASSERT(i.outStack && !i.outLocal);
5095 ASSERT(!i.inputs[0]->isVariant());
5096 m_regMap.allocOutputRegs(i);
5097 PhysReg srcdest = m_regMap.getReg(i.outStack->location);
5098 ScratchReg scr(m_regMap);
5099 emitIntToNegBool(a, srcdest, *scr);
5102 void
5103 TranslatorX64::analyzeBitNot(Tracelet& t, NormalizedInstruction& i) {
5104 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
5107 void
5108 TranslatorX64::translateBitNot(const Tracelet& t,
5109 const NormalizedInstruction& i) {
5110 ASSERT(i.outStack && !i.outLocal);
5111 m_regMap.allocOutputRegs(i);
5112 PhysReg srcdest = m_regMap.getReg(i.outStack->location);
5113 a. not_reg64(srcdest);
5116 void
5117 TranslatorX64::analyzeCastInt(Tracelet& t, NormalizedInstruction& i) {
5118 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
5121 void
5122 TranslatorX64::translateCastInt(const Tracelet& t,
5123 const NormalizedInstruction& i) {
5124 ASSERT(i.inputs.size() == 1);
5125 ASSERT(i.outStack && !i.outLocal);
5127 /* nop */
5130 void
5131 TranslatorX64::analyzeCastString(Tracelet& t, NormalizedInstruction& i) {
5132 i.m_txFlags =
5133 i.inputs[0]->isArray() || i.inputs[0]->isObject() ? Supported :
5134 i.inputs[0]->isInt() ? Simple :
5135 Native;
5136 i.funcd = NULL;
5139 static void toStringError(StringData *cls) {
5140 raise_error("Method __toString() must return a string value");
5143 static const StringData* stringDataFromInt(int64 n) {
5144 StringData* s = buildStringData(n);
5145 s->incRefCount();
5146 return s;
5149 static const StringData* stringDataFromDouble(int64 n) {
5150 StringData* s = buildStringData(*(double*)&n);
5151 s->incRefCount();
5152 return s;
5155 void TranslatorX64::toStringHelper(ObjectData *obj) {
5156 // caller must set r15 to the new ActRec
5157 static_assert(rStashedAR == r15 &&
5158 rVmFp == rbp,
5159 "toStringHelper needs to be updated for ABI changes");
5160 register ActRec *ar asm("r15");
5161 register ActRec *rbp asm("rbp");
5163 const Class* cls = obj->getVMClass();
5164 const Func* toString = cls->getToString();
5165 if (!toString) {
5166 // the unwinder will restore rVmSp to
5167 // &ar->m_r, so we'd better make sure it's
5168 // got a valid TypedValue there.
5169 TV_WRITE_UNINIT(&ar->m_r);
5170 std::string msg = cls->preClass()->name()->data();
5171 msg += "::__toString() was not defined";
5172 throw BadTypeConversionException(msg.c_str());
5174 // ar->m_savedRbp set by caller
5175 ar->m_savedRip = rbp->m_savedRip;
5176 ar->m_func = toString;
5177 // ar->m_soff set by caller
5178 ar->initNumArgs(0);
5179 ar->setThis(obj);
5180 ar->setVarEnv(0);
5181 // Point the return address of this C++ function at the prolog to
5182 // execute.
5183 rbp->m_savedRip = (uint64_t)toString->getPrologue(0);
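// The trick above: rather than invoking __toString() from C++, the helper
// rewrites its own caller's saved return address, so returning from this
// function "returns" straight into the translated prologue of __toString(),
// with r15 already pointing at the ActRec the caller prepared.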
5186 void
5187 TranslatorX64::translateCastString(const Tracelet& t,
5188 const NormalizedInstruction& i) {
5189 ASSERT(i.inputs.size() == 1);
5190 ASSERT(i.outStack && !i.outLocal);
5192 if (i.inputs[0]->isNull()) {
5193 m_regMap.allocOutputRegs(i);
5194 PhysReg dest = m_regMap.getReg(i.outStack->location);
5195 a. mov_imm64_reg((uint64)empty_string.get(), dest);
5196 } else if (i.inputs[0]->isBoolean()) {
5197 static StringData* s_1 = StringData::GetStaticString("1");
5198 m_regMap.allocOutputRegs(i);
5199 PhysReg dest = m_regMap.getReg(i.outStack->location);
5200 a. cmp_imm32_reg64(0, dest);
5201 a. mov_imm64_reg((uint64)empty_string.get(), dest);
5202 ScratchReg scratch(m_regMap);
5203 a. mov_imm64_reg((intptr_t)s_1, *scratch);
5204 a. cmov_reg64_reg64(CC_NZ, *scratch, dest);
5205 } else if (i.inputs[0]->isInt()) {
5206 EMIT_CALL(a, stringDataFromInt, V(i.inputs[0]->location));
5207 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5208 RegInfo::DIRTY);
5209 } else if (i.inputs[0]->isDouble()) {
5210 EMIT_CALL(a, stringDataFromDouble, V(i.inputs[0]->location));
5211 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5212 RegInfo::DIRTY);
5213 } else if (i.inputs[0]->isString()) {
5214 // nop
5215 } else if (i.inputs[0]->isArray()) {
5216 static StringData* s_array = StringData::GetStaticString("Array");
5217 m_regMap.allocOutputRegs(i);
5218 PhysReg dest = m_regMap.getReg(i.outStack->location);
5219 emitDecRef(i, dest, KindOfArray);
5220 a. mov_imm64_reg((uint64)s_array, dest);
5221 } else if (i.inputs[0]->isObject()) {
5222 m_regMap.scrubStackEntries(i.stackOff - 1);
5223 m_regMap.cleanAll();
5224 int delta = i.stackOff + kNumActRecCells - 1;
5225 if (delta) {
5226 a. add_imm64_reg64(-cellsToBytes(delta), rVmSp);
5228 a. store_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmSp);
5229 a. store_imm32_disp_reg(nextSrcKey(t, i).offset() - curFunc()->base(),
5230 AROFF(m_soff), rVmSp);
5231 PhysReg obj = m_regMap.getReg(i.inputs[0]->location);
5232 if (obj != argNumToRegName[0]) {
5233 a. mov_reg64_reg64(obj, argNumToRegName[0]);
5235 m_regMap.smashRegs(kAllRegs);
5236 a. mov_reg64_reg64(rVmSp, rStashedAR);
5237 EMIT_CALL(a, TCA(toStringHelper));
5238 recordReentrantCall(i);
5239 if (i.stackOff != 0) {
5240 a. add_imm64_reg64(cellsToBytes(i.stackOff), rVmSp);
5243 PhysReg base;
5244 int disp;
5245 locToRegDisp(i.outStack->location, &base, &disp);
5246 ScratchReg scratch(m_regMap);
5247 emitStringCheck(a, base, disp + TVOFF(m_type), *scratch);
5249 UnlikelyIfBlock<CC_NZ> ifNotString(a, astubs);
5250 EMIT_CALL(astubs, toStringError, IMM(0));
5251 recordReentrantStubCall(i);
5253 } else {
5254 NOT_REACHED();
5258 void
5259 TranslatorX64::analyzePrint(Tracelet& t, NormalizedInstruction& i) {
5260 ASSERT(i.inputs.size() == 1);
5261 const RuntimeType& rtt = i.inputs[0]->rtt;
5262 DataType type = rtt.outerType();
5263 i.m_txFlags = simplePlan(
5264 type == KindOfUninit ||
5265 type == KindOfNull ||
5266 type == KindOfBoolean ||
5267 rtt.isInt() ||
5268 rtt.isString());
5271 void
5272 TranslatorX64::translatePrint(const Tracelet& t,
5273 const NormalizedInstruction& i) {
5274 const vector<DynLocation*>& inputs = i.inputs;
5275 ASSERT(inputs.size() == 1);
5276 ASSERT(!i.outLocal);
5277 ASSERT(!i.outStack || i.outStack->isInt());
5278 Location loc = inputs[0]->location;
5279 DataType type = inputs[0]->outerType();
5280 switch (type) {
5281 STRINGCASE(): EMIT_CALL(a, print_string, V(loc)); break;
5282 case KindOfInt64: EMIT_CALL(a, print_int, V(loc)); break;
5283 case KindOfBoolean: EMIT_CALL(a, print_boolean, V(loc)); break;
5284 NULLCASE(): /* do nothing */ break;
5285 default: {
5286 // Translation is only supported for Null, Boolean, Int, and String
5287 ASSERT(false);
5288 break;
5291 m_regMap.allocOutputRegs(i);
5292 if (i.outStack) {
5293 PhysReg outReg = getReg(i.outStack->location);
5294 emitImmReg(a, 1, outReg);
5298 void
5299 TranslatorX64::translateJmp(const Tracelet& t,
5300 const NormalizedInstruction& i) {
5301 ASSERT(!i.outStack && !i.outLocal);
5302 syncOutputs(t);
5304 // Check the surprise page on all backwards jumps
5305 if (i.imm[0].u_BA < 0 && !i.noSurprise) {
5306 if (trustSigSegv) {
5307 const uint64_t stackMask =
5308 ~(cellsToBytes(RuntimeOption::EvalVMStackElms) - 1);
5309 a.mov_reg64_reg64(rVmSp, rScratch);
5310 a.and_imm64_reg64(stackMask, rScratch);
5311 TCA surpriseLoad = a.code.frontier;
5312 a.load_reg64_disp_reg64(rScratch, 0, rScratch);
5314 if (!m_segvStubs.insert(SignalStubMap::value_type(surpriseLoad,
5315 astubs.code.frontier)))
5316 NOT_REACHED();
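      /*
       * Reader's note, sketching the mechanism implied by the code above: the
       * masked load reads from the base of the VM stack, where the surprise
       * flags live. When a surprise is pending that page is made
       * inaccessible, so the load faults; the SEGV handler looks the faulting
       * address up in m_segvStubs and resumes at the paired astubs code
       * emitted below, which calls EventHook::CheckSurprise.
       */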
5318 * Note that it is safe not to register unwind information here,
5319 * because we just called syncOutputs so all registers are
5320 * already clean.
5322 astubs.call((TCA)&EventHook::CheckSurprise);
5323 recordStubCall(i);
5324 astubs.jmp(a.code.frontier);
5325 } else {
5326 emitTestSurpriseFlags();
5328 UnlikelyIfBlock<CC_NZ> ifSurprise(a, astubs);
5329 astubs.call((TCA)&EventHook::CheckSurprise);
5330 recordStubCall(i);
5334 SrcKey sk(curFunc(), i.offset() + i.imm[0].u_BA);
5335 emitBindJmp(sk);
5338 void
5339 TranslatorX64::analyzeSwitch(Tracelet& t,
5340 NormalizedInstruction& i) {
5341 RuntimeType& rtt = i.inputs[0]->rtt;
5342 ASSERT(rtt.outerType() != KindOfRef);
5343 switch (rtt.outerType()) {
5344 NULLCASE():
5345 case KindOfBoolean:
5346 case KindOfInt64:
5347 i.m_txFlags = Native;
5348 break;
5350 case KindOfDouble:
5351 i.m_txFlags = Simple;
5352 break;
5354 STRINGCASE():
5355 case KindOfObject:
5356 case KindOfArray:
5357 i.m_txFlags = Supported;
5358 break;
5360 default:
5361 not_reached();
5365 template <typename T>
5366 static int64 switchBoundsCheck(T v, int64 base, int64 nTargets) {
5367 // I'm relying on gcc to be smart enough to optimize away the next
5368 // two lines when T is int64.
5369 if (int64(v) == v) {
5370 int64 ival = v;
5371 if (ival >= base && ival < (base + nTargets)) {
5372 return ival - base;
5375 return nTargets + 1;
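  // Illustrative behaviour (example values only): with base = 10 and
  // nTargets = 3 the in-range cases are 10, 11 and 12, so
  //   switchBoundsCheck<int64>(11, 10, 3)   == 1   // ival - base
  //   switchBoundsCheck<int64>(42, 10, 3)   == 4   // nTargets + 1
  //   switchBoundsCheck<double>(2.5, 10, 3) == 4   // non-integral double
  // nTargets + 1 selects the last (default) jump-table entry in the helper
  // path of translateSwitch below.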
5378 static int64 switchDoubleHelper(int64 val, int64 base, int64 nTargets) {
5379 union {
5380 int64 intbits;
5381 double dblval;
5382 } u;
5383 u.intbits = val;
5384 return switchBoundsCheck(u.dblval, base, nTargets);
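  // The int64 argument is the double's raw bit pattern (it comes straight
  // from the TypedValue's 64-bit data word), so the union above is just a
  // bit reinterpretation, morally equivalent to:
  //   double dblval; memcpy(&dblval, &val, sizeof dblval);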
5387 static int64 switchStringHelper(StringData* s, int64 base, int64 nTargets) {
5388 int64 ival;
5389 double dval;
5390 switch (s->isNumericWithVal(ival, dval, 1)) {
5391 case KindOfNull:
5392 ival = switchBoundsCheck(0, base, nTargets);
5393 break;
5395 case KindOfDouble:
5396 ival = switchBoundsCheck(dval, base, nTargets);
5397 break;
5399 case KindOfInt64:
5400 ival = switchBoundsCheck(ival, base, nTargets);
5401 break;
5403 default:
5404 not_reached();
5406 if (s->decRefCount() == 0) {
5407 s->release();
5409 return ival;
5412 static int64 switchObjHelper(ObjectData* o, int64 base, int64 nTargets) {
5413 int64 ival = o->o_toInt64();
5414 if (o->decRefCount() == 0) {
5415 o->release();
5417 return switchBoundsCheck(ival, base, nTargets);
5420 void
5421 TranslatorX64::translateSwitch(const Tracelet& t,
5422 const NormalizedInstruction& i) {
5423 int64 base = i.imm[1].u_I64A;
5424 bool bounded = i.imm[2].u_IVA;
5425 const ImmVector& iv = i.immVec;
5426 int nTargets = bounded ? iv.size() - 2 : iv.size();
5427 int jmptabSize = nTargets;
5428 ASSERT(nTargets > 0);
5429 PhysReg valReg = getReg(i.inputs[0]->location);
5430 DataType inType = i.inputs[0]->outerType();
5431 ASSERT(IMPLIES(inType != KindOfInt64, bounded));
5432 ASSERT(IMPLIES(bounded, iv.size() > 2));
5433 syncOutputs(t); // this will mark valReg as FREE but it still has
5434 // its old value
5436 SrcKey defaultSk(curFunc(), i.offset() + iv.vec32()[iv.size() - 1]);
5437 SrcKey zeroSk(curFunc(), 0);
5438 if (0 >= base && 0 < (base + nTargets)) {
5439 zeroSk.m_offset = i.offset() + iv.vec32()[0 - base];
5440 } else {
5441 zeroSk.m_offset = defaultSk.m_offset;
5444 switch (i.inputs[0]->outerType()) {
5445 NULLCASE(): {
5446 emitBindJmp(zeroSk);
5447 return;
5450 case KindOfBoolean: {
5451 SrcKey nonzeroSk(curFunc(), i.offset() + iv.vec32()[iv.size() - 2]);
5452 a.test_reg64_reg64(valReg, valReg);
5453 emitCondJmp(nonzeroSk, zeroSk, CC_NZ);
5454 return;
5457 case KindOfInt64:
5458 // No special treatment needed
5459 break;
5461 case KindOfDouble:
5462 STRINGCASE():
5463 case KindOfObject: {
5464 // switch(Double|String|Obj)Helper do bounds-checking for us, so
5465 // we need to make sure the default case is in the jump table,
5466       // and we don't emit our own bounds-checking code
5467 jmptabSize = iv.size();
5468 bounded = false;
5469 if (false) {
5470 StringData* s = NULL;
5471 ObjectData* o = NULL;
5472 switchDoubleHelper(0.0, 0, 0);
5473 switchStringHelper(s, 0, 0);
5474 switchObjHelper(o, 0, 0);
5476 EMIT_CALL(a,
5477 inType == KindOfDouble ? (TCA)switchDoubleHelper :
5478 (IS_STRING_TYPE(inType) ? (TCA)switchStringHelper :
5479 (TCA)switchObjHelper),
5480 R(valReg), IMM(base), IMM(nTargets));
5481 recordCall(i);
5482 valReg = rax;
5483 break;
5486 case KindOfArray:
5487 emitDecRef(a, i, valReg, KindOfArray);
5488 emitBindJmp(defaultSk);
5489 return;
5491 default:
5492 not_reached();
5495 if (bounded) {
5496 if (base) {
5497 a.sub_imm64_reg64(base, valReg);
5499 a.cmp_imm64_reg64(nTargets, valReg);
5500 prepareForSmash(a, kJmpccLen);
5501 TCA defaultStub =
5502 emitServiceReq(REQ_BIND_JMPCC_SECOND, 3,
5503 a.code.frontier, defaultSk.m_offset, CC_AE);
5504 // Unsigned comparison: check for < 0 and >= nTargets at the same time
5505 a.jae(defaultStub);
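    // Worked example of the single unsigned check (example values only):
    // with base = 5 and nTargets = 3, an input of 4 becomes 4 - 5 = -1,
    // which as an unsigned quantity is huge, so jae sends it to the default
    // stub; an input of 6 becomes 1, 1 < 3, and control falls through to the
    // jump table. One compare covers both "below base" and "at or above
    // base + nTargets".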
5508 TCA* jmptab = m_globalData.alloc<TCA>(sizeof(TCA), jmptabSize);
5509 TCA afterLea = a.code.frontier + kLeaRipLen;
5510 ptrdiff_t diff = (TCA)jmptab - afterLea;
5511 ASSERT(deltaFits(diff, sz::dword));
5512 a.lea_rip_disp_reg64(diff, rScratch);
5513 ASSERT(a.code.frontier == afterLea);
5514 a.jmp_reg64_index_displ(rScratch, valReg, 0);
5516 for (int idx = 0; idx < jmptabSize; ++idx) {
5517 SrcKey sk(curFunc(), i.offset() + iv.vec32()[idx]);
5518 jmptab[idx] = emitServiceReq(false, REQ_BIND_ADDR, 2ull,
5519 &jmptab[idx], uint64_t(sk.offset()));
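    // Shape of the emitted dispatch (a sketch, not literal disassembly):
    //   lea  rScratch, [rip + (jmptab - afterLea)]      ; rScratch = &jmptab[0]
    //   jmp  qword ptr [rScratch + valReg * sizeof(TCA)]
    // Each jmptab[idx] initially points at a REQ_BIND_ADDR service request
    // that, when first reached, translates the target SrcKey and patches the
    // table slot with the address of the real translation.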
5523 void
5524 TranslatorX64::analyzeRetC(Tracelet& t,
5525 NormalizedInstruction& i) {
5526 i.manuallyAllocInputs = true;
5527 i.m_txFlags = Supported;
5530 void
5531 TranslatorX64::analyzeRetV(Tracelet& t,
5532 NormalizedInstruction& i) {
5533 analyzeRetC(t, i);
5536 void TranslatorX64::emitReturnVal(
5537 Asm& a, const NormalizedInstruction& i,
5538 PhysReg dstBase, int dstOffset, PhysReg thisBase, int thisOffset,
5539 PhysReg scratch) {
5541 if (!i.grouped) return;
5542 TypedValue tv;
5543 TV_WRITE_UNINIT(&tv);
5544 tv.m_data.num = 0; // to keep the compiler happy
5547 * We suppressed the write of the (literal) return value
5548 * to the stack. Figure out what it was.
5550 NormalizedInstruction* prev = i.prev;
5551 ASSERT(!prev->outStack);
5552 switch (prev->op()) {
5553 case OpNull:
5554 tv.m_type = KindOfNull;
5555 break;
5556 case OpTrue:
5557 case OpFalse:
5558 tv.m_type = KindOfBoolean;
5559 tv.m_data.num = prev->op() == OpTrue;
5560 break;
5561 case OpInt:
5562 tv.m_type = KindOfInt64;
5563 tv.m_data.num = prev->imm[0].u_I64A;
5564 break;
5565 case OpDouble:
5566 tv.m_type = KindOfDouble;
5567 tv.m_data.dbl = prev->imm[0].u_DA;
5568 break;
5569 case OpString:
5570 tv.m_type = BitwiseKindOfString;
5571 tv.m_data.pstr = curUnit()->lookupLitstrId(prev->imm[0].u_SA);
5572 break;
5573 case OpArray:
5574 tv.m_type = KindOfArray;
5575 tv.m_data.parr = curUnit()->lookupArrayId(prev->imm[0].u_AA);
5576 break;
5577 case OpThis: {
5578 if (thisBase != dstBase || thisOffset != dstOffset) {
5579 a. load_reg64_disp_reg64(thisBase, thisOffset, scratch);
5580 a. store_reg64_disp_reg64(scratch, dstOffset, dstBase);
5582 emitStoreImm(a, KindOfObject,
5583 dstBase, dstOffset + TVOFF(m_type), sz::dword);
5584 return;
5586 case OpBareThis: {
5587 ASSERT(curFunc()->cls());
5588 a. mov_imm32_reg32(KindOfNull, scratch);
5589 a. test_imm64_disp_reg64(1, thisOffset, thisBase);
5591 JccBlock<CC_NZ> noThis(a);
5592 a. mov_imm32_reg32(KindOfObject, scratch);
5594 a. store_reg32_disp_reg64(scratch, dstOffset + TVOFF(m_type), dstBase);
5595 if (thisBase != dstBase || thisOffset != dstOffset) {
5596 a. load_reg64_disp_reg64(thisBase, thisOffset, scratch);
5597 a. store_reg64_disp_reg64(scratch, dstOffset, dstBase);
5599 return;
5601 default:
5602 not_reached();
5605 emitStoreImm(a, tv.m_type,
5606 dstBase, dstOffset + TVOFF(m_type), sz::dword);
5607 if (tv.m_type != KindOfNull) {
5608 emitStoreImm(a, tv.m_data.num,
5609 dstBase, dstOffset, sz::qword);
5614 // translateRetC --
5616 // Return to caller with the current activation record replaced with the
5617 // top-of-stack return value. Call with outputs sync'ed, so the code
5618 // we're emitting runs "in between" basic blocks.
5619 void
5620 TranslatorX64::translateRetC(const Tracelet& t,
5621 const NormalizedInstruction& i) {
5622 if (i.skipSync) ASSERT(i.grouped);
5625 * This method chooses one of two ways to generate machine code for RetC
5626 * depending on whether we are generating a specialized return (where we
5627 * free the locals inline when possible) or a generic return (where we call
5628 * a helper function to free locals).
5630 * For the specialized return, we emit the following flow:
5632 * Check if varenv is NULL
5633 * If it's not NULL, branch to label 2
5634 * Free each local variable
5635 * 1:
5636    *     Teleport the return value to the appropriate memory location
5637 * Restore the old values for rVmFp and rVmSp, and
5638 * unconditionally transfer control back to the caller
5639 * 2:
5640 * Call the frame_free_locals helper
5641 * Jump to label 1
5643 * For a generic return, we emit the following flow:
5645 * Call the frame_free_locals helper
5646    *   Teleport the return value to the appropriate memory location
5647 * Restore the old values for rVmFp and rVmSp, and
5648 * unconditionally transfer control back to the caller
5651 int stackAdjustment = t.m_stackChange;
5652 if (i.skipSync) {
5653 SKTRACE(2, i.source, "i.skipSync\n");
5656 * getting here means there was nothing to do between
5657 * a previous reqXXX and this ret. Any spill code we generate
5658 * here would be broken (because the rbx is wrong), so
5659 * verify that we don't generate anything...
5661 TCA s DEBUG_ONLY = a.code.frontier;
5662 syncOutputs(0);
5663 ASSERT(s == a.code.frontier);
5664 stackAdjustment = 0;
5665 } else {
5667 * no need to syncOutputs here... we're going to update
5668 * rbx at the end of this function anyway, and we may want
5669 * to use enregistered locals on the fast path below
5671 m_regMap.scrubStackEntries(t.m_stackChange);
5672 m_regMap.cleanAll(); // TODO(#1339331): don't.
5675 bool noThis = !curFunc()->isPseudoMain() &&
5676 (!curFunc()->isMethod() || curFunc()->isStatic());
5677 bool mayUseVV = (curFunc()->attrs() & AttrMayUseVV);
5678 bool mergedThis = i.grouped && (i.prev->op() == OpThis ||
5679 i.prev->op() == OpBareThis);
5681 * figure out where to put the return value, and where to get it from
5683 ASSERT(i.stackOff == t.m_stackChange);
5684 const Location retValSrcLoc(Location::Stack, stackAdjustment - 1);
5686 const Func *callee = curFunc();
5687 ASSERT(callee);
5688 int nLocalCells =
5689 callee == NULL ? 0 : // This happens for returns from pseudo-main.
5690 callee->numSlotsInFrame();
5691 int retvalSrcBase = cellsToBytes(-stackAdjustment);
5693 ASSERT(cellsToBytes(locPhysicalOffset(retValSrcLoc)) == retvalSrcBase);
5696 * The (1 + nLocalCells) skips 1 slot for the return value.
5698 int retvalDestDisp = cellsToBytes(1 + nLocalCells - stackAdjustment) +
5699 AROFF(m_r);
5701 if (freeLocalsInline()) {
5702 SKTRACE(2, i.source, "emitting specialized inline return\n");
5704 // Emit specialized code inline to clean up the locals
5705 ASSERT(curFunc()->numLocals() == (int)i.inputs.size());
5707 ScratchReg rTmp(m_regMap);
5710      * If this function can possibly use variadic arguments or a shared
5711 * variable environment, we need to check for it and go to a
5712 * generic return if so.
5714 boost::scoped_ptr<DiamondReturn> mayUseVVRet;
5715 if (mayUseVV) {
5716 SKTRACE(2, i.source, "emitting mayUseVV in UnlikelyIf\n");
5718 mayUseVVRet.reset(new DiamondReturn);
5719 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rTmp);
5720 a. test_reg64_reg64(*rTmp, *rTmp);
5722 UnlikelyIfBlock<CC_NZ> varEnvCheck(a, astubs, mayUseVVRet.get());
5724 m_regMap.cleanAll();
5725 if (i.grouped) {
5726 ScratchReg s(m_regMap);
5727 emitReturnVal(astubs, i,
5728 rVmSp, retvalSrcBase, rVmFp, AROFF(m_this), *s);
5730 emitFrameRelease(astubs, i, noThis || mergedThis);
5734 for (unsigned int k = 0; k < i.inputs.size(); ++k) {
5735 // RetC's inputs should all be locals
5736 ASSERT(i.inputs[k]->location.space == Location::Local);
5737 DataType t = i.inputs[k]->outerType();
5738 if (IS_REFCOUNTED_TYPE(t)) {
5739 PhysReg reg = m_regMap.allocReg(i.inputs[k]->location, t,
5740 RegInfo::CLEAN);
5741 emitDecRef(i, reg, t);
5745 if (mergedThis) {
5746       // There is nothing to do: we're returning this,
5747       // but we didn't incRef it, so we don't have to
5748 // decRef here.
5749 } else {
5750       // If this is an instance method called on an object or if it is a
5751 // pseudomain, we need to decRef $this (if there is one)
5752 if (curFunc()->isMethod() && !curFunc()->isStatic()) {
5753 // This assert is weaker than it looks; it only checks the invocation
5754 // we happen to be translating for. The runtime "assert" is the
5755 // unconditional dereference of m_this we emit; if the frame has
5756 // neither this nor a class, then m_this will be null and we'll
5757 // SEGV.
5758 ASSERT(curFrame()->hasThis() || curFrame()->hasClass());
5759 // m_this and m_cls share a slot in the ActRec, so we check the
5760 // lowest bit (0 -> m_this, 1 -> m_cls)
5761 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rTmp);
5762 if (i.guardedThis) {
5763 emitDecRef(i, *rTmp, KindOfObject);
5764 } else {
5765 a. test_imm32_reg64(1, *rTmp);
5767 JccBlock<CC_NZ> ifZero(a);
5768           emitDecRef(i, *rTmp, KindOfObject); // it's $this; decref it.
5771 } else if (curFunc()->isPseudoMain()) {
5772 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rTmp);
5773 a. shr_imm32_reg64(1, *rTmp); // sets c (from bit 0) and z
5774 FreezeRegs ice(m_regMap);
5776 // tests for Not Zero and Not Carry
5777 UnlikelyIfBlock<CC_NBE> ifRealThis(a, astubs);
5778 astubs. shl_imm32_reg64(1, *rTmp);
5779 emitDecRef(astubs, i, *rTmp, KindOfObject);
5784 // Register map is officially out of commission now.
5785 m_regMap.scrubLoc(retValSrcLoc);
5786 m_regMap.smashRegs(kAllRegs);
5788 emitTestSurpriseFlags();
5790 UnlikelyIfBlock<CC_NZ> ifTracer(a, astubs);
5791 if (i.grouped) {
5792 ScratchReg s(m_regMap);
5793 emitReturnVal(astubs, i,
5794 rVmSp, retvalSrcBase, rVmFp, AROFF(m_this), *s);
5796 astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5797 emitCall(astubs, (TCA)&EventHook::FunctionExit, true);
5798 recordReentrantStubCall(i);
5801 // The register map on the main line better be empty (everything
5802 // smashed) or some of the above DiamondReturns might generate
5803 // reconciliation code.
5804 ASSERT(m_regMap.empty());
5805 } else {
5806 SKTRACE(2, i.source, "emitting generic return\n");
5808 m_regMap.cleanAll();
5809 m_regMap.smashRegs(kAllRegs);
5810 if (i.grouped) {
5812 * What a pain: EventHook::onFunctionExit needs access
5813        * to the return value, so we have to write it to the
5814        * stack anyway. We still win for OpThis and
5815        * OpBareThis, since we don't have to do any refcounting.
5817 ScratchReg s(m_regMap);
5818 emitReturnVal(astubs, i,
5819 rVmSp, retvalSrcBase, rVmFp, AROFF(m_this), *s);
5821 // If we are doing the generic return flow, we emit a call to
5822 // frame_free_locals here
5823 ASSERT(i.inputs.size() == 0);
5824 emitFrameRelease(a, i, noThis || mergedThis);
5828 * We're officially between tracelets now, and the normal register
5829 * allocator is not being used.
5831 ASSERT(m_regMap.empty());
5832 RegSet scratchRegs = kScratchCrossTraceRegs;
5833 DumbScratchReg rRetAddr(scratchRegs);
5835 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRip), *rRetAddr);
5836 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
5839 * Having gotten everything we care about out of the current frame
5840    * pointer, smash the return value's type and data over it. We don't
5841 * care about reference counts: as long as this runs to completion, we're
5842 * refcount-neutral.
5844 if (i.grouped) {
5845 DumbScratchReg s(scratchRegs);
5846 emitReturnVal(a, i, rVmSp, retvalDestDisp,
5847 rVmSp, retvalDestDisp - AROFF(m_r) + AROFF(m_this),
5848 *s);
5849 } else {
5850 ASSERT(sizeof(Cell) == 16);
5851 a. load_reg64_disp_reg64 (rVmSp, retvalSrcBase, rScratch);
5852 a. store_reg64_disp_reg64(rScratch, retvalDestDisp, rVmSp);
5853 a. load_reg64_disp_reg64 (rVmSp, retvalSrcBase + 8, rScratch);
5854 a. store_reg64_disp_reg64(rScratch, retvalDestDisp + 8, rVmSp);
5858 * Now update the principal hardware registers.
5860 * Stack pointer has to skip over all the locals as well as the
5861 * activation record.
5863 a. add_imm64_reg64(sizeof(ActRec) +
5864 cellsToBytes(nLocalCells - stackAdjustment), rVmSp);
5865 emitRB(a, RBTypeFuncExit, curFunc()->fullName()->data(), RegSet(*rRetAddr));
5866 a. jmp_reg (*rRetAddr);
5867 translator_not_reached(a);
5870 void
5871 TranslatorX64::translateRetV(const Tracelet& t,
5872 const NormalizedInstruction& i) {
5873 translateRetC(t, i);
5877 * NativeImpl is a special operation in the sense that it must be the
5878 * only opcode in a function body, and also functions as the return.
5880 * This function runs between tracelets and does not use m_regMap.
5882 void TranslatorX64::emitNativeImpl(const Func* func,
5883 bool emitSavedRIPReturn) {
5884 BuiltinFunction builtinFuncPtr = func->builtinFuncPtr();
5885 if (false) { // typecheck
5886 ActRec* ar = NULL;
5887 builtinFuncPtr(ar);
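    // This `if (false)` block is this file's usual typecheck idiom: the call
    // is never executed, but it forces a compile-time check that
    // builtinFuncPtr really is callable with an ActRec*, since the actual
    // call below is emitted through an untyped code address (TCA).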
5890 TRACE(2, "calling builtin preClass %p func %p\n", func->preClass(),
5891 builtinFuncPtr);
5893 * Call the native implementation. This will free the locals for us in the
5894 * normal case. In the case where an exception is thrown, the VM unwinder
5895 * will handle it for us.
5897 a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5898 emitCall(a, (TCA)builtinFuncPtr, false /* smash regs */);
5901 * We're sometimes calling this while curFunc() isn't really the
5902 * builtin---make sure to properly record the sync point as if we
5903 * are inside the builtin.
5905 * The assumption here is that for builtins, the generated func
5906 * contains only a single opcode (NativeImpl), and there are no
5907 * non-argument locals.
5909 ASSERT(func->numIterators() == 0 && func->isBuiltin());
5910 ASSERT(func->numLocals() == func->numParams());
5911 ASSERT(*func->getEntry() == OpNativeImpl);
5912 ASSERT(instrLen(func->getEntry()) == func->past() - func->base());
5913 Offset pcOffset = 0; // NativeImpl is the only instruction in the func
5914 Offset stackOff = func->numLocals(); // Builtin stubs have no
5915 // non-arg locals
5916 recordSyncPoint(a, pcOffset, stackOff);
5918 RegSet unusedRegs = kScratchCrossTraceRegs;
5919 DumbScratchReg rRetAddr(unusedRegs);
5921 RegSet saveDuringEmitRB;
5922 if (emitSavedRIPReturn) {
5923 // Get the return address from the ActRec
5924 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRip), *rRetAddr);
5925 saveDuringEmitRB |= RegSet(*rRetAddr);
5929 * The native implementation already put the return value on the
5930 * stack for us, and handled cleaning up the arguments. We have to
5931 * update the frame pointer and the stack pointer, and load the
5932 * return value into the return register so the trace we are
5933 * returning to has it where it expects.
5935 * TODO(#1273094): we should probably modify the actual builtins to
5936 * return values via registers (rax:edx) using the C ABI and do a
5937 * reg-to-reg move.
5939 int nLocalCells = func->numSlotsInFrame();
5940 a. add_imm64_reg64(sizeof(ActRec) + cellsToBytes(nLocalCells-1), rVmSp);
5941 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
5943 emitRB(a, RBTypeFuncExit, func->fullName()->data(), saveDuringEmitRB);
5944 if (emitSavedRIPReturn) {
5945 a. jmp_reg (*rRetAddr);
5946 translator_not_reached(a);
5950 void
5951 TranslatorX64::translateNativeImpl(const Tracelet& t,
5952 const NormalizedInstruction& ni) {
5954 * We assume that NativeImpl is the only instruction in the trace,
5955 * and the only instruction for the implementation of the function.
5957 ASSERT(ni.stackOff == 0);
5958 ASSERT(m_regMap.empty());
5959 emitNativeImpl(curFunc(), true);
5962 // Warning: smashes rsi and rdi, and can't handle unclean registers.
5963 // Used between functions.
5964 void
5965 TranslatorX64::emitFrameRelease(X64Assembler& a,
5966 const NormalizedInstruction& i,
5967 bool noThis /*= false*/) {
5968 if (false) { // typecheck
5969 frame_free_locals(curFrame(), 0);
5971 a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5972 int numLocals = curFunc()->numLocals();
5973 emitImmReg(a, numLocals, argNumToRegName[1]);
5974 if (noThis) {
5975 emitCall(a, (TCA)frame_free_locals_no_this);
5976 } else {
5977 emitCall(a, (TCA)frame_free_locals);
5979 recordReentrantCall(a, i);
5982 // emitClsLocalIndex --
5983 // emitStringToClass --
5984 // emitStringToKnownClass --
5985 // emitObjToClass --
5986 // emitClsAndPals --
5987 // Helpers for AGetC/AGetL.
5989 const int kEmitClsLocalIdx = 0;
5992 * Determine if the class is defined, and fatal if not.
5993  * If reg is not noreg, return the Class* in it.
5994  * If we can statically prove that the class is defined,
5995  * all checks are omitted (e.g. it's a parent of the current,
5996 * fixed, context).
5998 void
5999 TranslatorX64::emitKnownClassCheck(const NormalizedInstruction& i,
6000 const StringData* clsName,
6001 register_name_t reg) {
6002 using namespace TargetCache;
6003 ASSERT(clsName);
6004 Class* klass = Unit::lookupClass(clsName);
6005 bool guarded = false;
6006 if (klass) {
6007 guarded = i.guardedCls;
6008 if (!guarded) {
6009 Class *ctx = curFunc()->cls();
6010 if (ctx && ctx->classof(klass)) {
6011 guarded = true;
6015 if (guarded) {
6016 if (reg != reg::noreg) {
6017 emitImmReg(a, (uint64_t)klass, reg);
6019 } else {
6020 Stats::emitInc(a, Stats::TgtCache_KnownClsHit);
6021 CacheHandle ch = allocKnownClass(clsName);
6022 if (reg == reg::noreg) {
6023 a. cmp_imm32_disp_reg32(0, ch, rVmTl);
6024 } else {
6025 a. load_reg64_disp_reg64(rVmTl, ch, reg);
6026 a. test_reg64_reg64(reg, reg);
6029 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
6030 ScratchReg clsPtr(m_regMap);
6031 astubs. lea_reg64_disp_reg64(rVmTl, ch, *clsPtr);
6032 if (false) { // typecheck
6033 Class** cache = NULL;
6034 UNUSED Class* ret =
6035 TargetCache::lookupKnownClass<false>(cache, clsName, true);
6037 // We're only passing two arguments to lookupKnownClass because
6038 // the third is ignored in the checkOnly == false case
6039 EMIT_CALL(astubs, ((TargetCache::lookupKnownClass_func_t)
6040 TargetCache::lookupKnownClass<false>),
6041 R(*clsPtr), IMM((uintptr_t)clsName));
6042 recordReentrantStubCall(i);
6043 if (reg != reg::noreg) {
6044 emitMovRegReg(astubs, rax, reg);
6050 void
6051 TranslatorX64::emitStringToKnownClass(const NormalizedInstruction& i,
6052 const StringData* clsName) {
6053 ScratchReg cls(m_regMap);
6054 emitKnownClassCheck(i, clsName, *cls);
6055 m_regMap.bindScratch(cls, i.outStack->location, KindOfClass, RegInfo::DIRTY);
6058 void
6059 TranslatorX64::emitStringToClass(const NormalizedInstruction& i) {
6060 using namespace TargetCache;
6061 if (!i.inputs[kEmitClsLocalIdx]->rtt.valueString()) {
6062 // Handle the case where we don't know the name of the class
6063 // at translation time
6064 const Location& in = i.inputs[kEmitClsLocalIdx]->location;
6065 const Location& out = i.outStack->location;
6066 CacheHandle ch = ClassCache::alloc();
6067 if (false) {
6068 StringData *name = NULL;
6069 const UNUSED Class* cls = ClassCache::lookup(ch, name);
6071 TRACE(1, "ClassCache @ %d\n", int(ch));
6072 if (i.inputs[kEmitClsLocalIdx]->rtt.isVariant()) {
6073 EMIT_CALL(a, ClassCache::lookup,
6074 IMM(ch),
6075 DEREF(in));
6076 } else {
6077 EMIT_CALL(a, ClassCache::lookup,
6078 IMM(ch),
6079 V(in));
6081 recordReentrantCall(i);
6082 m_regMap.bind(rax, out, KindOfClass, RegInfo::DIRTY);
6083 return;
6085 // We know the name of the class at translation time; use the
6086 // target cache associated with the name of the class
6087 const StringData* clsName = i.inputs[kEmitClsLocalIdx]->rtt.valueString();
6088 emitStringToKnownClass(i, clsName);
6091 void
6092 TranslatorX64::emitObjToClass(const NormalizedInstruction& i) {
6093 m_regMap.allocOutputRegs(i);
6094 const Location& in = i.inputs[kEmitClsLocalIdx]->location;
6095 const Location& out = i.outStack->location;
6096 PhysReg src = getReg(in);
6097 ScratchReg tmp(m_regMap);
6098 if (i.inputs[kEmitClsLocalIdx]->rtt.isVariant()) {
6099 emitDeref(a, src, *tmp);
6100 src = *tmp;
6102 ASSERT(i.outStack->valueType() == KindOfClass);
6103 a. load_reg64_disp_reg64(src, ObjectData::getVMClassOffset(), getReg(out));
6106 void
6107 TranslatorX64::emitClsAndPals(const NormalizedInstruction& ni) {
6108 if (ni.inputs[kEmitClsLocalIdx]->isString()) {
6109 emitStringToClass(ni);
6110 } else {
6111 emitObjToClass(ni);
6115 void
6116 TranslatorX64::analyzeAGetC(Tracelet& t, NormalizedInstruction& i) {
6117 ASSERT(i.inputs.size() == 1);
6118 ASSERT(i.outStack && !i.outLocal);
6119 ASSERT(i.outStack->valueType() == KindOfClass);
6120 const RuntimeType& rtt = i.inputs[0]->rtt;
6121 ASSERT(!rtt.isVariant());
6122 i.m_txFlags = supportedPlan(rtt.isString() ||
6123 rtt.valueType() == KindOfObject);
6124 if (rtt.isString() && rtt.valueString()) i.manuallyAllocInputs = true;
6127 void TranslatorX64::translateAGetC(const Tracelet& t,
6128 const NormalizedInstruction& ni) {
6129 if (ni.outStack) {
6130 emitClsAndPals(ni);
6134 void TranslatorX64::analyzeAGetL(Tracelet& t,
6135 NormalizedInstruction& ni) {
6136 ASSERT(ni.inputs.size() == 1);
6137 ASSERT(ni.inputs[0]->isLocal());
6138 const RuntimeType& rtt = ni.inputs[0]->rtt;
6139 ni.m_txFlags = supportedPlan(rtt.isString() ||
6140 rtt.valueType() == KindOfObject);
6143 void TranslatorX64::translateAGetL(const Tracelet& t,
6144 const NormalizedInstruction& ni) {
6145 emitClsAndPals(ni);
6148 void TranslatorX64::translateSelf(const Tracelet& t,
6149 const NormalizedInstruction& i) {
6150 m_regMap.allocOutputRegs(i);
6151 PhysReg tmp = getReg(i.outStack->location);
6152 ASSERT(curFunc()->cls());
6153 emitImmReg(a, (int64_t)curFunc()->cls(), tmp);
6156 void TranslatorX64::translateParent(const Tracelet& t,
6157 const NormalizedInstruction& i) {
6158 m_regMap.allocOutputRegs(i);
6159 PhysReg tmp = getReg(i.outStack->location);
6160 ASSERT(curFunc()->cls() && curFunc()->cls()->parent());
6161 emitImmReg(a, (int64_t)curFunc()->cls()->parent(), tmp);
6164 void TranslatorX64::analyzeSelf(Tracelet& t,NormalizedInstruction& i) {
6165 Class* clss = curClass();
6166 if (clss == NULL) {
6167 i.m_txFlags = Interp;
6168 return;
6170 i.m_txFlags = Supported;
6173 void TranslatorX64::analyzeParent(Tracelet& t,NormalizedInstruction& i) {
6174 Class* clss = curClass();
6175 if (clss == NULL) {
6176 i.m_txFlags = Interp;
6177 return;
6179 if (clss->parent() == NULL) {
6180 // clss has no parent; interpret to throw fatal
6181 i.m_txFlags = Interp;
6182 return;
6184 i.m_txFlags = Supported;
6187 void TranslatorX64::translateDup(const Tracelet& t,
6188 const NormalizedInstruction& ni) {
6189 ASSERT(ni.inputs.size() == 1);
6190 ASSERT(ni.outStack);
6191 ASSERT(!ni.inputs[0]->rtt.isVariant());
6192 m_regMap.allocOutputRegs(ni);
6193 PhysReg outR = getReg(ni.outStack->location);
6194 emitMovRegReg(a, getReg(ni.inputs[0]->location), outR);
6195 emitIncRef(outR, ni.inputs[0]->outerType());
6198 typedef std::map<int, int> ParamMap;
6200 * mapContParams determines if every named local in origFunc has a
6201 * corresponding named local in genFunc. If this step succeeds and
6202 * there's no VarEnv at runtime, the continuation's variables can be
6203 * filled completely inline in the TC (assuming there aren't too
6204 * many).
6206 bool TranslatorX64::mapContParams(ParamMap& map,
6207 const Func* origFunc, const Func* genFunc) {
6208 const StringData* const* varNames = origFunc->localNames();
6209 for (Id i = 0; i < origFunc->numNamedLocals(); ++i) {
6210 Id id = genFunc->lookupVarId(varNames[i]);
6211 if (id != kInvalidId) {
6212 map[i] = id;
6213 } else {
6214 return false;
6217 return true;
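  // Illustrative use, with hypothetical local names: if origFunc's named
  // locals are {0 => "a", 1 => "b"} and genFunc defines "a" at id 2 and "b"
  // at id 0, this fills map = {0 => 2, 1 => 0} and returns true. If genFunc
  // lacked either name it would return false, and translateCreateCont below
  // then falls back to the slow fillContinuationVars path.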
6220 void TranslatorX64::emitCallFillCont(X64Assembler& a,
6221 const Func* orig,
6222 const Func* gen) {
6223 if (false) {
6224 ActRec* fp = NULL;
6225 c_Continuation *cont = NULL;
6226 cont =
6227 VMExecutionContext::fillContinuationVars(fp, orig, gen, cont);
6229 EMIT_CALL(a,
6230 VMExecutionContext::fillContinuationVars,
6231 R(rVmFp),
6232 IMM((intptr_t)orig),
6233 IMM((intptr_t)gen),
6234 R(rax));
6237 void TranslatorX64::translateCreateCont(const Tracelet& t,
6238 const NormalizedInstruction& i) {
6239 bool getArgs = i.imm[0].u_IVA;
6240 const StringData* genName = curUnit()->lookupLitstrId(i.imm[1].u_SA);
6241 const Func* origFunc = curFunc();
6242 const Func* genFunc = origFunc->getGeneratorBody(genName);
6244 if (false) {
6245 ActRec* fp = NULL;
6246 UNUSED c_Continuation* cont =
6247 VMExecutionContext::createContinuation<true>(fp, getArgs, origFunc,
6248 genFunc);
6249 VMExecutionContext::createContinuation<false>(fp, getArgs, origFunc,
6250 genFunc);
6253 // Even callee-saved regs need to be clean, because
6254 // createContinuation will read all locals.
6255 m_regMap.cleanAll();
6256 auto helper = origFunc->isNonClosureMethod() ?
6257 VMExecutionContext::createContinuation<true> :
6258 VMExecutionContext::createContinuation<false>;
6259 EMIT_CALL(a,
6260 (TCA)helper,
6261 R(rVmFp),
6262 IMM(getArgs),
6263 IMM((intptr_t)origFunc),
6264 IMM((intptr_t)genFunc));
6265 ScratchReg holdRax(m_regMap, rax);
6267 int origLocals = origFunc->numNamedLocals();
6268 int genLocals = genFunc->numNamedLocals() - 1;
6269 ContParamMap params;
6270 if (origLocals <= kMaxInlineContLocals &&
6271 mapContParams(params, origFunc, genFunc)) {
6272 ScratchReg rScratch(m_regMap);
6273 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rScratch);
6274 a. test_reg64_reg64(*rScratch, *rScratch);
6275 DiamondReturn astubsRet;
6277 UnlikelyIfBlock<CC_NZ> ifVarEnv(a, astubs, &astubsRet);
6278 Stats::emitInc(astubs, Stats::Tx64_ContCreateSlow);
6279 emitCallFillCont(astubs, origFunc, genFunc);
6281 // fillContinuationVars returned the continuation in rax and
6282 // DiamondGuard marked rax as scratch again, so it's safe to keep
6283 // using it
6284 Stats::emitInc(a, Stats::Tx64_ContCreateFast);
6285 static const StringData* thisStr = StringData::GetStaticString("this");
6286 Id thisId = kInvalidId;
6287 bool fillThis = origFunc->isNonClosureMethod() && !origFunc->isStatic() &&
6288 ((thisId = genFunc->lookupVarId(thisStr)) != kInvalidId) &&
6289 (origFunc->lookupVarId(thisStr) == kInvalidId);
6290 ScratchReg rDest(m_regMap);
6291 if (origLocals > 0 || fillThis) {
6292 a.lea_reg64_disp_reg64(rax,
6293 c_Continuation::localsOffset(),
6294 *rDest);
6296 for (int i = 0; i < origLocals; ++i) {
6297 ASSERT(mapContains(params, i));
6298 int destOff = cellsToBytes(genLocals - params[i]);
6299 emitCopyTo(a, rVmFp, localOffset(i), *rDest, destOff, *rScratch);
6300 emitIncRefGenericRegSafe(*rDest, destOff, *rScratch);
6303 // Deal with a potential $this local in the generator body
6304 if (fillThis) {
6305 ASSERT(thisId != kInvalidId);
6306 a.load_reg64_disp_reg64(rax, CONTOFF(m_obj), *rScratch);
6307 a.test_reg64_reg64(*rScratch, *rScratch);
6309 JccBlock<CC_Z> ifObj(a);
6310 const int thisOff = cellsToBytes(genLocals - thisId);
6311 // We don't have to check for a static refcount since we
6312 // know it's an Object
6313 a.add_imm32_disp_reg32(1, TVOFF(_count), *rScratch);
6314 a.store_reg64_disp_reg64(*rScratch, thisOff + TVOFF(m_data), *rDest);
6315 a.store_imm32_disp_reg(KindOfObject, thisOff + TVOFF(m_type), *rDest);
6318 } else {
6319 Stats::emitInc(a, Stats::Tx64_ContCreateSlow);
6320 emitCallFillCont(a, origFunc, genFunc);
6322 m_regMap.bindScratch(holdRax, i.outStack->location, KindOfObject,
6323 RegInfo::DIRTY);
6326 void TranslatorX64::emitCallUnpack(X64Assembler& a,
6327 const NormalizedInstruction& i,
6328 int nCopy) {
6329 const int contIdx = 0;
6331 if (false) {
6332 c_Continuation* cont = NULL;
6333 TypedValue* dest = NULL;
6334 VMExecutionContext::unpackContinuation(cont, dest);
6336 EMIT_CALL(a,
6337 VMExecutionContext::unpackContinuation,
6338 V(i.inputs[contIdx]->location),
6339 A(Location(Location::Local, nCopy)));
6340 recordCall(a, i);
6343 void TranslatorX64::translateUnpackCont(const Tracelet& t,
6344 const NormalizedInstruction& i) {
6345 const int contIdx = 0;
6346 ASSERT(curFrame()->m_varEnv == NULL);
6347 ASSERT(i.inputs.size() == 1);
6348 ASSERT(i.inputs[contIdx]->location == Location(Location::Local, 0));
6349 ASSERT(i.outStack->outerType() == KindOfInt64);
6350 int nCopy = curFunc()->numNamedLocals() - 1;
6352 for (int loc = 1; loc <= nCopy; ++loc) {
6353 // We're at the beginning of the function. The only local in a
6354 // register should be local 0, our input
6355 ASSERT(!m_regMap.hasReg(Location(Location::Local, loc)));
6357 if (nCopy > kMaxInlineContLocals) {
6358 Stats::emitInc(a, Stats::Tx64_ContUnpackSlow);
6359 emitCallUnpack(a, i, nCopy);
6360 m_regMap.bind(rax, i.outStack->location, KindOfInt64,
6361 RegInfo::DIRTY);
6362 return;
6365 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6366 ScratchReg rLabel(m_regMap);
6368 a. test_imm32_disp_reg32(0x1, CONTOFF(m_hasExtraVars), rCont);
6369 DiamondReturn astubsRet;
6371 UnlikelyIfBlock<CC_NZ> hasVars(a, astubs, &astubsRet);
6372 Stats::emitInc(astubs, Stats::Tx64_ContUnpackSlow);
6373 emitCallUnpack(astubs, i, nCopy);
6374 emitMovRegReg(astubs, rax, *rLabel);
6376 Stats::emitInc(a, Stats::Tx64_ContUnpackFast);
6378 a. load_reg64_disp_reg64(rCont, CONTOFF(m_label), *rLabel);
6379 ScratchReg rScratch(m_regMap);
6380 ScratchReg rSrc(m_regMap);
6381 ScratchReg rZero(m_regMap);
6382 if (nCopy > 0) {
6383 a. lea_reg64_disp_reg64(rCont,
6384 c_Continuation::localsOffset(),
6385 *rSrc);
6386 emitImmReg(a, 0, *rZero);
6388 for (int srcOff = 0, destOff = localOffset(nCopy);
6389 srcOff < (int)cellsToBytes(nCopy);
6390 srcOff += sizeof(Cell), destOff += sizeof(Cell)) {
6391 emitCopyTo(a, *rSrc, srcOff, rVmFp, destOff, *rScratch);
6392 a. store_reg32_disp_reg64(*rZero, srcOff + TVOFF(m_type), *rSrc);
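    // Zeroing the source cell's type tag after each copy is what makes this
    // a move rather than a copy: the continuation's slot no longer appears
    // to own a refcounted value, so the reference now held by the frame
    // local needs no extra incRef/decRef pair.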
6395 m_regMap.bindScratch(rLabel, i.outStack->location, KindOfInt64,
6396 RegInfo::DIRTY);
6399 void TranslatorX64::emitCallPack(X64Assembler& a,
6400 const NormalizedInstruction& i,
6401 int nCopy) {
6402 const int valIdx = 0;
6403 const int contIdx = 1;
6405 // packContinuation is going to read values directly from the stack
6406 // so we have to clean everything.
6407 m_regMap.cleanAll();
6408 if (false) {
6409 c_Continuation* cont = NULL;
6410 TypedValue* tv = NULL;
6411 ActRec* fp = NULL;
6412 int label = 0;
6413 VMExecutionContext::packContinuation(cont, fp, tv, label);
6415 EMIT_CALL(a,
6416 VMExecutionContext::packContinuation,
6417 V(i.inputs[contIdx]->location),
6418 R(rVmFp),
6419 A(i.inputs[valIdx]->location),
6420 IMM(i.imm[0].u_IVA));
6421 recordCall(a, i);
6422 m_regMap.invalidateLocals(1, nCopy);
6425 void TranslatorX64::translatePackCont(const Tracelet& t,
6426 const NormalizedInstruction& i) {
6427 const int valIdx = 0;
6428 const int contIdx = 1;
6430 int nCopy = curFunc()->numNamedLocals() - 1;
6431 if (nCopy > kMaxInlineContLocals) {
6432 Stats::emitInc(a, Stats::Tx64_ContPackSlow);
6433 emitCallPack(a, i, nCopy);
6434 emitDecRefInput(a, i, valIdx);
6435 return;
6438 ScratchReg rScratch(m_regMap);
6439 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rScratch);
6440 a. test_reg64_reg64(*rScratch, *rScratch);
6441 DiamondReturn astubsRet;
6443 // TODO: Task #1132976: We can probably prove that this is impossible in
6444 // most cases using information from hphpc
6445 UnlikelyIfBlock<CC_NZ> varEnv(a, astubs, &astubsRet);
6446 Stats::emitInc(astubs, Stats::Tx64_ContPackSlow);
6447 emitCallPack(astubs, i, nCopy);
6448 emitDecRefInput(astubs, i, valIdx);
6450 Stats::emitInc(a, Stats::Tx64_ContPackFast);
6452 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6453 ScratchReg rDest(m_regMap);
6454 ScratchReg rZero(m_regMap);
6455 if (nCopy > 0) {
6456 a. lea_reg64_disp_reg64(rCont,
6457 c_Continuation::localsOffset(),
6458 *rDest);
6459 emitImmReg(a, 0, *rZero);
6461 for (int idx = nCopy, destOff = 0, srcOff = localOffset(nCopy);
6462 idx > 0;
6463 --idx, destOff += sizeof(Cell), srcOff += sizeof(Cell)) {
6464 Location loc(Location::Local, idx);
6465 if (m_regMap.hasReg(loc)) {
6466 PhysReg reg = getReg(loc);
6467 spillTo(m_regMap.getInfo(reg)->m_type, reg, true, *rDest, destOff);
6468 } else {
6469 emitCopyTo(a, rVmFp, srcOff, *rDest, destOff, *rScratch);
6471 m_regMap.invalidate(loc);
6472 a. store_reg32_disp_reg64(*rZero, srcOff + TVOFF(m_type), rVmFp);
6475 // We're moving our reference to the value from the stack to the
6476 // continuation object, so we don't have to incRef or decRef
6477 Location valLoc = i.inputs[valIdx]->location;
6478 emitTvSet(i, getReg(valLoc), i.inputs[valIdx]->outerType(), rCont,
6479 CONTOFF(m_value), false);
6481 emitImmReg(a, i.imm[0].u_IVA, *rScratch);
6482 a. store_reg64_disp_reg64(*rScratch, CONTOFF(m_label), rCont);
6485 static void continuationRaiseHelper(c_Continuation* cont) {
6486 cont->t_raised();
6487 not_reached();
6490 void TranslatorX64::emitContRaiseCheck(X64Assembler& a,
6491 const NormalizedInstruction& i) {
6492 const int contIdx = 0;
6493 ASSERT(i.inputs[contIdx]->location == Location(Location::Local, 0));
6494 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6495 a. test_imm32_disp_reg32(0x1, CONTOFF(m_should_throw), rCont);
6497 UnlikelyIfBlock<CC_NZ> ifThrow(a, astubs);
6498 if (false) {
6499 c_Continuation* c = NULL;
6500 continuationRaiseHelper(c);
6502 EMIT_CALL(astubs,
6503 continuationRaiseHelper,
6504 R(rCont));
6505 recordReentrantStubCall(i);
6506 translator_not_reached(astubs);
6510 void TranslatorX64::translateContReceive(const Tracelet& t,
6511 const NormalizedInstruction& i) {
6512 const int contIdx = 0;
6513 emitContRaiseCheck(a, i);
6514 ScratchReg rScratch(m_regMap);
6515 a. lea_reg64_disp_reg64(getReg(i.inputs[contIdx]->location),
6516 CONTOFF(m_received), *rScratch);
6517 emitIncRefGeneric(*rScratch, 0);
6518 emitCopyToStack(a, i, *rScratch, -1 * (int)sizeof(Cell));
6521 void TranslatorX64::translateContRaised(const Tracelet& t,
6522 const NormalizedInstruction& i) {
6523 emitContRaiseCheck(a, i);
6526 void TranslatorX64::translateContDone(const Tracelet& t,
6527 const NormalizedInstruction& i) {
6528 const int contIdx = 0;
6529 a. store_imm8_disp_reg(0x1, CONTOFF(m_done),
6530 getReg(i.inputs[contIdx]->location));
6533 static void contPreNextThrowHelper(c_Continuation* c) {
6534 c->preNext();
6535 not_reached();
6538 void TranslatorX64::emitContPreNext(const NormalizedInstruction& i,
6539 ScratchReg& rCont) {
6540 const Offset doneOffset = CONTOFF(m_done);
6541 CT_ASSERT((doneOffset + 1) == CONTOFF(m_running));
6542 // Check m_done and m_running at the same time
6543 a. test_imm32_disp_reg32(0x0101, doneOffset, *rCont);
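  // (The 0x0101 immediate tests bit 0 of each of the two adjacent bytes --
  // m_done at doneOffset and m_running right after it -- in one instruction.)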
6545 UnlikelyIfBlock<CC_NZ> ifThrow(a, astubs);
6546 EMIT_CALL(astubs, contPreNextThrowHelper, R(*rCont));
6547 recordReentrantStubCall(i);
6548 translator_not_reached(astubs);
6551 // ++m_index
6552 a. add_imm64_disp_reg64(0x1, CONTOFF(m_index), *rCont);
6553 // m_running = true
6554 a. store_imm8_disp_reg(0x1, CONTOFF(m_running), *rCont);
6557 void TranslatorX64::translateContNext(const Tracelet& t,
6558 const NormalizedInstruction& i) {
6559 ScratchReg rCont(m_regMap);
6560 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6561 emitContPreNext(i, rCont);
6563 // m_received.setNull()
6564 emitTvSet(i, reg::noreg, KindOfNull, *rCont, CONTOFF(m_received), false);
6567 static void contNextCheckThrowHelper(c_Continuation* cont) {
6568 cont->startedCheck();
6569 not_reached();
6572 void TranslatorX64::emitContStartedCheck(const NormalizedInstruction& i,
6573 ScratchReg& rCont) {
6574 // if (m_index < 0)
6575 a. cmp_imm64_disp_reg64(0, CONTOFF(m_index), *rCont);
6577 UnlikelyIfBlock<CC_L> whoops(a, astubs);
6578 EMIT_CALL(astubs, contNextCheckThrowHelper, *rCont);
6579 recordReentrantStubCall(i);
6580 translator_not_reached(astubs);
6584 template<bool raise>
6585 void TranslatorX64::translateContSendImpl(const NormalizedInstruction& i) {
6586 const int valIdx = 0;
6587 ASSERT(i.inputs[valIdx]->location == Location(Location::Local, 0));
6589 ScratchReg rCont(m_regMap);
6590 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6591 emitContStartedCheck(i, rCont);
6592 emitContPreNext(i, rCont);
6594 // m_received = value
6595 PhysReg valReg = getReg(i.inputs[valIdx]->location);
6596 DataType valType = i.inputs[valIdx]->outerType();
6597 emitTvSet(i, valReg, valType, *rCont, CONTOFF(m_received), true);
6599 // m_should_throw = true (maybe)
6600 if (raise) {
6601 a. store_imm8_disp_reg(0x1, CONTOFF(m_should_throw), *rCont);
6605 void TranslatorX64::translateContSend(const Tracelet& t,
6606 const NormalizedInstruction& i) {
6607 translateContSendImpl<false>(i);
6610 void TranslatorX64::translateContRaise(const Tracelet& t,
6611 const NormalizedInstruction& i) {
6612 translateContSendImpl<true>(i);
6615 void TranslatorX64::translateContValid(const Tracelet& t,
6616 const NormalizedInstruction& i) {
6617 ScratchReg rCont(m_regMap);
6618 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6620 m_regMap.allocOutputRegs(i);
6621 PhysReg validReg = getReg(i.outStack->location);
6622 // !m_done
6623 a. loadzxb_reg64_disp_reg64(*rCont, CONTOFF(m_done), validReg);
6624 a. xor_imm32_reg64(0x1, validReg);
6627 void TranslatorX64::translateContCurrent(const Tracelet& t,
6628 const NormalizedInstruction& i) {
6629 ScratchReg rCont(m_regMap);
6630 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6631 emitContStartedCheck(i, rCont);
6633 a. lea_reg64_disp_reg64(*rCont, CONTOFF(m_value), *rCont);
6634 emitIncRefGeneric(*rCont, 0);
6635 emitCopyToStack(a, i, *rCont, -1 * (int)sizeof(Cell));
6638 void TranslatorX64::translateContStopped(const Tracelet& t,
6639 const NormalizedInstruction& i) {
6640 ScratchReg rCont(m_regMap);
6641 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6642 a. store_imm8_disp_reg(0x0, CONTOFF(m_running), *rCont);
6645 void TranslatorX64::translateContHandle(const Tracelet& t,
6646 const NormalizedInstruction& i) {
6647 // Always interpreted
6648 not_reached();
6651 static void analyzeClassExistsImpl(NormalizedInstruction& i) {
6652 const int nameIdx = 1;
6653 const int autoIdx = 0;
6654 ASSERT(!i.inputs[nameIdx]->isVariant() && !i.inputs[autoIdx]->isVariant());
6655 i.m_txFlags = supportedPlan(i.inputs[nameIdx]->isString() &&
6656 i.inputs[autoIdx]->isBoolean());
6657 i.fuseBranch = (i.m_txFlags & Supported) &&
6658 i.inputs[nameIdx]->rtt.valueString() &&
6659 i.inputs[autoIdx]->rtt.valueBoolean() != RuntimeType::UnknownBool;
6662 void TranslatorX64::analyzeClassExists(Tracelet& t,
6663 NormalizedInstruction& i) {
6664 analyzeClassExistsImpl(i);
6667 void TranslatorX64::analyzeInterfaceExists(Tracelet& t,
6668 NormalizedInstruction& i) {
6669 analyzeClassExistsImpl(i);
6672 void TranslatorX64::analyzeTraitExists(Tracelet& t,
6673 NormalizedInstruction& i) {
6674 analyzeClassExistsImpl(i);
6677 static int64 classExistsSlow(const StringData* name, bool autoload,
6678 Attr typeAttr) {
6679 bool ret = Unit::classExists(name, autoload, typeAttr);
6680 // XXX: do we need to decref this during an exception?
6681 if (name->decRefCount() == 0) {
6682 const_cast<StringData*>(name)->release();
6684 return ret;
6687 void TranslatorX64::translateClassExistsImpl(const Tracelet& t,
6688 const NormalizedInstruction& i,
6689 Attr typeAttr) {
6690 const int nameIdx = 1;
6691 const int autoIdx = 0;
6692 const StringData* name = i.inputs[nameIdx]->rtt.valueString();
6693 ASSERT(IMPLIES(name, name->isStatic()));
6694 const int autoload = i.inputs[autoIdx]->rtt.valueBoolean();
6696 ScratchReg scratch(m_regMap);
6697 if (name != NULL && autoload != RuntimeType::UnknownBool) {
6698 ASSERT(i.fuseBranch);
6699 const Attr attrNotClass = Attr(AttrTrait | AttrInterface);
6700 const bool isClass = typeAttr == AttrNone;
6701 using namespace TargetCache;
6702 Stats::emitInc(a, Stats::Tx64_ClassExistsFast);
6703 CacheHandle ch = allocKnownClass(name);
6706 DiamondReturn astubsRet;
6707 a. load_reg64_disp_reg64(rVmTl, ch, *scratch);
6708 a. test_reg64_reg64(*scratch, *scratch);
6709 if (autoload) {
6710 UnlikelyIfBlock<CC_Z> ifNull(a, astubs, &astubsRet);
6711 if (false) {
6712 Class** c = NULL;
6713 UNUSED Class* ret = lookupKnownClass<true>(c, name, false);
6715 Stats::emitInc(astubs, Stats::TgtCache_ClassExistsMiss);
6716 // If the class exists after autoloading, the helper will
6717 // return the Class's flags. Otherwise, it will return a set
6718 // of flags such that our flag check at the join point below
6719 // will fail.
6720 EMIT_CALL(astubs, (lookupKnownClass_func_t)lookupKnownClass<true>,
6721 RPLUS(rVmTl, ch),
6722 IMM((uintptr_t)name),
6723 IMM(isClass));
6724 recordReentrantStubCall(i);
6725 emitMovRegReg(astubs, rax, *scratch);
6726 } else {
6727 UnlikelyIfBlock<CC_Z> ifNull(a, astubs, &astubsRet);
6728       // This isn't really a traditional slow path; count it as a hit
6729 Stats::emitInc(astubs, Stats::TgtCache_ClassExistsHit);
6730       // Provide flags so that the flag check back in the main line (a) fails
6731 emitImmReg(astubs, isClass ? attrNotClass : AttrNone, *scratch);
6733 // If we don't take the slow/NULL path, load the Class's attrs
6734 // into *scratch to prepare for the flag check.
6735 Stats::emitInc(a, Stats::TgtCache_ClassExistsHit);
6736 a. load_reg64_disp_reg64(*scratch, Class::preClassOff(),
6737 *scratch);
6738 a. load_reg64_disp_reg32(*scratch, PreClass::attrsOffset(),
6739 *scratch);
6742 if (i.changesPC) {
6743 fuseBranchSync(t, i);
6745 a. test_imm32_reg32(isClass ? attrNotClass : typeAttr, *scratch);
6746 ConditionCode cc = isClass ? CC_Z : CC_NZ;
6747 if (i.changesPC) {
6748 fuseBranchAfterBool(t, i, cc);
6749 } else {
6750 a. setcc(cc, *scratch);
6751 a. mov_reg8_reg64_unsigned(*scratch, *scratch);
6752 m_regMap.bindScratch(scratch, i.outStack->location, KindOfBoolean,
6753 RegInfo::DIRTY);
6755 } else {
6756 ASSERT(!i.fuseBranch);
6757 Stats::emitInc(a, Stats::Tx64_ClassExistsSlow);
6758 if (false) {
6759 UNUSED bool ret = false;
6760 ret = classExistsSlow(name, ret, typeAttr);
6762 EMIT_CALL(a, classExistsSlow,
6763 V(i.inputs[nameIdx]->location),
6764 V(i.inputs[autoIdx]->location),
6765 IMM(typeAttr));
6766 recordReentrantCall(i);
6767 // Our helper decrefs the string
6768 m_regMap.bind(rax, i.outStack->location, KindOfBoolean, RegInfo::DIRTY);
6772 void TranslatorX64::translateClassExists(const Tracelet& t,
6773 const NormalizedInstruction& i) {
6774 translateClassExistsImpl(t, i, AttrNone);
6777 void TranslatorX64::translateInterfaceExists(const Tracelet& t,
6778 const NormalizedInstruction& i) {
6779 translateClassExistsImpl(t, i, AttrInterface);
6782 void TranslatorX64::translateTraitExists(const Tracelet& t,
6783 const NormalizedInstruction& i) {
6784 translateClassExistsImpl(t, i, AttrTrait);
6787 // Helper function for static property access. This function emits code
6788 // which leaves a pointer to the static property for clsInput::$propInput in
6789 // register scr. We destroy scr early on, yet do not consume inputs until
6790 // later, so scr must not alias an input register. This also handles
6791 // the decref for the case where prop is not a static string.
6792 void TranslatorX64::emitStaticPropInlineLookup(const NormalizedInstruction& i,
6793 int classInputIdx,
6794 const DynLocation& propInput,
6795 PhysReg scr) {
6796 auto const& clsInput = *i.inputs[classInputIdx];
6797 const Class* cls = clsInput.rtt.valueClass();
6798 const StringData* propName = propInput.rtt.valueString();
6799 using namespace TargetCache;
6800 CacheHandle ch;
6802 ASSERT(cls && propName);
6803 // Use the uniquely known cls / prop to generate a single cache per prop
6804 const StringData* clsName = cls->preClass()->name();
6805 string sds(Util::toLower(clsName->data()) + ":" +
6806 string(propName->data(), propName->size()));
6807 StringData sd(sds.c_str(), sds.size(), AttachLiteral);
6808 ch = SPropCache::alloc(&sd);
6809 SKTRACE(1, i.source, "SPropInlineLookup %s %d\n", sd.data(), int(ch));
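  // For example (hypothetical names), a static property $bar on class Foo
  // yields the cache key "foo:bar": the class name is lowercased, the
  // property name is appended verbatim.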
6811 Stats::emitInc(a, Stats::TgtCache_SPropHit);
6813 // For the simple case of statically known class and prop name, we inline
6814 // the target cache lookup, and outline the miss case.
6815 // Load the TV pointer out of the thread-private tl_targetCaches.
6816 BOOST_STATIC_ASSERT((offsetof(SPropCache, m_tv) == 0));
6817 a. load_reg64_disp_reg64(rVmTl, ch, scr);
6818 a. test_reg64_reg64(scr, scr);
6820 // Call the slow path.
6822 UnlikelyIfBlock<CC_Z> shucks(a, astubs);
6824     // Precondition for this lookup: we don't need to pass the preClass,
6825     // as we only translate in-class lookups.
6826 ASSERT(cls == curFunc()->cls());
6827 if (false) { // typecheck
6828 StringData *data = NULL;
6829 SPropCache::lookup(ch, cls, data);
6832 std::vector<int> args(i.inputs.size(), ArgDontAllocate);
6833 args[classInputIdx] = 1;
6834 allocInputsForCall(i, &args[0]);
6836 EMIT_CALL(astubs, (TCA)SPropCache::lookup,
6837 IMM(ch), V(clsInput.location), IMM(uint64_t(propName)));
6838 recordReentrantStubCall(i);
6839 emitMovRegReg(astubs, rax, scr);
6841 // We're consuming the name as input, but it is static, no decref needed
6842 ASSERT(propInput.rtt.valueString()->isStatic());
6843 // astubs. jmp(a.code.frontier); -- implicit
6847 void TranslatorX64::analyzeCGetS(Tracelet& t, NormalizedInstruction& i) {
6848 ASSERT(i.inputs.size() == 2);
6849 ASSERT(i.inputs[0]->valueType() == KindOfClass);
6850 ASSERT(i.outStack);
6851 const Class* cls = i.inputs[0]->rtt.valueClass();
6852 const StringData* propName = i.inputs[1]->rtt.valueString();
6853 i.m_txFlags = supportedPlan(cls && propName && curFunc()->cls() == cls);
6854 i.manuallyAllocInputs = true;
6857 void TranslatorX64::translateCGetS(const Tracelet& t,
6858 const NormalizedInstruction& i) {
6859 const int kClassIdx = 0;
6860 const int kPropIdx = 1;
6862 ScratchReg sprop(m_regMap);
6863 emitStaticPropInlineLookup(i, kClassIdx, *i.inputs[kPropIdx], *sprop);
6864 emitDerefIfVariant(a, *sprop);
6865 emitIncRefGeneric(*sprop, 0);
6866 // Finally copy the thing to the stack
6867 int stackDest = 2 * sizeof(Cell) - sizeof(Cell); // popped - pushed
6868 emitCopyToStack(a, i, *sprop, stackDest);
6871 void TranslatorX64::analyzeSetS(Tracelet& t, NormalizedInstruction& i) {
6872 ASSERT(i.inputs.size() == 3);
6873 ASSERT(i.inputs[1]->valueType() == KindOfClass);
6874 ASSERT(i.outStack);
6875 const Class* cls = i.inputs[1]->rtt.valueClass();
6876 const StringData* propName = i.inputs[2]->rtt.valueString();
6877 // Might be able to broaden this: if cls is an ancestor of the current context,
6878   // the context is Fixed, and the property is not private.
6879   // Also if m_hoistable in cls is set to AlwaysHoistable, cls is defined in
6880   // the same unit as the context, and the property is public.
6881 i.m_txFlags = supportedPlan(cls && propName && curFunc()->cls() == cls);
6882 i.manuallyAllocInputs = true;
6885 void TranslatorX64::translateSetS(const Tracelet& t,
6886 const NormalizedInstruction& i) {
6887 const int kClassIdx = 1;
6889 ScratchReg sprop(m_regMap);
6890 const RuntimeType& rhsType = i.inputs[0]->rtt;
6891 emitStaticPropInlineLookup(i, kClassIdx, *i.inputs[2], *sprop);
6893 ASSERT(m_regMap.getInfo(*sprop)->m_state == RegInfo::SCRATCH);
6894 ASSERT(!rhsType.isVariant());
6896 m_regMap.allocInputReg(i, 0);
6897 m_regMap.allocOutputRegs(i);
6898 PhysReg rhsReg = getReg(i.inputs[0]->location);
6899 PhysReg outReg = getReg(i.outStack->location);
6900 emitTvSet(i, rhsReg, rhsType.outerType(), *sprop);
6901 ASSERT(i.inputs[2]->location == i.outStack->location);
6902 emitMovRegReg(rhsReg, outReg);
6905 void TranslatorX64::analyzeSetG(Tracelet& t, NormalizedInstruction& i) {
6906 ASSERT(i.inputs.size() == 2);
6907 i.m_txFlags = supportedPlan(
6908 i.inputs[1]->isString() &&
6909 !i.inputs[0]->isVariant()
6911 if (i.m_txFlags) i.manuallyAllocInputs = true;
6914 void TranslatorX64::translateSetG(const Tracelet& t,
6915 const NormalizedInstruction& i) {
6916 ASSERT(i.outStack && !i.outLocal);
6917 ASSERT(i.inputs.size() == 2);
6918 ASSERT(i.inputs[1]->isString());
6919 ASSERT(i.inputs[1]->location == i.outStack->location);
6921 const DataType type = i.inputs[0]->rtt.outerType();
6924 * Grab the global from the target cache; rax will get a pointer to
6925 * the TypedValue in the globals array, maybe newly created as a
6926 * null.
6928 emitGetGlobal(i, 1, true /* allowCreate */);
6929 ScratchReg raxSaver(m_regMap, rax);
6930 m_regMap.allocInputReg(i, 0);
6931 PhysReg src = getReg(i.inputs[0]->location);
6932 m_regMap.allocOutputRegs(i);
6933 PhysReg out = getReg(i.outStack->location);
6935 emitTvSet(i, src, type, rax);
6936 emitMovRegReg(src, out);
6939 static TypedValue* lookupGlobal(StringData* name) {
6940 VarEnv* ve = g_vmContext->m_globalVarEnv;
6941 TypedValue* r = ve->lookup(name);
6942 // If the global didn't exist, we need to leave name un-decref'd for
6943 // the caller to raise warnings.
6944 if (r) {
6945 LITSTR_DECREF(name);
6946 if (r->m_type == KindOfRef) r = r->m_data.pref->tv();
6948 return r;
6951 static TypedValue* lookupAddGlobal(StringData* name) {
6952 VarEnv* ve = g_vmContext->m_globalVarEnv;
6953 TypedValue* r = ve->lookupAdd(name);
6954 if (r->m_type == KindOfRef) r = r->m_data.pref->tv();
6955 LITSTR_DECREF(name);
6956 return r;
6960 * Look up a global in the TargetCache with the name
6961 * i.inputs[nameIdx]. If `allowCreate' is true, also creates it. If
6962 * we don't create the global, the input name is not decref'd yet.
6964 void
6965 TranslatorX64::emitGetGlobal(const NormalizedInstruction& i, int nameIdx,
6966 bool allowCreate) {
6967 using namespace TargetCache;
6968 ASSERT(i.inputs.size() > size_t(nameIdx));
6969 ASSERT(i.inputs[nameIdx]->isString());
6971 const StringData *maybeName = i.inputs[nameIdx]->rtt.valueString();
6972 if (!maybeName) {
6973 m_regMap.allocInputReg(i, nameIdx, argNumToRegName[0]);
6974 // Always do a lookup when there's no statically-known name.
6975 // There's not much we can really cache here right now anyway.
6976 EMIT_CALL(a, allowCreate ? lookupAddGlobal : lookupGlobal,
6977 V(i.inputs[nameIdx]->location));
6978 recordCall(i);
6979 return;
6982 CacheHandle ch = GlobalCache::alloc(maybeName);
6983 if (false) { // typecheck
6984 StringData* UNUSED key = NULL;
6985 TypedValue* UNUSED glob = GlobalCache::lookup(ch, key);
6986 TypedValue* UNUSED glob2 = GlobalCache::lookupCreate(ch, key);
6988 SKTRACE(1, i.source, "ch %d\n", ch);
6989 EMIT_CALL(a, allowCreate ? GlobalCache::lookupCreate
6990 : GlobalCache::lookup,
6991 IMM(ch),
6992 IMM((uint64_t)maybeName));
6993 recordCall(i);
6996 static bool
6997 isSupportedInstrCGetG(const NormalizedInstruction& i) {
6998 ASSERT(i.inputs.size() == 1);
6999 return (i.inputs[0]->rtt.isString());
7002 void
7003 TranslatorX64::analyzeCGetG(Tracelet& t, NormalizedInstruction& i) {
7004 i.m_txFlags = simplePlan(isSupportedInstrCGetG(i));
7005 if (i.m_txFlags) i.manuallyAllocInputs = true;
7008 void
7009 TranslatorX64::translateCGetG(const Tracelet& t,
7010 const NormalizedInstruction& i) {
7011 ASSERT(i.outStack && !i.outLocal);
7012 ASSERT(i.inputs.size() == 1);
7013 ASSERT(i.inputs[0]->isString());
7015 emitGetGlobal(i, 0, false /* allowCreate */);
7016 ScratchReg raxHolder(m_regMap, rax);
7018 // If non-null, rax now points to the in-memory location of the
7019 // object of unknown type. lookup() has already decref'd the name.
7020 a. test_reg64_reg64(rax, rax);
7021 DiamondReturn astubsRet;
7023 UnlikelyIfBlock<CC_Z> ifNotRax(a, astubs, &astubsRet);
7024 if (!i.inputs[0]->rtt.valueString()) {
7025 m_regMap.allocInputReg(i, 0);
7026 PhysReg reg = getReg(i.inputs[0]->location);
7027 emitDecRef(astubs, i, reg, BitwiseKindOfString);
7029 // TODO: if (MoreWarnings) raise an undefined variable warning.
7030 // (Note: when changing this remember to change the Simple flag to
7031 // Supported in analyze.)
7032 emitStoreNull(astubs, vstackOffset(i, 0), rVmSp);
7033 m_regMap.invalidate(i.outStack->location);
7036 emitCopyToStack(a, i, rax, 0);
7037 emitIncRefGeneric(rax, 0);
7038 m_regMap.invalidate(i.outStack->location);
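/*
 * A minimal sketch of what the generated CGetG code does with the lookup
 * result in rax (illustrative only):
 *
 *   if (rax == NULL) {               // global is not defined
 *     decRef(name);                  // only if the name wasn't static
 *     pushNull();                    // (a MoreWarnings notice could go here)
 *   } else {
 *     pushCopyOf(*rax);              // name was already decref'd by lookup
 *     incRefGeneric(*rax);
 *   }
 */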
7041 void TranslatorX64::analyzeFPassL(Tracelet& t,
7042 NormalizedInstruction& ni) {
7043 if (ni.preppedByRef) {
7044 analyzeVGetL(t, ni);
7045 } else {
7046 analyzeCGetL(t, ni);
7050 void TranslatorX64::translateFPassL(const Tracelet& t,
7051 const NormalizedInstruction& ni) {
7052 if (ni.preppedByRef) {
7053 translateVGetL(t, ni);
7054 } else {
7055 translateCGetL(t, ni);
7059 void TranslatorX64::analyzeFPassS(Tracelet& t,
7060 NormalizedInstruction& ni) {
7061 if (ni.preppedByRef) {
7062 // We need a VGetS translation.
7063 ni.m_txFlags = Interp;
7064 } else {
7065 analyzeCGetS(t, ni);
7069 void TranslatorX64::translateFPassS(const Tracelet& t,
7070 const NormalizedInstruction& ni) {
7071 if (ni.preppedByRef) {
7072 ASSERT(false);
7073 } else {
7074 translateCGetS(t, ni);
7078 void TranslatorX64::analyzeFPassG(Tracelet& t,
7079 NormalizedInstruction& ni) {
7080 if (ni.preppedByRef) {
7081 analyzeVGetG(t, ni);
7082 } else {
7083 analyzeCGetG(t, ni);
7087 void TranslatorX64::translateFPassG(const Tracelet& t,
7088 const NormalizedInstruction& ni) {
7089 if (ni.preppedByRef) {
7090 translateVGetG(t, ni);
7091 } else {
7092 translateCGetG(t, ni);
7096 void TranslatorX64::analyzeCheckTypeOp(Tracelet& t,
7097 NormalizedInstruction& ni) {
7098 ASSERT(ni.inputs.size() == 1);
7100 if (ni.op() == OpIsObjectL || ni.op() == OpIsObjectC) {
7101 // is_object is weird because it's supposed to return false for
7102 // things where ObjectData::isResource() is true. For now we only
7103 // translate when it is not an object.
7104 if (ni.inputs[0]->valueType() == KindOfObject) {
7105 ni.m_txFlags = Interp;
7106 return;
7110 if (ni.inputs[0]->isLocal()) {
7111 ni.manuallyAllocInputs = true;
7112 if (ni.op() != OpIssetL && ni.inputs[0]->rtt.isUninit()) {
7113 ni.m_txFlags = Supported;
7114 } else {
7115 ni.m_txFlags = Native;
7117 return;
7120 ni.m_txFlags = planHingesOnRefcounting(ni.inputs[0]->valueType());
7123 static bool checkTypeHelper(Opcode op, DataType dt) {
7124 switch (op) {
7125 case OpIssetL: return !IS_NULL_TYPE(dt);
7126 case OpIsNullL: case OpIsNullC: return IS_NULL_TYPE(dt);
7127 case OpIsStringL: case OpIsStringC: return IS_STRING_TYPE(dt);
7128 case OpIsArrayL: case OpIsArrayC: return IS_ARRAY_TYPE(dt);
7129 case OpIsIntL: case OpIsIntC: return IS_INT_TYPE(dt);
7130 case OpIsBoolL: case OpIsBoolC: return IS_BOOL_TYPE(dt);
7131 case OpIsDoubleL: case OpIsDoubleC: return IS_DOUBLE_TYPE(dt);
7133 case OpIsObjectL: case OpIsObjectC:
7134 // Note: this is because, for now, we refuse to translate when the
7135 // input is actually an object. (We'd need to emit some kind of
7136 // call to ObjectData::isResource or something.)
7137 return 0;
7139 ASSERT(false);
7140 NOT_REACHED();
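/*
 * Example inputs to the helper above: checkTypeHelper(OpIsIntL, KindOfInt64)
 * yields true, checkTypeHelper(OpIssetL, KindOfUninit) yields false, and the
 * IsObject cases always report false here because analyzeCheckTypeOp refuses
 * to translate when the input really is an object.
 */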
7143 static void warnNullThis() { raise_notice(Strings::WARN_NULL_THIS); }
7145 void
7146 TranslatorX64::translateCheckTypeOp(const Tracelet& t,
7147 const NormalizedInstruction& ni) {
7148 ASSERT(ni.inputs.size() == 1);
7149 ASSERT(ni.outStack);
7151 bool isType;
7153 if (ni.grouped && (ni.prev->op() == OpThis || ni.prev->op() == OpBareThis)) {
7154 ASSERT(ni.op() == OpIsNullC);
7155 if (ni.prev->op() == OpThis) {
7156 isType = false;
7157 } else {
7158 if (ni.changesPC) {
7159 fuseBranchSync(t, ni);
7160 a. test_imm64_disp_reg64(1, AROFF(m_this), rVmFp);
7161 if (ni.prev->imm[0].u_OA) {
7162 UnlikelyIfBlock<CC_NZ> nullThis(a, astubs);
7163 EMIT_CALL(astubs, warnNullThis);
7164 recordReentrantStubCall(ni);
7165 nullThis.reconcileEarly();
7166 astubs.test_imm64_disp_reg64(1, AROFF(m_this), rVmFp);
7168 fuseBranchAfterBool(t, ni, ni.invertCond ? CC_Z : CC_NZ);
7169 } else {
7170 m_regMap.allocOutputRegs(ni);
7171 PhysReg res = getReg(ni.outStack->location);
7172 a. test_imm64_disp_reg64(1, AROFF(m_this), rVmFp);
7173 a. setcc(ni.invertCond ? CC_Z : CC_NZ, res);
7174 if (ni.prev->imm[0].u_OA) {
7175 UnlikelyIfBlock<CC_NZ> nullThis(a, astubs);
7176 EMIT_CALL(astubs, warnNullThis);
7177 recordReentrantStubCall(ni);
7179 a. mov_reg8_reg64_unsigned(res, res);
7181 return;
7183 } else {
7184 const DataType dt = ni.inputs[0]->valueType();
7185 const bool isLocalOp = ni.inputs[0]->isLocal();
7187 isType = checkTypeHelper(ni.op(), dt) != ni.invertCond;
7188 if (!isLocalOp) {
7189 emitDecRef(ni, getReg(ni.inputs[0]->location), dt);
7191 if (isLocalOp &&
7192 ni.op() != OpIssetL &&
7193 ni.inputs[0]->rtt.isUninit()) {
7194 const StringData* name = local_name(ni.inputs[0]->location);
7195 ASSERT(name->isStatic());
7196 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
7197 recordReentrantCall(ni);
7201 m_regMap.allocOutputRegs(ni);
7202 if (ni.changesPC) {
7203 // Don't bother driving an output reg. Just take the branch
7204 // where it leads.
7205 Stats::emitInc(a, Stats::Tx64_FusedTypeCheck);
7206 fuseBranchAfterStaticBool(t, ni, isType);
7207 return;
7209 Stats::emitInc(a, Stats::Tx64_UnfusedTypeCheck);
7210 emitImmReg(a, isType, getReg(ni.outStack->location));
7213 static void badArray() {
7214 throw_bad_type_exception("array_key_exists expects an array or an object; "
7215 "false returned.");
7218 static void badKey() {
7219 raise_warning("Array key should be either a string or an integer");
7222 static inline int64 ak_exist_string_helper(StringData* key, ArrayData* arr) {
7223 int64 n;
7224 if (key->isStrictlyInteger(n)) {
7225 return arr->exists(n);
7227 return arr->exists(StrNR(key));
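/*
 * Example of the key coercion above (standard PHP array-key semantics,
 * illustrative values only): a key of "123" is strictly integral and is
 * checked as the int index 123, while "01" or "foo" are not and are
 * checked as string keys.
 */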
7230 static int64 ak_exist_string(StringData* key, ArrayData* arr) {
7231 int64 res = ak_exist_string_helper(key, arr);
7232 if (arr->decRefCount() == 0) {
7233 arr->release();
7235 if (key->decRefCount() == 0) {
7236 key->release();
7238 return res;
7241 static int64 ak_exist_int(int64 key, ArrayData* arr) {
7242 bool res = arr->exists(key);
7243 if (arr->decRefCount() == 0) {
7244 arr->release();
7246 return res;
7249 static int64 ak_exist_string_obj(StringData* key, ObjectData* obj) {
7250 CArrRef arr = obj->o_toArray();
7251 int64 res = ak_exist_string_helper(key, arr.get());
7252 if (obj->decRefCount() == 0) {
7253 obj->release();
7255 if (key->decRefCount() == 0) {
7256 key->release();
7258 return res;
7261 static int64 ak_exist_int_obj(int64 key, ObjectData* obj) {
7262 CArrRef arr = obj->o_toArray();
7263 bool res = arr.get()->exists(key);
7264 if (obj->decRefCount() == 0) {
7265 obj->release();
7267 return res;
7270 void
7271 TranslatorX64::analyzeAKExists(Tracelet& t, NormalizedInstruction& i) {
7272 const int keyIx = 1;
7273 const int arrIx = 0;
7275 const DataType dta = i.inputs[arrIx]->valueType();
7276 const DataType dtk = i.inputs[keyIx]->valueType();
7278 bool reentrant = (dta != KindOfArray && dta != KindOfObject) ||
7279 (!IS_STRING_TYPE(dtk) && dtk != KindOfInt64 && dtk != KindOfNull);
7281 i.m_txFlags = reentrant ? Supported : Simple;
7282 i.manuallyAllocInputs = true;
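/*
 * Illustrative mapping of (container type, key type) to the plan chosen
 * above:
 *
 *   (KindOfArray,  KindOfStaticString) -> Simple
 *   (KindOfObject, KindOfInt64)        -> Simple
 *   (KindOfArray,  KindOfDouble)       -> Supported (badKey warning)
 *   (KindOfInt64,  anything)           -> Supported (badArray warning)
 */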
7285 void
7286 TranslatorX64::translateAKExists(const Tracelet& t,
7287 const NormalizedInstruction& ni) {
7288 ASSERT(ni.inputs.size() == 2);
7289 ASSERT(ni.outStack);
7291 const int keyIx = 1;
7292 const int arrIx = 0;
7294 const DataType dta = ni.inputs[arrIx]->valueType();
7295 const DataType dtk = ni.inputs[keyIx]->valueType();
7296 TCA string_func = (TCA)ak_exist_string;
7297 TCA int_func = (TCA)ak_exist_int;
7299 int result = -1;
7300 int args[2];
7301 args[keyIx] = 0;
7302 args[arrIx] = 1;
7303 switch (dta) {
7304 case KindOfObject:
7305 string_func = (TCA)ak_exist_string_obj;
7306 int_func = (TCA)ak_exist_int_obj;
7307 case KindOfArray:
7308 switch (dtk) {
7309 case BitwiseKindOfString:
7310 case KindOfStaticString:
7311 case KindOfInt64: {
7312 allocInputsForCall(ni, args);
7313 PhysReg rk = getReg(ni.inputs[keyIx]->location);
7314 PhysReg ra = getReg(ni.inputs[arrIx]->location);
7315 m_regMap.scrubStackEntries(ni.outStack->location.offset);
7316 EMIT_CALL(a, dtk == KindOfInt64 ? int_func : string_func,
7317 R(rk), R(ra));
7318 recordCall(ni);
7319 break;
7321 case KindOfNull:
7322 if (dta == KindOfArray) {
7323 args[keyIx] = ArgDontAllocate;
7324 allocInputsForCall(ni, args);
7325 PhysReg ra = getReg(ni.inputs[arrIx]->location);
7326 m_regMap.scrubStackEntries(ni.outStack->location.offset);
7327 EMIT_CALL(a, string_func,
7328 IMM((uint64_t)empty_string.get()), R(ra));
7329 recordCall(ni);
7330 } else {
7331 result = ni.invertCond;
7333 break;
7334 default:
7335 EMIT_CALL(a, badKey);
7336 recordReentrantCall(ni);
7337 result = ni.invertCond;
7338 break;
7340 break;
7341 default:
7342 EMIT_CALL(a, badArray);
7343 recordReentrantCall(ni);
7344 result = ni.invertCond;
7345 break;
7348 if (result >= 0) {
7349 if (ni.changesPC) {
7350 fuseBranchAfterStaticBool(t, ni, result);
7351 return;
7352 } else {
7353 m_regMap.allocOutputRegs(ni);
7354 emitImmReg(a, result, getReg(ni.outStack->location));
7356 } else {
7357 ScratchReg res(m_regMap, rax);
7358 if (ni.changesPC) {
7359 fuseBranchSync(t, ni);
7360 a. test_reg64_reg64(*res, *res);
7361 fuseBranchAfterBool(t, ni, ni.invertCond ? CC_Z : CC_NZ);
7362 } else {
7363 if (ni.invertCond) {
7364 a. xor_imm32_reg64(1, *res);
7366 m_regMap.bindScratch(res, ni.outStack->location, KindOfBoolean,
7367 RegInfo::DIRTY);
7372 void
7373 TranslatorX64::analyzeSetOpL(Tracelet& t, NormalizedInstruction& i) {
7374 ASSERT(i.inputs.size() == 2);
7375 const SetOpOp subOp = SetOpOp(i.imm[1].u_OA);
7376 Opcode arithOp = setOpOpToOpcodeOp(subOp);
7377 i.m_txFlags = nativePlan(i.inputs[0]->isInt() &&
7378 i.inputs[1]->isInt() &&
7379 (arithOp == OpAdd || arithOp == OpSub ||
7380 arithOp == OpMul ||
7381 arithOp == OpBitAnd || arithOp == OpBitOr ||
7382 arithOp == OpBitXor));
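/*
 * Example covered by the native plan above (illustrative): with $x known
 * to hold an int, "$x += 5" (SetOpL with the add sub-op) reduces to a
 * single integer add on the local via binaryArithLocal in the translation
 * below; other input types or sub-ops are not translated natively.
 */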
7385 void
7386 TranslatorX64::translateSetOpL(const Tracelet& t,
7387 const NormalizedInstruction& i) {
7388 const vector<DynLocation*>& inputs = i.inputs;
7389 ASSERT(inputs.size() >= 2);
7390 ASSERT(i.outStack && i.outLocal);
7391 const int valIdx = 0;
7392 const int localIdx = 1;
7393 ASSERT(inputs[localIdx]->isLocal());
7394 ASSERT(inputs[valIdx]->isStack());
7395 ASSERT(inputs[valIdx]->outerType() != KindOfRef);
7397 const SetOpOp subOp = SetOpOp(i.imm[1].u_OA);
7398 Opcode arithOp = setOpOpToOpcodeOp(subOp);
7399 m_regMap.allocOutputRegs(i);
7400 binaryArithLocal(i, arithOp, *inputs[valIdx], *inputs[localIdx],
7401 *i.outStack);
7404 void
7405 TranslatorX64::analyzeIncDecL(Tracelet& t, NormalizedInstruction& i) {
7406 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
7409 void
7410 TranslatorX64::translateIncDecL(const Tracelet& t,
7411 const NormalizedInstruction& i) {
7412 const vector<DynLocation*>& inputs = i.inputs;
7413 ASSERT(inputs.size() == 1);
7414 ASSERT(i.outLocal);
7415 ASSERT(inputs[0]->isLocal());
7416 const IncDecOp oplet = IncDecOp(i.imm[1].u_OA);
7417 ASSERT(oplet == PreInc || oplet == PostInc || oplet == PreDec ||
7418 oplet == PostDec);
7419 ASSERT(inputs[0]->isInt() && (!i.outStack || i.outStack->isInt()));
7420 bool post = (oplet == PostInc || oplet == PostDec);
7421 bool pre = !post;
7422 bool inc = (oplet == PostInc || oplet == PreInc);
7424 m_regMap.allocOutputRegs(i);
7425 PhysReg localVal = getReg(inputs[0]->location);
7426 if (i.outStack && post) { // $a++, $a--
7427 PhysReg output = getReg(i.outStack->location);
7428 emitMovRegReg(localVal, output);
7430 if (inc) {
7431 a. add_imm32_reg64(1, localVal);
7432 } else {
7433 a. sub_imm32_reg64(1, localVal);
7435 if (i.outStack && pre) { // --$a, ++$a
7436 PhysReg output = getReg(i.outStack->location);
7437 emitMovRegReg(localVal, output);
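/*
 * The register moves above implement the usual pre/post semantics; as a
 * sketch, with the local known to be an int:
 *
 *   post ($a++ / $a--):  out = local; local = local +/- 1;
 *   pre  (++$a / --$a):  local = local +/- 1; out = local;
 */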
7441 void
7442 TranslatorX64::translateUnsetL(const Tracelet& t,
7443 const NormalizedInstruction& i) {
7444 ASSERT(i.inputs.size() == 1);
7445 ASSERT(!i.outStack && i.outLocal);
7446 const int locIdx = 0;
7447 const DynLocation& localDl = *i.inputs[locIdx];
7448 ASSERT(localDl.isLocal());
7450 // We have to mark the output register as dirty to ensure that
7451 // the type gets spilled at the end of the tracelet
7452 m_regMap.allocOutputRegs(i);
7454 DataType type = localDl.outerType();
7455 // decRef the value that currently lives in the local if appropriate.
7456 emitDecRef(i, getReg(localDl.location), type);
7460 void
7461 TranslatorX64::analyzeReqLit(Tracelet& t, NormalizedInstruction& i,
7462 InclOpFlags flags) {
7463 ASSERT(i.inputs.size() == 1);
7464 Eval::PhpFile* efile = g_vmContext->lookupIncludeRoot(
7465 (StringData*)i.inputs[0]->rtt.valueString(),
7466 flags, NULL);
7467 i.m_txFlags = supportedPlan(i.inputs[0]->isString() &&
7468 i.inputs[0]->rtt.valueString() != NULL &&
7469 efile &&
7470 (RuntimeOption::RepoAuthoritative ||
7471 RuntimeOption::ServerStatCache));
7472 if (efile && efile->unit()->getMainReturn()->m_type != KindOfUninit) {
7473 i.outStack->rtt = RuntimeType(efile->unit()->getMainReturn()->m_type);
7476 // We don't need the reference lookupIncludeRoot made for us.
7477 if (efile) efile->decRef();
7478 i.manuallyAllocInputs = true;
7481 void
7482 TranslatorX64::analyzeReqDoc(Tracelet& t, NormalizedInstruction& i) {
7483 analyzeReqLit(t, i, InclOpDocRoot);
7486 void
7487 TranslatorX64::analyzeReqMod(Tracelet& t, NormalizedInstruction& i) {
7488 analyzeReqLit(t, i, InclOpDocRoot | InclOpLocal);
7491 void
7492 TranslatorX64::analyzeReqSrc(Tracelet& t, NormalizedInstruction& i) {
7493 analyzeReqLit(t, i, InclOpRelative | InclOpLocal);
7496 void
7497 TranslatorX64::translateReqLit(const Tracelet& t,
7498 const NormalizedInstruction& i,
7499 InclOpFlags flags) {
7500 bool local = flags & InclOpLocal;
7501 StringData *s = const_cast<StringData*>(i.inputs[0]->rtt.valueString());
7502 HPHP::Eval::PhpFile* efile =
7503 g_vmContext->lookupIncludeRoot(s, flags, NULL);
7505 * lookupIncludeRoot increments the refcount for us. This reference is
7506 * going to be burned into the translation cache. We will remove it only
7507 * when the file changes (via invalidateFile), and we're sure that no
7508 * outstanding requests are using the old code (via the Treadmill
7509 * module).
7511 TRACE(1, "lookupIncludeRoot: %s -> %p c %d\n", s->data(), efile,
7512 efile->getRef());
7514 * Remember that this tracelet (not just this instruction) now depends on the
7515 * contents of the required file.
7517 m_srcDB.recordDependency(efile, t.m_sk);
7518 Unit *unit = efile->unit();
7519 Func *func = unit->getMain(local ? NULL : curClass());
7521 const Offset after = nextSrcKey(t, i).offset();
7522 TRACE(1, "requireHelper: efile %p offset %d%s\n", efile, after,
7523 i.skipSync ? " [skipsync]" : "");
7525 if (i.skipSync) {
7527 * getting here means there was nothing to do between
7528 * the previous req and this one. Any spill code we generate
7529 * here would be broken (because the rbx is wrong), so
7530 * verify that we don't generate anything...
7532 TCA s DEBUG_ONLY = a.code.frontier;
7533 syncOutputs(0);
7534 ASSERT(s == a.code.frontier);
7535 } else {
7536 syncOutputs(i);
7538 ReqLitStaticArgs* args = m_globalData.alloc<ReqLitStaticArgs>();
7539 emitImmReg(a, (uint64_t)args, argNumToRegName[0]);
7540 emitCall(a, (TCA)reqLitHelper, true);
7542 args->m_efile = efile;
7543 args->m_pseudoMain = emitServiceReq(false, REQ_BIND_REQUIRE, 3,
7544 uint64_t(args),
7545 uint64_t(func), uint64_t(func->base()));
7546 args->m_pcOff = after;
7547 args->m_local = local;
7549 if (i.breaksTracelet) {
7550 SrcKey fallThru(curFunc(), after);
7551 emitBindJmp(fallThru);
7552 } else {
7554 * When we get here, rVmSp points to the actual top of stack,
7555 * but the rest of this tracelet assumes that rVmSp is set to
7556 * the top of the stack at the beginning of the tracelet, so we
7557 * have to fix it up here.
7560 if (!i.outStack) {
7561 /* as a special case, if we're followed by a pop, we return a
7562 non-refcounted type, and the pop is itself followed by another
7563 require, then we can avoid the add here and the sub in the
7564 following require
7566 } else {
7567 int delta = i.stackOff + getStackDelta(i);
7568 if (delta != 0) {
7569 // i.stackOff is in negative Cells, not bytes.
7570 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
7576 void
7577 TranslatorX64::translateReqDoc(const Tracelet& t,
7578 const NormalizedInstruction& i) {
7579 translateReqLit(t, i, InclOpDocRoot);
7582 void
7583 TranslatorX64::translateReqMod(const Tracelet& t,
7584 const NormalizedInstruction& i) {
7585 translateReqLit(t, i, InclOpDocRoot | InclOpLocal);
7588 void
7589 TranslatorX64::translateReqSrc(const Tracelet& t,
7590 const NormalizedInstruction& i) {
7591 translateReqLit(t, i, InclOpRelative | InclOpLocal);
7595 TranslatorX64::emitNativeTrampoline(TCA helperAddr) {
7596 if (!atrampolines.code.canEmit(m_trampolineSize)) {
7597 // not enough space to emit a trampoline, so just return the
7598 // helper address and emitCall will emit the right sequence
7599 // to call it indirectly
7600 TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
7601 ASSERT(false);
7602 return helperAddr;
7604 uint32_t index = m_numNativeTrampolines++;
7605 TCA trampAddr = atrampolines.code.frontier;
7606 if (Stats::enabled()) {
7607 Stats::emitInc(atrampolines, &Stats::tl_helper_counters[0], index);
7608 char* name = Util::getNativeFunctionName(helperAddr);
7609 const size_t limit = 50;
7610 if (strlen(name) > limit) {
7611 name[limit] = '\0';
7613 Stats::helperNames[index] = name;
7615 atrampolines.mov_imm64_reg((int64_t)helperAddr, rScratch);
7616 atrampolines.jmp_reg(rScratch);
7617 atrampolines.ud2();
7618 trampolineMap[helperAddr] = trampAddr;
7619 if (m_trampolineSize == 0) {
7620 m_trampolineSize = atrampolines.code.frontier - trampAddr;
7621 ASSERT(m_trampolineSize >= kMinPerTrampolineSize);
7623 recordBCInstr(OpNativeTrampoline, atrampolines, trampAddr);
7624 return trampAddr;
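/*
 * Each trampoline emitted above is just, roughly:
 *
 *   mov  rScratch, helperAddr    ; 64-bit absolute immediate
 *   jmp  rScratch
 *   ud2                          ; trap if anything falls through
 *
 * which lets a near (rel32) call site inside the translation cache reach
 * helpers that live outside its +/-2GB range.
 */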
7628 TranslatorX64::getNativeTrampoline(TCA helperAddr) {
7629 if (!RuntimeOption::EvalJitTrampolines && !Stats::enabled()) {
7630 return helperAddr;
7632 TCA trampAddr = (TCA)mapGet<PointerMap>(trampolineMap, helperAddr);
7633 if (trampAddr) {
7634 return trampAddr;
7636 return emitNativeTrampoline(helperAddr);
7638 void TranslatorX64::analyzeDefCls(Tracelet& t,
7639 NormalizedInstruction& i) {
7640 i.m_txFlags = Supported;
7643 static void defClsHelper(PreClass *preClass) {
7644 ASSERT(tl_regState == REGSTATE_DIRTY);
7645 tl_regState = REGSTATE_CLEAN;
7646 Unit::defClass(preClass);
7649 * m_defClsHelper sync'd the registers for us already. This means
7650 * if an exception propagates we want to leave things as
7651 * REGSTATE_CLEAN, since we're still in sync. Only set it to dirty
7652 * if we are actually returning to run in the TC again.
7654 tl_regState = REGSTATE_DIRTY;
7657 void TranslatorX64::translateDefCls(const Tracelet& t,
7658 const NormalizedInstruction& i) {
7659 int cid = i.imm[0].u_IVA;
7660 const Opcode* after = curUnit()->at(i.source.offset());
7661 PreClass* c = curFunc()->unit()->lookupPreClassId(cid);
7663 ASSERT(m_defClsHelper);
7666 compute the corrected stack ptr as a pseudo-param to m_defClsHelper
7667 which it will store in g_vmContext in case of fatals or __autoload
7669 m_regMap.cleanReg(rax);
7670 m_regMap.smashReg(rax);
7671 ScratchReg offset(m_regMap, rax);
7672 a. lea_reg64_disp_reg64(rVmSp, -cellsToBytes(i.stackOff), rax);
7674 EMIT_CALL(a, m_defClsHelper, IMM((uint64)c), IMM((uint64)after));
7677 void TranslatorX64::analyzeDefFunc(Tracelet& t,
7678 NormalizedInstruction& i) {
7679 i.m_txFlags = Supported;
7682 void defFuncHelper(Func *f) {
7683 f->setCached();
7686 void TranslatorX64::translateDefFunc(const Tracelet& t,
7687 const NormalizedInstruction& i) {
7688 int fid = i.imm[0].u_IVA;
7689 Func* f = curFunc()->unit()->lookupFuncId(fid);
7691 EMIT_CALL(a, defFuncHelper, IMM((uint64)f));
7692 recordReentrantCall(i);
7695 void
7696 TranslatorX64::analyzeFPushFunc(Tracelet& t, NormalizedInstruction& i) {
7697 ASSERT(i.inputs.size() >= 1);
7698 // The input might be an object implementing __invoke()
7699 i.m_txFlags = simplePlan(i.inputs[0]->isString());
7702 void
7703 TranslatorX64::translateFPushFunc(const Tracelet& t,
7704 const NormalizedInstruction& i) {
7705 using namespace TargetCache;
7706 CacheHandle ch = FuncCache::alloc();
7707 ASSERT(i.inputs.size() == 1);
7708 Location& inLoc = i.inputs[0]->location;
7710 m_regMap.allocOutputRegs(i);
7711 m_regMap.scrubStackRange(i.stackOff - 1,
7712 i.stackOff - 1 + kNumActRecCells);
7713 // Popped one cell, pushed an actrec
7714 int startOfActRec = int(sizeof(Cell)) - int(sizeof(ActRec));
7715 size_t funcOff = AROFF(m_func) + startOfActRec;
7716 size_t thisOff = AROFF(m_this) + startOfActRec;
7717 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
7718 emitPushAR(i, NULL, sizeof(Cell) /* bytesPopped */);
7719 if (false) { // typecheck
7720 StringData sd("foo");
7721 const UNUSED Func* f = FuncCache::lookup(ch, &sd);
7723 SKTRACE(1, i.source, "ch %d\n", ch);
7724 EMIT_CALL(a, FuncCache::lookup, IMM(ch), V(inLoc));
7725 recordCall(i);
7726 emitVStackStore(a, i, rax, funcOff, sz::qword);
7729 void
7730 TranslatorX64::analyzeFPushClsMethodD(Tracelet& t, NormalizedInstruction& i) {
7731 i.m_txFlags = supportedPlan(true);
7734 void
7735 TranslatorX64::translateFPushClsMethodD(const Tracelet& t,
7736 const NormalizedInstruction& i) {
7737 using namespace TargetCache;
7738 const StringData* meth = curUnit()->lookupLitstrId(i.imm[1].u_SA);
7739 const NamedEntityPair& np = curUnit()->lookupNamedEntityPairId(i.imm[2].u_SA);
7740 const StringData* cls = np.first;
7741 ASSERT(meth && meth->isStatic() &&
7742 cls && cls->isStatic());
7743 ASSERT(i.inputs.size() == 0);
7745 const Class* baseClass = Unit::lookupClass(np.second);
7746 bool magicCall = false;
7747 const Func* func = lookupImmutableMethod(baseClass, meth, magicCall,
7748 true /* staticLookup */);
7750 m_regMap.scrubStackRange(i.stackOff,
7751 i.stackOff + kNumActRecCells);
7753 int startOfActRec = -int(sizeof(ActRec));
7754 SKTRACE(2, i.source, "FPushClsMethodD %s :: %s\n",
7755 cls->data(), meth->data());
7757 size_t clsOff = AROFF(m_cls) + startOfActRec;
7758 if (func) {
7759 emitKnownClassCheck(i, cls, reg::noreg);
7760 Stats::emitInc(a, Stats::TgtCache_StaticMethodBypass);
7761 emitPushAR(i, func, 0 /*bytesPopped*/,
7762 false /* isCtor */, false /* clearThis */,
7763 magicCall ? uintptr_t(meth) | 1 : 0 /* varEnvInvName */);
7765 setupActRecClsForStaticCall(i, func, baseClass, clsOff, false);
7766 } else {
7767 Stats::emitInc(a, Stats::TgtCache_StaticMethodHit);
7768 CacheHandle ch = StaticMethodCache::alloc(cls, meth, getContextName());
7769 ScratchReg rFunc(m_regMap);
7770 // Unconditionally set rCls; if we miss, the miss path will clean it up for
7771 // us. The fill path has already |'ed in the necessary 1.
7772 ScratchReg rCls(m_regMap);
7773 a. load_reg64_disp_reg64(rVmTl,
7774 ch + offsetof(StaticMethodCache, m_cls),
7775 *rCls);
7776 emitVStackStore(a, i, *rCls, clsOff);
7777 TCA stubsSkipRet;
7778 a. load_reg64_disp_reg64(rVmTl, ch, *rFunc);
7779 a. test_reg64_reg64(*rFunc, *rFunc);
7781 UnlikelyIfBlock<CC_Z> miss(a, astubs);
7782 if (false) { // typecheck
7783 const UNUSED Func* f = StaticMethodCache::lookup(ch, np.second,
7784 cls, meth);
7786 EMIT_CALL(astubs,
7787 StaticMethodCache::lookup,
7788 IMM(ch),
7789 IMM(int64(np.second)),
7790 IMM(int64(cls)),
7791 IMM(int64(meth)));
7792 recordReentrantStubCall(i);
7793 emitMovRegReg(astubs, rax, *rFunc);
7794 // NULL return means our work is done; see also
7795 // translateFPushClsMethodF.
7796 miss.reconcileEarly();
7797 astubs.test_reg64_reg64(*rFunc, *rFunc);
7798 stubsSkipRet = astubs.code.frontier;
7799 astubs.jz(a.code.frontier); // 1f to be patched later
7803 FreezeRegs ice(m_regMap);
7804 emitPushAR(i, NULL);
7805 size_t funcOff = AROFF(m_func) + startOfActRec;
7806 emitVStackStore(a, i, *rFunc, funcOff, sz::qword);
7808 // 1:
7809 astubs.patchJcc(stubsSkipRet, a.code.frontier);
7813 void
7814 TranslatorX64::analyzeFPushClsMethodF(Tracelet& t,
7815 NormalizedInstruction& i) {
7816 ASSERT(i.inputs[0]->valueType() == KindOfClass);
7817 i.m_txFlags = supportedPlan(
7818 i.inputs[1]->rtt.valueString() != NULL && // We know the method name
7819 i.inputs[0]->valueType() == KindOfClass &&
7820 i.inputs[0]->rtt.valueClass() != NULL // We know the class name
7824 void
7825 TranslatorX64::translateFPushClsMethodF(const Tracelet& t,
7826 const NormalizedInstruction& i) {
7827 using namespace TargetCache;
7828 ASSERT(!curFunc()->isPseudoMain());
7829 ASSERT(curFunc()->cls() != NULL); // self:: and parent:: should only
7830 // appear in methods
7831 DynLocation* clsLoc = i.inputs[0];
7832 DynLocation* nameLoc = i.inputs[1];
7833 const StringData* name = nameLoc->rtt.valueString();
7834 ASSERT(name && name->isStatic());
7836 // Even though we know the Class* at compile time, it's not
7837 // guaranteed to be the same between requests. The name, however, is
7838 // fixed, so we can use that.
7839 const Class* cls = clsLoc->rtt.valueClass();
7840 ASSERT(cls);
7841 bool magicCall = false;
7842 const Func* func = lookupImmutableMethod(cls, name, magicCall,
7843 true /* staticLookup */);
7845 const int bytesPopped = 2 * sizeof(Cell); // [A C] popped
7846 const int startOfActRec = -int(sizeof(ActRec)) + bytesPopped;
7847 const Offset clsOff = startOfActRec + AROFF(m_cls);
7849 UNUSED ActRec* fp = curFrame();
7850 ASSERT(!fp->hasThis() || fp->getThis()->instanceof(cls));
7851 if (func) {
7852 Stats::emitInc(a, Stats::TgtCache_StaticMethodFBypass);
7853 emitPushAR(i, func, bytesPopped,
7854 false /* isCtor */, false /* clearThis */,
7855 magicCall ? uintptr_t(name) | 1 : 0 /* varEnvInvName */);
7857 setupActRecClsForStaticCall(i, func, cls, clsOff, true);
7858 m_regMap.scrubStackRange(i.stackOff - 2,
7859 i.stackOff - 2 + kNumActRecCells);
7860 } else {
7861 const StringData* clsName = cls->name();
7862 CacheHandle ch = StaticMethodFCache::alloc(clsName, name, getContextName());
7864 Stats::emitInc(a, Stats::TgtCache_StaticMethodFHit);
7865 TCA stubsSkipRet;
7866 ScratchReg rFunc(m_regMap);
7867 a. load_reg64_disp_reg64(rVmTl, ch, *rFunc);
7868 a. test_reg64_reg64(*rFunc, *rFunc);
7870 UnlikelyIfBlock<CC_Z> miss(a, astubs);
7871 if (false) { // typecheck
7872 const UNUSED Func* f = StaticMethodFCache::lookup(ch, cls, name);
7874 EMIT_CALL(astubs,
7875 StaticMethodFCache::lookup,
7876 IMM(ch),
7877 V(clsLoc->location),
7878 V(nameLoc->location));
7879 recordReentrantStubCall(i);
7880 emitMovRegReg(astubs, rax, *rFunc);
7881 // if rax == NULL, the helper interpreted the entire
7882 // instruction for us. Skip over the rest of the emitted code in
7883 // a, but we don't want to skip the branch spill/fill code.
7884 miss.reconcileEarly();
7885 astubs.test_reg64_reg64(*rFunc, *rFunc);
7886 stubsSkipRet = astubs.code.frontier;
7887 astubs.jz(a.code.frontier); // to be patched later
7890 const Offset funcOff = startOfActRec + AROFF(m_func);
7891 m_regMap.scrubStackRange(i.stackOff - 2,
7892 i.stackOff - 2 + kNumActRecCells);
7894 FreezeRegs ice(m_regMap);
7895 emitPushAR(i, NULL, bytesPopped);
7896 emitVStackStore(a, i, *rFunc, funcOff);
7898 // We know we're in a method so we don't have to worry about
7899 // rVmFp->m_cls being NULL. We just have to figure out if it's a
7900 // Class* or $this, and whether or not we should pass along $this or
7901 // its class.
7902 PhysReg rCls = *rFunc; // no need to allocate another scratch
7903 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
7904 a. test_imm32_reg64(1, rCls);
7906 JccBlock<CC_NZ> ifThis(a);
7907 // rCls is holding $this. Should we pass it to the callee?
7908 a. cmp_imm32_disp_reg32(1, ch + offsetof(StaticMethodFCache, m_static),
7909 rVmTl);
7911 IfElseBlock<CC_NE> ifStatic(a);
7912 // We're calling a static method. Load (this->m_cls | 0x1) into rCls.
7913 a.load_reg64_disp_reg64(rCls, ObjectData::getVMClassOffset(), rCls);
7914 a.or_imm32_reg64(1, rCls);
7916 ifStatic.Else();
7917 // We're calling an instance method. incRef $this.
7918 emitIncRef(rCls, KindOfObject);
7921 emitVStackStore(a, i, rCls, clsOff);
7924 astubs.patchJcc(stubsSkipRet, a.code.frontier);
7925 // No need to decref our inputs: one was KindOfClass and the other's
7926 // a static string.
7930 void
7931 TranslatorX64::analyzeFPushObjMethodD(Tracelet& t,
7932 NormalizedInstruction &i) {
7933 DynLocation* objLoc = i.inputs[0];
7934 i.m_txFlags = supportedPlan(objLoc->valueType() == KindOfObject);
7937 void
7938 TranslatorX64::translateFPushObjMethodD(const Tracelet &t,
7939 const NormalizedInstruction& i) {
7940 ASSERT(i.inputs.size() == 1);
7941 Location& objLoc = i.inputs[0]->location;
7942 ASSERT(i.inputs[0]->valueType() == KindOfObject);
7943 int id = i.imm[1].u_IVA;
7944 const StringData* name = curUnit()->lookupLitstrId(id);
7946 const Class* baseClass = i.inputs[0]->rtt.valueClass();
7947 bool magicCall = false;
7948 const Func* func = lookupImmutableMethod(baseClass, name, magicCall,
7949 false /* staticLookup */);
7950 m_regMap.scrubStackRange(i.stackOff - 1,
7951 i.stackOff - 1 + kNumActRecCells);
7952 // Popped one cell, pushed an actrec
7953 int startOfActRec = int(sizeof(Cell)) - int(sizeof(ActRec));
7954 size_t thisOff = AROFF(m_this) + startOfActRec;
7955 size_t funcOff = AROFF(m_func) + startOfActRec;
7956 emitPushAR(i, func, sizeof(Cell) /*bytesPopped*/,
7957 false /* isCtor */, false /* clearThis */,
7958 func && magicCall ? uintptr_t(name) | 1 : 0 /* varEnvInvName */);
7960 if (!func) {
7961 if (baseClass && !(baseClass->attrs() & AttrInterface)) {
7962 MethodLookup::LookupResult res =
7963 g_vmContext->lookupObjMethod(func, baseClass, name, false);
7964 if ((res == MethodLookup::MethodFoundWithThis ||
7965 res == MethodLookup::MethodFoundNoThis) &&
7966 !func->isAbstract()) {
7968 * if we found the func in baseClass, then either:
7969 * - it's private, and this is always going to be the
7970 * called function, or
7971 * - any derived class must have a func that matches in
7972 * staticness, and is at least as accessible (and in
7973 * particular, you can't override a public/protected
7974 * method with a private method)
7976 if (func->attrs() & AttrPrivate) {
7977 emitVStackStoreImm(a, i, uintptr_t(func), funcOff, sz::qword);
7978 } else {
7979 Offset methodsOff = Class::getMethodsOffset();
7980 Offset vecOff = methodsOff + Class::MethodMap::vecOff();
7981 ScratchReg scratch(m_regMap);
7982 // get the object's class into *scratch
7983 a. load_reg64_disp_reg64(getReg(objLoc),
7984 ObjectData::getVMClassOffset(),
7985 *scratch);
7986 if (res == MethodLookup::MethodFoundNoThis) {
7987 emitDecRef(a, i, getReg(objLoc), KindOfObject);
7988 a. lea_reg64_disp_reg64(*scratch, 1, getReg(objLoc));
7990 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
7992 // get the method vector into *scratch
7993 a. load_reg64_disp_reg64(*scratch, vecOff, *scratch);
7994 // get the func
7995 a. load_reg64_disp_reg64(*scratch,
7996 func->methodSlot() * sizeof(Func*),
7997 *scratch);
7998 emitVStackStore(a, i, *scratch, funcOff, sz::qword);
7999 Stats::emitInc(a, Stats::TgtCache_MethodFast);
8000 return;
8002 } else {
8003 func = NULL;
8008 if (func) {
8009 if (func->attrs() & AttrStatic) {
8010 if (func->attrs() & AttrPrivate) {
8011 emitVStackStoreImm(a, i, uintptr_t(curFunc()->cls()) | 1,
8012 thisOff, sz::qword);
8013 } else {
8014 ScratchReg scratch(m_regMap);
8015 a. load_reg64_disp_reg64(getReg(objLoc),
8016 ObjectData::getVMClassOffset(),
8017 *scratch);
8018 a. or_imm32_reg64(1, *scratch);
8019 emitVStackStore(a, i, *scratch, thisOff, sz::qword);
8021 emitDecRef(a, i, getReg(objLoc), KindOfObject);
8022 } else {
8023 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
8025 Stats::emitInc(a, Stats::TgtCache_MethodBypass);
8026 } else {
8027 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
8028 using namespace TargetCache;
8029 CacheHandle ch = MethodCache::alloc();
8030 if (false) { // typecheck
8031 ActRec* ar = NULL;
8032 MethodCache::lookup(ch, ar, name);
8034 int arOff = vstackOffset(i, startOfActRec);
8035 SKTRACE(1, i.source, "ch %d\n", ch);
8036 EMIT_CALL(a, MethodCache::lookup, IMM(ch),
8037 RPLUS(rVmSp, arOff), IMM(uint64_t(name)));
8038 recordReentrantCall(i);
8042 static inline ALWAYS_INLINE Class* getKnownClass(Class** classCache,
8043 const StringData* clsName) {
8044 Class* cls = *classCache;
8045 if (UNLIKELY(cls == NULL)) {
8046 // lookupKnownClass does its own VMRegAnchor'ing.
8047 cls = TargetCache::lookupKnownClass<false>(classCache, clsName, true);
8048 ASSERT(*classCache && *classCache == cls);
8050 ASSERT(cls);
8051 return cls;
8054 static Instance*
8055 HOT_FUNC_VM
8056 newInstanceHelperNoCtor(Class** classCache, const StringData* clsName) {
8057 Class* cls = getKnownClass(classCache, clsName);
8058 Instance* ret = newInstance(cls);
8059 ret->incRefCount();
8060 return ret;
8063 Instance*
8064 HOT_FUNC_VM
8065 newInstanceHelper(Class* cls, int numArgs, ActRec* ar, ActRec* prevAr) {
8066 const Func* f = cls->getCtor();
8067 Instance* ret = NULL;
8068 if (UNLIKELY(!(f->attrs() & AttrPublic))) {
8069 VMRegAnchor _;
8070 UNUSED MethodLookup::LookupResult res =
8071 g_vmContext->lookupCtorMethod(f, cls, true /*raise*/);
8072 ASSERT(res == MethodLookup::MethodFoundWithThis);
8074 // Don't start pushing the AR until newInstance returns; it may reenter.
8075 ret = newInstance(cls);
8076 f->validate();
8077 ar->m_func = f;
8078 ar->initNumArgs(numArgs, true /*fromCtor*/);
8079 // Count stack and this.
8080 ret->incRefCount();
8081 ret->incRefCount();
8082 ar->setThis(ret);
8083 ar->setVarEnv(NULL);
8084 arSetSfp(ar, prevAr);
8085 TRACE(2, "newInstanceHelper: AR %p: f %p, savedRbp %#lx, savedRip %#lx"
8086 " this %p\n",
8087 ar, ar->m_func, ar->m_savedRbp, ar->m_savedRip, ar->m_this);
8088 return ret;
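/*
 * Reference-count accounting above, as a sketch: the freshly created
 * instance gets two increfs -- one for the FPushCtor result cell that the
 * caller will push on the VM stack, and one for the $this slot of the
 * ActRec being initialized via ar->setThis(ret).
 */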
8091 void TranslatorX64::translateFPushCtor(const Tracelet& t,
8092 const NormalizedInstruction& i) {
8093 int numArgs = i.imm[0].u_IVA;
8094 int arOff = vstackOffset(i, -int(sizeof(ActRec)));
8095 m_regMap.scrubStackRange(i.stackOff, i.stackOff + kNumActRecCells);
8096 EMIT_CALL(a, newInstanceHelper,
8097 V(i.inputs[0]->location),
8098 IMM(numArgs),
8099 RPLUS(rVmSp, arOff),
8100 R(rVmFp));
8101 recordReentrantCall(i);
8103 m_regMap.bind(rax, i.outStack->location, KindOfObject, RegInfo::DIRTY);
8106 Instance*
8107 HOT_FUNC_VM
8108 newInstanceHelperCached(Class** classCache,
8109 const StringData* clsName, int numArgs,
8110 ActRec* ar, ActRec* prevAr) {
8111 Class* cls = getKnownClass(classCache, clsName);
8112 return newInstanceHelper(cls, numArgs, ar, prevAr);
8115 void TranslatorX64::translateFPushCtorD(const Tracelet& t,
8116 const NormalizedInstruction& i) {
8117 using namespace TargetCache;
8118 int numArgs = i.imm[0].u_IVA;
8119 const StringData* clsName = curUnit()->lookupLitstrId(i.imm[1].u_SA);
8120 CacheHandle classCh = allocKnownClass(clsName);
8121 ScratchReg scr(m_regMap);
8122 a. lea_reg64_disp_reg64(rVmTl, classCh, *scr);
8123 // We first push the new object, then the actrec. Since we're going to
8124 // need to call out, and possibly reenter in the course of all this,
8125 // null out the object on the stack, in case we unwind before we're
8126 // ready.
8127 int arOff = vstackOffset(i, -int(sizeof(ActRec)) - cellsToBytes(1));
8128 m_regMap.scrubStackRange(i.stackOff, i.stackOff + kNumActRecCells + 1);
8129 if (i.noCtor) {
8130 EMIT_CALL(a, newInstanceHelperNoCtor,
8131 R(*scr),
8132 IMM(uintptr_t(clsName)));
8133 } else {
8134 EMIT_CALL(a, newInstanceHelperCached,
8135 R(*scr),
8136 IMM(uintptr_t(clsName)),
8137 IMM(numArgs),
8138 RPLUS(rVmSp, arOff), // ActRec
8139 R(rVmFp)); // prevAR
8141 recordReentrantCall(i);
8142 // The callee takes care of initializing the actRec, and returns the new
8143 // object.
8144 m_regMap.bind(rax, i.outStack->location, KindOfObject, RegInfo::DIRTY);
8147 static void fatalNullThis() { raise_error(Strings::FATAL_NULL_THIS); }
8149 void
8150 TranslatorX64::emitThisCheck(const NormalizedInstruction& i,
8151 PhysReg reg) {
8152 if (curFunc()->cls() == NULL) { // Non-class
8153 a.test_reg64_reg64(reg, reg);
8154 a.jz(astubs.code.frontier); // jz if_null
8157 a. test_imm32_reg64(1, reg);
8159 UnlikelyIfBlock<CC_NZ> ifThisNull(a, astubs);
8160 // if_null:
8161 EMIT_CALL(astubs, fatalNullThis);
8162 recordReentrantStubCall(i);
8166 void
8167 TranslatorX64::translateThis(const Tracelet &t,
8168 const NormalizedInstruction &i) {
8169 if (!i.outStack) {
8170 ASSERT(i.next && i.next->grouped);
8171 return;
8174 ASSERT(!i.outLocal);
8175 ASSERT(curFunc()->isPseudoMain() || curFunc()->cls());
8176 m_regMap.allocOutputRegs(i);
8177 PhysReg out = getReg(i.outStack->location);
8178 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), out);
8180 if (!i.guardedThis) {
8181 emitThisCheck(i, out);
8183 emitIncRef(out, KindOfObject);
8186 void
8187 TranslatorX64::translateBareThis(const Tracelet &t,
8188 const NormalizedInstruction &i) {
8189 if (!i.outStack) {
8190 ASSERT(i.next && i.next->grouped);
8191 return;
8193 ASSERT(!i.outLocal);
8194 ASSERT(curFunc()->cls());
8195 ScratchReg outScratch(m_regMap);
8196 PhysReg out = *outScratch;
8197 PhysReg base;
8198 int offset;
8199 locToRegDisp(i.outStack->location, &base, &offset);
8200 if (i.outStack->rtt.isVagueValue()) {
8201 m_regMap.scrubLoc(i.outStack->location);
8203 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), out);
8204 a. test_imm32_reg64(1, out);
8205 DiamondReturn astubsRet;
8207 UnlikelyIfBlock<CC_NZ> ifThisNull(a, astubs, &astubsRet);
8208 astubs. store_imm32_disp_reg(KindOfNull, TVOFF(m_type) + offset, base);
8209 if (i.imm[0].u_OA) {
8210 EMIT_CALL(astubs, warnNullThis);
8211 recordReentrantStubCall(i);
8213 if (i.next && !i.outStack->rtt.isVagueValue()) {
8214 // To handle the case where we predict that
8215 // the bare this will have type Object.
8216 // Using the normal type prediction mechanism
8217 // would require writing the object to the stack
8218 // anyway.
8219 // This is currently dead, however; I couldn't
8220 // find a win.
8221 emitSideExit(astubs, i, true);
8222 astubsRet.kill();
8225 emitIncRef(out, KindOfObject);
8226 if (i.outStack->rtt.isVagueValue()) {
8227 a. store_imm32_disp_reg(KindOfObject, TVOFF(m_type) + offset, base);
8228 a. store_reg64_disp_reg64(out, TVOFF(m_data) + offset, base);
8229 } else {
8230 ASSERT(i.outStack->isObject());
8231 m_regMap.bindScratch(outScratch, i.outStack->location, KindOfObject,
8232 RegInfo::DIRTY);
8236 void
8237 TranslatorX64::translateCheckThis(const Tracelet& t,
8238 const NormalizedInstruction& i) {
8239 ASSERT(i.inputs.size() == 1 &&
8240 i.inputs[0]->location == Location(Location::This));
8241 if (i.guardedThis) return;
8242 emitThisCheck(i, getReg(i.inputs[0]->location));
8245 void
8246 TranslatorX64::translateInitThisLoc(const Tracelet& t,
8247 const NormalizedInstruction& i) {
8248 ASSERT(i.outLocal && !i.outStack);
8249 ASSERT(curFunc()->isPseudoMain() || curFunc()->cls());
8251 PhysReg base;
8252 int offset;
8253 locToRegDisp(i.outLocal->location, &base, &offset);
8254 ASSERT(base == rVmFp);
8256 ScratchReg thiz(m_regMap);
8257 a.load_reg64_disp_reg64(rVmFp, AROFF(m_this), *thiz);
8258 if (curFunc()->cls() == NULL) {
8259 // If we're in a pseudomain, m_this could be NULL
8260 a.test_reg64_reg64(*thiz, *thiz);
8261 a.jz(astubs.code.frontier); // jz if_null
8263 // Ok, it's not NULL but it might be a Class which should be treated
8264 // equivalently
8265 a.test_imm32_reg64(1, *thiz);
8266 a.jnz(astubs.code.frontier); // jnz if_null
8268 // We have a valid $this!
8269 a.store_imm32_disp_reg(KindOfObject, offset + TVOFF(m_type), base);
8270 a.store_reg64_disp_reg64(*thiz, offset + TVOFF(m_data), base);
8271 emitIncRef(*thiz, KindOfObject);
8273 // if_null:
8274 emitStoreUninitNull(astubs, offset, base);
8275 astubs.jmp(a.code.frontier);
8277 m_regMap.invalidate(i.outLocal->location);
8280 void
8281 TranslatorX64::analyzeFPushFuncD(Tracelet& t, NormalizedInstruction& i) {
8282 Id funcId = i.imm[1].u_SA;
8283 const NamedEntityPair nep = curUnit()->lookupNamedEntityPairId(funcId);
8284 const Func* func = Unit::lookupFunc(nep.second, nep.first);
8285 i.m_txFlags = supportedPlan(func != NULL);
8288 void
8289 TranslatorX64::translateFPushFuncD(const Tracelet& t,
8290 const NormalizedInstruction& i) {
8291 ASSERT(i.inputs.size() == 0);
8292 ASSERT(!i.outStack && !i.outLocal);
8293 Id funcId = i.imm[1].u_SA;
8294 const NamedEntityPair& nep = curUnit()->lookupNamedEntityPairId(funcId);
8295 const StringData* name = nep.first;
8296 const Func* func = Unit::lookupFunc(nep.second, name);
8298 // Translation is only supported if function lookup succeeds
8299 func->validate();
8300 if (Trace::enabled && !func) {
8301 TRACE(1, "Attempt to invoke undefined function %s\n", name->data());
8304 // Inform the register allocator that we just annihilated a range of
8305 // possibly-dirty stack entries.
8306 m_regMap.scrubStackRange(i.stackOff,
8307 i.stackOff + kNumActRecCells);
8309 size_t thisOff = AROFF(m_this) - sizeof(ActRec);
8310 bool funcCanChange = !func->isNameBindingImmutable(curUnit());
8311 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
8312 emitPushAR(i, funcCanChange ? NULL : func, 0, false, false);
8313 if (funcCanChange) {
8314 // Look it up in a FuncCache.
8315 using namespace TargetCache;
8316 CacheHandle ch = allocFixedFunction(nep.second, false);
8317 size_t funcOff = AROFF(m_func) - sizeof(ActRec);
8318 size_t funcCacheOff = ch + offsetof(FixedFuncCache, m_func);
8320 SKTRACE(1, i.source, "ch %d\n", ch);
8322 Stats::emitInc(a, Stats::TgtCache_FuncDHit);
8323 ScratchReg scratch(m_regMap);
8324 a.load_reg64_disp_reg64(rVmTl, funcCacheOff, *scratch);
8325 a.test_reg64_reg64(*scratch, *scratch);
8327 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8329 if (false) { // typecheck
8330 StringData sd("foo");
8331 FixedFuncCache::lookupFailed(&sd);
8334 EMIT_CALL(astubs, TCA(FixedFuncCache::lookupFailed),
8335 IMM(uintptr_t(name)));
8336 recordReentrantStubCall(i);
8337 emitMovRegReg(astubs, rax, *scratch);
8339 emitVStackStore(a, i, *scratch, funcOff, sz::qword);
8343 void
8344 TranslatorX64::translateFPushContFunc(const Tracelet& t,
8345 const NormalizedInstruction& i) {
8346 ASSERT(curFrame()->hasThis());
8347 Class* genClass = curFrame()->getThis()->getVMClass();
8348 ASSERT(genClass == SystemLib::s_MethodContinuationClass ||
8349 genClass == SystemLib::s_FunctionContinuationClass);
8350 bool isMethod = genClass == SystemLib::s_MethodContinuationClass;
8351 size_t thisOff = AROFF(m_this) - sizeof(ActRec);
8352 size_t funcOff = AROFF(m_func) - sizeof(ActRec);
8353 m_regMap.scrubStackRange(i.stackOff,
8354 i.stackOff + kNumActRecCells);
8355 emitPushAR(i, NULL, 0, false, false);
8356 ScratchReg rCont(m_regMap);
8357 ScratchReg rScratch(m_regMap);
8358 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
8360 // Store the func
8361 a.load_reg64_disp_reg64(*rCont, CONTOFF(m_vmFunc), *rScratch);
8362 emitVStackStore(a, i, *rScratch, funcOff, sz::qword);
8364 if (isMethod) {
8365 // Store m_this
8366 a. load_reg64_disp_reg64(*rCont, CONTOFF(m_obj), *rScratch);
8367 a. test_reg64_reg64(*rScratch, *rScratch);
8369 IfElseBlock<CC_Z> ifThis(a);
8370 emitVStackStore(a, i, *rScratch, thisOff, sz::qword);
8371 emitIncRef(*rScratch, KindOfObject);
8373 ifThis.Else();
8374 a.load_reg64_disp_reg64(*rCont, CONTOFF(m_vmCalledClass), *rScratch);
8375 // m_vmCalledClass already has its low bit set
8376 emitVStackStore(a, i, *rScratch, thisOff, sz::qword);
8378 } else {
8379 emitVStackStoreImm(a, i, 0, thisOff, sz::qword);
8383 const Func*
8384 TranslatorX64::findCuf(const NormalizedInstruction& ni,
8385 Class*& cls, StringData*& invName, bool& forward) {
8386 forward = (ni.op() == OpFPushCufF);
8387 cls = NULL;
8388 invName = NULL;
8390 DynLocation* callable = ni.inputs[ni.op() == OpFPushCufSafe ? 1 : 0];
8392 const StringData* str =
8393 callable->isString() ? callable->rtt.valueString() : NULL;
8394 const ArrayData* arr =
8395 callable->isArray() ? callable->rtt.valueArray() : NULL;
8397 StringData* sclass = NULL;
8398 StringData* sname = NULL;
8399 if (str) {
8400 Func* f = HPHP::VM::Unit::lookupFunc(str);
8401 if (f) return f;
8402 String name(const_cast<StringData*>(str));
8403 int pos = name.find("::");
8404 if (pos <= 0 || pos + 2 >= name.size() ||
8405 name.find("::", pos + 2) != String::npos) {
8406 return NULL;
8408 sclass = StringData::GetStaticString(name.substr(0, pos).get());
8409 sname = StringData::GetStaticString(name.substr(pos + 2).get());
8410 } else if (arr) {
8411 if (arr->size() != 2) return NULL;
8412 CVarRef e0 = arr->get(0LL, false);
8413 CVarRef e1 = arr->get(1LL, false);
8414 if (!e0.isString() || !e1.isString()) return NULL;
8415 sclass = e0.getStringData();
8416 sname = e1.getStringData();
8417 String name(sname);
8418 if (name.find("::") != String::npos) return NULL;
8419 } else {
8420 return NULL;
8423 Class* ctx = curFunc()->cls();
8425 if (sclass->isame(s_self.get())) {
8426 if (!ctx) return NULL;
8427 cls = ctx;
8428 forward = true;
8429 } else if (sclass->isame(s_parent.get())) {
8430 if (!ctx || !ctx->parent()) return NULL;
8431 cls = ctx->parent();
8432 forward = true;
8433 } else if (sclass->isame(s_static.get())) {
8434 return NULL;
8435 } else {
8436 cls = VM::Unit::lookupClass(sclass);
8437 if (!cls) return NULL;
8440 bool magicCall = false;
8441 const Func* f = lookupImmutableMethod(cls, sname, magicCall, true);
8442 if (!f || (forward && !ctx->classof(f->cls()))) {
8444 * To preserve the invariant that the lsb class
8445 * is an instance of the context class, we require
8446 * that f's class is an instance of the context class.
8447 * This is conservative, but without it, we would need
8448 * a runtime check to decide whether or not to forward
8449 * the lsb class
8451 return NULL;
8453 if (magicCall) invName = sname;
8454 return f;
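/*
 * Callable shapes findCuf resolves at translation time, as examples
 * (anything else returns NULL):
 *
 *   "some_function"               -> plain function lookup
 *   "SomeClass::method"           -> split into class and method names
 *   "self::method", "parent::..." -> resolved against the current context,
 *                                    with forwarding of the called class
 *   array("SomeClass", "method")  -> two-element array of strings
 *   "static::method"              -> never resolved statically here
 */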
8457 void
8458 TranslatorX64::analyzeFPushCufOp(Tracelet& t,
8459 NormalizedInstruction& ni) {
8460 Class* cls = NULL;
8461 StringData* invName = NULL;
8462 bool forward = false;
8463 const Func* func = findCuf(ni, cls, invName, forward);
8464 ni.m_txFlags = supportedPlan(func != NULL);
8465 ni.manuallyAllocInputs = true;
8468 void
8469 TranslatorX64::setupActRecClsForStaticCall(const NormalizedInstruction &i,
8470 const Func* func, const Class* cls,
8471 size_t clsOff, bool forward) {
8472 if (forward) {
8473 ScratchReg rClsScratch(m_regMap);
8474 PhysReg rCls = *rClsScratch;
8475 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
8476 if (!(curFunc()->attrs() & AttrStatic)) {
8477 ASSERT(curFunc()->cls() &&
8478 curFunc()->cls()->classof(cls));
8479 /* the context is non-static, so we have to deal
8480 with passing in $this or getClass($this) */
8481 a. test_imm32_reg64(1, rCls);
8483 JccBlock<CC_NZ> ifThis(a);
8484 // rCls is holding a real $this.
8485 if (func->attrs() & AttrStatic) {
8486 // but we're a static method, so pass getClass($this)|1
8487 a.load_reg64_disp_reg64(rCls, ObjectData::getVMClassOffset(), rCls);
8488 a.or_imm32_reg64(1, rCls);
8489 } else {
8490 // We should pass $this to the callee
8491 emitIncRef(rCls, KindOfObject);
8495 emitVStackStore(a, i, rCls, clsOff);
8496 } else {
8497 if (!(func->attrs() & AttrStatic) &&
8498 !(curFunc()->attrs() & AttrStatic) &&
8499 curFunc()->cls() &&
8500 curFunc()->cls()->classof(cls)) {
8501 /* might be a non-static call */
8502 ScratchReg rClsScratch(m_regMap);
8503 PhysReg rCls = *rClsScratch;
8504 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
8505 a. test_imm32_reg64(1, rCls);
8507 IfElseBlock<CC_NZ> ifThis(a);
8508 // rCls is holding $this. We should pass it to the callee
8509 emitIncRef(rCls, KindOfObject);
8510 emitVStackStore(a, i, rCls, clsOff);
8511 ifThis.Else();
8512 emitVStackStoreImm(a, i, uintptr_t(cls)|1, clsOff);
8514 } else {
8515 emitVStackStoreImm(a, i, uintptr_t(cls)|1, clsOff);
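/*
 * The m_cls slot written above uses the usual ActRec encoding: either a
 * real $this (an ObjectData*, low bit clear, incref'd) or a Class* with
 * the low bit set. As a sketch of the decision:
 *
 *   forward case: reuse the caller's m_cls, converting a real $this to
 *                 getClass($this)|1 when the callee is static;
 *   normal case:  pass the caller's $this when a non-static call is
 *                 possible, otherwise store cls|1 as an immediate.
 */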
8520 template <bool warn>
8521 int64 checkClass(TargetCache::CacheHandle ch, StringData* clsName,
8522 ActRec *ar) {
8523 VMRegAnchor _;
8524 AutoloadHandler::s_instance->invokeHandler(clsName->data());
8525 if (*(Class**)TargetCache::handleToPtr(ch)) return true;
8526 ar->m_func = SystemLib::GetNullFunction();
8527 if (ar->hasThis()) {
8528 // cannot hit zero, we just inc'ed it
8529 ar->getThis()->decRefCount();
8531 ar->setThis(0);
8532 return false;
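/*
 * Failure path of checkClass, as a sketch: if the autoloader still did not
 * define the class, the partially-built ActRec is pointed at the null
 * function (so the coming FCall is a harmless no-op) and any $this it held
 * is released; the boolean return value feeds the FPushCufSafe "callable
 * exists" flag at the call site below.
 */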
8535 static void warnMissingFunc(StringData* name) {
8536 throw_invalid_argument("function: method '%s' not found", name->data());
8539 void
8540 TranslatorX64::translateFPushCufOp(const Tracelet& t,
8541 const NormalizedInstruction& ni) {
8542 Class* cls = NULL;
8543 StringData* invName = NULL;
8544 bool forward = false;
8545 const Func* func = findCuf(ni, cls, invName, forward);
8546 ASSERT(func);
8548 int numPopped = ni.op() == OpFPushCufSafe ? 0 : 1;
8549 m_regMap.scrubStackRange(ni.stackOff - numPopped,
8550 ni.stackOff - numPopped + kNumActRecCells);
8552 int startOfActRec = int(numPopped * sizeof(Cell)) - int(sizeof(ActRec));
8554 emitPushAR(ni, cls ? func : NULL, numPopped * sizeof(Cell),
8555 false /* isCtor */, false /* clearThis */,
8556 invName ? uintptr_t(invName) | 1 : 0 /* varEnvInvName */);
8558 bool safe = (ni.op() == OpFPushCufSafe);
8559 size_t clsOff = AROFF(m_cls) + startOfActRec;
8560 size_t funcOff = AROFF(m_func) + startOfActRec;
8561 LazyScratchReg flag(m_regMap);
8562 if (safe) {
8563 flag.alloc();
8564 emitImmReg(a, true, *flag);
8566 if (cls) {
8567 setupActRecClsForStaticCall(ni, func, cls, clsOff, forward);
8568 TargetCache::CacheHandle ch = cls->m_cachedOffset;
8569 if (!TargetCache::isPersistentHandle(ch)) {
8570 a. cmp_imm32_disp_reg32(0, ch, rVmTl);
8572 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8573 if (false) {
8574 checkClass<false>(0, NULL, NULL);
8575 checkClass<true>(0, NULL, NULL);
8577 EMIT_CALL(astubs, TCA(safe ? checkClass<false> : checkClass<true>),
8578 IMM(ch), IMM(uintptr_t(cls->name())),
8579 RPLUS(rVmSp, vstackOffset(ni, startOfActRec)));
8580 recordReentrantStubCall(ni, true);
8581 if (safe) {
8582 astubs. mov_reg64_reg64(rax, *flag);
8586 } else {
8587 TargetCache::CacheHandle ch = func->getCachedOffset();
8588 if (TargetCache::isPersistentHandle(ch)) {
8589 emitVStackStoreImm(a, ni, uintptr_t(func), funcOff, sz::qword);
8590 emitVStackStoreImm(a, ni, 0, clsOff, sz::qword, &m_regMap);
8591 } else {
8592 ScratchReg funcReg(m_regMap);
8593 a. load_reg64_disp_reg64(rVmTl, ch, *funcReg);
8594 emitVStackStore(a, ni, *funcReg, funcOff);
8595 emitVStackStoreImm(a, ni, 0, clsOff, sz::qword, &m_regMap);
8596 a. test_reg64_reg64(*funcReg, *funcReg);
8598 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8599 emitVStackStoreImm(astubs, ni,
8600 uintptr_t(SystemLib::GetNullFunction()), funcOff);
8601 if (safe) {
8602 emitImmReg(astubs, false, *flag);
8603 } else {
8604 EMIT_CALL(astubs, TCA(warnMissingFunc), IMM(uintptr_t(func->name())));
8605 recordReentrantStubCall(ni, true);
8611 if (safe) {
8612 DynLocation* outFlag = ni.outStack2;
8613 DynLocation* outDef = ni.outStack;
8615 DynLocation* inDef = ni.inputs[0];
8616 if (!m_regMap.hasReg(inDef->location)) {
8617 m_regMap.scrubStackRange(ni.stackOff - 2, ni.stackOff - 2);
8618 PhysReg base1, base2;
8619 int disp1, disp2;
8620 locToRegDisp(inDef->location, &base1, &disp1);
8621 locToRegDisp(outDef->location, &base2, &disp2);
8622 ScratchReg tmp(m_regMap);
8623 a. load_reg64_disp_reg64(base1, TVOFF(m_data) + disp1, *tmp);
8624 a. store_reg64_disp_reg64(*tmp, TVOFF(m_data) + disp2, base2);
8625 if (!inDef->rtt.isVagueValue()) {
8626 a. store_imm32_disp_reg(inDef->outerType(),
8627 TVOFF(m_type) + disp2, base2);
8628 } else {
8629 a. load_reg64_disp_reg32(base1, TVOFF(m_type) + disp1, *tmp);
8630 a. store_reg32_disp_reg64(*tmp, TVOFF(m_type) + disp2, base2);
8632 } else {
8633 PhysReg reg = m_regMap.getReg(inDef->location);
8634 m_regMap.scrubStackRange(ni.stackOff - 1, ni.stackOff - 1);
8635 m_regMap.bind(reg, outDef->location, inDef->rtt.outerType(),
8636 RegInfo::DIRTY);
8638 m_regMap.bindScratch(flag, outFlag->location, KindOfBoolean,
8639 RegInfo::DIRTY);
8643 void
8644 TranslatorX64::analyzeFPassCOp(Tracelet& t, NormalizedInstruction& i) {
8645 i.m_txFlags = nativePlan(!i.preppedByRef);
8648 void
8649 TranslatorX64::translateFPassCOp(const Tracelet& t,
8650 const NormalizedInstruction& i) {
8651 ASSERT(i.inputs.size() == 0);
8652 ASSERT(!i.outStack && !i.outLocal);
8653 ASSERT(!i.preppedByRef);
8656 void
8657 TranslatorX64::translateFPassR(const Tracelet& t,
8658 const NormalizedInstruction& i) {
8660 * Like FPassC, FPassR is able to cheat on boxing if the current
8661 * parameter is passed by reference but we have a cell: the box would refer
8662 * to exactly one datum (the value currently on the stack).
8664 * However, if the callee wants a cell and we have a variant we must
8665 * unbox; otherwise we might accidentally make callee changes to its
8666 * parameter globally visible.
8668 ASSERT(!i.inputs[0]->rtt.isVagueValue());
8670 ASSERT(i.inputs.size() == 1);
8671 const RuntimeType& inRtt = i.inputs[0]->rtt;
8672 if (inRtt.isVariant() && !i.preppedByRef) {
8673 emitUnboxTopOfStack(i);
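/*
 * Example of the unbox case above (illustrative): the stack slot holds a
 * KindOfRef, but this parameter position is prepped by value, so the inner
 * cell is copied out here; otherwise the callee could mutate the shared
 * box and make the change visible to the caller.
 */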
8677 void
8678 TranslatorX64::translateFCall(const Tracelet& t,
8679 const NormalizedInstruction& i) {
8680 int numArgs = i.imm[0].u_IVA;
8681 const Opcode* atCall = i.pc();
8682 const Opcode* after = curUnit()->at(nextSrcKey(t, i).offset());
8683 const Func* srcFunc = curFunc();
8685 // Sync all dirty registers and adjust rVmSp to point to the
8686 // top of stack at the beginning of the current instruction
8687 syncOutputs(i);
8689 // We are "between" tracelets and don't use the register map
8690 // anymore. (Note that the currently executing trace may actually
8691 // continue past the FCall, but it will have to resume with a fresh
8692 // register map.)
8693 RegSet scratchRegs = kScratchCrossTraceRegs;
8694 DumbScratchReg retIPReg(scratchRegs);
8696 // Caller-specific fields: return addresses and the frame pointer
8697 // offset.
8698 ASSERT(sizeof(Cell) == 1 << 4);
8699 // Record the hardware return address. This will be patched up below; 2
8700 // is a magic number dependent on assembler implementation.
8701 MovImmPatcher retIP(a, (uint64_t)a.code.frontier, *retIPReg);
8702 a. store_reg64_disp_reg64 (*retIPReg,
8703 cellsToBytes(numArgs) + AROFF(m_savedRip),
8704 rVmSp);
8706 // The kooky offset here a) gets us to the current ActRec,
8707 // and b) accesses m_soff.
8708 int32 callOffsetInUnit = srcFunc->unit()->offsetOf(after - srcFunc->base());
8709 a. store_imm32_disp_reg(callOffsetInUnit,
8710 cellsToBytes(numArgs) + AROFF(m_soff),
8711 rVmSp);
8713 emitBindCall(t, i,
8714 curUnit()->offsetOf(atCall),
8715 curUnit()->offsetOf(after)); // ...
8716 retIP.patch(uint64(a.code.frontier));
8718 if (i.breaksTracelet) {
8719 SrcKey fallThru(curFunc(), after);
8720 emitBindJmp(fallThru);
8721 } else {
8723 * Before returning, the callee restored rVmSp to point to the
8724 * current top of stack but the rest of this tracelet assumes that
8725 * rVmSp is set to the top of the stack at the beginning of the
8726 * tracelet, so we have to fix it up here.
8728 * TODO: in the case of an inlined NativeImpl, we're essentially
8729 * emitting two adds to rVmSp in a row, which we can combine ...
8731 int delta = i.stackOff + getStackDelta(i);
8732 if (delta != 0) {
8733 // i.stackOff is in negative Cells, not bytes.
8734 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
8739 void TranslatorX64::analyzeFCallArray(Tracelet& t,
8740 NormalizedInstruction& i) {
8741 i.m_txFlags = Supported;
8744 void TranslatorX64::translateFCallArray(const Tracelet& t,
8745 const NormalizedInstruction& i) {
8746 const Offset after = nextSrcKey(t, i).offset();
8748 syncOutputs(i);
8750 FCallArrayArgs* args = m_globalData.alloc<FCallArrayArgs>();
8751 emitImmReg(a, (uint64_t)args, argNumToRegName[0]);
8752 emitCall(a, (TCA)fCallArrayHelper, true);
8754 args->m_pcOff = i.offset();
8755 args->m_pcNext = after;
8757 if (i.breaksTracelet) {
8758 SrcKey fallThru(curFunc(), after);
8759 emitBindJmp(fallThru);
8760 } else {
8762 * When we get here, rVmSp points to the actual top of stack,
8763 * but the rest of this tracelet assumes that rVmSp is set to
8764 * the top of the stack at the beginning of the tracelet, so we
8765 * have to fix it up here.
8768 ASSERT(i.outStack);
8769 int delta = i.stackOff + getStackDelta(i);
8770 if (delta != 0) {
8771 // i.stackOff is in negative Cells, not bytes.
8772 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
8777 template <bool UseTC>
8778 static TypedValue*
8779 staticLocHelper(StringData* name, ActRec* fp, TypedValue* sp,
8780 TargetCache::CacheHandle ch) {
8781 if (UseTC) {
8782 Stats::inc(Stats::TgtCache_StaticMiss);
8783 Stats::inc(Stats::TgtCache_StaticHit, -1);
8785 HphpArray* map = get_static_locals(fp);
8786 TypedValue* retval = map->nvGet(name); // Local to num
8787 if (!retval) {
8788 // Read the initial value off the stack.
8789 TypedValue tv = *sp;
8790 map->nvSet(name, &tv, false);
8791 retval = map->nvGet(name);
8793 ASSERT(retval);
8794 if (retval->m_type != KindOfRef) {
8795 tvBox(retval);
8797 ASSERT(retval->m_type == KindOfRef);
8798 if (UseTC) {
8799 TypedValue** chTv = (TypedValue**)TargetCache::handleToPtr(ch);
8800 ASSERT(*chTv == NULL);
8801 return (*chTv = retval);
8802 } else {
8803 return retval;
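// Hypothetical PHP-level example of what this helper services (illustration
// only; assumed, not from the original source):
//
//   function f() {
//     static $x = 0;   // StaticLocInit binds $x to a per-function static
//     return ++$x;
//   }
//
// With UseTC, the first execution through a CacheHandle misses, runs this
// helper, and caches the boxed TypedValue* so later executions hit the
// inline fast path emitted by translateStaticLocInit() below.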
8807 void
8808 TranslatorX64::emitCallStaticLocHelper(X64Assembler& as,
8809 const NormalizedInstruction& i,
8810 ScratchReg& output,
8811 TargetCache::CacheHandle ch) {
8812 // The helper is going to read the value from memory, so record it. We
8813 // could also pass type/value as parameters, but this is hopefully a
8814 // rare path.
8815 m_regMap.cleanLoc(i.inputs[0]->location);
8816 if (false) { // typecheck
8817 StringData* sd = NULL;
8818 ActRec* fp = NULL;
8819 TypedValue* sp = NULL;
8820 sp = staticLocHelper<true>(sd, fp, sp, ch);
8821 sp = staticLocHelper<false>(sd, fp, sp, ch);
8823 const StringData* name = curFunc()->unit()->lookupLitstrId(i.imm[1].u_SA);
8824 ASSERT(name->isStatic());
8825 if (ch) {
8826 EMIT_CALL(as, (TCA)staticLocHelper<true>, IMM(uintptr_t(name)), R(rVmFp),
8827 RPLUS(rVmSp, -cellsToBytes(i.stackOff)), IMM(ch));
8828 } else {
8829 EMIT_CALL(as, (TCA)staticLocHelper<false>, IMM(uintptr_t(name)), R(rVmFp),
8830 RPLUS(rVmSp, -cellsToBytes(i.stackOff)));
8832 recordCall(as, i);
8833 emitMovRegReg(as, rax, *output);
8836 void
8837 TranslatorX64::translateStaticLocInit(const Tracelet& t,
8838 const NormalizedInstruction& i) {
8839 using namespace TargetCache;
8840 ScratchReg output(m_regMap);
8841 const Location& outLoc = i.outLocal->location;
8843   // Closures and generators from closures don't satisfy the "one
8844   // static per source location" rule that the inline fastpath
8845   // requires.
8846 if (!curFunc()->isClosureBody() &&
8847 !curFunc()->isGeneratorFromClosure()) {
8848 // Miss path explicitly decrements.
8849 Stats::emitInc(a, Stats::TgtCache_StaticHit);
8850 Stats::emitInc(a, Stats::Tx64_StaticLocFast);
8852 CacheHandle ch = allocStatic();
8853 ASSERT(ch);
8854 a. load_reg64_disp_reg64(rVmTl, ch, *output);
8855 a. test_reg64_reg64(*output, *output);
8857 UnlikelyIfBlock<CC_Z> fooey(a, astubs);
8858 emitCallStaticLocHelper(astubs, i, output, ch);
8860 } else {
8861 Stats::emitInc(a, Stats::Tx64_StaticLocSlow);
8862 emitCallStaticLocHelper(a, i, output, 0);
8864 // Now we've got the outer variant in *output. Get the address of the
8865 // inner cell, since that's the enregistered representation of a variant.
8866 emitDeref(a, *output, *output);
8867 emitIncRef(*output, KindOfRef);
8868 // Turn output into the local we just initialized.
8869 m_regMap.bindScratch(output, outLoc, KindOfRef, RegInfo::DIRTY);
8872 void
8873 TranslatorX64::analyzeVerifyParamType(Tracelet& t, NormalizedInstruction& i) {
8874 int param = i.imm[0].u_IVA;
8875 const TypeConstraint& tc = curFunc()->params()[param].typeConstraint();
8876 if (!tc.isObject()) {
8877 // We are actually using the translation-time value of this local as a
8878 // prediction; if the param check failed at compile-time, we predict it
8879 // will continue failing.
8880 bool compileTimeCheck = tc.check(frame_local(curFrame(), param), curFunc());
8881 i.m_txFlags = nativePlan(compileTimeCheck);
8882 i.manuallyAllocInputs = true;
8883 } else {
8884 bool trace = i.inputs[0]->isObject() ||
8885 (i.inputs[0]->isNull() && tc.nullable());
8886 i.m_txFlags = supportedPlan(trace);
8890 static void
8891 VerifyParamTypeFail(int paramNum) {
8892 VMRegAnchor _;
8893 const ActRec* ar = curFrame();
8894 const Func* func = ar->m_func;
8895 const TypeConstraint& tc = func->params()[paramNum].typeConstraint();
8896 ASSERT(tc.isObject());
8897 TypedValue* tv = frame_local(ar, paramNum);
8898 TRACE(3, "%s Obj %s, needs type %s\n",
8899 __func__,
8900 tv->m_data.pobj->getVMClass()->name()->data(),
8901 tc.typeName()->data());
8902 tc.verifyFail(func, paramNum, tv);
8905 // check class hierarchy and fail if no match
8906 static uint64_t
8907 VerifyParamTypeSlow(const Class* cls, const Class* constraint) {
8908 Stats::inc(Stats::Tx64_VerifyParamTypeSlow);
8909 Stats::inc(Stats::Tx64_VerifyParamTypeFast, -1);
8911 // ensure C++ returns a 0 or 1 with upper bits zeroed
8912 return static_cast<uint64_t>(constraint && cls->classof(constraint));
8915 void
8916 TranslatorX64::translateVerifyParamType(const Tracelet& t,
8917 const NormalizedInstruction& i) {
8918 Stats::emitInc(a, Stats::Tx64_VerifyParamTypeFast);
8920 int param = i.imm[0].u_IVA;
8921 const TypeConstraint& tc = curFunc()->params()[param].typeConstraint();
8922   // Not quite a nop. The guards should have verified that the m_type field
8923 // is compatible, but for objects we need to go one step further and
8924 // ensure that we're dealing with the right class.
8925 // NULL inputs only get traced when constraint is nullable.
8926 ASSERT(i.inputs.size() == 1);
8927 if (!i.inputs[0]->isObject()) return; // nop.
8929 // Get the input's class from ObjectData->m_cls
8930 const Location& in = i.inputs[0]->location;
8931 PhysReg src = getReg(in);
8932 ScratchReg inCls(m_regMap);
8933 if (i.inputs[0]->rtt.isVariant()) {
8934 emitDeref(a, src, *inCls);
8935 a. load_reg64_disp_reg64(*inCls, ObjectData::getVMClassOffset(), *inCls);
8936 } else {
8937 a. load_reg64_disp_reg64(src, ObjectData::getVMClassOffset(), *inCls);
8940 ScratchReg cls(m_regMap);
8941   // The constraint may not be in the class hierarchy of the method being
8942   // traced, so look up the class handle and emit code to put the Class* into a reg.
8943 if (!tc.isSelf() && !tc.isParent()) {
8944 const StringData* clsName = tc.typeName();
8945 using namespace TargetCache;
8946 CacheHandle ch = allocKnownClass(clsName);
8947 a. load_reg64_disp_reg64(rVmTl, ch, *cls);
8948 } else {
8949 const Class *constraint = NULL;
8950 if (tc.isSelf()) {
8951 tc.selfToClass(curFunc(), &constraint);
8952 } else if (tc.isParent()) {
8953 tc.parentToClass(curFunc(), &constraint);
8955 emitImmReg(a, uintptr_t(constraint), *cls);
8957   // Compare this class to the incoming object's class. If the typehint's class
8958   // is not present, it cannot be an instance: fail.
8959 a. cmp_reg64_reg64(*inCls, *cls);
8962 JccBlock<CC_Z> subclassCheck(a);
8963 // Call helper since ObjectData::instanceof is a member function
8964 if (false) {
8965 Class* cls = NULL;
8966 Class* constraint = NULL;
8967 VerifyParamTypeSlow(cls, constraint);
8969 EMIT_CALL(a, VerifyParamTypeSlow, R(*inCls), R(*cls));
8970     // Check the return value: a match falls through; otherwise take the slow path
8971 a. test_reg64_reg64(rax, rax);
8973 // Put the failure path into astubs
8975 UnlikelyIfBlock<CC_Z> fail(a, astubs);
8976 if (false) { // typecheck
8977 VerifyParamTypeFail(param);
8979 EMIT_CALL(astubs, VerifyParamTypeFail, IMM(param));
8980 recordReentrantStubCall(i);
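// Illustrative sketch (assumed, not from the original source): the code above
// services object type hints such as
//
//   function g(Foo $f) { ... }   // parameter 0 carries a VerifyParamType
//
// The fast path compares the cached Class* against the argument's m_cls;
// VerifyParamTypeSlow walks the class hierarchy, and VerifyParamTypeFail
// raises the usual typehint failure.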
8985 void
8986 TranslatorX64::analyzeInstanceOfD(Tracelet& t, NormalizedInstruction& i) {
8987 ASSERT(i.inputs.size() == 1);
8988 ASSERT(i.outStack && !i.outLocal);
8989 i.m_txFlags = planHingesOnRefcounting(i.inputs[0]->outerType());
8992 // check class hierarchy and fail if no match
8993 static uint64_t
8994 InstanceOfDSlow(const Class* cls, const Class* constraint) {
8995 Stats::inc(Stats::Tx64_InstanceOfDSlow);
8996 Stats::inc(Stats::Tx64_InstanceOfDFast, -1);
8998 // ensure C++ returns a 0 or 1 with upper bits zeroed
8999 return static_cast<uint64_t>(constraint && cls->classof(constraint));
9002 void
9003 TranslatorX64::translateInstanceOfD(const Tracelet& t,
9004 const NormalizedInstruction& i) {
9005 Stats::emitInc(a, Stats::Tx64_InstanceOfDFast);
9006 ASSERT(i.inputs.size() == 1);
9007 ASSERT(i.outStack && !i.outLocal);
9009 DynLocation* input0 = i.inputs[0];
9010 bool input0IsLoc = input0->isLocal();
9011 DataType type = input0->valueType();
9012 PhysReg srcReg;
9013 ScratchReg result(m_regMap);
9014 LazyScratchReg srcScratch(m_regMap);
9015 TCA patchAddr = NULL;
9016 boost::scoped_ptr<DiamondReturn> retFromNullThis;
9018 if (i.grouped && (i.prev->op() == OpThis || i.prev->op() == OpBareThis)) {
9019 srcScratch.alloc();
9020 srcReg = *srcScratch;
9021 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), srcReg);
9022 if (i.prev->op() == OpThis) {
9023 ASSERT(i.prev->guardedThis);
9024 } else {
9025 if (i.prev->imm[0].u_OA) {
9026 retFromNullThis.reset(new DiamondReturn);
9027 a. test_imm32_reg64(1, srcReg);
9029 UnlikelyIfBlock<CC_NZ> ifNull(a, astubs, retFromNullThis.get());
9030 EMIT_CALL(astubs, warnNullThis);
9031 recordReentrantStubCall(i);
9032 emitImmReg(astubs, false, *result);
9034 } else {
9035 emitImmReg(a, false, *result);
9036 a. test_imm32_reg64(1, srcReg);
9037 patchAddr = a.code.frontier;
9038 a. jcc(CC_NZ, patchAddr);
9041     input0IsLoc = true; // we don't want a decRef
9042 type = KindOfObject;
9043 } else {
9044 srcReg = getReg(input0->location);
9047 if (type != KindOfObject) {
9048 // All non-object inputs are not instances
9049 if (!input0IsLoc) {
9050 ASSERT(!input0->isVariant());
9051 emitDecRef(i, srcReg, type);
9053 emitImmReg(a, false, *result);
9055 } else {
9056 // Get the input's class from ObjectData->m_cls
9057 ScratchReg inCls(m_regMap);
9058 if (input0->rtt.isVariant()) {
9059 ASSERT(input0IsLoc);
9060 emitDeref(a, srcReg, *inCls);
9061 a. load_reg64_disp_reg64(*inCls, ObjectData::getVMClassOffset(), *inCls);
9062 } else {
9063 a. load_reg64_disp_reg64(srcReg, ObjectData::getVMClassOffset(), *inCls);
9065 if (!input0IsLoc) {
9066 emitDecRef(i, srcReg, type);
9069     // Set result to true for now. If we take the slow path, use its return value.
9070 emitImmReg(a, true, *result);
9071 ScratchReg cls(m_regMap);
9072     // The constraint may not be in the class hierarchy of the method being
9073     // traced, so look up the class handle and emit code to put the Class* into a reg.
9074 using namespace TargetCache;
9075 int param = i.imm[0].u_SA;
9076 const StringData* clsName = curUnit()->lookupLitstrId(param);
9077 CacheHandle ch = allocKnownClass(clsName);
9078 a. load_reg64_disp_reg64(rVmTl, ch, *cls);
9079     // Compare this class to the incoming object's class. If the typehint's
9080     // class is not present, it cannot be an instance: fail.
9081 a. cmp_reg64_reg64(*inCls, *cls);
9084 UnlikelyIfBlock<CC_NZ> subclassCheck(a, astubs);
9085 // Call helper since ObjectData::instanceof is a member function
9086 if (false) {
9087 Class* cls = NULL;
9088 Class* constraint = NULL;
9089 InstanceOfDSlow(cls, constraint);
9091 EMIT_CALL(astubs, InstanceOfDSlow, R(*inCls), R(*cls));
9092 astubs. mov_reg32_reg32(rax, *result);
9095 if (patchAddr) {
9096 a. patchJcc(patchAddr, a.code.frontier);
9098 retFromNullThis.reset();
9100 // Bind result and destination
9101 m_regMap.bindScratch(result, i.outStack->location, i.outStack->outerType(),
9102 RegInfo::DIRTY);
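// Illustrative example (assumed, not from the original source):
//
//   $ok = $obj instanceof Foo;   // InstanceOfD "Foo"
//
// Non-object inputs short-circuit to false; objects take the
// pointer-equality fast path against the cached Class* and fall back to
// InstanceOfDSlow for subclass/interface checks.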
9105 void
9106 TranslatorX64::analyzeIterInit(Tracelet& t, NormalizedInstruction& ni) {
9107 DataType inType = ni.inputs[0]->valueType();
9108 ni.m_txFlags = supportedPlan(inType == KindOfArray || inType == KindOfObject);
9111 void
9112 TranslatorX64::translateIterInit(const Tracelet& t,
9113 const NormalizedInstruction& ni) {
9114 ASSERT(ni.inputs.size() == 1);
9115 ASSERT(!ni.outStack && !ni.outLocal);
9116 DynLocation* in = ni.inputs[0];
9117 ASSERT(in->outerType() != KindOfRef);
9118 SKTRACE(1, ni.source, "IterInit: committed to translation\n");
9119 PhysReg src = getReg(in->location);
9120 SrcKey taken, notTaken;
9121 branchDests(t, ni, &taken, &notTaken, 1 /* immIdx */);
9122 Location iterLoc(Location::Iter, ni.imm[0].u_IVA);
9123 switch (in->valueType()) {
9124 case KindOfArray: {
9125 if (false) { // typecheck
9126 Iter *dest = NULL;
9127 HphpArray *arr = NULL;
9128 new_iter_array(dest, arr);
9130 EMIT_RCALL(a, ni, new_iter_array, A(iterLoc), R(src));
9131 break;
9133 case KindOfObject: {
9134 if (false) { // typecheck
9135 Iter *dest = NULL;
9136 ObjectData *obj = NULL;
9137 Class *ctx = NULL;
9138 new_iter_object(dest, obj, ctx);
9140 Class* ctx = arGetContextClass(curFrame());
9141 EMIT_RCALL(a, ni, new_iter_object, A(iterLoc), R(src), IMM((uintptr_t)ctx));
9142 break;
9144 default: not_reached();
9146 syncOutputs(t); // Ends BB
9147 // If a new iterator is created, new_iter_* will not adjust the refcount of
9148 // the input. If a new iterator is not created, new_iter_* will decRef the
9149 // input for us. new_iter_* returns 0 if an iterator was not created,
9150 // otherwise it returns 1.
9151 a. test_reg64_reg64(rax, rax);
9152 emitCondJmp(taken, notTaken, CC_Z);
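// Illustrative example (assumed, not from the original source): IterInit is
// the entry point of
//
//   foreach ($arr as $v) { ... }
//
// new_iter_array/new_iter_object return 0 when no iterator is created
// (e.g. an empty array), so the test/emitCondJmp pair above branches to
// `taken` to skip the loop body in that case.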
9155 void
9156 TranslatorX64::analyzeIterValueC(Tracelet& t, NormalizedInstruction& i) {
9157 i.m_txFlags = supportedPlan(
9158 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
9159 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
9162 void
9163 TranslatorX64::translateIterValueC(const Tracelet& t,
9164 const NormalizedInstruction& i) {
9165 ASSERT(i.inputs.size() == 1);
9166 ASSERT(i.inputs[0]->rtt.isIter());
9168 Location outLoc;
9169 Iter::Type iterType = i.inputs[0]->rtt.iterType();
9170 typedef void (*IterValueC)(Iter*, TypedValue*);
9171 IterValueC ivc;
9172 if (i.outStack) {
9173 outLoc = i.outStack->location;
9174 ivc = (iterType == Iter::TypeArray)
9175 ? iter_value_cell_array : iter_value_cell_iterator;
9176 } else {
9177 outLoc = i.outLocal->location;
9178 ivc = (iterType == Iter::TypeArray)
9179 ? iter_value_cell_local_array : iter_value_cell_local_iterator;
9181 EMIT_RCALL(a, i, ivc, A(i.inputs[0]->location), A(outLoc));
9182 m_regMap.invalidate(outLoc);
9185 void
9186 TranslatorX64::analyzeIterKey(Tracelet& t, NormalizedInstruction& i) {
9187 i.m_txFlags = supportedPlan(
9188 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
9189 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
9192 void
9193 TranslatorX64::translateIterKey(const Tracelet& t,
9194 const NormalizedInstruction& i) {
9195 ASSERT(i.inputs.size() == 1);
9196 ASSERT(i.inputs[0]->rtt.isIter());
9198 Location outLoc;
9199 Iter::Type iterType = i.inputs[0]->rtt.iterType();
9200 typedef void (*IterKey)(Iter*, TypedValue*);
9201 IterKey ik;
9202 if (i.outStack) {
9203 outLoc = i.outStack->location;
9204 ik = (iterType == Iter::TypeArray)
9205 ? iter_key_cell_array : iter_key_cell_iterator;
9206 } else {
9207 outLoc = i.outLocal->location;
9208 ik = (iterType == Iter::TypeArray)
9209 ? iter_key_cell_local_array : iter_key_cell_local_iterator;
9211 EMIT_RCALL(a, i, ik, A(i.inputs[0]->location), A(outLoc));
9212 m_regMap.invalidate(outLoc);
9215 void
9216 TranslatorX64::analyzeIterNext(Tracelet& t, NormalizedInstruction& i) {
9217 ASSERT(i.inputs.size() == 1);
9218 i.m_txFlags = supportedPlan(
9219 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
9220 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
9223 void
9224 TranslatorX64::translateIterNext(const Tracelet& t,
9225 const NormalizedInstruction& i) {
9226 ASSERT(i.inputs.size() == 1);
9227 ASSERT(!i.outStack && !i.outLocal);
9228 ASSERT(i.inputs[0]->rtt.isIter());
9230 if (false) { // type check
9231 Iter* it = NULL;
9232 int64 ret = iter_next_array(it);
9233 if (ret) printf("\n");
9235 m_regMap.cleanAll(); // input might be in-flight
9236   // If the iterator reaches the end, iter_next_array will handle
9237   // freeing the iterator, and it will decRef the array.
9238 EMIT_CALL(a, iter_next_array, A(i.inputs[0]->location));
9239 recordReentrantCall(a, i);
9240 ScratchReg raxScratch(m_regMap, rax);
9242 // syncOutputs before we handle the branch.
9243 syncOutputs(t);
9244 SrcKey taken, notTaken;
9245 branchDests(t, i, &taken, &notTaken, 1 /* destImmIdx */);
9247 a. test_reg64_reg64(rax, rax);
9248 emitCondJmp(taken, notTaken, CC_NZ);
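// Illustrative note (assumption, not from the original source): IterNext
// implements the foreach back-edge; a nonzero return from iter_next_array
// means more elements remain, so CC_NZ branches to `taken` (the loop body),
// while zero falls through after the iterator has been freed and the array
// decRef'd.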
9251 // PSEUDOINSTR_DISPATCH is a switch() fragment that routes opcodes to their
9252 // shared handlers, as per the PSEUDOINSTRS macro.
9253 #define PSEUDOINSTR_DISPATCH(func) \
9254 case OpBitAnd: \
9255 case OpBitOr: \
9256 case OpBitXor: \
9257 case OpSub: \
9258 case OpMul: \
9259 func(BinaryArithOp, t, i) \
9260 case OpSame: \
9261 case OpNSame: \
9262 func(SameOp, t, i) \
9263 case OpEq: \
9264 case OpNeq: \
9265 func(EqOp, t, i) \
9266 case OpLt: \
9267 case OpLte: \
9268 case OpGt: \
9269 case OpGte: \
9270 func(LtGtOp, t, i) \
9271 case OpEmptyL: \
9272 case OpCastBool: \
9273 func(UnaryBooleanOp, t, i) \
9274 case OpJmpZ: \
9275 case OpJmpNZ: \
9276 func(BranchOp, t, i) \
9277 case OpSetL: \
9278 case OpBindL: \
9279 func(AssignToLocalOp, t, i) \
9280 case OpFPassC: \
9281 case OpFPassCW: \
9282 case OpFPassCE: \
9283 func(FPassCOp, t, i) \
9284 case OpFPushCuf: \
9285 case OpFPushCufF: \
9286 case OpFPushCufSafe: \
9287 func(FPushCufOp, t, i) \
9288 case OpIssetL: \
9289 case OpIsNullL: \
9290 case OpIsStringL: \
9291 case OpIsArrayL: \
9292 case OpIsIntL: \
9293 case OpIsObjectL: \
9294 case OpIsBoolL: \
9295 case OpIsDoubleL: \
9296 case OpIsNullC: \
9297 case OpIsStringC: \
9298 case OpIsArrayC: \
9299 case OpIsIntC: \
9300 case OpIsObjectC: \
9301 case OpIsBoolC: \
9302 case OpIsDoubleC: \
9303 func(CheckTypeOp, t, i)
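// Illustrative expansion (assumed, for exposition only): with
// func = TRANSLATE as used below, the fragment expands to cases such as
//
//   case OpBitAnd:
//   case OpBitOr:
//   case OpBitXor:
//   case OpSub:
//   case OpMul:
//     translateBinaryArithOp(t, i); break;
//
// so groups of related opcodes share one analyze/translate implementation.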
9305 void
9306 TranslatorX64::analyzeInstr(Tracelet& t,
9307 NormalizedInstruction& i) {
9308 const Opcode op = i.op();
9309 switch (op) {
9310 #define CASE(iNm) \
9311 case Op ## iNm: { \
9312 analyze ## iNm(t, i); \
9313 } break;
9314 #define ANALYZE(a, b, c) analyze ## a(b, c); break;
9315 INSTRS
9316 PSEUDOINSTR_DISPATCH(ANALYZE)
9318 #undef ANALYZE
9319 #undef CASE
9320 default: {
9321 ASSERT(i.m_txFlags == Interp);
9324 SKTRACE(1, i.source, "translation plan: %x\n", i.m_txFlags);
9327 bool
9328 TranslatorX64::dontGuardAnyInputs(Opcode op) {
9329 switch (op) {
9330 #define CASE(iNm) case Op ## iNm:
9331 #define NOOP(a, b, c)
9332 INSTRS
9333 PSEUDOINSTR_DISPATCH(NOOP)
9334 return false;
9336 return true;
9337 #undef NOOP
9338 #undef CASE
9342 void TranslatorX64::emitOneGuard(const Tracelet& t,
9343 const NormalizedInstruction& i,
9344 PhysReg reg, int disp, DataType type,
9345 TCA &sideExit) {
9346 bool isFirstInstr = (&i == t.m_instrStream.first);
9347 emitTypeCheck(a, type, reg, disp);
9348 if (isFirstInstr) {
9349 SrcRec& srcRec = *getSrcRec(t.m_sk);
9350 // If it's the first instruction, we haven't made any forward
9351 // progress yet, so this is really a tracelet-level guard rather
9352 // than a side exit. If we tried to "side exit", we'd come right
9353 // back to this check!
9355 // We need to record this as a fallback branch.
9356 emitFallbackJmp(srcRec);
9357 } else if (!sideExit) {
9358 UnlikelyIfBlock<CC_NZ> branchToSideExit(a, astubs);
9359 sideExit = astubs.code.frontier;
9360 emitSideExit(astubs, i, false /*next*/);
9361 } else {
9362 a. jnz(sideExit);
9366 // Emit necessary guards for variants and pseudo-main locals before instr i.
9367 // For HHIR, this only inserts guards for pseudo-main locals. Variants are
9368 // guarded in a different way.
9369 void
9370 TranslatorX64::emitVariantGuards(const Tracelet& t,
9371 const NormalizedInstruction& i) {
9372 bool pseudoMain = Translator::liveFrameIsPseudoMain();
9373 bool isFirstInstr = (&i == t.m_instrStream.first);
9374 TCA sideExit = NULL;
9375 const NormalizedInstruction *base = &i;
9376 while (base->grouped) {
9377 base = base->prev;
9378 ASSERT(base);
9380 for (size_t in = 0; in < i.inputs.size(); ++in) {
9381 DynLocation* input = i.inputs[in];
9382 if (!input->isValue()) continue;
9383 bool isRef = input->isVariant() &&
9384 !i.ignoreInnerType &&
9385 input->rtt.innerType() != KindOfInvalid;
9386 bool modifiableLocal = pseudoMain && input->isLocal() &&
9387 !input->rtt.isVagueValue();
9389 if (!modifiableLocal && !isRef) continue;
9391 SKTRACE(1, i.source, "guarding %s: (%s:%d) :: %d!\n",
9392 modifiableLocal ? "pseudoMain local" : "variant inner",
9393 input->location.spaceName(),
9394 input->location.offset,
9395 input->rtt.valueType());
9396 // TODO task 1122807: don't check the inner type if we've already
9397 // checked it and have executed no possibly-aliasing instructions in
9398 // the meanwhile.
9399 if (modifiableLocal) {
9400 if (m_useHHIR) {
9401 RuntimeType& rtt = input->rtt;
9402 JIT::Type::Tag type = JIT::Type::fromRuntimeType(rtt);
9403 if (isFirstInstr) {
9404 m_hhbcTrans->guardTypeLocal(input->location.offset, type);
9405 } else {
9406 m_hhbcTrans->checkTypeLocal(input->location.offset, type);
9408 } else {
9409 PhysReg reg;
9410 int disp;
9411 locToRegDisp(input->location, &reg, &disp);
9412 emitOneGuard(t, *base, reg, disp,
9413 input->rtt.outerType(), sideExit);
9416 if (isRef && !m_useHHIR) {
9417 m_regMap.allocInputReg(i, in);
9418 emitOneGuard(t, *base, getReg(input->location), 0,
9419 input->rtt.innerType(), sideExit);
9424 NormalizedInstruction::OutputUse
9425 NormalizedInstruction::outputIsUsed(DynLocation* output) const {
9426 for (NormalizedInstruction* succ = next;
9427 succ; succ = succ->next) {
9428 for (size_t i = 0; i < succ->inputs.size(); ++i) {
9429 if (succ->inputs[i] == output) {
9430 if (succ->inputWasInferred(i)) {
9431 return OutputInferred;
9433 if (Translator::Get()->dontGuardAnyInputs(succ->op())) {
9434           /* The consumer doesn't care about its inputs,
9435              but we may still have inferred something about
9436              its outputs that a later instruction may depend on.
9438 if (!outputDependsOnInput(succ->op()) ||
9439 !(succ->outStack && !succ->outStack->rtt.isVagueValue() &&
9440 succ->outputIsUsed(succ->outStack) != OutputUsed) ||
9441 !(succ->outLocal && !succ->outLocal->rtt.isVagueValue() &&
9442 succ->outputIsUsed(succ->outLocal)) != OutputUsed) {
9443 return OutputDoesntCare;
9446 return OutputUsed;
9450 return OutputUnused;
9453 void
9454 TranslatorX64::emitPredictionGuards(const NormalizedInstruction& i) {
9455 if (!i.outputPredicted || i.breaksTracelet) return;
9456 NormalizedInstruction::OutputUse u = i.outputIsUsed(i.outStack);
9458 if (m_useHHIR) {
9459 if (u == NormalizedInstruction::OutputUsed ||
9460 u == NormalizedInstruction::OutputInferred) {
9461 JIT::Type::Tag jitType = JIT::Type::fromRuntimeType(i.outStack->rtt);
9462 if (u == NormalizedInstruction::OutputInferred) {
9463 TRACE(1, "HHIR: emitPredictionGuards: output inferred to be %s\n",
9464 JIT::Type::Strings[jitType]);
9465 m_hhbcTrans->assertTypeStack(0, jitType);
9466 } else {
9467 TRACE(1, "HHIR: emitPredictionGuards: output predicted to be %s\n",
9468 JIT::Type::Strings[jitType]);
9469 m_hhbcTrans->checkTypeStack(0, jitType, i.next->offset());
9472 return;
9475 switch (u) {
9476 case NormalizedInstruction::OutputUsed:
9477 break;
9478 case NormalizedInstruction::OutputUnused:
9479 return;
9480 case NormalizedInstruction::OutputInferred:
9481 Stats::emitInc(a, Stats::TC_TypePredOverridden);
9482 return;
9483 case NormalizedInstruction::OutputDoesntCare:
9484 Stats::emitInc(a, Stats::TC_TypePredUnneeded);
9485 return;
9488 ASSERT(i.outStack);
9489 PhysReg base;
9490 int disp;
9491 locToRegDisp(i.outStack->location, &base, &disp);
9492 ASSERT(base == rVmSp);
9493 TRACE(1, "PREDGUARD: %p dt %d offset %d voffset %lld\n",
9494 a.code.frontier, i.outStack->outerType(), disp,
9495 i.outStack->location.offset);
9496 emitTypeCheck(a, i.outStack->outerType(), rVmSp, disp);
9498 UnlikelyIfBlock<CC_NZ> branchToSideExit(a, astubs);
9499 Stats::emitInc(astubs, Stats::TC_TypePredMiss);
9500 emitSideExit(astubs, i, true);
9502 Stats::emitInc(a, Stats::TC_TypePredHit);
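// Illustrative note (assumption, not from the original source): unlike an
// inferred output, a predicted output can be wrong at runtime, so the
// OutputUsed case above pays for a real type check with a side exit, while
// the other cases return after at most a stats bump.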
9505 static void failedTypePred() {
9506 raise_error("A type prediction was incorrect");
9509 void
9510 TranslatorX64::translateInstrWork(const Tracelet& t,
9511 const NormalizedInstruction& i) {
9512 const Opcode op = i.op();
9513 switch (op) {
9514 #define CASE(iNm) \
9515 case Op ## iNm: \
9516 translate ## iNm(t, i); \
9517 break;
9518 #define TRANSLATE(a, b, c) translate ## a(b, c); break;
9519 INSTRS
9520 PSEUDOINSTR_DISPATCH(TRANSLATE)
9521 #undef TRANSLATE
9522 #undef CASE
9523 default:
9524 ASSERT(false);
9528 void
9529 TranslatorX64::translateInstr(const Tracelet& t,
9530 const NormalizedInstruction& i) {
9532 * translateInstr() translates an individual instruction in a tracelet,
9533 * either by directly emitting machine code for that instruction or by
9534 * emitting a call to the interpreter.
9536 * If the instruction ends the current tracelet, we must emit machine code
9537 * to transfer control to some target that will continue to make forward
9538 * progress. This target may be the beginning of another tracelet, or it may
9539 * be a translator service request. Before transferring control, a tracelet
9540 * must ensure the following invariants hold:
9541 * 1) The machine registers rVmFp and rVmSp are in sync with vmfp()
9542 * and vmsp().
9543 * 2) All "dirty" values are synced in memory. This includes the
9544 * evaluation stack, locals, globals, statics, and any other program
9545 * accessible locations. This also means that all refcounts must be
9546 * up to date.
9548 ASSERT(!m_useHHIR);
9549 ASSERT(!i.outStack || i.outStack->isStack());
9550 ASSERT(!i.outLocal || i.outLocal->isLocal());
9551 const char *opNames[] = {
9552 #define O(name, imm, push, pop, flags) \
9553 #name,
9554 OPCODES
9555 #undef O
9557 SpaceRecorder sr(opNames[i.op()], a);
9558 SKTRACE(1, i.source, "translate %#lx\n", long(a.code.frontier));
9559 const Opcode op = i.op();
9561 TCA start = a.code.frontier;
9562 TCA astart = astubs.code.frontier;
9564 m_regMap.bumpEpoch();
9565 // Allocate the input regs upfront unless instructed otherwise
9566 // or the instruction is interpreted
9567 if (!i.manuallyAllocInputs && i.m_txFlags) {
9568 m_regMap.allocInputRegs(i);
9571 if (debug) {
9572 for (unsigned j = 0; j < i.inputs.size(); j++) {
9573 if (i.inputWasInferred(j)) {
9574 DynLocation* dl = i.inputs[j];
9575 ASSERT(dl->rtt.isValue() &&
9576 !dl->rtt.isVagueValue() &&
9577 dl->outerType() != KindOfInvalid);
9578 PhysReg base;
9579 int disp;
9580 locToRegDisp(dl->location, &base, &disp);
9581 emitTypeCheck(a, dl->rtt.typeCheckValue(), base, disp);
9583 UnlikelyIfBlock<CC_NZ> typePredFailed(a, astubs);
9584 EMIT_CALL(astubs, failedTypePred);
9585 recordReentrantStubCall(i);
9591 if (!i.grouped) {
9592 emitVariantGuards(t, i);
9593 const NormalizedInstruction* n = &i;
9594 while (n->next && n->next->grouped) {
9595 n = n->next;
9596 emitVariantGuards(t, *n);
9600 // Allocate the input regs upfront unless instructed otherwise
9601 // or the instruction is interpreted
9602 if (!i.manuallyAllocInputs && i.m_txFlags) {
9603 m_regMap.allocInputRegs(i);
9606 if (i.m_txFlags == Interp || RuntimeOption::EvalThreadingJit) {
9607 // If the problem is local to this instruction, just call out to
9608 // the interpreter. emitInterpOne will perform end-of-tracelet duties
9609 // if this instruction ends the tracelet.
9610 SKTRACE(1, i.source, "Interp\n");
9611 emitInterpOne(t, i);
9612 } else {
9613 // Actually translate the instruction's body.
9614 Stats::emitIncTranslOp(a, op);
9616 translateInstrWork(t, i);
9619 // Invalidate locations that are no longer live
9620 for (unsigned k = 0; k < i.deadLocs.size(); ++k) {
9621 const Location& l = i.deadLocs[k];
9622 m_regMap.invalidate(l);
9625 emitPredictionGuards(i);
9626 recordBCInstr(op, a, start);
9627 recordBCInstr(op + Op_count, astubs, astart);
9629 if (i.breaksTracelet && !i.changesPC) {
9630 // If this instruction's opcode always ends the tracelet then the
9631 // instruction case is responsible for performing end-of-tracelet
9632 // duties. Otherwise, we handle ending the tracelet here.
9633 syncOutputs(t);
9634 emitBindJmp(t.m_nextSk);
9637 m_regMap.assertNoScratch();
9640 bool
9641 TranslatorX64::checkTranslationLimit(const SrcKey& sk,
9642 const SrcRec& srcRec) const {
9643 if (srcRec.translations().size() == SrcRec::kMaxTranslations) {
9644 INC_TPC(max_trans);
9645 if (debug && Trace::moduleEnabled(Trace::tx64, 2)) {
9646 const vector<TCA>& tns = srcRec.translations();
9647 TRACE(1, "Too many (%ld) translations: %s, BC offset %d\n",
9648 tns.size(), curUnit()->filepath()->data(),
9649 sk.offset());
9650 SKTRACE(2, sk, "{\n", tns.size());
9651 TCA topTrans = srcRec.getTopTranslation();
9652 for (size_t i = 0; i < tns.size(); ++i) {
9653 const TransRec* rec = getTransRec(tns[i]);
9654 ASSERT(rec);
9655 SKTRACE(2, sk, "%d %p\n", i, tns[i]);
9656 if (tns[i] == topTrans) {
9657 SKTRACE(2, sk, "%d: *Top*\n", i);
9659 if (rec->kind == TransAnchor) {
9660 SKTRACE(2, sk, "%d: Anchor\n", i);
9661 } else {
9662 SKTRACE(2, sk, "%d: guards {\n", i);
9663 for (unsigned j = 0; j < rec->dependencies.size(); ++j) {
9664 TRACE(2, rec->dependencies[j]);
9666 SKTRACE(2, sk, "%d } guards\n", i);
9669 SKTRACE(2, sk, "} /* Too many translations */\n");
9671 return true;
9674 return false;
9677 void
9678 TranslatorX64::emitGuardChecks(X64Assembler& a,
9679 const SrcKey& sk,
9680 const ChangeMap& dependencies,
9681 const RefDeps& refDeps,
9682 SrcRec& fail) {
9683 if (Trace::moduleEnabled(Trace::stats, 2)) {
9684 Stats::emitInc(a, Stats::TraceletGuard_enter);
9687 bool pseudoMain = Translator::liveFrameIsPseudoMain();
9689 emitRB(a, RBTypeTraceletGuards, sk);
9690 for (DepMap::const_iterator dep = dependencies.begin();
9691 dep != dependencies.end();
9692 ++dep) {
9693 if (!pseudoMain || !dep->second->isLocal() || !dep->second->isValue()) {
9694 checkType(a, dep->first, dep->second->rtt, fail);
9695 } else {
9696 TRACE(3, "Skipping tracelet guard for %s %d\n",
9697 dep->second->location.pretty().c_str(),
9698 (int)dep->second->rtt.outerType());
9702 checkRefs(a, sk, refDeps, fail);
9704 if (Trace::moduleEnabled(Trace::stats, 2)) {
9705 Stats::emitInc(a, Stats::TraceletGuard_execute);
9710 void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
9711 if (!debug) return;
9713 const SrcKey& sk = t.m_sk;
9715 TRACE(3, "----------------------------------------------\n");
9716 TRACE(3, " Translating from file %s:%d %s at %p:\n",
9717 curUnit()->filepath()->data(),
9718 curUnit()->getLineNumber(sk.offset()),
9719 curFunc()->name()->data(),
9720 postGuards);
9721 TRACE(3, " preconds:\n");
9722 TRACE(3, " types:\n");
9723 for (DepMap::const_iterator i = t.m_dependencies.begin();
9724 i != t.m_dependencies.end(); ++i) {
9725 TRACE(3, " %-5s\n", i->second->pretty().c_str());
9727 if (t.m_refDeps.size() != 0) {
9728 TRACE(3, " refs:\n");
9729 for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
9730 i != t.m_refDeps.m_arMap.end();
9731 ++i) {
9732 TRACE(3, " (ActRec %lld : %-5s)\n", i->first,
9733 i->second.pretty().c_str());
9736 TRACE(3, " postconds:\n");
9737 for (ChangeMap::const_iterator i = t.m_changes.begin();
9738 i != t.m_changes.end(); ++i) {
9739 TRACE(3, " %-5s\n", i->second->pretty().c_str());
9741 for (SrcKey traceKey(t.m_sk);
9742 traceKey != t.m_nextSk;
9743 traceKey.advance(curUnit())) {
9744 string s = instrToString(
9745 curUnit()->at(traceKey.offset()), curUnit());
9746 TRACE(3, " %6d: %s\n", traceKey.offset(), s.c_str());
9748 TRACE(3, "----------------------------------------------\n");
9749 if (Trace::moduleEnabled(Trace::tx64, 5)) {
9750 // prettyStack() expects to use vmpc(). Leave it in the state we
9751 // found it since this code is debug-only, and we don't want behavior
9752 // to vary across the optimized/debug builds.
9753 PC oldPC = vmpc();
9754 vmpc() = curUnit()->at(sk.offset());
9755 TRACE(3, g_vmContext->prettyStack(string(" tx64 ")));
9756 vmpc() = oldPC;
9757 TRACE(3, "----------------------------------------------\n");
9761 void
9762 TranslatorX64::translateTracelet(const Tracelet& t) {
9763 const SrcKey &sk = t.m_sk;
9765 m_curTrace = &t;
9766 Nuller<Tracelet> ctNuller(&m_curTrace);
9768 SKTRACE(1, sk, "translateTracelet\n");
9769 ASSERT(m_srcDB.find(sk));
9770 ASSERT(m_regMap.pristine());
9771 TCA start = a.code.frontier;
9772 TCA stubStart = astubs.code.frontier;
9773 TCA counterStart = 0;
9774 uint8 counterLen = 0;
9775 SrcRec& srcRec = *getSrcRec(sk);
9776 vector<TransBCMapping> bcMapping;
9778 bool hhirSucceeded = irTranslateTracelet(t, start, stubStart);
9779 if (hhirSucceeded) {
9780 m_irAUsage += (a.code.frontier - start);
9781 m_irAstubsUsage += (astubs.code.frontier - stubStart);
9783 if (!hhirSucceeded) {
9784 ASSERT(m_pendingFixups.size() == 0);
9785 ASSERT(srcRec.inProgressTailJumps().size() == 0);
9786 try {
9787 if (t.m_analysisFailed || checkTranslationLimit(t.m_sk, srcRec)) {
9788 punt();
9791 emitGuardChecks(a, t.m_sk, t.m_dependencies, t.m_refDeps, srcRec);
9792 dumpTranslationInfo(t, a.code.frontier);
9794 // after guards, add a counter for the translation if requested
9795 if (RuntimeOption::EvalJitTransCounters) {
9796 emitTransCounterInc(a);
9799 emitRB(a, RBTypeTraceletBody, t.m_sk);
9800 Stats::emitInc(a, Stats::Instr_TC, t.m_numOpcodes);
9801 recordBCInstr(OpTraceletGuard, a, start);
9803 // Translate each instruction in the tracelet
9804 for (NormalizedInstruction* ni = t.m_instrStream.first; ni; ni = ni->next) {
9805 if (isTransDBEnabled()) {
9806 bcMapping.push_back((TransBCMapping){ni->offset(),
9807 a.code.frontier,
9808 astubs.code.frontier});
9811 m_curNI = ni;
9812 Nuller<NormalizedInstruction> niNuller(&m_curNI);
9813 translateInstr(t, *ni);
9814 ASSERT(ni->source.offset() >= curFunc()->base());
9815 // We sometimes leave the tail of a truncated tracelet in place to aid
9816 // analysis, but breaksTracelet is authoritative.
9817 if (ni->breaksTracelet) break;
9819 } catch (TranslationFailedExc& tfe) {
9820 // The whole translation failed; give up on this BB. Since it is not
9821 // linked into srcDB yet, it is guaranteed not to be reachable.
9822 m_regMap.reset();
9823 // Permanent reset; nothing is reachable yet.
9824 a.code.frontier = start;
9825 astubs.code.frontier = stubStart;
9826 bcMapping.clear();
9827 // Discard any pending fixups.
9828 m_pendingFixups.clear();
9829 srcRec.clearInProgressTailJumps();
9830 TRACE(1, "emitting %d-instr interp request for failed translation @%s:%d\n",
9831 int(t.m_numOpcodes), tfe.m_file, tfe.m_line);
9832 // Add a counter for the translation if requested
9833 if (RuntimeOption::EvalJitTransCounters) {
9834 emitTransCounterInc(a);
9836 a. jmp(
9837 emitServiceReq(REQ_INTERPRET, 2ull, uint64_t(t.m_sk.offset()),
9838 uint64_t(t.m_numOpcodes)));
9839 // Fall through.
9841 } // if (!hhirSucceeded)
9843 for (uint i = 0; i < m_pendingFixups.size(); i++) {
9844 TCA tca = m_pendingFixups[i].m_tca;
9845 ASSERT(isValidCodeAddress(tca));
9846 m_fixupMap.recordFixup(tca, m_pendingFixups[i].m_fixup);
9848 m_pendingFixups.clear();
9850 addTranslation(TransRec(t.m_sk, curUnit()->md5(), t, start,
9851 a.code.frontier - start, stubStart,
9852 astubs.code.frontier - stubStart,
9853 counterStart, counterLen,
9854 bcMapping));
9856 recordGdbTranslation(sk, curUnit(), a, start,
9857 false, false);
9858 recordGdbTranslation(sk, curUnit(), astubs, stubStart,
9859 false, false);
9860   // SrcRec::newTranslation() makes this code reachable. Do this last;
9861   // otherwise there's some chance of reader threads hitting code whose
9862   // metadata is not yet visible.
9863 TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n", start, sk.m_funcId,
9864 sk.m_offset);
9865 srcRec.newTranslation(a, astubs, start);
9866 m_regMap.reset();
9867 TRACE(1, "tx64: %zd-byte tracelet\n", a.code.frontier - start);
9868 if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
9869 Trace::traceRelease(getUsage().c_str());
9873 static const size_t kASize = 512 << 20;
9874 static const size_t kAStubsSize = 512 << 20;
9875 static const size_t kGDataSize = kASize / 4;
9876 static const size_t kTotalSize = kASize + kAStubsSize +
9877 kTrampolinesBlockSize + kGDataSize;
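// Illustrative arithmetic (assumption, not from the original source): with
// kASize = kAStubsSize = 512MB and kGDataSize = kASize / 4 = 128MB,
// kTotalSize is roughly 1.1GB plus the trampoline block. The static_assert
// in the constructor below requires the whole slab to stay under 2GiB so
// that 32-bit relative addressing between the blocks keeps working.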
9878 TranslatorX64::TranslatorX64()
9879 : Translator(),
9880 m_numNativeTrampolines(0),
9881 m_trampolineSize(0),
9882 m_spillFillCode(&a),
9883 m_interceptHelper(0),
9884 m_defClsHelper(0),
9885 m_funcPrologueRedispatch(0),
9886 m_irAUsage(0),
9887 m_irAstubsUsage(0),
9888 m_numHHIRTrans(0),
9889 m_irFactory(NULL),
9890 m_constTable(NULL),
9891 m_traceBuilder(NULL),
9892 m_hhbcTrans(NULL),
9893 m_regMap(kCallerSaved, kCalleeSaved, this),
9894 m_interceptsEnabled(false),
9895 m_unwindRegMap(128),
9896 m_curTrace(0),
9897 m_curNI(0),
9898 m_curFile(NULL),
9899 m_curLine(0),
9900 m_curFunc(NULL),
9901 m_vecState(NULL)
9903 TRACE(1, "TranslatorX64@%p startup\n", this);
9904 tx64 = this;
9906 static_assert(kTotalSize < (2ul << 30),
9907 "Combined size of all code/data blocks in TranslatorX64 "
9908 "must be < 2GiB to support 32-bit relative addresses");
9910 static bool profileUp = false;
9911 if (!profileUp) {
9912 profileInit();
9913 profileUp = true;
9916   // We want to ensure that the blocks for "a", "astubs",
9917   // "atrampolines", and "m_globalData" are nearby so that we can
9918   // short jump/point between them. Thus we allocate one slab and
9919   // divide it among "a", "astubs", and "atrampolines".
9921   // Using sbrk to ensure it's in the bottom 2GB, so we avoid
9922   // the need for trampolines, and get to use shorter
9923   // instructions for TC addresses.
9924 static const size_t kRoundUp = 2 << 20;
9925 uint8_t *base = (uint8_t*)sbrk(kTotalSize + kRoundUp - 1);
9926 base += -(uint64_t)base & (kRoundUp - 1);
9927 if (RuntimeOption::EvalMapTCHuge) {
9928 hintHuge(base, kTotalSize);
9930 atrampolines.init(base, kTrampolinesBlockSize);
9931 base += kTrampolinesBlockSize;
9932 a.init(base, kASize);
9933 m_unwindRegistrar = register_unwind_region(base, kTotalSize);
9934 base += kASize;
9935 astubs.init(base, kAStubsSize);
9936 base += kAStubsSize;
9937 m_globalData.init(base, kGDataSize);
9939 // Emit some special helpers that are shared across translations.
9941 // Emit a byte of padding. This is a kind of hacky way to
9942 // avoid hitting an assert in recordGdbStub when we call
9943 // it with m_callToExit - 1 as the start address.
9944 astubs.emitNop(1);
9946 // Call to exit with whatever value the program leaves on
9947 // the return stack.
9948 m_callToExit = emitServiceReq(false, REQ_EXIT, 0ull);
9950 m_retHelper = emitRetFromInterpretedFrame();
9952 moveToAlign(astubs);
9953 m_resumeHelper = astubs.code.frontier;
9954 emitGetGContext(astubs, rax);
9955 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
9956 rVmFp);
9957 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
9958 Stack::topOfStackOffset(), rVmSp);
9959 emitServiceReq(false, REQ_RESUME, 0ull);
9961 // Helper for DefCls
9962 if (false) {
9963 PreClass *preClass = 0;
9964 defClsHelper(preClass);
9966 m_defClsHelper = TCA(a.code.frontier);
9967 PhysReg rEC = argNumToRegName[2];
9968 emitGetGContext(a, rEC);
9969 a. store_reg64_disp_reg64(rVmFp, offsetof(VMExecutionContext, m_fp), rEC);
9970 a. store_reg64_disp_reg64(argNumToRegName[1],
9971 offsetof(VMExecutionContext, m_pc), rEC);
9972 // rax holds the up-to-date top of stack pointer
9973 a. store_reg64_disp_reg64(rax,
9974 offsetof(VMExecutionContext, m_stack) +
9975 Stack::topOfStackOffset(), rEC);
9976 a. jmp((TCA)defClsHelper);
9978 moveToAlign(astubs);
9979 m_stackOverflowHelper = astubs.code.frontier;
9980 // We are called from emitStackCheck, with the new stack frame in
9981 // rStashedAR. Get the caller's PC into rdi and save it off.
9982 astubs. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rax);
9983 astubs. load_reg64_disp_reg32(rStashedAR, AROFF(m_soff), rdi);
9984 astubs. load_reg64_disp_reg64(rax, Func::sharedOffset(), rax);
9985 astubs. load_reg64_disp_reg32(rax, Func::sharedBaseOffset(), rax);
9986 astubs. add_reg32_reg32(rax, rdi);
9988 emitEagerVMRegSave(astubs, SaveFP | SavePC);
9989 emitServiceReq(false, REQ_STACK_OVERFLOW, 0ull);
9991 // The decRef helper for when we bring the count down to zero. Callee needs to
9992 // bring the value into rdi. These can be burned in for all time, and for all
9993 // translations.
9994 if (false) { // type-check
9995 StringData* str = NULL;
9996 ArrayData* arr = NULL;
9997 ObjectData* obj = NULL;
9998 RefData* ref = NULL;
9999 tv_release_str(str);
10000 tv_release_arr(arr);
10001 tv_release_obj(obj);
10002 tv_release_ref(ref);
10004 typedef void* vp;
10005 m_dtorStubs[BitwiseKindOfString] = emitUnaryStub(a, vp(tv_release_str));
10006 m_dtorStubs[KindOfArray] = emitUnaryStub(a, vp(tv_release_arr));
10007 m_dtorStubs[KindOfObject] = emitUnaryStub(a, vp(tv_release_obj));
10008 m_dtorStubs[KindOfRef] = emitUnaryStub(a, vp(tv_release_ref));
10009 m_dtorGenericStub = genericRefCountStub(a);
10010 m_dtorGenericStubRegs = genericRefCountStubRegs(a);
10012 if (trustSigSegv) {
10013 // Install SIGSEGV handler for timeout exceptions
10014 struct sigaction sa;
10015 struct sigaction old_sa;
10016 sa.sa_sigaction = &TranslatorX64::SEGVHandler;
10017 sa.sa_flags = SA_SIGINFO;
10018 sigemptyset(&sa.sa_mask);
10019 if (sigaction(SIGSEGV, &sa, &old_sa) != 0) {
10020 throw std::runtime_error(
10021 std::string("Failed to install SIGSEGV handler: ") +
10022 strerror(errno));
10024 m_segvChain = old_sa.sa_flags & SA_SIGINFO ?
10025 old_sa.sa_sigaction : (sigaction_t)old_sa.sa_handler;
10029 // do gdb specific initialization. This has to happen after
10030 // the TranslatorX64 constructor is called, because gdb initialization
10031 // calls backs into TranslatorX64::Get()
10032 void TranslatorX64::initGdb() {
10033 // On a backtrace, gdb tries to locate the calling frame at address
10034 // returnRIP-1. However, for the first VM frame, there is no code at
10035 // returnRIP-1, since the AR was set up manually. For this frame,
10036 // record the tracelet address as starting from callToExit-1, so gdb
10037 // does not barf
10038 recordGdbStub(astubs, m_callToExit - 1, "HHVM::callToExit");
10040 recordBCInstr(OpRetFromInterp, astubs, m_retHelper);
10041 recordGdbStub(astubs, m_retHelper - 1, "HHVM::retHelper");
10042 recordBCInstr(OpResumeHelper, astubs, m_resumeHelper);
10043 recordBCInstr(OpDefClsHelper, a, m_defClsHelper);
10044 recordBCInstr(OpDtorStub, a, m_dtorStubs[BitwiseKindOfString]);
10045 recordGdbStub(a, m_dtorStubs[BitwiseKindOfString],
10046 "HHVM::destructorStub");
10049 TranslatorX64*
10050 TranslatorX64::Get() {
10052 * Called from outrageously early, pre-main code, and will
10053 * allocate the first translator space.
10055 if (!nextTx64) {
10056 nextTx64 = new TranslatorX64();
10057 nextTx64->initGdb();
10059 if (!tx64) {
10060 tx64 = nextTx64;
10062 ASSERT(tx64);
10063 return tx64;
10066 template<int Arity>
10067 TCA TranslatorX64::emitNAryStub(X64Assembler& a, void* fptr) {
10068 BOOST_STATIC_ASSERT((Arity < kNumRegisterArgs));
10070 // The callNAryStub has already saved these regs on a.
10071 RegSet alreadySaved;
10072 for (size_t i = 0; i < Arity; ++i) {
10073 alreadySaved |= RegSet(argNumToRegName[i]);
10077 * We've made a call instruction, and pushed Arity args on the
10078 * stack. So the stack address will be odd coming into the stub if
10079 * Arity + 1 (for the call) is odd. We need to correct for this
10080 * when saving other registers below to keep SSE-friendly alignment
10081 * of the stack.
10083 const int Parity = (Arity + 1) % 2;
10085 // These dtor stubs are meant to be called with the call
10086 // instruction, unlike most translator code.
10087 moveToAlign(a);
10088 TCA start = a.code.frontier;
10090 * Preserve most caller-saved regs. The calling code has already
10091 * preserved regs in `alreadySaved'; we push the rest of the caller
10092 * saved regs and rbp. It should take 9 qwords in total, and the
10093 * incoming call instruction made it 10. This is an even number of
10094 * pushes, so we preserve the SSE-friendliness of our execution
10095 * environment (without real intervention from PhysRegSaverParity).
10097 * Note that we don't need to clean all registers because the only
10098 * reason we could need those locations written back is if stack
10099 * unwinding were to happen. These stubs can re-enter due to user
10100 * destructors, but exceptions are not allowed to propagate out of
10101 * those, so it's not a problem.
10103 a. pushr(rbp); // {
10104 a. mov_reg64_reg64(rsp, rbp);
10106 RegSet s = kCallerSaved - alreadySaved;
10107 PhysRegSaverParity<Parity> rs(a, s);
10108 emitCall(a, TCA(fptr));
10110 a. popr(rbp); // }
10111 a. ret();
10112 return start;
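// Illustrative note (assumption, not from the original source): the Parity
// bookkeeping above exists because the ABI wants rsp 16-byte aligned at call
// sites; the incoming call plus Arity argument pushes may leave rsp off by 8,
// so PhysRegSaverParity pads by one extra slot exactly when (Arity + 1) is
// odd, restoring SSE-friendly alignment before emitCall().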
10115 TCA TranslatorX64::emitUnaryStub(X64Assembler& a, void* fptr) {
10116 return emitNAryStub<1>(a, fptr);
10119 TCA TranslatorX64::emitBinaryStub(X64Assembler& a, void* fptr) {
10120 return emitNAryStub<2>(a, fptr);
10124 * Both callUnaryStubImpl and callBinaryStub assume that the stub they
10125 * are calling cannot throw an exception.
10128 template <bool reentrant>
10129 void
10130 TranslatorX64::callUnaryStubImpl(X64Assembler& a,
10131 const NormalizedInstruction& i,
10132 TCA stub, PhysReg arg, int disp/*=0*/) {
10133 // Call the generic dtor stub. They all take one arg.
10134 a. pushr(rdi);
10135 if (arg == rsp) {
10136 // Account for pushing rdi.
10137 disp += 8;
10139 if (disp == 0) {
10140 emitMovRegReg(a, arg, rdi);
10141 } else {
10142 a. lea_reg64_disp_reg64(arg, disp, rdi);
10144 ASSERT(isValidCodeAddress(stub));
10145 emitCall(a, stub);
10146 recordCallImpl<reentrant>(a, i);
10147 a. popr(rdi);
10150 void
10151 TranslatorX64::callBinaryStub(X64Assembler& a, const NormalizedInstruction& i,
10152 TCA stub, PhysReg arg1, PhysReg arg2) {
10153 a. pushr(rdi);
10154 a. pushr(rsi);
10156 // We need to be careful not to clobber our arguments when moving
10157 // them into the appropriate registers. (If we ever need ternary
10158 // stubs, this should probably be converted to use ArgManager.)
10159 if (arg2 == rdi && arg1 == rsi) {
10160 a. xchg_reg64_reg64(rdi, rsi);
10161 } else if (arg2 == rdi) {
10162 emitMovRegReg(a, arg2, rsi);
10163 emitMovRegReg(a, arg1, rdi);
10164 } else {
10165 emitMovRegReg(a, arg1, rdi);
10166 emitMovRegReg(a, arg2, rsi);
10169 ASSERT(isValidCodeAddress(stub));
10170 emitCall(a, stub);
10171 recordReentrantCall(a, i);
10172 a. popr(rsi);
10173 a. popr(rdi);
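// Illustrative example (assumption, not from the original source): the
// register shuffle above avoids clobbering an argument mid-move. For
// instance, with arg1 == rsi and arg2 == rdi, either plain mov ordering
// would overwrite one source, so that case is handled with a single xchg
// instead.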
10176 namespace {
10178 struct DeferredFileInvalidate : public DeferredWorkItem {
10179 Eval::PhpFile* m_f;
10180 DeferredFileInvalidate(Eval::PhpFile* f) : m_f(f) {
10181 TRACE(2, "DeferredFileInvalidate @ %p, m_f %p\n", this, m_f); }
10182 void operator()() {
10183 TRACE(2, "DeferredFileInvalidate: Firing @ %p , m_f %p\n", this, m_f);
10184 tx64->invalidateFileWork(m_f);
10188 struct DeferredPathInvalidate : public DeferredWorkItem {
10189 const std::string m_path;
10190 DeferredPathInvalidate(const std::string& path) : m_path(path) {
10191 ASSERT(m_path.size() >= 1 && m_path[0] == '/');
10193 void operator()() {
10194 String spath(m_path);
10196 * inotify saw this path change. Now poke the file repository;
10197 * it will notice the underlying PhpFile* has changed, and notify
10198 * us via ::invalidateFile.
10200 * We don't actually need to *do* anything with the PhpFile* from
10201 * this lookup; since the path has changed, the file we'll get out is
10202 * going to be some new file, not the old file that needs invalidation.
10204 UNUSED Eval::PhpFile* f =
10205 g_vmContext->lookupPhpFile(spath.get(), "");
10206 // We don't keep around the extra ref.
10207 if (f) f->decRefAndDelete();
10213 void
10214 TranslatorX64::requestInit() {
10215 TRACE(1, "in requestInit(%ld)\n", g_vmContext->m_currentThreadIdx);
10216 tl_regState = REGSTATE_CLEAN;
10217 PendQ::drain();
10218 requestResetHighLevelTranslator();
10219 Treadmill::startRequest(g_vmContext->m_currentThreadIdx);
10220 memset(&s_perfCounters, 0, sizeof(s_perfCounters));
10221 initUnlikelyProfile();
10224 void
10225 TranslatorX64::requestExit() {
10226 if (s_writeLease.amOwner()) {
10227 s_writeLease.drop();
10229 TRACE_MOD(txlease, 2, "%lx write lease stats: %15lld kept, %15lld grabbed\n",
10230 pthread_self(), s_writeLease.m_hintKept,
10231 s_writeLease.m_hintGrabbed);
10232 PendQ::drain();
10233 Treadmill::finishRequest(g_vmContext->m_currentThreadIdx);
10234 TRACE(1, "done requestExit(%ld)\n", g_vmContext->m_currentThreadIdx);
10235 Stats::dump();
10236 Stats::clear();
10237 dumpUnlikelyProfile();
10239 if (Trace::moduleEnabledRelease(Trace::tx64stats, 1)) {
10240 Trace::traceRelease("TranslatorX64 perf counters for %s:\n",
10241 g_context->getRequestUrl(50).c_str());
10242 for (int i = 0; i < tpc_num_counters; i++) {
10243 Trace::traceRelease("%-20s %10lld\n",
10244 kPerfCounterNames[i], s_perfCounters[i]);
10246 Trace::traceRelease("\n");
10250 bool
10251 TranslatorX64::isPseudoEvent(const char* event) {
10252 for (int i = 0; i < tpc_num_counters; i++) {
10253 if (!strcmp(event, kPerfCounterNames[i])) {
10254 return true;
10257 return false;
10260 void
10261 TranslatorX64::getPerfCounters(Array& ret) {
10262 for (int i = 0; i < tpc_num_counters; i++) {
10263 // Until Perflab can automatically scale the values we give it to
10264 // an appropriate range, we have to fudge these numbers so they
10265 // look more like reasonable hardware counter values.
10266 ret.set(kPerfCounterNames[i], s_perfCounters[i] * 1000);
10270 TranslatorX64::~TranslatorX64() {
10271 freeSlab(atrampolines.code.base, kTotalSize);
10274 static Debug::TCRange rangeFrom(const X64Assembler& a, const TCA addr,
10275 bool isAstubs) {
10276 ASSERT(a.code.isValidAddress(addr));
10277 return Debug::TCRange(addr, a.code.frontier, isAstubs);
10280 void TranslatorX64::recordBCInstr(uint32_t op,
10281 const X64Assembler& a,
10282 const TCA addr) {
10283 if (addr != a.code.frontier) {
10284 m_debugInfo.recordBCInstr(Debug::TCRange(addr, a.code.frontier,
10285 &a == &astubs ? true : false), op);
10289 void TranslatorX64::recordGdbTranslation(const SrcKey& sk,
10290 const Unit* srcUnit,
10291 const X64Assembler& a,
10292 const TCA start,
10293 bool exit,
10294 bool inPrologue) {
10295 if (start != a.code.frontier && !RuntimeOption::EvalJitNoGdb) {
10296 ASSERT(s_writeLease.amOwner());
10297 m_debugInfo.recordTracelet(rangeFrom(a, start,
10298 &a == &astubs ? true : false),
10299 srcUnit,
10300 srcUnit->at(sk.offset()),
10301 exit, inPrologue);
10305 void TranslatorX64::recordGdbStub(const X64Assembler& a,
10306 const TCA start, const char* name) {
10307 if (!RuntimeOption::EvalJitNoGdb) {
10308 m_debugInfo.recordStub(rangeFrom(a, start, &a == &astubs ? true : false),
10309 name);
10313 void TranslatorX64::defineCns(StringData* name) {
10314 TargetCache::fillConstant(name);
10317 std::string TranslatorX64::getUsage() {
10318 std::string usage;
10319 size_t aUsage = a.code.frontier - a.code.base;
10320 size_t stubsUsage = astubs.code.frontier - astubs.code.base;
10321 size_t tcUsage = TargetCache::s_frontier;
10322 Util::string_printf(usage,
10323 "tx64: %9zd bytes (%ld%%) in a.code\n"
10324 "tx64: %9zd bytes (%ld%%) in astubs.code\n"
10325 "tx64: %9zd bytes (%ld%%) in a.code from ir\n"
10326 "tx64: %9zd bytes (%ld%%) in astubs.code from ir\n"
10327 "tx64: %9zd bytes (%ld%%) in targetCache\n",
10328 aUsage, 100 * aUsage / a.code.size,
10329 stubsUsage, 100 * stubsUsage / astubs.code.size,
10330 m_irAUsage, 100 * m_irAUsage / a.code.size,
10331 m_irAstubsUsage, 100 * m_irAstubsUsage / astubs.code.size,
10332 tcUsage,
10333 100 * tcUsage / RuntimeOption::EvalJitTargetCacheSize);
10334 return usage;
10337 bool TranslatorX64::addDbgGuards(const Unit* unit) {
10338   // TODO refactor
10339   // It grabs the write lease and iterates through the whole SrcDB...
10340 bool locked = s_writeLease.acquire(true);
10341 if (!locked) {
10342 return false;
10344 struct timespec tsBegin, tsEnd;
10345 gettime(CLOCK_MONOTONIC, &tsBegin);
10346   // Doc says even find _could_ invalidate the iterator; in practice it should
10347   // be very rare, so go with it for now.
10348 for (SrcDB::iterator it = m_srcDB.begin(); it != m_srcDB.end(); ++it) {
10349 SrcKey const sk = SrcKey::fromAtomicInt(it->first);
10350 SrcRec& sr = *it->second;
10351 if (sr.unitMd5() == unit->md5() &&
10352 !sr.hasDebuggerGuard() &&
10353 isSrcKeyInBL(unit, sk)) {
10354 addDbgGuardImpl(sk, sr);
10357 s_writeLease.drop();
10358 gettime(CLOCK_MONOTONIC, &tsEnd);
10359 int64 elapsed = gettime_diff_us(tsBegin, tsEnd);
10360 if (Trace::moduleEnabledRelease(Trace::tx64, 5)) {
10361 Trace::traceRelease("addDbgGuards got lease for %lld us\n", elapsed);
10363 return true;
10366 bool TranslatorX64::addDbgGuard(const Func* func, Offset offset) {
10367 SrcKey sk(func, offset);
10369 if (SrcRec* sr = m_srcDB.find(sk)) {
10370 if (sr->hasDebuggerGuard()) {
10371 return true;
10373 } else {
10374 // no translation yet
10375 return true;
10378 if (debug) {
10379 if (!isSrcKeyInBL(func->unit(), sk)) {
10380 TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
10381 return false;
10384 bool locked = s_writeLease.acquire(true);
10385 if (!locked) {
10386 return false;
10389 if (SrcRec* sr = m_srcDB.find(sk)) {
10390 addDbgGuardImpl(sk, *sr);
10393 s_writeLease.drop();
10394 return true;
10397 void TranslatorX64::addDbgGuardImpl(const SrcKey& sk, SrcRec& srcRec) {
10398 TCA dbgGuard = a.code.frontier;
10399 // Emit the checks for debugger attach
10400 emitTLSLoad<ThreadInfo>(a, ThreadInfo::s_threadInfo, rScratch);
10401 static COff dbgOff = offsetof(ThreadInfo, m_reqInjectionData) +
10402 offsetof(RequestInjectionData, debugger);
10403 a. load_reg64_disp_reg32(rScratch, dbgOff, rScratch);
10404 a. test_imm32_reg32(0xff, rScratch);
10405 // Branch to a special REQ_INTERPRET if attached
10407 TCA fallback = emitServiceReq(REQ_INTERPRET, 2, uint64_t(sk.offset()), 0);
10408 a. jnz(fallback);
10410 // Emit a jump to the actual code
10411 TCA realCode = srcRec.getTopTranslation();
10412 prepareForSmash(kJmpLen);
10413 TCA dbgBranchGuardSrc = a.code.frontier;
10414 a. jmp(realCode);
10415 // Add it to srcRec
10416 srcRec.addDebuggerGuard(a, astubs, dbgGuard, dbgBranchGuardSrc);
10419 bool TranslatorX64::dumpTCCode(const char* filename) {
10420 string aFilename = string(filename).append("_a");
10421 string astubFilename = string(filename).append("_astub");
10422 FILE* aFile = fopen(aFilename.c_str(),"wb");
10423 if (aFile == NULL)
10424 return false;
10425 FILE* astubFile = fopen(astubFilename.c_str(),"wb");
10426 if (astubFile == NULL) {
10427 fclose(aFile);
10428 return false;
10430 string helperAddrFilename = string(filename).append("_helpers_addrs.txt");
10431 FILE* helperAddrFile = fopen(helperAddrFilename.c_str(),"wb");
10432 if (helperAddrFile == NULL) {
10433 fclose(aFile);
10434 fclose(astubFile);
10435 return false;
10437 // dump starting from the trampolines; this assumes processInit() places
10438 // trampolines before the translation cache
10439 size_t count = a.code.frontier-atrampolines.code.base;
10440 bool result = (fwrite(atrampolines.code.base, 1, count, aFile) == count);
10441 if (result) {
10442 count = astubs.code.frontier - astubs.code.base;
10443 result = (fwrite(astubs.code.base, 1, count, astubFile) == count);
10445 if (result) {
10446 for(PointerMap::iterator iter = trampolineMap.begin();
10447 iter != trampolineMap.end();
10448 iter++) {
10449 void* helperAddr = iter->first;
10450 void* trampAddr = iter->second;
10451 char* functionName = Util::getNativeFunctionName(helperAddr);
10452 fprintf(helperAddrFile,"%10p %10p %s\n",
10453 trampAddr, helperAddr,
10454 functionName);
10455 free(functionName);
10458 fclose(aFile);
10459 fclose(astubFile);
10460 fclose(helperAddrFile);
10461 return result;
10464 // Returns true on success
10465 bool TranslatorX64::dumpTC(bool ignoreLease) {
10466 if (!ignoreLease && !s_writeLease.acquire(true)) return false;
10467 bool success = dumpTCData();
10468 if (success) {
10469 success = dumpTCCode("/tmp/tc_dump");
10471 if (!ignoreLease) s_writeLease.drop();
10472 return success;
10475 // Returns true on success
10476 bool tc_dump(void) {
10477 return TranslatorX64::Get()->dumpTC();
10480 // Returns true on success
10481 bool TranslatorX64::dumpTCData() {
10482 gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
10483 if (!tcDataFile) return false;
10485 if (!gzprintf(tcDataFile,
10486 "repo_schema = %s\n"
10487 "a.base = %p\n"
10488 "a.frontier = %p\n"
10489 "astubs.base = %p\n"
10490 "astubs.frontier = %p\n\n",
10491 Repo::kSchemaId,
10492 atrampolines.code.base, a.code.frontier,
10493 astubs.code.base, astubs.code.frontier)) {
10494 return false;
10497 if (!gzprintf(tcDataFile, "total_translations = %lu\n\n",
10498 m_translations.size())) {
10499 return false;
10502 for (size_t t = 0; t < m_translations.size(); t++) {
10503 if (gzputs(tcDataFile,
10504 m_translations[t].print(getTransCounter(t)).c_str()) == -1) {
10505 return false;
10509 gzclose(tcDataFile);
10510 return true;
#define NATIVE_OP(X) PLAN(X, Native)
#define SUPPORTED_OP(X) PLAN(X, Supported)
#define SIMPLE_OP(X) PLAN(X, Simple)
#define INTERP_OP(X) PLAN(X, Interp)

#define SUPPORTED_OPS() \
  NATIVE_OP(Null) \
  NATIVE_OP(True) \
  NATIVE_OP(False) \
  NATIVE_OP(Int) \
  NATIVE_OP(String) \
  NATIVE_OP(Array) \
  NATIVE_OP(NewArray) \
  NATIVE_OP(InitThisLoc) \
  NATIVE_OP(Dup) \
  NATIVE_OP(FPushContFunc) \
  NATIVE_OP(ContDone) \
  NATIVE_OP(ContValid) \
  NATIVE_OP(ContStopped) \
  /*
   * Invariably call a possibly-reentrant helper.
   */ \
  SIMPLE_OP(Jmp) \
  SIMPLE_OP(FCall) \
  SIMPLE_OP(CreateCont) \
  SIMPLE_OP(UnpackCont) \
  /*
   * Translations with a reentrant helper.
   *
   * TODO: neither UnboxR nor FPassR can actually call destructors.
   */ \
  SUPPORTED_OP(UnboxR) \
  SUPPORTED_OP(FPassR) \
  SUPPORTED_OP(NativeImpl) \
  SUPPORTED_OP(UnsetL) \
  SUPPORTED_OP(Cns) \
  SUPPORTED_OP(ClsCnsD) \
  SUPPORTED_OP(This) \
  SUPPORTED_OP(BareThis) \
  SUPPORTED_OP(CheckThis) \
  SUPPORTED_OP(PackCont) \
  SUPPORTED_OP(ContReceive) \
  SUPPORTED_OP(ContRaised) \
  SUPPORTED_OP(ContNext) \
  SUPPORTED_OP(ContSend) \
  SUPPORTED_OP(ContRaise) \
  SUPPORTED_OP(ContCurrent) \
  SUPPORTED_OP(FPushCtor) \
  SUPPORTED_OP(FPushCtorD) \
  SUPPORTED_OP(StaticLocInit) \
  /*
   * Always-interp instructions.
   */ \
  INTERP_OP(ContHandle)

// Define the trivial analyze methods
#define PLAN(Op, Spt) \
void \
TranslatorX64::analyze ## Op(Tracelet& t, NormalizedInstruction& i) { \
  i.m_txFlags = Spt; \
}

SUPPORTED_OPS()

#undef NATIVE_OP
#undef SUPPORTED_OP
#undef SIMPLE_OP
#undef INTERP_OP
#undef SUPPORTED_OPS
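// For reference (editor's addition): after preprocessing, each entry in
// SUPPORTED_OPS() becomes one trivial analyze method. For example,
// NATIVE_OP(Null) expands via PLAN(Null, Native) to roughly:
//
//   void
//   TranslatorX64::analyzeNull(Tracelet& t, NormalizedInstruction& i) {
//     i.m_txFlags = Native;
//   }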
void TranslatorX64::invalidateSrcKey(const SrcKey& sk) {
  ASSERT(!RuntimeOption::RepoAuthoritative);
  ASSERT(s_writeLease.amOwner());
  /*
   * Reroute existing translations for SrcKey to an as-yet indeterminate
   * new one.
   */
  SrcRec* sr = m_srcDB.find(sk);
  ASSERT(sr);
  /*
   * Since previous translations aren't reachable from here, we know we
   * just created some garbage in the TC. We currently have no mechanism
   * to reclaim this.
   */
  sr->replaceOldTranslations(a, astubs);
}
void TranslatorX64::invalidateFileWork(Eval::PhpFile* f) {
  class FileInvalidationTrigger : public Treadmill::WorkItem {
    Eval::PhpFile* m_f;
    int m_nRefs;
  public:
    FileInvalidationTrigger(Eval::PhpFile* f, int n) : m_f(f), m_nRefs(n) { }
    virtual void operator()() {
      if (m_f->decRef(m_nRefs) == 0) {
        Eval::FileRepository::onDelete(m_f);
      }
    }
  };
  size_t nSmashed = m_srcDB.invalidateCode(f);
  if (nSmashed) {
    // The srcDB found an entry for this file. The entry's dependency
    // on this file was counted as a reference, and the code is no longer
    // reachable. We need to wait until the last outstanding request
    // drains to know that we can really remove the reference.
    Treadmill::WorkItem::enqueue(new FileInvalidationTrigger(f, nSmashed));
  }
}
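// Editor's sketch (not part of the original file): the Treadmill pattern used
// above generalizes to any resource that may still be referenced by requests
// already in flight when it becomes unreachable. Subclass Treadmill::WorkItem
// and enqueue it; operator() runs only after those requests have drained.
// Foo and releaseFoo below are made-up names for illustration.
#if 0
class ReleaseFoo : public Treadmill::WorkItem {
  Foo* m_foo;
public:
  explicit ReleaseFoo(Foo* foo) : m_foo(foo) {}
  virtual void operator()() {
    releaseFoo(m_foo);  // safe: no in-flight request can still see m_foo
  }
};

void retireFoo(Foo* foo) {
  Treadmill::WorkItem::enqueue(new ReleaseFoo(foo));
}
#endif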
bool TranslatorX64::invalidateFile(Eval::PhpFile* f) {
  // This is called from high rank, but we'll need the write lease to
  // invalidate code.
  if (!RuntimeOption::EvalJit) return false;
  ASSERT(f != NULL);
  PendQ::defer(new DeferredFileInvalidate(f));
  return true;
}
void TranslatorX64::invalidateOutStack(const NormalizedInstruction& ni) {
  if (ni.outStack) {
    m_regMap.invalidate(ni.outStack->location);
  }
}

void TranslatorX64::invalidateOutLocal(const NormalizedInstruction& ni) {
  if (ni.outLocal) {
    m_regMap.invalidate(ni.outLocal->location);
  }
}

} // HPHP::VM::Transl

static const Trace::Module TRACEMOD = Trace::tx64;

void invalidatePath(const std::string& path) {
  TRACE(1, "invalidatePath: abspath %s\n", path.c_str());
  PendQ::defer(new DeferredPathInvalidate(path));
}

} } // HPHP::VM