Clean up a few inc/decrefs
[hiphop-php.git] / src / runtime / vm / translator / translator-x64.cpp
blob 17296cb2d7d4f9bbab3535b32470fedbd65b4578
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 */
16 #include <stdint.h>
17 #include <assert.h>
18 #include <unistd.h>
19 #include <sys/mman.h>
20 #include <strstream>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <strings.h>
24 #include <string>
25 #include <queue>
26 #include <zlib.h>
27 #include <unwind.h>
29 #ifdef __FreeBSD__
30 # include <ucontext.h>
31 typedef __sighandler_t *sighandler_t;
32 # define RIP_REGISTER(v) (v).mc_rip
33 #else
34 # define RIP_REGISTER(v) (v).gregs[REG_RIP]
35 #endif
37 #include <boost/bind.hpp>
38 #include <boost/optional.hpp>
39 #include <boost/utility/typed_in_place_factory.hpp>
40 #include <boost/scoped_ptr.hpp>
42 #include <util/pathtrack.h>
43 #include <util/trace.h>
44 #include <util/bitops.h>
45 #include <util/debug.h>
46 #include <util/ringbuffer.h>
47 #include <util/rank.h>
48 #include <util/timer.h>
49 #include <util/maphuge.h>
51 #include <runtime/base/tv_macros.h>
52 #include <runtime/vm/bytecode.h>
53 #include <runtime/vm/php_debug.h>
54 #include <runtime/vm/runtime.h>
55 #include <runtime/base/complex_types.h>
56 #include <runtime/base/execution_context.h>
57 #include <runtime/base/strings.h>
58 #include <runtime/base/zend/zend_string.h>
59 #include <runtime/base/runtime_option.h>
60 #include <runtime/base/server/source_root_info.h>
61 #include <runtime/ext/ext_continuation.h>
62 #include <runtime/vm/debug/debug.h>
63 #include <runtime/vm/translator/targetcache.h>
64 #include <runtime/vm/translator/log.h>
65 #include <runtime/vm/translator/translator-deps.h>
66 #include <runtime/vm/translator/translator-inline.h>
67 #include <runtime/vm/translator/translator-x64.h>
68 #include <runtime/vm/translator/asm-x64.h>
69 #include <runtime/vm/translator/srcdb.h>
70 #include <runtime/vm/translator/x64-util.h>
71 #include <runtime/vm/translator/unwind-x64.h>
72 #include <runtime/vm/pendq.h>
73 #include <runtime/vm/treadmill.h>
74 #include <runtime/vm/stats.h>
77 #include <runtime/vm/repo.h>
78 #include <runtime/vm/type-profile.h>
79 #include <runtime/vm/member_operations.h>
80 #include <runtime/vm/translator/abi-x64.h>
81 #include <runtime/eval/runtime/file_repository.h>
82 #include <runtime/vm/translator/hopt/ir.h>
83 #include <runtime/vm/translator/hopt/linearscan.h>
84 #include <runtime/vm/translator/hopt/opt.h>
85 #include <runtime/vm/translator/hopt/codegen.h>
87 #include <runtime/vm/translator/translator-x64-internal.h>
89 namespace HPHP {
90 namespace VM {
91 namespace Transl {
93 using namespace reg;
94 using namespace Util;
95 using namespace Trace;
96 using std::max;
98 #define TRANS_PERF_COUNTERS \
99 TPC(translate) \
100 TPC(retranslate) \
101 TPC(interp_bb) \
102 TPC(interp_instr) \
103 TPC(interp_one) \
104 TPC(max_trans) \
105 TPC(enter_tc) \
106 TPC(service_req)
108 #define TPC(n) "trans_" #n,
109 static const char* const kPerfCounterNames[] = {
110 TRANS_PERF_COUNTERS
112 #undef TPC
114 #define TPC(n) tpc_ ## n,
115 enum TransPerfCounter {
116 TRANS_PERF_COUNTERS
117 tpc_num_counters
119 #undef TPC
120 static __thread int64 s_perfCounters[tpc_num_counters];
121 #define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
123 #define NULLCASE() \
124 case KindOfUninit: case KindOfNull
126 #define STRINGCASE() \
127 case BitwiseKindOfString: case KindOfStaticString
129 // nextTx64: Global shared state. The tx64 that should be used for
130 // new requests going forward.
131 TranslatorX64* volatile nextTx64;
132 // tx64: Thread-local state. The tx64 we're using for the current request.
133 __thread TranslatorX64* tx64;
135 // Register dirtiness: thread-private.
136 __thread VMRegState tl_regState = REGSTATE_CLEAN;
138 static StaticString s___call(LITSTR_INIT("__call"));
139 static StaticString s___callStatic(LITSTR_INIT("__callStatic"));
141 // Initialize at most this many locals inline in function body prologue; more
142 // than this, and emitting a loop is more compact. To be precise, the actual
143 // crossover point in terms of code size is 6; 9 was determined by experiment to
144 // be the optimal point in certain benchmarks. #microoptimization
145 static const int kLocalsToInitializeInline = 9;
147 // An intentionally funny-looking-in-core-dumps constant for uninitialized
148 // instruction pointers.
149 static const uint64_t kUninitializedRIP = 0xba5eba11acc01ade;
151 static int
152 localOffset(int loc) {
153 PhysReg base;
154 int offset;
155 locToRegDisp(Location(Location::Local, loc), &base, &offset);
156 ASSERT(base == rVmFp);
157 return offset;
160 // Return the SrcKey for the operation that should follow the supplied
161 // NormalizedInstruction. (This might not be the next SrcKey in the
162 // unit if we merged some instructions or otherwise modified them
163 // during analysis.)
164 SrcKey nextSrcKey(const Tracelet& t, const NormalizedInstruction& i) {
165 return i.next ? i.next->source : t.m_nextSk;
168 // JccBlock --
169 // A raw condition-code block; assumes whatever comparison or ALU op
170 // that sets the Jcc has already executed.
171 template <int Jcc>
172 struct JccBlock {
173 mutable X64Assembler* m_a;
174 TCA m_jcc8;
175 mutable DiamondGuard* m_dg;
177 explicit JccBlock(X64Assembler& a)
178 : m_a(&a),
179 m_jcc8(a.code.frontier),
180 m_dg(new DiamondGuard(a)) {
181 a. jcc8(Jcc, m_a->code.frontier);
184 ~JccBlock() {
185 if (m_a) {
186 delete m_dg;
187 m_a->patchJcc8(m_jcc8, m_a->code.frontier);
191 private:
192 JccBlock(const JccBlock&);
193 JccBlock& operator=(const JccBlock&);
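// Example (sketch, in the same spirit as the IfElseBlock usage comment
// below): the template parameter is the condition that *skips* the block,
// so the body is emitted for the opposite case.
//
//   a.   test_reg64_reg64(rax, rax);
//   {
//     JccBlock<CC_Z> ifNonZero(a);   // jcc8(CC_Z) jumps over the block
//     // ... code emitted here runs only when rax != 0 ...
//   } // destructor patches the jcc8 to land here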
196 // IfElseBlock: like CondBlock, but with an else clause.
197 // a. test_reg_reg(rax, rax);
198 // { IfElseBlock<CC_Z> ifRax(a);
199 // // Code executed for rax != 0
200 // ifRax.Else();
201 // // Code executed for rax == 0
202 // }
204 template <int Jcc>
205 class IfElseBlock : boost::noncopyable {
206 X64Assembler& m_a;
207 TCA m_jcc8;
208 TCA m_jmp8;
209 public:
210 explicit IfElseBlock(X64Assembler& a) :
211 m_a(a), m_jcc8(a.code.frontier), m_jmp8(NULL) {
212 tx64->m_regMap.freeze();
213 m_a.jcc8(Jcc, m_a.code.frontier); // 1f
215 void Else() {
216 ASSERT(m_jmp8 == NULL);
217 m_jmp8 = m_a.code.frontier;
218 m_a.jmp8(m_jmp8); // 2f
219 // 1:
220 m_a.patchJcc8(m_jcc8, m_a.code.frontier);
222 ~IfElseBlock() {
223 ASSERT(m_jmp8 != NULL);
224 // 2:
225 m_a.patchJmp8(m_jmp8, m_a.code.frontier);
226 tx64->m_regMap.defrost();
230 static bool
231 typeCanBeStatic(DataType t) {
232 return t != KindOfObject && t != KindOfRef;
235 // IfCountNotStatic --
236 // Emits if (%reg->_count != RefCountStaticValue) { ... }.
237 // May short-circuit this check if the type is known to be
238 // static already.
239 struct IfCountNotStatic {
240 typedef CondBlock<TVOFF(_count),
241 RefCountStaticValue,
242 CC_Z> NonStaticCondBlock;
243 NonStaticCondBlock *m_cb; // might be null
244 IfCountNotStatic(X64Assembler& a,
245 PhysReg reg,
246 DataType t = KindOfInvalid) {
247 // Objects and variants cannot be static
248 if (typeCanBeStatic(t)) {
249 m_cb = new NonStaticCondBlock(a, reg);
250 } else {
251 m_cb = NULL;
255 ~IfCountNotStatic() {
256 delete m_cb;
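// Example (sketch): emitIncRef below wraps its refcount bump in this guard:
//
//   { // if (%base->_count != RefCountStaticValue)
//     IfCountNotStatic ins(a, base, dtype);
//     a.   add_imm32_disp_reg32(1, TVOFF(_count), base);
//   } // endif -- the check is omitted entirely when dtype can never be
//             // static (objects and refs), and the body is emitted unguarded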
260 // Segfault handler: figure out if it's an intentional segfault
261 // (timeout exception) and if so, act appropriately. Otherwise, pass
262 // the signal on.
263 void TranslatorX64::SEGVHandler(int signum, siginfo_t *info, void *ctx) {
264 TranslatorX64 *self = Get();
265 void *surprisePage =
266 ThreadInfo::s_threadInfo->m_reqInjectionData.surprisePage;
267 if (info->si_addr == surprisePage) {
268 ucontext_t *ucontext = (ucontext_t*)ctx;
269 TCA rip = (TCA)RIP_REGISTER(ucontext->uc_mcontext);
270 SignalStubMap::const_accessor a;
271 if (!self->m_segvStubs.find(a, rip)) {
272 NOT_REACHED();
274 TCA astubsCall = a->second;
276 // When this handler returns, "call" the astubs code for this
277 // surprise check.
278 RIP_REGISTER(ucontext->uc_mcontext) = (uintptr_t)astubsCall;
280 // We've processed this event; reset the page in case execution
281 // continues normally.
282 g_vmContext->m_stack.unprotect();
283 } else {
284 sighandler_t handler = (sighandler_t)self->m_segvChain;
285 if (handler == SIG_DFL || handler == SIG_IGN) {
286 signal(signum, handler);
287 raise(signum);
288 } else {
289 self->m_segvChain(signum, info, ctx);
295 * Copy a heap cell from memory to the stack.
297 * Use emitCopyToStack when you can safely change the state of the
298 * register map. When using emitCopyToStackRegSafe, you'll need to
299 * invalidate the stack location manually at an appropriate time.
302 void
303 TranslatorX64::emitCopyToStackRegSafe(X64Assembler& a,
304 const NormalizedInstruction& ni,
305 PhysReg src,
306 int off,
307 PhysReg tmpReg) {
308 ASSERT(off % sizeof(Cell) == 0);
309 emitCopyTo(a, src, 0, rVmSp, vstackOffset(ni, off), tmpReg);
312 void
313 TranslatorX64::emitCopyToStack(X64Assembler& a,
314 const NormalizedInstruction& ni,
315 PhysReg src,
316 int off) {
317 ScratchReg scratch(m_regMap);
319 FreezeRegs freeze(m_regMap);
320 emitCopyToStackRegSafe(a, ni, src, off, *scratch);
322 // Forget whatever we thought we knew about the stack.
323 m_regMap.invalidate(ni.outStack->location);
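// Sketch of the "RegSafe" contract described above: a caller that uses
// emitCopyToStackRegSafe directly supplies its own scratch register and is
// responsible for doing the equivalent of
//   m_regMap.invalidate(ni.outStack->location);
// itself, at whatever point its register state allows.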
327 * Emit code that does the same thing as tvSet().
329 * The `oldType' and `oldData' registers are used for temporary
330 * storage and unconditionally destroyed.
331 * `toPtr' will be destroyed iff the cell we're storing to is
332 * KindOfRef.
333 * The variant check will not be performed if toOffset is nonzero, so
334 * only pass a nonzero offset if you know the destination is not
335 * KindOfRef.
336 * `from' will not be modified.
338 void TranslatorX64::emitTvSetRegSafe(const NormalizedInstruction& i,
339 PhysReg from,
340 DataType fromType,
341 PhysReg toPtr,
342 int toOffset,
343 PhysReg oldType,
344 PhysReg oldData,
345 bool incRefFrom) {
346 ASSERT(!i.isNative());
347 ASSERT(!i.isSimple());
348 ASSERT(fromType != KindOfRef);
350 if (toOffset == 0) {
351 emitDerefIfVariant(a, toPtr);
353 a. load_reg64_disp_reg32(toPtr, toOffset + TVOFF(m_type), oldType);
354 a. load_reg64_disp_reg64(toPtr, toOffset + TVOFF(m_data), oldData);
355 emitStoreTypedValue(a, fromType, from, toOffset, toPtr);
356 if (incRefFrom) {
357 emitIncRef(from, fromType);
359 emitDecRefGenericReg(oldData, oldType);
362 void TranslatorX64::emitTvSet(const NormalizedInstruction& i,
363 PhysReg from,
364 DataType fromType,
365 PhysReg toPtr,
366 int toOffset,
367 bool incRefFrom) {
368 ScratchReg oldType(m_regMap);
369 ScratchReg oldData(m_regMap);
370 emitTvSetRegSafe(i, from, fromType, toPtr, toOffset,
371 *oldType, *oldData, incRefFrom);
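// Roughly the C-level operation the emitted code performs (cf. tvSet());
// a sketch, not the literal instruction sequence:
//
//   if (toOffset == 0) derefIfVariant(toPtr);        // may smash toPtr
//   oldType = type at (toPtr + toOffset);
//   oldData = data at (toPtr + toOffset);
//   store { fromType, from } at (toPtr + toOffset);
//   if (incRefFrom) incRef(from, fromType);
//   decRefGeneric(oldData, oldType);                  // may re-enter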
374 // Logical register move: ensures the value in src will be in dest
375 // after execution, but might do so in strange ways. Do not count on
376 // being able to smash dest to a different register in the future, e.g.
377 void
378 TranslatorX64::emitMovRegReg(X64Assembler& a, PhysReg src, PhysReg dest) {
379 SpaceRecorder("_RegMove", a);
380 if (src != dest) {
381 a. mov_reg64_reg64(src, dest);
385 void
386 TranslatorX64::emitMovRegReg(PhysReg src, PhysReg dest) {
387 emitMovRegReg(a, src, dest);
391 * emitPushAR --
393 * Push an activation record. Will return to the next instruction emitted by
394 * the invoker. Called on behalf of FPushFuncD and FPushFunc. If func is
395 * unknown, we will leave it to the caller to fill in m_func.
397 void
398 TranslatorX64::emitPushAR(const NormalizedInstruction& i, const Func* func,
399 const int bytesPopped /* = 0 */,
400 bool isCtor /* = false */,
401 bool clearThis /* = true */,
402 uintptr_t varEnvInvName /* = 0 */) {
403 if (func && phpBreakpointEnabled(func->name()->data())) {
404 translator_debug_break(a);
406 ASSERT(sizeof(Cell) < sizeof(ActRec));
407 // We are about to push an ActRec onto the stack. The stack grows down,
408 // so the offset of the beginning of the ActRec from the top of stack
409 // is -sizeof(ActRec).
410 int numArgs = i.imm[0].u_IVA;
411 int startOfActRec = bytesPopped - sizeof(ActRec);
412 size_t funcOff = startOfActRec + AROFF(m_func);
413 size_t thisOff = startOfActRec + AROFF(m_this);
414 size_t nargsOff = startOfActRec + AROFF(m_numArgsAndCtorFlag);
415 size_t varenvOff = startOfActRec + AROFF(m_varEnv);
416 size_t savedRbpOff = startOfActRec + AROFF(m_savedRbp);
418 BOOST_STATIC_ASSERT((
419 sizeof(((ActRec*)NULL)->m_numArgsAndCtorFlag) == sizeof(int32_t)
422 * rVmSp might not be up-to-date here, so we use emitVStackStore and
423  * emitVStackStoreImm, which know how to compute where the top of the
424  * stack currently is.
426 if (func) {
427 emitVStackStoreImm(a, i, (uintptr_t)func, funcOff);
428 if (clearThis) {
429 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
432 emitVStackStoreImm(a, i, ActRec::encodeNumArgs(numArgs, isCtor),
433 nargsOff, sz::dword);
434 emitVStackStoreImm(a, i, varEnvInvName, varenvOff, sz::qword, &m_regMap);
435 emitVStackStore(a, i, rVmFp, savedRbpOff, sz::qword);
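// Sketch of the stores above: the new ActRec begins at
// (stack top after bytesPopped) - sizeof(ActRec), and we fill in
//
//   m_savedRbp           <- rVmFp
//   m_func               <- func                      (when statically known)
//   m_this               <- 0                          (when clearThis)
//   m_numArgsAndCtorFlag <- encodeNumArgs(numArgs, isCtor)
//   m_varEnv             <- varEnvInvName
//
// The return address (m_savedRip) is recorded per call site, not here.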
438 template<int StackParity>
439 class PhysRegSaverParity {
440 protected:
441 X64Assembler& a;
442 RegSet s;
443 int numElts;
444 public:
445 PhysRegSaverParity(X64Assembler& a_, RegSet s_) : a(a_), s(s_) {
446 RegSet sCopy = s;
447 numElts = 0;
448 PhysReg reg;
449 while (sCopy.findFirst(reg)) {
450 a. pushr(reg);
451 sCopy.remove(reg);
452 numElts++;
454 if ((numElts & 1) == StackParity) {
455 // Maintain stack evenness for SIMD compatibility.
456 a. sub_imm32_reg64(8, rsp);
460 ~PhysRegSaverParity() {
461 if ((numElts & 1) == StackParity) {
462 // See above; stack parity.
463 a. add_imm32_reg64(8, rsp);
465 RegSet sCopy = s;
466 PhysReg reg;
467 while (sCopy.findLast(reg)) {
468 a. popr(reg);
469 sCopy.remove(reg);
474 // In shared stubs, we've already made the stack odd by calling
475 // from a to astubs. Calls from a are on an even rsp.
476 typedef PhysRegSaverParity<0> PhysRegSaverStub;
477 typedef PhysRegSaverParity<1> PhysRegSaver;
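// Example (sketch): bracketing a call to a C helper from the main
// translation cache (even rsp, hence PhysRegSaver); `some_helper' is a
// hypothetical stand-in:
//
//   {
//     PhysRegSaver saver(a, toSave | kSpecialCrossTraceRegs);
//     emitImmReg(a, someArg, argNumToRegName[0]);
//     a.   call((TCA)some_helper);
//   } // registers popped and parity padding undone here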
479 void
480 TranslatorX64::emitCallSaveRegs() {
481 ASSERT(!m_regMap.frozen());
482 m_regMap.cleanRegs(kCallerSaved);
485 static void UNUSED tc_debug_print(const char* message,
486 uintptr_t r1,
487 uintptr_t r2,
488 uintptr_t r3,
489 ActRec* fp) {
490 TRACE(1, "*********************** %s: %p %p %p (for : %s)\n",
491 message, (void*)r1, (void*)r2, (void*)r3,
492 fp->m_func ? fp->m_func->fullName()->data() : "[?]");
495 // Utility for debugging translations that will print a message,
496 // followed by the value of up to three registers.
497 void TranslatorX64::emitDebugPrint(Asm& a,
498 const char* message,
499 PhysReg r1,
500 PhysReg r2,
501 PhysReg r3) {
502 boost::optional<PhysRegSaver> aSaver;
503 boost::optional<PhysRegSaverStub> astubsSaver;
505 if (&a == &this->a) {
506 aSaver = boost::in_place<PhysRegSaver>(boost::ref(a), kAllX64Regs);
507 } else {
508 astubsSaver = boost::in_place<PhysRegSaverStub>(boost::ref(a),
509 kAllX64Regs);
512 a. mov_imm64_reg (uintptr_t(message), argNumToRegName[0]);
513 a. mov_reg64_reg64(r1, argNumToRegName[1]);
514 a. mov_reg64_reg64(r2, argNumToRegName[2]);
515 a. mov_reg64_reg64(r3, argNumToRegName[3]);
516 a. mov_reg64_reg64(rVmFp, argNumToRegName[4]);
517 a. call((TCA)tc_debug_print);
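// Example (hypothetical call site): dump a message plus three registers
// while debugging a translation:
//   emitDebugPrint(a, "after pushAR", rax, rdx, rcx);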
520 void ArgManager::cleanLocs() {
521 for (size_t i = 0; i < m_args.size(); ++i) {
522 // We only need to clean locations we are passing the address of.
523 // (ArgLoc passes the value in the register mapped for a given
524 // location, not the address of the location itself, so it doesn't
525 // need cleaning here.)
526 if (m_args[i].m_kind != ArgContent::ArgLocAddr) continue;
527 m_tx64.m_regMap.cleanLoc(*m_args[i].m_loc);
531 void ArgManager::computeUsed(std::map<PhysReg, size_t> &used,
532 std::vector<PhysReg> &actual) {
533 size_t n = m_args.size();
534 for (size_t i = 0; i < n; i++) {
535 PhysReg reg = InvalidReg;
536 if (m_args[i].m_kind == ArgContent::ArgReg ||
537 m_args[i].m_kind == ArgContent::ArgRegPlus) {
538 reg = m_args[i].m_reg;
539 } else if (m_args[i].m_kind == ArgContent::ArgLoc ||
540 m_args[i].m_kind == ArgContent::ArgDeref) {
541 reg = m_tx64.getReg(*m_args[i].m_loc);
542 } else {
543 continue;
545 TRACE(6, "ArgManager: arg %zd incoming reg r%d\n", i, reg);
546 used[reg] = i;
547 actual[i] = reg;
551 void
552 TranslatorX64::emitRB(X64Assembler& a,
553 RingBufferType t,
554 SrcKey sk, RegSet toSave) {
555 if (!Trace::moduleEnabledRelease(Trace::tx64, 5)) {
556 return;
558 PhysRegSaver rs(a, toSave | kSpecialCrossTraceRegs);
559 int arg = 0;
560 emitImmReg(a, t, argNumToRegName[arg++]);
561 emitImmReg(a, sk.m_funcId, argNumToRegName[arg++]);
562 emitImmReg(a, sk.m_offset, argNumToRegName[arg++]);
563 a. call((TCA)ringbufferEntry);
566 void
567 TranslatorX64::emitRB(X64Assembler& a,
568 RingBufferType t,
569 const char* msg,
570 RegSet toSave) {
571 if (!Trace::moduleEnabledRelease(Trace::tx64, 5)) {
572 return;
574 PhysRegSaver save(a, toSave | kSpecialCrossTraceRegs);
575 int arg = 0;
576 emitImmReg(a, (uintptr_t)msg, argNumToRegName[arg++]);
577 emitImmReg(a, strlen(msg), argNumToRegName[arg++]);
578 emitImmReg(a, t, argNumToRegName[arg++]);
579 a. call((TCA)ringbufferMsg);
583 * allocate the input registers for i, trying to
584 * match inputs to call arguments.
585 * if args[j] == ArgDontAllocate, the arg is skipped
586 * if args[j] == ArgAnyReg, it will be allocated as normal
587 * otherwise, args[j] should be a positional call argument,
588 * and allocInputsForCall will attempt to allocate it to
589 * argNumToRegName[args[j]].
591 void
592 TranslatorX64::allocInputsForCall(const NormalizedInstruction& i,
593 const int* args) {
594 RegSet blackList;
595 int arg;
597 * If any of the inputs is already in an argument
598 * register, blacklist it. ArgManager already takes
599 * care of shuffling registers efficiently
601 for (arg = i.inputs.size(); arg--; ) {
602 if (args[arg] != ArgDontAllocate &&
603 m_regMap.hasReg(i.inputs[arg]->location)) {
604 blackList |= RegSet(getReg(i.inputs[arg]->location));
607 bool hasAnyReg = false;
608 for (arg = i.inputs.size(); arg--; ) {
609 if (args[arg] != ArgAnyReg) {
610 if (args[arg] != ArgDontAllocate &&
611 !m_regMap.hasReg(i.inputs[arg]->location)) {
612 PhysReg target = argNumToRegName[args[arg]];
613 if (!blackList.contains(target)) {
614 m_regMap.cleanRegs(RegSet(target));
615 m_regMap.smashRegs(RegSet(target));
616 } else {
617 target = InvalidReg;
619 m_regMap.allocInputReg(i, arg, target);
621 } else {
622 hasAnyReg = true;
625 if (hasAnyReg) {
626 for (arg = i.inputs.size(); arg--; ) {
627 if (args[arg] == ArgAnyReg) {
628 m_regMap.allocInputReg(i, arg);
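// Example (sketch, hypothetical helper): to pass input 0 in the second call
// argument and let input 1 be allocated anywhere:
//
//   const int args[] = { 1, ArgAnyReg };
//   allocInputsForCall(i, args);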
634 void ArgManager::shuffleRegisters(std::map<PhysReg, size_t> &used,
635 std::vector<PhysReg> &actual) {
636 size_t n = m_args.size();
637 for (size_t i = 0; i < n; i++) {
638 if (actual[i] == InvalidReg)
639 continue;
641 if (!mapContains(used, argNumToRegName[i])) {
642 // There's no conflict, so just copy
643 TRACE(6, "ArgManager: arg %zd reg available, copying from r%d to r%d\n",
644 i, actual[i], argNumToRegName[i]);
645 // Do copy and data structure update here, because this way
646 // we can reuse the register in actual[i] later without problems.
647 m_tx64.emitMovRegReg(m_a, actual[i], argNumToRegName[i]);
648 used.erase(actual[i]);
649 actual[i] = argNumToRegName[i];
650 } else {
651 size_t j = used[argNumToRegName[i]];
652 if (actual[j] != actual[i]) {
653 // The register is used by some other value, so we must swap the two
654 // registers.
655 ASSERT(j > i);
656 ASSERT(actual[j] != InvalidReg);
657 PhysReg ri = actual[i],
658 rj = actual[j];
659 TRACE(6, "ArgManager: arg %zd register used by arg %zd, "
660 "swapping r%d with r%d\n", i, j, ri, rj);
662 // Clean the registers first
663 RegSet regs = RegSet(ri) | RegSet(rj);
664 m_tx64.m_regMap.cleanRegs(regs);
666 // Emit the actual swap
667 m_tx64.m_regMap.swapRegisters(ri, rj);
668 m_a. xchg_reg64_reg64(ri, rj);
670 // Update the data structure for later steps
671 for (size_t k = 0; k < n; k++) {
672 if (actual[k] == ri) {
673 actual[k] = rj;
674 } else if (actual[k] == rj) {
675 actual[k] = ri;
678 used[ri] = j;
679 used[rj] = i;
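// Worked example (sketch): suppose arg 0 currently lives in rsi and arg 1 in
// rdi, while the ABI wants arg 0 in rdi (argNumToRegName[0]) and arg 1 in
// rsi. The plain-copy path can't be used because rdi is marked used by arg 1,
// so we take the swap path: clean both registers, swapRegisters(rsi, rdi) in
// the map, emit xchg %rsi, %rdi, and patch actual[]/used[] so later
// iterations see the post-swap assignment.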
685 void ArgManager::emitValues(std::vector<PhysReg> &actual) {
686 for (size_t i = 0; i < m_args.size(); i++) {
687 switch(m_args[i].m_kind) {
688 case ArgContent::ArgLoc:
689 case ArgContent::ArgDeref:
690 case ArgContent::ArgReg:
691 TRACE(6, "ArgManager: copying arg %zd from r%d to r%d\n",
692 i, actual[i], argNumToRegName[i]);
693 m_tx64.emitMovRegReg(m_a, actual[i], argNumToRegName[i]);
694 // Emit dereference if needed
695 if (m_args[i].m_kind == ArgContent::ArgDeref) {
696 emitDeref(m_a, argNumToRegName[i], argNumToRegName[i]);
698 break;
700 // For any of these cases, the register should already be available.
701 // If it was used previously by an input value, shuffleRegisters
702 // should have moved it to the proper register from argNumToRegName.
703 case ArgContent::ArgImm:
704 emitImmReg(m_a, m_args[i].m_imm, argNumToRegName[i]);
705 break;
707 case ArgContent::ArgRegPlus:
708 if (m_args[i].m_imm) {
709 m_a. add_imm32_reg64(m_args[i].m_imm, argNumToRegName[i]);
711 break;
713 case ArgContent::ArgLocAddr:
715 PhysReg base;
716 int disp;
717 locToRegDisp(*m_args[i].m_loc, &base, &disp);
718 m_a. lea_reg64_disp_reg64(base, disp, argNumToRegName[i]);
720 break;
722 default:
723 // Should never happen
724 ASSERT(false);
729 void
730 TranslatorX64::emitCall(X64Assembler& a, TCA dest, bool killRegs) {
731 if (a.jmpDeltaFits(dest)) {
732 a. call(dest);
733 } else {
734 a. call(getNativeTrampoline(dest));
736 if (killRegs) {
737 // All caller-saved regs are now suspect.
738 m_regMap.smashRegs(kCallerSaved);
742 void
743 TranslatorX64::recordSyncPoint(X64Assembler& a, Offset pcOff, Offset spOff) {
744 m_pendingFixups.push_back(PendingFixup(a.code.frontier,
745 Fixup(pcOff, spOff)));
748 void
749 TranslatorX64::recordCall(Asm& a, const NormalizedInstruction& i) {
750 recordCallImpl<false>(a, i);
753 void
754 TranslatorX64::recordCall(const NormalizedInstruction& i) {
755 recordCall(a, i);
758 template <bool reentrant>
759 void
760 TranslatorX64::recordCallImpl(X64Assembler& a,
761 const NormalizedInstruction& i,
762 bool advance /* = false */) {
763 SrcKey sk = i.source;
764 Offset stackOff = i.stackOff + (vmfp() - vmsp());
765 if (advance) {
766 sk.advance(curUnit());
767 stackOff += getStackDelta(i);
769 ASSERT(i.checkedInputs ||
770 (reentrant && !i.isSimple()) ||
771 (!reentrant && !i.isNative()));
772 Offset pcOff = sk.offset() - curFunc()->base();
773 SKTRACE(2, sk, "record%sCall pcOff %d\n",
774 reentrant ? "Reentrant" : "", int(pcOff));
775 recordSyncPoint(a, pcOff, stackOff);
776 SKTRACE(2, sk, "record%sCall stackOff %d\n",
777 reentrant ? "Reentrant" : "", int(stackOff));
780 * Right now we assume call sites that need to record sync points
781 * may also throw exceptions. We record information about dirty
782 * callee-saved registers so we can spill their contents during
783 * unwinding. See unwind-x64.cpp.
785 if (!m_pendingUnwindRegInfo.empty()) {
786 if (Trace::moduleLevel(Trace::tunwind) >= 2) {
787 sk.trace("recordCallImpl has dirty callee-saved regs\n");
788 TRACE_MOD(Trace::tunwind, 2,
789 "CTCA: %p saving dirty callee regs:\n",
790 a.code.frontier);
791 for (int i = 0; i < UnwindRegInfo::kMaxCalleeSaved; ++i) {
792 if (m_pendingUnwindRegInfo.m_regs[i].dirty) {
793 TRACE_MOD(Trace::tunwind, 2, " %s\n",
794 m_pendingUnwindRegInfo.m_regs[i].pretty().c_str());
798 m_unwindRegMap.insert(a.code.frontier, m_pendingUnwindRegInfo);
799 m_pendingUnwindRegInfo.clear();
803 void TranslatorX64::prepareCallSaveRegs() {
804 emitCallSaveRegs(); // Clean caller-saved regs.
805 m_pendingUnwindRegInfo.clear();
807 RegSet rset = kCalleeSaved;
808 PhysReg reg;
809 while (rset.findFirst(reg)) {
810 rset.remove(reg);
811 if (!m_regMap.regIsDirty(reg)) continue;
812 const RegInfo* ri = m_regMap.getInfo(reg);
813 ASSERT(ri->m_cont.m_kind == RegContent::Loc);
815 // If the register is dirty, we'll record this so that we can
816 // restore it during stack unwinding if an exception is thrown.
817 m_pendingUnwindRegInfo.add(reg, ri->m_type, ri->m_cont.m_loc);
821 void
822 TranslatorX64::emitIncRef(PhysReg base, DataType dtype) {
823 if (!IS_REFCOUNTED_TYPE(dtype) && dtype != KindOfInvalid) {
824 return;
826 ASSERT(m_regMap.getInfo(base));
827 SpaceRecorder sr("_IncRef", a);
828 ASSERT(sizeof(((Cell*)NULL)->_count) == sizeof(int32_t));
829 { // if !static then
830 IfCountNotStatic ins(a, base, dtype);
832 * The optimization guide cautions against using inc; while it is
833    * compact, it does not modify the carry flag, creating a partial-flags
834    * dependency for any downstream flags-dependent code.
836 a. add_imm32_disp_reg32(1, TVOFF(_count), base);
837 } // endif
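// (Concretely, the add_imm32 above assembles to `addl $1, _count(%base)'
// rather than `incl _count(%base)'.)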
840 void
841 TranslatorX64::emitIncRefGenericRegSafe(PhysReg base,
842 int disp,
843 PhysReg tmpReg) {
844 ASSERT(m_regMap.getInfo(base));
845 { // if RC
846 IfRefCounted irc(a, base, disp);
847 a. load_reg64_disp_reg64(base, disp + TVOFF(m_data),
848 tmpReg);
849 { // if !static
850 IfCountNotStatic ins(a, tmpReg);
851 a. add_imm32_disp_reg32(1, TVOFF(_count), tmpReg);
852 } // endif
853 } // endif
856 void TranslatorX64::emitIncRefGeneric(PhysReg base, int disp) {
857 ScratchReg tmpReg(m_regMap);
858 emitIncRefGenericRegSafe(base, disp, *tmpReg);
861 static void emitGetGContext(X64Assembler& a, PhysReg dest) {
862 emitTLSLoad<ExecutionContext>(a, g_context, dest);
865 // emitEagerVMRegSave --
866 // Inline. Saves regs in-place in the TC. This is an unusual need;
867 // you probably want to lazily save these regs via recordCall and
868 // its ilk.
870 // SaveFP uses rVmFp, as usual. SavePC requires the caller to have
871 // placed the PC offset of the instruction about to be executed in
872 // rdi.
873 enum RegSaveFlags {
874 SaveFP = 1,
875 SavePC = 2
878 static TCA
879 emitEagerVMRegSave(X64Assembler& a,
880 int flags /* :: RegSaveFlags */) {
881 TCA start = a.code.frontier;
882 bool saveFP = bool(flags & SaveFP);
883 bool savePC = bool(flags & SavePC);
884 ASSERT((flags & ~(SavePC | SaveFP)) == 0);
886 PhysReg pcReg = rdi;
887 PhysReg rEC = rScratch;
888 ASSERT(!kSpecialCrossTraceRegs.contains(rdi));
890 emitGetGContext(a, rEC);
892 static COff spOff = offsetof(VMExecutionContext, m_stack) +
893 Stack::topOfStackOffset();
894 static COff fpOff = offsetof(VMExecutionContext, m_fp) - spOff;
895 static COff pcOff = offsetof(VMExecutionContext, m_pc) - spOff;
897 ASSERT(spOff != 0);
898 // Instruction selection note: this is an lea, but add is more
899 // compact and we can afford the flags bash.
900 a. add_imm32_reg64(spOff, rEC);
901 a. store_reg64_disp_reg64 (rVmSp, 0, rEC);
902 if (savePC) {
903 // We're going to temporarily abuse rVmSp to hold the current unit.
904 PhysReg rBC = rVmSp;
905 a. pushr(rBC);
906 // m_fp -> m_func -> m_unit -> m_bc + pcReg
907 a. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rBC);
908 a. load_reg64_disp_reg64(rBC, Func::unitOff(), rBC);
909 a. load_reg64_disp_reg64(rBC, Unit::bcOff(), rBC);
910 a. add_reg64_reg64(rBC, pcReg);
911 a. store_reg64_disp_reg64(pcReg, pcOff, rEC);
912 a. popr(rBC);
914 if (saveFP) {
915 a. store_reg64_disp_reg64 (rVmFp, fpOff, rEC);
917 return start;
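// Example (sketch): emitting an eager-save sequence that persists both fp
// and pc. Per the SavePC contract above, whatever jumps to the returned TCA
// must already have the PC offset in rdi:
//
//   TCA eagerSave = emitEagerVMRegSave(a, SaveFP | SavePC);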
921 * emitDecRef --
923 * Decrement a value's refcount and call the release helper if
924 * appropriate. emitDecRef requires that the caller knows the
925 * type at translation time.
927 void TranslatorX64::emitDecRef(Asm& a,
928 const NormalizedInstruction& i,
929 PhysReg rDatum,
930 DataType type) {
931 ASSERT(type != KindOfInvalid);
932 if (!IS_REFCOUNTED_TYPE(type)) {
933 return;
936 ASSERT(!i.isNative());
937 ASSERT(!i.isSimple() || !typeReentersOnRelease(type));
938 SpaceRecorder sr("_DecRef", a);
939 { // if !static
940 IfCountNotStatic ins(a, rDatum, type);
941 a. sub_imm32_disp_reg32(1, TVOFF(_count), rDatum);
943 ASSERT(type >= 0 && type < MaxNumDataTypes);
944 if (&a == &this->astubs) {
945 JccBlock<CC_NZ> ifZero(a);
946 callUnaryStub(a, i, m_dtorStubs[type], rDatum);
947 } else {
948 UnlikelyIfBlock<CC_Z> ifZero(this->a, astubs);
949 callUnaryStub(astubs, i, m_dtorStubs[type], rDatum);
951 } // endif
954 void TranslatorX64::emitDecRef(const NormalizedInstruction& i,
955 PhysReg rDatum,
956 DataType type) {
957 emitDecRef(a, i, rDatum, type);
960 void TranslatorX64::emitDecRefInput(Asm& a,
961 const NormalizedInstruction& i,
962 int input) {
963 DynLocation* value = i.inputs[input];
964 if (IS_REFCOUNTED_TYPE(value->outerType())) {
965 m_regMap.allocInputReg(i, input);
966 PhysReg rValue = getReg(value->location);
967 emitDecRef(a, i, rValue, value->outerType());
972 * emitDecRefGeneric --
974 * Decrement a value's refcount and call the release helper if
975 * appropriate. emitDecRefGeneric should only be used when the type
976 * is not known at translation time.
978 * emitDecRefGeneric operates on the memory location given by
979 * srcReg+disp, so the caller is responsible for ensuring that the
980 * memory location is up to date and not enregistered.
982 void TranslatorX64::emitDecRefGeneric(const NormalizedInstruction& i,
983 PhysReg srcReg, int disp /* = 0 */) {
984 SpaceRecorder sr("_DecRefGeneric", a);
986 * The full, inlined generic dec ref looks like:
988 * TypedValue* d = srcReg + disp;
989 * if (IS_REFCOUNTED_TYPE(d->m_type) && // a)
990 * d->_count != kStaticCount && // b)
991 * d->_count-- == 0) && // c)
992 * GenericDestroy(d); // d)
994 * We originally inlined *all* of a-d, and have experimented with sharing
995 * them all, too. At this writing (05-12-2012), inlining a) and outlining
996 * b-d seems to strike the right balance between compactness and not
997 * doing too much work in the common case where it is not refcounted.
1000 IfRefCounted irc(a, srcReg, disp);
1001 callUnaryReentrantStub(a, i, m_dtorGenericStub, srcReg, disp);
1005 // Same as emitDecRefGeneric, except for when we have the type in a
1006 // register as well. Same inlining/outlining choices as
1007 // emitDecRefGeneric above.
1008 void TranslatorX64::emitDecRefGenericReg(PhysReg rData, PhysReg rType) {
1009 SpaceRecorder sr("_DecRefGeneric", a);
1010 a. cmp_imm32_reg32(KindOfRefCountThreshold, rType);
1012 JccBlock<CC_BE> ifRefCounted(a);
1013 callBinaryStub(a, *m_curNI, m_dtorGenericStubRegs, rData, rType);
1018 * genericRefCountStub --
1020 * Shared code to decRef the TypedValue* of unknown, but refcounted, type
1021 * in rdi. Tightly coupled with emitDecRefGeneric.
1023 TCA TranslatorX64::genericRefCountStub(X64Assembler& a) {
1024 moveToAlign(a);
1025 FreezeRegs brr(m_regMap);
1026 TCA retval = a.code.frontier;
1028 // Note we make a real frame here: this is necessary so that the
1029 // fixup map can chase back to the caller of this stub if it needs
1030 // to sync regs.
1031 a. pushr(rbp); // {
1032 a. mov_reg64_reg64(rsp, rbp);
1034 PhysRegSaverStub prs(a, RegSet(rsi));
1035 // We already know the type was refcounted if we got here.
1036 a. load_reg64_disp_reg64(rdi, TVOFF(m_data), rsi);
1037 { // if !static
1038 IfCountNotStatic ins(a, rsi, KindOfInvalid);
1039 a. sub_imm32_disp_reg32(1, TVOFF(_count), rsi);
1040 { // if zero
1041 JccBlock<CC_NZ> ifZero(a);
1042 RegSet s = kCallerSaved - (RegSet(rdi) | RegSet(rsi));
1043 PhysRegSaver prs(a, s);
1044 a.call(TCA(tv_release_generic));
1045 } // endif
1046 } // endif
1048 a. popr(rbp); // }
1049 a. ret();
1050 return retval;
1053 TCA TranslatorX64::genericRefCountStubRegs(X64Assembler& a) {
1054 const PhysReg rData = argNumToRegName[0];
1055 const PhysReg rType = argNumToRegName[1];
1057 moveToAlign(a);
1058 TCA retval = a.code.frontier;
1059 FreezeRegs brr(m_regMap);
1061 // The frame here is needed for the same reason as in
1062 // genericRefCountStub.
1063 a. pushr(rbp); // {
1064 a. mov_reg64_reg64(rsp, rbp);
1066 IfCountNotStatic ins(a, rData, KindOfInvalid);
1067 a. sub_imm32_disp_reg32(1, TVOFF(_count), rData);
1069 JccBlock<CC_NZ> ifZero(a);
1070 // The arguments are already in the right registers.
1071 RegSet s = kCallerSaved - (RegSet(rData) | RegSet(rType));
1072 PhysRegSaverParity<1> saver(a, s);
1073 if (false) { // typecheck
1074 RefData* vp = NULL; DataType dt = KindOfUninit;
1075 (void)tv_release_typed(vp, dt);
1077 a.call(TCA(tv_release_typed));
1080 a. popr(rbp); // }
1081 a. ret();
1082 return retval;
1086 * Translation call targets. It is a lot easier, and a bit more
1087 * portable, to use C linkage from assembly.
1089 TCA TranslatorX64::retranslate(SrcKey sk, bool align, bool useHHIR) {
1090 if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk)) {
1091 // We are about to translate something known to be blacklisted by
1092 // the debugger; exit early.
1093 SKTRACE(1, sk, "retranslate abort due to debugger\n");
1094 return NULL;
1096 LeaseHolder writer(s_writeLease);
1097 if (!writer) return NULL;
1098 SKTRACE(1, sk, "retranslate\n");
1099 return translate(&sk, align, useHHIR);
1102 // Only use comes from HHIR's cgExitTrace() case TraceExitType::SlowNoProgress
1103 TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk,
1104 bool align,
1105 TCA toSmash) {
1106 if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk)) {
1107 // We are about to translate something known to be blacklisted by
1108 // the debugger; exit early.
1109 SKTRACE(1, sk, "retranslateAndPatchNoIR abort due to debugger\n");
1110 return NULL;
1112 LeaseHolder writer(s_writeLease);
1113 if (!writer) return NULL;
1114 SKTRACE(1, sk, "retranslateAndPatchNoIR\n");
1115 SrcRec* srcRec = getSrcRec(sk);
1116 if (srcRec->translations().size() == SrcRec::kMaxTranslations + 1) {
1117 // we've gone over the translation limit and already have an anchor
1118 // translation that will interpret, so just return NULL and force
1119 // interpretation of this BB.
1120 return NULL;
1122 TCA start = translate(&sk, align, false);
1123 if (start != NULL) {
1124 smash(getAsmFor(toSmash), toSmash, start);
1126 return start;
1130 * Satisfy an alignment constraint. If we're in a reachable section
1131 * of code, bridge the gap with nops. Otherwise, int3's.
1133 void
1134 TranslatorX64::moveToAlign(X64Assembler &aa,
1135 const size_t align /* =kJmpTargetAlign */,
1136 bool unreachable /* =true */) {
1137 using namespace HPHP::Util;
1138 SpaceRecorder sr("_Align", aa);
1139 ASSERT(isPowerOfTwo(align));
1140 size_t leftInBlock = align - ((align - 1) & uintptr_t(aa.code.frontier));
1141 if (leftInBlock == align) return;
1142 if (unreachable) {
1143 aa.emitInt3s(leftInBlock);
1144 return;
1146 aa.emitNop(leftInBlock);
1150 * Req machinery. We sometimes emit code that is unable to proceed
1151 * without translator assistance; e.g., a basic block whose successor is
1152 * unknown. We leave one of these request arg blobs in m_data, and point
1153 * to it at callout-time.
1156 // REQ_BIND_CALL
1157 struct ReqBindCall {
1158 SrcKey m_sourceInstr;
1159 TCA m_toSmash;
1160 int m_nArgs;
1161 bool m_isImmutable; // call was to known func.
1162 } m_bindCall;
1164 // ID to name mapping for tracing.
1165 static inline const char*
1166 reqName(int req) {
1167 static const char* reqNames[] = {
1168 #define REQ(nm) #nm,
1169 SERVICE_REQUESTS
1170 #undef REQ
1172 return reqNames[req];
1176 * Find or create a translation for sk. Returns TCA of "best" current
1177 * translation. May return NULL if it is currently impossible to create
1178 * a translation.
1181 TranslatorX64::getTranslation(const SrcKey *sk, bool align,
1182 bool forceNoHHIR /* = false */) {
1183 curFunc()->validate();
1184 SKTRACE(2, *sk, "getTranslation: curUnit %s funcId %llx offset %d\n",
1185 curUnit()->filepath()->data(),
1186 sk->m_funcId,
1187 sk->offset());
1188 SKTRACE(2, *sk, " funcId: %llx\n",
1189 curFunc()->getFuncId());
1191 if (curFrame()->hasVarEnv() && curFrame()->getVarEnv()->isGlobalScope()) {
1192 SKTRACE(2, *sk, "punting on pseudoMain\n");
1193 return NULL;
1195 if (const SrcRec* sr = m_srcDB.find(*sk)) {
1196 TCA tca = sr->getTopTranslation();
1197 if (tca) {
1198 SKTRACE(2, *sk, "getTranslation: found %p\n", tca);
1199 return tca;
1205 * Try to become the writer. We delay this until we *know* we will have
1206 * a need to create new translations, instead of just trying to win the
1207 * lottery at the dawn of time. Hopefully lots of requests won't require
1208 * any new translation.
1210 LeaseHolder writer(s_writeLease);
1211 if (!writer) return NULL;
1212 if (SrcRec* sr = m_srcDB.find(*sk)) {
1213 TCA tca = sr->getTopTranslation();
1214 if (tca) {
1215 // Handle extremely unlikely race; someone may have just already
1216 // added the first instance of this SrcRec while we did a
1217 // non-blocking wait on the write lease.
1218 return tca;
1219 } else {
1220 // Since we are holding the write lease, we know that sk is properly
1221 // initialized, except that it has no translations (due to
1222 // replaceOldTranslations)
1223 return retranslate(*sk, align,
1224 RuntimeOption::EvalJitUseIR && !forceNoHHIR);
1228 // We put retranslate requests at the end of our slab to more frequently
1229 // allow conditional jump fall-throughs
1231 TCA start = emitServiceReq(false, REQ_RETRANSLATE, 1, uint64_t(sk->offset()));
1232 SKTRACE(1, *sk, "inserting anchor translation for (%p,%d) at %p\n",
1233 curUnit(), sk->offset(), start);
1234 SrcRec* sr = m_srcDB.insert(*sk);
1235 sr->setFuncInfo(curFunc());
1236 sr->setAnchorTranslation(start);
1238 addTranslation(TransRec(*sk, curUnit()->md5(), TransAnchor, 0, 0, start,
1239 astubs.code.frontier - start));
1241 ASSERT(getTransRec(start)->kind == TransAnchor);
1243 return retranslate(*sk, align, RuntimeOption::EvalJitUseIR && !forceNoHHIR);
1247 TranslatorX64::translate(const SrcKey *sk, bool align, bool useHHIR) {
1248 INC_TPC(translate);
1249 ASSERT(vmfp() >= vmsp());
1250 ASSERT(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
1251 ASSERT(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
1253 if (useHHIR) {
1254 if (m_numHHIRTrans == RuntimeOption::EvalMaxHHIRTrans) {
1255 useHHIR = false;
1256 m_useHHIR = false;
1257 RuntimeOption::EvalJitUseIR = false;
1259 hhirTraceStart(sk->offset());
1260 } else {
1261 ASSERT(m_useHHIR == false);
1264 Tracelet tlet;
1265 analyze(sk, tlet);
1267 if (align) {
1268 moveToAlign(a);
1271 TCA start = a.code.frontier;
1272 translateTracelet(tlet);
1273 SKTRACE(1, *sk, "translate moved head from %p to %p\n",
1274 getTopTranslation(*sk), start);
1275 if (Trace::moduleEnabledRelease(tcdump, 1)) {
1276 static __thread int n;
1277 if (++n % 10000 == 0) {
1278 std::ofstream f("cfg.dot", std::ios_base::trunc);
1279 drawCFG(f);
1280 f.close();
1283 return start;
1287 * Returns true if a's current frontier can have an nBytes-long
1288 * instruction written without any risk of cache-tearing.
1290 bool
1291 TranslatorX64::isSmashable(X64Assembler& a, int nBytes) {
1292 ASSERT(nBytes <= int(kX64CacheLineSize));
1293 static const uint64 kCacheMask = ~(uint64(kX64CacheLineSize) - 1);
1294 uintptr_t iFrontier = uintptr_t(a.code.frontier);
1295 uintptr_t lastByte = iFrontier + nBytes - 1;
1296 return (iFrontier & kCacheMask) == (lastByte & kCacheMask);
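// Worked example (assuming 64-byte cache lines): a 5-byte jmp whose first
// byte sits at ...0x3e occupies ...0x3e..0x42 and straddles the ...0x40 line
// boundary, so isSmashable() is false and prepareForSmash() pads with nops up
// to the next line before the instruction is emitted.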
1299 void
1300 TranslatorX64::prepareForSmash(X64Assembler& a, int nBytes) {
1301 if (UNLIKELY(!isSmashable(a, nBytes))) {
1302 moveToAlign(a, kX64CacheLineSize, false);
1304 ASSERT(isSmashable(a, nBytes));
1307 void
1308 TranslatorX64::prepareForSmash(int nBytes) {
1309 prepareForSmash(a, nBytes);
1312 void
1313 TranslatorX64::smash(X64Assembler &a, TCA src, TCA dest) {
1314 ASSERT(canWrite());
1315 TRACE(2, "smash: %p -> %p\n", src, dest);
1319 * We are about to smash reachable code in the translation cache. A
1320 * hardware thread might be executing the very instruction we're
1321 * modifying. This is safe because:
1323 * 1. We align smashable instructions so that they reside on a single
1324 * cache line;
1326 * 2. We modify the instruction with a single processor store; and
1328 * 3. The smashed region contains only a single instruction in the
1329  *    original instruction stream (see jmp() -> emitJ32() -> bytes() in
1330  *    the assembler).
1332 CodeCursor cg(a, src);
1333 ASSERT(isSmashable(a, kJmpLen));
1334 if (dest > src && dest - src <= 7) {
1335 a. emitNop(dest - src);
1336 } else {
1337 a. jmp(dest);
1341 void TranslatorX64::protectCode() {
1342 mprotect(tx64->a.code.base, tx64->a.code.size, PROT_READ | PROT_EXEC);
1346 void TranslatorX64::unprotectCode() {
1347 mprotect(tx64->a.code.base, tx64->a.code.size,
1348 PROT_READ | PROT_WRITE | PROT_EXEC);
1351 void
1352 TranslatorX64::emitStackCheck(int funcDepth, Offset pc) {
1353 uint64_t stackMask = cellsToBytes(RuntimeOption::EvalVMStackElms) - 1;
1354 a. mov_reg64_reg64(rVmSp, rScratch); // copy to destroy
1355 a. and_imm64_reg64(stackMask, rScratch);
1356 a. sub_imm64_reg64(funcDepth + Stack::sSurprisePageSize, rScratch);
1357 ASSERT(m_stackOverflowHelper);
1358 a. jl(m_stackOverflowHelper); // Unlikely branch to failure.
1359 // Success.
1362 // Tests the surprise flags for the current thread. Should be used
1363 // before a jnz to surprise handling code.
1364 void
1365 TranslatorX64::emitTestSurpriseFlags() {
1366 CT_ASSERT(sizeof(((RequestInjectionData*)0)->conditionFlags) == 8);
1367 a.test_imm64_disp_reg64(-1, TargetCache::kConditionFlagsOff, rVmTl);
1370 void
1371 TranslatorX64::emitCheckSurpriseFlagsEnter(bool inTracelet, Offset pcOff,
1372 Offset stackOff) {
1373 emitTestSurpriseFlags();
1375 UnlikelyIfBlock<CC_NZ> ifTracer(a, astubs);
1376 if (false) { // typecheck
1377 const ActRec* ar = NULL;
1378 EventHook::FunctionEnter(ar, 0);
1380 astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
1381 CT_ASSERT(EventHook::NormalFunc == 0);
1382 astubs.xor_reg32_reg32(argNumToRegName[1], argNumToRegName[1]);
1383 emitCall(astubs, (TCA)&EventHook::FunctionEnter);
1384 if (inTracelet) {
1385 recordSyncPoint(astubs, pcOff, stackOff);
1386 } else {
1387 // If we're being called while generating a func prologue, we
1388 // have to record the fixup directly in the fixup map instead of
1389 // going through m_pendingFixups like normal.
1390 m_fixupMap.recordFixup(astubs.code.frontier, Fixup(pcOff, stackOff));
1395 void
1396 TranslatorX64::setArgInActRec(ActRec* ar, int argNum, uint64_t datum,
1397 DataType t) {
1398 TypedValue* tv =
1399 (TypedValue*)(uintptr_t(ar) - (argNum+1) * sizeof(TypedValue));
1400 tv->m_data.num = datum;
1401 tv->m_type = t;
1405 TranslatorX64::shuffleArgsForMagicCall(ActRec* ar) {
1406 if (!ar->hasInvName()) {
1407 return 0;
1409 const Func* f UNUSED = ar->m_func;
1410 f->validate();
1411 ASSERT(f->name()->isame(s___call.get())
1412 || f->name()->isame(s___callStatic.get()));
1413 ASSERT(f->numParams() == 2);
1414 TRACE(1, "shuffleArgsForMagicCall: ar %p\n", ar);
1415 ASSERT(ar->hasInvName());
1416 StringData* invName = ar->getInvName();
1417 ASSERT(invName);
1418 ar->setVarEnv(NULL);
1419 int nargs = ar->numArgs();
1420 // We need to make an array containing all the arguments passed by the
1421 // caller and put it where the second argument is
1422 HphpArray* argArray = NEW(HphpArray)(nargs);
1423 argArray->incRefCount();
1424 for (int i = 0; i < nargs; ++i) {
1425 TypedValue* tv =
1426 (TypedValue*)(uintptr_t(ar) - (i+1) * sizeof(TypedValue));
1427 argArray->nvAppend(tv, false);
1428 tvRefcountedDecRef(tv);
1430 // Put invName in the slot for first argument
1431 setArgInActRec(ar, 0, uint64_t(invName), BitwiseKindOfString);
1432 // Put argArray in the slot for second argument
1433 setArgInActRec(ar, 1, uint64_t(argArray), KindOfArray);
1434 // Fix up ActRec's numArgs
1435 ar->initNumArgs(2);
1436 return 1;
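// Sketch of the transformation above for a magic dispatch of $obj->foo(1,2,3):
//   before: arg slots hold 1, 2, 3; ar->numArgs() == 3; invName == "foo"
//   after:  arg slot 0 holds the method name "foo", arg slot 1 holds
//           array(1, 2, 3), ar->numArgs() == 2, and the varEnv/invName
//           union has been cleared.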
1440 * The standard VMRegAnchor treatment won't work for some cases called
1441 * during function preludes.
1443 * The fp sync machinery is fundamentally based on the notion that
1444 * instruction pointers in the TC are uniquely associated with source
1445 * HHBC instructions, and that source HHBC instructions are in turn
1446 * uniquely associated with SP->FP deltas.
1448 * run_intercept_helper/trimExtraArgs is called from the prologue of
1449 * the callee. The prologue is 1) still in the caller frame for now,
1450 * and 2) shared across multiple call sites. 1 means that we have the
1451 * fp from the caller's frame, and 2 means that this fp is not enough
1452 * to figure out sp.
1454 * However, the prologue passes us the callee actRec, whose predecessor
1455 * has to be the caller. So we can sync sp and fp by ourselves here.
1456 * Geronimo!
1458 static void sync_regstate_to_caller(ActRec* preLive) {
1459 ASSERT(tl_regState == REGSTATE_DIRTY);
1460 vmfp() = (TypedValue*)preLive->m_savedRbp;
1461 vmsp() = (TypedValue*)preLive - preLive->numArgs();
1462 if (ActRec* fp = g_vmContext->m_fp) {
1463 if (fp->m_func && fp->m_func->unit()) {
1464 vmpc() = fp->m_func->unit()->at(fp->m_func->base() + preLive->m_soff);
1467 tl_regState = REGSTATE_CLEAN;
1470 static uint64 run_intercept_helper(ActRec* ar, Variant* ihandler) {
1471 sync_regstate_to_caller(ar);
1472 bool ret = run_intercept_handler<true>(ar, ihandler);
1474 * Restore tl_regState manually in the no-exception case only. (The
1475 * VM regs are clean here---we only need to set them dirty if we are
1476 * stopping to execute in the TC again, which we won't be doing if
1477 * an exception is propagating.)
1479 tl_regState = REGSTATE_DIRTY;
1480 return ret;
1483 void
1484 TranslatorX64::trimExtraArgs(ActRec* ar) {
1485 ASSERT(!ar->hasInvName());
1487 sync_regstate_to_caller(ar);
1488 const Func* f = ar->m_func;
1489 int numParams = f->numParams();
1490 int numArgs = ar->numArgs();
1491 ASSERT(numArgs > numParams);
1492 int numExtra = numArgs - numParams;
1494 TRACE(1, "trimExtraArgs: %d args, function %s takes only %d, ar %p\n",
1495 numArgs, f->name()->data(), numParams, ar);
1497 if (f->attrs() & AttrMayUseVV) {
1498 ASSERT(!ar->hasExtraArgs());
1499 ar->setExtraArgs(ExtraArgs::allocateCopy(
1500 (TypedValue*)(uintptr_t(ar) - numArgs * sizeof(TypedValue)),
1501 numArgs - numParams));
1502 } else {
1503 // Function is not marked as "MayUseVV", so discard the extra arguments
1504 TypedValue* tv = (TypedValue*)(uintptr_t(ar) - numArgs*sizeof(TypedValue));
1505 for (int i = 0; i < numExtra; ++i) {
1506 tvRefcountedDecRef(tv);
1507 ++tv;
1509 ar->setNumArgs(numParams);
1512 // Only go back to dirty in a non-exception case. (Same reason as
1513 // above.)
1514 tl_regState = REGSTATE_DIRTY;
1518 TranslatorX64::getInterceptHelper() {
1519 if (false) { // typecheck
1520 Variant *h = get_intercept_handler(CStrRef((StringData*)NULL),
1521 (char*)NULL);
1522 bool c UNUSED = run_intercept_helper((ActRec*)NULL, h);
1524 if (!m_interceptHelper) {
1525 m_interceptHelper = TCA(astubs.code.frontier);
1526 astubs. load_reg64_disp_reg64(rStashedAR, AROFF(m_func),
1527 rax);
1528 astubs. lea_reg64_disp_reg64(rax, Func::fullNameOff(),
1529 argNumToRegName[0]);
1531 astubs. lea_reg64_disp_reg64(rax, Func::maybeInterceptedOff(),
1532 argNumToRegName[1]);
1534 astubs. call(TCA(get_intercept_handler));
1535 astubs. test_reg64_reg64(rax, rax);
1537 JccBlock<CC_NZ> ifNotIntercepted(astubs);
1538 astubs. ret();
1541 // we might re-enter, so align the stack
1542 astubs. sub_imm32_reg64(8, rsp);
1543 // Copy the old rbp into the savedRbp pointer.
1544 astubs. store_reg64_disp_reg64(rbp, 0, rStashedAR);
1546 PhysReg rSavedRip = r13; // XXX ideally don't hardcode r13 ... but
1547 // we need callee-saved and don't have
1548 // any scratch ones.
1550 // Fish out the saved rip. We may need to jump there, and the helper will
1551 // have wiped out the ActRec.
1552 astubs. load_reg64_disp_reg64(rStashedAR, AROFF(m_savedRip),
1553 rSavedRip);
1554 astubs. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1555 astubs. mov_reg64_reg64(rax, argNumToRegName[1]);
1556 astubs. call(TCA(run_intercept_helper));
1558 // Normally we'd like to recordReentrantCall here, but the vmreg sync'ing
1559 // for run_intercept_handler is a special little snowflake. See
1560 // run_intercept_handler for details.
1561 astubs. test_reg64_reg64(rax, rax);
1563 // If the helper returned false, don't execute this function. The helper
1564 // will have cleaned up the interceptee's arguments and AR, and pushed
1565 // the handler's return value; we now need to get out.
1567 // We don't need to touch rVmFp; it's still pointing to the caller of
1568 // the interceptee. We need to adjust rVmSp. Then we need to jump to the
1569 // saved rip from the interceptee's ActRec.
1570 JccBlock<CC_NZ> ifDontEnterFunction(astubs);
1571 astubs. add_imm32_reg64(16, rsp);
1572 astubs. lea_reg64_disp_reg64(rStashedAR, AROFF(m_r), rVmSp);
1573 astubs. jmp_reg(rSavedRip);
1575 astubs. add_imm32_reg64(8, rsp);
1576 astubs. ret();
1578 return m_interceptHelper;
1582 TranslatorX64::emitPrologueRedispatch(X64Assembler& a) {
1583 TCA retval;
1584 moveToAlign(a);
1585 retval = a.code.frontier;
1586 // We're in the wrong func prologue. By convention with emitFuncGuard,
1587 // rax contains the function we need to enter.
1589 ASSERT(kScratchCrossTraceRegs.contains(rax));
1590 ASSERT(kScratchCrossTraceRegs.contains(rdx));
1591 ASSERT(kScratchCrossTraceRegs.contains(rcx));
1593 // We don't know how many params we were invoked with. Infer it from
1594 // the stack and rStashedAR rather than reading it from the actrec.
1596 // mov %r15, %rdx
1597 // ld m_numParams(%rax), %ecx #ecx: targetFunc->numParams
1598 // sub %rbx, %rdx #edx: n_args
1599 // shr $4, rdx
1600 a. mov_reg64_reg64(rStashedAR, rdx);
1601 a. load_reg64_disp_reg32(rax, Func::numParamsOff(), rcx);
1602 a. sub_reg64_reg64(rVmSp, rdx);
1603 BOOST_STATIC_ASSERT(sizeof(TypedValue) == 16);
1604 a. shr_imm32_reg32(4, rdx); // rdx: numPassed
1606 // If we didn't pass too many args, directly dereference
1607 // func->m_prologues.
1608 a. cmp_reg32_reg32(rdx, rcx);
1609 TCA bToFixedProloguesCheck = a.code.frontier;
1610 a. jcc8(CC_L, bToFixedProloguesCheck);
1612 // cmp $kNumFixedPrologues, %rdx
1613 // jl numParamsCheck
1614 TCA actualDispatch = a.code.frontier;
1616 // rcx: prologueIdx
1617 // rax = func->prologues[numParams]
1618 // jmp rax
1619 a. load_reg64_disp_index_reg64(rax,
1620 Func::prologueTableOff(),
1621 rdx,
1622 rax);
1623 a. jmp_reg(rax);
1624 a. ud2();
1626 // Hmm, more parameters passed than the function expected. Did we pass
1627 // kNumFixedPrologues or more? If not, %rdx is still a perfectly
1628 // legitimate index into the func prologue table.
1629 // numParamsCheck:
1630 // cmp $kNumFixedPrologues, %rcx
1631 // jl dispatch
1632 a.patchJcc8(bToFixedProloguesCheck, a.code.frontier); // numParamsCheck:
1633 a. cmp_imm32_reg32(kNumFixedPrologues, rdx);
1634 a. jcc8(CC_L, actualDispatch);
1636 // Too many gosh-darned parameters passed. Go to numExpected + 1, which
1637 // is always a "too many params" entry point.
1639 // mov %rdx, %rcx
1640 // add $1, %rcx
1641 // jmp dispatch
1642 a. load_reg64_disp_index_reg64(rax,
1643 // %rcx + 1
1644 Func::prologueTableOff() + sizeof(TCA),
1645 rcx,
1646 rax);
1647 a. jmp_reg(rax);
1648 a. ud2();
1649 return retval;
1652 // The funcGuard gets skipped and patched by other code, so we have some
1653 // magic offsets.
1654 static const int kFuncMovImm = 6; // Offset to the immediate for expected func
1655 static const int kFuncGuardLen = 23;
1657 template<typename T>
1658 static T*
1659 funcGuardToFuncImm(TCA funcGuard) {
1660 T* retval = (T*)(funcGuard + kFuncMovImm + (2 - sizeof(T)/4));
1661 // We padded these so the immediate fits inside an aligned 8-byte region,
1662 // so xor-ing the address of its first byte with the address of its last
1663 // byte should be non-zero only in the bottom 3 bits.
1664 ASSERT(((uintptr_t(retval) ^ (uintptr_t(retval + 1) - 1)) & ~7) == 0);
1665 return retval;
1668 static inline bool
1669 funcGuardIsForFunc(TCA funcGuard, const Func* func) {
1670 intptr_t iptr = uintptr_t(func);
1671 if (deltaFits(iptr, sz::dword)) {
1672 return *funcGuardToFuncImm<int32_t>(funcGuard) == iptr;
1674 return *funcGuardToFuncImm<int64_t>(funcGuard) == iptr;
1677 static void
1678 disableFuncGuard(TCA funcGuard, Func* func) {
1679 ASSERT(funcGuardIsForFunc(funcGuard, func));
1680 if (deltaFits((intptr_t)func, sz::dword)) {
1681 *funcGuardToFuncImm<int32_t>(funcGuard) = 0;
1682 } else {
1683 *funcGuardToFuncImm<int64_t>(funcGuard) = 0;
1685 ASSERT(!funcGuardIsForFunc(funcGuard, func));
1689 TranslatorX64::emitFuncGuard(X64Assembler& a, const Func* func) {
1690 ASSERT(kScratchCrossTraceRegs.contains(rax));
1691 ASSERT(kScratchCrossTraceRegs.contains(rdx));
1693 // Ensure the immediate is safely smashable; the immediate needs
1694 // to be at a qword boundary, so we need to start the movImm at
1695 // (kAlign - kFuncMovImm) % 8.
1696 static const int kAlign = 8;
1697 static const int kAlignMask = kAlign - 1;
1698 int loBits = uintptr_t(a.code.frontier) & kAlignMask;
1699 a.emitNop(((kAlign - kFuncMovImm) - loBits) & kAlignMask);
1700 ASSERT((uintptr_t(a.code.frontier) & kAlignMask) == kAlign - kFuncMovImm);
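  // Worked example: kAlign - kFuncMovImm == 2, so after the padding above the
  // guard begins at an address == 2 (mod 8). The 4-byte load of m_func plus
  // the 2-byte rex+opcode of the mov_imm64 below then land the expected-Func
  // immediate entirely within one aligned qword (exactly on the boundary for
  // the 8-byte form), which is what lets funcGuardToFuncImm /
  // disableFuncGuard smash it with a single store.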
1701 TCA aStart = a.code.frontier;
1702 a. load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
1703 ASSERT((a.code.frontier - aStart) ==
1704 (kFuncMovImm - 2 /* rex + movimmOpcode */));
1705 a. mov_imm64_reg(uint64_t(func), rdx);
1706 a. cmp_reg64_reg64(rax, rdx);
1708 if (!m_funcPrologueRedispatch) {
1709 m_funcPrologueRedispatch = emitPrologueRedispatch(astubs);
1711 a. jnz(m_funcPrologueRedispatch);
1712 ASSERT(a.code.frontier - aStart <= kFuncGuardLen);
1713 a.emitNop(kFuncGuardLen - (a.code.frontier - aStart));
1714 ASSERT(a.code.frontier - aStart == kFuncGuardLen);
1715 return aStart;
1719 skipFuncCheck(TCA dest) {
1720 if (!dest || dest == (TCA)fcallHelperThunk) return dest;
1721 return dest + kFuncGuardLen;
1725 * funcPrologue --
1727 * Given a callee and a number of args, match up to the callee's
1728 * argument expectations and dispatch.
1730 * Call/return hand-shaking is a bit funny initially. At translation time,
1731 * we don't necessarily know what function we're calling. For instance,
1733 * f(g());
1735 * Will lead to a set of basic blocks like:
1737 * b1: pushfuncd "f"
1738 * pushfuncd "g"
1739 * fcall
1740 * b2: fcall
1742  * The fcall labelled "b2" above is not statically bindable in our
1743 * execution model.
1745 * We decouple the call work into a per-callsite portion, responsible
1746 * for recording the return address, and a per-(callee, numArgs) portion,
1747 * responsible for fixing up arguments and dispatching to remaining
1748 * code. We call the per-callee portion a "prologue."
1750 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
1751 * we're running "between" basic blocks, with all VM registers sync'ed.
1752 * However, we're also called in the middle of basic blocks, when dropping
1753 * entries into func->m_prologues. So don't go around using the
1754 * translation-time values of vmfp()/vmsp(), since they have an
1755 * unpredictable relationship to the source.
1757 bool
1758 TranslatorX64::checkCachedPrologue(const Func* func, int paramIdx,
1759 TCA& prologue) const {
1760 prologue = (TCA)func->getPrologue(paramIdx);
1761 if (prologue != (TCA)fcallHelperThunk && !s_replaceInFlight) {
1762 TRACE(1, "cached prologue %s(%d) -> cached %p\n",
1763 func->fullName()->data(), paramIdx, prologue);
1764 ASSERT(isValidCodeAddress(prologue));
1765 return true;
1767 return false;
1771 TranslatorX64::funcPrologue(Func* func, int nPassed) {
1772 func->validate();
1773 TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
1774 int numParams = func->numParams();
1775 int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
1777 bool funcIsMagic = func->isMagic();
1779 // Do a quick test before grabbing the write lease
1780 TCA prologue;
1781 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1783 // If the translator is getting replaced out from under us, refuse to
1784 // provide a prologue; we don't know whether this request is running on the
1785 // old or new context.
1786 LeaseHolder writer(s_writeLease);
1787 if (!writer || s_replaceInFlight) return NULL;
1788 // Double check the prologue array now that we have the write lease
1789 // in case another thread snuck in and set the prologue already.
1790 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1792 SpaceRecorder sr("_FuncPrologue", a);
1793 // Careful: this isn't necessarily the real entry point. For funcIsMagic
1794 // prologues, this is just a possible prologue.
1795 TCA aStart = a.code.frontier;
1796 TCA start = aStart;
1797 TCA stubStart = astubs.code.frontier;
1799 // Guard: we're in the right callee. This happens in magicStart for
1800 // magic callees.
1801 if (!funcIsMagic) {
1802 start = aStart = emitFuncGuard(a, func);
1805 emitRB(a, RBTypeFuncPrologueTry, func->fullName()->data());
1806   // Guard: we have enough stack space to complete this function.
1807 emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
1809 // NB: We have most of the register file to play with, since we know
1810 // we're between BB's. So, we hardcode some registers here rather
1811 // than using the scratch allocator.
1812 TRACE(2, "funcPrologue: user function: %s\n", func->name()->data());
1814 // Add a counter for the translation if requested
1815 if (RuntimeOption::EvalJitTransCounters) {
1816 emitTransCounterInc(a);
1819 if (!funcIsMagic) {
1820 // entry point for magic methods comes later
1821 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1824 SrcKey skFuncBody = emitPrologue(func, nPassed);
1826 if (funcIsMagic) {
1827     // the entry point for magic methods is here
1828 TCA magicStart = emitFuncGuard(a, func);
1829 ASSERT(numParams == 2);
1830 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1831 // Special __call prologue
1832 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1833 emitCall(a, TCA(TranslatorX64::shuffleArgsForMagicCall));
1834 // if shuffleArgs returns 0, that means this was not a magic call
1835 // and we should proceed to a prologue specialized for nPassed;
1836 // otherwise, proceed to a prologue specialized for nPassed==numParams (2).
1837 if (nPassed == 2) {
1838 a.jmp(start);
1839 } else {
1840 a.test_reg64_reg64(rax, rax);
1841 // z ==> not a magic call, go to prologue for nPassed
1842 if (deltaFits(start - (a.code.frontier + kJcc8Len), sz::byte)) {
1843 a.jcc8(CC_Z, start);
1844 } else {
1845 a.jcc(CC_Z, start);
1847 // this was a magic call
1848 // nPassed == 2
1849 // Fix up hardware stack pointer
1850 nPassed = 2;
1851 a. lea_reg64_disp_reg64(rStashedAR, -cellsToBytes(nPassed), rVmSp);
1852 // Optimization TODO: Reuse the prologue for args == 2
1853 emitPrologue(func, nPassed);
1855 start = magicStart;
1857 ASSERT(funcGuardIsForFunc(start, func));
1858 TRACE(2, "funcPrologue tx64 %p %s(%d) setting prologue %p\n",
1859 this, func->fullName()->data(), nPassed, start);
1860 ASSERT(isValidCodeAddress(start));
1861 func->setPrologue(paramIndex, start);
1863 addTranslation(TransRec(skFuncBody, func->unit()->md5(),
1864 TransProlog, aStart, a.code.frontier - aStart,
1865 stubStart, astubs.code.frontier - stubStart));
1867 recordGdbTranslation(skFuncBody, func->unit(),
1868 a, aStart,
1869 false, true);
1870 recordBCInstr(OpFuncPrologue, a, start);
1872 return start;
1875 static TCA callAndResume(ActRec *ar) {
1876 VMRegAnchor _(ar,true);
1877 g_vmContext->doFCall<true>(ar, g_vmContext->m_pc);
1878 return Translator::Get()->getResumeHelper();
1881 extern "C"
1882 TCA fcallHelper(ActRec* ar) {
1883 TCA tca =
1884 Translator::Get()->funcPrologue((Func*)ar->m_func, ar->numArgs());
1885 if (tca) {
1886 return tca;
1888 return callAndResume(ar);
1892 TranslatorX64::emitInterceptPrologue(Func* func, TCA next) {
1893 TCA start = a.code.frontier;
1894 a.mov_imm64_reg((uintptr_t)&func->maybeIntercepted(), rax);
1895 a.cmp_imm8_disp_reg8(0, 0, rax);
1896 TCA jcc8PatchAddr = NULL;
1897 if (next == NULL) {
1898 jcc8PatchAddr = a.code.frontier;
1899 a.jcc8(CC_E, jcc8PatchAddr);
1900 } else {
1901 a.jcc(CC_E, next);
1903 // Prologues are not really sites for function entry yet; we can get
1904 // here via an optimistic bindCall. Check that the func is as expected.
1906 a. mov_imm64_reg(uint64_t(func), rax);
1907 a. cmp_reg64_disp_reg64(rax, AROFF(m_func), rStashedAR);
1909 JccBlock<CC_NZ> skip(a);
1910 a.call(getInterceptHelper());
1912 if (jcc8PatchAddr != NULL) {
1913 a.patchJcc8(jcc8PatchAddr, a.code.frontier);
1915 return start;
1918 void
1919 TranslatorX64::interceptPrologues(Func* func) {
1920 if (!RuntimeOption::EvalJitEnableRenameFunction &&
1921 !(func->attrs() & AttrDynamicInvoke)) {
1922 return;
1924 if (func->maybeIntercepted() == -1) {
1925 return;
1927 func->maybeIntercepted() = -1;
1928 ASSERT(s_writeLease.amOwner());
1929 int maxNumPrologues = func->numPrologues();
1930 for (int i = 0; i < maxNumPrologues; i++) {
1931 TCA prologue = func->getPrologue(i);
1932 if (prologue == (unsigned char*)fcallHelperThunk)
1933 continue;
1934 ASSERT(funcGuardIsForFunc(prologue, func));
1935 // There might already be calls hard-coded to this via FCall.
1936     // Blow away the immediate comparison, so that we always use the Func*'s
1937 // prologue table. We use 0 (== NULL on our architecture) as the bit
1938 // pattern for an impossible Func.
1940 // Note that we're modifying reachable code.
1941 disableFuncGuard(prologue, func);
1942 ASSERT(funcGuardIsForFunc(prologue, NULL));
1944 // There's a prologue already generated; redirect it to first
1945 // call the intercept helper. First, reset it (leaking the old
1946 // prologue), so funcPrologue will re-emit it.
1947 func->setPrologue(i, (TCA)fcallHelperThunk);
1948 TCA addr = funcPrologue(func, i);
1949 ASSERT(funcGuardIsForFunc(addr, func));
1950 ASSERT(addr);
1951 func->setPrologue(i, addr);
1952 TRACE(1, "interceptPrologues %s prologue[%d]=%p\n",
1953 func->fullName()->data(), i, (void*)addr);
1957 SrcKey
1958 TranslatorX64::emitPrologue(Func* func, int nPassed) {
1959 int numParams = func->numParams();
1960 ASSERT(IMPLIES(func->maybeIntercepted() == -1,
1961 m_interceptsEnabled));
1962 if (m_interceptsEnabled &&
1963 !func->isPseudoMain() &&
1964 (RuntimeOption::EvalJitEnableRenameFunction ||
1965 func->attrs() & AttrDynamicInvoke)) {
1966 emitInterceptPrologue(func);
1969 Offset dvInitializer = InvalidAbsoluteOffset;
1971 if (nPassed > numParams) {
1972     // Too many args; a weird case, so just call out. Stash ar
1973 // somewhere callee-saved.
1974 if (false) { // typecheck
1975 TranslatorX64::trimExtraArgs((ActRec*)NULL);
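    // (The dead call above exists only so the compiler type-checks
    // trimExtraArgs' signature; emitCall below takes a raw TCA and gets no
    // such checking.)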
1977 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1978 emitCall(a, TCA(TranslatorX64::trimExtraArgs));
1979 // We'll fix rVmSp below.
1980 } else if (nPassed < numParams) {
1981 // Figure out which, if any, default value initializer to go to
1982 for (int i = nPassed; i < numParams; ++i) {
1983 const Func::ParamInfo& pi = func->params()[i];
1984 if (pi.hasDefaultValue()) {
1985 dvInitializer = pi.funcletOff();
1986 break;
1989 TRACE(1, "Only have %d of %d args; getting dvFunclet\n",
1990 nPassed, numParams);
1991 emitImmReg(a, nPassed, rax);
1992 // do { *(--rVmSp) = NULL; nPassed++; } while (nPassed < numParams);
1993 // This should be an unusual case, so optimize for code density
1994 // rather than execution speed; i.e., don't unroll the loop.
1995 TCA loopTop = a.code.frontier;
1996 a. sub_imm32_reg64(sizeof(Cell), rVmSp);
1997 a. add_imm32_reg32(1, rax);
1998 // XXX "missing argument" warnings need to go here
1999 emitStoreUninitNull(a, 0, rVmSp);
2000 a. cmp_imm32_reg32(numParams, rax);
2001 a. jcc8(CC_L, loopTop);
2004 // Entry point for numParams == nPassed is here.
2005 // Args are kosher. Frame linkage: set fp = ar.
2006 a. mov_reg64_reg64(rStashedAR, rVmFp);
2008 // We're in the callee frame; initialize locals. Unroll the loop all
2009 // the way if there are a modest number of locals to update;
2010 // otherwise, do it in a compact loop. If we're in a generator body,
2011 // named locals will be initialized by UnpackCont so we can leave
2012 // them alone here.
2013 int uninitLimit = func->isGenerator() ? func->numNamedLocals() : numParams;
2014 int numUninitLocals = func->numLocals() - uninitLimit;
2015 ASSERT(numUninitLocals >= 0);
2016 if (numUninitLocals > 0) {
2017 SpaceRecorder sr("_InitializeLocals", a);
2019     // If there are too many locals, emitting a loop to initialize them
2020     // is more compact than emitting a slew of movs inline.
2021 if (numUninitLocals > kLocalsToInitializeInline) {
2022 PhysReg loopReg = rcx;
2024 // rVmFp + rcx points to the count/type fields of the TypedValue we're
2025 // about to write to.
2026 int loopStart = -func->numLocals() * sizeof(TypedValue)
2027 + TVOFF(_count);
2028 int loopEnd = -uninitLimit * sizeof(TypedValue)
2029 + TVOFF(_count);
2031 emitImmReg(a, loopStart, loopReg);
2032 emitImmReg(a, 0, rdx);
2034 TCA topOfLoop = a.code.frontier;
2035 // do {
2036 // rVmFp[rcx].m_type = KindOfUninit;
2037 // } while(++rcx != loopEnd);
2039 // mov %rdx, 0x0(%rVmFp, %rcx, 1)
2040 a. emitRM(instr_mov, rVmFp, loopReg, 1, 0, rdx);
2041 a. add_imm32_reg64(sizeof(Cell), loopReg);
2042 a. cmp_imm32_reg64(loopEnd, loopReg);
2043 a. jcc8(CC_NE, topOfLoop);
2044 } else {
2045 PhysReg base;
2046 int disp, k;
2047 for (k = uninitLimit; k < func->numLocals(); ++k) {
2048 locToRegDisp(Location(Location::Local, k), &base, &disp);
2049 emitStoreUninitNull(a, disp, base);
2054 // Move rVmSp to the right place: just past all locals
2055 int frameCells = func->numSlotsInFrame();
2056 a. lea_reg64_disp_reg64(rVmFp, -cellsToBytes(frameCells), rVmSp);
2057 const Opcode* destPC = func->unit()->entry() + func->base();
2058 if (dvInitializer != InvalidAbsoluteOffset) {
2059 // dispatch to funclet.
2060 destPC = func->unit()->entry() + dvInitializer;
2062 SrcKey funcBody(func, destPC);
2064 // Check surprise flags in the same place as the interpreter: after
2065 // setting up the callee's frame but before executing any of its
2066 // code
2067 emitCheckSurpriseFlagsEnter(false, funcBody.m_offset - func->base(),
2068 frameCells);
2070 emitBindJmp(funcBody);
2071 return funcBody;
2074 void
2075 TranslatorX64::emitBindCall(const Tracelet& t,
2076 const NormalizedInstruction &ni,
2077 Offset atCall, Offset afterCall) {
2078 int numArgs = ni.imm[0].u_IVA;
2080 // If this is a call to a builtin and we don't need any argument
2081 // munging, we can skip the prologue system and do it inline.
2082 if (ni.funcd && ni.funcd->isBuiltin() &&
2083 numArgs == ni.funcd->numParams()) {
2084 ASSERT(ni.funcd->numLocals() == ni.funcd->numParams());
2085 ASSERT(ni.funcd->numIterators() == 0);
2086 a. lea_reg64_disp_reg64(rVmSp, cellsToBytes(numArgs), rVmFp);
2087 emitCheckSurpriseFlagsEnter(true, 0, numArgs);
2088 // rVmSp is already correctly adjusted, because there's no locals
2089 // other than the arguments passed.
2090 return emitNativeImpl(ni.funcd, false /* don't jump to return */);
2093 // Stash callee's rVmFp into rStashedAR for the callee's prologue
2094 a. lea_reg64_disp_reg64(rVmSp, cellsToBytes(numArgs), rStashedAR);
2095 emitBindCallHelper(rStashedAR, ni.source, ni.funcd, numArgs, (bool)ni.funcd);
2096 return;
2099 void
2100 TranslatorX64::emitBindCallHelper(register_name_t stashedAR,
2101 SrcKey srcKey,
2102 const Func* funcd,
2103 int numArgs,
2104 bool isImmutable) {
2105 // Whatever prologue we're branching to will check at runtime that we
2106 // went to the right Func*, correcting if necessary. We treat the first
2107 // Func we encounter as a decent prediction. Make space to burn in a
2108 // TCA.
2109 ReqBindCall* req = m_globalData.alloc<ReqBindCall>();
2110 a. mov_reg64_reg64(rStashedAR, serviceReqArgRegs[1]);
2111 prepareForSmash(kJmpLen);
2112 TCA toSmash = a.code.frontier;
2113 a. jmp(emitServiceReq(false, REQ_BIND_CALL, 1ull, req));
2115 TRACE(1, "will bind static call: tca %p, this %p, funcd %p\n",
2116 toSmash, this, funcd);
2117 req->m_toSmash = toSmash;
2118 req->m_nArgs = numArgs;
2119 req->m_sourceInstr = srcKey;
2120 req->m_isImmutable = isImmutable;
2122 return;
2125 // for documentation see bindJmpccFirst below
2126 void
2127 TranslatorX64::emitCondJmp(const SrcKey &skTaken, const SrcKey &skNotTaken,
2128 ConditionCode cc) {
2129 // should be true for SrcKeys generated via OpJmpZ/OpJmpNZ
2130 ASSERT(skTaken.m_funcId == skNotTaken.m_funcId);
2132 // reserve space for a smashable jnz/jmp pair; both initially point
2133 // to our stub
2134 prepareForSmash(kJmpLen + kJmpccLen);
2135 TCA old = a.code.frontier;
2137 moveToAlign(astubs);
2138 TCA stub = astubs.code.frontier;
2140 // begin code for the stub
2142   // We need to be careful here, as we are passing an extra parameter to
2143 // REQ_BIND_JMPCC_FIRST. However we can't pass this parameter via
2144 // emitServiceReq because that only supports constants/immediates, so
2145 // compute the last argument via setcc.
2146 astubs.setcc(cc, serviceReqArgRegs[4]);
2147 emitServiceReq(false /* align */, REQ_BIND_JMPCC_FIRST, 4ull,
2148 old,
2149 uint64_t(skTaken.offset()),
2150 uint64_t(skNotTaken.offset()),
2151 uint64_t(cc));
2153 a.jcc(cc, stub); // MUST use 4-byte immediate form
2154 a.jmp(stub); // MUST use 4-byte immediate form
2157 static void skToName(const SrcKey& sk, char* name) {
2158 sprintf(name, "sk_%08lx_%05d",
2159 long(sk.m_funcId), sk.offset());
2162 static void skToClusterName(const SrcKey& sk, char* name) {
2163 sprintf(name, "skCluster_%08lx_%05d",
2164 long(sk.m_funcId), sk.offset());
2167 static void translToName(const TCA tca, char* name) {
2168 sprintf(name, "tc_%p", tca);
2171 void TranslatorX64::drawCFG(std::ofstream& out) const {
2172 if (!isTransDBEnabled()) return;
2173 const char* indent = " ";
2174 static int genCount;
2175 int numSrcKeys = 0;
2176 int numTranslations = 0;
2177 out << "digraph srcdb" << genCount++ <<" {\n";
2178 out << indent << "size = \"8,11\";\n";
2179 out << indent << "ratio = fill;\n";
2180 for (SrcDB::const_iterator entry = m_srcDB.begin();
2181 entry != m_srcDB.end(); ++entry) {
2182 const SrcKey sk = SrcKey::fromAtomicInt(entry->first);
2183 // 1 subgraph per srcKey.
2184 char name[64];
2185 skToClusterName(sk, name);
2186 numSrcKeys++;
2187 out << indent << "subgraph " << name << "{\n";
2188     const char* indent = " ";
2189 skToName(sk, name);
2190 out << indent << name << "[shape=box];\n";
2191 const vector<TCA>& transls = entry->second->translations();
2192 for (vector<TCA>::const_iterator t = transls.begin(); t != transls.end();
2193 ++t) {
2194 out << indent << "// Translations: " << transls.size() << "\n";
2195 char transname[64];
2196 translToName(*t, transname);
2197 numTranslations++;
2198 out << indent << transname << "[fontsize=11.0];\n";
2199 out << indent << name << " -> " << transname << ";\n";
2201 // And, all translations on the same line
2202 out << indent << "{ rank = same; ";
2203 out << name << " ";
2204 for (vector<TCA>::const_iterator t = transls.begin(); t != transls.end();
2205 ++t) {
2206 char transname[64];
2207 translToName(*t, transname);
2208 out << transname << " ";
2210 out << indent << "}\n"; // subgraph
2211 out << indent << "}\n";
2214 // OK! Those were all the nodes. Now edges. While edges are physically
2215 // from translation to translation, they're virtually from srcKey to
2216 // srcKey, and that is how the db represents them.
2217 for (SrcDB::const_iterator entry = m_srcDB.begin(); entry != m_srcDB.end();
2218 ++entry) {
2219 char destName[64];
2220 skToName(SrcKey::fromAtomicInt(entry->first), destName);
2221 const vector<IncomingBranch>& ibs = entry->second->incomingBranches();
2222 out << indent << "// incoming branches to " << destName << "\n";
2223 for (vector<IncomingBranch>::const_iterator ib = ibs.begin();
2224 ib != ibs.end(); ++ib) {
2225 // Find the start of the translation that contains this branch
2226 const char *branchTypeToColorStr[] = {
2227 "black", // JMP
2228 "green", // JZ
2229 "red", // JNZ
2231 TransDB::const_iterator lowerTCA = m_transDB.lower_bound(ib->m_src);
2232 ASSERT(lowerTCA != m_transDB.end());
2233 char srcName[64];
2234 const TransRec* transRec = this->getTransRec(lowerTCA->second);
2235 skToName(transRec->src, srcName);
2236 out << indent << srcName << " -> " << destName << "[ color = " <<
2237 branchTypeToColorStr[ib->m_type] << "];\n";
2240 out << indent << "// " << numSrcKeys << " srckeys, " << numTranslations <<
2241 " tracelets\n";
2242 out << "}\n\n";
2246 * bindJmp --
2248 * Runtime service handler that patches a jmp to the translation of
2249 * u:dest from toSmash.
2252 TranslatorX64::bindJmp(TCA toSmash, SrcKey destSk, bool isAddr,
2253 bool forceNoHHIR /* = false */) {
2254 TCA tDest = getTranslation(&destSk, false, forceNoHHIR);
2255 if (!tDest) return NULL;
2256 LeaseHolder writer(s_writeLease);
2257 if (!writer) return tDest;
2258 SrcRec* sr = getSrcRec(destSk);
2259 if (isAddr) {
2260 sr->chainFrom(a, IncomingBranch((TCA*)toSmash));
2261 } else {
2262 sr->chainFrom(getAsmFor(toSmash), IncomingBranch(toSmash));
2264 return tDest;
2268 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
2270 * 1: j<CC> stubJmpccFirst
2271 * jmp stubJmpccFirst
2273 * Our "taken" argument tells us whether the branch at 1: was taken or
2274 * not; and therefore which of offTaken and offNotTaken to continue executing.
2275 * If we did take the branch, we now rewrite the code so that the branch is
2276 * straightened. This predicts that subsequent executions will go the same way
2277 * as the first execution.
2279 * jn<CC> stubJmpccSecond:offNotTaken
2280 * nop5 ; fallthru, or jmp if there's already a translation.
2281 * offTaken:
2283 * If we did not take the branch, we leave the sense of the condition
2284 * intact, while patching it up to go to the unexplored code:
2286 * j<CC> stubJmpccSecond:offTaken
2287 * nop5
2288 * offNotTaken:
2291 TranslatorX64::bindJmpccFirst(TCA toSmash,
2292 Offset offTaken, Offset offNotTaken,
2293 bool taken,
2294 ConditionCode cc) {
2295 const Func* f = curFunc();
2296 LeaseHolder writer(s_writeLease);
2297 if (!writer) return NULL;
2298 Offset offWillExplore = taken ? offTaken : offNotTaken;
2299 Offset offWillDefer = taken ? offNotTaken : offTaken;
2300 SrcKey dest(f, offWillExplore);
2301 TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
2302 "taken %d\n",
2303 offWillExplore, offWillDefer, cc, taken);
2305 // We want the branch to point to whichever side has not been explored
2306 // yet.
2307 if (taken) cc = ccNegate(cc);
2308 TCA stub =
2309 emitServiceReq(false, REQ_BIND_JMPCC_SECOND, 3,
2310 toSmash, uint64_t(offWillDefer), uint64_t(cc));
2312 Asm &as = getAsmFor(toSmash);
2313   // It's not clear where chainFrom should go if as is astubs
2314 ASSERT(&as == &a);
2316 // can we just directly fall through?
2317 // a jmp + jz takes 5 + 6 = 11 bytes
2318 bool fallThru = toSmash + kJmpccLen + kJmpLen == as.code.frontier &&
2319 !m_srcDB.find(dest);
2321 TCA tDest;
2322 tDest = getTranslation(&dest, !fallThru /* align */);
2323 if (!tDest) {
2324 return 0;
2326 ASSERT(s_writeLease.amOwner());
2328 * Roll over the jcc and the jmp/fallthru. E.g., from:
2330 * toSmash: jcc <jmpccFirstStub>
2331 * toSmash+6: jmp <jmpccFirstStub>
2332 * toSmash+11: <probably the new translation == tdest>
2334 * to:
2336 * toSmash: j[n]z <jmpccSecondStub>
2337 * toSmash+6: nop5
2338 * toSmash+11: newHotness
2340 CodeCursor cg(as, toSmash);
2341 a.jcc(cc, stub);
2342 getSrcRec(dest)->chainFrom(as, IncomingBranch(as.code.frontier));
2343 TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
2344 return tDest;
2347 // smashes a jcc to point to a new destination
2349 TranslatorX64::bindJmpccSecond(TCA toSmash, const Offset off,
2350 ConditionCode cc) {
2351 const Func* f = curFunc();
2352 SrcKey dest(f, off);
2353 TCA branch = getTranslation(&dest, true);
2354 LeaseHolder writer(s_writeLease, NO_ACQUIRE);
2355 if (branch && writer.acquire()) {
2356 SrcRec* destRec = getSrcRec(dest);
2357 destRec->chainFrom(getAsmFor(toSmash), IncomingBranch(cc, toSmash));
2359 return branch;
2363 * emitBindJmp --
2365 * Emit code to lazily branch to the srckey in next. Assumes current
2366 * basic block is closed (outputs synced, etc.).
2368 void
2369 TranslatorX64::emitBindJmp(X64Assembler& _a, const SrcKey& dest,
2370 ServiceRequest req /* = REQ_BIND_JMP */) {
2371 prepareForSmash(_a, kJmpLen);
2372 TCA toSmash = _a.code.frontier;
2373 if (&_a == &astubs) {
2374 _a. jmp(toSmash);
2377 TCA sr = emitServiceReq(false, req, 2,
2378 toSmash, uint64_t(dest.offset()));
2380 if (&_a == &astubs) {
2381 CodeCursor cc(_a, toSmash);
2382 _a. jmp(sr);
2383 } else {
2384 _a. jmp(sr);
2388 void
2389 TranslatorX64::emitBindJmp(const SrcKey& dest) {
2390 emitBindJmp(a, dest);
2393 void
2394 TranslatorX64::emitStringCheck(X64Assembler& _a,
2395 PhysReg base, int offset, PhysReg tmp) {
2396 // Treat KindOfString and KindOfStaticString identically; they
2397 // are bitwise identical. This is a port of our IS_STRING_TYPE
2398 // macro to assembly, and will have to change in sync with it.
2399 static_assert(IS_STRING_TYPE(7) && IS_STRING_TYPE(6),
2400 "Assembly version of IS_STRING_TYPE needs to be updated");
2401 _a. load_reg64_disp_reg32(base, offset, tmp);
2402 _a. and_imm32_reg32((signed char)(0xfe), tmp); // use 1-byte immediate
2403 _a. cmp_imm32_reg32(6, tmp);
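  // A rough C equivalent of the three instructions above, assuming the two
  // string tags really are 6 and 7 as the static_assert implies:
  //
  //   bool isString = ((type & 0xfe) == 6);  // mask the low bit, then compare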
2406 void
2407 TranslatorX64::emitTypeCheck(X64Assembler& _a, DataType dt,
2408 PhysReg base, int offset,
2409 PhysReg tmp/*= InvalidReg*/) {
2410 offset += TVOFF(m_type);
2411 if (IS_STRING_TYPE(dt)) {
2412 LazyScratchReg scr(m_regMap);
2413 if (tmp == InvalidReg) {
2414 scr.alloc();
2415 tmp = *scr;
2417 emitStringCheck(_a, base, offset, tmp);
2418 } else {
2419 _a. cmp_imm32_disp_reg32(dt, offset, base);
2423 void
2424 TranslatorX64::checkType(X64Assembler& a,
2425 const Location& l,
2426 const RuntimeType& rtt,
2427 SrcRec& fail) {
2428 // We can get invalid inputs as a side effect of reading invalid
2429 // items out of BBs we truncate; they don't need guards.
2430 if (rtt.isVagueValue()) return;
2432 if (m_useHHIR) {
2433 irCheckType(a, l, rtt, fail);
2434 return;
2437 PhysReg base;
2438 int disp = 0;
2439 SpaceRecorder sr("_CheckType", a);
2441 TRACE(1, Trace::prettyNode("Precond", DynLocation(l, rtt)) + "\n");
2443 locToRegDisp(l, &base, &disp);
2444 TRACE(2, "TypeCheck: %d(%%r%d)\n", disp, base);
2445 // Negative offsets from RSP are not yet allocated; they had
2446 // better not be inputs to the tracelet.
2447 ASSERT(l.space != Location::Stack || disp >= 0);
2448 if (Trace::moduleEnabled(Trace::stats, 2)) {
2449 Stats::emitInc(a, Stats::TraceletGuard_branch);
2451 if (rtt.isIter()) {
2452 a. cmp_imm32_disp_reg32(rtt.typeCheckValue(),
2453 disp + rtt.typeCheckOffset(),
2454 base);
2455 } else {
2456 emitTypeCheck(a, rtt.typeCheckValue(), base, disp, rax);
2458 emitFallbackJmp(fail);
2461 void
2462 TranslatorX64::emitFallbackJmp(SrcRec& dest) {
2463 prepareForSmash(kJmpccLen);
2464 dest.emitFallbackJump(a, IncomingBranch(CC_NZ, a.code.frontier));
2467 void
2468 TranslatorX64::emitFallbackJmp(Asm& as, SrcRec& dest) {
2469 prepareForSmash(as, kJmpccLen);
2470 dest.emitFallbackJump(as, IncomingBranch(CC_NZ, as.code.frontier));
2473 void
2474 TranslatorX64::emitFallbackUncondJmp(Asm& as, SrcRec& dest) {
2475 prepareForSmash(as, kJmpLen);
2476 dest.emitFallbackJump(as, IncomingBranch(as.code.frontier));
2479 void TranslatorX64::emitReqRetransNoIR(Asm& as, SrcKey& sk) {
2480 prepareForSmash(as, kJmpLen);
2481 TCA toSmash = as.code.frontier;
2482 if (&as == &astubs) {
2483 as.jmp(toSmash);
2486 TCA sr = emitServiceReq(REQ_RETRANSLATE_NO_IR, 2,
2487 toSmash, sk.offset());
2489 if (&as == &astubs) {
2490 CodeCursor cc(as, toSmash);
2491 as.jmp(sr);
2492 } else {
2493 as.jmp(sr);
2497 uint64_t TranslatorX64::packBitVec(const vector<bool>& bits, unsigned i) {
2498 uint64_t retval = 0;
2499 ASSERT(i % 64 == 0);
2500 ASSERT(i < bits.size());
2501 while (i < bits.size()) {
2502     retval |= uint64_t(bits[i]) << (i % 64);
2503 if ((++i % 64) == 0) {
2504 break;
2507 return retval;
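// Usage sketch (illustrative only): checkRefs below packs each 64-bit block of
// its mask/value vectors this way, with i stepping by 64; bit j of the result
// corresponds to bits[i + j]. 'deps' here is a hypothetical per-ActRec entry:
//
//   uint64_t mask  = packBitVec(deps.m_mask, i);
//   uint64_t value = packBitVec(deps.m_vals, i);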
2510 void
2511 TranslatorX64::checkRefs(X64Assembler& a,
2512 const SrcKey& sk,
2513 const RefDeps& refDeps,
2514 SrcRec& fail) {
2515 if (refDeps.size() == 0) {
2516 return;
2520 * We're still between BB's, so we're not using the real register
2521 * allocator.
2523 RegSet unusedRegs = kScratchCrossTraceRegs;
2524 DumbScratchReg rNumParams(unusedRegs);
2525 DumbScratchReg rMask(unusedRegs);
2526 DumbScratchReg rBits(unusedRegs);
2527 DumbScratchReg rExpectedBits(unusedRegs);
2528 DumbScratchReg rBitsValue(unusedRegs);
2529 DumbScratchReg rFunc(unusedRegs);
2531 // Set up guards for each pushed ActRec that we've made reffiness
2532 // assumptions about
2533 for (RefDeps::ArMap::const_iterator it = refDeps.m_arMap.begin();
2534 it != refDeps.m_arMap.end(); ++it) {
2535 // Be careful! The actual Func might have fewer refs than the number
2536 // of args we're passing. To forestall this, we're going to have to
2537 // keep checking i against the number of params. We consider invocations
2538 // with too many arguments to have passed their checks.
2539 int entryArDelta = it->first;
2541 if (m_useHHIR) {
2542 m_hhbcTrans->guardRefs(entryArDelta,
2543 it->second.m_mask,
2544 it->second.m_vals);
2545 continue;
2548 int32_t funcOff = cellsToBytes(entryArDelta) + AROFF(m_func);
2549 a. load_reg64_disp_reg64(rVmSp, funcOff, *rFunc); // rFunc <- Func*
2550 a. load_reg64_disp_reg32(*rFunc, Func::numParamsOff(),
2551 *rNumParams);
2552 a. load_reg64_disp_reg64(*rFunc, Func::refBitVecOff(),
2553 *rBits); // rBits <- m_refBitVec
2555 for (unsigned i = 0; i < it->second.m_mask.size(); i += 64) {
2556 ASSERT(i < it->second.m_vals.size());
2557 uint64_t mask = packBitVec(it->second.m_mask, i);
2558 if (mask == 0) {
2559 continue;
2561 uint64_t value = packBitVec(it->second.m_vals, i);
2563 emitImmReg(a, mask, *rMask);
2564 emitImmReg(a, value, *rExpectedBits);
2567 * Before trying to load this block off the bit vector, make
2568 * sure it actually exists. It's ok to index past numArgs
2569 * within one of these words, because the remaining bits will be
2570 * set to zero (or one in the case of the variadic by ref
2571 * builtins).
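     *
     * Roughly (illustrative C, not what is emitted) the guard below checks:
     *
     *   if (i + 1 <= func->numParams()) {
     *     ok = ((refBitVec[i / 64] & mask) == expectedBits);
     *   } else if (!(func->attrs() & AttrVariadicByRef)) {
     *     ok = (expectedBits == 0);      // remaining args must be by value
     *   } else {
     *     ok = (expectedBits == mask);   // remaining args must be by ref
     *   }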
2573 if (Trace::moduleEnabled(Trace::stats, 2)) {
2574 Stats::emitInc(a, Stats::TraceletGuard_branch);
2576 a. cmp_imm32_reg32(i + 1, *rNumParams);
2578 IfElseBlock<CC_L> ifFewEnoughArgs(a);
2580 // Load the appropriate qword off of the top actRec's func*.
2581 SKTRACE(2, sk, "reffiness mask %lx value %lx, ar @%d\n",
2582 mask, value, entryArDelta);
2583 a. load_reg64_disp_reg64(*rBits, sizeof(uint64) * (i / 64),
2584 *rBitsValue); // rBitsValue <- rBits[i / 64]
2585 a. and_reg64_reg64(*rMask, *rBitsValue); // rBitsValue &= rMask
2586 a. cmp_reg64_reg64(*rBitsValue, *rExpectedBits);
2587 emitFallbackJmp(fail);
2589 ifFewEnoughArgs.Else();
2591 a. test_imm32_disp_reg32(AttrVariadicByRef,
2592 Func::attrsOff(),
2593 *rFunc);
2595 IfElseBlock<CC_NZ> ifNotWeirdBuiltin(a);
2597 // Other than these builtins, we need to have all by value
2598 // args in this case.
2599 a. test_reg64_reg64(*rExpectedBits, *rExpectedBits);
2600 emitFallbackJmp(fail);
2602 ifNotWeirdBuiltin.Else();
2604 // If it is one of the weird builtins that has reffiness for
2605 // additional args, we have to make sure our expectation is
2606 // that these additional args are by ref.
2607 a. cmp_imm32_reg64((signed int)(-1ull & mask), *rExpectedBits);
2608 emitFallbackJmp(fail);
2616 * emitRetFromInterpretedFrame --
2618 * When the interpreter pushes a call frame, there is necessarily no
2619 * machine RIP available to return to. This helper fishes out the
2620 * destination from the frame and redirects execution to it via enterTC.
2623 TranslatorX64::emitRetFromInterpretedFrame() {
2624 int32_t arBase = sizeof(ActRec) - sizeof(Cell);
2625 moveToAlign(astubs);
2626 TCA stub = astubs.code.frontier;
2627   // Marshal our own args by hand here.
2628 astubs. lea_reg64_disp_reg64(rVmSp, -arBase, serviceReqArgRegs[0]);
2629 astubs. mov_reg64_reg64(rVmFp, serviceReqArgRegs[1]);
2630 (void) emitServiceReq(false, REQ_POST_INTERP_RET, 0ull);
2631 return stub;
2635 * fcallHelperThunk
2636 * Note: Assumes rStashedAR is r15
2638 static_assert(rStashedAR == r15,
2639 "__fcallHelperThunk needs to be modified for ABI changes");
2640 asm (
2641 ".byte 0\n"
2642 ".align 16\n"
2643 ".globl __fcallHelperThunk\n"
2644 "__fcallHelperThunk:\n"
2645 #ifdef HHVM
2646 "mov %r15, %rdi\n"
2647 "call fcallHelper\n"
2648 "jmp *%rax\n"
2649 #endif
2650 "ud2\n"
2654 * enterTCHelper
2656 * This helper routine is written in x64 assembly to take care of the details
2657 * when transferring control between jitted code and the translator.
2658 * rdi: Cell* vm_sp
2659 * rsi: Cell* vm_fp
2660 * rdx: unsigned char* start
2661 * rcx: TReqInfo* infoPtr
2662 * r8: ActRec* firstAR
2663 * r9: uint8_t* targetCacheBase
2665 * Note: enterTCHelper does not save callee-saved registers except
2666 * %rbp. This means when we call it from C++, we have to tell gcc to
2667 * clobber all the other callee-saved registers.
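 *
 *   (enterTC below does exactly this: it brackets the call with
 *    asm volatile("" : : : "rbx","r12","r13","r14","r15").)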
2669 static_assert(rVmSp == rbx &&
2670 rVmFp == rbp &&
2671 rVmTl == r12 &&
2672 rStashedAR == r15,
2673 "__enterTCHelper needs to be modified to use the correct ABI");
2674 asm (
2675 ".byte 0\n"
2676 ".align 16\n"
2677 "__enterTCHelper:\n"
2678 // Prologue
2679 ".cfi_startproc\n"
2680 "push %rbp\n"
2681 ".cfi_adjust_cfa_offset 8\n" // offset to previous frame relative to %rsp
2682 ".cfi_offset rbp, -16\n" // Where to find previous value of rbp
2684 // Set firstAR->m_savedRbp to point to this frame.
2685 "mov %rsp, (%r8)\n"
2687 // Save infoPtr
2688 "push %rcx\n"
2689 ".cfi_adjust_cfa_offset 8\n"
2691 // Set up special registers used for translated code.
2692 "mov %rdi, %rbx\n" // rVmSp
2693 "mov %r9, %r12\n" // rVmTl
2694 "mov %rsi, %rbp\n" // rVmFp
2695 "mov 0x30(%rcx), %r15\n" // rStashedAR saved across service requests
2697 // The translated code we are about to enter does not follow the
2698 // standard prologue of pushing rbp at entry, so we are purposely 8
2699 // bytes short of 16-byte alignment before this call instruction so
2700 // that the return address being pushed will make the native stack
2701 // 16-byte aligned.
2702 // Pre-allocate LinearScan::NumPreAllocatedSpillLocs * 8 bytes for
2703 // spill locations.
2704 // This value must be consistent with LinearScan::NumPreAllocatedSpillLocs.
2705 "sub $0x80, %rsp\n"
2706 // May need cfi_adjust_cfa_offset annotations: Task #1747813
2707 "call *%rdx\n"
2708 "add $0x80, %rsp\n"
2710 // Restore infoPtr into %rbx
2711 "pop %rbx\n"
2712 ".cfi_adjust_cfa_offset -8\n"
2714 // Copy the values passed from jitted code into *infoPtr
2715 "mov %rdi, 0x0(%rbx)\n"
2716 "mov %rsi, 0x8(%rbx)\n"
2717 "mov %rdx, 0x10(%rbx)\n"
2718 "mov %rcx, 0x18(%rbx)\n"
2719 "mov %r8, 0x20(%rbx)\n"
2720 "mov %r9, 0x28(%rbx)\n"
2722 // Service request "callee-saved". (Returnee-saved?)
2723 "mov %r15, 0x30(%rbx)\n"
2725 // Epilogue
2726 "pop %rbp\n"
2727 ".cfi_restore rbp\n"
2728 ".cfi_adjust_cfa_offset -8\n"
2729 "ret\n"
2730 ".cfi_endproc\n"
2733 struct TReqInfo {
2734 uintptr_t requestNum;
2735 uintptr_t args[5];
2737 // Some TC registers need to be preserved across service requests.
2738 uintptr_t saved_rStashedAr;
2741 void enterTCHelper(Cell* vm_sp,
2742 Cell* vm_fp,
2743 TCA start,
2744 TReqInfo* infoPtr,
2745 ActRec* firstAR,
2746 uint8_t* targetCacheBase) asm ("__enterTCHelper");
2748 struct DepthGuard {
2749 static __thread int m_depth;
2750 DepthGuard() { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
2751 ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }
2753 __thread int DepthGuard::m_depth;
2754 void
2755 TranslatorX64::enterTC(SrcKey sk) {
2756 using namespace TargetCache;
2757 TCA start = getTranslation(&sk, true);
2759 DepthGuard d;
2760 TReqInfo info;
2761 const uintptr_t& requestNum = info.requestNum;
2762 uintptr_t* args = info.args;
2763 for (;;) {
2764 ASSERT(vmfp() >= vmsp() - 1);
2765 ASSERT(sizeof(Cell) == 16);
2766 ASSERT(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
2767 ASSERT(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
2769 TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
2770 vmfp(), ((ActRec*)vmfp())->m_func->name()->data(), vmsp());
2771 s_writeLease.gremlinUnlock();
2772 // Keep dispatching until we end up somewhere the translator
2773 // recognizes, or we luck out and the leaseholder exits.
2774 while (!start) {
2775 TRACE(2, "enterTC forwarding BB to interpreter\n");
2776 g_vmContext->m_pc = curUnit()->at(sk.offset());
2777 INC_TPC(interp_bb);
2778 g_vmContext->dispatchBB();
2779 sk = SrcKey(curFunc(), g_vmContext->getPC());
2780 start = getTranslation(&sk, true);
2782 ASSERT(start);
2783 ASSERT(isValidCodeAddress(start));
2784 tl_regState = REGSTATE_DIRTY;
2785 ASSERT(!s_writeLease.amOwner());
2786 curFunc()->validate();
2787 INC_TPC(enter_tc);
2789 // The asm volatile here is to force C++ to spill anything that
2790 // might be in a callee-saved register (aside from rbp).
2791 // enterTCHelper does not preserve these registers.
2792 asm volatile("" : : : "rbx","r12","r13","r14","r15");
2793 enterTCHelper(vmsp(), vmfp(), start, &info, vmFirstAR(),
2794 tl_targetCaches.base);
2795 asm volatile("" : : : "rbx","r12","r13","r14","r15");
2797 tl_regState = REGSTATE_CLEAN; // Careful: pc isn't sync'ed yet.
2798 // Debugging code: cede the write lease half the time.
2799 if (debug && (RuntimeOption::EvalJitStressLease)) {
2800 if (d.m_depth == 1 && (rand() % 2) == 0) {
2801 s_writeLease.gremlinLock();
2805 TRACE(4, "enterTC: %p fp%p sp%p } return\n", start,
2806 vmfp(), vmsp());
2807 TRACE(4, "enterTC: request(%s) args: %lx %lx %lx %lx %lx\n",
2808 reqName(requestNum),
2809 args[0], args[1], args[2], args[3], args[4]);
2810 ASSERT(vmfp() >= vmsp() - 1 || requestNum == REQ_EXIT);
2812 if (debug) {
2813 // Ensure that each case either returns, or drives start to a valid
2814 // value.
2815 start = TCA(0xbee5face);
2818 // The contract is that each case will either exit, by returning, or
2819 // set sk to the place where execution should resume, and optionally
2820 // set start to the hardware translation of the resumption point.
2822 // start and sk might be subtly different; i.e., there are cases where
2823 // start != NULL && start != getTranslation(sk). For instance,
2824 // REQ_BIND_CALL has not finished executing the OpCall when it gets
2825 // here, and has even done some work on its behalf. sk == OpFCall,
2826 // while start == the point in the TC that's "half-way through" the
2827 // Call instruction. If we punt to the interpreter, the interpreter
2828 // will redo some of the work that the translator has already done.
2829 INC_TPC(service_req);
2830 switch (requestNum) {
2831 case REQ_EXIT: {
2832 // fp is not valid anymore
2833 vmfp() = NULL;
2834 return;
2837 case REQ_BIND_CALL: {
2838 ReqBindCall* req = (ReqBindCall*)args[0];
2839 ActRec* calleeFrame = (ActRec*)args[1];
2840 TCA toSmash = req->m_toSmash;
2841 Func *func = const_cast<Func*>(calleeFrame->m_func);
2842 int nArgs = req->m_nArgs;
2843 bool isImmutable = req->m_isImmutable;
2844 TCA dest = tx64->funcPrologue(func, nArgs);
2845 TRACE(2, "enterTC: bindCall %s -> %p\n", func->name()->data(), dest);
2846 if (isImmutable) {
2847 // If we *know* we're calling the right function, don't bother
2848 // with the dynamic check of ar->m_func.
2849 dest = skipFuncCheck(dest);
2850 TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
2851 func->name()->data(), dest);
2853 LeaseHolder writer(s_writeLease, NO_ACQUIRE);
2854 if (dest && writer.acquire()) {
2855 TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
2856 smash(tx64->getAsmFor(toSmash), toSmash, dest);
2857 // sk: stale, but doesn't matter since we have a valid dest TCA.
2858 } else {
2859 // We need translator help; we're not at the callee yet, so
2860 // roll back. The prelude has done some work already, but it
2861 // should be safe to redo.
2862 TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
2863 toSmash, dest);
2864 sk = req->m_sourceInstr;
2866 start = dest;
2867 } break;
2869 case REQ_BIND_SIDE_EXIT:
2870 case REQ_BIND_JMP:
2871 case REQ_BIND_JMP_NO_IR:
2872 case REQ_BIND_ADDR: {
2873 TCA toSmash = (TCA)args[0];
2874 Offset off = args[1];
2875 sk = SrcKey(curFunc(), off);
2876 if (requestNum == REQ_BIND_SIDE_EXIT) {
2877 SKTRACE(3, sk, "side exit taken!\n");
2879 start = bindJmp(toSmash, sk, requestNum == REQ_BIND_ADDR,
2880 requestNum == REQ_BIND_JMP_NO_IR);
2881 } break;
2883 case REQ_BIND_JMPCC_FIRST: {
2884 TCA toSmash = (TCA)args[0];
2885 Offset offTaken = (Offset)args[1];
2886 Offset offNotTaken = (Offset)args[2];
2887 ConditionCode cc = ConditionCode(args[3]);
2888 bool taken = int64(args[4]) & 1;
2889 start = bindJmpccFirst(toSmash, offTaken, offNotTaken, taken, cc);
2890 // SrcKey: we basically need to emulate the fail
2891 sk = SrcKey(curFunc(), taken ? offTaken : offNotTaken);
2892 } break;
2894 case REQ_BIND_JMPCC_SECOND: {
2895 TCA toSmash = (TCA)args[0];
2896 Offset off = (Offset)args[1];
2897 ConditionCode cc = ConditionCode(args[2]);
2898 start = bindJmpccSecond(toSmash, off, cc);
2899 sk = SrcKey(curFunc(), off);
2900 } break;
2902 case REQ_BIND_REQUIRE: {
2903 ReqLitStaticArgs* rlsa = (ReqLitStaticArgs*)args[0];
2904 sk = SrcKey((Func*)args[1], (Offset)args[2]);
2905 start = getTranslation(&sk, true);
2906 if (start) {
2907 LeaseHolder writer(s_writeLease);
2908 if (writer) {
2909 SrcRec* sr = getSrcRec(sk);
2910 sr->chainFrom(a, IncomingBranch(&rlsa->m_pseudoMain));
2913 } break;
2915 case REQ_RETRANSLATE_NO_IR: {
2916 TCA toSmash = (TCA)args[0];
2917 sk = SrcKey(curFunc(), (Offset)args[1]);
2918 start = retranslateAndPatchNoIR(sk, true, toSmash);
2919 SKTRACE(2, sk, "retranslated (without IR) @%p\n", start);
2920 } break;
2922 case REQ_RETRANSLATE: {
2923 INC_TPC(retranslate);
2924 sk = SrcKey(curFunc(), (Offset)args[0]);
2925 start = retranslate(sk, true, RuntimeOption::EvalJitUseIR);
2926 SKTRACE(2, sk, "retranslated @%p\n", start);
2927 } break;
2929 case REQ_INTERPRET: {
2930 Offset off = args[0];
2931 int numInstrs = args[1];
2932 g_vmContext->m_pc = curUnit()->at(off);
2934 * We know the compilation unit has not changed; basic blocks do
2935 * not span files. I claim even exceptions do not violate this
2936 * axiom.
2938 ASSERT(numInstrs >= 0);
2939 ONTRACE(5, SrcKey(curFunc(), off).trace("interp: enter\n"));
2940 if (numInstrs) {
2941 s_perfCounters[tpc_interp_instr] += numInstrs;
2942 g_vmContext->dispatchN(numInstrs);
2943 } else {
2944 // numInstrs == 0 means it wants to dispatch until BB ends
2945 INC_TPC(interp_bb);
2946 g_vmContext->dispatchBB();
2948 SrcKey newSk(curFunc(), g_vmContext->getPC());
2949 SKTRACE(5, newSk, "interp: exit\n");
2950 sk = newSk;
2951 start = getTranslation(&newSk, true);
2952 } break;
2954 case REQ_POST_INTERP_RET: {
2955 // This is only responsible for the control-flow aspect of the Ret:
2956 // getting to the destination's translation, if any.
2957 ActRec* ar = (ActRec*)args[0];
2958 ActRec* caller = (ActRec*)args[1];
2959 ASSERT((Cell*) ar < vmsp()); // ar is already logically popped
2960 ASSERT((Cell*) caller > vmsp()); // caller is now active
2961 ASSERT((Cell*) caller == vmfp());
2962 Unit* destUnit = caller->m_func->unit();
2963 // Set PC so logging code in getTranslation doesn't get confused.
2964 vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
2965 SrcKey dest(caller->m_func, vmpc());
2966 sk = dest;
2967 start = getTranslation(&dest, true);
2968 TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
2969 ar->m_func->fullName()->data(),
2970 caller->m_func->fullName()->data());
2971 } break;
2973 case REQ_RESUME: {
2974 SrcKey dest(curFunc(), vmpc());
2975 sk = dest;
2976 start = getTranslation(&dest, true);
2977 } break;
2979 case REQ_STACK_OVERFLOW: {
2981 * we need to construct the pc of the fcall from the return
2982 * address (which will be after the fcall). Because fcall is
2983 * a variable length instruction, and because we sometimes
2984 * delete instructions from the instruction stream, we
2985 * need to use fpi regions to find the fcall.
2987 const FPIEnt* fe = curFunc()->findPrecedingFPI(
2988 curUnit()->offsetOf(vmpc()));
2989 vmpc() = curUnit()->at(fe->m_fcallOff);
2990 ASSERT(isFCallStar(*vmpc()));
2991 raise_error("Stack overflow");
2992 NOT_REACHED();
2996 NOT_REACHED();
2999 void TranslatorX64::resume(SrcKey sk) {
3000 enterTC(sk);
3004 * emitServiceReq --
3006  *   Call a translator service co-routine. The code emitted here
3007 * reenters the enterTC loop, invoking the requested service. Control
3008 * will be returned non-locally to the next logical instruction in
3009 * the TC.
3011 * Return value is a destination; we emit the bulky service
3012 * request code into astubs.
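 *
 *   Usage sketch (illustrative; mirrors callers elsewhere in this file):
 *
 *     TCA stub = emitServiceReq(REQ_RETRANSLATE, 1, uint64_t(sk.offset()));
 *     a.  jmp(stub);  // control leaves the TC and re-enters enterTC's loop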
3016 TranslatorX64::emitServiceReqVA(bool align, ServiceRequest req, int numArgs,
3017 va_list args) {
3018 if (align) {
3019 moveToAlign(astubs);
3021 TCA retval = astubs.code.frontier;
3022 emitEagerVMRegSave(astubs, SaveFP);
3024 * Move args into appropriate regs.
3026 TRACE(3, "Emit Service Req %s(", reqName(req));
3027 for (int i = 0; i < numArgs; i++) {
3028 uint64_t argVal = va_arg(args, uint64_t);
3029 TRACE(3, "%p,", (void*)argVal);
3030 emitImmReg(astubs, argVal, serviceReqArgRegs[i]);
3032 TRACE(3, ")\n");
3033 emitImmReg(astubs, req, rdi);
3035 * Weird hand-shaking with enterTC: reverse-call a service routine.
3037 astubs. ret();
3038 recordBCInstr(OpServiceRequest, astubs, retval);
3039 translator_not_reached(astubs);
3040 return retval;
3044 TranslatorX64::emitServiceReq(ServiceRequest req, int numArgs, ...) {
3045 va_list args;
3046 va_start(args, numArgs);
3047 TCA retval = emitServiceReqVA(true, req, numArgs, args);
3048 va_end(args);
3049 return retval;
3053 TranslatorX64::emitServiceReq(bool align, ServiceRequest req, int numArgs,
3054 ...) {
3055 va_list args;
3056 va_start(args, numArgs);
3057 TCA retval = emitServiceReqVA(align, req, numArgs, args);
3058 va_end(args);
3059 return retval;
3063 TranslatorX64::emitTransCounterInc(X64Assembler& a) {
3064 TCA start = a.code.frontier;
3065 if (!isTransDBEnabled()) return start;
3066 uint64* counterAddr = getTransCounterAddr();
3068 a.mov_imm64_reg((uint64)counterAddr, rScratch);
3069 a.emitLockPrefix();
3070 a.inc_mem64(rScratch, 0);
3072 return start;
3075 void
3076 TranslatorX64::spillTo(DataType type, PhysReg reg, bool writeType,
3077 PhysReg base, int disp) {
3078 X64Assembler& a = *m_spillFillCode;
3079 // Zero out the count at the same time as writing the type.
3080 SpaceRecorder sr("_Spill", a);
3082 Stats::emitInc(a, Stats::Tx64_Spill);
3083 emitStoreTypedValue(a, type, reg, disp, base, writeType);
3086 void
3087 TranslatorX64::spill(const Location& loc, DataType type,
3088 PhysReg reg, bool writeType) {
3089 PhysReg base;
3090 int disp;
3091 locToRegDisp(loc, &base, &disp);
3092 spillTo(type, reg, writeType, base, disp);
3093 TRACE(2, "%s: (%s, %lld) -> v: %d(r%d) type%d\n",
3094 __func__,
3095 loc.spaceName(), loc.offset, int(disp + TVOFF(m_data)), base, type);
3098 void
3099 TranslatorX64::fill(const Location& loc, PhysReg reg) {
3100 SpaceRecorder sr("_Fill", *m_spillFillCode);
3101 PhysReg base;
3102 int disp;
3103 locToRegDisp(loc, &base, &disp);
3104 TRACE(2, "fill: (%s, %lld) -> reg %d\n",
3105 loc.spaceName(), loc.offset, reg);
3106 m_spillFillCode->load_reg64_disp_reg64(base, disp + TVOFF(m_data), reg);
3109 void TranslatorX64::fillByMov(PhysReg src, PhysReg dst) {
3110 SpaceRecorder sr("_FillMov", *m_spillFillCode);
3111 ASSERT(src != dst);
3112 m_spillFillCode->mov_reg64_reg64(src, dst);
3115 void
3116 TranslatorX64::loadImm(int64 immVal, PhysReg reg) {
3117 SpaceRecorder sr("_FillImm", *m_spillFillCode);
3118 TRACE(2, "loadImm: 0x%llx -> reg %d\n", immVal, reg);
3119 emitImmReg(*m_spillFillCode, immVal, reg);
3122 void
3123 TranslatorX64::poison(PhysReg dest) {
3124 static const bool poison = false;
3125 if (poison) {
3126 emitImmReg(*m_spillFillCode, 0xbadf00d105e5babe, dest);
3131 * Spill all dirty registers, mark all registers as 'free' in the
3132 * register file, and update rVmSp to point to the top of stack at
3133 * the end of the tracelet.
3135 void
3136 TranslatorX64::syncOutputs(const Tracelet& t) {
3137 syncOutputs(t.m_stackChange);
3141 * Same as above, except that it sets rVmSp to point to the top of
3142 * stack at the beginning of the specified instruction.
3144 void
3145 TranslatorX64::syncOutputs(const NormalizedInstruction& i) {
3146 syncOutputs(i.stackOff);
3149 void
3150 TranslatorX64::syncOutputs(int stackOff) {
3151 SpaceRecorder sr("_SyncOuts", a);
3152 TCA start = a.code.frontier;
3153 // Mark all stack locations above the top of stack as dead
3154 m_regMap.scrubStackEntries(stackOff);
3155 // Spill all dirty registers
3156 m_regMap.cleanAll();
3157 if (stackOff != 0) {
3158 TRACE(1, "syncOutputs: rVmSp + %d\n", stackOff);
3159 // t.stackChange is in negative Cells, not bytes.
3160 a. add_imm32_reg64(-cellsToBytes(stackOff), rVmSp);
3162 // All registers have been smashed for realz, yo
3163 m_regMap.smashRegs(kAllRegs);
3164 recordBCInstr(OpSyncOutputs, a, start);
3168 * getBinaryStackInputs --
3170 * Helper for a common pattern of instruction, where two items are popped
3171 * and one is pushed. The second item on the stack at the beginning of
3172 * the instruction is both a source and destination.
3174 static void
3175 getBinaryStackInputs(const RegAlloc& regmap, const NormalizedInstruction& i,
3176 PhysReg& rsrc, PhysReg& rsrcdest) {
3177 ASSERT(i.inputs.size() == 2);
3178 ASSERT(i.outStack && !i.outLocal);
3179 rsrcdest = regmap.getReg(i.outStack->location);
3180 rsrc = regmap.getReg(i.inputs[0]->location);
3181 ASSERT(regmap.getReg(i.inputs[1]->location) == rsrcdest);
3184 // emitBox --
3185 // Leave a boxed version of input in RAX. Destroys the register
3186 // mapping.
3187 void
3188 TranslatorX64::emitBox(DataType t, PhysReg rSrc) {
3189 if (false) { // typecheck
3190 RefData* retval = tvBoxHelper(KindOfArray, 0xdeadbeef01ul);
3191 (void)retval;
3193 // tvBoxHelper will set the refcount of the inner cell to 1
3194 // for us. Because the inner cell now holds a reference to the
3195 // original value, we don't need to perform a decRef.
3196 EMIT_CALL(a, tvBoxHelper, IMM(t), R(rSrc));
3199 // emitUnboxTopOfStack --
3200 // Unbox the known-to-be Variant on top of stack in place.
3201 void
3202 TranslatorX64::emitUnboxTopOfStack(const NormalizedInstruction& i) {
3203 const vector<DynLocation*>& inputs = i.inputs;
3205 ASSERT(inputs.size() == 1);
3206 ASSERT(i.outStack && !i.outLocal);
3207 ASSERT(inputs[0]->isStack());
3208 ASSERT(i.outStack && i.outStack->location == inputs[0]->location);
3209 DataType outType = inputs[0]->rtt.innerType();
3210 ASSERT(outType != KindOfInvalid);
3211 ASSERT(outType == i.outStack->outerType());
3212 PhysReg rSrc = getReg(inputs[0]->location);
3213 // Detach the register rSrc from the input location. We must
3214 // do this dance because the input and output location are the
3215 // same and we want to have separate registers for the input
3216 // and output.
3217 m_regMap.invalidate(inputs[0]->location);
3218 m_regMap.bind(rSrc, Location(), KindOfInvalid, RegInfo::SCRATCH);
3219 // This call to allocOutputRegs will allocate a new register
3220 // for the output location
3221 m_regMap.allocOutputRegs(i);
3222 PhysReg rDest = getReg(i.outStack->location);
3223 emitDeref(a, rSrc, rDest);
3224 emitIncRef(rDest, outType);
3225 // decRef the var on the evaluation stack
3226 emitDecRef(i, rSrc, KindOfRef);
3227 m_regMap.freeScratchReg(rSrc);
3230 // setOpOpToOpcodeOp --
3231 // The SetOp opcode space has nothing to do with the bytecode opcode
3232 // space. Reasonable people like it that way, so translate them here.
3233 static Opcode
3234 setOpOpToOpcodeOp(SetOpOp soo) {
3235 switch(soo) {
3236 #define SETOP_OP(_soo, _bc) case SetOp##_soo: return _bc;
3237 SETOP_OPS
3238 #undef SETOP_OP
3239 default: ASSERT(false);
3241 return -1;
3244 void
3245 TranslatorX64::binaryIntegerArith(const NormalizedInstruction& i,
3246 Opcode op,
3247 PhysReg srcReg,
3248 PhysReg srcDestReg) {
3249 switch (op) {
3250 #define CASEIMM(OpBc, x64op) \
3251 case OpBc: { \
3252 if (i.hasConstImm) { \
3253 a. x64op ## _imm64_reg64(i.constImm.u_I64A, srcDestReg); \
3254 } else { \
3255 a. x64op ## _reg64_reg64(srcReg, srcDestReg); \
3256 } } break;
3257 #define CASE(OpBc, x64op) \
3258 case OpBc: { \
3259 a. x64op ## _reg64_reg64(srcReg, srcDestReg); \
3260 } break;
3261 CASEIMM(OpAdd, add)
3262 CASEIMM(OpSub, sub)
3263 CASEIMM(OpBitAnd, and)
3264 CASEIMM(OpBitOr, or)
3265 CASEIMM(OpBitXor, xor)
3266 CASE(OpMul, imul)
3267 #undef CASE
3268 #undef CASEIMM
3270 default: {
3271 not_reached();
3276 void
3277 TranslatorX64::binaryArithCell(const NormalizedInstruction &i,
3278 Opcode op, const DynLocation& in1,
3279 const DynLocation& inout) {
3280 ASSERT(in1.rtt.isInt());
3281 ASSERT(inout.rtt.isInt());
3282 ASSERT(in1.outerType() != KindOfRef);
3283 ASSERT(in1.isStack());
3284 ASSERT(inout.outerType() != KindOfRef);
3285 ASSERT(inout.isStack());
3286 m_regMap.allocOutputRegs(i);
3287 PhysReg srcReg = m_regMap.getReg(in1.location);
3288 PhysReg srcDestReg = m_regMap.getReg(inout.location);
3289 binaryIntegerArith(i, op, srcReg, srcDestReg);
3292 void
3293 TranslatorX64::binaryArithLocal(const NormalizedInstruction &i,
3294 Opcode op,
3295 const DynLocation& in1,
3296 const DynLocation& in2,
3297 const DynLocation& out) {
3298 // The caller must guarantee that these conditions hold
3299 ASSERT(in1.rtt.isInt());
3300 ASSERT(in2.rtt.isInt());
3301 ASSERT(in1.outerType() != KindOfRef);
3302 ASSERT(in1.isStack());
3303 ASSERT(in2.isLocal());
3304 ASSERT(out.isStack());
3306 PhysReg srcReg = m_regMap.getReg(in1.location);
3307 PhysReg outReg = m_regMap.getReg(out.location);
3308 PhysReg localReg = m_regMap.getReg(in2.location);
3309 if (in2.outerType() != KindOfRef) {
3310 // The local is not a var, so we can operate directly on the
3311 // local's register. We will need to update outReg after the
3312 // operation.
3313 binaryIntegerArith(i, op, srcReg, localReg);
3314 // We operated directly on the local's register, so we need to update
3315 // outReg
3316 emitMovRegReg(localReg, outReg);
3317 } else {
3318 // The local is a var, so we have to read its value into outReg
3319 // on operate on that. We will need to write the result back
3320 // to the local after the operation.
3321 emitDeref(a, localReg, outReg);
3322 binaryIntegerArith(i, op, srcReg, outReg);
3323 // We operated on outReg, so we need to write the result back to the
3324 // local
3325 a. store_reg64_disp_reg64(outReg, 0, localReg);
3329 static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
3330 ASSERT(tl_regState == REGSTATE_DIRTY);
3331 tl_regState = REGSTATE_CLEAN;
3332 vmfp() = (Cell*)ar;
3333 vmsp() = sp;
3334 vmpc() = curUnit()->at(pcOff);
3335 ASSERT(vmsp() <= vmfp());
3338 #define O(opcode, imm, pusph, pop, flags) \
3340  * The interpOne methods save m_pc, m_fp, and m_sp into the ExecutionContext,
3341  * call into the interpreter, and then return a pointer to the
3342 * current ExecutionContext.
3343 */ \
3344 VMExecutionContext* \
3345 interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
3346 interp_set_regs(ar, sp, pcOff); \
3347 SKTRACE(5, SrcKey(curFunc(), vmpc()), "%40s %p %p\n", \
3348 "interpOne" #opcode " before (fp,sp)", \
3349 vmfp(), vmsp()); \
3350 ASSERT(*vmpc() == Op ## opcode); \
3351 VMExecutionContext* ec = g_vmContext; \
3352 Stats::inc(Stats::Instr_InterpOne ## opcode); \
3353 INC_TPC(interp_one) \
3354 /* Correct for over-counting in TC-stats. */ \
3355 Stats::inc(Stats::Instr_TC, -1); \
3356 ec->op##opcode(); \
3358 * Only set regstate back to dirty if an exception is not
3359 * propagating. If an exception is throwing, regstate for this call
3360 * is actually still correct, and we don't have information in the
3361 * fixup map for interpOne calls anyway.
3362 */ \
3363 tl_regState = REGSTATE_DIRTY; \
3364 return ec; \
3367 OPCODES
3368 #undef O
3370 void* interpOneEntryPoints[] = {
3371 #define O(opcode, imm, pusph, pop, flags) \
3372 (void*)(interpOne ## opcode),
3373 OPCODES
3374 #undef O
3377 void TranslatorX64::fixupWork(VMExecutionContext* ec, ActRec* rbp) const {
3378 ASSERT(RuntimeOption::EvalJit);
3379 ActRec* nextAr = rbp;
3380 do {
3381 rbp = nextAr;
3382 FixupMap::VMRegs regs;
3383 TRACE(10, "considering frame %p, %p\n", rbp, (void*)rbp->m_savedRip);
3384 if (g_vmContext->m_stack.isValidAddress(rbp->m_savedRbp) &&
3385 m_fixupMap.getFrameRegs(rbp, &regs)) {
3386 TRACE(10, "fixup func %s fp %p sp %p pc %p\n",
3387 regs.m_fp->m_func->name()->data(),
3388 regs.m_fp, regs.m_sp, regs.m_pc);
3389 ec->m_fp = const_cast<ActRec*>(regs.m_fp);
3390 ec->m_pc = regs.m_pc;
3391 vmsp() = regs.m_sp;
3392 return;
3394 nextAr = (ActRec*)rbp->m_savedRbp;
3395 } while (rbp && rbp != nextAr);
3396 // OK, we've exhausted the entire actRec chain.
3397 // We are only invoking ::fixup() from contexts that were known
3398 // to be called out of the TC, so this cannot happen.
3399 NOT_REACHED();
3402 void TranslatorX64::fixup(VMExecutionContext* ec) const {
3403 // Start looking for fixup entries at the current (C++) frame. This
3404 // will walk the frames upward until we find a TC frame.
3405 ActRec* rbp;
3406 asm volatile("mov %%rbp, %0" : "=r"(rbp));
3407 fixupWork(ec, rbp);
3410 void
3411 TranslatorX64::syncWork() {
3412 ASSERT(tl_regState == REGSTATE_DIRTY);
3413 fixup(g_vmContext);
3414 tl_regState = REGSTATE_CLEAN;
3415 Stats::inc(Stats::TC_Sync);
3418 void
3419 TranslatorX64::emitInterpOne(const Tracelet& t,
3420 const NormalizedInstruction& ni) {
3421 // Write any dirty values to memory
3422 m_regMap.cleanAll();
3423 // Call into the appropriate interpOne method. Note that this call will
3424 // preserve the callee-saved registers including rVmFp and rVmSp.
3425 if (false) { /* typecheck */
3426 UNUSED VMExecutionContext* ec = interpOnePopC((ActRec*)vmfp(), vmsp(), 0);
3428 void* func = interpOneEntryPoints[ni.op()];
3429 TRACE(3, "ip %p of unit %p -> interpOne @%p\n", ni.pc(), ni.unit(), func);
3430 EMIT_CALL(a, func,
3431 R(rVmFp),
3432 RPLUS(rVmSp, -int32_t(cellsToBytes(ni.stackOff))),
3433 IMM(ni.source.offset()));
3434 // The interpreter may have written to memory, so we need to invalidate
3435 // all locations
3436 m_regMap.reset();
3437 // The interpOne method returned a pointer to the current
3438 // ExecutionContext in rax, so we can read the 'm_*' fields
3439 // by adding the appropriate offset to rax and dereferencing.
3441 // If this instruction ends the tracelet, we have some extra work to do.
3442 if (ni.breaksBB) {
3443 // Read the 'm_fp' and 'm_stack.m_top' fields into the rVmFp and
3444 // rVmSp registers.
3445 a. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
3446 rVmFp);
3447 a. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
3448 Stack::topOfStackOffset(), rVmSp);
3449 if (opcodeChangesPC(ni.op())) {
3450 // If interpreting this instruction can potentially set PC to point
3451         // to something other than the next instruction in the bytecode,
3452 // we need to emit a service request to figure out where to go next
3453 TCA stubDest = emitServiceReq(REQ_RESUME, 0ull);
3454 a. jmp(stubDest);
3455 } else {
3456 // If this instruction always advances PC to the next instruction in
3457 // the bytecode, then we know what SrcKey to bind to
3458 emitBindJmp(nextSrcKey(t, ni));
3463 bool isContextFixed() {
3464 // Translations for pseudomains don't have a fixed context class
3465 return !curFunc()->isPseudoMain();
3468 // could be static but used in hopt/codegen.cpp
3469 void raiseUndefVariable(StringData* nm) {
3470 raise_notice(Strings::UNDEFINED_VARIABLE, nm->data());
3471 // FIXME: do we need to decref the string if an exception is
3472 // propagating?
3473 if (nm->decRefCount() == 0) { nm->release(); }
3476 static TXFlags
3477 planBinaryArithOp(const NormalizedInstruction& i) {
3478 ASSERT(i.inputs.size() == 2);
3479 return nativePlan(i.inputs[0]->isInt() && i.inputs[1]->isInt());
3482 void
3483 TranslatorX64::analyzeBinaryArithOp(Tracelet& t, NormalizedInstruction& i) {
3484 i.m_txFlags = planBinaryArithOp(i);
3487 void
3488 TranslatorX64::translateBinaryArithOp(const Tracelet& t,
3489 const NormalizedInstruction& i) {
3490 const Opcode op = i.op();
3491 ASSERT(op == OpSub || op == OpMul || op == OpBitAnd ||
3492 op == OpBitOr || op == OpBitXor);
3493 ASSERT(planBinaryArithOp(i));
3494 ASSERT(i.inputs.size() == 2);
3496 binaryArithCell(i, op, *i.inputs[0], *i.outStack);
3499 static inline bool sameDataTypes(DataType t1, DataType t2) {
3500 return TypeConstraint::equivDataTypes(t1, t2);
3503 static TXFlags
3504 planSameOp_SameTypes(const NormalizedInstruction& i) {
3505 ASSERT(i.inputs.size() == 2);
3506 const RuntimeType& left = i.inputs[0]->rtt;
3507 const RuntimeType& right = i.inputs[1]->rtt;
3508 DataType leftType = left.outerType();
3509 DataType rightType = right.outerType();
3510 return nativePlan(sameDataTypes(leftType, rightType) &&
3511 (left.isNull() || leftType == KindOfBoolean ||
3512 left.isInt() || left.isString()));
3515 static TXFlags
3516 planSameOp_DifferentTypes(const NormalizedInstruction& i) {
3517 ASSERT(i.inputs.size() == 2);
3518 DataType leftType = i.inputs[0]->outerType();
3519 DataType rightType = i.inputs[1]->outerType();
3520 if (!sameDataTypes(leftType, rightType)) {
3521 if (IS_REFCOUNTED_TYPE(leftType) || IS_REFCOUNTED_TYPE(rightType)) {
3522 // For dissimilar datatypes, we might call out to handle a refcount.
3523 return Supported;
3525 return Native;
3527 return Interp;
3530 void
3531 TranslatorX64::analyzeSameOp(Tracelet& t, NormalizedInstruction& i) {
3532 ASSERT(!(planSameOp_SameTypes(i) && planSameOp_DifferentTypes(i)));
3533 i.m_txFlags = TXFlags(planSameOp_SameTypes(i) | planSameOp_DifferentTypes(i));
3534 i.manuallyAllocInputs = true;
3537 void
3538 TranslatorX64::translateSameOp(const Tracelet& t,
3539 const NormalizedInstruction& i) {
3540 const Opcode op = i.op();
3541 ASSERT(op == OpSame || op == OpNSame);
3542 const vector<DynLocation*>& inputs = i.inputs;
3543 bool instrNeg = (op == OpNSame);
3544 ASSERT(inputs.size() == 2);
3545 ASSERT(i.outStack && !i.outLocal);
3546 DataType leftType = i.inputs[0]->outerType();
3547 DataType rightType DEBUG_ONLY = i.inputs[1]->outerType();
3548 ASSERT(leftType != KindOfRef);
3549 ASSERT(rightType != KindOfRef);
3551 if (planSameOp_DifferentTypes(i)) {
3552 // Some easy cases: when the valueTypes do not match,
3553 // NSame -> true and Same -> false.
3554 SKTRACE(1, i.source, "different types %d %d\n",
3555 leftType, rightType);
3556 emitDecRefInput(a, i, 0);
3557 emitDecRefInput(a, i, 1);
3558 m_regMap.allocOutputRegs(i);
3559 emitImmReg(a, instrNeg, getReg(i.outStack->location));
3560 return; // Done
3563 ASSERT(planSameOp_SameTypes(i));
3565 if (IS_NULL_TYPE(leftType)) {
3566 m_regMap.allocOutputRegs(i);
3567 // null === null is always true
3568 SKTRACE(2, i.source, "straightening null/null comparison\n");
3569 emitImmReg(a, !instrNeg, getReg(i.outStack->location));
3570 return; // Done
3572 if (IS_STRING_TYPE(leftType)) {
3573 int args[2];
3574 args[0] = 0;
3575 args[1] = 1;
3576 allocInputsForCall(i, args);
3577 EMIT_CALL(a, same_str_str,
3578 V(inputs[0]->location),
3579 V(inputs[1]->location));
3580 if (instrNeg) {
3581 a. xor_imm32_reg32(1, rax);
3583 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3584 RegInfo::DIRTY);
3585 return; // Done
3587 m_regMap.allocInputRegs(i);
3588 PhysReg src, srcdest;
3589 getBinaryStackInputs(m_regMap, i, src, srcdest);
3590 m_regMap.allocOutputRegs(i);
3591 ASSERT(getReg(i.outStack->location) == srcdest);
3592 a. cmp_reg64_reg64(src, srcdest);
3593 if (op == OpSame) {
3594 a. sete(srcdest);
3595 } else {
3596 a. setne(srcdest);
3598 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
3601 static bool
3602 trivialEquivType(const RuntimeType& rtt) {
3603 DataType t = rtt.valueType();
3604 return t == KindOfUninit || t == KindOfNull || t == KindOfBoolean ||
3605 rtt.isInt() || rtt.isString();
3608 static void
3609 emitConvertToBool(X64Assembler &a, PhysReg src, PhysReg dest, bool instrNeg) {
3610 a. test_reg64_reg64(src, src);
3611 if (instrNeg) {
3612 a. setz(dest);
3613 } else {
3614 a. setnz(dest);
3616 a. mov_reg8_reg64_unsigned(dest, dest);
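  // Net effect: dest ends up holding exactly 0 or 1, i.e. (src != 0), or
  // (src == 0) when instrNeg: test sets ZF from src, set[n]z writes a
  // single byte, and the unsigned 8->64 move clears the upper bits.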
3619 void
3620 TranslatorX64::analyzeEqOp(Tracelet& t, NormalizedInstruction& i) {
3621 ASSERT(i.inputs.size() == 2);
3622 RuntimeType &lt = i.inputs[0]->rtt;
3623 RuntimeType &rt = i.inputs[1]->rtt;
3624 i.m_txFlags = nativePlan(trivialEquivType(lt) &&
3625 trivialEquivType(rt));
3626 if (i.isNative() &&
3627 IS_NULL_TYPE(lt.outerType()) &&
3628 IS_NULL_TYPE(rt.outerType())) {
3629 i.manuallyAllocInputs = true;
3633 void
3634 TranslatorX64::translateEqOp(const Tracelet& t,
3635 const NormalizedInstruction& i) {
3636 const Opcode op = i.op();
3637 ASSERT(op == OpEq || op == OpNeq);
3638 ASSERT(i.isNative());
3639 const vector<DynLocation*>& inputs = i.inputs;
3640 bool instrNeg = (op == OpNeq);
3641 ASSERT(inputs.size() == 2);
3642 ASSERT(i.outStack && !i.outLocal);
3643 DataType leftType = i.inputs[0]->outerType();
3644 DataType rightType = i.inputs[1]->outerType();
3645 ASSERT(leftType != KindOfRef);
3646 ASSERT(rightType != KindOfRef);
3648 ConditionCode ccBranch = CC_E;
3649 if (instrNeg) ccBranch = ccNegate(ccBranch);
3651 // Inputless case.
3652 if (IS_NULL_TYPE(leftType) && IS_NULL_TYPE(rightType)) {
3653 ASSERT(i.manuallyAllocInputs);
3654 // null == null is always true
3655 bool result = !instrNeg;
3656 SKTRACE(2, i.source, "straightening null/null comparison\n");
3657 if (i.changesPC) {
3658 fuseBranchAfterStaticBool(t, i, result);
3659 } else {
3660 m_regMap.allocOutputRegs(i);
3661 emitImmReg(a, result, getReg(i.outStack->location));
3663 return; // Done
3666 if (IS_STRING_TYPE(leftType) || IS_STRING_TYPE(rightType)) {
3667 void* fptr = NULL;
3668 bool leftIsString = false;
3669 bool eqNullStr = false;
3670 switch (leftType) {
3671 STRINGCASE(): {
3672 leftIsString = true;
3673 switch (rightType) {
3674 STRINGCASE(): fptr = (void*)eq_str_str; break;
3675 case KindOfInt64: fptr = (void*)eq_int_str; break;
3676 case KindOfBoolean: fptr = (void*)eq_bool_str; break;
3677 NULLCASE(): fptr = (void*)eq_null_str; eqNullStr = true; break;
3678 default: ASSERT(false); break;
3680 } break;
3681 case KindOfInt64: fptr = (void*)eq_int_str; break;
3682 case KindOfBoolean: fptr = (void*)eq_bool_str; break;
3683 NULLCASE(): fptr = (void*)eq_null_str; eqNullStr = true; break;
3684 default: ASSERT(false); break;
3686 if (eqNullStr) {
3687 ASSERT(fptr == (void*)eq_null_str);
3688 EMIT_CALL(a, fptr,
3689 V(inputs[leftIsString ? 0 : 1]->location));
3690 } else {
3691 ASSERT(fptr != NULL);
3692 EMIT_CALL(a, fptr,
3693 V(inputs[leftIsString ? 1 : 0]->location),
3694 V(inputs[leftIsString ? 0 : 1]->location));
3696 if (i.changesPC) {
3697 fuseBranchSync(t, i);
3698 a. test_imm32_reg32(1, rax);
3699 fuseBranchAfterBool(t, i, ccNegate(ccBranch));
3700 return;
3702 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3703 RegInfo::DIRTY);
3704 if (instrNeg) {
3705 a. xor_imm32_reg32(1, rax);
3707 return;
3710 m_regMap.allocOutputRegs(i);
3711 PhysReg src, srcdest;
3712 getBinaryStackInputs(m_regMap, i, src, srcdest);
3713 ASSERT(getReg(i.outStack->location) == srcdest);
3714 if (i.changesPC) {
3715 fuseBranchSync(t, i);
3717 if (IS_NULL_TYPE(leftType) || IS_NULL_TYPE(rightType)) {
3718 if (IS_NULL_TYPE(leftType)) {
3719 a. test_reg64_reg64(srcdest, srcdest);
3720 } else {
3721 ASSERT(IS_NULL_TYPE(rightType));
3722 a. test_reg64_reg64(src, src);
3724 } else if (leftType == KindOfBoolean ||
3725 rightType == KindOfBoolean) {
3726 // OK to destroy src and srcdest in-place; their stack locations are
3727 // blown away by this instruction.
3728 if (leftType != KindOfBoolean)
3729 emitConvertToBool(a, src, src, false);
3730 if (rightType != KindOfBoolean)
3731 emitConvertToBool(a, srcdest, srcdest, false);
3732 a. cmp_reg64_reg64(src, srcdest);
3733 } else {
3734 a. cmp_reg64_reg64(src, srcdest);
3736 if (i.changesPC) {
3737 fuseBranchAfterBool(t, i, ccBranch);
3738 return;
3740 if (instrNeg) {
3741 a. setnz (srcdest);
3742 } else {
3743 a. setz (srcdest);
3745 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
3748 void
3749 TranslatorX64::analyzeLtGtOp(Tracelet& t, NormalizedInstruction& i) {
3750 ASSERT(i.inputs.size() == 2);
3751 const RuntimeType& left = i.inputs[0]->rtt;
3752 DataType leftType = left.outerType();
3753 DataType rightType = i.inputs[1]->outerType();
3754 i.m_txFlags = nativePlan(sameDataTypes(leftType, rightType) &&
3755 (left.isNull() ||
3756 leftType == KindOfBoolean ||
3757 left.isInt()));
3758 if (i.isNative() && IS_NULL_TYPE(left.outerType())) {
3759 // No inputs. w00t.
3760 i.manuallyAllocInputs = true;
3764 void
3765 TranslatorX64::translateLtGtOp(const Tracelet& t,
3766 const NormalizedInstruction& i) {
3767 const Opcode op = i.op();
3768 ASSERT(op == OpLt || op == OpLte || op == OpGt || op == OpGte);
3769 ASSERT(i.inputs.size() == 2);
3770 ASSERT(i.outStack && !i.outLocal);
3771 ASSERT(i.inputs[0]->outerType() != KindOfRef);
3772 ASSERT(i.inputs[1]->outerType() != KindOfRef);
3773 ASSERT(i.isNative());
3775 bool fEquals = (op == OpLte || op == OpGte);
3776 bool fLessThan = (op == OpLt || op == OpLte);
3778 m_regMap.allocOutputRegs(i);
3779 if (IS_NULL_TYPE(i.inputs[0]->outerType())) {
3780 ASSERT(IS_NULL_TYPE(i.inputs[1]->outerType()));
3781 // null < null is always false, null <= null is always true
3782 SKTRACE(2, i.source, "straightening null/null comparison\n");
3783 PhysReg rOut = getReg(i.outStack->location);
3784 bool resultIsTrue = (op == OpLte || op == OpGte);
3785 if (i.changesPC) {
3786 fuseBranchAfterStaticBool(t, i, resultIsTrue);
3787 } else {
3788 emitImmReg(a, resultIsTrue, rOut);
3790 return;
3792 PhysReg src, srcdest;
3793 getBinaryStackInputs(m_regMap, i, src, srcdest);
3794 ASSERT(getReg(i.outStack->location) == srcdest);
3795 if (i.changesPC) {
3796 fuseBranchSync(t, i);
3798 a. cmp_reg64_reg64(src, srcdest);
3799 static const ConditionCode opToCc[2][2] = {
3800 // !fEquals fEquals
3801 { CC_G, CC_GE }, // !fLessThan
3802 { CC_L, CC_LE }, // fLessThan
3804 ConditionCode cc = opToCc[fLessThan][fEquals];
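  // For example, OpLt is fLessThan && !fEquals, giving opToCc[1][0] == CC_L,
  // while OpGte is !fLessThan && fEquals, giving opToCc[0][1] == CC_GE.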
3805 if (i.changesPC) {
3806 // Fuse the coming branch.
3807 fuseBranchAfterBool(t, i, cc);
3808 return;
3810 a. setcc(cc, srcdest);
3811 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
3814 static TXFlags
3815 planUnaryBooleanOp(const NormalizedInstruction& i) {
3816 ASSERT(i.inputs.size() == 1);
3817 RuntimeType& rtt = i.inputs[0]->rtt;
3818 DataType inType = rtt.valueType();
3819 if (inType == KindOfArray) {
3820 return Supported;
3822 if (rtt.isString()) {
3823 return Simple;
3825 return nativePlan(rtt.isNull() ||
3826 inType == KindOfBoolean || rtt.isInt());
3829 void
3830 TranslatorX64::analyzeUnaryBooleanOp(Tracelet& t, NormalizedInstruction& i) {
3831 i.m_txFlags = planUnaryBooleanOp(i);
3834 void
3835 TranslatorX64::translateUnaryBooleanOp(const Tracelet& t,
3836 const NormalizedInstruction& i) {
3837 const Opcode op = i.op();
3838 ASSERT(op == OpCastBool || op == OpEmptyL);
3839 const vector<DynLocation*>& inputs = i.inputs;
3840 ASSERT(inputs.size() == 1);
3841 ASSERT(i.outStack && !i.outLocal);
3842 bool instrNeg = (op == OpEmptyL);
3843 DataType inType = inputs[0]->valueType();
3844 const Location& inLoc = inputs[0]->location;
3845 bool boxedForm = (inputs[0]->outerType() == KindOfRef);
3848 switch (inType) {
3849 NULLCASE(): {
3850 m_regMap.allocOutputRegs(i);
3851 PhysReg outReg = getReg(i.outStack->location);
3852 emitImmReg(a, instrNeg, outReg);
3853 ASSERT(i.isNative());
3854 } break;
3855 case KindOfBoolean: {
3856 if (op == OpCastBool) {
3857 // Casting bool to bool is a nop. CastBool's input must be
3858 // a cell on the stack as per the bytecode specification.
3859 ASSERT(inputs[0]->isStack());
3860 ASSERT(inputs[0]->outerType() != KindOfRef);
3861 ASSERT(inputs[0]->location.space == Location::Stack);
3862 ASSERT(i.isNative());
3863 break;
3865 m_regMap.allocOutputRegs(i);
3866 PhysReg reg = getReg(inLoc);
3867 PhysReg outReg = getReg(i.outStack->location);
3868 if (boxedForm) {
3869 emitDeref(a, reg, outReg);
3870 } else {
3871 emitMovRegReg(reg, outReg);
3873 if (instrNeg) {
3874 a. xor_imm32_reg32(1, outReg);
3876 } break;
3877 case KindOfInt64: {
3878 m_regMap.allocOutputRegs(i);
3879 PhysReg reg = getReg(inLoc);
3880 PhysReg outReg = getReg(i.outStack->location);
3881 ScratchReg scratch(m_regMap);
3882 if (boxedForm) {
3883 emitDeref(a, reg, *scratch);
3884 emitConvertToBool(a, *scratch, outReg, instrNeg);
3885 } else {
3886 emitConvertToBool(a, reg, outReg, instrNeg);
3888 } break;
3889 STRINGCASE():
3890 case KindOfArray: {
3891 bool doDecRef = (inputs[0]->isStack());
3892 void* fptr = IS_STRING_TYPE(inType) ?
3893 (doDecRef ? (void*)str_to_bool : (void*)str0_to_bool) :
3894 (doDecRef ? (void*)arr_to_bool : (void*)arr0_to_bool);
3895 if (boxedForm) {
3896 EMIT_CALL(a, fptr, DEREF(inLoc));
3897 } else {
3898 EMIT_CALL(a, fptr, V(inLoc));
3900 if (!IS_STRING_TYPE(inType)) {
3901 recordReentrantCall(i);
3903 if (instrNeg) {
3904 a. xor_imm32_reg32(1, rax);
3906 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3907 RegInfo::DIRTY);
3908 } break;
3909 default: {
3910 ASSERT(false);
3911 } break;
3915 void
3916 TranslatorX64::analyzeBranchOp(Tracelet& t, NormalizedInstruction& i) {
3917 i.m_txFlags = Supported;
3920 // Helper for decoding dests of branch-like instructions at the end of
3921 // a basic block.
3922 static void branchDests(const Tracelet& t,
3923 const NormalizedInstruction& i,
3924 SrcKey* outTaken, SrcKey* outNotTaken,
3925 int immIdx = 0) {
3926 *outNotTaken = nextSrcKey(t, i);
3927 int dest = i.imm[immIdx].u_BA;
3928 *outTaken = SrcKey(curFunc(), i.offset() + dest);
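  // E.g. for a JmpZ whose BA immediate is (say) +24, *outTaken names the
  // bytecode 24 bytes past this instruction's offset, while *outNotTaken is
  // simply the instruction that follows it in the tracelet.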
3931 void TranslatorX64::branchWithFlagsSet(const Tracelet& t,
3932 const NormalizedInstruction& i,
3933 ConditionCode cc) {
3934 ASSERT(i.op() == OpJmpNZ || i.op() == OpJmpZ);
3935 // not_taken
3936 SrcKey taken, notTaken;
3937 branchDests(t, i, &taken, &notTaken);
3938 TRACE(3, "branchWithFlagsSet %d %d cc%02x jmp%sz\n",
3939 taken.offset(), notTaken.offset(), cc,
3940 i.isJmpNZ() ? "n" : "");
3941 emitCondJmp(taken, notTaken, cc);
3944 void TranslatorX64::fuseBranchAfterStaticBool(const Tracelet& t,
3945 const NormalizedInstruction& i,
3946 bool resultIsTrue) {
3947 ASSERT(i.breaksBB);
3948 ASSERT(i.next);
3949 NormalizedInstruction &nexti = *i.next;
3950 fuseBranchSync(t, i);
3951 bool isTaken = (resultIsTrue == nexti.isJmpNZ());
3952 SrcKey taken, notTaken;
3953 branchDests(t, nexti, &taken, &notTaken);
3954 if (isTaken) {
3955 emitBindJmp(taken);
3956 } else {
3957 emitBindJmp(notTaken);
3961 void TranslatorX64::fuseBranchSync(const Tracelet& t,
3962 const NormalizedInstruction& i) {
3963 // Don't bother sync'ing the output of this instruction.
3964 m_regMap.scrubStackEntries(i.outStack->location.offset);
3965 syncOutputs(t);
3968 void TranslatorX64::fuseBranchAfterBool(const Tracelet& t,
3969 const NormalizedInstruction& i,
3970 ConditionCode cc) {
3971 ASSERT(i.breaksBB);
3972 ASSERT(i.next);
3973 NormalizedInstruction &nexti = *i.next;
3974 if (!i.next->isJmpNZ()) cc = ccNegate(cc);
3975 branchWithFlagsSet(t, nexti, cc);
3978 void
3979 TranslatorX64::translateBranchOp(const Tracelet& t,
3980 const NormalizedInstruction& i) {
3981 DEBUG_ONLY const Opcode op = i.op();
3982 ASSERT(op == OpJmpZ || op == OpJmpNZ);
3984 bool isZ = !i.isJmpNZ();
3985 ASSERT(i.inputs.size() == 1);
3986 ASSERT(!i.outStack && !i.outLocal);
3987 m_regMap.allocOutputRegs(i);
3988 const DynLocation& in = *i.inputs[0];
3989 const RuntimeType& rtt = in.rtt;
3990 const Location& inLoc = in.location;
3991 DataType inputType = rtt.outerType();
3992 PhysReg src = getReg(inLoc);
3994 * Careful. We're operating with intimate knowledge of the
3995 * constraints of the register allocator from here out.
3997 if (rtt.isString() || inputType == KindOfArray) {
3998 // str_to_bool and arr_to_bool will decRef for us
3999 void* fptr = IS_STRING_TYPE(inputType) ? (void*)str_to_bool :
4000 (void*)arr_to_bool;
4001 EMIT_CALL(a, fptr, V(inLoc));
4002 src = rax;
4003 ScratchReg sr(m_regMap, rax);
4004 syncOutputs(t);
4005 } else if (inputType != KindOfUninit &&
4006 inputType != KindOfNull &&
4007 inputType != KindOfBoolean &&
4008 !rtt.isInt()) {
4009 // input might be in-flight
4010 m_regMap.cleanLoc(inLoc);
4011 // Cast to a bool.
4012 if (false) {
4013 TypedValue *tv = NULL;
4014 int64 ret = tv_to_bool(tv);
4015 if (ret) {
4016 printf("zoot");
4019 TRACE(2, Trace::prettyNode("tv_to_bool", inLoc) + string("\n"));
4020 // tv_to_bool will decRef for us if appropriate
4021 EMIT_CALL(a, tv_to_bool, A(inLoc));
4022 recordReentrantCall(i);
4023 src = rax;
4024 ScratchReg sr(m_regMap, rax);
4025 syncOutputs(t);
4026 } else {
4027 syncOutputs(t);
4030 // not_taken
4031 SrcKey taken, notTaken;
4032 branchDests(t, i, &taken, &notTaken);
4034 // Since null always evaluates to false, we can emit an
4035 // unconditional jump. OpJmpNZ will never take the branch
4036 // while OpJmpZ will always take the branch.
4037 if (IS_NULL_TYPE(inputType)) {
4038 TRACE(1, "branch on Null -> always Z\n");
4039 emitBindJmp(isZ ? taken : notTaken);
4040 return;
4042 a. test_reg64_reg64(src, src);
4043 branchWithFlagsSet(t, i, isZ ? CC_Z : CC_NZ);
4046 void
4047 TranslatorX64::analyzeCGetL(Tracelet& t, NormalizedInstruction& i) {
4048 ASSERT(i.inputs.size() == 1);
4049 const RuntimeType& type = i.inputs[0]->rtt;
4050 i.m_txFlags = type.isUninit() ? Supported : Native;
4053 void
4054 TranslatorX64::translateCGetL(const Tracelet& t,
4055 const NormalizedInstruction& i) {
4056 const DEBUG_ONLY Opcode op = i.op();
4057 ASSERT(op == OpFPassL || OpCGetL);
4058 const vector<DynLocation*>& inputs = i.inputs;
4059 ASSERT(inputs.size() == 1);
4060 ASSERT(inputs[0]->isLocal());
4061 DataType outType = i.inputs[0]->valueType();
4062 ASSERT(outType != KindOfInvalid);
4064 // Check for use of an undefined local.
4065 if (inputs[0]->rtt.isUninit()) {
4066 ASSERT(!i.outStack || i.outStack->outerType() == KindOfNull);
4067 outType = KindOfNull;
4068 ASSERT(inputs[0]->location.offset < curFunc()->numLocals());
4069 const StringData* name = local_name(inputs[0]->location);
4070 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
4071 recordReentrantCall(i);
4072 if (i.outStack) {
4073 m_regMap.allocOutputRegs(i);
4075 return;
4079 * we can merge a CGetL with a following InstanceOfD
4080 * to avoid the incRef/decRef on the result (see
4081 * analyzeSecondPass).
4083 * outStack will be clear in that case.
4085 if (!i.outStack) return;
4086 ASSERT(outType == i.outStack->outerType());
4087 m_regMap.allocOutputRegs(i);
4088 if (IS_NULL_TYPE(outType)) return;
4089 PhysReg dest = getReg(i.outStack->location);
4091 if (i.manuallyAllocInputs && !m_regMap.hasReg(inputs[0]->location)) {
4092 fill(inputs[0]->location, dest);
4093 } else {
4094 PhysReg localReg = getReg(inputs[0]->location);
4095 emitMovRegReg(localReg, dest);
4097 if (inputs[0]->isVariant()) {
4098 emitDeref(a, dest, dest);
4100 ASSERT(outType != KindOfStaticString);
4101 emitIncRef(dest, outType);
4104 void
4105 TranslatorX64::analyzeCGetL2(Tracelet& t,
4106 NormalizedInstruction& ni) {
4107 const int locIdx = 1;
4108 ASSERT(ni.inputs.size() == 2);
4109 ni.m_txFlags = ni.inputs[locIdx]->rtt.isUninit() ? Supported : Native;
4112 void
4113 TranslatorX64::translateCGetL2(const Tracelet& t,
4114 const NormalizedInstruction& ni) {
4115 const int stackIdx = 0;
4116 const int locIdx = 1;
4118 // Note: even if it's an undefined local we need to move a few
4119 // values around to have outputs end up in the right place.
4120 const bool undefinedLocal = ni.inputs[locIdx]->rtt.isUninit();
4122 if (undefinedLocal) {
4123 ASSERT(ni.outStack->valueType() == KindOfNull);
4124 ASSERT(ni.inputs[locIdx]->location.offset < curFunc()->numLocals());
4125 const StringData* name = local_name(ni.inputs[locIdx]->location);
4127 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
4128 recordReentrantCall(ni);
4130 m_regMap.allocInputRegs(ni);
4133 m_regMap.allocOutputRegs(ni);
4134 const PhysReg stackIn = getReg(ni.inputs[stackIdx]->location);
4135 const PhysReg localIn = getReg(ni.inputs[locIdx]->location);
4136 const PhysReg stackOut = getReg(ni.outStack2->location);
4137 ASSERT(ni.inputs[stackIdx]->location.isStack());
4138 ASSERT(ni.inputs[locIdx]->location.isLocal());
4141 * These registers overlap a bit, so we can swap a few bindings to
4142 * avoid a move.
4144 ASSERT(stackIn == getReg(ni.outStack->location) && localIn != stackOut);
4145 m_regMap.swapRegisters(stackIn, stackOut);
4146 const PhysReg cellOut = getReg(ni.outStack->location);
4147 ASSERT(cellOut != stackIn);
4148 if (ni.inputs[locIdx]->isVariant()) {
4149 emitDeref(a, localIn, cellOut);
4150 } else if (!undefinedLocal) {
4151 emitMovRegReg(localIn, cellOut);
4153 emitIncRef(cellOut, ni.inputs[locIdx]->valueType());
4156 void
4157 TranslatorX64::analyzeVGetL(Tracelet& t,
4158 NormalizedInstruction& i) {
4159 i.m_txFlags = Native;
4162 void
4163 TranslatorX64::translateVGetL(const Tracelet& t,
4164 const NormalizedInstruction& i) {
4165 const DEBUG_ONLY Opcode op = i.op();
4166 ASSERT(op == OpVGetL || op == OpFPassL);
4167 const vector<DynLocation*>& inputs = i.inputs;
4168 ASSERT(inputs.size() == 1);
4169 ASSERT(i.outStack);
4170 ASSERT(inputs[0]->isLocal());
4171 ASSERT(i.outStack->rtt.outerType() == KindOfRef);
4173 PhysReg localReg = getReg(inputs[0]->location);
4174 PhysReg dest;
4175 if (inputs[0]->rtt.outerType() != KindOfRef) {
4176 emitBox(inputs[0]->rtt.outerType(), localReg);
4177 m_regMap.bind(rax, inputs[0]->location, KindOfRef,
4178 RegInfo::DIRTY);
4179 m_regMap.allocOutputRegs(i);
4180 dest = getReg(i.outStack->location);
4181 emitMovRegReg(rax, dest);
4182 } else {
4183 m_regMap.allocOutputRegs(i);
4184 dest = getReg(i.outStack->location);
4185 emitMovRegReg(localReg, dest);
4187 emitIncRef(dest, KindOfRef);
4190 void
4191 TranslatorX64::analyzeAssignToLocalOp(Tracelet& t,
4192 NormalizedInstruction& ni) {
4193 const int locIdx = 1;
4194 ni.m_txFlags = planHingesOnRefcounting(ni.inputs[locIdx]->outerType());
4197 void
4198 TranslatorX64::translateAssignToLocalOp(const Tracelet& t,
4199 const NormalizedInstruction& ni) {
4200 const int rhsIdx = 0;
4201 const int locIdx = 1;
4202 const Opcode op = ni.op();
4203 ASSERT(op == OpSetL || op == OpBindL);
4204 ASSERT(ni.inputs.size() == 2);
4205 ASSERT((op == OpBindL) ==
4206 (ni.inputs[rhsIdx]->outerType() == KindOfRef));
4208 ASSERT(!ni.outStack || ni.inputs[locIdx]->location != ni.outStack->location);
4209 ASSERT(ni.outLocal);
4210 ASSERT(ni.inputs[locIdx]->location == ni.outLocal->location);
4211 ASSERT(ni.inputs[rhsIdx]->isStack());
4213 m_regMap.allocOutputRegs(ni);
4214 const PhysReg rhsReg = getReg(ni.inputs[rhsIdx]->location);
4215 const PhysReg localReg = getReg(ni.outLocal->location);
4216 const DataType oldLocalType = ni.inputs[locIdx]->outerType();
4217 const DataType rhsType = ni.inputs[rhsIdx]->outerType();
4218 ASSERT(localReg != rhsReg);
4220 LazyScratchReg oldLocalReg(m_regMap);
4221 DataType decRefType;
4223 // For SetL, when the local is boxed, we need to change the
4224 // type/value of the inner cell. If we're doing BindL, we don't
4225 // want to affect the old inner cell in any case (except to decref
4226 // it).
4227 const bool affectInnerCell = op == OpSetL &&
4228 oldLocalType == KindOfRef;
4229 if (affectInnerCell) {
4230 ASSERT(rhsType != KindOfRef);
4232 oldLocalReg.alloc();
4233 emitDeref(a, localReg, *oldLocalReg);
4234 emitStoreTypedValue(a, rhsType, rhsReg, 0, localReg);
4235 decRefType = ni.inputs[locIdx]->rtt.innerType();
4236 } else {
4238 * Instead of emitting a mov, just swap the locations these two
4239 * registers are mapped to.
4241 * TODO: this might not be the best idea now that the register
4242 * allocator has some awareness about what is a local. (Maybe we
4243 * should just xchg_reg64_reg64.)
4245 m_regMap.swapRegisters(rhsReg, localReg);
4246 decRefType = oldLocalType;
4249 // If we're giving stack output, it's important to incref before
4250 // calling a possible destructor, since the destructor could have
4251 // access to the local if it is a var.
4252 if (ni.outStack) {
4253 emitIncRef(rhsReg, rhsType);
4254 } else {
4255 SKTRACE(3, ni.source, "hoisting Pop* into current instr\n");
4258 emitDecRef(ni, oldLocalReg.isAllocated() ? *oldLocalReg : localReg,
4259 decRefType);
4261 if (ni.outStack && !IS_NULL_TYPE(ni.outStack->outerType())) {
4262 PhysReg stackReg = getReg(ni.outStack->location);
4263 emitMovRegReg(rhsReg, stackReg);
4267 static void
4268 planPop(NormalizedInstruction& i) {
4269 if (i.prev && i.prev->outputPredicted) {
4270 i.prev->outputPredicted = false;
4271 i.inputs[0]->rtt = RuntimeType(KindOfInvalid);
4273 DataType type = i.inputs[0]->outerType();
4274 i.m_txFlags =
4275 (type == KindOfInvalid || IS_REFCOUNTED_TYPE(type)) ? Supported : Native;
4276 i.manuallyAllocInputs = true;
4279 void TranslatorX64::analyzePopC(Tracelet& t, NormalizedInstruction& i) {
4280 planPop(i);
4283 void TranslatorX64::analyzePopV(Tracelet& t, NormalizedInstruction& i) {
4284 planPop(i);
4287 void TranslatorX64::analyzePopR(Tracelet& t, NormalizedInstruction& i) {
4288 planPop(i);
4291 void
4292 TranslatorX64::translatePopC(const Tracelet& t,
4293 const NormalizedInstruction& i) {
4294 ASSERT(i.inputs.size() == 1);
4295 ASSERT(!i.outStack && !i.outLocal);
4296 if (i.inputs[0]->rtt.isVagueValue()) {
4297 PhysReg base;
4298 int disp;
4299 locToRegDisp(i.inputs[0]->location, &base, &disp);
4300 emitDecRefGeneric(i, base, disp);
4301 } else {
4302 emitDecRefInput(a, i, 0);
4306 void
4307 TranslatorX64::translatePopV(const Tracelet& t,
4308 const NormalizedInstruction& i) {
4309 ASSERT(i.inputs[0]->rtt.isVagueValue() ||
4310 i.inputs[0]->isVariant());
4311 translatePopC(t, i);
4314 void
4315 TranslatorX64::translatePopR(const Tracelet& t,
4316 const NormalizedInstruction& i) {
4317 translatePopC(t, i);
4320 void
4321 TranslatorX64::translateUnboxR(const Tracelet& t,
4322 const NormalizedInstruction& i) {
4323 ASSERT(!i.inputs[0]->rtt.isVagueValue());
4325 // If the value on top of the stack is a var, unbox it and
4326 // leave it on the top of the stack.
4327 if (i.inputs[0]->isVariant()) {
4328 emitUnboxTopOfStack(i);
4332 void
4333 TranslatorX64::translateNull(const Tracelet& t,
4334 const NormalizedInstruction& i) {
4335 ASSERT(i.inputs.size() == 0);
4336 ASSERT(!i.outLocal);
4337 if (i.outStack) {
4338 ASSERT(i.outStack->outerType() == KindOfNull);
4340 // We have to mark the output register as dirty to ensure that
4341 // the type gets spilled at the end of the tracelet
4342 m_regMap.allocOutputRegs(i);
4344 /* nop */
4347 void
4348 TranslatorX64::translateTrue(const Tracelet& t,
4349 const NormalizedInstruction& i) {
4350 ASSERT(i.inputs.size() == 0);
4351 ASSERT(!i.outLocal);
4352 if (i.outStack) {
4353 m_regMap.allocOutputRegs(i);
4354 PhysReg rdest = getReg(i.outStack->location);
4355 emitImmReg(a, 1, rdest);
4359 void
4360 TranslatorX64::translateFalse(const Tracelet& t,
4361 const NormalizedInstruction& i) {
4362 ASSERT(i.inputs.size() == 0);
4363 ASSERT(!i.outLocal);
4364 if (i.outStack) {
4365 m_regMap.allocOutputRegs(i);
4366 PhysReg dest = getReg(i.outStack->location);
4367 emitImmReg(a, false, dest);
4371 void
4372 TranslatorX64::translateInt(const Tracelet& t,
4373 const NormalizedInstruction& i) {
4374 ASSERT(i.inputs.size() == 0);
4375 ASSERT(!i.outLocal);
4376 if (i.outStack) {
4377 ASSERT(i.outStack->isInt());
4378 m_regMap.allocOutputRegs(i);
4379 PhysReg dest = getReg(i.outStack->location);
4380 uint64_t srcImm = i.imm[0].u_I64A;
4381 emitImmReg(a, srcImm, dest);
4385 void
4386 TranslatorX64::translateString(const Tracelet& t,
4387 const NormalizedInstruction& i) {
4388 ASSERT(i.inputs.size() == 0);
4389 ASSERT(!i.outLocal);
4390 if (!i.outStack) return;
4391 ASSERT(Translator::typeIsString(i.outStack->outerType()));
4392 m_regMap.allocOutputRegs(i);
4393 PhysReg dest = getReg(i.outStack->location);
4394 uint64_t srcImm = (uintptr_t)curUnit()->lookupLitstrId(i.imm[0].u_SA);
4395 // XXX: can simplify the lookup here by just fishing it out of the
4396 // output's valueString().
4397 // We are guaranteed that the string is static, so we do not need to
4398 // increment the refcount
4399 ASSERT(((StringData*)srcImm)->isStatic());
4400 SKTRACE(2, i.source, "Litstr %d -> %p \"%s\"\n",
4401 i.imm[0].u_SA, (StringData*)srcImm,
4402 Util::escapeStringForCPP(((StringData*)srcImm)->data()).c_str());
4403 emitImmReg(a, srcImm, dest);
4406 void
4407 TranslatorX64::translateArray(const Tracelet& t,
4408 const NormalizedInstruction& i) {
4409 ASSERT(i.inputs.size() == 0);
4410 ASSERT(!i.outLocal);
4411 if (i.outStack) {
4412 ASSERT(i.outStack->outerType() == KindOfArray);
4413 m_regMap.allocOutputRegs(i);
4414 ArrayData* ad = curUnit()->lookupArrayId(i.imm[0].u_AA);
4415 PhysReg r = getReg(i.outStack->location);
4416 emitImmReg(a, uint64(ad), r);
4417 // We are guaranteed that the array is static, so we do not need to
4418 // increment the refcount
4419 ASSERT(ad->isStatic());
4423 void
4424 TranslatorX64::translateNewArray(const Tracelet& t,
4425 const NormalizedInstruction& i) {
4426 ASSERT(i.inputs.size() == 0);
4427 ASSERT(i.outStack && !i.outLocal);
4428 ASSERT(i.outStack->outerType() == KindOfArray);
4429 m_regMap.allocOutputRegs(i);
4430 PhysReg r = getReg(i.outStack->location);
4431 emitImmReg(a, uint64(HphpArray::GetStaticEmptyArray()), r);
4432 // We are guaranteed that the new array is static, so we do not need to
4433 // increment the refcount
4434 ASSERT(HphpArray::GetStaticEmptyArray()->isStatic());
4437 void
4438 TranslatorX64::analyzeNop(Tracelet& t, NormalizedInstruction& i) {
4439 i.m_txFlags = Native;
4442 void
4443 TranslatorX64::translateNop(const Tracelet& t,
4444 const NormalizedInstruction& i) {
4447 void
4448 TranslatorX64::analyzeAddElemC(Tracelet& t, NormalizedInstruction& i) {
4449 i.m_txFlags = supportedPlan(i.inputs[2]->outerType() == KindOfArray &&
4450 (i.inputs[1]->isInt() ||
4451 i.inputs[1]->isString()));
4454 void
4455 TranslatorX64::translateAddElemC(const Tracelet& t,
4456 const NormalizedInstruction& i) {
4457 ASSERT(i.outStack && !i.outLocal);
4458 ASSERT(i.inputs.size() >= 3);
4459 const DynLocation& arr = *i.inputs[2];
4460 const DynLocation& key = *i.inputs[1];
4461 const DynLocation& val = *i.inputs[0];
4462 ASSERT(!arr.isVariant()); // not handling variants.
4463 ASSERT(!key.isVariant());
4464 ASSERT(!val.isVariant());
4466 const Location& arrLoc = arr.location;
4467 const Location& keyLoc = key.location;
4468 const Location& valLoc = val.location;
4470 ASSERT(arrLoc.isStack());
4471 ASSERT(keyLoc.isStack());
4472 ASSERT(arrLoc.isStack());
4474 // If either the key or the rhs is not Int64, we will need to pass the
4475 // rhs by address, so we need to sync it back to memory
4476 if (!key.rtt.isInt() || !val.rtt.isInt()) {
4477 m_regMap.cleanLoc(valLoc);
4480 // The array_setm helpers will decRef any old value that is
4481 // overwritten if appropriate. If copy-on-write occurs, it will also
4482 // incRef the new array and decRef the old array for us. Finally,
4483 // some of the array_setm helpers will decRef the key if it is a
4484 // string (for cases where the key is not a local), while others do
4485 // not (for cases where the key is a local).
4486 void* fptr;
4487 if (key.rtt.isInt() && val.rtt.isInt()) {
4488 if (false) { // type-check
4489 TypedValue* cell = NULL;
4490 ArrayData* arr = NULL;
4491 ArrayData* ret = array_setm_ik1_iv(cell, arr, 12, 3);
4492 printf("%p", ret); // use ret
4494 // If the rhs is Int64, we can use a specialized helper
4495 EMIT_CALL(a, array_setm_ik1_iv,
4496 IMM(0),
4497 V(arrLoc),
4498 V(keyLoc),
4499 V(valLoc));
4500 recordReentrantCall(i);
4501 } else if (key.rtt.isInt() || key.rtt.isString()) {
4502 if (false) { // type-check
4503 TypedValue* cell = NULL;
4504 TypedValue* rhs = NULL;
4505 StringData* strkey = NULL;
4506 ArrayData* arr = NULL;
4507 ArrayData* ret;
4508 ret = array_setm_ik1_v0(cell, arr, 12, rhs);
4509 printf("%p", ret); // use ret
4510 ret = array_setm_sk1_v0(cell, arr, strkey, rhs);
4511 printf("%p", ret); // use ret
4513 // Otherwise, we pass the rhs by address
4514 fptr = key.rtt.isString() ? (void*)array_setm_sk1_v0 :
4515 (void*)array_setm_ik1_v0;
4516 EMIT_CALL(a, fptr,
4517 IMM(0),
4518 V(arrLoc),
4519 V(keyLoc),
4520 A(valLoc));
4521 recordReentrantCall(i);
4522 } else {
4523 ASSERT(false);
4525 // The array value may have changed, so we need to invalidate any
4526 // register we have associated with arrLoc
4527 m_regMap.invalidate(arrLoc);
4528 // The array_setm helper returns the up-to-date array pointer in rax.
4529 // Therefore, we can bind rax to arrLoc and mark it as dirty.
4530 m_regMap.bind(rax, arrLoc, KindOfArray, RegInfo::DIRTY);
4533 void
4534 TranslatorX64::analyzeAddNewElemC(Tracelet& t, NormalizedInstruction& i) {
4535 ASSERT(i.inputs.size() == 2);
4536 i.m_txFlags = supportedPlan(i.inputs[1]->outerType() == KindOfArray);
4539 void
4540 TranslatorX64::translateAddNewElemC(const Tracelet& t,
4541 const NormalizedInstruction& i) {
4542 ASSERT(i.inputs.size() == 2);
4543 ASSERT(i.outStack && !i.outLocal);
4544 ASSERT(i.inputs[0]->outerType() != KindOfRef);
4545 ASSERT(i.inputs[1]->outerType() != KindOfRef);
4546 ASSERT(i.inputs[0]->isStack());
4547 ASSERT(i.inputs[1]->isStack());
4549 Location arrLoc = i.inputs[1]->location;
4550 Location valLoc = i.inputs[0]->location;
4552 // We pass the rhs by address, so we need to sync it back to memory
4553 m_regMap.cleanLoc(valLoc);
4555 // The array_setm helpers will decRef any old value that is
4556 // overwritten if appropriate. If copy-on-write occurs, it will also
4557 // incRef the new array and decRef the old array for us. Finally,
4558 // some of the array_setm helpers will decRef the key if it is a
4559 // string (for cases where the key is not a local), while others do
4560 // not (for cases where the key is a local).
4561 if (false) { // type-check
4562 TypedValue* cell = NULL;
4563 TypedValue* rhs = NULL;
4564 ArrayData* arr = NULL;
4565 ArrayData* ret;
4566 ret = array_setm_wk1_v0(cell, arr, rhs);
4567 printf("%p", ret); // use ret
4569 EMIT_CALL(a, array_setm_wk1_v0,
4570 IMM(0),
4571 V(arrLoc),
4572 A(valLoc));
4573 recordReentrantCall(i);
4574 // The array value may have changed, so we need to invalidate any
4575 // register we have associated with arrLoc
4576 m_regMap.invalidate(arrLoc);
4577 // The array_setm helper returns the up-to-date array pointer in rax.
4578 // Therefore, we can bind rax to arrLoc and mark it as dirty.
4579 m_regMap.bind(rax, arrLoc, KindOfArray, RegInfo::DIRTY);
4582 static void undefCns(const StringData* nm) {
4583 VMRegAnchor _;
4584 TypedValue *cns = g_vmContext->getCns(const_cast<StringData*>(nm));
4585 if (!cns) {
4586 raise_notice(Strings::UNDEFINED_CONSTANT, nm->data(), nm->data());
4587 g_vmContext->getStack().pushStringNoRc(const_cast<StringData*>(nm));
4588 } else {
4589 Cell* c1 = g_vmContext->getStack().allocC();
4590 TV_READ_CELL(cns, c1);
4594 void TranslatorX64::emitSideExit(Asm& a, const NormalizedInstruction& i,
4595 bool next) {
4596 const NormalizedInstruction& dest = next ? *i.next : i;
4598 SKTRACE(3, i.source, "sideexit check %p\n", a.code.frontier);
4599 // NB: if next == true, we are assuming here that stack elements
4600 // spit out by this instruction are already clean and sync'd back to
4601 // the top slot of the stack.
4602 m_regMap.scrubStackEntries(dest.stackOff);
4603 m_regMap.cleanAll();
4604 emitRB(a, RBTypeSideExit, i.source);
4605 int stackDisp = dest.stackOff;
4606 if (stackDisp != 0) {
4607 SKTRACE(3, i.source, "stack bump %d => %x\n", stackDisp,
4608 -cellsToBytes(stackDisp));
4609 a. add_imm32_reg64(-cellsToBytes(stackDisp), rVmSp);
4611 emitBindJmp(a, dest.source, REQ_BIND_SIDE_EXIT);
4614 void
4615 TranslatorX64::translateCns(const Tracelet& t,
4616 const NormalizedInstruction& i) {
4617 ASSERT(i.inputs.size() == 0);
4618 ASSERT(i.outStack && !i.outLocal);
4620 // OK to burn "name" into TC: it was merged into the static string
4621 // table, so as long as this code is reachable, so should the string
4622 // be.
4623 DataType outType = i.outStack->valueType();
4624 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4625 const TypedValue* tv = g_vmContext->getCns(name, true, false);
4626 bool checkDefined = false;
4627 if (outType != KindOfInvalid && tv == NULL &&
4628 !RuntimeOption::RepoAuthoritative) {
4629 PreConstDepMap::accessor acc;
4630 tv = findUniquePreConst(acc, name);
4631 if (tv != NULL) {
4632 checkDefined = true;
4633 acc->second.srcKeys.insert(t.m_sk);
4634 Stats::emitInc(a, Stats::Tx64_CnsFast);
4635 } else {
4636 // We had a unique value while analyzing but don't anymore. This
4637 // should be rare so just punt to keep things simple.
4638 punt();
4641 using namespace TargetCache;
4642 if (tv && tvIsStatic(tv)) {
4643 m_regMap.allocOutputRegs(i);
4644 if (checkDefined) {
4645 size_t bit = allocCnsBit(name);
4646 uint32 mask;
4647 CacheHandle ch = bitOffToHandleAndMask(bit, mask);
4648 // The 'test' instruction takes a signed immediate and the mask is
4649 // unsigned, but everything works out okay because the immediate is
4650 // the same size as the other operand. However, we have to sign-extend
4651 // the mask to 64 bits to make the assembler happy.
4652 int64_t imm = (int64_t)(int32)mask;
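  // E.g. a mask of 0x80000000 becomes the immediate 0xffffffff80000000;
  // only the low 32 bits take part in the 32-bit test, so the sign
  // extension is purely to satisfy the assembler's operand checks.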
4653 a.test_imm32_disp_reg32(imm, ch, rVmTl);
4655 // If we get to the optimistic translation and the constant
4656 // isn't defined, our tracelet is ruined because the type may
4657 // not be what we expect. If we were expecting KindOfString we
4658 // could theoretically keep going here since that's the type
4659 // of an undefined constant expression, but it should be rare
4660 // enough that it's not worth the complexity.
4661 UnlikelyIfBlock<CC_Z> ifZero(a, astubs);
4662 Stats::emitInc(astubs, Stats::Tx64_CnsFast, -1);
4663 emitSideExit(astubs, i, false);
4666 // Its type and value are known at compile-time.
4667 ASSERT(tv->m_type == outType ||
4668 (IS_STRING_TYPE(tv->m_type) && IS_STRING_TYPE(outType)));
4669 PhysReg r = getReg(i.outStack->location);
4670 a. mov_imm64_reg(tv->m_data.num, r);
4671 // tv is static; no need to incref
4672 return;
4675 Stats::emitInc(a, Stats::Tx64_CnsSlow);
4676 CacheHandle ch = allocConstant(name);
4677 TRACE(2, "Cns: %s -> ch %ld\n", name->data(), ch);
4678 // Load the constant out of the thread-private tl_targetCaches.
4679 ScratchReg cns(m_regMap);
4680 a. lea_reg64_disp_reg64(rVmTl, ch, *cns);
4681 a. cmp_imm32_disp_reg32(0, TVOFF(m_type), *cns);
4682 DiamondReturn astubsRet;
4683 int stackDest = 0 - int(sizeof(Cell)); // popped - pushed
4685 // It's tempting to dedup these, but not obvious we really can;
4686 // at least stackDest and tmp are specific to the translation
4687 // context.
4688 UnlikelyIfBlock<CC_Z> ifb(a, astubs, &astubsRet);
4689 EMIT_CALL(astubs, undefCns, IMM((uintptr_t)name));
4690 recordReentrantStubCall(i);
4691 m_regMap.invalidate(i.outStack->location);
4694 // Bitwise copy to output area.
4695 emitCopyToStack(a, i, *cns, stackDest);
4696 m_regMap.invalidate(i.outStack->location);
4699 void
4700 TranslatorX64::analyzeDefCns(Tracelet& t,
4701 NormalizedInstruction& i) {
4702 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4703 /* don't bother to translate if it names a builtin constant */
4704 i.m_txFlags = supportedPlan(!g_vmContext->getCns(name, true, false));
4707 typedef void (*defCnsHelper_func_t)(TargetCache::CacheHandle ch, Variant *inout,
4708 StringData *name, size_t bit);
4709 template<bool setBit>
4710 static void defCnsHelper(TargetCache::CacheHandle ch, Variant *inout,
4711 StringData *name, size_t bit) {
4712 using namespace TargetCache;
4713 TypedValue *tv = (TypedValue*)handleToPtr(ch);
4714 if (LIKELY(tv->m_type == KindOfUninit &&
4715 inout->isAllowedAsConstantValue())) {
4716 inout->setEvalScalar();
4717 if (LIKELY(g_vmContext->insertCns(name, (TypedValue*)inout))) {
4718 tvDup((TypedValue*)inout, tv);
4719 *inout = true;
4720 if (setBit) {
4721 DEBUG_ONLY bool alreadyDefined = testAndSetBit(bit);
4722 ASSERT(!alreadyDefined);
4724 return;
4726 tv = (TypedValue*)&false_varNR;
4729 if (tv->m_type != KindOfUninit) {
4730 raise_warning(Strings::CONSTANT_ALREADY_DEFINED, name->data());
4731 } else {
4732 ASSERT(!inout->isAllowedAsConstantValue());
4733 raise_warning(Strings::CONSTANTS_MUST_BE_SCALAR);
4735 *inout = false;
4738 void
4739 TranslatorX64::translateDefCns(const Tracelet& t,
4740 const NormalizedInstruction& i) {
4741 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4743 if (false) {
4744 TargetCache::CacheHandle ch = 0;
4745 size_t bit = 0;
4746 Variant *inout = 0;
4747 StringData *name = 0;
4748 defCnsHelper<true>(ch, inout, name, bit);
4749 defCnsHelper<false>(ch, inout, name, bit);
4752 using namespace TargetCache;
4753 CacheHandle ch = allocConstant(name);
4754 TRACE(2, "DefCns: %s -> ch %ld\n", name->data(), ch);
4756 m_regMap.cleanLoc(i.inputs[0]->location);
4757 if (RuntimeOption::RepoAuthoritative) {
4758 EMIT_CALL(a, (defCnsHelper_func_t)defCnsHelper<false>,
4759 IMM(ch), A(i.inputs[0]->location),
4760 IMM((uint64)name));
4761 } else {
4762 EMIT_CALL(a, (defCnsHelper_func_t)defCnsHelper<true>,
4763 IMM(ch), A(i.inputs[0]->location),
4764 IMM((uint64)name), IMM(allocCnsBit(name)));
4766 recordReentrantCall(i);
4767 m_regMap.invalidate(i.outStack->location);
4770 void
4771 TranslatorX64::translateClsCnsD(const Tracelet& t,
4772 const NormalizedInstruction& i) {
4773 using namespace TargetCache;
4774 const NamedEntityPair& namedEntityPair =
4775 curUnit()->lookupNamedEntityPairId(i.imm[1].u_SA);
4776 ASSERT(namedEntityPair.second);
4777 const StringData *clsName = namedEntityPair.first;
4778 ASSERT(clsName->isStatic());
4779 StringData* cnsName = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4780 ASSERT(cnsName->isStatic());
4781 StringData* fullName = StringData::GetStaticString(
4782 Util::toLower(clsName->data()) + "::" + cnsName->data());
4784 Stats::emitInc(a, Stats::TgtCache_ClsCnsHit);
4785 CacheHandle ch = allocClassConstant(fullName);
4786 ScratchReg cns(m_regMap);
4787 a.lea_reg64_disp_reg64(rVmTl, ch, *cns);
4788 a.cmp_imm32_disp_reg32(0, TVOFF(m_type), *cns);
4790 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
4792 if (false) { // typecheck
4793 TypedValue* tv = NULL;
4794 UNUSED TypedValue* ret =
4795 TargetCache::lookupClassConstant(tv, namedEntityPair.second,
4796 namedEntityPair.first, cnsName);
4799 EMIT_CALL(astubs, TCA(TargetCache::lookupClassConstant),
4800 R(*cns),
4801 IMM(uintptr_t(namedEntityPair.second)),
4802 IMM(uintptr_t(namedEntityPair.first)),
4803 IMM(uintptr_t(cnsName)));
4804 recordReentrantStubCall(i);
4805 // DiamondGuard will restore cns's SCRATCH state but not its
4806 // contents. lookupClassConstant returns the value we want.
4807 emitMovRegReg(astubs, rax, *cns);
4809 int stackDest = 0 - int(sizeof(Cell)); // 0 popped - 1 pushed
4810 emitCopyToStack(a, i, *cns, stackDest);
4813 void
4814 TranslatorX64::analyzeConcat(Tracelet& t, NormalizedInstruction& i) {
4815 ASSERT(i.inputs.size() == 2);
4816 const RuntimeType& r = i.inputs[0]->rtt;
4817 const RuntimeType& l = i.inputs[1]->rtt;
4818 // The concat translation isn't reentrant; objects that override
4819 // __toString() can cause reentry.
4820 i.m_txFlags = simplePlan(r.valueType() != KindOfObject &&
4821 l.valueType() != KindOfObject);
4824 void
4825 TranslatorX64::translateConcat(const Tracelet& t,
4826 const NormalizedInstruction& i) {
4827 ASSERT(i.inputs.size() == 2);
4828 const DynLocation& r = *i.inputs[0];
4829 const DynLocation& l = *i.inputs[1];
4830 // We have specialized helpers for concatenating two strings, a
4831 // string and an int, and an int and a string.
4832 void* fptr = NULL;
4833 if (l.rtt.isString() && r.rtt.isString()) {
4834 fptr = (void*)concat_ss;
4835 } else if (l.rtt.isString() && r.rtt.isInt()) {
4836 fptr = (void*)concat_si;
4837 } else if (l.rtt.isInt() && r.rtt.isString()) {
4838 fptr = (void*)concat_is;
4840 if (fptr) {
4841 // If we have a specialized helper, use it
4842 if (false) { // type check
4843 StringData* v1 = NULL;
4844 StringData* v2 = NULL;
4845 StringData* retval = concat_ss(v1, v2);
4846 printf("%p", retval); // use retval
4849 // The concat helper will decRef the inputs and incRef the output
4850 // for us if appropriate
4851 EMIT_CALL(a, fptr,
4852 V(l.location),
4853 V(r.location));
4854 ASSERT(i.outStack->rtt.isString());
4855 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4856 RegInfo::DIRTY);
4858 } else {
4859 // Otherwise, use the generic concat helper
4860 if (false) { // type check
4861 uint64_t v1 = 0, v2 = 0;
4862 DataType t1 = KindOfUninit, t2 = KindOfUninit;
4863 StringData *retval = concat(t1, v1, t2, v2);
4864 printf("%p", retval); // use retval
4866 // concat will decRef the two inputs and incRef the output
4867 // for us if appropriate
4868 EMIT_CALL(a, concat,
4869 IMM(l.valueType()), V(l.location),
4870 IMM(r.valueType()), V(r.location));
4871 ASSERT(i.outStack->isString());
4872 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4873 RegInfo::DIRTY);
4877 TXFlags
4878 planInstrAdd_Int(const NormalizedInstruction& i) {
4879 ASSERT(i.inputs.size() == 2);
4880 return nativePlan(i.inputs[0]->isInt() && i.inputs[1]->isInt());
4883 TXFlags
4884 planInstrAdd_Array(const NormalizedInstruction& i) {
4885 ASSERT(i.inputs.size() == 2);
4886 return supportedPlan(i.inputs[0]->valueType() == KindOfArray &&
4887 i.inputs[1]->valueType() == KindOfArray);
4890 void
4891 TranslatorX64::analyzeAdd(Tracelet& t, NormalizedInstruction& i) {
4892 i.m_txFlags = TXFlags(planInstrAdd_Int(i) | planInstrAdd_Array(i));
4895 void
4896 TranslatorX64::translateAdd(const Tracelet& t,
4897 const NormalizedInstruction& i) {
4898 ASSERT(i.inputs.size() == 2);
4900 if (planInstrAdd_Array(i)) {
4901 // Handle adding two arrays
4902 ASSERT(i.outStack->outerType() == KindOfArray);
4903 if (false) { // type check
4904 ArrayData* v = NULL;
4905 v = array_add(v, v);
4907 // The array_add helper will decRef the inputs and incRef the output
4908 // for us if appropriate
4909 EMIT_CALL(a, array_add,
4910 V(i.inputs[1]->location),
4911 V(i.inputs[0]->location));
4912 recordReentrantCall(i);
4913 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4914 RegInfo::DIRTY);
4915 return;
4918 ASSERT(planInstrAdd_Int(i));
4919 binaryArithCell(i, OpAdd, *i.inputs[0], *i.outStack);
4922 void
4923 TranslatorX64::analyzeXor(Tracelet& t, NormalizedInstruction& i) {
4924 i.m_txFlags = nativePlan((i.inputs[0]->outerType() == KindOfBoolean ||
4925 i.inputs[0]->isInt()) &&
4926 (i.inputs[1]->outerType() == KindOfBoolean ||
4927 i.inputs[1]->isInt()));
4930 static inline void
4931 emitIntToCCBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch,
4932 int CC) {
4934 * test %srcdest, %srcdest
4935 * set<CC> %scratchL
4936 * movzbq %scratchL, %srcdest
4938 a. test_reg64_reg64(srcdest, srcdest);
4939 a. setcc (CC, scratch);
4940 a. mov_reg8_reg64_unsigned(scratch, srcdest);
4943 static inline void
4944 emitIntToBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch) {
4945 emitIntToCCBool(a, srcdest, scratch, CC_NZ);
4948 static inline void
4949 emitIntToNegBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch) {
4950 emitIntToCCBool(a, srcdest, scratch, CC_Z);
4953 void
4954 TranslatorX64::translateXor(const Tracelet& t,
4955 const NormalizedInstruction& i) {
4956 PhysReg src, srcdest;
4957 getBinaryStackInputs(m_regMap, i, src, srcdest);
4958 m_regMap.allocOutputRegs(i);
4959 ScratchReg scr(m_regMap);
4960 if (i.inputs[0]->isInt()) {
4961 emitIntToBool(a, src, *scr);
4963 if (i.inputs[1]->isInt()) {
4964 emitIntToBool(a, srcdest, *scr);
4966 a. xor_reg64_reg64(src, srcdest);
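  // E.g. for inputs int(7) and bool(true): the int operand is normalized to
  // exactly 1 by emitIntToBool, so the xor computes 1 ^ 1 == 0, which is the
  // logical-xor result PHP expects for those operands.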
4969 void
4970 TranslatorX64::analyzeNot(Tracelet& t, NormalizedInstruction& i) {
4971 ASSERT(i.inputs.size() == 1);
4972 i.m_txFlags = nativePlan(i.inputs[0]->isInt() ||
4973 i.inputs[0]->outerType() == KindOfBoolean);
4976 void
4977 TranslatorX64::translateNot(const Tracelet& t,
4978 const NormalizedInstruction& i) {
4979 ASSERT(i.isNative());
4980 ASSERT(i.outStack && !i.outLocal);
4981 ASSERT(!i.inputs[0]->isVariant());
4982 m_regMap.allocOutputRegs(i);
4983 PhysReg srcdest = m_regMap.getReg(i.outStack->location);
4984 ScratchReg scr(m_regMap);
4985 emitIntToNegBool(a, srcdest, *scr);
4988 void
4989 TranslatorX64::analyzeBitNot(Tracelet& t, NormalizedInstruction& i) {
4990 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
4993 void
4994 TranslatorX64::translateBitNot(const Tracelet& t,
4995 const NormalizedInstruction& i) {
4996 ASSERT(i.outStack && !i.outLocal);
4997 m_regMap.allocOutputRegs(i);
4998 PhysReg srcdest = m_regMap.getReg(i.outStack->location);
4999 a. not_reg64(srcdest);
5002 void
5003 TranslatorX64::analyzeCastInt(Tracelet& t, NormalizedInstruction& i) {
5004 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
5007 void
5008 TranslatorX64::translateCastInt(const Tracelet& t,
5009 const NormalizedInstruction& i) {
5010 ASSERT(i.inputs.size() == 1);
5011 ASSERT(i.outStack && !i.outLocal);
5013 /* nop */
5016 void
5017 TranslatorX64::analyzeCastString(Tracelet& t, NormalizedInstruction& i) {
5018 i.m_txFlags =
5019 i.inputs[0]->isArray() || i.inputs[0]->isObject() ? Supported :
5020 i.inputs[0]->isInt() ? Simple :
5021 Native;
5022 i.funcd = NULL;
5025 static void toStringError(StringData *cls) {
5026 raise_error("Method __toString() must return a string value");
5029 static const StringData* stringDataFromInt(int64 n) {
5030 StringData* s = buildStringData(n);
5031 s->incRefCount();
5032 return s;
5035 static const StringData* stringDataFromDouble(int64 n) {
5036 StringData* s = buildStringData(*(double*)&n);
5037 s->incRefCount();
5038 return s;
5041 void TranslatorX64::toStringHelper(ObjectData *obj) {
5042 // caller must set r15 to the new ActRec
5043 static_assert(rStashedAR == r15 &&
5044 rVmFp == rbp,
5045 "toStringHelper needs to be updated for ABI changes");
5046 register ActRec *ar asm("r15");
5047 register ActRec *rbp asm("rbp");
5049 const Class* cls = obj->getVMClass();
5050 const Func* toString = cls->getToString();
5051 if (!toString) {
5052 // the unwinder will restore rVmSp to
5053 // &ar->m_r, so we'd better make sure it's
5054 // got a valid TypedValue there.
5055 TV_WRITE_UNINIT(&ar->m_r);
5056 std::string msg = cls->preClass()->name()->data();
5057 msg += "::__toString() was not defined";
5058 throw BadTypeConversionException(msg.c_str());
5060 // ar->m_savedRbp set by caller
5061 ar->m_savedRip = rbp->m_savedRip;
5062 ar->m_func = toString;
5063 // ar->m_soff set by caller
5064 ar->initNumArgs(0);
5065 ar->setThis(obj);
5066 ar->setVarEnv(0);
5067 // Point the return address of this C++ function at the prologue to
5068 // execute.
5069 rbp->m_savedRip = (uint64_t)toString->getPrologue(0);
5072 void
5073 TranslatorX64::translateCastString(const Tracelet& t,
5074 const NormalizedInstruction& i) {
5075 ASSERT(i.inputs.size() == 1);
5076 ASSERT(i.outStack && !i.outLocal);
5078 if (i.inputs[0]->isNull()) {
5079 m_regMap.allocOutputRegs(i);
5080 PhysReg dest = m_regMap.getReg(i.outStack->location);
5081 a. mov_imm64_reg((uint64)empty_string.get(), dest);
5082 } else if (i.inputs[0]->isBoolean()) {
5083 static StringData* s_1 = StringData::GetStaticString("1");
5084 m_regMap.allocOutputRegs(i);
5085 PhysReg dest = m_regMap.getReg(i.outStack->location);
5086 a. cmp_imm32_reg64(0, dest);
5087 a. mov_imm64_reg((uint64)empty_string.get(), dest);
5088 ScratchReg scratch(m_regMap);
5089 a. mov_imm64_reg((intptr_t)s_1, *scratch);
5090 a. cmov_reg64_reg64(CC_NZ, *scratch, dest);
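    // The cmp above tests the input boolean, which at this point should
    // still be live in dest; the imm64 moves leave the flags untouched, so
    // the cmov selects "1" when the input was true and keeps empty_string
    // otherwise.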
5091 } else if (i.inputs[0]->isInt()) {
5092 EMIT_CALL(a, stringDataFromInt, V(i.inputs[0]->location));
5093 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5094 RegInfo::DIRTY);
5095 } else if (i.inputs[0]->isDouble()) {
5096 EMIT_CALL(a, stringDataFromDouble, V(i.inputs[0]->location));
5097 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5098 RegInfo::DIRTY);
5099 } else if (i.inputs[0]->isString()) {
5100 // nop
5101 } else if (i.inputs[0]->isArray()) {
5102 static StringData* s_array = StringData::GetStaticString("Array");
5103 m_regMap.allocOutputRegs(i);
5104 PhysReg dest = m_regMap.getReg(i.outStack->location);
5105 emitDecRef(i, dest, KindOfArray);
5106 a. mov_imm64_reg((uint64)s_array, dest);
5107 } else if (i.inputs[0]->isObject()) {
5108 m_regMap.scrubStackEntries(i.stackOff - 1);
5109 m_regMap.cleanAll();
5110 int delta = i.stackOff + kNumActRecCells - 1;
5111 if (delta) {
5112 a. add_imm64_reg64(-cellsToBytes(delta), rVmSp);
5114 a. store_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmSp);
5115 a. store_imm32_disp_reg(nextSrcKey(t, i).offset() - curFunc()->base(),
5116 AROFF(m_soff), rVmSp);
5117 PhysReg obj = m_regMap.getReg(i.inputs[0]->location);
5118 if (obj != argNumToRegName[0]) {
5119 a. mov_reg64_reg64(obj, argNumToRegName[0]);
5121 m_regMap.smashRegs(kAllRegs);
5122 a. mov_reg64_reg64(rVmSp, rStashedAR);
5123 EMIT_CALL(a, TCA(toStringHelper));
5124 recordReentrantCall(i);
5125 if (i.stackOff != 0) {
5126 a. add_imm64_reg64(cellsToBytes(i.stackOff), rVmSp);
5129 PhysReg base;
5130 int disp;
5131 locToRegDisp(i.outStack->location, &base, &disp);
5132 ScratchReg scratch(m_regMap);
5133 emitStringCheck(a, base, disp + TVOFF(m_type), *scratch);
5135 UnlikelyIfBlock<CC_NZ> ifNotString(a, astubs);
5136 EMIT_CALL(astubs, toStringError, IMM(0));
5137 recordReentrantStubCall(i);
5139 } else {
5140 NOT_REACHED();
5144 void
5145 TranslatorX64::analyzePrint(Tracelet& t, NormalizedInstruction& i) {
5146 ASSERT(i.inputs.size() == 1);
5147 const RuntimeType& rtt = i.inputs[0]->rtt;
5148 DataType type = rtt.outerType();
5149 i.m_txFlags = simplePlan(
5150 type == KindOfUninit ||
5151 type == KindOfNull ||
5152 type == KindOfBoolean ||
5153 rtt.isInt() ||
5154 rtt.isString());
5157 void
5158 TranslatorX64::translatePrint(const Tracelet& t,
5159 const NormalizedInstruction& i) {
5160 const vector<DynLocation*>& inputs = i.inputs;
5161 ASSERT(inputs.size() == 1);
5162 ASSERT(!i.outLocal);
5163 ASSERT(!i.outStack || i.outStack->isInt());
5164 Location loc = inputs[0]->location;
5165 DataType type = inputs[0]->outerType();
5166 switch (type) {
5167 STRINGCASE(): EMIT_CALL(a, print_string, V(loc)); break;
5168 case KindOfInt64: EMIT_CALL(a, print_int, V(loc)); break;
5169 case KindOfBoolean: EMIT_CALL(a, print_boolean, V(loc)); break;
5170 NULLCASE(): /* do nothing */ break;
5171 default: {
5172 // Translation is only supported for Null, Boolean, Int, and String
5173 ASSERT(false);
5174 break;
5177 m_regMap.allocOutputRegs(i);
5178 if (i.outStack) {
5179 PhysReg outReg = getReg(i.outStack->location);
5180 emitImmReg(a, 1, outReg);
5184 void
5185 TranslatorX64::translateJmp(const Tracelet& t,
5186 const NormalizedInstruction& i) {
5187 ASSERT(!i.outStack && !i.outLocal);
5188 syncOutputs(t);
5190 // Check the surprise page on all backwards jumps
5191 if (i.imm[0].u_BA < 0) {
5192 if (trustSigSegv) {
5193 const uint64_t stackMask =
5194 ~(cellsToBytes(RuntimeOption::EvalVMStackElms) - 1);
5195 a.mov_reg64_reg64(rVmSp, rScratch);
5196 a.and_imm64_reg64(stackMask, rScratch);
5197 TCA surpriseLoad = a.code.frontier;
5198 a.load_reg64_disp_reg64(rScratch, 0, rScratch);
5200 if (!m_segvStubs.insert(SignalStubMap::value_type(surpriseLoad,
5201 astubs.code.frontier)))
5202 NOT_REACHED();
5204 * Note that it is safe not to register unwind information here,
5205 * because we just called syncOutputs so all registers are
5206 * already clean.
5208 astubs.call((TCA)&EventHook::CheckSurprise);
5209 recordStubCall(i);
5210 astubs.jmp(a.code.frontier);
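      // Roughly: instead of an explicit surprise-flag test, we load from the
      // base of the VM stack region; presumably that page is made
      // inaccessible when a surprise is pending, so the load faults and
      // m_segvStubs lets the signal handler map the faulting instruction
      // back to this stub, which calls CheckSurprise and resumes.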
5211 } else {
5212 emitTestSurpriseFlags();
5214 UnlikelyIfBlock<CC_NZ> ifSurprise(a, astubs);
5215 astubs.call((TCA)&EventHook::CheckSurprise);
5216 recordStubCall(i);
5220 SrcKey sk(curFunc(), i.offset() + i.imm[0].u_BA);
5221 emitBindJmp(sk);
5224 void
5225 TranslatorX64::analyzeSwitch(Tracelet& t,
5226 NormalizedInstruction& i) {
5227 RuntimeType& rtt = i.inputs[0]->rtt;
5228 ASSERT(rtt.outerType() != KindOfRef);
5229 switch (rtt.outerType()) {
5230 NULLCASE():
5231 case KindOfBoolean:
5232 case KindOfInt64:
5233 i.m_txFlags = Native;
5234 break;
5236 case KindOfDouble:
5237 i.m_txFlags = Simple;
5238 break;
5240 STRINGCASE():
5241 case KindOfObject:
5242 case KindOfArray:
5243 i.m_txFlags = Supported;
5244 break;
5246 default:
5247 not_reached();
5251 template <typename T>
5252 static int64 switchBoundsCheck(T v, int64 base, int64 nTargets) {
5253 // I'm relying on gcc to be smart enough to optimize away the next
5254 // two lines when T is int64.
5255 if (int64(v) == v) {
5256 int64 ival = v;
5257 if (ival >= base && ival < (base + nTargets)) {
5258 return ival - base;
5261 return nTargets + 1;
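  // E.g. switchBoundsCheck(5.0, 0, 10) yields 5, while switchBoundsCheck(5.7,
  // 0, 10) fails the int64(v) == v test and yields nTargets + 1, the
  // out-of-range slot that the switch translation wires up to the default
  // target.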
5264 static int64 switchDoubleHelper(int64 val, int64 base, int64 nTargets) {
5265 union {
5266 int64 intbits;
5267 double dblval;
5268 } u;
5269 u.intbits = val;
5270 return switchBoundsCheck(u.dblval, base, nTargets);
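  // The int64 val presumably carries the double's raw bit pattern (the
  // cell's m_data word), which the union reinterprets as a double before
  // bounds-checking.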
5273 static int64 switchStringHelper(StringData* s, int64 base, int64 nTargets) {
5274 int64 ival;
5275 double dval;
5276 switch (s->isNumericWithVal(ival, dval, 1)) {
5277 case KindOfNull:
5278 ival = switchBoundsCheck(0, base, nTargets);
5279 break;
5281 case KindOfDouble:
5282 ival = switchBoundsCheck(dval, base, nTargets);
5283 break;
5285 case KindOfInt64:
5286 ival = switchBoundsCheck(ival, base, nTargets);
5287 break;
5289 default:
5290 not_reached();
5292 if (s->decRefCount() == 0) {
5293 s->release();
5295 return ival;
5298 static int64 switchObjHelper(ObjectData* o, int64 base, int64 nTargets) {
5299 int64 ival = o->o_toInt64();
5300 if (o->decRefCount() == 0) {
5301 o->release();
5303 return switchBoundsCheck(ival, base, nTargets);
5306 void
5307 TranslatorX64::translateSwitch(const Tracelet& t,
5308 const NormalizedInstruction& i) {
5309 int64 base = i.imm[1].u_I64A;
5310 bool bounded = i.imm[2].u_IVA;
5311 const ImmVector& iv = i.immVec;
5312 int nTargets = bounded ? iv.size() - 2 : iv.size();
5313 int jmptabSize = nTargets;
5314 ASSERT(nTargets > 0);
5315 PhysReg valReg = getReg(i.inputs[0]->location);
5316 DataType inType = i.inputs[0]->outerType();
5317 ASSERT(IMPLIES(inType != KindOfInt64, bounded));
5318 ASSERT(IMPLIES(bounded, iv.size() > 2));
5319 syncOutputs(t); // this will mark valReg as FREE but it still has
5320 // its old value
5322 SrcKey defaultSk(curFunc(), i.offset() + iv.vec32()[iv.size() - 1]);
5323 SrcKey zeroSk(curFunc(), 0);
5324 if (0 >= base && 0 < (base + nTargets)) {
5325 zeroSk.m_offset = i.offset() + iv.vec32()[0 - base];
5326 } else {
5327 zeroSk.m_offset = defaultSk.m_offset;
5330 switch (i.inputs[0]->outerType()) {
5331 NULLCASE(): {
5332 emitBindJmp(zeroSk);
5333 return;
5336 case KindOfBoolean: {
5337 SrcKey nonzeroSk(curFunc(), i.offset() + iv.vec32()[iv.size() - 2]);
5338 a.test_reg64_reg64(valReg, valReg);
5339 emitCondJmp(nonzeroSk, zeroSk, CC_NZ);
5340 return;
5343 case KindOfInt64:
5344 // No special treatment needed
5345 break;
5347 case KindOfDouble:
5348 STRINGCASE():
5349 case KindOfObject: {
5350      // The switch(Double|String|Obj)Helper functions do bounds-checking for us, so
5350      // The switch(Double|String|Obj)Helper functions do bounds-checking for us,
5351 // we need to make sure the default case is in the jump table,
5352 // and don't emit our own bounds-checking code
5353 jmptabSize = iv.size();
5354 bounded = false;
5355 if (false) {
5356 StringData* s = NULL;
5357 ObjectData* o = NULL;
5358 switchDoubleHelper(0.0, 0, 0);
5359 switchStringHelper(s, 0, 0);
5360 switchObjHelper(o, 0, 0);
5362 EMIT_CALL(a,
5363 inType == KindOfDouble ? (TCA)switchDoubleHelper :
5364 (IS_STRING_TYPE(inType) ? (TCA)switchStringHelper :
5365 (TCA)switchObjHelper),
5366 R(valReg), IMM(base), IMM(nTargets));
5367 recordCall(i);
5368 valReg = rax;
5369 break;
5372 case KindOfArray:
5373 emitDecRef(a, i, valReg, KindOfArray);
5374 emitBindJmp(defaultSk);
5375 return;
5377 default:
5378 not_reached();
5381 if (bounded) {
5382 if (base) {
5383 a.sub_imm64_reg64(base, valReg);
5385 a.cmp_imm64_reg64(nTargets, valReg);
5386 prepareForSmash(a, kJmpccLen);
5387 TCA defaultStub =
5388 emitServiceReq(REQ_BIND_JMPCC_SECOND, 3,
5389 a.code.frontier, defaultSk.m_offset, CC_AE);
5390 // Unsigned comparison: check for < 0 and >= nTargets at the same time
5391 a.jae(defaultStub);
5394 TCA* jmptab = m_globalData.alloc<TCA>(sizeof(TCA), jmptabSize);
5395 TCA afterLea = a.code.frontier + kLeaRipLen;
5396 ptrdiff_t diff = (TCA)jmptab - afterLea;
5397 ASSERT(deltaFits(diff, sz::dword));
5398 a.lea_rip_disp_reg64(diff, rScratch);
5399 ASSERT(a.code.frontier == afterLea);
5400 a.jmp_reg64_index_displ(rScratch, valReg, 0);
5402 for (int idx = 0; idx < jmptabSize; ++idx) {
5403 SrcKey sk(curFunc(), i.offset() + iv.vec32()[idx]);
5404 jmptab[idx] = emitServiceReq(false, REQ_BIND_ADDR, 2ull,
5405 &jmptab[idx], uint64_t(sk.offset()));
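// (Added note: the rip-relative lea above leaves &jmptab[0] in rScratch, so
// the indexed jmp dispatches through jmptab[valReg]. Each slot starts out
// pointing at a REQ_BIND_ADDR service request, which is expected to rebind
// the slot to the real translation once the target has been generated.)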
5409 void
5410 TranslatorX64::analyzeRetC(Tracelet& t,
5411 NormalizedInstruction& i) {
5412 i.manuallyAllocInputs = true;
5413 i.m_txFlags = Supported;
5416 void
5417 TranslatorX64::analyzeRetV(Tracelet& t,
5418 NormalizedInstruction& i) {
5419 analyzeRetC(t, i);
5422 static TypedValue getGroupedRetTV(const NormalizedInstruction& i) {
5423 TypedValue tv;
5424 TV_WRITE_UNINIT(&tv);
5425 tv.m_data.num = 0; // to keep the compiler happy
5426 if (!i.grouped) return tv;
5429 * We suppressed the write of the (literal) return value
5430 * to the stack. Figure out what it was.
5432 NormalizedInstruction* prev = i.prev;
5433 ASSERT(!prev->outStack);
5434 switch (prev->op()) {
5435 case OpNull:
5436 tv.m_type = KindOfNull;
5437 break;
5438 case OpTrue:
5439 case OpFalse:
5440 tv.m_type = KindOfBoolean;
5441 tv.m_data.num = prev->op() == OpTrue;
5442 break;
5443 case OpInt:
5444 tv.m_type = KindOfInt64;
5445 tv.m_data.num = prev->imm[0].u_I64A;
5446 break;
5447 case OpDouble:
5448 tv.m_type = KindOfDouble;
5449 tv.m_data.dbl = prev->imm[0].u_DA;
5450 break;
5451 case OpString:
5452 tv.m_type = BitwiseKindOfString;
5453 tv.m_data.pstr = curUnit()->lookupLitstrId(prev->imm[0].u_SA);
5454 break;
5455 case OpArray:
5456 tv.m_type = KindOfArray;
5457 tv.m_data.parr = curUnit()->lookupArrayId(prev->imm[0].u_AA);
5458 break;
5459 default:
5460 not_reached();
5462 return tv;
5465 // translateRetC --
5467 // Return to caller with the current activation record replaced with the
5468 // top-of-stack return value. Call with outputs sync'ed, so the code
5469 // we're emitting runs "in between" basic blocks.
5470 void
5471 TranslatorX64::translateRetC(const Tracelet& t,
5472 const NormalizedInstruction& i) {
5473 if (i.skipSync) ASSERT(i.grouped);
5474 if (i.grouped) ASSERT(freeLocalsInline());
5477 * This method chooses one of two ways to generate machine code for RetC
5478 * depending on whether we are generating a specialized return (where we
5479 * free the locals inline when possible) or a generic return (where we call
5480 * a helper function to free locals).
5482 * For the specialized return, we emit the following flow:
5484 * Check if varenv is NULL
5485 * If it's not NULL, branch to label 2
5486 * Free each local variable
5487 * 1:
5488 * Teleport the return value to appropriate memory location
5489 * Restore the old values for rVmFp and rVmSp, and
5490 * unconditionally transfer control back to the caller
5491 * 2:
5492 * Call the frame_free_locals helper
5493 * Jump to label 1
5495 * For a generic return, we emit the following flow:
5497 * Call the frame_free_locals helper
5498 * Teleport the return value to appropriate memory location
5499 * Restore the old values for rVmFp and rVmSp, and
5500 * unconditionally transfer control back to the caller
5503 int stackAdjustment = t.m_stackChange;
5504 if (i.skipSync) {
5505 SKTRACE(2, i.source, "i.skipSync\n");
5508 * getting here means there was nothing to do between
5509 * a previous reqXXX and this ret. Any spill code we generate
5510 * here would be broken (because the rbx is wrong), so
5511 * verify that we don't generate anything...
5513 TCA s DEBUG_ONLY = a.code.frontier;
5514 syncOutputs(0);
5515 ASSERT(s == a.code.frontier);
5516 stackAdjustment = 0;
5517 } else {
5519 * no need to syncOutputs here... we're going to update
5520 * rbx at the end of this function anyway, and we may want
5521 * to use enregistered locals on the fast path below
5523 m_regMap.scrubStackEntries(t.m_stackChange);
5524 m_regMap.cleanAll(); // TODO(#1339331): don't.
5527 bool noThis = !curFunc()->isPseudoMain() &&
5528 (!curFunc()->isMethod() || curFunc()->isStatic());
5529 bool mayUseVV = (curFunc()->attrs() & AttrMayUseVV);
5531 const TypedValue groupedRetTV = getGroupedRetTV(i);
5534 * figure out where to put the return value, and where to get it from
5536 ASSERT(i.stackOff == t.m_stackChange);
5537 const Location retValSrcLoc(Location::Stack, stackAdjustment - 1);
5539 const Func *callee = curFunc();
5540 ASSERT(callee);
5541 int nLocalCells =
5542 callee == NULL ? 0 : // This happens for returns from pseudo-main.
5543 callee->numSlotsInFrame();
5544 int retvalSrcBase = cellsToBytes(-stackAdjustment);
5546 ASSERT(cellsToBytes(locPhysicalOffset(retValSrcLoc)) == retvalSrcBase);
5549 * The (1 + nLocalCells) skips 1 slot for the return value.
5551 int retvalDestDisp = cellsToBytes(1 + nLocalCells - stackAdjustment) +
5552 AROFF(m_r);
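// (Added worked example, using the sizeof(Cell) == 16 asserted below: for a
// frame with nLocalCells == 2 and stackAdjustment == 1, the return value is
// read from retvalSrcBase == cellsToBytes(-1) == -16 bytes off rVmSp and
// written to retvalDestDisp == cellsToBytes(1 + 2 - 1) + AROFF(m_r)
// == 32 + AROFF(m_r) bytes off rVmSp, i.e. into the ActRec's m_r slot.)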
5554 if (freeLocalsInline()) {
5555 SKTRACE(2, i.source, "emitting specialized inline return\n");
5557 // Emit specialized code inline to clean up the locals
5558 ASSERT(curFunc()->numLocals() == (int)i.inputs.size());
5560 ScratchReg rTmp(m_regMap);
5563 * If this function can possibly use variadic arguments or shared
5564 * variable environment, we need to check for it and go to a
5565 * generic return if so.
5567 boost::scoped_ptr<DiamondReturn> mayUseVVRet;
5568 if (mayUseVV) {
5569 SKTRACE(2, i.source, "emitting mayUseVV in UnlikelyIf\n");
5571 mayUseVVRet.reset(new DiamondReturn);
5572 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rTmp);
5573 a. test_reg64_reg64(*rTmp, *rTmp);
5575 UnlikelyIfBlock<CC_NZ> varEnvCheck(a, astubs, mayUseVVRet.get());
5577 m_regMap.cleanAll();
5578 if (i.grouped) {
5579 emitStoreImm(astubs, groupedRetTV.m_type,
5580 rVmSp, retvalSrcBase + TVOFF(m_type), sz::dword);
5581 if (groupedRetTV.m_type != KindOfNull) {
5582 emitStoreImm(astubs, groupedRetTV.m_data.num,
5583 rVmSp, retvalSrcBase, sz::qword);
5586 emitFrameRelease(astubs, i, noThis);
5590 for (unsigned int k = 0; k < i.inputs.size(); ++k) {
5591 // RetC's inputs should all be locals
5592 ASSERT(i.inputs[k]->location.space == Location::Local);
5593 DataType t = i.inputs[k]->outerType();
5594 if (IS_REFCOUNTED_TYPE(t)) {
5595 PhysReg reg = m_regMap.allocReg(i.inputs[k]->location, t,
5596 RegInfo::CLEAN);
5597 emitDecRef(i, reg, t);
5601    // If this is an instance method called on an object or if it is a
5602    // pseudo-main, we need to decRef $this (if there is one)
5603 if (curFunc()->isMethod() && !curFunc()->isStatic()) {
5604 // This assert is weaker than it looks; it only checks the invocation
5605 // we happen to be translating for. The runtime "assert" is the
5606 // unconditional dereference of m_this we emit; if the frame has
5607 // neither this nor a class, then m_this will be null and we'll
5608 // SEGV.
5609 ASSERT(curFrame()->hasThis() || curFrame()->hasClass());
5610 // m_this and m_cls share a slot in the ActRec, so we check the
5611 // lowest bit (0 -> m_this, 1 -> m_cls)
5612 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rTmp);
5613 if (i.guardedThis) {
5614 emitDecRef(i, *rTmp, KindOfObject);
5615 } else {
5616 a. test_imm32_reg64(1, *rTmp);
5618 JccBlock<CC_NZ> ifZero(a);
5619 emitDecRef(i, *rTmp, KindOfObject); // this. decref it.
5622 } else if (curFunc()->isPseudoMain()) {
5623 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rTmp);
5624 a. shr_imm32_reg64(1, *rTmp); // sets c (from bit 0) and z
5625 FreezeRegs ice(m_regMap);
5627 // tests for Not Zero and Not Carry
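      // (Added note: shr leaves the m_this/m_cls tag bit in the carry flag
      // and sets the zero flag when the slot was null; CC_NBE therefore
      // fires only for a real, non-null $this, which the stub shifts back
      // into a pointer before decreffing it.)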
5628 UnlikelyIfBlock<CC_NBE> ifRealThis(a, astubs);
5629 astubs. shl_imm32_reg64(1, *rTmp);
5630 emitDecRef(astubs, i, *rTmp, KindOfObject);
5634 // Register map is officially out of commission now.
5635 m_regMap.scrubLoc(retValSrcLoc);
5636 m_regMap.smashRegs(kAllRegs);
5638 emitTestSurpriseFlags();
5640 UnlikelyIfBlock<CC_NZ> ifTracer(a, astubs);
5641 if (i.grouped) {
5642 emitStoreImm(astubs, groupedRetTV.m_type,
5643 rVmSp, retvalSrcBase + TVOFF(m_type), sz::dword);
5644 if (groupedRetTV.m_type != KindOfNull) {
5645 emitStoreImm(astubs, groupedRetTV.m_data.num,
5646 rVmSp, retvalSrcBase, sz::qword);
5649 astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5650 emitCall(astubs, (TCA)&EventHook::FunctionExit, true);
5651 recordReentrantStubCall(i);
5654 // The register map on the main line better be empty (everything
5655 // smashed) or some of the above DiamondReturns might generate
5656 // reconciliation code.
5657 ASSERT(m_regMap.empty());
5658 } else {
5659 SKTRACE(2, i.source, "emitting generic return\n");
5661 if (i.grouped) {
5663 * What a pain: EventHook::onFunctionExit needs access
5664 * to the return value - so we'd better not have suppressed
5665 * writing it to the stack...
5667 ASSERT(false);
5668 not_reached();
5670 m_regMap.cleanAll();
5671 m_regMap.smashRegs(kAllRegs);
5672 // If we are doing the generic return flow, we emit a call to
5673 // frame_free_locals here
5674 ASSERT(i.inputs.size() == 0);
5675 emitFrameRelease(a, i, noThis);
5679 * We're officially between tracelets now, and the normal register
5680 * allocator is not being used.
5682 ASSERT(m_regMap.empty());
5683 RegSet scratchRegs = kScratchCrossTraceRegs;
5684 DumbScratchReg rRetAddr(scratchRegs);
5686 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRip), *rRetAddr);
5687 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
5690 * Having gotten everything we care about out of the current frame
5691 * pointer, smash the return address type and value over it. We don't
5692 * care about reference counts: as long as this runs to completion, we're
5693 * refcount-neutral.
5695 if (i.grouped) {
5696 emitStoreImm(a, groupedRetTV.m_type,
5697 rVmSp, retvalDestDisp + TVOFF(m_type), sz::dword);
5698 if (groupedRetTV.m_type != KindOfNull) {
5699 emitStoreImm(a, groupedRetTV.m_data.num,
5700 rVmSp, retvalDestDisp, sz::qword);
5702 } else {
5703 ASSERT(sizeof(Cell) == 16);
5704 a. load_reg64_disp_reg64 (rVmSp, retvalSrcBase, rScratch);
5705 a. store_reg64_disp_reg64(rScratch, retvalDestDisp, rVmSp);
5706 a. load_reg64_disp_reg64 (rVmSp, retvalSrcBase + 8, rScratch);
5707 a. store_reg64_disp_reg64(rScratch, retvalDestDisp + 8, rVmSp);
5711 * Now update the principal hardware registers.
5713 * Stack pointer has to skip over all the locals as well as the
5714 * activation record.
5716 a. add_imm64_reg64(sizeof(ActRec) +
5717 cellsToBytes(nLocalCells - stackAdjustment), rVmSp);
5718 emitRB(a, RBTypeFuncExit, curFunc()->fullName()->data(), RegSet(*rRetAddr));
5719 a. jmp_reg (*rRetAddr);
5720 translator_not_reached(a);
5723 void
5724 TranslatorX64::translateRetV(const Tracelet& t,
5725 const NormalizedInstruction& i) {
5726 translateRetC(t, i);
5730 * NativeImpl is a special operation in the sense that it must be the
5731 * only opcode in a function body, and also functions as the return.
5733 * This function runs between tracelets and does not use m_regMap.
5735 void TranslatorX64::emitNativeImpl(const Func* func,
5736 bool emitSavedRIPReturn) {
5737 BuiltinFunction builtinFuncPtr = func->builtinFuncPtr();
5738 if (false) { // typecheck
5739 ActRec* ar = NULL;
5740 builtinFuncPtr(ar);
5743 TRACE(2, "calling builtin preClass %p func %p\n", func->preClass(),
5744 builtinFuncPtr);
5746 * Call the native implementation. This will free the locals for us in the
5747 * normal case. In the case where an exception is thrown, the VM unwinder
5748 * will handle it for us.
5750 a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5751 emitCall(a, (TCA)builtinFuncPtr, false /* smash regs */);
5754 * We're sometimes calling this while curFunc() isn't really the
5755 * builtin---make sure to properly record the sync point as if we
5756 * are inside the builtin.
5758 * The assumption here is that for builtins, the generated func
5759 * contains only a single opcode (NativeImpl), and there are no
5760 * non-argument locals.
5762 ASSERT(func->numIterators() == 0 && func->isBuiltin());
5763 ASSERT(func->numLocals() == func->numParams());
5764 ASSERT(*func->getEntry() == OpNativeImpl);
5765 ASSERT(instrLen(func->getEntry()) == func->past() - func->base());
5766 Offset pcOffset = 0; // NativeImpl is the only instruction in the func
5767 Offset stackOff = func->numLocals(); // Builtin stubs have no
5768 // non-arg locals
5769 recordSyncPoint(a, pcOffset, stackOff);
5771 RegSet unusedRegs = kScratchCrossTraceRegs;
5772 DumbScratchReg rRetAddr(unusedRegs);
5774 RegSet saveDuringEmitRB;
5775 if (emitSavedRIPReturn) {
5776 // Get the return address from the ActRec
5777 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRip), *rRetAddr);
5778 saveDuringEmitRB |= RegSet(*rRetAddr);
5782 * The native implementation already put the return value on the
5783 * stack for us, and handled cleaning up the arguments. We have to
5784 * update the frame pointer and the stack pointer, and load the
5785 * return value into the return register so the trace we are
5786 * returning to has it where it expects.
5788 * TODO(#1273094): we should probably modify the actual builtins to
5789 * return values via registers (rax:edx) using the C ABI and do a
5790 * reg-to-reg move.
5792 int nLocalCells = func->numSlotsInFrame();
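// (Added note: the nLocalCells - 1 below skips the frame's local slots but
// leaves room for one cell -- presumably the slot where the builtin left its
// return value -- so the adjusted rVmSp accounts for the value already on
// the stack.)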
5793 a. add_imm64_reg64(sizeof(ActRec) + cellsToBytes(nLocalCells-1), rVmSp);
5794 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
5796 emitRB(a, RBTypeFuncExit, func->fullName()->data(), saveDuringEmitRB);
5797 if (emitSavedRIPReturn) {
5798 a. jmp_reg (*rRetAddr);
5799 translator_not_reached(a);
5803 void
5804 TranslatorX64::translateNativeImpl(const Tracelet& t,
5805 const NormalizedInstruction& ni) {
5807 * We assume that NativeImpl is the only instruction in the trace,
5808 * and the only instruction for the implementation of the function.
5810 ASSERT(ni.stackOff == 0);
5811 ASSERT(m_regMap.empty());
5812 emitNativeImpl(curFunc(), true);
5815 // Warning: smashes rsi and rdi, and can't handle unclean registers.
5816 // Used between functions.
5817 void
5818 TranslatorX64::emitFrameRelease(X64Assembler& a,
5819 const NormalizedInstruction& i,
5820 bool noThis /*= false*/) {
5821 if (false) { // typecheck
5822 frame_free_locals(curFrame(), 0);
5824 a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5825 int numLocals = curFunc()->numLocals();
5826 emitImmReg(a, numLocals, argNumToRegName[1]);
5827 if (noThis) {
5828 emitCall(a, (TCA)frame_free_locals_no_this);
5829 } else {
5830 emitCall(a, (TCA)frame_free_locals);
5832 recordReentrantCall(a, i);
5835 // emitClsLocalIndex --
5836 // emitStringToClass --
5837 // emitStringToKnownClass --
5838 // emitObjToClass --
5839 // emitClsAndPals --
5840 // Helpers for AGetC/AGetL.
5842 const int kEmitClsLocalIdx = 0;
5845 * Determine if the class is defined, and fatal if not.
5846  * If reg is not noreg, return the Class* in it.
5847  * If we can statically prove that the class is defined,
5848  * all checks are omitted (e.g. it's a parent of the current,
5849 * fixed, context).
5851 void
5852 TranslatorX64::emitKnownClassCheck(const NormalizedInstruction& i,
5853 const StringData* clsName,
5854 register_name_t reg) {
5855 using namespace TargetCache;
5856 ASSERT(clsName);
5857 Class* klass = Unit::lookupClass(clsName);
5858 bool guarded = false;
5859 if (klass) {
5860 guarded = i.guardedCls;
5861 if (!guarded && isContextFixed()) {
5862 Class *ctx = curFunc()->cls();
5863 if (ctx && ctx->classof(klass)) {
5864 guarded = true;
5868 if (guarded) {
5869 if (reg != reg::noreg) {
5870 emitImmReg(a, (uint64_t)klass, reg);
5872 } else {
5873 Stats::emitInc(a, Stats::TgtCache_KnownClsHit);
5874 CacheHandle ch = allocKnownClass(clsName);
5875 if (reg == reg::noreg) {
5876 a. cmp_imm32_disp_reg32(0, ch, rVmTl);
5877 } else {
5878 a. load_reg64_disp_reg64(rVmTl, ch, reg);
5879 a. test_reg64_reg64(reg, reg);
5882 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
5883 ScratchReg clsPtr(m_regMap);
5884 astubs. lea_reg64_disp_reg64(rVmTl, ch, *clsPtr);
5885 if (false) { // typecheck
5886 Class** cache = NULL;
5887 UNUSED Class* ret =
5888 TargetCache::lookupKnownClass<false>(cache, clsName, true);
5890 // We're only passing two arguments to lookupKnownClass because
5891 // the third is ignored in the checkOnly == false case
5892 EMIT_CALL(astubs, ((TargetCache::lookupKnownClass_func_t)
5893 TargetCache::lookupKnownClass<false>),
5894 R(*clsPtr), IMM((uintptr_t)clsName));
5895 recordReentrantStubCall(i);
5896 if (reg != reg::noreg) {
5897 emitMovRegReg(astubs, rax, reg);
5903 void
5904 TranslatorX64::emitStringToKnownClass(const NormalizedInstruction& i,
5905 const StringData* clsName) {
5906 ScratchReg cls(m_regMap);
5907 emitKnownClassCheck(i, clsName, *cls);
5908 m_regMap.bindScratch(cls, i.outStack->location, KindOfClass, RegInfo::DIRTY);
5911 void
5912 TranslatorX64::emitStringToClass(const NormalizedInstruction& i) {
5913 using namespace TargetCache;
5914 if (!i.inputs[kEmitClsLocalIdx]->rtt.valueString()) {
5915 // Handle the case where we don't know the name of the class
5916 // at translation time
5917 const Location& in = i.inputs[kEmitClsLocalIdx]->location;
5918 const Location& out = i.outStack->location;
5919 CacheHandle ch = ClassCache::alloc();
5920 if (false) {
5921 StringData *name = NULL;
5922 const UNUSED Class* cls = ClassCache::lookup(ch, name);
5924 TRACE(1, "ClassCache @ %d\n", int(ch));
5925 if (i.inputs[kEmitClsLocalIdx]->rtt.isVariant()) {
5926 EMIT_CALL(a, ClassCache::lookup,
5927 IMM(ch),
5928 DEREF(in));
5929 } else {
5930 EMIT_CALL(a, ClassCache::lookup,
5931 IMM(ch),
5932 V(in));
5934 recordReentrantCall(i);
5935 m_regMap.bind(rax, out, KindOfClass, RegInfo::DIRTY);
5936 return;
5938 // We know the name of the class at translation time; use the
5939 // target cache associated with the name of the class
5940 const StringData* clsName = i.inputs[kEmitClsLocalIdx]->rtt.valueString();
5941 emitStringToKnownClass(i, clsName);
5944 void
5945 TranslatorX64::emitObjToClass(const NormalizedInstruction& i) {
5946 m_regMap.allocOutputRegs(i);
5947 const Location& in = i.inputs[kEmitClsLocalIdx]->location;
5948 const Location& out = i.outStack->location;
5949 PhysReg src = getReg(in);
5950 ScratchReg tmp(m_regMap);
5951 if (i.inputs[kEmitClsLocalIdx]->rtt.isVariant()) {
5952 emitDeref(a, src, *tmp);
5953 src = *tmp;
5955 ASSERT(i.outStack->valueType() == KindOfClass);
5956 a. load_reg64_disp_reg64(src, ObjectData::getVMClassOffset(), getReg(out));
5959 void
5960 TranslatorX64::emitClsAndPals(const NormalizedInstruction& ni) {
5961 if (ni.inputs[kEmitClsLocalIdx]->isString()) {
5962 emitStringToClass(ni);
5963 } else {
5964 emitObjToClass(ni);
5968 void
5969 TranslatorX64::analyzeAGetC(Tracelet& t, NormalizedInstruction& i) {
5970 ASSERT(i.inputs.size() == 1);
5971 ASSERT(i.outStack && !i.outLocal);
5972 ASSERT(i.outStack->valueType() == KindOfClass);
5973 const RuntimeType& rtt = i.inputs[0]->rtt;
5974 ASSERT(!rtt.isVariant());
5975 i.m_txFlags = supportedPlan(rtt.isString() ||
5976 rtt.valueType() == KindOfObject);
5977 if (rtt.isString() && rtt.valueString()) i.manuallyAllocInputs = true;
5980 void TranslatorX64::translateAGetC(const Tracelet& t,
5981 const NormalizedInstruction& ni) {
5982 if (ni.outStack) {
5983 emitClsAndPals(ni);
5987 void TranslatorX64::analyzeAGetL(Tracelet& t,
5988 NormalizedInstruction& ni) {
5989 ASSERT(ni.inputs.size() == 1);
5990 ASSERT(ni.inputs[0]->isLocal());
5991 const RuntimeType& rtt = ni.inputs[0]->rtt;
5992 ni.m_txFlags = supportedPlan(rtt.isString() ||
5993 rtt.valueType() == KindOfObject);
5996 void TranslatorX64::translateAGetL(const Tracelet& t,
5997 const NormalizedInstruction& ni) {
5998 emitClsAndPals(ni);
6001 void TranslatorX64::translateSelf(const Tracelet& t,
6002 const NormalizedInstruction& i) {
6003 m_regMap.allocOutputRegs(i);
6004 PhysReg tmp = getReg(i.outStack->location);
6005 ASSERT(isContextFixed() && curFunc()->cls());
6006 emitImmReg(a, (int64_t)curFunc()->cls(), tmp);
6009 void TranslatorX64::translateParent(const Tracelet& t,
6010 const NormalizedInstruction& i) {
6011 m_regMap.allocOutputRegs(i);
6012 PhysReg tmp = getReg(i.outStack->location);
6013 ASSERT(isContextFixed() && curFunc()->cls() && curFunc()->cls()->parent());
6014 emitImmReg(a, (int64_t)curFunc()->cls()->parent(), tmp);
6017 void TranslatorX64::analyzeSelf(Tracelet& t,NormalizedInstruction& i) {
6018 Class* clss = curClass();
6019 if (clss == NULL) {
6020 i.m_txFlags = Interp;
6021 return;
6023 i.m_txFlags = Supported;
6026 void TranslatorX64::analyzeParent(Tracelet& t,NormalizedInstruction& i) {
6027 Class* clss = curClass();
6028 if (clss == NULL) {
6029 i.m_txFlags = Interp;
6030 return;
6032 if (clss->parent() == NULL) {
6033 // clss has no parent; interpret to throw fatal
6034 i.m_txFlags = Interp;
6035 return;
6037 i.m_txFlags = Supported;
6040 void TranslatorX64::translateDup(const Tracelet& t,
6041 const NormalizedInstruction& ni) {
6042 ASSERT(ni.inputs.size() == 1);
6043 ASSERT(ni.outStack);
6044 ASSERT(!ni.inputs[0]->rtt.isVariant());
6045 m_regMap.allocOutputRegs(ni);
6046 PhysReg outR = getReg(ni.outStack->location);
6047 emitMovRegReg(a, getReg(ni.inputs[0]->location), outR);
6048 emitIncRef(outR, ni.inputs[0]->outerType());
6051 typedef std::map<int, int> ParamMap;
6053 * mapContParams determines if every named local in origFunc has a
6054 * corresponding named local in genFunc. If this step succeeds and
6055 * there's no VarEnv at runtime, the continuation's variables can be
6056 * filled completely inline in the TC (assuming there aren't too
6057 * many).
6059 bool TranslatorX64::mapContParams(ParamMap& map,
6060 const Func* origFunc, const Func* genFunc) {
6061 const StringData* const* varNames = origFunc->localNames();
6062 for (Id i = 0; i < origFunc->numNamedLocals(); ++i) {
6063 Id id = genFunc->lookupVarId(varNames[i]);
6064 if (id != kInvalidId) {
6065 map[i] = id;
6066 } else {
6067 return false;
6070 return true;
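// (Added illustrative example, with hypothetical names: if origFunc declares
// locals $x and $y and the generator body genFunc also declares both, the map
// comes back as {0 -> genFunc's id for $x, 1 -> genFunc's id for $y} and the
// continuation's locals can be filled inline; if genFunc lacks any of them,
// mapContParams returns false and translateCreateCont falls back to the
// emitCallFillCont slow path.)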
6073 void TranslatorX64::emitCallFillCont(X64Assembler& a,
6074 const Func* orig,
6075 const Func* gen) {
6076 if (false) {
6077 ActRec* fp = NULL;
6078 c_Continuation *cont = NULL;
6079 cont =
6080 VMExecutionContext::fillContinuationVars(fp, orig, gen, cont);
6082 EMIT_CALL(a,
6083 VMExecutionContext::fillContinuationVars,
6084 R(rVmFp),
6085 IMM((intptr_t)orig),
6086 IMM((intptr_t)gen),
6087 R(rax));
6090 void TranslatorX64::translateCreateCont(const Tracelet& t,
6091 const NormalizedInstruction& i) {
6092 bool getArgs = i.imm[0].u_IVA;
6093 const StringData* genName = curUnit()->lookupLitstrId(i.imm[1].u_SA);
6094 const Func* origFunc = curFunc();
6095 const Func* genFunc = origFunc->getGeneratorBody(genName);
6097 if (false) {
6098 ActRec* fp = NULL;
6099 UNUSED c_Continuation* cont =
6100 VMExecutionContext::createContinuation<true>(fp, getArgs, origFunc,
6101 genFunc);
6102 VMExecutionContext::createContinuation<false>(fp, getArgs, origFunc,
6103 genFunc);
6106 // Even callee-saved regs need to be clean, because
6107 // createContinuation will read all locals.
6108 m_regMap.cleanAll();
6109 auto helper = origFunc->isNonClosureMethod() ?
6110 VMExecutionContext::createContinuation<true> :
6111 VMExecutionContext::createContinuation<false>;
6112 EMIT_CALL(a,
6113 (TCA)helper,
6114 R(rVmFp),
6115 IMM(getArgs),
6116 IMM((intptr_t)origFunc),
6117 IMM((intptr_t)genFunc));
6118 ScratchReg holdRax(m_regMap, rax);
6120 int origLocals = origFunc->numNamedLocals();
6121 int genLocals = genFunc->numNamedLocals() - 1;
6122 ContParamMap params;
6123 if (origLocals <= kMaxInlineContLocals &&
6124 mapContParams(params, origFunc, genFunc)) {
6125 ScratchReg rScratch(m_regMap);
6126 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rScratch);
6127 a. test_reg64_reg64(*rScratch, *rScratch);
6128 DiamondReturn astubsRet;
6130 UnlikelyIfBlock<CC_NZ> ifVarEnv(a, astubs, &astubsRet);
6131 Stats::emitInc(astubs, Stats::Tx64_ContCreateSlow);
6132 emitCallFillCont(astubs, origFunc, genFunc);
6134 // fillContinuationVars returned the continuation in rax and
6135 // DiamondGuard marked rax as scratch again, so it's safe to keep
6136 // using it
6137 Stats::emitInc(a, Stats::Tx64_ContCreateFast);
6138 static const StringData* thisStr = StringData::GetStaticString("this");
6139 Id thisId = kInvalidId;
6140 bool fillThis = origFunc->isNonClosureMethod() && !origFunc->isStatic() &&
6141 ((thisId = genFunc->lookupVarId(thisStr)) != kInvalidId) &&
6142 (origFunc->lookupVarId(thisStr) == kInvalidId);
6143 ScratchReg rDest(m_regMap);
6144 if (origLocals > 0 || fillThis) {
6145 a.lea_reg64_disp_reg64(rax,
6146 c_Continuation::localsOffset(),
6147 *rDest);
6149 for (int i = 0; i < origLocals; ++i) {
6150 ASSERT(mapContains(params, i));
6151 int destOff = cellsToBytes(genLocals - params[i]);
6152 emitCopyTo(a, rVmFp, localOffset(i), *rDest, destOff, *rScratch);
6153 emitIncRefGenericRegSafe(*rDest, destOff, *rScratch);
6156 // Deal with a potential $this local in the generator body
6157 if (fillThis) {
6158 ASSERT(thisId != kInvalidId);
6159 a.load_reg64_disp_reg64(rax, CONTOFF(m_obj), *rScratch);
6160 a.test_reg64_reg64(*rScratch, *rScratch);
6162 JccBlock<CC_Z> ifObj(a);
6163 const int thisOff = cellsToBytes(genLocals - thisId);
6164 // We don't have to check for a static refcount since we
6165 // know it's an Object
6166 a.add_imm32_disp_reg32(1, TVOFF(_count), *rScratch);
6167 a.store_reg64_disp_reg64(*rScratch, thisOff + TVOFF(m_data), *rDest);
6168 a.store_imm32_disp_reg(KindOfObject, thisOff + TVOFF(m_type), *rDest);
6171 } else {
6172 Stats::emitInc(a, Stats::Tx64_ContCreateSlow);
6173 emitCallFillCont(a, origFunc, genFunc);
6175 m_regMap.bindScratch(holdRax, i.outStack->location, KindOfObject,
6176 RegInfo::DIRTY);
6179 void TranslatorX64::emitCallUnpack(X64Assembler& a,
6180 const NormalizedInstruction& i,
6181 int nCopy) {
6182 const int contIdx = 0;
6184 if (false) {
6185 c_Continuation* cont = NULL;
6186 TypedValue* dest = NULL;
6187 VMExecutionContext::unpackContinuation(cont, dest);
6189 EMIT_CALL(a,
6190 VMExecutionContext::unpackContinuation,
6191 V(i.inputs[contIdx]->location),
6192 A(Location(Location::Local, nCopy)));
6193 recordCall(a, i);
6196 void TranslatorX64::translateUnpackCont(const Tracelet& t,
6197 const NormalizedInstruction& i) {
6198 const int contIdx = 0;
6199 ASSERT(curFrame()->m_varEnv == NULL);
6200 ASSERT(i.inputs.size() == 1);
6201 ASSERT(i.inputs[contIdx]->location == Location(Location::Local, 0));
6202 ASSERT(i.outStack->outerType() == KindOfInt64);
6203 int nCopy = curFunc()->numNamedLocals() - 1;
6205 for (int loc = 1; loc <= nCopy; ++loc) {
6206 // We're at the beginning of the function. The only local in a
6207 // register should be local 0, our input
6208 ASSERT(!m_regMap.hasReg(Location(Location::Local, loc)));
6210 if (nCopy > kMaxInlineContLocals) {
6211 Stats::emitInc(a, Stats::Tx64_ContUnpackSlow);
6212 emitCallUnpack(a, i, nCopy);
6213 m_regMap.bind(rax, i.outStack->location, KindOfInt64,
6214 RegInfo::DIRTY);
6215 return;
6218 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6219 ScratchReg rLabel(m_regMap);
6221 a. test_imm32_disp_reg32(0x1, CONTOFF(m_hasExtraVars), rCont);
6222 DiamondReturn astubsRet;
6224 UnlikelyIfBlock<CC_NZ> hasVars(a, astubs, &astubsRet);
6225 Stats::emitInc(astubs, Stats::Tx64_ContUnpackSlow);
6226 emitCallUnpack(astubs, i, nCopy);
6227 emitMovRegReg(astubs, rax, *rLabel);
6229 Stats::emitInc(a, Stats::Tx64_ContUnpackFast);
6231 a. load_reg64_disp_reg64(rCont, CONTOFF(m_label), *rLabel);
6232 ScratchReg rScratch(m_regMap);
6233 ScratchReg rSrc(m_regMap);
6234 ScratchReg rZero(m_regMap);
6235 if (nCopy > 0) {
6236 a. lea_reg64_disp_reg64(rCont,
6237 c_Continuation::localsOffset(),
6238 *rSrc);
6239 emitImmReg(a, 0, *rZero);
6241 for (int srcOff = 0, destOff = localOffset(nCopy);
6242 srcOff < (int)cellsToBytes(nCopy);
6243 srcOff += sizeof(Cell), destOff += sizeof(Cell)) {
6244 emitCopyTo(a, *rSrc, srcOff, rVmFp, destOff, *rScratch);
6245 a. store_reg32_disp_reg64(*rZero, srcOff + TVOFF(m_type), *rSrc);
6248 m_regMap.bindScratch(rLabel, i.outStack->location, KindOfInt64,
6249 RegInfo::DIRTY);
6252 void TranslatorX64::emitCallPack(X64Assembler& a,
6253 const NormalizedInstruction& i,
6254 int nCopy) {
6255 const int valIdx = 0;
6256 const int contIdx = 1;
6258 // packContinuation is going to read values directly from the stack
6259 // so we have to clean everything.
6260 m_regMap.cleanAll();
6261 if (false) {
6262 c_Continuation* cont = NULL;
6263 TypedValue* tv = NULL;
6264 ActRec* fp = NULL;
6265 int label = 0;
6266 VMExecutionContext::packContinuation(cont, fp, tv, label);
6268 EMIT_CALL(a,
6269 VMExecutionContext::packContinuation,
6270 V(i.inputs[contIdx]->location),
6271 R(rVmFp),
6272 A(i.inputs[valIdx]->location),
6273 IMM(i.imm[0].u_IVA));
6274 recordCall(a, i);
6275 m_regMap.invalidateLocals(1, nCopy);
6278 void TranslatorX64::translatePackCont(const Tracelet& t,
6279 const NormalizedInstruction& i) {
6280 const int valIdx = 0;
6281 const int contIdx = 1;
6283 int nCopy = curFunc()->numNamedLocals() - 1;
6284 if (nCopy > kMaxInlineContLocals) {
6285 Stats::emitInc(a, Stats::Tx64_ContPackSlow);
6286 emitCallPack(a, i, nCopy);
6287 emitDecRefInput(a, i, valIdx);
6288 return;
6291 ScratchReg rScratch(m_regMap);
6292 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rScratch);
6293 a. test_reg64_reg64(*rScratch, *rScratch);
6294 DiamondReturn astubsRet;
6296 // TODO: Task #1132976: We can probably prove that this is impossible in
6297 // most cases using information from hphpc
6298 UnlikelyIfBlock<CC_NZ> varEnv(a, astubs, &astubsRet);
6299 Stats::emitInc(astubs, Stats::Tx64_ContPackSlow);
6300 emitCallPack(astubs, i, nCopy);
6301 emitDecRefInput(astubs, i, valIdx);
6303 Stats::emitInc(a, Stats::Tx64_ContPackFast);
6305 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6306 ScratchReg rDest(m_regMap);
6307 ScratchReg rZero(m_regMap);
6308 if (nCopy > 0) {
6309 a. lea_reg64_disp_reg64(rCont,
6310 c_Continuation::localsOffset(),
6311 *rDest);
6312 emitImmReg(a, 0, *rZero);
6314 for (int idx = nCopy, destOff = 0, srcOff = localOffset(nCopy);
6315 idx > 0;
6316 --idx, destOff += sizeof(Cell), srcOff += sizeof(Cell)) {
6317 Location loc(Location::Local, idx);
6318 if (m_regMap.hasReg(loc)) {
6319 PhysReg reg = getReg(loc);
6320 spillTo(m_regMap.getInfo(reg)->m_type, reg, true, *rDest, destOff);
6321 } else {
6322 emitCopyTo(a, rVmFp, srcOff, *rDest, destOff, *rScratch);
6324 m_regMap.invalidate(loc);
6325 a. store_reg32_disp_reg64(*rZero, srcOff + TVOFF(m_type), rVmFp);
6328 // We're moving our reference to the value from the stack to the
6329 // continuation object, so we don't have to incRef or decRef
6330 Location valLoc = i.inputs[valIdx]->location;
6331 emitTvSet(i, getReg(valLoc), i.inputs[valIdx]->outerType(), rCont,
6332 CONTOFF(m_value), false);
6334 emitImmReg(a, i.imm[0].u_IVA, *rScratch);
6335 a. store_reg64_disp_reg64(*rScratch, CONTOFF(m_label), rCont);
6338 static void continuationRaiseHelper(c_Continuation* cont) {
6339 cont->t_raised();
6340 not_reached();
6343 void TranslatorX64::emitContRaiseCheck(X64Assembler& a,
6344 const NormalizedInstruction& i) {
6345 const int contIdx = 0;
6346 ASSERT(i.inputs[contIdx]->location == Location(Location::Local, 0));
6347 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6348 a. test_imm32_disp_reg32(0x1, CONTOFF(m_should_throw), rCont);
6350 UnlikelyIfBlock<CC_NZ> ifThrow(a, astubs);
6351 if (false) {
6352 c_Continuation* c = NULL;
6353 continuationRaiseHelper(c);
6355 EMIT_CALL(astubs,
6356 continuationRaiseHelper,
6357 R(rCont));
6358 recordReentrantStubCall(i);
6359 translator_not_reached(astubs);
6363 void TranslatorX64::translateContReceive(const Tracelet& t,
6364 const NormalizedInstruction& i) {
6365 const int contIdx = 0;
6366 emitContRaiseCheck(a, i);
6367 ScratchReg rScratch(m_regMap);
6368 a. lea_reg64_disp_reg64(getReg(i.inputs[contIdx]->location),
6369 CONTOFF(m_received), *rScratch);
6370 emitIncRefGeneric(*rScratch, 0);
6371 emitCopyToStack(a, i, *rScratch, -1 * (int)sizeof(Cell));
6374 void TranslatorX64::translateContRaised(const Tracelet& t,
6375 const NormalizedInstruction& i) {
6376 emitContRaiseCheck(a, i);
6379 void TranslatorX64::translateContDone(const Tracelet& t,
6380 const NormalizedInstruction& i) {
6381 const int contIdx = 0;
6382 a. store_imm8_disp_reg(0x1, CONTOFF(m_done),
6383 getReg(i.inputs[contIdx]->location));
6386 static void contPreNextThrowHelper(c_Continuation* c) {
6387 c->preNext();
6388 not_reached();
6391 void TranslatorX64::emitContPreNext(const NormalizedInstruction& i,
6392 ScratchReg& rCont) {
6393 const Offset doneOffset = CONTOFF(m_done);
6394 CT_ASSERT((doneOffset + 1) == CONTOFF(m_running));
6395 // Check m_done and m_running at the same time
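  // (Added note: the CT_ASSERT above guarantees m_running sits in the byte
  // right after m_done, so the single 0x0101 test below checks the low byte
  // of each field at once -- if either flag is set, the result is non-zero
  // and we take the throwing slow path.)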
6396 a. test_imm32_disp_reg32(0x0101, doneOffset, *rCont);
6398 UnlikelyIfBlock<CC_NZ> ifThrow(a, astubs);
6399 EMIT_CALL(astubs, contPreNextThrowHelper, R(*rCont));
6400 recordReentrantStubCall(i);
6401 translator_not_reached(astubs);
6404 // ++m_index
6405 a. add_imm64_disp_reg64(0x1, CONTOFF(m_index), *rCont);
6406 // m_running = true
6407 a. store_imm8_disp_reg(0x1, CONTOFF(m_running), *rCont);
6410 void TranslatorX64::translateContNext(const Tracelet& t,
6411 const NormalizedInstruction& i) {
6412 ScratchReg rCont(m_regMap);
6413 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6414 emitContPreNext(i, rCont);
6416 // m_received.setNull()
6417 emitTvSet(i, reg::noreg, KindOfNull, *rCont, CONTOFF(m_received), false);
6420 static void contNextCheckThrowHelper(c_Continuation* cont) {
6421 cont->startedCheck();
6422 not_reached();
6425 void TranslatorX64::emitContStartedCheck(const NormalizedInstruction& i,
6426 ScratchReg& rCont) {
6427 // if (m_index < 0)
6428 a. cmp_imm64_disp_reg64(0, CONTOFF(m_index), *rCont);
6430 UnlikelyIfBlock<CC_L> whoops(a, astubs);
6431 EMIT_CALL(astubs, contNextCheckThrowHelper, *rCont);
6432 recordReentrantStubCall(i);
6433 translator_not_reached(astubs);
6437 template<bool raise>
6438 void TranslatorX64::translateContSendImpl(const NormalizedInstruction& i) {
6439 const int valIdx = 0;
6440 ASSERT(i.inputs[valIdx]->location == Location(Location::Local, 0));
6442 ScratchReg rCont(m_regMap);
6443 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6444 emitContStartedCheck(i, rCont);
6445 emitContPreNext(i, rCont);
6447 // m_received = value
6448 PhysReg valReg = getReg(i.inputs[valIdx]->location);
6449 DataType valType = i.inputs[valIdx]->outerType();
6450 emitTvSet(i, valReg, valType, *rCont, CONTOFF(m_received), true);
6452 // m_should_throw = true (maybe)
6453 if (raise) {
6454 a. store_imm8_disp_reg(0x1, CONTOFF(m_should_throw), *rCont);
6458 void TranslatorX64::translateContSend(const Tracelet& t,
6459 const NormalizedInstruction& i) {
6460 translateContSendImpl<false>(i);
6463 void TranslatorX64::translateContRaise(const Tracelet& t,
6464 const NormalizedInstruction& i) {
6465 translateContSendImpl<true>(i);
6468 void TranslatorX64::translateContValid(const Tracelet& t,
6469 const NormalizedInstruction& i) {
6470 ScratchReg rCont(m_regMap);
6471 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6473 m_regMap.allocOutputRegs(i);
6474 PhysReg validReg = getReg(i.outStack->location);
6475 // !m_done
6476 a. loadzxb_reg64_disp_reg64(*rCont, CONTOFF(m_done), validReg);
6477 a. xor_imm32_reg64(0x1, validReg);
6480 void TranslatorX64::translateContCurrent(const Tracelet& t,
6481 const NormalizedInstruction& i) {
6482 ScratchReg rCont(m_regMap);
6483 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6484 emitContStartedCheck(i, rCont);
6486 a. lea_reg64_disp_reg64(*rCont, CONTOFF(m_value), *rCont);
6487 emitIncRefGeneric(*rCont, 0);
6488 emitCopyToStack(a, i, *rCont, -1 * (int)sizeof(Cell));
6491 void TranslatorX64::translateContStopped(const Tracelet& t,
6492 const NormalizedInstruction& i) {
6493 ScratchReg rCont(m_regMap);
6494 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6495 a. store_imm8_disp_reg(0x0, CONTOFF(m_running), *rCont);
6498 void TranslatorX64::translateContHandle(const Tracelet& t,
6499 const NormalizedInstruction& i) {
6500 // Always interpreted
6501 not_reached();
6504 static void analyzeClassExistsImpl(NormalizedInstruction& i) {
6505 const int nameIdx = 1;
6506 const int autoIdx = 0;
6507 ASSERT(!i.inputs[nameIdx]->isVariant() && !i.inputs[autoIdx]->isVariant());
6508 i.m_txFlags = supportedPlan(i.inputs[nameIdx]->isString() &&
6509 i.inputs[autoIdx]->isBoolean());
6510 i.fuseBranch = (i.m_txFlags & Supported) &&
6511 i.inputs[nameIdx]->rtt.valueString() &&
6512 i.inputs[autoIdx]->rtt.valueBoolean() != RuntimeType::UnknownBool;
6515 void TranslatorX64::analyzeClassExists(Tracelet& t,
6516 NormalizedInstruction& i) {
6517 analyzeClassExistsImpl(i);
6520 void TranslatorX64::analyzeInterfaceExists(Tracelet& t,
6521 NormalizedInstruction& i) {
6522 analyzeClassExistsImpl(i);
6525 void TranslatorX64::analyzeTraitExists(Tracelet& t,
6526 NormalizedInstruction& i) {
6527 analyzeClassExistsImpl(i);
6530 static int64 classExistsSlow(const StringData* name, bool autoload,
6531 Attr typeAttr) {
6532 bool ret = Unit::classExists(name, autoload, typeAttr);
6533 // XXX: do we need to decref this during an exception?
6534 if (name->decRefCount() == 0) {
6535 const_cast<StringData*>(name)->release();
6537 return ret;
6540 void TranslatorX64::translateClassExistsImpl(const Tracelet& t,
6541 const NormalizedInstruction& i,
6542 Attr typeAttr) {
6543 const int nameIdx = 1;
6544 const int autoIdx = 0;
6545 const StringData* name = i.inputs[nameIdx]->rtt.valueString();
6546 ASSERT(IMPLIES(name, name->isStatic()));
6547 const int autoload = i.inputs[autoIdx]->rtt.valueBoolean();
6549 ScratchReg scratch(m_regMap);
6550 if (name != NULL && autoload != RuntimeType::UnknownBool) {
6551 ASSERT(i.fuseBranch);
6552 const Attr attrNotClass = Attr(AttrTrait | AttrInterface);
6553 const bool isClass = typeAttr == AttrNone;
6554 using namespace TargetCache;
6555 Stats::emitInc(a, Stats::Tx64_ClassExistsFast);
6556 CacheHandle ch = allocKnownClass(name);
6559 DiamondReturn astubsRet;
6560 a. load_reg64_disp_reg64(rVmTl, ch, *scratch);
6561 a. test_reg64_reg64(*scratch, *scratch);
6562 if (autoload) {
6563 UnlikelyIfBlock<CC_Z> ifNull(a, astubs, &astubsRet);
6564 if (false) {
6565 Class** c = NULL;
6566 UNUSED Class* ret = lookupKnownClass<true>(c, name, false);
6568 Stats::emitInc(astubs, Stats::TgtCache_ClassExistsMiss);
6569 // If the class exists after autoloading, the helper will
6570 // return the Class's flags. Otherwise, it will return a set
6571 // of flags such that our flag check at the join point below
6572 // will fail.
6573 EMIT_CALL(astubs, (lookupKnownClass_func_t)lookupKnownClass<true>,
6574 RPLUS(rVmTl, ch),
6575 IMM((uintptr_t)name),
6576 IMM(isClass));
6577 recordReentrantStubCall(i);
6578 emitMovRegReg(astubs, rax, *scratch);
6579 } else {
6580 UnlikelyIfBlock<CC_Z> ifNull(a, astubs, &astubsRet);
6581 // This isn't really a traditional slow path, count as a hit
6582 Stats::emitInc(astubs, Stats::TgtCache_ClassExistsHit);
6583 // Provide flags so the check back in a fails
6584 emitImmReg(astubs, isClass ? attrNotClass : AttrNone, *scratch);
6586 // If we don't take the slow/NULL path, load the Class's attrs
6587 // into *scratch to prepare for the flag check.
6588 Stats::emitInc(a, Stats::TgtCache_ClassExistsHit);
6589 a. load_reg64_disp_reg64(*scratch, Class::preClassOff(),
6590 *scratch);
6591 a. load_reg64_disp_reg32(*scratch, PreClass::attrsOffset(),
6592 *scratch);
6595 if (i.changesPC) {
6596 fuseBranchSync(t, i);
6598 a. test_imm32_reg32(isClass ? attrNotClass : typeAttr, *scratch);
6599 ConditionCode cc = isClass ? CC_Z : CC_NZ;
6600 if (i.changesPC) {
6601 fuseBranchAfterBool(t, i, cc);
6602 } else {
6603 a. setcc(cc, *scratch);
6604 a. mov_reg8_reg64_unsigned(*scratch, *scratch);
6605 m_regMap.bindScratch(scratch, i.outStack->location, KindOfBoolean,
6606 RegInfo::DIRTY);
6608 } else {
6609 ASSERT(!i.fuseBranch);
6610 Stats::emitInc(a, Stats::Tx64_ClassExistsSlow);
6611 if (false) {
6612 UNUSED bool ret = false;
6613 ret = classExistsSlow(name, ret, typeAttr);
6615 EMIT_CALL(a, classExistsSlow,
6616 V(i.inputs[nameIdx]->location),
6617 V(i.inputs[autoIdx]->location),
6618 IMM(typeAttr));
6619 recordReentrantCall(i);
6620 // Our helper decrefs the string
6621 m_regMap.bind(rax, i.outStack->location, KindOfBoolean, RegInfo::DIRTY);
6625 void TranslatorX64::translateClassExists(const Tracelet& t,
6626 const NormalizedInstruction& i) {
6627 translateClassExistsImpl(t, i, AttrNone);
6630 void TranslatorX64::translateInterfaceExists(const Tracelet& t,
6631 const NormalizedInstruction& i) {
6632 translateClassExistsImpl(t, i, AttrInterface);
6635 void TranslatorX64::translateTraitExists(const Tracelet& t,
6636 const NormalizedInstruction& i) {
6637 translateClassExistsImpl(t, i, AttrTrait);
6640 // Helper function for static property access. This function emits code
6641 // which leaves a pointer to the static property for clsInput::$propInput in
6642 // register scr. We destroy scr early on, yet do not consume inputs until
6643 // later, so scr must not alias an input register. This also handles
6644 // the decref for the case where prop is not a static string.
6645 void TranslatorX64::emitStaticPropInlineLookup(const NormalizedInstruction& i,
6646 const DynLocation& clsInput,
6647 const DynLocation& propInput,
6648 PhysReg scr) {
6649 const Class* cls = clsInput.rtt.valueClass();
6650 const StringData* propName = propInput.rtt.valueString();
6651 using namespace TargetCache;
6652 CacheHandle ch;
6654 ASSERT(cls && propName);
6655 // Use the uniquely known cls / prop to generate a single cache per prop
6656 const StringData* clsName = cls->preClass()->name();
6657 string sds(Util::toLower(clsName->data()) + ":" +
6658 string(propName->data(), propName->size()));
6659 StringData sd(sds.c_str(), sds.size(), AttachLiteral);
6660 ch = SPropCache::alloc(&sd);
6661 SKTRACE(1, i.source, "SPropInlineLookup %s %d\n", sd.data(), int(ch));
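  // (Added illustrative example, hypothetical names: for a lookup of C::$p
  // the key built above is the lowercased class name joined with the property
  // name, i.e. "c:p", so every translation of this class/prop pair shares a
  // single SPropCache handle.)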
6663 Stats::emitInc(a, Stats::TgtCache_SPropHit);
6665 // For the simple case of statically known class and prop name, we inline
6666 // the target cache lookup, and outline the miss case.
6667 // Load the TV pointer out of the thread-private tl_targetCaches.
6668 BOOST_STATIC_ASSERT((offsetof(SPropCache, m_tv) == 0));
6669 a. load_reg64_disp_reg64(rVmTl, ch, scr);
6670 a. test_reg64_reg64(scr, scr);
6672 // Call the slow path.
6674 UnlikelyIfBlock<CC_Z> shucks(a, astubs);
6676 // Precondition for this lookup - we don't need to pass the preClass,
6677 // as we only translate in class lookups.
6678 ASSERT(cls == curFunc()->cls());
6679 if (false) { // typecheck
6680 StringData *data = NULL;
6681 SPropCache::lookup(ch, cls, data);
6684 EMIT_CALL(astubs, (TCA)SPropCache::lookup,
6685 IMM(ch), V(clsInput.location), IMM(uint64_t(propName)));
6686 recordReentrantStubCall(i);
6687 emitMovRegReg(astubs, rax, scr);
6689 // We're consuming the name as input, but it is static, no decref needed
6690 ASSERT(propInput.rtt.valueString()->isStatic());
6691 // astubs. jmp(a.code.frontier); -- implicit
6695 void TranslatorX64::analyzeCGetS(Tracelet& t, NormalizedInstruction& i) {
6696 ASSERT(i.inputs.size() == 2);
6697 ASSERT(i.inputs[0]->valueType() == KindOfClass);
6698 ASSERT(i.outStack);
6699 const Class* cls = i.inputs[0]->rtt.valueClass();
6700 const StringData* propName = i.inputs[1]->rtt.valueString();
6701 i.m_txFlags = supportedPlan(cls && propName && isContextFixed() &&
6702 curFunc()->cls() == cls);
6705 void TranslatorX64::translateCGetS(const Tracelet& t,
6706 const NormalizedInstruction& i) {
6707 const int kClassIdx = 0;
6708 const int kPropIdx = 1;
6710 ScratchReg sprop(m_regMap);
6711 emitStaticPropInlineLookup(i, *i.inputs[kClassIdx],
6712 *i.inputs[kPropIdx], *sprop);
6713 emitDerefIfVariant(a, *sprop);
6714 emitIncRefGeneric(*sprop, 0);
6715 // Finally copy the thing to the stack
6716 int stackDest = 2 * sizeof(Cell) - sizeof(Cell); // popped - pushed
6717 emitCopyToStack(a, i, *sprop, stackDest);
6720 void TranslatorX64::analyzeSetS(Tracelet& t, NormalizedInstruction& i) {
6721 ASSERT(i.inputs.size() == 3);
6722 ASSERT(i.inputs[1]->valueType() == KindOfClass);
6723 ASSERT(i.outStack);
6724 const Class* cls = i.inputs[1]->rtt.valueClass();
6725 const StringData* propName = i.inputs[2]->rtt.valueString();
6726 // XXX Need to check isContextFixed
6727 // Might be able to broaden this: if cls is an ancestor of the current context,
6728 // the context is Fixed, and the property is not private
6729 // Also if the m_hoistable in cls is set to AlwaysHoistable, defined in
6730 // the same unit as context, and the property is public
6731 i.m_txFlags = supportedPlan(cls && propName && isContextFixed() &&
6732 curFunc()->cls() == cls);
6735 void TranslatorX64::translateSetS(const Tracelet& t,
6736 const NormalizedInstruction& i) {
6737 const int kClassIdx = 1;
6739 ScratchReg sprop(m_regMap);
6740 const RuntimeType& rhsType = i.inputs[0]->rtt;
6741 emitStaticPropInlineLookup(i, *i.inputs[kClassIdx], *i.inputs[2], *sprop);
6743 ASSERT(m_regMap.getInfo(*sprop)->m_state == RegInfo::SCRATCH);
6744 ASSERT(!rhsType.isVariant());
6746 m_regMap.allocOutputRegs(i);
6747 PhysReg rhsReg = getReg(i.inputs[0]->location);
6748 PhysReg outReg = getReg(i.outStack->location);
6749 emitTvSet(i, rhsReg, rhsType.outerType(), *sprop);
6750 ASSERT(i.inputs[2]->location == i.outStack->location);
6751 emitMovRegReg(rhsReg, outReg);
6754 void TranslatorX64::analyzeSetG(Tracelet& t, NormalizedInstruction& i) {
6755 ASSERT(i.inputs.size() == 2);
6756 i.m_txFlags = supportedPlan(
6757 i.inputs[1]->isString() &&
6758 !i.inputs[0]->isVariant()
6760 if (i.m_txFlags) i.manuallyAllocInputs = true;
6763 void TranslatorX64::translateSetG(const Tracelet& t,
6764 const NormalizedInstruction& i) {
6765 ASSERT(i.outStack && !i.outLocal);
6766 ASSERT(i.inputs.size() == 2);
6767 ASSERT(i.inputs[1]->isString());
6768 ASSERT(i.inputs[1]->location == i.outStack->location);
6770 const DataType type = i.inputs[0]->rtt.outerType();
6773 * Grab the global from the target cache; rax will get a pointer to
6774 * the TypedValue in the globals array, maybe newly created as a
6775 * null.
6777 emitGetGlobal(i, 1, true /* allowCreate */);
6778 ScratchReg raxSaver(m_regMap, rax);
6779 m_regMap.allocInputReg(i, 0);
6780 PhysReg src = getReg(i.inputs[0]->location);
6781 m_regMap.allocOutputRegs(i);
6782 PhysReg out = getReg(i.outStack->location);
6784 emitTvSet(i, src, type, rax);
6785 emitMovRegReg(src, out);
6788 static TypedValue* lookupGlobal(StringData* name) {
6789 VarEnv* ve = g_vmContext->m_globalVarEnv;
6790 TypedValue* r = ve->lookup(name);
6791 // If the global didn't exist, we need to leave name un-decref'd for
6792 // the caller to raise warnings.
6793 if (r) {
6794 LITSTR_DECREF(name);
6795 if (r->m_type == KindOfRef) r = r->m_data.pref->tv();
6797 return r;
6800 static TypedValue* lookupAddGlobal(StringData* name) {
6801 VarEnv* ve = g_vmContext->m_globalVarEnv;
6802 TypedValue* r = ve->lookupAdd(name);
6803 if (r->m_type == KindOfRef) r = r->m_data.pref->tv();
6804 LITSTR_DECREF(name);
6805 return r;
6809 * Look up a global in the TargetCache with the name
6810  * i.inputs[nameIdx]. If `allowCreate' is true, the global is also created. If
6811 * we don't create the global, the input name is not decref'd yet.
6813 void
6814 TranslatorX64::emitGetGlobal(const NormalizedInstruction& i, int nameIdx,
6815 bool allowCreate) {
6816 using namespace TargetCache;
6817 ASSERT(i.inputs.size() > size_t(nameIdx));
6818 ASSERT(i.inputs[nameIdx]->isString());
6820 const StringData *maybeName = i.inputs[nameIdx]->rtt.valueString();
6821 if (!maybeName) {
6822 m_regMap.allocInputReg(i, nameIdx, argNumToRegName[0]);
6823 // Always do a lookup when there's no statically-known name.
6824 // There's not much we can really cache here right now anyway.
6825 EMIT_CALL(a, allowCreate ? lookupAddGlobal : lookupGlobal,
6826 V(i.inputs[nameIdx]->location));
6827 recordCall(i);
6828 return;
6831 CacheHandle ch = GlobalCache::alloc(maybeName);
6832 if (false) { // typecheck
6833 StringData* UNUSED key = NULL;
6834 TypedValue* UNUSED glob = GlobalCache::lookup(ch, key);
6835 TypedValue* UNUSED glob2 = GlobalCache::lookupCreate(ch, key);
6837 SKTRACE(1, i.source, "ch %d\n", ch);
6838 EMIT_CALL(a, allowCreate ? GlobalCache::lookupCreate
6839 : GlobalCache::lookup,
6840 IMM(ch),
6841 IMM((uint64_t)maybeName));
6842 recordCall(i);
6845 static bool
6846 isSupportedInstrCGetG(const NormalizedInstruction& i) {
6847 ASSERT(i.inputs.size() == 1);
6848 return (i.inputs[0]->rtt.isString());
6851 void
6852 TranslatorX64::analyzeCGetG(Tracelet& t, NormalizedInstruction& i) {
6853 i.m_txFlags = simplePlan(isSupportedInstrCGetG(i));
6854 if (i.m_txFlags) i.manuallyAllocInputs = true;
6857 void
6858 TranslatorX64::translateCGetG(const Tracelet& t,
6859 const NormalizedInstruction& i) {
6860 ASSERT(i.outStack && !i.outLocal);
6861 ASSERT(i.inputs.size() == 1);
6862 ASSERT(i.inputs[0]->isString());
6864 emitGetGlobal(i, 0, false /* allowCreate */);
6865 ScratchReg raxHolder(m_regMap, rax);
6867 // If non-null, rax now points to the in-memory location of the
6868 // object of unknown type. lookup() has already decref'd the name.
6869 a. test_reg64_reg64(rax, rax);
6870 DiamondReturn astubsRet;
6872 UnlikelyIfBlock<CC_Z> ifNotRax(a, astubs, &astubsRet);
6873 if (!i.inputs[0]->rtt.valueString()) {
6874 m_regMap.allocInputReg(i, 0);
6875 PhysReg reg = getReg(i.inputs[0]->location);
6876 emitDecRef(astubs, i, reg, BitwiseKindOfString);
6878    // TODO: if (MoreWarnings) raise an undefined variable warning.
6879 // (Note: when changing this remember to change the Simple flag to
6880 // Supported in analyze.)
6881 emitStoreNull(astubs, vstackOffset(i, 0), rVmSp);
6882 m_regMap.invalidate(i.outStack->location);
6885 emitCopyToStack(a, i, rax, 0);
6886 emitIncRefGeneric(rax, 0);
6887 m_regMap.invalidate(i.outStack->location);
6890 void TranslatorX64::analyzeFPassL(Tracelet& t,
6891 NormalizedInstruction& ni) {
6892 if (ni.preppedByRef) {
6893 analyzeVGetL(t, ni);
6894 } else {
6895 analyzeCGetL(t, ni);
6899 void TranslatorX64::translateFPassL(const Tracelet& t,
6900 const NormalizedInstruction& ni) {
6901 if (ni.preppedByRef) {
6902 translateVGetL(t, ni);
6903 } else {
6904 translateCGetL(t, ni);
6908 void TranslatorX64::analyzeFPassS(Tracelet& t,
6909 NormalizedInstruction& ni) {
6910 if (ni.preppedByRef) {
6911 // We need a VGetS translation.
6912 ni.m_txFlags = Interp;
6913 } else {
6914 analyzeCGetS(t, ni);
6918 void TranslatorX64::translateFPassS(const Tracelet& t,
6919 const NormalizedInstruction& ni) {
6920 if (ni.preppedByRef) {
6921 ASSERT(false);
6922 } else {
6923 translateCGetS(t, ni);
6927 void TranslatorX64::analyzeFPassG(Tracelet& t,
6928 NormalizedInstruction& ni) {
6929 if (ni.preppedByRef) {
6930 analyzeVGetG(t, ni);
6931 } else {
6932 analyzeCGetG(t, ni);
6936 void TranslatorX64::translateFPassG(const Tracelet& t,
6937 const NormalizedInstruction& ni) {
6938 if (ni.preppedByRef) {
6939 translateVGetG(t, ni);
6940 } else {
6941 translateCGetG(t, ni);
6945 void TranslatorX64::analyzeCheckTypeOp(Tracelet& t,
6946 NormalizedInstruction& ni) {
6947 ASSERT(ni.inputs.size() == 1);
6949 if (ni.op() == OpIsObjectL || ni.op() == OpIsObjectC) {
6950 // is_object is weird because it's supposed to return false for
6951 // things where ObjectData::isResource() is true. For now we only
6952 // translate when it is not an object.
6953 if (ni.inputs[0]->valueType() == KindOfObject) {
6954 ni.m_txFlags = Interp;
6955 return;
6959 if (ni.inputs[0]->isLocal()) {
6960 ni.manuallyAllocInputs = true;
6961 if (ni.op() != OpIssetL && ni.inputs[0]->rtt.isUninit()) {
6962 ni.m_txFlags = Supported;
6963 } else {
6964 ni.m_txFlags = Native;
6966 return;
6969 ni.m_txFlags = planHingesOnRefcounting(ni.inputs[0]->valueType());
6972 static bool checkTypeHelper(Opcode op, DataType dt) {
6973 switch (op) {
6974 case OpIssetL: return !IS_NULL_TYPE(dt);
6975 case OpIsNullL: case OpIsNullC: return IS_NULL_TYPE(dt);
6976 case OpIsStringL: case OpIsStringC: return IS_STRING_TYPE(dt);
6977 case OpIsArrayL: case OpIsArrayC: return IS_ARRAY_TYPE(dt);
6978 case OpIsIntL: case OpIsIntC: return IS_INT_TYPE(dt);
6979 case OpIsBoolL: case OpIsBoolC: return IS_BOOL_TYPE(dt);
6980 case OpIsDoubleL: case OpIsDoubleC: return IS_DOUBLE_TYPE(dt);
6982 case OpIsObjectL: case OpIsObjectC:
6983 // Note: this is because we refused to translate if it was
6984 // actually an object for now. (We'd need to emit some kind of
6985 // call to ObjectData::isResource or something.)
6986 return 0;
6988 ASSERT(false);
6989 NOT_REACHED();
6992 void
6993 TranslatorX64::translateCheckTypeOp(const Tracelet& t,
6994 const NormalizedInstruction& ni) {
6995 ASSERT(ni.inputs.size() == 1);
6996 ASSERT(ni.outStack);
6998 const DataType dt = ni.inputs[0]->valueType();
6999 const bool isLocalOp = ni.inputs[0]->isLocal();
7000 const bool isType =
7001 checkTypeHelper(ni.op(), ni.inputs[0]->valueType()) != ni.invertCond;
7002 const bool doUninit = isLocalOp &&
7003 ni.op() != OpIssetL &&
7004 ni.inputs[0]->rtt.isUninit();
7006 if (!isLocalOp) {
7007 emitDecRef(ni, getReg(ni.inputs[0]->location), dt);
7009 if (doUninit) {
7010 const StringData* name = local_name(ni.inputs[0]->location);
7011 ASSERT(name->isStatic());
7012 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
7013 recordReentrantCall(ni);
7015 m_regMap.allocOutputRegs(ni);
7016 if (ni.changesPC) {
7017 // Don't bother driving an output reg. Just take the branch
7018 // where it leads.
7019 Stats::emitInc(a, Stats::Tx64_FusedTypeCheck);
7020 fuseBranchAfterStaticBool(t, ni, isType);
7021 return;
7023 Stats::emitInc(a, Stats::Tx64_UnfusedTypeCheck);
7024 emitImmReg(a, isType, getReg(ni.outStack->location));
7027 static void badArray() {
7028 throw_bad_type_exception("array_key_exists expects an array or an object; "
7029 "false returned.");
7032 static void badKey() {
7033 raise_warning("Array key should be either a string or an integer");
7036 static inline int64 ak_exist_string_helper(StringData* key, ArrayData* arr) {
7037 int64 n;
7038 if (key->isStrictlyInteger(n)) {
7039 return arr->exists(n);
7041 return arr->exists(StrNR(key));
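// (Added note: isStrictlyInteger() mirrors PHP's array-key normalization --
// a key like "123" probes the integer index 123, while anything that is not
// a canonical integer string is looked up as a string key.)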
7044 static int64 ak_exist_string(StringData* key, ArrayData* arr) {
7045 int64 res = ak_exist_string_helper(key, arr);
7046 if (arr->decRefCount() == 0) {
7047 arr->release();
7049 if (key->decRefCount() == 0) {
7050 key->release();
7052 return res;
7055 static int64 ak_exist_int(int64 key, ArrayData* arr) {
7056 bool res = arr->exists(key);
7057 if (arr->decRefCount() == 0) {
7058 arr->release();
7060 return res;
7063 static int64 ak_exist_string_obj(StringData* key, ObjectData* obj) {
7064 CArrRef arr = obj->o_toArray();
7065 int64 res = ak_exist_string_helper(key, arr.get());
7066 if (obj->decRefCount() == 0) {
7067 obj->release();
7069 if (key->decRefCount() == 0) {
7070 key->release();
7072 return res;
7075 static int64 ak_exist_int_obj(int64 key, ObjectData* obj) {
7076 CArrRef arr = obj->o_toArray();
7077 bool res = arr.get()->exists(key);
7078 if (obj->decRefCount() == 0) {
7079 obj->release();
7081 return res;
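// AKExists (e.g. array_key_exists('k', $arr)) is translated as Simple when
// the container is known to be an array or object and the key is an int,
// string, or null; any other input combination may raise, so it is marked
// Supported (reentrant) instead.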
7084 void
7085 TranslatorX64::analyzeAKExists(Tracelet& t, NormalizedInstruction& i) {
7086 const int keyIx = 1;
7087 const int arrIx = 0;
7089 const DataType dta = i.inputs[arrIx]->valueType();
7090 const DataType dtk = i.inputs[keyIx]->valueType();
7092 bool reentrant = (dta != KindOfArray && dta != KindOfObject) ||
7093 (!IS_STRING_TYPE(dtk) && dtk != KindOfInt64 && dtk != KindOfNull);
7095 i.m_txFlags = reentrant ? Supported : Simple;
7096 i.manuallyAllocInputs = true;
7099 void
7100 TranslatorX64::translateAKExists(const Tracelet& t,
7101 const NormalizedInstruction& ni) {
7102 ASSERT(ni.inputs.size() == 2);
7103 ASSERT(ni.outStack);
7105 const int keyIx = 1;
7106 const int arrIx = 0;
7108 const DataType dta = ni.inputs[arrIx]->valueType();
7109 const DataType dtk = ni.inputs[keyIx]->valueType();
7110 TCA string_func = (TCA)ak_exist_string;
7111 TCA int_func = (TCA)ak_exist_int;
7113 int result = -1;
7114 int args[2];
7115 args[keyIx] = 0;
7116 args[arrIx] = 1;
7117 switch (dta) {
7118 case KindOfObject:
7119 string_func = (TCA)ak_exist_string_obj;
7120 int_func = (TCA)ak_exist_int_obj;
7121 case KindOfArray:
7122 switch (dtk) {
7123 case BitwiseKindOfString:
7124 case KindOfStaticString:
7125 case KindOfInt64: {
7126 allocInputsForCall(ni, args);
7127 PhysReg rk = getReg(ni.inputs[keyIx]->location);
7128 PhysReg ra = getReg(ni.inputs[arrIx]->location);
7129 m_regMap.scrubStackEntries(ni.outStack->location.offset);
7130 EMIT_CALL(a, dtk == KindOfInt64 ? int_func : string_func,
7131 R(rk), R(ra));
7132 recordCall(ni);
7133 break;
7135 case KindOfNull:
7136 if (dta == KindOfArray) {
7137 args[keyIx] = ArgDontAllocate;
7138 allocInputsForCall(ni, args);
7139 PhysReg ra = getReg(ni.inputs[arrIx]->location);
7140 m_regMap.scrubStackEntries(ni.outStack->location.offset);
7141 EMIT_CALL(a, string_func,
7142 IMM((uint64_t)empty_string.get()), R(ra));
7143 recordCall(ni);
7144 } else {
7145 result = ni.invertCond;
7147 break;
7148 default:
7149 EMIT_CALL(a, badKey);
7150 recordReentrantCall(ni);
7151 result = ni.invertCond;
7152 break;
7154 break;
7155 default:
7156 EMIT_CALL(a, badArray);
7157 recordReentrantCall(ni);
7158 result = ni.invertCond;
7159 break;
7162 if (result >= 0) {
7163 if (ni.changesPC) {
7164 fuseBranchAfterStaticBool(t, ni, result);
7165 return;
7166 } else {
7167 m_regMap.allocOutputRegs(ni);
7168 emitImmReg(a, result, getReg(ni.outStack->location));
7170 } else {
7171 ScratchReg res(m_regMap, rax);
7172 if (ni.changesPC) {
7173 fuseBranchSync(t, ni);
7174 a. test_reg64_reg64(*res, *res);
7175 fuseBranchAfterBool(t, ni, ni.invertCond ? CC_Z : CC_NZ);
7176 } else {
7177 if (ni.invertCond) {
7178 a. xor_imm32_reg64(1, *res);
7180 m_regMap.bindScratch(res, ni.outStack->location, KindOfBoolean,
7181 RegInfo::DIRTY);
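// SetOpL ($x op= $y) only gets a native translation when both operands are
// ints and the operator is simple arithmetic or bitwise; everything else is
// left to the generic path.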
7186 void
7187 TranslatorX64::analyzeSetOpL(Tracelet& t, NormalizedInstruction& i) {
7188 ASSERT(i.inputs.size() == 2);
7189 const SetOpOp subOp = SetOpOp(i.imm[1].u_OA);
7190 Opcode arithOp = setOpOpToOpcodeOp(subOp);
7191 i.m_txFlags = nativePlan(i.inputs[0]->isInt() &&
7192 i.inputs[1]->isInt() &&
7193 (arithOp == OpAdd || arithOp == OpSub ||
7194 arithOp == OpMul ||
7195 arithOp == OpBitAnd || arithOp == OpBitOr ||
7196 arithOp == OpBitXor));
7199 void
7200 TranslatorX64::translateSetOpL(const Tracelet& t,
7201 const NormalizedInstruction& i) {
7202 const vector<DynLocation*>& inputs = i.inputs;
7203 ASSERT(inputs.size() >= 2);
7204 ASSERT(i.outStack && i.outLocal);
7205 const int valIdx = 0;
7206 const int localIdx = 1;
7207 ASSERT(inputs[localIdx]->isLocal());
7208 ASSERT(inputs[valIdx]->isStack());
7209 ASSERT(inputs[valIdx]->outerType() != KindOfRef);
7211 const SetOpOp subOp = SetOpOp(i.imm[1].u_OA);
7212 Opcode arithOp = setOpOpToOpcodeOp(subOp);
7213 m_regMap.allocOutputRegs(i);
7214 binaryArithLocal(i, arithOp, *inputs[valIdx], *inputs[localIdx],
7215 *i.outStack);
7218 void
7219 TranslatorX64::analyzeIncDecL(Tracelet& t, NormalizedInstruction& i) {
7220 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
7223 void
7224 TranslatorX64::translateIncDecL(const Tracelet& t,
7225 const NormalizedInstruction& i) {
7226 const vector<DynLocation*>& inputs = i.inputs;
7227 ASSERT(inputs.size() == 1);
7228 ASSERT(i.outLocal);
7229 ASSERT(inputs[0]->isLocal());
7230 const IncDecOp oplet = IncDecOp(i.imm[1].u_OA);
7231 ASSERT(oplet == PreInc || oplet == PostInc || oplet == PreDec ||
7232 oplet == PostDec);
7233 ASSERT(inputs[0]->isInt() && (!i.outStack || i.outStack->isInt()));
7234 bool post = (oplet == PostInc || oplet == PostDec);
7235 bool pre = !post;
7236 bool inc = (oplet == PostInc || oplet == PreInc);
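// Post forms ($a++, $a--) copy the old value into the output cell before the
// local is updated; pre forms (++$a, --$a) copy the updated value afterwards.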
7238 m_regMap.allocOutputRegs(i);
7239 PhysReg localVal = getReg(inputs[0]->location);
7240 if (i.outStack && post) { // $a++, $a--
7241 PhysReg output = getReg(i.outStack->location);
7242 emitMovRegReg(localVal, output);
7244 if (inc) {
7245 a. add_imm32_reg64(1, localVal);
7246 } else {
7247 a. sub_imm32_reg64(1, localVal);
7249 if (i.outStack && pre) { // --$a, ++$a
7250 PhysReg output = getReg(i.outStack->location);
7251 emitMovRegReg(localVal, output);
7255 void
7256 TranslatorX64::translateUnsetL(const Tracelet& t,
7257 const NormalizedInstruction& i) {
7258 ASSERT(i.inputs.size() == 1);
7259 ASSERT(!i.outStack && i.outLocal);
7260 const int locIdx = 0;
7261 const DynLocation& localDl = *i.inputs[locIdx];
7262 ASSERT(localDl.isLocal());
7264 // We have to mark the output register as dirty to ensure that
7265 // the type gets spilled at the end of the tracelet.
7266 m_regMap.allocOutputRegs(i);
7268 DataType type = localDl.outerType();
7269 // decRef the value that currently lives in the local if appropriate.
7270 emitDecRef(i, getReg(localDl.location), type);
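// The Req* (include/require) instructions are only translated when the
// filename is a static string that resolves to a known PhpFile at translation
// time, and only under RepoAuthoritative or ServerStatCache, so the file
// reference can be burned into the TC and dropped when the file changes.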
7274 void
7275 TranslatorX64::analyzeReqLit(Tracelet& t, NormalizedInstruction& i,
7276 InclOpFlags flags) {
7277 ASSERT(i.inputs.size() == 1);
7278 Eval::PhpFile* efile = g_vmContext->lookupIncludeRoot(
7279 (StringData*)i.inputs[0]->rtt.valueString(),
7280 flags, NULL);
7281 i.m_txFlags = supportedPlan(i.inputs[0]->isString() &&
7282 i.inputs[0]->rtt.valueString() != NULL &&
7283 efile &&
7284 (RuntimeOption::RepoAuthoritative ||
7285 RuntimeOption::ServerStatCache));
7286 if (efile && efile->unit()->getMainReturn()->m_type != KindOfUninit) {
7287 i.outStack->rtt = RuntimeType(efile->unit()->getMainReturn()->m_type);
7290 // We don't need the reference lookupIncludeRoot made for us.
7291 if (efile) efile->decRef();
7292 i.manuallyAllocInputs = true;
7295 void
7296 TranslatorX64::analyzeReqDoc(Tracelet& t, NormalizedInstruction& i) {
7297 analyzeReqLit(t, i, InclOpDocRoot);
7300 void
7301 TranslatorX64::analyzeReqMod(Tracelet& t, NormalizedInstruction& i) {
7302 analyzeReqLit(t, i, InclOpDocRoot | InclOpLocal);
7305 void
7306 TranslatorX64::analyzeReqSrc(Tracelet& t, NormalizedInstruction& i) {
7307 analyzeReqLit(t, i, InclOpRelative | InclOpLocal);
7310 void
7311 TranslatorX64::translateReqLit(const Tracelet& t,
7312 const NormalizedInstruction& i,
7313 InclOpFlags flags) {
7314 bool local = flags & InclOpLocal;
7315 StringData *s = const_cast<StringData*>(i.inputs[0]->rtt.valueString());
7316 HPHP::Eval::PhpFile* efile =
7317 g_vmContext->lookupIncludeRoot(s, flags, NULL);
7319 /* lookupIncludeRoot increments the refcount for us. This reference is
7320 * going to be burned into the translation cache. We will remove it only
7321 * when the file changes (via invalidateFile), and we're sure that no
7322 * outstanding requests are using the old code (via the Treadmill
7323 * module). */
7325 TRACE(1, "lookupIncludeRoot: %s -> %p c %d\n", s->data(), efile,
7326 efile->getRef());
7328 /* Remember that this tracelet (not just this instruction) now depends on the
7329 * contents of the required file. */
7331 m_srcDB.recordDependency(efile, t.m_sk);
7332 Unit *unit = efile->unit();
7333 Func *func = unit->getMain();
7335 const Offset after = nextSrcKey(t, i).offset();
7336 TRACE(1, "requireHelper: efile %p offset %d%s\n", efile, after,
7337 i.skipSync ? " [skipsync]" : "");
7339 if (i.skipSync) {
7341 /* getting here means there was nothing to do between
7342 * the previous req and this one. Any spill code we generate
7343 * here would be broken (because the rbx is wrong), so
7344 * verify that we don't generate anything... */
7346 TCA s DEBUG_ONLY = a.code.frontier;
7347 syncOutputs(0);
7348 ASSERT(s == a.code.frontier);
7349 } else {
7350 syncOutputs(i);
7352 ReqLitStaticArgs* args = m_globalData.alloc<ReqLitStaticArgs>();
7353 emitImmReg(a, (uint64_t)args, argNumToRegName[0]);
7354 emitCall(a, (TCA)reqLitHelper, true);
7356 args->m_efile = efile;
7357 args->m_pseudoMain = emitServiceReq(false, REQ_BIND_REQUIRE, 3,
7358 uint64_t(args),
7359 uint64_t(func), uint64_t(func->base()));
7360 args->m_pcOff = after;
7361 args->m_local = local;
7363 if (i.breaksBB) {
7364 SrcKey fallThru(curFunc(), after);
7365 emitBindJmp(fallThru);
7366 } else {
7368 /* When we get here, rVmSp points to the actual top of stack,
7369 * but the rest of this tracelet assumes that rVmSp is set to
7370 * the top of the stack at the beginning of the tracelet, so we
7371 * have to fix it up here. */
7374 if (!i.outStack) {
7375 /* as a special case, if we're followed by a pop, and
7376 we return a non-refcounted type, and then followed
7377 by another require, we can avoid the add here and the sub
7378 in the following require */
7380 } else {
7381 int delta = i.stackOff + getStackDelta(i);
7382 if (delta != 0) {
7383 // i.stackOff is in negative Cells, not bytes.
7384 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
7390 void
7391 TranslatorX64::translateReqDoc(const Tracelet& t,
7392 const NormalizedInstruction& i) {
7393 translateReqLit(t, i, InclOpDocRoot);
7396 void
7397 TranslatorX64::translateReqMod(const Tracelet& t,
7398 const NormalizedInstruction& i) {
7399 translateReqLit(t, i, InclOpDocRoot | InclOpLocal);
7402 void
7403 TranslatorX64::translateReqSrc(const Tracelet& t,
7404 const NormalizedInstruction& i) {
7405 translateReqLit(t, i, InclOpRelative | InclOpLocal);
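// Native trampolines: a per-helper stub emitted in the trampoline region
// (mov imm64 of the helper address into rax, jmp rax, then ud2), with the
// helper -> stub mapping cached in trampolineMap. When Stats are enabled the
// stub also bumps a per-helper counter indexed by trampoline number.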
7408 TCA
7409 TranslatorX64::emitNativeTrampoline(TCA helperAddr) {
7410 if (!atrampolines.code.canEmit(m_trampolineSize)) {
7411 // not enough space to emit a trampoline, so just return the
7412 // helper address and emitCall will emit the right sequence
7413 // to call it indirectly
7414 TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
7415 ASSERT(false);
7416 return helperAddr;
7418 uint32_t index = m_numNativeTrampolines++;
7419 TCA trampAddr = atrampolines.code.frontier;
7420 if (Stats::enabled()) {
7421 Stats::emitInc(atrampolines, &Stats::tl_helper_counters[0], index);
7422 Stats::helperNames[index] = Util::getNativeFunctionName(helperAddr);
7424 atrampolines.mov_imm64_reg((int64_t)helperAddr, reg::rax);
7425 atrampolines.jmp_reg(reg::rax);
7426 atrampolines.ud2();
7427 trampolineMap[helperAddr] = trampAddr;
7428 if (m_trampolineSize == 0) {
7429 m_trampolineSize = atrampolines.code.frontier - trampAddr;
7430 ASSERT(m_trampolineSize >= kMinPerTrampolineSize);
7432 recordBCInstr(OpNativeTrampoline, atrampolines, trampAddr);
7433 return trampAddr;
7436 TCA
7437 TranslatorX64::getNativeTrampoline(TCA helperAddr) {
7438 if (!RuntimeOption::EvalJitTrampolines && !Stats::enabled()) {
7439 return helperAddr;
7441 TCA trampAddr = (TCA)mapGet<PointerMap>(trampolineMap, helperAddr);
7442 if (trampAddr) {
7443 return trampAddr;
7445 return emitNativeTrampoline(helperAddr);
7447 void TranslatorX64::analyzeDefCls(Tracelet& t,
7448 NormalizedInstruction& i) {
7449 i.m_txFlags = Supported;
7452 static void defClsHelper(PreClass *preClass) {
7453 ASSERT(tl_regState == REGSTATE_DIRTY);
7454 tl_regState = REGSTATE_CLEAN;
7455 Unit::defClass(preClass);
7458 /* m_defClsHelper sync'd the registers for us already. This means
7459 * if an exception propagates we want to leave things as
7460 * REGSTATE_CLEAN, since we're still in sync. Only set it to dirty
7461 * if we are actually returning to run in the TC again. */
7463 tl_regState = REGSTATE_DIRTY;
7466 void TranslatorX64::translateDefCls(const Tracelet& t,
7467 const NormalizedInstruction& i) {
7468 int cid = i.imm[0].u_IVA;
7469 const Opcode* after = curUnit()->at(i.source.offset());
7470 PreClass* c = curFunc()->unit()->lookupPreClassId(cid);
7472 ASSERT(m_defClsHelper);
7475 /* compute the corrected stack ptr as a pseudo-param to m_defClsHelper
7476 which it will store in g_vmContext, in case of fatals, or __autoload */
7478 m_regMap.cleanReg(rax);
7479 m_regMap.smashReg(rax);
7480 ScratchReg offset(m_regMap, rax);
7481 a. lea_reg64_disp_reg64(rVmSp, -cellsToBytes(i.stackOff), rax);
7483 EMIT_CALL(a, m_defClsHelper, IMM((uint64)c), IMM((uint64)after));
7486 void TranslatorX64::analyzeDefFunc(Tracelet& t,
7487 NormalizedInstruction& i) {
7488 i.m_txFlags = Supported;
7491 void defFuncHelper(Func *f) {
7492 f->setCached();
7495 void TranslatorX64::translateDefFunc(const Tracelet& t,
7496 const NormalizedInstruction& i) {
7497 int fid = i.imm[0].u_IVA;
7498 Func* f = curFunc()->unit()->lookupFuncId(fid);
7500 EMIT_CALL(a, defFuncHelper, IMM((uint64)f));
7501 recordReentrantCall(i);
7504 void
7505 TranslatorX64::analyzeFPushFunc(Tracelet& t, NormalizedInstruction& i) {
7506 ASSERT(i.inputs.size() >= 1);
7507 // The input might be an object implementing __invoke()
7508 i.m_txFlags = simplePlan(i.inputs[0]->isString());
7511 void
7512 TranslatorX64::translateFPushFunc(const Tracelet& t,
7513 const NormalizedInstruction& i) {
7514 using namespace TargetCache;
7515 CacheHandle ch = FuncCache::alloc();
7516 ASSERT(i.inputs.size() == 1);
7517 Location& inLoc = i.inputs[0]->location;
7519 m_regMap.allocOutputRegs(i);
7520 m_regMap.scrubStackRange(i.stackOff - 1,
7521 i.stackOff - 1 + kNumActRecCells);
7522 // Popped one cell, pushed an actrec
7523 int startOfActRec = int(sizeof(Cell)) - int(sizeof(ActRec));
7524 size_t funcOff = AROFF(m_func) + startOfActRec;
7525 size_t thisOff = AROFF(m_this) + startOfActRec;
7526 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
7527 emitPushAR(i, NULL, sizeof(Cell) /* bytesPopped */);
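// The dead "if (false)" blocks scattered through this file never execute;
// they exist only so the compiler type-checks the helper signature against
// the arguments that the following EMIT_CALL passes through a raw address.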
7528 if (false) { // typecheck
7529 StringData sd("foo");
7530 const UNUSED Func* f = FuncCache::lookup(ch, &sd);
7532 SKTRACE(1, i.source, "ch %d\n", ch);
7533 EMIT_CALL(a, FuncCache::lookup, IMM(ch), V(inLoc));
7534 recordCall(i);
7535 emitVStackStore(a, i, rax, funcOff, sz::qword);
7538 void
7539 TranslatorX64::analyzeFPushClsMethodD(Tracelet& t, NormalizedInstruction& i) {
7540 i.m_txFlags = supportedPlan(isContextFixed());
7543 void
7544 TranslatorX64::translateFPushClsMethodD(const Tracelet& t,
7545 const NormalizedInstruction& i) {
7546 using namespace TargetCache;
7547 const StringData* meth = curUnit()->lookupLitstrId(i.imm[1].u_SA);
7548 const NamedEntityPair& np = curUnit()->lookupNamedEntityPairId(i.imm[2].u_SA);
7549 const StringData* cls = np.first;
7550 ASSERT(meth && meth->isStatic() &&
7551 cls && cls->isStatic());
7552 ASSERT(i.inputs.size() == 0);
7554 const Class* baseClass = Unit::lookupClass(np.second);
7555 bool magicCall = false;
7556 const Func* func = lookupImmutableMethod(baseClass, meth, magicCall,
7557 true /* staticLookup */);
7559 m_regMap.scrubStackRange(i.stackOff,
7560 i.stackOff + kNumActRecCells);
7562 int startOfActRec = -int(sizeof(ActRec));
7563 SKTRACE(2, i.source, "FPushClsMethodD %s :: %s\n",
7564 cls->data(), meth->data());
7566 size_t clsOff = AROFF(m_cls) + startOfActRec;
7567 if (func) {
7568 emitKnownClassCheck(i, cls, reg::noreg);
7569 Stats::emitInc(a, Stats::TgtCache_StaticMethodBypass);
7570 emitPushAR(i, func, 0 /*bytesPopped*/,
7571 false /* isCtor */, false /* clearThis */,
7572 magicCall ? uintptr_t(meth) | 1 : 0 /* varEnvInvName */);
7574 setupActRecClsForStaticCall(i, func, baseClass, clsOff, false);
7575 } else {
7576 Stats::emitInc(a, Stats::TgtCache_StaticMethodHit);
7577 CacheHandle ch = StaticMethodCache::alloc(cls, meth, getContextName());
7578 ScratchReg rFunc(m_regMap);
7579 a. load_reg64_disp_reg64(rVmTl, ch, *rFunc);
7580 a. test_reg64_reg64(*rFunc, *rFunc);
7581 // Unconditionally set rCls; if we miss, the miss path will
7582 // clean it up for us. Careful! Flags are live. The fill path
7583 // has already |'ed in the necessary 1.
7584 ScratchReg rCls(m_regMap);
7585 a. load_reg64_disp_reg64(rVmTl,
7586 ch + offsetof(StaticMethodCache, m_cls),
7587 *rCls);
7588 emitVStackStore(a, i, *rCls, clsOff);
7589 TCA stubsSkipRet;
7591 UnlikelyIfBlock<CC_Z> miss(a, astubs);
7592 if (false) { // typecheck
7593 const UNUSED Func* f = StaticMethodCache::lookup(ch, np.second,
7594 cls, meth);
7596 EMIT_CALL(astubs,
7597 StaticMethodCache::lookup,
7598 IMM(ch),
7599 IMM(int64(np.second)),
7600 IMM(int64(cls)),
7601 IMM(int64(meth)));
7602 recordReentrantStubCall(i);
7603 emitMovRegReg(astubs, rax, *rFunc);
7604 // NULL return means our work is done; see also
7605 // translateFPushClsMethodF.
7606 miss.reconcileEarly();
7607 astubs.test_reg64_reg64(*rFunc, *rFunc);
7608 stubsSkipRet = astubs.code.frontier;
7609 astubs.jz(a.code.frontier); // 1f to be patched later
7613 FreezeRegs ice(m_regMap);
7614 emitPushAR(i, NULL);
7615 size_t funcOff = AROFF(m_func) + startOfActRec;
7616 emitVStackStore(a, i, *rFunc, funcOff, sz::qword);
7618 // 1:
7619 astubs.patchJcc(stubsSkipRet, a.code.frontier);
7623 void
7624 TranslatorX64::analyzeFPushClsMethodF(Tracelet& t,
7625 NormalizedInstruction& i) {
7626 ASSERT(i.inputs[0]->valueType() == KindOfClass);
7627 i.m_txFlags = supportedPlan(
7628 i.inputs[1]->rtt.valueString() != NULL && // We know the method name
7629 i.inputs[0]->valueType() == KindOfClass &&
7630 i.inputs[0]->rtt.valueClass() != NULL && // We know the class name
7631 isContextFixed()
7635 void
7636 TranslatorX64::translateFPushClsMethodF(const Tracelet& t,
7637 const NormalizedInstruction& i) {
7638 using namespace TargetCache;
7639 ASSERT(isContextFixed());
7640 ASSERT(!curFunc()->isPseudoMain());
7641 ASSERT(curFunc()->cls() != NULL); // self:: and parent:: should only
7642 // appear in methods
7643 DynLocation* clsLoc = i.inputs[0];
7644 DynLocation* nameLoc = i.inputs[1];
7645 const StringData* name = nameLoc->rtt.valueString();
7646 ASSERT(name && name->isStatic());
7648 // Even though we know the Class* at compile time, it's not
7649 // guaranteed to be the same between requests. The name, however, is
7650 // fixed, so we can use that.
7651 const Class* cls = clsLoc->rtt.valueClass();
7652 ASSERT(cls);
7653 bool magicCall = false;
7654 const Func* func = lookupImmutableMethod(cls, name, magicCall,
7655 true /* staticLookup */);
7657 const int bytesPopped = 2 * sizeof(Cell); // [A C] popped
7658 const int startOfActRec = -int(sizeof(ActRec)) + bytesPopped;
7659 const Offset clsOff = startOfActRec + AROFF(m_cls);
7661 UNUSED ActRec* fp = curFrame();
7662 ASSERT(!fp->hasThis() || fp->getThis()->instanceof(cls));
7663 if (func) {
7664 Stats::emitInc(a, Stats::TgtCache_StaticMethodFBypass);
7665 emitPushAR(i, func, bytesPopped,
7666 false /* isCtor */, false /* clearThis */,
7667 magicCall ? uintptr_t(name) | 1 : 0 /* varEnvInvName */);
7669 setupActRecClsForStaticCall(i, func, cls, clsOff, true);
7670 m_regMap.scrubStackRange(i.stackOff - 2,
7671 i.stackOff - 2 + kNumActRecCells);
7672 } else {
7673 const StringData* clsName = cls->name();
7674 CacheHandle ch = StaticMethodFCache::alloc(clsName, name, getContextName());
7676 Stats::emitInc(a, Stats::TgtCache_StaticMethodFHit);
7677 TCA stubsSkipRet;
7678 ScratchReg rFunc(m_regMap);
7679 a. load_reg64_disp_reg64(rVmTl, ch, *rFunc);
7680 a. test_reg64_reg64(*rFunc, *rFunc);
7682 UnlikelyIfBlock<CC_Z> miss(a, astubs);
7683 if (false) { // typecheck
7684 const UNUSED Func* f = StaticMethodFCache::lookup(ch, cls, name);
7686 EMIT_CALL(astubs,
7687 StaticMethodFCache::lookup,
7688 IMM(ch),
7689 V(clsLoc->location),
7690 V(nameLoc->location));
7691 recordReentrantStubCall(i);
7692 emitMovRegReg(astubs, rax, *rFunc);
7693 // if rax == NULL, the helper interpreted the entire
7694 // instruction for us. Skip over the rest of the emitted code in
7695 // a, but we don't want to skip the branch spill/fill code.
7696 miss.reconcileEarly();
7697 astubs.test_reg64_reg64(*rFunc, *rFunc);
7698 stubsSkipRet = astubs.code.frontier;
7699 astubs.jz(a.code.frontier); // to be patched later
7702 const Offset funcOff = startOfActRec + AROFF(m_func);
7703 m_regMap.scrubStackRange(i.stackOff - 2,
7704 i.stackOff - 2 + kNumActRecCells);
7706 FreezeRegs ice(m_regMap);
7707 emitPushAR(i, NULL, bytesPopped);
7708 emitVStackStore(a, i, *rFunc, funcOff);
7710 // We know we're in a method so we don't have to worry about
7711 // rVmFp->m_cls being NULL. We just have to figure out if it's a
7712 // Class* or $this, and whether or not we should pass along $this or
7713 // its class.
7714 PhysReg rCls = *rFunc; // no need to allocate another scratch
7715 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
7716 a. test_imm32_reg64(1, rCls);
7718 JccBlock<CC_NZ> ifThis(a);
7719 // rCls is holding $this. Should we pass it to the callee?
7720 a. cmp_imm32_disp_reg32(1, ch + offsetof(StaticMethodFCache, m_static),
7721 rVmTl);
7723 IfElseBlock<CC_NE> ifStatic(a);
7724 // We're calling a static method. Load (this->m_cls | 0x1) into rCls.
7725 a.load_reg64_disp_reg64(rCls, ObjectData::getVMClassOffset(), rCls);
7726 a.or_imm32_reg64(1, rCls);
7728 ifStatic.Else();
7729 // We're calling an instance method. incRef $this.
7730 emitIncRef(rCls, KindOfObject);
7733 emitVStackStore(a, i, rCls, clsOff);
7736 astubs.patchJcc(stubsSkipRet, a.code.frontier);
7737 // No need to decref our inputs: one was KindOfClass and the other's
7738 // a static string.
7742 void
7743 TranslatorX64::analyzeFPushObjMethodD(Tracelet& t,
7744 NormalizedInstruction &i) {
7745 DynLocation* objLoc = i.inputs[0];
7746 i.m_txFlags = supportedPlan(objLoc->valueType() == KindOfObject &&
7747 isContextFixed());
7750 void
7751 TranslatorX64::translateFPushObjMethodD(const Tracelet &t,
7752 const NormalizedInstruction& i) {
7753 ASSERT(i.inputs.size() == 1);
7754 Location& objLoc = i.inputs[0]->location;
7755 ASSERT(i.inputs[0]->valueType() == KindOfObject);
7756 int id = i.imm[1].u_IVA;
7757 const StringData* name = curUnit()->lookupLitstrId(id);
7759 const Class* baseClass = i.inputs[0]->rtt.valueClass();
7760 bool magicCall = false;
7761 const Func* func = lookupImmutableMethod(baseClass, name, magicCall,
7762 false /* staticLookup */);
7763 m_regMap.scrubStackRange(i.stackOff - 1,
7764 i.stackOff - 1 + kNumActRecCells);
7765 // Popped one cell, pushed an actrec
7766 int startOfActRec = int(sizeof(Cell)) - int(sizeof(ActRec));
7767 size_t thisOff = AROFF(m_this) + startOfActRec;
7768 size_t funcOff = AROFF(m_func) + startOfActRec;
7769 emitPushAR(i, func, sizeof(Cell) /*bytesPopped*/,
7770 false /* isCtor */, false /* clearThis */,
7771 func && magicCall ? uintptr_t(name) | 1 : 0 /* varEnvInvName */);
7773 if (!func) {
7774 if (baseClass && !(baseClass->attrs() & AttrInterface)) {
7775 MethodLookup::LookupResult res =
7776 g_vmContext->lookupObjMethod(func, baseClass, name, false);
7777 if ((res == MethodLookup::MethodFoundWithThis ||
7778 res == MethodLookup::MethodFoundNoThis) &&
7779 !func->isAbstract()) {
7781 /* if we found the func in baseClass, then either:
7782 * - it's private, and this is always going to be the
7783 * called function, or
7784 * - any derived class must have a func that matches in
7785 * staticness, and is at least as accessible (and in
7786 * particular, you can't override a public/protected
7787 * method with a private method) */
7789 if (func->attrs() & AttrPrivate) {
7790 emitVStackStoreImm(a, i, uintptr_t(func), funcOff, sz::qword);
7791 } else {
7792 Offset methodsOff = Class::getMethodsOffset();
7793 Offset vecOff = methodsOff + Class::MethodMap::vecOff();
7794 ScratchReg scratch(m_regMap);
7795 // get the object's class into *scratch
7796 a. load_reg64_disp_reg64(getReg(objLoc),
7797 ObjectData::getVMClassOffset(),
7798 *scratch);
7799 if (res == MethodLookup::MethodFoundNoThis) {
7800 emitDecRef(a, i, getReg(objLoc), KindOfObject);
7801 a. lea_reg64_disp_reg64(*scratch, 1, getReg(objLoc));
7803 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
7805 // get the method vector into *scratch
7806 a. load_reg64_disp_reg64(*scratch, vecOff, *scratch);
7807 // get the func
7808 a. load_reg64_disp_reg64(*scratch,
7809 func->methodSlot() * sizeof(Func*),
7810 *scratch);
7811 emitVStackStore(a, i, *scratch, funcOff, sz::qword);
7812 Stats::emitInc(a, Stats::TgtCache_MethodFast);
7813 return;
7815 } else {
7816 func = NULL;
7821 if (func) {
7822 if (func->attrs() & AttrStatic) {
7823 if (func->attrs() & AttrPrivate) {
7824 emitVStackStoreImm(a, i, uintptr_t(curFunc()->cls()) | 1,
7825 thisOff, sz::qword);
7826 } else {
7827 ScratchReg scratch(m_regMap);
7828 a. load_reg64_disp_reg64(getReg(objLoc),
7829 ObjectData::getVMClassOffset(),
7830 *scratch);
7831 a. or_imm32_reg64(1, *scratch);
7832 emitVStackStore(a, i, *scratch, thisOff, sz::qword);
7834 emitDecRef(a, i, getReg(objLoc), KindOfObject);
7835 } else {
7836 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
7838 Stats::emitInc(a, Stats::TgtCache_MethodBypass);
7839 } else {
7840 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
7841 using namespace TargetCache;
7842 CacheHandle ch = MethodCache::alloc();
7843 if (false) { // typecheck
7844 ActRec* ar = NULL;
7845 MethodCache::lookup(ch, ar, name);
7847 int arOff = vstackOffset(i, startOfActRec);
7848 SKTRACE(1, i.source, "ch %d\n", ch);
7849 EMIT_CALL(a, MethodCache::lookup, IMM(ch),
7850 RPLUS(rVmSp, arOff), IMM(uint64_t(name)));
7851 recordReentrantCall(i);
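// getKnownClass: fast path reads the Class* straight from the per-request
// target cache slot; on a miss it falls back to lookupKnownClass, which may
// autoload (and syncs VM registers itself), then asserts the slot is filled.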
7855 static inline ALWAYS_INLINE Class* getKnownClass(Class** classCache,
7856 const StringData* clsName) {
7857 Class* cls = *classCache;
7858 if (UNLIKELY(cls == NULL)) {
7859 // lookupKnownClass does its own VMRegAnchor'ing.
7860 cls = TargetCache::lookupKnownClass<false>(classCache, clsName, true);
7861 ASSERT(*classCache && *classCache == cls);
7863 ASSERT(cls);
7864 return cls;
7867 static Instance*
7868 HOT_FUNC_VM
7869 newInstanceHelperNoCtor(Class** classCache, const StringData* clsName) {
7870 Class* cls = getKnownClass(classCache, clsName);
7871 Instance* ret = newInstance(cls);
7872 ret->incRefCount();
7873 return ret;
7876 Instance*
7877 HOT_FUNC_VM
7878 newInstanceHelper(Class* cls, int numArgs, ActRec* ar, ActRec* prevAr) {
7879 const Func* f = cls->getCtor();
7880 Instance* ret = NULL;
7881 if (UNLIKELY(!(f->attrs() & AttrPublic))) {
7882 VMRegAnchor _;
7883 UNUSED MethodLookup::LookupResult res =
7884 g_vmContext->lookupCtorMethod(f, cls, true /*raise*/);
7885 ASSERT(res == MethodLookup::MethodFoundWithThis);
7887 // Don't start pushing the AR until newInstance returns; it may reenter.
7888 ret = newInstance(cls);
7889 f->validate();
7890 ar->m_func = f;
7891 ar->initNumArgs(numArgs, true /*fromCtor*/);
7892 // Count two references: one for the new object on the stack, one for ar->m_this.
7893 ret->incRefCount();
7894 ret->incRefCount();
7895 ar->setThis(ret);
7896 ar->setVarEnv(NULL);
7897 arSetSfp(ar, prevAr);
7898 TRACE(2, "newInstanceHelper: AR %p: f %p, savedRbp %#lx, savedRip %#lx"
7899 " this %p\n",
7900 ar, ar->m_func, ar->m_savedRbp, ar->m_savedRip, ar->m_this);
7901 return ret;
7904 void TranslatorX64::translateFPushCtor(const Tracelet& t,
7905 const NormalizedInstruction& i) {
7906 int numArgs = i.imm[0].u_IVA;
7907 int arOff = vstackOffset(i, -int(sizeof(ActRec)));
7908 m_regMap.scrubStackRange(i.stackOff, i.stackOff + kNumActRecCells);
7909 EMIT_CALL(a, newInstanceHelper,
7910 V(i.inputs[0]->location),
7911 IMM(numArgs),
7912 RPLUS(rVmSp, arOff),
7913 R(rVmFp));
7914 recordReentrantCall(i);
7916 m_regMap.bind(rax, i.outStack->location, KindOfObject, RegInfo::DIRTY);
7919 Instance*
7920 HOT_FUNC_VM
7921 newInstanceHelperCached(Class** classCache,
7922 const StringData* clsName, int numArgs,
7923 ActRec* ar, ActRec* prevAr) {
7924 Class* cls = getKnownClass(classCache, clsName);
7925 return newInstanceHelper(cls, numArgs, ar, prevAr);
7928 void TranslatorX64::translateFPushCtorD(const Tracelet& t,
7929 const NormalizedInstruction& i) {
7930 using namespace TargetCache;
7931 int numArgs = i.imm[0].u_IVA;
7932 const StringData* clsName = curUnit()->lookupLitstrId(i.imm[1].u_SA);
7933 CacheHandle classCh = allocKnownClass(clsName);
7934 ScratchReg scr(m_regMap);
7935 a. lea_reg64_disp_reg64(rVmTl, classCh, *scr);
7936 // We first push the new object, then the actrec. Since we're going to
7937 // need to call out, and possibly reenter in the course of all this,
7938 // null out the object on the stack, in case we unwind before we're
7939 // ready.
7940 int arOff = vstackOffset(i, -int(sizeof(ActRec)) - cellsToBytes(1));
7941 m_regMap.scrubStackRange(i.stackOff, i.stackOff + kNumActRecCells + 1);
7942 if (i.noCtor) {
7943 EMIT_CALL(a, newInstanceHelperNoCtor,
7944 R(*scr),
7945 IMM(uintptr_t(clsName)));
7946 } else {
7947 EMIT_CALL(a, newInstanceHelperCached,
7948 R(*scr),
7949 IMM(uintptr_t(clsName)),
7950 IMM(numArgs),
7951 RPLUS(rVmSp, arOff), // ActRec
7952 R(rVmFp)); // prevAR
7954 recordReentrantCall(i);
7955 // The callee takes care of initializing the actRec, and returns the new
7956 // object.
7957 m_regMap.bind(rax, i.outStack->location, KindOfObject, RegInfo::DIRTY);
7960 static void fatalNullThis() { raise_error(Strings::FATAL_NULL_THIS); }
7962 void
7963 TranslatorX64::translateThis(const Tracelet &t,
7964 const NormalizedInstruction &i) {
7965 ASSERT(i.outStack && !i.outLocal);
7966 ASSERT(curFunc()->isPseudoMain() || curFunc()->cls());
7967 m_regMap.allocOutputRegs(i);
7968 PhysReg out = getReg(i.outStack->location);
7969 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), out);
7971 if (!i.guardedThis) {
7972 if (curFunc()->cls() == NULL) { // Non-class
7973 a. test_reg64_reg64(out, out);
7974 a. jz(astubs.code.frontier); // jz if_null
7977 a. test_imm32_reg64(1, out);
7979 UnlikelyIfBlock<CC_NZ> ifThisNull(a, astubs);
7980 // if_null:
7981 EMIT_CALL(astubs, fatalNullThis);
7982 recordReentrantStubCall(i);
7985 emitIncRef(out, KindOfObject);
7988 void
7989 TranslatorX64::translateInitThisLoc(const Tracelet& t,
7990 const NormalizedInstruction& i) {
7991 ASSERT(i.outLocal && !i.outStack);
7992 ASSERT(curFunc()->isPseudoMain() || curFunc()->cls());
7994 PhysReg base;
7995 int offset;
7996 locToRegDisp(i.outLocal->location, &base, &offset);
7997 ASSERT(base == rVmFp);
7999 ScratchReg thiz(m_regMap);
8000 a.load_reg64_disp_reg64(rVmFp, AROFF(m_this), *thiz);
8001 if (curFunc()->cls() == NULL) {
8002 // If we're in a pseudomain, m_this could be NULL
8003 a.test_reg64_reg64(*thiz, *thiz);
8004 a.jz(astubs.code.frontier); // jz if_null
8006 // Ok, it's not NULL but it might be a Class which should be treated
8007 // equivalently
8008 a.test_imm32_reg64(1, *thiz);
8009 a.jnz(astubs.code.frontier); // jnz if_null
8011 // We have a valid $this!
8012 a.store_imm32_disp_reg(KindOfObject, offset + TVOFF(m_type), base);
8013 a.store_reg64_disp_reg64(*thiz, offset + TVOFF(m_data), base);
8014 emitIncRef(*thiz, KindOfObject);
8016 // if_null:
8017 emitStoreUninitNull(astubs, offset, base);
8018 astubs.jmp(a.code.frontier);
8020 m_regMap.invalidate(i.outLocal->location);
8023 void
8024 TranslatorX64::analyzeFPushFuncD(Tracelet& t, NormalizedInstruction& i) {
8025 Id funcId = i.imm[1].u_SA;
8026 const NamedEntityPair nep = curUnit()->lookupNamedEntityPairId(funcId);
8027 const Func* func = Unit::lookupFunc(nep.second, nep.first);
8028 i.m_txFlags = supportedPlan(func != NULL);
8031 void
8032 TranslatorX64::translateFPushFuncD(const Tracelet& t,
8033 const NormalizedInstruction& i) {
8034 ASSERT(i.inputs.size() == 0);
8035 ASSERT(!i.outStack && !i.outLocal);
8036 Id funcId = i.imm[1].u_SA;
8037 const NamedEntityPair& nep = curUnit()->lookupNamedEntityPairId(funcId);
8038 const StringData* name = nep.first;
8039 const Func* func = Unit::lookupFunc(nep.second, name);
8041 // Translation is only supported if function lookup succeeds
8042 func->validate();
8043 if (Trace::enabled && !func) {
8044 TRACE(1, "Attempt to invoke undefined function %s\n", name->data());
8047 // Inform the register allocator that we just annihilated a range of
8048 // possibly-dirty stack entries.
8049 m_regMap.scrubStackRange(i.stackOff,
8050 i.stackOff + kNumActRecCells);
8052 size_t thisOff = AROFF(m_this) - sizeof(ActRec);
8053 bool funcCanChange = !func->isNameBindingImmutable(curUnit());
8054 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
8055 emitPushAR(i, funcCanChange ? NULL : func, 0, false, false);
8056 if (funcCanChange) {
8057 // Look it up in a FuncCache.
8058 using namespace TargetCache;
8059 CacheHandle ch = FixedFuncCache::alloc(name);
8060 size_t funcOff = AROFF(m_func) - sizeof(ActRec);
8061 size_t funcCacheOff = ch + offsetof(FixedFuncCache, m_func);
8063 SKTRACE(1, i.source, "ch %d\n", ch);
8065 Stats::emitInc(a, Stats::TgtCache_FuncDHit);
8066 ScratchReg scratch(m_regMap);
8067 a.load_reg64_disp_reg64(rVmTl, funcCacheOff, *scratch);
8068 a.test_reg64_reg64(*scratch, *scratch);
8070 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8072 if (false) { // typecheck
8073 StringData sd("foo");
8074 FixedFuncCache::lookupFailed(&sd);
8077 EMIT_CALL(astubs, TCA(FixedFuncCache::lookupFailed),
8078 IMM(uintptr_t(name)));
8079 recordReentrantStubCall(i);
8080 emitMovRegReg(astubs, rax, *scratch);
8082 emitVStackStore(a, i, *scratch, funcOff, sz::qword);
8086 void
8087 TranslatorX64::translateFPushContFunc(const Tracelet& t,
8088 const NormalizedInstruction& i) {
8089 ASSERT(curFrame()->hasThis());
8090 Class* genClass = curFrame()->getThis()->getVMClass();
8091 ASSERT(genClass == SystemLib::s_MethodContinuationClass ||
8092 genClass == SystemLib::s_FunctionContinuationClass);
8093 bool isMethod = genClass == SystemLib::s_MethodContinuationClass;
8094 size_t thisOff = AROFF(m_this) - sizeof(ActRec);
8095 size_t funcOff = AROFF(m_func) - sizeof(ActRec);
8096 m_regMap.scrubStackRange(i.stackOff,
8097 i.stackOff + kNumActRecCells);
8098 emitPushAR(i, NULL, 0, false, false);
8099 ScratchReg rCont(m_regMap);
8100 ScratchReg rScratch(m_regMap);
8101 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
8103 // Store the func
8104 a.load_reg64_disp_reg64(*rCont, CONTOFF(m_vmFunc), *rScratch);
8105 emitVStackStore(a, i, *rScratch, funcOff, sz::qword);
8107 if (isMethod) {
8108 // Store m_this
8109 a. load_reg64_disp_reg64(*rCont, CONTOFF(m_obj), *rScratch);
8110 a. test_reg64_reg64(*rScratch, *rScratch);
8112 IfElseBlock<CC_Z> ifThis(a);
8113 emitVStackStore(a, i, *rScratch, thisOff, sz::qword);
8114 emitIncRef(*rScratch, KindOfObject);
8116 ifThis.Else();
8117 a.load_reg64_disp_reg64(*rCont, CONTOFF(m_vmCalledClass), *rScratch);
8118 // m_vmCalledClass already has its low bit set
8119 emitVStackStore(a, i, *rScratch, thisOff, sz::qword);
8121 } else {
8122 emitVStackStoreImm(a, i, 0, thisOff, sz::qword);
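// findCuf: resolve a call_user_func-style callable ("func", "Cls::meth", or
// array($cls, $meth)) to a Func* at translation time. cls and invName (for
// magic calls) are filled in as needed, and forward is set when the caller's
// late static binding class must be forwarded (FPushCufF, self::, parent::).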
8126 const Func*
8127 TranslatorX64::findCuf(const NormalizedInstruction& ni,
8128 Class*& cls, StringData*& invName, bool& forward) {
8129 forward = (ni.op() == OpFPushCufF);
8130 cls = NULL;
8131 invName = NULL;
8133 DynLocation* callable = ni.inputs[ni.op() == OpFPushCufSafe ? 1 : 0];
8135 const StringData* str =
8136 callable->isString() ? callable->rtt.valueString() : NULL;
8137 const ArrayData* arr =
8138 callable->isArray() ? callable->rtt.valueArray() : NULL;
8140 StringData* sclass = NULL;
8141 StringData* sname = NULL;
8142 if (str) {
8143 Func* f = HPHP::VM::Unit::lookupFunc(str);
8144 if (f) return f;
8145 String name(const_cast<StringData*>(str));
8146 int pos = name.find("::");
8147 if (pos <= 0 || pos + 2 >= name.size() ||
8148 name.find("::", pos + 2) != String::npos) {
8149 return NULL;
8151 sclass = StringData::GetStaticString(name.substr(0, pos).get());
8152 sname = StringData::GetStaticString(name.substr(pos + 2).get());
8153 } else if (arr) {
8154 if (arr->size() != 2) return NULL;
8155 CVarRef e0 = arr->get(0LL, false);
8156 CVarRef e1 = arr->get(1LL, false);
8157 if (!e0.isString() || !e1.isString()) return NULL;
8158 sclass = e0.getStringData();
8159 sname = e1.getStringData();
8160 String name(sname);
8161 if (name.find("::") != String::npos) return NULL;
8162 } else {
8163 return NULL;
8166 if (!isContextFixed()) return NULL;
8168 Class* ctx = curFunc()->cls();
8170 if (sclass->isame(s_self.get())) {
8171 if (!ctx) return NULL;
8172 cls = ctx;
8173 forward = true;
8174 } else if (sclass->isame(s_parent.get())) {
8175 if (!ctx || !ctx->parent()) return NULL;
8176 cls = ctx->parent();
8177 forward = true;
8178 } else if (sclass->isame(s_static.get())) {
8179 return NULL;
8180 } else {
8181 cls = VM::Unit::lookupClass(sclass);
8182 if (!cls) return NULL;
8185 bool magicCall = false;
8186 const Func* f = lookupImmutableMethod(cls, sname, magicCall, true);
8187 if (!f || (forward && !ctx->classof(f->cls()))) {
8189 /* To preserve the invariant that the lsb class
8190 * is an instance of the context class, we require
8191 * that f's class is an instance of the context class.
8192 * This is conservative, but without it, we would need
8193 * a runtime check to decide whether or not to forward
8194 * the lsb class. */
8196 return NULL;
8198 if (magicCall) invName = sname;
8199 return f;
8202 void
8203 TranslatorX64::analyzeFPushCufOp(Tracelet& t,
8204 NormalizedInstruction& ni) {
8205 Class* cls = NULL;
8206 StringData* invName = NULL;
8207 bool forward = false;
8208 const Func* func = findCuf(ni, cls, invName, forward);
8209 ni.m_txFlags = supportedPlan(func != NULL);
8210 ni.manuallyAllocInputs = true;
8213 void
8214 TranslatorX64::setupActRecClsForStaticCall(const NormalizedInstruction &i,
8215 const Func* func, const Class* cls,
8216 size_t clsOff, bool forward) {
8217 if (forward) {
8218 ScratchReg rClsScratch(m_regMap);
8219 PhysReg rCls = *rClsScratch;
8220 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
8221 if (!(curFunc()->attrs() & AttrStatic)) {
8222 ASSERT(curFunc()->cls() &&
8223 curFunc()->cls()->classof(cls));
8224 /* the context is non-static, so we have to deal
8225 with passing in $this or getClass($this) */
8226 a. test_imm32_reg64(1, rCls);
8228 JccBlock<CC_NZ> ifThis(a);
8229 // rCls is holding a real $this.
8230 if (func->attrs() & AttrStatic) {
8231 // but we're a static method, so pass getClass($this)|1
8232 a.load_reg64_disp_reg64(rCls, ObjectData::getVMClassOffset(), rCls);
8233 a.or_imm32_reg64(1, rCls);
8234 } else {
8235 // We should pass $this to the callee
8236 emitIncRef(rCls, KindOfObject);
8240 emitVStackStore(a, i, rCls, clsOff);
8241 } else {
8242 if (!(func->attrs() & AttrStatic) &&
8243 !(curFunc()->attrs() & AttrStatic) &&
8244 curFunc()->cls() &&
8245 curFunc()->cls()->classof(cls)) {
8246 /* might be a non-static call */
8247 ScratchReg rClsScratch(m_regMap);
8248 PhysReg rCls = *rClsScratch;
8249 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
8250 a. test_imm32_reg64(1, rCls);
8252 IfElseBlock<CC_NZ> ifThis(a);
8253 // rCls is holding $this. We should pass it to the callee
8254 emitIncRef(rCls, KindOfObject);
8255 emitVStackStore(a, i, rCls, clsOff);
8256 ifThis.Else();
8257 emitVStackStoreImm(a, i, uintptr_t(cls)|1, clsOff);
8259 } else {
8260 emitVStackStoreImm(a, i, uintptr_t(cls)|1, clsOff);
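// checkClass: astubs-side fallback when the target class is not yet defined
// in this request. It runs the autoloader; if the class still is not in the
// target cache, it rewrites the prepared ActRec to call the null function
// (dropping any $this) and reports failure to the caller.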
8265 template <bool warn>
8266 int64 checkClass(TargetCache::CacheHandle ch, StringData* clsName,
8267 ActRec *ar) {
8268 VMRegAnchor _;
8269 AutoloadHandler::s_instance->invokeHandler(clsName->data());
8270 if (*(Class**)TargetCache::handleToPtr(ch)) return true;
8271 ar->m_func = SystemLib::GetNullFunction();
8272 if (ar->hasThis()) {
8273 // cannot hit zero, we just inc'ed it
8274 ar->getThis()->decRefCount();
8276 ar->setThis(0);
8277 return false;
8280 static void warnMissingFunc(StringData* name) {
8281 throw_invalid_argument("function: method '%s' not found", name->data());
8284 void
8285 TranslatorX64::translateFPushCufOp(const Tracelet& t,
8286 const NormalizedInstruction& ni) {
8287 Class* cls = NULL;
8288 StringData* invName = NULL;
8289 bool forward = false;
8290 const Func* func = findCuf(ni, cls, invName, forward);
8291 ASSERT(func);
8293 int numPopped = ni.op() == OpFPushCufSafe ? 0 : 1;
8294 m_regMap.scrubStackRange(ni.stackOff - numPopped,
8295 ni.stackOff - numPopped + kNumActRecCells);
8297 int startOfActRec = int(numPopped * sizeof(Cell)) - int(sizeof(ActRec));
8299 emitPushAR(ni, cls ? func : NULL, numPopped * sizeof(Cell),
8300 false /* isCtor */, false /* clearThis */,
8301 invName ? uintptr_t(invName) | 1 : 0 /* varEnvInvName */);
8303 bool safe = (ni.op() == OpFPushCufSafe);
8304 size_t clsOff = AROFF(m_cls) + startOfActRec;
8305 size_t funcOff = AROFF(m_func) + startOfActRec;
8306 LazyScratchReg flag(m_regMap);
8307 if (safe) {
8308 flag.alloc();
8309 emitImmReg(a, true, *flag);
8311 if (cls) {
8312 setupActRecClsForStaticCall(ni, func, cls, clsOff, forward);
8313 TargetCache::CacheHandle ch = cls->m_cachedOffset;
8314 a. cmp_imm32_disp_reg32(0, ch, rVmTl);
8316 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8317 if (false) {
8318 checkClass<false>(0, NULL, NULL);
8319 checkClass<true>(0, NULL, NULL);
8321 EMIT_CALL(astubs, TCA(safe ? checkClass<false> : checkClass<true>),
8322 IMM(ch), IMM(uintptr_t(cls->name())),
8323 RPLUS(rVmSp, vstackOffset(ni, startOfActRec)));
8324 recordReentrantStubCall(ni, true);
8325 if (safe) {
8326 astubs. mov_reg64_reg64(rax, *flag);
8329 } else {
8330 ScratchReg funcReg(m_regMap);
8331 TargetCache::CacheHandle ch = func->getCachedOffset();
8332 a. load_reg64_disp_reg64(rVmTl, ch, *funcReg);
8333 emitVStackStore(a, ni, *funcReg, funcOff);
8334 emitVStackStoreImm(a, ni, 0, clsOff, sz::qword, &m_regMap);
8335 a. test_reg64_reg64(*funcReg, *funcReg);
8337 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8338 emitVStackStoreImm(astubs, ni,
8339 uintptr_t(SystemLib::GetNullFunction()), funcOff);
8340 if (safe) {
8341 emitImmReg(astubs, false, *flag);
8342 } else {
8343 EMIT_CALL(astubs, TCA(warnMissingFunc), IMM(uintptr_t(func->name())));
8344 recordReentrantStubCall(ni, true);
8349 if (safe) {
8350 DynLocation* outFlag = ni.outStack2;
8351 DynLocation* outDef = ni.outStack;
8353 DynLocation* inDef = ni.inputs[0];
8354 if (!m_regMap.hasReg(inDef->location)) {
8355 m_regMap.scrubStackRange(ni.stackOff - 2, ni.stackOff - 2);
8356 PhysReg base1, base2;
8357 int disp1, disp2;
8358 locToRegDisp(inDef->location, &base1, &disp1);
8359 locToRegDisp(outDef->location, &base2, &disp2);
8360 ScratchReg tmp(m_regMap);
8361 a. load_reg64_disp_reg64(base1, TVOFF(m_data) + disp1, *tmp);
8362 a. store_reg64_disp_reg64(*tmp, TVOFF(m_data) + disp2, base2);
8363 if (!inDef->rtt.isVagueValue()) {
8364 a. store_imm32_disp_reg(inDef->outerType(),
8365 TVOFF(m_type) + disp2, base2);
8366 } else {
8367 a. load_reg64_disp_reg32(base1, TVOFF(m_type) + disp1, *tmp);
8368 a. store_reg32_disp_reg64(*tmp, TVOFF(m_type) + disp2, base2);
8370 } else {
8371 PhysReg reg = m_regMap.getReg(inDef->location);
8372 m_regMap.scrubStackRange(ni.stackOff - 1, ni.stackOff - 1);
8373 m_regMap.bind(reg, outDef->location, inDef->rtt.outerType(),
8374 RegInfo::DIRTY);
8376 m_regMap.bindScratch(flag, outFlag->location, KindOfBoolean,
8377 RegInfo::DIRTY);
8381 void
8382 TranslatorX64::analyzeFPassCOp(Tracelet& t, NormalizedInstruction& i) {
8383 i.m_txFlags = nativePlan(!i.preppedByRef);
8386 void
8387 TranslatorX64::translateFPassCOp(const Tracelet& t,
8388 const NormalizedInstruction& i) {
8389 ASSERT(i.inputs.size() == 0);
8390 ASSERT(!i.outStack && !i.outLocal);
8391 ASSERT(!i.preppedByRef);
8394 void
8395 TranslatorX64::translateFPassR(const Tracelet& t,
8396 const NormalizedInstruction& i) {
8398 /* Like FPassC, FPassR is able to cheat on boxing if the current
8399 * parameter is pass by reference but we have a cell: the box would refer
8400 * to exactly one datum (the value currently on the stack).
8401 *
8402 * However, if the callee wants a cell and we have a variant we must
8403 * unbox; otherwise we might accidentally make callee changes to its
8404 * parameter globally visible. */
8406 ASSERT(!i.inputs[0]->rtt.isVagueValue());
8408 ASSERT(i.inputs.size() == 1);
8409 const RuntimeType& inRtt = i.inputs[0]->rtt;
8410 if (inRtt.isVariant() && !i.preppedByRef) {
8411 emitUnboxTopOfStack(i);
8415 void
8416 TranslatorX64::translateFCall(const Tracelet& t,
8417 const NormalizedInstruction& i) {
8418 int numArgs = i.imm[0].u_IVA;
8419 const Opcode* atCall = i.pc();
8420 const Opcode* after = curUnit()->at(nextSrcKey(t, i).offset());
8421 const Func* srcFunc = curFunc();
8423 // Sync all dirty registers and adjust rVmSp to point to the
8424 // top of stack at the beginning of the current instruction
8425 syncOutputs(i);
8427 // We are "between" tracelets and don't use the register map
8428 // anymore. (Note that the currently executing trace may actually
8429 // continue past the FCall, but it will have to resume with a fresh
8430 // register map.)
8431 RegSet scratchRegs = kScratchCrossTraceRegs;
8432 DumbScratchReg retIPReg(scratchRegs);
8434 // Caller-specific fields: return addresses and the frame pointer
8435 // offset.
8436 ASSERT(sizeof(Cell) == 1 << 4);
8437 // Record the hardware return address. This will be patched up below; 2
8438 // is a magic number dependent on assembler implementation.
8439 MovImmPatcher retIP(a, (uint64_t)a.code.frontier, *retIPReg);
8440 a. store_reg64_disp_reg64 (*retIPReg,
8441 cellsToBytes(numArgs) + AROFF(m_savedRip),
8442 rVmSp);
8444 // The kooky offset here a) gets us to the current ActRec,
8445 // and b) accesses m_soff.
8446 int32 callOffsetInUnit = srcFunc->unit()->offsetOf(after - srcFunc->base());
8447 a. store_imm32_disp_reg(callOffsetInUnit,
8448 cellsToBytes(numArgs) + AROFF(m_soff),
8449 rVmSp);
8451 emitBindCall(t, i,
8452 curUnit()->offsetOf(atCall),
8453 curUnit()->offsetOf(after)); // ...
8454 retIP.patch(uint64(a.code.frontier));
8456 if (i.breaksBB) {
8457 SrcKey fallThru(curFunc(), after);
8458 emitBindJmp(fallThru);
8459 } else {
8461 /* Before returning, the callee restored rVmSp to point to the
8462 * current top of stack but the rest of this tracelet assumes that
8463 * rVmSp is set to the top of the stack at the beginning of the
8464 * tracelet, so we have to fix it up here.
8465 *
8466 * TODO: in the case of an inlined NativeImpl, we're essentially
8467 * emitting two adds to rVmSp in a row, which we can combine ... */
8469 int delta = i.stackOff + getStackDelta(i);
8470 if (delta != 0) {
8471 // i.stackOff is in negative Cells, not bytes.
8472 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
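// staticLocHelper: find (or lazily create) the named entry in the function's
// static-locals map, seeding it from the initializer value on the stack and
// boxing it if it isn't already a ref. With UseTC the resulting TypedValue*
// is also published in the target cache so later executions skip the helper.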
8477 template <bool UseTC>
8478 static TypedValue*
8479 staticLocHelper(StringData* name, ActRec* fp, TypedValue* sp,
8480 TargetCache::CacheHandle ch) {
8481 if (UseTC) {
8482 Stats::inc(Stats::TgtCache_StaticMiss);
8483 Stats::inc(Stats::TgtCache_StaticHit, -1);
8485 HphpArray* map = get_static_locals(fp);
8486 TypedValue* retval = map->nvGet(name); // Local to num
8487 if (!retval) {
8488 // Read the initial value off the stack.
8489 TypedValue tv = *sp;
8490 map->nvSet(name, &tv, false);
8491 retval = map->nvGet(name);
8493 ASSERT(retval);
8494 if (retval->m_type != KindOfRef) {
8495 tvBox(retval);
8497 ASSERT(retval->m_type == KindOfRef);
8498 if (UseTC) {
8499 TypedValue** chTv = (TypedValue**)TargetCache::handleToPtr(ch);
8500 ASSERT(*chTv == NULL);
8501 return (*chTv = retval);
8502 } else {
8503 return retval;
8507 void
8508 TranslatorX64::emitCallStaticLocHelper(X64Assembler& as,
8509 const NormalizedInstruction& i,
8510 ScratchReg& output,
8511 TargetCache::CacheHandle ch) {
8512 // The helper is going to read the value from memory, so record it. We
8513 // could also pass type/value as parameters, but this is hopefully a
8514 // rare path.
8515 m_regMap.cleanLoc(i.inputs[0]->location);
8516 if (false) { // typecheck
8517 StringData* sd = NULL;
8518 ActRec* fp = NULL;
8519 TypedValue* sp = NULL;
8520 sp = staticLocHelper<true>(sd, fp, sp, ch);
8521 sp = staticLocHelper<false>(sd, fp, sp, ch);
8523 const StringData* name = curFunc()->unit()->lookupLitstrId(i.imm[1].u_SA);
8524 ASSERT(name->isStatic());
8525 if (ch) {
8526 EMIT_CALL(as, (TCA)staticLocHelper<true>, IMM(uintptr_t(name)), R(rVmFp),
8527 RPLUS(rVmSp, -cellsToBytes(i.stackOff)), IMM(ch));
8528 } else {
8529 EMIT_CALL(as, (TCA)staticLocHelper<false>, IMM(uintptr_t(name)), R(rVmFp),
8530 RPLUS(rVmSp, -cellsToBytes(i.stackOff)));
8532 recordCall(as, i);
8533 emitMovRegReg(as, rax, *output);
8536 void
8537 TranslatorX64::translateStaticLocInit(const Tracelet& t,
8538 const NormalizedInstruction& i) {
8539 using namespace TargetCache;
8540 ScratchReg output(m_regMap);
8541 const Location& outLoc = i.outLocal->location;
8543 // Closures and generators from closures don't satisfy the "one
8544 // static per source location" rule that the inline fastpath
8545 // requires
8546 if (!curFunc()->isClosureBody() &&
8547 !curFunc()->isGeneratorFromClosure()) {
8548 // Miss path explicitly decrements.
8549 Stats::emitInc(a, Stats::TgtCache_StaticHit);
8550 Stats::emitInc(a, Stats::Tx64_StaticLocFast);
8552 CacheHandle ch = allocStatic();
8553 ASSERT(ch);
8554 a. load_reg64_disp_reg64(rVmTl, ch, *output);
8555 a. test_reg64_reg64(*output, *output);
8557 UnlikelyIfBlock<CC_Z> fooey(a, astubs);
8558 emitCallStaticLocHelper(astubs, i, output, ch);
8560 } else {
8561 Stats::emitInc(a, Stats::Tx64_StaticLocSlow);
8562 emitCallStaticLocHelper(a, i, output, 0);
8564 // Now we've got the outer variant in *output. Get the address of the
8565 // inner cell, since that's the enregistered representation of a variant.
8566 emitDeref(a, *output, *output);
8567 emitIncRef(*output, KindOfRef);
8568 // Turn output into the local we just initialized.
8569 m_regMap.bindScratch(output, outLoc, KindOfRef, RegInfo::DIRTY);
8572 void
8573 TranslatorX64::analyzeVerifyParamType(Tracelet& t, NormalizedInstruction& i) {
8574 int param = i.imm[0].u_IVA;
8575 const TypeConstraint& tc = curFunc()->params()[param].typeConstraint();
8576 if (!tc.isObject()) {
8577 // We are actually using the translation-time value of this local as a
8578 // prediction; if the param check failed at compile-time, we predict it
8579 // will continue failing.
8580 bool compileTimeCheck = tc.check(frame_local(curFrame(), param), curFunc());
8581 i.m_txFlags = nativePlan(compileTimeCheck);
8582 i.manuallyAllocInputs = true;
8583 } else {
8584 bool trace = i.inputs[0]->isObject() ||
8585 (i.inputs[0]->isNull() && tc.nullable());
8586 i.m_txFlags = supportedPlan(trace);
8590 static void
8591 VerifyParamTypeFail(int paramNum) {
8592 VMRegAnchor _;
8593 const ActRec* ar = curFrame();
8594 const Func* func = ar->m_func;
8595 const TypeConstraint& tc = func->params()[paramNum].typeConstraint();
8596 ASSERT(tc.isObject());
8597 TypedValue* tv = frame_local(ar, paramNum);
8598 TRACE(3, "%s Obj %s, needs type %s\n",
8599 __func__,
8600 tv->m_data.pobj->getVMClass()->name()->data(),
8601 tc.typeName()->data());
8602 tc.verifyFail(func, paramNum, tv);
8605 // check the class hierarchy; returns nonzero iff cls is an instance of constraint
8606 static uint64_t
8607 VerifyParamTypeSlow(const Class* cls, const Class* constraint) {
8608 Stats::inc(Stats::Tx64_VerifyParamTypeSlow);
8609 Stats::inc(Stats::Tx64_VerifyParamTypeFast, -1);
8611 // ensure C++ returns a 0 or 1 with upper bits zeroed
8612 return static_cast<uint64_t>(constraint && cls->classof(constraint));
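// For object typehints we compare the value's Class* against the hint's
// Class* (loaded from the target cache, or computed for self/parent). Only
// when the pointers differ do we call VerifyParamTypeSlow to walk the
// hierarchy, and only a genuine mismatch takes the astubs failure path.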
8615 void
8616 TranslatorX64::translateVerifyParamType(const Tracelet& t,
8617 const NormalizedInstruction& i) {
8618 Stats::emitInc(a, Stats::Tx64_VerifyParamTypeFast);
8620 int param = i.imm[0].u_IVA;
8621 const TypeConstraint& tc = curFunc()->params()[param].typeConstraint();
8622 // not quite a nop. The guards should have verified that the m_type field
8623 // is compatible, but for objects we need to go one step further and
8624 // ensure that we're dealing with the right class.
8625 // NULL inputs only get traced when constraint is nullable.
8626 ASSERT(i.inputs.size() == 1);
8627 if (!i.inputs[0]->isObject()) return; // nop.
8629 // Get the input's class from ObjectData->m_cls
8630 const Location& in = i.inputs[0]->location;
8631 PhysReg src = getReg(in);
8632 ScratchReg inCls(m_regMap);
8633 if (i.inputs[0]->rtt.isVariant()) {
8634 emitDeref(a, src, *inCls);
8635 a. load_reg64_disp_reg64(*inCls, ObjectData::getVMClassOffset(), *inCls);
8636 } else {
8637 a. load_reg64_disp_reg64(src, ObjectData::getVMClassOffset(), *inCls);
8640 ScratchReg cls(m_regMap);
8641 // Constraint may not be in the class hierarchy of the method being traced;
8642 // look up the class handle and emit code to put the Class* into a reg.
8643 if (!tc.isSelf() && !tc.isParent()) {
8644 const StringData* clsName = tc.typeName();
8645 using namespace TargetCache;
8646 CacheHandle ch = allocKnownClass(clsName);
8647 a. load_reg64_disp_reg64(rVmTl, ch, *cls);
8648 } else {
8649 const Class *constraint = NULL;
8650 if (tc.isSelf()) {
8651 tc.selfToClass(curFunc(), &constraint);
8652 } else if (tc.isParent()) {
8653 tc.parentToClass(curFunc(), &constraint);
8655 emitImmReg(a, uintptr_t(constraint), *cls);
8657 // Compare this class to the incoming object's class. If the typehint's class
8658 // is not present, it cannot be an instance: fail.
8659 a. cmp_reg64_reg64(*inCls, *cls);
8662 JccBlock<CC_Z> subclassCheck(a);
8663 // Call helper since ObjectData::instanceof is a member function
8664 if (false) {
8665 Class* cls = NULL;
8666 Class* constraint = NULL;
8667 VerifyParamTypeSlow(cls, constraint);
8669 EMIT_CALL(a, VerifyParamTypeSlow, R(*inCls), R(*cls));
8670 // Pin the return value, check if a match or take slow path
8671 m_regMap.bind(rax, Location(), KindOfInvalid, RegInfo::SCRATCH);
8672 a. test_reg64_reg64(rax, rax);
8673 m_regMap.freeScratchReg(rax);
8675 // Put the failure path into astubs
8677 UnlikelyIfBlock<CC_Z> fail(a, astubs);
8678 if (false) { // typecheck
8679 VerifyParamTypeFail(param);
8681 EMIT_CALL(astubs, VerifyParamTypeFail, IMM(param));
8682 recordReentrantStubCall(i);
8687 void
8688 TranslatorX64::analyzeInstanceOfD(Tracelet& t, NormalizedInstruction& i) {
8689 ASSERT(i.inputs.size() == 1);
8690 ASSERT(i.outStack && !i.outLocal);
8691 i.m_txFlags = planHingesOnRefcounting(i.inputs[0]->outerType());
8694 // check the class hierarchy; returns nonzero iff cls is an instance of constraint
8695 static uint64_t
8696 InstanceOfDSlow(const Class* cls, const Class* constraint) {
8697 Stats::inc(Stats::Tx64_InstanceOfDSlow);
8698 Stats::inc(Stats::Tx64_InstanceOfDFast, -1);
8700 // ensure C++ returns a 0 or 1 with upper bits zeroed
8701 return static_cast<uint64_t>(constraint && cls->classof(constraint));
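// InstanceOfD follows the same pattern: non-objects are immediately false,
// a pointer-equal Class* is immediately true, and only the unequal case
// calls InstanceOfDSlow in astubs for the full classof() walk.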
8704 void
8705 TranslatorX64::translateInstanceOfD(const Tracelet& t,
8706 const NormalizedInstruction& i) {
8707 Stats::emitInc(a, Stats::Tx64_InstanceOfDFast);
8708 ASSERT(i.inputs.size() == 1);
8709 ASSERT(i.outStack && !i.outLocal);
8711 DynLocation* input0 = i.inputs[0];
8712 bool input0IsLoc = input0->isLocal();
8713 DataType type = input0->valueType();
8714 PhysReg srcReg = getReg(input0->location);
8715 ScratchReg result(m_regMap);
8717 if (type != KindOfObject) {
8718 // All non-object inputs are not instances
8719 if (!input0IsLoc) {
8720 ASSERT(!input0->isVariant());
8721 emitDecRef(i, srcReg, type);
8723 emitImmReg(a, false, *result);
8725 } else {
8726 // Get the input's class from ObjectData->m_cls
8727 ScratchReg inCls(m_regMap);
8728 if (input0->rtt.isVariant()) {
8729 ASSERT(input0IsLoc);
8730 emitDeref(a, srcReg, *inCls);
8731 a. load_reg64_disp_reg64(*inCls, ObjectData::getVMClassOffset(), *inCls);
8732 } else {
8733 a. load_reg64_disp_reg64(srcReg, ObjectData::getVMClassOffset(), *inCls);
8735 if (!input0IsLoc) {
8736 emitDecRef(i, srcReg, type);
8739 // Set result to true for now. If we take the slow path, use its return value.
8740 emitImmReg(a, true, *result);
8741 ScratchReg cls(m_regMap);
8742 // Constraint may not be in the class hierarchy of the method being traced;
8743 // look up the class handle and emit code to put the Class* into a reg.
8744 using namespace TargetCache;
8745 int param = i.imm[0].u_SA;
8746 const StringData* clsName = curUnit()->lookupLitstrId(param);
8747 CacheHandle ch = allocKnownClass(clsName);
8748 a. load_reg64_disp_reg64(rVmTl, ch, *cls);
8749 // Compare this class to the incoming object's class. If the typehint's
8750     // class is not present, it cannot be an instance: fail
8751 a. cmp_reg64_reg64(*inCls, *cls);
8754 UnlikelyIfBlock<CC_NZ> subclassCheck(a, astubs);
8755 // Call helper since ObjectData::instanceof is a member function
8756 if (false) {
8757 Class* cls = NULL;
8758 Class* constraint = NULL;
8759 InstanceOfDSlow(cls, constraint);
8761 EMIT_CALL(astubs, InstanceOfDSlow, R(*inCls), R(*cls));
8762 astubs. mov_reg32_reg32(rax, *result);
8765 // Bind result and destination
8766 m_regMap.bindScratch(result, i.outStack->location, i.outStack->outerType(),
8767 RegInfo::DIRTY);
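// Roughly (a sketch, not the literal emitted code), the fast/slow split above is:
//
//   Class* c = obj->m_cls;                               // ObjectData::getVMClassOffset()
//   Class* d = *(Class**)(rVmTl + allocKnownClass(clsName));
//   uint64_t res = (c == d) ? 1 : InstanceOfDSlow(c, d); // slow path lives in astubs;
//                                                        // it also handles d == NULL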
8770 void
8771 TranslatorX64::analyzeIterInit(Tracelet& t, NormalizedInstruction& ni) {
8772 ni.m_txFlags = supportedPlan(ni.inputs[0]->valueType() == KindOfArray ||
8773 ni.inputs[0]->valueType() == KindOfObject);
8776 void
8777 TranslatorX64::translateIterInit(const Tracelet& t,
8778 const NormalizedInstruction& ni) {
8779 ASSERT(ni.inputs.size() == 1);
8780 ASSERT(!ni.outStack && !ni.outLocal);
8781 DynLocation* in = ni.inputs[0];
8782 ASSERT(in->outerType() != KindOfRef);
8783 SKTRACE(1, ni.source, "IterInit: committed to translation\n");
8784 PhysReg src = getReg(in->location);
8785 SrcKey taken, notTaken;
8786 branchDests(t, ni, &taken, &notTaken, 1 /* immIdx */);
8787 Location iterLoc(Location::Iter, ni.imm[0].u_IVA);
8788 switch (in->valueType()) {
8789 case KindOfArray: {
8790 if (false) { // typecheck
8791 Iter *dest = NULL;
8792 HphpArray *arr = NULL;
8793 new_iter_array(dest, arr);
8795 EMIT_RCALL(a, ni, new_iter_array, A(iterLoc), R(src));
8796 break;
8798 case KindOfObject: {
8799 if (false) { // typecheck
8800 Iter *dest = NULL;
8801 ObjectData *obj = NULL;
8802 Class *ctx = NULL;
8803 new_iter_object(dest, obj, ctx);
8805 bool ctxFixed = isContextFixed();
8806 PREP_CTX(ctxFixed, argNumToRegName[2]);
8807 EMIT_RCALL(a, ni, new_iter_object, A(iterLoc), R(src), CTX(ctxFixed));
8808 break;
8810 default: not_reached();
8812 syncOutputs(t); // Ends BB
8813 // If a new iterator is created, new_iter_* will not adjust the refcount of
8814 // the input. If a new iterator is not created, new_iter_* will decRef the
8815 // input for us. new_iter_* returns 0 if an iterator was not created,
8816 // otherwise it returns 1.
8817 a. test_reg64_reg64(rax, rax);
8818 emitCondJmp(taken, notTaken, CC_Z);
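// Sketch of the contract with new_iter_*: it returns 1 and leaves the input's
// refcount alone when an iterator is created, and returns 0 (decRef'ing the
// input itself) when there is nothing to iterate. The CC_Z branch above is the
// bytecode's taken offset, i.e. the empty-collection case:
//
//   if (!new_iter_array(&iter, arr)) goto taken;   // nothing to iterate
//   goto notTaken;                                 // enter the iteration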
8821 void
8822 TranslatorX64::analyzeIterValueC(Tracelet& t, NormalizedInstruction& i) {
8823 i.m_txFlags = supportedPlan(
8824 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
8825 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
8828 void
8829 TranslatorX64::translateIterValueC(const Tracelet& t,
8830 const NormalizedInstruction& i) {
8831 ASSERT(i.inputs.size() == 1);
8832 ASSERT(i.inputs[0]->rtt.isIter());
8834 Location outLoc;
8835 Iter::Type iterType = i.inputs[0]->rtt.iterType();
8836 typedef void (*IterValueC)(Iter*, TypedValue*);
8837 IterValueC ivc;
8838 if (i.outStack) {
8839 outLoc = i.outStack->location;
8840 ivc = (iterType == Iter::TypeArray)
8841 ? iter_value_cell_array : iter_value_cell_iterator;
8842 } else {
8843 outLoc = i.outLocal->location;
8844 ivc = (iterType == Iter::TypeArray)
8845 ? iter_value_cell_local_array : iter_value_cell_local_iterator;
8847 EMIT_RCALL(a, i, ivc, A(i.inputs[0]->location), A(outLoc));
8848 m_regMap.invalidate(outLoc);
8851 void
8852 TranslatorX64::analyzeIterKey(Tracelet& t, NormalizedInstruction& i) {
8853 i.m_txFlags = supportedPlan(
8854 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
8855 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
8858 void
8859 TranslatorX64::translateIterKey(const Tracelet& t,
8860 const NormalizedInstruction& i) {
8861 ASSERT(i.inputs.size() == 1);
8862 ASSERT(i.inputs[0]->rtt.isIter());
8864 Location outLoc;
8865 Iter::Type iterType = i.inputs[0]->rtt.iterType();
8866 typedef void (*IterKey)(Iter*, TypedValue*);
8867 IterKey ik;
8868 if (i.outStack) {
8869 outLoc = i.outStack->location;
8870 ik = (iterType == Iter::TypeArray)
8871 ? iter_key_cell_array : iter_key_cell_iterator;
8872 } else {
8873 outLoc = i.outLocal->location;
8874 ik = (iterType == Iter::TypeArray)
8875 ? iter_key_cell_local_array : iter_key_cell_local_iterator;
8877 EMIT_RCALL(a, i, ik, A(i.inputs[0]->location), A(outLoc));
8878 m_regMap.invalidate(outLoc);
8881 void
8882 TranslatorX64::analyzeIterNext(Tracelet& t, NormalizedInstruction& i) {
8883 ASSERT(i.inputs.size() == 1);
8884 i.m_txFlags = supportedPlan(
8885 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
8886 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
8889 void
8890 TranslatorX64::translateIterNext(const Tracelet& t,
8891 const NormalizedInstruction& i) {
8892 ASSERT(i.inputs.size() == 1);
8893 ASSERT(!i.outStack && !i.outLocal);
8894 ASSERT(i.inputs[0]->rtt.isIter());
8896 if (false) { // type check
8897 Iter* it = NULL;
8898 int64 ret = iter_next_array(it);
8899 if (ret) printf("\n");
8901 m_regMap.cleanAll(); // input might be in-flight
8902 // If the iterator reaches the end, iter_next_array will handle
8903 // freeing the iterator and it will decRef the array
8904 EMIT_CALL(a, iter_next_array, A(i.inputs[0]->location));
8905 recordReentrantCall(a, i);
8906   // RAX is now a scratch register with no program meaning...
8907 m_regMap.bind(rax, Location(), KindOfInvalid, RegInfo::SCRATCH);
8909 // syncOutputs before we handle the branch.
8910 syncOutputs(t);
8911 SrcKey taken, notTaken;
8912 branchDests(t, i, &taken, &notTaken, 1 /* destImmIdx */);
8914 a. test_reg64_reg64(rax, rax);
8915 emitCondJmp(taken, notTaken, CC_NZ);
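// Sketch of the generated control flow (iter_next_array frees the iterator and
// decRefs the array when the end is reached):
//
//   if (iter_next_array(&iter)) goto taken;   // more elements: keep iterating
//   goto notTaken;                            // exhausted: fall out of the loop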
8918 // PSEUDOINSTR_DISPATCH is a switch() fragment that routes opcodes to their
8919 // shared handlers, as per the PSEUDOINSTRS macro.
8920 #define PSEUDOINSTR_DISPATCH(func) \
8921 case OpBitAnd: \
8922 case OpBitOr: \
8923 case OpBitXor: \
8924 case OpSub: \
8925 case OpMul: \
8926 func(BinaryArithOp, t, i) \
8927 case OpSame: \
8928 case OpNSame: \
8929 func(SameOp, t, i) \
8930 case OpEq: \
8931 case OpNeq: \
8932 func(EqOp, t, i) \
8933 case OpLt: \
8934 case OpLte: \
8935 case OpGt: \
8936 case OpGte: \
8937 func(LtGtOp, t, i) \
8938 case OpEmptyL: \
8939 case OpCastBool: \
8940 func(UnaryBooleanOp, t, i) \
8941 case OpJmpZ: \
8942 case OpJmpNZ: \
8943 func(BranchOp, t, i) \
8944 case OpSetL: \
8945 case OpBindL: \
8946 func(AssignToLocalOp, t, i) \
8947 case OpFPassC: \
8948 case OpFPassCW: \
8949 case OpFPassCE: \
8950 func(FPassCOp, t, i) \
8951 case OpFPushCuf: \
8952 case OpFPushCufF: \
8953 case OpFPushCufSafe: \
8954 func(FPushCufOp, t, i) \
8955 case OpIssetL: \
8956 case OpIsNullL: \
8957 case OpIsStringL: \
8958 case OpIsArrayL: \
8959 case OpIsIntL: \
8960 case OpIsObjectL: \
8961 case OpIsBoolL: \
8962 case OpIsDoubleL: \
8963 case OpIsNullC: \
8964 case OpIsStringC: \
8965 case OpIsArrayC: \
8966 case OpIsIntC: \
8967 case OpIsObjectC: \
8968 case OpIsBoolC: \
8969 case OpIsDoubleC: \
8970 func(CheckTypeOp, t, i)
8972 void
8973 TranslatorX64::analyzeInstr(Tracelet& t,
8974 NormalizedInstruction& i) {
8975 const Opcode op = i.op();
8976 switch (op) {
8977 #define CASE(iNm) \
8978 case Op ## iNm: { \
8979 analyze ## iNm(t, i); \
8980 } break;
8981 #define ANALYZE(a, b, c) analyze ## a(b, c); break;
8982 INSTRS
8983 PSEUDOINSTR_DISPATCH(ANALYZE)
8985 #undef ANALYZE
8986 #undef CASE
8987 default: {
8988 ASSERT(i.m_txFlags == Interp);
8991 SKTRACE(1, i.source, "translation plan: %x\n", i.m_txFlags);
8994 bool
8995 TranslatorX64::dontGuardAnyInputs(Opcode op) {
8996 switch (op) {
8997 #define CASE(iNm) case Op ## iNm:
8998 #define NOOP(a, b, c)
8999 INSTRS
9000 PSEUDOINSTR_DISPATCH(NOOP)
9001 return false;
9003 return true;
9004 #undef NOOP
9005 #undef CASE
9009 void TranslatorX64::emitOneGuard(const Tracelet& t,
9010 const NormalizedInstruction& i,
9011 PhysReg reg, int disp, DataType type,
9012 TCA &sideExit) {
9013 bool isFirstInstr = (&i == t.m_instrStream.first);
9014 emitTypeCheck(a, type, reg, disp);
9015 if (isFirstInstr) {
9016 SrcRec& srcRec = *getSrcRec(t.m_sk);
9017 // If it's the first instruction, we haven't made any forward
9018 // progress yet, so this is really a tracelet-level guard rather
9019 // than a side exit. If we tried to "side exit", we'd come right
9020 // back to this check!
9022 // We need to record this as a fallback branch.
9023 emitFallbackJmp(srcRec);
9024 } else if (!sideExit) {
9025 UnlikelyIfBlock<CC_NZ> branchToSideExit(a, astubs);
9026 sideExit = astubs.code.frontier;
9027 emitSideExit(astubs, i, false /*next*/);
9028 } else {
9029 a. jnz(sideExit);
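// In pseudo-code, emitOneGuard lays out its checks like this (illustrative only):
//
//   cmp type(reg + disp), expected
//   first instruction:        jnz -> fallback jmp (retranslate this tracelet)
//   later, no side exit yet:  jnz -> new side-exit stub emitted in astubs
//   later, subsequent guards: jnz -> the side-exit stub created above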
9033 // Emit necessary guards for variants and pseudo-main locals before instr i.
9034 // For HHIR, this only inserts guards for pseudo-main locals. Variants are
9035 // guarded in a different way.
9036 void
9037 TranslatorX64::emitVariantGuards(const Tracelet& t,
9038 const NormalizedInstruction& i) {
9039 bool pseudoMain = Translator::liveFrameIsPseudoMain();
9040 bool isFirstInstr = (&i == t.m_instrStream.first);
9041 TCA sideExit = NULL;
9042 const NormalizedInstruction *base = &i;
9043 while (base->grouped) {
9044 base = base->prev;
9045 ASSERT(base);
9047 for (size_t in = 0; in < i.inputs.size(); ++in) {
9048 DynLocation* input = i.inputs[in];
9049 if (!input->isValue()) continue;
9050 bool isRef = input->isVariant() &&
9051 !i.ignoreInnerType &&
9052 input->rtt.innerType() != KindOfInvalid;
9053 bool modifiableLocal = pseudoMain && input->isLocal() &&
9054 !input->rtt.isVagueValue();
9056 if (!modifiableLocal && !isRef) continue;
9058 SKTRACE(1, i.source, "guarding %s: (%s:%d) :: %d!\n",
9059 modifiableLocal ? "pseudoMain local" : "variant inner",
9060 input->location.spaceName(),
9061 input->location.offset,
9062 input->rtt.valueType());
9063 // TODO task 1122807: don't check the inner type if we've already
9064 // checked it and have executed no possibly-aliasing instructions in
9065 // the meanwhile.
9066 if (modifiableLocal) {
9067 if (m_useHHIR) {
9068 RuntimeType& rtt = input->rtt;
9069 JIT::Type::Tag type = JIT::Type::fromDataType(rtt.outerType(),
9070 rtt.innerType());
9071 if (isFirstInstr) {
9072 m_hhbcTrans->guardTypeLocal(input->location.offset, type);
9073 } else {
9074 m_hhbcTrans->checkTypeLocal(input->location.offset, type);
9076 } else {
9077 PhysReg reg;
9078 int disp;
9079 locToRegDisp(input->location, &reg, &disp);
9080 emitOneGuard(t, *base, reg, disp,
9081 input->rtt.outerType(), sideExit);
9084 if (isRef && !m_useHHIR) {
9085 m_regMap.allocInputReg(i, in);
9086 emitOneGuard(t, *base, getReg(input->location), 0,
9087 input->rtt.innerType(), sideExit);
9092 NormalizedInstruction::OutputUse
9093 NormalizedInstruction::outputIsUsed(DynLocation* output) const {
9094 for (NormalizedInstruction* succ = next;
9095 succ; succ = succ->next) {
9096 for (size_t i = 0; i < succ->inputs.size(); ++i) {
9097 if (succ->inputs[i] == output) {
9098 if (succ->inputWasInferred(i)) {
9099 return OutputInferred;
9101 if (Translator::Get()->dontGuardAnyInputs(succ->op())) {
9102         /* the consumer doesn't care about its inputs
9103 but we may still have inferred something about
9104 its outputs that a later instruction may depend on
9106 if (!outputDependsOnInput(succ->op()) ||
9107 !(succ->outStack && !succ->outStack->rtt.isVagueValue() &&
9108 succ->outputIsUsed(succ->outStack) != OutputUsed) ||
9109 !(succ->outLocal && !succ->outLocal->rtt.isVagueValue() &&
9110               succ->outputIsUsed(succ->outLocal) != OutputUsed)) {
9111 return OutputDoesntCare;
9114 return OutputUsed;
9118 return OutputUnused;
9121 void
9122 TranslatorX64::emitPredictionGuards(const NormalizedInstruction& i) {
9123 if (!i.outputPredicted || i.breaksBB) return;
9124 NormalizedInstruction::OutputUse u = i.outputIsUsed(i.outStack);
9126 if (m_useHHIR) {
9127 if (u == NormalizedInstruction::OutputUsed ||
9128 u == NormalizedInstruction::OutputInferred) {
9129 JIT::Type::Tag jitType = JIT::Type::fromDataType(i.outStack->outerType(),
9130 i.outStack->valueType());
9131 if (u == NormalizedInstruction::OutputInferred) {
9132 TRACE(1, "HHIR: emitPredictionGuards: output inferred to be %s\n",
9133 JIT::Type::Strings[jitType]);
9134 m_hhbcTrans->assertTypeStack(0, jitType);
9135 } else {
9136 TRACE(1, "HHIR: emitPredictionGuards: output predicted to be %s\n",
9137 JIT::Type::Strings[jitType]);
9138 m_hhbcTrans->checkTypeStack(0, jitType, i.next->offset());
9141 return;
9144 switch (u) {
9145 case NormalizedInstruction::OutputUsed:
9146 break;
9147 case NormalizedInstruction::OutputUnused:
9148 return;
9149 case NormalizedInstruction::OutputInferred:
9150 Stats::emitInc(a, Stats::TC_TypePredOverridden);
9151 return;
9152 case NormalizedInstruction::OutputDoesntCare:
9153 Stats::emitInc(a, Stats::TC_TypePredUnneeded);
9154 return;
9157 ASSERT(i.outStack);
9158 PhysReg base;
9159 int disp;
9160 locToRegDisp(i.outStack->location, &base, &disp);
9161 ASSERT(base == rVmSp);
9162 TRACE(1, "PREDGUARD: %p dt %d offset %d voffset %lld\n",
9163 a.code.frontier, i.outStack->outerType(), disp,
9164 i.outStack->location.offset);
9165 emitTypeCheck(a, i.outStack->outerType(), rVmSp, disp);
9167 UnlikelyIfBlock<CC_NZ> branchToSideExit(a, astubs);
9168 Stats::emitInc(astubs, Stats::TC_TypePredMiss);
9169 emitSideExit(astubs, i, true);
9171 Stats::emitInc(a, Stats::TC_TypePredHit);
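// For the non-HHIR path above, the emitted prediction guard is morally:
//
//   if (stackSlot.m_type != predictedType) {   // emitTypeCheck on rVmSp + disp
//     Stats::TC_TypePredMiss++;                // counted in astubs
//     sideExit();                              // resume after this instruction
//   }
//   Stats::TC_TypePredHit++;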
9174 static void failedTypePred() {
9175 raise_error("A type prediction was incorrect");
9178 void
9179 TranslatorX64::translateInstrWork(const Tracelet& t,
9180 const NormalizedInstruction& i) {
9181 const Opcode op = i.op();
9182 switch (op) {
9183 #define CASE(iNm) \
9184 case Op ## iNm: \
9185 translate ## iNm(t, i); \
9186 break;
9187 #define TRANSLATE(a, b, c) translate ## a(b, c); break;
9188 INSTRS
9189 PSEUDOINSTR_DISPATCH(TRANSLATE)
9190 #undef TRANSLATE
9191 #undef CASE
9192 default:
9193 ASSERT(false);
9197 void
9198 TranslatorX64::translateInstr(const Tracelet& t,
9199 const NormalizedInstruction& i) {
9201 * translateInstr() translates an individual instruction in a tracelet,
9202 * either by directly emitting machine code for that instruction or by
9203 * emitting a call to the interpreter.
9205 * If the instruction ends the current tracelet, we must emit machine code
9206 * to transfer control to some target that will continue to make forward
9207 * progress. This target may be the beginning of another tracelet, or it may
9208 * be a translator service request. Before transferring control, a tracelet
9209 * must ensure the following invariants hold:
9210 * 1) The machine registers rVmFp and rVmSp are in sync with vmfp()
9211 * and vmsp().
9212 * 2) All "dirty" values are synced in memory. This includes the
9213 * evaluation stack, locals, globals, statics, and any other program
9214 * accessible locations. This also means that all refcounts must be
9215 * up to date.
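 *
 * A minimal sketch of those end-of-tracelet duties, expressed with the same
 * helpers this function uses below (illustrative ordering, not literal code):
 *
 *   syncOutputs(t);          // (2) spill dirty values and sync rVmSp/rVmFp (1)
 *   emitBindJmp(t.m_nextSk); // chain to the next tracelet or a service request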
9217 ASSERT(!m_useHHIR);
9218 ASSERT(!i.outStack || i.outStack->isStack());
9219 ASSERT(!i.outLocal || i.outLocal->isLocal());
9220 const char *opNames[] = {
9221 #define O(name, imm, push, pop, flags) \
9222 #name,
9223 OPCODES
9224 #undef O
9226 SpaceRecorder sr(opNames[i.op()], a);
9227 SKTRACE(1, i.source, "translate %#lx\n", long(a.code.frontier));
9228 const Opcode op = i.op();
9230 TCA start = a.code.frontier;
9231 TCA astart = astubs.code.frontier;
9233 m_regMap.bumpEpoch();
9234 // Allocate the input regs upfront unless instructed otherwise
9235 // or the instruction is interpreted
9236 if (!i.manuallyAllocInputs && i.m_txFlags) {
9237 m_regMap.allocInputRegs(i);
9240 if (debug) {
9241 for (unsigned j = 0; j < i.inputs.size(); j++) {
9242 if (i.inputWasInferred(j)) {
9243 DynLocation* dl = i.inputs[j];
9244 ASSERT(dl->rtt.isValue() &&
9245 !dl->rtt.isVagueValue() &&
9246 dl->outerType() != KindOfInvalid);
9247 PhysReg base;
9248 int disp;
9249 locToRegDisp(dl->location, &base, &disp);
9250 emitTypeCheck(a, dl->rtt.typeCheckValue(), base, disp);
9252 UnlikelyIfBlock<CC_NZ> typePredFailed(a, astubs);
9253 EMIT_CALL(astubs, failedTypePred);
9254 recordReentrantStubCall(i);
9260 if (!i.grouped) {
9261 emitVariantGuards(t, i);
9262 const NormalizedInstruction* n = &i;
9263 while (n->next && n->next->grouped) {
9264 n = n->next;
9265 emitVariantGuards(t, *n);
9269 // Allocate the input regs upfront unless instructed otherwise
9270 // or the instruction is interpreted
9271 if (!i.manuallyAllocInputs && i.m_txFlags) {
9272 m_regMap.allocInputRegs(i);
9275 if (i.m_txFlags == Interp || RuntimeOption::EvalThreadingJit) {
9276 // If the problem is local to this instruction, just call out to
9277 // the interpreter. emitInterpOne will perform end-of-tracelet duties
9278 // if this instruction ends the tracelet.
9279 SKTRACE(1, i.source, "Interp\n");
9280 emitInterpOne(t, i);
9281 } else {
9282 // Actually translate the instruction's body.
9283 Stats::emitIncTranslOp(a, op);
9285 translateInstrWork(t, i);
9288 // Invalidate locations that are no longer live
9289 for (unsigned k = 0; k < i.deadLocs.size(); ++k) {
9290 const Location& l = i.deadLocs[k];
9291 m_regMap.invalidate(l);
9294 emitPredictionGuards(i);
9295 recordBCInstr(op, a, start);
9296 recordBCInstr(op + Op_count, astubs, astart);
9298 if (i.breaksBB && !i.changesPC) {
9299 // If this instruction's opcode always ends the tracelet then the
9300 // instruction case is responsible for performing end-of-tracelet
9301 // duties. Otherwise, we handle ending the tracelet here.
9302 syncOutputs(t);
9303 emitBindJmp(t.m_nextSk);
9307 bool
9308 TranslatorX64::checkTranslationLimit(const SrcKey& sk,
9309 const SrcRec& srcRec) const {
9310 if (srcRec.translations().size() == SrcRec::kMaxTranslations) {
9311 INC_TPC(max_trans);
9312 if (debug && Trace::moduleEnabled(Trace::tx64, 2)) {
9313 const vector<TCA>& tns = srcRec.translations();
9314 TRACE(1, "Too many (%ld) translations: %s, BC offset %d\n",
9315 tns.size(), curUnit()->filepath()->data(),
9316 sk.offset());
9317 SKTRACE(2, sk, "{\n", tns.size());
9318 TCA topTrans = srcRec.getTopTranslation();
9319 for (size_t i = 0; i < tns.size(); ++i) {
9320 const TransRec* rec = getTransRec(tns[i]);
9321 ASSERT(rec);
9322 SKTRACE(2, sk, "%d %p\n", i, tns[i]);
9323 if (tns[i] == topTrans) {
9324 SKTRACE(2, sk, "%d: *Top*\n", i);
9326 if (rec->kind == TransAnchor) {
9327 SKTRACE(2, sk, "%d: Anchor\n", i);
9328 } else {
9329 SKTRACE(2, sk, "%d: guards {\n", i);
9330 for (unsigned j = 0; j < rec->dependencies.size(); ++j) {
9331 TRACE(2, rec->dependencies[j]);
9333 SKTRACE(2, sk, "%d } guards\n", i);
9336 SKTRACE(2, sk, "} /* Too many translations */\n");
9338 return true;
9341 return false;
9344 void
9345 TranslatorX64::emitGuardChecks(X64Assembler& a,
9346 const SrcKey& sk,
9347 const ChangeMap& dependencies,
9348 const RefDeps& refDeps,
9349 SrcRec& fail) {
9350 if (Trace::moduleEnabled(Trace::stats, 2)) {
9351 Stats::emitInc(a, Stats::TraceletGuard_enter);
9354 bool pseudoMain = Translator::liveFrameIsPseudoMain();
9356 emitRB(a, RBTypeTraceletGuards, sk);
9357 for (DepMap::const_iterator dep = dependencies.begin();
9358 dep != dependencies.end();
9359 ++dep) {
9360 if (!pseudoMain || !dep->second->isLocal() || !dep->second->isValue()) {
9361 checkType(a, dep->first, dep->second->rtt, fail);
9362 } else {
9363 TRACE(3, "Skipping tracelet guard for %s %d\n",
9364 dep->second->location.pretty().c_str(),
9365 (int)dep->second->rtt.outerType());
9369 checkRefs(a, sk, refDeps, fail);
9371 if (Trace::moduleEnabled(Trace::stats, 2)) {
9372 Stats::emitInc(a, Stats::TraceletGuard_execute);
9377 void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
9378 if (!debug) return;
9380 const SrcKey& sk = t.m_sk;
9382 TRACE(3, "----------------------------------------------\n");
9383 TRACE(3, " Translating from file %s:%d %s at %p:\n",
9384 curUnit()->filepath()->data(),
9385 curUnit()->getLineNumber(sk.offset()),
9386 curFunc()->name()->data(),
9387 postGuards);
9388 TRACE(3, " preconds:\n");
9389 TRACE(3, " types:\n");
9390 for (DepMap::const_iterator i = t.m_dependencies.begin();
9391 i != t.m_dependencies.end(); ++i) {
9392 TRACE(3, " %-5s\n", i->second->pretty().c_str());
9394 if (t.m_refDeps.size() != 0) {
9395 TRACE(3, " refs:\n");
9396 for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
9397 i != t.m_refDeps.m_arMap.end();
9398 ++i) {
9399 TRACE(3, " (ActRec %lld : %-5s)\n", i->first,
9400 i->second.pretty().c_str());
9403 TRACE(3, " postconds:\n");
9404 for (ChangeMap::const_iterator i = t.m_changes.begin();
9405 i != t.m_changes.end(); ++i) {
9406 TRACE(3, " %-5s\n", i->second->pretty().c_str());
9408 for (SrcKey traceKey(t.m_sk);
9409 traceKey != t.m_nextSk;
9410 traceKey.advance(curUnit())) {
9411 string s = instrToString(
9412 curUnit()->at(traceKey.offset()), curUnit());
9413 TRACE(3, " %6d: %s\n", traceKey.offset(), s.c_str());
9415 TRACE(3, "----------------------------------------------\n");
9416 if (Trace::moduleEnabled(Trace::tx64, 5)) {
9417 // prettyStack() expects to use vmpc(). Leave it in the state we
9418 // found it since this code is debug-only, and we don't want behavior
9419 // to vary across the optimized/debug builds.
9420 PC oldPC = vmpc();
9421 vmpc() = curUnit()->at(sk.offset());
9422 TRACE(3, g_vmContext->prettyStack(string(" tx64 ")));
9423 vmpc() = oldPC;
9424 TRACE(3, "----------------------------------------------\n");
9428 namespace {
9429 template<class T> struct Nuller : private boost::noncopyable {
9430 explicit Nuller(const T** p) : p(p) {}
9431 ~Nuller() { *p = 0; }
9432 T const** const p;
9436 void
9437 TranslatorX64::translateTracelet(const Tracelet& t) {
9438 const SrcKey &sk = t.m_sk;
9440 m_curTrace = &t;
9441 Nuller<Tracelet> ctNuller(&m_curTrace);
9443 SKTRACE(1, sk, "translateTracelet\n");
9444 ASSERT(m_srcDB.find(sk));
9445 ASSERT(m_regMap.pristine());
9446 TCA start = a.code.frontier;
9447 TCA stubStart = astubs.code.frontier;
9448 TCA counterStart = 0;
9449 uint8 counterLen = 0;
9450 SrcRec& srcRec = *getSrcRec(sk);
9451 vector<TransBCMapping> bcMapping;
9453 bool hhirSucceeded = irTranslateTracelet(t, start, stubStart);
9454 if (hhirSucceeded) {
9455 m_irAUsage += (a.code.frontier - start);
9456 m_irAstubsUsage += (astubs.code.frontier - stubStart);
9458 if (!hhirSucceeded) {
9459 ASSERT(m_pendingFixups.size() == 0);
9460 try {
9461 if (t.m_analysisFailed || checkTranslationLimit(t.m_sk, srcRec)) {
9462 punt();
9465 emitGuardChecks(a, t.m_sk, t.m_dependencies, t.m_refDeps, srcRec);
9466 dumpTranslationInfo(t, a.code.frontier);
9468 // after guards, add a counter for the translation if requested
9469 if (RuntimeOption::EvalJitTransCounters) {
9470 emitTransCounterInc(a);
9473 emitRB(a, RBTypeTraceletBody, t.m_sk);
9474 Stats::emitInc(a, Stats::Instr_TC, t.m_numOpcodes);
9475 recordBCInstr(OpTraceletGuard, a, start);
9477 // Translate each instruction in the tracelet
9478 for (NormalizedInstruction* ni = t.m_instrStream.first; ni; ni = ni->next) {
9479 if (isTransDBEnabled()) {
9480 bcMapping.push_back((TransBCMapping){ni->offset(),
9481 a.code.frontier,
9482 astubs.code.frontier});
9485 m_curNI = ni;
9486 Nuller<NormalizedInstruction> niNuller(&m_curNI);
9487 translateInstr(t, *ni);
9488 ASSERT(ni->source.offset() >= curFunc()->base());
9489 // We sometimes leave the tail of a truncated tracelet in place to aid
9490 // analysis, but breaksBB is authoritative.
9491 if (ni->breaksBB) break;
9493 } catch (TranslationFailedExc& tfe) {
9494 // The whole translation failed; give up on this BB. Since it is not
9495 // linked into srcDB yet, it is guaranteed not to be reachable.
9496 m_regMap.reset();
9497 // Permanent reset; nothing is reachable yet.
9498 a.code.frontier = start;
9499 astubs.code.frontier = stubStart;
9500 bcMapping.clear();
9501 // Discard any pending fixups.
9502 m_pendingFixups.clear();
9503 TRACE(1, "emitting %d-instr interp request for failed translation @%s:%d\n",
9504 int(t.m_numOpcodes), tfe.m_file, tfe.m_line);
9505 // Add a counter for the translation if requested
9506 if (RuntimeOption::EvalJitTransCounters) {
9507 emitTransCounterInc(a);
9509 a. jmp(
9510 emitServiceReq(REQ_INTERPRET, 2ull, uint64_t(t.m_sk.offset()),
9511 uint64_t(t.m_numOpcodes)));
9512 // Fall through.
9514 } // if (!hhirSucceeded)
9516 for (uint i = 0; i < m_pendingFixups.size(); i++) {
9517 TCA tca = m_pendingFixups[i].m_tca;
9518 ASSERT(isValidCodeAddress(tca));
9519 m_fixupMap.recordFixup(tca, m_pendingFixups[i].m_fixup);
9521 m_pendingFixups.clear();
9523 addTranslation(TransRec(t.m_sk, curUnit()->md5(), t, start,
9524 a.code.frontier - start, stubStart,
9525 astubs.code.frontier - stubStart,
9526 counterStart, counterLen,
9527 bcMapping));
9529 recordGdbTranslation(sk, curUnit(), a, start,
9530 false, false);
9531 recordGdbTranslation(sk, curUnit(), astubs, stubStart,
9532 false, false);
9533 // SrcRec::newTranslation() makes this code reachable. Do this last;
9534   // otherwise there's some chance of reader threads hitting translations whose
9535   // metadata is not yet visible.
9536 TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n", start, sk.m_funcId,
9537 sk.m_offset);
9538 srcRec.newTranslation(a, astubs, start);
9539 m_regMap.reset();
9540 TRACE(1, "tx64: %zd-byte tracelet\n", a.code.frontier - start);
9541 if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
9542 Trace::traceRelease(getUsage().c_str());
9546 static const size_t kASize = 512 << 20;
9547 static const size_t kAStubsSize = 512 << 20;
9548 static const size_t kGDataSize = kASize / 4;
9549 static const size_t kTotalSize = kASize + kAStubsSize +
9550 kTrampolinesBlockSize + kGDataSize;
9551 TranslatorX64::TranslatorX64()
9552 : Translator(),
9553 m_numNativeTrampolines(0),
9554 m_trampolineSize(0),
9555 m_spillFillCode(&a),
9556 m_interceptHelper(0),
9557 m_defClsHelper(0),
9558 m_funcPrologueRedispatch(0),
9559 m_irAUsage(0),
9560 m_irAstubsUsage(0),
9561 m_numHHIRTrans(0),
9562 m_irFactory(NULL),
9563 m_constTable(NULL),
9564 m_traceBuilder(NULL),
9565 m_hhbcTrans(NULL),
9566 m_regMap(kCallerSaved, kCalleeSaved, this),
9567 m_interceptsEnabled(false),
9568 m_unwindRegMap(128),
9569 m_curTrace(0),
9570 m_curNI(0)
9572 TRACE(1, "TranslatorX64@%p startup\n", this);
9573 tx64 = this;
9575 static_assert(kTotalSize < (2ul << 30),
9576 "Combined size of all code/data blocks in TranslatorX64 "
9577 "must be < 2GiB to support 32-bit relative addresses");
9579 static bool profileUp = false;
9580 if (!profileUp) {
9581 profileInit();
9582 profileUp = true;
9585 // We want to ensure that the block for "a", "astubs",
9586 // "atrampolines", and "m_globalData" are nearby so that we can
9587 // short jump/point between them. Thus we allocate one slab and
9588 // divide it between "a", "astubs", and "atrampolines".
9590   // Using sbrk to ensure it's in the bottom 2GB, so we avoid
9591 // the need for trampolines, and get to use shorter
9592 // instructions for tc addresses.
9593 static const size_t kRoundUp = 2 << 20;
9594 uint8_t *base = (uint8_t*)sbrk(kTotalSize + kRoundUp - 1);
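  // Round base up to the next kRoundUp (2MB) boundary; the extra kRoundUp - 1
  // bytes requested from sbrk above leave room for this adjustment.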
9595 base += -(uint64_t)base & (kRoundUp - 1);
9596 if (RuntimeOption::EvalMapTCHuge) {
9597 hintHuge(base, kTotalSize);
9599 atrampolines.init(base, kTrampolinesBlockSize);
9600 base += kTrampolinesBlockSize;
9601 a.init(base, kASize);
9602 m_unwindRegistrar = register_unwind_region(base, kTotalSize);
9603 base += kASize;
9604 astubs.init(base, kAStubsSize);
9605 base += kAStubsSize;
9606 m_globalData.init(base, kGDataSize);
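  // Resulting layout: one contiguous region in the low 2GB (sizes from the
  // constants above):
  //
  //   [ atrampolines | a: 512MB | astubs: 512MB | m_globalData: 128MB ]
  //     kTrampolinesBlockSize    kASize  kAStubsSize  kGDataSize (= kASize / 4)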
9608 // Emit some special helpers that are shared across translations.
9610 // Emit a byte of padding. This is a kind of hacky way to
9611 // avoid hitting an assert in recordGdbStub when we call
9612 // it with m_callToExit - 1 as the start address.
9613 astubs.emitNop(1);
9615 // Call to exit with whatever value the program leaves on
9616 // the return stack.
9617 m_callToExit = emitServiceReq(false, REQ_EXIT, 0ull);
9619 m_retHelper = emitRetFromInterpretedFrame();
9621 moveToAlign(astubs);
9622 m_resumeHelper = astubs.code.frontier;
9623 emitGetGContext(astubs, rax);
9624 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
9625 rVmFp);
9626 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
9627 Stack::topOfStackOffset(), rVmSp);
9628 emitServiceReq(false, REQ_RESUME, 0ull);
9630 // Helper for DefCls
9631 if (false) {
9632 PreClass *preClass = 0;
9633 defClsHelper(preClass);
9635 m_defClsHelper = TCA(a.code.frontier);
9636 PhysReg rEC = argNumToRegName[2];
9637 emitGetGContext(a, rEC);
9638 a. store_reg64_disp_reg64(rVmFp, offsetof(VMExecutionContext, m_fp), rEC);
9639 a. store_reg64_disp_reg64(argNumToRegName[1],
9640 offsetof(VMExecutionContext, m_pc), rEC);
9641 // rax holds the up-to-date top of stack pointer
9642 a. store_reg64_disp_reg64(rax,
9643 offsetof(VMExecutionContext, m_stack) +
9644 Stack::topOfStackOffset(), rEC);
9645 a. jmp((TCA)defClsHelper);
9647 moveToAlign(astubs);
9648 m_stackOverflowHelper = astubs.code.frontier;
9649 // We are called from emitStackCheck, with the new stack frame in
9650 // rStashedAR. Get the caller's PC into rdi and save it off.
9651 astubs. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rax);
9652 astubs. load_reg64_disp_reg32(rStashedAR, AROFF(m_soff), rdi);
9653 astubs. load_reg64_disp_reg64(rax, Func::sharedOffset(), rax);
9654 astubs. load_reg64_disp_reg32(rax, Func::sharedBaseOffset(), rax);
9655 astubs. add_reg32_reg32(rax, rdi);
9657 emitEagerVMRegSave(astubs, SaveFP | SavePC);
9658 emitServiceReq(false, REQ_STACK_OVERFLOW, 0ull);
9660   // The decRef helper for when we bring the count down to zero. The caller
9661   // must bring the value into rdi. These can be burned in for all time, and for all
9662 // translations.
9663 if (false) { // type-check
9664 StringData* str = NULL;
9665 ArrayData* arr = NULL;
9666 ObjectData* obj = NULL;
9667 RefData* ref = NULL;
9668 tv_release_str(str);
9669 tv_release_arr(arr);
9670 tv_release_obj(obj);
9671 tv_release_ref(ref);
9673 typedef void* vp;
9674 m_dtorStubs[BitwiseKindOfString] = emitUnaryStub(a, vp(tv_release_str));
9675 m_dtorStubs[KindOfArray] = emitUnaryStub(a, vp(tv_release_arr));
9676 m_dtorStubs[KindOfObject] = emitUnaryStub(a, vp(tv_release_obj));
9677 m_dtorStubs[KindOfRef] = emitUnaryStub(a, vp(tv_release_ref));
9678 m_dtorGenericStub = genericRefCountStub(a);
9679 m_dtorGenericStubRegs = genericRefCountStubRegs(a);
9681 if (trustSigSegv) {
9682 // Install SIGSEGV handler for timeout exceptions
9683 struct sigaction sa;
9684 struct sigaction old_sa;
9685 sa.sa_sigaction = &TranslatorX64::SEGVHandler;
9686 sa.sa_flags = SA_SIGINFO;
9687 sigemptyset(&sa.sa_mask);
9688 if (sigaction(SIGSEGV, &sa, &old_sa) != 0) {
9689 throw std::runtime_error(
9690 std::string("Failed to install SIGSEGV handler: ") +
9691 strerror(errno));
9693 m_segvChain = old_sa.sa_flags & SA_SIGINFO ?
9694 old_sa.sa_sigaction : (sigaction_t)old_sa.sa_handler;
9698 // Do gdb-specific initialization. This has to happen after
9699 // the TranslatorX64 constructor is called, because gdb initialization
9700 // calls back into TranslatorX64::Get().
9701 void TranslatorX64::initGdb() {
9702 // On a backtrace, gdb tries to locate the calling frame at address
9703 // returnRIP-1. However, for the first VM frame, there is no code at
9704 // returnRIP-1, since the AR was set up manually. For this frame,
9705 // record the tracelet address as starting from callToExit-1, so gdb
9706 // does not barf
9707 recordGdbStub(astubs, m_callToExit - 1, "HHVM::callToExit");
9709 recordBCInstr(OpRetFromInterp, astubs, m_retHelper);
9710 recordGdbStub(astubs, m_retHelper - 1, "HHVM::retHelper");
9711 recordBCInstr(OpResumeHelper, astubs, m_resumeHelper);
9712 recordBCInstr(OpDefClsHelper, a, m_defClsHelper);
9713 recordBCInstr(OpDtorStub, a, m_dtorStubs[BitwiseKindOfString]);
9714 recordGdbStub(a, m_dtorStubs[BitwiseKindOfString],
9715 "HHVM::destructorStub");
9718 TranslatorX64*
9719 TranslatorX64::Get() {
9721 * Called from outrageously early, pre-main code, and will
9722 * allocate the first translator space.
9724 if (!nextTx64) {
9725 nextTx64 = new TranslatorX64();
9726 nextTx64->initGdb();
9728 if (!tx64) {
9729 tx64 = nextTx64;
9731 ASSERT(tx64);
9732 return tx64;
9735 template<int Arity>
9736 TCA TranslatorX64::emitNAryStub(X64Assembler& a, void* fptr) {
9737 BOOST_STATIC_ASSERT((Arity < kNumRegisterArgs));
9739 // The callNAryStub has already saved these regs on a.
9740 RegSet alreadySaved;
9741 for (size_t i = 0; i < Arity; ++i) {
9742 alreadySaved |= RegSet(argNumToRegName[i]);
9746 * We've made a call instruction, and pushed Arity args on the
9747 * stack. So the stack address will be odd coming into the stub if
9748 * Arity + 1 (for the call) is odd. We need to correct for this
9749 * when saving other registers below to keep SSE-friendly alignment
9750 * of the stack.
9752 const int Parity = (Arity + 1) % 2;
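  // For example (hedged): with Arity == 1, the caller's arg push plus the
  // call's return-address push make an even number of qwords, so Parity == 0
  // and no padding is needed; with Arity == 2 that count is odd, Parity == 1,
  // and PhysRegSaverParity below adds one qword of padding to restore
  // SSE-friendly (16-byte) stack alignment.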
9754 // These dtor stubs are meant to be called with the call
9755 // instruction, unlike most translator code.
9756 moveToAlign(a);
9757 TCA start = a.code.frontier;
9759 * Preserve most caller-saved regs. The calling code has already
9760 * preserved regs in `alreadySaved'; we push the rest of the caller
9761 * saved regs and rbp. It should take 9 qwords in total, and the
9762 * incoming call instruction made it 10. This is an even number of
9763 * pushes, so we preserve the SSE-friendliness of our execution
9764 * environment (without real intervention from PhysRegSaverParity).
9766 * Note that we don't need to clean all registers because the only
9767 * reason we could need those locations written back is if stack
9768 * unwinding were to happen. These stubs can re-enter due to user
9769 * destructors, but exceptions are not allowed to propagate out of
9770 * those, so it's not a problem.
9772 a. pushr(rbp); // {
9773 a. mov_reg64_reg64(rsp, rbp);
9775 RegSet s = kCallerSaved - alreadySaved;
9776 PhysRegSaverParity<Parity> rs(a, s);
9777 emitCall(a, TCA(fptr));
9779 a. popr(rbp); // }
9780 a. ret();
9781 return start;
9784 TCA TranslatorX64::emitUnaryStub(X64Assembler& a, void* fptr) {
9785 return emitNAryStub<1>(a, fptr);
9788 TCA TranslatorX64::emitBinaryStub(X64Assembler& a, void* fptr) {
9789 return emitNAryStub<2>(a, fptr);
9793 * Both callUnaryStubImpl and callBinaryStub assume that the stub they
9794 * are calling cannot throw an exception.
9797 template <bool reentrant>
9798 void
9799 TranslatorX64::callUnaryStubImpl(X64Assembler& a,
9800 const NormalizedInstruction& i,
9801 TCA stub, PhysReg arg, int disp/*=0*/) {
9802 // Call the generic dtor stub. They all take one arg.
9803 a. pushr(rdi);
9804 if (arg == rsp) {
9805 // Account for pushing rdi.
9806 disp += 8;
9808 if (disp == 0) {
9809 emitMovRegReg(a, arg, rdi);
9810 } else {
9811 a. lea_reg64_disp_reg64(arg, disp, rdi);
9813 ASSERT(isValidCodeAddress(stub));
9814 emitCall(a, stub);
9815 recordCallImpl<reentrant>(a, i);
9816 a. popr(rdi);
9819 void
9820 TranslatorX64::callBinaryStub(X64Assembler& a, const NormalizedInstruction& i,
9821 TCA stub, PhysReg arg1, PhysReg arg2) {
9822 a. pushr(rdi);
9823 a. pushr(rsi);
9825 // We need to be careful not to clobber our arguments when moving
9826 // them into the appropriate registers. (If we ever need ternary
9827 // stubs, this should probably be converted to use ArgManager.)
9828 if (arg2 == rdi && arg1 == rsi) {
9829 a. xchg_reg64_reg64(rdi, rsi);
9830 } else if (arg2 == rdi) {
9831 emitMovRegReg(a, arg2, rsi);
9832 emitMovRegReg(a, arg1, rdi);
9833 } else {
9834 emitMovRegReg(a, arg1, rdi);
9835 emitMovRegReg(a, arg2, rsi);
9838 ASSERT(isValidCodeAddress(stub));
9839 emitCall(a, stub);
9840 recordReentrantCall(a, i);
9841 a. popr(rsi);
9842 a. popr(rdi);
9845 namespace {
9847 struct DeferredFileInvalidate : public DeferredWorkItem {
9848 Eval::PhpFile* m_f;
9849 DeferredFileInvalidate(Eval::PhpFile* f) : m_f(f) {
9850 TRACE(2, "DeferredFileInvalidate @ %p, m_f %p\n", this, m_f); }
9851 void operator()() {
9852 TRACE(2, "DeferredFileInvalidate: Firing @ %p , m_f %p\n", this, m_f);
9853 tx64->invalidateFileWork(m_f);
9857 struct DeferredPathInvalidate : public DeferredWorkItem {
9858 const std::string m_path;
9859 DeferredPathInvalidate(const std::string& path) : m_path(path) {
9860 ASSERT(m_path.size() >= 1 && m_path[0] == '/');
9862 void operator()() {
9863 String spath(m_path);
9865 * inotify saw this path change. Now poke the file repository;
9866 * it will notice the underlying PhpFile* has changed, and notify
9867 * us via ::invalidateFile.
9869 * We don't actually need to *do* anything with the PhpFile* from
9870 * this lookup; since the path has changed, the file we'll get out is
9871 * going to be some new file, not the old file that needs invalidation.
9873 UNUSED Eval::PhpFile* f =
9874 g_vmContext->lookupPhpFile(spath.get(), "");
9875 // We don't keep around the extra ref.
9876 if (f) f->decRefAndDelete();
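// End-to-end sketch of path invalidation (all pieces appear in this file):
//   invalidatePath(p)        -> PendQ::defer(new DeferredPathInvalidate(p))
//   DeferredPathInvalidate() -> lookupPhpFile() notices the changed file and
//                               notifies us via invalidateFile(f)
//   invalidateFile(f)        -> PendQ::defer(new DeferredFileInvalidate(f))
//   DeferredFileInvalidate() -> invalidateFileWork(f): smash the affected
//                               translations, then enqueue a Treadmill item to
//                               drop the references once no request can still
//                               be running the old code.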
9882 void
9883 TranslatorX64::requestInit() {
9884 TRACE(1, "in requestInit(%ld)\n", g_vmContext->m_currentThreadIdx);
9885 tl_regState = REGSTATE_CLEAN;
9886 PendQ::drain();
9887 requestResetHighLevelTranslator();
9888 Treadmill::startRequest(g_vmContext->m_currentThreadIdx);
9889 memset(&s_perfCounters, 0, sizeof(s_perfCounters));
9892 void
9893 TranslatorX64::requestExit() {
9894 if (s_writeLease.amOwner()) {
9895 s_writeLease.drop();
9897 TRACE_MOD(txlease, 2, "%lx write lease stats: %15lld kept, %15lld grabbed\n",
9898 pthread_self(), s_writeLease.m_hintKept,
9899 s_writeLease.m_hintGrabbed);
9900 PendQ::drain();
9901 Treadmill::finishRequest(g_vmContext->m_currentThreadIdx);
9902 TRACE(1, "done requestExit(%ld)\n", g_vmContext->m_currentThreadIdx);
9903 Stats::dump();
9904 Stats::clear();
9906 if (Trace::moduleEnabledRelease(Trace::tx64stats, 1)) {
9907 Trace::traceRelease("TranslatorX64 perf counters for %s:\n",
9908 g_context->getRequestUrl(50).c_str());
9909 for (int i = 0; i < tpc_num_counters; i++) {
9910 Trace::traceRelease("%-20s %10lld\n",
9911 kPerfCounterNames[i], s_perfCounters[i]);
9913 Trace::traceRelease("\n");
9917 bool
9918 TranslatorX64::isPseudoEvent(const char* event) {
9919 for (int i = 0; i < tpc_num_counters; i++) {
9920 if (!strcmp(event, kPerfCounterNames[i])) {
9921 return true;
9924 return false;
9927 void
9928 TranslatorX64::getPerfCounters(Array& ret) {
9929 for (int i = 0; i < tpc_num_counters; i++) {
9930 // Until Perflab can automatically scale the values we give it to
9931 // an appropriate range, we have to fudge these numbers so they
9932 // look more like reasonable hardware counter values.
9933 ret.set(kPerfCounterNames[i], s_perfCounters[i] * 1000);
9937 TranslatorX64::~TranslatorX64() {
9938 freeSlab(atrampolines.code.base, kTotalSize);
9941 static Debug::TCRange rangeFrom(const X64Assembler& a, const TCA addr,
9942 bool isAstubs) {
9943 ASSERT(a.code.isValidAddress(addr));
9944 return Debug::TCRange(addr, a.code.frontier, isAstubs);
9947 void TranslatorX64::recordBCInstr(uint32_t op,
9948 const X64Assembler& a,
9949 const TCA addr) {
9950 if (addr != a.code.frontier) {
9951 m_debugInfo.recordBCInstr(Debug::TCRange(addr, a.code.frontier,
9952 &a == &astubs ? true : false), op);
9956 void TranslatorX64::recordGdbTranslation(const SrcKey& sk,
9957 const Unit* srcUnit,
9958 const X64Assembler& a,
9959 const TCA start,
9960 bool exit,
9961 bool inPrologue) {
9962 if (start != a.code.frontier && !RuntimeOption::EvalJitNoGdb) {
9963 ASSERT(s_writeLease.amOwner());
9964 m_debugInfo.recordTracelet(rangeFrom(a, start,
9965 &a == &astubs ? true : false),
9966 srcUnit,
9967 srcUnit->at(sk.offset()),
9968 exit, inPrologue);
9972 void TranslatorX64::recordGdbStub(const X64Assembler& a,
9973 const TCA start, const char* name) {
9974 if (!RuntimeOption::EvalJitNoGdb) {
9975 m_debugInfo.recordStub(rangeFrom(a, start, &a == &astubs ? true : false),
9976 name);
9980 void TranslatorX64::defineCns(StringData* name) {
9981 TargetCache::fillConstant(name);
9984 std::string TranslatorX64::getUsage() {
9985 std::string usage;
9986 size_t aUsage = a.code.frontier - a.code.base;
9987 size_t stubsUsage = astubs.code.frontier - astubs.code.base;
9988 size_t tcUsage = TargetCache::s_frontier;
9989 Util::string_printf(usage,
9990 "tx64: %9zd bytes (%ld%%) in a.code\n"
9991 "tx64: %9zd bytes (%ld%%) in astubs.code\n"
9992 "tx64: %9zd bytes (%ld%%) in a.code from ir\n"
9993 "tx64: %9zd bytes (%ld%%) in astubs.code from ir\n"
9994 "tx64: %9zd bytes (%ld%%) in targetCache\n",
9995 aUsage, 100 * aUsage / a.code.size,
9996 stubsUsage, 100 * stubsUsage / astubs.code.size,
9997 m_irAUsage, 100 * m_irAUsage / a.code.size,
9998 m_irAstubsUsage, 100 * m_irAstubsUsage / astubs.code.size,
9999 tcUsage,
10000 100 * tcUsage / TargetCache::tl_targetCaches.size);
10001 return usage;
10004 bool TranslatorX64::addDbgGuards(const Unit* unit) {
10005 // TODO refactor
10006 // It grabs the write lease and iterates through the whole SrcDB...
10007 bool locked = s_writeLease.acquire(true);
10008 if (!locked) {
10009 return false;
10011 struct timespec tsBegin, tsEnd;
10012 gettime(CLOCK_MONOTONIC, &tsBegin);
10013   // The docs say even find() _could_ invalidate the iterator; in practice that
10014   // should be very rare, so go with it for now.
10015 for (SrcDB::iterator it = m_srcDB.begin(); it != m_srcDB.end(); ++it) {
10016 SrcKey const sk = SrcKey::fromAtomicInt(it->first);
10017 SrcRec& sr = *it->second;
10018 if (sr.unitMd5() == unit->md5() &&
10019 !sr.hasDebuggerGuard() &&
10020 isSrcKeyInBL(unit, sk)) {
10021 addDbgGuardImpl(sk, sr);
10024 s_writeLease.drop();
10025 gettime(CLOCK_MONOTONIC, &tsEnd);
10026 int64 elapsed = gettime_diff_us(tsBegin, tsEnd);
10027 if (Trace::moduleEnabledRelease(Trace::tx64, 5)) {
10028 Trace::traceRelease("addDbgGuards got lease for %lld us\n", elapsed);
10030 return true;
10033 bool TranslatorX64::addDbgGuard(const Func* func, Offset offset) {
10034 SrcKey sk(func, offset);
10036 if (SrcRec* sr = m_srcDB.find(sk)) {
10037 if (sr->hasDebuggerGuard()) {
10038 return true;
10040 } else {
10041 // no translation yet
10042 return true;
10045 if (debug) {
10046 if (!isSrcKeyInBL(func->unit(), sk)) {
10047 TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
10048 return false;
10051 bool locked = s_writeLease.acquire(true);
10052 if (!locked) {
10053 return false;
10056 if (SrcRec* sr = m_srcDB.find(sk)) {
10057 addDbgGuardImpl(sk, *sr);
10060 s_writeLease.drop();
10061 return true;
10064 void TranslatorX64::addDbgGuardImpl(const SrcKey& sk, SrcRec& srcRec) {
10065 TCA dbgGuard = a.code.frontier;
10066 // Emit the checks for debugger attach
10067 emitTLSLoad<ThreadInfo>(a, ThreadInfo::s_threadInfo, rScratch);
10068 static COff dbgOff = offsetof(ThreadInfo, m_reqInjectionData) +
10069 offsetof(RequestInjectionData, debugger);
10070 a. load_reg64_disp_reg32(rScratch, dbgOff, rScratch);
10071 a. test_imm32_reg32(0xff, rScratch);
10072 // Branch to a special REQ_INTERPRET if attached
10074 TCA fallback = emitServiceReq(REQ_INTERPRET, 2, uint64_t(sk.offset()), 0);
10075 a. jnz(fallback);
10077 // Emit a jump to the actual code
10078 TCA realCode = srcRec.getTopTranslation();
10079 prepareForSmash(kJmpLen);
10080 TCA dbgBranchGuardSrc = a.code.frontier;
10081 a. jmp(realCode);
10082 // Add it to srcRec
10083 srcRec.addDebuggerGuard(a, astubs, dbgGuard, dbgBranchGuardSrc);
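// The debugger guard emitted above behaves roughly like:
//
//   if (ThreadInfo::s_threadInfo->m_reqInjectionData.debugger) {
//     // service request: interpret from sk.offset() while a debugger is attached
//     REQ_INTERPRET(sk.offset(), 0);
//   }
//   jmp srcRec.getTopTranslation();   // smashable jump to the real translation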
10086 bool TranslatorX64::dumpTCCode(const char* filename) {
10087 string aFilename = string(filename).append("_a");
10088 string astubFilename = string(filename).append("_astub");
10089 FILE* aFile = fopen(aFilename.c_str(),"wb");
10090 if (aFile == NULL)
10091 return false;
10092 FILE* astubFile = fopen(astubFilename.c_str(),"wb");
10093 if (astubFile == NULL) {
10094 fclose(aFile);
10095 return false;
10097 string helperAddrFilename = string(filename).append("_helpers_addrs.txt");
10098 FILE* helperAddrFile = fopen(helperAddrFilename.c_str(),"wb");
10099 if (helperAddrFile == NULL) {
10100 fclose(aFile);
10101 fclose(astubFile);
10102 return false;
10104 // dump starting from the trampolines; this assumes processInit() places
10105 // trampolines before the translation cache
10106 size_t count = a.code.frontier-atrampolines.code.base;
10107 bool result = (fwrite(atrampolines.code.base, 1, count, aFile) == count);
10108 if (result) {
10109 count = astubs.code.frontier - astubs.code.base;
10110 result = (fwrite(astubs.code.base, 1, count, astubFile) == count);
10112 if (result) {
10113 for(PointerMap::iterator iter = trampolineMap.begin();
10114 iter != trampolineMap.end();
10115 iter++) {
10116 void* helperAddr = iter->first;
10117 void* trampAddr = iter->second;
10118 char* functionName = Util::getNativeFunctionName(helperAddr);
10119 fprintf(helperAddrFile,"%10p %10p %s\n",
10120 trampAddr, helperAddr,
10121 functionName);
10122 free(functionName);
10125 fclose(aFile);
10126 fclose(astubFile);
10127 fclose(helperAddrFile);
10128 return result;
10131 // Returns true on success
10132 bool TranslatorX64::dumpTC() {
10133 if (!s_writeLease.acquire(true)) return false;
10134 bool success = dumpTCData();
10135 if (success) {
10136 success = dumpTCCode("/tmp/tc_dump");
10138 s_writeLease.drop();
10139 return success;
10142 // Returns true on success
10143 bool tc_dump(void) {
10144 return TranslatorX64::Get()->dumpTC();
10147 // Returns true on success
10148 bool TranslatorX64::dumpTCData() {
10149 gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
10150 if (!tcDataFile) return false;
10152 if (!gzprintf(tcDataFile,
10153 "repo_schema = %s\n"
10154 "a.base = %p\n"
10155 "a.frontier = %p\n"
10156 "astubs.base = %p\n"
10157 "astubs.frontier = %p\n\n",
10158 Repo::kSchemaId,
10159 atrampolines.code.base, a.code.frontier,
10160 astubs.code.base, astubs.code.frontier)) {
10161 return false;
10164 if (!gzprintf(tcDataFile, "total_translations = %lu\n\n",
10165 m_translations.size())) {
10166 return false;
10169 for (size_t t = 0; t < m_translations.size(); t++) {
10170 if (gzputs(tcDataFile,
10171 m_translations[t].print(getTransCounter(t)).c_str()) == -1) {
10172 return false;
10176 gzclose(tcDataFile);
10177 return true;
10180 #define NATIVE_OP(X) PLAN(X, Native)
10181 #define SUPPORTED_OP(X) PLAN(X, Supported)
10182 #define SIMPLE_OP(X) PLAN(X, Simple)
10183 #define INTERP_OP(X) PLAN(X, Interp)
10185 #define SUPPORTED_OPS() \
10186 NATIVE_OP(Null) \
10187 NATIVE_OP(True) \
10188 NATIVE_OP(False) \
10189 NATIVE_OP(Int) \
10190 NATIVE_OP(String) \
10191 NATIVE_OP(Array) \
10192 NATIVE_OP(NewArray) \
10193 NATIVE_OP(InitThisLoc) \
10194 NATIVE_OP(Dup) \
10195 NATIVE_OP(FPushContFunc) \
10196 NATIVE_OP(ContDone) \
10197 NATIVE_OP(ContValid) \
10198 NATIVE_OP(ContStopped) \
10200 * Invariably call a possibly-reentrant helper.
10201 */ \
10202 SIMPLE_OP(Jmp) \
10203 SIMPLE_OP(FCall) \
10204 SIMPLE_OP(CreateCont) \
10205 SIMPLE_OP(UnpackCont) \
10207 * Translations with a reentrant helper.
10209 * TODO: neither UnboxR nor FPassR can actually call destructors.
10210 */ \
10211 SUPPORTED_OP(UnboxR) \
10212 SUPPORTED_OP(FPassR) \
10213 SUPPORTED_OP(NativeImpl) \
10214 SUPPORTED_OP(UnsetL) \
10215 SUPPORTED_OP(Cns) \
10216 SUPPORTED_OP(ClsCnsD) \
10217 SUPPORTED_OP(This) \
10218 SUPPORTED_OP(PackCont) \
10219 SUPPORTED_OP(ContReceive) \
10220 SUPPORTED_OP(ContRaised) \
10221 SUPPORTED_OP(ContNext) \
10222 SUPPORTED_OP(ContSend) \
10223 SUPPORTED_OP(ContRaise) \
10224 SUPPORTED_OP(ContCurrent) \
10225 SUPPORTED_OP(FPushCtor) \
10226 SUPPORTED_OP(FPushCtorD) \
10227 SUPPORTED_OP(StaticLocInit) \
10229 * Always-interp instructions,
10230 */ \
10231 INTERP_OP(ContHandle)
10233 // Define the trivial analyze methods
10234 #define PLAN(Op, Spt) \
10235 void \
10236 TranslatorX64::analyze ## Op(Tracelet& t, NormalizedInstruction& i) { \
10237 i.m_txFlags = Spt; \
10240 SUPPORTED_OPS()
10242 #undef NATIVE_OP
10243 #undef SUPPORTED_OP
10244 #undef SIMPLE_OP
10245 #undef INTERP_OP
10246 #undef SUPPORTED_OPS
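// For reference, PLAN(This, Supported) above expands to (roughly):
//
//   void TranslatorX64::analyzeThis(Tracelet& t, NormalizedInstruction& i) {
//     i.m_txFlags = Supported;
//   }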
10248 void TranslatorX64::invalidateSrcKey(const SrcKey& sk) {
10249 ASSERT(!RuntimeOption::RepoAuthoritative);
10250 ASSERT(s_writeLease.amOwner());
10252 * Reroute existing translations for SrcKey to an as-yet indeterminate
10253 * new one.
10255 SrcRec* sr = m_srcDB.find(sk);
10256 ASSERT(sr);
10258 * Since previous translations aren't reachable from here, we know we
10259 * just created some garbage in the TC. We currently have no mechanism
10260 * to reclaim this.
10262 sr->replaceOldTranslations(a, astubs);
10265 void TranslatorX64::invalidateFileWork(Eval::PhpFile* f) {
10266 class FileInvalidationTrigger : public Treadmill::WorkItem {
10267 Eval::PhpFile* m_f;
10268 int m_nRefs;
10269 public:
10270 FileInvalidationTrigger(Eval::PhpFile* f, int n) : m_f(f), m_nRefs(n) { }
10271 virtual void operator()() {
10272 if (m_f->decRef(m_nRefs) == 0) {
10273 Eval::FileRepository::onDelete(m_f);
10277 size_t nSmashed = m_srcDB.invalidateCode(f);
10278 if (nSmashed) {
10279 // The srcDB found an entry for this file. The entry's dependency
10280 // on this file was counted as a reference, and the code is no longer
10281 // reachable. We need to wait until the last outstanding request
10282 // drains to know that we can really remove the reference.
10283 Treadmill::WorkItem::enqueue(new FileInvalidationTrigger(f, nSmashed));
10287 bool TranslatorX64::invalidateFile(Eval::PhpFile* f) {
10288 // This is called from high rank, but we'll need the write lease to
10289 // invalidate code.
10290 if (!RuntimeOption::EvalJit) return false;
10291 ASSERT(f != NULL);
10292 PendQ::defer(new DeferredFileInvalidate(f));
10293 return true;
10296 } // HPHP::VM::Transl
10298 static const Trace::Module TRACEMOD = Trace::tx64;
10300 void invalidatePath(const std::string& path) {
10301 TRACE(1, "invalidatePath: abspath %s\n", path.c_str());
10302 PendQ::defer(new DeferredPathInvalidate(path));
10305 } } // HPHP::VM