Supported enregistered string and int keys in the vector translator
[hiphop-php.git] / src / runtime / vm / translator / translator-x64.cpp
blob e33fde2b232366bd9c879b1587a4c9881cd0f432
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 */
16 #include <stdint.h>
17 #include <assert.h>
18 #include <unistd.h>
19 #include <sys/mman.h>
20 #include <strstream>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <strings.h>
24 #include <string>
25 #include <queue>
26 #include <zlib.h>
27 #include <unwind.h>
29 #ifdef __FreeBSD__
30 # include <ucontext.h>
31 typedef __sighandler_t *sighandler_t;
32 # define RIP_REGISTER(v) (v).mc_rip
33 #else
34 # define RIP_REGISTER(v) (v).gregs[REG_RIP]
35 #endif
37 #include <boost/bind.hpp>
38 #include <boost/optional.hpp>
39 #include <boost/utility/typed_in_place_factory.hpp>
40 #include <boost/scoped_ptr.hpp>
42 #include <util/pathtrack.h>
43 #include <util/trace.h>
44 #include <util/bitops.h>
45 #include <util/debug.h>
46 #include <util/ringbuffer.h>
47 #include <util/rank.h>
48 #include <util/timer.h>
49 #include <util/maphuge.h>
51 #include <runtime/base/tv_macros.h>
52 #include <runtime/vm/bytecode.h>
53 #include <runtime/vm/php_debug.h>
54 #include <runtime/vm/runtime.h>
55 #include <runtime/base/complex_types.h>
56 #include <runtime/base/execution_context.h>
57 #include <runtime/base/strings.h>
58 #include <runtime/base/zend/zend_string.h>
59 #include <runtime/base/runtime_option.h>
60 #include <runtime/base/server/source_root_info.h>
61 #include <runtime/ext/ext_continuation.h>
62 #include <runtime/vm/debug/debug.h>
63 #include <runtime/vm/translator/targetcache.h>
64 #include <runtime/vm/translator/log.h>
65 #include <runtime/vm/translator/translator-deps.h>
66 #include <runtime/vm/translator/translator-inline.h>
67 #include <runtime/vm/translator/translator-x64.h>
68 #include <runtime/vm/translator/asm-x64.h>
69 #include <runtime/vm/translator/srcdb.h>
70 #include <runtime/vm/translator/x64-util.h>
71 #include <runtime/vm/translator/unwind-x64.h>
72 #include <runtime/vm/pendq.h>
73 #include <runtime/vm/treadmill.h>
74 #include <runtime/vm/stats.h>
75 #include <runtime/vm/pendq.h>
76 #include <runtime/vm/treadmill.h>
77 #include <runtime/vm/repo.h>
78 #include <runtime/vm/type-profile.h>
79 #include <runtime/vm/member_operations.h>
80 #include <runtime/vm/translator/abi-x64.h>
81 #include <runtime/eval/runtime/file_repository.h>
82 #include <runtime/vm/translator/hopt/ir.h>
83 #include <runtime/vm/translator/hopt/linearscan.h>
84 #include <runtime/vm/translator/hopt/opt.h>
85 #include <runtime/vm/translator/hopt/codegen.h>
87 #include <runtime/vm/translator/translator-x64-internal.h>
89 namespace HPHP {
90 namespace VM {
91 namespace Transl {
93 using namespace reg;
94 using namespace Util;
95 using namespace Trace;
96 using std::max;
98 #define TRANS_PERF_COUNTERS \
99 TPC(translate) \
100 TPC(retranslate) \
101 TPC(interp_bb) \
102 TPC(interp_instr) \
103 TPC(interp_one) \
104 TPC(max_trans) \
105 TPC(enter_tc) \
106 TPC(service_req)
108 #define TPC(n) "trans_" #n,
109 static const char* const kPerfCounterNames[] = {
110 TRANS_PERF_COUNTERS
112 #undef TPC
114 #define TPC(n) tpc_ ## n,
115 enum TransPerfCounter {
116 TRANS_PERF_COUNTERS
117 tpc_num_counters
119 #undef TPC
120 static __thread int64 s_perfCounters[tpc_num_counters];
121 #define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
123 #define NULLCASE() \
124 case KindOfUninit: case KindOfNull
126 #define STRINGCASE() \
127 case BitwiseKindOfString: case KindOfStaticString
129 // nextTx64: Global shared state. The tx64 that should be used for
130 // new requests going forward.
131 TranslatorX64* volatile nextTx64;
132 // tx64: Thread-local state. The tx64 we're using for the current request.
133 __thread TranslatorX64* tx64;
135 // Register dirtiness: thread-private.
136 __thread VMRegState tl_regState = REGSTATE_CLEAN;
138 __thread UnlikelyHitMap* tl_unlikelyHits = nullptr;
139 static StaticString s___call(LITSTR_INIT("__call"));
140 static StaticString s___callStatic(LITSTR_INIT("__callStatic"));
142 // Initialize at most this many locals inline in function body prologue; more
143 // than this, and emitting a loop is more compact. To be precise, the actual
144 // crossover point in terms of code size is 6; 9 was determined by experiment to
145 // be the optimal point in certain benchmarks. #microoptimization
146 static const int kLocalsToInitializeInline = 9;
148 // An intentionally funny-looking-in-core-dumps constant for uninitialized
149 // instruction pointers.
150 static const uint64_t kUninitializedRIP = 0xba5eba11acc01ade;
152 static int
153 localOffset(int loc) {
154 PhysReg base;
155 int offset;
156 locToRegDisp(Location(Location::Local, loc), &base, &offset);
157 ASSERT(base == rVmFp);
158 return offset;
161 // Return the SrcKey for the operation that should follow the supplied
162 // NormalizedInstruction. (This might not be the next SrcKey in the
163 // unit if we merged some instructions or otherwise modified them
164 // during analysis.)
165 SrcKey nextSrcKey(const Tracelet& t, const NormalizedInstruction& i) {
166 return i.next ? i.next->source : t.m_nextSk;
169 // JccBlock --
170 // A raw condition-code block; assumes whatever comparison or ALU op
171 // that sets the Jcc has already executed.
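//
// Illustrative usage (sketch, mirroring the pattern in emitDecRef below):
//
//   a.  sub_imm32_disp_reg32(1, TVOFF(_count), rDatum);
//   {
//     JccBlock<CC_NZ> ifZero(a);   // jumps over the block unless ZF is set
//     // ... emitted only for the _count == 0 case ...
//   }                              // dtor patches the jcc8 to land here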
172 template <int Jcc>
173 struct JccBlock {
174 mutable X64Assembler* m_a;
175 TCA m_jcc8;
176 mutable DiamondGuard* m_dg;
178 explicit JccBlock(X64Assembler& a)
179 : m_a(&a),
180 m_jcc8(a.code.frontier),
181 m_dg(new DiamondGuard(a)) {
182 a. jcc8(Jcc, m_a->code.frontier);
185 ~JccBlock() {
186 if (m_a) {
187 delete m_dg;
188 m_a->patchJcc8(m_jcc8, m_a->code.frontier);
192 private:
193 JccBlock(const JccBlock&);
194 JccBlock& operator=(const JccBlock&);
197 // IfElseBlock: like CondBlock, but with an else clause.
198 // a. test_reg_reg(rax, rax);
199 // { IfElseBlock<CC_Z> ifRax(a);
200 // // Code executed for rax != 0
201 // ifRax.Else();
202 // // Code executed for rax == 0
203 // }
205 template <int Jcc>
206 class IfElseBlock : boost::noncopyable {
207 X64Assembler& m_a;
208 TCA m_jcc8;
209 TCA m_jmp8;
210 public:
211 explicit IfElseBlock(X64Assembler& a) :
212 m_a(a), m_jcc8(a.code.frontier), m_jmp8(NULL) {
213 tx64->m_regMap.freeze();
214 m_a.jcc8(Jcc, m_a.code.frontier); // 1f
216 void Else() {
217 ASSERT(m_jmp8 == NULL);
218 m_jmp8 = m_a.code.frontier;
219 m_a.jmp8(m_jmp8); // 2f
220 // 1:
221 m_a.patchJcc8(m_jcc8, m_a.code.frontier);
223 ~IfElseBlock() {
224 ASSERT(m_jmp8 != NULL);
225 // 2:
226 m_a.patchJmp8(m_jmp8, m_a.code.frontier);
227 tx64->m_regMap.defrost();
231 static bool
232 typeCanBeStatic(DataType t) {
233 return t != KindOfObject && t != KindOfRef;
236 // IfCountNotStatic --
237 // Emits if (%reg->_count != RefCountStaticValue) { ... }.
238 // May short-circuit this check if the type is known to be
239 // static already.
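//
// Illustrative usage (sketch, same shape as emitIncRef below):
//
//   { // if !static then
//     IfCountNotStatic ins(a, rDatum, dtype);
//     a.  add_imm32_disp_reg32(1, TVOFF(_count), rDatum);
//   } // endif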
240 struct IfCountNotStatic {
241 typedef CondBlock<TVOFF(_count),
242 RefCountStaticValue,
243 CC_Z> NonStaticCondBlock;
244 NonStaticCondBlock *m_cb; // might be null
245 IfCountNotStatic(X64Assembler& a,
246 PhysReg reg,
247 DataType t = KindOfInvalid) {
248 // Objects and variants cannot be static
249 if (typeCanBeStatic(t)) {
250 m_cb = new NonStaticCondBlock(a, reg);
251 } else {
252 m_cb = NULL;
256 ~IfCountNotStatic() {
257 delete m_cb;
261 // Segfault handler: figure out if it's an intentional segfault
262 // (timeout exception) and if so, act appropriately. Otherwise, pass
263 // the signal on.
264 void TranslatorX64::SEGVHandler(int signum, siginfo_t *info, void *ctx) {
265 TranslatorX64 *self = Get();
266 void *surprisePage =
267 ThreadInfo::s_threadInfo->m_reqInjectionData.surprisePage;
268 if (info->si_addr == surprisePage) {
269 ucontext_t *ucontext = (ucontext_t*)ctx;
270 TCA rip = (TCA)RIP_REGISTER(ucontext->uc_mcontext);
271 SignalStubMap::const_accessor a;
272 if (!self->m_segvStubs.find(a, rip)) {
273 NOT_REACHED();
275 TCA astubsCall = a->second;
277 // When this handler returns, "call" the astubs code for this
278 // surprise check.
279 RIP_REGISTER(ucontext->uc_mcontext) = (uintptr_t)astubsCall;
281 // We've processed this event; reset the page in case execution
282 // continues normally.
283 g_vmContext->m_stack.unprotect();
284 } else {
285 sighandler_t handler = (sighandler_t)self->m_segvChain;
286 if (handler == SIG_DFL || handler == SIG_IGN) {
287 signal(signum, handler);
288 raise(signum);
289 } else {
290 self->m_segvChain(signum, info, ctx);
296 * Copy a heap cell from memory to the stack.
298 * Use emitCopyToStack when you can safely change the state of the
299 * register map. When using emitCopyToStackRegSafe, you'll need to
300 * invalidate the stack location manually at an appropriate time.
303 void
304 TranslatorX64::emitCopyToStackRegSafe(X64Assembler& a,
305 const NormalizedInstruction& ni,
306 PhysReg src,
307 int off,
308 PhysReg tmpReg) {
309 ASSERT(off % sizeof(Cell) == 0);
310 emitCopyTo(a, src, 0, rVmSp, vstackOffset(ni, off), tmpReg);
313 void
314 TranslatorX64::emitCopyToStack(X64Assembler& a,
315 const NormalizedInstruction& ni,
316 PhysReg src,
317 int off) {
318 ScratchReg scratch(m_regMap);
320 FreezeRegs freeze(m_regMap);
321 emitCopyToStackRegSafe(a, ni, src, off, *scratch);
323 // Forget whatever we thought we knew about the stack.
324 m_regMap.invalidate(ni.outStack->location);
328 * Emit code that does the same thing as tvSet().
330 * The `oldType' and `oldData' registers are used for temporary
331 * storage and unconditionally destroyed.
332 * `toPtr' will be destroyed iff the cell we're storing to is
333 * KindOfRef.
334 * The variant check will not be performed if toOffset is nonzero, so
335 * only pass a nonzero offset if you know the destination is not
336 * KindOfRef.
337 * `from' will not be modified.
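 *
 * Rough C-level sketch of the code emitted below (the helper names here
 * are descriptive only, not real functions):
 *
 *   if (toOffset == 0) derefIfVariant(toPtr);   // follow a KindOfRef
 *   TypedValue* to = (TypedValue*)(toPtr + toOffset);
 *   oldType = to->m_type;  oldData = to->m_data;
 *   storeTypedValue(to, fromType, from);
 *   if (incRefFrom) incRef(from, fromType);
 *   decRefGeneric(oldData, oldType);            // release the old value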
339 void TranslatorX64::emitTvSetRegSafe(const NormalizedInstruction& i,
340 PhysReg from,
341 DataType fromType,
342 PhysReg toPtr,
343 int toOffset,
344 PhysReg oldType,
345 PhysReg oldData,
346 bool incRefFrom) {
347 ASSERT(!i.isNative());
348 ASSERT(!i.isSimple());
349 ASSERT(fromType != KindOfRef);
351 if (toOffset == 0) {
352 emitDerefIfVariant(a, toPtr);
354 a. load_reg64_disp_reg32(toPtr, toOffset + TVOFF(m_type), oldType);
355 a. load_reg64_disp_reg64(toPtr, toOffset + TVOFF(m_data), oldData);
356 emitStoreTypedValue(a, fromType, from, toOffset, toPtr);
357 if (incRefFrom) {
358 emitIncRef(from, fromType);
360 emitDecRefGenericReg(oldData, oldType);
363 void TranslatorX64::emitTvSet(const NormalizedInstruction& i,
364 PhysReg from,
365 DataType fromType,
366 PhysReg toPtr,
367 int toOffset,
368 bool incRefFrom) {
369 ScratchReg oldType(m_regMap);
370 ScratchReg oldData(m_regMap);
371 emitTvSetRegSafe(i, from, fromType, toPtr, toOffset,
372 *oldType, *oldData, incRefFrom);
375 // Logical register move: ensures the value in src will be in dest
376 // after execution, but might do so in strange ways. Do not count on
377 // being able to smash dest to a different register in the future, e.g.
378 void
379 TranslatorX64::emitMovRegReg(X64Assembler& a, PhysReg src, PhysReg dest) {
380 SpaceRecorder("_RegMove", a);
381 if (src != dest) {
382 a. mov_reg64_reg64(src, dest);
386 void
387 TranslatorX64::emitMovRegReg(PhysReg src, PhysReg dest) {
388 emitMovRegReg(a, src, dest);
392 * emitPushAR --
394 * Push an activation record. Will return to the next instruction emitted by
395 * the invoker. Called on behalf of FPushFuncD and FPushFunc. If func is
396 * unknown, we will leave it to the caller to fill in m_func.
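 *
 * Offsets sketch (taken from the AROFF() computations below; no field
 * ordering inside ActRec is implied):
 *
 *   startOfActRec = bytesPopped - sizeof(ActRec)   // stack grows down
 *   funcOff     -> m_func                 (stored only if func is known)
 *   thisOff     -> m_this                 (zeroed when clearThis)
 *   nargsOff    -> m_numArgsAndCtorFlag   (encodeNumArgs(numArgs, isCtor))
 *   varenvOff   -> m_varEnv               (varEnvInvName, usually 0)
 *   savedRbpOff -> m_savedRbp             (current rVmFp)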
398 void
399 TranslatorX64::emitPushAR(const NormalizedInstruction& i, const Func* func,
400 const int bytesPopped /* = 0 */,
401 bool isCtor /* = false */,
402 bool clearThis /* = true */,
403 uintptr_t varEnvInvName /* = 0 */) {
404 if (func && phpBreakpointEnabled(func->name()->data())) {
405 translator_debug_break(a);
407 ASSERT(sizeof(Cell) < sizeof(ActRec));
408 // We are about to push an ActRec onto the stack. The stack grows down,
409 // so the offset of the beginning of the ActRec from the top of stack
410 // is -sizeof(ActRec).
411 int numArgs = i.imm[0].u_IVA;
412 int startOfActRec = bytesPopped - sizeof(ActRec);
413 size_t funcOff = startOfActRec + AROFF(m_func);
414 size_t thisOff = startOfActRec + AROFF(m_this);
415 size_t nargsOff = startOfActRec + AROFF(m_numArgsAndCtorFlag);
416 size_t varenvOff = startOfActRec + AROFF(m_varEnv);
417 size_t savedRbpOff = startOfActRec + AROFF(m_savedRbp);
419 BOOST_STATIC_ASSERT((
420 sizeof(((ActRec*)NULL)->m_numArgsAndCtorFlag) == sizeof(int32_t)
423 * rVmSp might not be up-to-date here, so we use emitVStackStore and
424 emitVStackStoreImm which know how to compute where the top of the
425 * stack currently is.
427 if (func) {
428 emitVStackStoreImm(a, i, (uintptr_t)func, funcOff);
429 if (clearThis) {
430 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
433 emitVStackStoreImm(a, i, ActRec::encodeNumArgs(numArgs, isCtor),
434 nargsOff, sz::dword);
435 emitVStackStoreImm(a, i, varEnvInvName, varenvOff, sz::qword, &m_regMap);
436 emitVStackStore(a, i, rVmFp, savedRbpOff, sz::qword);
439 void
440 TranslatorX64::emitCallSaveRegs() {
441 ASSERT(!m_regMap.frozen());
442 m_regMap.cleanRegs(kCallerSaved);
445 static void UNUSED tc_debug_print(const char* message,
446 uintptr_t r1,
447 uintptr_t r2,
448 uintptr_t r3,
449 ActRec* fp) {
450 TRACE(1, "*********************** %s: %p %p %p (for : %s)\n",
451 message, (void*)r1, (void*)r2, (void*)r3,
452 fp->m_func ? fp->m_func->fullName()->data() : "[?]");
455 // Utility for debugging translations that will print a message,
456 // followed by the value of up to three registers.
457 void TranslatorX64::emitDebugPrint(Asm& a,
458 const char* message,
459 PhysReg r1,
460 PhysReg r2,
461 PhysReg r3) {
462 boost::optional<PhysRegSaver> aSaver;
463 boost::optional<PhysRegSaverStub> astubsSaver;
465 if (&a == &this->a) {
466 aSaver = boost::in_place<PhysRegSaver>(boost::ref(a), kAllX64Regs);
467 } else {
468 astubsSaver = boost::in_place<PhysRegSaverStub>(boost::ref(a),
469 kAllX64Regs);
472 a. mov_imm64_reg (uintptr_t(message), argNumToRegName[0]);
473 a. mov_reg64_reg64(r1, argNumToRegName[1]);
474 a. mov_reg64_reg64(r2, argNumToRegName[2]);
475 a. mov_reg64_reg64(r3, argNumToRegName[3]);
476 a. mov_reg64_reg64(rVmFp, argNumToRegName[4]);
477 a. call((TCA)tc_debug_print);
480 void ArgManager::cleanLocs() {
481 for (size_t i = 0; i < m_args.size(); ++i) {
482 // We only need to clean locations we are passing the address of.
483 // (ArgLoc passes the value in the register mapped for a given
484 // location, not the address of the location itself, so it doesn't
485 // need cleaning here.)
486 if (m_args[i].m_kind != ArgContent::ArgLocAddr) continue;
487 m_tx64.m_regMap.cleanLoc(*m_args[i].m_loc);
491 void ArgManager::computeUsed(std::map<PhysReg, size_t> &used,
492 std::vector<PhysReg> &actual) {
493 size_t n = m_args.size();
494 for (size_t i = 0; i < n; i++) {
495 PhysReg reg = InvalidReg;
496 if (m_args[i].m_kind == ArgContent::ArgReg ||
497 m_args[i].m_kind == ArgContent::ArgRegPlus) {
498 reg = m_args[i].m_reg;
499 } else if (m_args[i].m_kind == ArgContent::ArgLoc ||
500 m_args[i].m_kind == ArgContent::ArgDeref) {
501 reg = m_tx64.getReg(*m_args[i].m_loc);
502 } else {
503 continue;
505 TRACE(6, "ArgManager: arg %zd incoming reg r%d\n", i, reg);
506 used[reg] = i;
507 actual[i] = reg;
511 void
512 TranslatorX64::emitRB(X64Assembler& a,
513 RingBufferType t,
514 SrcKey sk, RegSet toSave) {
515 if (!Trace::moduleEnabledRelease(Trace::tx64, 5)) {
516 return;
518 PhysRegSaver rs(a, toSave | kSpecialCrossTraceRegs);
519 int arg = 0;
520 emitImmReg(a, t, argNumToRegName[arg++]);
521 emitImmReg(a, sk.m_funcId, argNumToRegName[arg++]);
522 emitImmReg(a, sk.m_offset, argNumToRegName[arg++]);
523 a. call((TCA)ringbufferEntry);
526 void
527 TranslatorX64::emitRB(X64Assembler& a,
528 RingBufferType t,
529 const char* msg,
530 RegSet toSave) {
531 if (!Trace::moduleEnabledRelease(Trace::tx64, 5)) {
532 return;
534 PhysRegSaver save(a, toSave | kSpecialCrossTraceRegs);
535 int arg = 0;
536 emitImmReg(a, (uintptr_t)msg, argNumToRegName[arg++]);
537 emitImmReg(a, strlen(msg), argNumToRegName[arg++]);
538 emitImmReg(a, t, argNumToRegName[arg++]);
539 a. call((TCA)ringbufferMsg);
543 * allocate the input registers for i, trying to
544 * match inputs to call arguments.
545 * if args[j] == ArgDontAllocate, the input i.inputs[j] is skipped
546 * if args[j] == ArgAnyReg, it will be allocated as normal
547 * otherwise, args[j] should be a positional call argument,
548 * and allocInputsForCall will attempt to allocate it to
549 * argNumToRegName[args[j]].
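 *
 * Illustrative call (the `args' array here is hypothetical):
 *
 *   static const int args[] = { 1, ArgDontAllocate, ArgAnyReg };
 *   allocInputsForCall(i, args);   // input 0 -> argNumToRegName[1],
 *                                  // input 1 untouched, input 2 anywhere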
551 void
552 TranslatorX64::allocInputsForCall(const NormalizedInstruction& i,
553 const int* args) {
554 RegSet blackList;
555 int arg;
557 * If any of the inputs is already in an argument
558 * register, blacklist it. ArgManager already takes
559 * care of shuffling registers efficiently
561 for (arg = i.inputs.size(); arg--; ) {
562 if (args[arg] != ArgDontAllocate &&
563 m_regMap.hasReg(i.inputs[arg]->location)) {
564 blackList |= RegSet(getReg(i.inputs[arg]->location));
567 bool hasAnyReg = false;
568 for (arg = i.inputs.size(); arg--; ) {
569 if (args[arg] != ArgAnyReg) {
570 if (args[arg] != ArgDontAllocate &&
571 !m_regMap.hasReg(i.inputs[arg]->location)) {
572 PhysReg target = argNumToRegName[args[arg]];
573 if (!blackList.contains(target)) {
574 m_regMap.cleanRegs(RegSet(target));
575 m_regMap.smashRegs(RegSet(target));
576 } else {
577 target = InvalidReg;
579 m_regMap.allocInputReg(i, arg, target);
581 } else {
582 hasAnyReg = true;
585 if (hasAnyReg) {
586 for (arg = i.inputs.size(); arg--; ) {
587 if (args[arg] == ArgAnyReg) {
588 m_regMap.allocInputReg(i, arg);
594 void ArgManager::shuffleRegisters(std::map<PhysReg, size_t> &used,
595 std::vector<PhysReg> &actual) {
596 size_t n = m_args.size();
597 for (size_t i = 0; i < n; i++) {
598 if (actual[i] == InvalidReg)
599 continue;
601 if (!mapContains(used, argNumToRegName[i])) {
602 // There's no conflict, so just copy
603 TRACE(6, "ArgManager: arg %zd reg available, copying from r%d to r%d\n",
604 i, actual[i], argNumToRegName[i]);
605 // Do copy and data structure update here, because this way
606 // we can reuse the register in actual[i] later without problems.
607 m_tx64.emitMovRegReg(m_a, actual[i], argNumToRegName[i]);
608 used.erase(actual[i]);
609 actual[i] = argNumToRegName[i];
610 } else {
611 size_t j = used[argNumToRegName[i]];
612 if (actual[j] != actual[i]) {
613 // The register is used by some other value, so we must swap the two
614 // registers.
615 ASSERT(j > i);
616 ASSERT(actual[j] != InvalidReg);
617 PhysReg ri = actual[i],
618 rj = actual[j];
619 TRACE(6, "ArgManager: arg %zd register used by arg %zd, "
620 "swapping r%d with r%d\n", i, j, ri, rj);
622 // Clean the registers first
623 RegSet regs = RegSet(ri) | RegSet(rj);
624 m_tx64.m_regMap.cleanRegs(regs);
626 // Emit the actual swap
627 m_tx64.m_regMap.swapRegisters(ri, rj);
628 m_a. xchg_reg64_reg64(ri, rj);
630 // Update the data structure for later steps
631 for (size_t k = 0; k < n; k++) {
632 if (actual[k] == ri) {
633 actual[k] = rj;
634 } else if (actual[k] == rj) {
635 actual[k] = ri;
638 used[ri] = j;
639 used[rj] = i;
645 void ArgManager::emitValues(std::vector<PhysReg> &actual) {
646 for (size_t i = 0; i < m_args.size(); i++) {
647 switch(m_args[i].m_kind) {
648 case ArgContent::ArgLoc:
649 case ArgContent::ArgDeref:
650 case ArgContent::ArgReg:
651 TRACE(6, "ArgManager: copying arg %zd from r%d to r%d\n",
652 i, actual[i], argNumToRegName[i]);
653 m_tx64.emitMovRegReg(m_a, actual[i], argNumToRegName[i]);
654 // Emit dereference if needed
655 if (m_args[i].m_kind == ArgContent::ArgDeref) {
656 emitDeref(m_a, argNumToRegName[i], argNumToRegName[i]);
658 break;
660 // For any of these cases, the register should already be available.
661 // If it was used previously by an input value, shuffleRegisters
662 // should have moved it to the proper register from argNumToRegName.
663 case ArgContent::ArgImm:
664 emitImmReg(m_a, m_args[i].m_imm, argNumToRegName[i]);
665 break;
667 case ArgContent::ArgRegPlus:
668 if (m_args[i].m_imm) {
669 m_a. add_imm32_reg64(m_args[i].m_imm, argNumToRegName[i]);
671 break;
673 case ArgContent::ArgLocAddr:
675 PhysReg base;
676 int disp;
677 locToRegDisp(*m_args[i].m_loc, &base, &disp);
678 m_a. lea_reg64_disp_reg64(base, disp, argNumToRegName[i]);
680 break;
682 default:
683 // Should never happen
684 ASSERT(false);
689 void
690 TranslatorX64::emitCall(X64Assembler& a, TCA dest, bool killRegs) {
691 if (a.jmpDeltaFits(dest) && !Stats::enabled()) {
692 a. call(dest);
693 } else {
694 a. call(getNativeTrampoline(dest));
696 if (killRegs) {
697 // All caller-saved regs are now suspect.
698 m_regMap.smashRegs(kCallerSaved);
702 void
703 TranslatorX64::recordSyncPoint(X64Assembler& a, Offset pcOff, Offset spOff) {
704 m_pendingFixups.push_back(PendingFixup(a.code.frontier,
705 Fixup(pcOff, spOff)));
708 void
709 TranslatorX64::recordCall(Asm& a, const NormalizedInstruction& i) {
710 recordCallImpl<false>(a, i);
713 void
714 TranslatorX64::recordCall(const NormalizedInstruction& i) {
715 recordCall(a, i);
718 template <bool reentrant>
719 void
720 TranslatorX64::recordCallImpl(X64Assembler& a,
721 const NormalizedInstruction& i,
722 bool advance /* = false */) {
723 SrcKey sk = i.source;
724 Offset stackOff = i.stackOff + (vmfp() - vmsp());
725 if (advance) {
726 sk.advance(curUnit());
727 stackOff += getStackDelta(i);
729 ASSERT(i.checkedInputs ||
730 (reentrant && !i.isSimple()) ||
731 (!reentrant && !i.isNative()));
732 Offset pcOff = sk.offset() - curFunc()->base();
733 SKTRACE(2, sk, "record%sCall pcOff %d\n",
734 reentrant ? "Reentrant" : "", int(pcOff));
735 recordSyncPoint(a, pcOff, stackOff);
736 SKTRACE(2, sk, "record%sCall stackOff %d\n",
737 reentrant ? "Reentrant" : "", int(stackOff));
740 * Right now we assume call sites that need to record sync points
741 * may also throw exceptions. We record information about dirty
742 * callee-saved registers so we can spill their contents during
743 * unwinding. See unwind-x64.cpp.
745 if (!m_pendingUnwindRegInfo.empty()) {
746 if (Trace::moduleLevel(Trace::tunwind) >= 2) {
747 sk.trace("recordCallImpl has dirty callee-saved regs\n");
748 TRACE_MOD(Trace::tunwind, 2,
749 "CTCA: %p saving dirty callee regs:\n",
750 a.code.frontier);
751 for (int i = 0; i < UnwindRegInfo::kMaxCalleeSaved; ++i) {
752 if (m_pendingUnwindRegInfo.m_regs[i].dirty) {
753 TRACE_MOD(Trace::tunwind, 2, " %s\n",
754 m_pendingUnwindRegInfo.m_regs[i].pretty().c_str());
758 m_unwindRegMap.insert(a.code.frontier, m_pendingUnwindRegInfo);
759 m_pendingUnwindRegInfo.clear();
763 void TranslatorX64::prepareCallSaveRegs() {
764 emitCallSaveRegs(); // Clean caller-saved regs.
765 m_pendingUnwindRegInfo.clear();
767 RegSet rset = kCalleeSaved;
768 PhysReg reg;
769 while (rset.findFirst(reg)) {
770 rset.remove(reg);
771 if (!m_regMap.regIsDirty(reg)) continue;
772 const RegInfo* ri = m_regMap.getInfo(reg);
773 ASSERT(ri->m_cont.m_kind == RegContent::Loc);
775 // If the register is dirty, we'll record this so that we can
776 // restore it during stack unwinding if an exception is thrown.
777 m_pendingUnwindRegInfo.add(reg, ri->m_type, ri->m_cont.m_loc);
781 void
782 TranslatorX64::emitIncRef(PhysReg base, DataType dtype) {
783 if (!IS_REFCOUNTED_TYPE(dtype) && dtype != KindOfInvalid) {
784 return;
786 ASSERT(m_regMap.getInfo(base));
787 SpaceRecorder sr("_IncRef", a);
788 ASSERT(sizeof(((Cell*)NULL)->_count) == sizeof(int32_t));
789 { // if !static then
790 IfCountNotStatic ins(a, base, dtype);
792 * The optimization guide cautions against using inc; while it is
793 * compact, it only writes the low-order 8 bits of eflags, causing a
794 * partial dependency for any downstream flags-dependent code.
796 a. add_imm32_disp_reg32(1, TVOFF(_count), base);
797 } // endif
800 void
801 TranslatorX64::emitIncRefGenericRegSafe(PhysReg base,
802 int disp,
803 PhysReg tmpReg) {
804 ASSERT(m_regMap.getInfo(base));
805 { // if RC
806 IfRefCounted irc(a, base, disp);
807 a. load_reg64_disp_reg64(base, disp + TVOFF(m_data),
808 tmpReg);
809 { // if !static
810 IfCountNotStatic ins(a, tmpReg);
811 a. add_imm32_disp_reg32(1, TVOFF(_count), tmpReg);
812 } // endif
813 } // endif
816 void TranslatorX64::emitIncRefGeneric(PhysReg base, int disp) {
817 ScratchReg tmpReg(m_regMap);
818 emitIncRefGenericRegSafe(base, disp, *tmpReg);
821 static void emitGetGContext(X64Assembler& a, PhysReg dest) {
822 emitTLSLoad<ExecutionContext>(a, g_context, dest);
825 // emitEagerVMRegSave --
826 // Inline. Saves regs in-place in the TC. This is an unusual need;
827 // you probably want to lazily save these regs via recordCall and
828 // its ilk.
830 // SaveFP uses rVmFp, as usual. SavePC requires the caller to have
831 // placed the PC offset of the instruction about to be executed in
832 // rdi.
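//
// Illustrative call site (sketch only; `pcOffset' is a placeholder):
//
//   emitImmReg(a, pcOffset, rdi);            // SavePC reads the offset from rdi
//   emitEagerVMRegSave(a, SaveFP | SavePC);  // fp and pc now visible in memory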
833 enum RegSaveFlags {
834 SaveFP = 1,
835 SavePC = 2
838 static TCA
839 emitEagerVMRegSave(X64Assembler& a,
840 int flags /* :: RegSaveFlags */) {
841 TCA start = a.code.frontier;
842 bool saveFP = bool(flags & SaveFP);
843 bool savePC = bool(flags & SavePC);
844 ASSERT((flags & ~(SavePC | SaveFP)) == 0);
846 PhysReg pcReg = rdi;
847 PhysReg rEC = rScratch;
848 ASSERT(!kSpecialCrossTraceRegs.contains(rdi));
850 emitGetGContext(a, rEC);
852 static COff spOff = offsetof(VMExecutionContext, m_stack) +
853 Stack::topOfStackOffset();
854 static COff fpOff = offsetof(VMExecutionContext, m_fp) - spOff;
855 static COff pcOff = offsetof(VMExecutionContext, m_pc) - spOff;
857 ASSERT(spOff != 0);
858 // Instruction selection note: this is an lea, but add is more
859 // compact and we can afford the flags bash.
860 a. add_imm32_reg64(spOff, rEC);
861 a. store_reg64_disp_reg64 (rVmSp, 0, rEC);
862 if (savePC) {
863 // We're going to temporarily abuse rVmSp to hold the current unit.
864 PhysReg rBC = rVmSp;
865 a. pushr(rBC);
866 // m_fp -> m_func -> m_unit -> m_bc + pcReg
867 a. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rBC);
868 a. load_reg64_disp_reg64(rBC, Func::unitOff(), rBC);
869 a. load_reg64_disp_reg64(rBC, Unit::bcOff(), rBC);
870 a. add_reg64_reg64(rBC, pcReg);
871 a. store_reg64_disp_reg64(pcReg, pcOff, rEC);
872 a. popr(rBC);
874 if (saveFP) {
875 a. store_reg64_disp_reg64 (rVmFp, fpOff, rEC);
877 return start;
881 * emitDecRef --
883 * Decrement a value's refcount and call the release helper if
884 * appropriate. emitDecRef requires that the caller knows the
885 * type at translation time.
887 void TranslatorX64::emitDecRef(Asm& a,
888 const NormalizedInstruction& i,
889 PhysReg rDatum,
890 DataType type) {
891 ASSERT(type != KindOfInvalid);
892 if (!IS_REFCOUNTED_TYPE(type)) {
893 return;
896 ASSERT(!i.isNative());
897 ASSERT(!i.isSimple() || !typeReentersOnRelease(type));
898 SpaceRecorder sr("_DecRef", a);
899 { // if !static
900 IfCountNotStatic ins(a, rDatum, type);
901 a. sub_imm32_disp_reg32(1, TVOFF(_count), rDatum);
903 ASSERT(type >= 0 && type < MaxNumDataTypes);
904 if (&a == &this->astubs) {
905 JccBlock<CC_NZ> ifZero(a);
906 callUnaryStub(a, i, m_dtorStubs[type], rDatum);
907 } else {
908 UnlikelyIfBlock<CC_Z> ifZero(this->a, astubs);
909 callUnaryStub(astubs, i, m_dtorStubs[type], rDatum);
911 } // endif
914 void TranslatorX64::emitDecRef(const NormalizedInstruction& i,
915 PhysReg rDatum,
916 DataType type) {
917 emitDecRef(a, i, rDatum, type);
920 void TranslatorX64::emitDecRefInput(Asm& a,
921 const NormalizedInstruction& i,
922 int input) {
923 DynLocation* value = i.inputs[input];
924 if (IS_REFCOUNTED_TYPE(value->outerType())) {
925 m_regMap.allocInputReg(i, input);
926 PhysReg rValue = getReg(value->location);
927 emitDecRef(a, i, rValue, value->outerType());
932 * emitDecRefGeneric --
934 * Decrement a value's refcount and call the release helper if
935 * appropriate. emitDecRefGeneric should only be used when the type
936 * is not known at translation time.
938 * emitDecRefGeneric operates on the memory location given by
939 * srcReg+disp, so the caller is responsible for ensuring that the
940 * memory location is up to date and not enregistered.
942 void TranslatorX64::emitDecRefGeneric(const NormalizedInstruction& i,
943 PhysReg srcReg, int disp /* = 0 */) {
944 SpaceRecorder sr("_DecRefGeneric", a);
946 * The full, inlined generic dec ref looks like:
948 * TypedValue* d = srcReg + disp;
949 * if (IS_REFCOUNTED_TYPE(d->m_type) && // a)
950 * d->_count != kStaticCount && // b)
951 * --d->_count == 0) // c)
952 * GenericDestroy(d); // d)
954 * We originally inlined *all* of a-d, and have experimented with sharing
955 * them all, too. At this writing (05-12-2012), inlining a) and outlining
956 * b-d seems to strike the right balance between compactness and not
957 * doing too much work in the common case where it is not refcounted.
960 IfRefCounted irc(a, srcReg, disp);
961 callUnaryReentrantStub(a, i, m_dtorGenericStub, srcReg, disp);
965 // Same as emitDecRefGeneric, except for when we have the type in a
966 // register as well. Same inlining/outlining choices as
967 // emitDecRefGeneric above.
968 void TranslatorX64::emitDecRefGenericReg(PhysReg rData, PhysReg rType) {
969 SpaceRecorder sr("_DecRefGeneric", a);
970 a. cmp_imm32_reg32(KindOfRefCountThreshold, rType);
972 JccBlock<CC_BE> ifRefCounted(a);
973 callBinaryStub(a, *m_curNI, m_dtorGenericStubRegs, rData, rType);
978 * genericRefCountStub --
980 * Shared code to decRef the TypedValue* of unknown, but refcounted, type
981 * in rdi. Tightly coupled with emitDecRefGeneric.
983 TCA TranslatorX64::genericRefCountStub(X64Assembler& a) {
984 moveToAlign(a);
985 FreezeRegs brr(m_regMap);
986 TCA retval = a.code.frontier;
988 // Note we make a real frame here: this is necessary so that the
989 // fixup map can chase back to the caller of this stub if it needs
990 // to sync regs.
991 a. pushr(rbp); // {
992 a. mov_reg64_reg64(rsp, rbp);
994 PhysRegSaverStub prs(a, RegSet(rsi));
995 // We already know the type was refcounted if we got here.
996 a. load_reg64_disp_reg64(rdi, TVOFF(m_data), rsi);
997 { // if !static
998 IfCountNotStatic ins(a, rsi, KindOfInvalid);
999 a. sub_imm32_disp_reg32(1, TVOFF(_count), rsi);
1000 { // if zero
1001 JccBlock<CC_NZ> ifZero(a);
1002 RegSet s = kCallerSaved - (RegSet(rdi) | RegSet(rsi));
1003 PhysRegSaver prs(a, s);
1004 a.call(TCA(tv_release_generic));
1005 } // endif
1006 } // endif
1008 a. popr(rbp); // }
1009 a. ret();
1010 return retval;
1013 TCA TranslatorX64::genericRefCountStubRegs(X64Assembler& a) {
1014 const PhysReg rData = argNumToRegName[0];
1015 const PhysReg rType = argNumToRegName[1];
1017 moveToAlign(a);
1018 TCA retval = a.code.frontier;
1019 FreezeRegs brr(m_regMap);
1021 // The frame here is needed for the same reason as in
1022 // genericRefCountStub.
1023 a. pushr(rbp); // {
1024 a. mov_reg64_reg64(rsp, rbp);
1026 IfCountNotStatic ins(a, rData, KindOfInvalid);
1027 a. sub_imm32_disp_reg32(1, TVOFF(_count), rData);
1029 JccBlock<CC_NZ> ifZero(a);
1030 // The arguments are already in the right registers.
1031 RegSet s = kCallerSaved - (RegSet(rData) | RegSet(rType));
1032 PhysRegSaverParity<1> saver(a, s);
1033 if (false) { // typecheck
1034 RefData* vp = NULL; DataType dt = KindOfUninit;
1035 (void)tv_release_typed(vp, dt);
1037 a.call(TCA(tv_release_typed));
1040 a. popr(rbp); // }
1041 a. ret();
1042 return retval;
1046 * Translation call targets. It is a lot easier, and a bit more
1047 * portable, to use C linkage from assembly.
1049 TCA TranslatorX64::retranslate(SrcKey sk, bool align, bool useHHIR) {
1050 if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk)) {
1051 // We are about to translate something known to be blacklisted by
1052 // debugger, exit early
1053 SKTRACE(1, sk, "retranslate abort due to debugger\n");
1054 return NULL;
1056 LeaseHolder writer(s_writeLease);
1057 if (!writer) return NULL;
1058 SKTRACE(1, sk, "retranslate\n");
1059 return translate(&sk, align, useHHIR);
1062 // Only use comes from HHIR's cgExitTrace() case TraceExitType::SlowNoProgress
1063 TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk,
1064 bool align,
1065 TCA toSmash) {
1066 if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk)) {
1067 // We are about to translate something known to be blacklisted by
1068 // debugger, exit early
1069 SKTRACE(1, sk, "retranslateAndPatchNoIR abort due to debugger\n");
1070 return NULL;
1072 LeaseHolder writer(s_writeLease);
1073 if (!writer) return NULL;
1074 SKTRACE(1, sk, "retranslateAndPatchNoIR\n");
1075 SrcRec* srcRec = getSrcRec(sk);
1076 if (srcRec->translations().size() == SrcRec::kMaxTranslations + 1) {
1077 // we've gone over the translation limit and already have an anchor
1078 // translation that will interpret, so just return NULL and force
1079 // interpretation of this BB.
1080 return NULL;
1082 TCA start = translate(&sk, align, false);
1083 if (start != NULL) {
1084 smash(getAsmFor(toSmash), toSmash, start);
1086 return start;
1090 * Satisfy an alignment constraint. If we're in a reachable section
1091 * of code, bridge the gap with nops. Otherwise, int3's.
1093 void
1094 TranslatorX64::moveToAlign(X64Assembler &aa,
1095 const size_t align /* =kJmpTargetAlign */,
1096 bool unreachable /* =true */) {
1097 using namespace HPHP::Util;
1098 SpaceRecorder sr("_Align", aa);
1099 ASSERT(isPowerOfTwo(align));
1100 size_t leftInBlock = align - ((align - 1) & uintptr_t(aa.code.frontier));
1101 if (leftInBlock == align) return;
1102 if (unreachable) {
1103 if (leftInBlock > 2) {
1104 aa.ud2();
1105 leftInBlock -= 2;
1107 if (leftInBlock > 0) {
1108 aa.emitInt3s(leftInBlock);
1110 return;
1112 aa.emitNop(leftInBlock);
1116 * Req machinery. We sometimes emit code that is unable to proceed
1117 * without translator assistance; e.g., a basic block whose successor is
1118 * unknown. We leave one of these request arg blobs in m_data, and point
1119 * to it at callout-time.
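 *
 * For example, getTranslation() below creates its anchor translations as
 * REQ_RETRANSLATE stubs:
 *
 *   TCA start = emitServiceReq(false, REQ_RETRANSLATE, 1,
 *                              uint64_t(sk->offset()));
 *
 * Hitting that stub calls back into the translator, which produces a real
 * translation for sk and resumes execution in the TC.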
1122 // REQ_BIND_CALL
1123 struct ReqBindCall {
1124 SrcKey m_sourceInstr;
1125 TCA m_toSmash;
1126 int m_nArgs;
1127 bool m_isImmutable; // call was to known func.
1128 } m_bindCall;
1130 // ID to name mapping for tracing.
1131 static inline const char*
1132 reqName(int req) {
1133 static const char* reqNames[] = {
1134 #define REQ(nm) #nm,
1135 SERVICE_REQUESTS
1136 #undef REQ
1138 return reqNames[req];
1142 * Find or create a translation for sk. Returns TCA of "best" current
1143 * translation. May return NULL if it is currently impossible to create
1144 * a translation.
1147 TranslatorX64::getTranslation(const SrcKey *sk, bool align,
1148 bool forceNoHHIR /* = false */) {
1149 curFunc()->validate();
1150 SKTRACE(2, *sk, "getTranslation: curUnit %s funcId %llx offset %d\n",
1151 curUnit()->filepath()->data(),
1152 sk->m_funcId,
1153 sk->offset());
1154 SKTRACE(2, *sk, " funcId: %llx\n",
1155 curFunc()->getFuncId());
1157 if (curFrame()->hasVarEnv() && curFrame()->getVarEnv()->isGlobalScope()) {
1158 SKTRACE(2, *sk, "punting on pseudoMain\n");
1159 return NULL;
1161 if (const SrcRec* sr = m_srcDB.find(*sk)) {
1162 TCA tca = sr->getTopTranslation();
1163 if (tca) {
1164 SKTRACE(2, *sk, "getTranslation: found %p\n", tca);
1165 return tca;
1171 * Try to become the writer. We delay this until we *know* we will have
1172 * a need to create new translations, instead of just trying to win the
1173 * lottery at the dawn of time. Hopefully lots of requests won't require
1174 * any new translation.
1176 LeaseHolder writer(s_writeLease);
1177 if (!writer) return NULL;
1178 if (SrcRec* sr = m_srcDB.find(*sk)) {
1179 TCA tca = sr->getTopTranslation();
1180 if (tca) {
1181 // Handle extremely unlikely race; someone may have just already
1182 // added the first instance of this SrcRec while we did a
1183 // non-blocking wait on the write lease.
1184 return tca;
1185 } else {
1186 // Since we are holding the write lease, we know that sk is properly
1187 // initialized, except that it has no translations (due to
1188 // replaceOldTranslations)
1189 return retranslate(*sk, align,
1190 RuntimeOption::EvalJitUseIR && !forceNoHHIR);
1194 // We put retranslate requests at the end of our slab to more frequently
1195 // allow conditional jump fall-throughs
1197 TCA start = emitServiceReq(false, REQ_RETRANSLATE, 1, uint64_t(sk->offset()));
1198 SKTRACE(1, *sk, "inserting anchor translation for (%p,%d) at %p\n",
1199 curUnit(), sk->offset(), start);
1200 SrcRec* sr = m_srcDB.insert(*sk);
1201 sr->setFuncInfo(curFunc());
1202 sr->setAnchorTranslation(start);
1204 addTranslation(TransRec(*sk, curUnit()->md5(), TransAnchor, 0, 0, start,
1205 astubs.code.frontier - start));
1207 ASSERT(getTransRec(start)->kind == TransAnchor);
1209 return retranslate(*sk, align, RuntimeOption::EvalJitUseIR && !forceNoHHIR);
1213 TranslatorX64::translate(const SrcKey *sk, bool align, bool useHHIR) {
1214 INC_TPC(translate);
1215 ASSERT(vmfp() >= vmsp());
1216 ASSERT(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
1217 ASSERT(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
1219 if (useHHIR) {
1220 if (m_numHHIRTrans == RuntimeOption::EvalMaxHHIRTrans) {
1221 useHHIR = false;
1222 m_useHHIR = false;
1223 RuntimeOption::EvalJitUseIR = false;
1224 } else {
1225 hhirTraceStart(sk->offset());
1227 } else {
1228 ASSERT(m_useHHIR == false);
1231 Tracelet tlet;
1232 analyze(sk, tlet);
1234 if (align) {
1235 moveToAlign(a, kNonFallthroughAlign);
1238 TCA start = a.code.frontier;
1239 translateTracelet(tlet);
1240 SKTRACE(1, *sk, "translate moved head from %p to %p\n",
1241 getTopTranslation(*sk), start);
1242 if (Trace::moduleEnabledRelease(tcdump, 1)) {
1243 static __thread int n;
1244 if (++n % 10000 == 0) {
1245 std::ofstream f("cfg.dot", std::ios_base::trunc);
1246 drawCFG(f);
1247 f.close();
1250 return start;
1254 * Returns true if a's current frontier can have an nBytes-long
1255 * instruction written without any risk of cache-tearing.
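 *
 * Worked example (assuming 64-byte cache lines): a 5-byte jmp starting at
 * an address whose low bits are 0x3c would end at 0x40, crossing a line
 * boundary, so it is not smashable; starting at 0x3a it ends at 0x3e and
 * is.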
1257 bool
1258 TranslatorX64::isSmashable(X64Assembler& a, int nBytes) {
1259 ASSERT(nBytes <= int(kX64CacheLineSize));
1260 uintptr_t iFrontier = uintptr_t(a.code.frontier);
1261 uintptr_t lastByte = iFrontier + nBytes - 1;
1262 return (iFrontier & ~kX64CacheLineMask) == (lastByte & ~kX64CacheLineMask);
1265 void
1266 TranslatorX64::prepareForSmash(X64Assembler& a, int nBytes) {
1267 if (!isSmashable(a, nBytes)) {
1268 moveToAlign(a, kX64CacheLineSize, false);
1270 ASSERT(isSmashable(a, nBytes));
1273 void
1274 TranslatorX64::prepareForSmash(int nBytes) {
1275 prepareForSmash(a, nBytes);
1278 void
1279 TranslatorX64::smash(X64Assembler &a, TCA src, TCA dest) {
1280 ASSERT(canWrite());
1281 TRACE(2, "smash: %p -> %p\n", src, dest);
1285 * We are about to smash reachable code in the translation cache. A
1286 * hardware thread might be executing the very instruction we're
1287 * modifying. This is safe because:
1289 * 1. We align smashable instructions so that they reside on a single
1290 * cache line;
1292 * 2. We modify the instruction with a single processor store; and
1294 * 3. The smashed region contains only a single instruction in the
1295 * original instruction stream (see jmp() -> emitJ32() -> bytes() in
1296 * the assembler).
1298 CodeCursor cg(a, src);
1299 ASSERT(isSmashable(a, kJmpLen));
1300 if (dest > src && dest - src <= 7) {
1301 a. emitNop(dest - src);
1302 } else {
1303 a. jmp(dest);
1307 void TranslatorX64::protectCode() {
1308 mprotect(tx64->a.code.base, tx64->a.code.size, PROT_READ | PROT_EXEC);
1312 void TranslatorX64::unprotectCode() {
1313 mprotect(tx64->a.code.base, tx64->a.code.size,
1314 PROT_READ | PROT_WRITE | PROT_EXEC);
1317 void
1318 TranslatorX64::emitStackCheck(int funcDepth, Offset pc) {
1319 uint64_t stackMask = cellsToBytes(RuntimeOption::EvalVMStackElms) - 1;
1320 a. mov_reg64_reg64(rVmSp, rScratch); // copy to destroy
1321 a. and_imm64_reg64(stackMask, rScratch);
1322 a. sub_imm64_reg64(funcDepth + Stack::sSurprisePageSize, rScratch);
1323 ASSERT(m_stackOverflowHelper);
1324 a. jl(m_stackOverflowHelper); // Unlikely branch to failure.
1325 // Success.
1328 // Tests the surprise flags for the current thread. Should be used
1329 // before a jnz to surprise handling code.
1330 void
1331 TranslatorX64::emitTestSurpriseFlags() {
1332 CT_ASSERT(sizeof(((RequestInjectionData*)0)->conditionFlags) == 8);
1333 a.test_imm64_disp_reg64(-1, TargetCache::kConditionFlagsOff, rVmTl);
1336 void
1337 TranslatorX64::emitCheckSurpriseFlagsEnter(bool inTracelet, Offset pcOff,
1338 Offset stackOff) {
1339 emitTestSurpriseFlags();
1341 UnlikelyIfBlock<CC_NZ> ifTracer(a, astubs);
1342 if (false) { // typecheck
1343 const ActRec* ar = NULL;
1344 EventHook::FunctionEnter(ar, 0);
1346 astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
1347 CT_ASSERT(EventHook::NormalFunc == 0);
1348 astubs.xor_reg32_reg32(argNumToRegName[1], argNumToRegName[1]);
1349 emitCall(astubs, (TCA)&EventHook::FunctionEnter);
1350 if (inTracelet) {
1351 recordSyncPoint(astubs, pcOff, stackOff);
1352 } else {
1353 // If we're being called while generating a func prologue, we
1354 // have to record the fixup directly in the fixup map instead of
1355 // going through m_pendingFixups like normal.
1356 m_fixupMap.recordFixup(astubs.code.frontier, Fixup(pcOff, stackOff));
1361 void
1362 TranslatorX64::setArgInActRec(ActRec* ar, int argNum, uint64_t datum,
1363 DataType t) {
1364 TypedValue* tv =
1365 (TypedValue*)(uintptr_t(ar) - (argNum+1) * sizeof(TypedValue));
1366 tv->m_data.num = datum;
1367 tv->m_type = t;
1371 TranslatorX64::shuffleArgsForMagicCall(ActRec* ar) {
1372 if (!ar->hasInvName()) {
1373 return 0;
1375 const Func* f UNUSED = ar->m_func;
1376 f->validate();
1377 ASSERT(f->name()->isame(s___call.get())
1378 || f->name()->isame(s___callStatic.get()));
1379 ASSERT(f->numParams() == 2);
1380 TRACE(1, "shuffleArgsForMagicCall: ar %p\n", ar);
1381 ASSERT(ar->hasInvName());
1382 StringData* invName = ar->getInvName();
1383 ASSERT(invName);
1384 ar->setVarEnv(NULL);
1385 int nargs = ar->numArgs();
1386 // We need to make an array containing all the arguments passed by the
1387 // caller and put it where the second argument is
1388 HphpArray* argArray = NEW(HphpArray)(nargs);
1389 argArray->incRefCount();
1390 for (int i = 0; i < nargs; ++i) {
1391 TypedValue* tv =
1392 (TypedValue*)(uintptr_t(ar) - (i+1) * sizeof(TypedValue));
1393 argArray->nvAppend(tv, false);
1394 tvRefcountedDecRef(tv);
1396 // Put invName in the slot for first argument
1397 setArgInActRec(ar, 0, uint64_t(invName), BitwiseKindOfString);
1398 // Put argArray in the slot for second argument
1399 setArgInActRec(ar, 1, uint64_t(argArray), KindOfArray);
1400 // Fix up ActRec's numArgs
1401 ar->initNumArgs(2);
1402 return 1;
1406 * The standard VMRegAnchor treatment won't work for some cases called
1407 * during function preludes.
1409 * The fp sync machinery is fundamentally based on the notion that
1410 * instruction pointers in the TC are uniquely associated with source
1411 * HHBC instructions, and that source HHBC instructions are in turn
1412 * uniquely associated with SP->FP deltas.
1414 * run_intercept_helper/trimExtraArgs is called from the prologue of
1415 * the callee. The prologue is 1) still in the caller frame for now,
1416 * and 2) shared across multiple call sites. 1 means that we have the
1417 * fp from the caller's frame, and 2 means that this fp is not enough
1418 * to figure out sp.
1420 * However, the prologue passes us the callee actRec, whose predecessor
1421 * has to be the caller. So we can sync sp and fp by ourselves here.
1422 * Geronimo!
1424 static void sync_regstate_to_caller(ActRec* preLive) {
1425 ASSERT(tl_regState == REGSTATE_DIRTY);
1426 vmfp() = (TypedValue*)preLive->m_savedRbp;
1427 vmsp() = (TypedValue*)preLive - preLive->numArgs();
1428 if (ActRec* fp = g_vmContext->m_fp) {
1429 if (fp->m_func && fp->m_func->unit()) {
1430 vmpc() = fp->m_func->unit()->at(fp->m_func->base() + preLive->m_soff);
1433 tl_regState = REGSTATE_CLEAN;
1436 static uint64 run_intercept_helper(ActRec* ar, Variant* ihandler) {
1437 sync_regstate_to_caller(ar);
1438 bool ret = run_intercept_handler<true>(ar, ihandler);
1440 * Restore tl_regState manually in the no-exception case only. (The
1441 * VM regs are clean here---we only need to set them dirty if we are
1442 * stopping to execute in the TC again, which we won't be doing if
1443 * an exception is propagating.)
1445 tl_regState = REGSTATE_DIRTY;
1446 return ret;
1449 void
1450 TranslatorX64::trimExtraArgs(ActRec* ar) {
1451 ASSERT(!ar->hasInvName());
1453 sync_regstate_to_caller(ar);
1454 const Func* f = ar->m_func;
1455 int numParams = f->numParams();
1456 int numArgs = ar->numArgs();
1457 ASSERT(numArgs > numParams);
1458 int numExtra = numArgs - numParams;
1460 TRACE(1, "trimExtraArgs: %d args, function %s takes only %d, ar %p\n",
1461 numArgs, f->name()->data(), numParams, ar);
1463 if (f->attrs() & AttrMayUseVV) {
1464 ASSERT(!ar->hasExtraArgs());
1465 ar->setExtraArgs(ExtraArgs::allocateCopy(
1466 (TypedValue*)(uintptr_t(ar) - numArgs * sizeof(TypedValue)),
1467 numArgs - numParams));
1468 } else {
1469 // Function is not marked as "MayUseVV", so discard the extra arguments
1470 TypedValue* tv = (TypedValue*)(uintptr_t(ar) - numArgs*sizeof(TypedValue));
1471 for (int i = 0; i < numExtra; ++i) {
1472 tvRefcountedDecRef(tv);
1473 ++tv;
1475 ar->setNumArgs(numParams);
1478 // Only go back to dirty in a non-exception case. (Same reason as
1479 // above.)
1480 tl_regState = REGSTATE_DIRTY;
1484 TranslatorX64::getInterceptHelper() {
1485 if (false) { // typecheck
1486 Variant *h = get_intercept_handler(CStrRef((StringData*)NULL),
1487 (char*)NULL);
1488 bool c UNUSED = run_intercept_helper((ActRec*)NULL, h);
1490 if (!m_interceptHelper) {
1491 m_interceptHelper = TCA(astubs.code.frontier);
1492 astubs. load_reg64_disp_reg64(rStashedAR, AROFF(m_func),
1493 rax);
1494 astubs. lea_reg64_disp_reg64(rax, Func::fullNameOff(),
1495 argNumToRegName[0]);
1497 astubs. lea_reg64_disp_reg64(rax, Func::maybeInterceptedOff(),
1498 argNumToRegName[1]);
1500 astubs. call(TCA(get_intercept_handler));
1501 astubs. test_reg64_reg64(rax, rax);
1503 JccBlock<CC_NZ> ifNotIntercepted(astubs);
1504 astubs. ret();
1507 // we might re-enter, so align the stack
1508 astubs. sub_imm32_reg64(8, rsp);
1509 // Copy the old rbp into the savedRbp pointer.
1510 astubs. store_reg64_disp_reg64(rbp, 0, rStashedAR);
1512 PhysReg rSavedRip = r13; // XXX ideally don't hardcode r13 ... but
1513 // we need callee-saved and don't have
1514 // any scratch ones.
1516 // Fish out the saved rip. We may need to jump there, and the helper will
1517 // have wiped out the ActRec.
1518 astubs. load_reg64_disp_reg64(rStashedAR, AROFF(m_savedRip),
1519 rSavedRip);
1520 astubs. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1521 astubs. mov_reg64_reg64(rax, argNumToRegName[1]);
1522 astubs. call(TCA(run_intercept_helper));
1524 // Normally we'd like to recordReentrantCall here, but the vmreg sync'ing
1525 // for run_intercept_handler is a special little snowflake. See
1526 // run_intercept_handler for details.
1527 astubs. test_reg64_reg64(rax, rax);
1529 // If the helper returned false, don't execute this function. The helper
1530 // will have cleaned up the interceptee's arguments and AR, and pushed
1531 // the handler's return value; we now need to get out.
1533 // We don't need to touch rVmFp; it's still pointing to the caller of
1534 // the interceptee. We need to adjust rVmSp. Then we need to jump to the
1535 // saved rip from the interceptee's ActRec.
1536 JccBlock<CC_NZ> ifDontEnterFunction(astubs);
1537 astubs. add_imm32_reg64(16, rsp);
1538 astubs. lea_reg64_disp_reg64(rStashedAR, AROFF(m_r), rVmSp);
1539 astubs. jmp_reg(rSavedRip);
1541 astubs. add_imm32_reg64(8, rsp);
1542 astubs. ret();
1544 return m_interceptHelper;
1548 TranslatorX64::getCallArrayProlog(Func* func) {
1549 TCA tca = func->getFuncBody();
1550 if (tca != (TCA)funcBodyHelperThunk) return tca;
1552 int numParams = func->numParams();
1553 std::vector<std::pair<int,Offset> > dvs;
1554 for (int i = 0; i < numParams; ++i) {
1555 const Func::ParamInfo& pi = func->params()[i];
1556 if (pi.hasDefaultValue()) {
1557 dvs.push_back(std::make_pair(i, pi.funcletOff()));
1560 if (dvs.size()) {
1561 LeaseHolder writer(s_writeLease);
1562 if (!writer) return NULL;
1563 tca = func->getFuncBody();
1564 if (tca != (TCA)funcBodyHelperThunk) return tca;
1565 tca = a.code.frontier;
1566 if (dvs.size() == 1) {
1567 a. cmp_imm32_disp_reg32(dvs[0].first,
1568 AROFF(m_numArgsAndCtorFlag), rVmFp);
1569 emitBindJcc(a, CC_LE, SrcKey(func, dvs[0].second));
1570 emitBindJmp(a, SrcKey(func, func->base()));
1571 } else {
1572 a. load_reg64_disp_reg32(rVmFp, AROFF(m_numArgsAndCtorFlag), rax);
1573 for (unsigned i = 0; i < dvs.size(); i++) {
1574 a. cmp_imm32_reg32(dvs[i].first, rax);
1575 emitBindJcc(a, CC_LE, SrcKey(func, dvs[i].second));
1577 emitBindJmp(a, SrcKey(func, func->base()));
1579 } else {
1580 SrcKey sk(func, func->base());
1581 tca = tx64->getTranslation(&sk, false);
1584 return tca;
1588 TranslatorX64::emitPrologueRedispatch(X64Assembler& a) {
1589 TCA retval;
1590 moveToAlign(a);
1591 retval = a.code.frontier;
1592 // We're in the wrong func prologue. By convention with emitFuncGuard,
1593 // rax contains the function we need to enter.
1595 ASSERT(kScratchCrossTraceRegs.contains(rax));
1596 ASSERT(kScratchCrossTraceRegs.contains(rdx));
1597 ASSERT(kScratchCrossTraceRegs.contains(rcx));
1599 // We don't know how many params we were invoked with. Infer it from
1600 // the stack and rStashedAR rather than reading it from the actrec.
1602 // mov %r15, %rdx
1603 // ld m_numParams(%rax), %ecx #ecx: targetFunc->numParams
1604 // sub %rbx, %rdx #edx: n_args
1605 // shr $4, rdx
1606 a. mov_reg64_reg64(rStashedAR, rdx);
1607 a. load_reg64_disp_reg32(rax, Func::numParamsOff(), rcx);
1608 a. sub_reg64_reg64(rVmSp, rdx);
1609 BOOST_STATIC_ASSERT(sizeof(TypedValue) == 16);
1610 a. shr_imm32_reg32(4, rdx); // rdx: numPassed
1612 // If we didn't pass too many args, directly dereference
1613 // func->m_prologues.
1614 a. cmp_reg32_reg32(rdx, rcx);
1615 TCA bToFixedProloguesCheck = a.code.frontier;
1616 a. jcc8(CC_L, bToFixedProloguesCheck);
1618 // cmp $kNumFixedPrologues, %rdx
1619 // jl numParamsCheck
1620 TCA actualDispatch = a.code.frontier;
1622 // rcx: prologueIdx
1623 // rax = func->prologues[numParams]
1624 // jmp rax
1625 a. load_reg64_disp_index_reg64(rax,
1626 Func::prologueTableOff(),
1627 rdx,
1628 rax);
1629 a. jmp_reg(rax);
1630 a. ud2();
1632 // Hmm, more parameters passed than the function expected. Did we pass
1633 // kNumFixedPrologues or more? If not, %rdx is still a perfectly
1634 // legitimate index into the func prologue table.
1635 // numParamsCheck:
1636 // cmp $kNumFixedPrologues, %rcx
1637 // jl dispatch
1638 a.patchJcc8(bToFixedProloguesCheck, a.code.frontier); // numParamsCheck:
1639 a. cmp_imm32_reg32(kNumFixedPrologues, rdx);
1640 a. jcc8(CC_L, actualDispatch);
1642 // Too many gosh-darned parameters passed. Go to numExpected + 1, which
1643 // is always a "too many params" entry point.
1645 // mov %rdx, %rcx
1646 // add $1, %rcx
1647 // jmp dispatch
1648 a. load_reg64_disp_index_reg64(rax,
1649 // %rcx + 1
1650 Func::prologueTableOff() + sizeof(TCA),
1651 rcx,
1652 rax);
1653 a. jmp_reg(rax);
1654 a. ud2();
1655 return retval;
1658 // The funcGuard gets skipped and patched by other code, so we have some
1659 // magic offsets.
1660 static const int kFuncMovImm = 6; // Offset to the immediate for expected func
1661 static const int kFuncGuardLen = 23;
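// Layout sketch (derived from emitFuncGuard/skipFuncCheck below): the guard
// loads ar->m_func into rax, then emits `mov $func, %rdx; cmp %rax, %rdx;
// jnz redispatch'. The mov's 8-byte immediate starts kFuncMovImm bytes into
// the guard (rex prefix + opcode occupy the two bytes before it), and the
// whole sequence is nop-padded out to kFuncGuardLen bytes so callers can
// skip it with `dest + kFuncGuardLen'.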
1663 template<typename T>
1664 static T*
1665 funcGuardToFuncImm(TCA funcGuard) {
1666 T* retval = (T*)(funcGuard + kFuncMovImm + (2 - sizeof(T)/4));
1667 // We padded these so the immediate would fit inside an aligned 8-byte region,
1668 // so the xor of the address of the first byte with the address of the last
1669 // byte should only be nonzero in the bottom 3 bits.
1670 ASSERT(((uintptr_t(retval) ^ (uintptr_t(retval + 1) - 1)) & ~7) == 0);
1671 return retval;
1674 static inline bool
1675 funcGuardIsForFunc(TCA funcGuard, const Func* func) {
1676 intptr_t iptr = uintptr_t(func);
1677 if (deltaFits(iptr, sz::dword)) {
1678 return *funcGuardToFuncImm<int32_t>(funcGuard) == iptr;
1680 return *funcGuardToFuncImm<int64_t>(funcGuard) == iptr;
1683 static void
1684 disableFuncGuard(TCA funcGuard, Func* func) {
1685 ASSERT(funcGuardIsForFunc(funcGuard, func));
1686 if (deltaFits((intptr_t)func, sz::dword)) {
1687 *funcGuardToFuncImm<int32_t>(funcGuard) = 0;
1688 } else {
1689 *funcGuardToFuncImm<int64_t>(funcGuard) = 0;
1691 ASSERT(!funcGuardIsForFunc(funcGuard, func));
1695 TranslatorX64::emitFuncGuard(X64Assembler& a, const Func* func) {
1696 ASSERT(kScratchCrossTraceRegs.contains(rax));
1697 ASSERT(kScratchCrossTraceRegs.contains(rdx));
1699 // Ensure the immediate is safely smashable; the immediate needs
1700 // to be at a qword boundary, so we need to start the movImm at
1701 // (kAlign - kFuncMovImm) % 8.
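// Worked example: with kAlign == 8 and kFuncMovImm == 6, a frontier whose
// low bits are 5 gets ((8 - 6) - 5) & 7 == 5 bytes of nop, leaving the low
// bits at (5 + 5) & 7 == 2 == kAlign - kFuncMovImm, so the mov's 8-byte
// immediate lands on a qword boundary.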
1702 static const int kAlign = 8;
1703 static const int kAlignMask = kAlign - 1;
1704 int loBits = uintptr_t(a.code.frontier) & kAlignMask;
1705 a.emitNop(((kAlign - kFuncMovImm) - loBits) & kAlignMask);
1706 ASSERT((uintptr_t(a.code.frontier) & kAlignMask) == kAlign - kFuncMovImm);
1707 TCA aStart = a.code.frontier;
1708 a. load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
1709 ASSERT((a.code.frontier - aStart) ==
1710 (kFuncMovImm - 2 /* rex + movimmOpcode */));
1711 a. mov_imm64_reg(uint64_t(func), rdx);
1712 a. cmp_reg64_reg64(rax, rdx);
1714 if (!m_funcPrologueRedispatch) {
1715 m_funcPrologueRedispatch = emitPrologueRedispatch(astubs);
1717 a. jnz(m_funcPrologueRedispatch);
1718 ASSERT(a.code.frontier - aStart <= kFuncGuardLen);
1719 a.emitNop(kFuncGuardLen - (a.code.frontier - aStart));
1720 ASSERT(a.code.frontier - aStart == kFuncGuardLen);
1721 return aStart;
1725 skipFuncCheck(TCA dest) {
1726 if (!dest || dest == (TCA)fcallHelperThunk) return dest;
1727 return dest + kFuncGuardLen;
1731 * funcPrologue --
1733 * Given a callee and a number of args, match up to the callee's
1734 * argument expectations and dispatch.
1736 * Call/return hand-shaking is a bit funny initially. At translation time,
1737 * we don't necessarily know what function we're calling. For instance,
1739 * f(g());
1741 * Will lead to a set of basic blocks like:
1743 * b1: pushfuncd "f"
1744 * pushfuncd "g"
1745 * fcall
1746 * b2: fcall
1748 * The fcall labelled "b2" above is not statically bindable in our
1749 * execution model.
1751 * We decouple the call work into a per-callsite portion, responsible
1752 * for recording the return address, and a per-(callee, numArgs) portion,
1753 * responsible for fixing up arguments and dispatching to remaining
1754 * code. We call the per-callee portion a "prologue."
1756 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
1757 * we're running "between" basic blocks, with all VM registers sync'ed.
1758 * However, we're also called in the middle of basic blocks, when dropping
1759 * entries into func->m_prologues. So don't go around using the
1760 * translation-time values of vmfp()/vmsp(), since they have an
1761 * unpredictable relationship to the source.
1763 bool
1764 TranslatorX64::checkCachedPrologue(const Func* func, int paramIdx,
1765 TCA& prologue) const {
1766 prologue = (TCA)func->getPrologue(paramIdx);
1767 if (prologue != (TCA)fcallHelperThunk && !s_replaceInFlight) {
1768 TRACE(1, "cached prologue %s(%d) -> cached %p\n",
1769 func->fullName()->data(), paramIdx, prologue);
1770 ASSERT(isValidCodeAddress(prologue));
1771 return true;
1773 return false;
1777 TranslatorX64::funcPrologue(Func* func, int nPassed) {
1778 func->validate();
1779 TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
1780 int numParams = func->numParams();
1781 int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
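// E.g. for a function with numParams == 3: nPassed of 0..3 selects prologue
// slots 0..3, and any nPassed > 3 collapses to slot 4, the shared
// "too many args" entry point (see emitPrologueRedispatch above).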
1783 bool funcIsMagic = func->isMagic();
1785 // Do a quick test before grabbing the write lease
1786 TCA prologue;
1787 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1789 // If the translator is getting replaced out from under us, refuse to
1790 // provide a prologue; we don't know whether this request is running on the
1791 // old or new context.
1792 LeaseHolder writer(s_writeLease);
1793 if (!writer || s_replaceInFlight) return NULL;
1794 // Double check the prologue array now that we have the write lease
1795 // in case another thread snuck in and set the prologue already.
1796 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
1798 SpaceRecorder sr("_FuncPrologue", a);
1799 // If we're close to a cache line boundary, just burn some space to
1800 // try to keep the func and its body on fewer total lines.
1801 if (((uintptr_t)a.code.frontier & kX64CacheLineMask) >= 32) {
1802 moveToAlign(a, kX64CacheLineSize);
1804 // Careful: this isn't necessarily the real entry point. For funcIsMagic
1805 // prologues, this is just a possible prologue.
1806 TCA aStart = a.code.frontier;
1807 TCA start = aStart;
1808 TCA stubStart = astubs.code.frontier;
1810 // Guard: we're in the right callee. This happens in magicStart for
1811 // magic callees.
1812 if (!funcIsMagic) {
1813 start = aStart = emitFuncGuard(a, func);
1816 emitRB(a, RBTypeFuncPrologueTry, func->fullName()->data());
1817 // Guard: we have enough stack space to complete this function.
1818 emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
1820 // NB: We have most of the register file to play with, since we know
1821 // we're between BB's. So, we hardcode some registers here rather
1822 // than using the scratch allocator.
1823 TRACE(2, "funcPrologue: user function: %s\n", func->name()->data());
1825 // Add a counter for the translation if requested
1826 if (RuntimeOption::EvalJitTransCounters) {
1827 emitTransCounterInc(a);
1830 if (!funcIsMagic) {
1831 // entry point for magic methods comes later
1832 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1835 SrcKey skFuncBody = emitPrologue(func, nPassed);
1837 if (funcIsMagic) {
1838 // the entry point for magic methods is here
1839 TCA magicStart = emitFuncGuard(a, func);
1840 ASSERT(numParams == 2);
1841 emitRB(a, RBTypeFuncEntry, func->fullName()->data());
1842 // Special __call prologue
1843 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
1844 emitCall(a, TCA(TranslatorX64::shuffleArgsForMagicCall));
1845 // if shuffleArgs returns 0, that means this was not a magic call
1846 // and we should proceed to a prologue specialized for nPassed;
1847 // otherwise, proceed to a prologue specialized for nPassed==numParams (2).
1848 if (nPassed == 2) {
1849 a.jmp(start);
1850 } else {
1851 a.test_reg64_reg64(rax, rax);
1852 // z ==> not a magic call, go to prologue for nPassed
1853 if (deltaFits(start - (a.code.frontier + kJcc8Len), sz::byte)) {
1854 a.jcc8(CC_Z, start);
1855 } else {
1856 a.jcc(CC_Z, start);
1858 // this was a magic call
1859 // nPassed == 2
1860 // Fix up hardware stack pointer
1861 nPassed = 2;
1862 a. lea_reg64_disp_reg64(rStashedAR, -cellsToBytes(nPassed), rVmSp);
1863 // Optimization TODO: Reuse the prologue for args == 2
1864 emitPrologue(func, nPassed);
1866 start = magicStart;
1868 ASSERT(funcGuardIsForFunc(start, func));
1869 TRACE(2, "funcPrologue tx64 %p %s(%d) setting prologue %p\n",
1870 this, func->fullName()->data(), nPassed, start);
1871 ASSERT(isValidCodeAddress(start));
1872 func->setPrologue(paramIndex, start);
1874 addTranslation(TransRec(skFuncBody, func->unit()->md5(),
1875 TransProlog, aStart, a.code.frontier - aStart,
1876 stubStart, astubs.code.frontier - stubStart));
1878 recordGdbTranslation(skFuncBody, func->unit(),
1879 a, aStart,
1880 false, true);
1881 recordBCInstr(OpFuncPrologue, a, start);
1883 return start;
1886 static TCA callAndResume(ActRec *ar) {
1887 VMRegAnchor _(ar, true);
1888 g_vmContext->doFCall<true>(ar, g_vmContext->m_pc);
1889 return Translator::Get()->getResumeHelper();
1892 extern "C"
1893 TCA fcallHelper(ActRec* ar) {
1894 try {
1895 TCA tca =
1896 Translator::Get()->funcPrologue((Func*)ar->m_func, ar->numArgs());
1897 if (tca) {
1898 return tca;
1900 return callAndResume(ar);
1901 } catch (...) {
1903 The return address is set to __fcallHelperThunk,
1904 which has no unwind information. It's "logically"
1905 part of the TC, but the C++ unwinder won't know
1906 that. So point our return address at the called
1907 function's return address (which will be in the
1908 tc).
1909 Note that the registers really are clean - we
1910 just came from callAndResume which cleaned
1911 them for us - so we just have to tell the unwinder
1912 that.
1914 register ActRec* rbp asm("rbp");
1915 tl_regState = REGSTATE_CLEAN;
1916 rbp->m_savedRip = ar->m_savedRip;
1917 throw;
1922 TranslatorX64::emitInterceptPrologue(Func* func, TCA next) {
1923 TCA start = a.code.frontier;
1924 a.mov_imm64_reg((uintptr_t)&func->maybeIntercepted(), rax);
1925 a.cmp_imm8_disp_reg8(0, 0, rax);
1926 TCA jcc8PatchAddr = NULL;
1927 if (next == NULL) {
1928 jcc8PatchAddr = a.code.frontier;
1929 a.jcc8(CC_E, jcc8PatchAddr);
1930 } else {
1931 a.jcc(CC_E, next);
1933 // Prologues are not really sites for function entry yet; we can get
1934 // here via an optimistic bindCall. Check that the func is as expected.
1936 a. mov_imm64_reg(uint64_t(func), rax);
1937 a. cmp_reg64_disp_reg64(rax, AROFF(m_func), rStashedAR);
1939 JccBlock<CC_NZ> skip(a);
1940 a.call(getInterceptHelper());
1942 if (jcc8PatchAddr != NULL) {
1943 a.patchJcc8(jcc8PatchAddr, a.code.frontier);
1945 return start;
1948 void
1949 TranslatorX64::interceptPrologues(Func* func) {
1950 if (!RuntimeOption::EvalJitEnableRenameFunction &&
1951 !(func->attrs() & AttrDynamicInvoke)) {
1952 return;
1954 if (func->maybeIntercepted() == -1) {
1955 return;
1957 func->maybeIntercepted() = -1;
1958 ASSERT(s_writeLease.amOwner());
1959 int maxNumPrologues = func->numPrologues();
1960 for (int i = 0; i < maxNumPrologues; i++) {
1961 TCA prologue = func->getPrologue(i);
1962 if (prologue == (unsigned char*)fcallHelperThunk)
1963 continue;
1964 ASSERT(funcGuardIsForFunc(prologue, func));
1965 // There might already be calls hard-coded to this via FCall.
1966 // Blow away the immediate comparison, so that we always use the Func*'s
1967 // prologue table. We use 0 (== NULL on our architecture) as the bit
1968 // pattern for an impossible Func.
1970 // Note that we're modifying reachable code.
1971 disableFuncGuard(prologue, func);
1972 ASSERT(funcGuardIsForFunc(prologue, NULL));
1974 // There's a prologue already generated; redirect it to first
1975 // call the intercept helper. First, reset it (leaking the old
1976 // prologue), so funcPrologue will re-emit it.
1977 func->setPrologue(i, (TCA)fcallHelperThunk);
1978 TCA addr = funcPrologue(func, i);
1979 ASSERT(funcGuardIsForFunc(addr, func));
1980 ASSERT(addr);
1981 func->setPrologue(i, addr);
1982 TRACE(1, "interceptPrologues %s prologue[%d]=%p\n",
1983 func->fullName()->data(), i, (void*)addr);
1987 SrcKey
1988 TranslatorX64::emitPrologue(Func* func, int nPassed) {
1989 int numParams = func->numParams();
1990 ASSERT(IMPLIES(func->maybeIntercepted() == -1,
1991 m_interceptsEnabled));
1992 if (m_interceptsEnabled &&
1993 !func->isPseudoMain() &&
1994 (RuntimeOption::EvalJitEnableRenameFunction ||
1995 func->attrs() & AttrDynamicInvoke)) {
1996 emitInterceptPrologue(func);
1999 Offset dvInitializer = InvalidAbsoluteOffset;
2001 if (nPassed > numParams) {
2002 // Too many args; a weird case, so just call out. Stash ar
2003 // somewhere callee-saved.
2004 if (false) { // typecheck
2005 TranslatorX64::trimExtraArgs((ActRec*)NULL);
2007 a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
2008 emitCall(a, TCA(TranslatorX64::trimExtraArgs));
2009 // We'll fix rVmSp below.
2010 } else if (nPassed < numParams) {
2011 // Figure out which, if any, default value initializer to go to
2012 for (int i = nPassed; i < numParams; ++i) {
2013 const Func::ParamInfo& pi = func->params()[i];
2014 if (pi.hasDefaultValue()) {
2015 dvInitializer = pi.funcletOff();
2016 break;
2019 TRACE(1, "Only have %d of %d args; getting dvFunclet\n",
2020 nPassed, numParams);
2021 emitImmReg(a, nPassed, rax);
2022 // do { *(--rVmSp) = NULL; nPassed++; } while (nPassed < numParams);
2023 // This should be an unusual case, so optimize for code density
2024 // rather than execution speed; i.e., don't unroll the loop.
2025 TCA loopTop = a.code.frontier;
2026 a. sub_imm32_reg64(sizeof(Cell), rVmSp);
2027 a. add_imm32_reg32(1, rax);
2028 // XXX "missing argument" warnings need to go here
2029 emitStoreUninitNull(a, 0, rVmSp);
2030 a. cmp_imm32_reg32(numParams, rax);
2031 a. jcc8(CC_L, loopTop);
2034 // Entry point for numParams == nPassed is here.
2035 // Args are kosher. Frame linkage: set fp = ar.
2036 a. mov_reg64_reg64(rStashedAR, rVmFp);
2038 // We're in the callee frame; initialize locals. Unroll the loop all
2039 // the way if there are a modest number of locals to update;
2040 // otherwise, do it in a compact loop. If we're in a generator body,
2041 // named locals will be initialized by UnpackCont so we can leave
2042 // them alone here.
2043 int uninitLimit = func->isGenerator() ? func->numNamedLocals() : numParams;
2044 int numUninitLocals = func->numLocals() - uninitLimit;
2045 ASSERT(numUninitLocals >= 0);
2046 if (numUninitLocals > 0) {
2047 SpaceRecorder sr("_InitializeLocals", a);
2049 // If there are too many locals, emitting a loop to initialize them
2050 // is more compact than emitting a slew of movs inline.
2051 if (numUninitLocals > kLocalsToInitializeInline) {
2052 PhysReg loopReg = rcx;
2054 // rVmFp + rcx points to the count/type fields of the TypedValue we're
2055 // about to write to.
2056 int loopStart = -func->numLocals() * sizeof(TypedValue)
2057 + TVOFF(_count);
2058 int loopEnd = -uninitLimit * sizeof(TypedValue)
2059 + TVOFF(_count);
2061 emitImmReg(a, loopStart, loopReg);
2062 emitImmReg(a, 0, rdx);
2064 TCA topOfLoop = a.code.frontier;
2065 // do {
2066 // rVmFp[rcx].m_type = KindOfUninit;
2067 // } while(++rcx != loopEnd);
2069 // mov %rdx, 0x0(%rVmFp, %rcx, 1)
2070 a. emitRM(instr_mov, rVmFp, loopReg, 1, 0, rdx);
2071 a. add_imm32_reg64(sizeof(Cell), loopReg);
2072 a. cmp_imm32_reg64(loopEnd, loopReg);
2073 a. jcc8(CC_NE, topOfLoop);
2074 } else {
2075 PhysReg base;
2076 int disp, k;
2077 for (k = uninitLimit; k < func->numLocals(); ++k) {
2078 locToRegDisp(Location(Location::Local, k), &base, &disp);
2079 emitStoreUninitNull(a, disp, base);
2084 // Move rVmSp to the right place: just past all locals
2085 int frameCells = func->numSlotsInFrame();
2086 a. lea_reg64_disp_reg64(rVmFp, -cellsToBytes(frameCells), rVmSp);
2087 const Opcode* destPC = func->unit()->entry() + func->base();
2088 if (dvInitializer != InvalidAbsoluteOffset) {
2089 // dispatch to funclet.
2090 destPC = func->unit()->entry() + dvInitializer;
2092 SrcKey funcBody(func, destPC);
2094 // Check surprise flags in the same place as the interpreter: after
2095 // setting up the callee's frame but before executing any of its
2096 // code
2097 emitCheckSurpriseFlagsEnter(false, funcBody.m_offset - func->base(),
2098 frameCells);
2100 emitBindJmp(funcBody);
2101 return funcBody;
2104 void
2105 TranslatorX64::emitBindCall(const Tracelet& t,
2106 const NormalizedInstruction &ni,
2107 Offset atCall, Offset afterCall) {
2108 int numArgs = ni.imm[0].u_IVA;
2110 // If this is a call to a builtin and we don't need any argument
2111 // munging, we can skip the prologue system and do it inline.
2112 if (ni.funcd && ni.funcd->isBuiltin() &&
2113 numArgs == ni.funcd->numParams()) {
2114 ASSERT(ni.funcd->numLocals() == ni.funcd->numParams());
2115 ASSERT(ni.funcd->numIterators() == 0);
2116 a. lea_reg64_disp_reg64(rVmSp, cellsToBytes(numArgs), rVmFp);
2117 emitCheckSurpriseFlagsEnter(true, 0, numArgs);
2118 // rVmSp is already correctly adjusted, because there are no locals
2119 // other than the arguments passed.
2120 return emitNativeImpl(ni.funcd, false /* don't jump to return */);
2123 // Stash callee's rVmFp into rStashedAR for the callee's prologue
2124 a. lea_reg64_disp_reg64(rVmSp, cellsToBytes(numArgs), rStashedAR);
2125 emitBindCallHelper(rStashedAR, ni.source, ni.funcd, numArgs, (bool)ni.funcd);
2126 return;
2129 void
2130 TranslatorX64::emitBindCallHelper(register_name_t stashedAR,
2131 SrcKey srcKey,
2132 const Func* funcd,
2133 int numArgs,
2134 bool isImmutable) {
2135 // Whatever prologue we're branching to will check at runtime that we
2136 // went to the right Func*, correcting if necessary. We treat the first
2137 // Func we encounter as a decent prediction. Make space to burn in a
2138 // TCA.
2139 ReqBindCall* req = m_globalData.alloc<ReqBindCall>();
2140 a. mov_reg64_reg64(rStashedAR, serviceReqArgRegs[1]);
2141 prepareForSmash(kJmpLen);
2142 TCA toSmash = a.code.frontier;
2143 a. jmp(emitServiceReq(false, REQ_BIND_CALL, 1ull, req));
2145 TRACE(1, "will bind static call: tca %p, this %p, funcd %p\n",
2146 toSmash, this, funcd);
2147 req->m_toSmash = toSmash;
2148 req->m_nArgs = numArgs;
2149 req->m_sourceInstr = srcKey;
2150 req->m_isImmutable = isImmutable;
2152 return;
2155 // for documentation see bindJmpccFirst below
2156 void
2157 TranslatorX64::emitCondJmp(const SrcKey &skTaken, const SrcKey &skNotTaken,
2158 ConditionCode cc) {
2159 // should be true for SrcKeys generated via OpJmpZ/OpJmpNZ
2160 ASSERT(skTaken.m_funcId == skNotTaken.m_funcId);
2162 // reserve space for a smashable jnz/jmp pair; both initially point
2163 // to our stub
2164 prepareForSmash(kJmpLen + kJmpccLen);
2165 TCA old = a.code.frontier;
2167 moveToAlign(astubs);
2168 TCA stub = astubs.code.frontier;
2170 // begin code for the stub
2172 // We need to be careful here, as we are passing an extra parameter to
2173 // REQ_BIND_JMPCC_FIRST. However, we can't pass this parameter via
2174 // emitServiceReq because that only supports constants/immediates, so
2175 // compute the last argument via setcc.
2176 astubs.setcc(cc, serviceReqArgRegs[4]);
2177 emitServiceReq(false /* align */, REQ_BIND_JMPCC_FIRST, 4ull,
2178 old,
2179 uint64_t(skTaken.offset()),
2180 uint64_t(skNotTaken.offset()),
2181 uint64_t(cc));
2183 a.jcc(cc, stub); // MUST use 4-byte immediate form
2184 a.jmp(stub); // MUST use 4-byte immediate form
2187 static void skToName(const SrcKey& sk, char* name) {
2188 sprintf(name, "sk_%08lx_%05d",
2189 long(sk.m_funcId), sk.offset());
2192 static void skToClusterName(const SrcKey& sk, char* name) {
2193 sprintf(name, "skCluster_%08lx_%05d",
2194 long(sk.m_funcId), sk.offset());
2197 static void translToName(const TCA tca, char* name) {
2198 sprintf(name, "tc_%p", tca);
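// drawCFG --
//   Dump the SrcDB as a Graphviz "dot" graph: one cluster per SrcKey, one
//   node per translation, and an edge (colored by branch type) for each
//   incoming branch recorded against the destination SrcKey.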
2201 void TranslatorX64::drawCFG(std::ofstream& out) const {
2202 if (!isTransDBEnabled()) return;
2203 const char* indent = " ";
2204 static int genCount;
2205 int numSrcKeys = 0;
2206 int numTranslations = 0;
2207 out << "digraph srcdb" << genCount++ <<" {\n";
2208 out << indent << "size = \"8,11\";\n";
2209 out << indent << "ratio = fill;\n";
2210 for (SrcDB::const_iterator entry = m_srcDB.begin();
2211 entry != m_srcDB.end(); ++entry) {
2212 const SrcKey sk = SrcKey::fromAtomicInt(entry->first);
2213 // 1 subgraph per srcKey.
2214 char name[64];
2215 skToClusterName(sk, name);
2216 numSrcKeys++;
2217 out << indent << "subgraph " << name << "{\n";
2218 char* indent = " ";
2219 skToName(sk, name);
2220 out << indent << name << "[shape=box];\n";
2221 const vector<TCA>& transls = entry->second->translations();
2222 for (vector<TCA>::const_iterator t = transls.begin(); t != transls.end();
2223 ++t) {
2224 out << indent << "// Translations: " << transls.size() << "\n";
2225 char transname[64];
2226 translToName(*t, transname);
2227 numTranslations++;
2228 out << indent << transname << "[fontsize=11.0];\n";
2229 out << indent << name << " -> " << transname << ";\n";
2231 // And, all translations on the same line
2232 out << indent << "{ rank = same; ";
2233 out << name << " ";
2234 for (vector<TCA>::const_iterator t = transls.begin(); t != transls.end();
2235 ++t) {
2236 char transname[64];
2237 translToName(*t, transname);
2238 out << transname << " ";
2240 out << indent << "}\n"; // subgraph
2241 out << indent << "}\n";
2244 // OK! Those were all the nodes. Now edges. While edges are physically
2245 // from translation to translation, they're virtually from srcKey to
2246 // srcKey, and that is how the db represents them.
2247 for (SrcDB::const_iterator entry = m_srcDB.begin(); entry != m_srcDB.end();
2248 ++entry) {
2249 char destName[64];
2250 skToName(SrcKey::fromAtomicInt(entry->first), destName);
2251 const vector<IncomingBranch>& ibs = entry->second->incomingBranches();
2252 out << indent << "// incoming branches to " << destName << "\n";
2253 for (vector<IncomingBranch>::const_iterator ib = ibs.begin();
2254 ib != ibs.end(); ++ib) {
2255 // Find the start of the translation that contains this branch
2256 const char *branchTypeToColorStr[] = {
2257 "black", // JMP
2258 "green", // JZ
2259 "red", // JNZ
2261 TransDB::const_iterator lowerTCA = m_transDB.lower_bound(ib->m_src);
2262 ASSERT(lowerTCA != m_transDB.end());
2263 char srcName[64];
2264 const TransRec* transRec = this->getTransRec(lowerTCA->second);
2265 skToName(transRec->src, srcName);
2266 out << indent << srcName << " -> " << destName << "[ color = " <<
2267 branchTypeToColorStr[ib->m_type] << "];\n";
2270 out << indent << "// " << numSrcKeys << " srckeys, " << numTranslations <<
2271 " tracelets\n";
2272 out << "}\n\n";
2276 * bindJmp --
2278 * Runtime service handler that patches a jmp to the translation of
2279 * u:dest from toSmash.
2282 TranslatorX64::bindJmp(TCA toSmash, SrcKey destSk, ServiceRequest req) {
2283 TCA tDest = getTranslation(&destSk, false, req == REQ_BIND_JMP_NO_IR);
2284 if (!tDest) return NULL;
2285 LeaseHolder writer(s_writeLease);
2286 if (!writer) return tDest;
2287 SrcRec* sr = getSrcRec(destSk);
2288 if (req == REQ_BIND_ADDR) {
2289 sr->chainFrom(a, IncomingBranch((TCA*)toSmash));
2290 } else if (req == REQ_BIND_JCC) {
2291 sr->chainFrom(getAsmFor(toSmash),
2292 IncomingBranch(IncomingBranch::JCC, toSmash));
2293 } else {
2294 sr->chainFrom(getAsmFor(toSmash), IncomingBranch(toSmash));
2296 return tDest;
2300 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
2302 * 1: j<CC> stubJmpccFirst
2303 * jmp stubJmpccFirst
2305 * Our "taken" argument tells us whether the branch at 1: was taken or
2306 * not; and therefore which of offTaken and offNotTaken to continue executing.
2307 * If we did take the branch, we now rewrite the code so that the branch is
2308 * straightened. This predicts that subsequent executions will go the same way
2309 * as the first execution.
2311 * jn<CC> stubJmpccSecond:offNotTaken
2312 * nop5 ; fallthru, or jmp if there's already a translation.
2313 * offTaken:
2315 * If we did not take the branch, we leave the sense of the condition
2316 * intact, while patching it up to go to the unexplored code:
2318 * j<CC> stubJmpccSecond:offTaken
2319 * nop5
2320 * offNotTaken:
2323 TranslatorX64::bindJmpccFirst(TCA toSmash,
2324 Offset offTaken, Offset offNotTaken,
2325 bool taken,
2326 ConditionCode cc) {
2327 const Func* f = curFunc();
2328 LeaseHolder writer(s_writeLease);
2329 if (!writer) return NULL;
2330 Offset offWillExplore = taken ? offTaken : offNotTaken;
2331 Offset offWillDefer = taken ? offNotTaken : offTaken;
2332 SrcKey dest(f, offWillExplore);
2333 TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
2334 "taken %d\n",
2335 offWillExplore, offWillDefer, cc, taken);
2337 // We want the branch to point to whichever side has not been explored
2338 // yet.
2339 if (taken) cc = ccNegate(cc);
2340 TCA stub =
2341 emitServiceReq(false, REQ_BIND_JMPCC_SECOND, 3,
2342 toSmash, uint64_t(offWillDefer), uint64_t(cc));
2344 Asm &as = getAsmFor(toSmash);
2345 // It's not clear where chainFrom should go to if as is astubs
2346 ASSERT(&as == &a);
2348 // can we just directly fall through?
2349 // a jmp + jz takes 5 + 6 = 11 bytes
2350 bool fallThru = toSmash + kJmpccLen + kJmpLen == as.code.frontier &&
2351 !m_srcDB.find(dest);
2353 TCA tDest;
2354 tDest = getTranslation(&dest, !fallThru /* align */);
2355 if (!tDest) {
2356 return 0;
2358 ASSERT(s_writeLease.amOwner());
2360 * Roll over the jcc and the jmp/fallthru. E.g., from:
2362 * toSmash: jcc <jmpccFirstStub>
2363 * toSmash+6: jmp <jmpccFirstStub>
2364 * toSmash+11: <probably the new translation == tdest>
2366 * to:
2368 * toSmash: j[n]z <jmpccSecondStub>
2369 * toSmash+6: nop5
2370 * toSmash+11: newHotness
2372 CodeCursor cg(as, toSmash);
2373 a.jcc(cc, stub);
2374 getSrcRec(dest)->chainFrom(as, IncomingBranch(as.code.frontier));
2375 TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
2376 return tDest;
2379 // smashes a jcc to point to a new destination
2381 TranslatorX64::bindJmpccSecond(TCA toSmash, const Offset off,
2382 ConditionCode cc) {
2383 const Func* f = curFunc();
2384 SrcKey dest(f, off);
2385 TCA branch = getTranslation(&dest, true);
2386 LeaseHolder writer(s_writeLease, NO_ACQUIRE);
2387 if (branch && writer.acquire()) {
2388 SrcRec* destRec = getSrcRec(dest);
2389 destRec->chainFrom(getAsmFor(toSmash),
2390 IncomingBranch(IncomingBranch::JCC, toSmash));
2392 return branch;
2395 static void emitJmpOrJcc(X64Assembler& a, int cc, TCA addr) {
2396 if (cc < 0) {
2397 a. jmp(addr);
2398 } else {
2399 a. jcc((ConditionCode)cc, addr);
2404 * emitBindJ --
2406 * Emit code to lazily branch (optionally on condition cc) to the
2407 * srckey in dest.
2408 * Assumes current basic block is closed (outputs synced, etc.).
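 *
 * Roughly: we reserve a smashable jmp (or jcc) that initially targets a
 * service-request stub in astubs; the stub raises `req` with (toSmash,
 * dest.offset()), and bindJmp() later smashes the jump to point straight
 * at dest's translation.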
2410 void
2411 TranslatorX64::emitBindJ(X64Assembler& _a, int cc,
2412 const SrcKey& dest, ServiceRequest req) {
2413 prepareForSmash(_a, cc < 0 ? (int)kJmpLen : kJmpccLen);
2414 TCA toSmash = _a.code.frontier;
2415 if (&_a == &astubs) {
2416 emitJmpOrJcc(_a, cc, toSmash);
2419 TCA sr = emitServiceReq(false, req, 2,
2420 toSmash, uint64_t(dest.offset()));
2422 if (&_a == &astubs) {
2423 CodeCursor cursor(_a, toSmash);
2424 emitJmpOrJcc(_a, cc, sr);
2425 } else {
2426 emitJmpOrJcc(_a, cc, sr);
2430 void
2431 TranslatorX64::emitBindJcc(X64Assembler& _a, ConditionCode cc,
2432 const SrcKey& dest,
2433 ServiceRequest req /* = REQ_BIND_JCC */) {
2434 emitBindJ(_a, cc, dest, req);
2437 void
2438 TranslatorX64::emitBindJmp(X64Assembler& _a,
2439 const SrcKey& dest,
2440 ServiceRequest req /* = REQ_BIND_JMP */) {
2441 emitBindJ(_a, -1, dest, req);
2444 void
2445 TranslatorX64::emitBindJmp(const SrcKey& dest) {
2446 emitBindJmp(a, dest);
2449 void
2450 TranslatorX64::emitStringCheck(X64Assembler& _a,
2451 PhysReg base, int offset, PhysReg tmp) {
2452 // Treat KindOfString and KindOfStaticString identically; they
2453 // are bitwise identical. This is a port of our IS_STRING_TYPE
2454 // macro to assembly, and will have to change in sync with it.
2455 static_assert(IS_STRING_TYPE(7) && IS_STRING_TYPE(6),
2456 "Assembly version of IS_STRING_TYPE needs to be updated");
2457 _a. load_reg64_disp_reg32(base, offset, tmp);
2458 _a. and_imm32_reg32((signed char)(0xfe), tmp); // use 1-byte immediate
2459 _a. cmp_imm32_reg32(6, tmp);
2462 void
2463 TranslatorX64::emitTypeCheck(X64Assembler& _a, DataType dt,
2464 PhysReg base, int offset,
2465 PhysReg tmp/*= InvalidReg*/) {
2466 offset += TVOFF(m_type);
2467 if (IS_STRING_TYPE(dt)) {
2468 LazyScratchReg scr(m_regMap);
2469 if (tmp == InvalidReg) {
2470 scr.alloc();
2471 tmp = *scr;
2473 emitStringCheck(_a, base, offset, tmp);
2474 } else {
2475 _a. cmp_imm32_disp_reg32(dt, offset, base);
2479 void
2480 TranslatorX64::checkType(X64Assembler& a,
2481 const Location& l,
2482 const RuntimeType& rtt,
2483 SrcRec& fail) {
2484 // We can get invalid inputs as a side effect of reading invalid
2485 // items out of BBs we truncate; they don't need guards.
2486 if (rtt.isVagueValue() || l.isThis()) return;
2488 if (m_useHHIR) {
2489 irCheckType(a, l, rtt, fail);
2490 return;
2493 PhysReg base;
2494 int disp = 0;
2495 SpaceRecorder sr("_CheckType", a);
2497 TRACE(1, Trace::prettyNode("Precond", DynLocation(l, rtt)) + "\n");
2499 locToRegDisp(l, &base, &disp);
2500 TRACE(2, "TypeCheck: %d(%%r%d)\n", disp, base);
2501 // Negative offsets from RSP are not yet allocated; they had
2502 // better not be inputs to the tracelet.
2503 ASSERT(l.space != Location::Stack || disp >= 0);
2504 if (Trace::moduleEnabled(Trace::stats, 2)) {
2505 Stats::emitInc(a, Stats::TraceletGuard_branch);
2507 if (rtt.isIter()) {
2508 a. cmp_imm32_disp_reg32(rtt.typeCheckValue(),
2509 disp + rtt.typeCheckOffset(),
2510 base);
2511 } else {
2512 emitTypeCheck(a, rtt.typeCheckValue(), base, disp, rax);
2514 emitFallbackJmp(fail);
2517 void
2518 TranslatorX64::emitFallbackJmp(SrcRec& dest) {
2519 prepareForSmash(kJmpccLen);
2520 dest.emitFallbackJump(a, a.code.frontier, CC_NZ);
2523 void
2524 TranslatorX64::emitFallbackJmp(Asm& as, SrcRec& dest) {
2525 prepareForSmash(as, kJmpccLen);
2526 dest.emitFallbackJump(as, as.code.frontier, CC_NZ);
2529 void
2530 TranslatorX64::emitFallbackUncondJmp(Asm& as, SrcRec& dest) {
2531 prepareForSmash(as, kJmpLen);
2532 dest.emitFallbackJump(as, as.code.frontier);
2535 void TranslatorX64::emitReqRetransNoIR(Asm& as, SrcKey& sk) {
2536 prepareForSmash(as, kJmpLen);
2537 TCA toSmash = as.code.frontier;
2538 if (&as == &astubs) {
2539 as.jmp(toSmash);
2542 TCA sr = emitServiceReq(REQ_RETRANSLATE_NO_IR, 2,
2543 toSmash, sk.offset());
2545 if (&as == &astubs) {
2546 CodeCursor cc(as, toSmash);
2547 as.jmp(sr);
2548 } else {
2549 as.jmp(sr);
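// packBitVec --
//   Pack up to 64 entries of `bits`, starting at index i (which must be a
//   multiple of 64), into one machine word; e.g. bits = {true, false, true},
//   i = 0 yields binary 101 == 5.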
2553 uint64_t TranslatorX64::packBitVec(const vector<bool>& bits, unsigned i) {
2554 uint64_t retval = 0;
2555 ASSERT(i % 64 == 0);
2556 ASSERT(i < bits.size());
2557 while (i < bits.size()) {
2558 retval |= uint64_t(bits[i]) << (i % 64);
2559 if ((++i % 64) == 0) {
2560 break;
2563 return retval;
2566 void
2567 TranslatorX64::checkRefs(X64Assembler& a,
2568 const SrcKey& sk,
2569 const RefDeps& refDeps,
2570 SrcRec& fail) {
2571 if (refDeps.size() == 0) {
2572 return;
2576 * We're still between BB's, so we're not using the real register
2577 * allocator.
2579 RegSet unusedRegs = kScratchCrossTraceRegs;
2580 DumbScratchReg rNumParams(unusedRegs);
2581 DumbScratchReg rMask(unusedRegs);
2582 DumbScratchReg rBits(unusedRegs);
2583 DumbScratchReg rExpectedBits(unusedRegs);
2584 DumbScratchReg rBitsValue(unusedRegs);
2585 DumbScratchReg rFunc(unusedRegs);
2587 // Set up guards for each pushed ActRec that we've made reffiness
2588 // assumptions about
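  // Each ArMap entry carries a bit mask (which argument positions we made an
  // assumption about) and the expected values (whether each such argument is
  // passed by reference). The code below loads the callee's refBitVec, masks
  // it, and compares against the expected bits, side-exiting through
  // emitFallbackJmp on any mismatch.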
2589 for (RefDeps::ArMap::const_iterator it = refDeps.m_arMap.begin();
2590 it != refDeps.m_arMap.end(); ++it) {
2591 // Be careful! The actual Func might have fewer refs than the number
2592 // of args we're passing. To forestall this, we're going to have to
2593 // keep checking i against the number of params. We consider invocations
2594 // with too many arguments to have passed their checks.
2595 int entryArDelta = it->first;
2597 if (m_useHHIR) {
2598 m_hhbcTrans->guardRefs(entryArDelta,
2599 it->second.m_mask,
2600 it->second.m_vals);
2601 continue;
2604 int32_t funcOff = cellsToBytes(entryArDelta) + AROFF(m_func);
2605 a. load_reg64_disp_reg64(rVmSp, funcOff, *rFunc); // rFunc <- Func*
2606 a. load_reg64_disp_reg32(*rFunc, Func::numParamsOff(),
2607 *rNumParams);
2608 a. load_reg64_disp_reg64(*rFunc, Func::refBitVecOff(),
2609 *rBits); // rBits <- m_refBitVec
2611 for (unsigned i = 0; i < it->second.m_mask.size(); i += 64) {
2612 ASSERT(i < it->second.m_vals.size());
2613 uint64_t mask = packBitVec(it->second.m_mask, i);
2614 if (mask == 0) {
2615 continue;
2617 uint64_t value = packBitVec(it->second.m_vals, i);
2619 emitImmReg(a, mask, *rMask);
2620 emitImmReg(a, value, *rExpectedBits);
2623 * Before trying to load this block off the bit vector, make
2624 * sure it actually exists. It's ok to index past numArgs
2625 * within one of these words, because the remaining bits will be
2626 * set to zero (or one in the case of the variadic by ref
2627 * builtins).
2629 if (Trace::moduleEnabled(Trace::stats, 2)) {
2630 Stats::emitInc(a, Stats::TraceletGuard_branch);
2632 a. cmp_imm32_reg32(i + 1, *rNumParams);
2634 IfElseBlock<CC_L> ifFewEnoughArgs(a);
2636 // Load the appropriate qword off of the top actRec's func*.
2637 SKTRACE(2, sk, "reffiness mask %lx value %lx, ar @%d\n",
2638 mask, value, entryArDelta);
2639 a. load_reg64_disp_reg64(*rBits, sizeof(uint64) * (i / 64),
2640 *rBitsValue); // rBitsValue <- rBits[i / 64]
2641 a. and_reg64_reg64(*rMask, *rBitsValue); // rBitsValue &= rMask
2642 a. cmp_reg64_reg64(*rBitsValue, *rExpectedBits);
2643 emitFallbackJmp(fail);
2645 ifFewEnoughArgs.Else();
2647 a. test_imm32_disp_reg32(AttrVariadicByRef,
2648 Func::attrsOff(),
2649 *rFunc);
2651 IfElseBlock<CC_NZ> ifNotWeirdBuiltin(a);
2653 // Other than these builtins, we need to have all by value
2654 // args in this case.
2655 a. test_reg64_reg64(*rExpectedBits, *rExpectedBits);
2656 emitFallbackJmp(fail);
2658 ifNotWeirdBuiltin.Else();
2660 // If it is one of the weird builtins that has reffiness for
2661 // additional args, we have to make sure our expectation is
2662 // that these additional args are by ref.
2663 a. cmp_imm32_reg64((signed int)(-1ull & mask), *rExpectedBits);
2664 emitFallbackJmp(fail);
2672 * emitRetFromInterpretedFrame --
2674 * When the interpreter pushes a call frame, there is necessarily no
2675 * machine RIP available to return to. This helper fishes out the
2676 * destination from the frame and redirects execution to it via enterTC.
2679 TranslatorX64::emitRetFromInterpretedFrame() {
2680 int32_t arBase = sizeof(ActRec) - sizeof(Cell);
2681 moveToAlign(astubs);
2682 TCA stub = astubs.code.frontier;
2684 // Marshal our own args by hand here.
2684 astubs. lea_reg64_disp_reg64(rVmSp, -arBase, serviceReqArgRegs[0]);
2685 astubs. mov_reg64_reg64(rVmFp, serviceReqArgRegs[1]);
2686 (void) emitServiceReq(false, REQ_POST_INTERP_RET, 0ull);
2687 return stub;
2691 * fcallHelperThunk
2692 * Note: Assumes rStashedAR is r15
2694 static_assert(rStashedAR == r15,
2695 "__fcallHelperThunk needs to be modified for ABI changes");
2696 asm (
2697 ".byte 0\n"
2698 ".align 16\n"
2699 ".globl __fcallHelperThunk\n"
2700 "__fcallHelperThunk:\n"
2701 #ifdef HHVM
2702 "mov %r15, %rdi\n"
2703 "call fcallHelper\n"
2704 "jmp *%rax\n"
2705 #endif
2706 "ud2\n"
2710 * enterTCHelper
2712 * This helper routine is written in x64 assembly to take care of the details
2713 * when transferring control between jitted code and the translator.
2714 * rdi: Cell* vm_sp
2715 * rsi: Cell* vm_fp
2716 * rdx: unsigned char* start
2717 * rcx: TReqInfo* infoPtr
2718 * r8: ActRec* firstAR
2719 * r9: uint8_t* targetCacheBase
2721 * Note: enterTCHelper does not save callee-saved registers except
2722 * %rbp. This means when we call it from C++, we have to tell gcc to
2723 * clobber all the other callee-saved registers.
2725 static_assert(rVmSp == rbx &&
2726 rVmFp == rbp &&
2727 rVmTl == r12 &&
2728 rStashedAR == r15,
2729 "__enterTCHelper needs to be modified to use the correct ABI");
2730 static_assert(kReservedRSPScratchSpace == 0x80,
2731 "enterTCHelper needs to be updated for changes to "
2732 "kReservedRSPScratchSpace");
2733 asm (
2734 ".byte 0\n"
2735 ".align 16\n"
2736 "__enterTCHelper:\n"
2737 // Prologue
2738 ".cfi_startproc\n"
2739 "push %rbp\n"
2740 ".cfi_adjust_cfa_offset 8\n" // offset to previous frame relative to %rsp
2741 ".cfi_offset rbp, -16\n" // Where to find previous value of rbp
2743 // Set firstAR->m_savedRbp to point to this frame.
2744 "mov %rsp, (%r8)\n"
2746 // Save infoPtr
2747 "push %rcx\n"
2748 ".cfi_adjust_cfa_offset 8\n"
2750 // Set up special registers used for translated code.
2751 "mov %rdi, %rbx\n" // rVmSp
2752 "mov %r9, %r12\n" // rVmTl
2753 "mov %rsi, %rbp\n" // rVmFp
2754 "mov 0x30(%rcx), %r15\n" // rStashedAR saved across service requests
2757 * The translated code we are about to enter does not follow the
2758 * standard prologue of pushing rbp at entry, so we are purposely 8
2759 * bytes short of 16-byte alignment before this call instruction so
2760 * that the return address being pushed will make the native stack
2761 * 16-byte aligned.
2764 "sub $0x80, %rsp\n" // kReservedRSPScratchSpace
2765 // May need cfi_adjust_cfa_offset annotations: Task #1747813
2766 "call *%rdx\n"
2767 "add $0x80, %rsp\n"
2769 // Restore infoPtr into %rbx
2770 "pop %rbx\n"
2771 ".cfi_adjust_cfa_offset -8\n"
2773 // Copy the values passed from jitted code into *infoPtr
2774 "mov %rdi, 0x0(%rbx)\n"
2775 "mov %rsi, 0x8(%rbx)\n"
2776 "mov %rdx, 0x10(%rbx)\n"
2777 "mov %rcx, 0x18(%rbx)\n"
2778 "mov %r8, 0x20(%rbx)\n"
2779 "mov %r9, 0x28(%rbx)\n"
2781 // Service request "callee-saved". (Returnee-saved?)
2782 "mov %r15, 0x30(%rbx)\n"
2784 // Epilogue
2785 "pop %rbp\n"
2786 ".cfi_restore rbp\n"
2787 ".cfi_adjust_cfa_offset -8\n"
2788 "ret\n"
2789 ".cfi_endproc\n"
2792 struct TReqInfo {
2793 uintptr_t requestNum;
2794 uintptr_t args[5];
2796 // Some TC registers need to be preserved across service requests.
2797 uintptr_t saved_rStashedAr;
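  // Field layout matches the hard-coded offsets in __enterTCHelper above:
  // requestNum at 0x0, args[0..4] at 0x8-0x28, and saved_rStashedAr at 0x30
  // (the %r15 value restored on entry and stored back on exit).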
2800 void enterTCHelper(Cell* vm_sp,
2801 Cell* vm_fp,
2802 TCA start,
2803 TReqInfo* infoPtr,
2804 ActRec* firstAR,
2805 void* targetCacheBase) asm ("__enterTCHelper");
2807 struct DepthGuard {
2808 static __thread int m_depth;
2809 DepthGuard() { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
2810 ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }
2812 __thread int DepthGuard::m_depth;
2813 void
2814 TranslatorX64::enterTC(SrcKey sk) {
2815 using namespace TargetCache;
2816 TCA start = getTranslation(&sk, true);
2818 DepthGuard d;
2819 TReqInfo info;
2820 const uintptr_t& requestNum = info.requestNum;
2821 uintptr_t* args = info.args;
2822 for (;;) {
2823 ASSERT(vmfp() >= vmsp() - 1);
2824 ASSERT(sizeof(Cell) == 16);
2825 ASSERT(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
2826 ASSERT(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
2828 TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
2829 vmfp(), ((ActRec*)vmfp())->m_func->name()->data(), vmsp());
2830 s_writeLease.gremlinUnlock();
2831 // Keep dispatching until we end up somewhere the translator
2832 // recognizes, or we luck out and the leaseholder exits.
2833 while (!start) {
2834 TRACE(2, "enterTC forwarding BB to interpreter\n");
2835 g_vmContext->m_pc = curUnit()->at(sk.offset());
2836 INC_TPC(interp_bb);
2837 g_vmContext->dispatchBB();
2838 sk = SrcKey(curFunc(), g_vmContext->getPC());
2839 start = getTranslation(&sk, true);
2841 ASSERT(start);
2842 ASSERT(isValidCodeAddress(start));
2843 tl_regState = REGSTATE_DIRTY;
2844 ASSERT(!s_writeLease.amOwner());
2845 curFunc()->validate();
2846 INC_TPC(enter_tc);
2848 // The asm volatile here is to force C++ to spill anything that
2849 // might be in a callee-saved register (aside from rbp).
2850 // enterTCHelper does not preserve these registers.
2851 asm volatile("" : : : "rbx","r12","r13","r14","r15");
2852 enterTCHelper(vmsp(), vmfp(), start, &info, vmFirstAR(),
2853 tl_targetCaches);
2854 asm volatile("" : : : "rbx","r12","r13","r14","r15");
2856 tl_regState = REGSTATE_CLEAN; // Careful: pc isn't sync'ed yet.
2857 // Debugging code: cede the write lease half the time.
2858 if (debug && (RuntimeOption::EvalJitStressLease)) {
2859 if (d.m_depth == 1 && (rand() % 2) == 0) {
2860 s_writeLease.gremlinLock();
2864 TRACE(4, "enterTC: %p fp%p sp%p } return\n", start,
2865 vmfp(), vmsp());
2866 TRACE(4, "enterTC: request(%s) args: %lx %lx %lx %lx %lx\n",
2867 reqName(requestNum),
2868 args[0], args[1], args[2], args[3], args[4]);
2869 ASSERT(vmfp() >= vmsp() - 1 || requestNum == REQ_EXIT);
2871 if (debug) {
2872 // Ensure that each case either returns, or drives start to a valid
2873 // value.
2874 start = TCA(0xbee5face);
2877 // The contract is that each case will either exit, by returning, or
2878 // set sk to the place where execution should resume, and optionally
2879 // set start to the hardware translation of the resumption point.
2881 // start and sk might be subtly different; i.e., there are cases where
2882 // start != NULL && start != getTranslation(sk). For instance,
2883 // REQ_BIND_CALL has not finished executing the OpCall when it gets
2884 // here, and has even done some work on its behalf. sk == OpFCall,
2885 // while start == the point in the TC that's "half-way through" the
2886 // Call instruction. If we punt to the interpreter, the interpreter
2887 // will redo some of the work that the translator has already done.
2888 INC_TPC(service_req);
2889 switch (requestNum) {
2890 case REQ_EXIT: {
2891 // fp is not valid anymore
2892 vmfp() = NULL;
2893 return;
2896 case REQ_BIND_CALL: {
2897 ReqBindCall* req = (ReqBindCall*)args[0];
2898 ActRec* calleeFrame = (ActRec*)args[1];
2899 TCA toSmash = req->m_toSmash;
2900 Func *func = const_cast<Func*>(calleeFrame->m_func);
2901 int nArgs = req->m_nArgs;
2902 bool isImmutable = req->m_isImmutable;
2903 TCA dest = tx64->funcPrologue(func, nArgs);
2904 TRACE(2, "enterTC: bindCall %s -> %p\n", func->name()->data(), dest);
2905 if (isImmutable) {
2906 // If we *know* we're calling the right function, don't bother
2907 // with the dynamic check of ar->m_func.
2908 dest = skipFuncCheck(dest);
2909 TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
2910 func->name()->data(), dest);
2912 LeaseHolder writer(s_writeLease, NO_ACQUIRE);
2913 if (dest && writer.acquire()) {
2914 TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
2915 smash(tx64->getAsmFor(toSmash), toSmash, dest);
2916 // sk: stale, but doesn't matter since we have a valid dest TCA.
2917 } else {
2918 // We need translator help; we're not at the callee yet, so
2919 // roll back. The prelude has done some work already, but it
2920 // should be safe to redo.
2921 TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
2922 toSmash, dest);
2923 sk = req->m_sourceInstr;
2925 start = dest;
2926 } break;
2928 case REQ_BIND_SIDE_EXIT:
2929 case REQ_BIND_JMP:
2930 case REQ_BIND_JCC:
2931 case REQ_BIND_JMP_NO_IR:
2932 case REQ_BIND_ADDR: {
2933 TCA toSmash = (TCA)args[0];
2934 Offset off = args[1];
2935 sk = SrcKey(curFunc(), off);
2936 if (requestNum == REQ_BIND_SIDE_EXIT) {
2937 SKTRACE(3, sk, "side exit taken!\n");
2939 start = bindJmp(toSmash, sk, (ServiceRequest)requestNum);
2940 } break;
2942 case REQ_BIND_JMPCC_FIRST: {
2943 TCA toSmash = (TCA)args[0];
2944 Offset offTaken = (Offset)args[1];
2945 Offset offNotTaken = (Offset)args[2];
2946 ConditionCode cc = ConditionCode(args[3]);
2947 bool taken = int64(args[4]) & 1;
2948 start = bindJmpccFirst(toSmash, offTaken, offNotTaken, taken, cc);
2949 // SrcKey: we basically need to emulate the fail
2950 sk = SrcKey(curFunc(), taken ? offTaken : offNotTaken);
2951 } break;
2953 case REQ_BIND_JMPCC_SECOND: {
2954 TCA toSmash = (TCA)args[0];
2955 Offset off = (Offset)args[1];
2956 ConditionCode cc = ConditionCode(args[2]);
2957 start = bindJmpccSecond(toSmash, off, cc);
2958 sk = SrcKey(curFunc(), off);
2959 } break;
2961 case REQ_BIND_REQUIRE: {
2962 ReqLitStaticArgs* rlsa = (ReqLitStaticArgs*)args[0];
2963 sk = SrcKey((Func*)args[1], (Offset)args[2]);
2964 start = getTranslation(&sk, true);
2965 if (start) {
2966 LeaseHolder writer(s_writeLease);
2967 if (writer) {
2968 SrcRec* sr = getSrcRec(sk);
2969 sr->chainFrom(a, IncomingBranch(&rlsa->m_pseudoMain));
2972 } break;
2974 case REQ_RETRANSLATE_NO_IR: {
2975 TCA toSmash = (TCA)args[0];
2976 sk = SrcKey(curFunc(), (Offset)args[1]);
2977 start = retranslateAndPatchNoIR(sk, true, toSmash);
2978 SKTRACE(2, sk, "retranslated (without IR) @%p\n", start);
2979 } break;
2981 case REQ_RETRANSLATE: {
2982 INC_TPC(retranslate);
2983 sk = SrcKey(curFunc(), (Offset)args[0]);
2984 start = retranslate(sk, true, RuntimeOption::EvalJitUseIR);
2985 SKTRACE(2, sk, "retranslated @%p\n", start);
2986 } break;
2988 case REQ_INTERPRET: {
2989 Offset off = args[0];
2990 int numInstrs = args[1];
2991 g_vmContext->m_pc = curUnit()->at(off);
2993 * We know the compilation unit has not changed; basic blocks do
2994 * not span files. I claim even exceptions do not violate this
2995 * axiom.
2997 ASSERT(numInstrs >= 0);
2998 ONTRACE(5, SrcKey(curFunc(), off).trace("interp: enter\n"));
2999 if (numInstrs) {
3000 s_perfCounters[tpc_interp_instr] += numInstrs;
3001 g_vmContext->dispatchN(numInstrs);
3002 } else {
3003 // numInstrs == 0 means it wants to dispatch until BB ends
3004 INC_TPC(interp_bb);
3005 g_vmContext->dispatchBB();
3007 SrcKey newSk(curFunc(), g_vmContext->getPC());
3008 SKTRACE(5, newSk, "interp: exit\n");
3009 sk = newSk;
3010 start = getTranslation(&newSk, true);
3011 } break;
3013 case REQ_POST_INTERP_RET: {
3014 // This is only responsible for the control-flow aspect of the Ret:
3015 // getting to the destination's translation, if any.
3016 ActRec* ar = (ActRec*)args[0];
3017 ActRec* caller = (ActRec*)args[1];
3018 ASSERT((Cell*) ar < vmsp()); // ar is already logically popped
3019 ASSERT((Cell*) caller > vmsp()); // caller is now active
3020 ASSERT((Cell*) caller == vmfp());
3021 Unit* destUnit = caller->m_func->unit();
3022 // Set PC so logging code in getTranslation doesn't get confused.
3023 vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
3024 SrcKey dest(caller->m_func, vmpc());
3025 sk = dest;
3026 start = getTranslation(&dest, true);
3027 TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
3028 ar->m_func->fullName()->data(),
3029 caller->m_func->fullName()->data());
3030 } break;
3032 case REQ_RESUME: {
3033 SrcKey dest(curFunc(), vmpc());
3034 sk = dest;
3035 start = getTranslation(&dest, true);
3036 } break;
3038 case REQ_STACK_OVERFLOW: {
3040 * we need to construct the pc of the fcall from the return
3041 * address (which will be after the fcall). Because fcall is
3042 * a variable length instruction, and because we sometimes
3043 * delete instructions from the instruction stream, we
3044 * need to use fpi regions to find the fcall.
3046 const FPIEnt* fe = curFunc()->findPrecedingFPI(
3047 curUnit()->offsetOf(vmpc()));
3048 vmpc() = curUnit()->at(fe->m_fcallOff);
3049 ASSERT(isFCallStar(*vmpc()));
3050 raise_error("Stack overflow");
3051 NOT_REACHED();
3055 NOT_REACHED();
3058 void TranslatorX64::resume(SrcKey sk) {
3059 enterTC(sk);
3063 * emitServiceReq --
3065 * Call a translator service co-routine. The code emitted here
3066 * reenters the enterTC loop, invoking the requested service. Control
3067 * will be returned non-locally to the next logical instruction in
3068 * the TC.
3070 * Return value is a destination; we emit the bulky service
3071 * request code into astubs.
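 *
 * The stub itself is small (illustrative):
 *
 *   <eager VM reg save>
 *   mov $arg_i, serviceReqArgRegs[i]   ; for each argument
 *   mov $req, %rdi
 *   ret        ; back into enterTCHelper -- the "weird hand-shaking" below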
3075 TranslatorX64::emitServiceReqVA(bool align, ServiceRequest req, int numArgs,
3076 va_list args) {
3077 if (align) {
3078 moveToAlign(astubs);
3080 TCA retval = astubs.code.frontier;
3081 emitEagerVMRegSave(astubs, SaveFP);
3083 * Move args into appropriate regs.
3085 TRACE(3, "Emit Service Req %s(", reqName(req));
3086 for (int i = 0; i < numArgs; i++) {
3087 uint64_t argVal = va_arg(args, uint64_t);
3088 TRACE(3, "%p,", (void*)argVal);
3089 emitImmReg(astubs, argVal, serviceReqArgRegs[i]);
3091 TRACE(3, ")\n");
3092 emitImmReg(astubs, req, rdi);
3094 * Weird hand-shaking with enterTC: reverse-call a service routine.
3096 astubs. ret();
3097 recordBCInstr(OpServiceRequest, astubs, retval);
3098 translator_not_reached(astubs);
3099 return retval;
3103 TranslatorX64::emitServiceReq(ServiceRequest req, int numArgs, ...) {
3104 va_list args;
3105 va_start(args, numArgs);
3106 TCA retval = emitServiceReqVA(true, req, numArgs, args);
3107 va_end(args);
3108 return retval;
3112 TranslatorX64::emitServiceReq(bool align, ServiceRequest req, int numArgs,
3113 ...) {
3114 va_list args;
3115 va_start(args, numArgs);
3116 TCA retval = emitServiceReqVA(align, req, numArgs, args);
3117 va_end(args);
3118 return retval;
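// emitTransCounterInc --
//   Bump this translation's counter with a "lock inc" through a scratch
//   register; emits nothing when the TransDB is disabled.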
3122 TranslatorX64::emitTransCounterInc(X64Assembler& a) {
3123 TCA start = a.code.frontier;
3124 if (!isTransDBEnabled()) return start;
3125 uint64* counterAddr = getTransCounterAddr();
3127 a.mov_imm64_reg((uint64)counterAddr, rScratch);
3128 a.emitLockPrefix();
3129 a.inc_mem64(rScratch, 0);
3131 return start;
3134 void
3135 TranslatorX64::spillTo(DataType type, PhysReg reg, bool writeType,
3136 PhysReg base, int disp) {
3137 X64Assembler& a = *m_spillFillCode;
3138 // Zero out the count at the same time as writing the type.
3139 SpaceRecorder sr("_Spill", a);
3141 Stats::emitInc(a, Stats::Tx64_Spill);
3142 emitStoreTypedValue(a, type, reg, disp, base, writeType);
3145 void
3146 TranslatorX64::spill(const Location& loc, DataType type,
3147 PhysReg reg, bool writeType) {
3148 PhysReg base;
3149 int disp;
3150 locToRegDisp(loc, &base, &disp);
3151 spillTo(type, reg, writeType, base, disp);
3152 TRACE(2, "%s: (%s, %lld) -> v: %d(r%d) type%d\n",
3153 __func__,
3154 loc.spaceName(), loc.offset, int(disp + TVOFF(m_data)), base, type);
3157 void
3158 TranslatorX64::fill(const Location& loc, PhysReg reg) {
3159 SpaceRecorder sr("_Fill", *m_spillFillCode);
3160 if (loc.isThis()) {
3161 m_spillFillCode->load_reg64_disp_reg64(rVmFp, AROFF(m_this), reg);
3162 return;
3164 PhysReg base;
3165 int disp;
3166 locToRegDisp(loc, &base, &disp);
3167 TRACE(2, "fill: (%s, %lld) -> reg %d\n",
3168 loc.spaceName(), loc.offset, reg);
3169 m_spillFillCode->load_reg64_disp_reg64(base, disp + TVOFF(m_data), reg);
3172 void TranslatorX64::fillByMov(PhysReg src, PhysReg dst) {
3173 SpaceRecorder sr("_FillMov", *m_spillFillCode);
3174 ASSERT(src != dst);
3175 m_spillFillCode->mov_reg64_reg64(src, dst);
3178 void
3179 TranslatorX64::loadImm(int64 immVal, PhysReg reg) {
3180 SpaceRecorder sr("_FillImm", *m_spillFillCode);
3181 TRACE(2, "loadImm: 0x%llx -> reg %d\n", immVal, reg);
3182 emitImmReg(*m_spillFillCode, immVal, reg);
3185 void
3186 TranslatorX64::poison(PhysReg dest) {
3187 static const bool poison = false;
3188 if (poison) {
3189 emitImmReg(*m_spillFillCode, 0xbadf00d105e5babe, dest);
3194 * Spill all dirty registers, mark all registers as 'free' in the
3195 * register file, and update rVmSp to point to the top of stack at
3196 * the end of the tracelet.
3198 void
3199 TranslatorX64::syncOutputs(const Tracelet& t) {
3200 syncOutputs(t.m_stackChange);
3204 * Same as above, except that it sets rVmSp to point to the top of
3205 * stack at the beginning of the specified instruction.
3207 void
3208 TranslatorX64::syncOutputs(const NormalizedInstruction& i) {
3209 syncOutputs(i.stackOff);
3212 void
3213 TranslatorX64::syncOutputs(int stackOff) {
3214 SpaceRecorder sr("_SyncOuts", a);
3215 TCA start = a.code.frontier;
3216 // Mark all stack locations above the top of stack as dead
3217 m_regMap.scrubStackEntries(stackOff);
3218 // Spill all dirty registers
3219 m_regMap.cleanAll();
3220 if (stackOff != 0) {
3221 TRACE(1, "syncOutputs: rVmSp + %d\n", stackOff);
3222 // t.stackChange is in negative Cells, not bytes.
3223 a. add_imm32_reg64(-cellsToBytes(stackOff), rVmSp);
3225 // All registers have been smashed for realz, yo
3226 m_regMap.smashRegs(kAllRegs);
3227 recordBCInstr(OpSyncOutputs, a, start);
3231 * getBinaryStackInputs --
3233 * Helper for a common pattern of instruction, where two items are popped
3234 * and one is pushed. The second item on the stack at the beginning of
3235 * the instruction is both a source and destination.
3237 static void
3238 getBinaryStackInputs(RegAlloc& regmap, const NormalizedInstruction& i,
3239 PhysReg& rsrc, PhysReg& rsrcdest) {
3240 ASSERT(i.inputs.size() == 2);
3241 ASSERT(i.outStack && !i.outLocal);
3242 rsrcdest = regmap.getReg(i.outStack->location);
3243 rsrc = regmap.getReg(i.inputs[0]->location);
3244 ASSERT(regmap.getReg(i.inputs[1]->location) == rsrcdest);
3247 // emitBox --
3248 // Leave a boxed version of input in RAX. Destroys the register
3249 // mapping.
3250 void
3251 TranslatorX64::emitBox(DataType t, PhysReg rSrc) {
3252 if (false) { // typecheck
3253 RefData* retval = tvBoxHelper(KindOfArray, 0xdeadbeef01ul);
3254 (void)retval;
3256 // tvBoxHelper will set the refcount of the inner cell to 1
3257 // for us. Because the inner cell now holds a reference to the
3258 // original value, we don't need to perform a decRef.
3259 EMIT_CALL(a, tvBoxHelper, IMM(t), R(rSrc));
3262 // emitUnboxTopOfStack --
3263 // Unbox the known-to-be Variant on top of stack in place.
3264 void
3265 TranslatorX64::emitUnboxTopOfStack(const NormalizedInstruction& i) {
3266 const vector<DynLocation*>& inputs = i.inputs;
3268 ASSERT(inputs.size() == 1);
3269 ASSERT(i.outStack && !i.outLocal);
3270 ASSERT(inputs[0]->isStack());
3271 ASSERT(i.outStack && i.outStack->location == inputs[0]->location);
3272 DataType outType = inputs[0]->rtt.innerType();
3273 ASSERT(outType != KindOfInvalid);
3274 ASSERT(outType == i.outStack->outerType());
3275 PhysReg rSrc = getReg(inputs[0]->location);
3276 // Detach the register rSrc from the input location. We must
3277 // do this dance because the input and output location are the
3278 // same and we want to have separate registers for the input
3279 // and output.
3280 m_regMap.invalidate(inputs[0]->location);
3281 ScratchReg rSrcScratch(m_regMap, rSrc);
3282 // This call to allocOutputRegs will allocate a new register
3283 // for the output location
3284 m_regMap.allocOutputRegs(i);
3285 PhysReg rDest = getReg(i.outStack->location);
3286 emitDeref(a, rSrc, rDest);
3287 emitIncRef(rDest, outType);
3288 // decRef the var on the evaluation stack
3289 emitDecRef(i, rSrc, KindOfRef);
3292 // setOpOpToOpcodeOp --
3293 // The SetOp opcode space has nothing to do with the bytecode opcode
3294 // space. Reasonable people like it that way, so translate them here.
3295 static Opcode
3296 setOpOpToOpcodeOp(SetOpOp soo) {
3297 switch(soo) {
3298 #define SETOP_OP(_soo, _bc) case SetOp##_soo: return _bc;
3299 SETOP_OPS
3300 #undef SETOP_OP
3301 default: ASSERT(false);
3303 return -1;
3306 void
3307 TranslatorX64::binaryIntegerArith(const NormalizedInstruction& i,
3308 Opcode op,
3309 PhysReg srcReg,
3310 PhysReg srcDestReg) {
3311 switch (op) {
3312 #define CASEIMM(OpBc, x64op) \
3313 case OpBc: { \
3314 if (i.hasConstImm) { \
3315 a. x64op ## _imm64_reg64(i.constImm.u_I64A, srcDestReg); \
3316 } else { \
3317 a. x64op ## _reg64_reg64(srcReg, srcDestReg); \
3318 } } break;
3319 #define CASE(OpBc, x64op) \
3320 case OpBc: { \
3321 a. x64op ## _reg64_reg64(srcReg, srcDestReg); \
3322 } break;
3323 CASEIMM(OpAdd, add)
3324 CASEIMM(OpSub, sub)
3325 CASEIMM(OpBitAnd, and)
3326 CASEIMM(OpBitOr, or)
3327 CASEIMM(OpBitXor, xor)
3328 CASE(OpMul, imul)
3329 #undef CASE
3330 #undef CASEIMM
3332 default: {
3333 not_reached();
3338 void
3339 TranslatorX64::binaryArithCell(const NormalizedInstruction &i,
3340 Opcode op, const DynLocation& in1,
3341 const DynLocation& inout) {
3342 ASSERT(in1.rtt.isInt());
3343 ASSERT(inout.rtt.isInt());
3344 ASSERT(in1.outerType() != KindOfRef);
3345 ASSERT(in1.isStack());
3346 ASSERT(inout.outerType() != KindOfRef);
3347 ASSERT(inout.isStack());
3348 m_regMap.allocOutputRegs(i);
3349 PhysReg srcReg = m_regMap.getReg(in1.location);
3350 PhysReg srcDestReg = m_regMap.getReg(inout.location);
3351 binaryIntegerArith(i, op, srcReg, srcDestReg);
3354 void
3355 TranslatorX64::binaryArithLocal(const NormalizedInstruction &i,
3356 Opcode op,
3357 const DynLocation& in1,
3358 const DynLocation& in2,
3359 const DynLocation& out) {
3360 // The caller must guarantee that these conditions hold
3361 ASSERT(in1.rtt.isInt());
3362 ASSERT(in2.rtt.isInt());
3363 ASSERT(in1.outerType() != KindOfRef);
3364 ASSERT(in1.isStack());
3365 ASSERT(in2.isLocal());
3366 ASSERT(out.isStack());
3368 PhysReg srcReg = m_regMap.getReg(in1.location);
3369 PhysReg outReg = m_regMap.getReg(out.location);
3370 PhysReg localReg = m_regMap.getReg(in2.location);
3371 if (in2.outerType() != KindOfRef) {
3372 // The local is not a var, so we can operate directly on the
3373 // local's register. We will need to update outReg after the
3374 // operation.
3375 binaryIntegerArith(i, op, srcReg, localReg);
3376 // We operated directly on the local's register, so we need to update
3377 // outReg
3378 emitMovRegReg(localReg, outReg);
3379 } else {
3380 // The local is a var, so we have to read its value into outReg
3381 // and operate on that. We will need to write the result back
3382 // to the local after the operation.
3383 emitDeref(a, localReg, outReg);
3384 binaryIntegerArith(i, op, srcReg, outReg);
3385 // We operated on outReg, so we need to write the result back to the
3386 // local
3387 a. store_reg64_disp_reg64(outReg, 0, localReg);
3391 static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
3392 ASSERT(tl_regState == REGSTATE_DIRTY);
3393 tl_regState = REGSTATE_CLEAN;
3394 vmfp() = (Cell*)ar;
3395 vmsp() = sp;
3396 vmpc() = curUnit()->at(pcOff);
3397 ASSERT(vmsp() <= vmfp());
3400 #define O(opcode, imm, push, pop, flags) \
3402 * The interpOne methods save m_pc, m_fp, and m_sp in the ExecutionContext,
3403 * call into the interpreter, and then return a pointer to the
3404 * current ExecutionContext.
3405 */ \
3406 VMExecutionContext* \
3407 interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
3408 interp_set_regs(ar, sp, pcOff); \
3409 SKTRACE(5, SrcKey(curFunc(), vmpc()), "%40s %p %p\n", \
3410 "interpOne" #opcode " before (fp,sp)", \
3411 vmfp(), vmsp()); \
3412 ASSERT(*vmpc() == Op ## opcode); \
3413 VMExecutionContext* ec = g_vmContext; \
3414 Stats::inc(Stats::Instr_InterpOne ## opcode); \
3415 INC_TPC(interp_one) \
3416 /* Correct for over-counting in TC-stats. */ \
3417 Stats::inc(Stats::Instr_TC, -1); \
3418 ec->op##opcode(); \
3420 * Only set regstate back to dirty if an exception is not
3421 * propagating. If an exception is throwing, regstate for this call
3422 * is actually still correct, and we don't have information in the
3423 * fixup map for interpOne calls anyway.
3424 */ \
3425 tl_regState = REGSTATE_DIRTY; \
3426 return ec; \
3429 OPCODES
3430 #undef O
3432 void* interpOneEntryPoints[] = {
3433 #define O(opcode, imm, push, pop, flags) \
3434 (void*)(interpOne ## opcode),
3435 OPCODES
3436 #undef O
3439 void TranslatorX64::fixupWork(VMExecutionContext* ec, ActRec* rbp) const {
3440 ASSERT(RuntimeOption::EvalJit);
3441 ActRec* nextAr = rbp;
3442 do {
3443 rbp = nextAr;
3444 FixupMap::VMRegs regs;
3445 TRACE(10, "considering frame %p, %p\n", rbp, (void*)rbp->m_savedRip);
3446 if (g_vmContext->m_stack.isValidAddress(rbp->m_savedRbp) &&
3447 m_fixupMap.getFrameRegs(rbp, &regs)) {
3448 TRACE(10, "fixup func %s fp %p sp %p pc %p\n",
3449 regs.m_fp->m_func->name()->data(),
3450 regs.m_fp, regs.m_sp, regs.m_pc);
3451 ec->m_fp = const_cast<ActRec*>(regs.m_fp);
3452 ec->m_pc = regs.m_pc;
3453 vmsp() = regs.m_sp;
3454 return;
3456 nextAr = (ActRec*)rbp->m_savedRbp;
3457 } while (rbp && rbp != nextAr);
3458 // OK, we've exhausted the entire actRec chain.
3459 // We are only invoking ::fixup() from contexts that were known
3460 // to be called out of the TC, so this cannot happen.
3461 NOT_REACHED();
3464 void TranslatorX64::fixup(VMExecutionContext* ec) const {
3465 // Start looking for fixup entries at the current (C++) frame. This
3466 // will walk the frames upward until we find a TC frame.
3467 ActRec* rbp;
3468 asm volatile("mov %%rbp, %0" : "=r"(rbp));
3469 fixupWork(ec, rbp);
3472 void
3473 TranslatorX64::syncWork() {
3474 ASSERT(tl_regState == REGSTATE_DIRTY);
3475 fixup(g_vmContext);
3476 tl_regState = REGSTATE_CLEAN;
3477 Stats::inc(Stats::TC_Sync);
3480 void
3481 TranslatorX64::emitInterpOne(const Tracelet& t,
3482 const NormalizedInstruction& ni) {
3483 // Write any dirty values to memory
3484 m_regMap.cleanAll();
3485 // Call into the appropriate interpOne method. Note that this call will
3486 // preserve the callee-saved registers including rVmFp and rVmSp.
3487 if (false) { /* typecheck */
3488 UNUSED VMExecutionContext* ec = interpOnePopC((ActRec*)vmfp(), vmsp(), 0);
3490 void* func = interpOneEntryPoints[ni.op()];
3491 TRACE(3, "ip %p of unit %p -> interpOne @%p\n", ni.pc(), ni.unit(), func);
3492 EMIT_CALL(a, func,
3493 R(rVmFp),
3494 RPLUS(rVmSp, -int32_t(cellsToBytes(ni.stackOff))),
3495 IMM(ni.source.offset()));
3496 // The interpreter may have written to memory, so we need to invalidate
3497 // all locations
3498 m_regMap.reset();
3499 // The interpOne method returned a pointer to the current
3500 // ExecutionContext in rax, so we can read the 'm_*' fields
3501 // by adding the appropriate offset to rax and dereferencing.
3503 // If this instruction ends the tracelet, we have some extra work to do.
3504 if (ni.breaksTracelet) {
3505 // Read the 'm_fp' and 'm_stack.m_top' fields into the rVmFp and
3506 // rVmSp registers.
3507 a. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
3508 rVmFp);
3509 a. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
3510 Stack::topOfStackOffset(), rVmSp);
3511 if (opcodeChangesPC(ni.op())) {
3512 // If interpreting this instruction can potentially set PC to point
3513 // to something other than the next instruction in the bytecode,
3514 // we need to emit a service request to figure out where to go next.
3515 TCA stubDest = emitServiceReq(REQ_RESUME, 0ull);
3516 a. jmp(stubDest);
3517 } else {
3518 // If this instruction always advances PC to the next instruction in
3519 // the bytecode, then we know what SrcKey to bind to
3520 emitBindJmp(nextSrcKey(t, ni));
3525 // could be static but used in hopt/codegen.cpp
3526 void raiseUndefVariable(StringData* nm) {
3527 raise_notice(Strings::UNDEFINED_VARIABLE, nm->data());
3528 // FIXME: do we need to decref the string if an exception is
3529 // propagating?
3530 if (nm->decRefCount() == 0) { nm->release(); }
3533 static TXFlags
3534 planBinaryArithOp(const NormalizedInstruction& i) {
3535 ASSERT(i.inputs.size() == 2);
3536 return nativePlan(i.inputs[0]->isInt() && i.inputs[1]->isInt());
3539 void
3540 TranslatorX64::analyzeBinaryArithOp(Tracelet& t, NormalizedInstruction& i) {
3541 i.m_txFlags = planBinaryArithOp(i);
3544 void
3545 TranslatorX64::translateBinaryArithOp(const Tracelet& t,
3546 const NormalizedInstruction& i) {
3547 const Opcode op = i.op();
3548 ASSERT(op == OpSub || op == OpMul || op == OpBitAnd ||
3549 op == OpBitOr || op == OpBitXor);
3550 ASSERT(planBinaryArithOp(i));
3551 ASSERT(i.inputs.size() == 2);
3553 binaryArithCell(i, op, *i.inputs[0], *i.outStack);
3556 static inline bool sameDataTypes(DataType t1, DataType t2) {
3557 return TypeConstraint::equivDataTypes(t1, t2);
3560 static TXFlags
3561 planSameOp_SameTypes(const NormalizedInstruction& i) {
3562 ASSERT(i.inputs.size() == 2);
3563 const RuntimeType& left = i.inputs[0]->rtt;
3564 const RuntimeType& right = i.inputs[1]->rtt;
3565 DataType leftType = left.outerType();
3566 DataType rightType = right.outerType();
3567 return nativePlan(sameDataTypes(leftType, rightType) &&
3568 (left.isNull() || leftType == KindOfBoolean ||
3569 left.isInt() || left.isString()));
3572 static TXFlags
3573 planSameOp_DifferentTypes(const NormalizedInstruction& i) {
3574 ASSERT(i.inputs.size() == 2);
3575 DataType leftType = i.inputs[0]->outerType();
3576 DataType rightType = i.inputs[1]->outerType();
3577 if (!sameDataTypes(leftType, rightType)) {
3578 if (IS_REFCOUNTED_TYPE(leftType) || IS_REFCOUNTED_TYPE(rightType)) {
3579 // For dissimilar datatypes, we might call out to handle a refcount.
3580 return Supported;
3582 return Native;
3584 return Interp;
3587 void
3588 TranslatorX64::analyzeSameOp(Tracelet& t, NormalizedInstruction& i) {
3589 ASSERT(!(planSameOp_SameTypes(i) && planSameOp_DifferentTypes(i)));
3590 i.m_txFlags = TXFlags(planSameOp_SameTypes(i) | planSameOp_DifferentTypes(i));
3591 i.manuallyAllocInputs = true;
3594 void
3595 TranslatorX64::translateSameOp(const Tracelet& t,
3596 const NormalizedInstruction& i) {
3597 const Opcode op = i.op();
3598 ASSERT(op == OpSame || op == OpNSame);
3599 const vector<DynLocation*>& inputs = i.inputs;
3600 bool instrNeg = (op == OpNSame);
3601 ASSERT(inputs.size() == 2);
3602 ASSERT(i.outStack && !i.outLocal);
3603 DataType leftType = i.inputs[0]->outerType();
3604 DataType rightType DEBUG_ONLY = i.inputs[1]->outerType();
3605 ASSERT(leftType != KindOfRef);
3606 ASSERT(rightType != KindOfRef);
3608 if (planSameOp_DifferentTypes(i)) {
3609 // Some easy cases: when the valueTypes do not match,
3610 // NSame -> true and Same -> false.
3611 SKTRACE(1, i.source, "different types %d %d\n",
3612 leftType, rightType);
3613 emitDecRefInput(a, i, 0);
3614 emitDecRefInput(a, i, 1);
3615 m_regMap.allocOutputRegs(i);
3616 emitImmReg(a, instrNeg, getReg(i.outStack->location));
3617 return; // Done
3620 ASSERT(planSameOp_SameTypes(i));
3622 if (IS_NULL_TYPE(leftType)) {
3623 m_regMap.allocOutputRegs(i);
3624 // null === null is always true
3625 SKTRACE(2, i.source, "straightening null/null comparison\n");
3626 emitImmReg(a, !instrNeg, getReg(i.outStack->location));
3627 return; // Done
3629 if (IS_STRING_TYPE(leftType)) {
3630 int args[2];
3631 args[0] = 0;
3632 args[1] = 1;
3633 allocInputsForCall(i, args);
3634 EMIT_CALL(a, same_str_str,
3635 V(inputs[0]->location),
3636 V(inputs[1]->location));
3637 if (instrNeg) {
3638 a. xor_imm32_reg32(1, rax);
3640 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3641 RegInfo::DIRTY);
3642 return; // Done
3644 m_regMap.allocInputRegs(i);
3645 PhysReg src, srcdest;
3646 getBinaryStackInputs(m_regMap, i, src, srcdest);
3647 m_regMap.allocOutputRegs(i);
3648 ASSERT(getReg(i.outStack->location) == srcdest);
3649 a. cmp_reg64_reg64(src, srcdest);
3650 if (op == OpSame) {
3651 a. sete(srcdest);
3652 } else {
3653 a. setne(srcdest);
3655 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
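// The cmp/sete (or setne for OpNSame) plus movzbq tail above is the usual
// "materialize a comparison as 0/1" idiom: roughly srcdest = (src == srcdest)
// for OpSame, negated for OpNSame, zero-extended to the full 64-bit register.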
3658 static bool
3659 trivialEquivType(const RuntimeType& rtt) {
3660 DataType t = rtt.valueType();
3661 return t == KindOfUninit || t == KindOfNull || t == KindOfBoolean ||
3662 rtt.isInt() || rtt.isString();
3665 static void
3666 emitConvertToBool(X64Assembler &a, PhysReg src, PhysReg dest, bool instrNeg) {
3667 a. test_reg64_reg64(src, src);
3668 if (instrNeg) {
3669 a. setz(dest);
3670 } else {
3671 a. setnz(dest);
3673 a. mov_reg8_reg64_unsigned(dest, dest);
3676 void
3677 TranslatorX64::analyzeEqOp(Tracelet& t, NormalizedInstruction& i) {
3678 ASSERT(i.inputs.size() == 2);
3679 RuntimeType &lt = i.inputs[0]->rtt;
3680 RuntimeType &rt = i.inputs[1]->rtt;
3681 i.m_txFlags = nativePlan(trivialEquivType(lt) &&
3682 trivialEquivType(rt));
3683 if (i.isNative() &&
3684 IS_NULL_TYPE(lt.outerType()) &&
3685 IS_NULL_TYPE(rt.outerType())) {
3686 i.manuallyAllocInputs = true;
3690 void
3691 TranslatorX64::translateEqOp(const Tracelet& t,
3692 const NormalizedInstruction& i) {
3693 const Opcode op = i.op();
3694 ASSERT(op == OpEq || op == OpNeq);
3695 ASSERT(i.isNative());
3696 const vector<DynLocation*>& inputs = i.inputs;
3697 bool instrNeg = (op == OpNeq);
3698 ASSERT(inputs.size() == 2);
3699 ASSERT(i.outStack && !i.outLocal);
3700 DataType leftType = i.inputs[0]->outerType();
3701 DataType rightType = i.inputs[1]->outerType();
3702 ASSERT(leftType != KindOfRef);
3703 ASSERT(rightType != KindOfRef);
3705 ConditionCode ccBranch = CC_E;
3706 if (instrNeg) ccBranch = ccNegate(ccBranch);
3708 // Inputless case.
3709 if (IS_NULL_TYPE(leftType) && IS_NULL_TYPE(rightType)) {
3710 ASSERT(i.manuallyAllocInputs);
3711 // null == null is always true
3712 bool result = !instrNeg;
3713 SKTRACE(2, i.source, "straightening null/null comparison\n");
3714 if (i.changesPC) {
3715 fuseBranchAfterStaticBool(t, i, result);
3716 } else {
3717 m_regMap.allocOutputRegs(i);
3718 emitImmReg(a, result, getReg(i.outStack->location));
3720 return; // Done
3723 if (IS_STRING_TYPE(leftType) || IS_STRING_TYPE(rightType)) {
3724 void* fptr = NULL;
3725 bool leftIsString = false;
3726 bool eqNullStr = false;
3727 switch (leftType) {
3728 STRINGCASE(): {
3729 leftIsString = true;
3730 switch (rightType) {
3731 STRINGCASE(): fptr = (void*)eq_str_str; break;
3732 case KindOfInt64: fptr = (void*)eq_int_str; break;
3733 case KindOfBoolean: fptr = (void*)eq_bool_str; break;
3734 NULLCASE(): fptr = (void*)eq_null_str; eqNullStr = true; break;
3735 default: ASSERT(false); break;
3737 } break;
3738 case KindOfInt64: fptr = (void*)eq_int_str; break;
3739 case KindOfBoolean: fptr = (void*)eq_bool_str; break;
3740 NULLCASE(): fptr = (void*)eq_null_str; eqNullStr = true; break;
3741 default: ASSERT(false); break;
3743 if (eqNullStr) {
3744 ASSERT(fptr == (void*)eq_null_str);
3745 EMIT_CALL(a, fptr,
3746 V(inputs[leftIsString ? 0 : 1]->location));
3747 } else {
3748 ASSERT(fptr != NULL);
3749 EMIT_CALL(a, fptr,
3750 V(inputs[leftIsString ? 1 : 0]->location),
3751 V(inputs[leftIsString ? 0 : 1]->location));
3753 if (i.changesPC) {
3754 fuseBranchSync(t, i);
3755 a. test_imm32_reg32(1, rax);
3756 fuseBranchAfterBool(t, i, ccNegate(ccBranch));
3757 return;
3759 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3760 RegInfo::DIRTY);
3761 if (instrNeg) {
3762 a. xor_imm32_reg32(1, rax);
3764 return;
3767 m_regMap.allocOutputRegs(i);
3768 PhysReg src, srcdest;
3769 getBinaryStackInputs(m_regMap, i, src, srcdest);
3770 ASSERT(getReg(i.outStack->location) == srcdest);
3771 if (i.changesPC) {
3772 fuseBranchSync(t, i);
3774 if (IS_NULL_TYPE(leftType) || IS_NULL_TYPE(rightType)) {
3775 if (IS_NULL_TYPE(leftType)) {
3776 a. test_reg64_reg64(srcdest, srcdest);
3777 } else {
3778 ASSERT(IS_NULL_TYPE(rightType));
3779 a. test_reg64_reg64(src, src);
3781 } else if (leftType == KindOfBoolean ||
3782 rightType == KindOfBoolean) {
3783 // OK to destroy src and srcdest in-place; their stack locations are
3784 // blown away by this instruction.
3785 if (leftType != KindOfBoolean)
3786 emitConvertToBool(a, src, src, false);
3787 if (rightType != KindOfBoolean)
3788 emitConvertToBool(a, srcdest, srcdest, false);
3789 a. cmp_reg64_reg64(src, srcdest);
3790 } else {
3791 a. cmp_reg64_reg64(src, srcdest);
3793 if (i.changesPC) {
3794 fuseBranchAfterBool(t, i, ccBranch);
3795 return;
3797 if (instrNeg) {
3798 a. setnz (srcdest);
3799 } else {
3800 a. setz (srcdest);
3802 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
3805 void
3806 TranslatorX64::analyzeLtGtOp(Tracelet& t, NormalizedInstruction& i) {
3807 ASSERT(i.inputs.size() == 2);
3808 const RuntimeType& left = i.inputs[0]->rtt;
3809 DataType leftType = left.outerType();
3810 DataType rightType = i.inputs[1]->outerType();
3811 i.m_txFlags = nativePlan(sameDataTypes(leftType, rightType) &&
3812 (left.isNull() ||
3813 leftType == KindOfBoolean ||
3814 left.isInt()));
3815 if (i.isNative() && IS_NULL_TYPE(left.outerType())) {
3816 // No inputs. w00t.
3817 i.manuallyAllocInputs = true;
3821 void
3822 TranslatorX64::translateLtGtOp(const Tracelet& t,
3823 const NormalizedInstruction& i) {
3824 const Opcode op = i.op();
3825 ASSERT(op == OpLt || op == OpLte || op == OpGt || op == OpGte);
3826 ASSERT(i.inputs.size() == 2);
3827 ASSERT(i.outStack && !i.outLocal);
3828 ASSERT(i.inputs[0]->outerType() != KindOfRef);
3829 ASSERT(i.inputs[1]->outerType() != KindOfRef);
3830 ASSERT(i.isNative());
3832 bool fEquals = (op == OpLte || op == OpGte);
3833 bool fLessThan = (op == OpLt || op == OpLte);
3835 m_regMap.allocOutputRegs(i);
3836 if (IS_NULL_TYPE(i.inputs[0]->outerType())) {
3837 ASSERT(IS_NULL_TYPE(i.inputs[1]->outerType()));
3838 // null < null is always false, null <= null is always true
3839 SKTRACE(2, i.source, "straightening null/null comparison\n");
3840 PhysReg rOut = getReg(i.outStack->location);
3841 bool resultIsTrue = (op == OpLte || op == OpGte);
3842 if (i.changesPC) {
3843 fuseBranchAfterStaticBool(t, i, resultIsTrue);
3844 } else {
3845 emitImmReg(a, resultIsTrue, rOut);
3847 return;
3849 PhysReg src, srcdest;
3850 getBinaryStackInputs(m_regMap, i, src, srcdest);
3851 ASSERT(getReg(i.outStack->location) == srcdest);
3852 if (i.changesPC) {
3853 fuseBranchSync(t, i);
3855 a. cmp_reg64_reg64(src, srcdest);
3856 static const ConditionCode opToCc[2][2] = {
3857 // !fEquals fEquals
3858 { CC_G, CC_GE }, // !fLessThan
3859 { CC_L, CC_LE }, // fLessThan
3861 ConditionCode cc = opToCc[fLessThan][fEquals];
3862 if (i.changesPC) {
3863 // Fuse the coming branch.
3864 fuseBranchAfterBool(t, i, cc);
3865 return;
3867 a. setcc(cc, srcdest);
3868 a. mov_reg8_reg64_unsigned(srcdest, srcdest);
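// Worked example for the opToCc table: OpLte has fLessThan and fEquals both
// true, so opToCc[1][1] == CC_LE; OpGt has both false, giving opToCc[0][0]
// == CC_G. A fused branch jumps on that condition directly; otherwise the
// setcc/movzbq tail materializes it as a 0/1 value.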
3871 static TXFlags
3872 planUnaryBooleanOp(const NormalizedInstruction& i) {
3873 ASSERT(i.inputs.size() == 1);
3874 RuntimeType& rtt = i.inputs[0]->rtt;
3875 DataType inType = rtt.valueType();
3876 if (inType == KindOfArray) {
3877 return Supported;
3879 if (rtt.isString()) {
3880 return Simple;
3882 return nativePlan(rtt.isNull() ||
3883 inType == KindOfBoolean || rtt.isInt());
3886 void
3887 TranslatorX64::analyzeUnaryBooleanOp(Tracelet& t, NormalizedInstruction& i) {
3888 i.m_txFlags = planUnaryBooleanOp(i);
3891 void
3892 TranslatorX64::translateUnaryBooleanOp(const Tracelet& t,
3893 const NormalizedInstruction& i) {
3894 const Opcode op = i.op();
3895 ASSERT(op == OpCastBool || op == OpEmptyL);
3896 const vector<DynLocation*>& inputs = i.inputs;
3897 ASSERT(inputs.size() == 1);
3898 ASSERT(i.outStack && !i.outLocal);
3899 bool instrNeg = (op == OpEmptyL);
3900 DataType inType = inputs[0]->valueType();
3901 const Location& inLoc = inputs[0]->location;
3902 bool boxedForm = (inputs[0]->outerType() == KindOfRef);
3905 switch (inType) {
3906 NULLCASE(): {
3907 m_regMap.allocOutputRegs(i);
3908 PhysReg outReg = getReg(i.outStack->location);
3909 emitImmReg(a, instrNeg, outReg);
3910 ASSERT(i.isNative());
3911 } break;
3912 case KindOfBoolean: {
3913 if (op == OpCastBool) {
3914 // Casting bool to bool is a nop. CastBool's input must be
3915 // a cell on the stack as per the bytecode specification.
3916 ASSERT(inputs[0]->isStack());
3917 ASSERT(inputs[0]->outerType() != KindOfRef);
3918 ASSERT(inputs[0]->location.space == Location::Stack);
3919 ASSERT(i.isNative());
3920 break;
3922 m_regMap.allocOutputRegs(i);
3923 PhysReg reg = getReg(inLoc);
3924 PhysReg outReg = getReg(i.outStack->location);
3925 if (boxedForm) {
3926 emitDeref(a, reg, outReg);
3927 } else {
3928 emitMovRegReg(reg, outReg);
3930 if (instrNeg) {
3931 a. xor_imm32_reg32(1, outReg);
3933 } break;
3934 case KindOfInt64: {
3935 m_regMap.allocOutputRegs(i);
3936 PhysReg reg = getReg(inLoc);
3937 PhysReg outReg = getReg(i.outStack->location);
3938 ScratchReg scratch(m_regMap);
3939 if (boxedForm) {
3940 emitDeref(a, reg, *scratch);
3941 emitConvertToBool(a, *scratch, outReg, instrNeg);
3942 } else {
3943 emitConvertToBool(a, reg, outReg, instrNeg);
3945 } break;
3946 STRINGCASE():
3947 case KindOfArray: {
3948 bool doDecRef = (inputs[0]->isStack());
3949 void* fptr = IS_STRING_TYPE(inType) ?
3950 (doDecRef ? (void*)str_to_bool : (void*)str0_to_bool) :
3951 (doDecRef ? (void*)arr_to_bool : (void*)arr0_to_bool);
3952 if (boxedForm) {
3953 EMIT_CALL(a, fptr, DEREF(inLoc));
3954 } else {
3955 EMIT_CALL(a, fptr, V(inLoc));
3957 if (!IS_STRING_TYPE(inType)) {
3958 recordReentrantCall(i);
3960 if (instrNeg) {
3961 a. xor_imm32_reg32(1, rax);
3963 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
3964 RegInfo::DIRTY);
3965 } break;
3966 default: {
3967 ASSERT(false);
3968 } break;
3972 void
3973 TranslatorX64::analyzeBranchOp(Tracelet& t, NormalizedInstruction& i) {
3974 i.m_txFlags = Supported;
3977 // Helper for decoding dests of branch-like instructions at the end of
3978 // a basic block.
3979 static void branchDests(const Tracelet& t,
3980 const NormalizedInstruction& i,
3981 SrcKey* outTaken, SrcKey* outNotTaken,
3982 int immIdx = 0) {
3983 *outNotTaken = nextSrcKey(t, i);
3984 int dest = i.imm[immIdx].u_BA;
3985 *outTaken = SrcKey(curFunc(), i.offset() + dest);
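// Example: for a conditional jump at bytecode offset 20 whose u_BA immediate
// is -8, *outTaken ends up at offset 12 (20 + -8) within the current
// function, while *outNotTaken is just the instruction following the jump in
// the tracelet.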
3988 void TranslatorX64::branchWithFlagsSet(const Tracelet& t,
3989 const NormalizedInstruction& i,
3990 ConditionCode cc) {
3991 ASSERT(i.op() == OpJmpNZ || i.op() == OpJmpZ);
3992 // not_taken
3993 SrcKey taken, notTaken;
3994 branchDests(t, i, &taken, &notTaken);
3995 TRACE(3, "branchWithFlagsSet %d %d cc%02x jmp%sz\n",
3996 taken.offset(), notTaken.offset(), cc,
3997 i.isJmpNZ() ? "n" : "");
3998 emitCondJmp(taken, notTaken, cc);
4001 void TranslatorX64::fuseBranchAfterStaticBool(const Tracelet& t,
4002 const NormalizedInstruction& i,
4003 bool resultIsTrue) {
4004 ASSERT(i.breaksTracelet);
4005 ASSERT(i.next);
4006 NormalizedInstruction &nexti = *i.next;
4007 fuseBranchSync(t, i);
4008 bool isTaken = (resultIsTrue == nexti.isJmpNZ());
4009 SrcKey taken, notTaken;
4010 branchDests(t, nexti, &taken, &notTaken);
4011 if (isTaken) {
4012 emitBindJmp(taken);
4013 } else {
4014 emitBindJmp(notTaken);
4018 void TranslatorX64::fuseBranchSync(const Tracelet& t,
4019 const NormalizedInstruction& i) {
4020 // Don't bother sync'ing the output of this instruction.
4021 m_regMap.scrubStackEntries(i.outStack->location.offset);
4022 syncOutputs(t);
4025 void TranslatorX64::fuseBranchAfterBool(const Tracelet& t,
4026 const NormalizedInstruction& i,
4027 ConditionCode cc) {
4028 ASSERT(i.breaksTracelet);
4029 ASSERT(i.next);
4030 NormalizedInstruction &nexti = *i.next;
4031 if (!i.next->isJmpNZ()) cc = ccNegate(cc);
4032 branchWithFlagsSet(t, nexti, cc);
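// Branch fusion in a nutshell: the preceding comparison already left its
// result in the flags, so the following JmpNZ/JmpZ consumes the flags
// directly instead of testing a materialized boolean. When the next
// instruction is JmpZ rather than JmpNZ the condition is negated first,
// e.g. a fused "Eq; JmpZ" branches on CC_NE instead of CC_E.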
4035 void
4036 TranslatorX64::translateBranchOp(const Tracelet& t,
4037 const NormalizedInstruction& i) {
4038 DEBUG_ONLY const Opcode op = i.op();
4039 ASSERT(op == OpJmpZ || op == OpJmpNZ);
4041 bool isZ = !i.isJmpNZ();
4042 ASSERT(i.inputs.size() == 1);
4043 ASSERT(!i.outStack && !i.outLocal);
4044 m_regMap.allocOutputRegs(i);
4045 const DynLocation& in = *i.inputs[0];
4046 const RuntimeType& rtt = in.rtt;
4047 const Location& inLoc = in.location;
4048 DataType inputType = rtt.outerType();
4049 PhysReg src = getReg(inLoc);
4051 * Careful. We're operating with intimate knowledge of the
4052 * constraints of the register allocator from here out.
4054 if (rtt.isString() || inputType == KindOfArray) {
4055 // str_to_bool and arr_to_bool will decRef for us
4056 void* fptr = IS_STRING_TYPE(inputType) ? (void*)str_to_bool :
4057 (void*)arr_to_bool;
4058 EMIT_CALL(a, fptr, V(inLoc));
4059 src = rax;
4060 ScratchReg sr(m_regMap, rax);
4061 syncOutputs(t);
4062 } else if (inputType != KindOfUninit &&
4063 inputType != KindOfNull &&
4064 inputType != KindOfBoolean &&
4065 !rtt.isInt()) {
4066 // input might be in-flight
4067 m_regMap.cleanLoc(inLoc);
4068 // Cast to a bool.
4069 if (false) {
4070 TypedValue *tv = NULL;
4071 int64 ret = tv_to_bool(tv);
4072 if (ret) {
4073 printf("zoot");
4076 TRACE(2, Trace::prettyNode("tv_to_bool", inLoc) + string("\n"));
4077 // tv_to_bool will decRef for us if appropriate
4078 EMIT_CALL(a, tv_to_bool, A(inLoc));
4079 recordReentrantCall(i);
4080 src = rax;
4081 ScratchReg sr(m_regMap, rax);
4082 syncOutputs(t);
4083 } else {
4084 syncOutputs(t);
4087 // not_taken
4088 SrcKey taken, notTaken;
4089 branchDests(t, i, &taken, &notTaken);
4091 // Since null always evaluates to false, we can emit an
4092 // unconditional jump. OpJmpNZ will never take the branch
4093 // while OpJmpZ will always take the branch.
4094 if (IS_NULL_TYPE(inputType)) {
4095 TRACE(1, "branch on Null -> always Z\n");
4096 emitBindJmp(isZ ? taken : notTaken);
4097 return;
4099 a. test_reg64_reg64(src, src);
4100 branchWithFlagsSet(t, i, isZ ? CC_Z : CC_NZ);
4103 void
4104 TranslatorX64::analyzeCGetL(Tracelet& t, NormalizedInstruction& i) {
4105 ASSERT(i.inputs.size() == 1);
4106 const RuntimeType& type = i.inputs[0]->rtt;
4107 i.m_txFlags = type.isUninit() ? Supported : Native;
4110 void
4111 TranslatorX64::translateCGetL(const Tracelet& t,
4112 const NormalizedInstruction& i) {
4113 const DEBUG_ONLY Opcode op = i.op();
4114 ASSERT(op == OpFPassL || op == OpCGetL);
4115 const vector<DynLocation*>& inputs = i.inputs;
4116 ASSERT(inputs.size() == 1);
4117 ASSERT(inputs[0]->isLocal());
4118 DataType outType = i.inputs[0]->valueType();
4119 ASSERT(outType != KindOfInvalid);
4121 // Check for use of an undefined local.
4122 if (inputs[0]->rtt.isUninit()) {
4123 ASSERT(!i.outStack || i.outStack->outerType() == KindOfNull);
4124 outType = KindOfNull;
4125 ASSERT(inputs[0]->location.offset < curFunc()->numLocals());
4126 const StringData* name = local_name(inputs[0]->location);
4127 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
4128 recordReentrantCall(i);
4129 if (i.outStack) {
4130 m_regMap.allocOutputRegs(i);
4132 return;
4136 * we can merge a CGetL with a following InstanceOfD
4137 * to avoid the incRef/decRef on the result (see
4138 * analyzeSecondPass).
4140 * outStack will be clear in that case.
4142 if (!i.outStack) return;
4143 ASSERT(outType == i.outStack->outerType());
4144 m_regMap.allocOutputRegs(i);
4145 if (IS_NULL_TYPE(outType)) return;
4146 PhysReg dest = getReg(i.outStack->location);
4148 if (i.manuallyAllocInputs && !m_regMap.hasReg(inputs[0]->location)) {
4149 fill(inputs[0]->location, dest);
4150 } else {
4151 PhysReg localReg = getReg(inputs[0]->location);
4152 emitMovRegReg(localReg, dest);
4154 if (inputs[0]->isVariant()) {
4155 emitDeref(a, dest, dest);
4157 ASSERT(outType != KindOfStaticString);
4158 emitIncRef(dest, outType);
4161 void
4162 TranslatorX64::analyzeCGetL2(Tracelet& t,
4163 NormalizedInstruction& ni) {
4164 const int locIdx = 1;
4165 ASSERT(ni.inputs.size() == 2);
4166 ni.m_txFlags = ni.inputs[locIdx]->rtt.isUninit() ? Supported : Native;
4169 void
4170 TranslatorX64::translateCGetL2(const Tracelet& t,
4171 const NormalizedInstruction& ni) {
4172 const int stackIdx = 0;
4173 const int locIdx = 1;
4175 // Note: even if it's an undefined local we need to move a few
4176 // values around to have outputs end up in the right place.
4177 const bool undefinedLocal = ni.inputs[locIdx]->rtt.isUninit();
4179 if (undefinedLocal) {
4180 ASSERT(ni.outStack->valueType() == KindOfNull);
4181 ASSERT(ni.inputs[locIdx]->location.offset < curFunc()->numLocals());
4182 const StringData* name = local_name(ni.inputs[locIdx]->location);
4184 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
4185 recordReentrantCall(ni);
4187 m_regMap.allocInputRegs(ni);
4190 m_regMap.allocOutputRegs(ni);
4191 const PhysReg stackIn = getReg(ni.inputs[stackIdx]->location);
4192 const PhysReg localIn = getReg(ni.inputs[locIdx]->location);
4193 const PhysReg stackOut = getReg(ni.outStack2->location);
4194 ASSERT(ni.inputs[stackIdx]->location.isStack());
4195 ASSERT(ni.inputs[locIdx]->location.isLocal());
4198 * These registers overlap a bit, so we can swap a few bindings to
4199 * avoid a move.
4201 ASSERT(stackIn == getReg(ni.outStack->location) && localIn != stackOut);
4202 m_regMap.swapRegisters(stackIn, stackOut);
4203 const PhysReg cellOut = getReg(ni.outStack->location);
4204 ASSERT(cellOut != stackIn);
4205 if (ni.inputs[locIdx]->isVariant()) {
4206 emitDeref(a, localIn, cellOut);
4207 } else if (!undefinedLocal) {
4208 emitMovRegReg(localIn, cellOut);
4210 emitIncRef(cellOut, ni.inputs[locIdx]->valueType());
4213 void
4214 TranslatorX64::analyzeVGetL(Tracelet& t,
4215 NormalizedInstruction& i) {
4216 i.m_txFlags = Native;
4219 void
4220 TranslatorX64::translateVGetL(const Tracelet& t,
4221 const NormalizedInstruction& i) {
4222 const DEBUG_ONLY Opcode op = i.op();
4223 ASSERT(op == OpVGetL || op == OpFPassL);
4224 const vector<DynLocation*>& inputs = i.inputs;
4225 ASSERT(inputs.size() == 1);
4226 ASSERT(i.outStack);
4227 ASSERT(inputs[0]->isLocal());
4228 ASSERT(i.outStack->rtt.outerType() == KindOfRef);
4230 PhysReg localReg = getReg(inputs[0]->location);
4231 PhysReg dest;
4232 if (inputs[0]->rtt.outerType() != KindOfRef) {
4233 emitBox(inputs[0]->rtt.outerType(), localReg);
4234 m_regMap.bind(rax, inputs[0]->location, KindOfRef,
4235 RegInfo::DIRTY);
4236 m_regMap.allocOutputRegs(i);
4237 dest = getReg(i.outStack->location);
4238 emitMovRegReg(rax, dest);
4239 } else {
4240 m_regMap.allocOutputRegs(i);
4241 dest = getReg(i.outStack->location);
4242 emitMovRegReg(localReg, dest);
4244 emitIncRef(dest, KindOfRef);
4247 void
4248 TranslatorX64::analyzeAssignToLocalOp(Tracelet& t,
4249 NormalizedInstruction& ni) {
4250 const int locIdx = 1;
4251 ni.m_txFlags = planHingesOnRefcounting(ni.inputs[locIdx]->outerType());
4254 void
4255 TranslatorX64::translateAssignToLocalOp(const Tracelet& t,
4256 const NormalizedInstruction& ni) {
4257 const int rhsIdx = 0;
4258 const int locIdx = 1;
4259 const Opcode op = ni.op();
4260 ASSERT(op == OpSetL || op == OpBindL);
4261 ASSERT(ni.inputs.size() == 2);
4262 ASSERT((op == OpBindL) ==
4263 (ni.inputs[rhsIdx]->outerType() == KindOfRef));
4265 ASSERT(!ni.outStack || ni.inputs[locIdx]->location != ni.outStack->location);
4266 ASSERT(ni.outLocal);
4267 ASSERT(ni.inputs[locIdx]->location == ni.outLocal->location);
4268 ASSERT(ni.inputs[rhsIdx]->isStack());
4270 m_regMap.allocOutputRegs(ni);
4271 const PhysReg rhsReg = getReg(ni.inputs[rhsIdx]->location);
4272 const PhysReg localReg = getReg(ni.outLocal->location);
4273 const DataType oldLocalType = ni.inputs[locIdx]->outerType();
4274 const DataType rhsType = ni.inputs[rhsIdx]->outerType();
4275 ASSERT(localReg != rhsReg);
4277 LazyScratchReg oldLocalReg(m_regMap);
4278 DataType decRefType;
4280 // For SetL, when the local is boxed, we need to change the
4281 // type/value of the inner cell. If we're doing BindL, we don't
4282 // want to affect the old inner cell in any case (except to decref
4283 // it).
4284 const bool affectInnerCell = op == OpSetL &&
4285 oldLocalType == KindOfRef;
4286 if (affectInnerCell) {
4287 ASSERT(rhsType != KindOfRef);
4289 oldLocalReg.alloc();
4290 emitDeref(a, localReg, *oldLocalReg);
4291 emitStoreTypedValue(a, rhsType, rhsReg, 0, localReg);
4292 decRefType = ni.inputs[locIdx]->rtt.innerType();
4293 } else {
4295 * Instead of emitting a mov, just swap the locations these two
4296 * registers are mapped to.
4298 * TODO: this might not be the best idea now that the register
4299 * allocator has some awareness about what is a local. (Maybe we
4300 * should just xchg_reg64_reg64.)
4302 m_regMap.swapRegisters(rhsReg, localReg);
4303 decRefType = oldLocalType;
4306 // If we're giving stack output, it's important to incref before
4307 // calling a possible destructor, since the destructor could have
4308 // access to the local if it is a var.
4309 if (ni.outStack) {
4310 emitIncRef(rhsReg, rhsType);
4311 } else {
4312 SKTRACE(3, ni.source, "hoisting Pop* into current instr\n");
4315 emitDecRef(ni, oldLocalReg.isAllocated() ? *oldLocalReg : localReg,
4316 decRefType);
4318 if (ni.outStack && !IS_NULL_TYPE(ni.outStack->outerType())) {
4319 PhysReg stackReg = getReg(ni.outStack->location);
4320 emitMovRegReg(rhsReg, stackReg);
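/*
 * Why the incRef of the rhs happens before the decRef of the old value: the
 * decRef may run an arbitrary destructor, and that destructor can still see
 * the local (e.g. through a reference), so the new value must already be
 * protected. A toy sketch of the idiom (Ref is an invented stand-in, not an
 * HHVM type):
 *
 *   void assign(Ref*& slot, Ref* rhs) {
 *     rhs->incRef();        // protect the new value first
 *     Ref* old = slot;
 *     slot = rhs;
 *     old->decRef();        // now safe even if this runs a destructor
 *   }
 */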
4324 static void
4325 planPop(NormalizedInstruction& i) {
4326 if (i.prev && i.prev->outputPredicted) {
4327 i.prev->outputPredicted = false;
4328 i.inputs[0]->rtt = RuntimeType(KindOfInvalid);
4330 DataType type = i.inputs[0]->outerType();
4331 i.m_txFlags =
4332 (type == KindOfInvalid || IS_REFCOUNTED_TYPE(type)) ? Supported : Native;
4333 i.manuallyAllocInputs = true;
4336 void TranslatorX64::analyzePopC(Tracelet& t, NormalizedInstruction& i) {
4337 planPop(i);
4340 void TranslatorX64::analyzePopV(Tracelet& t, NormalizedInstruction& i) {
4341 planPop(i);
4344 void TranslatorX64::analyzePopR(Tracelet& t, NormalizedInstruction& i) {
4345 planPop(i);
4348 void
4349 TranslatorX64::translatePopC(const Tracelet& t,
4350 const NormalizedInstruction& i) {
4351 ASSERT(i.inputs.size() == 1);
4352 ASSERT(!i.outStack && !i.outLocal);
4353 if (i.inputs[0]->rtt.isVagueValue()) {
4354 PhysReg base;
4355 int disp;
4356 locToRegDisp(i.inputs[0]->location, &base, &disp);
4357 emitDecRefGeneric(i, base, disp);
4358 } else {
4359 emitDecRefInput(a, i, 0);
4363 void
4364 TranslatorX64::translatePopV(const Tracelet& t,
4365 const NormalizedInstruction& i) {
4366 ASSERT(i.inputs[0]->rtt.isVagueValue() ||
4367 i.inputs[0]->isVariant());
4368 translatePopC(t, i);
4371 void
4372 TranslatorX64::translatePopR(const Tracelet& t,
4373 const NormalizedInstruction& i) {
4374 translatePopC(t, i);
4377 void
4378 TranslatorX64::translateUnboxR(const Tracelet& t,
4379 const NormalizedInstruction& i) {
4380 ASSERT(!i.inputs[0]->rtt.isVagueValue());
4382 // If the value on the top of a stack is a var, unbox it and
4383 // leave it on the top of the stack.
4384 if (i.inputs[0]->isVariant()) {
4385 emitUnboxTopOfStack(i);
4389 void
4390 TranslatorX64::translateNull(const Tracelet& t,
4391 const NormalizedInstruction& i) {
4392 ASSERT(i.inputs.size() == 0);
4393 ASSERT(!i.outLocal);
4394 if (i.outStack) {
4395 ASSERT(i.outStack->outerType() == KindOfNull);
4397 // We have to mark the output register as dirty to ensure that
4398 // the type gets spilled at the end of the tracelet
4399 m_regMap.allocOutputRegs(i);
4401 /* nop */
4404 void
4405 TranslatorX64::translateTrue(const Tracelet& t,
4406 const NormalizedInstruction& i) {
4407 ASSERT(i.inputs.size() == 0);
4408 ASSERT(!i.outLocal);
4409 if (i.outStack) {
4410 m_regMap.allocOutputRegs(i);
4411 PhysReg rdest = getReg(i.outStack->location);
4412 emitImmReg(a, 1, rdest);
4416 void
4417 TranslatorX64::translateFalse(const Tracelet& t,
4418 const NormalizedInstruction& i) {
4419 ASSERT(i.inputs.size() == 0);
4420 ASSERT(!i.outLocal);
4421 if (i.outStack) {
4422 m_regMap.allocOutputRegs(i);
4423 PhysReg dest = getReg(i.outStack->location);
4424 emitImmReg(a, false, dest);
4428 void
4429 TranslatorX64::translateInt(const Tracelet& t,
4430 const NormalizedInstruction& i) {
4431 ASSERT(i.inputs.size() == 0);
4432 ASSERT(!i.outLocal);
4433 if (i.outStack) {
4434 ASSERT(i.outStack->isInt());
4435 m_regMap.allocOutputRegs(i);
4436 PhysReg dest = getReg(i.outStack->location);
4437 uint64_t srcImm = i.imm[0].u_I64A;
4438 emitImmReg(a, srcImm, dest);
4442 void
4443 TranslatorX64::translateString(const Tracelet& t,
4444 const NormalizedInstruction& i) {
4445 ASSERT(i.inputs.size() == 0);
4446 ASSERT(!i.outLocal);
4447 if (!i.outStack) return;
4448 ASSERT(Translator::typeIsString(i.outStack->outerType()));
4449 m_regMap.allocOutputRegs(i);
4450 PhysReg dest = getReg(i.outStack->location);
4451 uint64_t srcImm = (uintptr_t)curUnit()->lookupLitstrId(i.imm[0].u_SA);
4452 // XXX: can simplify the lookup here by just fishing it out of the
4453 // output's valueString().
4454 // We are guaranteed that the string is static, so we do not need to
4455 // increment the refcount
4456 ASSERT(((StringData*)srcImm)->isStatic());
4457 SKTRACE(2, i.source, "Litstr %d -> %p \"%s\"\n",
4458 i.imm[0].u_SA, (StringData*)srcImm,
4459 Util::escapeStringForCPP(((StringData*)srcImm)->data()).c_str());
4460 emitImmReg(a, srcImm, dest);
4463 void
4464 TranslatorX64::translateArray(const Tracelet& t,
4465 const NormalizedInstruction& i) {
4466 ASSERT(i.inputs.size() == 0);
4467 ASSERT(!i.outLocal);
4468 if (i.outStack) {
4469 ASSERT(i.outStack->outerType() == KindOfArray);
4470 m_regMap.allocOutputRegs(i);
4471 ArrayData* ad = curUnit()->lookupArrayId(i.imm[0].u_AA);
4472 PhysReg r = getReg(i.outStack->location);
4473 emitImmReg(a, uint64(ad), r);
4474 // We are guaranteed that the array is static, so we do not need to
4475 // increment the refcount
4476 ASSERT(ad->isStatic());
4480 ArrayData*
4481 HOT_FUNC_VM
4482 newArrayHelper(int capacity) {
4483 ArrayData *a = NEW(HphpArray)(capacity);
4484 a->incRefCount();
4485 TRACE(2, "newArrayHelper: capacity %d\n", capacity);
4486 return a;
4489 void
4490 TranslatorX64::translateNewArray(const Tracelet& t,
4491 const NormalizedInstruction& i) {
4492 ASSERT(i.inputs.size() == 0);
4493 ASSERT(i.outStack && !i.outLocal);
4494 ASSERT(i.outStack->outerType() == KindOfArray);
4495 int capacity = i.imm[0].u_IVA;
4496 if (capacity == 0) {
4497 m_regMap.allocOutputRegs(i);
4498 PhysReg r = getReg(i.outStack->location);
4499 emitImmReg(a, uint64(HphpArray::GetStaticEmptyArray()), r);
4500 // We are guaranteed that the new array is static, so we do not need to
4501 // increment the refcount
4502 ASSERT(HphpArray::GetStaticEmptyArray()->isStatic());
4503 } else {
4504 // create an empty array with a nonzero capacity
4505 if (false) {
4506 ArrayData* a = newArrayHelper(42);
4507 printf("%p", a); // use ret
4509 EMIT_CALL(a, newArrayHelper, IMM(capacity));
4510 m_regMap.bind(rax, i.outStack->location, KindOfArray, RegInfo::DIRTY);
4514 void TranslatorX64::analyzeNewTuple(Tracelet& t, NormalizedInstruction& i) {
4515 i.m_txFlags = Simple; // the array constructors are not re-entrant.
4516 i.manuallyAllocInputs = true; // all values passed via stack.
4519 ArrayData* newTupleHelper(int n, TypedValue* values) {
4520 HphpArray* a = NEW(HphpArray)(n, values);
4521 a->incRefCount();
4522 TRACE(2, "newTupleHelper: size %d\n", n);
4523 return a;
4526 void TranslatorX64::translateNewTuple(const Tracelet& t,
4527 const NormalizedInstruction& i) {
4528 int arity = i.imm[0].u_IVA;
4529 ASSERT(arity > 0 && i.inputs.size() == unsigned(arity));
4530 ASSERT(i.outStack && !i.outLocal);
4531 for (int j = 0; j < arity; j++) {
4532 ASSERT(i.inputs[j]->outerType() != KindOfRef);
4533 ASSERT(i.inputs[j]->isStack());
4536 // We pass the values by address, so we need to sync them back to memory
4537 for (int j = 0; j < arity; j++) {
4538 m_regMap.cleanLoc(i.inputs[j]->location);
4540 if (false) {
4541 TypedValue* rhs = 0;
4542 ArrayData* ret = newTupleHelper(arity, rhs);
4543 printf("%p", ret); // use ret
4545 EMIT_CALL(a, newTupleHelper, IMM(arity), A(i.inputs[0]->location));
4546 // newTupleHelper returns the up-to-date array pointer in rax.
4547 // Therefore, we can bind rax to the result location and mark it as dirty.
4548 m_regMap.bind(rax, i.inputs[arity-1]->location, KindOfArray, RegInfo::DIRTY);
4551 void
4552 TranslatorX64::analyzeNop(Tracelet& t, NormalizedInstruction& i) {
4553 i.m_txFlags = Native;
4556 void
4557 TranslatorX64::translateNop(const Tracelet& t,
4558 const NormalizedInstruction& i) {
4561 void
4562 TranslatorX64::analyzeAddElemC(Tracelet& t, NormalizedInstruction& i) {
4563 i.m_txFlags = supportedPlan(i.inputs[2]->outerType() == KindOfArray &&
4564 (i.inputs[1]->isInt() ||
4565 i.inputs[1]->isString()));
4568 void
4569 TranslatorX64::translateAddElemC(const Tracelet& t,
4570 const NormalizedInstruction& i) {
4571 ASSERT(i.outStack && !i.outLocal);
4572 ASSERT(i.inputs.size() >= 3);
4573 const DynLocation& arr = *i.inputs[2];
4574 const DynLocation& key = *i.inputs[1];
4575 const DynLocation& val = *i.inputs[0];
4576 ASSERT(!arr.isVariant()); // not handling variants.
4577 ASSERT(!key.isVariant());
4578 ASSERT(!val.isVariant());
4580 const Location& arrLoc = arr.location;
4581 const Location& keyLoc = key.location;
4582 const Location& valLoc = val.location;
4584 ASSERT(arrLoc.isStack());
4585 ASSERT(keyLoc.isStack());
4586 ASSERT(valLoc.isStack());
4588 // If either the key or the rhs is not Int64, we will need to pass the
4589 // rhs by address, so we need to sync it back to memory
4590 if (!key.rtt.isInt() || !val.rtt.isInt()) {
4591 m_regMap.cleanLoc(valLoc);
4594 // The array_setm helpers will decRef any old value that is
4595 // overwritten if appropriate. If copy-on-write occurs, it will also
4596 // incRef the new array and decRef the old array for us. Finally,
4597 // some of the array_setm helpers will decRef the key if it is a
4598 // string (for cases where the key is not a local), while others do
4599 // not (for cases where the key is a local).
4600 void* fptr;
4601 if (key.rtt.isInt() && val.rtt.isInt()) {
4602 if (false) { // type-check
4603 TypedValue* cell = NULL;
4604 ArrayData* arr = NULL;
4605 ArrayData* ret = array_setm_ik1_iv(cell, arr, 12, 3);
4606 printf("%p", ret); // use ret
4608 // If the rhs is Int64, we can use a specialized helper
4609 EMIT_CALL(a, array_setm_ik1_iv,
4610 IMM(0),
4611 V(arrLoc),
4612 V(keyLoc),
4613 V(valLoc));
4614 recordReentrantCall(i);
4615 } else if (key.rtt.isInt() || key.rtt.isString()) {
4616 if (false) { // type-check
4617 TypedValue* cell = NULL;
4618 TypedValue* rhs = NULL;
4619 StringData* strkey = NULL;
4620 ArrayData* arr = NULL;
4621 ArrayData* ret;
4622 ret = array_setm_ik1_v0(cell, arr, 12, rhs);
4623 printf("%p", ret); // use ret
4624 ret = array_setm_sk1_v0(cell, arr, strkey, rhs);
4625 printf("%p", ret); // use ret
4627 // Otherwise, we pass the rhs by address
4628 fptr = key.rtt.isString() ? (void*)array_setm_sk1_v0 :
4629 (void*)array_setm_ik1_v0;
4630 EMIT_CALL(a, fptr,
4631 IMM(0),
4632 V(arrLoc),
4633 V(keyLoc),
4634 A(valLoc));
4635 recordReentrantCall(i);
4636 } else {
4637 ASSERT(false);
4639 // The array value may have changed, so we need to invalidate any
4640 // register we have associated with arrLoc
4641 m_regMap.invalidate(arrLoc);
4642 // The array_setm helper returns the up-to-date array pointer in rax.
4643 // Therefore, we can bind rax to arrLoc and mark it as dirty.
4644 m_regMap.bind(rax, arrLoc, KindOfArray, RegInfo::DIRTY);
4647 void
4648 TranslatorX64::analyzeAddNewElemC(Tracelet& t, NormalizedInstruction& i) {
4649 ASSERT(i.inputs.size() == 2);
4650 i.m_txFlags = supportedPlan(i.inputs[1]->outerType() == KindOfArray);
4653 void
4654 TranslatorX64::translateAddNewElemC(const Tracelet& t,
4655 const NormalizedInstruction& i) {
4656 ASSERT(i.inputs.size() == 2);
4657 ASSERT(i.outStack && !i.outLocal);
4658 ASSERT(i.inputs[0]->outerType() != KindOfRef);
4659 ASSERT(i.inputs[1]->outerType() != KindOfRef);
4660 ASSERT(i.inputs[0]->isStack());
4661 ASSERT(i.inputs[1]->isStack());
4663 Location arrLoc = i.inputs[1]->location;
4664 Location valLoc = i.inputs[0]->location;
4666 // We pass the rhs by address, so we need to sync it back to memory
4667 m_regMap.cleanLoc(valLoc);
4669 // The array_setm helpers will decRef any old value that is
4670 // overwritten if appropriate. If copy-on-write occurs, it will also
4671 // incRef the new array and decRef the old array for us. Finally,
4672 // some of the array_setm helpers will decRef the key if it is a
4673 // string (for cases where the key is not a local), while others do
4674 // not (for cases where the key is a local).
4675 if (false) { // type-check
4676 TypedValue* cell = NULL;
4677 TypedValue* rhs = NULL;
4678 ArrayData* arr = NULL;
4679 ArrayData* ret;
4680 ret = array_setm_wk1_v0(cell, arr, rhs);
4681 printf("%p", ret); // use ret
4683 EMIT_CALL(a, array_setm_wk1_v0,
4684 IMM(0),
4685 V(arrLoc),
4686 A(valLoc));
4687 recordReentrantCall(i);
4688 // The array value may have changed, so we need to invalidate any
4689 // register we have associated with arrLoc
4690 m_regMap.invalidate(arrLoc);
4691 // The array_setm helper returns the up-to-date array pointer in rax.
4692 // Therefore, we can bind rax to arrLoc and mark it as dirty.
4693 m_regMap.bind(rax, arrLoc, KindOfArray, RegInfo::DIRTY);
4696 static void undefCns(const StringData* nm) {
4697 VMRegAnchor _;
4698 TypedValue *cns = g_vmContext->getCns(const_cast<StringData*>(nm));
4699 if (!cns) {
4700 raise_notice(Strings::UNDEFINED_CONSTANT, nm->data(), nm->data());
4701 g_vmContext->getStack().pushStringNoRc(const_cast<StringData*>(nm));
4702 } else {
4703 Cell* c1 = g_vmContext->getStack().allocC();
4704 TV_READ_CELL(cns, c1);
4708 void TranslatorX64::emitSideExit(Asm& a, const NormalizedInstruction& i,
4709 bool next) {
4710 const NormalizedInstruction& dest = next ? *i.next : i;
4712 SKTRACE(3, i.source, "sideexit check %p\n", a.code.frontier);
4713 // NB: if next == true, we are assuming here that stack elements
4714 // spit out by this instruction are already clean and sync'd back to
4715 // the top slot of the stack.
4716 m_regMap.scrubStackEntries(dest.stackOff);
4717 m_regMap.cleanAll();
4718 emitRB(a, RBTypeSideExit, i.source);
4719 int stackDisp = dest.stackOff;
4720 if (stackDisp != 0) {
4721 SKTRACE(3, i.source, "stack bump %d => %x\n", stackDisp,
4722 -cellsToBytes(stackDisp));
4723 a. add_imm32_reg64(-cellsToBytes(stackDisp), rVmSp);
4725 emitBindJmp(a, dest.source, REQ_BIND_SIDE_EXIT);
4728 void
4729 TranslatorX64::translateCns(const Tracelet& t,
4730 const NormalizedInstruction& i) {
4731 ASSERT(i.inputs.size() == 0);
4732 ASSERT(i.outStack && !i.outLocal);
4734 // OK to burn "name" into TC: it was merged into the static string
4735 // table, so as long as this code is reachable, the string should
4736 // be too.
4737 DataType outType = i.outStack->valueType();
4738 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4739 const TypedValue* tv = g_vmContext->getCns(name, true, false);
4740 bool checkDefined = false;
4741 if (outType != KindOfInvalid && tv == NULL &&
4742 !RuntimeOption::RepoAuthoritative) {
4743 PreConstDepMap::accessor acc;
4744 tv = findUniquePreConst(acc, name);
4745 if (tv != NULL) {
4746 checkDefined = true;
4747 acc->second.srcKeys.insert(t.m_sk);
4748 Stats::emitInc(a, Stats::Tx64_CnsFast);
4749 } else {
4750 // We had a unique value while analyzing but don't anymore. This
4751 // should be rare so just punt to keep things simple.
4752 punt();
4755 using namespace TargetCache;
4756 if (tv && tvIsStatic(tv)) {
4757 m_regMap.allocOutputRegs(i);
4758 if (checkDefined) {
4759 size_t bit = allocCnsBit(name);
4760 uint32 mask;
4761 CacheHandle ch = bitOffToHandleAndMask(bit, mask);
4762 // The 'test' instruction takes a signed immediate and the mask is
4763 // unsigned, but everything works out okay because the immediate is
4764 // the same size as the other operand. However, we have to sign-extend
4765 // the mask to 64 bits to make the assembler happy.
4766 int64_t imm = (int64_t)(int32)mask;
4767 a.test_imm32_disp_reg32(imm, ch, rVmTl);
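// e.g. if the constant's bit lands at position 31 of its word, mask is
// 0x80000000u; (int32)mask is INT32_MIN, and sign-extending it to 64 bits
// gives 0xffffffff80000000, whose low 32 bits are what 'test' compares.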
4769 // If we get to the optimistic translation and the constant
4770 // isn't defined, our tracelet is ruined because the type may
4771 // not be what we expect. If we were expecting KindOfString we
4772 // could theoretically keep going here since that's the type
4773 // of an undefined constant expression, but it should be rare
4774 // enough that it's not worth the complexity.
4775 UnlikelyIfBlock<CC_Z> ifZero(a, astubs);
4776 Stats::emitInc(astubs, Stats::Tx64_CnsFast, -1);
4777 emitSideExit(astubs, i, false);
4780 // Its type and value are known at compile-time.
4781 ASSERT(tv->m_type == outType ||
4782 (IS_STRING_TYPE(tv->m_type) && IS_STRING_TYPE(outType)));
4783 PhysReg r = getReg(i.outStack->location);
4784 a. mov_imm64_reg(tv->m_data.num, r);
4785 // tv is static; no need to incref
4786 return;
4789 Stats::emitInc(a, Stats::Tx64_CnsSlow);
4790 CacheHandle ch = allocConstant(name);
4791 TRACE(2, "Cns: %s -> ch %ld\n", name->data(), ch);
4792 // Load the constant out of the thread-private tl_targetCaches.
4793 ScratchReg cns(m_regMap);
4794 a. lea_reg64_disp_reg64(rVmTl, ch, *cns);
4795 a. cmp_imm32_disp_reg32(0, TVOFF(m_type), *cns);
4796 DiamondReturn astubsRet;
4797 int stackDest = 0 - int(sizeof(Cell)); // popped - pushed
4799 // It's tempting to dedup these, but not obvious we really can;
4800 // at least stackDest and tmp are specific to the translation
4801 // context.
4802 UnlikelyIfBlock<CC_Z> ifb(a, astubs, &astubsRet);
4803 EMIT_CALL(astubs, undefCns, IMM((uintptr_t)name));
4804 recordReentrantStubCall(i);
4805 m_regMap.invalidate(i.outStack->location);
4808 // Bitwise copy to output area.
4809 emitCopyToStack(a, i, *cns, stackDest);
4810 m_regMap.invalidate(i.outStack->location);
4813 void
4814 TranslatorX64::analyzeDefCns(Tracelet& t,
4815 NormalizedInstruction& i) {
4816 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4817 /* don't bother to translate if it names a builtin constant */
4818 i.m_txFlags = supportedPlan(!g_vmContext->getCns(name, true, false));
4821 typedef void (*defCnsHelper_func_t)(TargetCache::CacheHandle ch, Variant *inout,
4822 StringData *name, size_t bit);
4823 template<bool setBit>
4824 static void defCnsHelper(TargetCache::CacheHandle ch, Variant *inout,
4825 StringData *name, size_t bit) {
4826 using namespace TargetCache;
4827 TypedValue *tv = (TypedValue*)handleToPtr(ch);
4828 if (LIKELY(tv->m_type == KindOfUninit &&
4829 inout->isAllowedAsConstantValue())) {
4830 inout->setEvalScalar();
4831 if (LIKELY(g_vmContext->insertCns(name, (TypedValue*)inout))) {
4832 tvDup((TypedValue*)inout, tv);
4833 *inout = true;
4834 if (setBit) {
4835 DEBUG_ONLY bool alreadyDefined = testAndSetBit(bit);
4836 ASSERT(!alreadyDefined);
4838 return;
4840 tv = (TypedValue*)&false_varNR;
4843 if (tv->m_type != KindOfUninit) {
4844 raise_warning(Strings::CONSTANT_ALREADY_DEFINED, name->data());
4845 } else {
4846 ASSERT(!inout->isAllowedAsConstantValue());
4847 raise_warning(Strings::CONSTANTS_MUST_BE_SCALAR);
4849 *inout = false;
4852 void
4853 TranslatorX64::translateDefCns(const Tracelet& t,
4854 const NormalizedInstruction& i) {
4855 StringData* name = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4857 if (false) {
4858 TargetCache::CacheHandle ch = 0;
4859 size_t bit = 0;
4860 Variant *inout = 0;
4861 StringData *name = 0;
4862 defCnsHelper<true>(ch, inout, name, bit);
4863 defCnsHelper<false>(ch, inout, name, bit);
4866 using namespace TargetCache;
4867 CacheHandle ch = allocConstant(name);
4868 TRACE(2, "DefCns: %s -> ch %ld\n", name->data(), ch);
4870 m_regMap.cleanLoc(i.inputs[0]->location);
4871 if (RuntimeOption::RepoAuthoritative) {
4872 EMIT_CALL(a, (defCnsHelper_func_t)defCnsHelper<false>,
4873 IMM(ch), A(i.inputs[0]->location),
4874 IMM((uint64)name));
4875 } else {
4876 EMIT_CALL(a, (defCnsHelper_func_t)defCnsHelper<true>,
4877 IMM(ch), A(i.inputs[0]->location),
4878 IMM((uint64)name), IMM(allocCnsBit(name)));
4880 recordReentrantCall(i);
4881 m_regMap.invalidate(i.outStack->location);
4884 void
4885 TranslatorX64::translateClsCnsD(const Tracelet& t,
4886 const NormalizedInstruction& i) {
4887 using namespace TargetCache;
4888 const NamedEntityPair& namedEntityPair =
4889 curUnit()->lookupNamedEntityPairId(i.imm[1].u_SA);
4890 ASSERT(namedEntityPair.second);
4891 const StringData *clsName = namedEntityPair.first;
4892 ASSERT(clsName->isStatic());
4893 StringData* cnsName = curUnit()->lookupLitstrId(i.imm[0].u_SA);
4894 ASSERT(cnsName->isStatic());
4895 StringData* fullName = StringData::GetStaticString(
4896 Util::toLower(clsName->data()) + "::" + cnsName->data());
4898 Stats::emitInc(a, Stats::TgtCache_ClsCnsHit);
4899 CacheHandle ch = allocClassConstant(fullName);
4900 ScratchReg cns(m_regMap);
4901 a.lea_reg64_disp_reg64(rVmTl, ch, *cns);
4902 a.cmp_imm32_disp_reg32(0, TVOFF(m_type), *cns);
4904 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
4906 if (false) { // typecheck
4907 TypedValue* tv = NULL;
4908 UNUSED TypedValue* ret =
4909 TargetCache::lookupClassConstant(tv, namedEntityPair.second,
4910 namedEntityPair.first, cnsName);
4913 EMIT_CALL(astubs, TCA(TargetCache::lookupClassConstant),
4914 R(*cns),
4915 IMM(uintptr_t(namedEntityPair.second)),
4916 IMM(uintptr_t(namedEntityPair.first)),
4917 IMM(uintptr_t(cnsName)));
4918 recordReentrantStubCall(i);
4919 // DiamondGuard will restore cns's SCRATCH state but not its
4920 // contents. lookupClassConstant returns the value we want.
4921 emitMovRegReg(astubs, rax, *cns);
4923 int stackDest = 0 - int(sizeof(Cell)); // 0 popped - 1 pushed
4924 emitCopyToStack(a, i, *cns, stackDest);
4927 void
4928 TranslatorX64::analyzeConcat(Tracelet& t, NormalizedInstruction& i) {
4929 ASSERT(i.inputs.size() == 2);
4930 const RuntimeType& r = i.inputs[0]->rtt;
4931 const RuntimeType& l = i.inputs[1]->rtt;
4932 // The concat translation isn't reentrant, and objects that override
4933 // __toString() can cause reentry, so objects are excluded here.
4934 i.m_txFlags = simplePlan(r.valueType() != KindOfObject &&
4935 l.valueType() != KindOfObject);
4938 void
4939 TranslatorX64::translateConcat(const Tracelet& t,
4940 const NormalizedInstruction& i) {
4941 ASSERT(i.inputs.size() == 2);
4942 const DynLocation& r = *i.inputs[0];
4943 const DynLocation& l = *i.inputs[1];
4944 // We have specialized helpers for concatenating two strings, a
4945 // string and an int, and an int and a string.
4946 void* fptr = NULL;
4947 if (l.rtt.isString() && r.rtt.isString()) {
4948 fptr = (void*)concat_ss;
4949 } else if (l.rtt.isString() && r.rtt.isInt()) {
4950 fptr = (void*)concat_si;
4951 } else if (l.rtt.isInt() && r.rtt.isString()) {
4952 fptr = (void*)concat_is;
4954 if (fptr) {
4955 // If we have a specialized helper, use it
4956 if (false) { // type check
4957 StringData* v1 = NULL;
4958 StringData* v2 = NULL;
4959 StringData* retval = concat_ss(v1, v2);
4960 printf("%p", retval); // use retval
4963 // The concat helper will decRef the inputs and incRef the output
4964 // for us if appropriate
4965 EMIT_CALL(a, fptr,
4966 V(l.location),
4967 V(r.location));
4968 ASSERT(i.outStack->rtt.isString());
4969 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4970 RegInfo::DIRTY);
4972 } else {
4973 // Otherwise, use the generic concat helper
4974 if (false) { // type check
4975 uint64_t v1 = 0, v2 = 0;
4976 DataType t1 = KindOfUninit, t2 = KindOfUninit;
4977 StringData *retval = concat(t1, v1, t2, v2);
4978 printf("%p", retval); // use retval
4980 // concat will decRef the two inputs and incRef the output
4981 // for us if appropriate
4982 EMIT_CALL(a, concat,
4983 IMM(l.valueType()), V(l.location),
4984 IMM(r.valueType()), V(r.location));
4985 ASSERT(i.outStack->isString());
4986 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
4987 RegInfo::DIRTY);
4991 TXFlags
4992 planInstrAdd_Int(const NormalizedInstruction& i) {
4993 ASSERT(i.inputs.size() == 2);
4994 return nativePlan(i.inputs[0]->isInt() && i.inputs[1]->isInt());
4997 TXFlags
4998 planInstrAdd_Array(const NormalizedInstruction& i) {
4999 ASSERT(i.inputs.size() == 2);
5000 return supportedPlan(i.inputs[0]->valueType() == KindOfArray &&
5001 i.inputs[1]->valueType() == KindOfArray);
5004 void
5005 TranslatorX64::analyzeAdd(Tracelet& t, NormalizedInstruction& i) {
5006 i.m_txFlags = TXFlags(planInstrAdd_Int(i) | planInstrAdd_Array(i));
5009 void
5010 TranslatorX64::translateAdd(const Tracelet& t,
5011 const NormalizedInstruction& i) {
5012 ASSERT(i.inputs.size() == 2);
5014 if (planInstrAdd_Array(i)) {
5015 // Handle adding two arrays
5016 ASSERT(i.outStack->outerType() == KindOfArray);
5017 if (false) { // type check
5018 ArrayData* v = NULL;
5019 v = array_add(v, v);
5021 // The array_add helper will decRef the inputs and incRef the output
5022 // for us if appropriate
5023 EMIT_CALL(a, array_add,
5024 V(i.inputs[1]->location),
5025 V(i.inputs[0]->location));
5026 recordReentrantCall(i);
5027 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5028 RegInfo::DIRTY);
5029 return;
5032 ASSERT(planInstrAdd_Int(i));
5033 binaryArithCell(i, OpAdd, *i.inputs[0], *i.outStack);
5036 void
5037 TranslatorX64::analyzeXor(Tracelet& t, NormalizedInstruction& i) {
5038 i.m_txFlags = nativePlan((i.inputs[0]->outerType() == KindOfBoolean ||
5039 i.inputs[0]->isInt()) &&
5040 (i.inputs[1]->outerType() == KindOfBoolean ||
5041 i.inputs[1]->isInt()));
5044 static inline void
5045 emitIntToCCBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch,
5046 int CC) {
5048 * test %srcdest, %srcdest
5049 * set<CC> %scratchL
5050 * movzbq %scratchL, %srcdest
5052 a. test_reg64_reg64(srcdest, srcdest);
5053 a. setcc (CC, scratch);
5054 a. mov_reg8_reg64_unsigned(scratch, srcdest);
5057 static inline void
5058 emitIntToBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch) {
5059 emitIntToCCBool(a, srcdest, scratch, CC_NZ);
5062 static inline void
5063 emitIntToNegBool(X64Assembler &a, PhysReg srcdest, PhysReg scratch) {
5064 emitIntToCCBool(a, srcdest, scratch, CC_Z);
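// In C terms: after emitIntToBool, srcdest == (srcdest != 0); after
// emitIntToNegBool, srcdest == (srcdest == 0); both leave a zero-extended
// 0/1 value in the full 64-bit register.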
5067 void
5068 TranslatorX64::translateXor(const Tracelet& t,
5069 const NormalizedInstruction& i) {
5070 PhysReg src, srcdest;
5071 getBinaryStackInputs(m_regMap, i, src, srcdest);
5072 m_regMap.allocOutputRegs(i);
5073 ScratchReg scr(m_regMap);
5074 if (i.inputs[0]->isInt()) {
5075 emitIntToBool(a, src, *scr);
5077 if (i.inputs[1]->isInt()) {
5078 emitIntToBool(a, srcdest, *scr);
5080 a. xor_reg64_reg64(src, srcdest);
5083 void
5084 TranslatorX64::analyzeNot(Tracelet& t, NormalizedInstruction& i) {
5085 ASSERT(i.inputs.size() == 1);
5086 i.m_txFlags = nativePlan(i.inputs[0]->isInt() ||
5087 i.inputs[0]->outerType() == KindOfBoolean);
5090 void
5091 TranslatorX64::translateNot(const Tracelet& t,
5092 const NormalizedInstruction& i) {
5093 ASSERT(i.isNative());
5094 ASSERT(i.outStack && !i.outLocal);
5095 ASSERT(!i.inputs[0]->isVariant());
5096 m_regMap.allocOutputRegs(i);
5097 PhysReg srcdest = m_regMap.getReg(i.outStack->location);
5098 ScratchReg scr(m_regMap);
5099 emitIntToNegBool(a, srcdest, *scr);
5102 void
5103 TranslatorX64::analyzeBitNot(Tracelet& t, NormalizedInstruction& i) {
5104 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
5107 void
5108 TranslatorX64::translateBitNot(const Tracelet& t,
5109 const NormalizedInstruction& i) {
5110 ASSERT(i.outStack && !i.outLocal);
5111 m_regMap.allocOutputRegs(i);
5112 PhysReg srcdest = m_regMap.getReg(i.outStack->location);
5113 a. not_reg64(srcdest);
5116 void
5117 TranslatorX64::analyzeCastInt(Tracelet& t, NormalizedInstruction& i) {
5118 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
5121 void
5122 TranslatorX64::translateCastInt(const Tracelet& t,
5123 const NormalizedInstruction& i) {
5124 ASSERT(i.inputs.size() == 1);
5125 ASSERT(i.outStack && !i.outLocal);
5127 /* nop */
5130 void
5131 TranslatorX64::analyzeCastString(Tracelet& t, NormalizedInstruction& i) {
5132 i.m_txFlags =
5133 i.inputs[0]->isArray() || i.inputs[0]->isObject() ? Supported :
5134 i.inputs[0]->isInt() ? Simple :
5135 Native;
5136 i.funcd = NULL;
5139 static void toStringError(StringData *cls) {
5140 raise_error("Method __toString() must return a string value");
5143 static const StringData* stringDataFromInt(int64 n) {
5144 StringData* s = buildStringData(n);
5145 s->incRefCount();
5146 return s;
5149 static const StringData* stringDataFromDouble(int64 n) {
5150 StringData* s = buildStringData(*(double*)&n);
5151 s->incRefCount();
5152 return s;
5155 void TranslatorX64::toStringHelper(ObjectData *obj) {
5156 // caller must set r15 to the new ActRec
5157 static_assert(rStashedAR == r15 &&
5158 rVmFp == rbp,
5159 "toStringHelper needs to be updated for ABI changes");
5160 register ActRec *ar asm("r15");
5161 register ActRec *rbp asm("rbp");
5163 const Class* cls = obj->getVMClass();
5164 const Func* toString = cls->getToString();
5165 if (!toString) {
5166 // the unwinder will restore rVmSp to
5167 // &ar->m_r, so we'd better make sure it's
5168 // got a valid TypedValue there.
5169 TV_WRITE_UNINIT(&ar->m_r);
5170 std::string msg = cls->preClass()->name()->data();
5171 msg += "::__toString() was not defined";
5172 throw BadTypeConversionException(msg.c_str());
5174 // ar->m_savedRbp set by caller
5175 ar->m_savedRip = rbp->m_savedRip;
5176 ar->m_func = toString;
5177 // ar->m_soff set by caller
5178 ar->initNumArgs(0);
5179 ar->setThis(obj);
5180 ar->setVarEnv(0);
5181 // Point the return address of this C++ function at the prolog to
5182 // execute.
5183 rbp->m_savedRip = (uint64_t)toString->getPrologue(0);
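// The trick above: rather than invoking __toString() from C++, the helper
// rewrites its own caller's saved return address, so returning from this
// function "returns" straight into the translated prologue of __toString(),
// with r15 already pointing at the ActRec the caller prepared.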
5186 void
5187 TranslatorX64::translateCastString(const Tracelet& t,
5188 const NormalizedInstruction& i) {
5189 ASSERT(i.inputs.size() == 1);
5190 ASSERT(i.outStack && !i.outLocal);
5192 if (i.inputs[0]->isNull()) {
5193 m_regMap.allocOutputRegs(i);
5194 PhysReg dest = m_regMap.getReg(i.outStack->location);
5195 a. mov_imm64_reg((uint64)empty_string.get(), dest);
5196 } else if (i.inputs[0]->isBoolean()) {
5197 static StringData* s_1 = StringData::GetStaticString("1");
5198 m_regMap.allocOutputRegs(i);
5199 PhysReg dest = m_regMap.getReg(i.outStack->location);
5200 a. cmp_imm32_reg64(0, dest);
5201 a. mov_imm64_reg((uint64)empty_string.get(), dest);
5202 ScratchReg scratch(m_regMap);
5203 a. mov_imm64_reg((intptr_t)s_1, *scratch);
5204 a. cmov_reg64_reg64(CC_NZ, *scratch, dest);
5205 } else if (i.inputs[0]->isInt()) {
5206 EMIT_CALL(a, stringDataFromInt, V(i.inputs[0]->location));
5207 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5208 RegInfo::DIRTY);
5209 } else if (i.inputs[0]->isDouble()) {
5210 EMIT_CALL(a, stringDataFromDouble, V(i.inputs[0]->location));
5211 m_regMap.bind(rax, i.outStack->location, i.outStack->outerType(),
5212 RegInfo::DIRTY);
5213 } else if (i.inputs[0]->isString()) {
5214 // nop
5215 } else if (i.inputs[0]->isArray()) {
5216 static StringData* s_array = StringData::GetStaticString("Array");
5217 m_regMap.allocOutputRegs(i);
5218 PhysReg dest = m_regMap.getReg(i.outStack->location);
5219 emitDecRef(i, dest, KindOfArray);
5220 a. mov_imm64_reg((uint64)s_array, dest);
5221 } else if (i.inputs[0]->isObject()) {
5222 m_regMap.scrubStackEntries(i.stackOff - 1);
5223 m_regMap.cleanAll();
5224 int delta = i.stackOff + kNumActRecCells - 1;
5225 if (delta) {
5226 a. add_imm64_reg64(-cellsToBytes(delta), rVmSp);
5228 a. store_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmSp);
5229 a. store_imm32_disp_reg(nextSrcKey(t, i).offset() - curFunc()->base(),
5230 AROFF(m_soff), rVmSp);
5231 PhysReg obj = m_regMap.getReg(i.inputs[0]->location);
5232 if (obj != argNumToRegName[0]) {
5233 a. mov_reg64_reg64(obj, argNumToRegName[0]);
5235 m_regMap.smashRegs(kAllRegs);
5236 a. mov_reg64_reg64(rVmSp, rStashedAR);
5237 EMIT_CALL(a, TCA(toStringHelper));
5238 recordReentrantCall(i);
5239 if (i.stackOff != 0) {
5240 a. add_imm64_reg64(cellsToBytes(i.stackOff), rVmSp);
5243 PhysReg base;
5244 int disp;
5245 locToRegDisp(i.outStack->location, &base, &disp);
5246 ScratchReg scratch(m_regMap);
5247 emitStringCheck(a, base, disp + TVOFF(m_type), *scratch);
5249 UnlikelyIfBlock<CC_NZ> ifNotString(a, astubs);
5250 EMIT_CALL(astubs, toStringError, IMM(0));
5251 recordReentrantStubCall(i);
5253 } else {
5254 NOT_REACHED();
5258 void
5259 TranslatorX64::analyzePrint(Tracelet& t, NormalizedInstruction& i) {
5260 ASSERT(i.inputs.size() == 1);
5261 const RuntimeType& rtt = i.inputs[0]->rtt;
5262 DataType type = rtt.outerType();
5263 i.m_txFlags = simplePlan(
5264 type == KindOfUninit ||
5265 type == KindOfNull ||
5266 type == KindOfBoolean ||
5267 rtt.isInt() ||
5268 rtt.isString());
5271 void
5272 TranslatorX64::translatePrint(const Tracelet& t,
5273 const NormalizedInstruction& i) {
5274 const vector<DynLocation*>& inputs = i.inputs;
5275 ASSERT(inputs.size() == 1);
5276 ASSERT(!i.outLocal);
5277 ASSERT(!i.outStack || i.outStack->isInt());
5278 Location loc = inputs[0]->location;
5279 DataType type = inputs[0]->outerType();
5280 switch (type) {
5281 STRINGCASE(): EMIT_CALL(a, print_string, V(loc)); break;
5282 case KindOfInt64: EMIT_CALL(a, print_int, V(loc)); break;
5283 case KindOfBoolean: EMIT_CALL(a, print_boolean, V(loc)); break;
5284 NULLCASE(): /* do nothing */ break;
5285 default: {
5286 // Translation is only supported for Null, Boolean, Int, and String
5287 ASSERT(false);
5288 break;
5291 m_regMap.allocOutputRegs(i);
5292 if (i.outStack) {
5293 PhysReg outReg = getReg(i.outStack->location);
5294 emitImmReg(a, 1, outReg);
5298 void
5299 TranslatorX64::translateJmp(const Tracelet& t,
5300 const NormalizedInstruction& i) {
5301 ASSERT(!i.outStack && !i.outLocal);
5302 syncOutputs(t);
5304 // Check the surprise page on all backwards jumps
5305 if (i.imm[0].u_BA < 0 && !i.noSurprise) {
5306 if (trustSigSegv) {
5307 const uint64_t stackMask =
5308 ~(cellsToBytes(RuntimeOption::EvalVMStackElms) - 1);
5309 a.mov_reg64_reg64(rVmSp, rScratch);
5310 a.and_imm64_reg64(stackMask, rScratch);
5311 TCA surpriseLoad = a.code.frontier;
5312 a.load_reg64_disp_reg64(rScratch, 0, rScratch);
5314 if (!m_segvStubs.insert(SignalStubMap::value_type(surpriseLoad,
5315 astubs.code.frontier)))
5316 NOT_REACHED();
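      /*
       * Reader's note, sketching the mechanism implied by the code above: the
       * masked load reads from the base of the VM stack, where the surprise
       * flags live. When a surprise is pending that page is made
       * inaccessible, so the load faults; the SEGV handler looks the faulting
       * address up in m_segvStubs and resumes at the paired astubs code
       * emitted below, which calls EventHook::CheckSurprise.
       */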
5318 * Note that it is safe not to register unwind information here,
5319 * because we just called syncOutputs so all registers are
5320 * already clean.
5322 astubs.call((TCA)&EventHook::CheckSurprise);
5323 recordStubCall(i);
5324 astubs.jmp(a.code.frontier);
5325 } else {
5326 emitTestSurpriseFlags();
5328 UnlikelyIfBlock<CC_NZ> ifSurprise(a, astubs);
5329 astubs.call((TCA)&EventHook::CheckSurprise);
5330 recordStubCall(i);
5334 SrcKey sk(curFunc(), i.offset() + i.imm[0].u_BA);
5335 emitBindJmp(sk);
5338 void
5339 TranslatorX64::analyzeSwitch(Tracelet& t,
5340 NormalizedInstruction& i) {
5341 RuntimeType& rtt = i.inputs[0]->rtt;
5342 ASSERT(rtt.outerType() != KindOfRef);
5343 switch (rtt.outerType()) {
5344 NULLCASE():
5345 case KindOfBoolean:
5346 case KindOfInt64:
5347 i.m_txFlags = Native;
5348 break;
5350 case KindOfDouble:
5351 i.m_txFlags = Simple;
5352 break;
5354 STRINGCASE():
5355 case KindOfObject:
5356 case KindOfArray:
5357 i.m_txFlags = Supported;
5358 break;
5360 default:
5361 not_reached();
5365 template <typename T>
5366 static int64 switchBoundsCheck(T v, int64 base, int64 nTargets) {
5367 // I'm relying on gcc to be smart enough to optimize away the next
5368 // two lines when T is int64.
5369 if (int64(v) == v) {
5370 int64 ival = v;
5371 if (ival >= base && ival < (base + nTargets)) {
5372 return ival - base;
5375 return nTargets + 1;
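  // Illustrative behaviour (example values only): with base = 10 and
  // nTargets = 3 the in-range cases are 10, 11 and 12, so
  //   switchBoundsCheck<int64>(11, 10, 3)   == 1   // ival - base
  //   switchBoundsCheck<int64>(42, 10, 3)   == 4   // nTargets + 1
  //   switchBoundsCheck<double>(2.5, 10, 3) == 4   // non-integral double
  // nTargets + 1 selects the last (default) jump-table entry in the helper
  // path of translateSwitch below.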
5378 static int64 switchDoubleHelper(int64 val, int64 base, int64 nTargets) {
5379 union {
5380 int64 intbits;
5381 double dblval;
5382 } u;
5383 u.intbits = val;
5384 return switchBoundsCheck(u.dblval, base, nTargets);
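  // The int64 argument is the double's raw bit pattern (it comes straight
  // from the TypedValue's 64-bit data word), so the union above is just a
  // bit reinterpretation, morally equivalent to:
  //   double dblval; memcpy(&dblval, &val, sizeof dblval);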
5387 static int64 switchStringHelper(StringData* s, int64 base, int64 nTargets) {
5388 int64 ival;
5389 double dval;
5390 switch (s->isNumericWithVal(ival, dval, 1)) {
5391 case KindOfNull:
5392 ival = switchBoundsCheck(0, base, nTargets);
5393 break;
5395 case KindOfDouble:
5396 ival = switchBoundsCheck(dval, base, nTargets);
5397 break;
5399 case KindOfInt64:
5400 ival = switchBoundsCheck(ival, base, nTargets);
5401 break;
5403 default:
5404 not_reached();
5406 if (s->decRefCount() == 0) {
5407 s->release();
5409 return ival;
5412 static int64 switchObjHelper(ObjectData* o, int64 base, int64 nTargets) {
5413 int64 ival = o->o_toInt64();
5414 if (o->decRefCount() == 0) {
5415 o->release();
5417 return switchBoundsCheck(ival, base, nTargets);
5420 void
5421 TranslatorX64::translateSwitch(const Tracelet& t,
5422 const NormalizedInstruction& i) {
5423 int64 base = i.imm[1].u_I64A;
5424 bool bounded = i.imm[2].u_IVA;
5425 const ImmVector& iv = i.immVec;
5426 int nTargets = bounded ? iv.size() - 2 : iv.size();
5427 int jmptabSize = nTargets;
5428 ASSERT(nTargets > 0);
5429 PhysReg valReg = getReg(i.inputs[0]->location);
5430 DataType inType = i.inputs[0]->outerType();
5431 ASSERT(IMPLIES(inType != KindOfInt64, bounded));
5432 ASSERT(IMPLIES(bounded, iv.size() > 2));
5433 syncOutputs(t); // this will mark valReg as FREE but it still has
5434 // its old value
5436 SrcKey defaultSk(curFunc(), i.offset() + iv.vec32()[iv.size() - 1]);
5437 SrcKey zeroSk(curFunc(), 0);
5438 if (0 >= base && 0 < (base + nTargets)) {
5439 zeroSk.m_offset = i.offset() + iv.vec32()[0 - base];
5440 } else {
5441 zeroSk.m_offset = defaultSk.m_offset;
5444 switch (i.inputs[0]->outerType()) {
5445 NULLCASE(): {
5446 emitBindJmp(zeroSk);
5447 return;
5450 case KindOfBoolean: {
5451 SrcKey nonzeroSk(curFunc(), i.offset() + iv.vec32()[iv.size() - 2]);
5452 a.test_reg64_reg64(valReg, valReg);
5453 emitCondJmp(nonzeroSk, zeroSk, CC_NZ);
5454 return;
5457 case KindOfInt64:
5458 // No special treatment needed
5459 break;
5461 case KindOfDouble:
5462 STRINGCASE():
5463 case KindOfObject: {
5464 // switch(Double|String|Obj)Helper do bounds-checking for us, so
5465 // we need to make sure the default case is in the jump table,
5466       // and we don't emit our own bounds-checking code
5467 jmptabSize = iv.size();
5468 bounded = false;
5469 if (false) {
5470 StringData* s = NULL;
5471 ObjectData* o = NULL;
5472 switchDoubleHelper(0.0, 0, 0);
5473 switchStringHelper(s, 0, 0);
5474 switchObjHelper(o, 0, 0);
5476 EMIT_CALL(a,
5477 inType == KindOfDouble ? (TCA)switchDoubleHelper :
5478 (IS_STRING_TYPE(inType) ? (TCA)switchStringHelper :
5479 (TCA)switchObjHelper),
5480 R(valReg), IMM(base), IMM(nTargets));
5481 recordCall(i);
5482 valReg = rax;
5483 break;
5486 case KindOfArray:
5487 emitDecRef(a, i, valReg, KindOfArray);
5488 emitBindJmp(defaultSk);
5489 return;
5491 default:
5492 not_reached();
5495 if (bounded) {
5496 if (base) {
5497 a.sub_imm64_reg64(base, valReg);
5499 a.cmp_imm64_reg64(nTargets, valReg);
5500 prepareForSmash(a, kJmpccLen);
5501 TCA defaultStub =
5502 emitServiceReq(REQ_BIND_JMPCC_SECOND, 3,
5503 a.code.frontier, defaultSk.m_offset, CC_AE);
5504 // Unsigned comparison: check for < 0 and >= nTargets at the same time
5505 a.jae(defaultStub);
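    // Worked example of the single unsigned check (example values only):
    // with base = 5 and nTargets = 3, an input of 4 becomes 4 - 5 = -1,
    // which as an unsigned quantity is huge, so jae sends it to the default
    // stub; an input of 6 becomes 1, 1 < 3, and control falls through to the
    // jump table. One compare covers both "below base" and "at or above
    // base + nTargets".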
5508 TCA* jmptab = m_globalData.alloc<TCA>(sizeof(TCA), jmptabSize);
5509 TCA afterLea = a.code.frontier + kLeaRipLen;
5510 ptrdiff_t diff = (TCA)jmptab - afterLea;
5511 ASSERT(deltaFits(diff, sz::dword));
5512 a.lea_rip_disp_reg64(diff, rScratch);
5513 ASSERT(a.code.frontier == afterLea);
5514 a.jmp_reg64_index_displ(rScratch, valReg, 0);
5516 for (int idx = 0; idx < jmptabSize; ++idx) {
5517 SrcKey sk(curFunc(), i.offset() + iv.vec32()[idx]);
5518 jmptab[idx] = emitServiceReq(false, REQ_BIND_ADDR, 2ull,
5519 &jmptab[idx], uint64_t(sk.offset()));
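    // Shape of the emitted dispatch (a sketch, not literal disassembly):
    //   lea  rScratch, [rip + (jmptab - afterLea)]      ; rScratch = &jmptab[0]
    //   jmp  qword ptr [rScratch + valReg * sizeof(TCA)]
    // Each jmptab[idx] initially points at a REQ_BIND_ADDR service request
    // that, when first reached, translates the target SrcKey and patches the
    // table slot with the address of the real translation.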
5523 void
5524 TranslatorX64::analyzeRetC(Tracelet& t,
5525 NormalizedInstruction& i) {
5526 i.manuallyAllocInputs = true;
5527 i.m_txFlags = Supported;
5530 void
5531 TranslatorX64::analyzeRetV(Tracelet& t,
5532 NormalizedInstruction& i) {
5533 analyzeRetC(t, i);
5536 void TranslatorX64::emitReturnVal(
5537 Asm& a, const NormalizedInstruction& i,
5538 PhysReg dstBase, int dstOffset, PhysReg thisBase, int thisOffset,
5539 PhysReg scratch) {
5541 if (!i.grouped) return;
5542 TypedValue tv;
5543 TV_WRITE_UNINIT(&tv);
5544 tv.m_data.num = 0; // to keep the compiler happy
5547 * We suppressed the write of the (literal) return value
5548 * to the stack. Figure out what it was.
5550 NormalizedInstruction* prev = i.prev;
5551 ASSERT(!prev->outStack);
5552 switch (prev->op()) {
5553 case OpNull:
5554 tv.m_type = KindOfNull;
5555 break;
5556 case OpTrue:
5557 case OpFalse:
5558 tv.m_type = KindOfBoolean;
5559 tv.m_data.num = prev->op() == OpTrue;
5560 break;
5561 case OpInt:
5562 tv.m_type = KindOfInt64;
5563 tv.m_data.num = prev->imm[0].u_I64A;
5564 break;
5565 case OpDouble:
5566 tv.m_type = KindOfDouble;
5567 tv.m_data.dbl = prev->imm[0].u_DA;
5568 break;
5569 case OpString:
5570 tv.m_type = BitwiseKindOfString;
5571 tv.m_data.pstr = curUnit()->lookupLitstrId(prev->imm[0].u_SA);
5572 break;
5573 case OpArray:
5574 tv.m_type = KindOfArray;
5575 tv.m_data.parr = curUnit()->lookupArrayId(prev->imm[0].u_AA);
5576 break;
5577 case OpThis: {
5578 if (thisBase != dstBase || thisOffset != dstOffset) {
5579 a. load_reg64_disp_reg64(thisBase, thisOffset, scratch);
5580 a. store_reg64_disp_reg64(scratch, dstOffset, dstBase);
5582 emitStoreImm(a, KindOfObject,
5583 dstBase, dstOffset + TVOFF(m_type), sz::dword);
5584 return;
5586 case OpBareThis: {
5587 ASSERT(curFunc()->cls());
5588 a. mov_imm32_reg32(KindOfNull, scratch);
5589 a. test_imm64_disp_reg64(1, thisOffset, thisBase);
5591 JccBlock<CC_NZ> noThis(a);
5592 a. mov_imm32_reg32(KindOfObject, scratch);
5594 a. store_reg32_disp_reg64(scratch, dstOffset + TVOFF(m_type), dstBase);
5595 if (thisBase != dstBase || thisOffset != dstOffset) {
5596 a. load_reg64_disp_reg64(thisBase, thisOffset, scratch);
5597 a. store_reg64_disp_reg64(scratch, dstOffset, dstBase);
5599 return;
5601 default:
5602 not_reached();
5605 emitStoreImm(a, tv.m_type,
5606 dstBase, dstOffset + TVOFF(m_type), sz::dword);
5607 if (tv.m_type != KindOfNull) {
5608 emitStoreImm(a, tv.m_data.num,
5609 dstBase, dstOffset, sz::qword);
5614 // translateRetC --
5616 // Return to caller with the current activation record replaced with the
5617 // top-of-stack return value. Call with outputs sync'ed, so the code
5618 // we're emitting runs "in between" basic blocks.
5619 void
5620 TranslatorX64::translateRetC(const Tracelet& t,
5621 const NormalizedInstruction& i) {
5622 if (i.skipSync) ASSERT(i.grouped);
5625 * This method chooses one of two ways to generate machine code for RetC
5626 * depending on whether we are generating a specialized return (where we
5627 * free the locals inline when possible) or a generic return (where we call
5628 * a helper function to free locals).
5630 * For the specialized return, we emit the following flow:
5632 * Check if varenv is NULL
5633 * If it's not NULL, branch to label 2
5634 * Free each local variable
5635 * 1:
5636    *     Teleport the return value to the appropriate memory location
5637 * Restore the old values for rVmFp and rVmSp, and
5638 * unconditionally transfer control back to the caller
5639 * 2:
5640 * Call the frame_free_locals helper
5641 * Jump to label 1
5643 * For a generic return, we emit the following flow:
5645 * Call the frame_free_locals helper
5646    *   Teleport the return value to the appropriate memory location
5647 * Restore the old values for rVmFp and rVmSp, and
5648 * unconditionally transfer control back to the caller
5651 int stackAdjustment = t.m_stackChange;
5652 if (i.skipSync) {
5653 SKTRACE(2, i.source, "i.skipSync\n");
5656 * getting here means there was nothing to do between
5657 * a previous reqXXX and this ret. Any spill code we generate
5658 * here would be broken (because the rbx is wrong), so
5659 * verify that we don't generate anything...
5661 TCA s DEBUG_ONLY = a.code.frontier;
5662 syncOutputs(0);
5663 ASSERT(s == a.code.frontier);
5664 stackAdjustment = 0;
5665 } else {
5667 * no need to syncOutputs here... we're going to update
5668 * rbx at the end of this function anyway, and we may want
5669 * to use enregistered locals on the fast path below
5671 m_regMap.scrubStackEntries(t.m_stackChange);
5672 m_regMap.cleanAll(); // TODO(#1339331): don't.
5675 bool noThis = !curFunc()->isPseudoMain() &&
5676 (!curFunc()->isMethod() || curFunc()->isStatic());
5677 bool mayUseVV = (curFunc()->attrs() & AttrMayUseVV);
5678 bool mergedThis = i.grouped && (i.prev->op() == OpThis ||
5679 i.prev->op() == OpBareThis);
5681 * figure out where to put the return value, and where to get it from
5683 ASSERT(i.stackOff == t.m_stackChange);
5684 const Location retValSrcLoc(Location::Stack, stackAdjustment - 1);
5686 const Func *callee = curFunc();
5687 ASSERT(callee);
5688 int nLocalCells =
5689 callee == NULL ? 0 : // This happens for returns from pseudo-main.
5690 callee->numSlotsInFrame();
5691 int retvalSrcBase = cellsToBytes(-stackAdjustment);
5693 ASSERT(cellsToBytes(locPhysicalOffset(retValSrcLoc)) == retvalSrcBase);
5696 * The (1 + nLocalCells) skips 1 slot for the return value.
5698 int retvalDestDisp = cellsToBytes(1 + nLocalCells - stackAdjustment) +
5699 AROFF(m_r);
5701 if (freeLocalsInline()) {
5702 SKTRACE(2, i.source, "emitting specialized inline return\n");
5704 // Emit specialized code inline to clean up the locals
5705 ASSERT(curFunc()->numLocals() == (int)i.inputs.size());
5707 ScratchReg rTmp(m_regMap);
5710      * If this function can possibly use variadic arguments or a shared
5711 * variable environment, we need to check for it and go to a
5712 * generic return if so.
5714 boost::scoped_ptr<DiamondReturn> mayUseVVRet;
5715 if (mayUseVV) {
5716 SKTRACE(2, i.source, "emitting mayUseVV in UnlikelyIf\n");
5718 mayUseVVRet.reset(new DiamondReturn);
5719 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rTmp);
5720 a. test_reg64_reg64(*rTmp, *rTmp);
5722 UnlikelyIfBlock<CC_NZ> varEnvCheck(a, astubs, mayUseVVRet.get());
5724 m_regMap.cleanAll();
5725 if (i.grouped) {
5726 ScratchReg s(m_regMap);
5727 emitReturnVal(astubs, i,
5728 rVmSp, retvalSrcBase, rVmFp, AROFF(m_this), *s);
5730 emitFrameRelease(astubs, i, noThis || mergedThis);
5734 for (unsigned int k = 0; k < i.inputs.size(); ++k) {
5735 // RetC's inputs should all be locals
5736 ASSERT(i.inputs[k]->location.space == Location::Local);
5737 DataType t = i.inputs[k]->outerType();
5738 if (IS_REFCOUNTED_TYPE(t)) {
5739 PhysReg reg = m_regMap.allocReg(i.inputs[k]->location, t,
5740 RegInfo::CLEAN);
5741 emitDecRef(i, reg, t);
5745 if (mergedThis) {
5746       // There is nothing to do: we're returning this,
5747       // but we didn't incRef it, so we don't have to
5748 // decRef here.
5749 } else {
5750       // If this is an instance method called on an object or if it is a
5751 // pseudomain, we need to decRef $this (if there is one)
5752 if (curFunc()->isMethod() && !curFunc()->isStatic()) {
5753 // This assert is weaker than it looks; it only checks the invocation
5754 // we happen to be translating for. The runtime "assert" is the
5755 // unconditional dereference of m_this we emit; if the frame has
5756 // neither this nor a class, then m_this will be null and we'll
5757 // SEGV.
5758 ASSERT(curFrame()->hasThis() || curFrame()->hasClass());
5759 // m_this and m_cls share a slot in the ActRec, so we check the
5760 // lowest bit (0 -> m_this, 1 -> m_cls)
5761 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rTmp);
5762 if (i.guardedThis) {
5763 emitDecRef(i, *rTmp, KindOfObject);
5764 } else {
5765 a. test_imm32_reg64(1, *rTmp);
5767 JccBlock<CC_NZ> ifZero(a);
5768           emitDecRef(i, *rTmp, KindOfObject); // it's $this; decref it.
5771 } else if (curFunc()->isPseudoMain()) {
5772 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rTmp);
5773 a. shr_imm32_reg64(1, *rTmp); // sets c (from bit 0) and z
5774 FreezeRegs ice(m_regMap);
5776 // tests for Not Zero and Not Carry
5777 UnlikelyIfBlock<CC_NBE> ifRealThis(a, astubs);
5778 astubs. shl_imm32_reg64(1, *rTmp);
5779 emitDecRef(astubs, i, *rTmp, KindOfObject);
5784 // Register map is officially out of commission now.
5785 m_regMap.scrubLoc(retValSrcLoc);
5786 m_regMap.smashRegs(kAllRegs);
5788 emitTestSurpriseFlags();
5790 UnlikelyIfBlock<CC_NZ> ifTracer(a, astubs);
5791 if (i.grouped) {
5792 ScratchReg s(m_regMap);
5793 emitReturnVal(astubs, i,
5794 rVmSp, retvalSrcBase, rVmFp, AROFF(m_this), *s);
5796 astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5797 emitCall(astubs, (TCA)&EventHook::FunctionExit, true);
5798 recordReentrantStubCall(i);
5801 // The register map on the main line better be empty (everything
5802 // smashed) or some of the above DiamondReturns might generate
5803 // reconciliation code.
5804 ASSERT(m_regMap.empty());
5805 } else {
5806 SKTRACE(2, i.source, "emitting generic return\n");
5808 m_regMap.cleanAll();
5809 m_regMap.smashRegs(kAllRegs);
5810 if (i.grouped) {
5812 * What a pain: EventHook::onFunctionExit needs access
5813        * to the return value, so we have to write it to the
5814        * stack anyway. We still win for OpThis and
5815        * OpBareThis, since we don't have to do any refcounting.
5817 ScratchReg s(m_regMap);
5818 emitReturnVal(astubs, i,
5819 rVmSp, retvalSrcBase, rVmFp, AROFF(m_this), *s);
5821 // If we are doing the generic return flow, we emit a call to
5822 // frame_free_locals here
5823 ASSERT(i.inputs.size() == 0);
5824 emitFrameRelease(a, i, noThis || mergedThis);
5828 * We're officially between tracelets now, and the normal register
5829 * allocator is not being used.
5831 ASSERT(m_regMap.empty());
5832 RegSet scratchRegs = kScratchCrossTraceRegs;
5833 DumbScratchReg rRetAddr(scratchRegs);
5835 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRip), *rRetAddr);
5836 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
5839 * Having gotten everything we care about out of the current frame
5840    * pointer, smash the return value's type and data over it. We don't
5841 * care about reference counts: as long as this runs to completion, we're
5842 * refcount-neutral.
5844 if (i.grouped) {
5845 DumbScratchReg s(scratchRegs);
5846 emitReturnVal(a, i, rVmSp, retvalDestDisp,
5847 rVmSp, retvalDestDisp - AROFF(m_r) + AROFF(m_this),
5848 *s);
5849 } else {
5850 ASSERT(sizeof(Cell) == 16);
5851 a. load_reg64_disp_reg64 (rVmSp, retvalSrcBase, rScratch);
5852 a. store_reg64_disp_reg64(rScratch, retvalDestDisp, rVmSp);
5853 a. load_reg64_disp_reg64 (rVmSp, retvalSrcBase + 8, rScratch);
5854 a. store_reg64_disp_reg64(rScratch, retvalDestDisp + 8, rVmSp);
5858 * Now update the principal hardware registers.
5860 * Stack pointer has to skip over all the locals as well as the
5861 * activation record.
5863 a. add_imm64_reg64(sizeof(ActRec) +
5864 cellsToBytes(nLocalCells - stackAdjustment), rVmSp);
5865 emitRB(a, RBTypeFuncExit, curFunc()->fullName()->data(), RegSet(*rRetAddr));
5866 a. jmp_reg (*rRetAddr);
5867 translator_not_reached(a);
5870 void
5871 TranslatorX64::translateRetV(const Tracelet& t,
5872 const NormalizedInstruction& i) {
5873 translateRetC(t, i);
5877 * NativeImpl is a special operation in the sense that it must be the
5878 * only opcode in a function body, and also functions as the return.
5880 * This function runs between tracelets and does not use m_regMap.
5882 void TranslatorX64::emitNativeImpl(const Func* func,
5883 bool emitSavedRIPReturn) {
5884 BuiltinFunction builtinFuncPtr = func->builtinFuncPtr();
5885 if (false) { // typecheck
5886 ActRec* ar = NULL;
5887 builtinFuncPtr(ar);
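    // This `if (false)` block is this file's usual typecheck idiom: the call
    // is never executed, but it forces a compile-time check that
    // builtinFuncPtr really is callable with an ActRec*, since the actual
    // call below is emitted through an untyped code address (TCA).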
5890 TRACE(2, "calling builtin preClass %p func %p\n", func->preClass(),
5891 builtinFuncPtr);
5893 * Call the native implementation. This will free the locals for us in the
5894 * normal case. In the case where an exception is thrown, the VM unwinder
5895 * will handle it for us.
5897 a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5898 emitCall(a, (TCA)builtinFuncPtr, false /* smash regs */);
5901 * We're sometimes calling this while curFunc() isn't really the
5902 * builtin---make sure to properly record the sync point as if we
5903 * are inside the builtin.
5905 * The assumption here is that for builtins, the generated func
5906 * contains only a single opcode (NativeImpl), and there are no
5907 * non-argument locals.
5909 ASSERT(func->numIterators() == 0 && func->isBuiltin());
5910 ASSERT(func->numLocals() == func->numParams());
5911 ASSERT(*func->getEntry() == OpNativeImpl);
5912 ASSERT(instrLen(func->getEntry()) == func->past() - func->base());
5913 Offset pcOffset = 0; // NativeImpl is the only instruction in the func
5914 Offset stackOff = func->numLocals(); // Builtin stubs have no
5915 // non-arg locals
5916 recordSyncPoint(a, pcOffset, stackOff);
5918 RegSet unusedRegs = kScratchCrossTraceRegs;
5919 DumbScratchReg rRetAddr(unusedRegs);
5921 RegSet saveDuringEmitRB;
5922 if (emitSavedRIPReturn) {
5923 // Get the return address from the ActRec
5924 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRip), *rRetAddr);
5925 saveDuringEmitRB |= RegSet(*rRetAddr);
5929 * The native implementation already put the return value on the
5930 * stack for us, and handled cleaning up the arguments. We have to
5931 * update the frame pointer and the stack pointer, and load the
5932 * return value into the return register so the trace we are
5933 * returning to has it where it expects.
5935 * TODO(#1273094): we should probably modify the actual builtins to
5936 * return values via registers (rax:edx) using the C ABI and do a
5937 * reg-to-reg move.
5939 int nLocalCells = func->numSlotsInFrame();
5940 a. add_imm64_reg64(sizeof(ActRec) + cellsToBytes(nLocalCells-1), rVmSp);
5941 a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
5943 emitRB(a, RBTypeFuncExit, func->fullName()->data(), saveDuringEmitRB);
5944 if (emitSavedRIPReturn) {
5945 a. jmp_reg (*rRetAddr);
5946 translator_not_reached(a);
5950 void
5951 TranslatorX64::translateNativeImpl(const Tracelet& t,
5952 const NormalizedInstruction& ni) {
5954 * We assume that NativeImpl is the only instruction in the trace,
5955 * and the only instruction for the implementation of the function.
5957 ASSERT(ni.stackOff == 0);
5958 ASSERT(m_regMap.empty());
5959 emitNativeImpl(curFunc(), true);
5962 // Warning: smashes rsi and rdi, and can't handle unclean registers.
5963 // Used between functions.
5964 void
5965 TranslatorX64::emitFrameRelease(X64Assembler& a,
5966 const NormalizedInstruction& i,
5967 bool noThis /*= false*/) {
5968 if (false) { // typecheck
5969 frame_free_locals(curFrame(), 0);
5971 a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
5972 int numLocals = curFunc()->numLocals();
5973 emitImmReg(a, numLocals, argNumToRegName[1]);
5974 if (noThis) {
5975 emitCall(a, (TCA)frame_free_locals_no_this);
5976 } else {
5977 emitCall(a, (TCA)frame_free_locals);
5979 recordReentrantCall(a, i);
5982 // emitClsLocalIndex --
5983 // emitStringToClass --
5984 // emitStringToKnownClass --
5985 // emitObjToClass --
5986 // emitClsAndPals --
5987 // Helpers for AGetC/AGetL.
5989 const int kEmitClsLocalIdx = 0;
5992 * Determine if the class is defined, and fatal if not.
5993  * If reg is not noreg, return the Class* in it.
5994  * If we can statically prove that the class is defined,
5995  * all checks are omitted (e.g. it's a parent of the current,
5996 * fixed, context).
5998 void
5999 TranslatorX64::emitKnownClassCheck(const NormalizedInstruction& i,
6000 const StringData* clsName,
6001 register_name_t reg) {
6002 using namespace TargetCache;
6003 ASSERT(clsName);
6004 Class* klass = Unit::lookupClass(clsName);
6005 bool guarded = false;
6006 if (klass) {
6007 guarded = i.guardedCls;
6008 if (!guarded) {
6009 Class *ctx = curFunc()->cls();
6010 if (ctx && ctx->classof(klass)) {
6011 guarded = true;
6015 if (guarded) {
6016 if (reg != reg::noreg) {
6017 emitImmReg(a, (uint64_t)klass, reg);
6019 } else {
6020 Stats::emitInc(a, Stats::TgtCache_KnownClsHit);
6021 CacheHandle ch = allocKnownClass(clsName);
6022 if (reg == reg::noreg) {
6023 a. cmp_imm32_disp_reg32(0, ch, rVmTl);
6024 } else {
6025 a. load_reg64_disp_reg64(rVmTl, ch, reg);
6026 a. test_reg64_reg64(reg, reg);
6029 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
6030 ScratchReg clsPtr(m_regMap);
6031 astubs. lea_reg64_disp_reg64(rVmTl, ch, *clsPtr);
6032 if (false) { // typecheck
6033 Class** cache = NULL;
6034 UNUSED Class* ret =
6035 TargetCache::lookupKnownClass<false>(cache, clsName, true);
6037 // We're only passing two arguments to lookupKnownClass because
6038 // the third is ignored in the checkOnly == false case
6039 EMIT_CALL(astubs, ((TargetCache::lookupKnownClass_func_t)
6040 TargetCache::lookupKnownClass<false>),
6041 R(*clsPtr), IMM((uintptr_t)clsName));
6042 recordReentrantStubCall(i);
6043 if (reg != reg::noreg) {
6044 emitMovRegReg(astubs, rax, reg);
6050 void
6051 TranslatorX64::emitStringToKnownClass(const NormalizedInstruction& i,
6052 const StringData* clsName) {
6053 ScratchReg cls(m_regMap);
6054 emitKnownClassCheck(i, clsName, *cls);
6055 m_regMap.bindScratch(cls, i.outStack->location, KindOfClass, RegInfo::DIRTY);
6058 void
6059 TranslatorX64::emitStringToClass(const NormalizedInstruction& i) {
6060 using namespace TargetCache;
6061 if (!i.inputs[kEmitClsLocalIdx]->rtt.valueString()) {
6062 // Handle the case where we don't know the name of the class
6063 // at translation time
6064 const Location& in = i.inputs[kEmitClsLocalIdx]->location;
6065 const Location& out = i.outStack->location;
6066 CacheHandle ch = ClassCache::alloc();
6067 if (false) {
6068 StringData *name = NULL;
6069 const UNUSED Class* cls = ClassCache::lookup(ch, name);
6071 TRACE(1, "ClassCache @ %d\n", int(ch));
6072 if (i.inputs[kEmitClsLocalIdx]->rtt.isVariant()) {
6073 EMIT_CALL(a, ClassCache::lookup,
6074 IMM(ch),
6075 DEREF(in));
6076 } else {
6077 EMIT_CALL(a, ClassCache::lookup,
6078 IMM(ch),
6079 V(in));
6081 recordReentrantCall(i);
6082 m_regMap.bind(rax, out, KindOfClass, RegInfo::DIRTY);
6083 return;
6085 // We know the name of the class at translation time; use the
6086 // target cache associated with the name of the class
6087 const StringData* clsName = i.inputs[kEmitClsLocalIdx]->rtt.valueString();
6088 emitStringToKnownClass(i, clsName);
6091 void
6092 TranslatorX64::emitObjToClass(const NormalizedInstruction& i) {
6093 m_regMap.allocOutputRegs(i);
6094 const Location& in = i.inputs[kEmitClsLocalIdx]->location;
6095 const Location& out = i.outStack->location;
6096 PhysReg src = getReg(in);
6097 ScratchReg tmp(m_regMap);
6098 if (i.inputs[kEmitClsLocalIdx]->rtt.isVariant()) {
6099 emitDeref(a, src, *tmp);
6100 src = *tmp;
6102 ASSERT(i.outStack->valueType() == KindOfClass);
6103 a. load_reg64_disp_reg64(src, ObjectData::getVMClassOffset(), getReg(out));
6106 void
6107 TranslatorX64::emitClsAndPals(const NormalizedInstruction& ni) {
6108 if (ni.inputs[kEmitClsLocalIdx]->isString()) {
6109 emitStringToClass(ni);
6110 } else {
6111 emitObjToClass(ni);
6115 void
6116 TranslatorX64::analyzeAGetC(Tracelet& t, NormalizedInstruction& i) {
6117 ASSERT(i.inputs.size() == 1);
6118 ASSERT(i.outStack && !i.outLocal);
6119 ASSERT(i.outStack->valueType() == KindOfClass);
6120 const RuntimeType& rtt = i.inputs[0]->rtt;
6121 ASSERT(!rtt.isVariant());
6122 i.m_txFlags = supportedPlan(rtt.isString() ||
6123 rtt.valueType() == KindOfObject);
6124 if (rtt.isString() && rtt.valueString()) i.manuallyAllocInputs = true;
6127 void TranslatorX64::translateAGetC(const Tracelet& t,
6128 const NormalizedInstruction& ni) {
6129 if (ni.outStack) {
6130 emitClsAndPals(ni);
6134 void TranslatorX64::analyzeAGetL(Tracelet& t,
6135 NormalizedInstruction& ni) {
6136 ASSERT(ni.inputs.size() == 1);
6137 ASSERT(ni.inputs[0]->isLocal());
6138 const RuntimeType& rtt = ni.inputs[0]->rtt;
6139 ni.m_txFlags = supportedPlan(rtt.isString() ||
6140 rtt.valueType() == KindOfObject);
6143 void TranslatorX64::translateAGetL(const Tracelet& t,
6144 const NormalizedInstruction& ni) {
6145 emitClsAndPals(ni);
6148 void TranslatorX64::translateSelf(const Tracelet& t,
6149 const NormalizedInstruction& i) {
6150 m_regMap.allocOutputRegs(i);
6151 PhysReg tmp = getReg(i.outStack->location);
6152 ASSERT(curFunc()->cls());
6153 emitImmReg(a, (int64_t)curFunc()->cls(), tmp);
6156 void TranslatorX64::translateParent(const Tracelet& t,
6157 const NormalizedInstruction& i) {
6158 m_regMap.allocOutputRegs(i);
6159 PhysReg tmp = getReg(i.outStack->location);
6160 ASSERT(curFunc()->cls() && curFunc()->cls()->parent());
6161 emitImmReg(a, (int64_t)curFunc()->cls()->parent(), tmp);
6164 void TranslatorX64::analyzeSelf(Tracelet& t,NormalizedInstruction& i) {
6165 Class* clss = curClass();
6166 if (clss == NULL) {
6167 i.m_txFlags = Interp;
6168 return;
6170 i.m_txFlags = Supported;
6173 void TranslatorX64::analyzeParent(Tracelet& t,NormalizedInstruction& i) {
6174 Class* clss = curClass();
6175 if (clss == NULL) {
6176 i.m_txFlags = Interp;
6177 return;
6179 if (clss->parent() == NULL) {
6180 // clss has no parent; interpret to throw fatal
6181 i.m_txFlags = Interp;
6182 return;
6184 i.m_txFlags = Supported;
6187 void TranslatorX64::translateDup(const Tracelet& t,
6188 const NormalizedInstruction& ni) {
6189 ASSERT(ni.inputs.size() == 1);
6190 ASSERT(ni.outStack);
6191 ASSERT(!ni.inputs[0]->rtt.isVariant());
6192 m_regMap.allocOutputRegs(ni);
6193 PhysReg outR = getReg(ni.outStack->location);
6194 emitMovRegReg(a, getReg(ni.inputs[0]->location), outR);
6195 emitIncRef(outR, ni.inputs[0]->outerType());
6198 typedef std::map<int, int> ParamMap;
6200 * mapContParams determines if every named local in origFunc has a
6201 * corresponding named local in genFunc. If this step succeeds and
6202 * there's no VarEnv at runtime, the continuation's variables can be
6203 * filled completely inline in the TC (assuming there aren't too
6204 * many).
6206 bool TranslatorX64::mapContParams(ParamMap& map,
6207 const Func* origFunc, const Func* genFunc) {
6208 const StringData* const* varNames = origFunc->localNames();
6209 for (Id i = 0; i < origFunc->numNamedLocals(); ++i) {
6210 Id id = genFunc->lookupVarId(varNames[i]);
6211 if (id != kInvalidId) {
6212 map[i] = id;
6213 } else {
6214 return false;
6217 return true;
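  // Illustrative use, with hypothetical local names: if origFunc's named
  // locals are {0 => "a", 1 => "b"} and genFunc defines "a" at id 2 and "b"
  // at id 0, this fills map = {0 => 2, 1 => 0} and returns true. If genFunc
  // lacked either name it would return false, and translateCreateCont below
  // then falls back to the slow fillContinuationVars path.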
6220 void TranslatorX64::emitCallFillCont(X64Assembler& a,
6221 const Func* orig,
6222 const Func* gen) {
6223 if (false) {
6224 ActRec* fp = NULL;
6225 c_Continuation *cont = NULL;
6226 cont =
6227 VMExecutionContext::fillContinuationVars(fp, orig, gen, cont);
6229 EMIT_CALL(a,
6230 VMExecutionContext::fillContinuationVars,
6231 R(rVmFp),
6232 IMM((intptr_t)orig),
6233 IMM((intptr_t)gen),
6234 R(rax));
6237 void TranslatorX64::translateCreateCont(const Tracelet& t,
6238 const NormalizedInstruction& i) {
6239 bool getArgs = i.imm[0].u_IVA;
6240 const StringData* genName = curUnit()->lookupLitstrId(i.imm[1].u_SA);
6241 const Func* origFunc = curFunc();
6242 const Func* genFunc = origFunc->getGeneratorBody(genName);
6244 if (false) {
6245 ActRec* fp = NULL;
6246 UNUSED c_Continuation* cont =
6247 VMExecutionContext::createContinuation<true>(fp, getArgs, origFunc,
6248 genFunc);
6249 VMExecutionContext::createContinuation<false>(fp, getArgs, origFunc,
6250 genFunc);
6253 // Even callee-saved regs need to be clean, because
6254 // createContinuation will read all locals.
6255 m_regMap.cleanAll();
6256 auto helper = origFunc->isNonClosureMethod() ?
6257 VMExecutionContext::createContinuation<true> :
6258 VMExecutionContext::createContinuation<false>;
6259 EMIT_CALL(a,
6260 (TCA)helper,
6261 R(rVmFp),
6262 IMM(getArgs),
6263 IMM((intptr_t)origFunc),
6264 IMM((intptr_t)genFunc));
6265 ScratchReg holdRax(m_regMap, rax);
6267 int origLocals = origFunc->numNamedLocals();
6268 int genLocals = genFunc->numNamedLocals() - 1;
6269 ContParamMap params;
6270 if (origLocals <= kMaxInlineContLocals &&
6271 mapContParams(params, origFunc, genFunc)) {
6272 ScratchReg rScratch(m_regMap);
6273 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rScratch);
6274 a. test_reg64_reg64(*rScratch, *rScratch);
6275 DiamondReturn astubsRet;
6277 UnlikelyIfBlock<CC_NZ> ifVarEnv(a, astubs, &astubsRet);
6278 Stats::emitInc(astubs, Stats::Tx64_ContCreateSlow);
6279 emitCallFillCont(astubs, origFunc, genFunc);
6281 // fillContinuationVars returned the continuation in rax and
6282 // DiamondGuard marked rax as scratch again, so it's safe to keep
6283 // using it
6284 Stats::emitInc(a, Stats::Tx64_ContCreateFast);
6285 static const StringData* thisStr = StringData::GetStaticString("this");
6286 Id thisId = kInvalidId;
6287 bool fillThis = origFunc->isNonClosureMethod() && !origFunc->isStatic() &&
6288 ((thisId = genFunc->lookupVarId(thisStr)) != kInvalidId) &&
6289 (origFunc->lookupVarId(thisStr) == kInvalidId);
6290 ScratchReg rDest(m_regMap);
6291 if (origLocals > 0 || fillThis) {
6292 a.lea_reg64_disp_reg64(rax,
6293 c_Continuation::localsOffset(),
6294 *rDest);
6296 for (int i = 0; i < origLocals; ++i) {
6297 ASSERT(mapContains(params, i));
6298 int destOff = cellsToBytes(genLocals - params[i]);
6299 emitCopyTo(a, rVmFp, localOffset(i), *rDest, destOff, *rScratch);
6300 emitIncRefGenericRegSafe(*rDest, destOff, *rScratch);
6303 // Deal with a potential $this local in the generator body
6304 if (fillThis) {
6305 ASSERT(thisId != kInvalidId);
6306 a.load_reg64_disp_reg64(rax, CONTOFF(m_obj), *rScratch);
6307 a.test_reg64_reg64(*rScratch, *rScratch);
6309 JccBlock<CC_Z> ifObj(a);
6310 const int thisOff = cellsToBytes(genLocals - thisId);
6311 // We don't have to check for a static refcount since we
6312 // know it's an Object
6313 a.add_imm32_disp_reg32(1, TVOFF(_count), *rScratch);
6314 a.store_reg64_disp_reg64(*rScratch, thisOff + TVOFF(m_data), *rDest);
6315 a.store_imm32_disp_reg(KindOfObject, thisOff + TVOFF(m_type), *rDest);
6318 } else {
6319 Stats::emitInc(a, Stats::Tx64_ContCreateSlow);
6320 emitCallFillCont(a, origFunc, genFunc);
6322 m_regMap.bindScratch(holdRax, i.outStack->location, KindOfObject,
6323 RegInfo::DIRTY);
6326 void TranslatorX64::emitCallUnpack(X64Assembler& a,
6327 const NormalizedInstruction& i,
6328 int nCopy) {
6329 const int contIdx = 0;
6331 if (false) {
6332 c_Continuation* cont = NULL;
6333 TypedValue* dest = NULL;
6334 VMExecutionContext::unpackContinuation(cont, dest);
6336 EMIT_CALL(a,
6337 VMExecutionContext::unpackContinuation,
6338 V(i.inputs[contIdx]->location),
6339 A(Location(Location::Local, nCopy)));
6340 recordCall(a, i);
6343 void TranslatorX64::translateUnpackCont(const Tracelet& t,
6344 const NormalizedInstruction& i) {
6345 const int contIdx = 0;
6346 ASSERT(curFrame()->m_varEnv == NULL);
6347 ASSERT(i.inputs.size() == 1);
6348 ASSERT(i.inputs[contIdx]->location == Location(Location::Local, 0));
6349 ASSERT(i.outStack->outerType() == KindOfInt64);
6350 int nCopy = curFunc()->numNamedLocals() - 1;
6352 for (int loc = 1; loc <= nCopy; ++loc) {
6353 // We're at the beginning of the function. The only local in a
6354 // register should be local 0, our input
6355 ASSERT(!m_regMap.hasReg(Location(Location::Local, loc)));
6357 if (nCopy > kMaxInlineContLocals) {
6358 Stats::emitInc(a, Stats::Tx64_ContUnpackSlow);
6359 emitCallUnpack(a, i, nCopy);
6360 m_regMap.bind(rax, i.outStack->location, KindOfInt64,
6361 RegInfo::DIRTY);
6362 return;
6365 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6366 ScratchReg rLabel(m_regMap);
6368 a. test_imm32_disp_reg32(0x1, CONTOFF(m_hasExtraVars), rCont);
6369 DiamondReturn astubsRet;
6371 UnlikelyIfBlock<CC_NZ> hasVars(a, astubs, &astubsRet);
6372 Stats::emitInc(astubs, Stats::Tx64_ContUnpackSlow);
6373 emitCallUnpack(astubs, i, nCopy);
6374 emitMovRegReg(astubs, rax, *rLabel);
6376 Stats::emitInc(a, Stats::Tx64_ContUnpackFast);
6378 a. load_reg64_disp_reg64(rCont, CONTOFF(m_label), *rLabel);
6379 ScratchReg rScratch(m_regMap);
6380 ScratchReg rSrc(m_regMap);
6381 ScratchReg rZero(m_regMap);
6382 if (nCopy > 0) {
6383 a. lea_reg64_disp_reg64(rCont,
6384 c_Continuation::localsOffset(),
6385 *rSrc);
6386 emitImmReg(a, 0, *rZero);
6388 for (int srcOff = 0, destOff = localOffset(nCopy);
6389 srcOff < (int)cellsToBytes(nCopy);
6390 srcOff += sizeof(Cell), destOff += sizeof(Cell)) {
6391 emitCopyTo(a, *rSrc, srcOff, rVmFp, destOff, *rScratch);
6392 a. store_reg32_disp_reg64(*rZero, srcOff + TVOFF(m_type), *rSrc);
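    // Zeroing the source cell's type tag after each copy is what makes this
    // a move rather than a copy: the continuation's slot no longer appears
    // to own a refcounted value, so the reference now held by the frame
    // local needs no extra incRef/decRef pair.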
6395 m_regMap.bindScratch(rLabel, i.outStack->location, KindOfInt64,
6396 RegInfo::DIRTY);
6399 void TranslatorX64::emitCallPack(X64Assembler& a,
6400 const NormalizedInstruction& i,
6401 int nCopy) {
6402 const int valIdx = 0;
6403 const int contIdx = 1;
6405 // packContinuation is going to read values directly from the stack
6406 // so we have to clean everything.
6407 m_regMap.cleanAll();
6408 if (false) {
6409 c_Continuation* cont = NULL;
6410 TypedValue* tv = NULL;
6411 ActRec* fp = NULL;
6412 int label = 0;
6413 VMExecutionContext::packContinuation(cont, fp, tv, label);
6415 EMIT_CALL(a,
6416 VMExecutionContext::packContinuation,
6417 V(i.inputs[contIdx]->location),
6418 R(rVmFp),
6419 A(i.inputs[valIdx]->location),
6420 IMM(i.imm[0].u_IVA));
6421 recordCall(a, i);
6422 m_regMap.invalidateLocals(1, nCopy);
6425 void TranslatorX64::translatePackCont(const Tracelet& t,
6426 const NormalizedInstruction& i) {
6427 const int valIdx = 0;
6428 const int contIdx = 1;
6430 int nCopy = curFunc()->numNamedLocals() - 1;
6431 if (nCopy > kMaxInlineContLocals) {
6432 Stats::emitInc(a, Stats::Tx64_ContPackSlow);
6433 emitCallPack(a, i, nCopy);
6434 emitDecRefInput(a, i, valIdx);
6435 return;
6438 ScratchReg rScratch(m_regMap);
6439 a. load_reg64_disp_reg64(rVmFp, AROFF(m_varEnv), *rScratch);
6440 a. test_reg64_reg64(*rScratch, *rScratch);
6441 DiamondReturn astubsRet;
6443 // TODO: Task #1132976: We can probably prove that this is impossible in
6444 // most cases using information from hphpc
6445 UnlikelyIfBlock<CC_NZ> varEnv(a, astubs, &astubsRet);
6446 Stats::emitInc(astubs, Stats::Tx64_ContPackSlow);
6447 emitCallPack(astubs, i, nCopy);
6448 emitDecRefInput(astubs, i, valIdx);
6450 Stats::emitInc(a, Stats::Tx64_ContPackFast);
6452 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6453 ScratchReg rDest(m_regMap);
6454 ScratchReg rZero(m_regMap);
6455 if (nCopy > 0) {
6456 a. lea_reg64_disp_reg64(rCont,
6457 c_Continuation::localsOffset(),
6458 *rDest);
6459 emitImmReg(a, 0, *rZero);
6461 for (int idx = nCopy, destOff = 0, srcOff = localOffset(nCopy);
6462 idx > 0;
6463 --idx, destOff += sizeof(Cell), srcOff += sizeof(Cell)) {
6464 Location loc(Location::Local, idx);
6465 if (m_regMap.hasReg(loc)) {
6466 PhysReg reg = getReg(loc);
6467 spillTo(m_regMap.getInfo(reg)->m_type, reg, true, *rDest, destOff);
6468 } else {
6469 emitCopyTo(a, rVmFp, srcOff, *rDest, destOff, *rScratch);
6471 m_regMap.invalidate(loc);
6472 a. store_reg32_disp_reg64(*rZero, srcOff + TVOFF(m_type), rVmFp);
6475 // We're moving our reference to the value from the stack to the
6476 // continuation object, so we don't have to incRef or decRef
6477 Location valLoc = i.inputs[valIdx]->location;
6478 emitTvSet(i, getReg(valLoc), i.inputs[valIdx]->outerType(), rCont,
6479 CONTOFF(m_value), false);
6481 emitImmReg(a, i.imm[0].u_IVA, *rScratch);
6482 a. store_reg64_disp_reg64(*rScratch, CONTOFF(m_label), rCont);
6485 static void continuationRaiseHelper(c_Continuation* cont) {
6486 cont->t_raised();
6487 not_reached();
6490 void TranslatorX64::emitContRaiseCheck(X64Assembler& a,
6491 const NormalizedInstruction& i) {
6492 const int contIdx = 0;
6493 ASSERT(i.inputs[contIdx]->location == Location(Location::Local, 0));
6494 PhysReg rCont = getReg(i.inputs[contIdx]->location);
6495 a. test_imm32_disp_reg32(0x1, CONTOFF(m_should_throw), rCont);
6497 UnlikelyIfBlock<CC_NZ> ifThrow(a, astubs);
6498 if (false) {
6499 c_Continuation* c = NULL;
6500 continuationRaiseHelper(c);
6502 EMIT_CALL(astubs,
6503 continuationRaiseHelper,
6504 R(rCont));
6505 recordReentrantStubCall(i);
6506 translator_not_reached(astubs);
6510 void TranslatorX64::translateContReceive(const Tracelet& t,
6511 const NormalizedInstruction& i) {
6512 const int contIdx = 0;
6513 emitContRaiseCheck(a, i);
6514 ScratchReg rScratch(m_regMap);
6515 a. lea_reg64_disp_reg64(getReg(i.inputs[contIdx]->location),
6516 CONTOFF(m_received), *rScratch);
6517 emitIncRefGeneric(*rScratch, 0);
6518 emitCopyToStack(a, i, *rScratch, -1 * (int)sizeof(Cell));
6521 void TranslatorX64::translateContRaised(const Tracelet& t,
6522 const NormalizedInstruction& i) {
6523 emitContRaiseCheck(a, i);
6526 void TranslatorX64::translateContDone(const Tracelet& t,
6527 const NormalizedInstruction& i) {
6528 const int contIdx = 0;
6529 a. store_imm8_disp_reg(0x1, CONTOFF(m_done),
6530 getReg(i.inputs[contIdx]->location));
6533 static void contPreNextThrowHelper(c_Continuation* c) {
6534 c->preNext();
6535 not_reached();
6538 void TranslatorX64::emitContPreNext(const NormalizedInstruction& i,
6539 ScratchReg& rCont) {
6540 const Offset doneOffset = CONTOFF(m_done);
6541 CT_ASSERT((doneOffset + 1) == CONTOFF(m_running));
6542 // Check m_done and m_running at the same time
6543 a. test_imm32_disp_reg32(0x0101, doneOffset, *rCont);
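  // (The 0x0101 immediate tests bit 0 of each of the two adjacent bytes --
  // m_done at doneOffset and m_running right after it -- in one instruction.)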
6545 UnlikelyIfBlock<CC_NZ> ifThrow(a, astubs);
6546 EMIT_CALL(astubs, contPreNextThrowHelper, R(*rCont));
6547 recordReentrantStubCall(i);
6548 translator_not_reached(astubs);
6551 // ++m_index
6552 a. add_imm64_disp_reg64(0x1, CONTOFF(m_index), *rCont);
6553 // m_running = true
6554 a. store_imm8_disp_reg(0x1, CONTOFF(m_running), *rCont);
6557 void TranslatorX64::translateContNext(const Tracelet& t,
6558 const NormalizedInstruction& i) {
6559 ScratchReg rCont(m_regMap);
6560 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6561 emitContPreNext(i, rCont);
6563 // m_received.setNull()
6564 emitTvSet(i, reg::noreg, KindOfNull, *rCont, CONTOFF(m_received), false);
6567 static void contNextCheckThrowHelper(c_Continuation* cont) {
6568 cont->startedCheck();
6569 not_reached();
6572 void TranslatorX64::emitContStartedCheck(const NormalizedInstruction& i,
6573 ScratchReg& rCont) {
6574 // if (m_index < 0)
6575 a. cmp_imm64_disp_reg64(0, CONTOFF(m_index), *rCont);
6577 UnlikelyIfBlock<CC_L> whoops(a, astubs);
6578 EMIT_CALL(astubs, contNextCheckThrowHelper, *rCont);
6579 recordReentrantStubCall(i);
6580 translator_not_reached(astubs);
6584 template<bool raise>
6585 void TranslatorX64::translateContSendImpl(const NormalizedInstruction& i) {
6586 const int valIdx = 0;
6587 ASSERT(i.inputs[valIdx]->location == Location(Location::Local, 0));
6589 ScratchReg rCont(m_regMap);
6590 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6591 emitContStartedCheck(i, rCont);
6592 emitContPreNext(i, rCont);
6594 // m_received = value
6595 PhysReg valReg = getReg(i.inputs[valIdx]->location);
6596 DataType valType = i.inputs[valIdx]->outerType();
6597 emitTvSet(i, valReg, valType, *rCont, CONTOFF(m_received), true);
6599 // m_should_throw = true (maybe)
6600 if (raise) {
6601 a. store_imm8_disp_reg(0x1, CONTOFF(m_should_throw), *rCont);
6605 void TranslatorX64::translateContSend(const Tracelet& t,
6606 const NormalizedInstruction& i) {
6607 translateContSendImpl<false>(i);
6610 void TranslatorX64::translateContRaise(const Tracelet& t,
6611 const NormalizedInstruction& i) {
6612 translateContSendImpl<true>(i);
6615 void TranslatorX64::translateContValid(const Tracelet& t,
6616 const NormalizedInstruction& i) {
6617 ScratchReg rCont(m_regMap);
6618 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6620 m_regMap.allocOutputRegs(i);
6621 PhysReg validReg = getReg(i.outStack->location);
6622 // !m_done
6623 a. loadzxb_reg64_disp_reg64(*rCont, CONTOFF(m_done), validReg);
6624 a. xor_imm32_reg64(0x1, validReg);
6627 void TranslatorX64::translateContCurrent(const Tracelet& t,
6628 const NormalizedInstruction& i) {
6629 ScratchReg rCont(m_regMap);
6630 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6631 emitContStartedCheck(i, rCont);
6633 a. lea_reg64_disp_reg64(*rCont, CONTOFF(m_value), *rCont);
6634 emitIncRefGeneric(*rCont, 0);
6635 emitCopyToStack(a, i, *rCont, -1 * (int)sizeof(Cell));
6638 void TranslatorX64::translateContStopped(const Tracelet& t,
6639 const NormalizedInstruction& i) {
6640 ScratchReg rCont(m_regMap);
6641 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
6642 a. store_imm8_disp_reg(0x0, CONTOFF(m_running), *rCont);
6645 void TranslatorX64::translateContHandle(const Tracelet& t,
6646 const NormalizedInstruction& i) {
6647 // Always interpreted
6648 not_reached();
6651 static void analyzeClassExistsImpl(NormalizedInstruction& i) {
6652 const int nameIdx = 1;
6653 const int autoIdx = 0;
6654 ASSERT(!i.inputs[nameIdx]->isVariant() && !i.inputs[autoIdx]->isVariant());
6655 i.m_txFlags = supportedPlan(i.inputs[nameIdx]->isString() &&
6656 i.inputs[autoIdx]->isBoolean());
6657 i.fuseBranch = (i.m_txFlags & Supported) &&
6658 i.inputs[nameIdx]->rtt.valueString() &&
6659 i.inputs[autoIdx]->rtt.valueBoolean() != RuntimeType::UnknownBool;
6662 void TranslatorX64::analyzeClassExists(Tracelet& t,
6663 NormalizedInstruction& i) {
6664 analyzeClassExistsImpl(i);
6667 void TranslatorX64::analyzeInterfaceExists(Tracelet& t,
6668 NormalizedInstruction& i) {
6669 analyzeClassExistsImpl(i);
6672 void TranslatorX64::analyzeTraitExists(Tracelet& t,
6673 NormalizedInstruction& i) {
6674 analyzeClassExistsImpl(i);
6677 static int64 classExistsSlow(const StringData* name, bool autoload,
6678 Attr typeAttr) {
6679 bool ret = Unit::classExists(name, autoload, typeAttr);
6680 // XXX: do we need to decref this during an exception?
6681 if (name->decRefCount() == 0) {
6682 const_cast<StringData*>(name)->release();
6684 return ret;
6687 void TranslatorX64::translateClassExistsImpl(const Tracelet& t,
6688 const NormalizedInstruction& i,
6689 Attr typeAttr) {
6690 const int nameIdx = 1;
6691 const int autoIdx = 0;
6692 const StringData* name = i.inputs[nameIdx]->rtt.valueString();
6693 ASSERT(IMPLIES(name, name->isStatic()));
6694 const int autoload = i.inputs[autoIdx]->rtt.valueBoolean();
6696 ScratchReg scratch(m_regMap);
6697 if (name != NULL && autoload != RuntimeType::UnknownBool) {
6698 ASSERT(i.fuseBranch);
6699 const Attr attrNotClass = Attr(AttrTrait | AttrInterface);
6700 const bool isClass = typeAttr == AttrNone;
6701 using namespace TargetCache;
6702 Stats::emitInc(a, Stats::Tx64_ClassExistsFast);
6703 CacheHandle ch = allocKnownClass(name);
6706 DiamondReturn astubsRet;
6707 a. load_reg64_disp_reg64(rVmTl, ch, *scratch);
6708 a. test_reg64_reg64(*scratch, *scratch);
6709 if (autoload) {
6710 UnlikelyIfBlock<CC_Z> ifNull(a, astubs, &astubsRet);
6711 if (false) {
6712 Class** c = NULL;
6713 UNUSED Class* ret = lookupKnownClass<true>(c, name, false);
6715 Stats::emitInc(astubs, Stats::TgtCache_ClassExistsMiss);
6716 // If the class exists after autoloading, the helper will
6717 // return the Class's flags. Otherwise, it will return a set
6718 // of flags such that our flag check at the join point below
6719 // will fail.
6720 EMIT_CALL(astubs, (lookupKnownClass_func_t)lookupKnownClass<true>,
6721 RPLUS(rVmTl, ch),
6722 IMM((uintptr_t)name),
6723 IMM(isClass));
6724 recordReentrantStubCall(i);
6725 emitMovRegReg(astubs, rax, *scratch);
6726 } else {
6727 UnlikelyIfBlock<CC_Z> ifNull(a, astubs, &astubsRet);
6728       // This isn't really a traditional slow path; count it as a hit
6729 Stats::emitInc(astubs, Stats::TgtCache_ClassExistsHit);
6730       // Provide flags so that the flag check back in the main line (a) fails
6731 emitImmReg(astubs, isClass ? attrNotClass : AttrNone, *scratch);
6733 // If we don't take the slow/NULL path, load the Class's attrs
6734 // into *scratch to prepare for the flag check.
6735 Stats::emitInc(a, Stats::TgtCache_ClassExistsHit);
6736 a. load_reg64_disp_reg64(*scratch, Class::preClassOff(),
6737 *scratch);
6738 a. load_reg64_disp_reg32(*scratch, PreClass::attrsOffset(),
6739 *scratch);
6742 if (i.changesPC) {
6743 fuseBranchSync(t, i);
6745 a. test_imm32_reg32(isClass ? attrNotClass : typeAttr, *scratch);
6746 ConditionCode cc = isClass ? CC_Z : CC_NZ;
6747 if (i.changesPC) {
6748 fuseBranchAfterBool(t, i, cc);
6749 } else {
6750 a. setcc(cc, *scratch);
6751 a. mov_reg8_reg64_unsigned(*scratch, *scratch);
6752 m_regMap.bindScratch(scratch, i.outStack->location, KindOfBoolean,
6753 RegInfo::DIRTY);
6755 } else {
6756 ASSERT(!i.fuseBranch);
6757 Stats::emitInc(a, Stats::Tx64_ClassExistsSlow);
6758 if (false) {
6759 UNUSED bool ret = false;
6760 ret = classExistsSlow(name, ret, typeAttr);
6762 EMIT_CALL(a, classExistsSlow,
6763 V(i.inputs[nameIdx]->location),
6764 V(i.inputs[autoIdx]->location),
6765 IMM(typeAttr));
6766 recordReentrantCall(i);
6767 // Our helper decrefs the string
6768 m_regMap.bind(rax, i.outStack->location, KindOfBoolean, RegInfo::DIRTY);
6772 void TranslatorX64::translateClassExists(const Tracelet& t,
6773 const NormalizedInstruction& i) {
6774 translateClassExistsImpl(t, i, AttrNone);
6777 void TranslatorX64::translateInterfaceExists(const Tracelet& t,
6778 const NormalizedInstruction& i) {
6779 translateClassExistsImpl(t, i, AttrInterface);
6782 void TranslatorX64::translateTraitExists(const Tracelet& t,
6783 const NormalizedInstruction& i) {
6784 translateClassExistsImpl(t, i, AttrTrait);
6787 // Helper function for static property access. This function emits code
6788 // which leaves a pointer to the static property for clsInput::$propInput in
6789 // register scr. We destroy scr early on, yet do not consume inputs until
6790 // later, so scr must not alias an input register. This also handles
6791 // the decref for the case where prop is not a static string.
6792 void TranslatorX64::emitStaticPropInlineLookup(const NormalizedInstruction& i,
6793 int classInputIdx,
6794 const DynLocation& propInput,
6795 PhysReg scr) {
6796 auto const& clsInput = *i.inputs[classInputIdx];
6797 const Class* cls = clsInput.rtt.valueClass();
6798 const StringData* propName = propInput.rtt.valueString();
6799 using namespace TargetCache;
6800 CacheHandle ch;
6802 ASSERT(cls && propName);
6803 // Use the uniquely known cls / prop to generate a single cache per prop
6804 const StringData* clsName = cls->preClass()->name();
6805 string sds(Util::toLower(clsName->data()) + ":" +
6806 string(propName->data(), propName->size()));
6807 StringData sd(sds.c_str(), sds.size(), AttachLiteral);
6808 ch = SPropCache::alloc(&sd);
6809 SKTRACE(1, i.source, "SPropInlineLookup %s %d\n", sd.data(), int(ch));
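  // For example (hypothetical names), a static property $bar on class Foo
  // yields the cache key "foo:bar": the class name is lowercased, the
  // property name is appended verbatim.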
6811 Stats::emitInc(a, Stats::TgtCache_SPropHit);
6813 // For the simple case of statically known class and prop name, we inline
6814 // the target cache lookup, and outline the miss case.
6815 // Load the TV pointer out of the thread-private tl_targetCaches.
6816 BOOST_STATIC_ASSERT((offsetof(SPropCache, m_tv) == 0));
6817 a. load_reg64_disp_reg64(rVmTl, ch, scr);
6818 a. test_reg64_reg64(scr, scr);
6820 // Call the slow path.
6822 UnlikelyIfBlock<CC_Z> shucks(a, astubs);
6824     // Precondition for this lookup: we don't need to pass the preClass,
6825     // as we only translate in-class lookups.
6826 ASSERT(cls == curFunc()->cls());
6827 if (false) { // typecheck
6828 StringData *data = NULL;
6829 SPropCache::lookup(ch, cls, data);
6832 std::vector<int> args(i.inputs.size(), ArgDontAllocate);
6833 args[classInputIdx] = 1;
6834 allocInputsForCall(i, &args[0]);
6836 EMIT_CALL(astubs, (TCA)SPropCache::lookup,
6837 IMM(ch), V(clsInput.location), IMM(uint64_t(propName)));
6838 recordReentrantStubCall(i);
6839 emitMovRegReg(astubs, rax, scr);
6841 // We're consuming the name as input, but it is static, no decref needed
6842 ASSERT(propInput.rtt.valueString()->isStatic());
6843 // astubs. jmp(a.code.frontier); -- implicit
6847 void TranslatorX64::analyzeCGetS(Tracelet& t, NormalizedInstruction& i) {
6848 ASSERT(i.inputs.size() == 2);
6849 ASSERT(i.inputs[0]->valueType() == KindOfClass);
6850 ASSERT(i.outStack);
6851 const Class* cls = i.inputs[0]->rtt.valueClass();
6852 const StringData* propName = i.inputs[1]->rtt.valueString();
6853 i.m_txFlags = supportedPlan(cls && propName && curFunc()->cls() == cls);
6854 i.manuallyAllocInputs = true;
6857 void TranslatorX64::translateCGetS(const Tracelet& t,
6858 const NormalizedInstruction& i) {
6859 const int kClassIdx = 0;
6860 const int kPropIdx = 1;
6862 ScratchReg sprop(m_regMap);
6863 emitStaticPropInlineLookup(i, kClassIdx, *i.inputs[kPropIdx], *sprop);
6864 emitDerefIfVariant(a, *sprop);
6865 emitIncRefGeneric(*sprop, 0);
6866 // Finally copy the thing to the stack
6867 int stackDest = 2 * sizeof(Cell) - sizeof(Cell); // popped - pushed
6868 emitCopyToStack(a, i, *sprop, stackDest);
6871 void TranslatorX64::analyzeSetS(Tracelet& t, NormalizedInstruction& i) {
6872 ASSERT(i.inputs.size() == 3);
6873 ASSERT(i.inputs[1]->valueType() == KindOfClass);
6874 ASSERT(i.outStack);
6875 const Class* cls = i.inputs[1]->rtt.valueClass();
6876 const StringData* propName = i.inputs[2]->rtt.valueString();
6877 // Might be able to broaden this: if cls is an ancestor of the current context,
6878   // the context is Fixed, and the property is not private.
6879   // Also if m_hoistable in cls is set to AlwaysHoistable, cls is defined in
6880   // the same unit as the context, and the property is public.
6881 i.m_txFlags = supportedPlan(cls && propName && curFunc()->cls() == cls);
6882 i.manuallyAllocInputs = true;
6885 void TranslatorX64::translateSetS(const Tracelet& t,
6886 const NormalizedInstruction& i) {
6887 const int kClassIdx = 1;
6889 ScratchReg sprop(m_regMap);
6890 const RuntimeType& rhsType = i.inputs[0]->rtt;
6891 emitStaticPropInlineLookup(i, kClassIdx, *i.inputs[2], *sprop);
6893 ASSERT(m_regMap.getInfo(*sprop)->m_state == RegInfo::SCRATCH);
6894 ASSERT(!rhsType.isVariant());
6896 m_regMap.allocInputReg(i, 0);
6897 m_regMap.allocOutputRegs(i);
6898 PhysReg rhsReg = getReg(i.inputs[0]->location);
6899 PhysReg outReg = getReg(i.outStack->location);
6900 emitTvSet(i, rhsReg, rhsType.outerType(), *sprop);
6901 ASSERT(i.inputs[2]->location == i.outStack->location);
6902 emitMovRegReg(rhsReg, outReg);
6905 void TranslatorX64::analyzeSetG(Tracelet& t, NormalizedInstruction& i) {
6906 ASSERT(i.inputs.size() == 2);
6907 i.m_txFlags = supportedPlan(
6908 i.inputs[1]->isString() &&
6909 !i.inputs[0]->isVariant()
6911 if (i.m_txFlags) i.manuallyAllocInputs = true;
6914 void TranslatorX64::translateSetG(const Tracelet& t,
6915 const NormalizedInstruction& i) {
6916 ASSERT(i.outStack && !i.outLocal);
6917 ASSERT(i.inputs.size() == 2);
6918 ASSERT(i.inputs[1]->isString());
6919 ASSERT(i.inputs[1]->location == i.outStack->location);
6921 const DataType type = i.inputs[0]->rtt.outerType();
6924 * Grab the global from the target cache; rax will get a pointer to
6925 * the TypedValue in the globals array, maybe newly created as a
6926 * null.
6928 emitGetGlobal(i, 1, true /* allowCreate */);
6929 ScratchReg raxSaver(m_regMap, rax);
6930 m_regMap.allocInputReg(i, 0);
6931 PhysReg src = getReg(i.inputs[0]->location);
6932 m_regMap.allocOutputRegs(i);
6933 PhysReg out = getReg(i.outStack->location);
6935 emitTvSet(i, src, type, rax);
6936 emitMovRegReg(src, out);
6939 static TypedValue* lookupGlobal(StringData* name) {
6940 VarEnv* ve = g_vmContext->m_globalVarEnv;
6941 TypedValue* r = ve->lookup(name);
6942 // If the global didn't exist, we need to leave name un-decref'd for
6943 // the caller to raise warnings.
6944 if (r) {
6945 LITSTR_DECREF(name);
6946 if (r->m_type == KindOfRef) r = r->m_data.pref->tv();
6948 return r;
6951 static TypedValue* lookupAddGlobal(StringData* name) {
6952 VarEnv* ve = g_vmContext->m_globalVarEnv;
6953 TypedValue* r = ve->lookupAdd(name);
6954 if (r->m_type == KindOfRef) r = r->m_data.pref->tv();
6955 LITSTR_DECREF(name);
6956 return r;
6960 * Look up a global in the TargetCache with the name
6961 * i.inputs[nameIdx]. If `allowCreate' is true, also creates it. If
6962 * we don't create the global, the input name is not decref'd yet.
6964 void
6965 TranslatorX64::emitGetGlobal(const NormalizedInstruction& i, int nameIdx,
6966 bool allowCreate) {
6967 using namespace TargetCache;
6968 ASSERT(i.inputs.size() > size_t(nameIdx));
6969 ASSERT(i.inputs[nameIdx]->isString());
6971 const StringData *maybeName = i.inputs[nameIdx]->rtt.valueString();
6972 if (!maybeName) {
6973 m_regMap.allocInputReg(i, nameIdx, argNumToRegName[0]);
6974 // Always do a lookup when there's no statically-known name.
6975 // There's not much we can really cache here right now anyway.
6976 EMIT_CALL(a, allowCreate ? lookupAddGlobal : lookupGlobal,
6977 V(i.inputs[nameIdx]->location));
6978 recordCall(i);
6979 return;
6982 CacheHandle ch = GlobalCache::alloc(maybeName);
6983 if (false) { // typecheck
6984 StringData* UNUSED key = NULL;
6985 TypedValue* UNUSED glob = GlobalCache::lookup(ch, key);
6986 TypedValue* UNUSED glob2 = GlobalCache::lookupCreate(ch, key);
6988 SKTRACE(1, i.source, "ch %d\n", ch);
6989 EMIT_CALL(a, allowCreate ? GlobalCache::lookupCreate
6990 : GlobalCache::lookup,
6991 IMM(ch),
6992 IMM((uint64_t)maybeName));
6993 recordCall(i);
6996 static bool
6997 isSupportedInstrCGetG(const NormalizedInstruction& i) {
6998 ASSERT(i.inputs.size() == 1);
6999 return (i.inputs[0]->rtt.isString());
7002 void
7003 TranslatorX64::analyzeCGetG(Tracelet& t, NormalizedInstruction& i) {
7004 i.m_txFlags = simplePlan(isSupportedInstrCGetG(i));
7005 if (i.m_txFlags) i.manuallyAllocInputs = true;
7008 void
7009 TranslatorX64::translateCGetG(const Tracelet& t,
7010 const NormalizedInstruction& i) {
7011 ASSERT(i.outStack && !i.outLocal);
7012 ASSERT(i.inputs.size() == 1);
7013 ASSERT(i.inputs[0]->isString());
7015 emitGetGlobal(i, 0, false /* allowCreate */);
7016 ScratchReg raxHolder(m_regMap, rax);
7018 // If non-null, rax now points to the in-memory location of the
7019 // object of unknown type. lookup() has already decref'd the name.
7020 a. test_reg64_reg64(rax, rax);
7021 DiamondReturn astubsRet;
7023 UnlikelyIfBlock<CC_Z> ifNotRax(a, astubs, &astubsRet);
7024 if (!i.inputs[0]->rtt.valueString()) {
7025 m_regMap.allocInputReg(i, 0);
7026 PhysReg reg = getReg(i.inputs[0]->location);
7027 emitDecRef(astubs, i, reg, BitwiseKindOfString);
7029 // TODO: if (MoreWarnings) raise an undefined variable warning.
7030 // (Note: when changing this remember to change the Simple flag to
7031 // Supported in analyze.)
7032 emitStoreNull(astubs, vstackOffset(i, 0), rVmSp);
7033 m_regMap.invalidate(i.outStack->location);
7036 emitCopyToStack(a, i, rax, 0);
7037 emitIncRefGeneric(rax, 0);
7038 m_regMap.invalidate(i.outStack->location);
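/*
 * A minimal sketch of what the generated CGetG code does with the lookup
 * result in rax (illustrative only):
 *
 *   if (rax == NULL) {               // global is not defined
 *     decRef(name);                  // only if the name wasn't static
 *     pushNull();                    // (a MoreWarnings notice could go here)
 *   } else {
 *     pushCopyOf(*rax);              // name was already decref'd by lookup
 *     incRefGeneric(*rax);
 *   }
 */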
7041 void TranslatorX64::analyzeFPassL(Tracelet& t,
7042 NormalizedInstruction& ni) {
7043 if (ni.preppedByRef) {
7044 analyzeVGetL(t, ni);
7045 } else {
7046 analyzeCGetL(t, ni);
7050 void TranslatorX64::translateFPassL(const Tracelet& t,
7051 const NormalizedInstruction& ni) {
7052 if (ni.preppedByRef) {
7053 translateVGetL(t, ni);
7054 } else {
7055 translateCGetL(t, ni);
7059 void TranslatorX64::analyzeFPassS(Tracelet& t,
7060 NormalizedInstruction& ni) {
7061 if (ni.preppedByRef) {
7062 // We need a VGetS translation.
7063 ni.m_txFlags = Interp;
7064 } else {
7065 analyzeCGetS(t, ni);
7069 void TranslatorX64::translateFPassS(const Tracelet& t,
7070 const NormalizedInstruction& ni) {
7071 if (ni.preppedByRef) {
7072 ASSERT(false);
7073 } else {
7074 translateCGetS(t, ni);
7078 void TranslatorX64::analyzeFPassG(Tracelet& t,
7079 NormalizedInstruction& ni) {
7080 if (ni.preppedByRef) {
7081 analyzeVGetG(t, ni);
7082 } else {
7083 analyzeCGetG(t, ni);
7087 void TranslatorX64::translateFPassG(const Tracelet& t,
7088 const NormalizedInstruction& ni) {
7089 if (ni.preppedByRef) {
7090 translateVGetG(t, ni);
7091 } else {
7092 translateCGetG(t, ni);
7096 void TranslatorX64::analyzeCheckTypeOp(Tracelet& t,
7097 NormalizedInstruction& ni) {
7098 ASSERT(ni.inputs.size() == 1);
7100 if (ni.op() == OpIsObjectL || ni.op() == OpIsObjectC) {
7101 // is_object is weird because it's supposed to return false for
7102 // things where ObjectData::isResource() is true. For now we only
7103 // translate when it is not an object.
7104 if (ni.inputs[0]->valueType() == KindOfObject) {
7105 ni.m_txFlags = Interp;
7106 return;
7110 if (ni.inputs[0]->isLocal()) {
7111 ni.manuallyAllocInputs = true;
7112 if (ni.op() != OpIssetL && ni.inputs[0]->rtt.isUninit()) {
7113 ni.m_txFlags = Supported;
7114 } else {
7115 ni.m_txFlags = Native;
7117 return;
7120 ni.m_txFlags = planHingesOnRefcounting(ni.inputs[0]->valueType());
7123 static bool checkTypeHelper(Opcode op, DataType dt) {
7124 switch (op) {
7125 case OpIssetL: return !IS_NULL_TYPE(dt);
7126 case OpIsNullL: case OpIsNullC: return IS_NULL_TYPE(dt);
7127 case OpIsStringL: case OpIsStringC: return IS_STRING_TYPE(dt);
7128 case OpIsArrayL: case OpIsArrayC: return IS_ARRAY_TYPE(dt);
7129 case OpIsIntL: case OpIsIntC: return IS_INT_TYPE(dt);
7130 case OpIsBoolL: case OpIsBoolC: return IS_BOOL_TYPE(dt);
7131 case OpIsDoubleL: case OpIsDoubleC: return IS_DOUBLE_TYPE(dt);
7133 case OpIsObjectL: case OpIsObjectC:
7134 // Note: this is because, for now, we refuse to translate when the
7135 // input is actually an object. (We'd need to emit some kind of
7136 // call to ObjectData::isResource or something.)
7137 return 0;
7139 ASSERT(false);
7140 NOT_REACHED();
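/*
 * Example inputs to the helper above: checkTypeHelper(OpIsIntL, KindOfInt64)
 * yields true, checkTypeHelper(OpIssetL, KindOfUninit) yields false, and the
 * IsObject cases always report false here because analyzeCheckTypeOp refuses
 * to translate when the input really is an object.
 */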
7143 static void warnNullThis() { raise_notice(Strings::WARN_NULL_THIS); }
7145 void
7146 TranslatorX64::translateCheckTypeOp(const Tracelet& t,
7147 const NormalizedInstruction& ni) {
7148 ASSERT(ni.inputs.size() == 1);
7149 ASSERT(ni.outStack);
7151 bool isType;
7153 if (ni.grouped && (ni.prev->op() == OpThis || ni.prev->op() == OpBareThis)) {
7154 ASSERT(ni.op() == OpIsNullC);
7155 if (ni.prev->op() == OpThis) {
7156 isType = false;
7157 } else {
7158 if (ni.changesPC) {
7159 fuseBranchSync(t, ni);
7160 a. test_imm64_disp_reg64(1, AROFF(m_this), rVmFp);
7161 if (ni.prev->imm[0].u_OA) {
7162 UnlikelyIfBlock<CC_NZ> nullThis(a, astubs);
7163 EMIT_CALL(astubs, warnNullThis);
7164 recordReentrantStubCall(ni);
7165 nullThis.reconcileEarly();
7166 astubs.test_imm64_disp_reg64(1, AROFF(m_this), rVmFp);
7168 fuseBranchAfterBool(t, ni, ni.invertCond ? CC_Z : CC_NZ);
7169 } else {
7170 m_regMap.allocOutputRegs(ni);
7171 PhysReg res = getReg(ni.outStack->location);
7172 a. test_imm64_disp_reg64(1, AROFF(m_this), rVmFp);
7173 a. setcc(ni.invertCond ? CC_Z : CC_NZ, res);
7174 if (ni.prev->imm[0].u_OA) {
7175 UnlikelyIfBlock<CC_NZ> nullThis(a, astubs);
7176 EMIT_CALL(astubs, warnNullThis);
7177 recordReentrantStubCall(ni);
7179 a. mov_reg8_reg64_unsigned(res, res);
7181 return;
7183 } else {
7184 const DataType dt = ni.inputs[0]->valueType();
7185 const bool isLocalOp = ni.inputs[0]->isLocal();
7187 isType = checkTypeHelper(ni.op(), dt) != ni.invertCond;
7188 if (!isLocalOp) {
7189 emitDecRef(ni, getReg(ni.inputs[0]->location), dt);
7191 if (isLocalOp &&
7192 ni.op() != OpIssetL &&
7193 ni.inputs[0]->rtt.isUninit()) {
7194 const StringData* name = local_name(ni.inputs[0]->location);
7195 ASSERT(name->isStatic());
7196 EMIT_CALL(a, raiseUndefVariable, IMM((uintptr_t)name));
7197 recordReentrantCall(ni);
7201 m_regMap.allocOutputRegs(ni);
7202 if (ni.changesPC) {
7203 // Don't bother driving an output reg. Just take the branch
7204 // where it leads.
7205 Stats::emitInc(a, Stats::Tx64_FusedTypeCheck);
7206 fuseBranchAfterStaticBool(t, ni, isType);
7207 return;
7209 Stats::emitInc(a, Stats::Tx64_UnfusedTypeCheck);
7210 emitImmReg(a, isType, getReg(ni.outStack->location));
7213 static void badArray() {
7214 throw_bad_type_exception("array_key_exists expects an array or an object; "
7215 "false returned.");
7218 static void badKey() {
7219 raise_warning("Array key should be either a string or an integer");
7222 static inline int64 ak_exist_string_helper(StringData* key, ArrayData* arr) {
7223 int64 n;
7224 if (key->isStrictlyInteger(n)) {
7225 return arr->exists(n);
7227 return arr->exists(StrNR(key));
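/*
 * Example of the key coercion above (standard PHP array-key semantics,
 * illustrative values only): a key of "123" is strictly integral and is
 * checked as the int index 123, while "01" or "foo" are not and are
 * checked as string keys.
 */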
7230 static int64 ak_exist_string(StringData* key, ArrayData* arr) {
7231 int64 res = ak_exist_string_helper(key, arr);
7232 if (arr->decRefCount() == 0) {
7233 arr->release();
7235 if (key->decRefCount() == 0) {
7236 key->release();
7238 return res;
7241 static int64 ak_exist_int(int64 key, ArrayData* arr) {
7242 bool res = arr->exists(key);
7243 if (arr->decRefCount() == 0) {
7244 arr->release();
7246 return res;
7249 static int64 ak_exist_string_obj(StringData* key, ObjectData* obj) {
7250 CArrRef arr = obj->o_toArray();
7251 int64 res = ak_exist_string_helper(key, arr.get());
7252 if (obj->decRefCount() == 0) {
7253 obj->release();
7255 if (key->decRefCount() == 0) {
7256 key->release();
7258 return res;
7261 static int64 ak_exist_int_obj(int64 key, ObjectData* obj) {
7262 CArrRef arr = obj->o_toArray();
7263 bool res = arr.get()->exists(key);
7264 if (obj->decRefCount() == 0) {
7265 obj->release();
7267 return res;
7270 void
7271 TranslatorX64::analyzeAKExists(Tracelet& t, NormalizedInstruction& i) {
7272 const int keyIx = 1;
7273 const int arrIx = 0;
7275 const DataType dta = i.inputs[arrIx]->valueType();
7276 const DataType dtk = i.inputs[keyIx]->valueType();
7278 bool reentrant = (dta != KindOfArray && dta != KindOfObject) ||
7279 (!IS_STRING_TYPE(dtk) && dtk != KindOfInt64 && dtk != KindOfNull);
7281 i.m_txFlags = reentrant ? Supported : Simple;
7282 i.manuallyAllocInputs = true;
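/*
 * Illustrative mapping of (container type, key type) to the plan chosen
 * above:
 *
 *   (KindOfArray,  KindOfStaticString) -> Simple
 *   (KindOfObject, KindOfInt64)        -> Simple
 *   (KindOfArray,  KindOfDouble)       -> Supported (badKey warning)
 *   (KindOfInt64,  anything)           -> Supported (badArray warning)
 */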
7285 void
7286 TranslatorX64::translateAKExists(const Tracelet& t,
7287 const NormalizedInstruction& ni) {
7288 ASSERT(ni.inputs.size() == 2);
7289 ASSERT(ni.outStack);
7291 const int keyIx = 1;
7292 const int arrIx = 0;
7294 const DataType dta = ni.inputs[arrIx]->valueType();
7295 const DataType dtk = ni.inputs[keyIx]->valueType();
7296 TCA string_func = (TCA)ak_exist_string;
7297 TCA int_func = (TCA)ak_exist_int;
7299 int result = -1;
7300 int args[2];
7301 args[keyIx] = 0;
7302 args[arrIx] = 1;
7303 switch (dta) {
7304 case KindOfObject:
7305 string_func = (TCA)ak_exist_string_obj;
7306 int_func = (TCA)ak_exist_int_obj;
7307 case KindOfArray:
7308 switch (dtk) {
7309 case BitwiseKindOfString:
7310 case KindOfStaticString:
7311 case KindOfInt64: {
7312 allocInputsForCall(ni, args);
7313 PhysReg rk = getReg(ni.inputs[keyIx]->location);
7314 PhysReg ra = getReg(ni.inputs[arrIx]->location);
7315 m_regMap.scrubStackEntries(ni.outStack->location.offset);
7316 EMIT_CALL(a, dtk == KindOfInt64 ? int_func : string_func,
7317 R(rk), R(ra));
7318 recordCall(ni);
7319 break;
7321 case KindOfNull:
7322 if (dta == KindOfArray) {
7323 args[keyIx] = ArgDontAllocate;
7324 allocInputsForCall(ni, args);
7325 PhysReg ra = getReg(ni.inputs[arrIx]->location);
7326 m_regMap.scrubStackEntries(ni.outStack->location.offset);
7327 EMIT_CALL(a, string_func,
7328 IMM((uint64_t)empty_string.get()), R(ra));
7329 recordCall(ni);
7330 } else {
7331 result = ni.invertCond;
7333 break;
7334 default:
7335 EMIT_CALL(a, badKey);
7336 recordReentrantCall(ni);
7337 result = ni.invertCond;
7338 break;
7340 break;
7341 default:
7342 EMIT_CALL(a, badArray);
7343 recordReentrantCall(ni);
7344 result = ni.invertCond;
7345 break;
7348 if (result >= 0) {
7349 if (ni.changesPC) {
7350 fuseBranchAfterStaticBool(t, ni, result);
7351 return;
7352 } else {
7353 m_regMap.allocOutputRegs(ni);
7354 emitImmReg(a, result, getReg(ni.outStack->location));
7356 } else {
7357 ScratchReg res(m_regMap, rax);
7358 if (ni.changesPC) {
7359 fuseBranchSync(t, ni);
7360 a. test_reg64_reg64(*res, *res);
7361 fuseBranchAfterBool(t, ni, ni.invertCond ? CC_Z : CC_NZ);
7362 } else {
7363 if (ni.invertCond) {
7364 a. xor_imm32_reg64(1, *res);
7366 m_regMap.bindScratch(res, ni.outStack->location, KindOfBoolean,
7367 RegInfo::DIRTY);
7372 void
7373 TranslatorX64::analyzeSetOpL(Tracelet& t, NormalizedInstruction& i) {
7374 ASSERT(i.inputs.size() == 2);
7375 const SetOpOp subOp = SetOpOp(i.imm[1].u_OA);
7376 Opcode arithOp = setOpOpToOpcodeOp(subOp);
7377 i.m_txFlags = nativePlan(i.inputs[0]->isInt() &&
7378 i.inputs[1]->isInt() &&
7379 (arithOp == OpAdd || arithOp == OpSub ||
7380 arithOp == OpMul ||
7381 arithOp == OpBitAnd || arithOp == OpBitOr ||
7382 arithOp == OpBitXor));
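/*
 * Example covered by the native plan above (illustrative): with $x known
 * to hold an int, "$x += 5" (SetOpL with the add sub-op) reduces to a
 * single integer add on the local via binaryArithLocal in the translation
 * below; other input types or sub-ops are not translated natively.
 */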
7385 void
7386 TranslatorX64::translateSetOpL(const Tracelet& t,
7387 const NormalizedInstruction& i) {
7388 const vector<DynLocation*>& inputs = i.inputs;
7389 ASSERT(inputs.size() >= 2);
7390 ASSERT(i.outStack && i.outLocal);
7391 const int valIdx = 0;
7392 const int localIdx = 1;
7393 ASSERT(inputs[localIdx]->isLocal());
7394 ASSERT(inputs[valIdx]->isStack());
7395 ASSERT(inputs[valIdx]->outerType() != KindOfRef);
7397 const SetOpOp subOp = SetOpOp(i.imm[1].u_OA);
7398 Opcode arithOp = setOpOpToOpcodeOp(subOp);
7399 m_regMap.allocOutputRegs(i);
7400 binaryArithLocal(i, arithOp, *inputs[valIdx], *inputs[localIdx],
7401 *i.outStack);
7404 void
7405 TranslatorX64::analyzeIncDecL(Tracelet& t, NormalizedInstruction& i) {
7406 i.m_txFlags = nativePlan(i.inputs[0]->isInt());
7409 void
7410 TranslatorX64::translateIncDecL(const Tracelet& t,
7411 const NormalizedInstruction& i) {
7412 const vector<DynLocation*>& inputs = i.inputs;
7413 ASSERT(inputs.size() == 1);
7414 ASSERT(i.outLocal);
7415 ASSERT(inputs[0]->isLocal());
7416 const IncDecOp oplet = IncDecOp(i.imm[1].u_OA);
7417 ASSERT(oplet == PreInc || oplet == PostInc || oplet == PreDec ||
7418 oplet == PostDec);
7419 ASSERT(inputs[0]->isInt() && (!i.outStack || i.outStack->isInt()));
7420 bool post = (oplet == PostInc || oplet == PostDec);
7421 bool pre = !post;
7422 bool inc = (oplet == PostInc || oplet == PreInc);
7424 m_regMap.allocOutputRegs(i);
7425 PhysReg localVal = getReg(inputs[0]->location);
7426 if (i.outStack && post) { // $a++, $a--
7427 PhysReg output = getReg(i.outStack->location);
7428 emitMovRegReg(localVal, output);
7430 if (inc) {
7431 a. add_imm32_reg64(1, localVal);
7432 } else {
7433 a. sub_imm32_reg64(1, localVal);
7435 if (i.outStack && pre) { // --$a, ++$a
7436 PhysReg output = getReg(i.outStack->location);
7437 emitMovRegReg(localVal, output);
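/*
 * The register moves above implement the usual pre/post semantics; as a
 * sketch, with the local known to be an int:
 *
 *   post ($a++ / $a--):  out = local; local = local +/- 1;
 *   pre  (++$a / --$a):  local = local +/- 1; out = local;
 */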
7441 void
7442 TranslatorX64::translateUnsetL(const Tracelet& t,
7443 const NormalizedInstruction& i) {
7444 ASSERT(i.inputs.size() == 1);
7445 ASSERT(!i.outStack && i.outLocal);
7446 const int locIdx = 0;
7447 const DynLocation& localDl = *i.inputs[locIdx];
7448 ASSERT(localDl.isLocal());
7450 // We have to mark the output register as dirty to ensure that
7451 // the type gets spilled at the end of the tracelet
7452 m_regMap.allocOutputRegs(i);
7454 DataType type = localDl.outerType();
7455 // decRef the value that currently lives in the local if appropriate.
7456 emitDecRef(i, getReg(localDl.location), type);
7460 void
7461 TranslatorX64::analyzeReqLit(Tracelet& t, NormalizedInstruction& i,
7462 InclOpFlags flags) {
7463 ASSERT(i.inputs.size() == 1);
7464 Eval::PhpFile* efile = g_vmContext->lookupIncludeRoot(
7465 (StringData*)i.inputs[0]->rtt.valueString(),
7466 flags, NULL);
7467 i.m_txFlags = supportedPlan(i.inputs[0]->isString() &&
7468 i.inputs[0]->rtt.valueString() != NULL &&
7469 efile &&
7470 (RuntimeOption::RepoAuthoritative ||
7471 RuntimeOption::ServerStatCache));
7472 if (efile && efile->unit()->getMainReturn()->m_type != KindOfUninit) {
7473 i.outStack->rtt = RuntimeType(efile->unit()->getMainReturn()->m_type);
7476 // We don't need the reference lookupIncludeRoot made for us.
7477 if (efile) efile->decRef();
7478 i.manuallyAllocInputs = true;
7481 void
7482 TranslatorX64::analyzeReqDoc(Tracelet& t, NormalizedInstruction& i) {
7483 analyzeReqLit(t, i, InclOpDocRoot);
7486 void
7487 TranslatorX64::analyzeReqMod(Tracelet& t, NormalizedInstruction& i) {
7488 analyzeReqLit(t, i, InclOpDocRoot | InclOpLocal);
7491 void
7492 TranslatorX64::analyzeReqSrc(Tracelet& t, NormalizedInstruction& i) {
7493 analyzeReqLit(t, i, InclOpRelative | InclOpLocal);
7496 void
7497 TranslatorX64::translateReqLit(const Tracelet& t,
7498 const NormalizedInstruction& i,
7499 InclOpFlags flags) {
7500 bool local = flags & InclOpLocal;
7501 StringData *s = const_cast<StringData*>(i.inputs[0]->rtt.valueString());
7502 HPHP::Eval::PhpFile* efile =
7503 g_vmContext->lookupIncludeRoot(s, flags, NULL);
7505 * lookupIncludeRoot increments the refcount for us. This reference is
7506 * going to be burned into the translation cache. We will remove it only
7507 * when the file changes (via invalidateFile), and we're sure that no
7508 * outstanding requests are using the old code (via the Treadmill
7509 * module).
7511 TRACE(1, "lookupIncludeRoot: %s -> %p c %d\n", s->data(), efile,
7512 efile->getRef());
7514 * Remember that this tracelet (not just this instruction) now depends on the
7515 * contents of the required file.
7517 m_srcDB.recordDependency(efile, t.m_sk);
7518 Unit *unit = efile->unit();
7519 Func *func = unit->getMain(local ? NULL : curClass());
7521 const Offset after = nextSrcKey(t, i).offset();
7522 TRACE(1, "requireHelper: efile %p offset %d%s\n", efile, after,
7523 i.skipSync ? " [skipsync]" : "");
7525 if (i.skipSync) {
7527 * getting here means there was nothing to do between
7528 * the previous req and this one. Any spill code we generate
7529 * here would be broken (because the rbx is wrong), so
7530 * verify that we don't generate anything...
7532 TCA s DEBUG_ONLY = a.code.frontier;
7533 syncOutputs(0);
7534 ASSERT(s == a.code.frontier);
7535 } else {
7536 syncOutputs(i);
7538 ReqLitStaticArgs* args = m_globalData.alloc<ReqLitStaticArgs>();
7539 emitImmReg(a, (uint64_t)args, argNumToRegName[0]);
7540 emitCall(a, (TCA)reqLitHelper, true);
7542 args->m_efile = efile;
7543 args->m_pseudoMain = emitServiceReq(false, REQ_BIND_REQUIRE, 3,
7544 uint64_t(args),
7545 uint64_t(func), uint64_t(func->base()));
7546 args->m_pcOff = after;
7547 args->m_local = local;
7549 if (i.breaksTracelet) {
7550 SrcKey fallThru(curFunc(), after);
7551 emitBindJmp(fallThru);
7552 } else {
7554 * When we get here, rVmSp points to the actual top of stack,
7555 * but the rest of this tracelet assumes that rVmSp is set to
7556 * the top of the stack at the beginning of the tracelet, so we
7557 * have to fix it up here.
7560 if (!i.outStack) {
7561 /* as a special case, if we're followed by a pop, we return a
7562 non-refcounted type, and the pop is itself followed by another
7563 require, then we can avoid the add here and the sub in the
7564 following require
7566 } else {
7567 int delta = i.stackOff + getStackDelta(i);
7568 if (delta != 0) {
7569 // i.stackOff is in negative Cells, not bytes.
7570 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
7576 void
7577 TranslatorX64::translateReqDoc(const Tracelet& t,
7578 const NormalizedInstruction& i) {
7579 translateReqLit(t, i, InclOpDocRoot);
7582 void
7583 TranslatorX64::translateReqMod(const Tracelet& t,
7584 const NormalizedInstruction& i) {
7585 translateReqLit(t, i, InclOpDocRoot | InclOpLocal);
7588 void
7589 TranslatorX64::translateReqSrc(const Tracelet& t,
7590 const NormalizedInstruction& i) {
7591 translateReqLit(t, i, InclOpRelative | InclOpLocal);
7595 TranslatorX64::emitNativeTrampoline(TCA helperAddr) {
7596 if (!atrampolines.code.canEmit(m_trampolineSize)) {
7597 // not enough space to emit a trampoline, so just return the
7598 // helper address and emitCall will emit the right sequence
7599 // to call it indirectly
7600 TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
7601 ASSERT(false);
7602 return helperAddr;
7604 uint32_t index = m_numNativeTrampolines++;
7605 TCA trampAddr = atrampolines.code.frontier;
7606 if (Stats::enabled()) {
7607 Stats::emitInc(atrampolines, &Stats::tl_helper_counters[0], index);
7608 char* name = Util::getNativeFunctionName(helperAddr);
7609 const size_t limit = 50;
7610 if (strlen(name) > limit) {
7611 name[limit] = '\0';
7613 Stats::helperNames[index] = name;
7615 atrampolines.mov_imm64_reg((int64_t)helperAddr, rScratch);
7616 atrampolines.jmp_reg(rScratch);
7617 atrampolines.ud2();
7618 trampolineMap[helperAddr] = trampAddr;
7619 if (m_trampolineSize == 0) {
7620 m_trampolineSize = atrampolines.code.frontier - trampAddr;
7621 ASSERT(m_trampolineSize >= kMinPerTrampolineSize);
7623 recordBCInstr(OpNativeTrampoline, atrampolines, trampAddr);
7624 return trampAddr;
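/*
 * Each trampoline emitted above is just, roughly:
 *
 *   mov  rScratch, helperAddr    ; 64-bit absolute immediate
 *   jmp  rScratch
 *   ud2                          ; trap if anything falls through
 *
 * which lets a near (rel32) call site inside the translation cache reach
 * helpers that live outside its +/-2GB range.
 */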
7628 TranslatorX64::getNativeTrampoline(TCA helperAddr) {
7629 if (!RuntimeOption::EvalJitTrampolines && !Stats::enabled()) {
7630 return helperAddr;
7632 TCA trampAddr = (TCA)mapGet<PointerMap>(trampolineMap, helperAddr);
7633 if (trampAddr) {
7634 return trampAddr;
7636 return emitNativeTrampoline(helperAddr);
7638 void TranslatorX64::analyzeDefCls(Tracelet& t,
7639 NormalizedInstruction& i) {
7640 i.m_txFlags = Supported;
7643 static void defClsHelper(PreClass *preClass) {
7644 ASSERT(tl_regState == REGSTATE_DIRTY);
7645 tl_regState = REGSTATE_CLEAN;
7646 Unit::defClass(preClass);
7649 * m_defClsHelper sync'd the registers for us already. This means
7650 * if an exception propagates we want to leave things as
7651 * REGSTATE_CLEAN, since we're still in sync. Only set it to dirty
7652 * if we are actually returning to run in the TC again.
7654 tl_regState = REGSTATE_DIRTY;
7657 void TranslatorX64::translateDefCls(const Tracelet& t,
7658 const NormalizedInstruction& i) {
7659 int cid = i.imm[0].u_IVA;
7660 const Opcode* after = curUnit()->at(i.source.offset());
7661 PreClass* c = curFunc()->unit()->lookupPreClassId(cid);
7663 ASSERT(m_defClsHelper);
7666 compute the corrected stack ptr as a pseudo-param to m_defClsHelper
7667 which it will store in g_vmContext in case of fatals or __autoload
7669 m_regMap.cleanReg(rax);
7670 m_regMap.smashReg(rax);
7671 ScratchReg offset(m_regMap, rax);
7672 a. lea_reg64_disp_reg64(rVmSp, -cellsToBytes(i.stackOff), rax);
7674 EMIT_CALL(a, m_defClsHelper, IMM((uint64)c), IMM((uint64)after));
7677 void TranslatorX64::analyzeDefFunc(Tracelet& t,
7678 NormalizedInstruction& i) {
7679 i.m_txFlags = Supported;
7682 void defFuncHelper(Func *f) {
7683 f->setCached();
7686 void TranslatorX64::translateDefFunc(const Tracelet& t,
7687 const NormalizedInstruction& i) {
7688 int fid = i.imm[0].u_IVA;
7689 Func* f = curFunc()->unit()->lookupFuncId(fid);
7691 EMIT_CALL(a, defFuncHelper, IMM((uint64)f));
7692 recordReentrantCall(i);
7695 void
7696 TranslatorX64::analyzeFPushFunc(Tracelet& t, NormalizedInstruction& i) {
7697 ASSERT(i.inputs.size() >= 1);
7698 // The input might be an object implementing __invoke()
7699 i.m_txFlags = simplePlan(i.inputs[0]->isString());
7702 void
7703 TranslatorX64::translateFPushFunc(const Tracelet& t,
7704 const NormalizedInstruction& i) {
7705 using namespace TargetCache;
7706 CacheHandle ch = FuncCache::alloc();
7707 ASSERT(i.inputs.size() == 1);
7708 Location& inLoc = i.inputs[0]->location;
7710 m_regMap.allocOutputRegs(i);
7711 m_regMap.scrubStackRange(i.stackOff - 1,
7712 i.stackOff - 1 + kNumActRecCells);
7713 // Popped one cell, pushed an actrec
7714 int startOfActRec = int(sizeof(Cell)) - int(sizeof(ActRec));
7715 size_t funcOff = AROFF(m_func) + startOfActRec;
7716 size_t thisOff = AROFF(m_this) + startOfActRec;
7717 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
7718 emitPushAR(i, NULL, sizeof(Cell) /* bytesPopped */);
7719 if (false) { // typecheck
7720 StringData sd("foo");
7721 const UNUSED Func* f = FuncCache::lookup(ch, &sd);
7723 SKTRACE(1, i.source, "ch %d\n", ch);
7724 EMIT_CALL(a, FuncCache::lookup, IMM(ch), V(inLoc));
7725 recordCall(i);
7726 emitVStackStore(a, i, rax, funcOff, sz::qword);
7729 void
7730 TranslatorX64::analyzeFPushClsMethodD(Tracelet& t, NormalizedInstruction& i) {
7731 i.m_txFlags = supportedPlan(true);
7734 void
7735 TranslatorX64::translateFPushClsMethodD(const Tracelet& t,
7736 const NormalizedInstruction& i) {
7737 using namespace TargetCache;
7738 const StringData* meth = curUnit()->lookupLitstrId(i.imm[1].u_SA);
7739 const NamedEntityPair& np = curUnit()->lookupNamedEntityPairId(i.imm[2].u_SA);
7740 const StringData* cls = np.first;
7741 ASSERT(meth && meth->isStatic() &&
7742 cls && cls->isStatic());
7743 ASSERT(i.inputs.size() == 0);
7745 const Class* baseClass = Unit::lookupClass(np.second);
7746 bool magicCall = false;
7747 const Func* func = lookupImmutableMethod(baseClass, meth, magicCall,
7748 true /* staticLookup */);
7750 m_regMap.scrubStackRange(i.stackOff,
7751 i.stackOff + kNumActRecCells);
7753 int startOfActRec = -int(sizeof(ActRec));
7754 SKTRACE(2, i.source, "FPushClsMethodD %s :: %s\n",
7755 cls->data(), meth->data());
7757 size_t clsOff = AROFF(m_cls) + startOfActRec;
7758 if (func) {
7759 emitKnownClassCheck(i, cls, reg::noreg);
7760 Stats::emitInc(a, Stats::TgtCache_StaticMethodBypass);
7761 emitPushAR(i, func, 0 /*bytesPopped*/,
7762 false /* isCtor */, false /* clearThis */,
7763 magicCall ? uintptr_t(meth) | 1 : 0 /* varEnvInvName */);
7765 setupActRecClsForStaticCall(i, func, baseClass, clsOff, false);
7766 } else {
7767 Stats::emitInc(a, Stats::TgtCache_StaticMethodHit);
7768 CacheHandle ch = StaticMethodCache::alloc(cls, meth, getContextName());
7769 ScratchReg rFunc(m_regMap);
7770 // Unconditionally set rCls; if we miss, the miss path will clean it up for
7771 // us. The fill path has already |'ed in the necessary 1.
7772 ScratchReg rCls(m_regMap);
7773 a. load_reg64_disp_reg64(rVmTl,
7774 ch + offsetof(StaticMethodCache, m_cls),
7775 *rCls);
7776 emitVStackStore(a, i, *rCls, clsOff);
7777 TCA stubsSkipRet;
7778 a. load_reg64_disp_reg64(rVmTl, ch, *rFunc);
7779 a. test_reg64_reg64(*rFunc, *rFunc);
7781 UnlikelyIfBlock<CC_Z> miss(a, astubs);
7782 if (false) { // typecheck
7783 const UNUSED Func* f = StaticMethodCache::lookup(ch, np.second,
7784 cls, meth);
7786 EMIT_CALL(astubs,
7787 StaticMethodCache::lookup,
7788 IMM(ch),
7789 IMM(int64(np.second)),
7790 IMM(int64(cls)),
7791 IMM(int64(meth)));
7792 recordReentrantStubCall(i);
7793 emitMovRegReg(astubs, rax, *rFunc);
7794 // NULL return means our work is done; see also
7795 // translateFPushClsMethodF.
7796 miss.reconcileEarly();
7797 astubs.test_reg64_reg64(*rFunc, *rFunc);
7798 stubsSkipRet = astubs.code.frontier;
7799 astubs.jz(a.code.frontier); // 1f to be patched later
7803 FreezeRegs ice(m_regMap);
7804 emitPushAR(i, NULL);
7805 size_t funcOff = AROFF(m_func) + startOfActRec;
7806 emitVStackStore(a, i, *rFunc, funcOff, sz::qword);
7808 // 1:
7809 astubs.patchJcc(stubsSkipRet, a.code.frontier);
7813 void
7814 TranslatorX64::analyzeFPushClsMethodF(Tracelet& t,
7815 NormalizedInstruction& i) {
7816 ASSERT(i.inputs[0]->valueType() == KindOfClass);
7817 i.m_txFlags = supportedPlan(
7818 i.inputs[1]->rtt.valueString() != NULL && // We know the method name
7819 i.inputs[0]->valueType() == KindOfClass &&
7820 i.inputs[0]->rtt.valueClass() != NULL // We know the class name
7824 void
7825 TranslatorX64::translateFPushClsMethodF(const Tracelet& t,
7826 const NormalizedInstruction& i) {
7827 using namespace TargetCache;
7828 ASSERT(!curFunc()->isPseudoMain());
7829 ASSERT(curFunc()->cls() != NULL); // self:: and parent:: should only
7830 // appear in methods
7831 DynLocation* clsLoc = i.inputs[0];
7832 DynLocation* nameLoc = i.inputs[1];
7833 const StringData* name = nameLoc->rtt.valueString();
7834 ASSERT(name && name->isStatic());
7836 // Even though we know the Class* at compile time, it's not
7837 // guaranteed to be the same between requests. The name, however, is
7838 // fixed, so we can use that.
7839 const Class* cls = clsLoc->rtt.valueClass();
7840 ASSERT(cls);
7841 bool magicCall = false;
7842 const Func* func = lookupImmutableMethod(cls, name, magicCall,
7843 true /* staticLookup */);
7845 const int bytesPopped = 2 * sizeof(Cell); // [A C] popped
7846 const int startOfActRec = -int(sizeof(ActRec)) + bytesPopped;
7847 const Offset clsOff = startOfActRec + AROFF(m_cls);
7849 UNUSED ActRec* fp = curFrame();
7850 ASSERT(!fp->hasThis() || fp->getThis()->instanceof(cls));
7851 if (func) {
7852 Stats::emitInc(a, Stats::TgtCache_StaticMethodFBypass);
7853 emitPushAR(i, func, bytesPopped,
7854 false /* isCtor */, false /* clearThis */,
7855 magicCall ? uintptr_t(name) | 1 : 0 /* varEnvInvName */);
7857 setupActRecClsForStaticCall(i, func, cls, clsOff, true);
7858 m_regMap.scrubStackRange(i.stackOff - 2,
7859 i.stackOff - 2 + kNumActRecCells);
7860 } else {
7861 const StringData* clsName = cls->name();
7862 CacheHandle ch = StaticMethodFCache::alloc(clsName, name, getContextName());
7864 Stats::emitInc(a, Stats::TgtCache_StaticMethodFHit);
7865 TCA stubsSkipRet;
7866 ScratchReg rFunc(m_regMap);
7867 a. load_reg64_disp_reg64(rVmTl, ch, *rFunc);
7868 a. test_reg64_reg64(*rFunc, *rFunc);
7870 UnlikelyIfBlock<CC_Z> miss(a, astubs);
7871 if (false) { // typecheck
7872 const UNUSED Func* f = StaticMethodFCache::lookup(ch, cls, name);
7874 EMIT_CALL(astubs,
7875 StaticMethodFCache::lookup,
7876 IMM(ch),
7877 V(clsLoc->location),
7878 V(nameLoc->location));
7879 recordReentrantStubCall(i);
7880 emitMovRegReg(astubs, rax, *rFunc);
7881 // if rax == NULL, the helper interpreted the entire
7882 // instruction for us. Skip over the rest of the emitted code in
7883 // a, but we don't want to skip the branch spill/fill code.
7884 miss.reconcileEarly();
7885 astubs.test_reg64_reg64(*rFunc, *rFunc);
7886 stubsSkipRet = astubs.code.frontier;
7887 astubs.jz(a.code.frontier); // to be patched later
7890 const Offset funcOff = startOfActRec + AROFF(m_func);
7891 m_regMap.scrubStackRange(i.stackOff - 2,
7892 i.stackOff - 2 + kNumActRecCells);
7894 FreezeRegs ice(m_regMap);
7895 emitPushAR(i, NULL, bytesPopped);
7896 emitVStackStore(a, i, *rFunc, funcOff);
7898 // We know we're in a method so we don't have to worry about
7899 // rVmFp->m_cls being NULL. We just have to figure out if it's a
7900 // Class* or $this, and whether or not we should pass along $this or
7901 // its class.
7902 PhysReg rCls = *rFunc; // no need to allocate another scratch
7903 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
7904 a. test_imm32_reg64(1, rCls);
7906 JccBlock<CC_NZ> ifThis(a);
7907 // rCls is holding $this. Should we pass it to the callee?
7908 a. cmp_imm32_disp_reg32(1, ch + offsetof(StaticMethodFCache, m_static),
7909 rVmTl);
7911 IfElseBlock<CC_NE> ifStatic(a);
7912 // We're calling a static method. Load (this->m_cls | 0x1) into rCls.
7913 a.load_reg64_disp_reg64(rCls, ObjectData::getVMClassOffset(), rCls);
7914 a.or_imm32_reg64(1, rCls);
7916 ifStatic.Else();
7917 // We're calling an instance method. incRef $this.
7918 emitIncRef(rCls, KindOfObject);
7921 emitVStackStore(a, i, rCls, clsOff);
7924 astubs.patchJcc(stubsSkipRet, a.code.frontier);
7925 // No need to decref our inputs: one was KindOfClass and the other's
7926 // a static string.
7930 void
7931 TranslatorX64::analyzeFPushObjMethodD(Tracelet& t,
7932 NormalizedInstruction &i) {
7933 DynLocation* objLoc = i.inputs[0];
7934 i.m_txFlags = supportedPlan(objLoc->valueType() == KindOfObject);
7937 void
7938 TranslatorX64::translateFPushObjMethodD(const Tracelet &t,
7939 const NormalizedInstruction& i) {
7940 ASSERT(i.inputs.size() == 1);
7941 Location& objLoc = i.inputs[0]->location;
7942 ASSERT(i.inputs[0]->valueType() == KindOfObject);
7943 int id = i.imm[1].u_IVA;
7944 const StringData* name = curUnit()->lookupLitstrId(id);
7946 const Class* baseClass = i.inputs[0]->rtt.valueClass();
7947 bool magicCall = false;
7948 const Func* func = lookupImmutableMethod(baseClass, name, magicCall,
7949 false /* staticLookup */);
7950 m_regMap.scrubStackRange(i.stackOff - 1,
7951 i.stackOff - 1 + kNumActRecCells);
7952 // Popped one cell, pushed an actrec
7953 int startOfActRec = int(sizeof(Cell)) - int(sizeof(ActRec));
7954 size_t thisOff = AROFF(m_this) + startOfActRec;
7955 size_t funcOff = AROFF(m_func) + startOfActRec;
7956 emitPushAR(i, func, sizeof(Cell) /*bytesPopped*/,
7957 false /* isCtor */, false /* clearThis */,
7958 func && magicCall ? uintptr_t(name) | 1 : 0 /* varEnvInvName */);
7960 if (!func) {
7961 if (baseClass && !(baseClass->attrs() & AttrInterface)) {
7962 MethodLookup::LookupResult res =
7963 g_vmContext->lookupObjMethod(func, baseClass, name, false);
7964 if ((res == MethodLookup::MethodFoundWithThis ||
7965 res == MethodLookup::MethodFoundNoThis) &&
7966 !func->isAbstract()) {
7968 * if we found the func in baseClass, then either:
7969 * - it's private, and this is always going to be the
7970 * called function, or
7971 * - any derived class must have a func that matches in
7972 * staticness, and is at least as accessible (and in
7973 * particular, you can't override a public/protected
7974 * method with a private method)
7976 if (func->attrs() & AttrPrivate) {
7977 emitVStackStoreImm(a, i, uintptr_t(func), funcOff, sz::qword);
7978 } else {
7979 Offset methodsOff = Class::getMethodsOffset();
7980 Offset vecOff = methodsOff + Class::MethodMap::vecOff();
7981 ScratchReg scratch(m_regMap);
7982 // get the object's class into *scratch
7983 a. load_reg64_disp_reg64(getReg(objLoc),
7984 ObjectData::getVMClassOffset(),
7985 *scratch);
7986 if (res == MethodLookup::MethodFoundNoThis) {
7987 emitDecRef(a, i, getReg(objLoc), KindOfObject);
7988 a. lea_reg64_disp_reg64(*scratch, 1, getReg(objLoc));
7990 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
7992 // get the method vector into *scratch
7993 a. load_reg64_disp_reg64(*scratch, vecOff, *scratch);
7994 // get the func
7995 a. load_reg64_disp_reg64(*scratch,
7996 func->methodSlot() * sizeof(Func*),
7997 *scratch);
7998 emitVStackStore(a, i, *scratch, funcOff, sz::qword);
7999 Stats::emitInc(a, Stats::TgtCache_MethodFast);
8000 return;
8002 } else {
8003 func = NULL;
8008 if (func) {
8009 if (func->attrs() & AttrStatic) {
8010 if (func->attrs() & AttrPrivate) {
8011 emitVStackStoreImm(a, i, uintptr_t(curFunc()->cls()) | 1,
8012 thisOff, sz::qword);
8013 } else {
8014 ScratchReg scratch(m_regMap);
8015 a. load_reg64_disp_reg64(getReg(objLoc),
8016 ObjectData::getVMClassOffset(),
8017 *scratch);
8018 a. or_imm32_reg64(1, *scratch);
8019 emitVStackStore(a, i, *scratch, thisOff, sz::qword);
8021 emitDecRef(a, i, getReg(objLoc), KindOfObject);
8022 } else {
8023 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
8025 Stats::emitInc(a, Stats::TgtCache_MethodBypass);
8026 } else {
8027 emitVStackStore(a, i, getReg(objLoc), thisOff, sz::qword);
8028 using namespace TargetCache;
8029 CacheHandle ch = MethodCache::alloc();
8030 if (false) { // typecheck
8031 ActRec* ar = NULL;
8032 MethodCache::lookup(ch, ar, name);
8034 int arOff = vstackOffset(i, startOfActRec);
8035 SKTRACE(1, i.source, "ch %d\n", ch);
8036 EMIT_CALL(a, MethodCache::lookup, IMM(ch),
8037 RPLUS(rVmSp, arOff), IMM(uint64_t(name)));
8038 recordReentrantCall(i);
8042 static inline ALWAYS_INLINE Class* getKnownClass(Class** classCache,
8043 const StringData* clsName) {
8044 Class* cls = *classCache;
8045 if (UNLIKELY(cls == NULL)) {
8046 // lookupKnownClass does its own VMRegAnchor'ing.
8047 cls = TargetCache::lookupKnownClass<false>(classCache, clsName, true);
8048 ASSERT(*classCache && *classCache == cls);
8050 ASSERT(cls);
8051 return cls;
8054 static Instance*
8055 HOT_FUNC_VM
8056 newInstanceHelperNoCtor(Class** classCache, const StringData* clsName) {
8057 Class* cls = getKnownClass(classCache, clsName);
8058 Instance* ret = newInstance(cls);
8059 ret->incRefCount();
8060 return ret;
8063 Instance*
8064 HOT_FUNC_VM
8065 newInstanceHelper(Class* cls, int numArgs, ActRec* ar, ActRec* prevAr) {
8066 const Func* f = cls->getCtor();
8067 Instance* ret = NULL;
8068 if (UNLIKELY(!(f->attrs() & AttrPublic))) {
8069 VMRegAnchor _;
8070 UNUSED MethodLookup::LookupResult res =
8071 g_vmContext->lookupCtorMethod(f, cls, true /*raise*/);
8072 ASSERT(res == MethodLookup::MethodFoundWithThis);
8074 // Don't start pushing the AR until newInstance returns; it may reenter.
8075 ret = newInstance(cls);
8076 f->validate();
8077 ar->m_func = f;
8078 ar->initNumArgs(numArgs, true /*fromCtor*/);
8079 // Count stack and this.
8080 ret->incRefCount();
8081 ret->incRefCount();
8082 ar->setThis(ret);
8083 ar->setVarEnv(NULL);
8084 arSetSfp(ar, prevAr);
8085 TRACE(2, "newInstanceHelper: AR %p: f %p, savedRbp %#lx, savedRip %#lx"
8086 " this %p\n",
8087 ar, ar->m_func, ar->m_savedRbp, ar->m_savedRip, ar->m_this);
8088 return ret;
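/*
 * Reference-count accounting above, as a sketch: the freshly created
 * instance gets two increfs -- one for the FPushCtor result cell that the
 * caller will push on the VM stack, and one for the $this slot of the
 * ActRec being initialized via ar->setThis(ret).
 */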
8091 void TranslatorX64::translateFPushCtor(const Tracelet& t,
8092 const NormalizedInstruction& i) {
8093 int numArgs = i.imm[0].u_IVA;
8094 int arOff = vstackOffset(i, -int(sizeof(ActRec)));
8095 m_regMap.scrubStackRange(i.stackOff, i.stackOff + kNumActRecCells);
8096 EMIT_CALL(a, newInstanceHelper,
8097 V(i.inputs[0]->location),
8098 IMM(numArgs),
8099 RPLUS(rVmSp, arOff),
8100 R(rVmFp));
8101 recordReentrantCall(i);
8103 m_regMap.bind(rax, i.outStack->location, KindOfObject, RegInfo::DIRTY);
8106 Instance*
8107 HOT_FUNC_VM
8108 newInstanceHelperCached(Class** classCache,
8109 const StringData* clsName, int numArgs,
8110 ActRec* ar, ActRec* prevAr) {
8111 Class* cls = getKnownClass(classCache, clsName);
8112 return newInstanceHelper(cls, numArgs, ar, prevAr);
8115 void TranslatorX64::translateFPushCtorD(const Tracelet& t,
8116 const NormalizedInstruction& i) {
8117 using namespace TargetCache;
8118 int numArgs = i.imm[0].u_IVA;
8119 const StringData* clsName = curUnit()->lookupLitstrId(i.imm[1].u_SA);
8120 CacheHandle classCh = allocKnownClass(clsName);
8121 ScratchReg scr(m_regMap);
8122 a. lea_reg64_disp_reg64(rVmTl, classCh, *scr);
8123 // We first push the new object, then the actrec. Since we're going to
8124 // need to call out, and possibly reenter in the course of all this,
8125 // null out the object on the stack, in case we unwind before we're
8126 // ready.
8127 int arOff = vstackOffset(i, -int(sizeof(ActRec)) - cellsToBytes(1));
8128 m_regMap.scrubStackRange(i.stackOff, i.stackOff + kNumActRecCells + 1);
8129 if (i.noCtor) {
8130 EMIT_CALL(a, newInstanceHelperNoCtor,
8131 R(*scr),
8132 IMM(uintptr_t(clsName)));
8133 } else {
8134 EMIT_CALL(a, newInstanceHelperCached,
8135 R(*scr),
8136 IMM(uintptr_t(clsName)),
8137 IMM(numArgs),
8138 RPLUS(rVmSp, arOff), // ActRec
8139 R(rVmFp)); // prevAR
8141 recordReentrantCall(i);
8142 // The callee takes care of initializing the actRec, and returns the new
8143 // object.
8144 m_regMap.bind(rax, i.outStack->location, KindOfObject, RegInfo::DIRTY);
8147 static void fatalNullThis() { raise_error(Strings::FATAL_NULL_THIS); }
8149 void
8150 TranslatorX64::emitThisCheck(const NormalizedInstruction& i,
8151 PhysReg reg) {
8152 if (curFunc()->cls() == NULL) { // Non-class
8153 a.test_reg64_reg64(reg, reg);
8154 a.jz(astubs.code.frontier); // jz if_null
8157 a. test_imm32_reg64(1, reg);
8159 UnlikelyIfBlock<CC_NZ> ifThisNull(a, astubs);
8160 // if_null:
8161 EMIT_CALL(astubs, fatalNullThis);
8162 recordReentrantStubCall(i);
8166 void
8167 TranslatorX64::translateThis(const Tracelet &t,
8168 const NormalizedInstruction &i) {
8169 if (!i.outStack) {
8170 ASSERT(i.next && i.next->grouped);
8171 return;
8174 ASSERT(!i.outLocal);
8175 ASSERT(curFunc()->isPseudoMain() || curFunc()->cls());
8176 m_regMap.allocOutputRegs(i);
8177 PhysReg out = getReg(i.outStack->location);
8178 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), out);
8180 if (!i.guardedThis) {
8181 emitThisCheck(i, out);
8183 emitIncRef(out, KindOfObject);
8186 void
8187 TranslatorX64::translateBareThis(const Tracelet &t,
8188 const NormalizedInstruction &i) {
8189 if (!i.outStack) {
8190 ASSERT(i.next && i.next->grouped);
8191 return;
8193 ASSERT(!i.outLocal);
8194 ASSERT(curFunc()->cls());
8195 ScratchReg outScratch(m_regMap);
8196 PhysReg out = *outScratch;
8197 PhysReg base;
8198 int offset;
8199 locToRegDisp(i.outStack->location, &base, &offset);
8200 if (i.outStack->rtt.isVagueValue()) {
8201 m_regMap.scrubLoc(i.outStack->location);
8203 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), out);
8204 a. test_imm32_reg64(1, out);
8205 DiamondReturn astubsRet;
8207 UnlikelyIfBlock<CC_NZ> ifThisNull(a, astubs, &astubsRet);
8208 astubs. store_imm32_disp_reg(KindOfNull, TVOFF(m_type) + offset, base);
8209 if (i.imm[0].u_OA) {
8210 EMIT_CALL(astubs, warnNullThis);
8211 recordReentrantStubCall(i);
8213 if (i.next && !i.outStack->rtt.isVagueValue()) {
8214 // To handle the case where we predict that
8215 // the bare this will have type Object.
8216 // Using the normal type prediction mechanism
8217 // would require writing the object to the stack
8218 // anyway.
8219 // This is currently dead, however; I couldn't
8220 // find a win.
8221 emitSideExit(astubs, i, true);
8222 astubsRet.kill();
8225 emitIncRef(out, KindOfObject);
8226 if (i.outStack->rtt.isVagueValue()) {
8227 a. store_imm32_disp_reg(KindOfObject, TVOFF(m_type) + offset, base);
8228 a. store_reg64_disp_reg64(out, TVOFF(m_data) + offset, base);
8229 } else {
8230 ASSERT(i.outStack->isObject());
8231 m_regMap.bindScratch(outScratch, i.outStack->location, KindOfObject,
8232 RegInfo::DIRTY);
8236 void
8237 TranslatorX64::translateCheckThis(const Tracelet& t,
8238 const NormalizedInstruction& i) {
8239 ASSERT(i.inputs.size() == 1 &&
8240 i.inputs[0]->location == Location(Location::This));
8241 if (i.guardedThis) return;
8242 emitThisCheck(i, getReg(i.inputs[0]->location));
8245 void
8246 TranslatorX64::translateInitThisLoc(const Tracelet& t,
8247 const NormalizedInstruction& i) {
8248 ASSERT(i.outLocal && !i.outStack);
8249 ASSERT(curFunc()->isPseudoMain() || curFunc()->cls());
8251 PhysReg base;
8252 int offset;
8253 locToRegDisp(i.outLocal->location, &base, &offset);
8254 ASSERT(base == rVmFp);
8256 ScratchReg thiz(m_regMap);
8257 a.load_reg64_disp_reg64(rVmFp, AROFF(m_this), *thiz);
8258 if (curFunc()->cls() == NULL) {
8259 // If we're in a pseudomain, m_this could be NULL
8260 a.test_reg64_reg64(*thiz, *thiz);
8261 a.jz(astubs.code.frontier); // jz if_null
8263 // Ok, it's not NULL but it might be a Class which should be treated
8264 // equivalently
8265 a.test_imm32_reg64(1, *thiz);
8266 a.jnz(astubs.code.frontier); // jnz if_null
8268 // We have a valid $this!
8269 a.store_imm32_disp_reg(KindOfObject, offset + TVOFF(m_type), base);
8270 a.store_reg64_disp_reg64(*thiz, offset + TVOFF(m_data), base);
8271 emitIncRef(*thiz, KindOfObject);
8273 // if_null:
8274 emitStoreUninitNull(astubs, offset, base);
8275 astubs.jmp(a.code.frontier);
8277 m_regMap.invalidate(i.outLocal->location);
8280 void
8281 TranslatorX64::analyzeFPushFuncD(Tracelet& t, NormalizedInstruction& i) {
8282 Id funcId = i.imm[1].u_SA;
8283 const NamedEntityPair nep = curUnit()->lookupNamedEntityPairId(funcId);
8284 const Func* func = Unit::lookupFunc(nep.second, nep.first);
8285 i.m_txFlags = supportedPlan(func != NULL);
8288 void
8289 TranslatorX64::translateFPushFuncD(const Tracelet& t,
8290 const NormalizedInstruction& i) {
8291 ASSERT(i.inputs.size() == 0);
8292 ASSERT(!i.outStack && !i.outLocal);
8293 Id funcId = i.imm[1].u_SA;
8294 const NamedEntityPair& nep = curUnit()->lookupNamedEntityPairId(funcId);
8295 const StringData* name = nep.first;
8296 const Func* func = Unit::lookupFunc(nep.second, name);
8298 // Translation is only supported if function lookup succeeds
8299 func->validate();
8300 if (Trace::enabled && !func) {
8301 TRACE(1, "Attempt to invoke undefined function %s\n", name->data());
8304 // Inform the register allocator that we just annihilated a range of
8305 // possibly-dirty stack entries.
8306 m_regMap.scrubStackRange(i.stackOff,
8307 i.stackOff + kNumActRecCells);
8309 size_t thisOff = AROFF(m_this) - sizeof(ActRec);
8310 bool funcCanChange = !func->isNameBindingImmutable(curUnit());
8311 emitVStackStoreImm(a, i, 0, thisOff, sz::qword, &m_regMap);
8312 emitPushAR(i, funcCanChange ? NULL : func, 0, false, false);
8313 if (funcCanChange) {
8314 // Look it up in a FuncCache.
8315 using namespace TargetCache;
8316 CacheHandle ch = allocFixedFunction(nep.second, false);
8317 size_t funcOff = AROFF(m_func) - sizeof(ActRec);
8318 size_t funcCacheOff = ch + offsetof(FixedFuncCache, m_func);
8320 SKTRACE(1, i.source, "ch %d\n", ch);
8322 Stats::emitInc(a, Stats::TgtCache_FuncDHit);
8323 ScratchReg scratch(m_regMap);
8324 a.load_reg64_disp_reg64(rVmTl, funcCacheOff, *scratch);
8325 a.test_reg64_reg64(*scratch, *scratch);
8327 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8329 if (false) { // typecheck
8330 StringData sd("foo");
8331 FixedFuncCache::lookupFailed(&sd);
8334 EMIT_CALL(astubs, TCA(FixedFuncCache::lookupFailed),
8335 IMM(uintptr_t(name)));
8336 recordReentrantStubCall(i);
8337 emitMovRegReg(astubs, rax, *scratch);
8339 emitVStackStore(a, i, *scratch, funcOff, sz::qword);
8343 void
8344 TranslatorX64::translateFPushContFunc(const Tracelet& t,
8345 const NormalizedInstruction& i) {
8346 ASSERT(curFrame()->hasThis());
8347 Class* genClass = curFrame()->getThis()->getVMClass();
8348 ASSERT(genClass == SystemLib::s_MethodContinuationClass ||
8349 genClass == SystemLib::s_FunctionContinuationClass);
8350 bool isMethod = genClass == SystemLib::s_MethodContinuationClass;
8351 size_t thisOff = AROFF(m_this) - sizeof(ActRec);
8352 size_t funcOff = AROFF(m_func) - sizeof(ActRec);
8353 m_regMap.scrubStackRange(i.stackOff,
8354 i.stackOff + kNumActRecCells);
8355 emitPushAR(i, NULL, 0, false, false);
8356 ScratchReg rCont(m_regMap);
8357 ScratchReg rScratch(m_regMap);
8358 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), *rCont);
8360 // Store the func
8361 a.load_reg64_disp_reg64(*rCont, CONTOFF(m_vmFunc), *rScratch);
8362 emitVStackStore(a, i, *rScratch, funcOff, sz::qword);
8364 if (isMethod) {
8365 // Store m_this
8366 a. load_reg64_disp_reg64(*rCont, CONTOFF(m_obj), *rScratch);
8367 a. test_reg64_reg64(*rScratch, *rScratch);
8369 IfElseBlock<CC_Z> ifThis(a);
8370 emitVStackStore(a, i, *rScratch, thisOff, sz::qword);
8371 emitIncRef(*rScratch, KindOfObject);
8373 ifThis.Else();
8374 a.load_reg64_disp_reg64(*rCont, CONTOFF(m_vmCalledClass), *rScratch);
8375 // m_vmCalledClass already has its low bit set
8376 emitVStackStore(a, i, *rScratch, thisOff, sz::qword);
8378 } else {
8379 emitVStackStoreImm(a, i, 0, thisOff, sz::qword);
8383 const Func*
8384 TranslatorX64::findCuf(const NormalizedInstruction& ni,
8385 Class*& cls, StringData*& invName, bool& forward) {
8386 forward = (ni.op() == OpFPushCufF);
8387 cls = NULL;
8388 invName = NULL;
8390 DynLocation* callable = ni.inputs[ni.op() == OpFPushCufSafe ? 1 : 0];
8392 const StringData* str =
8393 callable->isString() ? callable->rtt.valueString() : NULL;
8394 const ArrayData* arr =
8395 callable->isArray() ? callable->rtt.valueArray() : NULL;
8397 StringData* sclass = NULL;
8398 StringData* sname = NULL;
8399 if (str) {
8400 Func* f = HPHP::VM::Unit::lookupFunc(str);
8401 if (f) return f;
8402 String name(const_cast<StringData*>(str));
8403 int pos = name.find("::");
8404 if (pos <= 0 || pos + 2 >= name.size() ||
8405 name.find("::", pos + 2) != String::npos) {
8406 return NULL;
8408 sclass = StringData::GetStaticString(name.substr(0, pos).get());
8409 sname = StringData::GetStaticString(name.substr(pos + 2).get());
8410 } else if (arr) {
8411 if (arr->size() != 2) return NULL;
8412 CVarRef e0 = arr->get(0LL, false);
8413 CVarRef e1 = arr->get(1LL, false);
8414 if (!e0.isString() || !e1.isString()) return NULL;
8415 sclass = e0.getStringData();
8416 sname = e1.getStringData();
8417 String name(sname);
8418 if (name.find("::") != String::npos) return NULL;
8419 } else {
8420 return NULL;
8423 Class* ctx = curFunc()->cls();
8425 if (sclass->isame(s_self.get())) {
8426 if (!ctx) return NULL;
8427 cls = ctx;
8428 forward = true;
8429 } else if (sclass->isame(s_parent.get())) {
8430 if (!ctx || !ctx->parent()) return NULL;
8431 cls = ctx->parent();
8432 forward = true;
8433 } else if (sclass->isame(s_static.get())) {
8434 return NULL;
8435 } else {
8436 cls = VM::Unit::lookupClass(sclass);
8437 if (!cls) return NULL;
8440 bool magicCall = false;
8441 const Func* f = lookupImmutableMethod(cls, sname, magicCall, true);
8442 if (!f || (forward && !ctx->classof(f->cls()))) {
8444 * To preserve the invariant that the lsb class
8445 * is an instance of the context class, we require
8446 * that f's class is an instance of the context class.
8447 * This is conservative, but without it, we would need
8448 * a runtime check to decide whether or not to forward
8449 * the lsb class
8451 return NULL;
8453 if (magicCall) invName = sname;
8454 return f;
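/*
 * Callable shapes findCuf resolves at translation time, as examples
 * (anything else returns NULL):
 *
 *   "some_function"               -> plain function lookup
 *   "SomeClass::method"           -> split into class and method names
 *   "self::method", "parent::..." -> resolved against the current context,
 *                                    with forwarding of the called class
 *   array("SomeClass", "method")  -> two-element array of strings
 *   "static::method"              -> never resolved statically here
 */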
8457 void
8458 TranslatorX64::analyzeFPushCufOp(Tracelet& t,
8459 NormalizedInstruction& ni) {
8460 Class* cls = NULL;
8461 StringData* invName = NULL;
8462 bool forward = false;
8463 const Func* func = findCuf(ni, cls, invName, forward);
8464 ni.m_txFlags = supportedPlan(func != NULL);
8465 ni.manuallyAllocInputs = true;
8468 void
8469 TranslatorX64::setupActRecClsForStaticCall(const NormalizedInstruction &i,
8470 const Func* func, const Class* cls,
8471 size_t clsOff, bool forward) {
8472 if (forward) {
8473 ScratchReg rClsScratch(m_regMap);
8474 PhysReg rCls = *rClsScratch;
8475 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
8476 if (!(curFunc()->attrs() & AttrStatic)) {
8477 ASSERT(curFunc()->cls() &&
8478 curFunc()->cls()->classof(cls));
8479 /* the context is non-static, so we have to deal
8480 with passing in $this or getClass($this) */
8481 a. test_imm32_reg64(1, rCls);
8483 JccBlock<CC_NZ> ifThis(a);
8484 // rCls is holding a real $this.
8485 if (func->attrs() & AttrStatic) {
8486 // but we're a static method, so pass getClass($this)|1
8487 a.load_reg64_disp_reg64(rCls, ObjectData::getVMClassOffset(), rCls);
8488 a.or_imm32_reg64(1, rCls);
8489 } else {
8490 // We should pass $this to the callee
8491 emitIncRef(rCls, KindOfObject);
8495 emitVStackStore(a, i, rCls, clsOff);
8496 } else {
8497 if (!(func->attrs() & AttrStatic) &&
8498 !(curFunc()->attrs() & AttrStatic) &&
8499 curFunc()->cls() &&
8500 curFunc()->cls()->classof(cls)) {
8501 /* might be a non-static call */
8502 ScratchReg rClsScratch(m_regMap);
8503 PhysReg rCls = *rClsScratch;
8504 a. load_reg64_disp_reg64(rVmFp, AROFF(m_cls), rCls);
8505 a. test_imm32_reg64(1, rCls);
8507 IfElseBlock<CC_NZ> ifThis(a);
8508 // rCls is holding $this. We should pass it to the callee
8509 emitIncRef(rCls, KindOfObject);
8510 emitVStackStore(a, i, rCls, clsOff);
8511 ifThis.Else();
8512 emitVStackStoreImm(a, i, uintptr_t(cls)|1, clsOff);
8514 } else {
8515 emitVStackStoreImm(a, i, uintptr_t(cls)|1, clsOff);
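/*
 * The m_cls slot written above uses the usual ActRec encoding: either a
 * real $this (an ObjectData*, low bit clear, incref'd) or a Class* with
 * the low bit set. As a sketch of the decision:
 *
 *   forward case: reuse the caller's m_cls, converting a real $this to
 *                 getClass($this)|1 when the callee is static;
 *   normal case:  pass the caller's $this when a non-static call is
 *                 possible, otherwise store cls|1 as an immediate.
 */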
8520 template <bool warn>
8521 int64 checkClass(TargetCache::CacheHandle ch, StringData* clsName,
8522 ActRec *ar) {
8523 VMRegAnchor _;
8524 AutoloadHandler::s_instance->invokeHandler(clsName->data());
8525 if (*(Class**)TargetCache::handleToPtr(ch)) return true;
8526 ar->m_func = SystemLib::GetNullFunction();
8527 if (ar->hasThis()) {
8528 // cannot hit zero, we just inc'ed it
8529 ar->getThis()->decRefCount();
8531 ar->setThis(0);
8532 return false;
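/*
 * Failure path of checkClass, as a sketch: if the autoloader still did not
 * define the class, the partially-built ActRec is pointed at the null
 * function (so the coming FCall is a harmless no-op) and any $this it held
 * is released; the boolean return value feeds the FPushCufSafe "callable
 * exists" flag at the call site below.
 */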
8535 static void warnMissingFunc(StringData* name) {
8536 throw_invalid_argument("function: method '%s' not found", name->data());
8539 void
8540 TranslatorX64::translateFPushCufOp(const Tracelet& t,
8541 const NormalizedInstruction& ni) {
8542 Class* cls = NULL;
8543 StringData* invName = NULL;
8544 bool forward = false;
8545 const Func* func = findCuf(ni, cls, invName, forward);
8546 ASSERT(func);
8548 int numPopped = ni.op() == OpFPushCufSafe ? 0 : 1;
8549 m_regMap.scrubStackRange(ni.stackOff - numPopped,
8550 ni.stackOff - numPopped + kNumActRecCells);
8552 int startOfActRec = int(numPopped * sizeof(Cell)) - int(sizeof(ActRec));
8554 emitPushAR(ni, cls ? func : NULL, numPopped * sizeof(Cell),
8555 false /* isCtor */, false /* clearThis */,
8556 invName ? uintptr_t(invName) | 1 : 0 /* varEnvInvName */);
8558 bool safe = (ni.op() == OpFPushCufSafe);
8559 size_t clsOff = AROFF(m_cls) + startOfActRec;
8560 size_t funcOff = AROFF(m_func) + startOfActRec;
8561 LazyScratchReg flag(m_regMap);
8562 if (safe) {
8563 flag.alloc();
8564 emitImmReg(a, true, *flag);
8566 if (cls) {
8567 setupActRecClsForStaticCall(ni, func, cls, clsOff, forward);
8568 TargetCache::CacheHandle ch = cls->m_cachedOffset;
8569 if (!TargetCache::isPersistentHandle(ch)) {
8570 a. cmp_imm32_disp_reg32(0, ch, rVmTl);
8572 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8573 if (false) {
8574 checkClass<false>(0, NULL, NULL);
8575 checkClass<true>(0, NULL, NULL);
8577 EMIT_CALL(astubs, TCA(safe ? checkClass<false> : checkClass<true>),
8578 IMM(ch), IMM(uintptr_t(cls->name())),
8579 RPLUS(rVmSp, vstackOffset(ni, startOfActRec)));
8580 recordReentrantStubCall(ni, true);
8581 if (safe) {
8582 astubs. mov_reg64_reg64(rax, *flag);
8586 } else {
8587 TargetCache::CacheHandle ch = func->getCachedOffset();
8588 if (TargetCache::isPersistentHandle(ch)) {
8589 emitVStackStoreImm(a, ni, uintptr_t(func), funcOff, sz::qword);
8590 emitVStackStoreImm(a, ni, 0, clsOff, sz::qword, &m_regMap);
8591 } else {
8592 ScratchReg funcReg(m_regMap);
8593 a. load_reg64_disp_reg64(rVmTl, ch, *funcReg);
8594 emitVStackStore(a, ni, *funcReg, funcOff);
8595 emitVStackStoreImm(a, ni, 0, clsOff, sz::qword, &m_regMap);
8596 a. test_reg64_reg64(*funcReg, *funcReg);
8598 UnlikelyIfBlock<CC_Z> ifNull(a, astubs);
8599 emitVStackStoreImm(astubs, ni,
8600 uintptr_t(SystemLib::GetNullFunction()), funcOff);
8601 if (safe) {
8602 emitImmReg(astubs, false, *flag);
8603 } else {
8604 EMIT_CALL(astubs, TCA(warnMissingFunc), IMM(uintptr_t(func->name())));
8605 recordReentrantStubCall(ni, true);
8611 if (safe) {
8612 DynLocation* outFlag = ni.outStack2;
8613 DynLocation* outDef = ni.outStack;
8615 DynLocation* inDef = ni.inputs[0];
8616 if (!m_regMap.hasReg(inDef->location)) {
8617 m_regMap.scrubStackRange(ni.stackOff - 2, ni.stackOff - 2);
8618 PhysReg base1, base2;
8619 int disp1, disp2;
8620 locToRegDisp(inDef->location, &base1, &disp1);
8621 locToRegDisp(outDef->location, &base2, &disp2);
8622 ScratchReg tmp(m_regMap);
8623 a. load_reg64_disp_reg64(base1, TVOFF(m_data) + disp1, *tmp);
8624 a. store_reg64_disp_reg64(*tmp, TVOFF(m_data) + disp2, base2);
8625 if (!inDef->rtt.isVagueValue()) {
8626 a. store_imm32_disp_reg(inDef->outerType(),
8627 TVOFF(m_type) + disp2, base2);
8628 } else {
8629 a. load_reg64_disp_reg32(base1, TVOFF(m_type) + disp1, *tmp);
8630 a. store_reg32_disp_reg64(*tmp, TVOFF(m_type) + disp2, base2);
8632 } else {
8633 PhysReg reg = m_regMap.getReg(inDef->location);
8634 m_regMap.scrubStackRange(ni.stackOff - 1, ni.stackOff - 1);
8635 m_regMap.bind(reg, outDef->location, inDef->rtt.outerType(),
8636 RegInfo::DIRTY);
8638 m_regMap.bindScratch(flag, outFlag->location, KindOfBoolean,
8639 RegInfo::DIRTY);
8643 void
8644 TranslatorX64::analyzeFPassCOp(Tracelet& t, NormalizedInstruction& i) {
8645 i.m_txFlags = nativePlan(!i.preppedByRef);
8648 void
8649 TranslatorX64::translateFPassCOp(const Tracelet& t,
8650 const NormalizedInstruction& i) {
8651 ASSERT(i.inputs.size() == 0);
8652 ASSERT(!i.outStack && !i.outLocal);
8653 ASSERT(!i.preppedByRef);
8656 void
8657 TranslatorX64::translateFPassR(const Tracelet& t,
8658 const NormalizedInstruction& i) {
8660 * Like FPassC, FPassR is able to cheat on boxing if the current
8661 * parameter is passed by reference but we have a cell: the box would refer
8662 * to exactly one datum (the value currently on the stack).
8664 * However, if the callee wants a cell and we have a variant we must
8665 * unbox; otherwise we might accidentally make callee changes to its
8666 * parameter globally visible.
8668 ASSERT(!i.inputs[0]->rtt.isVagueValue());
8670 ASSERT(i.inputs.size() == 1);
8671 const RuntimeType& inRtt = i.inputs[0]->rtt;
8672 if (inRtt.isVariant() && !i.preppedByRef) {
8673 emitUnboxTopOfStack(i);
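/*
 * Example of the unbox case above (illustrative): the stack slot holds a
 * KindOfRef, but this parameter position is prepped by value, so the inner
 * cell is copied out here; otherwise the callee could mutate the shared
 * box and make the change visible to the caller.
 */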
8677 void
8678 TranslatorX64::translateFCall(const Tracelet& t,
8679 const NormalizedInstruction& i) {
8680 int numArgs = i.imm[0].u_IVA;
8681 const Opcode* atCall = i.pc();
8682 const Opcode* after = curUnit()->at(nextSrcKey(t, i).offset());
8683 const Func* srcFunc = curFunc();
8685 // Sync all dirty registers and adjust rVmSp to point to the
8686 // top of stack at the beginning of the current instruction
8687 syncOutputs(i);
8689 // We are "between" tracelets and don't use the register map
8690 // anymore. (Note that the currently executing trace may actually
8691 // continue past the FCall, but it will have to resume with a fresh
8692 // register map.)
8693 RegSet scratchRegs = kScratchCrossTraceRegs;
8694 DumbScratchReg retIPReg(scratchRegs);
8696 // Caller-specific fields: return addresses and the frame pointer
8697 // offset.
8698 ASSERT(sizeof(Cell) == 1 << 4);
8699 // Record the hardware return address. This will be patched up below; 2
8700 // is a magic number dependent on assembler implementation.
8701 MovImmPatcher retIP(a, (uint64_t)a.code.frontier, *retIPReg);
8702 a. store_reg64_disp_reg64 (*retIPReg,
8703 cellsToBytes(numArgs) + AROFF(m_savedRip),
8704 rVmSp);
8706 // The kooky offset here a) gets us to the current ActRec,
8707 // and b) accesses m_soff.
8708 int32 callOffsetInUnit = srcFunc->unit()->offsetOf(after - srcFunc->base());
8709 a. store_imm32_disp_reg(callOffsetInUnit,
8710 cellsToBytes(numArgs) + AROFF(m_soff),
8711 rVmSp);
8713 emitBindCall(t, i,
8714 curUnit()->offsetOf(atCall),
8715 curUnit()->offsetOf(after)); // ...
8716 retIP.patch(uint64(a.code.frontier));
8718 if (i.breaksTracelet) {
8719 SrcKey fallThru(curFunc(), after);
8720 emitBindJmp(fallThru);
8721 } else {
8723 * Before returning, the callee restored rVmSp to point to the
8724 * current top of stack but the rest of this tracelet assumes that
8725 * rVmSp is set to the top of the stack at the beginning of the
8726 * tracelet, so we have to fix it up here.
8728 * TODO: in the case of an inlined NativeImpl, we're essentially
8729 * emitting two adds to rVmSp in a row, which we can combine ...
8731 int delta = i.stackOff + getStackDelta(i);
8732 if (delta != 0) {
8733 // i.stackOff is in negative Cells, not bytes.
8734 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
8739 void TranslatorX64::analyzeFCallArray(Tracelet& t,
8740 NormalizedInstruction& i) {
8741 i.m_txFlags = Supported;
8744 void TranslatorX64::translateFCallArray(const Tracelet& t,
8745 const NormalizedInstruction& i) {
8746 const Offset after = nextSrcKey(t, i).offset();
8748 syncOutputs(i);
8750 FCallArrayArgs* args = m_globalData.alloc<FCallArrayArgs>();
8751 emitImmReg(a, (uint64_t)args, argNumToRegName[0]);
8752 emitCall(a, (TCA)fCallArrayHelper, true);
8754 args->m_pcOff = i.offset();
8755 args->m_pcNext = after;
8757 if (i.breaksTracelet) {
8758 SrcKey fallThru(curFunc(), after);
8759 emitBindJmp(fallThru);
8760 } else {
8762 * When we get here, rVmSp points to the actual top of stack,
8763 * but the rest of this tracelet assumes that rVmSp is set to
8764 * the top of the stack at the beginning of the tracelet, so we
8765 * have to fix it up here.
8768 ASSERT(i.outStack);
8769 int delta = i.stackOff + getStackDelta(i);
8770 if (delta != 0) {
8771 // i.stackOff is in negative Cells, not bytes.
8772 a. add_imm64_reg64(cellsToBytes(delta), rVmSp);
8777 template <bool UseTC>
8778 static TypedValue*
8779 staticLocHelper(StringData* name, ActRec* fp, TypedValue* sp,
8780 TargetCache::CacheHandle ch) {
8781 if (UseTC) {
8782 Stats::inc(Stats::TgtCache_StaticMiss);
8783 Stats::inc(Stats::TgtCache_StaticHit, -1);
8785 HphpArray* map = get_static_locals(fp);
8786 TypedValue* retval = map->nvGet(name); // Local to num
8787 if (!retval) {
8788 // Read the initial value off the stack.
8789 TypedValue tv = *sp;
8790 map->nvSet(name, &tv, false);
8791 retval = map->nvGet(name);
8793 ASSERT(retval);
8794 if (retval->m_type != KindOfRef) {
8795 tvBox(retval);
8797 ASSERT(retval->m_type == KindOfRef);
8798 if (UseTC) {
8799 TypedValue** chTv = (TypedValue**)TargetCache::handleToPtr(ch);
8800 ASSERT(*chTv == NULL);
8801 return (*chTv = retval);
8802 } else {
8803 return retval;
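// Hypothetical PHP-level example of what this helper services (illustration
// only; assumed, not from the original source):
//
//   function f() {
//     static $x = 0;   // StaticLocInit binds $x to a per-function static
//     return ++$x;
//   }
//
// With UseTC, the first execution through a CacheHandle misses, runs this
// helper, and caches the boxed TypedValue* so later executions hit the
// inline fast path emitted by translateStaticLocInit() below.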
8807 void
8808 TranslatorX64::emitCallStaticLocHelper(X64Assembler& as,
8809 const NormalizedInstruction& i,
8810 ScratchReg& output,
8811 TargetCache::CacheHandle ch) {
8812 // The helper is going to read the value from memory, so record it. We
8813 // could also pass type/value as parameters, but this is hopefully a
8814 // rare path.
8815 m_regMap.cleanLoc(i.inputs[0]->location);
8816 if (false) { // typecheck
8817 StringData* sd = NULL;
8818 ActRec* fp = NULL;
8819 TypedValue* sp = NULL;
8820 sp = staticLocHelper<true>(sd, fp, sp, ch);
8821 sp = staticLocHelper<false>(sd, fp, sp, ch);
8823 const StringData* name = curFunc()->unit()->lookupLitstrId(i.imm[1].u_SA);
8824 ASSERT(name->isStatic());
8825 if (ch) {
8826 EMIT_CALL(as, (TCA)staticLocHelper<true>, IMM(uintptr_t(name)), R(rVmFp),
8827 RPLUS(rVmSp, -cellsToBytes(i.stackOff)), IMM(ch));
8828 } else {
8829 EMIT_CALL(as, (TCA)staticLocHelper<false>, IMM(uintptr_t(name)), R(rVmFp),
8830 RPLUS(rVmSp, -cellsToBytes(i.stackOff)));
8832 recordCall(as, i);
8833 emitMovRegReg(as, rax, *output);
8836 void
8837 TranslatorX64::translateStaticLocInit(const Tracelet& t,
8838 const NormalizedInstruction& i) {
8839 using namespace TargetCache;
8840 ScratchReg output(m_regMap);
8841 const Location& outLoc = i.outLocal->location;
8843   // Closures and generators from closures don't satisfy the "one
8844   // static per source location" rule that the inline fastpath
8845   // requires.
8846 if (!curFunc()->isClosureBody() &&
8847 !curFunc()->isGeneratorFromClosure()) {
8848 // Miss path explicitly decrements.
8849 Stats::emitInc(a, Stats::TgtCache_StaticHit);
8850 Stats::emitInc(a, Stats::Tx64_StaticLocFast);
8852 CacheHandle ch = allocStatic();
8853 ASSERT(ch);
8854 a. load_reg64_disp_reg64(rVmTl, ch, *output);
8855 a. test_reg64_reg64(*output, *output);
8857 UnlikelyIfBlock<CC_Z> fooey(a, astubs);
8858 emitCallStaticLocHelper(astubs, i, output, ch);
8860 } else {
8861 Stats::emitInc(a, Stats::Tx64_StaticLocSlow);
8862 emitCallStaticLocHelper(a, i, output, 0);
8864 // Now we've got the outer variant in *output. Get the address of the
8865 // inner cell, since that's the enregistered representation of a variant.
8866 emitDeref(a, *output, *output);
8867 emitIncRef(*output, KindOfRef);
8868 // Turn output into the local we just initialized.
8869 m_regMap.bindScratch(output, outLoc, KindOfRef, RegInfo::DIRTY);
8872 void
8873 TranslatorX64::analyzeVerifyParamType(Tracelet& t, NormalizedInstruction& i) {
8874 int param = i.imm[0].u_IVA;
8875 const TypeConstraint& tc = curFunc()->params()[param].typeConstraint();
8876 if (!tc.isObject()) {
8877 // We are actually using the translation-time value of this local as a
8878 // prediction; if the param check failed at compile-time, we predict it
8879 // will continue failing.
8880 bool compileTimeCheck = tc.check(frame_local(curFrame(), param), curFunc());
8881 i.m_txFlags = nativePlan(compileTimeCheck);
8882 i.manuallyAllocInputs = true;
8883 } else {
8884 bool trace = i.inputs[0]->isObject() ||
8885 (i.inputs[0]->isNull() && tc.nullable());
8886 i.m_txFlags = supportedPlan(trace);
8890 static void
8891 VerifyParamTypeFail(int paramNum) {
8892 VMRegAnchor _;
8893 const ActRec* ar = curFrame();
8894 const Func* func = ar->m_func;
8895 const TypeConstraint& tc = func->params()[paramNum].typeConstraint();
8896 ASSERT(tc.isObject());
8897 TypedValue* tv = frame_local(ar, paramNum);
8898 TRACE(3, "%s Obj %s, needs type %s\n",
8899 __func__,
8900 tv->m_data.pobj->getVMClass()->name()->data(),
8901 tc.typeName()->data());
8902 tc.verifyFail(func, paramNum, tv);
8905 // check class hierarchy and fail if no match
8906 static uint64_t
8907 VerifyParamTypeSlow(const Class* cls, const Class* constraint) {
8908 Stats::inc(Stats::Tx64_VerifyParamTypeSlow);
8909 Stats::inc(Stats::Tx64_VerifyParamTypeFast, -1);
8911 // ensure C++ returns a 0 or 1 with upper bits zeroed
8912 return static_cast<uint64_t>(constraint && cls->classof(constraint));
8915 void
8916 TranslatorX64::translateVerifyParamType(const Tracelet& t,
8917 const NormalizedInstruction& i) {
8918 Stats::emitInc(a, Stats::Tx64_VerifyParamTypeFast);
8920 int param = i.imm[0].u_IVA;
8921 const TypeConstraint& tc = curFunc()->params()[param].typeConstraint();
8922   // Not quite a nop. The guards should have verified that the m_type field
8923 // is compatible, but for objects we need to go one step further and
8924 // ensure that we're dealing with the right class.
8925 // NULL inputs only get traced when constraint is nullable.
8926 ASSERT(i.inputs.size() == 1);
8927 if (!i.inputs[0]->isObject()) return; // nop.
8929 // Get the input's class from ObjectData->m_cls
8930 const Location& in = i.inputs[0]->location;
8931 PhysReg src = getReg(in);
8932 ScratchReg inCls(m_regMap);
8933 if (i.inputs[0]->rtt.isVariant()) {
8934 emitDeref(a, src, *inCls);
8935 a. load_reg64_disp_reg64(*inCls, ObjectData::getVMClassOffset(), *inCls);
8936 } else {
8937 a. load_reg64_disp_reg64(src, ObjectData::getVMClassOffset(), *inCls);
8940 ScratchReg cls(m_regMap);
8941   // The constraint may not be in the class hierarchy of the method being
8942   // traced, so look up the class handle and emit code to put the Class* into a reg.
8943 if (!tc.isSelf() && !tc.isParent()) {
8944 const StringData* clsName = tc.typeName();
8945 using namespace TargetCache;
8946 CacheHandle ch = allocKnownClass(clsName);
8947 a. load_reg64_disp_reg64(rVmTl, ch, *cls);
8948 } else {
8949 const Class *constraint = NULL;
8950 if (tc.isSelf()) {
8951 tc.selfToClass(curFunc(), &constraint);
8952 } else if (tc.isParent()) {
8953 tc.parentToClass(curFunc(), &constraint);
8955 emitImmReg(a, uintptr_t(constraint), *cls);
8957   // Compare this class to the incoming object's class. If the typehint's class
8958   // is not present, it cannot be an instance: fail.
8959 a. cmp_reg64_reg64(*inCls, *cls);
8962 JccBlock<CC_Z> subclassCheck(a);
8963 // Call helper since ObjectData::instanceof is a member function
8964 if (false) {
8965 Class* cls = NULL;
8966 Class* constraint = NULL;
8967 VerifyParamTypeSlow(cls, constraint);
8969 EMIT_CALL(a, VerifyParamTypeSlow, R(*inCls), R(*cls));
8970     // Check the return value: a match falls through; otherwise take the slow path
8971 a. test_reg64_reg64(rax, rax);
8973 // Put the failure path into astubs
8975 UnlikelyIfBlock<CC_Z> fail(a, astubs);
8976 if (false) { // typecheck
8977 VerifyParamTypeFail(param);
8979 EMIT_CALL(astubs, VerifyParamTypeFail, IMM(param));
8980 recordReentrantStubCall(i);
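// Illustrative sketch (assumed, not from the original source): the code above
// services object type hints such as
//
//   function g(Foo $f) { ... }   // parameter 0 carries a VerifyParamType
//
// The fast path compares the cached Class* against the argument's m_cls;
// VerifyParamTypeSlow walks the class hierarchy, and VerifyParamTypeFail
// raises the usual typehint failure.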
8985 void
8986 TranslatorX64::analyzeInstanceOfD(Tracelet& t, NormalizedInstruction& i) {
8987 ASSERT(i.inputs.size() == 1);
8988 ASSERT(i.outStack && !i.outLocal);
8989 i.m_txFlags = planHingesOnRefcounting(i.inputs[0]->outerType());
8992 // check class hierarchy and fail if no match
8993 static uint64_t
8994 InstanceOfDSlow(const Class* cls, const Class* constraint) {
8995 Stats::inc(Stats::Tx64_InstanceOfDSlow);
8996 Stats::inc(Stats::Tx64_InstanceOfDFast, -1);
8998 // ensure C++ returns a 0 or 1 with upper bits zeroed
8999 return static_cast<uint64_t>(constraint && cls->classof(constraint));
9002 void
9003 TranslatorX64::translateInstanceOfD(const Tracelet& t,
9004 const NormalizedInstruction& i) {
9005 Stats::emitInc(a, Stats::Tx64_InstanceOfDFast);
9006 ASSERT(i.inputs.size() == 1);
9007 ASSERT(i.outStack && !i.outLocal);
9009 DynLocation* input0 = i.inputs[0];
9010 bool input0IsLoc = input0->isLocal();
9011 DataType type = input0->valueType();
9012 PhysReg srcReg;
9013 ScratchReg result(m_regMap);
9014 LazyScratchReg srcScratch(m_regMap);
9015 TCA patchAddr = NULL;
9016 boost::scoped_ptr<DiamondReturn> retFromNullThis;
9018 if (i.grouped && (i.prev->op() == OpThis || i.prev->op() == OpBareThis)) {
9019 srcScratch.alloc();
9020 srcReg = *srcScratch;
9021 a. load_reg64_disp_reg64(rVmFp, AROFF(m_this), srcReg);
9022 if (i.prev->op() == OpThis) {
9023 ASSERT(i.prev->guardedThis);
9024 } else {
9025 if (i.prev->imm[0].u_OA) {
9026 retFromNullThis.reset(new DiamondReturn);
9027 a. test_imm32_reg64(1, srcReg);
9029 UnlikelyIfBlock<CC_NZ> ifNull(a, astubs, retFromNullThis.get());
9030 EMIT_CALL(astubs, warnNullThis);
9031 recordReentrantStubCall(i);
9032 emitImmReg(astubs, false, *result);
9034 } else {
9035 emitImmReg(a, false, *result);
9036 a. test_imm32_reg64(1, srcReg);
9037 patchAddr = a.code.frontier;
9038 a. jcc(CC_NZ, patchAddr);
9041     input0IsLoc = true; // we don't want a decRef
9042 type = KindOfObject;
9043 } else {
9044 srcReg = getReg(input0->location);
9047 if (type != KindOfObject) {
9048 // All non-object inputs are not instances
9049 if (!input0IsLoc) {
9050 ASSERT(!input0->isVariant());
9051 emitDecRef(i, srcReg, type);
9053 emitImmReg(a, false, *result);
9055 } else {
9056 // Get the input's class from ObjectData->m_cls
9057 ScratchReg inCls(m_regMap);
9058 if (input0->rtt.isVariant()) {
9059 ASSERT(input0IsLoc);
9060 emitDeref(a, srcReg, *inCls);
9061 a. load_reg64_disp_reg64(*inCls, ObjectData::getVMClassOffset(), *inCls);
9062 } else {
9063 a. load_reg64_disp_reg64(srcReg, ObjectData::getVMClassOffset(), *inCls);
9065 if (!input0IsLoc) {
9066 emitDecRef(i, srcReg, type);
9069     // Set result to true for now. If we take the slow path, use its return value.
9070 emitImmReg(a, true, *result);
9071 ScratchReg cls(m_regMap);
9072     // The constraint may not be in the class hierarchy of the method being
9073     // traced, so look up the class handle and emit code to put the Class* into a reg.
9074 using namespace TargetCache;
9075 int param = i.imm[0].u_SA;
9076 const StringData* clsName = curUnit()->lookupLitstrId(param);
9077 CacheHandle ch = allocKnownClass(clsName);
9078 a. load_reg64_disp_reg64(rVmTl, ch, *cls);
9079     // Compare this class to the incoming object's class. If the typehint's
9080     // class is not present, it cannot be an instance: fail.
9081 a. cmp_reg64_reg64(*inCls, *cls);
9084 UnlikelyIfBlock<CC_NZ> subclassCheck(a, astubs);
9085 // Call helper since ObjectData::instanceof is a member function
9086 if (false) {
9087 Class* cls = NULL;
9088 Class* constraint = NULL;
9089 InstanceOfDSlow(cls, constraint);
9091 EMIT_CALL(astubs, InstanceOfDSlow, R(*inCls), R(*cls));
9092 astubs. mov_reg32_reg32(rax, *result);
9095 if (patchAddr) {
9096 a. patchJcc(patchAddr, a.code.frontier);
9098 retFromNullThis.reset();
9100 // Bind result and destination
9101 m_regMap.bindScratch(result, i.outStack->location, i.outStack->outerType(),
9102 RegInfo::DIRTY);
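// Illustrative example (assumed, not from the original source):
//
//   $ok = $obj instanceof Foo;   // InstanceOfD "Foo"
//
// Non-object inputs short-circuit to false; objects take the
// pointer-equality fast path against the cached Class* and fall back to
// InstanceOfDSlow for subclass/interface checks.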
9105 void
9106 TranslatorX64::analyzeIterInit(Tracelet& t, NormalizedInstruction& ni) {
9107 DataType inType = ni.inputs[0]->valueType();
9108 ni.m_txFlags = supportedPlan(inType == KindOfArray || inType == KindOfObject);
9111 void
9112 TranslatorX64::translateIterInit(const Tracelet& t,
9113 const NormalizedInstruction& ni) {
9114 ASSERT(ni.inputs.size() == 1);
9115 ASSERT(!ni.outStack && !ni.outLocal);
9116 DynLocation* in = ni.inputs[0];
9117 ASSERT(in->outerType() != KindOfRef);
9118 SKTRACE(1, ni.source, "IterInit: committed to translation\n");
9119 PhysReg src = getReg(in->location);
9120 SrcKey taken, notTaken;
9121 branchDests(t, ni, &taken, &notTaken, 1 /* immIdx */);
9122 Location iterLoc(Location::Iter, ni.imm[0].u_IVA);
9123 switch (in->valueType()) {
9124 case KindOfArray: {
9125 if (false) { // typecheck
9126 Iter *dest = NULL;
9127 HphpArray *arr = NULL;
9128 new_iter_array(dest, arr);
9130 EMIT_RCALL(a, ni, new_iter_array, A(iterLoc), R(src));
9131 break;
9133 case KindOfObject: {
9134 if (false) { // typecheck
9135 Iter *dest = NULL;
9136 ObjectData *obj = NULL;
9137 Class *ctx = NULL;
9138 new_iter_object(dest, obj, ctx);
9140 Class* ctx = arGetContextClass(curFrame());
9141 EMIT_RCALL(a, ni, new_iter_object, A(iterLoc), R(src), IMM((uintptr_t)ctx));
9142 break;
9144 default: not_reached();
9146 syncOutputs(t); // Ends BB
9147 // If a new iterator is created, new_iter_* will not adjust the refcount of
9148 // the input. If a new iterator is not created, new_iter_* will decRef the
9149 // input for us. new_iter_* returns 0 if an iterator was not created,
9150 // otherwise it returns 1.
9151 a. test_reg64_reg64(rax, rax);
9152 emitCondJmp(taken, notTaken, CC_Z);
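// Illustrative example (assumed, not from the original source): IterInit is
// the entry point of
//
//   foreach ($arr as $v) { ... }
//
// new_iter_array/new_iter_object return 0 when no iterator is created
// (e.g. an empty array), so the test/emitCondJmp pair above branches to
// `taken` to skip the loop body in that case.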
9155 void
9156 TranslatorX64::analyzeIterValueC(Tracelet& t, NormalizedInstruction& i) {
9157 i.m_txFlags = supportedPlan(
9158 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
9159 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
9162 void
9163 TranslatorX64::translateIterValueC(const Tracelet& t,
9164 const NormalizedInstruction& i) {
9165 ASSERT(i.inputs.size() == 1);
9166 ASSERT(i.inputs[0]->rtt.isIter());
9168 Location outLoc;
9169 Iter::Type iterType = i.inputs[0]->rtt.iterType();
9170 typedef void (*IterValueC)(Iter*, TypedValue*);
9171 IterValueC ivc;
9172 if (i.outStack) {
9173 outLoc = i.outStack->location;
9174 ivc = (iterType == Iter::TypeArray)
9175 ? iter_value_cell_array : iter_value_cell_iterator;
9176 } else {
9177 outLoc = i.outLocal->location;
9178 ivc = (iterType == Iter::TypeArray)
9179 ? iter_value_cell_local_array : iter_value_cell_local_iterator;
9181 EMIT_RCALL(a, i, ivc, A(i.inputs[0]->location), A(outLoc));
9182 m_regMap.invalidate(outLoc);
9185 void
9186 TranslatorX64::analyzeIterKey(Tracelet& t, NormalizedInstruction& i) {
9187 i.m_txFlags = supportedPlan(
9188 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
9189 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
9192 void
9193 TranslatorX64::translateIterKey(const Tracelet& t,
9194 const NormalizedInstruction& i) {
9195 ASSERT(i.inputs.size() == 1);
9196 ASSERT(i.inputs[0]->rtt.isIter());
9198 Location outLoc;
9199 Iter::Type iterType = i.inputs[0]->rtt.iterType();
9200 typedef void (*IterKey)(Iter*, TypedValue*);
9201 IterKey ik;
9202 if (i.outStack) {
9203 outLoc = i.outStack->location;
9204 ik = (iterType == Iter::TypeArray)
9205 ? iter_key_cell_array : iter_key_cell_iterator;
9206 } else {
9207 outLoc = i.outLocal->location;
9208 ik = (iterType == Iter::TypeArray)
9209 ? iter_key_cell_local_array : iter_key_cell_local_iterator;
9211 EMIT_RCALL(a, i, ik, A(i.inputs[0]->location), A(outLoc));
9212 m_regMap.invalidate(outLoc);
9215 void
9216 TranslatorX64::analyzeIterNext(Tracelet& t, NormalizedInstruction& i) {
9217 ASSERT(i.inputs.size() == 1);
9218 i.m_txFlags = supportedPlan(
9219 i.inputs[0]->rtt.iterType() == Iter::TypeArray ||
9220 i.inputs[0]->rtt.iterType() == Iter::TypeIterator);
9223 void
9224 TranslatorX64::translateIterNext(const Tracelet& t,
9225 const NormalizedInstruction& i) {
9226 ASSERT(i.inputs.size() == 1);
9227 ASSERT(!i.outStack && !i.outLocal);
9228 ASSERT(i.inputs[0]->rtt.isIter());
9230 if (false) { // type check
9231 Iter* it = NULL;
9232 int64 ret = iter_next_array(it);
9233 if (ret) printf("\n");
9235 m_regMap.cleanAll(); // input might be in-flight
9236   // If the iterator reaches the end, iter_next_array will handle
9237   // freeing the iterator, and it will decRef the array.
9238 EMIT_CALL(a, iter_next_array, A(i.inputs[0]->location));
9239 recordReentrantCall(a, i);
9240 ScratchReg raxScratch(m_regMap, rax);
9242 // syncOutputs before we handle the branch.
9243 syncOutputs(t);
9244 SrcKey taken, notTaken;
9245 branchDests(t, i, &taken, &notTaken, 1 /* destImmIdx */);
9247 a. test_reg64_reg64(rax, rax);
9248 emitCondJmp(taken, notTaken, CC_NZ);
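// Illustrative note (assumption, not from the original source): IterNext
// implements the foreach back-edge; a nonzero return from iter_next_array
// means more elements remain, so CC_NZ branches to `taken` (the loop body),
// while zero falls through after the iterator has been freed and the array
// decRef'd.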
9251 // PSEUDOINSTR_DISPATCH is a switch() fragment that routes opcodes to their
9252 // shared handlers, as per the PSEUDOINSTRS macro.
9253 #define PSEUDOINSTR_DISPATCH(func) \
9254 case OpBitAnd: \
9255 case OpBitOr: \
9256 case OpBitXor: \
9257 case OpSub: \
9258 case OpMul: \
9259 func(BinaryArithOp, t, i) \
9260 case OpSame: \
9261 case OpNSame: \
9262 func(SameOp, t, i) \
9263 case OpEq: \
9264 case OpNeq: \
9265 func(EqOp, t, i) \
9266 case OpLt: \
9267 case OpLte: \
9268 case OpGt: \
9269 case OpGte: \
9270 func(LtGtOp, t, i) \
9271 case OpEmptyL: \
9272 case OpCastBool: \
9273 func(UnaryBooleanOp, t, i) \
9274 case OpJmpZ: \
9275 case OpJmpNZ: \
9276 func(BranchOp, t, i) \
9277 case OpSetL: \
9278 case OpBindL: \
9279 func(AssignToLocalOp, t, i) \
9280 case OpFPassC: \
9281 case OpFPassCW: \
9282 case OpFPassCE: \
9283 func(FPassCOp, t, i) \
9284 case OpFPushCuf: \
9285 case OpFPushCufF: \
9286 case OpFPushCufSafe: \
9287 func(FPushCufOp, t, i) \
9288 case OpIssetL: \
9289 case OpIsNullL: \
9290 case OpIsStringL: \
9291 case OpIsArrayL: \
9292 case OpIsIntL: \
9293 case OpIsObjectL: \
9294 case OpIsBoolL: \
9295 case OpIsDoubleL: \
9296 case OpIsNullC: \
9297 case OpIsStringC: \
9298 case OpIsArrayC: \
9299 case OpIsIntC: \
9300 case OpIsObjectC: \
9301 case OpIsBoolC: \
9302 case OpIsDoubleC: \
9303 func(CheckTypeOp, t, i)
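// Illustrative expansion (assumed, for exposition only): with
// func = TRANSLATE as used below, the fragment expands to cases such as
//
//   case OpBitAnd:
//   case OpBitOr:
//   case OpBitXor:
//   case OpSub:
//   case OpMul:
//     translateBinaryArithOp(t, i); break;
//
// so groups of related opcodes share one analyze/translate implementation.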
9305 void
9306 TranslatorX64::analyzeInstr(Tracelet& t,
9307 NormalizedInstruction& i) {
9308 const Opcode op = i.op();
9309 switch (op) {
9310 #define CASE(iNm) \
9311 case Op ## iNm: { \
9312 analyze ## iNm(t, i); \
9313 } break;
9314 #define ANALYZE(a, b, c) analyze ## a(b, c); break;
9315 INSTRS
9316 PSEUDOINSTR_DISPATCH(ANALYZE)
9318 #undef ANALYZE
9319 #undef CASE
9320 default: {
9321 ASSERT(i.m_txFlags == Interp);
9324 SKTRACE(1, i.source, "translation plan: %x\n", i.m_txFlags);
9327 bool
9328 TranslatorX64::dontGuardAnyInputs(Opcode op) {
9329 switch (op) {
9330 #define CASE(iNm) case Op ## iNm:
9331 #define NOOP(a, b, c)
9332 INSTRS
9333 PSEUDOINSTR_DISPATCH(NOOP)
9334 return false;
9336 return true;
9337 #undef NOOP
9338 #undef CASE
9342 void TranslatorX64::emitOneGuard(const Tracelet& t,
9343 const NormalizedInstruction& i,
9344 PhysReg reg, int disp, DataType type,
9345 TCA &sideExit) {
9346 bool isFirstInstr = (&i == t.m_instrStream.first);
9347 emitTypeCheck(a, type, reg, disp);
9348 if (isFirstInstr) {
9349 SrcRec& srcRec = *getSrcRec(t.m_sk);
9350 // If it's the first instruction, we haven't made any forward
9351 // progress yet, so this is really a tracelet-level guard rather
9352 // than a side exit. If we tried to "side exit", we'd come right
9353 // back to this check!
9355 // We need to record this as a fallback branch.
9356 emitFallbackJmp(srcRec);
9357 } else if (!sideExit) {
9358 UnlikelyIfBlock<CC_NZ> branchToSideExit(a, astubs);
9359 sideExit = astubs.code.frontier;
9360 emitSideExit(astubs, i, false /*next*/);
9361 } else {
9362 a. jnz(sideExit);
9366 // Emit necessary guards for variants and pseudo-main locals before instr i.
9367 // For HHIR, this only inserts guards for pseudo-main locals. Variants are
9368 // guarded in a different way.
9369 void
9370 TranslatorX64::emitVariantGuards(const Tracelet& t,
9371 const NormalizedInstruction& i) {
9372 bool pseudoMain = Translator::liveFrameIsPseudoMain();
9373 bool isFirstInstr = (&i == t.m_instrStream.first);
9374 TCA sideExit = NULL;
9375 const NormalizedInstruction *base = &i;
9376 while (base->grouped) {
9377 base = base->prev;
9378 ASSERT(base);
9380 for (size_t in = 0; in < i.inputs.size(); ++in) {
9381 DynLocation* input = i.inputs[in];
9382 if (!input->isValue()) continue;
9383 bool isRef = input->isVariant() &&
9384 !i.ignoreInnerType &&
9385 input->rtt.innerType() != KindOfInvalid;
9386 bool modifiableLocal = pseudoMain && input->isLocal() &&
9387 !input->rtt.isVagueValue();
9389 if (!modifiableLocal && !isRef) continue;
9391 SKTRACE(1, i.source, "guarding %s: (%s:%d) :: %d!\n",
9392 modifiableLocal ? "pseudoMain local" : "variant inner",
9393 input->location.spaceName(),
9394 input->location.offset,
9395 input->rtt.valueType());
9396 // TODO task 1122807: don't check the inner type if we've already
9397 // checked it and have executed no possibly-aliasing instructions in
9398 // the meanwhile.
9399 if (modifiableLocal) {
9400 if (m_useHHIR) {
9401 RuntimeType& rtt = input->rtt;
9402 JIT::Type::Tag type = JIT::Type::fromRuntimeType(rtt);
9403 if (isFirstInstr) {
9404 m_hhbcTrans->guardTypeLocal(input->location.offset, type);
9405 } else {
9406 m_hhbcTrans->checkTypeLocal(input->location.offset, type);
9408 } else {
9409 PhysReg reg;
9410 int disp;
9411 locToRegDisp(input->location, &reg, &disp);
9412 emitOneGuard(t, *base, reg, disp,
9413 input->rtt.outerType(), sideExit);
9416 if (isRef && !m_useHHIR) {
9417 m_regMap.allocInputReg(i, in);
9418 emitOneGuard(t, *base, getReg(input->location), 0,
9419 input->rtt.innerType(), sideExit);
9424 NormalizedInstruction::OutputUse
9425 NormalizedInstruction::outputIsUsed(DynLocation* output) const {
9426 for (NormalizedInstruction* succ = next;
9427 succ; succ = succ->next) {
9428 for (size_t i = 0; i < succ->inputs.size(); ++i) {
9429 if (succ->inputs[i] == output) {
9430 if (succ->inputWasInferred(i)) {
9431 return OutputInferred;
9433 if (Translator::Get()->dontGuardAnyInputs(succ->op())) {
9434           /* The consumer doesn't care about its inputs,
9435              but we may still have inferred something about
9436              its outputs that a later instruction may depend on.
9438 if (!outputDependsOnInput(succ->op()) ||
9439 !(succ->outStack && !succ->outStack->rtt.isVagueValue() &&
9440 succ->outputIsUsed(succ->outStack) != OutputUsed) ||
9441 !(succ->outLocal && !succ->outLocal->rtt.isVagueValue() &&
9442 succ->outputIsUsed(succ->outLocal)) != OutputUsed) {
9443 return OutputDoesntCare;
9446 return OutputUsed;
9450 return OutputUnused;
9453 void
9454 TranslatorX64::emitPredictionGuards(const NormalizedInstruction& i) {
9455 if (!i.outputPredicted || i.breaksTracelet) return;
9456 NormalizedInstruction::OutputUse u = i.outputIsUsed(i.outStack);
9458 if (m_useHHIR) {
9459 if (u == NormalizedInstruction::OutputUsed ||
9460 u == NormalizedInstruction::OutputInferred) {
9461 JIT::Type::Tag jitType = JIT::Type::fromRuntimeType(i.outStack->rtt);
9462 if (u == NormalizedInstruction::OutputInferred) {
9463 TRACE(1, "HHIR: emitPredictionGuards: output inferred to be %s\n",
9464 JIT::Type::Strings[jitType]);
9465 m_hhbcTrans->assertTypeStack(0, jitType);
9466 } else {
9467 TRACE(1, "HHIR: emitPredictionGuards: output predicted to be %s\n",
9468 JIT::Type::Strings[jitType]);
9469 m_hhbcTrans->checkTypeStack(0, jitType, i.next->offset());
9472 return;
9475 switch (u) {
9476 case NormalizedInstruction::OutputUsed:
9477 break;
9478 case NormalizedInstruction::OutputUnused:
9479 return;
9480 case NormalizedInstruction::OutputInferred:
9481 Stats::emitInc(a, Stats::TC_TypePredOverridden);
9482 return;
9483 case NormalizedInstruction::OutputDoesntCare:
9484 Stats::emitInc(a, Stats::TC_TypePredUnneeded);
9485 return;
9488 ASSERT(i.outStack);
9489 PhysReg base;
9490 int disp;
9491 locToRegDisp(i.outStack->location, &base, &disp);
9492 ASSERT(base == rVmSp);
9493 TRACE(1, "PREDGUARD: %p dt %d offset %d voffset %lld\n",
9494 a.code.frontier, i.outStack->outerType(), disp,
9495 i.outStack->location.offset);
9496 emitTypeCheck(a, i.outStack->outerType(), rVmSp, disp);
9498 UnlikelyIfBlock<CC_NZ> branchToSideExit(a, astubs);
9499 Stats::emitInc(astubs, Stats::TC_TypePredMiss);
9500 emitSideExit(astubs, i, true);
9502 Stats::emitInc(a, Stats::TC_TypePredHit);
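// Illustrative note (assumption, not from the original source): unlike an
// inferred output, a predicted output can be wrong at runtime, so the
// OutputUsed case above pays for a real type check with a side exit, while
// the other cases return after at most a stats bump.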
9505 static void failedTypePred() {
9506 raise_error("A type prediction was incorrect");
9509 void
9510 TranslatorX64::translateInstrWork(const Tracelet& t,
9511 const NormalizedInstruction& i) {
9512 const Opcode op = i.op();
9513 switch (op) {
9514 #define CASE(iNm) \
9515 case Op ## iNm: \
9516 translate ## iNm(t, i); \
9517 break;
9518 #define TRANSLATE(a, b, c) translate ## a(b, c); break;
9519 INSTRS
9520 PSEUDOINSTR_DISPATCH(TRANSLATE)
9521 #undef TRANSLATE
9522 #undef CASE
9523 default:
9524 ASSERT(false);
9528 void
9529 TranslatorX64::translateInstr(const Tracelet& t,
9530 const NormalizedInstruction& i) {
9532 * translateInstr() translates an individual instruction in a tracelet,
9533 * either by directly emitting machine code for that instruction or by
9534 * emitting a call to the interpreter.
9536 * If the instruction ends the current tracelet, we must emit machine code
9537 * to transfer control to some target that will continue to make forward
9538 * progress. This target may be the beginning of another tracelet, or it may
9539 * be a translator service request. Before transferring control, a tracelet
9540 * must ensure the following invariants hold:
9541 * 1) The machine registers rVmFp and rVmSp are in sync with vmfp()
9542 * and vmsp().
9543 * 2) All "dirty" values are synced in memory. This includes the
9544 * evaluation stack, locals, globals, statics, and any other program
9545 * accessible locations. This also means that all refcounts must be
9546 * up to date.
9548 ASSERT(!m_useHHIR);
9549 ASSERT(!i.outStack || i.outStack->isStack());
9550 ASSERT(!i.outLocal || i.outLocal->isLocal());
9551 const char *opNames[] = {
9552 #define O(name, imm, push, pop, flags) \
9553 #name,
9554 OPCODES
9555 #undef O
9557 SpaceRecorder sr(opNames[i.op()], a);
9558 SKTRACE(1, i.source, "translate %#lx\n", long(a.code.frontier));
9559 const Opcode op = i.op();
9561 TCA start = a.code.frontier;
9562 TCA astart = astubs.code.frontier;
9564 m_regMap.bumpEpoch();
9565 // Allocate the input regs upfront unless instructed otherwise
9566 // or the instruction is interpreted
9567 if (!i.manuallyAllocInputs && i.m_txFlags) {
9568 m_regMap.allocInputRegs(i);
9571 if (debug) {
9572 for (unsigned j = 0; j < i.inputs.size(); j++) {
9573 if (i.inputWasInferred(j)) {
9574 DynLocation* dl = i.inputs[j];
9575 ASSERT(dl->rtt.isValue() &&
9576 !dl->rtt.isVagueValue() &&
9577 dl->outerType() != KindOfInvalid);
9578 PhysReg base;
9579 int disp;
9580 locToRegDisp(dl->location, &base, &disp);
9581 emitTypeCheck(a, dl->rtt.typeCheckValue(), base, disp);
9583 UnlikelyIfBlock<CC_NZ> typePredFailed(a, astubs);
9584 EMIT_CALL(astubs, failedTypePred);
9585 recordReentrantStubCall(i);
9591 if (!i.grouped) {
9592 emitVariantGuards(t, i);
9593 const NormalizedInstruction* n = &i;
9594 while (n->next && n->next->grouped) {
9595 n = n->next;
9596 emitVariantGuards(t, *n);
9600 // Allocate the input regs upfront unless instructed otherwise
9601 // or the instruction is interpreted
9602 if (!i.manuallyAllocInputs && i.m_txFlags) {
9603 m_regMap.allocInputRegs(i);
9606 if (i.m_txFlags == Interp || RuntimeOption::EvalThreadingJit) {
9607 // If the problem is local to this instruction, just call out to
9608 // the interpreter. emitInterpOne will perform end-of-tracelet duties
9609 // if this instruction ends the tracelet.
9610 SKTRACE(1, i.source, "Interp\n");
9611 emitInterpOne(t, i);
9612 } else {
9613 // Actually translate the instruction's body.
9614 Stats::emitIncTranslOp(a, op);
9616 translateInstrWork(t, i);
9619 // Invalidate locations that are no longer live
9620 for (unsigned k = 0; k < i.deadLocs.size(); ++k) {
9621 const Location& l = i.deadLocs[k];
9622 m_regMap.invalidate(l);
9625 emitPredictionGuards(i);
9626 recordBCInstr(op, a, start);
9627 recordBCInstr(op + Op_count, astubs, astart);
9629 if (i.breaksTracelet && !i.changesPC) {
9630 // If this instruction's opcode always ends the tracelet then the
9631 // instruction case is responsible for performing end-of-tracelet
9632 // duties. Otherwise, we handle ending the tracelet here.
9633 syncOutputs(t);
9634 emitBindJmp(t.m_nextSk);
9637 m_regMap.assertNoScratch();
9640 bool
9641 TranslatorX64::checkTranslationLimit(const SrcKey& sk,
9642 const SrcRec& srcRec) const {
9643 if (srcRec.translations().size() == SrcRec::kMaxTranslations) {
9644 INC_TPC(max_trans);
9645 if (debug && Trace::moduleEnabled(Trace::tx64, 2)) {
9646 const vector<TCA>& tns = srcRec.translations();
9647 TRACE(1, "Too many (%ld) translations: %s, BC offset %d\n",
9648 tns.size(), curUnit()->filepath()->data(),
9649 sk.offset());
9650 SKTRACE(2, sk, "{\n", tns.size());
9651 TCA topTrans = srcRec.getTopTranslation();
9652 for (size_t i = 0; i < tns.size(); ++i) {
9653 const TransRec* rec = getTransRec(tns[i]);
9654 ASSERT(rec);
9655 SKTRACE(2, sk, "%d %p\n", i, tns[i]);
9656 if (tns[i] == topTrans) {
9657 SKTRACE(2, sk, "%d: *Top*\n", i);
9659 if (rec->kind == TransAnchor) {
9660 SKTRACE(2, sk, "%d: Anchor\n", i);
9661 } else {
9662 SKTRACE(2, sk, "%d: guards {\n", i);
9663 for (unsigned j = 0; j < rec->dependencies.size(); ++j) {
9664 TRACE(2, rec->dependencies[j]);
9666 SKTRACE(2, sk, "%d } guards\n", i);
9669 SKTRACE(2, sk, "} /* Too many translations */\n");
9671 return true;
9674 return false;
9677 void
9678 TranslatorX64::emitGuardChecks(X64Assembler& a,
9679 const SrcKey& sk,
9680 const ChangeMap& dependencies,
9681 const RefDeps& refDeps,
9682 SrcRec& fail) {
9683 if (Trace::moduleEnabled(Trace::stats, 2)) {
9684 Stats::emitInc(a, Stats::TraceletGuard_enter);
9687 bool pseudoMain = Translator::liveFrameIsPseudoMain();
9689 emitRB(a, RBTypeTraceletGuards, sk);
9690 for (DepMap::const_iterator dep = dependencies.begin();
9691 dep != dependencies.end();
9692 ++dep) {
9693 if (!pseudoMain || !dep->second->isLocal() || !dep->second->isValue()) {
9694 checkType(a, dep->first, dep->second->rtt, fail);
9695 } else {
9696 TRACE(3, "Skipping tracelet guard for %s %d\n",
9697 dep->second->location.pretty().c_str(),
9698 (int)dep->second->rtt.outerType());
9702 checkRefs(a, sk, refDeps, fail);
9704 if (Trace::moduleEnabled(Trace::stats, 2)) {
9705 Stats::emitInc(a, Stats::TraceletGuard_execute);
9710 void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
9711 if (!debug) return;
9713 const SrcKey& sk = t.m_sk;
9715 TRACE(3, "----------------------------------------------\n");
9716 TRACE(3, " Translating from file %s:%d %s at %p:\n",
9717 curUnit()->filepath()->data(),
9718 curUnit()->getLineNumber(sk.offset()),
9719 curFunc()->name()->data(),
9720 postGuards);
9721 TRACE(3, " preconds:\n");
9722 TRACE(3, " types:\n");
9723 for (DepMap::const_iterator i = t.m_dependencies.begin();
9724 i != t.m_dependencies.end(); ++i) {
9725 TRACE(3, " %-5s\n", i->second->pretty().c_str());
9727 if (t.m_refDeps.size() != 0) {
9728 TRACE(3, " refs:\n");
9729 for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
9730 i != t.m_refDeps.m_arMap.end();
9731 ++i) {
9732 TRACE(3, " (ActRec %lld : %-5s)\n", i->first,
9733 i->second.pretty().c_str());
9736 TRACE(3, " postconds:\n");
9737 for (ChangeMap::const_iterator i = t.m_changes.begin();
9738 i != t.m_changes.end(); ++i) {
9739 TRACE(3, " %-5s\n", i->second->pretty().c_str());
9741 for (SrcKey traceKey(t.m_sk);
9742 traceKey != t.m_nextSk;
9743 traceKey.advance(curUnit())) {
9744 string s = instrToString(
9745 curUnit()->at(traceKey.offset()), curUnit());
9746 TRACE(3, " %6d: %s\n", traceKey.offset(), s.c_str());
9748 TRACE(3, "----------------------------------------------\n");
9749 if (Trace::moduleEnabled(Trace::tx64, 5)) {
9750 // prettyStack() expects to use vmpc(). Leave it in the state we
9751 // found it since this code is debug-only, and we don't want behavior
9752 // to vary across the optimized/debug builds.
9753 PC oldPC = vmpc();
9754 vmpc() = curUnit()->at(sk.offset());
9755 TRACE(3, g_vmContext->prettyStack(string(" tx64 ")));
9756 vmpc() = oldPC;
9757 TRACE(3, "----------------------------------------------\n");
9761 void
9762 TranslatorX64::translateTracelet(const Tracelet& t) {
9763 const SrcKey &sk = t.m_sk;
9765 m_curTrace = &t;
9766 Nuller<Tracelet> ctNuller(&m_curTrace);
9768 SKTRACE(1, sk, "translateTracelet\n");
9769 ASSERT(m_srcDB.find(sk));
9770 ASSERT(m_regMap.pristine());
9771 TCA start = a.code.frontier;
9772 TCA stubStart = astubs.code.frontier;
9773 TCA counterStart = 0;
9774 uint8 counterLen = 0;
9775 SrcRec& srcRec = *getSrcRec(sk);
9776 vector<TransBCMapping> bcMapping;
9778 bool hhirSucceeded = irTranslateTracelet(t, start, stubStart);
9779 if (hhirSucceeded) {
9780 m_irAUsage += (a.code.frontier - start);
9781 m_irAstubsUsage += (astubs.code.frontier - stubStart);
9783 if (!hhirSucceeded) {
9784 ASSERT(m_pendingFixups.size() == 0);
9785 ASSERT(srcRec.inProgressTailJumps().size() == 0);
9786 try {
9787 if (t.m_analysisFailed || checkTranslationLimit(t.m_sk, srcRec)) {
9788 punt();
9791 emitGuardChecks(a, t.m_sk, t.m_dependencies, t.m_refDeps, srcRec);
9792 dumpTranslationInfo(t, a.code.frontier);
9794 // after guards, add a counter for the translation if requested
9795 if (RuntimeOption::EvalJitTransCounters) {
9796 emitTransCounterInc(a);
9799 emitRB(a, RBTypeTraceletBody, t.m_sk);
9800 Stats::emitInc(a, Stats::Instr_TC, t.m_numOpcodes);
9801 recordBCInstr(OpTraceletGuard, a, start);
9803 // Translate each instruction in the tracelet
9804 for (NormalizedInstruction* ni = t.m_instrStream.first; ni; ni = ni->next) {
9805 if (isTransDBEnabled()) {
9806 bcMapping.push_back((TransBCMapping){ni->offset(),
9807 a.code.frontier,
9808 astubs.code.frontier});
9811 m_curNI = ni;
9812 Nuller<NormalizedInstruction> niNuller(&m_curNI);
9813 translateInstr(t, *ni);
9814 ASSERT(ni->source.offset() >= curFunc()->base());
9815 // We sometimes leave the tail of a truncated tracelet in place to aid
9816 // analysis, but breaksTracelet is authoritative.
9817 if (ni->breaksTracelet) break;
9819 } catch (TranslationFailedExc& tfe) {
9820 // The whole translation failed; give up on this BB. Since it is not
9821 // linked into srcDB yet, it is guaranteed not to be reachable.
9822 m_regMap.reset();
9823 // Permanent reset; nothing is reachable yet.
9824 a.code.frontier = start;
9825 astubs.code.frontier = stubStart;
9826 bcMapping.clear();
9827 // Discard any pending fixups.
9828 m_pendingFixups.clear();
9829 srcRec.clearInProgressTailJumps();
9830 TRACE(1, "emitting %d-instr interp request for failed translation @%s:%d\n",
9831 int(t.m_numOpcodes), tfe.m_file, tfe.m_line);
9832 // Add a counter for the translation if requested
9833 if (RuntimeOption::EvalJitTransCounters) {
9834 emitTransCounterInc(a);
9836 a. jmp(
9837 emitServiceReq(REQ_INTERPRET, 2ull, uint64_t(t.m_sk.offset()),
9838 uint64_t(t.m_numOpcodes)));
9839 // Fall through.
9841 } // if (!hhirSucceeded)
9843 for (uint i = 0; i < m_pendingFixups.size(); i++) {
9844 TCA tca = m_pendingFixups[i].m_tca;
9845 ASSERT(isValidCodeAddress(tca));
9846 m_fixupMap.recordFixup(tca, m_pendingFixups[i].m_fixup);
9848 m_pendingFixups.clear();
9850 addTranslation(TransRec(t.m_sk, curUnit()->md5(), t, start,
9851 a.code.frontier - start, stubStart,
9852 astubs.code.frontier - stubStart,
9853 counterStart, counterLen,
9854 bcMapping));
9856 recordGdbTranslation(sk, curUnit(), a, start,
9857 false, false);
9858 recordGdbTranslation(sk, curUnit(), astubs, stubStart,
9859 false, false);
9860   // SrcRec::newTranslation() makes this code reachable. Do this last;
9861   // otherwise there's some chance of reader threads hitting code whose
9862   // metadata is not yet visible.
9863 TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n", start, sk.m_funcId,
9864 sk.m_offset);
9865 srcRec.newTranslation(a, astubs, start);
9866 m_regMap.reset();
9867 TRACE(1, "tx64: %zd-byte tracelet\n", a.code.frontier - start);
9868 if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
9869 Trace::traceRelease(getUsage().c_str());
9873 static const size_t kASize = 512 << 20;
9874 static const size_t kAStubsSize = 512 << 20;
9875 static const size_t kGDataSize = kASize / 4;
9876 static const size_t kTotalSize = kASize + kAStubsSize +
9877 kTrampolinesBlockSize + kGDataSize;
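// Illustrative arithmetic (assumption, not from the original source): with
// kASize = kAStubsSize = 512MB and kGDataSize = kASize / 4 = 128MB,
// kTotalSize is roughly 1.1GB plus the trampoline block. The static_assert
// in the constructor below requires the whole slab to stay under 2GiB so
// that 32-bit relative addressing between the blocks keeps working.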
9878 TranslatorX64::TranslatorX64()
9879 : Translator(),
9880 m_numNativeTrampolines(0),
9881 m_trampolineSize(0),
9882 m_spillFillCode(&a),
9883 m_interceptHelper(0),
9884 m_defClsHelper(0),
9885 m_funcPrologueRedispatch(0),
9886 m_irAUsage(0),
9887 m_irAstubsUsage(0),
9888 m_numHHIRTrans(0),
9889 m_irFactory(NULL),
9890 m_constTable(NULL),
9891 m_traceBuilder(NULL),
9892 m_hhbcTrans(NULL),
9893 m_regMap(kCallerSaved, kCalleeSaved, this),
9894 m_interceptsEnabled(false),
9895 m_unwindRegMap(128),
9896 m_curTrace(0),
9897 m_curNI(0),
9898 m_curFile(NULL),
9899 m_curLine(0),
9900 m_curFunc(NULL),
9901 m_vecState(NULL)
9903 TRACE(1, "TranslatorX64@%p startup\n", this);
9904 tx64 = this;
9906 static_assert(kTotalSize < (2ul << 30),
9907 "Combined size of all code/data blocks in TranslatorX64 "
9908 "must be < 2GiB to support 32-bit relative addresses");
9910 static bool profileUp = false;
9911 if (!profileUp) {
9912 profileInit();
9913 profileUp = true;
9916   // We want to ensure that the blocks for "a", "astubs",
9917   // "atrampolines", and "m_globalData" are nearby so that we can
9918   // short jump/point between them. Thus we allocate one slab and
9919   // divide it among "a", "astubs", and "atrampolines".
9921   // Using sbrk to ensure it's in the bottom 2GB, so we avoid
9922   // the need for trampolines, and get to use shorter
9923   // instructions for TC addresses.
9924 static const size_t kRoundUp = 2 << 20;
9925 uint8_t *base = (uint8_t*)sbrk(kTotalSize + kRoundUp - 1);
9926 base += -(uint64_t)base & (kRoundUp - 1);
9927 if (RuntimeOption::EvalMapTCHuge) {
9928 hintHuge(base, kTotalSize);
9930 atrampolines.init(base, kTrampolinesBlockSize);
9931 base += kTrampolinesBlockSize;
9932 a.init(base, kASize);
9933 m_unwindRegistrar = register_unwind_region(base, kTotalSize);
9934 base += kASize;
9935 astubs.init(base, kAStubsSize);
9936 base += kAStubsSize;
9937 m_globalData.init(base, kGDataSize);
9939 // Emit some special helpers that are shared across translations.
9941 // Emit a byte of padding. This is a kind of hacky way to
9942 // avoid hitting an assert in recordGdbStub when we call
9943 // it with m_callToExit - 1 as the start address.
9944 astubs.emitNop(1);
9946 // Call to exit with whatever value the program leaves on
9947 // the return stack.
9948 m_callToExit = emitServiceReq(false, REQ_EXIT, 0ull);
9950 m_retHelper = emitRetFromInterpretedFrame();
9952 moveToAlign(astubs);
9953 m_resumeHelper = astubs.code.frontier;
9954 emitGetGContext(astubs, rax);
9955 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
9956 rVmFp);
9957 astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
9958 Stack::topOfStackOffset(), rVmSp);
9959 emitServiceReq(false, REQ_RESUME, 0ull);
9961 // Helper for DefCls
9962 if (false) {
9963 PreClass *preClass = 0;
9964 defClsHelper(preClass);
9966 m_defClsHelper = TCA(a.code.frontier);
9967 PhysReg rEC = argNumToRegName[2];
9968 emitGetGContext(a, rEC);
9969 a. store_reg64_disp_reg64(rVmFp, offsetof(VMExecutionContext, m_fp), rEC);
9970 a. store_reg64_disp_reg64(argNumToRegName[1],
9971 offsetof(VMExecutionContext, m_pc), rEC);
9972 // rax holds the up-to-date top of stack pointer
9973 a. store_reg64_disp_reg64(rax,
9974 offsetof(VMExecutionContext, m_stack) +
9975 Stack::topOfStackOffset(), rEC);
9976 a. jmp((TCA)defClsHelper);
9978 moveToAlign(astubs);
9979 m_stackOverflowHelper = astubs.code.frontier;
9980 // We are called from emitStackCheck, with the new stack frame in
9981 // rStashedAR. Get the caller's PC into rdi and save it off.
9982 astubs. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rax);
9983 astubs. load_reg64_disp_reg32(rStashedAR, AROFF(m_soff), rdi);
9984 astubs. load_reg64_disp_reg64(rax, Func::sharedOffset(), rax);
9985 astubs. load_reg64_disp_reg32(rax, Func::sharedBaseOffset(), rax);
9986 astubs. add_reg32_reg32(rax, rdi);
9988 emitEagerVMRegSave(astubs, SaveFP | SavePC);
9989 emitServiceReq(false, REQ_STACK_OVERFLOW, 0ull);
9991 // The decRef helper for when we bring the count down to zero. Callee needs to
9992 // bring the value into rdi. These can be burned in for all time, and for all
9993 // translations.
9994 if (false) { // type-check
9995 StringData* str = NULL;
9996 ArrayData* arr = NULL;
9997 ObjectData* obj = NULL;
9998 RefData* ref = NULL;
9999 tv_release_str(str);
10000 tv_release_arr(arr);
10001 tv_release_obj(obj);
10002 tv_release_ref(ref);
10004 typedef void* vp;
10005 m_dtorStubs[BitwiseKindOfString] = emitUnaryStub(a, vp(tv_release_str));
10006 m_dtorStubs[KindOfArray] = emitUnaryStub(a, vp(tv_release_arr));
10007 m_dtorStubs[KindOfObject] = emitUnaryStub(a, vp(tv_release_obj));
10008 m_dtorStubs[KindOfRef] = emitUnaryStub(a, vp(tv_release_ref));
10009 m_dtorGenericStub = genericRefCountStub(a);
10010 m_dtorGenericStubRegs = genericRefCountStubRegs(a);
10012 if (trustSigSegv) {
10013 // Install SIGSEGV handler for timeout exceptions
10014 struct sigaction sa;
10015 struct sigaction old_sa;
10016 sa.sa_sigaction = &TranslatorX64::SEGVHandler;
10017 sa.sa_flags = SA_SIGINFO;
10018 sigemptyset(&sa.sa_mask);
10019 if (sigaction(SIGSEGV, &sa, &old_sa) != 0) {
10020 throw std::runtime_error(
10021 std::string("Failed to install SIGSEGV handler: ") +
10022 strerror(errno));
10024 m_segvChain = old_sa.sa_flags & SA_SIGINFO ?
10025 old_sa.sa_sigaction : (sigaction_t)old_sa.sa_handler;
10029 // do gdb specific initialization. This has to happen after
10030 // the TranslatorX64 constructor is called, because gdb initialization
10031 // calls backs into TranslatorX64::Get()
10032 void TranslatorX64::initGdb() {
10033 // On a backtrace, gdb tries to locate the calling frame at address
10034 // returnRIP-1. However, for the first VM frame, there is no code at
10035 // returnRIP-1, since the AR was set up manually. For this frame,
10036 // record the tracelet address as starting from callToExit-1, so gdb
10037 // does not barf
10038 recordGdbStub(astubs, m_callToExit - 1, "HHVM::callToExit");
10040 recordBCInstr(OpRetFromInterp, astubs, m_retHelper);
10041 recordGdbStub(astubs, m_retHelper - 1, "HHVM::retHelper");
10042 recordBCInstr(OpResumeHelper, astubs, m_resumeHelper);
10043 recordBCInstr(OpDefClsHelper, a, m_defClsHelper);
10044 recordBCInstr(OpDtorStub, a, m_dtorStubs[BitwiseKindOfString]);
10045 recordGdbStub(a, m_dtorStubs[BitwiseKindOfString],
10046 "HHVM::destructorStub");
10049 TranslatorX64*
10050 TranslatorX64::Get() {
10052 * Called from outrageously early, pre-main code, and will
10053 * allocate the first translator space.
10055 if (!nextTx64) {
10056 nextTx64 = new TranslatorX64();
10057 nextTx64->initGdb();
10059 if (!tx64) {
10060 tx64 = nextTx64;
10062 ASSERT(tx64);
10063 return tx64;
10066 template<int Arity>
10067 TCA TranslatorX64::emitNAryStub(X64Assembler& a, void* fptr) {
10068 BOOST_STATIC_ASSERT((Arity < kNumRegisterArgs));
10070 // The callNAryStub has already saved these regs on a.
10071 RegSet alreadySaved;
10072 for (size_t i = 0; i < Arity; ++i) {
10073 alreadySaved |= RegSet(argNumToRegName[i]);
10077 * We've made a call instruction, and pushed Arity args on the
10078 * stack. So the stack address will be odd coming into the stub if
10079 * Arity + 1 (for the call) is odd. We need to correct for this
10080 * when saving other registers below to keep SSE-friendly alignment
10081 * of the stack.
10083 const int Parity = (Arity + 1) % 2;
10085 // These dtor stubs are meant to be called with the call
10086 // instruction, unlike most translator code.
10087 moveToAlign(a);
10088 TCA start = a.code.frontier;
10090 * Preserve most caller-saved regs. The calling code has already
10091 * preserved regs in `alreadySaved'; we push the rest of the caller
10092 * saved regs and rbp. It should take 9 qwords in total, and the
10093 * incoming call instruction made it 10. This is an even number of
10094 * pushes, so we preserve the SSE-friendliness of our execution
10095 * environment (without real intervention from PhysRegSaverParity).
10097 * Note that we don't need to clean all registers because the only
10098 * reason we could need those locations written back is if stack
10099 * unwinding were to happen. These stubs can re-enter due to user
10100 * destructors, but exceptions are not allowed to propagate out of
10101 * those, so it's not a problem.
10103 a. pushr(rbp); // {
10104 a. mov_reg64_reg64(rsp, rbp);
10106 RegSet s = kCallerSaved - alreadySaved;
10107 PhysRegSaverParity<Parity> rs(a, s);
10108 emitCall(a, TCA(fptr));
10110 a. popr(rbp); // }
10111 a. ret();
10112 return start;
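// Illustrative note (assumption, not from the original source): the Parity
// bookkeeping above exists because the ABI wants rsp 16-byte aligned at call
// sites; the incoming call plus Arity argument pushes may leave rsp off by 8,
// so PhysRegSaverParity pads by one extra slot exactly when (Arity + 1) is
// odd, restoring SSE-friendly alignment before emitCall().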
10115 TCA TranslatorX64::emitUnaryStub(X64Assembler& a, void* fptr) {
10116 return emitNAryStub<1>(a, fptr);
10119 TCA TranslatorX64::emitBinaryStub(X64Assembler& a, void* fptr) {
10120 return emitNAryStub<2>(a, fptr);
10124 * Both callUnaryStubImpl and callBinaryStub assume that the stub they
10125 * are calling cannot throw an exception.
10128 template <bool reentrant>
10129 void
10130 TranslatorX64::callUnaryStubImpl(X64Assembler& a,
10131 const NormalizedInstruction& i,
10132 TCA stub, PhysReg arg, int disp/*=0*/) {
10133 // Call the generic dtor stub. They all take one arg.
10134 a. pushr(rdi);
10135 if (arg == rsp) {
10136 // Account for pushing rdi.
10137 disp += 8;
10139 if (disp == 0) {
10140 emitMovRegReg(a, arg, rdi);
10141 } else {
10142 a. lea_reg64_disp_reg64(arg, disp, rdi);
10144 ASSERT(isValidCodeAddress(stub));
10145 emitCall(a, stub);
10146 recordCallImpl<reentrant>(a, i);
10147 a. popr(rdi);
10150 void
10151 TranslatorX64::callBinaryStub(X64Assembler& a, const NormalizedInstruction& i,
10152 TCA stub, PhysReg arg1, PhysReg arg2) {
10153 a. pushr(rdi);
10154 a. pushr(rsi);
10156 // We need to be careful not to clobber our arguments when moving
10157 // them into the appropriate registers. (If we ever need ternary
10158 // stubs, this should probably be converted to use ArgManager.)
10159 if (arg2 == rdi && arg1 == rsi) {
10160 a. xchg_reg64_reg64(rdi, rsi);
10161 } else if (arg2 == rdi) {
10162 emitMovRegReg(a, arg2, rsi);
10163 emitMovRegReg(a, arg1, rdi);
10164 } else {
10165 emitMovRegReg(a, arg1, rdi);
10166 emitMovRegReg(a, arg2, rsi);
10169 ASSERT(isValidCodeAddress(stub));
10170 emitCall(a, stub);
10171 recordReentrantCall(a, i);
10172 a. popr(rsi);
10173 a. popr(rdi);
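// Illustrative example (assumption, not from the original source): the
// register shuffle above avoids clobbering an argument mid-move. For
// instance, with arg1 == rsi and arg2 == rdi, either plain mov ordering
// would overwrite one source, so that case is handled with a single xchg
// instead.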
10176 namespace {
10178 struct DeferredFileInvalidate : public DeferredWorkItem {
10179 Eval::PhpFile* m_f;
10180 DeferredFileInvalidate(Eval::PhpFile* f) : m_f(f) {
10181 TRACE(2, "DeferredFileInvalidate @ %p, m_f %p\n", this, m_f); }
10182 void operator()() {
10183 TRACE(2, "DeferredFileInvalidate: Firing @ %p , m_f %p\n", this, m_f);
10184 tx64->invalidateFileWork(m_f);
10188 struct DeferredPathInvalidate : public DeferredWorkItem {
10189 const std::string m_path;
10190 DeferredPathInvalidate(const std::string& path) : m_path(path) {
10191 ASSERT(m_path.size() >= 1 && m_path[0] == '/');
10193 void operator()() {
10194 String spath(m_path);
10196 * inotify saw this path change. Now poke the file repository;
10197 * it will notice the underlying PhpFile* has changed, and notify
10198 * us via ::invalidateFile.
10200 * We don't actually need to *do* anything with the PhpFile* from
10201 * this lookup; since the path has changed, the file we'll get out is
10202 * going to be some new file, not the old file that needs invalidation.
10204 UNUSED Eval::PhpFile* f =
10205 g_vmContext->lookupPhpFile(spath.get(), "");
10206 // We don't keep around the extra ref.
10207 if (f) f->decRefAndDelete();
10213 void
10214 TranslatorX64::requestInit() {
10215 TRACE(1, "in requestInit(%ld)\n", g_vmContext->m_currentThreadIdx);
10216 tl_regState = REGSTATE_CLEAN;
10217 PendQ::drain();
10218 requestResetHighLevelTranslator();
10219 Treadmill::startRequest(g_vmContext->m_currentThreadIdx);
10220 memset(&s_perfCounters, 0, sizeof(s_perfCounters));
10221 initUnlikelyProfile();
10224 void
10225 TranslatorX64::requestExit() {
10226 if (s_writeLease.amOwner()) {
10227 s_writeLease.drop();
10229 TRACE_MOD(txlease, 2, "%lx write lease stats: %15lld kept, %15lld grabbed\n",
10230 pthread_self(), s_writeLease.m_hintKept,
10231 s_writeLease.m_hintGrabbed);
10232 PendQ::drain();
10233 Treadmill::finishRequest(g_vmContext->m_currentThreadIdx);
10234 TRACE(1, "done requestExit(%ld)\n", g_vmContext->m_currentThreadIdx);
10235 Stats::dump();
10236 Stats::clear();
10237 dumpUnlikelyProfile();
10239 if (Trace::moduleEnabledRelease(Trace::tx64stats, 1)) {
10240 Trace::traceRelease("TranslatorX64 perf counters for %s:\n",
10241 g_context->getRequestUrl(50).c_str());
10242 for (int i = 0; i < tpc_num_counters; i++) {
10243 Trace::traceRelease("%-20s %10lld\n",
10244 kPerfCounterNames[i], s_perfCounters[i]);
10246 Trace::traceRelease("\n");
10250 bool
10251 TranslatorX64::isPseudoEvent(const char* event) {
10252 for (int i = 0; i < tpc_num_counters; i++) {
10253 if (!strcmp(event, kPerfCounterNames[i])) {
10254 return true;
10257 return false;
10260 void
10261 TranslatorX64::getPerfCounters(Array& ret) {
10262 for (int i = 0; i < tpc_num_counters; i++) {
10263 // Until Perflab can automatically scale the values we give it to
10264 // an appropriate range, we have to fudge these numbers so they
10265 // look more like reasonable hardware counter values.
10266 ret.set(kPerfCounterNames[i], s_perfCounters[i] * 1000);
10270 TranslatorX64::~TranslatorX64() {
10271 freeSlab(atrampolines.code.base, kTotalSize);
10274 static Debug::TCRange rangeFrom(const X64Assembler& a, const TCA addr,
10275 bool isAstubs) {
10276 ASSERT(a.code.isValidAddress(addr));
10277 return Debug::TCRange(addr, a.code.frontier, isAstubs);
10280 void TranslatorX64::recordBCInstr(uint32_t op,
10281 const X64Assembler& a,
10282 const TCA addr) {
10283 if (addr != a.code.frontier) {
10284 m_debugInfo.recordBCInstr(Debug::TCRange(addr, a.code.frontier,
10285 &a == &astubs ? true : false), op);
10289 void TranslatorX64::recordGdbTranslation(const SrcKey& sk,
10290 const Unit* srcUnit,
10291 const X64Assembler& a,
10292 const TCA start,
10293 bool exit,
10294 bool inPrologue) {
10295 if (start != a.code.frontier && !RuntimeOption::EvalJitNoGdb) {
10296 ASSERT(s_writeLease.amOwner());
10297 m_debugInfo.recordTracelet(rangeFrom(a, start,
10298 &a == &astubs ? true : false),
10299 srcUnit,
10300 srcUnit->at(sk.offset()),
10301 exit, inPrologue);
10305 void TranslatorX64::recordGdbStub(const X64Assembler& a,
10306 const TCA start, const char* name) {
10307 if (!RuntimeOption::EvalJitNoGdb) {
10308 m_debugInfo.recordStub(rangeFrom(a, start, &a == &astubs ? true : false),
10309 name);
10313 void TranslatorX64::defineCns(StringData* name) {
10314 TargetCache::fillConstant(name);
10317 std::string TranslatorX64::getUsage() {
10318 std::string usage;
10319 size_t aUsage = a.code.frontier - a.code.base;
10320 size_t stubsUsage = astubs.code.frontier - astubs.code.base;
10321 size_t tcUsage = TargetCache::s_frontier;
10322 Util::string_printf(usage,
10323 "tx64: %9zd bytes (%ld%%) in a.code\n"
10324 "tx64: %9zd bytes (%ld%%) in astubs.code\n"
10325 "tx64: %9zd bytes (%ld%%) in a.code from ir\n"
10326 "tx64: %9zd bytes (%ld%%) in astubs.code from ir\n"
10327 "tx64: %9zd bytes (%ld%%) in targetCache\n",
10328 aUsage, 100 * aUsage / a.code.size,
10329 stubsUsage, 100 * stubsUsage / astubs.code.size,
10330 m_irAUsage, 100 * m_irAUsage / a.code.size,
10331 m_irAstubsUsage, 100 * m_irAstubsUsage / astubs.code.size,
10332 tcUsage,
10333 100 * tcUsage / RuntimeOption::EvalJitTargetCacheSize);
10334 return usage;
10337 bool TranslatorX64::addDbgGuards(const Unit* unit) {
10338   // TODO refactor
10339   // It grabs the write lease and iterates through the whole SrcDB...
10340 bool locked = s_writeLease.acquire(true);
10341 if (!locked) {
10342 return false;
10344 struct timespec tsBegin, tsEnd;
10345 gettime(CLOCK_MONOTONIC, &tsBegin);
10346   // Doc says even find _could_ invalidate the iterator; in practice it should
10347   // be very rare, so go with it for now.
10348 for (SrcDB::iterator it = m_srcDB.begin(); it != m_srcDB.end(); ++it) {
10349 SrcKey const sk = SrcKey::fromAtomicInt(it->first);
10350 SrcRec& sr = *it->second;
10351 if (sr.unitMd5() == unit->md5() &&
10352 !sr.hasDebuggerGuard() &&
10353 isSrcKeyInBL(unit, sk)) {
10354 addDbgGuardImpl(sk, sr);
10357 s_writeLease.drop();
10358 gettime(CLOCK_MONOTONIC, &tsEnd);
10359 int64 elapsed = gettime_diff_us(tsBegin, tsEnd);
10360 if (Trace::moduleEnabledRelease(Trace::tx64, 5)) {
10361 Trace::traceRelease("addDbgGuards got lease for %lld us\n", elapsed);
10363 return true;
10366 bool TranslatorX64::addDbgGuard(const Func* func, Offset offset) {
10367 SrcKey sk(func, offset);
10369 if (SrcRec* sr = m_srcDB.find(sk)) {
10370 if (sr->hasDebuggerGuard()) {
10371 return true;
10373 } else {
10374 // no translation yet
10375 return true;
10378 if (debug) {
10379 if (!isSrcKeyInBL(func->unit(), sk)) {
10380 TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
10381 return false;
10384 bool locked = s_writeLease.acquire(true);
10385 if (!locked) {
10386 return false;
10389 if (SrcRec* sr = m_srcDB.find(sk)) {
10390 addDbgGuardImpl(sk, *sr);
10393 s_writeLease.drop();
10394 return true;
10397 void TranslatorX64::addDbgGuardImpl(const SrcKey& sk, SrcRec& srcRec) {
10398 TCA dbgGuard = a.code.frontier;
10399 // Emit the checks for debugger attach
10400 emitTLSLoad<ThreadInfo>(a, ThreadInfo::s_threadInfo, rScratch);
10401 static COff dbgOff = offsetof(ThreadInfo, m_reqInjectionData) +
10402 offsetof(RequestInjectionData, debugger);
10403 a. load_reg64_disp_reg32(rScratch, dbgOff, rScratch);
10404 a. test_imm32_reg32(0xff, rScratch);
10405 // Branch to a special REQ_INTERPRET if attached
10407 TCA fallback = emitServiceReq(REQ_INTERPRET, 2, uint64_t(sk.offset()), 0);
10408 a. jnz(fallback);
10410 // Emit a jump to the actual code
10411 TCA realCode = srcRec.getTopTranslation();
10412 prepareForSmash(kJmpLen);
10413 TCA dbgBranchGuardSrc = a.code.frontier;
10414 a. jmp(realCode);
10415 // Add it to srcRec
10416 srcRec.addDebuggerGuard(a, astubs, dbgGuard, dbgBranchGuardSrc);
10419 bool TranslatorX64::dumpTCCode(const char* filename) {
10420 string aFilename = string(filename).append("_a");
10421 string astubFilename = string(filename).append("_astub");
10422 FILE* aFile = fopen(aFilename.c_str(),"wb");
10423 if (aFile == NULL)
10424 return false;
10425 FILE* astubFile = fopen(astubFilename.c_str(),"wb");
10426 if (astubFile == NULL) {
10427 fclose(aFile);
10428 return false;
10430 string helperAddrFilename = string(filename).append("_helpers_addrs.txt");
10431 FILE* helperAddrFile = fopen(helperAddrFilename.c_str(),"wb");
10432 if (helperAddrFile == NULL) {
10433 fclose(aFile);
10434 fclose(astubFile);
10435 return false;
10437 // dump starting from the trampolines; this assumes processInit() places
10438 // trampolines before the translation cache
10439 size_t count = a.code.frontier-atrampolines.code.base;
10440 bool result = (fwrite(atrampolines.code.base, 1, count, aFile) == count);
10441 if (result) {
10442 count = astubs.code.frontier - astubs.code.base;
10443 result = (fwrite(astubs.code.base, 1, count, astubFile) == count);
10445 if (result) {
10446 for(PointerMap::iterator iter = trampolineMap.begin();
10447 iter != trampolineMap.end();
10448 iter++) {
10449 void* helperAddr = iter->first;
10450 void* trampAddr = iter->second;
10451 char* functionName = Util::getNativeFunctionName(helperAddr);
10452 fprintf(helperAddrFile,"%10p %10p %s\n",
10453 trampAddr, helperAddr,
10454 functionName);
10455 free(functionName);
10458 fclose(aFile);
10459 fclose(astubFile);
10460 fclose(helperAddrFile);
10461 return result;
10464 // Returns true on success
10465 bool TranslatorX64::dumpTC(bool ignoreLease) {
10466 if (!ignoreLease && !s_writeLease.acquire(true)) return false;
10467 bool success = dumpTCData();
10468 if (success) {
10469 success = dumpTCCode("/tmp/tc_dump");
10471 if (!ignoreLease) s_writeLease.drop();
10472 return success;
10475 // Returns true on success
10476 bool tc_dump(void) {
10477 return TranslatorX64::Get()->dumpTC();
10480 // Returns true on success
10481 bool TranslatorX64::dumpTCData() {
10482 gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
10483 if (!tcDataFile) return false;
10485 if (!gzprintf(tcDataFile,
10486 "repo_schema = %s\n"
10487 "a.base = %p\n"
10488 "a.frontier = %p\n"
10489 "astubs.base = %p\n"
10490 "astubs.frontier = %p\n\n",
10491 Repo::kSchemaId,
10492 atrampolines.code.base, a.code.frontier,
10493 astubs.code.base, astubs.code.frontier)) {
10494 return false;
10497 if (!gzprintf(tcDataFile, "total_translations = %lu\n\n",
10498 m_translations.size())) {
10499 return false;
10502 for (size_t t = 0; t < m_translations.size(); t++) {
10503 if (gzputs(tcDataFile,
10504 m_translations[t].print(getTransCounter(t)).c_str()) == -1) {
10505 return false;
10509 gzclose(tcDataFile);
10510 return true;
#define NATIVE_OP(X) PLAN(X, Native)
#define SUPPORTED_OP(X) PLAN(X, Supported)
#define SIMPLE_OP(X) PLAN(X, Simple)
#define INTERP_OP(X) PLAN(X, Interp)

#define SUPPORTED_OPS() \
  NATIVE_OP(Null) \
  NATIVE_OP(True) \
  NATIVE_OP(False) \
  NATIVE_OP(Int) \
  NATIVE_OP(String) \
  NATIVE_OP(Array) \
  NATIVE_OP(NewArray) \
  NATIVE_OP(InitThisLoc) \
  NATIVE_OP(Dup) \
  NATIVE_OP(FPushContFunc) \
  NATIVE_OP(ContDone) \
  NATIVE_OP(ContValid) \
  NATIVE_OP(ContStopped) \
  /*
   * Invariably call a possibly-reentrant helper.
   */ \
  SIMPLE_OP(Jmp) \
  SIMPLE_OP(FCall) \
  SIMPLE_OP(CreateCont) \
  SIMPLE_OP(UnpackCont) \
  /*
   * Translations with a reentrant helper.
   *
   * TODO: neither UnboxR nor FPassR can actually call destructors.
   */ \
  SUPPORTED_OP(UnboxR) \
  SUPPORTED_OP(FPassR) \
  SUPPORTED_OP(NativeImpl) \
  SUPPORTED_OP(UnsetL) \
  SUPPORTED_OP(Cns) \
  SUPPORTED_OP(ClsCnsD) \
  SUPPORTED_OP(This) \
  SUPPORTED_OP(BareThis) \
  SUPPORTED_OP(CheckThis) \
  SUPPORTED_OP(PackCont) \
  SUPPORTED_OP(ContReceive) \
  SUPPORTED_OP(ContRaised) \
  SUPPORTED_OP(ContNext) \
  SUPPORTED_OP(ContSend) \
  SUPPORTED_OP(ContRaise) \
  SUPPORTED_OP(ContCurrent) \
  SUPPORTED_OP(FPushCtor) \
  SUPPORTED_OP(FPushCtorD) \
  SUPPORTED_OP(StaticLocInit) \
  /*
   * Always-interp instructions.
   */ \
  INTERP_OP(ContHandle)

// Define the trivial analyze methods
#define PLAN(Op, Spt) \
void \
TranslatorX64::analyze ## Op(Tracelet& t, NormalizedInstruction& i) { \
  i.m_txFlags = Spt; \
}

SUPPORTED_OPS()

#undef NATIVE_OP
#undef SUPPORTED_OP
#undef SIMPLE_OP
#undef INTERP_OP
#undef SUPPORTED_OPS
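// For reference (editor's addition): after preprocessing, each entry in
// SUPPORTED_OPS() becomes one trivial analyze method. For example,
// NATIVE_OP(Null) expands via PLAN(Null, Native) to roughly:
//
//   void
//   TranslatorX64::analyzeNull(Tracelet& t, NormalizedInstruction& i) {
//     i.m_txFlags = Native;
//   }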
void TranslatorX64::invalidateSrcKey(const SrcKey& sk) {
  ASSERT(!RuntimeOption::RepoAuthoritative);
  ASSERT(s_writeLease.amOwner());
  /*
   * Reroute existing translations for SrcKey to an as-yet indeterminate
   * new one.
   */
  SrcRec* sr = m_srcDB.find(sk);
  ASSERT(sr);
  /*
   * Since previous translations aren't reachable from here, we know we
   * just created some garbage in the TC. We currently have no mechanism
   * to reclaim this.
   */
  sr->replaceOldTranslations(a, astubs);
}
void TranslatorX64::invalidateFileWork(Eval::PhpFile* f) {
  class FileInvalidationTrigger : public Treadmill::WorkItem {
    Eval::PhpFile* m_f;
    int m_nRefs;
  public:
    FileInvalidationTrigger(Eval::PhpFile* f, int n) : m_f(f), m_nRefs(n) { }
    virtual void operator()() {
      if (m_f->decRef(m_nRefs) == 0) {
        Eval::FileRepository::onDelete(m_f);
      }
    }
  };
  size_t nSmashed = m_srcDB.invalidateCode(f);
  if (nSmashed) {
    // The srcDB found an entry for this file. The entry's dependency
    // on this file was counted as a reference, and the code is no longer
    // reachable. We need to wait until the last outstanding request
    // drains to know that we can really remove the reference.
    Treadmill::WorkItem::enqueue(new FileInvalidationTrigger(f, nSmashed));
  }
}
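// Editor's sketch (not part of the original file): the Treadmill pattern used
// above generalizes to any resource that may still be referenced by requests
// already in flight when it becomes unreachable. Subclass Treadmill::WorkItem
// and enqueue it; operator() runs only after those requests have drained.
// Foo and releaseFoo below are made-up names for illustration.
#if 0
class ReleaseFoo : public Treadmill::WorkItem {
  Foo* m_foo;
public:
  explicit ReleaseFoo(Foo* foo) : m_foo(foo) {}
  virtual void operator()() {
    releaseFoo(m_foo);  // safe: no in-flight request can still see m_foo
  }
};

void retireFoo(Foo* foo) {
  Treadmill::WorkItem::enqueue(new ReleaseFoo(foo));
}
#endif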
bool TranslatorX64::invalidateFile(Eval::PhpFile* f) {
  // This is called from high rank, but we'll need the write lease to
  // invalidate code.
  if (!RuntimeOption::EvalJit) return false;
  ASSERT(f != NULL);
  PendQ::defer(new DeferredFileInvalidate(f));
  return true;
}
void TranslatorX64::invalidateOutStack(const NormalizedInstruction& ni) {
  if (ni.outStack) {
    m_regMap.invalidate(ni.outStack->location);
  }
}

void TranslatorX64::invalidateOutLocal(const NormalizedInstruction& ni) {
  if (ni.outLocal) {
    m_regMap.invalidate(ni.outLocal->location);
  }
}

} // HPHP::VM::Transl

static const Trace::Module TRACEMOD = Trace::tx64;

void invalidatePath(const std::string& path) {
  TRACE(1, "invalidatePath: abspath %s\n", path.c_str());
  PendQ::defer(new DeferredPathInvalidate(path));
}

} } // HPHP::VM