/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/back-end-x64.h"

#include "hphp/util/asm-x64.h"
#include "hphp/util/disasm.h"
#include "hphp/util/text-color.h"

#include "hphp/runtime/vm/jit/abi-x64.h"
#include "hphp/runtime/vm/jit/block.h"
#include "hphp/runtime/vm/jit/check.h"
#include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"
#include "hphp/runtime/vm/jit/code-gen-x64.h"
#include "hphp/runtime/vm/jit/func-prologues-x64.h"
#include "hphp/runtime/vm/jit/cfg.h"
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/reg-alloc-x64.h"
#include "hphp/runtime/vm/jit/service-requests-inline.h"
#include "hphp/runtime/vm/jit/service-requests-x64.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/unique-stubs-x64.h"
#include "hphp/runtime/vm/jit/unwind-x64.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-llvm.h"

namespace HPHP { namespace jit {

using namespace reg;

extern "C" void enterTCHelper(Cell* vm_sp,
                              ActRec* vm_fp,
                              TCA start,
                              TReqInfo* infoPtr,
                              ActRec* firstAR,
                              void* targetCacheBase);

namespace x64 {

TRACE_SET_MOD(hhir);

struct BackEnd : public jit::BackEnd {
  BackEnd() {}
  ~BackEnd() {}

  Abi abi() override {
    return x64::abi;
  }

  size_t cacheLineSize() override {
    return 64;
  }

  PhysReg rSp() override {
    return PhysReg(reg::rsp);
  }

  PhysReg rVmSp() override {
    return x64::rVmSp;
  }

  PhysReg rVmFp() override {
    return x64::rVmFp;
  }

  PhysReg rVmTl() override {
    return x64::rVmTl;
  }

  bool storesCell(const IRInstruction& inst, uint32_t srcIdx) override {
    return x64::storesCell(inst, srcIdx);
  }

  bool loadsCell(const IRInstruction& inst) override {
    return x64::loadsCell(inst.op());
  }

  /*
   * enterTCHelper does not save callee-saved registers except %rbp. This means
   * when we call it from C++, we have to tell gcc to clobber all the other
   * callee-saved registers.
   */
#define CALLEE_SAVED_BARRIER() \
  asm volatile("" : : : "rbx", "r12", "r13", "r14", "r15");

  /*
   * enterTCHelper is a handwritten assembly function that transfers control in
   * and out of the TC.
   */
  static_assert(x64::rVmSp == rbx &&
                x64::rVmFp == rbp &&
                x64::rVmTl == r12 &&
                x64::rStashedAR == r15,
                "__enterTCHelper needs to be modified to use the correct ABI");
  static_assert(REQ_BIND_CALL == 0x1,
                "Update assembly test for REQ_BIND_CALL in __enterTCHelper");

  void enterTCHelper(TCA start, TReqInfo& info) override {
    // We have to force C++ to spill anything that might be in a callee-saved
    // register (aside from rbp). enterTCHelper does not save them.
    CALLEE_SAVED_BARRIER();
    auto& regs = vmRegsUnsafe();
    jit::enterTCHelper(regs.stack.top(), regs.fp, start,
                       &info, vmFirstAR(), RDS::tl_base);
    CALLEE_SAVED_BARRIER();
  }

  void moveToAlign(CodeBlock& cb,
                   MoveToAlignFlags alignment
                   = MoveToAlignFlags::kJmpTargetAlign) override {
    size_t x64Alignment;

    switch (alignment) {
    case MoveToAlignFlags::kJmpTargetAlign:
      x64Alignment = kJmpTargetAlign;
      break;
    case MoveToAlignFlags::kNonFallthroughAlign:
      x64Alignment = jit::kNonFallthroughAlign;
      break;
    case MoveToAlignFlags::kCacheLineAlign:
      x64Alignment = kCacheLineSize;
      break;
    }
    x64::moveToAlign(cb, x64Alignment);
  }

  UniqueStubs emitUniqueStubs() override {
    return x64::emitUniqueStubs();
  }

  TCA emitServiceReqWork(CodeBlock& cb, TCA start, SRFlags flags,
                         ServiceRequest req,
                         const ServiceReqArgVec& argv) override {
    return x64::emitServiceReqWork(cb, start, flags, req, argv);
  }

  void emitInterpReq(CodeBlock& mainCode, CodeBlock& coldCode,
                     SrcKey sk) override {
    Asm a { mainCode };
    // Add a counter for the translation if requested
    if (RuntimeOption::EvalJitTransCounters) {
      x64::emitTransCounterInc(a);
    }
    a.    jmp(emitServiceReq(coldCode, REQ_INTERPRET, sk.offset()));
  }

  bool funcPrologueHasGuard(TCA prologue, const Func* func) override {
    return x64::funcPrologueHasGuard(prologue, func);
  }

  TCA funcPrologueToGuard(TCA prologue, const Func* func) override {
    return x64::funcPrologueToGuard(prologue, func);
  }

  SrcKey emitFuncPrologue(CodeBlock& mainCode, CodeBlock& coldCode, Func* func,
                          bool funcIsMagic, int nPassed, TCA& start,
                          TCA& aStart) override {
    return funcIsMagic
      ? x64::emitMagicFuncPrologue(func, nPassed, start)
      : x64::emitFuncPrologue(func, nPassed, start);
  }

  TCA emitCallArrayPrologue(Func* func, DVFuncletsVec& dvs) override {
    return x64::emitCallArrayPrologue(func, dvs);
  }

  void funcPrologueSmashGuard(TCA prologue, const Func* func) override {
    x64::funcPrologueSmashGuard(prologue, func);
  }

  void emitIncStat(CodeBlock& cb, intptr_t disp, int n) override {
    X64Assembler a { cb };

    a.    pushf ();
    //    addq $n, [%fs:disp]
    a.    fs().addq(n, baseless(disp));
    a.    popf ();
  }

  void emitTraceCall(CodeBlock& cb, Offset pcOff) override {
    x64::emitTraceCall(cb, pcOff);
  }

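  // A location is "smashable" when every byte of the instruction to be
  // patched sits on a single cache line, so the patch can be written with one
  // aligned store while other threads may be concurrently executing the code.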
  bool isSmashable(Address frontier, int nBytes, int offset = 0) override {
    assert(nBytes <= int(kCacheLineSize));
    uintptr_t iFrontier = uintptr_t(frontier) + offset;
    uintptr_t lastByte = uintptr_t(frontier) + nBytes - 1;
    return (iFrontier & ~kCacheLineMask) == (lastByte & ~kCacheLineMask);
  }

 private:
  void prepareForSmashImpl(CodeBlock& cb, int nBytes, int offset) {
    if (!isSmashable(cb.frontier(), nBytes, offset)) {
      X64Assembler a { cb };
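      // gapSize is the distance from (frontier + offset) to the next
      // cache-line boundary; padding with that many nops puts the bytes that
      // will later be smashed at the start of a fresh cache line.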
      int gapSize = (~(uintptr_t(a.frontier()) + offset) & kCacheLineMask) + 1;
      a.emitNop(gapSize);
      assert(isSmashable(a.frontier(), nBytes, offset));
    }
  }

 public:
  void prepareForSmash(CodeBlock& cb, int nBytes, int offset = 0) override {
    prepareForSmashImpl(cb, nBytes, offset);
    mcg->cgFixups().m_alignFixups.emplace(cb.frontier(),
                                          std::make_pair(nBytes, offset));
  }

  void prepareForTestAndSmash(CodeBlock& cb, int testBytes,
                              TestAndSmashFlags flags) override {
    using namespace x64;
    switch (flags) {
    case TestAndSmashFlags::kAlignJcc:
      prepareForSmash(cb, testBytes + kJmpccLen, testBytes);
      assert(isSmashable(cb.frontier() + testBytes, kJmpccLen));
      break;
    case TestAndSmashFlags::kAlignJccImmediate:
      prepareForSmash(cb,
                      testBytes + kJmpccLen,
                      testBytes + kJmpccLen - kJmpImmBytes);
      assert(isSmashable(cb.frontier() + testBytes, kJmpccLen,
                         kJmpccLen - kJmpImmBytes));
      break;
    case TestAndSmashFlags::kAlignJccAndJmp:
      // Ensure that the entire jcc and the entire jmp are smashable
      // (but we don't need them both to be in the same cache line).
      prepareForSmashImpl(cb, testBytes + kJmpccLen, testBytes);
      prepareForSmashImpl(cb, testBytes + kJmpccLen + kJmpLen,
                          testBytes + kJmpccLen);
      mcg->cgFixups().m_alignFixups.emplace(
        cb.frontier(), std::make_pair(testBytes + kJmpccLen, testBytes));
      mcg->cgFixups().m_alignFixups.emplace(
        cb.frontier(), std::make_pair(testBytes + kJmpccLen + kJmpLen,
                                      testBytes + kJmpccLen));
      assert(isSmashable(cb.frontier() + testBytes, kJmpccLen));
      assert(isSmashable(cb.frontier() + testBytes + kJmpccLen, kJmpLen));
      break;
    }
  }

  bool supportsRelocation() const override {
    return true;
  }

  typedef hphp_hash_set<void*> WideJmpSet;
  struct JmpOutOfRange : std::exception {};

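  /*
   * relocateImpl may shrink branches as it copies code. If a shrunken branch
   * later turns out to be unable to reach its target, that branch is recorded
   * in wideJmps and JmpOutOfRange is thrown; the whole relocation is then
   * retried with those branches kept in their wide form.
   */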
  size_t relocate(RelocationInfo& rel,
                  CodeBlock& destBlock,
                  TCA start, TCA end,
                  CodeGenFixups& fixups,
                  TCA* exitAddr) override {
    WideJmpSet wideJmps;
    while (true) {
      try {
        return relocateImpl(rel, destBlock, start, end,
                            fixups, exitAddr, wideJmps);
      } catch (JmpOutOfRange& j) {
      }
    }
  }

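  /*
   * Copy the instructions in [start, end) into destBlock one at a time,
   * preserving the nop padding recorded for smashable locations, re-encoding
   * rip-relative operands that point outside the range, and noting internal
   * references so they can be rewritten once the final addresses are known.
   */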
  size_t relocateImpl(RelocationInfo& rel,
                      CodeBlock& destBlock,
                      TCA start, TCA end,
                      CodeGenFixups& fixups,
                      TCA* exitAddr,
                      WideJmpSet& wideJmps) {
    TCA src = start;
    size_t range = end - src;
    bool hasInternalRefs = false;
    bool internalRefsNeedUpdating = false;
    TCA destStart = destBlock.frontier();
    size_t asm_count{0};
    TCA jmpDest = nullptr;
    TCA keepNopLow = nullptr;
    TCA keepNopHigh = nullptr;
    try {
      while (src != end) {
        assert(src < end);
        DecodedInstruction di(src);
        asm_count++;

        int destRange = 0;
        auto af = fixups.m_alignFixups.equal_range(src);
        while (af.first != af.second) {
          auto low = src + af.first->second.second;
          auto hi = src + af.first->second.first;
          assert(low < hi);
          if (!keepNopLow || keepNopLow > low) keepNopLow = low;
          if (!keepNopHigh || keepNopHigh < hi) keepNopHigh = hi;
          TCA tmp = destBlock.frontier();
          prepareForSmashImpl(destBlock,
                              af.first->second.first, af.first->second.second);
          if (destBlock.frontier() != tmp) {
            destRange += destBlock.frontier() - tmp;
            internalRefsNeedUpdating = true;
          }
          ++af.first;
        }

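        // While src is inside a region recorded for smashing
        // ([keepNopLow, keepNopHigh)), copy it byte for byte: nops must not
        // be dropped and branches must not be resized, or the smashable
        // sequence could end up straddling a cache line in the destination.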
        bool preserveAlignment = keepNopLow && keepNopHigh &&
          keepNopLow <= src && keepNopHigh > src;
        TCA target = nullptr;
        TCA dest = destBlock.frontier();
        destBlock.bytes(di.size(), src);
        DecodedInstruction d2(dest);
        if (di.hasPicOffset()) {
          if (di.isBranch(false)) {
            target = di.picAddress();
          }
          /*
           * Rip-relative offsets that point outside the range
           * being moved need to be adjusted so they continue
           * to point at the right thing
           */
          if (size_t(di.picAddress() - start) >= range) {
            bool DEBUG_ONLY success = d2.setPicAddress(di.picAddress());
            assert(success);
          } else {
            if (!preserveAlignment && d2.isBranch()) {
              if (wideJmps.count(src)) {
                if (d2.size() < kJmpLen) {
                  d2.widenBranch();
                  internalRefsNeedUpdating = true;
                }
              } else if (d2.shrinkBranch()) {
                internalRefsNeedUpdating = true;
              }
            }
            hasInternalRefs = true;
          }
        }
        if (di.hasImmediate()) {
          if (fixups.m_addressImmediates.count(src)) {
            if (size_t(di.immediate() - (uint64_t)start) < range) {
              hasInternalRefs = internalRefsNeedUpdating = true;
            }
          } else {
            if (fixups.m_addressImmediates.count((TCA)~uintptr_t(src))) {
              // Handle the weird, encoded offset used by cgLdObjMethod
              always_assert(di.immediate() == ((uintptr_t(src) << 1) | 1));
              bool DEBUG_ONLY success =
                d2.setImmediate(((uintptr_t)dest << 1) | 1);
              assert(success);
            }
            /*
             * An immediate that points into the range being moved, but which
             * isn't tagged as an addressImmediate, is most likely a bug
             * and its instruction's address needs to be put into
             * fixups.m_addressImmediates. But it could just happen by bad
             * luck, so just log it.
             */
            if (size_t(di.immediate() - (uint64_t)start) < range) {
              FTRACE(3,
                     "relocate: instruction at {} has immediate 0x{:x} "
                     "which looks like an address that needs relocating\n",
                     src, di.immediate());
            }
          }
        }

        if (src == start) {
          // For the start of the range, we only want to overwrite the "after"
          // address (since the "before" address could belong to the previous
          // tracelet, which could be being relocated to a completely different
          // address). recordRange will do that for us, so just make sure we
          // have the right address setup.
          destStart = dest;
        } else {
          rel.recordAddress(src, dest - destRange, destRange);
        }
        if (preserveAlignment && di.size() == kJmpLen &&
            di.isNop() && src + kJmpLen == end) {
          smashJmp(dest, src + kJmpLen);
          dest += kJmpLen;
        } else if (di.isNop() && !preserveAlignment) {
          internalRefsNeedUpdating = true;
        } else {
          dest += d2.size();
        }
        jmpDest = target;
        assert(dest <= destBlock.frontier());
        destBlock.setFrontier(dest);
        src += di.size();
        if (keepNopHigh && src >= keepNopHigh) {
          keepNopLow = keepNopHigh = nullptr;
        }
      }

      if (exitAddr) {
        *exitAddr = jmpDest;
      }

      rel.recordRange(start, end, destStart, destBlock.frontier());

      if (hasInternalRefs && internalRefsNeedUpdating) {
        src = start;
        bool ok = true;
        while (src != end) {
          DecodedInstruction di(src);
          TCA newPicAddress = nullptr;
          int64_t newImmediate = 0;
          if (di.hasPicOffset() &&
              size_t(di.picAddress() - start) < range) {
            newPicAddress = rel.adjustedAddressAfter(di.picAddress());
            always_assert(newPicAddress);
          }
          if (di.hasImmediate() &&
              size_t((TCA)di.immediate() - start) < range &&
              fixups.m_addressImmediates.count(src)) {
            newImmediate =
              (int64_t)rel.adjustedAddressAfter((TCA)di.immediate());
            always_assert(newImmediate);
          }
          if (newImmediate || newPicAddress) {
            TCA dest = rel.adjustedAddressAfter(src);
            DecodedInstruction d2(dest);
            if (newPicAddress) {
              if (!d2.setPicAddress(newPicAddress)) {
                always_assert(d2.isBranch() && d2.size() == 2);
                wideJmps.insert(src);
                ok = false;
              }
            }
            if (newImmediate) {
              if (!d2.setImmediate(newImmediate)) {
                always_assert(false);
              }
            }
          }
          src += di.size();
        }
        if (!ok) {
          throw JmpOutOfRange();
        }
      }
      rel.markAddressImmediates(fixups.m_addressImmediates);
    } catch (...) {
      rel.rewind(start, end);
      destBlock.setFrontier(destStart);
      throw;
    }

    return asm_count;
  }

  template <typename T>
  void fixupStateVector(StateVector<T, TcaRange>& sv, RelocationInfo& rel) {
    for (auto& ii : sv) {
      if (!ii.empty()) {
        /*
         * We have to be careful with before/after here.
         * If we relocate two consecutive regions of memory,
         * but relocate them to two different destinations, then
         * the end address of the first region is also the start
         * address of the second region; so adjustedAddressBefore(end)
         * gives us the relocated address of the end of the first
         * region, while adjustedAddressAfter(end) gives us the
         * relocated address of the start of the second region.
         */
        auto s = rel.adjustedAddressAfter(ii.begin());
        auto e = rel.adjustedAddressBefore(ii.end());
        if (e || s) {
          if (!s) s = ii.begin();
          if (!e) e = ii.end();
          ii = TcaRange(s, e);
        }
      }
    }
  }

  void adjustForRelocation(RelocationInfo& rel) override {
    for (const auto& range : rel.srcRanges()) {
      adjustForRelocation(rel, range.first, range.second);
    }
  }

  void adjustForRelocation(RelocationInfo& rel,
                           TCA srcStart, TCA srcEnd) override {
    auto start = rel.adjustedAddressAfter(srcStart);
    auto end = rel.adjustedAddressBefore(srcEnd);
    if (!start) {
      start = srcStart;
      end = srcEnd;
    } else {
      always_assert(end);
    }
    while (start != end) {
      assert(start < end);
      DecodedInstruction di(start);

      if (di.hasPicOffset()) {
        /*
         * A pointer into something that has been relocated needs to be
         * updated.
         */
        if (TCA adjusted = rel.adjustedAddressAfter(di.picAddress())) {
          di.setPicAddress(adjusted);
        }
      }

      if (di.hasImmediate()) {
        /*
         * Similarly for addressImmediates - and see comment above
         * for non-address immediates.
         */
        if (TCA adjusted = rel.adjustedAddressAfter((TCA)di.immediate())) {
          if (rel.isAddressImmediate(start)) {
            di.setImmediate((int64_t)adjusted);
          } else {
            FTRACE(3,
                   "relocate: instruction at {} has immediate 0x{:x} "
                   "which looks like an address that needs relocating\n",
                   start, di.immediate());
          }
        }
      }

      start += di.size();

      if (start == end && di.isNop() &&
          di.size() == kJmpLen &&
          rel.adjustedAddressAfter(srcEnd)) {
        smashJmp(start - di.size(), rel.adjustedAddressAfter(end));
      }
    }
  }

  /*
   * Adjusts the addresses in asmInfo and fixups to match the new
   * location of the code.
   * This will not "hook up" the relocated code in any way, so it is safe
   * to call before the relocated code is ready to run.
   */
  void adjustMetaDataForRelocation(RelocationInfo& rel,
                                   AsmInfo* asmInfo,
                                   CodeGenFixups& fixups) override {
    auto& ip = fixups.m_inProgressTailJumps;
    for (size_t i = 0; i < ip.size(); ++i) {
      IncomingBranch& ib = const_cast<IncomingBranch&>(ip[i]);
      TCA adjusted = rel.adjustedAddressAfter(ib.toSmash());
      always_assert(adjusted);
      ib.adjust(adjusted);
    }

    for (auto& fixup : fixups.m_pendingFixups) {
      /*
       * Pending fixups always point after the call instruction,
       * so use the "before" address, since there may be nops
       * before the next actual instruction.
       */
      if (TCA adjusted = rel.adjustedAddressBefore(fixup.m_tca)) {
        fixup.m_tca = adjusted;
      }
    }

    for (auto& ct : fixups.m_pendingCatchTraces) {
      /*
       * Similar to fixups - this is a return address so get
       * the address returned to.
       */
      if (CTCA adjusted = rel.adjustedAddressBefore(ct.first)) {
        ct.first = adjusted;
      }
      /*
       * But the target is an instruction, so skip over any nops
       * that might have been inserted (eg for alignment).
       */
      if (TCA adjusted = rel.adjustedAddressAfter(ct.second)) {
        ct.second = adjusted;
      }
    }

    for (auto& jt : fixups.m_pendingJmpTransIDs) {
      if (TCA adjusted = rel.adjustedAddressAfter(jt.first)) {
        jt.first = adjusted;
      }
    }

    /*
     * Most of the time we want to adjust to a corresponding "before" address
     * with the exception of the start of the range where "before" can point to
     * the end of a previous range.
     */
    if (!fixups.m_bcMap.empty()) {
      auto const aStart = fixups.m_bcMap[0].aStart;
      auto const acoldStart = fixups.m_bcMap[0].acoldStart;
      auto const afrozenStart = fixups.m_bcMap[0].afrozenStart;
      for (auto& tbc : fixups.m_bcMap) {
        if (TCA adjusted = (tbc.aStart == aStart
                              ? rel.adjustedAddressAfter(aStart)
                              : rel.adjustedAddressBefore(tbc.aStart))) {
          tbc.aStart = adjusted;
        }
        if (TCA adjusted = (tbc.acoldStart == acoldStart
                              ? rel.adjustedAddressAfter(acoldStart)
                              : rel.adjustedAddressBefore(tbc.acoldStart))) {
          tbc.acoldStart = adjusted;
        }
        if (TCA adjusted = (tbc.afrozenStart == afrozenStart
                              ? rel.adjustedAddressAfter(afrozenStart)
                              : rel.adjustedAddressBefore(tbc.afrozenStart))) {
          tbc.afrozenStart = adjusted;
        }
      }
    }

    decltype(fixups.m_addressImmediates) updatedAI;
    for (auto addrImm : fixups.m_addressImmediates) {
      if (TCA adjusted = rel.adjustedAddressAfter(addrImm)) {
        updatedAI.insert(adjusted);
      } else if (TCA odd = rel.adjustedAddressAfter((TCA)~uintptr_t(addrImm))) {
        // just for cgLdObjMethod
        updatedAI.insert((TCA)~uintptr_t(odd));
      } else {
        updatedAI.insert(addrImm);
      }
    }
    updatedAI.swap(fixups.m_addressImmediates);

    decltype(fixups.m_alignFixups) updatedAF;
    for (auto af : fixups.m_alignFixups) {
      if (TCA adjusted = rel.adjustedAddressAfter(af.first)) {
        updatedAF.emplace(adjusted, af.second);
      } else {
        updatedAF.emplace(af);
      }
    }
    updatedAF.swap(fixups.m_alignFixups);

    if (asmInfo) {
      fixupStateVector(asmInfo->asmInstRanges, rel);
      fixupStateVector(asmInfo->asmBlockRanges, rel);
      fixupStateVector(asmInfo->coldInstRanges, rel);
      fixupStateVector(asmInfo->coldBlockRanges, rel);
      fixupStateVector(asmInfo->frozenInstRanges, rel);
      fixupStateVector(asmInfo->frozenBlockRanges, rel);
    }
  }

  void adjustCodeForRelocation(RelocationInfo& rel,
                               CodeGenFixups& fixups) override {
    for (auto addr : fixups.m_reusedStubs) {
      /*
       * The stubs are terminated by a ud2. Check for it.
       */
      while (addr[0] != 0x0f || addr[1] != 0x0b) {
        DecodedInstruction di(addr);
        if (di.hasPicOffset()) {
          if (TCA adjusted = rel.adjustedAddressAfter(di.picAddress())) {
            di.setPicAddress(adjusted);
          }
        }
        addr += di.size();
      }
    }

    for (auto codePtr : fixups.m_codePointers) {
      if (TCA adjusted = rel.adjustedAddressAfter(*codePtr)) {
        *codePtr = adjusted;
      }
    }
  }

 private:
  void smashJmpOrCall(TCA addr, TCA dest, bool isCall) {
    // Unconditional rip-relative jmps can also be encoded with an EB as the
    // first byte, but that means the delta is 1 byte, and we shouldn't be
    // encoding smashable jumps that way.
    assert(kJmpLen == kCallLen);

    // XXX The LLVM check here is terrible and awful and temporary until we fix
    // llvm's smashable tail call support: t5742980. For now it just means it's
    // not safe to run multiple PHP threads when LLVM is enabled.
    always_assert(RuntimeOption::EvalJitLLVM ||
                  isSmashable(addr, x64::kJmpLen));

    auto& cb = mcg->code.blockFor(addr);
    CodeCursor cursor { cb, addr };
    X64Assembler a { cb };
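    // If the target is no more than kJmpLen bytes ahead, fill the gap with
    // nops and fall through instead of emitting a jump over (almost) nothing.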
    if (dest > addr && dest - addr <= x64::kJmpLen) {
      assert(!isCall);
      a.  emitNop(dest - addr);
    } else if (isCall) {
      a.  call   (dest);
    } else {
      a.  jmp    (dest);
    }
  }

 public:
  void smashJmp(TCA jmpAddr, TCA newDest) override {
    assert(MCGenerator::canWrite());
    FTRACE(2, "smashJmp: {} -> {}\n", jmpAddr, newDest);
    smashJmpOrCall(jmpAddr, newDest, false);
  }

  void smashCall(TCA callAddr, TCA newDest) override {
    assert(MCGenerator::canWrite());
    FTRACE(2, "smashCall: {} -> {}\n", callAddr, newDest);
    smashJmpOrCall(callAddr, newDest, true);
  }

  void smashJcc(TCA jccAddr, TCA newDest) override {
    assert(MCGenerator::canWrite());
    FTRACE(2, "smashJcc: {} -> {}\n", jccAddr, newDest);
    // Make sure the encoding is what we expect. It has to be a rip-relative
    // jcc with a 4-byte delta.
    assert(*jccAddr == 0x0F && (*(jccAddr + 1) & 0xF0) == 0x80);
    assert(isSmashable(jccAddr, x64::kJmpccLen));

    // Can't use the assembler to write out a new instruction, because we have
    // to preserve the condition code.
    auto newDelta = safe_cast<int32_t>(newDest - jccAddr - x64::kJmpccLen);
    auto deltaAddr = reinterpret_cast<int32_t*>(jccAddr
                                                + x64::kJmpccLen
                                                - x64::kJmpImmBytes);
    *deltaAddr = newDelta;
  }

  void emitSmashableJump(CodeBlock& cb, TCA dest, ConditionCode cc) override {
    X64Assembler a { cb };
    if (cc == CC_None) {
      assert(isSmashable(cb.frontier(), x64::kJmpLen));
      a.  jmp(dest);
    } else {
      assert(isSmashable(cb.frontier(), x64::kJmpccLen));
      a.  jcc(cc, dest);
    }
  }

  TCA smashableCallFromReturn(TCA retAddr) override {
    auto addr = retAddr - x64::kCallLen;
    assert(isSmashable(addr, x64::kCallLen));
    return addr;
  }

  void emitSmashableCall(CodeBlock& cb, TCA dest) override {
    X64Assembler a { cb };
    assert(isSmashable(cb.frontier(), x64::kCallLen));
    a.  call(dest);
  }

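  // Decode the target of a smashable jmp: either e9 rel32, or a 5-byte nop
  // (0f 1f 44 ...) emitted where the jump target immediately followed the
  // jump, in which case the effective target is the instruction after it.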
  TCA jmpTarget(TCA jmp) override {
    if (jmp[0] != 0xe9) {
      if (jmp[0] == 0x0f &&
          jmp[1] == 0x1f &&
          jmp[2] == 0x44) {
        // 5 byte nop
        return jmp + 5;
      }
      return nullptr;
    }
    return jmp + 5 + ((int32_t*)(jmp + 5))[-1];
  }

  TCA jccTarget(TCA jmp) override {
    if (jmp[0] != 0x0F || (jmp[1] & 0xF0) != 0x80) return nullptr;
    return jmp + 6 + ((int32_t*)(jmp + 6))[-1];
  }

  TCA callTarget(TCA call) override {
    if (call[0] != 0xE8) return nullptr;
    return call + 5 + ((int32_t*)(call + 5))[-1];
  }

  void addDbgGuard(CodeBlock& codeMain, CodeBlock& codeCold,
                   SrcKey sk, size_t dbgOff) override {
    Asm a { codeMain };

    // Emit the checks for debugger attach
    auto rtmp = rAsm;
    emitTLSLoad<ThreadInfo>(a, ThreadInfo::s_threadInfo, rtmp);
    a.   loadb  (rtmp[dbgOff], rbyte(rtmp));
    a.   testb  ((int8_t)0xff, rbyte(rtmp));

    // Branch to a special REQ_INTERPRET if attached
    auto const fallback =
      emitServiceReq(codeCold, REQ_INTERPRET, sk.offset());
    a.   jnz    (fallback);
  }

  void streamPhysReg(std::ostream& os, PhysReg reg) override {
    auto name = (reg.type() == PhysReg::GP) ? reg::regname(Reg64(reg)) :
      (reg.type() == PhysReg::SIMD) ? reg::regname(RegXMM(reg)) :
      /* (reg.type() == PhysReg::SF) ? */ reg::regname(RegSF(reg));
    os << name;
  }

  void disasmRange(std::ostream& os, int indent, bool dumpIR, TCA begin,
                   TCA end) override {
    Disasm disasm(Disasm::Options().indent(indent + 4)
                  .printEncoding(dumpIR)
                  .color(color(ANSI_COLOR_BROWN)));
    disasm.disasm(os, begin, end);
  }

  void genCodeImpl(IRUnit& unit, AsmInfo*) override;
};

std::unique_ptr<jit::BackEnd> newBackEnd() {
  return folly::make_unique<BackEnd>();
}

static size_t genBlock(CodegenState& state, Vout& v, Vout& vc, Block* block) {
  FTRACE(6, "genBlock: {}\n", block->id());
  CodeGenerator cg(state, v, vc);
  size_t hhir_count{0};
  for (IRInstruction& inst : *block) {
    hhir_count++;
    if (inst.is(EndGuards)) state.pastGuards = true;
    v.setOrigin(&inst);
    vc.setOrigin(&inst);
    cg.cgInst(&inst);
  }
  return hhir_count;
}

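// Registers made available to the vasm register allocator: everything the
// regular x64 ABI leaves unreserved, plus the scratch registers rAsm and r11,
// which are reserved in that ABI.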
auto const vasm_gp = x64::abi.gpUnreserved | RegSet(rAsm).add(r11);
auto const vasm_simd = x64::kXMMRegs;
UNUSED const Abi vasm_abi {
  .gpUnreserved = vasm_gp,
  .gpReserved = x64::abi.gp() - vasm_gp,
  .simdUnreserved = vasm_simd,
  .simdReserved = x64::abi.simd() - vasm_simd,
  .calleeSaved = x64::kCalleeSaved,
  .sf = x64::abi.sf
};

void BackEnd::genCodeImpl(IRUnit& unit, AsmInfo* asmInfo) {
  Timer _t(Timer::codeGen);
  CodeBlock& mainCodeIn = mcg->code.main();
  CodeBlock& coldCodeIn = mcg->code.cold();
  CodeBlock* frozenCode = &mcg->code.frozen();

  CodeBlock mainCode;
  CodeBlock coldCode;
  const bool useLLVM = mcg->useLLVM();
  bool relocate = false;
  if (!useLLVM &&
      RuntimeOption::EvalJitRelocationSize &&
      supportsRelocation() &&
      coldCodeIn.canEmit(RuntimeOption::EvalJitRelocationSize * 3)) {
    /*
     * This is mainly to exercise the relocator, and ensure that it's
     * not broken by new non-relocatable code. Later, it will be
     * used to do some peephole optimizations, such as reducing branch
     * sizes.
     * Allocate enough space that the relocated cold code doesn't
     * overlap the emitted cold code.
     */

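    // Shift the relocation target by a random sub-cache-line amount so the
    // copy doesn't simply inherit the original's cache-line alignment; this
    // helps exercise the relocator's alignment-preservation logic.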
    static unsigned seed = 42;
    auto off = rand_r(&seed) & (cacheLineSize() - 1);
    coldCode.init(coldCodeIn.frontier() +
                  RuntimeOption::EvalJitRelocationSize + off,
                  RuntimeOption::EvalJitRelocationSize - off, "cgRelocCold");

    mainCode.init(coldCode.frontier() +
                  RuntimeOption::EvalJitRelocationSize + off,
                  RuntimeOption::EvalJitRelocationSize - off, "cgRelocMain");

    relocate = true;
  } else {
    /*
     * Use separate code blocks, so that attempts to use the mcg's
     * code blocks directly will fail (eg by overwriting the same
     * memory being written through these locals).
     */
    coldCode.init(coldCodeIn.frontier(), coldCodeIn.available(),
                  coldCodeIn.name().c_str());
    mainCode.init(mainCodeIn.frontier(), mainCodeIn.available(),
                  mainCodeIn.name().c_str());
  }

  if (frozenCode == &coldCodeIn) {
    frozenCode = &coldCode;
  }

  auto frozenStart = frozenCode->frontier();
  auto coldStart DEBUG_ONLY = coldCodeIn.frontier();
  auto mainStart DEBUG_ONLY = mainCodeIn.frontier();
  size_t hhir_count{0};

  {
    mcg->code.lock();
    mcg->cgFixups().setBlocks(&mainCode, &coldCode, frozenCode);

    SCOPE_EXIT {
      mcg->cgFixups().setBlocks(nullptr, nullptr, nullptr);
      mcg->code.unlock();
    };

    if (RuntimeOption::EvalHHIRGenerateAsserts) {
      emitTraceCall(mainCode, unit.bcOff());
    }

    CodegenState state(unit, asmInfo, *frozenCode);
    auto const blocks = rpoSortCfg(unit);
    Vasm vasm;
    auto& vunit = vasm.unit();
    // create the initial set of vasm blocks, numbered the same as the hhir
    // blocks.
    for (uint32_t i = 0, n = unit.numBlocks(); i < n; ++i) {
      state.labels[i] = vunit.makeBlock(AreaIndex::Main);
    }
    // create vregs for all relevant SSATmps
    assignRegs(unit, vunit, state, blocks, this);
    vunit.entry = state.labels[unit.entry()];
    vasm.main(mainCode);
    vasm.cold(coldCode);
    vasm.frozen(*frozenCode);
    for (auto block : blocks) {
      auto& v = block->hint() == Block::Hint::Unlikely ? vasm.cold() :
                block->hint() == Block::Hint::Unused ? vasm.frozen() :
                vasm.main();
      FTRACE(6, "genBlock {} on {}\n", block->id(),
             area_names[(unsigned)v.area()]);
      auto b = state.labels[block];
      vunit.blocks[b].area = v.area();
      v.use(b);
      hhir_count += genBlock(state, v, vasm.cold(), block);
      assert(v.closed());
      assert(vasm.main().empty() || vasm.main().closed());
      assert(vasm.cold().empty() || vasm.cold().closed());
      assert(vasm.frozen().empty() || vasm.frozen().closed());
    }
    printUnit(kInitialVasmLevel, "after initial vasm generation", vunit);
    assert(check(vunit));

    if (useLLVM) {
      try {
        genCodeLLVM(vunit, vasm.areas(), sortBlocks(vunit));
      } catch (const FailedLLVMCodeGen& e) {
        FTRACE(1, "LLVM codegen failed ({}); falling back to x64 backend\n",
               e.what());
        vasm.finishX64(vasm_abi, state.asmInfo);
      }
    } else {
      vasm.finishX64(vasm_abi, state.asmInfo);
    }
  }

  auto bcMap = &mcg->cgFixups().m_bcMap;
  if (relocate && !bcMap->empty()) {
    TRACE(1, "BCMAPS before relocation\n");
    for (UNUSED auto& map : *bcMap) {
      TRACE(1, "%s %-6d %p %p %p\n", map.md5.toString().c_str(),
            map.bcStart, map.aStart, map.acoldStart, map.afrozenStart);
    }
  }

  assert(coldCodeIn.frontier() == coldStart);
  assert(mainCodeIn.frontier() == mainStart);

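  // When relocating, the code above was emitted into scratch blocks; copy it
  // into the real main/cold blocks now and adjust every recorded reference
  // (metadata, fixups, and code pointers) to the new addresses.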
  if (relocate) {
    if (asmInfo) {
      printUnit(kRelocationLevel, unit, " before relocation ", asmInfo);
    }

    auto& be = mcg->backEnd();
    RelocationInfo rel;
    size_t asm_count{0};
    asm_count += be.relocate(rel, mainCodeIn,
                             mainCode.base(), mainCode.frontier(),
                             mcg->cgFixups(), nullptr);

    asm_count += be.relocate(rel, coldCodeIn,
                             coldCode.base(), coldCode.frontier(),
                             mcg->cgFixups(), nullptr);
    TRACE(1, "hhir-inst-count %ld asm %ld\n", hhir_count, asm_count);

    if (frozenCode != &coldCode) {
      rel.recordRange(frozenStart, frozenCode->frontier(),
                      frozenStart, frozenCode->frontier());
    }
    be.adjustForRelocation(rel);
    be.adjustMetaDataForRelocation(rel, asmInfo, mcg->cgFixups());
    be.adjustCodeForRelocation(rel, mcg->cgFixups());

    if (asmInfo) {
      static int64_t mainDeltaTot = 0, coldDeltaTot = 0;
      int64_t mainDelta =
        (mainCodeIn.frontier() - mainStart) -
        (mainCode.frontier() - mainCode.base());
      int64_t coldDelta =
        (coldCodeIn.frontier() - coldStart) -
        (coldCode.frontier() - coldCode.base());

      mainDeltaTot += mainDelta;
      HPHP::Trace::traceRelease("main delta after relocation: "
                                "%" PRId64 " (%" PRId64 ")\n",
                                mainDelta, mainDeltaTot);
      coldDeltaTot += coldDelta;
      HPHP::Trace::traceRelease("cold delta after relocation: "
                                "%" PRId64 " (%" PRId64 ")\n",
                                coldDelta, coldDeltaTot);
    }
#ifndef NDEBUG
    auto& ip = mcg->cgFixups().m_inProgressTailJumps;
    for (size_t i = 0; i < ip.size(); ++i) {
      const auto& ib = ip[i];
      assert(!mainCode.contains(ib.toSmash()));
      assert(!coldCode.contains(ib.toSmash()));
    }
    memset(mainCode.base(), 0xcc, mainCode.frontier() - mainCode.base());
    memset(coldCode.base(), 0xcc, coldCode.frontier() - coldCode.base());
#endif
  } else {
    coldCodeIn.skip(coldCode.frontier() - coldCodeIn.frontier());
    mainCodeIn.skip(mainCode.frontier() - mainCodeIn.frontier());
  }

  if (asmInfo) {
    printUnit(kCodeGenLevel, unit, " after code gen ", asmInfo);