hphp/runtime/vm/jit/vasm-llvm.cpp

   1 /*
   2    +----------------------------------------------------------------------+
   3    | HipHop for PHP                                                       |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15 */
  16
  17 #include "hphp/runtime/vm/jit/vasm-llvm.h"
  18
  19 #include "hphp/util/assertions.h"
  20 #include "hphp/util/disasm.h"
  21
  22 #include "hphp/runtime/vm/jit/abi-x64.h"
  23 #include "hphp/runtime/vm/jit/back-end-x64.h"
  24 #include "hphp/runtime/vm/jit/code-gen-x64.h"
  25 #include "hphp/runtime/vm/jit/ir-instruction.h"
  26 #include "hphp/runtime/vm/jit/llvm-locrecs.h"
  27 #include "hphp/runtime/vm/jit/llvm-stack-maps.h"
  28 #include "hphp/runtime/vm/jit/mc-generator.h"
  29 #include "hphp/runtime/vm/jit/reserved-stack.h"
  30 #include "hphp/runtime/vm/jit/service-requests-inline.h"
  31 #include "hphp/runtime/vm/jit/unwind-x64.h"
  32 #include "hphp/runtime/vm/jit/vasm-print.h"
  33
  34 #ifdef USE_LLVM
  35
  36 #include <llvm/Analysis/Passes.h>
  37 #include <llvm/CodeGen/MachineFunctionAnalysis.h>
  38 #include <llvm/CodeGen/Passes.h>
  39 #include <llvm/ExecutionEngine/ExecutionEngine.h>
  40 #include <llvm/ExecutionEngine/MCJIT.h>
  41 #include <llvm/ExecutionEngine/ObjectCache.h>
  42 #include <llvm/ExecutionEngine/RuntimeDyld.h>
  43 #include <llvm/IR/AssemblyAnnotationWriter.h>
  44 #include <llvm/IR/ConstantFolder.h>
  45 #include <llvm/IR/DataLayout.h>
  46 #include <llvm/IR/DerivedTypes.h>
  47 #include <llvm/IR/IRBuilder.h>
  48 #include <llvm/IR/InlineAsm.h>
  49 #include <llvm/IR/Intrinsics.h>
  50 #include <llvm/IR/LLVMContext.h>
  51 #include <llvm/IR/Module.h>
  52 #include <llvm/IR/TypeBuilder.h>
  53 #include <llvm/IR/Verifier.h>
  54 #include <llvm/IRReader/IRReader.h>
  55 #include <llvm/PassManager.h>
  56 #include <llvm/Support/CommandLine.h>
  57 #include <llvm/Support/ErrorHandling.h>
  58 #include <llvm/Support/FileSystem.h>
  59 #include <llvm/Support/FormattedStream.h>
  60 #include <llvm/Support/Path.h>
  61 #include <llvm/Support/SourceMgr.h>
  62 #include <llvm/Support/TargetSelect.h>
  63 #include <llvm/Support/raw_ostream.h>
  64 #include <llvm/Target/TargetMachine.h>
  65 #include <llvm/Transforms/Scalar.h>
  66
  67 TRACE_SET_MOD(llvm);
  68
  69 namespace HPHP { namespace jit {
  70
  71 namespace {
  72
  73 /*
  74  * Read an unsigned LEB128 value from data, advancing it past the value.
  75  */
  76 uintptr_t readULEB128(const uint8_t*& data) {
  77   uintptr_t result = 0;
  78   uintptr_t shift = 0;
  79   unsigned char byte;
  80
  81   do {
  82     byte = *data++;
  83     result |= (byte & 0x7f) << shift;
  84     shift += 7;
  85   } while (byte & 0x80);
  86
  87   return result;
  88 }
  89
  90 /*
  91  * Read a signed LEB128 value from data, advancing it past the value.
  92  */
  93 uintptr_t readSLEB128(const uint8_t*& data) {
  94   uintptr_t result = 0;
  95   uintptr_t shift = 0;
  96   unsigned char byte;
  97
  98   do {
  99     byte = *data++;
 100     result |= (byte & 0x7f) << shift;
 101     shift += 7;
 102   } while (byte & 0x80);
 103
 104   if ((byte & 0x40) && (shift < (sizeof(result) << 3))) {
 105     result |= (~0 << shift);
 106   }
 107
 108   return result;
 109 }
 110
 111 /*
 112  * Read and return a T from data, advancing it past the read item.
 113  */
 114 template<typename T>
 115 T readValue(const uint8_t*& data) {
 116   T val;
 117   memcpy(&val, data, sizeof(T));
 118   data += sizeof(T);
 119   return val;
 120 }
 121
 122 /*
 123  * Read an encoded DWARF value from data, advancing it past any data read. This
 124  * function was adapted from the ExceptionDemo.cpp example in llvm.
 125  */
 126 uintptr_t readEncodedPointer(const uint8_t*& data, uint8_t encoding) {
 127   uintptr_t result = 0;
 128   auto const start = data;
 129
 130   if (encoding == DW_EH_PE_omit) return result;
 131
 132   // first get value
 133   switch (encoding & 0x0F) {
 134     case DW_EH_PE_absptr:
 135       result = readValue<uintptr_t>(data);
 136       break;
 137     case DW_EH_PE_uleb128:
 138       result = readULEB128(data);
 139       break;
 140     case DW_EH_PE_sleb128:
 141       result = readSLEB128(data);
 142       break;
 143     case DW_EH_PE_udata2:
 144       result = readValue<uint16_t>(data);
 145       break;
 146     case DW_EH_PE_udata4:
 147       result = readValue<uint32_t>(data);
 148       break;
 149     case DW_EH_PE_udata8:
 150       result = readValue<uint64_t>(data);
 151       break;
 152     case DW_EH_PE_sdata2:
 153       result = readValue<int16_t>(data);
 154       break;
 155     case DW_EH_PE_sdata4:
 156       result = readValue<int32_t>(data);
 157       break;
 158     case DW_EH_PE_sdata8:
 159       result = readValue<int64_t>(data);
 160       break;
 161     default:
 162       not_implemented();
 163   }
 164
 165   // then add relative offset
 166   switch (encoding & 0x70) {
 167     case DW_EH_PE_absptr:
 168       // do nothing
 169       break;
 170     case DW_EH_PE_pcrel:
 171       result += reinterpret_cast<uintptr_t>(start);
 172       break;
 173     case DW_EH_PE_textrel:
 174     case DW_EH_PE_datarel:
 175     case DW_EH_PE_funcrel:
 176     case DW_EH_PE_aligned:
 177     default:
 178       not_implemented();
 179   }
 180
 181   // then apply indirection
 182   if (encoding & 0x80 /*DW_EH_PE_indirect*/) {
 183     result = *((uintptr_t*)result);
 184   }
 185
 186   return result;
 187 }
 188
 189 /*
 190  * Information parsed out of the .gcc_except_table section. start and
 191  * landingPad are offsets from the beginning of the function.
 192  */
 193 struct EHInfo {
 194   uintptr_t start;
 195   uintptr_t length;
 196   uintptr_t landingPad;
 197 };
 198
 199 /*
 200  * Parse a .gcc_except_table section as generated by LLVM, extracting regions
 201  * with nonzero landingpads. This function was also adapted from the
 202  * ExceptionDemo.cpp example in llvm.
 203  */
 204 jit::vector<EHInfo> parse_gcc_except_table(const uint8_t* ptr) {
 205   jit::vector<EHInfo> ret;
 206
 207   FTRACE(2, "Parsing exception table at {}\n", ptr);
 208   uint8_t lpStartEncoding = *ptr++;
 209
 210   if (lpStartEncoding != DW_EH_PE_omit) {
 211     readEncodedPointer(ptr, lpStartEncoding);
 212   }
 213
 214   uint8_t ttypeEncoding = *ptr++;
 215
 216   if (ttypeEncoding != DW_EH_PE_omit) {
 217     readULEB128(ptr);
 218   }
 219
 220   uint8_t         callSiteEncoding = *ptr++;
 221   uint32_t        callSiteTableLength = readULEB128(ptr);
 222   const uint8_t*  callSiteTableStart = ptr;
 223   const uint8_t*  callSiteTableEnd = callSiteTableStart + callSiteTableLength;
 224   const uint8_t*  callSitePtr = callSiteTableStart;
 225
 226   while (callSitePtr < callSiteTableEnd) {
 227     uintptr_t start = readEncodedPointer(callSitePtr, callSiteEncoding);
 228     uintptr_t length = readEncodedPointer(callSitePtr, callSiteEncoding);
 229     uintptr_t landingPad = readEncodedPointer(callSitePtr, callSiteEncoding);
 230
 231     uintptr_t actionEntry = readULEB128(callSitePtr);
 232     // 0 indicates a cleanup entry, the only kind we generate
 233     always_assert(actionEntry == 0);
 234     if (landingPad == 0) continue;
 235
 236     FTRACE(2, "Adding entry: [{},{}): landingPad {}\n",
 237            start, start + length, landingPad);
 238     ret.emplace_back(EHInfo{start, length, landingPad});
 239   }
 240
 241   return ret;
 242 }
 243
 244 void reportLLVMError(void* data, const std::string& err, bool gen_crash_diag) {
 245   always_assert_flog(false, "LLVM fatal error: {}", err);
 246 }
 247
 248 struct LLVMErrorInit {
 249   LLVMErrorInit() {
 250     llvm::install_fatal_error_handler(reportLLVMError);
 251   }
 252
 253   ~LLVMErrorInit() {
 254     llvm::remove_fatal_error_handler();
 255   }
 256 };
 257 static LLVMErrorInit s_llvmErrorInit;
 258
 259 /*
 260  * TCMemoryManager allows llvm to emit code into the appropriate places in the
 261  * TC. Currently all code goes into the Main code block.
 262  */
 263 struct TCMemoryManager : public llvm::RTDyldMemoryManager {
 264   struct SectionInfo {
 265     std::unique_ptr<uint8_t[]> data;
 266     size_t size;
 267   };
 268
 269   explicit TCMemoryManager(Vasm::AreaList& areas)
 270     : m_areas(areas)
 271   {
 272   }
 273
 274   ~TCMemoryManager() {
 275   }
 276
 277   uint8_t* allocateCodeSection(
 278     uintptr_t Size, unsigned Alignment, unsigned SectionID,
 279     llvm::StringRef SectionName
 280   ) override {
 281     auto& code = m_areas[static_cast<size_t>(AreaIndex::Main)].code;
 282
 283     // We override/ignore the alignment and use skew value to compensate.
 284     uint8_t* ret = code.alloc<uint8_t>(1, Size);
 285     assert(Alignment < x64::kCacheLineSize &&
 286            "alignment exceeds cache line size");
 287     assert(
 288       m_codeSkew == (reinterpret_cast<size_t>(ret) & (x64::kCacheLineSize - 1))
 289       && "drift in code skew detected");
 290
 291     FTRACE(1, "Allocate code section \"{}\" id={} at addr={}, size={},"
 292            " alignment={}, skew={}\n",
 293            SectionName.str(), SectionID, ret, Size, Alignment, m_codeSkew);
 294     return ret;
 295   }
 296
 297   uint8_t* allocateDataSection(
 298     uintptr_t Size, unsigned Alignment, unsigned SectionID,
 299     llvm::StringRef SectionName, bool IsReadOnly
 300   ) override {
 301     assert_not_implemented(Alignment <= 8);
 302     std::unique_ptr<uint8_t[]> data{new uint8_t[Size]};
 303
 304     FTRACE(1, "Allocate {} data section \"{}\" id={} at addr={}, size={},"
 305            " alignment={}\n",
 306            IsReadOnly ? "read-only" : "read-write",
 307            SectionName.str(), SectionID, data.get(), Size, Alignment);
 308     auto it = m_dataSections.emplace(SectionName.str(),
 309                                      SectionInfo({std::move(data), Size}));
 310     return it.first->second.data.get();
 311   }
 312
 313   virtual void reserveAllocationSpace(uintptr_t CodeSize,
 314                                       uintptr_t DataSizeRO,
 315                                       uintptr_t DataSizeRW) override {
 316     FTRACE(1, "reserve CodeSize={}, DataSizeRO={}, DataSizeRW={}\n", CodeSize,
 317            DataSizeRO, DataSizeRW);
 318   }
 319
 320   virtual bool needsToReserveAllocationSpace() override {
 321     return true;
 322   }
 323
 324   virtual void registerEHFrames(uint8_t* Addr, uint64_t LoadAddr,
 325                                 size_t Size) override {
 326     // Do nothing; the TC has one huge eh frame.
 327   }
 328
 329   virtual void deregisterEHFrames(uint8_t* Addr, uint64_t LoadAddr,
 330                                   size_t Size) override {
 331     // Do nothing; the TC has one huge eh frame.
 332   }
 333
 334   virtual bool finalizeMemory(std::string* ErrMsg = nullptr) override {
 335     return false;
 336   }
 337
 338   virtual uint64_t getSymbolAddress(const std::string& name) override {
 339     FTRACE(1, "getSymbolAddress({})\n", name);
 340     auto element = m_symbols.find(name);
 341     return element == m_symbols.end() ? 0 : element->second;
 342   }
 343
 344   /*
 345    * Register a symbol's name for later lookup by llvm.
 346    */
 347   void registerSymbolAddress(const std::string& name, uint64_t address) {
 348     auto it = m_symbols.emplace(name, address);
 349     always_assert((it.second == true || it.first->second == address) &&
 350                   "symbol already registered with a different value");
 351   }
 352
 353   /*
 354    * Append an arbitrary id to the end of the given prefix to make it unique.
 355    */
 356   std::string getUniqueSymbolName(const std::string& prefix) {
 357     auto name = prefix;
 358     while (m_symbols.count(name)) {
 359       name = folly::to<std::string>(prefix, '_', m_nextSymbolId++);
 360     }
 361     return name;
 362   }
 363
 364   const SectionInfo* getDataSection(const std::string& name) const {
 365     auto it = m_dataSections.find(name);
 366     return (it == m_dataSections.end()) ? nullptr : &it->second;
 367   }
 368
 369   bool hasDataSection(const std::string& name) const {
 370     return m_dataSections.count(name);
 371   }
 372
 373   uint32_t computeCodeSkew(unsigned alignment) {
 374     auto& code = m_areas[static_cast<size_t>(AreaIndex::Main)].code;
 375     m_codeSkew = reinterpret_cast<uint64_t>(code.frontier()) & (alignment - 1);
 376     return m_codeSkew;
 377   }
 378
 379 private:
 380   Vasm::AreaList& m_areas;
 381
 382   std::unordered_map<std::string, SectionInfo> m_dataSections;
 383
 384   uint32_t m_codeSkew{0};
 385
 386   jit::hash_map<std::string, uint64_t> m_symbols;
 387   uint32_t m_nextSymbolId{0};
 388 };
 389
 390 template<typename T>
 391 std::string llshow(T* val) {
 392   std::string str;
 393   {
 394     llvm::raw_string_ostream os(str);
 395     val->print(os);
 396   }
 397   return str;
 398 }
 399
 400 struct VasmAnnotationWriter : llvm::AssemblyAnnotationWriter {
 401   explicit VasmAnnotationWriter(const std::vector<std::string>& strs)
 402     : m_strs(strs)
 403   {}
 404
 405   void emitBasicBlockStartAnnot(const llvm::BasicBlock* b,
 406                                 llvm::formatted_raw_ostream& os) override {
 407     m_curId = -1;
 408     m_prefix = "";
 409   }
 410
 411   void emitInstructionAnnot(const llvm::Instruction* i,
 412                             llvm::formatted_raw_ostream& os) override {
 413     SCOPE_EXIT { m_prefix = "\n"; };
 414
 415     auto dbg = i->getDebugLoc();
 416     if (dbg.isUnknown() || m_curId == dbg.getLine()) return;
 417
 418     m_curId = dbg.getLine();
 419     os << m_prefix << m_strs[m_curId] << "\n";
 420   }
 421
 422  private:
 423   const std::vector<std::string>& m_strs;
 424   size_t m_curId;
 425   const char* m_prefix{nullptr};
 426 };
 427
 428 /*
 429  * LLVMEmitter is responsible for transforming a Vunit into LLVM IR, then
 430  * optimizing that and emitting machine code from the result.
 431  */
 432 struct LLVMEmitter {
 433   explicit LLVMEmitter(const Vunit& unit, Vasm::AreaList& areas)
 434     : m_context(llvm::getGlobalContext())
 435     , m_module(new llvm::Module("", m_context))
 436     , m_function(llvm::Function::Create(
 437       llvm::FunctionType::get(
 438           llvm::Type::getVoidTy(m_context),
 439           std::vector<llvm::Type*>({
 440               llvm::IntegerType::get(m_context, 64),
 441               llvm::IntegerType::get(m_context, 64),
 442               llvm::IntegerType::get(m_context, 64)}),
 443           false),
 444       llvm::Function::ExternalLinkage, "", m_module.get()))
 445     , m_irb(m_context,
 446             llvm::ConstantFolder(),
 447             IRBuilderVasmInserter(*this))
 448     , m_tcMM(new TCMemoryManager(areas))
 449     , m_valueInfo(unit.next_vr)
 450     , m_blocks(unit.blocks.size())
 451     , m_unit(unit)
 452     , m_areas(areas)
 453   {
 454     llvm::InitializeNativeTarget();
 455     llvm::InitializeNativeTargetAsmPrinter();
 456     llvm::InitializeNativeTargetAsmParser();
 457
 458     m_function->setCallingConv(llvm::CallingConv::X86_64_HHVM_TC);
 459     m_function->setAlignment(1);
 460
 461     // TODO(#5398968): find a better way to disable 16-byte alignment.
 462     m_function->addFnAttr(llvm::Attribute::OptimizeForSize);
 463
 464     m_irb.SetInsertPoint(
 465       llvm::BasicBlock::Create(m_context,
 466                               folly::to<std::string>('B', size_t(unit.entry)),
 467                               m_function));
 468     m_blocks[unit.entry] = m_irb.GetInsertBlock();
 469
 470     // Register all unit's constants.
 471     for (auto const& pair : unit.cpool) {
 472       auto val = pair.first.isByte ? cns(uint8_t(pair.first.val))
 473                                    : cns(pair.first.val);
 474       defineValue(pair.second, val);
 475     }
 476
 477     auto args = m_function->arg_begin();
 478     m_valueInfo[Vreg(x64::rVmSp)].llval = args++;
 479     m_rVmTl = m_valueInfo[Vreg(x64::rVmTl)].llval = args++;
 480     m_rVmTl->setName("rVmTl");
 481     m_rVmFp = m_valueInfo[Vreg(x64::rVmFp)].llval = args++;
 482     m_rVmFp->setName("rVmFp");
 483
 484     // Commonly used types and values.
 485     m_int8  = m_irb.getInt8Ty();
 486     m_int16 = m_irb.getInt16Ty();
 487     m_int32 = m_irb.getInt32Ty();
 488     m_int64 = m_irb.getInt64Ty();
 489
 490     m_int8Ptr  = llvm::Type::getInt8PtrTy(m_context);
 491     m_int16Ptr = llvm::Type::getInt16PtrTy(m_context);
 492     m_int32Ptr = llvm::Type::getInt32PtrTy(m_context);
 493     m_int64Ptr = llvm::Type::getInt64PtrTy(m_context);
 494
 495     m_int8FSPtr  = llvm::Type::getInt8PtrTy(m_context,  kFSAddressSpace);
 496     m_int16FSPtr = llvm::Type::getInt16PtrTy(m_context, kFSAddressSpace);
 497     m_int32FSPtr = llvm::Type::getInt32PtrTy(m_context, kFSAddressSpace);
 498     m_int64FSPtr = llvm::Type::getInt64PtrTy(m_context, kFSAddressSpace);
 499
 500     m_int8Zero  = m_irb.getInt8(0);
 501     m_int8One   = m_irb.getInt8(1);
 502     m_int16Zero = m_irb.getInt16(0);
 503     m_int16One  = m_irb.getInt16(1);
 504     m_int32Zero = m_irb.getInt32(0);
 505     m_int32One  = m_irb.getInt32(1);
 506     m_int64Zero = m_irb.getInt64(0);
 507     m_int64One  = m_irb.getInt64(1);
 508
 509     m_int64Undef  = llvm::UndefValue::get(m_int64);
 510
 511     auto m_personalityFTy = llvm::FunctionType::get(m_int32, false);
 512     m_personalityFunc =
 513       llvm::Function::Create(m_personalityFTy,
 514                              llvm::GlobalValue::ExternalLinkage,
 515                              "personality0",
 516                              m_module.get());
 517     m_personalityFunc->setCallingConv(llvm::CallingConv::C);
 518     m_tcMM->registerSymbolAddress("personality0", 0xbadbadbad);
 519
 520     m_traceletFnTy = llvm::FunctionType::get(
 521       m_irb.getVoidTy(),
 522       std::vector<llvm::Type*>({m_int64, m_int64, m_int64}),
 523       false
 524     );
 525
 526     m_typedValueType = llvm::StructType::get(
 527         m_context,
 528         packed_tv
 529           ? std::vector<llvm::Type*>({m_int8,   // padding
 530                                       m_int8,   // m_type
 531                                       m_int64}) // m_data
 532           : std::vector<llvm::Type*>({m_int64,  // m_data
 533                                       m_int8}), // m_type
 534         /*isPacked*/ false);
 535   }
 536
 537   ~LLVMEmitter() {
 538   }
 539
 540   std::string showModule() const {
 541     std::string s;
 542     llvm::raw_string_ostream stream(s);
 543     VasmAnnotationWriter vw(m_instStrs);
 544     m_module->print(stream,
 545                     HPHP::Trace::moduleEnabled(Trace::llvm, 5) ? &vw : nullptr);
 546     return stream.str();
 547   }
 548
 549   void verifyModule() const {
 550     std::string err;
 551     llvm::raw_string_ostream stream(err);
 552     always_assert_flog(!llvm::verifyModule(*m_module, &stream),
 553                        "LLVM verifier failed:\n{}\n{:-^80}\n{}\n{:-^80}\n{}",
 554                        stream.str(), " vasm unit ", show(m_unit),
 555                        " llvm module ", showModule());
 556   }
 557
 558   /*
 559    * Finalize the code generation process by optimizing and generating code for
 560    * m_module.
 561    */
 562   void finalize() {
 563     FTRACE(1, "{:-^80}\n{}\n", " LLVM IR before optimizing ", showModule());
 564     verifyModule();
 565
 566     // TODO(#5406596): teach LLVM our alignment rules. For the moment override
 567     // 16-byte ABI default.
 568     llvm::TargetOptions targetOptions;
 569     targetOptions.StackAlignmentOverride = 8;
 570     targetOptions.GuaranteedTailCallOpt = true;
 571
 572     auto tcMM = m_tcMM.release();
 573     std::string errStr;
 574     std::unique_ptr<llvm::ExecutionEngine> ee(
 575       llvm::EngineBuilder(m_module.get())
 576       .setErrorStr(&errStr)
 577       .setUseMCJIT(true)
 578       .setMCJITMemoryManager(tcMM)
 579       .setOptLevel(llvm::CodeGenOpt::Aggressive)
 580       .setRelocationModel(llvm::Reloc::Static)
 581       .setCodeModel(llvm::CodeModel::Small)
 582       .setVerifyModules(true)
 583       .setTargetOptions(targetOptions)
 584       .create());
 585     always_assert_flog(ee, "ExecutionEngine creation failed: {}\n", errStr);
 586
 587     assert(m_module != nullptr);
 588
 589     llvm::LLVMTargetMachine* targetMachine =
 590       static_cast<llvm::LLVMTargetMachine*>(ee->getTargetMachine());
 591
 592     auto fpm = folly::make_unique<llvm::FunctionPassManager>(m_module.get());
 593     fpm->add(new llvm::DataLayoutPass(m_module.get()));
 594     targetMachine->addAnalysisPasses(*fpm);
 595
 596     fpm->add(llvm::createBasicAliasAnalysisPass());
 597     fpm->add(llvm::createVerifierPass(true));
 598     fpm->add(llvm::createDebugInfoVerifierPass(false));
 599     fpm->add(llvm::createLoopSimplifyPass());
 600     fpm->add(llvm::createGCLoweringPass());
 601     fpm->add(llvm::createUnreachableBlockEliminationPass());
 602     fpm->add(llvm::createPromoteMemoryToRegisterPass());
 603     fpm->add(llvm::createInstructionCombiningPass());
 604     fpm->add(llvm::createReassociatePass());
 605     fpm->add(llvm::createGVNPass());
 606     fpm->add(llvm::createCFGSimplificationPass());
 607     fpm->add(llvm::createTailCallEliminationPass());
 608     fpm->doInitialization();
 609
 610     m_module->addModuleFlag(llvm::Module::Error, "code_skew",
 611                             tcMM->computeCodeSkew(x64::kCacheLineSize));
 612
 613     for (auto it = m_module->begin(); it != m_module->end(); ++it) {
 614       fpm->run(*it);
 615     }
 616     FTRACE(2, "{:-^80}\n{}\n", " LLVM IR after optimizing ", showModule());
 617
 618     m_module.release(); // ee took ownership of the module.
 619
 620     ee->setProcessAllSections(true);
 621     ee->finalizeObject();
 622
 623     // Now that codegen is done, we need to parse location records and
 624     // gcc_except_table sections and update our own metadata.
 625     uint8_t* funcStart =
 626       static_cast<uint8_t*>(ee->getPointerToFunction(m_function));
 627     FTRACE(2, "LLVM function address: {}\n", funcStart);
 628
 629     if (auto secLocRecs = tcMM->getDataSection(".llvm_locrecs")) {
 630       auto const recs = parseLocRecs(secLocRecs->data.get(),
 631                                      secLocRecs->size);
 632       FTRACE(2, "LLVM experimental locrecs:\n{}", show(recs));
 633       auto it = recs.functionRecords.find(funcStart);
 634       if (it != recs.functionRecords.end()) {
 635         processFixups(it->second, funcStart);
 636         processSvcReqs(it->second, funcStart);
 637       }
 638     }
 639
 640     if (auto secGEH = tcMM->getDataSection(".gcc_except_table")) {
 641       auto const ehInfos = parse_gcc_except_table(secGEH->data.get());
 642       processEHInfos(ehInfos, funcStart);
 643     }
 644   }
 645
 646   /*
 647    * For each entry in m_fixups, find its corresponding locrec entry, find
 648    * the actual call instruction, and register the fixup.
 649    */
 650   void processFixups(const LocRecs::FunctionRecord& funcRec,
 651                      uint8_t* funcStart) {
 652     for (auto& fix : m_fixups) {
 653       auto it = funcRec.records.find(fix.id);
 654       if (it == funcRec.records.end()) {
 655         // The call was optimized away.
 656         continue;
 657       }
 658
 659       auto afterCall = [&] {
 660         for (auto& record : it->second) {
 661           auto ip = funcStart + record.offset;
 662           DecodedInstruction di(ip);
 663           if (di.isCall()) return ip + di.size();
 664         }
 665         always_assert(false && "call instruction cannot be found");
 666       }();
 667
 668       FTRACE(2, "From afterCall for fixup = {}\n", afterCall);
 669       mcg->recordSyncPoint(afterCall, fix.fixup.pcOffset, fix.fixup.spOffset);
 670     }
 671   }
 672
 673   void processSvcReqs(const LocRecs::FunctionRecord& funcRec,
 674                       uint8_t* funcStart) {
 675     auto findJmp = [&](const jit::vector<LocRecs::LocationRecord>& records) {
 676       for (auto& record : records) {
 677         auto ip = funcStart + record.offset;
 678         DecodedInstruction di(ip);
 679         if (di.isJmp()) return ip;
 680       }
 681       always_assert(false && "jmp instruction cannot be found");
 682     };
 683
 684     for (auto& req : m_bindjmps) {
 685       auto it = funcRec.records.find(req.id);
 686       if (it == funcRec.records.end()) continue;
 687
 688       auto jmpIp = findJmp(it->second);
 689       FTRACE(2, "Processing bindjmp at {}, stub {}\n", jmpIp, req.stub);
 690
 691       mcg->cgFixups().m_alignFixups.emplace(
 692         jmpIp, std::make_pair(x64::kJmpLen, kX64CacheLineSize));
 693       mcg->setJmpTransID(jmpIp);
 694
 695       // Patch the rip-relative lea in the stub to point at the jmp.
 696       auto leaIp = req.stub;
 697       always_assert((leaIp[0] & 0x48) == 0x48); // REX.W
 698       always_assert(leaIp[1] == 0x8d); // lea
 699       auto afterLea = leaIp + x64::kRipLeaLen;
 700       auto delta = safe_cast<int32_t>(jmpIp - afterLea);
 701       memcpy(afterLea - sizeof(delta), &delta, sizeof(delta));
 702     }
 703
 704     for (auto& req : m_fallbacks) {
 705       auto it = funcRec.records.find(req.id);
 706       if (it == funcRec.records.end()) continue;
 707
 708       auto destSR = mcg->tx().getSrcRec(req.dest);
 709       destSR->registerFallbackJump(findJmp(it->second));
 710     }
 711   }
 712
 713   /*
 714    * For each entry in infos, find all call instructions in the region and
 715    * register the landing pad as a catch block for each one.
 716    */
 717   void processEHInfos(const jit::vector<EHInfo>& infos, uint8_t* funcStart) {
 718     for (auto& info : infos) {
 719       auto ip = funcStart + info.start;
 720       auto const end = ip + info.length;
 721       auto const landingPad = funcStart + info.landingPad;
 722
 723       FTRACE(2, "Looking for calls for landingPad {}, in EH region [{},{})\n",
 724              landingPad, ip, end);
 725       auto found = false;
 726       while (ip < end) {
 727         DecodedInstruction di(ip);
 728         ip += di.size();
 729         if (di.isCall()) {
 730           FTRACE(2, "  afterCall: {}\n", ip);
 731           mcg->registerCatchBlock(ip, landingPad);
 732           found = true;
 733         }
 734       }
 735
 736       always_assert(found && "EH region with no calls");
 737     }
 738   }
 739
 740   /*
 741    * Register the fact that llvmVal represents the Vasm value in tmp.
 742    */
 743   void defineValue(Vreg tmp, llvm::Value* llvmVal) {
 744     always_assert(tmp.isPhys() || m_valueInfo.at(tmp).llval == nullptr);
 745
 746     if (tmp.isVirt()) {
 747       llvmVal->setName(folly::to<std::string>('t', size_t(tmp)));
 748     }
 749     m_valueInfo[tmp].llval = llvmVal;
 750   }
 751
 752   /*
 753    * Look up the llvm::Value representing tmp.
 754    */
 755   llvm::Value* value(Vreg tmp) const {
 756     auto& info = m_valueInfo.at(tmp);
 757     always_assert(info.llval);
 758     return info.llval;
 759   }
 760
 761   /*
 762    * Register the fact that tmp is defined by inst, since vasm units don't have
 763    * a natural way of following use-def chains.
 764    */
 765   void defineValue(Vreg tmp, const Vinstr& inst) {
 766     always_assert(m_valueInfo.at(tmp).inst.op == Vinstr::ud2 || !tmp.isVirt());
 767     m_valueInfo[tmp].inst = inst;
 768   }
 769
 770   /*
 771    * Look up the Vinstr that defined tmp.
 772    */
 773   const Vinstr& defInst(Vreg tmp) const {
 774     return m_valueInfo.at(tmp).inst;
 775   }
 776
 777   /*
 778    * Certain vasm instructions compute or load intermediate values that aren't
 779    * destinations of the instruction, but are used to produce a status flags
 780    * register that is. One example of this is the incwm instruction: it loads
 781    * an int16 from memory, increments it, produces a status flags register
 782    * based on the incremented value, and finally stores the incremented value
 783    * back to memory. When emitting code for an instruction that consumes this
 784    * status flag register, we need access to the intermediate value due to the
 785    * way llvm handles conditional jumps. We call this value a "flag temporary",
 786    * and is stored in a side table keyed on the status flags Vreg it
 787    * corresponds to.
 788    */
 789   void defineFlagTmp(Vreg vr, llvm::Value* tmp) {
 790     always_assert(m_valueInfo.at(vr).flagTmp == nullptr);
 791     m_valueInfo[vr].flagTmp = tmp;
 792   }
 793
 794   /*
 795    * Get the flag temp for the given vr.
 796    */
 797   llvm::Value* flagTmp(Vreg vr) const {
 798     auto& info = m_valueInfo.at(vr);
 799     always_assert(info.flagTmp);
 800     return info.flagTmp;
 801   }
 802
 803   /*
 804    * Look up or create the llvm block corresponding to the given vasm block.
 805    */
 806   llvm::BasicBlock* block(Vlabel l) {
 807     if (m_blocks[l] == nullptr) {
 808       return m_blocks[l] =
 809         llvm::BasicBlock::Create(m_context,
 810                                  folly::to<std::string>('B', size_t(l)),
 811                                  m_function);
 812     }
 813
 814     return m_blocks[l];
 815   }
 816
 817   /*
 818    * Generate an llvm::Value representing the given integral constant, with an
 819    * approprate bit width.
 820    */
 821   template<typename Int>
 822   typename std::enable_if<std::is_integral<Int>::value, llvm::Value*>::type
 823   cns(Int val) const {
 824     return llvm::ConstantInt::get(
 825       m_context,
 826       llvm::APInt(sizeof(val) * CHAR_BIT, val, std::is_signed<Int>::value)
 827     );
 828   }
 829
 830   /*
 831    * Assuming val is already an integer type, zero-extend or truncate it to the
 832    * given size integer type.
 833    */
 834   llvm::Value* asInt(llvm::Value* val, size_t bits) {
 835     assert(val->getType()->isIntegerTy());
 836     return m_irb.CreateZExtOrTrunc(val, intNType(bits));
 837   }
 838
 839   /*
 840    * Bitcast val to Double.
 841    */
 842   llvm::Value* asDbl(llvm::Value* val) {
 843     return m_irb.CreateBitCast(val, m_irb.getDoubleTy());
 844   }
 845
 846   /*
 847    * emit LLVM IR for the given list of vasm blocks.
 848    */
 849   void emit(const jit::vector<Vlabel>& labels);
 850
 851 private:
 852
 853   /*
 854    * Custom LLVM IR inserter that can emit inline metadata for tracking
 855    * vasm origins of IR instructions in debug dumps.
 856    */
 857   struct IRBuilderVasmInserter {
 858     explicit IRBuilderVasmInserter(LLVMEmitter& e)
 859       : m_emitter(e)
 860       , m_mdNode(llvm::MDNode::get(m_emitter.m_context,
 861                                    std::vector<llvm::Value*>{}))
 862     {}
 863
 864     void setVinstId(size_t id) { m_instId = id; }
 865
 866    protected:
 867     void InsertHelper(llvm::Instruction* I, const llvm::Twine& Name,
 868                       llvm::BasicBlock* BB,
 869                       llvm::BasicBlock::iterator InsertPt) const {
 870       if (BB) BB->getInstList().insert(InsertPt, I);
 871       I->setName(Name);
 872
 873       ONTRACE(5, I->setDebugLoc(llvm::DebugLoc::get(m_instId, 0, m_mdNode)));
 874     }
 875
 876    private:
 877     LLVMEmitter& m_emitter;
 878     llvm::MDNode* m_mdNode;
 879     size_t m_instId{0};
 880   };
 881
 882   /*
 883    * RegInfo is used to track information about Vregs, including their
 884    * corresponding llvm::Value and the Vinstr that defined them.
 885    */
 886   struct RegInfo {
 887     llvm::Value* llval;
 888     llvm::Value* flagTmp;
 889     Vinstr inst;
 890   };
 891
 892   struct LLVMFixup {
 893     uint64_t id;
 894     Fixup fixup;
 895   };
 896
 897   struct LLVMBindJmp {
 898     uint32_t id;
 899     TCA stub;
 900   };
 901
 902   struct LLVMFallback {
 903     uint32_t id;
 904     SrcKey dest;
 905   };
 906
 907   /*
 908    * PhiInfo is responsible for the bookkeeping needed to transform vasm's
 909    * phijmp/phidef instructions into LLVM phi nodes.
 910    */
 911   struct PhiInfo {
 912     void phij(LLVMEmitter& e, llvm::BasicBlock* fromLabel,
 913               const VregList& uses) {
 914       UseInfo useInfo{fromLabel, uses};
 915
 916       // Update LLVM phi instructions if they've already been emitted; otherwise
 917       // enqueue useInfo so that phidef() can emit the uses.
 918       if (m_phis.size() != 0) {
 919         assert(m_phis.size() == useInfo.uses.size());
 920         for (auto phiInd = 0; phiInd < m_phis.size(); ++phiInd) {
 921           addIncoming(e, useInfo, phiInd);
 922         }
 923       } else {
 924         m_pendingPreds.emplace_back(std::move(useInfo));
 925       }
 926     }
 927
 928     void phidef(LLVMEmitter& e, llvm::BasicBlock* toLabel,
 929                 const VregList& defs) {
 930       assert(m_phis.size() == 0);
 931       assert(m_pendingPreds.size() > 0);
 932
 933       m_toLabel = toLabel;
 934       m_defs = defs;
 935
 936       for (auto phiInd = 0; phiInd < m_defs.size(); ++phiInd) {
 937         llvm::Type* type = e.value(m_pendingPreds[0].uses[phiInd])->getType();
 938         llvm::PHINode* phi = e.m_irb.CreatePHI(type, 1);
 939         m_phis.push_back(phi);
 940
 941         // Emit uses for the phi* instructions which preceded this phidef.
 942         for (auto& useInfo : m_pendingPreds) {
 943           addIncoming(e, useInfo, phiInd);
 944         }
 945         e.defineValue(m_defs[phiInd], phi);
 946       }
 947     }
 948
 949    private:
 950     struct UseInfo {
 951       llvm::BasicBlock* fromLabel;
 952       VregList uses;
 953     };
 954
 955     void addIncoming(LLVMEmitter& e, UseInfo& useInfo, unsigned phiInd) {
 956       m_phis[phiInd]->addIncoming(e.value(useInfo.uses[phiInd]),
 957                                   useInfo.fromLabel);
 958       auto typeStr = llshow(e.value(useInfo.uses[phiInd])->getType());
 959       FTRACE(1,
 960              "phidef --> phiInd:{}, type:{}, incoming:{}, use:%{}, "
 961              "block:{}, def:%{}\n", phiInd, typeStr,
 962              useInfo.fromLabel->getName().str(),
 963              size_t{useInfo.uses[phiInd]},
 964              m_toLabel->getName().str(), size_t{m_defs[phiInd]});
 965     }
 966
 967     jit::vector<UseInfo> m_pendingPreds;
 968     jit::vector<llvm::PHINode*> m_phis;
 969     VregList m_defs;
 970     llvm::BasicBlock* m_toLabel;
 971   };
 972
 973   static constexpr unsigned kFSAddressSpace = 257;
 974
 975 #define O(name, ...) void emit(const name&);
 976 VASM_OPCODES
 977 #undef O
 978
 979   llvm::Value* getGlobal(const std::string& name, int64_t value,
 980                          llvm::Type* type);
 981   void emitTrap();
 982   void emitCall(const Vinstr& instr);
 983   void emit(const bindjcc1st&, SrcKey);
 984   void emit(const bindjcc2nd&, SrcKey);
 985   llvm::Value* emitCmpForCC(Vreg sf, ConditionCode cc);
 986
 987   llvm::Value* emitFuncPtr(const std::string& name,
 988                            llvm::FunctionType* type,
 989                            uint64_t address);
 990   llvm::CallInst* emitTraceletTailCall(llvm::Value* target);
 991
 992   // Emit code for the pointer. Return value is of the given type, or
 993   // <i{bits} *> for the second overload.
 994   llvm::Value* emitPtr(const Vptr s, llvm::Type* ptrTy);
 995   llvm::Value* emitPtr(const Vptr s, size_t bits = 64);
 996
 997   template<typename Taken>
 998   void emitJcc(Vreg sf, ConditionCode cc, const char* takenSuffix,
 999                Taken taken);
1000
1001   llvm::Type* ptrType(llvm::Type* ty, unsigned addressSpace = 0) const;
1002   llvm::Type* intNType(size_t bits) const;
1003   llvm::Type* ptrIntNType(size_t bits, bool inFS) const;
1004
1005   llvm::Value* getReturnAddress();
1006   UNUSED llvm::Value* getFrameAddress();
1007   UNUSED llvm::Value* getStackPointer();
1008
1009   UNUSED void emitAsm(const std::string& asmStatement,
1010                       const std::string& asmConstraints,
1011                       bool hasSideEffects);
1012
1013
1014   llvm::LLVMContext& m_context;
1015   std::unique_ptr<llvm::Module> m_module;
1016   llvm::Function* m_function;
1017   llvm::IRBuilder<true, llvm::ConstantFolder, IRBuilderVasmInserter> m_irb;
1018   std::unique_ptr<TCMemoryManager> m_tcMM;
1019
1020   // Function type used for tail calls to other tracelets.
1021   llvm::FunctionType* m_traceletFnTy{nullptr};
1022
1023   // Mimic HHVM's TypedValue.
1024   llvm::StructType* m_typedValueType{nullptr};
1025
1026   // Saved LLVM intrinsics.
1027   llvm::Function* m_llvmFrameAddress{nullptr};
1028   llvm::Function* m_llvmReadRegister{nullptr};
1029   llvm::Function* m_llvmReturnAddress{nullptr};
1030
1031   // Vreg -> RegInfo map
1032   jit::vector<RegInfo> m_valueInfo;
1033
1034   // Vlabel -> llvm::BasicBlock map
1035   jit::vector<llvm::BasicBlock*> m_blocks;
1036
1037   jit::hash_map<llvm::BasicBlock*, PhiInfo> m_phiInfos;
1038
1039   // Pending Fixups that must be processed after codegen
1040   jit::vector<LLVMFixup> m_fixups;
1041
1042   // Pending service requests that must be processed after codegen
1043   jit::vector<LLVMBindJmp> m_bindjmps;
1044   jit::vector<LLVMFallback> m_fallbacks;
1045
1046   // Vector of vasm instruction strings, used for printing in llvm IR dumps.
1047   jit::vector<std::string> m_instStrs;
1048
1049   // The next id to use for LLVM location record. These IDs only have to
1050   // be unique within the function.
1051   uint32_t m_nextLocRec{1};
1052
1053   const Vunit& m_unit;
1054   Vasm::AreaList& m_areas;
1055
1056   // Special regs.
1057   llvm::Value* m_rVmTl{nullptr};
1058   llvm::Value* m_rVmFp{nullptr};
1059
1060   // Faux personality for emitting landingpad.
1061   llvm::Function* m_personalityFunc;
1062
1063   llvm::Function* m_fabs{nullptr};
1064
1065   // Commonly used types. Some LLVM APIs require non-consts.
1066   llvm::IntegerType* m_int8;
1067   llvm::IntegerType* m_int16;
1068   llvm::IntegerType* m_int32;
1069   llvm::IntegerType* m_int64;
1070
1071   llvm::PointerType* m_int8Ptr;
1072   llvm::PointerType* m_int16Ptr;
1073   llvm::PointerType* m_int32Ptr;
1074   llvm::PointerType* m_int64Ptr;
1075
1076   llvm::PointerType* m_int8FSPtr;
1077   llvm::PointerType* m_int16FSPtr;
1078   llvm::PointerType* m_int32FSPtr;
1079   llvm::PointerType* m_int64FSPtr;
1080
1081   // Commonly used constants. No const either.
1082   llvm::ConstantInt* m_int8Zero;
1083   llvm::ConstantInt* m_int8One;
1084   llvm::ConstantInt* m_int16Zero;
1085   llvm::ConstantInt* m_int16One;
1086   llvm::ConstantInt* m_int32Zero;
1087   llvm::ConstantInt* m_int32One;
1088   llvm::ConstantInt* m_int64Zero;
1089   llvm::ConstantInt* m_int64One;
1090
1091   llvm::UndefValue*  m_int64Undef;
1092 };
1093
1094 void LLVMEmitter::emit(const jit::vector<Vlabel>& labels) {
1095   // Make sure all the llvm blocks are emitted in the order given by
1096   // layoutBlocks, regardless of which ones we need to use as jump targets
1097   // first.
1098   for (auto label : labels) {
1099     block(label);
1100   }
1101
1102   auto const traceInstrs = Trace::moduleEnabled(Trace::llvm, 5);
1103
1104   for (auto label : labels) {
1105     auto& b = m_unit.blocks[label];
1106     m_irb.SetInsertPoint(block(label));
1107
1108     // TODO(#5376594): before rVmFp is SSA-ified we are using the hack below.
1109     m_valueInfo[Vreg(x64::rVmFp)].llval = m_rVmFp;
1110
1111     for (auto& inst : b.code) {
1112       if (traceInstrs) {
1113         m_irb.setVinstId(m_instStrs.size());
1114         m_instStrs.emplace_back(show(m_unit, inst).c_str());
1115       }
1116
1117       switch (inst.op) {
1118 // This list will eventually go away; for now only a very small subset of
1119 // operations are supported.
1120 #define SUPPORTED_OPS \
1121 O(addli) \
1122 O(addlm) \
1123 O(addq) \
1124 O(addqi) \
1125 O(addqim) \
1126 O(addsd) \
1127 O(andb) \
1128 O(andbi) \
1129 O(andbim) \
1130 O(andl) \
1131 O(andli) \
1132 O(andq) \
1133 O(andqi) \
1134 O(bindjmp) \
1135 O(bindexit) \
1136 O(bindaddr) \
1137 O(debugtrap) \
1138 O(defvmsp) \
1139 O(cloadq) \
1140 O(cmovq) \
1141 O(cmpb) \
1142 O(cmpbi) \
1143 O(cmpbim) \
1144 O(cmpl) \
1145 O(cmpli) \
1146 O(cmplim) \
1147 O(cmplm) \
1148 O(cmpq) \
1149 O(cmpqi) \
1150 O(cmpqim) \
1151 O(cmpqm) \
1152 O(cvttsd2siq) \
1153 O(cvtsi2sd) \
1154 O(cvtsi2sdm) \
1155 O(copy) \
1156 O(copy2) \
1157 O(copyargs) \
1158 O(decl) \
1159 O(declm) \
1160 O(decq) \
1161 O(decqm) \
1162 O(divsd) \
1163 O(imul) \
1164 O(fallback) \
1165 O(fallbackcc) \
1166 O(incwm) \
1167 O(incl) \
1168 O(inclm) \
1169 O(incq) \
1170 O(incqm) \
1171 O(incqmlock) \
1172 O(jcc) \
1173 O(jmp) \
1174 O(jmpr) \
1175 O(jmpm) \
1176 O(ldimmb) \
1177 O(ldimm) \
1178 O(lea) \
1179 O(loaddqu) \
1180 O(load) \
1181 O(loadb) \
1182 O(loadl) \
1183 O(loadsd) \
1184 O(loadzbl) \
1185 O(loadzbq) \
1186 O(loadzlq) \
1187 O(loadqp) \
1188 O(leap) \
1189 O(movb) \
1190 O(movl) \
1191 O(movzbl) \
1192 O(movzbq) \
1193 O(mulsd) \
1194 O(mul) \
1195 O(neg) \
1196 O(nop) \
1197 O(not) \
1198 O(orq) \
1199 O(orqi) \
1200 O(orqim) \
1201 O(roundsd) \
1202 O(srem) \
1203 O(sar) \
1204 O(sarqi) \
1205 O(setcc) \
1206 O(shlli) \
1207 O(shl) \
1208 O(shlqi) \
1209 O(shrli) \
1210 O(shrqi) \
1211 O(sqrtsd) \
1212 O(store) \
1213 O(storeb) \
1214 O(storebi) \
1215 O(storedqu) \
1216 O(storel) \
1217 O(storeli) \
1218 O(storeqi) \
1219 O(storesd) \
1220 O(storew) \
1221 O(storewi) \
1222 O(subbi) \
1223 O(subl) \
1224 O(subli) \
1225 O(subq) \
1226 O(subqi) \
1227 O(subsd) \
1228 O(svcreq) \
1229 O(syncvmfp) \
1230 O(syncvmsp) \
1231 O(testb) \
1232 O(testbi) \
1233 O(testbim) \
1234 O(testl) \
1235 O(testli) \
1236 O(testlim) \
1237 O(testq) \
1238 O(testqm) \
1239 O(testqim) \
1240 O(ud2) \
1241 O(xorb) \
1242 O(xorbi) \
1243 O(xorq) \
1244 O(xorqi) \
1245 O(landingpad) \
1246 O(ldretaddr) \
1247 O(movretaddr) \
1248 O(retctrl) \
1249 O(absdbl) \
1250 O(phijmp) \
1251 O(phijcc) \
1252 O(phidef)
1253 #define O(name) case Vinstr::name: emit(inst.name##_); break;
1254   SUPPORTED_OPS
1255 #undef O
1256 #undef SUPPORTED_OPS
1257
1258       case Vinstr::vcall:
1259       case Vinstr::vinvoke:
1260         emitCall(inst);
1261         break;
1262
1263       case Vinstr::bindjcc1st:
1264         emit(inst.bindjcc1st_, inst.origin->marker().sk());
1265         break;
1266
1267       case Vinstr::bindjcc2nd:
1268         emit(inst.bindjcc2nd_, inst.origin->marker().sk());
1269         break;
1270
1271       // These instructions are intentionally unsupported for a variety of
1272       // reasons, and if code-gen-x64.cpp emits one it's a bug:
1273       case Vinstr::call:
1274       case Vinstr::callm:
1275       case Vinstr::callr:
1276       case Vinstr::unwind:
1277       case Vinstr::nothrow:
1278       case Vinstr::syncpoint:
1279       case Vinstr::cqo:
1280       case Vinstr::idiv:
1281       case Vinstr::sarq:
1282       case Vinstr::shlq:
1283       case Vinstr::ret:
1284       case Vinstr::push:
1285       case Vinstr::pushm:
1286       case Vinstr::pop:
1287       case Vinstr::popm:
1288       case Vinstr::psllq:
1289       case Vinstr::psrlq:
1290       case Vinstr::fallthru:
1291         always_assert_flog(false,
1292                            "Banned opcode in B{}: {}",
1293                            size_t(label), show(m_unit, inst));
1294
1295       // ARM opcodes:
1296       case Vinstr::asrv:
1297       case Vinstr::brk:
1298       case Vinstr::cbcc:
1299       case Vinstr::hcsync:
1300       case Vinstr::hcnocatch:
1301       case Vinstr::hcunwind:
1302       case Vinstr::hostcall:
1303       case Vinstr::lslv:
1304       case Vinstr::tbcc:
1305       // Fallthrough. Eventually we won't have a default case.
1306       default:
1307         throw FailedLLVMCodeGen("Unsupported opcode in B{}: {}",
1308                                 size_t(label), show(m_unit, inst));
1309       }
1310
1311       visitDefs(m_unit, inst, [&](Vreg def) {
1312         defineValue(def, inst);
1313       });
1314     }
1315   }
1316
1317   finalize();
1318 }
1319
1320 llvm::Value* LLVMEmitter::getGlobal(const std::string& name,
1321                                     int64_t address,
1322                                     llvm::Type* type) {
1323   m_tcMM->registerSymbolAddress(name, address);
1324   return m_module->getOrInsertGlobal(name, type);
1325 }
1326
1327 void LLVMEmitter::emit(const addli& inst) {
1328   defineValue(inst.d, m_irb.CreateAdd(cns(inst.s0.l()), value(inst.s1)));
1329 }
1330
1331 void LLVMEmitter::emit(const addlm& inst) {
1332   auto ptr = emitPtr(inst.m, 32);
1333   auto result = m_irb.CreateAdd(value(inst.s0), m_irb.CreateLoad(ptr));
1334   defineFlagTmp(inst.sf, result);
1335   m_irb.CreateStore(result, ptr);
1336 }
1337
1338 void LLVMEmitter::emit(const addq& inst) {
1339   defineValue(inst.d, m_irb.CreateAdd(value(inst.s0), value(inst.s1)));
1340 }
1341
1342 void LLVMEmitter::emit(const addqi& inst) {
1343   // TODO(#5134526): ignore %rsp adjustments for now. They are typically
1344   // emitted in unwind handler.
1345   if (inst.s1 == reg::rsp) return;
1346   defineValue(inst.d, m_irb.CreateAdd(value(inst.s1), cns(inst.s0.q())));
1347 }
1348
1349 void LLVMEmitter::emit(const addqim& inst) {
1350   auto ptr = emitPtr(inst.m, 64);
1351   auto result = m_irb.CreateAdd(cns(inst.s0.q()), m_irb.CreateLoad(ptr));
1352   defineFlagTmp(inst.sf, result);
1353   m_irb.CreateStore(result, ptr);
1354 }
1355
1356 void LLVMEmitter::emit(const addsd& inst) {
1357   defineValue(inst.d, m_irb.CreateFAdd(asDbl(value(inst.s0)),
1358                                        asDbl(value(inst.s1))));
1359 }
1360
1361 void LLVMEmitter::emit(const andb& inst) {
1362   defineValue(inst.d, m_irb.CreateAnd(value(inst.s0), value(inst.s1)));
1363 }
1364
1365 void LLVMEmitter::emit(const andbi& inst) {
1366   defineValue(inst.d, m_irb.CreateAnd(cns(inst.s0.b()), value(inst.s1)));
1367 }
1368
1369 void LLVMEmitter::emit(const andbim& inst) {
1370   auto ptr = emitPtr(inst.m, 8);
1371   auto result = m_irb.CreateAnd(cns(inst.s.b()), m_irb.CreateLoad(ptr));
1372   defineFlagTmp(inst.sf, result);
1373   m_irb.CreateStore(result, ptr);
1374 }
1375
1376 void LLVMEmitter::emit(const andl& inst) {
1377   defineValue(inst.d, m_irb.CreateAnd(value(inst.s0), value(inst.s1)));
1378 }
1379
1380 void LLVMEmitter::emit(const andli& inst) {
1381   defineValue(inst.d, m_irb.CreateAnd(cns(inst.s0.l()), value(inst.s1)));
1382 }
1383
1384 void LLVMEmitter::emit(const andq& inst) {
1385   defineValue(inst.d, m_irb.CreateAnd(value(inst.s0), value(inst.s1)));
1386 }
1387
1388 void LLVMEmitter::emit(const andqi& inst) {
1389   defineValue(inst.d, m_irb.CreateAnd(cns(inst.s0.q()), value(inst.s1)));
1390 }
1391
1392 template<typename Taken>
1393 void LLVMEmitter::emitJcc(Vreg sf, ConditionCode cc, const char* takenName,
1394                           Taken taken) {
1395   auto blockName = m_irb.GetInsertBlock()->getName().str();
1396   auto nextBlock =
1397     llvm::BasicBlock::Create(m_context,
1398                              folly::to<std::string>(blockName, '_'),
1399                              m_function);
1400   nextBlock->moveAfter(m_irb.GetInsertBlock());
1401   auto takenBlock =
1402     llvm::BasicBlock::Create(m_context,
1403                              folly::to<std::string>(blockName, "_", takenName),
1404                              m_function);
1405   takenBlock->moveAfter(nextBlock);
1406
1407   auto cond = emitCmpForCC(sf, cc);
1408   m_irb.CreateCondBr(cond, takenBlock, nextBlock);
1409
1410   m_irb.SetInsertPoint(takenBlock);
1411   taken();
1412
1413   m_irb.SetInsertPoint(nextBlock);
1414 }
1415
1416 void LLVMEmitter::emit(const bindjmp& inst) {
1417   // bindjmp is a smashable tail call to a service request stub. The stub needs
1418   // to reference the address of the tail call, but we don't know the address
1419   // of the tail call until after codegen. This means we either need to emit
1420   // the stub here and patch the rip-relative lea in the stub after codegen, or
1421   // emit a tail call to a dummy address that we'll patch after codegen when we
1422   // can emit the stub with the right address. When given the choice between
1423   // lying to our stub emitter or llvm it's generally better to lie to the stub
1424   // emitter, so that's what we do here.
1425
1426   auto& frozen = m_areas[size_t(AreaIndex::Frozen)].code;
1427   auto stub = mcg->getFreeStub(frozen, &mcg->cgFixups());
1428   ServiceReqArgVec args;
1429   packServiceReqArgs(args,
1430                      RipRelative(mcg->code.base()),
1431                      inst.target.toAtomicInt(),
1432                      inst.trflags.packed);
1433   auto reqIp = mcg->backEnd().emitServiceReqWork(
1434     frozen, stub, SRFlags::None, REQ_BIND_JMP, args);
1435
1436   auto stubName = folly::sformat("bindjmpStub_{}", reqIp);
1437   auto stubFunc = emitFuncPtr(stubName, m_traceletFnTy, uint64_t(reqIp));
1438   auto call = emitTraceletTailCall(stubFunc);
1439   // TODO(t5742996): call->setSmashable();
1440
1441   auto id = m_nextLocRec++;
1442   call->setMetadata(llvm::LLVMContext::MD_locrec,
1443                     llvm::MDNode::get(m_context, cns(id)));
1444   m_bindjmps.emplace_back(LLVMBindJmp{id, reqIp});
1445 }
1446
1447 // Emitting a real REQ_BIND_SIDE_EXIT or REQ_BIND_JMPCC_(FIRST|SECOND) only
1448 // makes sense if we can guarantee that llvm will emit a smashable jcc. Until
1449 // then, we jcc to a REQ_BIND_JMP.
1450 void LLVMEmitter::emit(const bindexit& inst) {
1451   emitJcc(
1452     inst.sf, inst.cc, "exit",
1453     [&] {
1454       emit(bindjmp{inst.target, inst.trflags});
1455     }
1456   );
1457 }
1458
1459 void LLVMEmitter::emit(const bindjcc1st& inst, SrcKey instSk) {
1460   emitJcc(
1461     inst.sf, inst.cc, "jcc1",
1462     [&] {
1463       emit(bindjmp{{instSk.func(), inst.targets[1], instSk.resumed()}});
1464     }
1465   );
1466
1467   emit(bindjmp{{instSk.func(), inst.targets[0], instSk.resumed()}});
1468 }
1469
1470 void LLVMEmitter::emit(const bindjcc2nd& inst, SrcKey instSk) {
1471   emitJcc(
1472     inst.sf, inst.cc, "jcc2",
1473     [&] {
1474       emit(bindjmp{{instSk.func(), inst.target, instSk.resumed()}});
1475     }
1476   );
1477 }
1478
1479 void LLVMEmitter::emit(const bindaddr& inst) {
1480   // inst.dest is a pointer to memory allocated in globalData, so we can just
1481   // do what vasm does here.
1482
1483   auto& frozen = m_areas[size_t(AreaIndex::Frozen)].code;
1484   mcg->setJmpTransID((TCA)inst.dest);
1485   *inst.dest = emitEphemeralServiceReq(
1486     frozen,
1487     mcg->getFreeStub(frozen, &mcg->cgFixups()),
1488     REQ_BIND_ADDR,
1489     inst.dest,
1490     inst.sk.toAtomicInt(),
1491     TransFlags{}.packed
1492   );
1493   mcg->cgFixups().m_codePointers.insert(inst.dest);
1494 }
1495
1496 void LLVMEmitter::emit(const defvmsp& inst) {
1497   defineValue(inst.d, value(x64::rVmSp));
1498 }
1499
1500 llvm::Value* LLVMEmitter::emitFuncPtr(const std::string& name,
1501                                       llvm::FunctionType* type,
1502                                       uint64_t address) {
1503   auto funcPtr = m_module->getFunction(name);
1504   if (!funcPtr) {
1505     m_tcMM->registerSymbolAddress(name, address);
1506     funcPtr = llvm::Function::Create(type,
1507                                      llvm::GlobalValue::ExternalLinkage,
1508                                      name,
1509                                      m_module.get());
1510   }
1511   return funcPtr;
1512 }
1513
1514 llvm::CallInst* LLVMEmitter::emitTraceletTailCall(llvm::Value* target) {
1515   std::vector<llvm::Value*> args{
1516     value(x64::rVmSp), value(x64::rVmTl), value(x64::rVmFp)
1517   };
1518   auto call = m_irb.CreateCall(target, args);
1519   call->setCallingConv(llvm::CallingConv::X86_64_HHVM_TC);
1520   call->setTailCallKind(llvm::CallInst::TCK_MustTail);
1521   m_irb.CreateRetVoid();
1522   return call;
1523 }
1524
1525 void LLVMEmitter::emitCall(const Vinstr& inst) {
1526   auto const is_vcall = inst.op == Vinstr::vcall;
1527   auto const vcall = inst.vcall_;
1528   auto const vinvoke = inst.vinvoke_;
1529
1530   // Extract all the relevant information from the appropriate instruction.
1531   auto const call = is_vcall ? vcall.call : vinvoke.call;
1532   auto const& vargs = m_unit.vcallArgs[is_vcall ? vcall.args : vinvoke.args];
1533   auto const dests = m_unit.tuples[is_vcall ? vcall.d : vinvoke.d];
1534   auto const destType = is_vcall ? vcall.destType : vinvoke.destType;
1535   auto const fixup = is_vcall ? vcall.fixup : vinvoke.fixup;
1536
1537   // Generate the right function signature to be used by call/invoke.
1538   // Perhaps caching it can improve the performance.
1539   llvm::Type* returnType = nullptr;
1540   switch (destType) {
1541   case DestType::None:
1542     returnType = m_irb.getVoidTy();
1543     break;
1544   case DestType::SSA:
1545     returnType = m_int64;
1546     break;
1547   case DestType::Byte:
1548     returnType = m_int8;
1549     break;
1550   case DestType::Dbl:
1551     returnType = m_irb.getDoubleTy();
1552     break;
1553   case DestType::SIMD:
1554   case DestType::TV:
1555     returnType = m_typedValueType;
1556     break;
1557   }
1558
1559   std::vector<llvm::Type*> argTypes = { m_int64 };
1560   std::vector<llvm::Value*> args = { value(x64::rVmFp) };
1561   auto doArgs = [&] (const VregList& srcs) {
1562     for(int i = 0; i < srcs.size(); ++i) {
1563       args.push_back(value(srcs[i]));
1564       argTypes.push_back(value(srcs[i])->getType());
1565     }
1566   };
1567   doArgs(vargs.args);
1568   // Handle special case of TypedValue being put on the stack while not
1569   // all arg regs have being used. Since TypedValue is already split into
1570   // regs, we need to manually insert the padding arg.
1571   if (vargs.stkArgs.size() &&
1572       vargs.stkArgs[0].isGP() &&
1573       vargs.args.size() == x64::kNumRegisterArgs - 1) {
1574     args.push_back(m_int64Undef);
1575     argTypes.push_back(m_int64);
1576   }
1577   doArgs(vargs.simdArgs);
1578   doArgs(vargs.stkArgs);
1579
1580   auto const funcType = llvm::FunctionType::get(returnType, argTypes, false);
1581
1582   llvm::Value* funcPtr = nullptr;
1583   switch (call.kind()) {
1584     case CppCall::Kind::Direct:
1585       funcPtr = emitFuncPtr(getNativeFunctionName(call.address()),
1586                             funcType,
1587                             uint64_t(call.address()));
1588       break;
1589
1590     case CppCall::Kind::Virtual:
1591     case CppCall::Kind::ArrayVirt:
1592       throw FailedLLVMCodeGen("Unsupported call type: {}",
1593                               (int)call.kind());
1594
1595     case CppCall::Kind::Destructor: {
1596       assert(vargs.args.size() == 1);
1597       llvm::Value* type = value(call.reg());
1598       type = m_irb.CreateLShr(asInt(type, 64),
1599                               kShiftDataTypeToDestrIndex, "typeIdx");
1600
1601       auto destructors = getGlobal("g_destructors", uint64_t(g_destructors),
1602                                    llvm::VectorType::get(ptrType(funcType),
1603                                                          kDestrTableSize));
1604       funcPtr = m_irb.CreateExtractElement(m_irb.CreateLoad(destructors), type);
1605       break;
1606     }
1607   }
1608
1609   llvm::Instruction* callInst = nullptr;
1610   if (is_vcall) {
1611     auto call = m_irb.CreateCall(funcPtr, args);
1612     call->setCallingConv(llvm::CallingConv::X86_64_HHVM_C);
1613     callInst = call;
1614   } else {
1615     auto normal = block(vinvoke.targets[0]);
1616     auto unwind = block(vinvoke.targets[1]);
1617     auto invoke = m_irb.CreateInvoke(funcPtr, normal, unwind, args);
1618     invoke->setCallingConv(llvm::CallingConv::X86_64_HHVM_C);
1619     callInst = invoke;
1620     // The result can only be used on normal path. The unwind branch cannot
1621     // access return values.
1622     m_irb.SetInsertPoint(normal);
1623   }
1624
1625   // Record location of the call/invoke instruction.
1626   if (fixup.isValid()) {
1627     auto id = m_nextLocRec++;
1628     m_fixups.emplace_back(LLVMFixup{id, fixup});
1629     FTRACE(2, "Adding fixup id {} for {}\n", id, llshow(callInst));
1630     callInst->setMetadata(llvm::LLVMContext::MD_locrec,
1631                           llvm::MDNode::get(m_context, cns(id)));
1632   }
1633
1634   // Extract value(s) from the call.
1635   switch (destType) {
1636   case DestType::None:
1637     // nothing to do
1638     assert(dests.size() == 0);
1639     break;
1640   case DestType::SSA:
1641   case DestType::Byte:
1642   case DestType::Dbl:
1643     assert(dests.size() == 1);
1644     defineValue(dests[0], callInst);
1645     break;
1646   case DestType::TV: {
1647     assert(dests.size() == 2);
1648     if (packed_tv) {
1649       defineValue(dests[0], m_irb.CreateExtractValue(callInst, 2)); // m_data
1650       defineValue(dests[1],
1651                   asInt(m_irb.CreateExtractValue(callInst, 1), 64)); // m_type
1652     } else {
1653       defineValue(dests[0], m_irb.CreateExtractValue(callInst, 0)); // m_data
1654       defineValue(dests[1],
1655                   asInt(m_irb.CreateExtractValue(callInst, 1), 64)); // m_type
1656     }
1657     break;
1658   }
1659   case DestType::SIMD: {
1660     assert(dests.size() == 1);
1661     // Do we want to pack it manually into a <2 x i64>? Or bitcast to X86_MMX?
1662     // Leave it as TypedValue for now and see what LLVM optimizer does.
1663     defineValue(dests[0], callInst);
1664     break;
1665   }
1666   }
1667 }
1668
1669 void LLVMEmitter::emit(const cloadq& inst) {
1670   auto trueVal = m_irb.CreateLoad(emitPtr(inst.t, 64));
1671   auto falseVal = value(inst.f);
1672   auto cond = emitCmpForCC(inst.sf, inst.cc);
1673   defineValue(inst.d, m_irb.CreateSelect(cond, trueVal, falseVal));
1674 }
1675
1676 void LLVMEmitter::emit(const cmovq& inst) {
1677   auto cond = emitCmpForCC(inst.sf, inst.cc);
1678   defineValue(inst.d, m_irb.CreateSelect(cond, value(inst.t), value(inst.f)));
1679 }
1680
1681 void LLVMEmitter::emit(const cmpb& inst) {
1682   // no-op. The real work for this and other non-memory cmps happens in
1683   // emitCmpForCC.
1684 }
1685
1686 void LLVMEmitter::emit(const cmpbi& inst) {
1687   // no-op.
1688 }
1689
1690 void LLVMEmitter::emit(const cmpbim& inst) {
1691   defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 8)));
1692 }
1693
1694 void LLVMEmitter::emit(const cmpl& inst) {
1695   // no-op.
1696 }
1697
1698 void LLVMEmitter::emit(const cmpli& inst) {
1699   // no-op.
1700 }
1701
1702 void LLVMEmitter::emit(const cmplim& inst) {
1703   defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 32)));
1704 }
1705
1706 void LLVMEmitter::emit(const cmplm& inst) {
1707   defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 32)));
1708 }
1709
1710 void LLVMEmitter::emit(const cmpq& inst) {
1711   // no-op.
1712 }
1713
1714 void LLVMEmitter::emit(const cmpqi& inst) {
1715   // no-op.
1716 }
1717
1718 void LLVMEmitter::emit(const cmpqim& inst) {
1719   defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 64)));
1720 }
1721
1722 void LLVMEmitter::emit(const cmpqm& inst) {
1723   defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 64)));
1724 }
1725
1726 void LLVMEmitter::emit(const cvttsd2siq& inst) {
1727   defineValue(inst.d, m_irb.CreateFPToSI(value(inst.s), m_int64));
1728 }
1729
1730 void LLVMEmitter::emit(const cvtsi2sd& inst) {
1731   defineValue(inst.d, m_irb.CreateSIToFP(value(inst.s), m_irb.getDoubleTy()));
1732 }
1733
1734 void LLVMEmitter::emit(const cvtsi2sdm& inst) {
1735   auto intVal = m_irb.CreateLoad(emitPtr(inst.s, 64));
1736   defineValue(inst.d, m_irb.CreateSIToFP(intVal, m_irb.getDoubleTy()));
1737 }
1738
1739 void LLVMEmitter::emit(const copy& inst) {
1740   defineValue(inst.d, value(inst.s));
1741 }
1742
1743 void LLVMEmitter::emit(const copy2& inst) {
1744   defineValue(inst.d0, value(inst.s0));
1745   defineValue(inst.d1, value(inst.s1));
1746 }
1747
1748 void LLVMEmitter::emit(const copyargs& inst) {
1749   auto& srcs = m_unit.tuples[inst.s];
1750   auto& dsts = m_unit.tuples[inst.d];
1751   assert(srcs.size() == dsts.size());
1752   for (unsigned i = 0, n = srcs.size(); i < n; ++i) {
1753     defineValue(dsts[i], value(srcs[i]));
1754   }
1755 }
1756
1757 void LLVMEmitter::emit(const debugtrap& inst) {
1758   auto trap = llvm::Intrinsic::getDeclaration(m_module.get(),
1759                                               llvm::Intrinsic::debugtrap);
1760   m_irb.CreateCall(trap);
1761   m_irb.CreateUnreachable();
1762 }
1763
1764 void LLVMEmitter::emit(const decl& inst) {
1765   defineValue(inst.d, m_irb.CreateSub(value(inst.s), m_int32One));
1766 }
1767
1768 void LLVMEmitter::emit(const declm& inst) {
1769   auto ptr = emitPtr(inst.m, 32);
1770   auto load = m_irb.CreateLoad(ptr);
1771   auto sub = m_irb.CreateSub(load, m_int32One);
1772   defineFlagTmp(inst.sf, sub);
1773   m_irb.CreateStore(sub, ptr);
1774 }
1775
1776 void LLVMEmitter::emit(const decq& inst) {
1777   defineValue(inst.d, m_irb.CreateSub(value(inst.s), m_int64One));
1778 }
1779
1780 void LLVMEmitter::emit(const decqm& inst) {
1781   auto ptr = emitPtr(inst.m, 64);
1782   auto oldVal = m_irb.CreateLoad(ptr);
1783   auto newVal = m_irb.CreateSub(oldVal, m_int64One);
1784   defineFlagTmp(inst.sf, newVal);
1785   m_irb.CreateStore(newVal, ptr);
1786 }
1787
1788 void LLVMEmitter::emit(const divsd& inst) {
1789   defineValue(inst.d, m_irb.CreateFDiv(asDbl(value(inst.s1)),
1790                                        asDbl(value(inst.s0))));
1791 }
1792
1793 void LLVMEmitter::emit(const imul& inst) {
1794   defineValue(inst.d, m_irb.CreateMul(value(inst.s0), value(inst.s1)));
1795 }
1796
1797 void LLVMEmitter::emit(const fallback& inst) {
1798   assert_not_implemented(inst.trflags.packed == 0);
1799
1800   auto destSR = mcg->tx().getSrcRec(inst.dest);
1801   auto fallback = destSR->getFallbackTranslation();
1802   auto func = emitFuncPtr(folly::sformat("reqRetranslate_{}", fallback),
1803                           m_traceletFnTy,
1804                           uint64_t(destSR->getFallbackTranslation()));
1805   auto call = emitTraceletTailCall(func);
1806   // TODO(t5742996): call->setSmashable();
1807
1808   LLVMFallback req{m_nextLocRec++, inst.dest};
1809   call->setMetadata(llvm::LLVMContext::MD_locrec,
1810                     llvm::MDNode::get(m_context, cns(req.id)));
1811   m_fallbacks.emplace_back(req);
1812 }
1813
1814 void LLVMEmitter::emit(const fallbackcc& inst) {
1815   assert_not_implemented(inst.trflags.packed == 0);
1816
1817   emitJcc(
1818     inst.sf, inst.cc, "guard",
1819     [&] {
1820       emit(fallback{inst.dest, inst.trflags});
1821     }
1822   );
1823 }
1824
1825 void LLVMEmitter::emit(const incwm& inst) {
1826   auto ptr = emitPtr(inst.m, 16);
1827   auto oldVal = m_irb.CreateLoad(ptr);
1828   auto newVal = m_irb.CreateAdd(oldVal, m_int16One);
1829   defineFlagTmp(inst.sf, newVal);
1830   m_irb.CreateStore(newVal, ptr);
1831 }
1832
1833 void LLVMEmitter::emit(const incl& inst) {
1834   defineValue(inst.d, m_irb.CreateAdd(value(inst.s), m_int32One));
1835 }
1836
1837 void LLVMEmitter::emit(const inclm& inst) {
1838   auto ptr = emitPtr(inst.m, 32);
1839   auto load = m_irb.CreateLoad(ptr);
1840   auto add = m_irb.CreateAdd(load, m_int32One);
1841   defineFlagTmp(inst.sf, add);
1842   m_irb.CreateStore(add, ptr);
1843 }
1844
1845 void LLVMEmitter::emit(const incq& inst) {
1846   defineValue(inst.d, m_irb.CreateAdd(value(inst.s), m_int64One));
1847 }
1848
1849 void LLVMEmitter::emit(const incqm& inst) {
1850   auto ptr = emitPtr(inst.m, 64);
1851   auto load = m_irb.CreateLoad(ptr);
1852   auto add = m_irb.CreateAdd(load, m_int64One);
1853   defineFlagTmp(inst.sf, add);
1854   m_irb.CreateStore(add, ptr);
1855 }
1856
1857 void LLVMEmitter::emit(const incqmlock& inst) {
1858   auto ptr = emitPtr(inst.m, 64);
1859   m_irb.CreateAtomicRMW(llvm::AtomicRMWInst::Add, ptr,
1860                         m_int64One, llvm::SequentiallyConsistent);
1861   // Unlike the other inc*m instruction, we don't define a flagTmp here. The
1862   // value returned by llvm's atomicrmw is the old value, while the x64 incq
1863   // instruction this is based on sets flags based on the new value. Nothing
1864   // currently consumes the sf from an incqmlock instruction; if this changes
1865   // we'll deal with it then.
1866 }
1867
1868 static llvm::CmpInst::Predicate ccToPred(ConditionCode cc) {
1869   using Cmp = llvm::CmpInst;
1870   switch (cc) {
1871     case CC_E:  return Cmp::ICMP_EQ;
1872     case CC_NE: return Cmp::ICMP_NE;
1873     case CC_L:  return Cmp::ICMP_SLT;
1874     case CC_LE: return Cmp::ICMP_SLE;
1875     case CC_G:  return Cmp::ICMP_SGT;
1876     case CC_GE: return Cmp::ICMP_SGE;
1877     case CC_B:  return Cmp::ICMP_ULT;
1878     case CC_BE: return Cmp::ICMP_ULE;
1879     case CC_A:  return Cmp::ICMP_UGT;
1880     case CC_AE: return Cmp::ICMP_UGE;
1881     default:    throw FailedLLVMCodeGen("Unsupported CC {}", cc_names[cc]);
1882   }
1883 }
1884
1885 llvm::Value* LLVMEmitter::emitCmpForCC(Vreg sf, ConditionCode cc) {
1886   auto& cmp = defInst(sf);
1887   llvm::Value* lhs = nullptr;
1888   llvm::Value* rhs = nullptr;
1889
1890   if (cmp.op == Vinstr::addq) {
1891     lhs = asInt(value(cmp.addq_.d), 64);
1892     rhs = m_int64Zero;
1893   } else if (cmp.op == Vinstr::addqi) {
1894     lhs = asInt(value(cmp.addqi_.d), 64);
1895     rhs = m_int64Zero;
1896   } else if (cmp.op == Vinstr::addqim) {
1897     lhs = flagTmp(sf);
1898     rhs = m_int64Zero;
1899   } else if (cmp.op == Vinstr::cmpb) {
1900     lhs = asInt(value(cmp.cmpb_.s1), 8);
1901     rhs = asInt(value(cmp.cmpb_.s0), 8);
1902   } else if (cmp.op == Vinstr::cmpbi) {
1903     lhs = asInt(value(cmp.cmpbi_.s1), 8);
1904     rhs = cns(cmp.cmpbi_.s0.b());
1905   } else if (cmp.op == Vinstr::cmpbim) {
1906     lhs = flagTmp(sf);
1907     rhs = cns(cmp.cmpbim_.s0.b());
1908   } else if (cmp.op == Vinstr::cmpl) {
1909     lhs = asInt(value(cmp.cmpl_.s1), 32);
1910     rhs = asInt(value(cmp.cmpl_.s0), 32);
1911   } else if (cmp.op == Vinstr::cmpli) {
1912     lhs = asInt(value(cmp.cmpli_.s1), 32);
1913     rhs = cns(cmp.cmpli_.s0.l());
1914   } else if (cmp.op == Vinstr::cmplim) {
1915     lhs = flagTmp(sf);
1916     rhs = cns(cmp.cmplim_.s0.l());
1917   } else if (cmp.op == Vinstr::cmplm) {
1918     lhs = flagTmp(sf);
1919     rhs = asInt(value(cmp.cmplm_.s0), 32);
1920   } else if (cmp.op == Vinstr::cmpq) {
1921     lhs = asInt(value(cmp.cmpq_.s1), 64);
1922     rhs = asInt(value(cmp.cmpq_.s0), 64);
1923   } else if (cmp.op == Vinstr::cmpqi) {
1924     lhs = asInt(value(cmp.cmpqi_.s1), 64);
1925     rhs = cns(cmp.cmpqi_.s0.q());
1926   } else if (cmp.op == Vinstr::cmpqim) {
1927     lhs = flagTmp(sf);
1928     rhs = cns(cmp.cmpqim_.s0.q());
1929   } else if (cmp.op == Vinstr::cmpqm) {
1930     lhs = flagTmp(sf);
1931     rhs = asInt(value(cmp.cmpqm_.s0), 64);
1932   } else if (cmp.op == Vinstr::decl) {
1933     lhs = asInt(value(cmp.decl_.d), 32);
1934     rhs = m_int32Zero;
1935   } else if (cmp.op == Vinstr::declm) {
1936     lhs = flagTmp(sf);
1937     rhs = m_int32Zero;
1938   } else if (cmp.op == Vinstr::decq) {
1939     lhs = asInt(value(cmp.decq_.d), 64);
1940     rhs = m_int64Zero;
1941   } else if (cmp.op == Vinstr::decqm) {
1942     lhs = flagTmp(sf);
1943     rhs = m_int64Zero;
1944   } else if (cmp.op == Vinstr::inclm) {
1945     lhs = flagTmp(sf);
1946     rhs = m_int32Zero;
1947   } else if (cmp.op == Vinstr::incwm) {
1948     lhs = flagTmp(sf);
1949     rhs = m_int16Zero;
1950   } else if (cmp.op == Vinstr::subbi) {
1951     lhs = asInt(value(cmp.subbi_.d), 8);
1952     rhs = m_int8Zero;
1953   } else if (cmp.op == Vinstr::subl) {
1954     lhs = asInt(value(cmp.subl_.d), 32);
1955     rhs = m_int32Zero;
1956   } else if (cmp.op == Vinstr::subli) {
1957     lhs = asInt(value(cmp.subli_.d), 32);
1958     rhs = m_int32Zero;
1959   } else if (cmp.op == Vinstr::subq) {
1960     lhs = asInt(value(cmp.subq_.d), 64);
1961     rhs = m_int64Zero;
1962   } else if (cmp.op == Vinstr::subqi) {
1963     lhs = asInt(value(cmp.subqi_.d), 64);
1964     rhs = m_int64Zero;
1965   } else if (cmp.op == Vinstr::testb ||
1966              cmp.op == Vinstr::testbi ||
1967              cmp.op == Vinstr::testbim) {
1968     lhs = flagTmp(sf);
1969     rhs = m_int8Zero;
1970   } else if (cmp.op == Vinstr::testl ||
1971              cmp.op == Vinstr::testli ||
1972              cmp.op == Vinstr::testlim) {
1973     lhs = flagTmp(sf);
1974     rhs = m_int32Zero;
1975   } else if (cmp.op == Vinstr::testq ||
1976              cmp.op == Vinstr::testqm ||
1977              cmp.op == Vinstr::testqim) {
1978     lhs = flagTmp(sf);
1979     rhs = m_int64Zero;
1980   } else {
1981     throw FailedLLVMCodeGen("Unsupported flags src: {}",
1982                             show(m_unit, cmp));
1983   }
1984
1985   return m_irb.CreateICmp(ccToPred(cc), lhs, rhs);
1986 }
1987
1988 void LLVMEmitter::emit(const jcc& inst) {
1989   auto cond = emitCmpForCC(inst.sf, inst.cc);
1990   auto next  = block(inst.targets[0]);
1991   auto taken = block(inst.targets[1]);
1992
1993   m_irb.CreateCondBr(cond, taken, next);
1994 }
1995
1996 void LLVMEmitter::emit(const jmp& inst) {
1997   m_irb.CreateBr(block(inst.target));
1998 }
1999
2000 void LLVMEmitter::emit(const jmpr& inst) {
2001   auto func = m_irb.CreateIntToPtr(value(inst.target), ptrType(m_traceletFnTy));
2002   emitTraceletTailCall(func);
2003 }
2004
2005 void LLVMEmitter::emit(const jmpm& inst) {
2006   auto func = m_irb.CreateLoad(emitPtr(inst.target,
2007                                        ptrType(ptrType(m_traceletFnTy))));
2008   emitTraceletTailCall(func);
2009 }
2010
2011 void LLVMEmitter::emit(const ldimmb& inst) {
2012   defineValue(inst.d, cns(inst.s.b()));
2013 }
2014
2015 void LLVMEmitter::emit(const ldimm& inst) {
2016   assert(inst.d.isVirt());
2017   defineValue(inst.d, cns(inst.s.q()));
2018 }
2019
2020 void LLVMEmitter::emit(const lea& inst) {
2021   auto value = m_irb.CreatePtrToInt(emitPtr(inst.s, 8), m_int64, "conv");
2022   defineValue(inst.d, value);
2023 }
2024
2025 void LLVMEmitter::emit(const loaddqu& inst) {
2026   // This will need to change if we ever use loaddqu with values that aren't
2027   // TypedValues. Ideally, we'd leave this kind of decision to llvm anyway.
2028   auto value = m_irb.CreateLoad(emitPtr(inst.s, ptrType(m_typedValueType)));
2029   defineValue(inst.d, value);
2030 }
2031
2032 void LLVMEmitter::emit(const load& inst) {
2033   defineValue(inst.d, m_irb.CreateLoad(emitPtr(inst.s, 64)));
2034 }
2035
2036 void LLVMEmitter::emit(const loadb& inst) {
2037   defineValue(inst.d, m_irb.CreateLoad(emitPtr(inst.s, 8)));
2038 }
2039
2040 void LLVMEmitter::emit(const loadl& inst) {
2041   defineValue(inst.d, m_irb.CreateLoad(emitPtr(inst.s, 32)));
2042 }
2043
2044 void LLVMEmitter::emit(const loadsd& inst) {
2045   defineValue(inst.d,
2046               m_irb.CreateLoad(emitPtr(inst.s, ptrType(m_irb.getDoubleTy()))));
2047 }
2048
2049 void LLVMEmitter::emit(const loadzbl& inst) {
2050   auto byteVal = m_irb.CreateLoad(emitPtr(inst.s, 8));
2051   defineValue(inst.d, m_irb.CreateZExt(byteVal, m_int32));
2052 }
2053
2054 void LLVMEmitter::emit(const loadzbq& inst) {
2055   auto byteVal = m_irb.CreateLoad(emitPtr(inst.s, 8));
2056   defineValue(inst.d, m_irb.CreateZExt(byteVal, m_int64));
2057 }
2058
2059 void LLVMEmitter::emit(const loadzlq& inst) {
2060   auto val = m_irb.CreateLoad(emitPtr(inst.s, 32));
2061   defineValue(inst.d, m_irb.CreateZExt(val, m_int64));
2062 }
2063
2064 // loadqp/leap are intended to be rip-relative instructions, but that's not
2065 // necessary for correctness. Depending on the target of the load, it may be
2066 // needed to work with code relocation - see t5662452 for details.
2067 void LLVMEmitter::emit(const loadqp& inst) {
2068   auto addr = m_irb.CreateIntToPtr(cns(inst.s.r.disp), m_int64Ptr);
2069   defineValue(inst.d, m_irb.CreateLoad(addr));
2070 }
2071
2072 void LLVMEmitter::emit(const leap& inst) {
2073   defineValue(inst.d, cns(inst.s.r.disp));
2074 }
2075
2076 void LLVMEmitter::emit(const movb& inst) {
2077   defineValue(inst.d, value(inst.s));
2078 }
2079
2080 void LLVMEmitter::emit(const movl& inst) {
2081   defineValue(inst.d, value(inst.s));
2082 }
2083
2084 void LLVMEmitter::emit(const movzbl& inst) {
2085   defineValue(inst.d, m_irb.CreateZExt(value(inst.s), m_int32));
2086 }
2087
2088 void LLVMEmitter::emit(const movzbq& inst) {
2089   defineValue(inst.d, m_irb.CreateZExt(value(inst.s), m_int64));
2090 }
2091
2092 void LLVMEmitter::emit(const mulsd& inst) {
2093   defineValue(inst.d, m_irb.CreateFMul(asDbl(value(inst.s0)),
2094                                        asDbl(value(inst.s1))));
2095 }
2096
2097 void LLVMEmitter::emit(const mul& inst) {
2098   defineValue(inst.d, m_irb.CreateFMul(asDbl(value(inst.s0)),
2099                                        asDbl(value(inst.s1))));
2100 }
2101
2102 void LLVMEmitter::emit(const neg& inst) {
2103   defineValue(inst.d, m_irb.CreateSub(m_int64Zero, value(inst.s)));
2104 }
2105
2106 void LLVMEmitter::emit(const nop& inst) {
2107 }
2108
2109 void LLVMEmitter::emit(const not& inst) {
2110   defineValue(inst.d, m_irb.CreateXor(value(inst.s), cns(int64_t{-1})));
2111 }
2112
2113 void LLVMEmitter::emit(const orq& inst) {
2114   defineValue(inst.d, m_irb.CreateOr(value(inst.s0), value(inst.s1)));
2115 }
2116
2117 void LLVMEmitter::emit(const orqi& inst) {
2118   defineValue(inst.d, m_irb.CreateOr(cns(inst.s0.q()), value(inst.s1)));
2119 }
2120
2121 void LLVMEmitter::emit(const orqim& inst) {
2122   auto ptr = emitPtr(inst.m, 64);
2123   auto value = m_irb.CreateOr(cns(inst.s0.q()), m_irb.CreateLoad(ptr));
2124   defineFlagTmp(inst.sf, value);
2125   m_irb.CreateStore(value, ptr);
2126 }
2127
2128 void LLVMEmitter::emit(const phijmp& inst) {
2129   m_phiInfos[block(inst.target)].phij(*this, m_irb.GetInsertBlock(),
2130                                       m_unit.tuples[inst.uses]);
2131   m_irb.CreateBr(block(inst.target));
2132 }
2133
2134 void LLVMEmitter::emit(const phijcc& inst) {
2135   auto curBlock = m_irb.GetInsertBlock();
2136   auto next  = block(inst.targets[0]);
2137   auto taken = block(inst.targets[1]);
2138   auto& uses = m_unit.tuples[inst.uses];
2139
2140   m_phiInfos[next].phij(*this, curBlock, uses);
2141   m_phiInfos[taken].phij(*this, curBlock, uses);
2142
2143   auto cond = emitCmpForCC(inst.sf, inst.cc);
2144   m_irb.CreateCondBr(cond, taken, next);
2145 }
2146
2147 void LLVMEmitter::emit(const phidef& inst) {
2148   const VregList& defs = m_unit.tuples[inst.defs];
2149   auto block = m_irb.GetInsertBlock();
2150   m_phiInfos.at(block).phidef(*this, block, defs);
2151 }
2152
2153 llvm::Value* LLVMEmitter::getReturnAddress() {
2154   if (!m_llvmReturnAddress) {
2155     m_llvmReturnAddress =
2156       llvm::Intrinsic::getDeclaration(m_module.get(),
2157                                       llvm::Intrinsic::returnaddress);
2158   }
2159   auto rac = m_irb.CreateCall(m_llvmReturnAddress, m_int32Zero);
2160   rac->setCallingConv(llvm::CallingConv::C);
2161   return m_irb.CreatePtrToInt(rac, m_int64, "retaddr");
2162 }
2163
2164 UNUSED llvm::Value* LLVMEmitter::getFrameAddress() {
2165   if (!m_llvmFrameAddress) {
2166     m_llvmFrameAddress =
2167       llvm::Intrinsic::getDeclaration(m_module.get(),
2168                                       llvm::Intrinsic::frameaddress);
2169   }
2170   auto call = m_irb.CreateCall(m_llvmFrameAddress, m_int32Zero, "framep");
2171   call->setCallingConv(llvm::CallingConv::C);
2172   return m_irb.CreatePtrToInt(call, m_int64, "frameaddress");
2173 }
2174
2175 UNUSED llvm::Value* LLVMEmitter::getStackPointer() {
2176   if (!m_llvmReadRegister) {
2177     m_llvmReadRegister =
2178       llvm::Intrinsic::getDeclaration(m_module.get(),
2179                                       llvm::Intrinsic::read_register,
2180                                       m_int64);
2181   }
2182   auto metadata =
2183     llvm::MDNode::get(m_context, llvm::MDString::get(m_context, "rsp"));
2184   auto call = m_irb.CreateCall(m_llvmReadRegister, metadata, "rspcall");
2185   call->setCallingConv(llvm::CallingConv::C);
2186   return call;
2187 }
2188
2189 UNUSED void LLVMEmitter::emitAsm(const std::string& asmStatement,
2190                                  const std::string& asmConstraints,
2191                                  bool hasSideEffects) {
2192   auto const funcType =
2193     llvm::FunctionType::get(m_irb.getVoidTy(), false);
2194   auto const iasm = llvm::InlineAsm::get(funcType, asmStatement, asmConstraints,
2195                                          hasSideEffects);
2196   auto call = m_irb.CreateCall(iasm, "");
2197   call->setCallingConv(llvm::CallingConv::C);
2198 }
2199
2200 void LLVMEmitter::emit(const ldretaddr& inst) {
2201   auto const ptr = m_irb.CreateBitCast(emitPtr(inst.s, 8),
2202                                        ptrType(ptrType(m_traceletFnTy)),
2203                                        "bcast");
2204   defineValue(inst.d, m_irb.CreateLoad(ptr));
2205 }
2206
2207 void LLVMEmitter::emit(const movretaddr& inst) {
2208   defineValue(inst.d, m_irb.CreatePtrToInt(value(inst.s), m_int64));
2209 }
2210
2211 void LLVMEmitter::emit(const retctrl& inst) {
2212   // "Return" with a tail call to the loaded address
2213   emitTraceletTailCall(value(inst.s));
2214 }
2215
2216 void LLVMEmitter::emit(const absdbl& inst) {
2217   if (!m_fabs) {
2218     m_fabs = llvm::Intrinsic::getDeclaration(
2219       m_module.get(),
2220       llvm::Intrinsic::fabs,
2221       std::vector<llvm::Type*>{m_irb.getDoubleTy()}
2222     );
2223   }
2224   defineValue(inst.d, m_irb.CreateCall(m_fabs, asDbl(value(inst.s))));
2225 }
2226
2227 void LLVMEmitter::emit(const roundsd& inst) {
2228   auto roundID = [&]{
2229     switch (inst.dir) {
2230       case RoundDirection::nearest:
2231         return llvm::Intrinsic::round;
2232
2233       case RoundDirection::floor:
2234         return llvm::Intrinsic::floor;
2235
2236       case RoundDirection::ceil:
2237         return llvm::Intrinsic::ceil;
2238
2239       case RoundDirection::truncate:
2240         return llvm::Intrinsic::trunc;
2241     }
2242     not_reached();
2243   }();
2244
2245   auto func = llvm::Intrinsic::getDeclaration(m_module.get(), roundID);
2246   defineValue(inst.d, m_irb.CreateCall(func, value(inst.s)));
2247 }
2248
2249 void LLVMEmitter::emit(const srem& inst) {
2250   defineValue(inst.d, m_irb.CreateSRem(value(inst.s0), value(inst.s1)));
2251 }
2252
2253 void LLVMEmitter::emit(const sar& inst) {
2254   defineValue(inst.d, m_irb.CreateAShr(value(inst.s1), value(inst.s0)));
2255 }
2256
2257 void LLVMEmitter::emit(const sarqi& inst) {
2258   defineValue(inst.d, m_irb.CreateAShr(value(inst.s1), inst.s0.q()));
2259 }
2260
2261 void LLVMEmitter::emit(const setcc& inst) {
2262   defineValue(inst.d,
2263               m_irb.CreateZExt(emitCmpForCC(inst.sf, inst.cc), m_int8));
2264 }
2265
2266 void LLVMEmitter::emit(const shlli& inst) {
2267   defineValue(inst.d, m_irb.CreateShl(value(inst.s1), inst.s0.q()));
2268 }
2269
2270 void LLVMEmitter::emit(const shl& inst) {
2271   defineValue(inst.d, m_irb.CreateShl(value(inst.s1), value(inst.s0)));
2272 }
2273
2274 void LLVMEmitter::emit(const shlqi& inst) {
2275   defineValue(inst.d, m_irb.CreateShl(value(inst.s1), inst.s0.q()));
2276 }
2277
2278 void LLVMEmitter::emit(const shrli& inst) {
2279   defineValue(inst.d, m_irb.CreateLShr(value(inst.s1), inst.s0.q()));
2280 }
2281
2282 void LLVMEmitter::emit(const shrqi& inst) {
2283   defineValue(inst.d, m_irb.CreateLShr(value(inst.s1), inst.s0.q()));
2284 }
2285
2286 void LLVMEmitter::emit(const sqrtsd& inst) {
2287   auto sqrtFunc = llvm::Intrinsic::getDeclaration(m_module.get(),
2288                                                   llvm::Intrinsic::sqrt);
2289   defineValue(inst.d, m_irb.CreateCall(sqrtFunc, value(inst.s)));
2290 }
2291
2292 void LLVMEmitter::emit(const store& inst) {
2293   auto val = value(inst.s);
2294   assert(val->getType()->getPrimitiveSizeInBits() == 64);
2295   m_irb.CreateStore(val, emitPtr(inst.d, ptrType(val->getType())));
2296 }
2297
2298 void LLVMEmitter::emit(const storeb& inst) {
2299   m_irb.CreateStore(m_irb.CreateZExtOrTrunc(value(inst.s), m_int8),
2300                     emitPtr(inst.m, 8));
2301 }
2302
2303 void LLVMEmitter::emit(const storebi& inst) {
2304   m_irb.CreateStore(cns(inst.s.b()), emitPtr(inst.m, 8));
2305 }
2306
2307 void LLVMEmitter::emit(const storedqu& inst) {
2308   // Like loaddqu, this will need to change if we ever use storedqu with values
2309   // that aren't TypedValues.
2310   m_irb.CreateStore(value(inst.s), emitPtr(inst.m, ptrType(m_typedValueType)));
2311 }
2312
2313 void LLVMEmitter::emit(const storel& inst) {
2314   m_irb.CreateStore(value(inst.s), emitPtr(inst.m, 32));
2315 }
2316
2317 void LLVMEmitter::emit(const storeli& inst) {
2318   m_irb.CreateStore(cns(inst.s.l()), emitPtr(inst.m, 32));
2319 }
2320
2321 void LLVMEmitter::emit(const storeqi& inst) {
2322   m_irb.CreateStore(cns(inst.s.q()), emitPtr(inst.m, 64));
2323 }
2324
2325 void LLVMEmitter::emit(const storesd& inst) {
2326   m_irb.CreateStore(value(inst.s),
2327                     emitPtr(inst.m, ptrType(m_irb.getDoubleTy())));
2328 }
2329
2330 void LLVMEmitter::emit(const storew& inst) {
2331   m_irb.CreateStore(value(inst.s), emitPtr(inst.m, 16));
2332 }
2333
2334 void LLVMEmitter::emit(const storewi& inst) {
2335   m_irb.CreateStore(cns(inst.s.w()), emitPtr(inst.m, 16));
2336 }
2337
2338 void LLVMEmitter::emit(const subbi& inst) {
2339   defineValue(inst.d, m_irb.CreateSub(asInt(value(inst.s1), 8),
2340                                       cns(inst.s0.b())));
2341 }
2342
2343 void LLVMEmitter::emit(const subl& inst) {
2344   defineValue(inst.d, m_irb.CreateSub(value(inst.s1), value(inst.s0)));
2345 }
2346
2347 void LLVMEmitter::emit(const subli& inst) {
2348   defineValue(inst.d, m_irb.CreateSub(value(inst.s1), cns(inst.s0.l())));
2349 }
2350
2351 void LLVMEmitter::emit(const subq& inst) {
2352   defineValue(inst.d, m_irb.CreateSub(value(inst.s1), value(inst.s0)));
2353 }
2354
2355 void LLVMEmitter::emit(const subqi& inst) {
2356   defineValue(inst.d, m_irb.CreateSub(value(inst.s1), cns(inst.s0.q())));
2357 }
2358
2359 void LLVMEmitter::emit(const subsd& inst) {
2360   defineValue(inst.d, m_irb.CreateFSub(asDbl(value(inst.s1)),
2361                                        asDbl(value(inst.s0))));
2362 }
2363
2364 /*
2365  * To leave the TC and perform a service request, translated code is supposed
2366  * to execute a ret instruction after populating the right registers. There are
2367  * a number of different ways to do this, but the most straightforward for now
2368  * is to do a tail call with the right calling convention to a stub with a
2369  * single ret instruction. Rather than emitting a dedicated stub for this, we
2370  * just reuse the ret at the end of enterTCHelper().
2371  */
2372 extern "C" void enterTCReturn();
2373
2374 void LLVMEmitter::emit(const svcreq& inst) {
2375   std::vector<llvm::Value*> args{
2376     value(x64::rVmSp),
2377     value(x64::rVmTl),
2378     value(x64::rVmFp),
2379     cns(reinterpret_cast<uintptr_t>(inst.stub_block)),
2380     cns(uint64_t{inst.req})
2381   };
2382   for (auto arg : m_unit.tuples[inst.args]) {
2383     args.push_back(value(arg));
2384   }
2385
2386   std::vector<llvm::Type*> argTypes(args.size(), m_int64);
2387   auto funcType = llvm::FunctionType::get(m_irb.getVoidTy(), argTypes, false);
2388   auto func = emitFuncPtr(folly::to<std::string>("enterTCServiceReqLLVM_",
2389                                                  argTypes.size()),
2390                           funcType,
2391                           uint64_t(enterTCReturn));
2392   auto call = m_irb.CreateCall(func, args);
2393   call->setCallingConv(llvm::CallingConv::X86_64_HHVM_SR);
2394   call->setTailCallKind(llvm::CallInst::TCK_Tail);
2395   m_irb.CreateRetVoid();
2396 }
2397
2398 void LLVMEmitter::emit(const syncvmfp& inst) {
2399   // Nothing to do really.
2400 }
2401
2402 void LLVMEmitter::emit(const syncvmsp& inst) {
2403   defineValue(x64::rVmSp, value(inst.s));
2404 }
2405
2406 void LLVMEmitter::emit(const testb& inst) {
2407   auto result = m_irb.CreateAnd(asInt(value(inst.s1), 8),
2408                                 asInt(value(inst.s0), 8));
2409   defineFlagTmp(inst.sf, result);
2410 }
2411
2412 void LLVMEmitter::emit(const testbi& inst) {
2413   auto result = m_irb.CreateAnd(asInt(value(inst.s1), 8), inst.s0.b());
2414   defineFlagTmp(inst.sf, result);
2415 }
2416
2417 void LLVMEmitter::emit(const testbim& inst) {
2418   auto lhs = m_irb.CreateLoad(emitPtr(inst.s1, 8));
2419   defineFlagTmp(inst.sf, m_irb.CreateAnd(lhs, inst.s0.b()));
2420 }
2421
2422 void LLVMEmitter::emit(const testl& inst) {
2423   defineFlagTmp(inst.sf, m_irb.CreateAnd(value(inst.s1), value(inst.s0)));
2424 }
2425
2426 void LLVMEmitter::emit(const testli& inst) {
2427   defineFlagTmp(inst.sf, m_irb.CreateAnd(value(inst.s1), inst.s0.l()));
2428 }
2429
2430 void LLVMEmitter::emit(const testlim& inst) {
2431   auto lhs = m_irb.CreateLoad(emitPtr(inst.s1, 32));
2432   defineFlagTmp(inst.sf, m_irb.CreateAnd(lhs, inst.s0.l()));
2433 }
2434
2435 void LLVMEmitter::emit(const testq& inst) {
2436   defineFlagTmp(inst.sf, m_irb.CreateAnd(value(inst.s1), value(inst.s0)));
2437 }
2438
2439 void LLVMEmitter::emit(const testqm& inst) {
2440   auto lhs = m_irb.CreateLoad(emitPtr(inst.s1, 64));
2441   defineFlagTmp(inst.sf, m_irb.CreateAnd(lhs, value(inst.s0)));
2442 }
2443
2444 void LLVMEmitter::emit(const testqim& inst) {
2445   auto lhs = m_irb.CreateLoad(emitPtr(inst.s1, 64));
2446   defineFlagTmp(inst.sf, m_irb.CreateAnd(lhs, inst.s0.q()));
2447 }
2448
2449 void LLVMEmitter::emit(const ud2& inst) {
2450   emitTrap();
2451 }
2452
2453 void LLVMEmitter::emit(const xorb& inst) {
2454   defineValue(inst.d, m_irb.CreateXor(value(inst.s1), value(inst.s0)));
2455 }
2456
2457 void LLVMEmitter::emit(const xorbi& inst) {
2458   defineValue(inst.d, m_irb.CreateXor(value(inst.s1), inst.s0.b()));
2459 }
2460
2461 void LLVMEmitter::emit(const xorq& inst) {
2462   defineValue(inst.d, m_irb.CreateXor(value(inst.s1), value(inst.s0)));
2463 }
2464
2465 void LLVMEmitter::emit(const xorqi& inst) {
2466   defineValue(inst.d, m_irb.CreateXor(value(inst.s1), inst.s0.q()));
2467 }
2468
2469 void LLVMEmitter::emit(const landingpad& inst) {
2470   // This is far from correct, but it's enough to keep the llvm verifier happy
2471   // for now.
2472   auto pad = m_irb.CreateLandingPad(m_typedValueType, m_personalityFunc, 0);
2473   pad->setCleanup(true);
2474 }
2475
2476 void LLVMEmitter::emitTrap() {
2477   auto trap = llvm::Intrinsic::getDeclaration(m_module.get(),
2478                                               llvm::Intrinsic::trap);
2479   m_irb.CreateCall(trap);
2480   m_irb.CreateUnreachable();
2481 }
2482
2483 llvm::Value* LLVMEmitter::emitPtr(const Vptr s, llvm::Type* ptrTy) {
2484   bool inFS =
2485     llvm::cast<llvm::PointerType>(ptrTy)->getAddressSpace() == kFSAddressSpace;
2486   always_assert(s.base != reg::rsp);
2487
2488   auto ptr = s.base.isValid() ? asInt(value(s.base), 64) : cns(int64_t{0});
2489   auto disp = cns(int64_t{s.disp});
2490   if (s.index.isValid()) {
2491     auto scaledIdx = m_irb.CreateMul(asInt(value(s.index), 64),
2492                                      cns(int64_t{s.scale}),
2493                                      "mul");
2494     disp = m_irb.CreateAdd(disp, scaledIdx, "add");
2495   }
2496   ptr = m_irb.CreateIntToPtr(ptr, inFS ? m_int8FSPtr : m_int8Ptr, "conv");
2497   ptr = m_irb.CreateGEP(ptr, disp, "getelem");
2498
2499   if (ptrTy != m_int8) {
2500     ptr = m_irb.CreateBitCast(ptr, ptrTy);
2501   }
2502
2503   return ptr;
2504 }
2505
2506 llvm::Value* LLVMEmitter::emitPtr(const Vptr s, size_t bits) {
2507   return emitPtr(s, ptrIntNType(bits, s.seg == Vptr::FS));
2508 }
2509
2510 llvm::Type* LLVMEmitter::ptrType(llvm::Type* ty, unsigned addressSpace) const {
2511   return llvm::PointerType::get(ty, addressSpace);
2512 }
2513
2514 llvm::Type* LLVMEmitter::intNType(size_t bits) const {
2515   switch (bits) {
2516   default: always_assert(0 && "unsupported bit width");
2517   case 8:  return m_int8;
2518   case 16: return m_int16;
2519   case 32: return m_int32;
2520   case 64: return m_int64;
2521   }
2522 }
2523
2524 llvm::Type* LLVMEmitter::ptrIntNType(size_t bits, bool inFS) const {
2525   switch (bits) {
2526   default: always_assert(0 && "unsupported bit width");
2527   case 8:  return inFS ? m_int8FSPtr  : m_int8Ptr;
2528   case 16: return inFS ? m_int16FSPtr : m_int16Ptr;
2529   case 32: return inFS ? m_int32FSPtr : m_int32Ptr;
2530   case 64: return inFS ? m_int64FSPtr : m_int64Ptr;
2531   }
2532 }
2533
2534 std::string showNewCode(const Vasm::AreaList& areas) DEBUG_ONLY;
2535 std::string showNewCode(const Vasm::AreaList& areas) {
2536   std::ostringstream str;
2537   Disasm disasm(Disasm::Options().indent(2));
2538
2539   for (unsigned i = 0, n = areas.size(); i < n; ++i) {
2540     auto& area = areas[i];
2541     auto const start = area.start;
2542     auto const end = area.code.frontier();
2543
2544     if (start != end) {
2545       str << folly::format("emitted {} bytes of code into area {}:\n",
2546                            end - start, i);
2547       disasm.disasm(str, start, end);
2548       str << '\n';
2549     }
2550   }
2551
2552   return str.str();
2553 }
2554
2555 } // unnamed namespace
2556
2557 void genCodeLLVM(const Vunit& unit, Vasm::AreaList& areas,
2558                  const jit::vector<Vlabel>& labels) {
2559   FTRACE(2, "\nTrying to emit LLVM IR for Vunit:\n{}\n", show(unit));
2560
2561   jit::vector<UndoMarker> undoAll = {UndoMarker(mcg->globalData())};
2562   for(auto const& area : areas) {
2563     undoAll.emplace_back(area.code);
2564   }
2565
2566   try {
2567     LLVMEmitter(unit, areas).emit(labels);
2568     FTRACE(3, "\n{:-^80}\n{}\n",
2569            " x64 after LLVM codegen ", showNewCode(areas));
2570   } catch (const FailedLLVMCodeGen& e) {
2571     always_assert_flog(
2572       RuntimeOption::EvalJitLLVM < 3,
2573       "Mandatory LLVM codegen failed with reason `{}' on unit:\n{}",
2574       e.what(), show(unit)
2575     );
2576     FTRACE(1, "LLVM codegen failed: {}\n", e.what());
2577
2578     // Undo any code/data we may have allocated.
2579     for(auto& marker : undoAll) {
2580       marker.undo();
2581     }
2582     throw e;
2583   } catch (const std::exception& e) {
2584     always_assert_flog(false,
2585                        "Unexpected exception during LLVM codegen: {}\n",
2586                        e.what());
2587   }
2588 }
2589
2590 } }
2591
2592 #else // #ifdef USE_LLVM
2593
2594 namespace HPHP { namespace jit {
2595
2596 void genCodeLLVM(const Vunit& unit, Vasm::AreaList& areas,
2597                  const jit::vector<Vlabel>& labels) {
2598   throw FailedLLVMCodeGen("This build does not support the LLVM backend");
2599 }
2600
2601 } }
2602
2603 #endif // #ifdef USE_LLVM