/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/vasm-llvm.h"

#include "hphp/util/assertions.h"
#include "hphp/util/disasm.h"

#include "hphp/runtime/vm/jit/abi-x64.h"
#include "hphp/runtime/vm/jit/back-end-x64.h"
#include "hphp/runtime/vm/jit/code-gen-x64.h"
#include "hphp/runtime/vm/jit/ir-instruction.h"
#include "hphp/runtime/vm/jit/llvm-locrecs.h"
#include "hphp/runtime/vm/jit/llvm-stack-maps.h"
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/reserved-stack.h"
#include "hphp/runtime/vm/jit/service-requests-inline.h"
#include "hphp/runtime/vm/jit/unwind-x64.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include <llvm/Analysis/Passes.h>
#include <llvm/CodeGen/MachineFunctionAnalysis.h>
#include <llvm/CodeGen/Passes.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/ExecutionEngine/ObjectCache.h>
#include <llvm/ExecutionEngine/RuntimeDyld.h>
#include <llvm/IR/AssemblyAnnotationWriter.h>
#include <llvm/IR/ConstantFolder.h>
#include <llvm/IR/DataLayout.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/InlineAsm.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/TypeBuilder.h>
#include <llvm/IR/Verifier.h>
#include <llvm/IRReader/IRReader.h>
#include <llvm/PassManager.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/ErrorHandling.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/FormattedStream.h>
#include <llvm/Support/Path.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/Scalar.h>
TRACE_SET_MOD(llvm);

namespace HPHP { namespace jit {

namespace {
/*
 * Read an unsigned LEB128 value from data, advancing it past the value.
 */
uintptr_t readULEB128(const uint8_t*& data) {
  uintptr_t result = 0;
  uintptr_t shift = 0;
  unsigned char byte;

  do {
    byte = *data++;
    result |= (byte & 0x7f) << shift;
    shift += 7;
  } while (byte & 0x80);

  return result;
}
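// Worked example (illustrative, using the DWARF spec's canonical encoding):
// the bytes 0xE5 0x8E 0x26 decode to 624485. Each byte contributes its low
// seven bits, LSB first: 0x65 | (0x0E << 7) | (0x26 << 14) == 0x98765, and
// the clear high bit of 0x26 terminates the loop.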
/*
 * Read a signed LEB128 value from data, advancing it past the value.
 */
uintptr_t readSLEB128(const uint8_t*& data) {
  uintptr_t result = 0;
  uintptr_t shift = 0;
  unsigned char byte;

  do {
    byte = *data++;
    result |= (byte & 0x7f) << shift;
    shift += 7;
  } while (byte & 0x80);

  if ((byte & 0x40) && (shift < (sizeof(result) << 3))) {
    result |= (~0 << shift);
  }

  return result;
}
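// Worked example (illustrative): the single byte 0x7E decodes to -2. The loop
// leaves result == 0x7E and shift == 7; bit 0x40 of the final byte is set, so
// the sign extension above ORs in ~0 << 7, producing ...fff80 | 0x7e == -2.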
/*
 * Read and return a T from data, advancing it past the read item.
 */
template<typename T>
T readValue(const uint8_t*& data) {
  T val;
  memcpy(&val, data, sizeof(T));
  data += sizeof(T);
  return val;
}
/*
 * Read an encoded DWARF value from data, advancing it past any data read. This
 * function was adapted from the ExceptionDemo.cpp example in llvm.
 */
uintptr_t readEncodedPointer(const uint8_t*& data, uint8_t encoding) {
  uintptr_t result = 0;
  auto const start = data;

  if (encoding == DW_EH_PE_omit) return result;

  // first get the value
  switch (encoding & 0x0F) {
    case DW_EH_PE_absptr:
      result = readValue<uintptr_t>(data);
      break;
    case DW_EH_PE_uleb128:
      result = readULEB128(data);
      break;
    case DW_EH_PE_sleb128:
      result = readSLEB128(data);
      break;
    case DW_EH_PE_udata2:
      result = readValue<uint16_t>(data);
      break;
    case DW_EH_PE_udata4:
      result = readValue<uint32_t>(data);
      break;
    case DW_EH_PE_udata8:
      result = readValue<uint64_t>(data);
      break;
    case DW_EH_PE_sdata2:
      result = readValue<int16_t>(data);
      break;
    case DW_EH_PE_sdata4:
      result = readValue<int32_t>(data);
      break;
    case DW_EH_PE_sdata8:
      result = readValue<int64_t>(data);
      break;
    default:
      not_implemented();
  }

  // then add relative offset
  switch (encoding & 0x70) {
    case DW_EH_PE_absptr:
      // do nothing
      break;
    case DW_EH_PE_pcrel:
      result += reinterpret_cast<uintptr_t>(start);
      break;
    case DW_EH_PE_textrel:
    case DW_EH_PE_datarel:
    case DW_EH_PE_funcrel:
    case DW_EH_PE_aligned:
    default:
      not_implemented();
  }

  // then apply indirection
  if (encoding & 0x80 /*DW_EH_PE_indirect*/) {
    result = *((uintptr_t*)result);
  }

  return result;
}
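// A sketch of the encoding byte's layout, per the DWARF exception-handling
// conventions the code above follows: the low nibble (0x0F) picks the value
// format (uleb128, udata4, ...), bits 0x70 pick how the value is applied
// (absolute, pc-relative, ...), and bit 0x80 adds a level of indirection.
// For example, 0x9B is DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sleb128.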
/*
 * Information parsed out of the .gcc_except_table section. start and
 * landingPad are offsets from the beginning of the function.
 */
struct EHInfo {
  uintptr_t start;
  uintptr_t length;
  uintptr_t landingPad;
};
/*
 * Parse a .gcc_except_table section as generated by LLVM, extracting regions
 * with nonzero landingpads. This function was also adapted from the
 * ExceptionDemo.cpp example in llvm.
 */
jit::vector<EHInfo> parse_gcc_except_table(const uint8_t* ptr) {
  jit::vector<EHInfo> ret;

  FTRACE(2, "Parsing exception table at {}\n", ptr);
  uint8_t lpStartEncoding = *ptr++;

  if (lpStartEncoding != DW_EH_PE_omit) {
    readEncodedPointer(ptr, lpStartEncoding);
  }

  uint8_t ttypeEncoding = *ptr++;

  if (ttypeEncoding != DW_EH_PE_omit) {
    readULEB128(ptr);
  }

  uint8_t callSiteEncoding = *ptr++;
  uint32_t callSiteTableLength = readULEB128(ptr);
  const uint8_t* callSiteTableStart = ptr;
  const uint8_t* callSiteTableEnd = callSiteTableStart + callSiteTableLength;
  const uint8_t* callSitePtr = callSiteTableStart;

  while (callSitePtr < callSiteTableEnd) {
    uintptr_t start = readEncodedPointer(callSitePtr, callSiteEncoding);
    uintptr_t length = readEncodedPointer(callSitePtr, callSiteEncoding);
    uintptr_t landingPad = readEncodedPointer(callSitePtr, callSiteEncoding);

    uintptr_t actionEntry = readULEB128(callSitePtr);
    // 0 indicates a cleanup entry, the only kind we generate
    always_assert(actionEntry == 0);
    if (landingPad == 0) continue;

    FTRACE(2, "Adding entry: [{},{}): landingPad {}\n",
           start, start + length, landingPad);
    ret.emplace_back(EHInfo{start, length, landingPad});
  }

  return ret;
}
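// For reference, the portion of the table layout this parser walks (a summary
// of the reads above, not a complete description of the format):
//   lpStartEncoding [lpStart] ttypeEncoding [classInfoOffset]
//   callSiteEncoding callSiteTableLength
//   followed by callSiteTableLength bytes of call-site records, each of the
//   form { start, length, landingPad, actionEntry }.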
void reportLLVMError(void* data, const std::string& err, bool gen_crash_diag) {
  always_assert_flog(false, "LLVM fatal error: {}", err);
}

struct LLVMErrorInit {
  LLVMErrorInit() {
    llvm::install_fatal_error_handler(reportLLVMError);
  }

  ~LLVMErrorInit() {
    llvm::remove_fatal_error_handler();
  }
};
static LLVMErrorInit s_llvmErrorInit;
/*
 * TCMemoryManager allows llvm to emit code into the appropriate places in the
 * TC. Currently all code goes into the Main code block.
 */
struct TCMemoryManager : public llvm::RTDyldMemoryManager {
  struct SectionInfo {
    std::unique_ptr<uint8_t[]> data;
    size_t size;
  };

  explicit TCMemoryManager(Vasm::AreaList& areas)
    : m_areas(areas)
  {}

  uint8_t* allocateCodeSection(
    uintptr_t Size, unsigned Alignment, unsigned SectionID,
    llvm::StringRef SectionName
  ) override {
    auto& code = m_areas[static_cast<size_t>(AreaIndex::Main)].code;

    // We override/ignore the alignment and use skew value to compensate.
    uint8_t* ret = code.alloc<uint8_t>(1, Size);
    assert(Alignment < x64::kCacheLineSize &&
           "alignment exceeds cache line size");
    assert(
      m_codeSkew == (reinterpret_cast<size_t>(ret) & (x64::kCacheLineSize - 1))
      && "drift in code skew detected");

    FTRACE(1, "Allocate code section \"{}\" id={} at addr={}, size={},"
           " alignment={}, skew={}\n",
           SectionName.str(), SectionID, ret, Size, Alignment, m_codeSkew);
    return ret;
  }

  uint8_t* allocateDataSection(
    uintptr_t Size, unsigned Alignment, unsigned SectionID,
    llvm::StringRef SectionName, bool IsReadOnly
  ) override {
    assert_not_implemented(Alignment <= 8);
    std::unique_ptr<uint8_t[]> data{new uint8_t[Size]};

    FTRACE(1, "Allocate {} data section \"{}\" id={} at addr={}, size={},"
           " alignment={}\n",
           IsReadOnly ? "read-only" : "read-write",
           SectionName.str(), SectionID, data.get(), Size, Alignment);
    auto it = m_dataSections.emplace(SectionName.str(),
                                     SectionInfo({std::move(data), Size}));
    return it.first->second.data.get();
  }

  virtual void reserveAllocationSpace(uintptr_t CodeSize,
                                      uintptr_t DataSizeRO,
                                      uintptr_t DataSizeRW) override {
    FTRACE(1, "reserve CodeSize={}, DataSizeRO={}, DataSizeRW={}\n", CodeSize,
           DataSizeRO, DataSizeRW);
  }

  virtual bool needsToReserveAllocationSpace() override {
    return true;
  }

  virtual void registerEHFrames(uint8_t* Addr, uint64_t LoadAddr,
                                size_t Size) override {
    // Do nothing; the TC has one huge eh frame.
  }

  virtual void deregisterEHFrames(uint8_t* Addr, uint64_t LoadAddr,
                                  size_t Size) override {
    // Do nothing; the TC has one huge eh frame.
  }

  virtual bool finalizeMemory(std::string* ErrMsg = nullptr) override {
    return false;
  }

  virtual uint64_t getSymbolAddress(const std::string& name) override {
    FTRACE(1, "getSymbolAddress({})\n", name);
    auto element = m_symbols.find(name);
    return element == m_symbols.end() ? 0 : element->second;
  }

  /*
   * Register a symbol's name for later lookup by llvm.
   */
  void registerSymbolAddress(const std::string& name, uint64_t address) {
    auto it = m_symbols.emplace(name, address);
    always_assert((it.second == true || it.first->second == address) &&
                  "symbol already registered with a different value");
  }

  /*
   * Append an arbitrary id to the end of the given prefix to make it unique.
   */
  std::string getUniqueSymbolName(const std::string& prefix) {
    auto name = prefix;
    while (m_symbols.count(name)) {
      name = folly::to<std::string>(prefix, '_', m_nextSymbolId++);
    }
    return name;
  }

  const SectionInfo* getDataSection(const std::string& name) const {
    auto it = m_dataSections.find(name);
    return (it == m_dataSections.end()) ? nullptr : &it->second;
  }

  bool hasDataSection(const std::string& name) const {
    return m_dataSections.count(name);
  }

  uint32_t computeCodeSkew(unsigned alignment) {
    auto& code = m_areas[static_cast<size_t>(AreaIndex::Main)].code;
    m_codeSkew = reinterpret_cast<uint64_t>(code.frontier()) & (alignment - 1);
    return m_codeSkew;
  }

 private:
  Vasm::AreaList& m_areas;

  std::unordered_map<std::string, SectionInfo> m_dataSections;

  uint32_t m_codeSkew{0};

  jit::hash_map<std::string, uint64_t> m_symbols;
  uint32_t m_nextSymbolId{0};
};
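// An example of the skew mechanism (a sketch): if the Main code block's
// frontier sits at an address whose low bits within a 64-byte cache line are
// 0x28, computeCodeSkew(64) records m_codeSkew = 0x28. The "code_skew" module
// flag set during finalization then lets llvm lay the function out as if it
// began at that offset, so alignment assumptions still hold even though
// allocateCodeSection() ignores the requested alignment.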
template<typename T>
std::string llshow(T* val) {
  std::string str;
  {
    llvm::raw_string_ostream os(str);
    val->print(os);
  }
  return str;
}
struct VasmAnnotationWriter : llvm::AssemblyAnnotationWriter {
  explicit VasmAnnotationWriter(const std::vector<std::string>& strs)
    : m_strs(strs)
  {}

  void emitBasicBlockStartAnnot(const llvm::BasicBlock* b,
                                llvm::formatted_raw_ostream& os) override {
    m_curId = -1;
    m_prefix = "";
  }

  void emitInstructionAnnot(const llvm::Instruction* i,
                            llvm::formatted_raw_ostream& os) override {
    SCOPE_EXIT { m_prefix = "\n"; };

    auto dbg = i->getDebugLoc();
    if (dbg.isUnknown() || m_curId == dbg.getLine()) return;

    m_curId = dbg.getLine();
    os << m_prefix << m_strs[m_curId] << "\n";
  }

 private:
  const std::vector<std::string>& m_strs;
  size_t m_curId;
  const char* m_prefix{nullptr};
};
/*
 * LLVMEmitter is responsible for transforming a Vunit into LLVM IR, then
 * optimizing that and emitting machine code from the result.
 */
struct LLVMEmitter {
  explicit LLVMEmitter(const Vunit& unit, Vasm::AreaList& areas)
    : m_context(llvm::getGlobalContext())
    , m_module(new llvm::Module("", m_context))
    , m_function(llvm::Function::Create(
        llvm::FunctionType::get(
          llvm::Type::getVoidTy(m_context),
          std::vector<llvm::Type*>({
            llvm::IntegerType::get(m_context, 64),
            llvm::IntegerType::get(m_context, 64),
            llvm::IntegerType::get(m_context, 64)}),
          false),
        llvm::Function::ExternalLinkage, "", m_module.get()))
    , m_irb(m_context,
            llvm::ConstantFolder(),
            IRBuilderVasmInserter(*this))
    , m_tcMM(new TCMemoryManager(areas))
    , m_valueInfo(unit.next_vr)
    , m_blocks(unit.blocks.size())
    , m_unit(unit)
    , m_areas(areas)
  {
    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmPrinter();
    llvm::InitializeNativeTargetAsmParser();

    m_function->setCallingConv(llvm::CallingConv::X86_64_HHVM_TC);
    m_function->setAlignment(1);

    // TODO(#5398968): find a better way to disable 16-byte alignment.
    m_function->addFnAttr(llvm::Attribute::OptimizeForSize);

    m_irb.SetInsertPoint(
      llvm::BasicBlock::Create(m_context,
                               folly::to<std::string>('B', size_t(unit.entry)),
                               m_function));
    m_blocks[unit.entry] = m_irb.GetInsertBlock();

    // Register all unit's constants.
    for (auto const& pair : unit.cpool) {
      auto val = pair.first.isByte ? cns(uint8_t(pair.first.val))
                                   : cns(pair.first.val);
      defineValue(pair.second, val);
    }

    auto args = m_function->arg_begin();
    m_valueInfo[Vreg(x64::rVmSp)].llval = args++;
    m_rVmTl = m_valueInfo[Vreg(x64::rVmTl)].llval = args++;
    m_rVmTl->setName("rVmTl");
    m_rVmFp = m_valueInfo[Vreg(x64::rVmFp)].llval = args++;
    m_rVmFp->setName("rVmFp");

    // Commonly used types and values.
    m_int8  = m_irb.getInt8Ty();
    m_int16 = m_irb.getInt16Ty();
    m_int32 = m_irb.getInt32Ty();
    m_int64 = m_irb.getInt64Ty();

    m_int8Ptr  = llvm::Type::getInt8PtrTy(m_context);
    m_int16Ptr = llvm::Type::getInt16PtrTy(m_context);
    m_int32Ptr = llvm::Type::getInt32PtrTy(m_context);
    m_int64Ptr = llvm::Type::getInt64PtrTy(m_context);

    m_int8FSPtr  = llvm::Type::getInt8PtrTy(m_context,  kFSAddressSpace);
    m_int16FSPtr = llvm::Type::getInt16PtrTy(m_context, kFSAddressSpace);
    m_int32FSPtr = llvm::Type::getInt32PtrTy(m_context, kFSAddressSpace);
    m_int64FSPtr = llvm::Type::getInt64PtrTy(m_context, kFSAddressSpace);

    m_int8Zero  = m_irb.getInt8(0);
    m_int8One   = m_irb.getInt8(1);
    m_int16Zero = m_irb.getInt16(0);
    m_int16One  = m_irb.getInt16(1);
    m_int32Zero = m_irb.getInt32(0);
    m_int32One  = m_irb.getInt32(1);
    m_int64Zero = m_irb.getInt64(0);
    m_int64One  = m_irb.getInt64(1);

    m_int64Undef = llvm::UndefValue::get(m_int64);

    auto m_personalityFTy = llvm::FunctionType::get(m_int32, false);
    m_personalityFunc =
      llvm::Function::Create(m_personalityFTy,
                             llvm::GlobalValue::ExternalLinkage,
                             "personality0", m_module.get());
    m_personalityFunc->setCallingConv(llvm::CallingConv::C);
    m_tcMM->registerSymbolAddress("personality0", 0xbadbadbad);

    m_traceletFnTy = llvm::FunctionType::get(
      m_irb.getVoidTy(),
      std::vector<llvm::Type*>({m_int64, m_int64, m_int64}),
      false
    );

    m_typedValueType = llvm::StructType::get(
      m_context,
      packed_tv
        ? std::vector<llvm::Type*>({m_int8,   // padding
                                    m_int8,   // m_type
                                    m_int64}) // m_data
        : std::vector<llvm::Type*>({m_int64,  // m_data
                                    m_int8})  // m_type
    );
  }
  std::string showModule() const {
    std::string s;
    llvm::raw_string_ostream stream(s);
    VasmAnnotationWriter vw(m_instStrs);
    m_module->print(stream,
                    HPHP::Trace::moduleEnabled(Trace::llvm, 5) ? &vw : nullptr);
    return stream.str();
  }

  void verifyModule() const {
    std::string err;
    llvm::raw_string_ostream stream(err);
    always_assert_flog(!llvm::verifyModule(*m_module, &stream),
                       "LLVM verifier failed:\n{}\n{:-^80}\n{}\n{:-^80}\n{}",
                       stream.str(), " vasm unit ", show(m_unit),
                       " llvm module ", showModule());
  }
  /*
   * Finalize the code generation process by optimizing and generating code for
   * m_module.
   */
  void finalize() {
    FTRACE(1, "{:-^80}\n{}\n", " LLVM IR before optimizing ", showModule());
    verifyModule();

    // TODO(#5406596): teach LLVM our alignment rules. For the moment override
    // the 16-byte ABI default.
    llvm::TargetOptions targetOptions;
    targetOptions.StackAlignmentOverride = 8;
    targetOptions.GuaranteedTailCallOpt = true;

    auto tcMM = m_tcMM.release();
    std::string errStr;
    std::unique_ptr<llvm::ExecutionEngine> ee(
      llvm::EngineBuilder(m_module.get())
      .setErrorStr(&errStr)
      .setUseMCJIT(true)
      .setMCJITMemoryManager(tcMM)
      .setOptLevel(llvm::CodeGenOpt::Aggressive)
      .setRelocationModel(llvm::Reloc::Static)
      .setCodeModel(llvm::CodeModel::Small)
      .setVerifyModules(true)
      .setTargetOptions(targetOptions)
      .create());
    always_assert_flog(ee, "ExecutionEngine creation failed: {}\n", errStr);

    assert(m_module != nullptr);

    llvm::LLVMTargetMachine* targetMachine =
      static_cast<llvm::LLVMTargetMachine*>(ee->getTargetMachine());

    auto fpm = folly::make_unique<llvm::FunctionPassManager>(m_module.get());
    fpm->add(new llvm::DataLayoutPass(m_module.get()));
    targetMachine->addAnalysisPasses(*fpm);

    fpm->add(llvm::createBasicAliasAnalysisPass());
    fpm->add(llvm::createVerifierPass(true));
    fpm->add(llvm::createDebugInfoVerifierPass(false));
    fpm->add(llvm::createLoopSimplifyPass());
    fpm->add(llvm::createGCLoweringPass());
    fpm->add(llvm::createUnreachableBlockEliminationPass());
    fpm->add(llvm::createPromoteMemoryToRegisterPass());
    fpm->add(llvm::createInstructionCombiningPass());
    fpm->add(llvm::createReassociatePass());
    fpm->add(llvm::createGVNPass());
    fpm->add(llvm::createCFGSimplificationPass());
    fpm->add(llvm::createTailCallEliminationPass());
    fpm->doInitialization();

    m_module->addModuleFlag(llvm::Module::Error, "code_skew",
                            tcMM->computeCodeSkew(x64::kCacheLineSize));

    for (auto it = m_module->begin(); it != m_module->end(); ++it) {
      fpm->run(*it);
    }
    FTRACE(2, "{:-^80}\n{}\n", " LLVM IR after optimizing ", showModule());

    m_module.release(); // ee took ownership of the module.

    ee->setProcessAllSections(true);
    ee->finalizeObject();

    // Now that codegen is done, we need to parse location records and
    // gcc_except_table sections and update our own metadata.
    uint8_t* funcStart =
      static_cast<uint8_t*>(ee->getPointerToFunction(m_function));
    FTRACE(2, "LLVM function address: {}\n", funcStart);

    if (auto secLocRecs = tcMM->getDataSection(".llvm_locrecs")) {
      auto const recs = parseLocRecs(secLocRecs->data.get(),
                                     secLocRecs->size);
      FTRACE(2, "LLVM experimental locrecs:\n{}", show(recs));
      auto it = recs.functionRecords.find(funcStart);
      if (it != recs.functionRecords.end()) {
        processFixups(it->second, funcStart);
        processSvcReqs(it->second, funcStart);
      }
    }

    if (auto secGEH = tcMM->getDataSection(".gcc_except_table")) {
      auto const ehInfos = parse_gcc_except_table(secGEH->data.get());
      processEHInfos(ehInfos, funcStart);
    }
  }
  /*
   * For each entry in m_fixups, find its corresponding locrec entry, find
   * the actual call instruction, and register the fixup.
   */
  void processFixups(const LocRecs::FunctionRecord& funcRec,
                     uint8_t* funcStart) {
    for (auto& fix : m_fixups) {
      auto it = funcRec.records.find(fix.id);
      if (it == funcRec.records.end()) {
        // The call was optimized away.
        continue;
      }
      auto afterCall = [&] {
        for (auto& record : it->second) {
          auto ip = funcStart + record.offset;
          DecodedInstruction di(ip);
          if (di.isCall()) return ip + di.size();
        }
        always_assert(false && "call instruction cannot be found");
      }();
      FTRACE(2, "From afterCall for fixup = {}\n", afterCall);
      mcg->recordSyncPoint(afterCall, fix.fixup.pcOffset, fix.fixup.spOffset);
    }
  }
  void processSvcReqs(const LocRecs::FunctionRecord& funcRec,
                      uint8_t* funcStart) {
    auto findJmp = [&](const jit::vector<LocRecs::LocationRecord>& records) {
      for (auto& record : records) {
        auto ip = funcStart + record.offset;
        DecodedInstruction di(ip);
        if (di.isJmp()) return ip;
      }
      always_assert(false && "jmp instruction cannot be found");
    };

    for (auto& req : m_bindjmps) {
      auto it = funcRec.records.find(req.id);
      if (it == funcRec.records.end()) continue;

      auto jmpIp = findJmp(it->second);
      FTRACE(2, "Processing bindjmp at {}, stub {}\n", jmpIp, req.stub);

      mcg->cgFixups().m_alignFixups.emplace(
        jmpIp, std::make_pair(x64::kJmpLen, kX64CacheLineSize));
      mcg->setJmpTransID(jmpIp);

      // Patch the rip-relative lea in the stub to point at the jmp.
      auto leaIp = req.stub;
      always_assert((leaIp[0] & 0x48) == 0x48); // REX.W
      always_assert(leaIp[1] == 0x8d); // lea
      auto afterLea = leaIp + x64::kRipLeaLen;
      auto delta = safe_cast<int32_t>(jmpIp - afterLea);
      memcpy(afterLea - sizeof(delta), &delta, sizeof(delta));
    }

    for (auto& req : m_fallbacks) {
      auto it = funcRec.records.find(req.id);
      if (it == funcRec.records.end()) continue;

      auto destSR = mcg->tx().getSrcRec(req.dest);
      destSR->registerFallbackJump(findJmp(it->second));
    }
  }
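  // Byte-level sketch of the patch above: a rip-relative lea is encoded as a
  // REX.W prefix (0x48, possibly with extra low bits set), opcode 0x8d, a
  // ModRM byte, and a 4-byte displacement. That is why the code checks
  // leaIp[0] & 0x48 and leaIp[1] == 0x8d, then overwrites the final
  // sizeof(delta) bytes of the x64::kRipLeaLen-byte instruction with the
  // distance from the end of the lea to the smashable jmp.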
  /*
   * For each entry in infos, find all call instructions in the region and
   * register the landing pad as a catch block for each one.
   */
  void processEHInfos(const jit::vector<EHInfo>& infos, uint8_t* funcStart) {
    for (auto& info : infos) {
      auto ip = funcStart + info.start;
      auto const end = ip + info.length;
      auto const landingPad = funcStart + info.landingPad;

      FTRACE(2, "Looking for calls for landingPad {}, in EH region [{},{})\n",
             landingPad, ip, end);
      auto found = false;
      while (ip < end) {
        DecodedInstruction di(ip);
        ip += di.size();
        if (di.isCall()) {
          found = true;
          FTRACE(2, "  afterCall: {}\n", ip);
          mcg->registerCatchBlock(ip, landingPad);
        }
      }

      always_assert(found && "EH region with no calls");
    }
  }
  /*
   * Register the fact that llvmVal represents the Vasm value in tmp.
   */
  void defineValue(Vreg tmp, llvm::Value* llvmVal) {
    always_assert(tmp.isPhys() || m_valueInfo.at(tmp).llval == nullptr);
    if (tmp.isVirt()) {
      llvmVal->setName(folly::to<std::string>('t', size_t(tmp)));
    }
    m_valueInfo[tmp].llval = llvmVal;
  }
  /*
   * Look up the llvm::Value representing tmp.
   */
  llvm::Value* value(Vreg tmp) const {
    auto& info = m_valueInfo.at(tmp);
    always_assert(info.llval);
    return info.llval;
  }
  /*
   * Register the fact that tmp is defined by inst, since vasm units don't
   * have a natural way of following use-def chains.
   */
  void defineValue(Vreg tmp, const Vinstr& inst) {
    always_assert(m_valueInfo.at(tmp).inst.op == Vinstr::ud2 || !tmp.isVirt());
    m_valueInfo[tmp].inst = inst;
  }
  /*
   * Look up the Vinstr that defined tmp.
   */
  const Vinstr& defInst(Vreg tmp) const {
    return m_valueInfo.at(tmp).inst;
  }
  /*
   * Certain vasm instructions compute or load intermediate values that aren't
   * destinations of the instruction, but are used to produce a status flags
   * register that is. One example of this is the incwm instruction: it loads
   * an int16 from memory, increments it, produces a status flags register
   * based on the incremented value, and finally stores the incremented value
   * back to memory. When emitting code for an instruction that consumes this
   * status flags register, we need access to the intermediate value, due to
   * the way llvm handles conditional jumps. We call this value a "flag
   * temporary", and it is stored in a side table keyed on the status flags
   * Vreg it corresponds to.
   */
  void defineFlagTmp(Vreg vr, llvm::Value* tmp) {
    always_assert(m_valueInfo.at(vr).flagTmp == nullptr);
    m_valueInfo[vr].flagTmp = tmp;
  }
  /*
   * Get the flag temp for the given vr.
   */
  llvm::Value* flagTmp(Vreg vr) const {
    auto& info = m_valueInfo.at(vr);
    always_assert(info.flagTmp);
    return info.flagTmp;
  }
  /*
   * Look up or create the llvm block corresponding to the given vasm block.
   */
  llvm::BasicBlock* block(Vlabel l) {
    if (m_blocks[l] == nullptr) {
      return m_blocks[l] =
        llvm::BasicBlock::Create(m_context,
                                 folly::to<std::string>('B', size_t(l)),
                                 m_function);
    }
    return m_blocks[l];
  }
  /*
   * Generate an llvm::Value representing the given integral constant, with an
   * appropriate bit width.
   */
  template<typename Int>
  typename std::enable_if<std::is_integral<Int>::value, llvm::Value*>::type
  cns(Int val) const {
    return llvm::ConstantInt::get(
      m_context,
      llvm::APInt(sizeof(val) * CHAR_BIT, val, std::is_signed<Int>::value)
    );
  }
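  // Usage sketch: cns(int32_t{7}) produces an i32 constant and cns(int64_t{7})
  // an i64, since the APInt width comes from sizeof(Int) * CHAR_BIT and the
  // signedness from std::is_signed. Callers select the LLVM integer width by
  // selecting the C++ type of the argument.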
  /*
   * Assuming val is already an integer type, zero-extend or truncate it to an
   * integer type of the given size.
   */
  llvm::Value* asInt(llvm::Value* val, size_t bits) {
    assert(val->getType()->isIntegerTy());
    return m_irb.CreateZExtOrTrunc(val, intNType(bits));
  }
  /*
   * Bitcast val to Double.
   */
  llvm::Value* asDbl(llvm::Value* val) {
    return m_irb.CreateBitCast(val, m_irb.getDoubleTy());
  }
  /*
   * Emit LLVM IR for the given list of vasm blocks.
   */
  void emit(const jit::vector<Vlabel>& labels);

 private:
  /*
   * Custom LLVM IR inserter that can emit inline metadata for tracking
   * vasm origins of IR instructions in debug dumps.
   */
  struct IRBuilderVasmInserter {
    explicit IRBuilderVasmInserter(LLVMEmitter& e)
      : m_emitter(e)
      , m_mdNode(llvm::MDNode::get(m_emitter.m_context,
                                   std::vector<llvm::Value*>{}))
    {}

    void setVinstId(size_t id) { m_instId = id; }

   protected:
    void InsertHelper(llvm::Instruction* I, const llvm::Twine& Name,
                      llvm::BasicBlock* BB,
                      llvm::BasicBlock::iterator InsertPt) const {
      if (BB) BB->getInstList().insert(InsertPt, I);
      I->setName(Name);

      ONTRACE(5, I->setDebugLoc(llvm::DebugLoc::get(m_instId, 0, m_mdNode)));
    }

   private:
    LLVMEmitter& m_emitter;
    llvm::MDNode* m_mdNode;
    size_t m_instId{0};
  };
  /*
   * RegInfo is used to track information about Vregs, including their
   * corresponding llvm::Value and the Vinstr that defined them.
   */
  struct RegInfo {
    llvm::Value* llval;
    llvm::Value* flagTmp;
    Vinstr inst;
  };

  struct LLVMFixup {
    uint32_t id;
    Fixup fixup;
  };

  struct LLVMBindJmp {
    uint32_t id;
    TCA stub;
  };

  struct LLVMFallback {
    uint32_t id;
    SrcKey dest;
  };
  /*
   * PhiInfo is responsible for the bookkeeping needed to transform vasm's
   * phijmp/phidef instructions into LLVM phi nodes.
   */
  struct PhiInfo {
    void phij(LLVMEmitter& e, llvm::BasicBlock* fromLabel,
              const VregList& uses) {
      UseInfo useInfo{fromLabel, uses};

      // Update LLVM phi instructions if they've already been emitted;
      // otherwise enqueue useInfo so that phidef() can emit the uses.
      if (m_phis.size() != 0) {
        assert(m_phis.size() == useInfo.uses.size());
        for (auto phiInd = 0; phiInd < m_phis.size(); ++phiInd) {
          addIncoming(e, useInfo, phiInd);
        }
      } else {
        m_pendingPreds.emplace_back(std::move(useInfo));
      }
    }

    void phidef(LLVMEmitter& e, llvm::BasicBlock* toLabel,
                const VregList& defs) {
      assert(m_phis.size() == 0);
      assert(m_pendingPreds.size() > 0);

      m_toLabel = toLabel;
      m_defs = defs;

      for (auto phiInd = 0; phiInd < m_defs.size(); ++phiInd) {
        llvm::Type* type = e.value(m_pendingPreds[0].uses[phiInd])->getType();
        llvm::PHINode* phi = e.m_irb.CreatePHI(type, 1);
        m_phis.push_back(phi);

        // Emit uses for the phi* instructions which preceded this phidef.
        for (auto& useInfo : m_pendingPreds) {
          addIncoming(e, useInfo, phiInd);
        }
        e.defineValue(m_defs[phiInd], phi);
      }
    }

   private:
    struct UseInfo {
      llvm::BasicBlock* fromLabel;
      VregList uses;
    };

    void addIncoming(LLVMEmitter& e, UseInfo& useInfo, unsigned phiInd) {
      m_phis[phiInd]->addIncoming(e.value(useInfo.uses[phiInd]),
                                  useInfo.fromLabel);
      auto typeStr = llshow(e.value(useInfo.uses[phiInd])->getType());
      FTRACE(2,
             "phidef --> phiInd:{}, type:{}, incoming:{}, use:%{}, "
             "block:{}, def:%{}\n", phiInd, typeStr,
             useInfo.fromLabel->getName().str(),
             size_t{useInfo.uses[phiInd]},
             m_toLabel->getName().str(), size_t{m_defs[phiInd]});
    }

    jit::vector<UseInfo> m_pendingPreds;
    jit::vector<llvm::PHINode*> m_phis;
    VregList m_defs;
    llvm::BasicBlock* m_toLabel;
  };
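  // Example of the transformation (a sketch with hypothetical block and Vreg
  // names): predecessors ending in "phijmp B3 (%t5)" and "phijmp B3 (%t9)"
  // that feed "phidef (%t12)" in B3 become one LLVM node in B3:
  //   %t12 = phi i64 [ %t5, %B1 ], [ %t9, %B2 ]
  // phij() buffers each predecessor's uses in m_pendingPreds until phidef()
  // creates the PHINode, then addIncoming() wires up the edges.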
  static constexpr unsigned kFSAddressSpace = 257;

#define O(name, ...) void emit(const name&);
VASM_OPCODES
#undef O
  llvm::Value* getGlobal(const std::string& name, int64_t value,
                         llvm::Type* type);
  void emitCall(const Vinstr& instr);
  void emit(const bindjcc1st&, SrcKey);
  void emit(const bindjcc2nd&, SrcKey);
  llvm::Value* emitCmpForCC(Vreg sf, ConditionCode cc);

  llvm::Value* emitFuncPtr(const std::string& name,
                           llvm::FunctionType* type,
                           uint64_t address);
  llvm::CallInst* emitTraceletTailCall(llvm::Value* target);

  // Emit code for the pointer. Return value is of the given type, or
  // <i{bits} *> for the second overload.
  llvm::Value* emitPtr(const Vptr s, llvm::Type* ptrTy);
  llvm::Value* emitPtr(const Vptr s, size_t bits = 64);

  template<typename Taken>
  void emitJcc(Vreg sf, ConditionCode cc, const char* takenSuffix,
               Taken taken);

  llvm::Type* ptrType(llvm::Type* ty, unsigned addressSpace = 0) const;
  llvm::Type* intNType(size_t bits) const;
  llvm::Type* ptrIntNType(size_t bits, bool inFS) const;

  llvm::Value* getReturnAddress();
  UNUSED llvm::Value* getFrameAddress();
  UNUSED llvm::Value* getStackPointer();

  UNUSED void emitAsm(const std::string& asmStatement,
                      const std::string& asmConstraints,
                      bool hasSideEffects);
  llvm::LLVMContext& m_context;
  std::unique_ptr<llvm::Module> m_module;
  llvm::Function* m_function;
  llvm::IRBuilder<true, llvm::ConstantFolder, IRBuilderVasmInserter> m_irb;
  std::unique_ptr<TCMemoryManager> m_tcMM;

  // Function type used for tail calls to other tracelets.
  llvm::FunctionType* m_traceletFnTy{nullptr};

  // Mimic HHVM's TypedValue.
  llvm::StructType* m_typedValueType{nullptr};

  // Saved LLVM intrinsics.
  llvm::Function* m_llvmFrameAddress{nullptr};
  llvm::Function* m_llvmReadRegister{nullptr};
  llvm::Function* m_llvmReturnAddress{nullptr};

  // Vreg -> RegInfo map
  jit::vector<RegInfo> m_valueInfo;

  // Vlabel -> llvm::BasicBlock map
  jit::vector<llvm::BasicBlock*> m_blocks;

  jit::hash_map<llvm::BasicBlock*, PhiInfo> m_phiInfos;

  // Pending Fixups that must be processed after codegen.
  jit::vector<LLVMFixup> m_fixups;

  // Pending service requests that must be processed after codegen.
  jit::vector<LLVMBindJmp> m_bindjmps;
  jit::vector<LLVMFallback> m_fallbacks;

  // Vector of vasm instruction strings, used for printing in llvm IR dumps.
  jit::vector<std::string> m_instStrs;

  // The next id to use for an LLVM location record. These IDs only have to
  // be unique within the function.
  uint32_t m_nextLocRec{1};

  const Vunit& m_unit;
  Vasm::AreaList& m_areas;

  llvm::Value* m_rVmTl{nullptr};
  llvm::Value* m_rVmFp{nullptr};

  // Faux personality for emitting landingpad.
  llvm::Function* m_personalityFunc;

  llvm::Function* m_fabs{nullptr};

  // Commonly used types. Some LLVM APIs require non-consts.
  llvm::IntegerType* m_int8;
  llvm::IntegerType* m_int16;
  llvm::IntegerType* m_int32;
  llvm::IntegerType* m_int64;

  llvm::PointerType* m_int8Ptr;
  llvm::PointerType* m_int16Ptr;
  llvm::PointerType* m_int32Ptr;
  llvm::PointerType* m_int64Ptr;

  llvm::PointerType* m_int8FSPtr;
  llvm::PointerType* m_int16FSPtr;
  llvm::PointerType* m_int32FSPtr;
  llvm::PointerType* m_int64FSPtr;

  // Commonly used constants. No const either.
  llvm::ConstantInt* m_int8Zero;
  llvm::ConstantInt* m_int8One;
  llvm::ConstantInt* m_int16Zero;
  llvm::ConstantInt* m_int16One;
  llvm::ConstantInt* m_int32Zero;
  llvm::ConstantInt* m_int32One;
  llvm::ConstantInt* m_int64Zero;
  llvm::ConstantInt* m_int64One;

  llvm::UndefValue* m_int64Undef;
};
void LLVMEmitter::emit(const jit::vector<Vlabel>& labels) {
  // Make sure all the llvm blocks are emitted in the order given by
  // layoutBlocks, regardless of which ones we need to use as jump targets
  // first.
  for (auto label : labels) {
    block(label);
  }

  auto const traceInstrs = Trace::moduleEnabled(Trace::llvm, 5);

  for (auto label : labels) {
    auto& b = m_unit.blocks[label];
    m_irb.SetInsertPoint(block(label));

    // TODO(#5376594): before rVmFp is SSA-ified we are using the hack below.
    m_valueInfo[Vreg(x64::rVmFp)].llval = m_rVmFp;

    for (auto& inst : b.code) {
      if (traceInstrs) {
        m_irb.setVinstId(m_instStrs.size());
        m_instStrs.emplace_back(show(m_unit, inst).c_str());
      }

      switch (inst.op) {
// This list will eventually go away; for now only a very small subset of
// operations are supported.
#define SUPPORTED_OPS \
        /* ... the long list of supported opcodes is elided here ... */
#define O(name) case Vinstr::name: emit(inst.name##_); break;
        SUPPORTED_OPS
#undef O
#undef SUPPORTED_OPS

        case Vinstr::vinvoke:
          emitCall(inst);
          break;

        case Vinstr::bindjcc1st:
          emit(inst.bindjcc1st_, inst.origin->marker().sk());
          break;

        case Vinstr::bindjcc2nd:
          emit(inst.bindjcc2nd_, inst.origin->marker().sk());
          break;

        // These instructions are intentionally unsupported for a variety of
        // reasons, and if code-gen-x64.cpp emits one it's a bug:
        case Vinstr::unwind:
        case Vinstr::nothrow:
        case Vinstr::syncpoint:
        /* ... more banned opcodes elided ... */
        case Vinstr::fallthru:
          always_assert_flog(false,
                             "Banned opcode in B{}: {}",
                             size_t(label), show(m_unit, inst));

        case Vinstr::hcsync:
        case Vinstr::hcnocatch:
        case Vinstr::hcunwind:
        case Vinstr::hostcall:
          // Fallthrough. Eventually we won't have a default case.
        default:
          throw FailedLLVMCodeGen("Unsupported opcode in B{}: {}",
                                  size_t(label), show(m_unit, inst));
      }

      visitDefs(m_unit, inst, [&](Vreg def) {
        defineValue(def, inst);
      });
    }
  }
}
* LLVMEmitter::getGlobal(const std::string
& name
,
1323 m_tcMM
->registerSymbolAddress(name
, address
);
1324 return m_module
->getOrInsertGlobal(name
, type
);
1327 void LLVMEmitter::emit(const addli
& inst
) {
1328 defineValue(inst
.d
, m_irb
.CreateAdd(cns(inst
.s0
.l()), value(inst
.s1
)));
1331 void LLVMEmitter::emit(const addlm
& inst
) {
1332 auto ptr
= emitPtr(inst
.m
, 32);
1333 auto result
= m_irb
.CreateAdd(value(inst
.s0
), m_irb
.CreateLoad(ptr
));
1334 defineFlagTmp(inst
.sf
, result
);
1335 m_irb
.CreateStore(result
, ptr
);
1338 void LLVMEmitter::emit(const addq
& inst
) {
1339 defineValue(inst
.d
, m_irb
.CreateAdd(value(inst
.s0
), value(inst
.s1
)));
1342 void LLVMEmitter::emit(const addqi
& inst
) {
1343 // TODO(#5134526): ignore %rsp adjustments for now. They are typically
1344 // emitted in unwind handler.
1345 if (inst
.s1
== reg::rsp
) return;
1346 defineValue(inst
.d
, m_irb
.CreateAdd(value(inst
.s1
), cns(inst
.s0
.q())));
1349 void LLVMEmitter::emit(const addqim
& inst
) {
1350 auto ptr
= emitPtr(inst
.m
, 64);
1351 auto result
= m_irb
.CreateAdd(cns(inst
.s0
.q()), m_irb
.CreateLoad(ptr
));
1352 defineFlagTmp(inst
.sf
, result
);
1353 m_irb
.CreateStore(result
, ptr
);
1356 void LLVMEmitter::emit(const addsd
& inst
) {
1357 defineValue(inst
.d
, m_irb
.CreateFAdd(asDbl(value(inst
.s0
)),
1358 asDbl(value(inst
.s1
))));
1361 void LLVMEmitter::emit(const andb
& inst
) {
1362 defineValue(inst
.d
, m_irb
.CreateAnd(value(inst
.s0
), value(inst
.s1
)));
1365 void LLVMEmitter::emit(const andbi
& inst
) {
1366 defineValue(inst
.d
, m_irb
.CreateAnd(cns(inst
.s0
.b()), value(inst
.s1
)));
1369 void LLVMEmitter::emit(const andbim
& inst
) {
1370 auto ptr
= emitPtr(inst
.m
, 8);
1371 auto result
= m_irb
.CreateAnd(cns(inst
.s
.b()), m_irb
.CreateLoad(ptr
));
1372 defineFlagTmp(inst
.sf
, result
);
1373 m_irb
.CreateStore(result
, ptr
);
1376 void LLVMEmitter::emit(const andl
& inst
) {
1377 defineValue(inst
.d
, m_irb
.CreateAnd(value(inst
.s0
), value(inst
.s1
)));
1380 void LLVMEmitter::emit(const andli
& inst
) {
1381 defineValue(inst
.d
, m_irb
.CreateAnd(cns(inst
.s0
.l()), value(inst
.s1
)));
1384 void LLVMEmitter::emit(const andq
& inst
) {
1385 defineValue(inst
.d
, m_irb
.CreateAnd(value(inst
.s0
), value(inst
.s1
)));
1388 void LLVMEmitter::emit(const andqi
& inst
) {
1389 defineValue(inst
.d
, m_irb
.CreateAnd(cns(inst
.s0
.q()), value(inst
.s1
)));
template<typename Taken>
void LLVMEmitter::emitJcc(Vreg sf, ConditionCode cc, const char* takenName,
                          Taken taken) {
  auto blockName = m_irb.GetInsertBlock()->getName().str();
  auto nextBlock =
    llvm::BasicBlock::Create(m_context,
                             folly::to<std::string>(blockName, '_'),
                             m_function);
  nextBlock->moveAfter(m_irb.GetInsertBlock());
  auto takenBlock =
    llvm::BasicBlock::Create(m_context,
                             folly::to<std::string>(blockName, "_", takenName),
                             m_function);
  takenBlock->moveAfter(nextBlock);

  auto cond = emitCmpForCC(sf, cc);
  m_irb.CreateCondBr(cond, takenBlock, nextBlock);

  m_irb.SetInsertPoint(takenBlock);
  taken();

  m_irb.SetInsertPoint(nextBlock);
}
void LLVMEmitter::emit(const bindjmp& inst) {
  // bindjmp is a smashable tail call to a service request stub. The stub needs
  // to reference the address of the tail call, but we don't know the address
  // of the tail call until after codegen. This means we either need to emit
  // the stub here and patch the rip-relative lea in the stub after codegen, or
  // emit a tail call to a dummy address that we'll patch after codegen when we
  // can emit the stub with the right address. When given the choice between
  // lying to our stub emitter or llvm it's generally better to lie to the stub
  // emitter, so that's what we do here.

  auto& frozen = m_areas[size_t(AreaIndex::Frozen)].code;
  auto stub = mcg->getFreeStub(frozen, &mcg->cgFixups());
  ServiceReqArgVec args;
  packServiceReqArgs(args,
                     RipRelative(mcg->code.base()),
                     inst.target.toAtomicInt(),
                     inst.trflags.packed);
  auto reqIp = mcg->backEnd().emitServiceReqWork(
    frozen, stub, SRFlags::None, REQ_BIND_JMP, args);

  auto stubName = folly::sformat("bindjmpStub_{}", reqIp);
  auto stubFunc = emitFuncPtr(stubName, m_traceletFnTy, uint64_t(reqIp));
  auto call = emitTraceletTailCall(stubFunc);
  // TODO(t5742996): call->setSmashable();

  auto id = m_nextLocRec++;
  call->setMetadata(llvm::LLVMContext::MD_locrec,
                    llvm::MDNode::get(m_context, cns(id)));
  m_bindjmps.emplace_back(LLVMBindJmp{id, reqIp});
}
// Emitting a real REQ_BIND_SIDE_EXIT or REQ_BIND_JMPCC_(FIRST|SECOND) only
// makes sense if we can guarantee that llvm will emit a smashable jcc. Until
// then, we jcc to a REQ_BIND_JMP.
void LLVMEmitter::emit(const bindexit& inst) {
  emitJcc(
    inst.sf, inst.cc, "exit",
    [&] {
      emit(bindjmp{inst.target, inst.trflags});
    }
  );
}
void LLVMEmitter::emit(const bindjcc1st& inst, SrcKey instSk) {
  emitJcc(
    inst.sf, inst.cc, "jcc1",
    [&] {
      emit(bindjmp{{instSk.func(), inst.targets[1], instSk.resumed()}});
    }
  );

  emit(bindjmp{{instSk.func(), inst.targets[0], instSk.resumed()}});
}
void LLVMEmitter::emit(const bindjcc2nd& inst, SrcKey instSk) {
  emitJcc(
    inst.sf, inst.cc, "jcc2",
    [&] {
      emit(bindjmp{{instSk.func(), inst.target, instSk.resumed()}});
    }
  );
}
void LLVMEmitter::emit(const bindaddr& inst) {
  // inst.dest is a pointer to memory allocated in globalData, so we can just
  // do what vasm does here.

  auto& frozen = m_areas[size_t(AreaIndex::Frozen)].code;
  mcg->setJmpTransID((TCA)inst.dest);
  *inst.dest = emitEphemeralServiceReq(
    frozen,
    mcg->getFreeStub(frozen, &mcg->cgFixups()),
    REQ_BIND_ADDR,
    inst.dest,
    inst.sk.toAtomicInt(),
    TransFlags{}.packed
  );
  mcg->cgFixups().m_codePointers.insert(inst.dest);
}
void LLVMEmitter::emit(const defvmsp& inst) {
  defineValue(inst.d, value(x64::rVmSp));
}
llvm::Value* LLVMEmitter::emitFuncPtr(const std::string& name,
                                      llvm::FunctionType* type,
                                      uint64_t address) {
  auto funcPtr = m_module->getFunction(name);
  if (!funcPtr) {
    m_tcMM->registerSymbolAddress(name, address);
    funcPtr = llvm::Function::Create(type,
                                     llvm::GlobalValue::ExternalLinkage,
                                     name, m_module.get());
  }
  return funcPtr;
}
llvm::CallInst* LLVMEmitter::emitTraceletTailCall(llvm::Value* target) {
  std::vector<llvm::Value*> args{
    value(x64::rVmSp), value(x64::rVmTl), value(x64::rVmFp)
  };
  auto call = m_irb.CreateCall(target, args);
  call->setCallingConv(llvm::CallingConv::X86_64_HHVM_TC);
  call->setTailCallKind(llvm::CallInst::TCK_MustTail);
  m_irb.CreateRetVoid();
  return call;
}
void LLVMEmitter::emitCall(const Vinstr& inst) {
  auto const is_vcall = inst.op == Vinstr::vcall;
  auto const vcall = inst.vcall_;
  auto const vinvoke = inst.vinvoke_;

  // Extract all the relevant information from the appropriate instruction.
  auto const call = is_vcall ? vcall.call : vinvoke.call;
  auto const& vargs = m_unit.vcallArgs[is_vcall ? vcall.args : vinvoke.args];
  auto const dests = m_unit.tuples[is_vcall ? vcall.d : vinvoke.d];
  auto const destType = is_vcall ? vcall.destType : vinvoke.destType;
  auto const fixup = is_vcall ? vcall.fixup : vinvoke.fixup;

  // Generate the right function signature to be used by call/invoke.
  // Perhaps caching it can improve the performance.
  llvm::Type* returnType = nullptr;
  switch (destType) {
    case DestType::None:
      returnType = m_irb.getVoidTy();
      break;
    case DestType::SSA:
      returnType = m_int64;
      break;
    case DestType::Byte:
      returnType = m_int8;
      break;
    case DestType::Dbl:
      returnType = m_irb.getDoubleTy();
      break;
    case DestType::SIMD:
    case DestType::TV:
      returnType = m_typedValueType;
      break;
  }

  std::vector<llvm::Type*> argTypes = { m_int64 };
  std::vector<llvm::Value*> args = { value(x64::rVmFp) };
  auto doArgs = [&] (const VregList& srcs) {
    for (int i = 0; i < srcs.size(); ++i) {
      args.push_back(value(srcs[i]));
      argTypes.push_back(value(srcs[i])->getType());
    }
  };
  doArgs(vargs.args);

  // Handle the special case of a TypedValue being put on the stack while not
  // all arg regs have been used. Since the TypedValue is already split into
  // regs, we need to manually insert the padding arg.
  if (vargs.stkArgs.size() &&
      vargs.stkArgs[0].isGP() &&
      vargs.args.size() == x64::kNumRegisterArgs - 1) {
    args.push_back(m_int64Undef);
    argTypes.push_back(m_int64);
  }

  doArgs(vargs.simdArgs);
  doArgs(vargs.stkArgs);

  auto const funcType = llvm::FunctionType::get(returnType, argTypes, false);

  llvm::Value* funcPtr = nullptr;
  switch (call.kind()) {
    case CppCall::Kind::Direct:
      funcPtr = emitFuncPtr(getNativeFunctionName(call.address()),
                            funcType,
                            uint64_t(call.address()));
      break;

    case CppCall::Kind::Virtual:
    case CppCall::Kind::ArrayVirt:
      throw FailedLLVMCodeGen("Unsupported call type: {}",
                              (int)call.kind());

    case CppCall::Kind::Destructor: {
      assert(vargs.args.size() == 1);
      llvm::Value* type = value(call.reg());
      type = m_irb.CreateLShr(asInt(type, 64),
                              kShiftDataTypeToDestrIndex, "typeIdx");

      auto destructors = getGlobal("g_destructors", uint64_t(g_destructors),
                                   llvm::VectorType::get(ptrType(funcType),
                                                         kDestrTableSize));
      funcPtr = m_irb.CreateExtractElement(m_irb.CreateLoad(destructors), type);
      break;
    }
  }

  llvm::Instruction* callInst = nullptr;
  if (is_vcall) {
    auto call = m_irb.CreateCall(funcPtr, args);
    call->setCallingConv(llvm::CallingConv::X86_64_HHVM_C);
    callInst = call;
  } else {
    auto normal = block(vinvoke.targets[0]);
    auto unwind = block(vinvoke.targets[1]);
    auto invoke = m_irb.CreateInvoke(funcPtr, normal, unwind, args);
    invoke->setCallingConv(llvm::CallingConv::X86_64_HHVM_C);
    callInst = invoke;

    // The result can only be used on the normal path. The unwind branch cannot
    // access return values.
    m_irb.SetInsertPoint(normal);
  }

  // Record the location of the call/invoke instruction.
  if (fixup.isValid()) {
    auto id = m_nextLocRec++;
    m_fixups.emplace_back(LLVMFixup{id, fixup});
    FTRACE(2, "Adding fixup id {} for {}\n", id, llshow(callInst));
    callInst->setMetadata(llvm::LLVMContext::MD_locrec,
                          llvm::MDNode::get(m_context, cns(id)));
  }

  // Extract value(s) from the call.
  switch (destType) {
    case DestType::None:
      assert(dests.size() == 0);
      break;

    case DestType::SSA:
    case DestType::Byte:
    case DestType::Dbl:
      assert(dests.size() == 1);
      defineValue(dests[0], callInst);
      break;

    case DestType::TV: {
      assert(dests.size() == 2);
      if (packed_tv) {
        defineValue(dests[0], m_irb.CreateExtractValue(callInst, 2)); // m_data
        defineValue(dests[1],
                    asInt(m_irb.CreateExtractValue(callInst, 1), 64)); // m_type
      } else {
        defineValue(dests[0], m_irb.CreateExtractValue(callInst, 0)); // m_data
        defineValue(dests[1],
                    asInt(m_irb.CreateExtractValue(callInst, 1), 64)); // m_type
      }
      break;
    }

    case DestType::SIMD: {
      assert(dests.size() == 1);
      // Do we want to pack it manually into a <2 x i64>? Or bitcast to
      // X86_MMX? Leave it as a TypedValue for now and see what the LLVM
      // optimizer does.
      defineValue(dests[0], callInst);
      break;
    }
  }
}
void LLVMEmitter::emit(const cloadq& inst) {
  auto trueVal = m_irb.CreateLoad(emitPtr(inst.t, 64));
  auto falseVal = value(inst.f);
  auto cond = emitCmpForCC(inst.sf, inst.cc);
  defineValue(inst.d, m_irb.CreateSelect(cond, trueVal, falseVal));
}
void LLVMEmitter::emit(const cmovq& inst) {
  auto cond = emitCmpForCC(inst.sf, inst.cc);
  defineValue(inst.d, m_irb.CreateSelect(cond, value(inst.t), value(inst.f)));
}
void LLVMEmitter::emit(const cmpb& inst) {
  // no-op. The real work for this and other non-memory cmps happens in
  // emitCmpForCC().
}

void LLVMEmitter::emit(const cmpbi& inst) {
  // no-op.
}

void LLVMEmitter::emit(const cmpbim& inst) {
  defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 8)));
}

void LLVMEmitter::emit(const cmpl& inst) {
  // no-op.
}

void LLVMEmitter::emit(const cmpli& inst) {
  // no-op.
}

void LLVMEmitter::emit(const cmplim& inst) {
  defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 32)));
}

void LLVMEmitter::emit(const cmplm& inst) {
  defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 32)));
}

void LLVMEmitter::emit(const cmpq& inst) {
  // no-op.
}

void LLVMEmitter::emit(const cmpqi& inst) {
  // no-op.
}

void LLVMEmitter::emit(const cmpqim& inst) {
  defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 64)));
}

void LLVMEmitter::emit(const cmpqm& inst) {
  defineFlagTmp(inst.sf, m_irb.CreateLoad(emitPtr(inst.s1, 64)));
}
void LLVMEmitter::emit(const cvttsd2siq& inst) {
  defineValue(inst.d, m_irb.CreateFPToSI(value(inst.s), m_int64));
}

void LLVMEmitter::emit(const cvtsi2sd& inst) {
  defineValue(inst.d, m_irb.CreateSIToFP(value(inst.s), m_irb.getDoubleTy()));
}

void LLVMEmitter::emit(const cvtsi2sdm& inst) {
  auto intVal = m_irb.CreateLoad(emitPtr(inst.s, 64));
  defineValue(inst.d, m_irb.CreateSIToFP(intVal, m_irb.getDoubleTy()));
}
void LLVMEmitter::emit(const copy& inst) {
  defineValue(inst.d, value(inst.s));
}

void LLVMEmitter::emit(const copy2& inst) {
  defineValue(inst.d0, value(inst.s0));
  defineValue(inst.d1, value(inst.s1));
}

void LLVMEmitter::emit(const copyargs& inst) {
  auto& srcs = m_unit.tuples[inst.s];
  auto& dsts = m_unit.tuples[inst.d];
  assert(srcs.size() == dsts.size());
  for (unsigned i = 0, n = srcs.size(); i < n; ++i) {
    defineValue(dsts[i], value(srcs[i]));
  }
}
void LLVMEmitter::emit(const debugtrap& inst) {
  auto trap = llvm::Intrinsic::getDeclaration(m_module.get(),
                                              llvm::Intrinsic::debugtrap);
  m_irb.CreateCall(trap);
  m_irb.CreateUnreachable();
}
void LLVMEmitter::emit(const decl& inst) {
  defineValue(inst.d, m_irb.CreateSub(value(inst.s), m_int32One));
}

void LLVMEmitter::emit(const declm& inst) {
  auto ptr = emitPtr(inst.m, 32);
  auto load = m_irb.CreateLoad(ptr);
  auto sub = m_irb.CreateSub(load, m_int32One);
  defineFlagTmp(inst.sf, sub);
  m_irb.CreateStore(sub, ptr);
}

void LLVMEmitter::emit(const decq& inst) {
  defineValue(inst.d, m_irb.CreateSub(value(inst.s), m_int64One));
}

void LLVMEmitter::emit(const decqm& inst) {
  auto ptr = emitPtr(inst.m, 64);
  auto oldVal = m_irb.CreateLoad(ptr);
  auto newVal = m_irb.CreateSub(oldVal, m_int64One);
  defineFlagTmp(inst.sf, newVal);
  m_irb.CreateStore(newVal, ptr);
}
void LLVMEmitter::emit(const divsd& inst) {
  defineValue(inst.d, m_irb.CreateFDiv(asDbl(value(inst.s1)),
                                       asDbl(value(inst.s0))));
}
void LLVMEmitter::emit(const imul& inst) {
  defineValue(inst.d, m_irb.CreateMul(value(inst.s0), value(inst.s1)));
}
void LLVMEmitter::emit(const fallback& inst) {
  assert_not_implemented(inst.trflags.packed == 0);

  auto destSR = mcg->tx().getSrcRec(inst.dest);
  auto fallback = destSR->getFallbackTranslation();
  auto func = emitFuncPtr(folly::sformat("reqRetranslate_{}", fallback),
                          m_traceletFnTy,
                          uint64_t(destSR->getFallbackTranslation()));
  auto call = emitTraceletTailCall(func);
  // TODO(t5742996): call->setSmashable();

  LLVMFallback req{m_nextLocRec++, inst.dest};
  call->setMetadata(llvm::LLVMContext::MD_locrec,
                    llvm::MDNode::get(m_context, cns(req.id)));
  m_fallbacks.emplace_back(req);
}
void LLVMEmitter::emit(const fallbackcc& inst) {
  assert_not_implemented(inst.trflags.packed == 0);

  emitJcc(
    inst.sf, inst.cc, "guard",
    [&] {
      emit(fallback{inst.dest, inst.trflags});
    }
  );
}
void LLVMEmitter::emit(const incwm& inst) {
  auto ptr = emitPtr(inst.m, 16);
  auto oldVal = m_irb.CreateLoad(ptr);
  auto newVal = m_irb.CreateAdd(oldVal, m_int16One);
  defineFlagTmp(inst.sf, newVal);
  m_irb.CreateStore(newVal, ptr);
}

void LLVMEmitter::emit(const incl& inst) {
  defineValue(inst.d, m_irb.CreateAdd(value(inst.s), m_int32One));
}

void LLVMEmitter::emit(const inclm& inst) {
  auto ptr = emitPtr(inst.m, 32);
  auto load = m_irb.CreateLoad(ptr);
  auto add = m_irb.CreateAdd(load, m_int32One);
  defineFlagTmp(inst.sf, add);
  m_irb.CreateStore(add, ptr);
}

void LLVMEmitter::emit(const incq& inst) {
  defineValue(inst.d, m_irb.CreateAdd(value(inst.s), m_int64One));
}

void LLVMEmitter::emit(const incqm& inst) {
  auto ptr = emitPtr(inst.m, 64);
  auto load = m_irb.CreateLoad(ptr);
  auto add = m_irb.CreateAdd(load, m_int64One);
  defineFlagTmp(inst.sf, add);
  m_irb.CreateStore(add, ptr);
}
void LLVMEmitter::emit(const incqmlock& inst) {
  auto ptr = emitPtr(inst.m, 64);
  m_irb.CreateAtomicRMW(llvm::AtomicRMWInst::Add, ptr,
                        m_int64One, llvm::SequentiallyConsistent);
  // Unlike the other inc*m instructions, we don't define a flagTmp here. The
  // value returned by llvm's atomicrmw is the old value, while the x64 incq
  // instruction this is based on sets flags based on the new value. Nothing
  // currently consumes the sf from an incqmlock instruction; if this changes
  // we'll deal with it then.
}
static llvm::CmpInst::Predicate ccToPred(ConditionCode cc) {
  using Cmp = llvm::CmpInst;
  switch (cc) {
    case CC_E:  return Cmp::ICMP_EQ;
    case CC_NE: return Cmp::ICMP_NE;
    case CC_L:  return Cmp::ICMP_SLT;
    case CC_LE: return Cmp::ICMP_SLE;
    case CC_G:  return Cmp::ICMP_SGT;
    case CC_GE: return Cmp::ICMP_SGE;
    case CC_B:  return Cmp::ICMP_ULT;
    case CC_BE: return Cmp::ICMP_ULE;
    case CC_A:  return Cmp::ICMP_UGT;
    case CC_AE: return Cmp::ICMP_UGE;
    default:    throw FailedLLVMCodeGen("Unsupported CC {}", cc_names[cc]);
  }
}
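// For example, a "cmpq %s0, %s1" consumed by a jcc with CC_L turns into
// "icmp slt i64 %s1, %s0" in emitCmpForCC() below: x64's signed and unsigned
// condition codes map one-to-one onto LLVM's ICMP_S* and ICMP_U* predicates.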
llvm::Value* LLVMEmitter::emitCmpForCC(Vreg sf, ConditionCode cc) {
  auto& cmp = defInst(sf);
  llvm::Value* lhs = nullptr;
  llvm::Value* rhs = nullptr;

  if (cmp.op == Vinstr::addq) {
    lhs = asInt(value(cmp.addq_.d), 64);
    rhs = m_int64Zero;
  } else if (cmp.op == Vinstr::addqi) {
    lhs = asInt(value(cmp.addqi_.d), 64);
    rhs = m_int64Zero;
  } else if (cmp.op == Vinstr::addqim) {
    lhs = flagTmp(sf);
    rhs = m_int64Zero;
  } else if (cmp.op == Vinstr::cmpb) {
    lhs = asInt(value(cmp.cmpb_.s1), 8);
    rhs = asInt(value(cmp.cmpb_.s0), 8);
  } else if (cmp.op == Vinstr::cmpbi) {
    lhs = asInt(value(cmp.cmpbi_.s1), 8);
    rhs = cns(cmp.cmpbi_.s0.b());
  } else if (cmp.op == Vinstr::cmpbim) {
    lhs = flagTmp(sf);
    rhs = cns(cmp.cmpbim_.s0.b());
  } else if (cmp.op == Vinstr::cmpl) {
    lhs = asInt(value(cmp.cmpl_.s1), 32);
    rhs = asInt(value(cmp.cmpl_.s0), 32);
  } else if (cmp.op == Vinstr::cmpli) {
    lhs = asInt(value(cmp.cmpli_.s1), 32);
    rhs = cns(cmp.cmpli_.s0.l());
  } else if (cmp.op == Vinstr::cmplim) {
    lhs = flagTmp(sf);
    rhs = cns(cmp.cmplim_.s0.l());
  } else if (cmp.op == Vinstr::cmplm) {
    lhs = flagTmp(sf);
    rhs = asInt(value(cmp.cmplm_.s0), 32);
  } else if (cmp.op == Vinstr::cmpq) {
    lhs = asInt(value(cmp.cmpq_.s1), 64);
    rhs = asInt(value(cmp.cmpq_.s0), 64);
  } else if (cmp.op == Vinstr::cmpqi) {
    lhs = asInt(value(cmp.cmpqi_.s1), 64);
    rhs = cns(cmp.cmpqi_.s0.q());
  } else if (cmp.op == Vinstr::cmpqim) {
    lhs = flagTmp(sf);
    rhs = cns(cmp.cmpqim_.s0.q());
  } else if (cmp.op == Vinstr::cmpqm) {
    lhs = flagTmp(sf);
    rhs = asInt(value(cmp.cmpqm_.s0), 64);
  } else if (cmp.op == Vinstr::decl) {
    lhs = asInt(value(cmp.decl_.d), 32);
    rhs = m_int32Zero;
  } else if (cmp.op == Vinstr::declm) {
    lhs = flagTmp(sf);
    rhs = m_int32Zero;
  } else if (cmp.op == Vinstr::decq) {
    lhs = asInt(value(cmp.decq_.d), 64);
    rhs = m_int64Zero;
  } else if (cmp.op == Vinstr::decqm) {
    lhs = flagTmp(sf);
    rhs = m_int64Zero;
  } else if (cmp.op == Vinstr::inclm) {
    lhs = flagTmp(sf);
    rhs = m_int32Zero;
  } else if (cmp.op == Vinstr::incwm) {
    lhs = flagTmp(sf);
    rhs = m_int16Zero;
  } else if (cmp.op == Vinstr::subbi) {
    lhs = asInt(value(cmp.subbi_.d), 8);
    rhs = m_int8Zero;
  } else if (cmp.op == Vinstr::subl) {
    lhs = asInt(value(cmp.subl_.d), 32);
    rhs = m_int32Zero;
  } else if (cmp.op == Vinstr::subli) {
    lhs = asInt(value(cmp.subli_.d), 32);
    rhs = m_int32Zero;
  } else if (cmp.op == Vinstr::subq) {
    lhs = asInt(value(cmp.subq_.d), 64);
    rhs = m_int64Zero;
  } else if (cmp.op == Vinstr::subqi) {
    lhs = asInt(value(cmp.subqi_.d), 64);
    rhs = m_int64Zero;
  } else if (cmp.op == Vinstr::testb ||
             cmp.op == Vinstr::testbi ||
             cmp.op == Vinstr::testbim) {
    lhs = flagTmp(sf);
    rhs = m_int8Zero;
  } else if (cmp.op == Vinstr::testl ||
             cmp.op == Vinstr::testli ||
             cmp.op == Vinstr::testlim) {
    lhs = flagTmp(sf);
    rhs = m_int32Zero;
  } else if (cmp.op == Vinstr::testq ||
             cmp.op == Vinstr::testqm ||
             cmp.op == Vinstr::testqim) {
    lhs = flagTmp(sf);
    rhs = m_int64Zero;
  } else {
    throw FailedLLVMCodeGen("Unsupported flags src: {}",
                            show(m_unit, cmp));
  }

  return m_irb.CreateICmp(ccToPred(cc), lhs, rhs);
}
void LLVMEmitter::emit(const jcc& inst) {
  auto cond = emitCmpForCC(inst.sf, inst.cc);
  auto next  = block(inst.targets[0]);
  auto taken = block(inst.targets[1]);

  m_irb.CreateCondBr(cond, taken, next);
}
void LLVMEmitter::emit(const jmp& inst) {
  m_irb.CreateBr(block(inst.target));
}

void LLVMEmitter::emit(const jmpr& inst) {
  auto func = m_irb.CreateIntToPtr(value(inst.target), ptrType(m_traceletFnTy));
  emitTraceletTailCall(func);
}

void LLVMEmitter::emit(const jmpm& inst) {
  auto func = m_irb.CreateLoad(emitPtr(inst.target,
                                       ptrType(ptrType(m_traceletFnTy))));
  emitTraceletTailCall(func);
}
void LLVMEmitter::emit(const ldimmb& inst) {
  defineValue(inst.d, cns(inst.s.b()));
}

void LLVMEmitter::emit(const ldimm& inst) {
  assert(inst.d.isVirt());
  defineValue(inst.d, cns(inst.s.q()));
}
void LLVMEmitter::emit(const lea& inst) {
  auto value = m_irb.CreatePtrToInt(emitPtr(inst.s, 8), m_int64, "conv");
  defineValue(inst.d, value);
}
void LLVMEmitter::emit(const loaddqu& inst) {
  // This will need to change if we ever use loaddqu with values that aren't
  // TypedValues. Ideally, we'd leave this kind of decision to llvm anyway.
  auto value = m_irb.CreateLoad(emitPtr(inst.s, ptrType(m_typedValueType)));
  defineValue(inst.d, value);
}
void LLVMEmitter::emit(const load& inst) {
  defineValue(inst.d, m_irb.CreateLoad(emitPtr(inst.s, 64)));
}

void LLVMEmitter::emit(const loadb& inst) {
  defineValue(inst.d, m_irb.CreateLoad(emitPtr(inst.s, 8)));
}

void LLVMEmitter::emit(const loadl& inst) {
  defineValue(inst.d, m_irb.CreateLoad(emitPtr(inst.s, 32)));
}
void LLVMEmitter::emit(const loadsd& inst) {
  defineValue(inst.d,
              m_irb.CreateLoad(emitPtr(inst.s, ptrType(m_irb.getDoubleTy()))));
}
void LLVMEmitter::emit(const loadzbl& inst) {
  auto byteVal = m_irb.CreateLoad(emitPtr(inst.s, 8));
  defineValue(inst.d, m_irb.CreateZExt(byteVal, m_int32));
}

void LLVMEmitter::emit(const loadzbq& inst) {
  auto byteVal = m_irb.CreateLoad(emitPtr(inst.s, 8));
  defineValue(inst.d, m_irb.CreateZExt(byteVal, m_int64));
}

void LLVMEmitter::emit(const loadzlq& inst) {
  auto val = m_irb.CreateLoad(emitPtr(inst.s, 32));
  defineValue(inst.d, m_irb.CreateZExt(val, m_int64));
}
// loadqp/leap are intended to be rip-relative instructions, but that's not
// necessary for correctness. Depending on the target of the load, it may be
// needed to work with code relocation - see t5662452 for details.
void LLVMEmitter::emit(const loadqp& inst) {
  auto addr = m_irb.CreateIntToPtr(cns(inst.s.r.disp), m_int64Ptr);
  defineValue(inst.d, m_irb.CreateLoad(addr));
}

void LLVMEmitter::emit(const leap& inst) {
  defineValue(inst.d, cns(inst.s.r.disp));
}
void LLVMEmitter::emit(const movb& inst) {
  defineValue(inst.d, value(inst.s));
}

void LLVMEmitter::emit(const movl& inst) {
  defineValue(inst.d, value(inst.s));
}

void LLVMEmitter::emit(const movzbl& inst) {
  defineValue(inst.d, m_irb.CreateZExt(value(inst.s), m_int32));
}

void LLVMEmitter::emit(const movzbq& inst) {
  defineValue(inst.d, m_irb.CreateZExt(value(inst.s), m_int64));
}
void LLVMEmitter::emit(const mulsd& inst) {
  defineValue(inst.d, m_irb.CreateFMul(asDbl(value(inst.s0)),
                                       asDbl(value(inst.s1))));
}

void LLVMEmitter::emit(const mul& inst) {
  defineValue(inst.d, m_irb.CreateFMul(asDbl(value(inst.s0)),
                                       asDbl(value(inst.s1))));
}
void LLVMEmitter::emit(const neg& inst) {
  defineValue(inst.d, m_irb.CreateSub(m_int64Zero, value(inst.s)));
}

void LLVMEmitter::emit(const nop& inst) {
}

void LLVMEmitter::emit(const not& inst) {
  defineValue(inst.d, m_irb.CreateXor(value(inst.s), cns(int64_t{-1})));
}

void LLVMEmitter::emit(const orq& inst) {
  defineValue(inst.d, m_irb.CreateOr(value(inst.s0), value(inst.s1)));
}

void LLVMEmitter::emit(const orqi& inst) {
  defineValue(inst.d, m_irb.CreateOr(cns(inst.s0.q()), value(inst.s1)));
}

void LLVMEmitter::emit(const orqim& inst) {
  auto ptr = emitPtr(inst.m, 64);
  auto value = m_irb.CreateOr(cns(inst.s0.q()), m_irb.CreateLoad(ptr));
  defineFlagTmp(inst.sf, value);
  m_irb.CreateStore(value, ptr);
}
void LLVMEmitter::emit(const phijmp& inst) {
  m_phiInfos[block(inst.target)].phij(*this, m_irb.GetInsertBlock(),
                                      m_unit.tuples[inst.uses]);
  m_irb.CreateBr(block(inst.target));
}

void LLVMEmitter::emit(const phijcc& inst) {
  auto curBlock = m_irb.GetInsertBlock();
  auto next  = block(inst.targets[0]);
  auto taken = block(inst.targets[1]);
  auto& uses = m_unit.tuples[inst.uses];

  m_phiInfos[next].phij(*this, curBlock, uses);
  m_phiInfos[taken].phij(*this, curBlock, uses);

  auto cond = emitCmpForCC(inst.sf, inst.cc);
  m_irb.CreateCondBr(cond, taken, next);
}

void LLVMEmitter::emit(const phidef& inst) {
  const VregList& defs = m_unit.tuples[inst.defs];
  auto block = m_irb.GetInsertBlock();
  m_phiInfos.at(block).phidef(*this, block, defs);
}
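// The three emitters above cooperate to build LLVM phi nodes: phijmp and
// phijcc record in m_phiInfos which values flow out of the current block
// toward each target, and phidef, emitted at the head of the successor,
// materializes the recorded edges as actual phi instructions, one incoming
// value per predecessor.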
llvm::Value* LLVMEmitter::getReturnAddress() {
  if (!m_llvmReturnAddress) {
    m_llvmReturnAddress =
      llvm::Intrinsic::getDeclaration(m_module.get(),
                                      llvm::Intrinsic::returnaddress);
  }
  auto rac = m_irb.CreateCall(m_llvmReturnAddress, m_int32Zero);
  rac->setCallingConv(llvm::CallingConv::C);
  return m_irb.CreatePtrToInt(rac, m_int64, "retaddr");
}

UNUSED llvm::Value* LLVMEmitter::getFrameAddress() {
  if (!m_llvmFrameAddress) {
    m_llvmFrameAddress =
      llvm::Intrinsic::getDeclaration(m_module.get(),
                                      llvm::Intrinsic::frameaddress);
  }
  auto call = m_irb.CreateCall(m_llvmFrameAddress, m_int32Zero, "framep");
  call->setCallingConv(llvm::CallingConv::C);
  return m_irb.CreatePtrToInt(call, m_int64, "frameaddress");
}

UNUSED llvm::Value* LLVMEmitter::getStackPointer() {
  if (!m_llvmReadRegister) {
    m_llvmReadRegister =
      llvm::Intrinsic::getDeclaration(m_module.get(),
                                      llvm::Intrinsic::read_register,
                                      m_int64);
  }
  auto metadata =
    llvm::MDNode::get(m_context, llvm::MDString::get(m_context, "rsp"));
  auto call = m_irb.CreateCall(m_llvmReadRegister, metadata, "rspcall");
  call->setCallingConv(llvm::CallingConv::C);
  return call;
}
UNUSED void LLVMEmitter::emitAsm(const std::string& asmStatement,
                                 const std::string& asmConstraints,
                                 bool hasSideEffects) {
  auto const funcType =
    llvm::FunctionType::get(m_irb.getVoidTy(), false);
  auto const iasm = llvm::InlineAsm::get(funcType, asmStatement, asmConstraints,
                                         hasSideEffects);
  auto call = m_irb.CreateCall(iasm, "");
  call->setCallingConv(llvm::CallingConv::C);
}
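// Hypothetical usage sketch: a call such as
//
//   emitAsm("pause", "", true);
//
// would splice that literal instruction into the stream via llvm::InlineAsm;
// the empty constraint string means the asm takes no operands, and passing
// true for hasSideEffects keeps llvm from deleting it as dead code.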
void LLVMEmitter::emit(const ldretaddr& inst) {
  auto const ptr = m_irb.CreateBitCast(emitPtr(inst.s, 8),
                                       ptrType(ptrType(m_traceletFnTy)),
                                       "bcast");
  defineValue(inst.d, m_irb.CreateLoad(ptr));
}

void LLVMEmitter::emit(const movretaddr& inst) {
  defineValue(inst.d, m_irb.CreatePtrToInt(value(inst.s), m_int64));
}

void LLVMEmitter::emit(const retctrl& inst) {
  // "Return" with a tail call to the loaded address.
  emitTraceletTailCall(value(inst.s));
}
void LLVMEmitter::emit(const absdbl& inst) {
  if (!m_fabs) {
    m_fabs = llvm::Intrinsic::getDeclaration(
      m_module.get(),
      llvm::Intrinsic::fabs,
      std::vector<llvm::Type*>{m_irb.getDoubleTy()}
    );
  }
  defineValue(inst.d, m_irb.CreateCall(m_fabs, asDbl(value(inst.s))));
}

void LLVMEmitter::emit(const roundsd& inst) {
  auto roundID = [&]{
    switch (inst.dir) {
      case RoundDirection::nearest:
        return llvm::Intrinsic::round;

      case RoundDirection::floor:
        return llvm::Intrinsic::floor;

      case RoundDirection::ceil:
        return llvm::Intrinsic::ceil;

      case RoundDirection::truncate:
        return llvm::Intrinsic::trunc;
    }
    not_reached();
  }();

  auto func = llvm::Intrinsic::getDeclaration(m_module.get(), roundID);
  defineValue(inst.d, m_irb.CreateCall(func, value(inst.s)));
}
void LLVMEmitter::emit(const srem& inst) {
  defineValue(inst.d, m_irb.CreateSRem(value(inst.s0), value(inst.s1)));
}

void LLVMEmitter::emit(const sar& inst) {
  defineValue(inst.d, m_irb.CreateAShr(value(inst.s1), value(inst.s0)));
}

void LLVMEmitter::emit(const sarqi& inst) {
  defineValue(inst.d, m_irb.CreateAShr(value(inst.s1), inst.s0.q()));
}

void LLVMEmitter::emit(const setcc& inst) {
  defineValue(inst.d,
              m_irb.CreateZExt(emitCmpForCC(inst.sf, inst.cc), m_int8));
}

void LLVMEmitter::emit(const shlli& inst) {
  defineValue(inst.d, m_irb.CreateShl(value(inst.s1), inst.s0.q()));
}

void LLVMEmitter::emit(const shl& inst) {
  defineValue(inst.d, m_irb.CreateShl(value(inst.s1), value(inst.s0)));
}

void LLVMEmitter::emit(const shlqi& inst) {
  defineValue(inst.d, m_irb.CreateShl(value(inst.s1), inst.s0.q()));
}

void LLVMEmitter::emit(const shrli& inst) {
  defineValue(inst.d, m_irb.CreateLShr(value(inst.s1), inst.s0.q()));
}

void LLVMEmitter::emit(const shrqi& inst) {
  defineValue(inst.d, m_irb.CreateLShr(value(inst.s1), inst.s0.q()));
}

void LLVMEmitter::emit(const sqrtsd& inst) {
  auto sqrtFunc = llvm::Intrinsic::getDeclaration(m_module.get(),
                                                  llvm::Intrinsic::sqrt);
  defineValue(inst.d, m_irb.CreateCall(sqrtFunc, value(inst.s)));
}
void LLVMEmitter::emit(const store& inst) {
  auto val = value(inst.s);
  assert(val->getType()->getPrimitiveSizeInBits() == 64);
  m_irb.CreateStore(val, emitPtr(inst.d, ptrType(val->getType())));
}

void LLVMEmitter::emit(const storeb& inst) {
  m_irb.CreateStore(m_irb.CreateZExtOrTrunc(value(inst.s), m_int8),
                    emitPtr(inst.m, 8));
}

void LLVMEmitter::emit(const storebi& inst) {
  m_irb.CreateStore(cns(inst.s.b()), emitPtr(inst.m, 8));
}

void LLVMEmitter::emit(const storedqu& inst) {
  // Like loaddqu, this will need to change if we ever use storedqu with values
  // that aren't TypedValues.
  m_irb.CreateStore(value(inst.s), emitPtr(inst.m, ptrType(m_typedValueType)));
}

void LLVMEmitter::emit(const storel& inst) {
  m_irb.CreateStore(value(inst.s), emitPtr(inst.m, 32));
}

void LLVMEmitter::emit(const storeli& inst) {
  m_irb.CreateStore(cns(inst.s.l()), emitPtr(inst.m, 32));
}

void LLVMEmitter::emit(const storeqi& inst) {
  m_irb.CreateStore(cns(inst.s.q()), emitPtr(inst.m, 64));
}

void LLVMEmitter::emit(const storesd& inst) {
  m_irb.CreateStore(value(inst.s),
                    emitPtr(inst.m, ptrType(m_irb.getDoubleTy())));
}

void LLVMEmitter::emit(const storew& inst) {
  m_irb.CreateStore(value(inst.s), emitPtr(inst.m, 16));
}

void LLVMEmitter::emit(const storewi& inst) {
  m_irb.CreateStore(cns(inst.s.w()), emitPtr(inst.m, 16));
}
void LLVMEmitter::emit(const subbi& inst) {
  defineValue(inst.d, m_irb.CreateSub(asInt(value(inst.s1), 8),
                                      cns(inst.s0.b())));
}

void LLVMEmitter::emit(const subl& inst) {
  defineValue(inst.d, m_irb.CreateSub(value(inst.s1), value(inst.s0)));
}

void LLVMEmitter::emit(const subli& inst) {
  defineValue(inst.d, m_irb.CreateSub(value(inst.s1), cns(inst.s0.l())));
}

void LLVMEmitter::emit(const subq& inst) {
  defineValue(inst.d, m_irb.CreateSub(value(inst.s1), value(inst.s0)));
}

void LLVMEmitter::emit(const subqi& inst) {
  defineValue(inst.d, m_irb.CreateSub(value(inst.s1), cns(inst.s0.q())));
}

void LLVMEmitter::emit(const subsd& inst) {
  defineValue(inst.d, m_irb.CreateFSub(asDbl(value(inst.s1)),
                                       asDbl(value(inst.s0))));
}
/*
 * To leave the TC and perform a service request, translated code is supposed
 * to execute a ret instruction after populating the right registers. There are
 * a number of different ways to do this, but the most straightforward for now
 * is to do a tail call with the right calling convention to a stub with a
 * single ret instruction. Rather than emitting a dedicated stub for this, we
 * just reuse the ret at the end of enterTCHelper().
 */
extern "C" void enterTCReturn();
void LLVMEmitter::emit(const svcreq& inst) {
  std::vector<llvm::Value*> args{
    value(x64::rVmSp),
    value(x64::rVmTl),
    value(x64::rVmFp),
    cns(reinterpret_cast<uintptr_t>(inst.stub_block)),
    cns(uint64_t{inst.req})
  };
  for (auto arg : m_unit.tuples[inst.args]) {
    args.push_back(value(arg));
  }

  std::vector<llvm::Type*> argTypes(args.size(), m_int64);
  auto funcType = llvm::FunctionType::get(m_irb.getVoidTy(), argTypes, false);
  auto func = emitFuncPtr(folly::to<std::string>("enterTCServiceReqLLVM_",
                                                 argTypes.size()),
                          funcType,
                          uint64_t(enterTCReturn));
  auto call = m_irb.CreateCall(func, args);
  call->setCallingConv(llvm::CallingConv::X86_64_HHVM_SR);
  call->setTailCallKind(llvm::CallInst::TCK_Tail);
  m_irb.CreateRetVoid();
}
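// As the comment above enterTCReturn explains, this "call" never returns:
// the X86_64_HHVM_SR calling convention pins the arguments above to the
// registers a service request expects, and the tail call lands on the ret
// at the end of enterTCHelper(), so control leaves the TC exactly as if we
// had populated the registers and executed ret ourselves.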
void LLVMEmitter::emit(const syncvmfp& inst) {
  // Nothing to do really.
}

void LLVMEmitter::emit(const syncvmsp& inst) {
  defineValue(x64::rVmSp, value(inst.s));
}

void LLVMEmitter::emit(const testb& inst) {
  auto result = m_irb.CreateAnd(asInt(value(inst.s1), 8),
                                asInt(value(inst.s0), 8));
  defineFlagTmp(inst.sf, result);
}
void LLVMEmitter::emit(const testbi& inst) {
  auto result = m_irb.CreateAnd(asInt(value(inst.s1), 8), inst.s0.b());
  defineFlagTmp(inst.sf, result);
}

void LLVMEmitter::emit(const testbim& inst) {
  auto lhs = m_irb.CreateLoad(emitPtr(inst.s1, 8));
  defineFlagTmp(inst.sf, m_irb.CreateAnd(lhs, inst.s0.b()));
}

void LLVMEmitter::emit(const testl& inst) {
  defineFlagTmp(inst.sf, m_irb.CreateAnd(value(inst.s1), value(inst.s0)));
}

void LLVMEmitter::emit(const testli& inst) {
  defineFlagTmp(inst.sf, m_irb.CreateAnd(value(inst.s1), inst.s0.l()));
}

void LLVMEmitter::emit(const testlim& inst) {
  auto lhs = m_irb.CreateLoad(emitPtr(inst.s1, 32));
  defineFlagTmp(inst.sf, m_irb.CreateAnd(lhs, inst.s0.l()));
}

void LLVMEmitter::emit(const testq& inst) {
  defineFlagTmp(inst.sf, m_irb.CreateAnd(value(inst.s1), value(inst.s0)));
}

void LLVMEmitter::emit(const testqm& inst) {
  auto lhs = m_irb.CreateLoad(emitPtr(inst.s1, 64));
  defineFlagTmp(inst.sf, m_irb.CreateAnd(lhs, value(inst.s0)));
}

void LLVMEmitter::emit(const testqim& inst) {
  auto lhs = m_irb.CreateLoad(emitPtr(inst.s1, 64));
  defineFlagTmp(inst.sf, m_irb.CreateAnd(lhs, inst.s0.q()));
}
void LLVMEmitter::emit(const ud2& inst) {
  emitTrap();
}

void LLVMEmitter::emit(const xorb& inst) {
  defineValue(inst.d, m_irb.CreateXor(value(inst.s1), value(inst.s0)));
}

void LLVMEmitter::emit(const xorbi& inst) {
  defineValue(inst.d, m_irb.CreateXor(value(inst.s1), inst.s0.b()));
}

void LLVMEmitter::emit(const xorq& inst) {
  defineValue(inst.d, m_irb.CreateXor(value(inst.s1), value(inst.s0)));
}

void LLVMEmitter::emit(const xorqi& inst) {
  defineValue(inst.d, m_irb.CreateXor(value(inst.s1), inst.s0.q()));
}

void LLVMEmitter::emit(const landingpad& inst) {
  // This is far from correct, but it's enough to keep the llvm verifier happy
  // for now.
  auto pad = m_irb.CreateLandingPad(m_typedValueType, m_personalityFunc, 0);
  pad->setCleanup(true);
}

void LLVMEmitter::emitTrap() {
  auto trap = llvm::Intrinsic::getDeclaration(m_module.get(),
                                              llvm::Intrinsic::trap);
  m_irb.CreateCall(trap);
  m_irb.CreateUnreachable();
}
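// On x64, llvm.trap lowers to a ud2 instruction, which is why the ud2
// emitter above simply delegates here.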
llvm::Value* LLVMEmitter::emitPtr(const Vptr s, llvm::Type* ptrTy) {
  bool inFS =
    llvm::cast<llvm::PointerType>(ptrTy)->getAddressSpace() == kFSAddressSpace;
  always_assert(s.base != reg::rsp);

  auto ptr = s.base.isValid() ? asInt(value(s.base), 64) : cns(int64_t{0});
  auto disp = cns(int64_t{s.disp});
  if (s.index.isValid()) {
    auto scaledIdx = m_irb.CreateMul(asInt(value(s.index), 64),
                                     cns(int64_t{s.scale}),
                                     "mul");
    disp = m_irb.CreateAdd(disp, scaledIdx, "add");
  }
  ptr = m_irb.CreateIntToPtr(ptr, inFS ? m_int8FSPtr : m_int8Ptr, "conv");
  ptr = m_irb.CreateGEP(ptr, disp, "getelem");

  if (ptrTy != m_int8) {
    ptr = m_irb.CreateBitCast(ptr, ptrTy);
  }

  return ptr;
}
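// Illustrative sketch: for a Vptr of the form base + index*scale + disp,
// the code above produces IR along these lines:
//
//   %mul     = mul i64 %index, scale
//   %add     = add i64 disp, %mul
//   %conv    = inttoptr i64 %base to i8*
//   %getelem = getelementptr i8* %conv, i64 %add
//
// with the resulting pointer then bitcast to the requested type.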
llvm::Value* LLVMEmitter::emitPtr(const Vptr s, size_t bits) {
  return emitPtr(s, ptrIntNType(bits, s.seg == Vptr::FS));
}

llvm::Type* LLVMEmitter::ptrType(llvm::Type* ty, unsigned addressSpace) const {
  return llvm::PointerType::get(ty, addressSpace);
}

llvm::Type* LLVMEmitter::intNType(size_t bits) const {
  switch (bits) {
    default: always_assert(0 && "unsupported bit width");
    case 8:  return m_int8;
    case 16: return m_int16;
    case 32: return m_int32;
    case 64: return m_int64;
  }
}

llvm::Type* LLVMEmitter::ptrIntNType(size_t bits, bool inFS) const {
  switch (bits) {
    default: always_assert(0 && "unsupported bit width");
    case 8:  return inFS ? m_int8FSPtr  : m_int8Ptr;
    case 16: return inFS ? m_int16FSPtr : m_int16Ptr;
    case 32: return inFS ? m_int32FSPtr : m_int32Ptr;
    case 64: return inFS ? m_int64FSPtr : m_int64Ptr;
  }
}
std::string showNewCode(const Vasm::AreaList& areas) DEBUG_ONLY;
std::string showNewCode(const Vasm::AreaList& areas) {
  std::ostringstream str;
  Disasm disasm(Disasm::Options().indent(2));

  for (unsigned i = 0, n = areas.size(); i < n; ++i) {
    auto& area = areas[i];
    auto const start = area.start;
    auto const end = area.code.frontier();

    if (start != end) {
      str << folly::format("emitted {} bytes of code into area {}:\n",
                           end - start, i);
      disasm.disasm(str, start, end);
      str << '\n';
    }
  }

  return str.str();
}
} // unnamed namespace

void genCodeLLVM(const Vunit& unit, Vasm::AreaList& areas,
                 const jit::vector<Vlabel>& labels) {
  FTRACE(2, "\nTrying to emit LLVM IR for Vunit:\n{}\n", show(unit));

  jit::vector<UndoMarker> undoAll = {UndoMarker(mcg->globalData())};
  for (auto const& area : areas) {
    undoAll.emplace_back(area.code);
  }

  try {
    LLVMEmitter(unit, areas).emit(labels);
    FTRACE(3, "\n{:-^80}\n{}\n",
           " x64 after LLVM codegen ", showNewCode(areas));
  } catch (const FailedLLVMCodeGen& e) {
    always_assert_flog(
      RuntimeOption::EvalJitLLVM < 3,
      "Mandatory LLVM codegen failed with reason `{}' on unit:\n{}",
      e.what(), show(unit)
    );
    FTRACE(1, "LLVM codegen failed: {}\n", e.what());

    // Undo any code/data we may have allocated.
    for (auto& marker : undoAll) {
      marker.undo();
    }
    throw e;
  } catch (const std::exception& e) {
    always_assert_flog(false,
                       "Unexpected exception during LLVM codegen: {}\n",
                       e.what());
  }
}
}} // namespace HPHP::jit

#else // #ifdef USE_LLVM

namespace HPHP { namespace jit {

void genCodeLLVM(const Vunit& unit, Vasm::AreaList& areas,
                 const jit::vector<Vlabel>& labels) {
  throw FailedLLVMCodeGen("This build does not support the LLVM backend");
}

}} // namespace HPHP::jit

#endif // #ifdef USE_LLVM