/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/back-end-x64.h"

#include "hphp/util/asm-x64.h"
#include "hphp/util/disasm.h"
#include "hphp/util/text-color.h"

#include "hphp/runtime/vm/jit/abi-x64.h"
#include "hphp/runtime/vm/jit/block.h"
#include "hphp/runtime/vm/jit/check.h"
#include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"
#include "hphp/runtime/vm/jit/code-gen-x64.h"
#include "hphp/runtime/vm/jit/func-prologues-x64.h"
#include "hphp/runtime/vm/jit/cfg.h"
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/reg-alloc-x64.h"
#include "hphp/runtime/vm/jit/service-requests-inline.h"
#include "hphp/runtime/vm/jit/service-requests-x64.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/unique-stubs-x64.h"
#include "hphp/runtime/vm/jit/unwind-x64.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-llvm.h"
namespace HPHP { namespace jit {

extern "C" void enterTCHelper(Cell* vm_sp,
                              ActRec* vm_fp,
                              TCA start,
                              TReqInfo* infoPtr,
                              ActRec* firstAR,
                              void* targetCacheBase);
struct BackEnd : public jit::BackEnd {
  size_t cacheLineSize() override {
    return kCacheLineSize;
  }

  PhysReg rSp() override {
    return PhysReg(reg::rsp);
  }

  PhysReg rVmSp() override {
    return x64::rVmSp;
  }

  PhysReg rVmFp() override {
    return x64::rVmFp;
  }

  PhysReg rVmTl() override {
    return x64::rVmTl;
  }

  bool storesCell(const IRInstruction& inst, uint32_t srcIdx) override {
    return x64::storesCell(inst, srcIdx);
  }

  bool loadsCell(const IRInstruction& inst) override {
    return x64::loadsCell(inst.op());
  }
  /*
   * enterTCHelper does not save callee-saved registers except %rbp. This means
   * when we call it from C++, we have to tell gcc to clobber all the other
   * callee-saved registers.
   */
  #define CALLEE_SAVED_BARRIER() \
    asm volatile("" : : : "rbx", "r12", "r13", "r14", "r15");
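  // Why the barrier works: an empty asm statement with a clobber list makes
  // the compiler assume those registers are destroyed, so any live values are
  // spilled first. A minimal sketch of the pattern in isolation:
  //
  //   int v = compute();            // v may be cached in %rbx
  //   asm volatile("" ::: "rbx");   // %rbx is now assumed dead...
  //   use(v);                       // ...so v is spilled and reloaded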
  /*
   * enterTCHelper is a handwritten assembly function that transfers control in
   * and out of the TC.
   */
  static_assert(x64::rVmSp == rbx &&
                x64::rVmFp == rbp &&
                x64::rVmTl == r12 &&
                x64::rStashedAR == r15,
                "__enterTCHelper needs to be modified to use the correct ABI");
  static_assert(REQ_BIND_CALL == 0x1,
                "Update assembly test for REQ_BIND_CALL in __enterTCHelper");
  void enterTCHelper(TCA start, TReqInfo& info) override {
    // We have to force C++ to spill anything that might be in a callee-saved
    // register (aside from rbp). enterTCHelper does not save them.
    CALLEE_SAVED_BARRIER();
    auto& regs = vmRegsUnsafe();
    jit::enterTCHelper(regs.stack.top(), regs.fp, start,
                       &info, vmFirstAR(), RDS::tl_base);
    CALLEE_SAVED_BARRIER();
  }
  void moveToAlign(CodeBlock& cb,
                   MoveToAlignFlags alignment
                   = MoveToAlignFlags::kJmpTargetAlign) override {
    size_t x64Alignment;

    switch (alignment) {
    case MoveToAlignFlags::kJmpTargetAlign:
      x64Alignment = kJmpTargetAlign;
      break;
    case MoveToAlignFlags::kNonFallthroughAlign:
      x64Alignment = jit::kNonFallthroughAlign;
      break;
    case MoveToAlignFlags::kCacheLineAlign:
      x64Alignment = kCacheLineSize;
      break;
    }
    x64::moveToAlign(cb, x64Alignment);
  }
  UniqueStubs emitUniqueStubs() override {
    return x64::emitUniqueStubs();
  }
  TCA emitServiceReqWork(CodeBlock& cb, TCA start, SRFlags flags,
                         ServiceRequest req,
                         const ServiceReqArgVec& argv) override {
    return x64::emitServiceReqWork(cb, start, flags, req, argv);
  }
  void emitInterpReq(CodeBlock& mainCode, CodeBlock& coldCode,
                     SrcKey sk) override {
    X64Assembler a { mainCode };
    // Add a counter for the translation if requested
    if (RuntimeOption::EvalJitTransCounters) {
      x64::emitTransCounterInc(a);
    }
    a.    jmp(emitServiceReq(coldCode, REQ_INTERPRET, sk.offset()));
  }
  bool funcPrologueHasGuard(TCA prologue, const Func* func) override {
    return x64::funcPrologueHasGuard(prologue, func);
  }
  TCA funcPrologueToGuard(TCA prologue, const Func* func) override {
    return x64::funcPrologueToGuard(prologue, func);
  }
  SrcKey emitFuncPrologue(CodeBlock& mainCode, CodeBlock& coldCode, Func* func,
                          bool funcIsMagic, int nPassed, TCA& start,
                          TCA& aStart) override {
    return funcIsMagic
      ? x64::emitMagicFuncPrologue(func, nPassed, start)
      : x64::emitFuncPrologue(func, nPassed, start);
  }
  TCA emitCallArrayPrologue(Func* func, DVFuncletsVec& dvs) override {
    return x64::emitCallArrayPrologue(func, dvs);
  }
  void funcPrologueSmashGuard(TCA prologue, const Func* func) override {
    x64::funcPrologueSmashGuard(prologue, func);
  }
  void emitIncStat(CodeBlock& cb, intptr_t disp, int n) override {
    X64Assembler a { cb };

    // addq $n, [%fs:disp]
    a.    fs().addq(n, baseless(disp));
  }
  void emitTraceCall(CodeBlock& cb, Offset pcOff) override {
    x64::emitTraceCall(cb, pcOff);
  }
  bool isSmashable(Address frontier, int nBytes, int offset = 0) override {
    assert(nBytes <= int(kCacheLineSize));
    uintptr_t iFrontier = uintptr_t(frontier) + offset;
    uintptr_t lastByte = uintptr_t(frontier) + nBytes - 1;
    return (iFrontier & ~kCacheLineMask) == (lastByte & ~kCacheLineMask);
  }
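  // Worked example of the check above (assuming a 64-byte cache line, so
  // kCacheLineMask == 63): a 5-byte jmp whose first byte sits at ...0x3e
  // has its last byte at ...0x42. 0x3e & ~63 == 0x00 but 0x42 & ~63 == 0x40,
  // so the bytes straddle two cache lines and the site is not smashable;
  // the same jmp starting at ...0x3b ends at ...0x3f and is fine.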
  void prepareForSmashImpl(CodeBlock& cb, int nBytes, int offset) {
    if (!isSmashable(cb.frontier(), nBytes, offset)) {
      X64Assembler a { cb };
      int gapSize = (~(uintptr_t(a.frontier()) + offset) & kCacheLineMask) + 1;
      a.emitNops(gapSize);
      assert(isSmashable(a.frontier(), nBytes, offset));
    }
  }
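  // gapSize is the distance from (frontier + offset) up to the next
  // cache-line boundary. E.g. with a 64-byte line and frontier + offset
  // == ...0x3e: ~0x3e & 63 == 1, plus 1 gives 2, so two nop bytes push the
  // smashable part of the instruction onto the next cache line.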
  void prepareForSmash(CodeBlock& cb, int nBytes, int offset = 0) override {
    prepareForSmashImpl(cb, nBytes, offset);
    mcg->cgFixups().m_alignFixups.emplace(cb.frontier(),
                                          std::make_pair(nBytes, offset));
  }
  void prepareForTestAndSmash(CodeBlock& cb, int testBytes,
                              TestAndSmashFlags flags) override {
    switch (flags) {
    case TestAndSmashFlags::kAlignJcc:
      prepareForSmash(cb, testBytes + kJmpccLen, testBytes);
      assert(isSmashable(cb.frontier() + testBytes, kJmpccLen));
      break;
    case TestAndSmashFlags::kAlignJccImmediate:
      prepareForSmash(cb,
                      testBytes + kJmpccLen,
                      testBytes + kJmpccLen - kJmpImmBytes);
      assert(isSmashable(cb.frontier() + testBytes, kJmpccLen,
                         kJmpccLen - kJmpImmBytes));
      break;
    case TestAndSmashFlags::kAlignJccAndJmp:
      // Ensure that the entire jcc, and the entire jmp are smashable
      // (but we don't need them both to be in the same cache line).
      prepareForSmashImpl(cb, testBytes + kJmpccLen, testBytes);
      prepareForSmashImpl(cb, testBytes + kJmpccLen + kJmpLen,
                          testBytes + kJmpccLen);
      mcg->cgFixups().m_alignFixups.emplace(
        cb.frontier(), std::make_pair(testBytes + kJmpccLen, testBytes));
      mcg->cgFixups().m_alignFixups.emplace(
        cb.frontier(), std::make_pair(testBytes + kJmpccLen + kJmpLen,
                                      testBytes + kJmpccLen));
      assert(isSmashable(cb.frontier() + testBytes, kJmpccLen));
      assert(isSmashable(cb.frontier() + testBytes + kJmpccLen, kJmpLen));
      break;
    }
  }
  bool supportsRelocation() const override {
    return true;
  }

  typedef hphp_hash_set<void*> WideJmpSet;
  struct JmpOutOfRange : std::exception {};
  size_t relocate(RelocationInfo& rel,
                  CodeBlock& destBlock,
                  TCA start, TCA end,
                  CodeGenFixups& fixups,
                  TCA* exitAddr) override {
    WideJmpSet wideJmps;
    while (true) {
      try {
        return relocateImpl(rel, destBlock, start, end,
                            fixups, exitAddr, wideJmps);
      } catch (JmpOutOfRange& j) {
      }
    }
  }
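  /*
   * How the retry loop above interacts with relocateImpl: while rewriting
   * internal branches, relocateImpl may discover that a branch it shrank no
   * longer reaches its relocated target. It records that source address in
   * wideJmps and throws JmpOutOfRange; the loop then retries, this time
   * keeping those branches in their wide (rel32) form.
   */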
  size_t relocateImpl(RelocationInfo& rel,
                      CodeBlock& destBlock,
                      TCA start, TCA end,
                      CodeGenFixups& fixups,
                      TCA* exitAddr,
                      WideJmpSet& wideJmps) {
    TCA src = start;
    size_t range = end - src;
    bool hasInternalRefs = false;
    bool internalRefsNeedUpdating = false;
    TCA destStart = destBlock.frontier();
    size_t asm_count{0};
    TCA jmpDest = nullptr;
    TCA keepNopLow = nullptr;
    TCA keepNopHigh = nullptr;
    try {
      while (src != end) {
        assert(src < end);

        DecodedInstruction di(src);
        asm_count++;

        int destRange = 0;
        auto af = fixups.m_alignFixups.equal_range(src);
        while (af.first != af.second) {
          auto low = src + af.first->second.second;
          auto hi = src + af.first->second.first;
          if (!keepNopLow || keepNopLow > low) keepNopLow = low;
          if (!keepNopHigh || keepNopHigh < hi) keepNopHigh = hi;
          TCA tmp = destBlock.frontier();
          prepareForSmashImpl(destBlock,
                              af.first->second.first,
                              af.first->second.second);
          if (destBlock.frontier() != tmp) {
            destRange += destBlock.frontier() - tmp;
            internalRefsNeedUpdating = true;
          }
          ++af.first;
        }

        bool preserveAlignment = keepNopLow && keepNopHigh &&
          keepNopLow <= src && keepNopHigh > src;
        TCA target = nullptr;
        TCA dest = destBlock.frontier();
        destBlock.bytes(di.size(), src);
        DecodedInstruction d2(dest);
        if (di.hasPicOffset()) {
          if (di.isBranch(false)) {
            target = di.picAddress();
          }
          /*
           * Rip-relative offsets that point outside the range
           * being moved need to be adjusted so they continue
           * to point at the right thing.
           */
          if (size_t(di.picAddress() - start) >= range) {
            bool DEBUG_ONLY success = d2.setPicAddress(di.picAddress());
            assert(success);
          } else {
            if (!preserveAlignment && d2.isBranch()) {
              if (wideJmps.count(src)) {
                if (d2.size() < kJmpLen) {
                  d2.widenBranch();
                  internalRefsNeedUpdating = true;
                }
              } else if (d2.shrinkBranch()) {
                internalRefsNeedUpdating = true;
              }
            }
            hasInternalRefs = true;
          }
        }
        if (di.hasImmediate()) {
          if (fixups.m_addressImmediates.count(src)) {
            if (size_t(di.immediate() - (uint64_t)start) < range) {
              hasInternalRefs = internalRefsNeedUpdating = true;
            }
          } else {
            if (fixups.m_addressImmediates.count((TCA)~uintptr_t(src))) {
              // Handle weird, encoded offset, used by cgLdObjMethod
              always_assert(di.immediate() == ((uintptr_t(src) << 1) | 1));
              bool DEBUG_ONLY success =
                d2.setImmediate(((uintptr_t)dest << 1) | 1);
              assert(success);
            }
            /*
             * An immediate that points into the range being moved, but which
             * isn't tagged as an addressImmediate, is most likely a bug
             * and its instruction's address needs to be put into
             * fixups.m_addressImmediates. But it could just happen by bad
             * luck, so just log it.
             */
            if (size_t(di.immediate() - (uint64_t)start) < range) {
              FTRACE(3,
                     "relocate: instruction at {} has immediate 0x{:x} "
                     "which looks like an address that needs relocating\n",
                     src, di.immediate());
            }
          }
        }

        if (src == start) {
          // For the start of the range, we only want to overwrite the "after"
          // address (since the "before" address could belong to the previous
          // tracelet, which could be being relocated to a completely different
          // address). recordRange will do that for us, so just make sure we
          // have the right address setup.
          destStart = dest;
        } else {
          rel.recordAddress(src, dest - destRange, destRange);
        }
        if (preserveAlignment && di.size() == kJmpLen &&
            di.isNop() && src + kJmpLen == end) {
          smashJmp(dest, src + kJmpLen);
          dest += kJmpLen;
        } else if (di.isNop() && !preserveAlignment) {
          internalRefsNeedUpdating = true;
        } else {
          dest += d2.size();
        }
        jmpDest = target;
        assert(dest <= destBlock.frontier());
        destBlock.setFrontier(dest);
        src += di.size();
        if (keepNopHigh && src >= keepNopHigh) {
          keepNopLow = keepNopHigh = nullptr;
        }
      }

      if (exitAddr) {
        *exitAddr = jmpDest;
      }

      rel.recordRange(start, end, destStart, destBlock.frontier());

      if (hasInternalRefs && internalRefsNeedUpdating) {
        src = start;
        while (src != end) {
          DecodedInstruction di(src);
          TCA newPicAddress = nullptr;
          int64_t newImmediate = 0;
          if (di.hasPicOffset() &&
              size_t(di.picAddress() - start) < range) {
            newPicAddress = rel.adjustedAddressAfter(di.picAddress());
            always_assert(newPicAddress);
          }
          if (di.hasImmediate() &&
              size_t((TCA)di.immediate() - start) < range &&
              fixups.m_addressImmediates.count(src)) {
            newImmediate =
              (int64_t)rel.adjustedAddressAfter((TCA)di.immediate());
            always_assert(newImmediate);
          }
          if (newImmediate || newPicAddress) {
            TCA dest = rel.adjustedAddressAfter(src);
            DecodedInstruction d2(dest);
            if (newPicAddress) {
              if (!d2.setPicAddress(newPicAddress)) {
                always_assert(d2.isBranch() && d2.size() == 2);
                wideJmps.insert(src);
              }
            }
            if (newImmediate) {
              if (!d2.setImmediate(newImmediate)) {
                always_assert(false);
              }
            }
          }
          src += di.size();
        }
        if (!wideJmps.empty()) {
          throw JmpOutOfRange();
        }
      }
      rel.markAddressImmediates(fixups.m_addressImmediates);
    } catch (...) {
      rel.rewind(start, end);
      destBlock.setFrontier(destStart);
      throw;
    }
    return asm_count;
  }
  template <typename T>
  void fixupStateVector(StateVector<T, TcaRange>& sv, RelocationInfo& rel) {
    for (auto& ii : sv) {
      /*
       * We have to be careful with before/after here.
       * If we relocate two consecutive regions of memory,
       * but relocate them to two different destinations, then
       * the end address of the first region is also the start
       * address of the second region; so adjustedAddressBefore(end)
       * gives us the relocated address of the end of the first
       * region, while adjustedAddressAfter(end) gives us the
       * relocated address of the start of the second region.
       */
      auto s = rel.adjustedAddressAfter(ii.begin());
      auto e = rel.adjustedAddressBefore(ii.end());
      if (s || e) {
        if (!s) s = ii.begin();
        if (!e) e = ii.end();
        ii = TcaRange(s, e);
      }
    }
  }
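  // Concrete illustration of the before/after distinction (addresses made up
  // for the example): suppose region A = [0x100, 0x200) was relocated to
  // 0x1000 and the adjacent region B = [0x200, 0x300) to 0x5000. For the
  // shared boundary address 0x200, adjustedAddressBefore(0x200) yields 0x1100
  // (the end of relocated A), while adjustedAddressAfter(0x200) yields 0x5000
  // (the start of relocated B).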
  void adjustForRelocation(RelocationInfo& rel) override {
    for (const auto& range : rel.srcRanges()) {
      adjustForRelocation(rel, range.first, range.second);
    }
  }
  void adjustForRelocation(RelocationInfo& rel,
                           TCA srcStart, TCA srcEnd) override {
    auto start = rel.adjustedAddressAfter(srcStart);
    auto end = rel.adjustedAddressBefore(srcEnd);
    if (!start) {
      start = srcStart;
      end = srcEnd;
    } else {
      always_assert(end);
    }

    while (start != end) {
      DecodedInstruction di(start);

      if (di.hasPicOffset()) {
        /*
         * A pointer into something that has been relocated needs to be
         * updated.
         */
        if (TCA adjusted = rel.adjustedAddressAfter(di.picAddress())) {
          di.setPicAddress(adjusted);
        }
      }

      if (di.hasImmediate()) {
        /*
         * Similarly for addressImmediates - and see comment above
         * for non-address immediates.
         */
        if (TCA adjusted = rel.adjustedAddressAfter((TCA)di.immediate())) {
          if (rel.isAddressImmediate(start)) {
            di.setImmediate((int64_t)adjusted);
          } else {
            FTRACE(3,
                   "relocate: instruction at {} has immediate 0x{:x} "
                   "which looks like an address that needs relocating\n",
                   start, di.immediate());
          }
        }
      }

      start += di.size();

      if (start == end && di.isNop() &&
          di.size() == kJmpLen &&
          rel.adjustedAddressAfter(srcEnd)) {
        smashJmp(start - di.size(), rel.adjustedAddressAfter(end));
      }
    }
  }
  /*
   * Adjusts the addresses in asmInfo and fixups to match the new
   * location of the code.
   * This will not "hook up" the relocated code in any way, so is safe
   * to call before the relocated code is ready to run.
   */
  void adjustMetaDataForRelocation(RelocationInfo& rel,
                                   AsmInfo* asmInfo,
                                   CodeGenFixups& fixups) override {
    auto& ip = fixups.m_inProgressTailJumps;
    for (size_t i = 0; i < ip.size(); ++i) {
      IncomingBranch& ib = const_cast<IncomingBranch&>(ip[i]);
      TCA adjusted = rel.adjustedAddressAfter(ib.toSmash());
      always_assert(adjusted);
      ib.adjust(adjusted);
    }

    for (auto& fixup : fixups.m_pendingFixups) {
      /*
       * Pending fixups always point after the call instruction,
       * so use the "before" address, since there may be nops
       * before the next actual instruction.
       */
      if (TCA adjusted = rel.adjustedAddressBefore(fixup.m_tca)) {
        fixup.m_tca = adjusted;
      }
    }

    for (auto& ct : fixups.m_pendingCatchTraces) {
      /*
       * Similar to fixups - this is a return address so get
       * the address returned to.
       */
      if (CTCA adjusted = rel.adjustedAddressBefore(ct.first)) {
        ct.first = adjusted;
      }
      /*
       * But the target is an instruction, so skip over any nops
       * that might have been inserted (eg for alignment).
       */
      if (TCA adjusted = rel.adjustedAddressAfter(ct.second)) {
        ct.second = adjusted;
      }
    }

    for (auto& jt : fixups.m_pendingJmpTransIDs) {
      if (TCA adjusted = rel.adjustedAddressAfter(jt.first)) {
        jt.first = adjusted;
      }
    }
    /*
     * Most of the time we want to adjust to a corresponding "before" address
     * with the exception of the start of the range where "before" can point to
     * the end of a previous range.
     */
    if (!fixups.m_bcMap.empty()) {
      auto const aStart = fixups.m_bcMap[0].aStart;
      auto const acoldStart = fixups.m_bcMap[0].acoldStart;
      auto const afrozenStart = fixups.m_bcMap[0].afrozenStart;
      for (auto& tbc : fixups.m_bcMap) {
        if (TCA adjusted = (tbc.aStart == aStart
                              ? rel.adjustedAddressAfter(aStart)
                              : rel.adjustedAddressBefore(tbc.aStart))) {
          tbc.aStart = adjusted;
        }
        if (TCA adjusted = (tbc.acoldStart == acoldStart
                              ? rel.adjustedAddressAfter(acoldStart)
                              : rel.adjustedAddressBefore(tbc.acoldStart))) {
          tbc.acoldStart = adjusted;
        }
        if (TCA adjusted = (tbc.afrozenStart == afrozenStart
                              ? rel.adjustedAddressAfter(afrozenStart)
                              : rel.adjustedAddressBefore(tbc.afrozenStart))) {
          tbc.afrozenStart = adjusted;
        }
      }
    }

    decltype(fixups.m_addressImmediates) updatedAI;
    for (auto addrImm : fixups.m_addressImmediates) {
      if (TCA adjusted = rel.adjustedAddressAfter(addrImm)) {
        updatedAI.insert(adjusted);
      } else if (TCA odd = rel.adjustedAddressAfter((TCA)~uintptr_t(addrImm))) {
        // just for cgLdObjMethod
        updatedAI.insert((TCA)~uintptr_t(odd));
      } else {
        updatedAI.insert(addrImm);
      }
    }
    updatedAI.swap(fixups.m_addressImmediates);

    decltype(fixups.m_alignFixups) updatedAF;
    for (auto af : fixups.m_alignFixups) {
      if (TCA adjusted = rel.adjustedAddressAfter(af.first)) {
        updatedAF.emplace(adjusted, af.second);
      } else {
        updatedAF.emplace(af);
      }
    }
    updatedAF.swap(fixups.m_alignFixups);

    if (asmInfo) {
      fixupStateVector(asmInfo->asmInstRanges, rel);
      fixupStateVector(asmInfo->asmBlockRanges, rel);
      fixupStateVector(asmInfo->coldInstRanges, rel);
      fixupStateVector(asmInfo->coldBlockRanges, rel);
      fixupStateVector(asmInfo->frozenInstRanges, rel);
      fixupStateVector(asmInfo->frozenBlockRanges, rel);
    }
  }
  void adjustCodeForRelocation(RelocationInfo& rel,
                               CodeGenFixups& fixups) override {
    for (auto addr : fixups.m_reusedStubs) {
      /*
       * The stubs are terminated by a ud2. Check for it.
       */
      while (addr[0] != 0x0f || addr[1] != 0x0b) {
        DecodedInstruction di(addr);
        if (di.hasPicOffset()) {
          if (TCA adjusted = rel.adjustedAddressAfter(di.picAddress())) {
            di.setPicAddress(adjusted);
          }
        }
        addr += di.size();
      }
    }

    for (auto codePtr : fixups.m_codePointers) {
      if (TCA adjusted = rel.adjustedAddressAfter(*codePtr)) {
        *codePtr = adjusted;
      }
    }
  }
  void smashJmpOrCall(TCA addr, TCA dest, bool isCall) {
    // Unconditional rip-relative jmps can also be encoded with an EB as the
    // first byte, but that means the delta is 1 byte, and we shouldn't be
    // encoding smashable jumps that way.
    assert(kJmpLen == kCallLen);
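    // For reference, the standard x86-64 encodings involved here:
    //   E9 <imm32>  jmp  rel32  -- 5 bytes, what smashable jumps use
    //   EB <imm8>   jmp  rel8   -- 2 bytes, too small to re-point anywhere
    //   E8 <imm32>  call rel32  -- 5 bytes, hence kJmpLen == kCallLen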
    // XXX The LLVM check here is terrible and awful and temporary until we fix
    // llvm's smashable tail call support: t5742980. For now it just means it's
    // not safe to run multiple PHP threads when LLVM is enabled.
    always_assert(RuntimeOption::EvalJitLLVM ||
                  isSmashable(addr, x64::kJmpLen));

    auto& cb = mcg->code.blockFor(addr);
    CodeCursor cursor { cb, addr };
    X64Assembler a { cb };
    if (dest > addr && dest - addr <= x64::kJmpLen) {
      assert(!isCall);
      a.  emitNop(dest - addr);
    } else if (isCall) {
      a.  call   (dest);
    } else {
      a.  jmp    (dest);
    }
  }
  void smashJmp(TCA jmpAddr, TCA newDest) override {
    assert(MCGenerator::canWrite());
    FTRACE(2, "smashJmp: {} -> {}\n", jmpAddr, newDest);
    smashJmpOrCall(jmpAddr, newDest, false);
  }
  void smashCall(TCA callAddr, TCA newDest) override {
    assert(MCGenerator::canWrite());
    FTRACE(2, "smashCall: {} -> {}\n", callAddr, newDest);
    smashJmpOrCall(callAddr, newDest, true);
  }
  void smashJcc(TCA jccAddr, TCA newDest) override {
    assert(MCGenerator::canWrite());
    FTRACE(2, "smashJcc: {} -> {}\n", jccAddr, newDest);
    // Make sure the encoding is what we expect. It has to be a rip-relative
    // jcc with a 4-byte delta.
    assert(*jccAddr == 0x0F && (*(jccAddr + 1) & 0xF0) == 0x80);
    assert(isSmashable(jccAddr, x64::kJmpccLen));

    // Can't use the assembler to write out a new instruction, because we have
    // to preserve the condition code.
    auto newDelta = safe_cast<int32_t>(newDest - jccAddr - x64::kJmpccLen);
    auto deltaAddr = reinterpret_cast<int32_t*>(jccAddr
                                                + x64::kJmpccLen
                                                - x64::kJmpImmBytes);
    *deltaAddr = newDelta;
  }
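  // Worked example (assuming the usual 6-byte jcc, i.e. kJmpccLen == 6 and
  // kJmpImmBytes == 4): for a jcc at 0x1000 retargeted to 0x1080 the delta is
  // 0x1080 - 0x1000 - 6 = 0x7a, written over the 4 immediate bytes starting
  // at 0x1002; the two opcode bytes (0F 8x) keep the original condition code.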
  void emitSmashableJump(CodeBlock& cb, TCA dest, ConditionCode cc) override {
    X64Assembler a { cb };
    if (cc == CC_None) {
      assert(isSmashable(cb.frontier(), x64::kJmpLen));
      a.  jmp(dest);
    } else {
      assert(isSmashable(cb.frontier(), x64::kJmpccLen));
      a.  jcc(cc, dest);
    }
  }
  TCA smashableCallFromReturn(TCA retAddr) override {
    auto addr = retAddr - x64::kCallLen;
    assert(isSmashable(addr, x64::kCallLen));
    return addr;
  }
  void emitSmashableCall(CodeBlock& cb, TCA dest) override {
    X64Assembler a { cb };
    assert(isSmashable(cb.frontier(), x64::kCallLen));
    a.  call(dest);
  }
  TCA jmpTarget(TCA jmp) override {
    if (jmp[0] != 0xe9) {
      if (jmp[0] == 0x0f &&
          jmp[1] == 0x1f &&
          jmp[2] == 0x44) {
        // 5 byte nop
        return jmp + 5;
      }
      return nullptr;
    }
    return jmp + 5 + ((int32_t*)(jmp + 5))[-1];
  }
  TCA jccTarget(TCA jmp) override {
    if (jmp[0] != 0x0F || (jmp[1] & 0xF0) != 0x80) return nullptr;
    return jmp + 6 + ((int32_t*)(jmp + 6))[-1];
  }
  TCA callTarget(TCA call) override {
    if (call[0] != 0xE8) return nullptr;
    return call + 5 + ((int32_t*)(call + 5))[-1];
  }
& codeMain
, CodeBlock
& codeCold
,
782 SrcKey sk
, size_t dbgOff
) override
{
785 // Emit the checks for debugger attach
787 emitTLSLoad
<ThreadInfo
>(a
, ThreadInfo::s_threadInfo
, rtmp
);
788 a
. loadb (rtmp
[dbgOff
], rbyte(rtmp
));
789 a
. testb ((int8_t)0xff, rbyte(rtmp
));
791 // Branch to a special REQ_INTERPRET if attached
792 auto const fallback
=
793 emitServiceReq(codeCold
, REQ_INTERPRET
, sk
.offset());
  void streamPhysReg(std::ostream& os, PhysReg reg) override {
    auto name = (reg.type() == PhysReg::GP) ? reg::regname(Reg64(reg)) :
                (reg.type() == PhysReg::SIMD) ? reg::regname(RegXMM(reg)) :
                /* (reg.type() == PhysReg::SF) ? */ reg::regname(RegSF(reg));
    os << name;
  }
  void disasmRange(std::ostream& os, int indent, bool dumpIR, TCA begin,
                   TCA end) override {
    Disasm disasm(Disasm::Options().indent(indent + 4)
                  .printEncoding(dumpIR)
                  .color(color(ANSI_COLOR_BROWN)));
    disasm.disasm(os, begin, end);
  }
  void genCodeImpl(IRUnit& unit, AsmInfo*) override;
};
std::unique_ptr<jit::BackEnd> newBackEnd() {
  return folly::make_unique<BackEnd>();
}
static size_t genBlock(CodegenState& state, Vout& v, Vout& vc, Block* block) {
  FTRACE(6, "genBlock: {}\n", block->id());
  CodeGenerator cg(state, v, vc);
  size_t hhir_count{0};
  for (IRInstruction& inst : *block) {
    hhir_count++;
    if (inst.is(EndGuards)) state.pastGuards = true;
    v.setOrigin(&inst);
    vc.setOrigin(&inst);
    cg.cgInst(&inst);
  }
  return hhir_count;
}
auto const vasm_gp = x64::abi.gpUnreserved | RegSet(rAsm).add(r11);
auto const vasm_simd = x64::kXMMRegs;
UNUSED const Abi vasm_abi {
  .gpUnreserved = vasm_gp,
  .gpReserved = x64::abi.gp() - vasm_gp,
  .simdUnreserved = vasm_simd,
  .simdReserved = x64::abi.simd() - vasm_simd,
  .calleeSaved = x64::kCalleeSaved,
};
void BackEnd::genCodeImpl(IRUnit& unit, AsmInfo* asmInfo) {
  Timer _t(Timer::codeGen);
  CodeBlock& mainCodeIn = mcg->code.main();
  CodeBlock& coldCodeIn = mcg->code.cold();
  CodeBlock* frozenCode = &mcg->code.frozen();

  CodeBlock mainCode;
  CodeBlock coldCode;
  const bool useLLVM = mcg->useLLVM();
  bool relocate = false;
  if (!useLLVM &&
      RuntimeOption::EvalJitRelocationSize &&
      supportsRelocation() &&
      coldCodeIn.canEmit(RuntimeOption::EvalJitRelocationSize * 3)) {
    /*
     * This is mainly to exercise the relocator, and ensure that it's
     * not broken by new non-relocatable code. Later, it will be
     * used to do some peephole optimizations, such as reducing branch
     * sizes.
     * Allocate enough space that the relocated cold code doesn't
     * overlap the emitted cold code.
     */
    static unsigned seed = 42;
    auto off = rand_r(&seed) & (cacheLineSize() - 1);
    coldCode.init(coldCodeIn.frontier() +
                  RuntimeOption::EvalJitRelocationSize + off,
                  RuntimeOption::EvalJitRelocationSize - off, "cgRelocCold");

    mainCode.init(coldCode.frontier() +
                  RuntimeOption::EvalJitRelocationSize + off,
                  RuntimeOption::EvalJitRelocationSize - off, "cgRelocMain");

    relocate = true;
  } else {
    /*
     * Use separate code blocks, so that attempts to use the mcg's
     * code blocks directly will fail (eg by overwriting the same
     * memory being written through these locals).
     */
    coldCode.init(coldCodeIn.frontier(), coldCodeIn.available(),
                  coldCodeIn.name().c_str());
    mainCode.init(mainCodeIn.frontier(), mainCodeIn.available(),
                  mainCodeIn.name().c_str());
  }

  if (frozenCode == &coldCodeIn) {
    frozenCode = &coldCode;
  }

  auto frozenStart = frozenCode->frontier();
  auto coldStart DEBUG_ONLY = coldCodeIn.frontier();
  auto mainStart DEBUG_ONLY = mainCodeIn.frontier();
  size_t hhir_count{0};

  {
    mcg->cgFixups().setBlocks(&mainCode, &coldCode, frozenCode);
    SCOPE_EXIT {
      mcg->cgFixups().setBlocks(nullptr, nullptr, nullptr);
    };

    if (RuntimeOption::EvalHHIRGenerateAsserts) {
      emitTraceCall(mainCode, unit.bcOff());
    }

    CodegenState state(unit, asmInfo, *frozenCode);
    auto const blocks = rpoSortCfg(unit);
    Vasm vasm;
    auto& vunit = vasm.unit();
    // create the initial set of vasm blocks, numbered the same as hhir blocks.
    for (uint32_t i = 0, n = unit.numBlocks(); i < n; ++i) {
      state.labels[i] = vunit.makeBlock(AreaIndex::Main);
    }
    // create vregs for all relevant SSATmps
    assignRegs(unit, vunit, state, blocks, this);
    vunit.entry = state.labels[unit.entry()];
    vasm.main(mainCode);
    vasm.cold(coldCode);
    vasm.frozen(*frozenCode);
    for (auto block : blocks) {
      auto& v = block->hint() == Block::Hint::Unlikely ? vasm.cold() :
                block->hint() == Block::Hint::Unused ? vasm.frozen() :
                vasm.main();
      FTRACE(6, "genBlock {} on {}\n", block->id(),
             area_names[(unsigned)v.area()]);
      auto b = state.labels[block];
      vunit.blocks[b].area = v.area();
      v.use(b);
      hhir_count += genBlock(state, v, vasm.cold(), block);
      assert(v.closed());
      assert(vasm.main().empty() || vasm.main().closed());
      assert(vasm.cold().empty() || vasm.cold().closed());
      assert(vasm.frozen().empty() || vasm.frozen().closed());
    }
    printUnit(kInitialVasmLevel, "after initial vasm generation", vunit);
    assert(check(vunit));

    if (useLLVM) {
      try {
        genCodeLLVM(vunit, vasm.areas(), sortBlocks(vunit));
      } catch (const FailedLLVMCodeGen& e) {
        FTRACE(1, "LLVM codegen failed ({}); falling back to x64 backend\n",
               e.what());
        vasm.finishX64(vasm_abi, state.asmInfo);
      }
    } else {
      vasm.finishX64(vasm_abi, state.asmInfo);
    }
  }

  auto bcMap = &mcg->cgFixups().m_bcMap;
  if (relocate && !bcMap->empty()) {
    TRACE(1, "BCMAPS before relocation\n");
    for (UNUSED auto& map : *bcMap) {
      TRACE(1, "%s %-6d %p %p %p\n", map.md5.toString().c_str(),
            map.bcStart, map.aStart, map.acoldStart, map.afrozenStart);
    }
  }

  assert(coldCodeIn.frontier() == coldStart);
  assert(mainCodeIn.frontier() == mainStart);

  if (relocate) {
    if (asmInfo) {
      printUnit(kRelocationLevel, unit, " before relocation ", asmInfo);
    }

    auto& be = mcg->backEnd();
    RelocationInfo rel;
    size_t asm_count{0};
    asm_count += be.relocate(rel, mainCodeIn,
                             mainCode.base(), mainCode.frontier(),
                             mcg->cgFixups(), nullptr);

    asm_count += be.relocate(rel, coldCodeIn,
                             coldCode.base(), coldCode.frontier(),
                             mcg->cgFixups(), nullptr);
    TRACE(1, "hhir-inst-count %ld asm %ld\n", hhir_count, asm_count);

    if (frozenCode != &coldCode) {
      rel.recordRange(frozenStart, frozenCode->frontier(),
                      frozenStart, frozenCode->frontier());
    }
    be.adjustForRelocation(rel);
    be.adjustMetaDataForRelocation(rel, asmInfo, mcg->cgFixups());
    be.adjustCodeForRelocation(rel, mcg->cgFixups());

    static int64_t mainDeltaTot = 0, coldDeltaTot = 0;
    int64_t mainDelta =
      (mainCodeIn.frontier() - mainStart) -
      (mainCode.frontier() - mainCode.base());
    int64_t coldDelta =
      (coldCodeIn.frontier() - coldStart) -
      (coldCode.frontier() - coldCode.base());

    mainDeltaTot += mainDelta;
    HPHP::Trace::traceRelease("main delta after relocation: "
                              "%" PRId64 " (%" PRId64 ")\n",
                              mainDelta, mainDeltaTot);
    coldDeltaTot += coldDelta;
    HPHP::Trace::traceRelease("cold delta after relocation: "
                              "%" PRId64 " (%" PRId64 ")\n",
                              coldDelta, coldDeltaTot);

    // After relocation no in-progress tail jump should still point into the
    // scratch blocks we generated into; poison those blocks to catch any
    // stale references.
    auto& ip = mcg->cgFixups().m_inProgressTailJumps;
    for (size_t i = 0; i < ip.size(); ++i) {
      const auto& ib = ip[i];
      assert(!mainCode.contains(ib.toSmash()));
      assert(!coldCode.contains(ib.toSmash()));
    }
    memset(mainCode.base(), 0xcc, mainCode.frontier() - mainCode.base());
    memset(coldCode.base(), 0xcc, coldCode.frontier() - coldCode.base());
  } else {
    coldCodeIn.skip(coldCode.frontier() - coldCodeIn.frontier());
    mainCodeIn.skip(mainCode.frontier() - mainCodeIn.frontier());
  }

  printUnit(kCodeGenLevel, unit, " after code gen ", asmInfo);
}

}}