/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"

#include "hphp/util/asm-x64.h"
#include "hphp/util/ringbuffer.h"
#include "hphp/util/trace.h"

#include "hphp/runtime/base/arch.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/stats.h"
#include "hphp/runtime/base/types.h"
#include "hphp/runtime/vm/jit/back-end.h"
#include "hphp/runtime/vm/jit/translator-inline.h"
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/mc-generator-internal.h"
#include "hphp/runtime/vm/jit/translator.h"
#include "hphp/runtime/vm/jit/ir.h"
#include "hphp/runtime/vm/jit/code-gen-x64.h"

namespace HPHP { namespace JIT { namespace X64 {

//////////////////////////////////////////////////////////////////////

using namespace JIT::reg;

//////////////////////////////////////////////////////////////////////

/*
 * It's not normally ok to directly use tracelet abi registers in
 * codegen, unless you're directly dealing with an instruction that
 * does near-end-of-tracelet glue.  (Or also we sometimes use them
 * just for some static_assertions relating to calls to helpers from
 * mcg that hardcode these registers.)
 */

/*
 * Satisfy an alignment constraint.  Bridge the gap with int3's.
 */
void moveToAlign(CodeBlock& cb,
                 const size_t align /* =kJmpTargetAlign */) {
  X64Assembler a { cb };
  assert(folly::isPowTwo(align));
  size_t leftInBlock = align - ((align - 1) & uintptr_t(cb.frontier()));
  if (leftInBlock == align) return;
  if (leftInBlock > 2) {
    a.ud2();
    leftInBlock -= 2;
  }
  if (leftInBlock > 0) {
    a.emitInt3s(leftInBlock);
  }
}
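
// Worked example (hypothetical numbers): with align = 16 and a frontier whose
// low bits are 0x9, leftInBlock = 16 - (15 & 0x9) = 7; the 2-byte ud2 plus
// five int3 bytes then land the frontier exactly on the 16-byte boundary.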

void emitEagerSyncPoint(Asm& as, const Op* pc) {
  static COff spOff = offsetof(ExecutionContext, m_stack) +
    Stack::topOfStackOffset();
  static COff fpOff = offsetof(ExecutionContext, m_fp);
  static COff pcOff = offsetof(ExecutionContext, m_pc);

  // we can use rAsm because we don't clobber it in X64Assembler
  auto const rEC = rAsm;

  emitGetGContext(as, rEC);
  as.   storeq (rVmFp, rEC[fpOff]);
  as.   storeq (rVmSp, rEC[spOff]);
  emitImmStoreq(as, intptr_t(pc), rEC[pcOff]);
}

// emitEagerVMRegSave --
//   Inline.  Saves regs in-place in the TC.  This is an unusual need;
//   you probably want to lazily save these regs via recordCall and
//   its ilk.
void emitEagerVMRegSave(Asm& as, RegSaveFlags flags) {
  bool saveFP = bool(flags & RegSaveFlags::SaveFP);
  bool savePC = bool(flags & RegSaveFlags::SavePC);
  assert((flags & ~(RegSaveFlags::SavePC | RegSaveFlags::SaveFP)) ==
         RegSaveFlags::None);

  Reg64 pcReg = rdi;
  PhysReg rEC = rAsm;
  assert(!kSpecialCrossTraceRegs.contains(rdi));

  emitGetGContext(as, rEC);

  static COff spOff = offsetof(ExecutionContext, m_stack) +
    Stack::topOfStackOffset();
  static COff fpOff = offsetof(ExecutionContext, m_fp) - spOff;
  static COff pcOff = offsetof(ExecutionContext, m_pc) - spOff;

  assert(spOff != 0);
  as.   addq   (spOff, r64(rEC));
  as.   storeq (rVmSp, *rEC);

  // We're going to temporarily abuse rVmSp to hold the current unit.
  Reg64 rBC = rVmSp;
  as.   push   (rBC);
  if (savePC) {
    // m_fp -> m_func -> m_unit -> m_bc + pcReg
    as. loadq  (rVmFp[AROFF(m_func)], rBC);
    as. loadq  (rBC[Func::unitOff()], rBC);
    as. loadq  (rBC[Unit::bcOff()], rBC);
    as. addq   (rBC, pcReg);
    as. storeq (pcReg, rEC[pcOff]);
  }
  as.   pop    (rBC);
  if (saveFP) {
    as. storeq (rVmFp, rEC[fpOff]);
  }
}
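
// Illustrative call (sketch): emitEagerVMRegSave(as, RegSaveFlags::SaveFP |
// RegSaveFlags::SavePC) persists both fp and the recomputed bytecode pc along
// with the stack pointer; passing only SaveFP skips the pc work entirely.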

void emitGetGContext(Asm& as, PhysReg dest) {
  emitTLSLoad<ExecutionContext>(as, g_context, dest);
}

// IfCountNotStatic --
//   Emits if (%reg->_count < 0) { ... }.
//   This depends on UncountedValue and StaticValue
//   being the only valid negative refCounts and both indicating no
//   ref count is needed.
//   May short-circuit this check if the type is known to be
//   refcounted.
struct IfCountNotStatic {
  typedef CondBlock<FAST_REFCOUNT_OFFSET,
                    0,
                    CC_S,
                    int32_t> NonStaticCondBlock;
  static_assert(UncountedValue < 0 && StaticValue < 0, "");
  NonStaticCondBlock *m_cb; // might be null
  IfCountNotStatic(Asm& as,
                   PhysReg reg,
                   DataType t = KindOfInvalid) {
    // Objects and variants cannot be static
    if (t != KindOfObject && t != KindOfResource && t != KindOfRef) {
      m_cb = new NonStaticCondBlock(as, reg);
    } else {
      m_cb = nullptr;
    }
  }

  ~IfCountNotStatic() {
    delete m_cb;
  }
};
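
// Usage sketch (illustrative; mirrors emitIncRefCheckNonStatic below): the
// constructor emits the test-and-branch and the destructor emits the join
// point, so whatever is emitted during the object's lifetime becomes the
// body that runs only for non-static values:
//
//   {
//     IfCountNotStatic ins(as, base, dtype); // test _count, skip if static
//     emitIncRef(as, base);                  // conditional body
//   }                                        // join point emitted here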

void emitTransCounterInc(Asm& a) {
  if (!tx->isTransDBEnabled()) return;

  a.    movq (tx->getTransCounterAddr(), rAsm);
  a.    lock ();
  a.    incq (*rAsm);
}

void emitIncRef(Asm& as, PhysReg base) {
  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    emitAssertRefCount(as, base);
  }
  as.incl(base[FAST_REFCOUNT_OFFSET]);
  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    // Assert that the ref count is greater than zero
    emitAssertFlagsNonNegative(as);
  }
}

void emitIncRefCheckNonStatic(Asm& as, PhysReg base, DataType dtype) {
  { // if !static then
    IfCountNotStatic ins(as, base, dtype);
    emitIncRef(as, base);
  } // endif
}

void emitIncRefGenericRegSafe(Asm& as, PhysReg base, int disp, PhysReg tmpReg) {
  { // if RC
    IfRefCounted irc(as, base, disp);
    as.   loadq (base[disp + TVOFF(m_data)], tmpReg);
    { // if !static
      IfCountNotStatic ins(as, tmpReg);
      as. incl(tmpReg[FAST_REFCOUNT_OFFSET]);
    } // endif
  } // endif
}

void emitAssertFlagsNonNegative(Asm& as) {
  ifThen(as, CC_NGE, [&] { as.ud2(); });
}

void emitAssertRefCount(Asm& as, PhysReg base) {
  as.cmpl(HPHP::StaticValue, base[FAST_REFCOUNT_OFFSET]);
  ifThen(as, CC_NLE, [&] {
    as.cmpl(HPHP::RefCountMaxRealistic, base[FAST_REFCOUNT_OFFSET]);
    ifThen(as, CC_NBE, [&] { as.ud2(); });
  });
}
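
// Informal reading of the checks above: a count at or below StaticValue is
// accepted as a sentinel; any larger count must also be at or below
// RefCountMaxRealistic (unsigned compare, CC_NBE), otherwise we trap via ud2.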

// Logical register move: ensures the value in src will be in dest
// after execution, but might do so in strange ways.  Do not count on
// being able to smash dest to a different register in the future.
void emitMovRegReg(Asm& as, PhysReg srcReg, PhysReg dstReg) {
  assert(srcReg != InvalidReg);
  assert(dstReg != InvalidReg);

  if (srcReg == dstReg) return;

  if (srcReg.isGP()) {
    if (dstReg.isGP()) {                 // GP => GP
      as. movq(srcReg, dstReg);
    } else {                             // GP => XMM
      // This generates a movq x86 instruction, which zero extends
      // the 64-bit value in srcReg into a 128-bit XMM register
      as. movq_rx(srcReg, dstReg);
    }
  } else {
    if (dstReg.isGP()) {                 // XMM => GP
      as. movq_xr(srcReg, dstReg);
    } else {                             // XMM => XMM
      // This copies all 128 bits in XMM,
      // thus avoiding partial register stalls
      as. movdqa(srcReg, dstReg);
    }
  }
}

void emitLea(Asm& as, MemoryRef mr, PhysReg dst) {
  if (dst == InvalidReg) return;
  if (mr.r.disp == 0) {
    // lea with a zero displacement is just a register move
    emitMovRegReg(as, mr.r.base, dst);
  } else {
    as.   lea(mr, dst);
  }
}

void emitLdObjClass(Asm& as, PhysReg objReg, PhysReg dstReg) {
  emitLdLowPtr(as, objReg[ObjectData::getVMClassOffset()],
               dstReg, sizeof(LowClassPtr));
}

void emitLdClsCctx(Asm& as, PhysReg srcReg, PhysReg dstReg) {
  emitMovRegReg(as, srcReg, dstReg);
  // a cctx is a Class* with the low bit set; decq recovers the Class pointer
  as.   decq(dstReg);
}

void emitCall(Asm& a, TCA dest) {
  if (a.jmpDeltaFits(dest) && !Stats::enabled()) {
    a.    call(dest);
    return;
  }

  // Even if dest is far away, a trampoline for it might not be.
  dest = mcg->getNativeTrampoline(dest);
  if (a.jmpDeltaFits(dest)) {
    a.    call(dest);
    return;
  }

  // can't do a near call; store address in data section.
  // call by loading the address using rip-relative addressing.  This
  // assumes the data section is near the current code section.  Since
  // this sequence is directly in-line, rip-relative like this is
  // more compact than loading a 64-bit immediate.
  TCA* addr = mcg->allocData<TCA>(sizeof(TCA), 1);
  *addr = dest;
  a.    call(rip[(intptr_t)addr]);
  assert(((int32_t*)a.frontier())[-1] + a.frontier() == (TCA)addr);
}
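
// Why the assert above holds (informal): the final four bytes of the call
// instruction are its rip-relative disp32, and rip-relative addressing is
// based at the end of the instruction, i.e. at a.frontier(); so the disp32
// plus a.frontier() must reproduce the address of the allocated TCA slot.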

void emitCall(Asm& a, CppCall call) {
  switch (call.kind()) {
  case CppCall::Kind::Direct:
    return emitCall(a, static_cast<TCA>(call.address()));
  case CppCall::Kind::Virtual:
    // Load method's address from proper offset off of object in rdi,
    // using rax as scratch.
    a.    loadq  (*rdi, rax);
    a.    call   (rax[call.vtableOffset()]);
    return;
  case CppCall::Kind::Indirect:
    a.    call   (call.reg());
    return;
  case CppCall::Kind::ArrayVirt:
    {
      auto const addr = reinterpret_cast<intptr_t>(call.arrayTable());
      always_assert_flog(
        deltaFits(addr, sz::dword),
        "Array data vtables are expected to be in the data "
        "segment, with addresses less than 2^31"
      );
      a.  loadzbl (rdi[ArrayData::offsetofKind()], eax);
      a.  call    (baseless(rax*8 + addr));
      return;
    }
  }
  not_reached();
}

void emitJmpOrJcc(Asm& a, ConditionCode cc, TCA dest) {
  if (cc == CC_None) {
    a.   jmp(dest);
  } else {
    a.   jcc((ConditionCode)cc, dest);
  }
}

void emitRB(X64Assembler& a,
            Trace::RingBufferType t,
            const char* msg,
            RegSet toSave) {
  if (!Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
    return;
  }
  PhysRegSaver save(a, toSave | kSpecialCrossTraceRegs);
  int arg = 0;
  a.    emitImmReg((uintptr_t)msg, argNumToRegName[arg++]);
  a.    emitImmReg(strlen(msg), argNumToRegName[arg++]);
  a.    emitImmReg(t, argNumToRegName[arg++]);
  a.    call((TCA)Trace::ringbufferMsg);
}

void emitTraceCall(CodeBlock& cb, int64_t pcOff) {
  Asm a { cb };
  // call to a trace function
  a.    lea    (rip[(int64_t)a.frontier()], rcx);
  a.    movq   (rVmFp, rdi);
  a.    movq   (rVmSp, rsi);
  a.    movq   (pcOff, rdx);
  // do the call; may use a trampoline
  emitCall(a, reinterpret_cast<TCA>(traceCallback));
}

void emitTestSurpriseFlags(Asm& a) {
  static_assert(RequestInjectionData::LastFlag < (1LL << 32),
                "Translator assumes RequestInjectionFlags fit in 32-bit int");
  a.    testl((int32_t)0xffffffff, rVmTl[RDS::kConditionFlagsOff]);
}

void emitCheckSurpriseFlagsEnter(CodeBlock& mainCode, CodeBlock& coldCode,
                                 const Fixup& fixup) {
  Asm a { mainCode };
  Asm acold { coldCode };

  emitTestSurpriseFlags(a);
  a.    jnz   (coldCode.frontier());

  acold.  movq (rVmFp, argNumToRegName[0]);
  emitCall(acold, tx->uniqueStubs.functionEnterHelper);
  mcg->recordSyncPoint(coldCode.frontier(),
                       fixup.m_pcOffset, fixup.m_spOffset);
  acold.  jmp  (mainCode.frontier());
}

template<class Mem>
void emitCmpClass(Asm& as, Reg64 reg, Mem mem) {
  auto size = sizeof(LowClassPtr);
  if (size == 8) {
    as.   cmpq (reg, mem);
  } else if (size == 4) {
    as.   cmpl (r32(reg), mem);
  } else {
    not_implemented();
  }
}

template void emitCmpClass<MemoryRef>(Asm& as, Reg64 reg, MemoryRef mem);

void emitCmpClass(Asm& as, Reg64 reg1, PhysReg reg2) {
  auto size = sizeof(LowClassPtr);
  if (size == 8) {
    as.   cmpq (reg1, reg2);
  } else if (size == 4) {
    as.   cmpl (r32(reg1), r32(reg2));
  } else {
    not_implemented();
  }
}

void shuffle2(Asm& as, PhysReg s0, PhysReg s1, PhysReg d0, PhysReg d1) {
  if (s0 == InvalidReg && s1 == InvalidReg &&
      d0 == InvalidReg && d1 == InvalidReg) return;
  assert(s0 != s1);
  assert(!s0.isSIMD() || s1 == InvalidReg); // never 2 XMMs
  assert(!d0.isSIMD() || d1 == InvalidReg); // never 2 XMMs
  if (d0 == s1 && d1 != InvalidReg) {
    assert(d0 != d1);
    if (d1 == s0) {
      as.   xchgq(s0, s1);
    } else {
      as.   movq (s1, d1); // save s1 first; d1 != s0
      as.   movq (s0, d0);
    }
  } else if (d0.isSIMD() && s0.isGP() && s1.isGP()) {
    // move 2 gpr to 1 xmm
    assert(d0 != rCgXMM0); // xmm0 is reserved for scratch
    emitMovRegReg(as, s0, d0);
    as.   movq_rx(s1, rCgXMM0);
    as.   unpcklpd(rCgXMM0, d0); // s1 -> d0[1]
  } else {
    if (d0 != InvalidReg) emitMovRegReg(as, s0, d0); // d0 != s1
    if (d1 != InvalidReg) emitMovRegReg(as, s1, d1);
  }
}

void zeroExtendIfBool(CodeGenerator::Asm& as, const SSATmp* src, PhysReg reg) {
  if (src->isA(Type::Bool) && reg != InvalidReg) {
    // zero-extend the bool from a byte to a quad
    // note: movzbl actually extends the value to 64 bits.
    as.movzbl(rbyte(reg), r32(reg));
  }
}

ConditionCode opToConditionCode(Opcode opc) {
  switch (opc) {
  case JmpGt:                 return CC_G;
  case JmpGte:                return CC_GE;
  case JmpLt:                 return CC_L;
  case JmpLte:                return CC_LE;
  case JmpEq:                 return CC_E;
  case JmpNeq:                return CC_NE;
  case JmpGtInt:              return CC_G;
  case JmpGteInt:             return CC_GE;
  case JmpLtInt:              return CC_L;
  case JmpLteInt:             return CC_LE;
  case JmpEqInt:              return CC_E;
  case JmpNeqInt:             return CC_NE;
  case JmpSame:               return CC_E;
  case JmpNSame:              return CC_NE;
  case JmpInstanceOfBitmask:  return CC_NZ;
  case JmpNInstanceOfBitmask: return CC_Z;
  case JmpZero:               return CC_Z;
  case JmpNZero:              return CC_NZ;
  case ReqBindJmpGt:                 return CC_G;
  case ReqBindJmpGte:                return CC_GE;
  case ReqBindJmpLt:                 return CC_L;
  case ReqBindJmpLte:                return CC_LE;
  case ReqBindJmpEq:                 return CC_E;
  case ReqBindJmpNeq:                return CC_NE;
  case ReqBindJmpGtInt:              return CC_G;
  case ReqBindJmpGteInt:             return CC_GE;
  case ReqBindJmpLtInt:              return CC_L;
  case ReqBindJmpLteInt:             return CC_LE;
  case ReqBindJmpEqInt:              return CC_E;
  case ReqBindJmpNeqInt:             return CC_NE;
  case ReqBindJmpSame:               return CC_E;
  case ReqBindJmpNSame:              return CC_NE;
  case ReqBindJmpInstanceOfBitmask:  return CC_NZ;
  case ReqBindJmpNInstanceOfBitmask: return CC_Z;
  case ReqBindJmpZero:               return CC_Z;
  case ReqBindJmpNZero:              return CC_NZ;
  case SideExitJmpGt:                 return CC_G;
  case SideExitJmpGte:                return CC_GE;
  case SideExitJmpLt:                 return CC_L;
  case SideExitJmpLte:                return CC_LE;
  case SideExitJmpEq:                 return CC_E;
  case SideExitJmpNeq:                return CC_NE;
  case SideExitJmpGtInt:              return CC_G;
  case SideExitJmpGteInt:             return CC_GE;
  case SideExitJmpLtInt:              return CC_L;
  case SideExitJmpLteInt:             return CC_LE;
  case SideExitJmpEqInt:              return CC_E;
  case SideExitJmpNeqInt:             return CC_NE;
  case SideExitJmpSame:               return CC_E;
  case SideExitJmpNSame:              return CC_NE;
  case SideExitJmpInstanceOfBitmask:  return CC_NZ;
  case SideExitJmpNInstanceOfBitmask: return CC_Z;
  case SideExitJmpZero:               return CC_Z;
  case SideExitJmpNZero:              return CC_NZ;