/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"

#include "hphp/util/asm-x64.h"
#include "hphp/util/ringbuffer.h"
#include "hphp/util/trace.h"

#include "hphp/runtime/base/arch.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/stats.h"
#include "hphp/runtime/base/types.h"
#include "hphp/runtime/vm/jit/back-end.h"
#include "hphp/runtime/vm/jit/translator-inline.h"
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/mc-generator-internal.h"
#include "hphp/runtime/vm/jit/translator.h"
#include "hphp/runtime/vm/jit/ir-opcode.h"
#include "hphp/runtime/vm/jit/code-gen-x64.h"
#include "hphp/runtime/vm/jit/vasm-x64.h"

namespace HPHP { namespace jit { namespace x64 {

//////////////////////////////////////////////////////////////////////

using namespace jit::reg;

//////////////////////////////////////////////////////////////////////

/*
 * It's not normally ok to directly use tracelet abi registers in
 * codegen, unless you're directly dealing with an instruction that
 * does near-end-of-tracelet glue.  (Or also we sometimes use them
 * just for some static_assertions relating to calls to helpers from
 * mcg that hardcode these registers.)
 */

/*
 * Satisfy an alignment constraint. Bridge the gap with int3's.
 */
void moveToAlign(CodeBlock& cb,
                 const size_t align /* =kJmpTargetAlign */) {
  X64Assembler a { cb };
  assert(folly::isPowTwo(align));
  size_t leftInBlock = align - ((align - 1) & uintptr_t(cb.frontier()));
  if (leftInBlock == align) return;
  if (leftInBlock > 2) {
    a.ud2();
    leftInBlock -= 2;
  }
  if (leftInBlock > 0) {
    a.emitInt3s(leftInBlock);
  }
}
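
/*
 * Eagerly sync the VM's fp, sp, and pc to their reserved slots in
 * RDS. The pc is a compile-time constant here, so it is stored as an
 * immediate rather than from a register.
 */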
void emitEagerSyncPoint(Vout& v, const Op* pc) {
  v << storeq{rVmFp, rVmTl[RDS::kVmfpOff]};
  v << storeq{rVmSp, rVmTl[RDS::kVmspOff]};
  emitImmStoreq(v, intptr_t(pc), rVmTl[RDS::kVmpcOff]);
}

void emitEagerSyncPoint(Asm& as, const Op* pc) {
  emitEagerSyncPoint(Vauto().main(as), pc);
}

// emitEagerVMRegSave --
//   Inline. Saves regs in-place in the TC. This is an unusual need;
//   you probably want to lazily save these regs via recordCall and
//   its ilk.
void emitEagerVMRegSave(Asm& as, RegSaveFlags flags) {
  bool saveFP = bool(flags & RegSaveFlags::SaveFP);
  bool savePC = bool(flags & RegSaveFlags::SavePC);
  assert((flags & ~(RegSaveFlags::SavePC | RegSaveFlags::SaveFP)) ==
         RegSaveFlags::None);

  Reg64 pcReg = rdi;
  assert(!kSpecialCrossTraceRegs.contains(rdi));

  as.   storeq (rVmSp, rVmTl[RDS::kVmspOff]);

  if (savePC) {
    // We're going to temporarily abuse rVmSp to hold the current unit.
    Reg64 rBC = rVmSp;
    as.  push   (rBC);
    // m_fp -> m_func -> m_unit -> m_bc + pcReg
    as.  loadq  (rVmFp[AROFF(m_func)], rBC);
    as.  loadq  (rBC[Func::unitOff()], rBC);
    as.  loadq  (rBC[Unit::bcOff()], rBC);
    as.  addq   (rBC, pcReg);
    as.  storeq (pcReg, rVmTl[RDS::kVmpcOff]);
    as.  pop    (rBC);
  }

  if (saveFP) {
    as.  storeq (rVmFp, rVmTl[RDS::kVmfpOff]);
  }
}

void emitGetGContext(Vout& v, Vreg dest) {
  emitTLSLoad<ExecutionContext>(v, g_context, dest);
}

void emitGetGContext(Asm& as, PhysReg dest) {
  emitGetGContext(Vauto().main(as), dest);
}

// IfCountNotStatic --
//   Emits if (%reg->_count < 0) { ... }.
//   This depends on UncountedValue and StaticValue
//   being the only valid negative refCounts and both indicating no
//   ref count is needed.
//   May short-circuit this check if the type is known to be
//   static already.
struct IfCountNotStatic {
  typedef CondBlock<FAST_REFCOUNT_OFFSET,
                    0,
                    CC_S,
                    int32_t> NonStaticCondBlock;
  static_assert(UncountedValue < 0 && StaticValue < 0, "");
  NonStaticCondBlock *m_cb; // might be null
  IfCountNotStatic(Asm& as,
                   PhysReg reg,
                   DataType t = KindOfInvalid) {
    // Objects and variants cannot be static
    if (t != KindOfObject && t != KindOfResource && t != KindOfRef) {
      m_cb = new NonStaticCondBlock(as, reg);
    } else {
      m_cb = nullptr;
    }
  }

  ~IfCountNotStatic() {
    delete m_cb;
  }
};
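
// Bump the current translation's execution counter. The increment is
// emitted with a lock prefix (incqmlock) so concurrent executions of
// the same translation don't lose counts; it is skipped entirely when
// the TransDB is disabled.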
void emitTransCounterInc(Vout& v) {
  if (!mcg->tx().isTransDBEnabled()) return;
  auto t = v.cns(mcg->tx().getTransCounterAddr());
  v << incqmlock{*t, v.makeReg()};
}

void emitTransCounterInc(Asm& a) {
  emitTransCounterInc(Vauto().main(a));
}
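
// Increment the refcount of the cell at base, which must already be
// known to be refcounted. Under EvalHHIRGenerateAsserts this also
// validates the count before and after the increment.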
void emitIncRef(Vout& v, Vreg base) {
  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    emitAssertRefCount(v, base);
  }

  auto const sf = v.makeReg();
  v << inclm{base[FAST_REFCOUNT_OFFSET], sf};
  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    // Assert that the ref count is greater than zero
    emitAssertFlagsNonNegative(v, sf);
  }
}

void emitIncRef(Asm& as, PhysReg base) {
  emitIncRef(Vauto().main(as), base);
}

void emitIncRefCheckNonStatic(Asm& as, PhysReg base, DataType dtype) {
  { // if !static then
    IfCountNotStatic ins(as, base, dtype);
    emitIncRef(as, base);
  } // endif
}
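
// Generic-safe IncRef: for a TypedValue of statically unknown type at
// base[disp], test that it is refcounted at all, then load m_data and
// bump its count only when the count is not the static sentinel.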
void emitIncRefGenericRegSafe(Asm& as, PhysReg base, int disp, PhysReg tmpReg) {
  { // if RefCounted(t)
    IfRefCounted irc(as, base, disp);
    as.   loadq  (base[disp + TVOFF(m_data)], tmpReg);
    { // if !static
      IfCountNotStatic ins(as, tmpReg);
      as.   incl(tmpReg[FAST_REFCOUNT_OFFSET]);
    } // endif
  } // endif
}
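
// Debug assertion helpers: each emits a ud2 trap on the failure path,
// so a bad refcount (negative, or above RefCountMaxRealistic without
// being a static/uncounted sentinel) aborts immediately.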
void emitAssertFlagsNonNegative(Vout& v, Vreg sf) {
  ifThen(v, CC_NGE, sf, [&](Vout& v) { v << ud2{}; });
}

void emitAssertRefCount(Vout& v, Vreg base) {
  auto const sf = v.makeReg();
  v << cmplim{HPHP::StaticValue, base[FAST_REFCOUNT_OFFSET], sf};
  ifThen(v, CC_NLE, sf, [&](Vout& v) {
    auto const sf = v.makeReg();
    v << cmplim{HPHP::RefCountMaxRealistic, base[FAST_REFCOUNT_OFFSET], sf};
    ifThen(v, CC_NBE, sf, [&](Vout& v) { v << ud2{}; });
  });
}

// Logical register move: ensures the value in src will be in dest
// after execution, but might do so in strange ways. Do not count on
// being able to smash dest to a different register in the future, e.g.
void emitMovRegReg(Asm& as, PhysReg srcReg, PhysReg dstReg) {
  assert(srcReg != InvalidReg);
  assert(dstReg != InvalidReg);

  if (srcReg == dstReg) return;

  if (srcReg.isGP()) {
    if (dstReg.isGP()) {                // GP => GP
      as. movq(srcReg, dstReg);
    } else {                            // GP => XMM
      // This generates a movq x86 instruction, which zero extends
      // the 64-bit value in srcReg into a 128-bit XMM register
      as. movq_rx(srcReg, dstReg);
    }
  } else {
    if (dstReg.isGP()) {                // XMM => GP
      as. movq_xr(srcReg, dstReg);
    } else {                            // XMM => XMM
      // This copies all 128 bits in XMM,
      // thus avoiding partial register stalls
      as. movdqa(srcReg, dstReg);
    }
  }
}
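
// Emit an lea of mr into dst, degrading to a plain register-to-register
// move when the displacement is zero, and to nothing at all when dst is
// InvalidReg.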
void emitLea(Asm& as, MemoryRef mr, PhysReg dst) {
  if (dst == InvalidReg) return;
  if (mr.r.disp == 0) {
    emitMovRegReg(as, mr.r.base, dst);
  } else {
    as. lea(mr, dst);
  }
}
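
// Load the Class* out of an ObjectData. Class pointers may be stored
// in a compressed 32-bit form, so the load width is taken from
// sizeof(LowClassPtr).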
Vreg emitLdObjClass(Vout& v, Vreg objReg, Vreg dstReg) {
  emitLdLowPtr(v, objReg[ObjectData::getVMClassOffset()],
               dstReg, sizeof(LowClassPtr));
  return dstReg;
}
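
// Recover a Class* from a cctx. A cctx is a Class* with the low bit
// set (marking a static-context call), so subtracting one yields the
// real pointer.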
Vreg emitLdClsCctx(Vout& v, Vreg srcReg, Vreg dstReg) {
  auto t = v.makeReg();
  v << copy{srcReg, t};
  v << decq{t, dstReg, v.makeReg()};
  return dstReg;
}
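
// The emitCall overloads lower a CppCall to machine code: a direct
// address, a virtual method at a vtable offset, the ArrayData
// kind-indexed function table, or a destructor selected by type byte.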
void emitCall(Asm& a, TCA dest, RegSet args) {
  Vauto().main(a) << call{dest, args};
}

void emitCall(Asm& a, CppCall call, RegSet args) {
  emitCall(Vauto().main(a), call, args);
}

void emitCall(Vout& v, CppCall target, RegSet args) {
  switch (target.kind()) {
  case CppCall::Kind::Direct:
    v << call{static_cast<TCA>(target.address()), args};
    return;
  case CppCall::Kind::Virtual:
    // Load method's address from proper offset off of object in rdi,
    // using rax as scratch.
    v << loadq{*rdi, rax};
    v << callm{rax[target.vtableOffset()], args};
    return;
  case CppCall::Kind::ArrayVirt: {
    auto const addr = reinterpret_cast<intptr_t>(target.arrayTable());
    always_assert_flog(
      deltaFits(addr, sz::dword),
      "deltaFits on ArrayData vtable calls needs to be checked before "
      "emitting them"
    );
    v << loadzbl{rdi[ArrayData::offsetofKind()], eax};
    v << callm{baseless(rax*8 + addr), args};
    return;
  }
  case CppCall::Kind::Destructor:
    // this movzbl is only needed because callers aren't
    // required to zero-extend the type.
    v << movzbl{target.reg(), target.reg()};
    auto dtor_ptr = lookupDestructor(v, target.reg());
    v << callm{dtor_ptr, args};
    return;
  }
  not_reached();
}
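
// Store a 64-bit immediate to memory. x64 can only encode a
// sign-extended 32-bit immediate in a store, so wider values are
// written as two 32-bit halves.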
void emitImmStoreq(Vout& v, Immed64 imm, Vptr ref) {
  if (imm.fits(sz::dword)) {
    v << storeqim{imm.l(), ref};
  } else {
    v << storelim{int32_t(imm.q()), ref};
    v << storelim{int32_t(imm.q() >> 32), ref + 4};
  }
}

void emitImmStoreq(Asm& a, Immed64 imm, MemoryRef ref) {
  if (imm.fits(sz::dword)) {
    a.storeq(imm.l(), ref);
  } else {
    a.storel(int32_t(imm.q()), ref);
    a.storel(int32_t(imm.q() >> 32), MemoryRef(ref.r + 4));
  }
}

void emitJmpOrJcc(Asm& a, ConditionCode cc, TCA dest) {
  if (cc == CC_None) {
    a.   jmp(dest);
  } else {
    a.   jcc((ConditionCode)cc, dest);
  }
}
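
// Emit a call that logs a message into the trace ring buffer. This is
// a no-op unless the ringbuffer trace module is enabled; PhysRegSaver
// preserves the cross-trace registers around the helper call.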
void emitRB(X64Assembler& a,
            Trace::RingBufferType t,
            const char* msg) {
  if (!Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
    return;
  }
  PhysRegSaver save(a, kSpecialCrossTraceRegs);
  int arg = 0;
  a.    emitImmReg((uintptr_t)msg, argNumToRegName[arg++]);
  a.    emitImmReg(strlen(msg), argNumToRegName[arg++]);
  a.    emitImmReg(t, argNumToRegName[arg++]);
  a.    call((TCA)Trace::ringbufferMsg);
}

void emitTraceCall(CodeBlock& cb, Offset pcOff) {
  Asm a { cb };
  // call to a trace function
  a.    lea    (rip[(int64_t)a.frontier()], rcx);
  a.    movq   (rVmFp, rdi);
  a.    movq   (rVmSp, rsi);
  a.    movq   (pcOff, rdx);
  // do the call; may use a trampoline
  emitCall(a, reinterpret_cast<TCA>(traceCallback),
           RegSet().add(rcx).add(rdi).add(rsi).add(rdx));
}
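
// The surprise-flag word in RDS is how the runtime asks running JIT'd
// code to take a slow path (e.g. for timeouts or pending events); a
// nonzero word means the check should branch out.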
void emitTestSurpriseFlags(Asm& a) {
  static_assert(RequestInjectionData::LastFlag < (1LL << 32),
                "Translator assumes RequestInjectionFlags fit in 32-bit int");
  a.testl(-1, rVmTl[RDS::kConditionFlagsOff]);
}

Vreg emitTestSurpriseFlags(Vout& v) {
  static_assert(RequestInjectionData::LastFlag < (1LL << 32),
                "Translator assumes RequestInjectionFlags fit in 32-bit int");
  auto const sf = v.makeReg();
  v << testlim{-1, rVmTl[RDS::kConditionFlagsOff], sf};
  return sf;
}

void emitCheckSurpriseFlagsEnter(CodeBlock& mainCode, CodeBlock& coldCode,
                                 Fixup fixup) {
  Vauto vasm;
  auto& v = vasm.main(mainCode);
  auto& vc = vasm.cold(coldCode);
  emitCheckSurpriseFlagsEnter(v, vc, fixup);
}

void emitCheckSurpriseFlagsEnter(Vout& v, Vout& vcold, Fixup fixup) {
  auto cold = vcold.makeBlock();
  auto done = v.makeBlock();
  auto const sf = emitTestSurpriseFlags(v);
  v << jcc{CC_NZ, sf, {done, cold}};

  vcold = cold;
  vcold << movq{rVmFp, argNumToRegName[0]};
  vcold << call{mcg->tx().uniqueStubs.functionEnterHelper, argSet(1)};
  vcold << syncpoint{Fixup{fixup.pcOffset, fixup.spOffset}};
  vcold << jmp{done};

  v = done;
}
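
// Load a pointer-sized field that may be stored compressed: size picks
// between a full 8-byte load and a 4-byte load of a low pointer.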
void emitLdLowPtr(Vout& v, Vptr mem, Vreg reg, size_t size) {
  if (size == 8) {
    v << loadq{mem, reg};
  } else if (size == 4) {
    v << loadl{mem, reg};
  } else {
    not_implemented();
  }
}

void emitCmpClass(Vout& v, Vreg sf, const Class* c, Vptr mem) {
  auto size = sizeof(LowClassPtr);
  if (size == 8) {
    v << cmpqm{v.cns(c), mem, sf};
  } else if (size == 4) {
    v << cmplm{v.cns(c), mem, sf};
  } else {
    not_implemented();
  }
}

void emitCmpClass(Vout& v, Vreg sf, Vreg reg, Vptr mem) {
  auto size = sizeof(LowClassPtr);
  if (size == 8) {
    v << cmpqm{reg, mem, sf};
  } else if (size == 4) {
    v << cmplm{reg, mem, sf};
  } else {
    not_implemented();
  }
}

void emitCmpClass(Vout& v, Vreg sf, Vreg reg1, Vreg reg2) {
  auto size = sizeof(LowClassPtr);
  if (size == 8) {
    v << cmpq{reg1, reg2, sf};
  } else if (size == 4) {
    v << cmpl{reg1, reg2, sf};
  } else {
    not_implemented();
  }
}
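
// Copy a TypedValue between locations. A Vloc can hold a cell as two
// GP registers (value and type), as one full SIMD register carrying
// both words, or as a single register; each pairing needs a different
// move sequence.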
void copyTV(Vout& v, Vloc src, Vloc dst) {
  auto src_arity = src.numAllocated();
  auto dst_arity = dst.numAllocated();
  if (dst_arity == 2) {
    assert(src_arity == 2);
    v << copy2{src.reg(0), src.reg(1), dst.reg(0), dst.reg(1)};
    return;
  }
  assert(dst_arity == 1);
  if (src_arity == 2 && dst.isFullSIMD()) {
    pack2(v, src.reg(0), src.reg(1), dst.reg(0));
    return;
  }
  assert(src_arity >= 1);
  v << copy{src.reg(0), dst.reg(0)};
}

// move 2 gpr to 1 xmm
void pack2(Vout& v, Vreg s0, Vreg s1, Vreg d0) {
  auto t0 = v.makeReg();
  auto t1 = v.makeReg();
  v << copy{s0, t0};
  v << copy{s1, t1};
  v << unpcklpd{t1, t0, d0}; // s0,s1 -> d0[0],d0[1]
}

Vreg zeroExtendIfBool(Vout& v, const SSATmp* src, Vreg reg) {
  if (!src->isA(Type::Bool)) return reg;
  // zero-extend the bool from a byte to a quad
  // note: movzbl actually extends the value to 64 bits.
  auto extended = v.makeReg();
  v << movzbl{reg, extended};
  return extended;
}
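
// Map a branch IR opcode to the x64 condition code its comparison
// establishes; the Jmp*, ReqBindJmp*, and SideExitJmp* flavors of a
// given comparison all share one condition code.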
ConditionCode opToConditionCode(Opcode opc) {
  switch (opc) {
  case JmpGt:                 return CC_G;
  case JmpGte:                return CC_GE;
  case JmpLt:                 return CC_L;
  case JmpLte:                return CC_LE;
  case JmpEq:                 return CC_E;
  case JmpNeq:                return CC_NE;
  case JmpGtInt:              return CC_G;
  case JmpGteInt:             return CC_GE;
  case JmpLtInt:              return CC_L;
  case JmpLteInt:             return CC_LE;
  case JmpEqInt:              return CC_E;
  case JmpNeqInt:             return CC_NE;
  case JmpSame:               return CC_E;
  case JmpNSame:              return CC_NE;
  case JmpInstanceOfBitmask:  return CC_NZ;
  case JmpNInstanceOfBitmask: return CC_Z;
  case JmpZero:               return CC_Z;
  case JmpNZero:              return CC_NZ;
  case ReqBindJmpGt:                 return CC_G;
  case ReqBindJmpGte:                return CC_GE;
  case ReqBindJmpLt:                 return CC_L;
  case ReqBindJmpLte:                return CC_LE;
  case ReqBindJmpEq:                 return CC_E;
  case ReqBindJmpNeq:                return CC_NE;
  case ReqBindJmpGtInt:              return CC_G;
  case ReqBindJmpGteInt:             return CC_GE;
  case ReqBindJmpLtInt:              return CC_L;
  case ReqBindJmpLteInt:             return CC_LE;
  case ReqBindJmpEqInt:              return CC_E;
  case ReqBindJmpNeqInt:             return CC_NE;
  case ReqBindJmpSame:               return CC_E;
  case ReqBindJmpNSame:              return CC_NE;
  case ReqBindJmpInstanceOfBitmask:  return CC_NZ;
  case ReqBindJmpNInstanceOfBitmask: return CC_Z;
  case ReqBindJmpZero:               return CC_Z;
  case ReqBindJmpNZero:              return CC_NZ;
  case SideExitJmpGt:                 return CC_G;
  case SideExitJmpGte:                return CC_GE;
  case SideExitJmpLt:                 return CC_L;
  case SideExitJmpLte:                return CC_LE;
  case SideExitJmpEq:                 return CC_E;
  case SideExitJmpNeq:                return CC_NE;
  case SideExitJmpGtInt:              return CC_G;
  case SideExitJmpGteInt:             return CC_GE;
  case SideExitJmpLtInt:              return CC_L;
  case SideExitJmpLteInt:             return CC_LE;
  case SideExitJmpEqInt:              return CC_E;
  case SideExitJmpNeqInt:             return CC_NE;
  case SideExitJmpSame:               return CC_E;
  case SideExitJmpNSame:              return CC_NE;
  case SideExitJmpInstanceOfBitmask:  return CC_NZ;
  case SideExitJmpNInstanceOfBitmask: return CC_Z;
  case SideExitJmpZero:               return CC_Z;
  case SideExitJmpNZero:              return CC_NZ;