// hphp/runtime/vm/jit/code-gen-helpers-x64.cpp
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"

#include "hphp/util/asm-x64.h"
#include "hphp/util/ringbuffer.h"
#include "hphp/util/trace.h"

#include "hphp/runtime/base/arch.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/stats.h"
#include "hphp/runtime/base/types.h"
#include "hphp/runtime/vm/jit/back-end.h"
#include "hphp/runtime/vm/jit/translator-inline.h"
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/mc-generator-internal.h"
#include "hphp/runtime/vm/jit/translator.h"
#include "hphp/runtime/vm/jit/ir.h"
#include "hphp/runtime/vm/jit/code-gen-x64.h"

namespace HPHP { namespace JIT { namespace X64 {

//////////////////////////////////////////////////////////////////////

using namespace JIT::reg;

TRACE_SET_MOD(hhir);

//////////////////////////////////////////////////////////////////////

/*
 * It's not normally ok to directly use tracelet abi registers in
 * codegen, unless you're directly dealing with an instruction that
 * does near-end-of-tracelet glue. (Or also we sometimes use them
 * just for some static_assertions relating to calls to helpers from
 * mcg that hardcode these registers.)
 */

/*
 * Satisfy an alignment constraint. Bridge the gap with int3's.
 */
void moveToAlign(CodeBlock& cb,
                 const size_t align /* =kJmpTargetAlign */) {
  X64Assembler a { cb };
  assert(folly::isPowTwo(align));
  size_t leftInBlock = align - ((align - 1) & uintptr_t(cb.frontier()));
  if (leftInBlock == align) return;
  if (leftInBlock > 2) {
    a.ud2();
    leftInBlock -= 2;
  }
  if (leftInBlock > 0) {
    a.emitInt3s(leftInBlock);
  }
}
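
// Worked example: with align == 16 and a frontier whose low nibble is 0xc,
// leftInBlock is 16 - (15 & 0xc) == 4, so we emit one two-byte ud2 and then
// two int3 bytes to reach the next 16-byte boundary.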

void emitEagerSyncPoint(Asm& as, const Op* pc) {
  static COff spOff = offsetof(ExecutionContext, m_stack) +
    Stack::topOfStackOffset();
  static COff fpOff = offsetof(ExecutionContext, m_fp);
  static COff pcOff = offsetof(ExecutionContext, m_pc);

  // we can use rAsm because we don't clobber it in X64Assembler
  Reg64 rEC = rAsm;
  emitGetGContext(as, rEC);
  as.   storeq(rVmFp, rEC[fpOff]);
  as.   storeq(rVmSp, rEC[spOff]);
  emitImmStoreq(as, intptr_t(pc), rEC[pcOff]);
}

// emitEagerVMRegSave --
//   Inline. Saves regs in-place in the TC. This is an unusual need;
//   you probably want to lazily save these regs via recordCall and
//   its ilk.
void emitEagerVMRegSave(Asm& as, RegSaveFlags flags) {
  bool saveFP = bool(flags & RegSaveFlags::SaveFP);
  bool savePC = bool(flags & RegSaveFlags::SavePC);
  assert((flags & ~(RegSaveFlags::SavePC | RegSaveFlags::SaveFP)) ==
         RegSaveFlags::None);

  Reg64 pcReg = rdi;
  PhysReg rEC = rAsm;
  assert(!kSpecialCrossTraceRegs.contains(rdi));

  emitGetGContext(as, rEC);

  static COff spOff = offsetof(ExecutionContext, m_stack) +
    Stack::topOfStackOffset();
  static COff fpOff = offsetof(ExecutionContext, m_fp) - spOff;
  static COff pcOff = offsetof(ExecutionContext, m_pc) - spOff;

  assert(spOff != 0);
  as.   addq   (spOff, r64(rEC));
  as.   storeq (rVmSp, *rEC);
  if (savePC) {
    // We're going to temporarily abuse rVmSp to hold the current unit.
    Reg64 rBC = rVmSp;
    as.   push   (rBC);
    // m_fp -> m_func -> m_unit -> m_bc + pcReg
    as.   loadq  (rVmFp[AROFF(m_func)], rBC);
    as.   loadq  (rBC[Func::unitOff()], rBC);
    as.   loadq  (rBC[Unit::bcOff()], rBC);
    as.   addq   (rBC, pcReg);
    as.   storeq (pcReg, rEC[pcOff]);
    as.   pop    (rBC);
  }
  if (saveFP) {
    as.   storeq (rVmFp, rEC[fpOff]);
  }
}
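
// Hypothetical usage sketch (not a call site from this file): a caller that
// wants both registers flushed would do something like
//   emitEagerVMRegSave(a, RegSaveFlags::SaveFP | RegSaveFlags::SavePC);
// having first placed the current bytecode offset in rdi (pcReg above),
// since the SavePC path rebases that offset against the unit's bytecode
// start.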

void emitGetGContext(Asm& as, PhysReg dest) {
  emitTLSLoad<ExecutionContext>(as, g_context, dest);
}

// IfCountNotStatic --
//   Emits if (%reg->_count < 0) { ... }.
//   This depends on UncountedValue and StaticValue
//   being the only valid negative refCounts and both indicating no
//   ref count is needed.
//   May short-circuit this check if the type is known to be
//   static already.
struct IfCountNotStatic {
  typedef CondBlock<FAST_REFCOUNT_OFFSET,
                    0,
                    CC_S,
                    int32_t> NonStaticCondBlock;
  static_assert(UncountedValue < 0 && StaticValue < 0, "");
  NonStaticCondBlock *m_cb; // might be null
  IfCountNotStatic(Asm& as,
                   PhysReg reg,
                   DataType t = KindOfInvalid) {
    // Objects and variants cannot be static
    if (t != KindOfObject && t != KindOfResource && t != KindOfRef) {
      m_cb = new NonStaticCondBlock(as, reg);
    } else {
      m_cb = nullptr;
    }
  }

  ~IfCountNotStatic() {
    delete m_cb;
  }
};
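
// Usage sketch of the scoped-"if" idiom (see CondBlock): constructing the
// guard emits the refcount check and a forward branch, and when the guard
// goes out of scope that branch is patched to land just after the block:
//   {
//     IfCountNotStatic ins(as, reg, dtype);
//     // ...code emitted here only runs for non-static values...
//   }
// emitIncRefCheckNonStatic below uses exactly this pattern.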

void emitTransCounterInc(Asm& a) {
  if (!tx->isTransDBEnabled()) return;

  a.    movq (tx->getTransCounterAddr(), rAsm);
  a.    lock ();
  a.    incq (*rAsm);
}

void emitIncRef(Asm& as, PhysReg base) {
  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    emitAssertRefCount(as, base);
  }
  // emit incref
  as.incl(base[FAST_REFCOUNT_OFFSET]);
  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    // Assert that the ref count is greater than zero
    emitAssertFlagsNonNegative(as);
  }
}

void emitIncRefCheckNonStatic(Asm& as, PhysReg base, DataType dtype) {
  { // if !static then
    IfCountNotStatic ins(as, base, dtype);
    emitIncRef(as, base);
  } // endif
}

void emitIncRefGenericRegSafe(Asm& as, PhysReg base, int disp, PhysReg tmpReg) {
  { // if RC
    IfRefCounted irc(as, base, disp);
    as.   loadq  (base[disp + TVOFF(m_data)], tmpReg);
    { // if !static
      IfCountNotStatic ins(as, tmpReg);
      as.   incl(tmpReg[FAST_REFCOUNT_OFFSET]);
    } // endif
  } // endif
}

void emitAssertFlagsNonNegative(Asm& as) {
  ifThen(as, CC_NGE, [&] { as.ud2(); });
}

void emitAssertRefCount(Asm& as, PhysReg base) {
  as.cmpl(HPHP::StaticValue, base[FAST_REFCOUNT_OFFSET]);
  ifThen(as, CC_NLE, [&] {
    as.cmpl(HPHP::RefCountMaxRealistic, base[FAST_REFCOUNT_OFFSET]);
    ifThen(as, CC_NBE, [&] { as.ud2(); });
  });
}

// Logical register move: ensures the value in src will be in dest
// after execution, but might do so in strange ways. Do not count on
// being able to smash dest to a different register in the future, e.g.
void emitMovRegReg(Asm& as, PhysReg srcReg, PhysReg dstReg) {
  assert(srcReg != InvalidReg);
  assert(dstReg != InvalidReg);

  if (srcReg == dstReg) return;

  if (srcReg.isGP()) {
    if (dstReg.isGP()) {                 // GP => GP
      as. movq(srcReg, dstReg);
    } else {                             // GP => XMM
      // This generates a movq x86 instruction, which zero extends
      // the 64-bit value in srcReg into a 128-bit XMM register
      as. movq_rx(srcReg, dstReg);
    }
  } else {
    if (dstReg.isGP()) {                 // XMM => GP
      as. movq_xr(srcReg, dstReg);
    } else {                             // XMM => XMM
      // This copies all 128 bits in XMM,
      // thus avoiding partial register stalls
      as. movdqa(srcReg, dstReg);
    }
  }
}
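
// For illustration, the four cases above reduce to:
//   GP  -> GP  : movq
//   GP  -> XMM : movq_rx (value zero-extended into the 128-bit register)
//   XMM -> GP  : movq_xr
//   XMM -> XMM : movdqa (full 128-bit copy, avoids partial-register stalls)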

void emitLea(Asm& as, MemoryRef mr, PhysReg dst) {
  if (dst == InvalidReg) return;
  if (mr.r.disp == 0) {
    emitMovRegReg(as, mr.r.base, dst);
  } else {
    as. lea(mr, dst);
  }
}

void emitLdObjClass(Asm& as, PhysReg objReg, PhysReg dstReg) {
  emitLdLowPtr(as, objReg[ObjectData::getVMClassOffset()],
               dstReg, sizeof(LowClassPtr));
}

void emitLdClsCctx(Asm& as, PhysReg srcReg, PhysReg dstReg) {
  emitMovRegReg(as, srcReg, dstReg);
  as.   decq(dstReg);
}
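
// A cctx is conventionally a Class* with its low bit set (to distinguish it
// from a $this pointer in the ActRec); assuming that convention, the decq
// above simply clears the tag bit to recover the plain Class*.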

void emitCall(Asm& a, TCA dest) {
  if (a.jmpDeltaFits(dest) && !Stats::enabled()) {
    a.    call(dest);
  } else {
    dest = mcg->getNativeTrampoline(dest);
    if (a.jmpDeltaFits(dest)) {
      a.call(dest);
    } else {
      // can't do a near call; store address in data section.
      // call by loading the address using rip-relative addressing. This
      // assumes the data section is near the current code section. Since
      // this sequence is directly in-line, rip-relative like this is
      // more compact than loading a 64-bit immediate.
      TCA* addr = mcg->allocData<TCA>(sizeof(TCA), 1);
      *addr = dest;
      a.call(rip[(intptr_t)addr]);
      assert(((int32_t*)a.frontier())[-1] + a.frontier() == (TCA)addr);
    }
  }
}
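
// About the assert above: the call instruction ends with a 4-byte
// rip-relative displacement, and rip at execution time is the address of the
// next instruction (the current frontier), so displacement + frontier must
// point back at the data-section slot holding the target.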

void emitCall(Asm& a, CppCall call) {
  switch (call.kind()) {
  case CppCall::Kind::Direct:
    return emitCall(a, static_cast<TCA>(call.address()));
  case CppCall::Kind::Virtual:
    // Virtual call.
    // Load method's address from proper offset off of object in rdi,
    // using rax as scratch.
    a.    loadq  (*rdi, rax);
    a.    call   (rax[call.vtableOffset()]);
    return;
  case CppCall::Kind::Indirect:
    a.    call   (call.reg());
    return;
  case CppCall::Kind::ArrayVirt:
    {
      auto const addr = reinterpret_cast<intptr_t>(call.arrayTable());
      always_assert_flog(
        deltaFits(addr, sz::dword),
        "Array data vtables are expected to be in the data "
        "segment, with addresses less than 2^31"
      );
      a.    loadzbl (rdi[ArrayData::offsetofKind()], eax);
      a.    call    (baseless(rax*8 + addr));
    }
    return;
  }
  not_reached();
}
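
// In the ArrayVirt case above, the emitted sequence loads the ArrayData kind
// byte into eax and then calls through addr + kind * 8, i.e. it indexes a
// table of function pointers by array kind. baseless() builds the
// [index*8 + disp] addressing mode with no base register, which is why the
// table address has to fit in a 32-bit displacement.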

void emitJmpOrJcc(Asm& a, ConditionCode cc, TCA dest) {
  if (cc == CC_None) {
    a.   jmp(dest);
  } else {
    a.   jcc((ConditionCode)cc, dest);
  }
}

void emitRB(X64Assembler& a,
            Trace::RingBufferType t,
            const char* msg,
            RegSet toSave) {
  if (!Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
    return;
  }
  PhysRegSaver save(a, toSave | kSpecialCrossTraceRegs);
  int arg = 0;
  a.    emitImmReg((uintptr_t)msg, argNumToRegName[arg++]);
  a.    emitImmReg(strlen(msg), argNumToRegName[arg++]);
  a.    emitImmReg(t, argNumToRegName[arg++]);
  a.    call((TCA)Trace::ringbufferMsg);
}
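
// Hypothetical usage sketch (the ring-buffer type value is illustrative
// only):
//   emitRB(a, Trace::RBTypeMsg, "enter prologue", RegSet{});
// This saves the cross-trace registers (plus anything in toSave), loads the
// message pointer, its length, and the ring-buffer type into the first three
// argument registers, and calls Trace::ringbufferMsg.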

void emitTraceCall(CodeBlock& cb, int64_t pcOff) {
  Asm a { cb };
  // call to a trace function
  a.    lea    (rip[(int64_t)a.frontier()], rcx);
  a.    movq   (rVmFp, rdi);
  a.    movq   (rVmSp, rsi);
  a.    movq   (pcOff, rdx);
  // do the call; may use a trampoline
  emitCall(a, reinterpret_cast<TCA>(traceCallback));
}

void emitTestSurpriseFlags(Asm& a) {
  static_assert(RequestInjectionData::LastFlag < (1LL << 32),
                "Translator assumes RequestInjectionFlags fit in 32-bit int");
  a.    testl((int32_t)0xffffffff, rVmTl[RDS::kConditionFlagsOff]);
}

void emitCheckSurpriseFlagsEnter(CodeBlock& mainCode, CodeBlock& coldCode,
                                 Fixup fixup) {
  Asm a { mainCode };
  Asm acold { coldCode };

  emitTestSurpriseFlags(a);
  a.  jnz  (coldCode.frontier());

  acold.  movq  (rVmFp, argNumToRegName[0]);
  emitCall(acold, tx->uniqueStubs.functionEnterHelper);
  mcg->recordSyncPoint(coldCode.frontier(),
                       fixup.m_pcOffset, fixup.m_spOffset);
  acold.  jmp   (mainCode.frontier());
}
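
// Rough shape of the code emitted above (sketch; labels are illustrative):
//   main:  test $-1, [rVmTl + kConditionFlagsOff]
//          jnz  cold
//   rest:  ...translation continues here...
//   cold:  mov  rVmFp -> first argument register
//          call functionEnterHelper   ; sync point recorded at the return addr
//          jmp  rest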

template<class Mem>
void emitCmpClass(Asm& as, Reg64 reg, Mem mem) {
  auto size = sizeof(LowClassPtr);

  if (size == 8) {
    as.   cmpq    (reg, mem);
  } else if (size == 4) {
    as.   cmpl    (r32(reg), mem);
  } else {
    not_implemented();
  }
}

template void emitCmpClass<MemoryRef>(Asm& as, Reg64 reg, MemoryRef mem);

void emitCmpClass(Asm& as, Reg64 reg1, PhysReg reg2) {
  auto size = sizeof(LowClassPtr);

  if (size == 8) {
    as.   cmpq    (reg1, reg2);
  } else if (size == 4) {
    as.   cmpl    (r32(reg1), r32(reg2));
  } else {
    not_implemented();
  }
}

void shuffle2(Asm& as, PhysReg s0, PhysReg s1, PhysReg d0, PhysReg d1) {
  if (s0 == InvalidReg && s1 == InvalidReg &&
      d0 == InvalidReg && d1 == InvalidReg) return;
  assert(s0 != s1);
  assert(!s0.isSIMD() || s1 == InvalidReg); // never 2 XMMs
  assert(!d0.isSIMD() || d1 == InvalidReg); // never 2 XMMs
  if (d0 == s1 && d1 != InvalidReg) {
    assert(d0 != d1);
    if (d1 == s0) {
      as.   xchgq (s1, s0);
    } else {
      as.   movq (s1, d1); // save s1 first; d1 != s0
      as.   movq (s0, d0);
    }
  } else if (d0.isSIMD() && s0.isGP() && s1.isGP()) {
    // move 2 gpr to 1 xmm
    assert(d0 != rCgXMM0); // xmm0 is reserved for scratch
    as.   movq_rx(s0, d0);
    as.   movq_rx(s1, rCgXMM0);
    as.   unpcklpd(rCgXMM0, d0); // s1 -> d0[1]
  } else {
    if (d0 != InvalidReg) emitMovRegReg(as, s0, d0); // d0 != s1
    if (d1 != InvalidReg) emitMovRegReg(as, s1, d1);
  }
}
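
// A couple of concrete cases, for illustration:
//   - d0 == s1 and d1 == s0 (a pure swap of two GPRs) becomes a single xchgq.
//   - two GPR sources with a single XMM destination (d1 == InvalidReg, and d0
//     must not be the reserved rCgXMM0 scratch) packs s0 into lane 0 of d0
//     and s1 into lane 1 via unpcklpd.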

void zeroExtendIfBool(CodeGenerator::Asm& as, const SSATmp* src, PhysReg reg) {
  if (src->isA(Type::Bool) && reg != InvalidReg) {
    // zero-extend the bool from a byte to a quad
    // note: movzbl actually extends the value to 64 bits.
    as.movzbl(rbyte(reg), r32(reg));
  }
}
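
// Why movzbl is enough: on x86-64, writing a 32-bit register implicitly
// zeroes the upper 32 bits of the full 64-bit register, so movzbl from the
// byte register into r32(reg) leaves the whole quadword zero-extended.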

ConditionCode opToConditionCode(Opcode opc) {
  switch (opc) {
  case JmpGt: return CC_G;
  case JmpGte: return CC_GE;
  case JmpLt: return CC_L;
  case JmpLte: return CC_LE;
  case JmpEq: return CC_E;
  case JmpNeq: return CC_NE;
  case JmpGtInt: return CC_G;
  case JmpGteInt: return CC_GE;
  case JmpLtInt: return CC_L;
  case JmpLteInt: return CC_LE;
  case JmpEqInt: return CC_E;
  case JmpNeqInt: return CC_NE;
  case JmpSame: return CC_E;
  case JmpNSame: return CC_NE;
  case JmpInstanceOfBitmask: return CC_NZ;
  case JmpNInstanceOfBitmask: return CC_Z;
  case JmpZero: return CC_Z;
  case JmpNZero: return CC_NZ;
  case ReqBindJmpGt: return CC_G;
  case ReqBindJmpGte: return CC_GE;
  case ReqBindJmpLt: return CC_L;
  case ReqBindJmpLte: return CC_LE;
  case ReqBindJmpEq: return CC_E;
  case ReqBindJmpNeq: return CC_NE;
  case ReqBindJmpGtInt: return CC_G;
  case ReqBindJmpGteInt: return CC_GE;
  case ReqBindJmpLtInt: return CC_L;
  case ReqBindJmpLteInt: return CC_LE;
  case ReqBindJmpEqInt: return CC_E;
  case ReqBindJmpNeqInt: return CC_NE;
  case ReqBindJmpSame: return CC_E;
  case ReqBindJmpNSame: return CC_NE;
  case ReqBindJmpInstanceOfBitmask: return CC_NZ;
  case ReqBindJmpNInstanceOfBitmask: return CC_Z;
  case ReqBindJmpZero: return CC_Z;
  case ReqBindJmpNZero: return CC_NZ;
  case SideExitJmpGt: return CC_G;
  case SideExitJmpGte: return CC_GE;
  case SideExitJmpLt: return CC_L;
  case SideExitJmpLte: return CC_LE;
  case SideExitJmpEq: return CC_E;
  case SideExitJmpNeq: return CC_NE;
  case SideExitJmpGtInt: return CC_G;
  case SideExitJmpGteInt: return CC_GE;
  case SideExitJmpLtInt: return CC_L;
  case SideExitJmpLteInt: return CC_LE;
  case SideExitJmpEqInt: return CC_E;
  case SideExitJmpNeqInt: return CC_NE;
  case SideExitJmpSame: return CC_E;
  case SideExitJmpNSame: return CC_NE;
  case SideExitJmpInstanceOfBitmask: return CC_NZ;
  case SideExitJmpNInstanceOfBitmask: return CC_Z;
  case SideExitJmpZero: return CC_Z;
  case SideExitJmpNZero: return CC_NZ;
  default:
    always_assert(0);