Improve idiom for getting the return address of a call{}
[hiphop-php.git] / hphp / runtime / vm / jit / unique-stubs-x64.cpp
blob715ba6c6a9c0a3f7cf75f11cb33c6bef0adae7c2
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2016 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/unique-stubs-x64.h"
19 #include "hphp/runtime/base/header-kind.h"
20 #include "hphp/runtime/base/rds-header.h"
21 #include "hphp/runtime/base/runtime-option.h"
22 #include "hphp/runtime/base/stats.h"
23 #include "hphp/runtime/vm/bytecode.h"
24 #include "hphp/runtime/vm/event-hook.h"
25 #include "hphp/runtime/vm/vm-regs.h"
27 #include "hphp/runtime/vm/jit/types.h"
28 #include "hphp/runtime/vm/jit/abi-x64.h"
29 #include "hphp/runtime/vm/jit/align-x64.h"
30 #include "hphp/runtime/vm/jit/code-gen-cf.h"
31 #include "hphp/runtime/vm/jit/code-gen-helpers.h"
32 #include "hphp/runtime/vm/jit/code-gen-tls.h"
33 #include "hphp/runtime/vm/jit/fixup.h"
34 #include "hphp/runtime/vm/jit/mc-generator.h"
35 #include "hphp/runtime/vm/jit/phys-reg.h"
36 #include "hphp/runtime/vm/jit/service-requests.h"
37 #include "hphp/runtime/vm/jit/translator-inline.h"
38 #include "hphp/runtime/vm/jit/unique-stubs.h"
39 #include "hphp/runtime/vm/jit/unwind-itanium.h"
40 #include "hphp/runtime/vm/jit/vasm-gen.h"
41 #include "hphp/runtime/vm/jit/vasm-instr.h"
43 #include "hphp/util/asm-x64.h"
44 #include "hphp/util/data-block.h"
46 namespace HPHP { namespace jit {
48 ///////////////////////////////////////////////////////////////////////////////
50 TRACE_SET_MOD(ustubs);
52 extern "C" void enterTCHelper(Cell* vm_sp,
53 ActRec* vm_fp,
54 TCA start,
55 ActRec* firstAR,
56 void* targetCacheBase,
57 ActRec* stashedAR);
59 ///////////////////////////////////////////////////////////////////////////////
61 namespace x64 {
63 ///////////////////////////////////////////////////////////////////////////////
65 static void alignJmpTarget(CodeBlock& cb) {
66 align(cb, nullptr, Alignment::JmpTarget, AlignContext::Dead);
69 ///////////////////////////////////////////////////////////////////////////////
71 TCA emitFunctionEnterHelper(CodeBlock& cb, UniqueStubs& us) {
72 alignJmpTarget(cb);
74 auto const start = vwrap2(cb, [&] (Vout& v, Vout& vcold) {
75 auto const ar = v.makeReg();
77 v << copy{rvmfp(), ar};
79 // Fully set up the call frame for the stub. We can't skip this like we do
80 // in other stubs because we need the return IP for this frame in the %rbp
81 // chain, in order to find the proper fixup for the VMRegAnchor in the
82 // intercept handler.
83 v << stublogue{true};
84 v << copy{rsp(), rvmfp()};
86 // When we call the event hook, it might tell us to skip the callee
87 // (because of fb_intercept). If that happens, we need to return to the
88 // caller, but the handler will have already popped the callee's frame.
89 // So, we need to save these values for later.
90 v << pushm{ar[AROFF(m_savedRip)]};
91 v << pushm{ar[AROFF(m_sfp)]};
93 v << copy2{ar, v.cns(EventHook::NormalFunc), rarg(0), rarg(1)};
95 bool (*hook)(const ActRec*, int) = &EventHook::onFunctionCall;
96 v << call{TCA(hook), arg_regs(0), &us.functionEnterHelperReturn};
98 auto const sf = v.makeReg();
99 v << testb{rret(), rret(), sf};
101 unlikelyIfThen(v, vcold, CC_Z, sf, [&] (Vout& v) {
102 auto const saved_rip = v.makeReg();
104 // The event hook has already cleaned up the stack and popped the
105 // callee's frame, so we're ready to continue from the original call
106 // site. We just need to grab the fp/rip of the original frame that we
107 // saved earlier, and sync rvmsp().
108 v << pop{rvmfp()};
109 v << pop{saved_rip};
111 // Drop our call frame; the stublogue{} instruction guarantees that this
112 // is exactly 16 bytes.
113 v << lea{rsp()[16], rsp()};
115 // Sync vmsp and return to the caller. This unbalances the return stack
116 // buffer, but if we're intercepting, we probably don't care.
117 v << load{rvmtl()[rds::kVmspOff], rvmsp()};
118 v << jmpr{saved_rip};
121 // Skip past the stuff we saved for the intercept case.
122 v << lea{rsp()[16], rsp()};
124 // Restore rvmfp() and return to the callee's func prologue.
125 v << stubret{RegSet(), true};
128 return start;
131 ///////////////////////////////////////////////////////////////////////////////
134 * Helper for the freeLocalsHelpers which does the actual work of decrementing
135 * a value's refcount or releasing it.
137 * This helper is reached via call from the various freeLocalHelpers. It
138 * expects `tv' to be the address of a TypedValue with refcounted type `type'
139 * (though it may be static, and we will do nothing in that case).
141 * The `live' registers must be preserved across any native calls (and
142 * generally left untouched).
144 static TCA emitDecRefHelper(CodeBlock& cb, CGMeta& fixups, PhysReg tv,
145 PhysReg type, RegSet live) {
146 return vwrap(cb, fixups, [&] (Vout& v) {
147 // We use the first argument register for the TV data because we might pass
148 // it to the native release call. It's not live when we enter the helper.
149 auto const data = rarg(0);
150 v << load{tv[TVOFF(m_data)], data};
152 auto const sf = v.makeReg();
153 v << cmplim{1, data[FAST_REFCOUNT_OFFSET], sf};
155 ifThen(v, CC_NL, sf, [&] (Vout& v) {
156 // The refcount is positive, so the value is refcounted. We need to
157 // either decref or release.
158 ifThen(v, CC_NE, sf, [&] (Vout& v) {
159 // The refcount is greater than 1; decref it.
160 v << declm{data[FAST_REFCOUNT_OFFSET], v.makeReg()};
161 v << ret{live};
164 // Note that the stack is aligned since we called to this helper from an
165 // stack-unaligned stub.
166 PhysRegSaver prs{v, live};
168 // The refcount is exactly 1; release the value.
169 // Avoid 'this' pointer overwriting by reserving it as an argument.
170 v << callm{lookupDestructor(v, type), arg_regs(1)};
172 // Between where %rsp is now and the saved RIP of the call into the
173 // freeLocalsHelpers stub, we have all the live regs we pushed, plus the
174 // saved RIP of the call from the stub to this helper.
175 v << syncpoint{makeIndirectFixup(prs.dwordsPushed() + 1)};
176 // fallthru
179 // Either we did a decref, or the value was static.
180 v << ret{live};
184 TCA emitFreeLocalsHelpers(CodeBlock& cb, UniqueStubs& us) {
185 // The address of the first local is passed in the second argument register.
186 // We use the third and fourth as scratch registers.
187 auto const local = rarg(1);
188 auto const last = rarg(2);
189 auto const type = rarg(3);
190 CGMeta fixups;
192 // This stub is very hot; keep it cache-aligned.
193 align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
194 auto const release = emitDecRefHelper(cb, fixups, local, type, local | last);
196 auto const decref_local = [&] (Vout& v) {
197 auto const sf = v.makeReg();
199 // We can't do a byte load here---we have to sign-extend since we use
200 // `type' as a 32-bit array index to the destructor table.
201 v << loadzbl{local[TVOFF(m_type)], type};
202 emitCmpTVType(v, sf, KindOfRefCountThreshold, type);
204 ifThen(v, CC_G, sf, [&] (Vout& v) {
205 v << call{release, arg_regs(3)};
209 auto const next_local = [&] (Vout& v) {
210 v << addqi{static_cast<int>(sizeof(TypedValue)),
211 local, local, v.makeReg()};
214 alignJmpTarget(cb);
216 us.freeManyLocalsHelper = vwrap(cb, fixups, [&] (Vout& v) {
217 // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop
218 // until we hit that point.
219 v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};
221 doWhile(v, CC_NZ, {},
222 [&] (const VregList& in, const VregList& out) {
223 auto const sf = v.makeReg();
225 decref_local(v);
226 next_local(v);
227 v << cmpq{local, last, sf};
228 return sf;
233 for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
234 us.freeLocalsHelpers[i] = vwrap(cb, [&] (Vout& v) {
235 decref_local(v);
236 if (i != 0) next_local(v);
240 // All the stub entrypoints share the same ret.
241 vwrap(cb, fixups, [] (Vout& v) { v << ret{}; });
243 // This stub is hot, so make sure to keep it small.
244 // Alas, we have more work to do in this under Windows,
245 // so we can't be this small :(
246 #ifndef _WIN32
247 always_assert(Stats::enabled() ||
248 (cb.frontier() - release <= 4 * x64::cache_line_size()));
249 #endif
251 fixups.process(nullptr);
252 return release;
255 ///////////////////////////////////////////////////////////////////////////////
257 extern "C" void enterTCExit();
259 TCA emitCallToExit(CodeBlock& cb) {
260 X64Assembler a { cb };
262 // Emit a byte of padding. This is a kind of hacky way to avoid
263 // hitting an assert in recordGdbStub when we call it with stub - 1
264 // as the start address.
265 a.emitNop(1);
266 auto const start = a.frontier();
267 if (RuntimeOption::EvalHHIRGenerateAsserts) {
268 Label ok;
269 a.emitImmReg(uintptr_t(enterTCExit), reg::rax);
270 a.cmpq(reg::rax, *rsp());
271 a.je8 (ok);
272 a.ud2();
273 asm_label(a, ok);
276 // Emulate a ret to enterTCExit without actually doing one to avoid
277 // unbalancing the return stack buffer. The call from enterTCHelper() that
278 // got us into the TC was popped off the RSB by the ret that got us to this
279 // stub.
280 a.addq(8, rsp());
281 if (a.jmpDeltaFits(TCA(enterTCExit))) {
282 a.jmp(TCA(enterTCExit));
283 } else {
284 // can't do a near jmp and a rip-relative load/jmp would require threading
285 // through extra state to allocate a literal. use an indirect jump through
286 // a register
287 a.emitImmReg(uintptr_t(enterTCExit), reg::rax);
288 a.jmp(reg::rax);
291 // On a backtrace, gdb tries to locate the calling frame at address
292 // returnRIP-1. However, for the first VM frame, there is no code at
293 // returnRIP-1, since the AR was set up manually. For this frame,
294 // record the tracelet address as starting from this callToExit-1,
295 // so gdb does not barf.
296 return start;
299 TCA emitEndCatchHelper(CodeBlock& cb, UniqueStubs& us) {
300 auto const udrspo = rvmtl()[unwinderDebuggerReturnSPOff()];
302 auto const debuggerReturn = vwrap(cb, [&] (Vout& v) {
303 v << load{udrspo, rvmsp()};
304 v << storeqi{0, udrspo};
306 svcreq::emit_persistent(cb, folly::none, REQ_POST_DEBUGGER_RET);
308 auto const resumeCPPUnwind = vwrap(cb, [&] (Vout& v) {
309 static_assert(sizeof(tl_regState) == 1,
310 "The following store must match the size of tl_regState.");
311 auto const regstate = emitTLSAddr(v, tls_datum(tl_regState));
312 v << storebi{static_cast<int32_t>(VMRegState::CLEAN), regstate};
314 v << load{rvmtl()[unwinderExnOff()], rarg(0)};
315 v << call{TCA(_Unwind_Resume), arg_regs(1), &us.endCatchHelperPast};
316 v << ud2{};
319 alignJmpTarget(cb);
321 return vwrap(cb, [&] (Vout& v) {
322 auto const done1 = v.makeBlock();
323 auto const sf1 = v.makeReg();
325 v << cmpqim{0, udrspo, sf1};
326 v << jcci{CC_NE, sf1, done1, debuggerReturn};
327 v = done1;
329 // Normal end catch situation: call back to tc_unwind_resume, which returns
330 // the catch trace (or null) in %rax, and the new vmfp in %rdx.
331 v << copy{rvmfp(), rarg(0)};
332 v << call{TCA(tc_unwind_resume)};
333 v << copy{reg::rdx, rvmfp()};
335 auto const done2 = v.makeBlock();
336 auto const sf2 = v.makeReg();
338 v << testq{reg::rax, reg::rax, sf2};
339 v << jcci{CC_Z, sf2, done2, resumeCPPUnwind};
340 v = done2;
342 v << jmpr{reg::rax};
346 ///////////////////////////////////////////////////////////////////////////////
348 void enterTCImpl(TCA start, ActRec* stashedAR) {
349 // We have to force C++ to spill anything that might be in a callee-saved
350 // register (aside from %rbp), since enterTCHelper does not save them.
351 CALLEE_SAVED_BARRIER();
352 auto& regs = vmRegsUnsafe();
353 jit::enterTCHelper(regs.stack.top(), regs.fp, start,
354 vmFirstAR(), rds::tl_base, stashedAR);
355 CALLEE_SAVED_BARRIER();
358 ///////////////////////////////////////////////////////////////////////////////