Move shouldPGO() out of Func
hphp/runtime/vm/jit/func-prologues-x64.cpp
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/func-prologues-x64.h"

#include "hphp/util/asm-x64.h"

#include "hphp/runtime/ext/ext_closure.h"
#include "hphp/runtime/vm/func.h"
#include "hphp/runtime/vm/srckey.h"
#include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/mc-generator-internal.h"
#include "hphp/runtime/vm/jit/prof-data.h"
#include "hphp/runtime/vm/jit/translator-runtime.h"
#include "hphp/runtime/vm/jit/write-lease.h"
namespace HPHP { namespace jit { namespace x64 {

//////////////////////////////////////////////////////////////////////

TRACE_SET_MOD(mcg);

//////////////////////////////////////////////////////////////////////

namespace {
void emitStackCheck(X64Assembler& a, int funcDepth, Offset pc) {
  using namespace reg;
  funcDepth += kStackCheckPadding * sizeof(Cell);

  int stackMask = cellsToBytes(RuntimeOption::EvalVMStackElms) - 1;
  a.    movq   (rVmSp, rAsm);  // copy to destroy
  a.    andq   (stackMask, rAsm);
  a.    subq   (funcDepth + Stack::sSurprisePageSize, rAsm);
  a.    jl     (mcg->tx().uniqueStubs.stackOverflowHelper);
}
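
/*
 * Roughly what the check above computes, as a C sketch (it assumes the
 * VM stack occupies a single aligned region of
 * cellsToBytes(EvalVMStackElms) bytes, a power of two):
 *
 *   uintptr_t bytesBelow = uintptr_t(vmsp) & stackMask;
 *   if ((intptr_t)(bytesBelow - (funcDepth + Stack::sSurprisePageSize)) < 0) {
 *     goto stackOverflowHelper;   // frame won't fit above the guard page
 *   }
 *
 * Because the stack grows downward, the masked value is exactly the
 * space remaining before we would run into the surprise page.
 */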
/*
 * This will omit overflow checks if it is a leaf function that can't
 * use more than kStackCheckLeafPadding cells.
 */
void maybeEmitStackCheck(X64Assembler& a, const Func* func) {
  auto const needStackCheck =
    !(func->attrs() & AttrPhpLeafFn) ||
    func->maxStackCells() >= kStackCheckLeafPadding;
  if (needStackCheck) {
    emitStackCheck(a, cellsToBytes(func->maxStackCells()), func->base());
  }
}
TCA emitFuncGuard(X64Assembler& a, const Func* func) {
  using namespace reg;
  assert(kScratchCrossTraceRegs.contains(rax));
  assert(kScratchCrossTraceRegs.contains(rdx));

  auto funcImm = Immed64(func);
  int nBytes, offset;

  // Ensure the immediate is safely smashable: it must not cross a
  // qword boundary.
  if (!funcImm.fits(sz::dword)) {
    nBytes = kFuncGuardLen;
    offset = kFuncMovImm;
  } else {
    nBytes = kFuncGuardShortLen;
    offset = kFuncCmpImm;
  }
  mcg->backEnd().prepareForSmash(a.code(), nBytes, offset);

  TCA aStart DEBUG_ONLY = a.frontier();
  if (!funcImm.fits(sz::dword)) {
    a.  loadq  (rStashedAR[AROFF(m_func)], rax);
    /*
     * Although func doesn't fit in a signed 32-bit immediate, it may
     * still fit in an unsigned one.  Rather than deal with yet another
     * case (which only happens when we disable jemalloc) just force it
     * to be an 8-byte immediate, and patch it up afterwards.
     */
    a.  movq   (0xdeadbeeffeedface, rdx);
    assert(((uint64_t*)a.frontier())[-1] == 0xdeadbeeffeedface);
    ((uint64_t*)a.frontier())[-1] = uintptr_t(func);
    a.  cmpq   (rax, rdx);
  } else {
    a.  cmpq   (funcImm.l(), rStashedAR[AROFF(m_func)]);
  }
  a.    jnz    (mcg->tx().uniqueStubs.funcPrologueRedispatch);

  assert(funcPrologueToGuard(a.frontier(), func) == aStart);
  assert(funcPrologueHasGuard(a.frontier(), func));
  return a.frontier();
}
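
/*
 * Semantics of the guard emitted above, as a sketch:
 *
 *   if (ar->m_func != func) goto funcPrologueRedispatch;
 *
 * The Func* immediate is laid out smashably so the TC can patch the
 * guard later without re-emitting the prologue.
 */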

// Initialize at most this many locals inline in function body prologue; more
// than this, and emitting a loop is more compact. To be precise, the actual
// crossover point in terms of code size is 6; 9 was determined by experiment
// to be the optimal point in certain benchmarks. #microoptimization
constexpr auto kLocalsToInitializeInline = 9;

// Maximum number of default-value parameter initializations to
// unroll. Beyond this, a loop is generated.
constexpr auto kMaxParamsInitUnroll = 5;

SrcKey emitPrologueWork(Func* func, int nPassed) {
  using namespace reg;

  auto const numNonVariadicParams = func->numNonVariadicParams();
  auto const& paramInfo = func->params();

  Offset entryOffset = func->getEntryForNumArgs(nPassed);

  Asm a { mcg->code.main() };

  if (mcg->tx().mode() == TransKind::Proflogue) {
    assert(shouldPGOFunc(*func));
    TransID transId = mcg->tx().profData()->curTransID();
    auto counterAddr = mcg->tx().profData()->transCounterAddr(transId);
    a.movq(counterAddr, rAsm);
    a.decq(rAsm[0]);
    mcg->tx().profData()->setProfiling(func->getFuncId());
  }
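
  // The Proflogue-only prefix above boils down to (sketch):
  //
  //   --*transCounterAddr;
  //
  // a per-translation countdown that the JIT consults elsewhere to
  // decide when this function is hot enough to retranslate optimized.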

  // Note: you're not allowed to use rVmSp around here for anything in
  // the nPassed == numNonVariadicParams case, because it might be junk
  // if we came from emitMagicFuncPrologue.

  if (nPassed > numNonVariadicParams) {
    // Too many args; a weird case, so call out to an appropriate helper.
    // Stash ar somewhere callee-saved.
    if (false) { // typecheck
      jit::shuffleExtraArgsMayUseVV((ActRec*)nullptr);
      jit::shuffleExtraArgsVariadicAndVV((ActRec*)nullptr);
      jit::shuffleExtraArgsVariadic((ActRec*)nullptr);
      jit::trimExtraArgs((ActRec*)nullptr);
    }
    a.  movq   (rStashedAR, argNumToRegName[0]);

    if (LIKELY(func->discardExtraArgs())) {
      emitCall(a, TCA(jit::trimExtraArgs), argSet(1));
    } else if (func->attrs() & AttrMayUseVV) {
      emitCall(a, func->hasVariadicCaptureParam()
                  ? TCA(jit::shuffleExtraArgsVariadicAndVV)
                  : TCA(jit::shuffleExtraArgsMayUseVV),
               argSet(1));
    } else {
      assert(func->hasVariadicCaptureParam());
      emitCall(a, TCA(jit::shuffleExtraArgsVariadic), argSet(1));
    }
    // We'll fix rVmSp below.
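    //
    // Helper dispatch above, summarized (sketch):
    //   extra args discardable               -> trimExtraArgs
    //   func may use a VarEnv (AttrMayUseVV)
    //     with variadic capture              -> shuffleExtraArgsVariadicAndVV
    //     without                            -> shuffleExtraArgsMayUseVV
    //   otherwise (variadic capture only)    -> shuffleExtraArgsVariadic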
  } else if (nPassed < numNonVariadicParams) {
    TRACE(1, "Only have %d of %d args; getting dvFunclet\n",
          nPassed, numNonVariadicParams);
    if (numNonVariadicParams - nPassed <= kMaxParamsInitUnroll) {
      for (int i = nPassed; i < numNonVariadicParams; i++) {
        int offset = cellsToBytes(nPassed - i - 1);
        emitStoreTVType(a, KindOfUninit, rVmSp[offset + TVOFF(m_type)]);
      }
      if (func->hasVariadicCaptureParam()) {
        int offset = cellsToBytes(nPassed - numNonVariadicParams - 1);
        emitStoreTVType(a, KindOfArray, rVmSp[offset + TVOFF(m_type)]);
        emitImmStoreq(a, staticEmptyArray(), rVmSp[offset + TVOFF(m_data)]);
      }
    } else {
      a.  emitImmReg(nPassed, rax);
      // do {
      //   *(--rVmSp) = NULL; nPassed++;
      // } while (nPassed < numNonVariadicParams);
      // This should be an unusual case, so optimize for code density
      // rather than execution speed; i.e., don't unroll the loop.
      TCA loopTop = a.frontier();
      a.  subq   (int(sizeof(Cell)), rVmSp);
      a.  incl   (eax);
      emitStoreUninitNull(a, 0, rVmSp);
      a.  cmpl   (int(numNonVariadicParams), eax);
      a.  jl8    (loopTop);
      if (func->hasVariadicCaptureParam()) {
        emitStoreTVType(a, KindOfArray, rVmSp[-sizeof(Cell) + TVOFF(m_type)]);
        emitImmStoreq(a, staticEmptyArray(),
                      rVmSp[-sizeof(Cell) + TVOFF(m_data)]);
      }
    }
  } else if (func->hasVariadicCaptureParam()) {
    assert(!func->isMagic());
    int offset = cellsToBytes(-1);
    emitStoreTVType(a, KindOfArray, rVmSp[offset + TVOFF(m_type)]);
    emitImmStoreq(a, staticEmptyArray(), rVmSp[offset + TVOFF(m_data)]);
  }

  // Entry point for numNonVariadicParams == nPassed is here. XXX:
  // what about variadics!!!

  // Args are kosher. Frame linkage: set fp = ar.
  a.    movq   (rStashedAR, rVmFp);

  int numLocals = func->numParams();
  if (func->isClosureBody()) {
    // Closure object properties are the use vars followed by the
    // static locals (which are per-instance).
    int numUseVars = func->cls()->numDeclProperties() -
                     func->numStaticLocals();

    emitLea(a, rVmFp[-cellsToBytes(numLocals)], rVmSp);

    PhysReg rClosure = rcx;
    a.  loadq(rVmFp[AROFF(m_this)], rClosure);

    // Swap in the $this or late bound class
    a.  loadq(rClosure[c_Closure::ctxOffset()], rAsm);
    a.  storeq(rAsm, rVmFp[AROFF(m_this)]);

    if (!(func->attrs() & AttrStatic)) {
      a.shrq(1, rAsm);
      ifThen(a, CC_NBE, [&](Asm& a) {
        a.shlq(1, rAsm);
        emitIncRefCheckNonStatic(a, rAsm, KindOfObject);
      });
    }

    // Put in the correct context
    a.  loadq(rClosure[c_Closure::funcOffset()], rAsm);
    a.  storeq(rAsm, rVmFp[AROFF(m_func)]);

    // Copy in all the use vars
    int baseUVOffset = sizeof(ObjectData) + func->cls()->builtinODTailSize();
    for (int i = 0; i < numUseVars + 1; i++) {
      int spOffset = -cellsToBytes(i+1);

      if (i == 0) {
        // The closure is the first local.
        // We don't incref because it used to be $this
        // and now it is a local, so they cancel out
        emitStoreTypedValue(a, KindOfObject, rClosure, spOffset, rVmSp);
        continue;
      }

      int uvOffset = baseUVOffset + cellsToBytes(i-1);

      emitCopyTo(a, rClosure, uvOffset, rVmSp, spOffset, rAsm);
      emitIncRefGenericRegSafe(a, rVmSp, spOffset, rAsm);
    }

    numLocals += numUseVars + 1;
  }
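
  // Frame layout after the block above, as a sketch: params occupy
  // locals 0..numParams-1, the next slot holds the closure object
  // itself, and the captured use vars copied out of the closure's
  // property area follow it.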

  // We're in the callee frame; initialize locals. Unroll the loop all
  // the way if there are a modest number of locals to update;
  // otherwise, do it in a compact loop.
  int numUninitLocals = func->numLocals() - numLocals;
  assert(numUninitLocals >= 0);
  if (numUninitLocals > 0) {

    // If there are too many locals, then emitting a loop to initialize
    // locals is more compact than emitting a slew of movs inline.
    if (numUninitLocals > kLocalsToInitializeInline) {
      PhysReg loopReg = rcx;

      // rVmFp + rcx points to the count/type fields of the TypedValue we're
      // about to write to.
      int loopStart = -func->numLocals() * sizeof(TypedValue) + TVOFF(m_type);
      int loopEnd = -numLocals * sizeof(TypedValue) + TVOFF(m_type);

      a.  emitImmReg(loopStart, loopReg);
      a.  emitImmReg(KindOfUninit, rdx);

      TCA topOfLoop = a.frontier();
      // do {
      //   rVmFp[loopReg].m_type = KindOfUninit;
      // } while(++loopReg != loopEnd);
      emitStoreTVType(a, edx, rVmFp[loopReg]);
      a.  addq   (int(sizeof(Cell)), loopReg);
      a.  cmpq   (loopEnd, loopReg);
      a.  jcc8   (CC_NE, topOfLoop);
    } else {
      PhysReg base;
      int disp, k;
      static_assert(KindOfUninit == 0, "");
      if (func->numParams() < func->numLocals()) {
        a.xorl (eax, eax);
      }
      for (k = numLocals; k < func->numLocals(); ++k) {
        locToRegDisp(Location(Location::Local, k), &base, &disp, func);
        emitStoreTVType(a, eax, base[disp + TVOFF(m_type)]);
      }
    }
  }

  auto destPC = func->unit()->entry() + entryOffset;
  SrcKey funcBody(func, destPC, false);

  // Move rVmSp to the right place: just past all locals
  int frameCells = func->numSlotsInFrame();
  emitLea(a, rVmFp[-cellsToBytes(frameCells)], rVmSp);

  Fixup fixup(funcBody.offset() - func->base(), frameCells);

  // Emit warnings for any missing arguments
  if (!func->isCPPBuiltin()) {
    for (int i = nPassed; i < numNonVariadicParams; ++i) {
      if (paramInfo[i].funcletOff == InvalidAbsoluteOffset) {
        if (false) { // typecheck
          jit::raiseMissingArgument((const Func*) nullptr, 0);
        }
        a.  emitImmReg((intptr_t)func, argNumToRegName[0]);
        a.  emitImmReg(nPassed, argNumToRegName[1]);
        emitCall(a, TCA(jit::raiseMissingArgument), argSet(2));
        mcg->recordSyncPoint(a.frontier(), fixup.pcOffset, fixup.spOffset);
        break;
      }
    }
  }

  // Check surprise flags in the same place as the interpreter: after
  // setting up the callee's frame but before executing any of its
  // code.
  emitCheckSurpriseFlagsEnter(mcg->code.main(), mcg->code.cold(), fixup);

  if (func->isClosureBody() && func->cls()) {
    int entry = nPassed <= numNonVariadicParams
      ? nPassed : numNonVariadicParams + 1;
    // Relying on rStashedAR == rVmFp here
    a.    loadq  (rStashedAR[AROFF(m_func)], rax);
    a.    loadq  (rax[Func::prologueTableOff() + sizeof(TCA)*entry],
                  rax);
    a.    jmp    (rax);
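    // (Dispatch sketch: a Func keeps a table of prologues indexed by
    // passed-arg count, with one extra slot for "too many args", which
    // is why `entry` is clamped to numNonVariadicParams + 1.  Since a
    // class-scoped closure body may run under a cloned Func, the real
    // callee is reloaded from the ActRec and entered indirectly
    // through its table instead of via a direct bound jump.)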
  } else {
    emitBindJmp(mcg->code.main(), mcg->code.frozen(), funcBody);
  }

  return funcBody;
}

} // anonymous namespace

//////////////////////////////////////////////////////////////////////

TCA emitCallArrayPrologue(Func* func, DVFuncletsVec& dvs) {
  auto& mainCode = mcg->code.main();
  auto& frozenCode = mcg->code.frozen();
  Asm a { mainCode };
  TCA start = mainCode.frontier();
  assert(mcg->cgFixups().empty());
  if (dvs.size() == 1) {
    a.   cmpl  (dvs[0].first, rVmFp[AROFF(m_numArgsAndFlags)]);
    emitBindJcc(mainCode, frozenCode, CC_LE,
                SrcKey(func, dvs[0].second, false));
    emitBindJmp(mainCode, frozenCode, SrcKey(func, func->base(), false));
  } else {
    a.   loadl (rVmFp[AROFF(m_numArgsAndFlags)], reg::eax);
    for (unsigned i = 0; i < dvs.size(); i++) {
      a.   cmpl  (dvs[i].first, reg::eax);
      emitBindJcc(mainCode, frozenCode, CC_LE,
                  SrcKey(func, dvs[i].second, false));
    }
    emitBindJmp(mainCode, frozenCode, SrcKey(func, func->base(), false));
  }
  mcg->cgFixups().process(nullptr);
  return start;
}
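
/*
 * What the code above emits, roughly (sketch): given the DV funclets
 * [(numParams_0, offset_0), ...] in ascending order,
 *
 *   if (ar->numArgs() <= numParams_0) goto offset_0;
 *   ...
 *   if (ar->numArgs() <= numParams_n) goto offset_n;
 *   goto func->base();
 *
 * i.e. a linear chain selecting the default-value funclet for the
 * number of args actually passed.
 */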

SrcKey emitFuncPrologue(Func* func, int nPassed, TCA& start) {
  assert(!func->isMagic());
  Asm a { mcg->code.main() };

  start = emitFuncGuard(a, func);
  if (RuntimeOption::EvalJitTransCounters) emitTransCounterInc(a);
  a.    pop    (rStashedAR[AROFF(m_savedRip)]);
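  // The pop above is part of the prologue calling convention (sketch):
  // prologues are entered via a hardware `call`, so the return address
  // is on the machine stack; it's moved immediately into the ActRec's
  // m_savedRip slot, where the return path expects to find it.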
  maybeEmitStackCheck(a, func);
  return emitPrologueWork(func, nPassed);
}

SrcKey emitMagicFuncPrologue(Func* func, uint32_t nPassed, TCA& start) {
  assert(func->isMagic());
  assert(func->numParams() == 2);
  assert(!func->hasVariadicCaptureParam());
  using namespace reg;
  using MkPacked = ArrayData* (*)(uint32_t, const TypedValue*);

  Asm a { mcg->code.main() };
  Label not_magic_call;
  auto const rInvName = r13;
  assert(!kCrossCallRegs.contains(r13));

  auto skFuncBody = SrcKey {};
  auto callFixup = TCA { nullptr };

  /*
   * If nPassed is not 2, we need to generate a non-magic prologue
   * that can be used if there is no invName on the ActRec.
   * (I.e. someone called __call directly.)  In the case where nPassed
   * is 2, whether it's magic or not, the prologue we generate at the
   * end will work.
   *
   * This is placed in `a` ahead of the actual prologue entry point,
   * but only because emitPrologueWork can't easily go to acold right
   * now.
   */
  if (nPassed != 2) {
    asm_label(a, not_magic_call);
    skFuncBody = emitPrologueWork(func, nPassed);
    // There is a REQ_BIND_JMP at the end of emitPrologueWork.
  }

  // Main prologue entry point is here.
  start = emitFuncGuard(a, func);
  if (RuntimeOption::EvalJitTransCounters) emitTransCounterInc(a);
  a.    pop    (rStashedAR[AROFF(m_savedRip)]);
  maybeEmitStackCheck(a, func);

  /*
   * Detect if this was actually a magic call (i.e. the ActRec has an
   * invName), and shuffle the magic call arguments into a packed
   * array.
   *
   * If it's not a magic call, we jump backward to a normal function
   * prologue (see above) for nPassed.  Except if nPassed is 2, we'll
   * be jumping over the magic call shuffle, to the prologue for 2
   * args below.
   */
  a.    loadq  (rStashedAR[AROFF(m_invName)], rInvName);
  a.    testb  (1, rbyte(rInvName));
  if (nPassed == 2) {
    a.  jz8    (not_magic_call);
  } else {
    not_magic_call.jccAuto(a, CC_Z);
  }
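  // m_invName shares its ActRec slot with m_varEnv; a set low bit tags
  // the slot as holding an invName (i.e. a magic call).  The testb
  // above checked that tag; the decq below clears it to recover the
  // StringData*.  Roughly:
  //
  //   if (!(ar->m_invName & 1)) goto not_magic_call;
  //   invName = (StringData*)(ar->m_invName - 1);
  //   ar->m_varEnv = nullptr;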
  a.    decq   (rInvName);
  a.    storeq (0, rStashedAR[AROFF(m_varEnv)]);
  if (nPassed != 0) { // for zero args, we use the empty array
    a.  movq   (rStashedAR, argNumToRegName[0]);
    a.  subq   (rVmSp, argNumToRegName[0]);
    a.  shrq   (0x4, argNumToRegName[0]);
    a.  movq   (rVmSp, argNumToRegName[1]);
    emitCall(a, reinterpret_cast<CodeAddress>(
      MkPacked{MixedArray::MakePacked}), argSet(2));
    callFixup = a.frontier();
  }
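  // The argument setup above computes, as a C sketch:
  //
  //   MixedArray::MakePacked((rStashedAR - rVmSp) / sizeof(Cell), rVmSp);
  //
  // i.e. the byte distance between the ActRec and the stack pointer,
  // shifted right by 4 (sizeof(Cell) == 16), is the number of passed
  // args sitting contiguously at rVmSp.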
  if (nPassed != 2) {
    a.  storel (2, rStashedAR[AROFF(m_numArgsAndFlags)]);
  }
  if (debug) { // "assertion": the emitPrologueWork path fixes up rVmSp.
    a.  movq   (0, rVmSp);
  }

  // Magic calls expect two arguments---first the name of the called
  // function, and then a packed array of the arguments to the
  // function.  These are where these two TV's will be.
  auto const strTV   = rStashedAR - cellsToBytes(1);
  auto const arrayTV = rStashedAR - cellsToBytes(2);

  // Store the two arguments for the magic call.
  emitStoreTVType(a, KindOfString, strTV[TVOFF(m_type)]);
  a.    storeq (rInvName, strTV[TVOFF(m_data)]);
  emitStoreTVType(a, KindOfArray, arrayTV[TVOFF(m_type)]);
  if (nPassed == 0) {
    emitImmStoreq(a, staticEmptyArray(), arrayTV[TVOFF(m_data)]);
  } else {
    a.  storeq (rax, arrayTV[TVOFF(m_data)]);
  }

  // Every magic call prologue has a case for nPassed == 2, because
  // this is how it works when the call is actually magic.
  if (nPassed == 2) asm_label(a, not_magic_call);
  auto const skFor2Args = emitPrologueWork(func, 2);
  if (nPassed == 2) skFuncBody = skFor2Args;

  if (RuntimeOption::HHProfServerEnabled && callFixup) {
    mcg->recordSyncPoint(callFixup,
                         skFuncBody.offset() - func->base(),
                         func->numSlotsInFrame());
  }

  return skFuncBody;
}

//////////////////////////////////////////////////////////////////////

}}}