From 9d0c9577b030c46fab0a6be14f92619114d9d653 Mon Sep 17 00:00:00 2001
From: mwilliams
Date: Sat, 6 Jun 2015 13:10:03 -0700
Subject: [PATCH] Optimize NativeImpl

Summary: We can avoid going through the expensive, generic C++ wrappers by
jitting small, cheap stubs for NativeImpl.

Reviewed By: @jdelong

Differential Revision: D2134420
---
 hphp/doc/ir.specification              |   6 +-
 hphp/runtime/vm/jit/code-gen-arm.cpp   |   1 +
 hphp/runtime/vm/jit/code-gen-x64.cpp   |  23 +++-
 hphp/runtime/vm/jit/dce.cpp            |   1 +
 hphp/runtime/vm/jit/gvn.cpp            |   1 +
 hphp/runtime/vm/jit/irgen-builtin.cpp  | 224 ++++++++++++++++++++++++++------
 hphp/runtime/vm/jit/irgen-ret.cpp      |   2 +
 hphp/runtime/vm/jit/memory-effects.cpp |   1 +
 8 files changed, 205 insertions(+), 54 deletions(-)

diff --git a/hphp/doc/ir.specification b/hphp/doc/ir.specification
index 52751bb3669..9debd565ab9 100644
--- a/hphp/doc/ir.specification
+++ b/hphp/doc/ir.specification
@@ -1047,6 +1047,10 @@ To string conversions:
   Loads the m_func member of an ActRec. S0 is the base address, with `offset'
   cells to the ActRec.
 
+| LdARNumParams, D(Int), S(FramePtr), NF
+
+  Loads the number of params from an ActRec. S0 is the address of the ActRec.
+
 | LdFuncNumParams, D(Int), S(Func), NF
 
   Returns the value of func->numParams().
@@ -1239,7 +1243,7 @@ To string conversions:
   Execute a call to the native builtin specified by the current function. S0
   and S1 should be the current vmfp and vmsp, respectively.
 
-| CallBuiltin, DBuiltin, S(FramePtr) S(StkPtr) SVar(PtrToGen,Gen), Er|PRc
+| CallBuiltin, DBuiltin, S(FramePtr) S(StkPtr) SVar(PtrToGen,Gen,Cls), Er|PRc
 
   Call builtin function with N arguments. S0 and S1 should be the current vmfp
   and vmsp, respectively.
diff --git a/hphp/runtime/vm/jit/code-gen-arm.cpp b/hphp/runtime/vm/jit/code-gen-arm.cpp
index f4779a39c14..66a88c1b474 100644
--- a/hphp/runtime/vm/jit/code-gen-arm.cpp
+++ b/hphp/runtime/vm/jit/code-gen-arm.cpp
@@ -218,6 +218,7 @@ DELEGATE_OPCODE(AssertNonNull)
 DELEGATE_OPCODE(AssertStk)
 DELEGATE_OPCODE(AssertType)
 DELEGATE_OPCODE(LdARFuncPtr)
+DELEGATE_OPCODE(LdARNumParams)
 
 DELEGATE_OPCODE(CheckStk)
 DELEGATE_OPCODE(CheckType)
diff --git a/hphp/runtime/vm/jit/code-gen-x64.cpp b/hphp/runtime/vm/jit/code-gen-x64.cpp
index d6d040e2cde..1ff4c7c5927 100644
--- a/hphp/runtime/vm/jit/code-gen-x64.cpp
+++ b/hphp/runtime/vm/jit/code-gen-x64.cpp
@@ -3074,12 +3074,9 @@ void CodeGenerator::cgCallBuiltin(IRInstruction* inst) {
   auto srcNum = uint32_t{2};
   if (callee->isMethod()) {
     if (callee->isStatic()) {
-      // This isn't entirely accurate. HNI functions expect the Class*
-      // of the class used for the call which may be callee->cls() or
-      // one of its children. Currently we don't support FCallBuiltin on
-      // these functions (disabled in inlining-decider.cpp); (t5360661)
       if (callee->isNative()) {
-        callArgs.imm(callee->cls());
+        callArgs.ssa(srcNum);
+        ++srcNum;
       }
     } else {
       // Note, we don't support objects with vtables here (if they may
@@ -3091,7 +3088,12 @@ void CodeGenerator::cgCallBuiltin(IRInstruction* inst) {
   }
 
   if (callee->attrs() & AttrNumArgs) {
-    callArgs.imm((int64_t)numNonDefault);
+    if (numNonDefault >= 0) {
+      callArgs.imm((int64_t)numNonDefault);
+    } else {
+      callArgs.ssa(srcNum);
+      ++srcNum;
+    }
   }
 
   for (uint32_t i = 0; i < numArgs; ++i, ++srcNum) {
@@ -3281,6 +3283,15 @@ void CodeGenerator::cgLdARFuncPtr(IRInstruction* inst) {
   vmain() << load{baseReg[off + AROFF(m_func)], dstReg};
 }
 
+void CodeGenerator::cgLdARNumParams(IRInstruction* inst) {
+  auto& v = vmain();
+  auto dstReg = dstLoc(inst, 0).reg();
+  auto baseReg = srcLoc(inst, 0).reg();
+  auto tmp = v.makeReg();
+  v << loadzlq{baseReg[AROFF(m_numArgsAndFlags)], tmp};
+  v << andqi{ActRec::kNumArgsMask, tmp, dstReg, v.makeReg()};
+}
+
 void CodeGenerator::cgLdStaticLocCached(IRInstruction* inst) {
   auto const extra = inst->extra<LdStaticLocCached>();
   auto const link = rds::bindStaticLocal(extra->func, extra->name);
diff --git a/hphp/runtime/vm/jit/dce.cpp b/hphp/runtime/vm/jit/dce.cpp
index 8026b0214b0..3409efb263f 100644
--- a/hphp/runtime/vm/jit/dce.cpp
+++ b/hphp/runtime/vm/jit/dce.cpp
@@ -148,6 +148,7 @@ bool canDCE(IRInstruction* inst) {
   case LdClsName:
   case LdFuncCachedSafe:
   case LdARFuncPtr:
+  case LdARNumParams:
   case LdFuncNumParams:
   case LdStrLen:
   case LdStaticLocCached:
diff --git a/hphp/runtime/vm/jit/gvn.cpp b/hphp/runtime/vm/jit/gvn.cpp
index 713d78ea595..1e2ed70bc2e 100644
--- a/hphp/runtime/vm/jit/gvn.cpp
+++ b/hphp/runtime/vm/jit/gvn.cpp
@@ -305,6 +305,7 @@ bool supportsGVN(const IRInstruction* inst) {
   case LdObjClass:
   case LdClsName:
   case LdARFuncPtr:
+  case LdARNumParams:
   case Mov:
   case LdContActRec:
   case LdAFWHActRec:
diff --git a/hphp/runtime/vm/jit/irgen-builtin.cpp b/hphp/runtime/vm/jit/irgen-builtin.cpp
index f76708d25ff..4f89e31f87f 100644
--- a/hphp/runtime/vm/jit/irgen-builtin.cpp
+++ b/hphp/runtime/vm/jit/irgen-builtin.cpp
@@ -28,6 +28,7 @@
 #include "hphp/runtime/vm/jit/irgen-interpone.h"
 #include "hphp/runtime/vm/jit/irgen-types.h"
 #include "hphp/runtime/vm/jit/irgen-internal.h"
+#include "hphp/runtime/ext_zend_compat/hhvm/zend-wrap-func.h"
 
 namespace HPHP { namespace jit { namespace irgen {
 
@@ -432,8 +433,10 @@ struct ParamPrep {
   size_t size() const { return info.size(); }
 
   SSATmp* thiz{nullptr};       // may be null if call is not a method
+  SSATmp* count{nullptr};      // if non-null, the count of arguments
   jit::vector<Info> info;
   uint32_t numThroughStack{0};
+  bool forNativeImpl{false};
 };
 
 /*
@@ -445,18 +448,24 @@ template<class LoadParam>
 ParamPrep prepare_params(IRGS& env,
                          const Func* callee,
                          SSATmp* thiz,
+                         SSATmp* numArgsExpr,
                          uint32_t numArgs,
                          uint32_t numNonDefault,
+                         bool forNativeImpl,
                          LoadParam loadParam) {
   auto ret = ParamPrep(numArgs);
   ret.thiz = thiz;
+  ret.count = numArgsExpr;
+  ret.forNativeImpl = forNativeImpl;
 
   // Fill in in reverse order, since they may come from popC's (depending on
   // what loadParam wants to do).
   for (auto offset = uint32_t{numArgs}; offset-- > 0;) {
-    ret[offset].value = loadParam(offset);
     auto const ty = param_coerce_type(callee, offset);
+    ret[offset].value = loadParam(offset, ty);
+    if (forNativeImpl) {
+      continue;
+    }
 
     // We do actually mean exact type equality here. We're only capable of
     // passing the following primitives through registers; everything else goes
@@ -467,9 +476,9 @@ ParamPrep prepare_params(IRGS& env,
       continue;
     }
 
-    // If ty > TBottom, it had some kind of type hint.
     ++ret.numThroughStack;
     ret[offset].throughStack = true;
+    // If ty > TBottom, it had some kind of type hint.
     ret[offset].needsConversion = offset < numNonDefault && ty > TBottom;
   }
 
@@ -519,7 +528,7 @@ struct CatchMaker {
     , m_callee(callee)
     , m_params(*params)
   {
-    assertx(!m_params.thiz || inlining());
+    assertx(!m_params.thiz || m_params.forNativeImpl || inlining());
   }
 
   CatchMaker(const CatchMaker&) = delete;
@@ -621,11 +630,13 @@ private:
    */
   void decRefForUnwind() const {
+    if (m_params.forNativeImpl) return;
     for (auto i = uint32_t{0}; i < m_params.size(); ++i) {
-      if (m_params[i].throughStack) {
+      auto const &pi = m_params[i];
+      if (pi.throughStack) {
         popDecRef(env);
       } else {
-        gen(env, DecRef, m_params[i].value);
+        gen(env, DecRef, pi.value);
       }
     }
   }
 
@@ -751,12 +762,16 @@ jit::vector<SSATmp*> realize_params(IRGS& env,
                                     const Func* callee,
                                     ParamPrep& params,
                                     const CatchMaker& maker) {
-  auto const cbNumArgs = 2 + params.size() + (params.thiz ? 1 : 0);
+  auto const cbNumArgs = 2 + params.size() +
+    (params.thiz ? 1 : 0) + (params.count ? 1 : 0);
   auto ret = jit::vector<SSATmp*>(cbNumArgs);
   auto argIdx = uint32_t{0};
   ret[argIdx++] = fp(env);
   ret[argIdx++] = sp(env);
   if (params.thiz) ret[argIdx++] = params.thiz;
+  if (params.count) ret[argIdx++] = params.count;
+
+  assertx(!params.count || callee->attrs() & AttrNumArgs);
 
   auto stackIdx = uint32_t{0};
   for (auto paramIdx = uint32_t{0}; paramIdx < params.size(); ++paramIdx) {
@@ -804,39 +819,41 @@ void builtinCall(IRGS& env,
                  ParamPrep& params,
                  int32_t numNonDefault,
                  const CatchMaker& catchMaker) {
-  /*
-   * Everything that needs to be on the stack gets spilled now.
-   *
-   * If we're not inlining, the reason we do this even when numThroughStack is
-   * zero is to make it so that in either case the stack depth when we enter
-   * our catch blocks is always the same as the numThroughStack value, in all
-   * situations. If we didn't do this, then when we aren't inlining, and
-   * numThroughStack is zero, we'd have the stack depth be the total num params
-   * (the depth before the FCallBuiltin), which would add more cases to handle
-   * in the catch blocks.
-   */
-  if (params.numThroughStack != 0 || !catchMaker.inlining()) {
-    for (auto i = uint32_t{0}; i < params.size(); ++i) {
-      if (params[i].throughStack) {
-        push(env, params[i].value);
-      }
-    }
-    // We're going to do ldStkAddrs on these, so the stack must be
-    // materialized:
-    spillStack(env);
+  if (!params.forNativeImpl) {
     /*
-     * This marker update is to make sure rbx points to the bottom of our stack
-     * if we enter a catch trace. It's also necessary because we might run
-     * destructors as part of parameter coersions, which we don't want to
-     * clobber our spilled stack.
+     * Everything that needs to be on the stack gets spilled now.
+     *
+     * If we're not inlining, the reason we do this even when numThroughStack is
+     * zero is to make it so that in either case the stack depth when we enter
+     * our catch blocks is always the same as the numThroughStack value, in all
+     * situations. If we didn't do this, then when we aren't inlining, and
+     * numThroughStack is zero, we'd have the stack depth be the total num
+     * params (the depth before the FCallBuiltin), which would add more cases
+     * to handle in the catch blocks.
      */
-    updateMarker(env);
-  }
+    if (params.numThroughStack != 0 || !catchMaker.inlining()) {
+      for (auto i = uint32_t{0}; i < params.size(); ++i) {
+        if (params[i].throughStack) {
+          push(env, params[i].value);
+        }
+      }
+      // We're going to do ldStkAddrs on these, so the stack must be
+      // materialized:
+      spillStack(env);
+      /*
+       * This marker update is to make sure rbx points to the bottom of our
+       * stack if we enter a catch trace. It's also necessary because we might
+       * run destructors as part of parameter coersions, which we don't want to
+       * clobber our spilled stack.
+       */
+      updateMarker(env);
+    }
 
-  // If we're not inlining, we've spilled the stack and are about to do things
-  // that can throw. If we are inlining, we've done various DefInlineFP-type
-  // stuff and possibly also spilled the stack.
-  env.irb->exceptionStackBoundary();
+    // If we're not inlining, we've spilled the stack and are about to do things
+    // that can throw. If we are inlining, we've done various DefInlineFP-type
+    // stuff and possibly also spilled the stack.
+    env.irb->exceptionStackBoundary();
+  }
 
   auto const retType = [&]() -> Type {
     auto const retDT = callee->returnType();
@@ -857,18 +874,21 @@ void builtinCall(IRGS& env,
     CallBuiltinData {
       offsetFromIRSP(env, BCSPOffset{0}),
      callee,
-      numNonDefault,
+      params.count ? -1 : numNonDefault,
       builtinFuncDestroysLocals(callee)
     },
     catchMaker.makeUnusualCatch(),
     std::make_pair(realized.size(), decayedPtr)
   );
 
-  // Pop the stack params and push the return value.
-  if (params.thiz) gen(env, DecRef, params.thiz);
-  for (auto i = uint32_t{0}; i < params.numThroughStack; ++i) {
-    popDecRef(env);
+  if (!params.forNativeImpl) {
+    // Pop the stack params
+    if (params.thiz) gen(env, DecRef, params.thiz);
+    for (auto i = uint32_t{0}; i < params.numThroughStack; ++i) {
+      popDecRef(env);
+    }
   }
+
   // We don't need to decref the non-state param values, because they are only
   // non-reference counted types. (At this point we've gotten through all our
   // coersions, so even if they started refcounted we've already decref'd them
@@ -906,9 +926,11 @@ void nativeImplInlined(IRGS& env) {
     env,
     callee,
     paramThis,
+    nullptr,
    numArgs,
     numArgs, // numNonDefault is equal to numArgs here.
-    [&] (uint32_t i) {
+    false,
+    [&] (uint32_t i, Type ty) {
       auto ret = ldLoc(env, i, nullptr, DataTypeSpecific);
       // These IncRefs must be 'inside' the callee: it may own the only
       // reference to the parameters. Normally they will cancel with the
@@ -991,9 +1013,11 @@ void emitFCallBuiltin(IRGS& env,
     env,
     callee,
     nullptr, // no $this; FCallBuiltin never happens for methods
+    nullptr, // count is constant numNonDefault
     numArgs,
     numNonDefault,
-    [&] (uint32_t i) { return pop(env, DataTypeSpecific); }
+    false,
+    [&] (uint32_t i, Type ty) { return pop(env, DataTypeSpecific); }
   );
 
   auto const catcher = CatchMaker {
@@ -1009,9 +1033,115 @@
 void emitNativeImpl(IRGS& env) {
   if (isInlining(env)) return nativeImplInlined(env);
 
-  gen(env, NativeImpl, fp(env), sp(env));
-  auto const data = RetCtrlData { offsetToReturnSlot(env), false };
-  gen(env, RetCtrl, data, sp(env), fp(env));
+  auto genericNativeImpl = [&]() {
+    gen(env, NativeImpl, fp(env), sp(env));
+    auto const data = RetCtrlData { offsetToReturnSlot(env), false };
+    gen(env, RetCtrl, data, sp(env), fp(env));
+  };
+
+  auto callee = curFunc(env);
+  if (!callee->nativeFuncPtr() || callee->builtinFuncPtr() == zend_wrap_func) {
+    genericNativeImpl();
+    return;
+  }
+
+  Block* fallback = nullptr;
+  auto thiz = callee->isMethod() && (!callee->isStatic() || callee->isNative())
+    ? gen(env, LdCtx, fp(env)) : nullptr;
+  auto const numParams = gen(env, LdARNumParams, fp(env));
+
+  ifThenElse(
+    env,
+    [&] (Block* t) {
+      fallback = t;
+      if (thiz) {
+        if (callee->isStatic()) {
+          thiz = gen(env, LdClsCtx, thiz);
+        } else {
+          gen(env, CheckCtxThis, fallback, thiz);
+          thiz = gen(env, CastCtxThis, thiz);
+        }
+      }
+
+      auto maxArgs = callee->numParams();
+      auto minArgs = callee->numNonVariadicParams();
+      while (minArgs) {
+        auto const& pi = callee->params()[minArgs - 1];
+        if (pi.funcletOff == InvalidAbsoluteOffset) {
+          break;
+        }
+        --minArgs;
+      }
+      if (callee->hasVariadicCaptureParam()) {
+        if (minArgs) {
+          auto const check = gen(env, LtInt, numParams, cns(env, minArgs));
+          gen(env, JmpNZero, fallback, check);
+        }
+      } else {
+        if (minArgs == maxArgs) {
+          auto const check = gen(env, EqInt, numParams, cns(env, minArgs));
+          gen(env, JmpZero, fallback, check);
+        } else {
+          if (minArgs) {
+            auto const checkMin = gen(env, LtInt,
+                                      numParams, cns(env, minArgs));
+            gen(env, JmpNZero, fallback, checkMin);
+          }
+          auto const checkMax = gen(env, GtInt, numParams, cns(env, maxArgs));
+          gen(env, JmpNZero, fallback, checkMax);
+        }
+      }
+    },
+    [&] () {
+      auto params = prepare_params(
+        env,
+        callee,
+        thiz,
+        callee->attrs() & AttrNumArgs ? numParams : nullptr,
+        callee->numParams(),
+        callee->numParams(),
+        true,
+        [&] (uint32_t i, Type targetType) {
+          auto const addr = gen(env, LdLocAddr,
+                                TPtrToFrameGen, LocalId(i), fp(env));
+          auto verifyType = [&] (Block* fail) {
+            gen(env, CheckTypeMem, targetType, fail, addr);
+          };
+          if (targetType.maybe(TInitNull)) {
+            targetType = targetType - TNull;
+            ifThen(env,
+                   verifyType,
+                   [&] {
+                     gen(env, CheckTypeMem, TInitNull, fallback, addr);
+                   });
+          } else if (targetType != TBottom) {
+            verifyType(fallback);
+            if (targetType == TBool ||
+                targetType == TInt ||
+                targetType == TDbl) {
+              return gen(env, LdMem,
+                         targetType == TBool ? TInt : targetType, addr);
+            }
+          }
+          return addr;
+        }
+      );
+      auto const catcher = CatchMaker {
+        env,
+        CatchMaker::Kind::NotInlining,
+        callee,
+        &params
+      };
+
+      builtinCall(env, callee, params, callee->numParams(), catcher);
+      emitRetC(env);
+    },
+    [&] () {
+      hint(env, Block::Hint::Unlikely);
+      genericNativeImpl();
+    }
+  );
 }
 
 //////////////////////////////////////////////////////////////////////
diff --git a/hphp/runtime/vm/jit/irgen-ret.cpp b/hphp/runtime/vm/jit/irgen-ret.cpp
index 92e43cda0bd..408b2dd0c52 100644
--- a/hphp/runtime/vm/jit/irgen-ret.cpp
+++ b/hphp/runtime/vm/jit/irgen-ret.cpp
@@ -63,6 +63,8 @@ void freeLocalsAndThis(IRGS& env) {
     // In a pseudomain, we have to do a non-inline DecRef, because we can't
     // side-exit in the middle of the sequence of LdLocPseudoMains.
     if (curFunc(env)->isPseudoMain()) return false;
+    // We don't want to specialize on arg types for builtins
+    if (curFunc(env)->builtinFuncPtr()) return false;
 
     auto const count = mcg->numTranslations(
       env.irb->unit().context().srcKey());
diff --git a/hphp/runtime/vm/jit/memory-effects.cpp b/hphp/runtime/vm/jit/memory-effects.cpp
index 8be535ca43c..1e468a58244 100644
--- a/hphp/runtime/vm/jit/memory-effects.cpp
+++ b/hphp/runtime/vm/jit/memory-effects.cpp
@@ -943,6 +943,7 @@ MemEffects memory_effects_impl(const IRInstruction& inst) {
   case LdStaticLocCached:
   case CheckCtxThis:
   case CastCtxThis:
+  case LdARNumParams:
  case LdRDSAddr:
   case PredictLoc:
   case PredictStk:
-- 
2.11.4.GIT
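
Note (not part of the patch): the argument-count guards that the new emitNativeImpl() emits in its first ifThenElse branch can be summarized outside of HHVM. The standalone C++ sketch below is illustrative only; FuncShape, firstOptional, and canUseFastNativeImpl are hypothetical names and not HHVM APIs. It mirrors the patch's min/max-args computation: calls whose argument count falls outside the accepted range take the generic NativeImpl fallback.

#include <cstdint>

// Illustrative sketch only (not HHVM code). Mirrors the argument-count
// guards emitted by the patched emitNativeImpl() before the specialized
// native-call path is taken.
struct FuncShape {
  uint32_t numParams;             // maxArgs in the patch
  uint32_t numNonVariadicParams;  // starting value of minArgs
  uint32_t firstOptional;         // index of the first param with a default
  bool hasVariadicCaptureParam;

  // A param with a default corresponds to one with a DV funclet
  // (funcletOff != InvalidAbsoluteOffset) in the patch.
  bool hasDefaultValue(uint32_t i) const { return i >= firstOptional; }
};

// Returns true when the jitted fast path may handle a call made with
// numArgs arguments; false means "use the generic NativeImpl wrapper",
// i.e. the unlikely fallback branch in the patch.
bool canUseFastNativeImpl(const FuncShape& f, uint32_t numArgs) {
  uint32_t maxArgs = f.numParams;
  uint32_t minArgs = f.numNonVariadicParams;
  while (minArgs > 0 && f.hasDefaultValue(minArgs - 1)) --minArgs;

  if (f.hasVariadicCaptureParam) {
    return numArgs >= minArgs;                       // LtInt / JmpNZero guard
  }
  if (minArgs == maxArgs) {
    return numArgs == minArgs;                       // EqInt / JmpZero guard
  }
  return numArgs >= minArgs && numArgs <= maxArgs;   // LtInt + GtInt guards
}

For example, a builtin with two required parameters and one optional parameter (firstOptional = 2, numParams = 3, no variadic capture) takes the fast path only when called with 2 or 3 arguments; anything else falls back to the generic wrapper.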