/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/opt.h"

#include <boost/variant.hpp>
#include <folly/Optional.h>
#include <folly/ScopeGuard.h>
#include <folly/Hash.h>

#include "hphp/util/bitset-utils.h"
#include "hphp/util/functional.h"
#include "hphp/util/either.h"
#include "hphp/util/dataflow-worklist.h"
#include "hphp/util/match.h"
#include "hphp/util/trace.h"

#include "hphp/runtime/base/perf-warning.h"

#include "hphp/runtime/vm/jit/alias-analysis.h"
#include "hphp/runtime/vm/jit/analysis.h"
#include "hphp/runtime/vm/jit/cfg.h"
#include "hphp/runtime/vm/jit/containers.h"
#include "hphp/runtime/vm/jit/dce.h"
#include "hphp/runtime/vm/jit/extra-data.h"
#include "hphp/runtime/vm/jit/ir-instruction.h"
#include "hphp/runtime/vm/jit/memory-effects.h"
#include "hphp/runtime/vm/jit/mutation.h"
#include "hphp/runtime/vm/jit/pass-tracer.h"
#include "hphp/runtime/vm/jit/simplify.h"
#include "hphp/runtime/vm/jit/state-vector.h"
#include "hphp/runtime/vm/jit/timer.h"
namespace HPHP { namespace jit {

namespace {

TRACE_SET_MOD(hhir_load);
//////////////////////////////////////////////////////////////////////

// Reverse post order block ids.
using RpoID = uint32_t;

//////////////////////////////////////////////////////////////////////
/*
 * Information about a value in memory.
 *
 * We either have nullptr, meaning we know nothing, or we know a specific
 * SSATmp* for the value, or we'll track the Block* where different non-null
 * states were merged, which is also a location we will be able to insert a
 * phi to make a new available SSATmp.
 *
 * For some discussion on how we know whether the SSATmp* is usable (i.e. its
 * definition dominates code that might want to reuse it), see the bottom of
 * this file.
 */
using ValueInfo = Either<SSATmp*,Block*>;
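
// Illustrative sketch (hypothetical HHIR, not taken from a real unit): after
//   t1:Int = LdLoc<Int,0> fp
// the tracked ValueInfo for local 0 is the SSATmp* t1. If one predecessor of
// a join block B5 leaves t1 in local 0 and another leaves t2, the merge
// records the Block* B5 instead, meaning "a DefLabel phi of {t1,t2} can be
// created at B5 if this value is ever actually reused".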
/*
 * Each tracked location has both a ValueInfo and a separately-tracked type.
 * We may have a known type even without an SSATmp for an available value, or
 * that is more refined than an SSATmp knownValue's type. This is always at
 * least as refined as knownValue->type(), if knownValue is an SSATmp.
 */
struct TrackedLoc {
  ValueInfo knownValue{nullptr};
  Type knownType{TTop};
};
/*
 * Abstract state for a program position.
 *
 * This has a set of TrackedLocs, and an availability mask to determine which
 * ones are currently valid. When updating a tracked location, the visitor
 * below will kill any potentially conflicting locations in the availability
 * mask without updating them.
 */
struct State {
  bool initialized = false;  // if false, we'll never reach this block

  /*
   * Currently tracked locations, indexed by their ids.
   */
  jit::vector<TrackedLoc> tracked;

  /*
   * Currently available indexes in tracked.
   */
  ALocBits avail;

  /*
   * If we know whether various RDS entries have been initialized at this
   * position, they are recorded in this set.
   */
  jit::flat_set<rds::Handle> initRDS{};
};
struct BlockInfo {
  RpoID rpoID;
  State stateIn;

  /*
   * Output states for the next and taken edge. These are stored explicitly
   * for a couple reasons:
   *
   *   o When inserting phis, we need to see the state on each edge coming to
   *     a join point to know what to phi.
   *
   *   o This lets us re-merge all predecessors into a block's stateIn while
   *     doing the dataflow analysis. This can get better results than just
   *     merging into stateIn in place, basically because of the way we
   *     represent the locations that need phis. If we didn't remerge, it's
   *     possible for the analysis to believe it needs to insert phis even in
   *     blocks with only a single predecessor (e.g. if the first time it saw
   *     the block it had a SSATmp* in some location, but the second time it
   *     tries to merge a Block* for a phi location).
   */
  State stateOutNext;
  State stateOutTaken;
};
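
// Concrete illustration (hypothetical): suppose B7 has the single
// predecessor B6. On a first visit, B6's out-state says local 0 holds t3,
// so B7's stateIn records t3. A later re-analysis of B6 changes its
// out-state to a phi request. Merging the new state into B7's stateIn in
// place would combine t3 with the phi request and mark B7 itself as needing
// a phi; re-merging from scratch simply copies the single predecessor's
// state, so no extra phi is requested.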
struct PhiKey {
  struct Hash {
    size_t operator()(PhiKey k) const {
      return folly::hash::hash_combine(pointer_hash<Block>()(k.block), k.aloc);
    }
  };

  bool operator==(PhiKey o) const {
    return block == o.block && aloc == o.aloc;
  }

  Block* block;
  uint32_t aloc;
};
struct Global {
  explicit Global(IRUnit& unit)
    : unit(unit)
    , rpoBlocks(rpoSortCfg(unit))
    , ainfo(collect_aliases(unit, rpoBlocks))
    , blockInfo(unit, BlockInfo{})
  {}

  IRUnit& unit;
  BlockList rpoBlocks;
  AliasAnalysis ainfo;
  /*
   * Map from each block to its information. Before we've done the fixed point
   * analysis the states in the info structures are not necessarily meaningful.
   */
  StateVector<Block,BlockInfo> blockInfo;
  /*
   * During the optimize pass, we may add phis for the values known to be in
   * memory locations at particular control-flow-joins. To avoid doing it more
   * than once for any given join point, this keeps track of what we've added
   * so if it's needed in more than one place we can reuse it.
   */
  jit::hash_map<PhiKey,SSATmp*,PhiKey::Hash> insertedPhis;
  size_t instrsReduced = 0;
  size_t loadsRemoved = 0;
  size_t loadsRefined = 0;
  size_t jumpsRemoved = 0;
};
//////////////////////////////////////////////////////////////////////

using HPHP::jit::show;
std::string show(ValueInfo vi) {
  if (vi == nullptr) return "<>";
  return vi.match(
    [&] (SSATmp* t) { return t->toString(); },
    [&] (Block* b) { return folly::sformat("phi@B{}", b->id()); }
  );
}
std::string show(TrackedLoc li) {
  return folly::sformat(
    "{} :: {}",
    li.knownType.toString(),
    show(li.knownValue)
  );
}
DEBUG_ONLY std::string show(const State& state) {
  auto ret = std::string{};
  if (!state.initialized) {
    ret = "  unreachable\n";
    return ret;
  }

  folly::format(&ret, "  av: {}\n", show(state.avail));
  for (auto idx = uint32_t{0}; idx < state.tracked.size(); ++idx) {
    if (state.avail.test(idx)) {
      folly::format(&ret, "  {: >3} = {}\n", idx, show(state.tracked[idx]));
    }
  }

  folly::format(&ret, "  initRDS : {}\n", [&] () -> std::string {
    using namespace folly::gen;
    return from(state.initRDS) | unsplit<std::string>(",");
  }());

  return ret;
}
//////////////////////////////////////////////////////////////////////

struct Local {
  const Global& global;
  State state;
};
/*
 * No flags. This means no transformations were possible for the analyzed
 * instruction.
 */
struct FNone {};

/*
 * The instruction was a pure load, and its value was known to be available in
 * `knownValue', with best known type `knownType'.
 */
struct FRedundant { ValueInfo knownValue; Type knownType; uint32_t aloc; };

/*
 * Like FRedundant, but the load is impure---rather than killing it, we can
 * factor out the load.
 */
struct FReducible { ValueInfo knownValue; Type knownType; uint32_t aloc; };

/*
 * The instruction is a load which can be refined to a better type than its
 * type-parameter currently has.
 */
struct FRefinableLoad { Type refinedType; };

/*
 * The instruction can be replaced with a nop.
 */
struct FRemovable {};

/*
 * The instruction can be legally replaced with a Jmp to either its next or
 * taken edge.
 */
struct FJmpNext {};
struct FJmpTaken {};

using Flags = boost::variant<FNone,FRedundant,FReducible,FRefinableLoad,
                             FRemovable,FJmpNext,FJmpTaken>;
//////////////////////////////////////////////////////////////////////

// Conservative list of instructions which have type-parameters safe to refine.
bool refinable_load_eligible(const IRInstruction& inst) {
  switch (inst.op()) {
    case LdLoc:
    case LdStk:
    case LdMem:
      assertx(inst.hasTypeParam());
      return true;
    default:
      return false;
  }
}
void clear_everything(Local& env) {
  FTRACE(3, "      clear_everything\n");
  env.state.avail.reset();
}
TrackedLoc* find_tracked(State& state,
                         folly::Optional<ALocMeta> meta) {
  if (!meta) return nullptr;
  return state.avail[meta->index] ? &state.tracked[meta->index]
                                  : nullptr;
}

TrackedLoc* find_tracked(Local& env,
                         folly::Optional<ALocMeta> meta) {
  return find_tracked(env.state, meta);
}
Flags load(Local& env,
           const IRInstruction& inst,
           AliasClass acls) {
  acls = canonicalize(acls);

  auto const meta = env.global.ainfo.find(acls);
  if (!meta) return FNone{};
  assertx(meta->index < kMaxTrackedALocs);

  auto& tracked = env.state.tracked[meta->index];

  // We can always trust the dst type of the load. Add its knowledge to
  // knownType before doing anything else.
  if (!env.state.avail[meta->index]) {
    tracked.knownValue = nullptr;
    tracked.knownType = inst.dst()->type();
    env.state.avail.set(meta->index);
  } else {
    tracked.knownType &= inst.dst()->type();
  }

  if (tracked.knownType.admitsSingleVal()) {
    tracked.knownValue = env.global.unit.cns(tracked.knownType);

    FTRACE(4, "       {} <- {}\n", show(acls), inst.dst()->toString());
    FTRACE(5, "       av: {}\n", show(env.state.avail));
    return FRedundant {
      tracked.knownValue,
      tracked.knownType,
      kMaxTrackedALocs    // it's a constant, so it is nowhere
    };
  }

  if (tracked.knownValue != nullptr) {
    // Don't use knownValue if it's from a different block and we're currently
    // in a catch trace, to avoid extending lifetimes too much.
    auto const block = tracked.knownValue.match(
      [] (SSATmp* tmp) { return tmp->inst()->block(); },
      [] (Block* blk) { return blk; }
    );
    if (inst.block() == block || inst.block()->hint() != Block::Hint::Unused) {
      return FRedundant { tracked.knownValue, tracked.knownType, meta->index };
    }
  } else {
    // Only set a new known value if we previously didn't have one. If we had a
    // known value already, we would have made the load redundant above, unless
    // we're in an Hint::Unused block. In that case, we want to keep any old
    // known value to avoid disturbing the main trace in case we merge back.
    tracked.knownValue = inst.dst();

    FTRACE(4, "       {} <- {}\n", show(acls), inst.dst()->toString());
    FTRACE(5, "       av: {}\n", show(env.state.avail));
  }

  // Even if we can't make this load redundant, we might be able to refine its
  // type parameter.
  if (refinable_load_eligible(inst)) {
    if (tracked.knownType < inst.typeParam()) {
      return FRefinableLoad { tracked.knownType };
    }
  }

  return FNone{};
}
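
// Worked example (hypothetical HHIR): given
//   t1:Cell = LdLoc<Cell,3> fp
//   ...            // no conflicting stores to local 3
//   t2:Cell = LdLoc<Cell,3> fp
// the second load finds local 3 tracked with knownValue t1, so load()
// returns FRedundant{t1,...} and the optimize pass below rewrites the
// second LdLoc into "t2:Cell = AssertType<Cell> t1" (or a Mov), removing
// the memory access entirely.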
void store(Local& env, AliasClass acls, SSATmp* value) {
  acls = canonicalize(acls);

  auto const meta = env.global.ainfo.find(acls);
  if (!meta) {
    env.state.avail &= ~env.global.ainfo.may_alias(acls);
    FTRACE(5, "       av: {}\n", show(env.state.avail));
    return;
  }

  env.state.avail &= ~meta->conflicts;

  auto& current = env.state.tracked[meta->index];
  current.knownValue = value;
  current.knownType = value ? value->type() : TTop;
  env.state.avail.set(meta->index);
  FTRACE(4, "       {} <- {}\n", show(acls),
         value ? value->toString() : std::string("<>"));
  FTRACE(5, "       av: {}\n", show(env.state.avail));
}
Flags handle_general_effects(Local& env,
                             const IRInstruction& inst,
                             GeneralEffects m) {
  auto handleCheck = [&](Type typeParam) -> folly::Optional<Flags> {
    auto const meta = env.global.ainfo.find(canonicalize(m.loads));
    if (!meta) return folly::none;

    auto const tloc = &env.state.tracked[meta->index];
    if (!env.state.avail[meta->index]) {
      // We know nothing about the location. Initialize it with our typeParam.
      tloc->knownValue = nullptr;
      tloc->knownType = typeParam;
      env.state.avail.set(meta->index);
    } else if (tloc->knownType <= typeParam) {
      // We had a type that was good enough to pass the check.
      return Flags{FJmpNext{}};
    } else {
      // We had a type that didn't pass the check. Narrow it using our
      // typeParam.
      tloc->knownType &= typeParam;
    }

    if (tloc->knownType <= TBottom) {
      // i.e., !maybe(typeParam); fail check.
      return Flags{FJmpTaken{}};
    }
    if (tloc->knownValue != nullptr) {
      return Flags{FReducible{tloc->knownValue, tloc->knownType, meta->index}};
    }
    return folly::none;
  };

  switch (inst.op()) {
    case CheckTypeMem:
    case CheckLoc:
    case CheckStk:
    case CheckMBase:
      if (auto flags = handleCheck(inst.typeParam())) return *flags;
      break;

    case CheckInitMem:
      if (auto flags = handleCheck(TInitGen)) return *flags;
      break;

    case InitSProps: {
      auto const handle = inst.extra<ClassData>()->cls->sPropInitHandle();
      if (env.state.initRDS.count(handle) > 0) return FJmpNext{};
      env.state.initRDS.insert(handle);
      break;
    }

    case InitProps: {
      auto const handle = inst.extra<ClassData>()->cls->propHandle();
      if (env.state.initRDS.count(handle) > 0) return FJmpNext{};
      env.state.initRDS.insert(handle);
      break;
    }

    case CheckRDSInitialized: {
      auto const handle = inst.extra<CheckRDSInitialized>()->handle;
      if (env.state.initRDS.count(handle) > 0) return FJmpNext{};
      // set this unconditionally; we record taken state before every
      // instruction, and next state after each instruction
      env.state.initRDS.insert(handle);
      break;
    }

    case MarkRDSInitialized: {
      auto const handle = inst.extra<MarkRDSInitialized>()->handle;
      if (env.state.initRDS.count(handle) > 0) return FRemovable{};
      env.state.initRDS.insert(handle);
      break;
    }

    default:
      break;
  }

  store(env, m.stores, nullptr);

  return FNone{};
}
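
// Sketch of the effect on checks (hypothetical HHIR): for
//   t1:Obj = LdLoc<Obj,0> fp
//   CheckLoc<Obj,0> fp -> B9
// the tracked type of local 0 is already <= Obj when the CheckLoc runs, so
// handleCheck returns FJmpNext and the check becomes an unconditional Jmp
// to its next block. If instead the type still needed checking but the
// value t1 was known, FReducible would later turn the CheckLoc into a
// register-only "CheckType t1 -> B9".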
void handle_call_effects(Local& env,
                         const IRInstruction& inst,
                         CallEffects effects) {
  /*
   * Keep types for stack, locals, and iterators, and throw away the
   * values. We are just doing this to avoid extending lifetimes
   * across php calls, which currently always leads to spilling.
   */
  auto const keep = env.global.ainfo.all_stack |
                    env.global.ainfo.all_frame;
  env.state.avail &= keep;
  for (auto aloc = uint32_t{0};
       aloc < env.global.ainfo.locations.size();
       ++aloc) {
    if (!env.state.avail[aloc]) continue;
    env.state.tracked[aloc].knownValue = nullptr;
  }

  // Any stack locations modified by the callee are no longer valid
  store(env, effects.kills, nullptr);
  store(env, effects.inputs, nullptr);
  store(env, effects.actrec, nullptr);
  store(env, effects.outputs, nullptr);
}
Flags handle_assert(Local& env, const IRInstruction& inst) {
  auto const acls = [&] () -> folly::Optional<AliasClass> {
    switch (inst.op()) {
      case AssertLoc:
        return AliasClass {
          AFrame { inst.src(0), inst.extra<AssertLoc>()->locId }
        };
      case AssertStk:
        return AliasClass {
          AStack { inst.src(0), inst.extra<AssertStk>()->offset, 1 }
        };
      default:
        break;
    }
    return folly::none;
  }();
  if (!acls) return FNone{};

  auto const meta = env.global.ainfo.find(canonicalize(*acls));
  auto const tloc = find_tracked(env, meta);
  if (!tloc) {
    FTRACE(4, "      untracked assert\n");
    return FNone{};
  }

  tloc->knownType &= inst.typeParam();
  if (tloc->knownValue != nullptr || tloc->knownType.admitsSingleVal()) {
    if (tloc->knownValue == nullptr) {
      tloc->knownValue = env.global.unit.cns(tloc->knownType);
    }

    FTRACE(4, "      reducible assert: {}\n", show(*tloc));
    return FReducible { tloc->knownValue, tloc->knownType, meta->index };
  }

  FTRACE(4, "      non-reducible assert: {}\n", show(*tloc));
  return FNone{};
}
void refine_value(Local& env, SSATmp* newVal, SSATmp* oldVal) {
  bitset_for_each_set(
    env.state.avail,
    [&] (size_t i) {
      auto& tracked = env.state.tracked[i];
      if (tracked.knownValue != oldVal) return;
      FTRACE(4, "       refining {} to {}\n", i, newVal->toString());
      tracked.knownValue = newVal;
      tracked.knownType = newVal->type();
    }
  );
}
//////////////////////////////////////////////////////////////////////

Flags analyze_inst(Local& env, const IRInstruction& inst) {
  auto const effects = memory_effects(inst);

  auto flags = Flags{};
  match<void>(
    effects,
    [&] (IrrelevantEffects) {},
    [&] (UnknownEffects)    { clear_everything(env); },
    [&] (ExitEffects)       { clear_everything(env); },
    [&] (ReturnEffects)     {},

    [&] (PureStore m)       { store(env, m.dst, m.value); },
    [&] (PureLoad m)        { flags = load(env, inst, m.src); },

    [&] (InlineEnterEffects m) { store(env, m.inlFrame, nullptr);
                                 store(env, m.inlStack, nullptr);
                                 store(env, m.actrec, nullptr); },
    [&] (InlineExitEffects m)  { store(env, m.inlFrame, nullptr);
                                 store(env, m.inlMeta, nullptr); },
    [&] (GeneralEffects m)  { flags = handle_general_effects(env, inst, m); },
    [&] (CallEffects x)     { handle_call_effects(env, inst, x); }
  );

  switch (inst.op()) {
    case CheckType:
    case AssertType:
      refine_value(env, inst.dst(), inst.src(0));
      break;
    case AssertLoc:
    case AssertStk:
      flags = handle_assert(env, inst);
      break;
    default:
      break;
  }

  return flags;
}
//////////////////////////////////////////////////////////////////////

/*
 * Make sure that every predecessor ends with a Jmp that we can add arguments
 * to, so we can create a new phi'd value. This may involve splitting some
 * edges.
 */
void prepare_predecessors_for_phi(Global& env, Block* block) {
  auto preds = jit::vector<Block*>{};
  block->forEachPred([&] (Block* b) { preds.push_back(b); });

  for (auto& pred : preds) {
    if (pred->back().is(Jmp)) continue;
    auto const middle = splitEdge(env.unit, pred, block);
    ITRACE(3, "      B{} -> B{} to add a Jmp\n", pred->id(), middle->id());

    auto& midInfo = env.blockInfo[middle];
    auto& predInfo = env.blockInfo[pred];
    auto const& srcState = pred->next() == middle
      ? predInfo.stateOutNext
      : predInfo.stateOutTaken;

    // We don't need to set the in state on the new edge-splitting block,
    // because it isn't in our env.rpoBlocks, so we don't ever visit it for
    // the optimize pass.
    midInfo.stateOutTaken = srcState;
  }
}
SSATmp* resolve_phis_work(Global& env,
                          jit::vector<IRInstruction*>& mutated_labels,
                          Block* block,
                          uint32_t alocID) {
  auto& phi_record = env.insertedPhis[PhiKey { block, alocID }];
  if (phi_record) {
    ITRACE(3, "      resolve_phis: B{} for aloc {}: already made {}\n",
           block->id(),
           alocID,
           phi_record->toString());
    return phi_record;
  }

  // We should never require phis going into a catch block, because they may
  // not have multiple predecessors in HHIR.
  assertx(!block->isCatch());

  ITRACE(3, "    resolve_phis: B{} for aloc {}\n", block->id(), alocID);
  Trace::Indent indenter;

  prepare_predecessors_for_phi(env, block);

  // Create the new destination of the DefLabel at this block. This has to
  // happen before we start hooking up the Jmps on predecessors, because the
  // block could be one of its own predecessors and it might need to send this
  // new dst into the same phi.
  if (block->front().is(DefLabel)) {
    env.unit.expandLabel(&block->front(), 1);
  } else {
    block->prepend(env.unit.defLabel(1, block->front().bcctx()));
  }
  auto const label = &block->front();
  mutated_labels.push_back(label);
  phi_record = label->dst(label->numDsts() - 1);

  // Now add the new argument to each incoming Jmp. If one of the predecessors
  // has a phi state with this same block, it'll find the dst we just added in
  // insertedPhis.
  block->forEachPred([&] (Block* pred) {
    auto& predInfo = env.blockInfo[pred];
    auto& state = pred->next() == block ? predInfo.stateOutNext
                                        : predInfo.stateOutTaken;
    assertx(state.initialized);
    auto& incomingLoc = state.tracked[alocID];
    assertx(incomingLoc.knownValue != nullptr);

    auto const incomingTmp = incomingLoc.knownValue.match(
      [&] (SSATmp* t) { return t; },
      [&] (Block* incoming_phi_block) {
        // Note resolve_phis can add blocks, which can cause a resize of
        // env.blockInfo---don't reuse pointers/references to any block state
        // structures across here.
        return resolve_phis_work(
          env,
          mutated_labels,
          incoming_phi_block,
          alocID
        );
      }
    );

    ITRACE(4, "      B{} <~~> {}\n", pred->id(), incomingTmp->toString());
    env.unit.expandJmp(&pred->back(), incomingTmp);
  });

  ITRACE(4, "   dst: {}\n", phi_record->toString());
  return phi_record;
}
void reflow_deflabel_types(const IRUnit& unit,
                           const jit::vector<IRInstruction*>& labels) {
  for (bool changed = true; changed;) {
    changed = false;
    for (auto& l : labels) if (retypeDests(l, &unit)) changed = true;
  }
}
SSATmp* resolve_phis(Global& env, Block* block, uint32_t alocID) {
  auto mutated_labels = jit::vector<IRInstruction*>{};
  auto const ret = resolve_phis_work(env, mutated_labels, block, alocID);

  /*
   * We've potentially created some SSATmp's (that are defined by DefLabels),
   * which have types that depend on their own types. This should only be
   * possible with loops.
   *
   * Right now, we can't just wait for the final reflowTypes at the end of this
   * pass to update things for this, because SSATmp types may be inspected by
   * our simplify() calls while we're still optimizing, so reflow them now.
   */
  reflow_deflabel_types(env.unit, mutated_labels);

  return ret;
}
template<class Flag>
SSATmp* resolve_value(Global& env, const IRInstruction& inst, Flag flags) {
  if (inst.dst() && !(flags.knownType <= inst.dst()->type())) {
    /*
     * It's possible we could assert the intersection of the types, but it's
     * not entirely clear what situations this would happen in, so let's just
     * not do it in this case for now.
     */
    FTRACE(2, "      knownType wasn't substitutable; not right now\n");
    return nullptr;
  }

  auto const val = flags.knownValue;
  if (val == nullptr) return nullptr;
  return val.match(
    [&] (SSATmp* t) { return t; },
    [&] (Block* b) { return resolve_phis(env, b, flags.aloc); }
  );
}
void reduce_inst(Global& env, IRInstruction& inst, const FReducible& flags) {
  auto const resolved = resolve_value(env, inst, flags);
  if (!resolved) return;

  DEBUG_ONLY Opcode oldOp = inst.op();
  DEBUG_ONLY Opcode newOp;

  auto const reduce_to = [&] (Opcode op, folly::Optional<Type> typeParam) {
    auto const taken = hasEdges(op) ? inst.taken() : nullptr;
    auto const newInst = env.unit.gen(
      op,
      inst.bcctx(),
      taken,
      typeParam,
      resolved
    );
    if (hasEdges(op)) newInst->setNext(inst.next());

    auto const block = inst.block();
    block->insert(++block->iteratorTo(&inst), newInst);
    inst.convertToNop();

    newOp = op;
  };

  switch (inst.op()) {
    case CheckTypeMem:
    case CheckLoc:
    case CheckStk:
    case CheckMBase:
      reduce_to(CheckType, inst.typeParam());
      break;

    case CheckInitMem:
      reduce_to(CheckInit, folly::none);
      break;

    case AssertLoc:
    case AssertStk:
      reduce_to(AssertType, flags.knownType);
      break;

    default: always_assert(false);
  }

  FTRACE(2, "      reduced: {} = {} {}\n",
         opcodeName(oldOp),
         opcodeName(newOp),
         resolved->toString());

  ++env.instrsReduced;
}
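
// For instance (hypothetical HHIR), when local 2 is known to hold t4 but
// its type isn't yet proven,
//   CheckLoc<Str,2> fp -> B8
// is reduced to
//   t7:Str = CheckType<Str> t4 -> B8
// with the original instruction becoming a nop: the "hidden load" in
// CheckLoc is traded for a check on a value already in a virtual register.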
void refine_load(Global& env,
                 IRInstruction& inst,
                 const FRefinableLoad& flags) {
  assertx(refinable_load_eligible(inst));
  assertx(flags.refinedType < inst.typeParam());

  FTRACE(2, "    refinable: {} :: {} -> {}\n",
         inst.toString(),
         inst.typeParam(),
         flags.refinedType);

  inst.setTypeParam(flags.refinedType);
  retypeDests(&inst, &env.unit);
  ++env.loadsRefined;
}
818 void optimize_inst(Global
& env
, IRInstruction
& inst
, Flags flags
) {
823 [&] (FRedundant redundantFlags
) {
824 auto const resolved
= resolve_value(env
, inst
, redundantFlags
);
825 if (!resolved
) return;
827 FTRACE(2, " redundant: {} :: {} = {}\n",
828 inst
.dst()->toString(),
829 redundantFlags
.knownType
.toString(),
830 resolved
->toString());
832 if (resolved
->type().subtypeOfAny(TGen
, TCls
)) {
833 env
.unit
.replace(&inst
, AssertType
, redundantFlags
.knownType
, resolved
);
835 env
.unit
.replace(&inst
, Mov
, resolved
);
841 [&] (FReducible reducibleFlags
) { reduce_inst(env
, inst
, reducibleFlags
); },
843 [&] (FRefinableLoad f
) { refine_load(env
, inst
, f
); },
846 FTRACE(2, " removable\n");
847 assertx(!inst
.isControlFlow());
852 FTRACE(2, " unnecessary\n");
853 env
.unit
.replace(&inst
, Jmp
, inst
.next());
858 FTRACE(2, " unnecessary\n");
859 env
.unit
.replace(&inst
, Jmp
, inst
.taken());
864 // Re-simplify AssertType if we produced any.
865 if (inst
.is(AssertType
)) simplifyInPlace(env
.unit
, &inst
);
void optimize_block(Local& env, Global& genv, Block* blk) {
  if (!env.state.initialized) {
    FTRACE(2, "  unreachable\n");
    return;
  }

  for (auto& inst : *blk) {
    simplifyInPlace(genv.unit, &inst);
    auto const flags = analyze_inst(env, inst);
    optimize_inst(genv, inst, flags);
  }
}
void optimize(Global& genv) {
  /*
   * Simplify() calls can make blocks unreachable as we walk, and visiting the
   * unreachable blocks with simplify calls is not allowed. They may have uses
   * of SSATmps that no longer have defs, which can break how the simplifier
   * chases up to definitions.
   *
   * We use a StateVector because we can add new blocks during optimize().
   */
  StateVector<Block,bool> reachable(genv.unit, false);
  reachable[genv.unit.entry()] = true;
  for (auto& blk : genv.rpoBlocks) {
    FTRACE(1, "B{}:\n", blk->id());

    if (!reachable[blk]) {
      FTRACE(2, "  unreachable\n");
      continue;
    }

    auto env = Local { genv, genv.blockInfo[blk].stateIn };
    optimize_block(env, genv, blk);

    if (auto const x = blk->next())  reachable[x] = true;
    if (auto const x = blk->taken()) reachable[x] = true;
  }
}
//////////////////////////////////////////////////////////////////////

bool operator==(const TrackedLoc& a, const TrackedLoc& b) {
  return a.knownValue == b.knownValue && a.knownType == b.knownType;
}

bool operator==(const State& a, const State& b) {
  if (!a.initialized) return !b.initialized;
  if (!b.initialized) return false;
  return a.tracked == b.tracked && a.avail == b.avail;
}

bool operator!=(const State& a, const State& b) { return !(a == b); }
//////////////////////////////////////////////////////////////////////

void merge_into(Block* target, TrackedLoc& dst, const TrackedLoc& src) {
  dst.knownType |= src.knownType;

  if (dst.knownValue == src.knownValue || dst.knownValue == nullptr) return;
  if (src.knownValue == nullptr) {
    dst.knownValue = nullptr;
    return;
  }

  dst.knownValue.match(
    [&] (SSATmp* tdst) {
      src.knownValue.match(
        [&] (SSATmp* tsrc) {
          if (auto const lcm = least_common_ancestor(tdst, tsrc)) {
            dst.knownValue = lcm;
            return;
          }
          // We will need a phi at this block if we want to reuse this value.
          dst.knownValue = target;
        },
        [&](Block* /*blk*/) { dst.knownValue = target; });
    },
    [&](Block* /*b*/) { dst.knownValue = target; });
}
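
// Merge outcomes at a glance (hypothetical values): {t1, t1} stays t1;
// {t1, nullptr} decays to nullptr; {t1, t2} first tries
// least_common_ancestor(t1, t2) (useful when both tmps are refinements of
// the same underlying value), and otherwise records `target' as a phi
// request; any combination involving an existing phi request also becomes
// a phi request at `target'.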
void merge_into(Global& genv, Block* target, State& dst, const State& src) {
  always_assert(src.initialized);
  if (!dst.initialized) {
    dst = src;
    return;
  }

  always_assert(dst.tracked.size() == src.tracked.size());
  always_assert(dst.tracked.size() == genv.ainfo.locations.size());

  dst.avail &= src.avail;
  bitset_for_each_set(
    src.avail,
    [&] (size_t idx) {
      dst.avail &= ~genv.ainfo.locations_inv[idx].conflicts;
      if (dst.avail[idx]) {
        merge_into(target, dst.tracked[idx], src.tracked[idx]);
      }
    }
  );
  for (auto it = dst.initRDS.begin(); it != dst.initRDS.end();) {
    if (!src.initRDS.count(*it)) {
      it = dst.initRDS.erase(it);
    } else {
      ++it;
    }
  }
}
//////////////////////////////////////////////////////////////////////

void save_taken_state(Global& genv, const IRInstruction& inst,
                      const State& state) {
  if (!inst.taken()) return;

  auto& outState = genv.blockInfo[inst.block()].stateOutTaken = state;

  // CheckInitMem's pointee is TUninit on the taken branch, so update outState.
  if (inst.is(CheckInitMem)) {
    auto const effects = memory_effects(inst);
    auto const ge = boost::get<GeneralEffects>(effects);
    auto const meta = genv.ainfo.find(canonicalize(ge.loads));
    if (auto const tloc = find_tracked(outState, meta)) {
      tloc->knownType &= TUninit;
    } else if (meta) {
      auto tloc = &outState.tracked[meta->index];
      tloc->knownValue = nullptr;
      tloc->knownType = TUninit;
      outState.avail.set(meta->index);
    }
  }
}
void save_next_state(Global& genv, const IRInstruction& inst,
                     const State& state) {
  if (inst.next()) genv.blockInfo[inst.block()].stateOutNext = state;
}
void analyze(Global& genv) {
  FTRACE(1, "\nAnalyze:\n");

  /*
   * Scheduled blocks for the fixed point computation. We'll visit blocks with
   * lower reverse post order ids first.
   */
  auto incompleteQ = dataflow_worklist<RpoID>(genv.rpoBlocks.size());
  for (auto rpoID = uint32_t{0}; rpoID < genv.rpoBlocks.size(); ++rpoID) {
    genv.blockInfo[genv.rpoBlocks[rpoID]].rpoID = rpoID;
  }

  auto& entryIn = genv.blockInfo[genv.unit.entry()].stateIn;
  entryIn.initialized = true;
  entryIn.tracked.resize(genv.ainfo.locations.size());

  incompleteQ.push(0);

  auto propagate = [&] (Block* target) {
    FTRACE(3, "  -> {}\n", target->id());
    auto& targetInfo = genv.blockInfo[target];
    auto const oldState = targetInfo.stateIn;
    targetInfo.stateIn.initialized = false; // re-merge of all pred states
    target->forEachPred([&] (Block* pred) {
      auto const merge = [&](const State& predState) {
        if (predState.initialized) {
          FTRACE(7, "pulling state from pred B{}:\n{}\n",
                 pred->id(), show(predState));
          merge_into(genv, target, targetInfo.stateIn, predState);
        }
      };

      auto const& predInfo = genv.blockInfo[pred];
      if (pred->next() != pred->taken()) {
        auto const& predState = pred->next() == target
          ? predInfo.stateOutNext
          : predInfo.stateOutTaken;
        merge(predState);
      } else {
        // The predecessor jumps to this block along both its next and taken
        // branches. We can't distinguish the two paths, so just merge both
        // out states.
        assertx(pred->next() == target);
        merge(predInfo.stateOutNext);
        merge(predInfo.stateOutTaken);
      }
    });

    if (oldState != targetInfo.stateIn) {
      FTRACE(7, "target state now:\n{}\n", show(targetInfo.stateIn));
      incompleteQ.push(targetInfo.rpoID);
    }
  };

  do {
    auto const blk = genv.rpoBlocks[incompleteQ.pop()];
    auto& blkInfo = genv.blockInfo[blk];
    FTRACE(2, "B{}:\n", blk->id());

    auto env = Local { genv, blkInfo.stateIn };
    for (auto& inst : *blk) {
      save_taken_state(genv, inst, env.state);
      analyze_inst(env, inst);
      save_next_state(genv, inst, env.state);
    }
    if (auto const t = blk->taken()) propagate(t);
    if (auto const n = blk->next()) propagate(n);
  } while (!incompleteQ.empty());
}
//////////////////////////////////////////////////////////////////////

}
/*
 * This pass does some analysis to try to avoid PureLoad instructions, and
 * avoid "hidden" loads that are part of some instructions like CheckLoc. It
 * also runs the simplify() subroutine on every instruction in the unit, and
 * can eliminate some conditional branches that test types of memory locations.
 *
 * Currently the following things are done:
 *
 *    o Replace PureLoad instructions with new uses of an available value, if
 *      we know it would load that value.
 *
 *    o Remove instructions that check the type of memory locations if we
 *      proved the memory location must hold that type.
 *
 *    o Convert instructions that check types of values in memory to use values
 *      in SSATmp virtual registers, if we know that we have a register that
 *      contains the same value as that memory location.
 *
 *    o Insert phis iff it allows us to do any of the above.
 *
 *    o Run the simplifier on every instruction.
 *
 * Notes about preserving SSA form:
 *
 *   This pass can add new uses of SSATmp*'s to replace accesses to memory.
 *   This means we need to think about whether the SSATmp*'s will be defined in
 *   a location that dominates the places that we want to add new uses, which
 *   is one of the requirements to keep the IR in SSA form.
 *
 *   To understand why this seems to "just work", without any code specifically
 *   dealing with it, consider the following:
 *
 *   Suppose this pass sees a LdLoc at a program point and has determined the
 *   local being loaded contains an SSATmp `t1'. We want to add a new use of
 *   `t1' and delete the LdLoc.
 *
 *   What this analysis state tells us is that every path through the program
 *   to the point of this LdLoc has provably stored `t1' to that local. This
 *   also means that on every path to this point, `t1' was used in some
 *   instruction that did a store(*), which means `t1' must have been defined
 *   on that path, or else the input program already wasn't in SSA form. But
 *   this is just the dominance condition we wanted: every path through the
 *   program to this load must have defined `t1', so its definition dominates
 *   the load.
 *
 *   (*) `t1' could also be defined by an instruction doing a load from that
 *       location, but it's even more obviously ok in that situation.
 *
 * One further note: some parts of the code here are structured in
 * anticipation of allowing the pass to conditionally explore parts of the
 * CFG, so we don't need to propagate states down unreachable paths. But
 * this isn't implemented yet: the above will need to be revisited at that
 * point (essentially we'll have to guarantee we eliminate any jumps to
 * blocks this pass proved were unreachable, to keep the above working at
 * that point).
 */
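
// A small end-to-end illustration (hypothetical HHIR, assuming t1 has type
// Int):
//
//   before:                           after:
//     StLoc<0> fp, t1                   StLoc<0> fp, t1
//     t2:Int = LdLoc<Int,0> fp          t2:Int = AssertType<Int> t1
//     CheckLoc<Int,0> fp -> B3          Jmp B2
//
// The StLoc makes t1 available for local 0, so the LdLoc becomes a reuse of
// t1, and the CheckLoc's type test is statically satisfied, turning it into
// an unconditional Jmp. t1's definition dominates both new uses by the
// argument above.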
void optimizeLoads(IRUnit& unit) {
  PassTracer tracer{&unit, Trace::hhir_load, "optimizeLoads"};
  Timer t(Timer::optimize_loads, unit.logEntry().get_pointer());

  // Unfortunately load-elim may not reach a fixed-point after just one
  // round. This is because the optimization stage can cause the types of
  // SSATmps to change, which then can affect what we infer in the analysis
  // stage. So, loop until we reach a fixed point. Bound the iterations at some
  // max value for safety.

  size_t instrsReduced = 0;
  size_t loadsRemoved = 0;
  size_t loadsRefined = 0;
  size_t jumpsRemoved = 0;
  size_t iters = 0;
  while (true) {
    auto genv = Global { unit };
    if (genv.ainfo.locations.size() == 0) {
      FTRACE(1, "no memory accesses to possibly optimize\n");
      break;
    }

    ++iters;
    FTRACE(1, "\nIteration #{}\n", iters);
    FTRACE(1, "Locations:\n{}\n", show(genv.ainfo));

    analyze(genv);

    FTRACE(1, "\n\nFixed point:\n{}\n",
      [&] () -> std::string {
        auto ret = std::string{};
        for (auto& blk : genv.rpoBlocks) {
          folly::format(&ret, "B{}:\n{}", blk->id(),
                        show(genv.blockInfo[blk].stateIn));
        }
        return ret;
      }()
    );

    FTRACE(1, "\nOptimize:\n");
    optimize(genv);

    if (!genv.instrsReduced &&
        !genv.loadsRemoved &&
        !genv.loadsRefined &&
        !genv.jumpsRemoved) {
      // Nothing changed so we're done
      break;
    }

    instrsReduced += genv.instrsReduced;
    loadsRemoved += genv.loadsRemoved;
    loadsRefined += genv.loadsRefined;
    jumpsRemoved += genv.jumpsRemoved;

    FTRACE(2, "reflowing types\n");
    reflowTypes(genv.unit);

    // Restore reachability invariants
    mandatoryDCE(genv.unit);

    if (iters >= RuntimeOption::EvalHHIRLoadElimMaxIters) {
      // We've iterated way more than usual without reaching a fixed
      // point. Either there's some bug in load-elim, or this unit is especially
      // pathological. Emit a perf warning so we're aware and stop iterating.
      logLowPriPerfWarning(
        "optimize_loads_max_iters", 1,
        [&](StructuredLogEntry& cols) {
          auto const func = unit.context().initSrcKey.func();
          cols.setStr("func", func->fullName()->slice());
          cols.setStr("filename", func->unit()->filepath()->slice());
          cols.setStr("hhir_unit", show(unit));
        }
      );
      break;
    }
  }

  if (auto& entry = unit.logEntry()) {
    entry->setInt("optimize_loads_iters", iters);
    entry->setInt("optimize_loads_instrs_reduced", instrsReduced);
    entry->setInt("optimize_loads_loads_removed", loadsRemoved);
    entry->setInt("optimize_loads_loads_refined", loadsRefined);
    entry->setInt("optimize_loads_jumps_removed", jumpsRemoved);
  }
}

//////////////////////////////////////////////////////////////////////

}}