2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
22 #include <folly/Format.h>
24 #include <boost/dynamic_bitset.hpp>
26 #include "hphp/util/trace.h"
27 #include "hphp/util/match.h"
29 #include "hphp/runtime/vm/jit/alias-analysis.h"
30 #include "hphp/runtime/vm/jit/analysis.h"
31 #include "hphp/runtime/vm/jit/cfg.h"
32 #include "hphp/runtime/vm/jit/ir-unit.h"
33 #include "hphp/runtime/vm/jit/loop-analysis.h"
34 #include "hphp/runtime/vm/jit/memory-effects.h"
35 #include "hphp/runtime/vm/jit/mutation.h"
36 #include "hphp/runtime/vm/jit/pass-tracer.h"
37 #include "hphp/runtime/vm/jit/prof-data.h"
38 #include "hphp/runtime/vm/jit/timer.h"
40 namespace HPHP
{ namespace jit
{
// Route this file's FTRACE output through the hhir_licm trace module.
44 TRACE_SET_MOD(hhir_licm
);
46 //////////////////////////////////////////////////////////////////////
// Whether `inst` is a pure (side-effect-free) instruction this pass is
// willing to hoist out of a loop. NOTE(review): only some of the opcode
// case labels (and none of the surrounding switch scaffolding) are visible
// in this chunk; the full list is elided from this view.
48 bool is_pure_licmable(const IRInstruction
* inst
) {
49 // Note: this list is still not exhaustive; this is just a prototype.
// Interface-support queries: pure predicates on a class.
81 case InterfaceSupportsArr
:
82 case InterfaceSupportsVec
:
83 case InterfaceSupportsDict
:
84 case InterfaceSupportsKeyset
:
85 case InterfaceSupportsStr
:
86 case InterfaceSupportsInt
:
87 case InterfaceSupportsDbl
:
// Instance-of tests: pure predicates on a value's class.
105 case InstanceOfIface
:
106 case InstanceOfBitmask
:
107 case NInstanceOfBitmask
:
115 //////////////////////////////////////////////////////////////////////
// Tracks the lifecycle of the lazily-built dominator tree below:
// Uninit = never constructed, Valid = usable, Invalid = stale.
117 enum class IdomState
{ Uninit
, Valid
, Invalid
};
// Pass-global environment: alias analysis and loop analysis for the unit.
120 explicit Env(IRUnit
& unit
, const BlockList
& rpoBlocks
)
122 , ainfo(collect_aliases(unit
, rpoBlocks
))
123 , loops(identifyLoops(unit
, rpoBlocks
))
// NOTE(review): this switch over idom_state appears to be the destructor
// conditionally destroying the union member — confirm; interior lines are
// missing from this view.
127 switch (idom_state
) {
128 case IdomState::Uninit
:
130 case IdomState::Invalid
:
131 case IdomState::Valid
:
// `idoms` lives in a union so it can be lazily placement-new'd by
// find_idoms once it is actually needed; idom_state says whether it has
// been constructed.
140 IdomState idom_state
{IdomState::Uninit
};
141 union { IdomVector idoms
; };
144 //////////////////////////////////////////////////////////////////////
// Returns the unit's immediate-dominator vector, computing (or recomputing)
// it on demand. Uses placement-new on first construction because env.idoms
// lives in a union; afterwards the state is marked Valid.
146 IdomVector
& find_idoms(Env
& env
) {
147 switch (env
.idom_state
) {
148 case IdomState::Valid
:
150 case IdomState::Invalid
:
151 case IdomState::Uninit
:
// Recompute dominators from a fresh RPO of the CFG.
153 auto const rpo
= rpoSortCfg(env
.unit
);
154 auto new_idoms
= IdomVector(
155 findDominators(env
.unit
, rpo
, numberBlocks(env
.unit
, rpo
))
// First construction must placement-new into the union storage; later
// recomputations can move-assign over the live object.
157 if (env
.idom_state
== IdomState::Uninit
) {
158 new (&env
.idoms
) IdomVector(std::move(new_idoms
));
160 env
.idoms
= std::move(new_idoms
);
163 env
.idom_state
= IdomState::Valid
;
169 //////////////////////////////////////////////////////////////////////
172 * Information needed during processing of a single loop. We process an
173 * "extended" loop at a time: all the natural loops that share a particular
// Per-loop environment; sized off the global unit/loop analysis.
177 explicit LoopEnv(Env
& global
, LoopID loopId
)
180 , invariant_tmps(global
.unit
.numTmps())
181 , blocks(global
.loops
.loops
[loopId
].blocks
)
// Debug-trace the member blocks of this loop.
183 FTRACE(2, "blocks: {}\n",
184 [&] () -> std::string
{
185 auto ret
= std::string
{};
186 for (auto& b
: blocks
) folly::format(&ret
, "B{} ", b
->id());
196 * SSATmps with loop-invariant definitions, either because they are purely
197 * loop invariant or because the other definitions they depend on are also
198 * loop invariant. This is populated in a topological order of dependencies
199 * between the definitions.
201 * These can be moved to the loop header with no change in program semantics,
202 * although in the current version of this pass it may cause increases in
203 * computation on some paths that exit the loop, because we don't try to make
204 * sure the definitions dominate all the Likely-hinted loop exits.
206 sparse_idptr_set
<SSATmp
> invariant_tmps
;
209 * Loads of loop invariant memory locations.
211 * These definitions are not necessarily loop invariant, because their
212 * destination SSATmps may have types that depend on the position in the
213 * program. This means they can't simply be hoisted to a loop preheader, and
214 * it also means that computations depending on the destination can't be
215 * hoisted. We can however insert generic loads in the loop pre-header and
216 * replace these instructions with AssertTypes.
218 jit::vector
<IRInstruction
*> invariant_loads
;
221 * Hoistable check instructions.
223 * These are instructions that check the type of either loop invariant memory
224 * locations, or loop invariant definitions, and occur in the loop body
225 * before any code with side effects (including any branches out of the
226 * loop). They can be moved to the loop pre header without changing the
227 * program's semantics.
229 jit::vector
<IRInstruction
*> invariant_checks
;
232 * Hoistable checks if we're willing to side-exit.
234 * These instructions check the type of either loop invariant memory
235 * locations, or loop invariant definitions, and if the check fails they
236 * definitely leave the loop, and we think they probably also leave the
239 * We can rewrite the code to check the condition before entering the loop,
240 * and side exit if it fails. This transformation is only appropriate if we
241 * believe the check failing is very unlikely, because it can cause new
242 * regions to be generated that duplicate portions of the loop body.
244 jit::vector
<IRInstruction
*> hoistable_as_side_exits
;
247 * The set of blocks in this loop.
249 jit::flat_set
<Block
*> blocks
;
252 * Whether any block in the loop contains a php call. If this is true,
253 * hoisting any loop invariant definition will create uses of SSATmps that
254 * span a call on some path to the back edge. Right now we don't try to
255 * optimize in this situation.
257 bool contains_call
{false};
260 * Memory locations the loop provably cannot modify on any route to a back
261 * edge. (Note that the program still may modify these memory locations in
262 * blocks that are reachable from the loop header, after exiting the loop.)
264 ALocBits invariant_memory
;
267 * Currently unused except for debug printing:
269 * Memory locations that we saw PureLoad instructions for in the loop. And
270 * memory locations that we saw check instructions for in the loop.
272 ALocBits pure_load_memory
;
273 ALocBits dbg_checked
;
276 //////////////////////////////////////////////////////////////////////
// The LoopInfo record (from the global loop analysis) for the loop this
// LoopEnv is processing.
278 LoopInfo
& linfo(LoopEnv
& env
) {
279 return env
.global
.loops
.loops
[env
.loopId
];
282 Block
* header(LoopEnv
& env
) { return linfo(env
).header
; }
283 Block
* pre_header(LoopEnv
& env
) { return linfo(env
).preHeader
; }
284 Block
* pre_exit(LoopEnv
& env
) { return linfo(env
).preExit
; }
// Post-order DFS over the blocks of the loop, invoking `f` on each block.
// `seen` is a bitset of visited block ids; `go` recurses only into blocks
// that are members of this loop. NOTE(review): the calls applying `go` to
// b's successors and the call to f(b) are elided from this view.
286 template<class Seen
, class F
>
287 void visit_loop_post_order(LoopEnv
& env
, Seen
& seen
, Block
* b
, F f
) {
288 if (seen
[b
->id()]) return;
290 auto go
= [&] (Block
* block
) {
// Skip null successors and blocks outside the loop.
291 if (!block
|| !env
.blocks
.count(block
)) return;
292 visit_loop_post_order(env
, seen
, block
, f
);
// Returns the loop's blocks in reverse post order: collect them in post
// order starting at the header, then reverse.
299 BlockList
rpo_sort_loop(LoopEnv
& env
) {
300 auto seen
= boost::dynamic_bitset
<>(env
.global
.unit
.numBlocks());
301 auto ret
= BlockList
{};
302 visit_loop_post_order(
303 env
, seen
, header(env
),
304 [&] (Block
* b
) { ret
.push_back(b
); }
// Post order -> reverse post order.
306 std::reverse(begin(ret
), end(ret
));
310 //////////////////////////////////////////////////////////////////////
// Flow-insensitive analysis of one loop block: for each instruction's memory
// effects, remove anything it may write from env.invariant_memory, note
// whether the loop contains a call, and record (for debugging) which
// locations were loaded/checked.
312 void analyze_block(LoopEnv
& env
, Block
* blk
) {
// Remove everything that may alias `acls` from the invariant set.
313 auto kill
= [&] (AliasClass acls
) {
314 auto const canon
= canonicalize(acls
);
315 auto const may_write
= env
.global
.ainfo
.may_alias(canon
);
316 FTRACE(6, "  kill: {}\n", show(may_write
));
317 env
.invariant_memory
&= ~may_write
;
320 FTRACE(6, "analyze_block B{}\n", blk
->id());
321 for (auto& inst
: blk
->instrs()) {
322 auto const effects
= canonicalize(memory_effects(inst
));
323 FTRACE(6, "  {}\n    mem: {}\n", inst
.toString(), show(effects
));
// Dispatch on the kind of memory effect. NOTE(review): the kill() calls
// for CallEffects/GeneralEffects writes are elided from this view.
326 [&] (UnknownEffects
) { kill(AUnknown
); },
328 [&] (CallEffects x
) { env
.contains_call
= true;
330 [&] (PureStore x
) { kill(x
.dst
); },
331 [&] (PureSpillFrame x
) { kill(x
.stk
); },
333 [&] (GeneralEffects x
) {
// Record checked locations for debug printing.
339 if (auto const meta
= env
.global
.ainfo
.find(x
.loads
)) {
340 env
.dbg_checked
.set(meta
->index
);
// Record PureLoad sources for debug printing.
348 if (auto const meta
= env
.global
.ainfo
.find(x
.src
)) {
349 env
.pure_load_memory
.set(meta
->index
);
353 [&] (IrrelevantEffects
) {},
355 [&] (InlineEnterEffects x
) { kill(x
.inlFrame
);
358 [&] (InlineExitEffects x
) { kill(x
.inlFrame
);
// Returns/exits cannot occur on a path to a back edge inside a loop.
361 [&] (ReturnEffects
) { assertx(!"tried to return in a loop"); },
362 [&] (ExitEffects
) { assertx(!"tried to exit in a loop"); }
// Run analyze_block over the header and every block of the loop, starting
// from "all locations invariant" and narrowing; then trace the results.
367 void analyze_loop(LoopEnv
& env
) {
// Optimistically assume every location is invariant; analysis clears bits.
368 env
.invariant_memory
.set();
369 analyze_block(env
, header(env
));
370 for (auto& blk
: env
.blocks
) analyze_block(env
, blk
);
371 FTRACE(2, "invariant: {}\n"
375 show(env
.invariant_memory
),
376 show(env
.pure_load_memory
),
377 show(env
.dbg_checked
),
// Whether `acls` maps to a tracked alias location whose bit survived
// analyze_loop, i.e. the loop provably does not modify it.
382 bool is_invariant_memory(LoopEnv
& env
, AliasClass acls
) {
383 if (auto const meta
= env
.global
.ainfo
.find(canonicalize(acls
))) {
384 return env
.invariant_memory
[meta
->index
] == true;
// Whether every source of `inst` is either already known loop-invariant or
// usable (dominating) at the loop pre-header, so hoisting `inst` would not
// create uses of undefined values.
389 bool has_all_loop_invariant_args(LoopEnv
& env
, const IRInstruction
& inst
) {
390 for (auto& src
: inst
.srcs()) {
391 if (env
.invariant_tmps
.contains(src
)) continue;
392 if (is_tmp_usable(find_idoms(env
.global
), src
, pre_header(env
))) continue;
// If `inst` is purely hoistable and all its arguments are loop invariant,
// record its dst in invariant_tmps. Refuses to do anything when the loop
// contains a call (hoisting would create uses spanning the call).
398 bool try_pure_licmable(LoopEnv
& env
, const IRInstruction
& inst
) {
399 if (env
.contains_call
) return false;
400 if (!is_pure_licmable(&inst
)) return false;
401 if (!has_all_loop_invariant_args(env
, inst
)) return false;
402 FTRACE(2, "  invariant: {} = {}\n", inst
.dst()->toString(),
404 env
.invariant_tmps
.insert(inst
.dst());
// A check instruction exits the loop if its taken edge leads to a block
// that is not a member of the loop.
408 bool check_is_loop_exit(LoopEnv
& env
, const IRInstruction
& inst
) {
409 auto const t
= inst
.taken();
410 return !env
.blocks
.count(t
);
// Record `inst` as a check hoistable to the pre-header: it must exit the
// loop on the taken edge and have all-invariant arguments.
413 bool impl_hoistable_check(LoopEnv
& env
, IRInstruction
& inst
) {
414 if (!check_is_loop_exit(env
, inst
)) return false;
415 if (!has_all_loop_invariant_args(env
, inst
)) return false;
416 FTRACE(2, "  hoistable check: {}\n", inst
.toString());
417 env
.invariant_checks
.push_back(&inst
);
422 * NB. This function doesn't actually prove that the taken branch is a side
423 * exit. We can't really know this for a fact: even if it started as a
424 * "side-exit", real computation could've been moved into the branch.
// Heuristic: taken edge leaves the loop, targets an exit block, and is
// hinted Unlikely.
426 bool check_is_probable_side_exit(LoopEnv
& env
, const IRInstruction
& inst
) {
427 return check_is_loop_exit(env
, inst
) &&
428 inst
.taken()->isExit() &&
429 inst
.taken()->hint() == Block::Hint::Unlikely
;
// Record `inst` as hoistable if we're willing to turn its failure into a
// side exit before the loop (see hoistable_as_side_exits' doc).
432 bool try_hoistable_as_side_exit(LoopEnv
& env
, IRInstruction
& inst
) {
433 if (!check_is_probable_side_exit(env
, inst
)) return false;
434 if (!has_all_loop_invariant_args(env
, inst
)) return false;
435 FTRACE(2, "  hoistable as side exit: {}\n", inst
.toString());
436 env
.hoistable_as_side_exits
.push_back(&inst
);
// Wrapper around impl_hoistable_check. NOTE(review): two interior lines
// (likely an opcode filter) are elided from this view.
440 bool try_hoistable_check(LoopEnv
& env
, IRInstruction
& inst
) {
443 return impl_hoistable_check(env
, inst
);
// Classify an instruction with invariant memory effects: either a check on
// invariant memory (hoistable outright, or as a side exit) or a pure load
// of an invariant location (recorded in invariant_loads). Every other
// effect kind is not hoistable here.
449 bool try_invariant_memory(LoopEnv
& env
,
451 bool may_still_hoist_checks
,
452 const MemEffects
& effects
) {
455 [&] (UnknownEffects
) { return false; },
456 [&] (CallEffects
) { return false; },
457 [&] (PureStore
) { return false; },
458 [&] (PureSpillFrame
) { return false; },
459 [&] (IrrelevantEffects
) { return false; },
460 [&] (ReturnEffects
) { return false; },
461 [&] (ExitEffects
) { return false; },
462 [&] (InlineEnterEffects
){ return false; },
463 [&] (InlineExitEffects
) { return false; },
// Checks: the loaded locations must all be invariant.
465 [&] (GeneralEffects mem
) {
468 if (!is_invariant_memory(env
, mem
.loads
)) return false;
469 if (may_still_hoist_checks
) {
470 if (impl_hoistable_check(env
, inst
)) return true;
472 return try_hoistable_as_side_exit(env
, inst
);
// Pure loads: invariant source, invariant args, and no call in the loop.
481 if (env
.contains_call
) return false;
482 if (!is_invariant_memory(env
, mem
.src
)) return false;
483 if (!has_all_loop_invariant_args(env
, inst
)) return false;
484 FTRACE(2, "  invariant load: {} = {}\n",
485 inst
.dst()->toString(),
487 env
.invariant_loads
.push_back(&inst
);
// Conservative "can this instruction have side effects?" used to decide when
// check-hoisting must stop. NOTE(review): most of the switch body is elided
// from this view; ExitPlaceholder appears to be special-cased.
493 bool may_have_side_effects(const IRInstruction
& inst
) {
496 case ExitPlaceholder
:
500 * Note: we could potentially extend this to allow various load instructions
501 * or pure computations, but it would only be legal to hoist checks after
502 * them if we first verify that the SSATmps they defined are not used under
512 * Unlike a usual LICM algorithm, we don't need to iterate to identify the
513 * invariant code. If we walk the loop blocks in reverse post order, so we see
514 * successors after predecessors, we see enough information for each of the
515 * types of invariant code we're trying to identify:
517 *   o For "pure licm"able instructions, if they are only licmable after some
518 *     of their sources are licm'd, we're guaranteed to see those dependencies
519 *     in order because of SSA. All of these have a single definition, and the
520 *     definitions dominate uses.
522 *   o For loads of invariant locations, we're using information from the full
523 *     flow-insensitive pass over the loop in analyze_loop that identified
524 *     which locations are invariant.
526 *   o For hoisting Check instructions: we only can do this for Checks that
527 *     occur before any side-effects. We also only /want/ to do it if the
528 *     Check is guaranteed to be executed in the loop body (assuming another
529 *     hoistable Check doesn't fail). So, for correctness in the presence of
530 *     nested loops (natural or not), we need to start to refuse to hoist
531 *     checks if we see a block where we haven't visited all of its
532 *     predecessors yet. The restriction on when we want to do it is handled
533 *     by considering conditional jumps (and any instruction with multiple
534 *     successor blocks other than ExitPlaceholder) to be instructions with
538 void find_invariant_code(LoopEnv
& env
) {
539 auto may_still_hoist_checks
= true;
540 auto visited_block
= boost::dynamic_bitset
<>(env
.global
.unit
.numBlocks());
541 auto profData
= jit::profData();
542 auto numInvocations
= linfo(env
).numInvocations
;
543 FTRACE(4, "numInvocations: {}\n", numInvocations
);
// Walk loop blocks in RPO; see the comment above for why one pass suffices.
544 for (auto& b
: rpo_sort_loop(env
)) {
545 FTRACE(2, "  B{}:\n", b
->id());
// Stop hoisting checks once we reach a block with an unvisited
// predecessor (e.g. a nested back edge).
546 if (may_still_hoist_checks
&& b
!= header(env
)) {
547 b
->forEachPred([&] (Block
* pred
) {
548 if (!visited_block
[pred
->id()]) {
549 FTRACE(5, "      may_still_hoist_checks = false (pred: B{})\n",
551 may_still_hoist_checks
= false;
556 // Skip this block if its profile weight is less than the number of times
557 // the loop is invoked, since otherwise we're likely to execute the
558 // instruction more by hoisting it out of the loop.
559 auto tid
= b
->front().marker().profTransID();
560 auto profCount
= profData
? profData
->transCounter(tid
) : 0;
561 if (profCount
< numInvocations
) {
562 FTRACE(4, "  skipping Block {} because of low profile weight ({})\n",
// Even when skipping the block for hoisting, its side effects still
// block later check hoisting.
564 if (may_still_hoist_checks
) {
565 for (auto& inst
: b
->instrs()) {
566 if (may_have_side_effects(inst
)) {
567 FTRACE(5, "      may_still_hoist_checks = false\n");
568 may_still_hoist_checks
= false;
// Classify each instruction, in order: pure licmable, invariant
// memory, then hoistable check.
576 for (auto& inst
: b
->instrs()) {
577 FTRACE(4, "    {}\n", inst
.toString());
578 if (try_pure_licmable(env
, inst
)) continue;
580 auto const effects
= memory_effects(inst
);
581 if (try_invariant_memory(env
, inst
, may_still_hoist_checks
, effects
)) {
585 if (may_still_hoist_checks
) {
586 if (try_hoistable_check(env
, inst
)) continue;
587 if (may_have_side_effects(inst
)) {
588 FTRACE(5, "      may_still_hoist_checks = false\n");
589 may_still_hoist_checks
= false;
594 visited_block
.set(b
->id());
// Physically move each purely-invariant definition from its block into the
// loop pre-header (before the pre-header's terminal jump).
598 void hoist_invariant(LoopEnv
& env
) {
600 * Iterating invariant_tmps goes in insertion order, which means it is
601 * already guaranteed to be topologically sorted by any dependencies between
604 for (auto& invID
: env
.invariant_tmps
) {
605 auto const dst
= env
.global
.unit
.findSSATmp(invID
);
606 auto const inst
= dst
->inst();
607 auto const preh
= pre_header(env
);
608 FTRACE(1, "moving {} to B{}\n", inst
->toString(), preh
->id());
// Unlink from the old block, then insert just before the pre-header's
// terminal instruction. Pure instructions have no control-flow edges.
609 inst
->block()->erase(inst
);
610 assertx(!inst
->taken() && !inst
->next());
611 preh
->insert(std::prev(preh
->end()), inst
);
616 * Loads that have invariant memory locations still don't define SSATmps that
617 * are loop invariant (without further analysis). The reason is that the type
618 * may depend on some other instruction that is still in the loop.
620 * So we can insert a load in the pre-header that defines a Gen, and replace
621 * the load at the position in the loop with an AssertType.
623 void hoist_invariant_loads(LoopEnv
& env
) {
624 for (auto& old_load
: env
.invariant_loads
) {
// Only plain location loads are handled here.
625 if (!old_load
->is(LdLoc
, LdStk
, LdMem
)) continue;
// Clone the load into the pre-header with the most generic type (Gen),
// then retype its dests accordingly.
627 auto const preh
= pre_header(env
);
628 auto const new_load
= env
.global
.unit
.clone(old_load
);
629 new_load
->setTypeParam(TGen
);
630 retypeDests(new_load
, &env
.global
.unit
);
631 preh
->insert(std::prev(preh
->end()), new_load
);
// Replace the in-loop load with an assertion on the hoisted value.
// NOTE(review): the replacement opcode argument is elided from this view.
632 env
.global
.unit
.replace(
635 old_load
->typeParam(),
638 FTRACE(1, "inserted {} in B{}\n      {} -> {}\n",
639 new_load
->toString(),
641 old_load
->dst()->toString(),
642 old_load
->toString());
// Clone `old_check` into the current pre-header, retargeting its taken edge
// to `new_taken`, and split off a new pre-header block for the fall-through
// path (which inherits the old pre-header's terminal Jmp and its arguments).
// Invalidates the dominator tree and updates the loop analysis.
646 void hoist_check_instruction(LoopEnv
& env
,
647 IRInstruction
* old_check
,
650 auto const preh
= pre_header(env
);
651 auto const new_check
= env
.global
.unit
.clone(old_check
);
653 new_check
->setTaken(new_taken
);
655 // Note that the current pre_header jump may have arguments. We need to
656 // preserve them in the new pre_header, so we have to keep the same
658 assertx(preh
->back().is(Jmp
));
659 auto const jmp
= &preh
->back();
660 auto const new_preh
= env
.global
.unit
.defBlock(linfo(env
).numInvocations
);
// Move the old terminal jump into the new pre-header; the hoisted check
// becomes the old pre-header's terminal, falling through to new_preh.
662 new_preh
->prepend(jmp
);
663 new_check
->setNext(new_preh
);
664 preh
->insert(preh
->end(), new_check
);
666 FTRACE(1, "[{}] inserted {} in B{}, adding B{} as the new pre_header\n",
668 new_check
->toString(),
672 env
.global
.unit
.replace(
678 // We've changed the pre-header and invalidated the dominator tree.
679 env
.global
.idom_state
= IdomState::Invalid
;
680 updatePreHeader(env
.global
.loops
, env
.loopId
, new_preh
);
// Hoist each recorded invariant check to the pre-header, keeping its
// original taken target.
683 void hoist_invariant_checks(LoopEnv
& env
) {
684 for (auto& check
: env
.invariant_checks
) {
685 hoist_check_instruction(env
, check
, check
->taken(), "invariant");
// Hoist each "probable side exit" check to the pre-header, retargeting its
// taken edge at the loop's pre-exit block.
689 void hoist_side_exits(LoopEnv
& env
) {
690 auto const side_exit
= pre_exit(env
);
692 for (auto& check
: env
.hoistable_as_side_exits
) {
693 hoist_check_instruction(env
, check
, side_exit
, "side-exit");
// Drive the per-loop pipeline: find invariant code, then hoist pure
// definitions, invariant loads, invariant checks, and side-exit checks.
697 void process_loop(LoopEnv
& env
) {
699 find_invariant_code(env
);
700 hoist_invariant(env
);
701 hoist_invariant_loads(env
);
702 hoist_invariant_checks(env
);
703 hoist_side_exits(env
);
706 //////////////////////////////////////////////////////////////////////
// Ensure every analyzed loop has a pre-header, and (where a pre-header
// exists) a pre-exit block, inserting them as needed.
708 void insert_pre_headers_and_exits(IRUnit
& unit
, LoopAnalysis
& loops
) {
710 for (auto& linfo
: loops
.loops
) {
711 if (!linfo
.preHeader
) {
712 insertLoopPreHeader(unit
, loops
, linfo
.id
);
// Pre-exit insertion requires an existing pre-header.
715 if (linfo
.preHeader
&& !linfo
.preExit
) {
716 insertLoopPreExit(unit
, loops
, linfo
.id
);
721 FTRACE(1, "Loops after adding pre-headers and pre-exits:\n{}\n",
726 //////////////////////////////////////////////////////////////////////
// Entry point for the LICM pass: set up tracing/timing and the global Env,
// bail if the unit has no loops, ensure pre-headers/pre-exits exist, then
// process loops from a work queue seeded with the inner loops (inner loops
// first; parents are enqueued as their children are processed).
730 void optimizeLoopInvariantCode(IRUnit
& unit
) {
731 PassTracer tracer
{ &unit
, Trace::hhir_licm
, "LICM" };
732 Timer
t(Timer::optimize_licm
, unit
.logEntry().get_pointer());
733 Env env
{ unit
, rpoSortCfg(unit
) };
734 if (env
.loops
.loops
.empty()) {
735 FTRACE(1, "no loops\n");
738 FTRACE(2, "Locations:\n{}\n", show(env
.ainfo
));
739 FTRACE(1, "Loops:\n{}\n", show(env
.loops
));
740 insert_pre_headers_and_exits(env
.unit
, env
.loops
);
743 * Note: currently this is visiting inner loops first, but not for any strong
744 * reason for the types of optimizations it currently performs.
747 auto workQ
= jit::queue
<LoopID
>{};
748 auto seen
= boost::dynamic_bitset
<>(env
.loops
.loops
.size());
// Enqueue the canonical loop for `id` (the loop registered for its header
// block), skipping invalid ids and loops already seen.
750 auto enqueue
= [&] (LoopID id
) {
751 if (id
== kInvalidLoopID
) return;
752 id
= env
.loops
.headers
[env
.loops
.loops
[id
].header
];
753 if (seen
[id
]) return;
758 for (auto& id
: env
.loops
.innerLoops
) enqueue(id
);
// Drain the queue: process each loop, enqueueing its parent so outer
// loops run after their children.
761 auto const id
= workQ
.front();
763 enqueue(env
.loops
.loops
[id
].parent
);
765 FTRACE(1, "L{}:\n", id
);
766 auto lenv
= LoopEnv
{ env
, id
};
769 } while (!workQ
.empty());
772 //////////////////////////////////////////////////////////////////////
777 * LICM is a work in progress, disabled for now. Here's some things still to
780 * o More is_pure_licmable. Some things that dereference certain kinds of
781 * pointers (e.g. LdObjClass) can probably be in the list too.
783 * o More kinds of checks for hoisting (CheckType, CheckTypeMem).