2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/hhbbc/dce.h"
26 #include <boost/dynamic_bitset.hpp>
27 #include <boost/container/flat_map.hpp>
29 #include <folly/gen/Base.h>
30 #include <folly/gen/String.h>
32 #include "hphp/util/bitops.h"
33 #include "hphp/util/dataflow-worklist.h"
34 #include "hphp/util/trace.h"
36 #include "hphp/hhbbc/analyze.h"
37 #include "hphp/hhbbc/cfg-opts.h"
38 #include "hphp/hhbbc/cfg.h"
39 #include "hphp/hhbbc/interp-state.h"
40 #include "hphp/hhbbc/interp.h"
41 #include "hphp/hhbbc/representation.h"
42 #include "hphp/hhbbc/type-system.h"
43 #include "hphp/hhbbc/unit-util.h"
45 namespace HPHP
{ namespace HHBBC
{
47 TRACE_SET_MOD(hhbbc_dce
);
49 //////////////////////////////////////////////////////////////////////
52 * This module contains code to perform both local DCE and global DCE.
54 * The local DCE algorithm addresses dead eval stack manipulations and
55 * dead stores to locals that are visible within a single basic block.
57 * The global DCE performs a liveness analysis and then uses this
58 * information to allow dead stores to locals to be eliminated across
61 * Both types of DCE here need to be type-aware, but they must visit
62 * blocks backward. They accomplish this by forward-propagating the
63 * block input states from a FuncAnalysis to each instruction in the
64 * block prior to doing the backward iteration.
68 * During backward traversal of a block, we maintain a "backwards"
69 * stack, indicating which eval stack slots are going to be required
70 * in the future or not.
72 * During this traversal, each instruction that pops when going
73 * forward instead "pushes" information about whether that input
74 * will be required. If it is not required, it also pushes an
75 * accumulating set of instruction ids that must be removed if the
76 * instruction which produces the stack slot is removed. (All
77 * instructions in these sets must be removed if any are, in order
78 * to keep the stack depths correct.)
80 * Similarly, each instruction that would push a value going forward
81 * instead "pops" the information about whether its stack output is
82 * going to be needed. If not, the instruction can mark itself (and
83 * all downstream instructions that depended on it) as removable.
87 * While a block is iterated backward, the set of live locals is
88 * tracked. The initial state of this live set depends on whether
89 * we are performing global or local DCE, and in the local case
90 * includes all locals in the function.
92 * When a local may be read, it is added to the live set. When a
93 * local is definitely-written, it is removed from the set.
95 * If an instruction may write to a local that is not live, it can be
96 * marked as removable if it is known to have no other side-effects.
97 * Currently this is only hooked up to SetL.
101 * The global algorithm first performs a liveness analysis to
102 * propagate live out sets to each block.
104 * This analysis is basically normal, but slightly modified from the
105 * usual in order to deal with the way exceptional control flow is
106 * represented in our CFG (with factored edges).
108 * It essentially is the liveness analysis algorithm described at
109 * http://dl.acm.org/citation.cfm?id=316171, except that we don't
110 * need to track kill sets for each PEI because we don't have a
111 * means of determining which factored edges may be traversed by any
112 * given PEI. (Maybe that may be more usual to have in the context
113 * of a language with declared exception clauses...)
115 * Since we only deal with the most pessimistic exception case, this
116 * means for each block we just determine a gen and kill set, along
117 * with a subset of the latter set that is the locals that must be
118 * killed before any PEI. (See killBeforePEI below.)
120 * Final note about types:
122 * Global DCE can change the types of locals in a way that spans
123 * basic blocks. For a simple example, take the following code:
125 * // $foo :: Uninit here.
127 * // ... code that breaks a block ...
130 * If the first store to $foo is removed, the type of $foo before
131 * the SetL in the later block is now Uninit, instead of Int.
133 * In addition, by killing dead eval stack slots across basic
134 * blocks, the saved entry stacks in the FuncAnalysis no longer
135 * match the actual stack depths.
137 * This means that after calling global_dce on a function, its
138 * FuncAnalysis can no longer be considered accurate.
140 * Moreover, since global DCE makes use of type information to
141 * determine whether a store is dead, we need to be careful that
142 * this never changes whether the assumptions used to perform DCE
145 * This is ok right now: DCE only uses the types to figure out which
146 * values can either have lifetimes extended or shortened without
147 * visible side-effects, or which values may be refs (so we can't
148 * omit stores to them). If we omit a store that changes the type
149 * of a local globally, this means the new type cannot be different
150 * with regard to these features (or we wouldn't have omitted it),
151 * which means we won't have made different decisions elsewhere in
152 * the algorithm based on the new type.
154 * Specifically: we will never omit a store where the old local type
155 * was something that could've had side effects, and if a new value
156 * is stored into a local where destroying it could have
157 * side-effects, some point along the path to the function exit (if
158 * nothing else, the RetC) will have added it to the gen set and the
159 * store also won't be removable.
161 * In contrast, note that local DCE can not change types across
168 //////////////////////////////////////////////////////////////////////
170 // Returns whether decrefing a type could run a destructor.
171 bool couldRunDestructor(const Type
& t
) {
172 // We could check for specialized objects to see if they don't
173 // declare a user-defined destructor, but currently don't.
182 // Returns whether a set on something containing type t could have
183 // side-effects (running destructors, or modifying arbitrary things via
185 bool setCouldHaveSideEffects(const Type
& t
) {
// Some reads could raise warnings and run arbitrary code.
bool readCouldHaveSideEffects(const Type& t) {
  // Reading a local that may be Uninit raises an undefined-variable
  // notice, and the error handler can run arbitrary code, so any type
  // that could be Uninit is treated as effectful.
  return t.couldBe(TUninit);
}
199 //////////////////////////////////////////////////////////////////////
202 * Use information of a stack cell.
205 // Indicates that the cell is (unconditionally) not used.
208 // Indicates that the cell is (possibly) used.
212 * Indicates that the cell is only used if it was the last reference alive.
213 * For instance, a PopC will call the destructor of the top-of-stack object
214 * if it was the last reference alive, and this counts as an example of
217 * If the producer of the cell knows that it is not the last reference, then
218 * it can treat Use::UsedIfLastRef as being equivalent to Use::Not.
228 using StkId
= InstrId
;
230 bool operator<(const InstrId
& i1
, const InstrId
& i2
) {
231 if (i1
.blk
!= i2
.blk
) return i1
.blk
< i2
.blk
;
232 // sort by decreasing idx so that kill_marked_instrs works
233 return i1
.idx
> i2
.idx
;
236 enum class DceAction
{
241 const char* show(DceAction action
) {
243 case DceAction::Kill
: return "Kill";
244 case DceAction::PopOutputs
: return "PopOutputs";
249 using DceActionMap
= std::map
<InstrId
, DceAction
>;
252 explicit UseInfo(Use u
) : usage(u
) {}
253 UseInfo(Use u
, DceActionMap
&& ids
) : usage(u
), actions(std::move(ids
)) {}
257 * Set of instructions that should be removed if we decide to
258 * discard the corresponding stack slot.
260 DceActionMap actions
;
262 * Used for stack slots that are live across blocks to indicate a
263 * stack slot that should be considered Used at the entry to blk.
265 * If its not live across blocks, popper.blk will stay NoBlockId.
267 StkId popper
{ NoBlockId
, 0 };
// Render an InstrId as "block:index" for trace output.
std::string show(InstrId id) {
  return folly::sformat("{}:{}", id.blk, id.idx);
}
274 //////////////////////////////////////////////////////////////////////
277 explicit DceState(const FuncAnalysis
& ainfo
) : ainfo(ainfo
) {}
278 const FuncAnalysis
& ainfo
;
280 * Used to accumulate a set of blk/stack-slot pairs that
281 * should be marked used
283 std::set
<StkId
> usedStackSlots
;
286 * Eval stack use information. Stacks slots are marked as being
287 * needed or not needed. If they aren't needed, they carry a set of
288 * instructions that must be removed if the instruction that
289 * produces the stack value is also removable.
291 std::vector
<UseInfo
> stack
;
294 * Locals known to be live at a point in a DCE walk. This is used
295 * when we're actually acting on information we discovered during
298 std::bitset
<kMaxTrackedLocals
> liveLocals
;
301 * Class-ref slots known to be live at a point in a DCE walk. This is used
302 * when we're actually acting on information we discovered during liveness
305 std::bitset
<kMaxTrackedClsRefSlots
> liveSlots
;
308 * These variable sets are used to compute the transfer function for
309 * the global liveness analysis in global_dce.
311 * The gen set accumulates the set of variables in the block with
312 * upward-exposed uses. The kill set is the set of variables the
313 * block will re-define, ignoring exceptional control flow.
315 * The killBeforePEI set is the set of variables killed before a
316 * PEI. Propagation of liveness needs to use this (always more
317 * conservative) set instead of kill when crossing a factored exit
320 std::bitset
<kMaxTrackedLocals
> locGen
;
321 std::bitset
<kMaxTrackedLocals
> locKill
;
322 std::bitset
<kMaxTrackedLocals
> locKillBeforePEI
;
325 * Instructions marked in this set will be processed by dce_perform.
326 * They must all be processed together to keep eval stack consumers
327 * and producers balanced.
329 DceActionMap actionMap
;
332 * The set of locals and class-ref slots that were ever live in this block.
333 * (This includes ones that were live going out of this block.) This set is
334 * used by global DCE to remove locals and class-ref slots that are completely
335 * unused in the entire function.
337 std::bitset
<kMaxTrackedLocals
> usedLocals
;
338 std::bitset
<kMaxTrackedClsRefSlots
> usedSlots
;
341 * Mapping of class-ref slots to instruction sets. If the usage of a class-ref
342 * slot is removed, all the instructions currently in the set must also be
345 std::vector
<DceActionMap
> slotDependentActions
;
348 //////////////////////////////////////////////////////////////////////
350 const char* show(Use u
) {
352 case Use::Not
: return "0";
353 case Use::Used
: return "U";
354 case Use::UsedIfLastRef
: return "UL";
// Render one DceActionMap entry as "<instr>@<action>" for trace output.
std::string show(const DceActionMap::value_type& elm) {
  return folly::sformat("{}@{}", show(elm.first), show(elm.second));
}
363 std::string
show(const DceActionMap
& actions
) {
364 using namespace folly::gen
;
366 | map([](const DceActionMap::value_type
& elm
) { return show(elm
); })
367 | unsplit
<std::string
>(";")
// Render a UseInfo as "<usage>@<actions>" for trace output (debug only).
std::string DEBUG_ONLY show(const UseInfo& ui) {
  return folly::sformat("{}@{}", show(ui.usage), show(ui.actions));
}
375 std::string DEBUG_ONLY
loc_bits_string(borrowed_ptr
<const php::Func
> func
,
376 std::bitset
<kMaxTrackedLocals
> locs
) {
377 std::ostringstream out
;
378 if (func
->locals
.size() < kMaxTrackedLocals
) {
379 for (auto i
= func
->locals
.size(); i
-- > 0;) {
380 out
<< (locs
.test(i
) ? '1' : '0');
388 std::string DEBUG_ONLY
389 slot_bits_string(borrowed_ptr
<const php::Func
> func
,
390 std::bitset
<kMaxTrackedClsRefSlots
> slots
) {
391 std::ostringstream out
;
392 if (func
->numClsRefSlots
< kMaxTrackedClsRefSlots
) {
393 for (auto i
= 0; i
< func
->numClsRefSlots
; ++i
) {
394 out
<< (slots
.test(i
) ? '1' : '0');
402 //////////////////////////////////////////////////////////////////////
408 const State
& stateBefore
;
409 const StepFlags
& flags
;
412 //////////////////////////////////////////////////////////////////////
/*
 * Mark the instruction currently being visited (env.id) for removal,
 * together with every instruction recorded in `actions`.
 *
 * Ordering note: env.id is emplaced first; the later inserts do not
 * overwrite an entry already present for the same instruction.
 */
void markSetDead(Env& env, const DceActionMap& actions) {
  env.dceState.actionMap.emplace(env.id, DceAction::Kill);
  FTRACE(2, " marking {} {}\n", show(env.id), show(actions));
  for (auto& i : actions) env.dceState.actionMap.insert(i);
}
// Kill the current instruction plus everything the given stack slot's
// use-info says must be removed along with it.
void markUisDead(Env& env, const UseInfo& ui) {
  markSetDead(env, ui.actions);
}
// Variadic form: kill the action sets of each supplied UseInfo in turn.
template<typename... Args>
void markUisDead(Env& env, const UseInfo& ui, Args&&... args) {
  markUisDead(env, ui);
  markUisDead(env, std::forward<Args>(args)...);
}
/*
 * During global dce, a particular stack element could be pushed
 * on multiple paths. eg:
 *
 * If f() is known to return a non-counted type, we have UnboxRNop ->
 * PopC on one path, and Int 42 -> PopC on another, and the PopC marks
 * its value Use::Not. When we get to the Int 42 it thinks both
 * instructions can be killed; but when we get to the UnboxRNop it
 * does nothing. So any time we decide to ignore a Use::Not or
 * Use::UsedIfLastRef, we have to record that fact so we can prevent
 * the other paths from trying to use that information. We communicate
 * this via the ui.popper field, and the usedStackSlots set.
 *
 * Note that a more sophisticated system would insert a PopC after the
 * UnboxRNop, and allow the 42/PopC to be removed (work in progress).
 */
void markUisLive(Env& env, const UseInfo& ui) {
  if (ui.usage != Use::Used && ui.popper.blk != NoBlockId) {
    // Some downstream consumer treated this slot as unused; record
    // that the value is in fact live so other producing paths don't
    // act on the stale Use::Not information.
    env.dceState.usedStackSlots.insert(ui.popper);
  }
}
// Variadic form: mark each supplied UseInfo live in turn.
template<typename... Args>
void markUisLive(Env& env, const UseInfo& ui, Args&&... args) {
  markUisLive(env, ui);
  markUisLive(env, std::forward<Args>(args)...);
}
// Mark just the current instruction (env.id) for removal.
void markDead(Env& env) {
  env.dceState.actionMap.emplace(env.id, DceAction::Kill);
  FTRACE(2, " marking {}\n", show(env.id));
}
465 //////////////////////////////////////////////////////////////////////
/*
 * "Pop" during backwards DCE: since the block is walked in reverse, an
 * instruction that pops at runtime instead pushes use-information for
 * that slot onto the dce stack.
 */
void pop(Env& env, UseInfo&& ui) {
  FTRACE(2, " pop({})\n", show(ui.usage));
  env.dceState.stack.push_back(std::move(ui));
}
473 void pop(Env
& env
, Use u
, DceActionMap actions
) {
474 pop(env
, {u
, std::move(actions
)});
477 void pop(Env
& env
, Use u
, InstrId id
) {
478 pop(env
, {u
, {{id
, DceAction::Kill
}}});
// Pop a slot that is unconditionally used, with no associated actions.
void pop(Env& env) { pop(env, Use::Used, DceActionMap{}); }
483 Type
topT(Env
& env
, uint32_t idx
= 0) {
484 assert(idx
< env
.stateBefore
.stack
.size());
485 return env
.stateBefore
.stack
[env
.stateBefore
.stack
.size() - idx
- 1].type
;
488 Type
topC(Env
& env
, uint32_t idx
= 0) {
489 auto const t
= topT(env
, idx
);
490 assert(t
.subtypeOf(TInitCell
));
// Pop a slot whose value is definitely unused; removing the producer
// requires removing this instruction (env.id) as well.
void discard(Env& env) {
  pop(env, Use::Not, env.id);
}
// Base case for the variadic check below: an empty pack is trivially unused.
bool allUnused() { return true; }
499 template<class... Args
>
500 bool allUnused(const UseInfo
& ui
, Args
&&... args
) {
501 return ui
.usage
== Use::Not
&&
502 allUnused(std::forward
<Args
>(args
)...);
// Base case for the variadic check below: an empty pack trivially passes.
bool allUnusedIfNotLastRef() { return true; }
506 template<class... Args
>
507 bool allUnusedIfNotLastRef(const UseInfo
& ui
, Args
&&... args
) {
508 return (ui
.usage
== Use::Not
|| ui
.usage
== Use::UsedIfLastRef
) &&
509 allUnusedIfNotLastRef(std::forward
<Args
>(args
)...);
// Base case for the variadic accumulate below: nothing left to merge.
void combineSets(UseInfo&) {}
/*
 * Merge each ui's action set into accum, and keep the largest popper
 * seen (per operator< above: later block, or earlier idx within a
 * block), recursing through the parameter pack.
 */
template<class... Args>
void combineSets(UseInfo& accum, const UseInfo& ui, Args&&... args) {
  accum.actions.insert(begin(ui.actions), end(ui.actions));
  if (accum.popper.blk == NoBlockId ||
      accum.popper < ui.popper) {
    accum.popper = ui.popper;
  }
  combineSets(accum, std::forward<Args>(args)...);
}
// The instruction wants to be killed, as long as its inputs can be
// killed; combine the UseInfo sets from its outputs, and make an
// appropriate number of Use::Not pops.
template<class... Args>
void passThrough(Env& env, Args&&... args) {
  UseInfo ui { Use::Not, {{ env.id, DceAction::Kill }} };
  combineSets(ui, std::forward<Args>(args)...);
  // Copy the combined info for all but one of the instruction's inputs;
  // the final pop moves it.
  for (auto i = 1; i < env.op.numPop(); ++i) {
    pop(env, UseInfo{ui});
  }
  pop(env, std::move(ui));
}
// Whether popping the top of the stack here could run a destructor.
bool popCouldRunDestructor(Env& env) {
  // If there's an equivLocal, we know that it's not the last
  // reference, so popping the stack won't run any destructors.
  return
    env.stateBefore.stack.back().equivLocal == NoLocalId &&
    couldRunDestructor(topC(env));
}
546 * It may be ok to remove pops on objects with destructors in some scenarios
547 * (where it won't change the observable point at which a destructor runs). We
548 * could also look at the object type and see if it is known that it can't have
549 * a user-defined destructor.
551 * For now, we mark the cell popped with a Use::UsedIfLastRef. This indicates
552 * to the producer of the cell that it is considered used if it could be
553 * the last reference alive (in which case the destructor would be run on
554 * Pop). If the producer knows that the cell is not the last reference (e.g. if
555 * it is a Dup), then Use::UsedIfLastRef is equivalent to Use::Not.
557 void discardNonDtors(Env
& env
) {
558 if (popCouldRunDestructor(env
)) {
559 return pop(env
, Use::UsedIfLastRef
, env
.id
);
564 enum class PushFlags
{
571 bool alwaysPop(const UseInfo
& ui
) {
573 ui
.popper
.blk
!= NoBlockId
||
574 ui
.actions
.size() > 1;
577 template<typename
... Args
>
578 bool alwaysPop(const UseInfo
& ui
, Args
&&... args
) {
579 return alwaysPop(ui
) || alwaysPop(std::forward
<Args
>(args
)...);
582 bool maybePop(Env
& env
, const UseInfo
& ui
) {
584 (ui
.actions
.size() == 1 &&
585 ui
.actions
.begin()->first
.idx
> env
.id
.idx
+ 1);
588 template<typename
... Args
>
589 bool maybePop(Env
& env
, const UseInfo
& ui
, Args
&&... args
) {
590 return maybePop(env
, ui
) || maybePop(env
, std::forward
<Args
>(args
)...);
593 template<typename
... Args
>
594 bool shouldPopOutputs(Env
& env
, Args
&&... args
) {
595 if (alwaysPop(std::forward
<Args
>(args
)...)) return true;
596 return maybePop(env
, std::forward
<Args
>(args
)...);
599 template<typename
... Args
>
600 void popOutputs(Env
& env
, Args
&&... args
) {
601 if (shouldPopOutputs(env
, std::forward
<Args
>(args
)...)) {
602 markUisDead(env
, std::forward
<Args
>(args
)...);
603 env
.dceState
.actionMap
[env
.id
] = DceAction::PopOutputs
;
606 markUisLive(env
, std::forward
<Args
>(args
)...);
609 template<typename
... Args
>
610 void handle_push(Env
& env
, PushFlags pf
, Args
&&... uis
) {
612 case PushFlags::MarkLive
:
613 markUisLive(env
, std::forward
<Args
>(uis
)...);
615 case PushFlags::MarkDead
:
616 markUisDead(env
, std::forward
<Args
>(uis
)...);
618 case PushFlags::PassThrough
:
619 passThrough(env
, std::move(uis
)...);
621 case PushFlags::PopOutputs
:
622 popOutputs(env
, std::forward
<Args
>(uis
)...);
628 void push(Env
& env
, F fun
) {
629 always_assert(!env
.dceState
.stack
.empty());
630 auto ui
= std::move(env
.dceState
.stack
.back());
631 env
.dceState
.stack
.pop_back();
632 FTRACE(2, " {}@{} = push()\n", show(ui
.usage
), show(ui
.actions
));
633 auto const f
= fun(ui
);
634 handle_push(env
, f
, std::move(ui
));
637 void push(Env
& env
) {
638 push(env
, [](const UseInfo
&) { return PushFlags::MarkLive
; });
642 void push2(Env
& env
, F fun
) {
643 always_assert(env
.dceState
.stack
.size() >= 2);
644 auto u1
= std::move(env
.dceState
.stack
.back());
645 env
.dceState
.stack
.pop_back();
646 FTRACE(2, " {}@{} = push()\n", show(u1
.usage
), show(u1
.actions
));
647 auto u2
= std::move(env
.dceState
.stack
.back());
648 env
.dceState
.stack
.pop_back();
649 FTRACE(2, " {}@{} = push()\n", show(u1
.usage
), show(u2
.actions
));
650 auto const f
= fun(u1
, u2
);
651 handle_push(env
, f
, std::move(u1
), std::move(u2
));
// Push handler for instructions whose only effect is their output:
// dead if the value is unused, live otherwise.
void pushRemovable(Env& env) {
  push(env,[] (const UseInfo& ui) {
    return ui.usage == Use::Not ? PushFlags::MarkDead : PushFlags::MarkLive;
  });
}
660 //////////////////////////////////////////////////////////////////////
/*
 * Add a whole set of locals to the gen set (they may be read here):
 * they become live, and any pending kills of them are cancelled.
 */
void addLocGenSet(Env& env, std::bitset<kMaxTrackedLocals> locs) {
  FTRACE(4, " loc-conservative: {}\n",
         loc_bits_string(env.dceState.ainfo.ctx.func, locs));
  env.dceState.liveLocals |= locs;
  env.dceState.locGen |= locs;
  env.dceState.locKill &= ~locs;
  env.dceState.locKillBeforePEI &= ~locs;
}
672 void addLocGen(Env
& env
, uint32_t id
) {
673 FTRACE(2, " loc-gen: {}\n", id
);
674 if (id
>= kMaxTrackedLocals
) return;
675 env
.dceState
.liveLocals
[id
] = 1;
676 env
.dceState
.locGen
[id
] = 1;
677 env
.dceState
.locKill
[id
] = 0;
678 env
.dceState
.locKillBeforePEI
[id
] = 0;
681 void addLocKill(Env
& env
, uint32_t id
) {
682 FTRACE(2, " loc-kill: {}\n", id
);
683 if (id
>= kMaxTrackedLocals
) return;
684 env
.dceState
.liveLocals
[id
] = 0;
685 env
.dceState
.locGen
[id
] = 0;
686 env
.dceState
.locKill
[id
] = 1;
687 env
.dceState
.locKillBeforePEI
[id
] = 1;
690 bool isLocLive(Env
& env
, uint32_t id
) {
691 if (id
>= kMaxTrackedLocals
) {
692 // Conservatively assume it's potentially live.
695 return env
.dceState
.liveLocals
[id
];
// Raw (pre-instruction) type of the given local.
Type locRaw(Env& env, LocalId loc) {
  return env.stateBefore.locals[loc];
}
702 bool setLocCouldHaveSideEffects(Env
& env
, LocalId loc
, bool forExit
= false) {
703 // Normally, if there's an equivLocal this isn't the last reference,
704 // so overwriting it won't run any destructors. But if we're
705 // destroying all the locals (eg RetC) they can't all protect each
706 // other; in that case we require a lower equivLoc to ensure that
707 // one local from each equivalence set will still be marked as
708 // having effects (choosing lower numbers also means we mark the
709 // params as live, which makes it more likely that we can eliminate
711 static_assert(NoLocalId
== std::numeric_limits
<LocalId
>::max(),
712 "NoLocalId must be greater than all valid local ids");
713 if (env
.stateBefore
.equivLocals
.size() > loc
) {
714 auto l
= env
.stateBefore
.equivLocals
[loc
];
715 if (l
!= NoLocalId
) {
716 if (!forExit
) return false;
718 if (l
< loc
) return false;
719 l
= env
.stateBefore
.equivLocals
[l
];
724 // If this local is bound to a static, its type will be TRef, but we
725 // may know the type of the static - but statics *are* live out of
726 // the function, so don't do this when forExit is set.
728 env
.stateBefore
.localStaticBindings
.size() > loc
&&
729 env
.stateBefore
.localStaticBindings
[loc
] == LocalStaticBinding::Bound
&&
730 !setCouldHaveSideEffects(env
.dceState
.ainfo
.localStaticTypes
[loc
])) {
734 return setCouldHaveSideEffects(locRaw(env
, loc
));
737 void readDtorLocs(Env
& env
) {
738 for (auto i
= LocalId
{0}; i
< env
.stateBefore
.locals
.size(); ++i
) {
739 if (setLocCouldHaveSideEffects(env
, i
, true)) {
745 //////////////////////////////////////////////////////////////////////
748 void readSlot(Env
& env
, uint32_t id
) {
749 FTRACE(2, " read-slot: {}\n", id
);
750 if (id
>= kMaxTrackedClsRefSlots
) return;
751 env
.dceState
.liveSlots
[id
] = 1;
752 env
.dceState
.usedSlots
[id
] = 1;
753 env
.dceState
.slotDependentActions
[id
].clear();
756 // Read a slot, but in a usage that is discardable. If this read actually is
757 // discarded, then also discard the given instructions.
758 void readSlotDiscardable(Env
& env
, uint32_t id
, DceActionMap actions
) {
759 FTRACE(2, " read-slot (discardable): {}\n", id
);
760 if (id
>= kMaxTrackedClsRefSlots
) return;
761 env
.dceState
.liveSlots
[id
] = 0;
762 env
.dceState
.usedSlots
[id
] = 1;
763 actions
.insert({env
.id
, DceAction::Kill
});
764 env
.dceState
.slotDependentActions
[id
] = std::move(actions
);
767 void writeSlot(Env
& env
, uint32_t id
) {
768 FTRACE(2, " write-slot: {}\n", id
);
769 if (id
>= kMaxTrackedClsRefSlots
) return;
770 env
.dceState
.liveSlots
[id
] = 0;
771 env
.dceState
.usedSlots
[id
] = 1;
772 env
.dceState
.slotDependentActions
[id
].clear();
775 bool isSlotLive(Env
& env
, uint32_t id
) {
776 if (id
>= kMaxTrackedClsRefSlots
) return true;
777 return env
.dceState
.liveSlots
[id
];
// Take (leaving it empty) the action set tied to discarding slot `id`.
DceActionMap slotDependentActions(Env& env, uint32_t id) {
  return std::move(env.dceState.slotDependentActions[id]);
}
784 //////////////////////////////////////////////////////////////////////
787 * Note that the instructions with popConds are relying on the consumer of the
788 * values they push to check whether lifetime changes can have side-effects.
790 * For example, in bytecode like this, assuming $x is an object with a
796 * PopC $x // dtor should be here.
798 * The PopC will decide it can't be eliminated, which prevents us from
799 * eliminating the CGetL.
// PopC: only removable if the popped value can't be the last reference
// to something with a destructor (see discardNonDtors).
void dce(Env& env, const bc::PopC&) { discardNonDtors(env); }
803 // For PopV and PopR currently we never know if can't run a
// PopU pops an uninit value, which can never run a destructor.
void dce(Env& env, const bc::PopU&) { discard(env); }
// Instructions that push a constant (or otherwise side-effect-free)
// value: removable whenever their output is unused.
void dce(Env& env, const bc::Int&) { pushRemovable(env); }
void dce(Env& env, const bc::String&) { pushRemovable(env); }
void dce(Env& env, const bc::Array&) { pushRemovable(env); }
void dce(Env& env, const bc::Dict&) { pushRemovable(env); }
void dce(Env& env, const bc::Vec&) { pushRemovable(env); }
void dce(Env& env, const bc::Keyset&) { pushRemovable(env); }
void dce(Env& env, const bc::Double&) { pushRemovable(env); }
void dce(Env& env, const bc::True&) { pushRemovable(env); }
void dce(Env& env, const bc::False&) { pushRemovable(env); }
void dce(Env& env, const bc::Null&) { pushRemovable(env); }
void dce(Env& env, const bc::NullUninit&) { pushRemovable(env); }
void dce(Env& env, const bc::File&) { pushRemovable(env); }
void dce(Env& env, const bc::Dir&) { pushRemovable(env); }
void dce(Env& env, const bc::NewArray&) { pushRemovable(env); }
void dce(Env& env, const bc::NewDictArray&) { pushRemovable(env); }
void dce(Env& env, const bc::NewMixedArray&) { pushRemovable(env); }
void dce(Env& env, const bc::NewCol&) { pushRemovable(env); }
void dce(Env& env, const bc::CheckProp&) { pushRemovable(env); }
825 void dce(Env
& env
, const bc::UnboxRNop
&) {
826 push(env
, [&] (const UseInfo
& ui
) {
828 return allUnused(ui
) ?
829 PushFlags::PopOutputs
: PushFlags::MarkLive
;
833 void dce(Env
& env
, const bc::ClsRefName
& op
) {
834 // If the usage of the name is discardable, then so is this read of the
836 push(env
, [&] (UseInfo
& ui
) {
837 if (ui
.popper
.blk
== NoBlockId
&& allUnused(ui
)) {
838 // we don't yet support cross-block dce of classrefs
839 readSlotDiscardable(env
, op
.slot
, std::move(ui
.actions
));
841 readSlot(env
, op
.slot
);
843 return PushFlags::MarkLive
;
847 void dce(Env
& env
, const bc::ClsRefGetC
& op
) {
848 // If the usage of this class-ref slot is dead, then it can be potentially be
849 // removed if the source is dead as well.
850 if (!isSlotLive(env
, op
.slot
)) {
851 auto actions
= slotDependentActions(env
, op
.slot
);
852 actions
.insert({env
.id
, DceAction::Kill
});
853 writeSlot(env
, op
.slot
);
856 popCouldRunDestructor(env
) ? Use::UsedIfLastRef
: Use::Not
,
860 writeSlot(env
, op
.slot
);
864 void dce(Env
& env
, const bc::ClsRefGetL
& op
) {
865 auto const ty
= locRaw(env
, op
.loc1
);
867 if (!isSlotLive(env
, op
.slot
) && !readCouldHaveSideEffects(ty
)) {
868 markSetDead(env
, slotDependentActions(env
, op
.slot
));
872 addLocGen(env
, op
.loc1
);
873 writeSlot(env
, op
.slot
);
// DiscardClsRef reads the slot only to throw it away, so the read is
// discardable, with no extra instructions tied to it.
void dce(Env& env, const bc::DiscardClsRef& op) {
  readSlotDiscardable(env, op.slot, {});
}
880 void dce(Env
& env
, const bc::Dup
&) {
882 // - If the duplicated value is not used, delete the dup and all
883 // its consumers, then
884 // o If the original value is unused pass that on to let the
885 // instruction which pushed it decide whether to kill it
886 // o Otherwise we're done.
887 // - Otherwise, if the original value is unused, kill the dup
888 // and all dependent instructions.
889 push(env
, [&] (const UseInfo
& u1
) {
890 // Dup pushes a cell that is guaranteed to be not the last reference.
891 // So, it can be eliminated if the cell it pushes is used as either
892 // Use::Not or Use::UsedIfLastRef.
893 auto u1_unused
= allUnusedIfNotLastRef(u1
);
894 push(env
, [&] (UseInfo
& u2
) {
897 return PushFlags::PassThrough
;
900 return PushFlags::MarkDead
;
903 return PushFlags::MarkLive
;
905 return u1_unused
? PushFlags::MarkDead
: PushFlags::MarkLive
;
// CGetL: removable if its output is unused and reading the local can't
// raise (locRaw can't be Uninit); otherwise the local joins the gen set.
void dce(Env& env, const bc::CGetL& op) {
  push(env, [&] (const UseInfo& ui) {
    if (allUnused(ui) && !readCouldHaveSideEffects(locRaw(env, op.loc1))) {
      return PushFlags::MarkDead;
    }
    addLocGen(env, op.loc1);
    return PushFlags::MarkLive;
  });
}
919 void dce(Env
& env
, const bc::CGetL2
& op
) {
920 auto const ty
= locRaw(env
, op
.loc1
);
921 addLocGen(env
, op
.loc1
);
923 push2(env
, [&] (const UseInfo
& u1
, const UseInfo
& u2
) {
924 if (readCouldHaveSideEffects(ty
) || !allUnused(u1
, u2
)) {
926 return PushFlags::MarkLive
;
928 return PushFlags::PassThrough
;
// Terminal instructions: each consumes its input, and locals whose
// destruction could be observable must be treated as read at function
// exit (readDtorLocs).  Exit also pushes a value.
void dce(Env& env, const bc::RetC&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Throw&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Fatal&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Exit&) { push(env); pop(env); readDtorLocs(env); }
938 void dce(Env
& env
, const bc::SetL
& op
) {
939 auto const effects
= setLocCouldHaveSideEffects(env
, op
.loc1
);
940 if (!isLocLive(env
, op
.loc1
) && !effects
) {
941 assert(!locRaw(env
, op
.loc1
).couldBe(TRef
) ||
942 env
.stateBefore
.localStaticBindings
[op
.loc1
] ==
943 LocalStaticBinding::Bound
);
944 return markDead(env
);
948 if (effects
|| locRaw(env
, op
.loc1
).couldBe(TRef
)) {
949 addLocGen(env
, op
.loc1
);
951 addLocKill(env
, op
.loc1
);
955 void dce(Env
& env
, const bc::UnsetL
& op
) {
956 auto const oldTy
= locRaw(env
, op
.loc1
);
957 if (oldTy
.subtypeOf(TUninit
)) return markDead(env
);
959 // Unsetting a local bound to a static never has side effects
960 // because the static itself has a reference to the value.
962 setLocCouldHaveSideEffects(env
, op
.loc1
) &&
963 (env
.stateBefore
.localStaticBindings
.size() <= op
.loc1
||
964 env
.stateBefore
.localStaticBindings
[op
.loc1
] != LocalStaticBinding::Bound
);
966 if (!isLocLive(env
, op
.loc1
) && !effects
) return markDead(env
);
968 addLocGen(env
, op
.loc1
);
970 addLocKill(env
, op
.loc1
);
/*
 * IncDecL is a read-modify-write: can be removed if the local isn't live, the
 * set can't have side effects, and no one reads the value it pushes. If the
 * instruction is not dead, always add the local to the set of upward exposed
 * uses.
 */
void dce(Env& env, const bc::IncDecL& op) {
  auto const oldTy = locRaw(env, op.loc1);
  auto const effects = setLocCouldHaveSideEffects(env, op.loc1) ||
                       readCouldHaveSideEffects(oldTy);
  push(env, [&] (const UseInfo& ui) {
    if (!isLocLive(env, op.loc1) && !effects && allUnused(ui)) {
      return PushFlags::MarkDead;
    }
    addLocGen(env, op.loc1);
    return PushFlags::MarkLive;
  });
}
994 * SetOpL is like IncDecL, but with the complication that we don't know if we
995 * can mark it dead when visiting it, because it is going to pop an input but
996 * unlike SetL doesn't push the value it popped. For the current scheme we
997 * just add the local to gen even if we're doing a removable push, which is
998 * correct but could definitely fail to eliminate some earlier stores.
1000 void dce(Env
& env
, const bc::SetOpL
& op
) {
1001 auto const oldTy
= locRaw(env
, op
.loc1
);
1002 auto const effects
= setLocCouldHaveSideEffects(env
, op
.loc1
) ||
1003 readCouldHaveSideEffects(oldTy
);
1005 push(env
, [&] (const UseInfo
& ui
) {
1006 if (!isLocLive(env
, op
.loc1
) && !effects
&& allUnused(ui
)) {
1007 return PushFlags::PassThrough
;
1010 return PushFlags::MarkLive
;
1013 addLocGen(env
, op
.loc1
);
1017 * Default implementation is conservative: assume we use all of our
1018 * inputs, and can't be removed even if our output is unused.
1020 * We also assume all the locals in the mayReadLocalSet must be
1021 * added to the live local set, and don't remove anything from it.
1024 template<typename Op
>
1025 typename
std::enable_if
<has_car
<Op
>::value
,void>::type
1026 dce_slot_default(Env
& env
, const Op
& op
) {
1027 readSlot(env
, op
.slot
);
1030 template<typename Op
>
1031 typename
std::enable_if
<has_caw
<Op
>::value
,void>::type
1032 dce_slot_default(Env
& env
, const Op
& op
) {
1033 writeSlot(env
, op
.slot
);
1036 template<typename Op
>
1037 typename
std::enable_if
<!has_car
<Op
>::value
&& !has_caw
<Op
>::value
,void>::type
1038 dce_slot_default(Env
& env
, const Op
& op
) {}
// Fallback dce() for any opcode without a specific handler above.
// Conservative: every local in the instruction's mayReadLocalSet is
// gen'd and marked live, each pushed/popped stack cell is visited,
// and class-ref slot reads/writes are recorded via dce_slot_default.
// NOTE(review): mangled extraction -- the "template<class Op>" header
// (original line 1040) and the bodies of the two loops (original
// lines 1045-1046 and 1048-1049) are missing from this view.
1041 void dce(Env
& env
, const Op
& op
) {
// Anything this op may read becomes upward-exposed...
1042 addLocGenSet(env
, env
.flags
.mayReadLocalSet
);
// ...and is live before the instruction.
1043 env
.dceState
.liveLocals
|= env
.flags
.mayReadLocalSet
;
// Visit each value the instruction pushes (loop body not visible).
1044 for (auto i
= uint32_t{0}; i
< op
.numPush(); ++i
) {
// Visit each value the instruction pops (loop body not visible).
1047 for (auto i
= uint32_t{0}; i
< op
.numPop(); ++i
) {
// Record class-ref slot uses for ops that have them.
1050 dce_slot_default(env
, op
);
1053 void dispatch_dce(Env
& env
, const Bytecode
& op
) {
1054 #define O(opcode, ...) case Op::opcode: dce(env, op.opcode); return;
1055 switch (op
.op
) { OPCODES
}
1060 //////////////////////////////////////////////////////////////////////
// dce_visit: runs the DCE transfer function backward over one block,
// starting from the given live-out sets and (optional) dce stack, and
// returns the accumulated DceState (or folly::none for an
// unreachable block).
// NOTE(review): mangled extraction -- each original source line is
// split across several lines here, and some original lines (the body
// of the unreachable-block early return, the Env initializer fields
// after "auto visit_env = Env {", and the function tail after the
// final assert) are missing from this view.
1062 folly::Optional
<DceState
>
1063 dce_visit(const Index
& index
,
1064 const FuncAnalysis
& fa
,
1065 borrowed_ptr
<const php::Block
> const blk
,
1066 const State
& stateIn
,
1067 std::bitset
<kMaxTrackedLocals
> locLiveOut
,
1068 std::bitset
<kMaxTrackedLocals
> locLiveOutExn
,
1069 const folly::Optional
<std::vector
<UseInfo
>>& dceStack
) {
// Uninitialized in-state means the block was never reached by interp.
1070 if (!stateIn
.initialized
) {
1072 * Skip unreachable blocks.
1074 * For DCE analysis it is ok to assume the transfer function is
1075 * the identity on unreachable blocks (i.e. gen and kill sets are
1076 * empty). For optimize, we don't need to bother doing anything
1077 * to these---another pass is responsible for removing completely
1078 * unreachable blocks.
// Re-interpret the block to get the state before each instruction.
1083 auto const states
= locally_propagated_states(index
, fa
, blk
, stateIn
);
// Seed the DCE state from the live-out information.
1085 auto dceState
= DceState
{ fa
};
1086 dceState
.liveLocals
= locLiveOut
;
1087 dceState
.liveSlots
.set();
1088 dceState
.usedLocals
= locLiveOut
;
// Use the successor-provided stack when available; otherwise assume
// every stack slot is Used.
1091 dceState
.stack
= *dceStack
;
1093 dceState
.stack
.resize(states
.back().first
.stack
.size(),
1094 UseInfo
{ Use::Used
});
1096 dceState
.slotDependentActions
.resize(states
.back().first
.clsRefSlots
.size());
// Walk the instructions in reverse.
1098 for (uint32_t idx
= blk
->hhbcs
.size(); idx
-- > 0;) {
1099 auto const& op
= blk
->hhbcs
[idx
];
1101 FTRACE(2, " == #{} {}\n", idx
, show(fa
.ctx
.func
, op
));
// Build the per-instruction Env (initializer fields not visible).
1103 auto visit_env
= Env
{
1110 dispatch_dce(visit_env
, op
);
1113 * When we see a PEI, we need to start over on the killBeforePEI
1114 * set, and the local-liveness must take into account the fact
1115 * that we could take an exception edge here (or'ing in the
1118 if (states
[idx
].second
.wasPEI
) {
1119 FTRACE(2, " <-- exceptions\n");
1120 dceState
.liveLocals
|= locLiveOutExn
;
1121 dceState
.liveSlots
.set();
1122 dceState
.locKillBeforePEI
.reset();
// Track every local observed live at any point in the block.
1125 dceState
.usedLocals
|= dceState
.liveLocals
;
// Trace-only dumps of the dce stack, interp stack, and slots.
1127 FTRACE(4, " dce stack: {}\n",
1129 using namespace folly::gen
;
1130 return from(dceState
.stack
)
1131 | map([&] (const UseInfo
& ui
) { return show(ui
); })
1132 | unsplit
<std::string
>(" ");
1135 FTRACE(4, " interp stack: {}\n",
1137 using namespace folly::gen
;
1138 return from(states
[idx
].first
.stack
)
1139 | map([&] (const StackElem
& e
) { return show(e
.type
); })
1140 | unsplit
<std::string
>(" ");
1144 FTRACE(4, " cls-ref slots: {}\n{}\n",
1145 slot_bits_string(dceState
.ainfo
.ctx
.func
, dceState
.liveSlots
),
1147 using namespace folly::gen
;
1148 auto i
= uint32_t{0};
1149 return from(dceState
.slotDependentActions
)
1151 [&] (const DceActionMap
& s
) {
1152 if (s
.empty()) return std::string
{};
1153 return folly::sformat(" {}: [{}]\n",
1156 | unsplit
<std::string
>("");
1160 // We're now at the state before this instruction, so the stack
1161 // sizes must line up.
1162 assert(dceState
.stack
.size() == states
[idx
].first
.stack
.size());
1168 struct DceAnalysis
{
1169 std::bitset
<kMaxTrackedLocals
> locGen
;
1170 std::bitset
<kMaxTrackedLocals
> locKill
;
1171 std::bitset
<kMaxTrackedLocals
> locKillExn
;
1172 std::vector
<UseInfo
> stack
;
1173 std::set
<InstrId
> usedStackSlots
;
1176 DceAnalysis
analyze_dce(const Index
& index
,
1177 const FuncAnalysis
& fa
,
1178 borrowed_ptr
<php::Block
> const blk
,
1179 const State
& stateIn
,
1180 const folly::Optional
<std::vector
<UseInfo
>>& dceStack
) {
1181 // During this analysis pass, we have to assume everything could be
1182 // live out, so we set allLive here. (Later we'll determine the
1183 // real liveOut sets.)
1184 auto allLocLive
= std::bitset
<kMaxTrackedLocals
>{};
1186 if (auto dceState
= dce_visit(index
, fa
, blk
, stateIn
,
1187 allLocLive
, allLocLive
, dceStack
)) {
1188 return DceAnalysis
{
1191 dceState
->locKillBeforePEI
,
1193 dceState
->usedStackSlots
1196 return DceAnalysis
{};
// dce_perform: applies the accumulated DceActionMap to the function's
// bytecode -- killing dead instructions and inserting pops after
// instructions whose outputs are unused.
// NOTE(review): mangled extraction -- the tail of the PopOutputs case
// (original lines 1221-1228, including what is inserted and the
// closing braces) is missing from this view.
1200 * Do the actual updates to the bytecodes.
1202 void dce_perform(const php::Func
& func
, const DceActionMap
& actionMap
) {
1203 for (auto const& elm
: actionMap
) {
1204 auto const& id
= elm
.first
;
// Resolve the block containing the instruction to modify.
1205 auto const b
= borrow(func
.blocks
[id
.blk
]);
1206 FTRACE(1, "{}: {}:{} {}\n", show(elm
.second
),
1207 id
.blk
, id
.idx
, show(func
, b
->hhbcs
[id
.idx
]));
1208 switch (elm
.second
) {
// Kill: remove the instruction (or replace with Nop if it is the
// only one, since empty blocks are not allowed).
1209 case DceAction::Kill
:
1210 if (b
->hhbcs
.size() == 1) {
1211 // we don't allow empty blocks
1212 b
->hhbcs
[0] = bc::Nop
{};
1214 b
->hhbcs
.erase(b
->hhbcs
.begin() + id
.idx
);
// PopOutputs: keep the instruction but discard everything it pushes
// by inserting pops right after it.
1217 case DceAction::PopOutputs
:
1219 auto const numPop
= b
->hhbcs
[id
.idx
].numPush();
1220 b
->hhbcs
.insert(b
->hhbcs
.begin() + id
.idx
+ 1,
1229 struct DceOptResult
{
1230 std::bitset
<kMaxTrackedLocals
> usedLocals
;
1231 std::bitset
<kMaxTrackedClsRefSlots
> usedSlots
;
1232 DceActionMap actionMap
;
1236 optimize_dce(const Index
& index
,
1237 const FuncAnalysis
& fa
,
1238 borrowed_ptr
<php::Block
> const blk
,
1239 const State
& stateIn
,
1240 std::bitset
<kMaxTrackedLocals
> locLiveOut
,
1241 std::bitset
<kMaxTrackedLocals
> locLiveOutExn
,
1242 const folly::Optional
<std::vector
<UseInfo
>>& dceStack
) {
1243 auto dceState
= dce_visit(
1252 return {std::bitset
<kMaxTrackedLocals
>{},
1253 std::bitset
<kMaxTrackedClsRefSlots
>{}};
1257 std::move(dceState
->usedLocals
),
1258 std::move(dceState
->usedSlots
),
1259 std::move(dceState
->actionMap
)
1263 //////////////////////////////////////////////////////////////////////
1265 void remove_unused_locals(Context
const ctx
,
1266 std::bitset
<kMaxTrackedLocals
> usedLocals
) {
1267 if (!options
.RemoveUnusedLocals
) return;
1268 auto const func
= ctx
.func
;
1271 * Removing unused locals in closures requires checking which ones
1272 * are captured variables so we can remove the relevant properties,
1273 * and then we'd have to mutate the CreateCl callsite, so we don't
1276 * Note: many closure bodies have unused $this local, because of
1277 * some emitter quirk, so this might be worthwhile.
1279 if (func
->isClosureBody
) return;
1281 for (auto loc
= func
->locals
.begin() + func
->params
.size();
1282 loc
!= func
->locals
.end(); ++loc
) {
1284 assert(loc
->id
< kMaxTrackedLocals
&& !usedLocals
.test(loc
->id
));
1287 if (loc
->id
< kMaxTrackedLocals
&& !usedLocals
.test(loc
->id
)) {
1288 FTRACE(2, " killing: {}\n", local_string(*func
, loc
->id
));
1289 const_cast<php::Local
&>(*loc
).killed
= true;
1294 //////////////////////////////////////////////////////////////////////
1298 struct WritableClsRefSlotVisitor
: boost::static_visitor
<ClsRefSlotId
*> {
1300 typename
std::enable_if
<
1301 !has_car
<T
>::value
&& !has_caw
<T
>::value
, ClsRefSlotId
*
1303 operator()(T
& t
) const { return nullptr; }
1306 typename
std::enable_if
<
1307 has_car
<T
>::value
|| has_caw
<T
>::value
, ClsRefSlotId
*
1309 operator()(T
& t
) const { return &t
.slot
; }
1314 // Remove totally unused class-ref slots. Shift any usages downward so we can
1315 // reduce the total number of class-ref slots needed.
// NOTE(review): mangled extraction -- several original lines are
// missing from this view (see notes inline); verify against upstream
// before relying on this text.
1316 void remove_unused_clsref_slots(Context
const ctx
,
1317 std::bitset
<kMaxTrackedClsRefSlots
> usedSlots
) {
1318 if (!options
.RemoveUnusedClsRefSlots
) return;
1319 auto func
= ctx
.func
;
1321 auto numSlots
= func
->numClsRefSlots
;
// Bail if there are more slots than the bitset tracks.
1322 if (numSlots
> kMaxTrackedClsRefSlots
) return;
1324 auto unusedSlots
= usedSlots
;
// NOTE(review): the original line(s) between here and the loop are
// missing; the loop below treats unusedSlots as the *complement* of
// usedSlots, so presumably it is inverted (flip) here -- confirm.
1327 // Construct a mapping of class-ref slots that should be rewritten to a
1328 // different one. For each totally unused class-ref slot, rewrite a higher
1329 // (used) class-ref to it.
1330 boost::container::flat_map
<size_t, size_t> rewrites
;
// Shrink from the top: move the highest used slot into the lowest
// unused one until no unused slot lies below a used one.
1331 while (numSlots
> 0) {
1332 if (!unusedSlots
[numSlots
- 1]) {
1333 auto const unused
= bitset_find_first(unusedSlots
);
1334 if (unused
>= numSlots
) break;
1335 unusedSlots
[unused
] = false;
1336 unusedSlots
[numSlots
- 1] = true;
1337 rewrites
[numSlots
- 1] = unused
;
// Trace-only dump of the computed rewrite map.
1342 FTRACE(2, " cls-ref rewrites: {}\n",
1344 using namespace folly::gen
;
1345 return from(rewrites
)
1347 [&] (const std::pair
<size_t, size_t>& p
) {
1348 return folly::sformat("{}->{}", p
.first
, p
.second
);
1350 | unsplit
<std::string
>(";");
1354 // Do the actually rewriting
// Patch every bytecode whose class-ref slot appears in the map.
1355 if (!rewrites
.empty()) {
1356 for (auto& block
: func
->blocks
) {
1357 for (auto& bcop
: block
->hhbcs
) {
1358 if (auto* slot
= visit(bcop
, WritableClsRefSlotVisitor
{})) {
1359 auto const iter
= rewrites
.find(*slot
);
1360 if (iter
== rewrites
.end()) continue;
1361 auto const oldOp
= bcop
;
1362 *slot
= iter
->second
;
1363 FTRACE(4, " rewriting {} to {}\n",
1364 show(func
, oldOp
), show(func
, bcop
));
// Record the (possibly reduced) slot count on the function.
1370 func
->numClsRefSlots
= numSlots
;
1373 //////////////////////////////////////////////////////////////////////
1377 void local_dce(const Index
& index
,
1378 const FuncAnalysis
& ainfo
,
1379 borrowed_ptr
<php::Block
> const blk
,
1380 const State
& stateIn
) {
1381 Trace::Bump bumper
{Trace::hhbbc_dce
, kSystemLibBump
,
1382 is_systemlib_part(*ainfo
.ctx
.unit
)};
1384 // For local DCE, we have to assume all variables are in the
1385 // live-out set for the block.
1386 auto allLocLive
= std::bitset
<kMaxTrackedLocals
>();
1388 auto const ret
= optimize_dce(index
, ainfo
, blk
, stateIn
,
1389 allLocLive
, allLocLive
, folly::none
);
1391 dce_perform(*ainfo
.ctx
.func
, ret
.actionMap
);
1394 //////////////////////////////////////////////////////////////////////
// global_dce: whole-function DCE.  Runs a backward liveness dataflow
// over the CFG to a fixed point, then re-runs DCE per block with the
// real live-out sets to delete dead stores/instructions, and finally
// removes locals and class-ref slots that are entirely unused.
// NOTE(review): mangled extraction -- original source lines are split
// across several lines here and some lines are missing (the
// "struct BlockState {" header, several lambda/call closers, and the
// argument lines of the analyze_dce/optimize_dce calls).
1396 void global_dce(const Index
& index
, const FuncAnalysis
& ai
) {
// Quiet tracing for systemlib code.
1397 Trace::Bump bumper
{Trace::hhbbc_dce
, kSystemLibBump
,
1398 is_systemlib_part(*ai
.ctx
.unit
)};
// Map a block to its reverse-post-order id (worklist priority).
1400 auto rpoId
= [&] (borrowed_ptr
<php::Block
> blk
) {
1401 return ai
.bdata
[blk
->id
].rpoId
;
1404 FTRACE(1, "|---- global DCE analyze ({})\n", show(ai
.ctx
));
// Trace-only dump of the function's locals.
1405 FTRACE(2, "{}", [&] {
1406 using namespace folly::gen
;
1407 auto i
= uint32_t{0};
1408 return from(ai
.ctx
.func
->locals
)
1410 [&] (const php::Local
& l
) {
1411 return folly::sformat(" {} {}\n",
1412 i
++, local_string(*ai
.ctx
.func
, l
.id
));
1414 | unsplit
<std::string
>("");
1418 * States for each block, indexed by block id.
1420 * The liveOut state is the union of liveIn states of each normal
1421 * successor, and liveOutExn is the union of liveIn states of each
1422 * exceptional successor.
1424 * The dceStack is the state of the dceStack at the end of the
1425 * block; it contains a UseInfo for each stack slot, which is the
1426 * union of the corresponding UseInfo's from the start of the
1427 * successor blocks (its folly::none if it has no successor blocks,
1428 * or if the successor blocks have not yet been visited).
// NOTE(review): the "struct BlockState {" header line is missing
// here; these are its three fields.
1431 std::bitset
<kMaxTrackedLocals
> locLiveOut
;
1432 std::bitset
<kMaxTrackedLocals
> locLiveOutExn
;
1433 folly::Optional
<std::vector
<UseInfo
>> dceStack
;
1435 std::vector
<BlockState
> blockStates(ai
.ctx
.func
->blocks
.size());
1438 * Set of block reverse post order ids that still need to be
1439 * visited. This is ordered by std::greater, so we'll generally
1440 * visit blocks before their predecessors. The algorithm does not
1441 * depend on this for correctness, but it will reduce iteration, and
1442 * the optimality of mergeDceStack depends on visiting at least one
1443 * successor prior to visiting any given block.
1445 * Every block must be visited at least once, so we throw them all
// Seed the worklist with every block.
1448 auto incompleteQ
= dataflow_worklist
<uint32_t,std::less
<uint32_t>>(
1451 for (auto& b
: ai
.rpoBlocks
) incompleteQ
.push(rpoId(b
));
// Precompute predecessor sets for normal and exceptional edges.
1453 auto const normalPreds
= computeNormalPreds(ai
.rpoBlocks
);
1454 auto const factoredPreds
= computeFactoredPreds(ai
.rpoBlocks
);
1457 * Suppose a stack slot isn't used, but it was pushed on two separate
1458 * paths, one of which could be killed, but the other can't. We have
1459 * to prevent either, so any time we find a non-used stack slot that
1460 * can't be killed, we add it to this set (via markUisLive, and the
1463 std::set
<StkId
> usedStackSlots
;
// Merge a successor's entry stack (stkIn) into a predecessor's exit
// stack (stkOut); returns whether anything changed (reschedule).
1465 auto mergeDceStack
= [&] (folly::Optional
<std::vector
<UseInfo
>>& stkOut
,
1466 const std::vector
<UseInfo
>& stkIn
,
// First-visit path: initialize stkOut from stkIn.
1470 for (uint32_t i
= 0; i
< stkIn
.size(); i
++) {
1471 auto& out
= stkOut
.value()[i
];
1472 if (out
.popper
.blk
== NoBlockId
) out
.popper
= { blk
, i
};
1473 if (usedStackSlots
.count({blk
, i
})) {
1474 out
.usage
= Use::Used
;
1475 out
.actions
.clear();
// Merge path: combine per-slot usage; any disagreement or forced
// use collapses the slot to Used.
1482 assert(stkOut
->size() == stkIn
.size());
1483 for (uint32_t i
= 0; i
< stkIn
.size(); i
++) {
1484 auto& out
= stkOut
.value()[i
];
1485 if (out
.usage
== Use::Used
) {
1488 if (stkIn
[i
].usage
== Use::Used
||
1489 usedStackSlots
.count({blk
, i
})) {
1490 out
.usage
= Use::Used
;
1491 out
.actions
.clear();
1495 if (out
.usage
== stkIn
[i
].usage
) {
1496 for (auto const& id
: stkIn
[i
].actions
) {
1497 ret
|= out
.actions
.insert(id
).second
;
1499 auto popper
= stkIn
[i
].popper
;
1500 if (popper
.blk
== NoBlockId
) popper
= { blk
, i
};
1501 if (out
.popper
< stkIn
[i
].popper
) {
1503 out
.popper
= stkIn
[i
].popper
;
1506 out
.usage
= Use::Used
;
1507 out
.actions
.clear();
1516 * Iterate on live out states until we reach a fixed point.
1518 * This algorithm treats the exceptional live-out states differently
1519 * from the live-out states during normal control flow. The
1520 * liveOutExn sets only take part in the liveIn computation when the
1521 * block has factored exits.
1523 while (!incompleteQ
.empty()) {
1524 auto const blk
= ai
.rpoBlocks
[incompleteQ
.pop()];
1526 // skip unreachable blocks
1527 if (!ai
.bdata
[blk
->id
].stateIn
.initialized
) continue;
1529 FTRACE(2, "block #{}\n", blk
->id
);
1531 auto const locLiveOut
= blockStates
[blk
->id
].locLiveOut
;
1532 auto const locLiveOutExn
= blockStates
[blk
->id
].locLiveOutExn
;
// Recompute this block's transfer function (args partly missing).
1533 auto const transfer
= analyze_dce(
1537 ai
.bdata
[blk
->id
].stateIn
,
1538 blockStates
[blk
->id
].dceStack
// Standard backward liveness: liveIn = gen | (out & ~kill).
1541 auto const liveIn
= transfer
.locGen
|
1542 (locLiveOut
& ~transfer
.locKill
) |
1543 (locLiveOutExn
& ~transfer
.locKillExn
);
1545 FTRACE(2, "live out : {}\n"
1551 loc_bits_string(ai
.ctx
.func
, locLiveOut
),
1552 loc_bits_string(ai
.ctx
.func
, locLiveOutExn
),
1553 loc_bits_string(ai
.ctx
.func
, transfer
.locGen
),
1554 loc_bits_string(ai
.ctx
.func
, transfer
.locKill
),
1555 loc_bits_string(ai
.ctx
.func
, transfer
.locKillExn
),
1556 loc_bits_string(ai
.ctx
.func
, liveIn
));
1559 * If we weren't able to take advantage of any unused entries
1560 * on the dceStack that originated from another block, mark
1561 * them used to prevent other paths from trying to delete them.
1563 * Also, merge the entries into our global usedStackSlots, to
1564 * make sure they always get marked Used.
1566 for (auto const& id
: transfer
.usedStackSlots
) {
1567 if (usedStackSlots
.insert(id
).second
) {
1568 for (auto& pred
: normalPreds
[id
.blk
]) {
1569 FTRACE(2, " {} force-use {}:{}\n", id
.blk
, pred
->id
, id
.idx
);
1570 auto& dceStack
= blockStates
[pred
->id
].dceStack
;
1571 if (!dceStack
) continue;
1572 auto& ui
= dceStack
.value()[id
.idx
];
1573 if (ui
.usage
!= Use::Used
) {
1575 ui
.usage
= Use::Used
;
1576 // no need to reprocess *this* block, since this is the
1577 // one that reported the problem.
1579 incompleteQ
.push(rpoId(pred
));
1586 // Merge the liveIn into the liveOut of each normal predecessor.
1587 // If the set changes, reschedule that predecessor.
1588 for (auto& pred
: normalPreds
[blk
->id
]) {
1589 FTRACE(2, " -> {}\n", pred
->id
);
1590 auto& predState
= blockStates
[pred
->id
].locLiveOut
;
1591 auto const oldPredState
= predState
;
1592 predState
|= liveIn
;
1593 if (mergeDceStack(blockStates
[pred
->id
].dceStack
,
1594 transfer
.stack
, blk
->id
) ||
1595 predState
!= oldPredState
) {
1596 incompleteQ
.push(rpoId(pred
));
1600 // Merge the liveIn into the liveOutExn state for each exceptional
1601 // precessor. The liveIn computation also depends on the
1602 // liveOutExn state, so again reschedule if it changes.
1603 for (auto& pred
: factoredPreds
[blk
->id
]) {
1604 FTRACE(2, " => {}\n", pred
->id
);
1605 auto& predState
= blockStates
[pred
->id
].locLiveOutExn
;
1606 auto const oldPredState
= predState
;
1607 predState
|= liveIn
;
1608 if (predState
!= oldPredState
) {
1609 incompleteQ
.push(rpoId(pred
));
1615 * Now that we're at a fixed point, use the propagated states to
1616 * remove instructions that don't need to be there.
1618 FTRACE(1, "|---- global DCE optimize ({})\n", show(ai
.ctx
));
// Second pass: per-block optimize with the converged live-out sets,
// accumulating used locals/slots and the global action map.
1619 std::bitset
<kMaxTrackedLocals
> usedLocals
;
1620 std::bitset
<kMaxTrackedClsRefSlots
> usedSlots
;
1621 DceActionMap actionMap
;
1622 for (auto& b
: ai
.rpoBlocks
) {
1623 FTRACE(2, "block #{}\n", b
->id
);
1624 auto ret
= optimize_dce(
1628 ai
.bdata
[b
->id
].stateIn
,
1629 blockStates
[b
->id
].locLiveOut
,
1630 blockStates
[b
->id
].locLiveOutExn
,
1631 blockStates
[b
->id
].dceStack
1633 usedLocals
|= ret
.usedLocals
;
1634 usedSlots
|= ret
.usedSlots
;
// Merge this block's actions (move wholesale when possible).
1635 if (ret
.actionMap
.size()) {
1636 if (!actionMap
.size()) {
1637 actionMap
= std::move(ret
.actionMap
);
1639 for (auto const& elm
: ret
.actionMap
) {
1640 actionMap
.insert(elm
);
// Apply the bytecode edits, then drop unused locals and slots.
1646 dce_perform(*ai
.ctx
.func
, actionMap
);
1648 FTRACE(1, " used locals: {}\n", loc_bits_string(ai
.ctx
.func
, usedLocals
));
1649 remove_unused_locals(ai
.ctx
, usedLocals
);
1651 FTRACE(1, " used slots: {}\n", slot_bits_string(ai
.ctx
.func
, usedSlots
));
1652 remove_unused_clsref_slots(ai
.ctx
, usedSlots
);
1655 //////////////////////////////////////////////////////////////////////