2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/hhbbc/dce.h"
26 #include <boost/dynamic_bitset.hpp>
28 #include <folly/gen/Base.h>
29 #include <folly/gen/String.h>
31 #include "hphp/util/trace.h"
32 #include "hphp/util/dataflow-worklist.h"
34 #include "hphp/hhbbc/representation.h"
35 #include "hphp/hhbbc/analyze.h"
36 #include "hphp/hhbbc/interp-state.h"
37 #include "hphp/hhbbc/interp.h"
38 #include "hphp/hhbbc/type-system.h"
39 #include "hphp/hhbbc/unit-util.h"
40 #include "hphp/hhbbc/cfg.h"
42 namespace HPHP
{ namespace HHBBC
{
44 TRACE_SET_MOD(hhbbc_dce
);
46 //////////////////////////////////////////////////////////////////////
49 * This module contains code to perform both local DCE and global DCE.
51 * The local DCE algorithm addresses dead eval stack manipulations and
52 * dead stores to locals that are visible within a single basic block.
54 * The global DCE performs a liveness analysis and then uses this
55 * information to allow dead stores to locals to be eliminated across
56 * blocks. It does not attempt to remove unnecessary evaluation stack
57 * manipulation spanning basic blocks, but it uses the same local DCE
58 * code and will eliminate intra-block stack manipulations.
60 * Both types of DCE here need to be type-aware, but they must visit
61 * blocks backward. They accomplish this by forward-propagating the
62 * block input states from a FuncAnalysis to each instruction in the
63 * block prior to doing the backward iteration.
67 * During backward traversal of a block, we maintain a "backwards"
68 * stack, indicating which eval stack slots are going to be required
69 * in the future or not.
71 * During this traversal, each instruction that pops when going
72 * forward instead "pushes" information about whether that input
73 * will be required. If it is not required, it also pushes an
74 * accumulating set of instruction ids that must be removed if the
75 * instruction which produces the stack slot is removed. (All
76 * instructions in these sets must be removed if any are, in order
77 * to keep the stack depths correct.)
79 * Similarly, each instruction that would push a value going forward
80 * instead "pops" the information about whether its stack output is
81 * going to be needed. If not, the instruction can mark itself (and
82 * all downstream instructions that depended on it) as removable.
86 * While a block is iterated backward, the set of live locals is
87 * tracked. The initial state of this live set depends on whether
88 * we are performing global or local DCE, and in the local case
89 * includes all locals in the function.
91 * When a local may be read, it is added to the live set. When a
92 * local is definitely-written, it is removed from the set.
94 * If a instruction may write to a local that is not live, it can be
95 * marked as removable if it is known to have no other side-effects.
96 * Currently this is only hooked up to SetL.
100 * The global algorithm first performs a liveness analysis to
101 * propagate live out sets to each block.
103 * This analysis is basically normal, but slightly modified from the
104 * usual in order to deal with the way exceptional control flow is
105 * represented in our CFG (with factored edges).
107 * It essentially is the liveness analysis algorithm described at
108 * http://dl.acm.org/citation.cfm?id=316171, except that we don't
109 * need to track kill sets for each PEI because we don't have a
110 * means of determining which factored edges may be traversed by any
 * given PEI. (That might be more usual to have in the context
112 * of a language with declared exception clauses...)
114 * Since we only deal with the most pessimistic exception case, this
115 * means for each block we just determine a gen and kill set, along
116 * with a subset of the latter set that is the locals that must be
117 * killed before any PEI. (See killBeforePEI below.)
119 * Final note about types:
121 * Global DCE can change the types of locals in a way that spans
122 * basic blocks. For a simple example, take the following code:
124 * // $foo :: Uninit here.
126 * // ... code that breaks a block ...
129 * If the first store to $foo is removed, the type of $foo before
130 * the SetL in the later block is now Uninit, instead of Int.
132 * This means that after calling global_dce on a function, the type
133 * information in the block input states in the associated
134 * FuncAnalysis can no longer be considered accurate.
136 * Moreover, since global DCE makes use of type information to
137 * determine whether a store is dead, we need to be careful that
138 * this never changes whether the assumptions used to perform DCE
141 * This is ok right now: DCE only uses the types to figure out which
142 * values can either have lifetimes extended or shortened without
143 * visible side-effects, or which values may be refs (so we can't
144 * omit stores to them). If we omit a store that changes the type
145 * of a local globally, this means the new type cannot be different
146 * with regard to these features (or we wouldn't have omitted it),
147 * which means we won't have made different decisions elsewhere in
148 * the algorithm based on the new type.
150 * Specifically: we will never omit a store where the old local type
151 * was something that could've had side effects, and if a new value
152 * is stored into a local where destroying it could have
153 * side-effects, some point along the path to the function exit (if
154 * nothing else, the RetC) will have added it to the gen set and the
155 * store also won't be removable.
 * In contrast, note that local DCE can not change types across block boundaries.
164 //////////////////////////////////////////////////////////////////////
166 // Returns whether decrefing a type could run a destructor.
167 bool couldRunDestructor(const Type
& t
) {
168 // We could check for specialized objects to see if they don't
169 // declare a user-defined destructor, but currently don't.
170 return t
.couldBe(TObj
) || t
.couldBe(TCArr
) || t
.couldBe(TRef
);
173 // Returns whether a set on something containing type t could have
174 // side-effects (running destuctors, or modifying arbitrary things via
176 bool setCouldHaveSideEffects(const Type
& t
) {
177 return t
.couldBe(TObj
) || t
.couldBe(TCArr
) || t
.couldBe(TRef
);
180 // Some reads could raise warnings and run arbitrary code.
181 bool readCouldHaveSideEffects(const Type
& t
) {
182 return t
.couldBe(TUninit
);
185 //////////////////////////////////////////////////////////////////////
188 * Use information of a stack cell.
191 // Indicates that the cell is (unconditionally) not used.
194 // Indicates that the cell is (possibly) used.
198 * Indicates that the cell is only used if it was the last reference alive.
199 * For instance, a PopC will call the destructor of the top-of-stack object
200 * if it was the last reference alive, and this counts as an example of
203 * If the producer of the cell knows that it is not the last reference, then
204 * it can treat Use::UsedIfLastRef as being equivalent to Use::Not.
// Identifies an instruction by its index within the block being walked.
using InstrId    = size_t;
// A set of instruction ids that must all be removed together (to keep
// stack depths balanced).
using InstrIdSet = std::set<InstrId>;
// Use information for one eval-stack slot, plus the dependent
// instruction ids that must go away with the slot's producer.
using UseInfo    = std::pair<Use,InstrIdSet>;
213 //////////////////////////////////////////////////////////////////////
// NOTE(review): the enclosing struct declaration (presumably
// "struct DceState {") is not visible in this chunk; these are its
// data members.

// The function being analyzed (used for tracing/debug output).
borrowed_ptr<const php::Func> func;

/*
 * Eval stack use information. Stack slots are marked as being
 * needed or not needed. If they aren't needed, they carry a set of
 * instructions that must be removed if the instruction that
 * produces the stack value is also removable.
 */
std::vector<UseInfo> stack;

/*
 * Locals known to be live at a point in a DCE walk. This is used
 * when we're actually acting on information we discovered during
 * the analysis.
 */
std::bitset<kMaxTrackedLocals> liveLocals;

/*
 * These variable sets are used to compute the transfer function for
 * the global liveness analysis in global_dce.
 *
 * The gen set accumulates the set of variables in the block with
 * upward-exposed uses. The kill set is the set of variables the
 * block will re-define, ignoring exceptional control flow.
 *
 * The killBeforePEI set is the set of variables killed before a
 * PEI. Propagation of liveness needs to use this (always more
 * conservative) set instead of kill when crossing a factored exit
 * edge.
 */
std::bitset<kMaxTrackedLocals> gen;
std::bitset<kMaxTrackedLocals> kill;
std::bitset<kMaxTrackedLocals> killBeforePEI;

/*
 * Instructions marked in this set are dead. If any of them are
 * removed, however, they must all be removed, because of the need
 * to keep eval stack consumers and producers balanced.
 */
boost::dynamic_bitset<> markedDead{};

/*
 * The set of locals that were ever live in this block. (This
 * includes locals that were live going out of this block.) This
 * set is used by global DCE to remove locals that are completely
 * unused in the entire function.
 */
std::bitset<kMaxTrackedLocals> usedLocals;
266 //////////////////////////////////////////////////////////////////////
268 const char* show(Use u
) {
270 case Use::Not
: return "0";
271 case Use::Used
: return "U";
272 case Use::UsedIfLastRef
: return "UL";
277 std::string
show(const InstrIdSet
& set
) {
278 using namespace folly::gen
;
280 | eachTo
<std::string
>()
281 | unsplit
<std::string
>(";")
285 std::string DEBUG_ONLY
show(const UseInfo
& ui
) {
286 return folly::format("{}@{}", show(ui
.first
), show(ui
.second
)).str();
289 std::string DEBUG_ONLY
bits_string(borrowed_ptr
<const php::Func
> func
,
290 std::bitset
<kMaxTrackedLocals
> locs
) {
291 std::ostringstream out
;
292 if (func
->locals
.size() < kMaxTrackedLocals
) {
293 for (auto i
= func
->locals
.size(); i
-- > 0;) {
294 out
<< (locs
.test(i
) ? '1' : '0');
302 //////////////////////////////////////////////////////////////////////
// NOTE(review): the enclosing "struct Env {" line and its other
// members (a DceState reference `dceState` and instruction id `id`,
// both referenced by the helpers below) are not visible in this chunk.

// Interpreter state before the current instruction.
const State& stateBefore;
// Interpreter step flags for the current instruction.
const StepFlags& flags;
311 void markSetDead(Env
& env
, const InstrIdSet
& set
) {
312 env
.dceState
.markedDead
[env
.id
] = 1;
313 FTRACE(2, " marking {} {}\n", env
.id
, show(set
));
314 for (auto& i
: set
) env
.dceState
.markedDead
[i
] = 1;
317 void markDead(Env
& env
) {
318 env
.dceState
.markedDead
[env
.id
] = 1;
319 FTRACE(2, " marking {}\n", env
.id
);
322 //////////////////////////////////////////////////////////////////////
325 void pop(Env
& env
, Use u
, InstrIdSet set
) {
326 FTRACE(2, " pop({})\n", show(u
));
327 env
.dceState
.stack
.emplace_back(u
, std::move(set
));
329 void pop(Env
& env
) { pop(env
, Use::Used
, InstrIdSet
{}); }
331 Type
topT(Env
& env
, uint32_t idx
= 0) {
332 assert(idx
< env
.stateBefore
.stack
.size());
333 return env
.stateBefore
.stack
[env
.stateBefore
.stack
.size() - idx
- 1].type
;
336 Type
topC(Env
& env
, uint32_t idx
= 0) {
337 auto const t
= topT(env
, idx
);
338 assert(t
.subtypeOf(TInitCell
));
342 void discard(Env
& env
) {
343 pop(env
, Use::Not
, InstrIdSet
{env
.id
});
346 bool allUnused() { return true; }
347 template<class... Args
>
348 bool allUnused(const UseInfo
& ui
, Args
&&... args
) {
349 return ui
.first
== Use::Not
&&
350 allUnused(std::forward
<Args
>(args
)...);
353 void combineSets(InstrIdSet
&) {}
354 template<class... Args
>
355 void combineSets(InstrIdSet
& accum
, const UseInfo
& ui
, Args
&&... args
) {
356 accum
.insert(begin(ui
.second
), end(ui
.second
));
357 combineSets(accum
, std::forward
<Args
>(args
)...);
360 // If all the supplied UseInfos represent unused stack slots, make a
361 // pop that is considered unused. Otherwise pop as a Use::Used.
362 template<class... Args
>
363 void popCond(Env
& env
, Args
&&... args
) {
364 bool unused
= allUnused(std::forward
<Args
>(args
)...);
365 if (!unused
) return pop(env
, Use::Used
, InstrIdSet
{});
366 auto accum
= InstrIdSet
{env
.id
};
367 combineSets(accum
, std::forward
<Args
>(args
)...);
368 pop(env
, Use::Not
, accum
);
372 * It may be ok to remove pops on objects with destructors in some scenarios
373 * (where it won't change the observable point at which a destructor runs). We
374 * could also look at the object type and see if it is known that it can't have
375 * a user-defined destructor.
377 * For now, we mark the cell popped with a Use::UsedIfLastRef. This indicates
378 * to the producer of the cell that the it is considered used if it could be
379 * the last reference alive (in which case the destructor would be run on
380 * Pop). If the producer knows that the cell is not the last reference (e.g. if
381 * it is a Dup), then Use:UsedIfLastRef is equivalent to Use::Not.
383 void discardNonDtors(Env
& env
) {
384 auto const t
= topC(env
);
385 if (couldRunDestructor(t
)) {
386 return pop(env
, Use::UsedIfLastRef
, InstrIdSet
{env
.id
});
391 UseInfo
push(Env
& env
) {
392 always_assert(!env
.dceState
.stack
.empty());
393 auto ret
= env
.dceState
.stack
.back();
394 env
.dceState
.stack
.pop_back();
395 FTRACE(2, " {}@{} = push()\n", show(ret
.first
), show(ret
.second
));
399 void pushRemovable(Env
& env
) {
400 auto const ui
= push(env
);
403 markSetDead(env
, ui
.second
);
406 case Use::UsedIfLastRef
:
411 //////////////////////////////////////////////////////////////////////
414 void addGenSet(Env
& env
, std::bitset
<kMaxTrackedLocals
> locs
) {
415 FTRACE(4, " conservative: {}\n", bits_string(env
.dceState
.func
, locs
));
416 env
.dceState
.liveLocals
|= locs
;
417 env
.dceState
.gen
|= locs
;
418 env
.dceState
.kill
&= ~locs
;
419 env
.dceState
.killBeforePEI
&= ~locs
;
422 void addGen(Env
& env
, uint32_t id
) {
423 FTRACE(2, " gen: {}\n", id
);
424 if (id
>= kMaxTrackedLocals
) return;
425 env
.dceState
.liveLocals
[id
] = 1;
426 env
.dceState
.gen
[id
] = 1;
427 env
.dceState
.kill
[id
] = 0;
428 env
.dceState
.killBeforePEI
[id
] = 0;
431 void addKill(Env
& env
, uint32_t id
) {
432 FTRACE(2, " kill: {}\n", id
);
433 if (id
>= kMaxTrackedLocals
) return;
434 env
.dceState
.liveLocals
[id
] = 0;
435 env
.dceState
.gen
[id
] = 0;
436 env
.dceState
.kill
[id
] = 1;
437 env
.dceState
.killBeforePEI
[id
] = 1;
440 bool isLive(Env
& env
, uint32_t id
) {
441 if (id
>= kMaxTrackedLocals
) {
442 // Conservatively assume it's potentially live.
445 return env
.dceState
.liveLocals
[id
];
448 Type
locRaw(Env
& env
, LocalId loc
) {
449 return env
.stateBefore
.locals
[loc
];
452 void readDtorLocs(Env
& env
) {
453 for (auto i
= size_t{0}; i
< env
.stateBefore
.locals
.size(); ++i
) {
454 if (couldRunDestructor(env
.stateBefore
.locals
[i
])) {
460 //////////////////////////////////////////////////////////////////////
/*
 * Note that the instructions with popConds are relying on the consumer of the
 * values they push to check whether lifetime changes can have side-effects.
 *
 * For example, in bytecode like this, assuming $x is an object with a
 * destructor:
 *
 *   CGetL $x
 *   ...
 *   PopC $x // dtor should be here.
 *
 * The PopC will decide it can't be eliminated, which prevents us from
 * eliminating the CGetL.
 */

// PopC may run a destructor on its input, so only discard when no
// destructor could run (see discardNonDtors).
void dce(Env& env, const bc::PopC&) { discardNonDtors(env); }

// For PopV and PopR currently we never know if can't run a
// destructor.  (NOTE(review): the PopV/PopR overloads themselves are
// not visible in this chunk.)
void dce(Env& env, const bc::PopA&) { discard(env); }
void dce(Env& env, const bc::PopU&) { discard(env); }

// Pure literal/constructor pushes: removable whenever the pushed
// value is unused.
void dce(Env& env, const bc::Int&)        { pushRemovable(env); }
void dce(Env& env, const bc::String&)     { pushRemovable(env); }
void dce(Env& env, const bc::Array&)      { pushRemovable(env); }
void dce(Env& env, const bc::Dict&)       { pushRemovable(env); }
void dce(Env& env, const bc::Vec&)        { pushRemovable(env); }
void dce(Env& env, const bc::Keyset&)     { pushRemovable(env); }
void dce(Env& env, const bc::Double&)     { pushRemovable(env); }
void dce(Env& env, const bc::True&)       { pushRemovable(env); }
void dce(Env& env, const bc::False&)      { pushRemovable(env); }
void dce(Env& env, const bc::Null&)       { pushRemovable(env); }
void dce(Env& env, const bc::NullUninit&) { pushRemovable(env); }
void dce(Env& env, const bc::File&)       { pushRemovable(env); }
void dce(Env& env, const bc::Dir&)        { pushRemovable(env); }

// NameA/AGetC consume their input only if their output is needed.
void dce(Env& env, const bc::NameA&)      { popCond(env, push(env)); }
void dce(Env& env, const bc::NewArray&)   { pushRemovable(env); }
void dce(Env& env, const bc::NewCol&)     { pushRemovable(env); }
void dce(Env& env, const bc::AGetC&)      { popCond(env, push(env)); }
501 void dce(Env
& env
, const bc::Dup
&) {
502 auto const u1
= push(env
);
503 auto const u2
= push(env
);
504 // Dup pushes a cell that is guaranteed to be not the last reference.
505 // So, it can be eliminated if the cell it pushes is used as either
506 // Use::Not or Use::UsedIfLastRef.
507 // The cell it pops can be marked Use::Not only if Dup itself
508 // can be eliminated.
511 case Use::UsedIfLastRef
:
512 // It is ok to eliminate the Dup even if its second output u2
513 // is used, because eliminating the Dup still leaves the second
514 // output u2 on stack.
515 markSetDead(env
, u1
.second
);
518 pop(env
, Use::Not
, u2
.second
);
521 case Use::UsedIfLastRef
:
522 pop(env
, Use::Used
, InstrIdSet
{});
527 pop(env
, Use::Used
, InstrIdSet
{});
532 void dce(Env
& env
, const bc::CGetL
& op
) {
533 auto const ty
= locRaw(env
, op
.loc1
);
534 addGen(env
, op
.loc1
);
535 if (readCouldHaveSideEffects(ty
)) {
542 void dce(Env
& env
, const bc::CGetL2
& op
) {
543 auto const ty
= locRaw(env
, op
.loc1
);
544 addGen(env
, op
.loc1
);
545 auto const u1
= push(env
);
546 auto const u2
= push(env
);
547 if (readCouldHaveSideEffects(ty
)) {
550 popCond(env
, u1
, u2
);
554 void dce(Env
& env
, const bc::CGetL3
& op
) {
555 auto const ty
= locRaw(env
, op
.loc1
);
556 addGen(env
, op
.loc1
);
557 auto const u1
= push(env
);
558 auto const u2
= push(env
);
559 auto const u3
= push(env
);
560 if (readCouldHaveSideEffects(ty
)) {
564 popCond(env
, u1
, u2
, u3
);
565 popCond(env
, u1
, u2
, u3
);
// Function-exit instructions: consume their input and conservatively
// "read" every local whose value could run a destructor (the dtors
// run at exit, so those values must stay alive).
void dce(Env& env, const bc::RetC&)  { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Throw&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Fatal&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Exit&)  { push(env); pop(env); readDtorLocs(env); }
574 void dce(Env
& env
, const bc::SetL
& op
) {
575 auto const oldTy
= locRaw(env
, op
.loc1
);
576 auto const effects
= setCouldHaveSideEffects(oldTy
);
577 if (!isLive(env
, op
.loc1
) && !effects
) return markDead(env
);
581 addGen(env
, op
.loc1
);
583 addKill(env
, op
.loc1
);
587 void dce(Env
& env
, const bc::UnsetL
& op
) {
588 auto const oldTy
= locRaw(env
, op
.loc1
);
589 auto const effects
= setCouldHaveSideEffects(oldTy
);
590 if (oldTy
.subtypeOf(TUninit
)) return markDead(env
);
591 if (!isLive(env
, op
.loc1
) && !effects
) return markDead(env
);
593 addGen(env
, op
.loc1
);
595 addKill(env
, op
.loc1
);
600 * IncDecL is a read-modify-write: can be removed if the local isn't live, the
601 * set can't have side effects, and no one reads the value it pushes. If the
602 * instruction is not dead, always add the local to the set of upward exposed
605 void dce(Env
& env
, const bc::IncDecL
& op
) {
606 auto const oldTy
= locRaw(env
, op
.loc1
);
607 auto const effects
= setCouldHaveSideEffects(oldTy
) ||
608 readCouldHaveSideEffects(oldTy
);
609 auto const u1
= push(env
);
610 if (!isLive(env
, op
.loc1
) && !effects
&& allUnused(u1
)) {
611 return markSetDead(env
, u1
.second
);
613 addGen(env
, op
.loc1
);
617 * SetOpL is like IncDecL, but with the complication that we don't know if we
618 * can mark it dead when visiting it, because it is going to pop an input but
619 * unlike SetL doesn't push the value it popped. For the current scheme we
620 * just add the local to gen even if we're doing a removable push, which is
621 * correct but could definitely fail to eliminate some earlier stores.
623 void dce(Env
& env
, const bc::SetOpL
& op
) {
624 auto const oldTy
= locRaw(env
, op
.loc1
);
625 auto const effects
= setCouldHaveSideEffects(oldTy
) ||
626 readCouldHaveSideEffects(oldTy
);
627 if (!isLive(env
, op
.loc1
) && !effects
) {
628 popCond(env
, push(env
));
633 addGen(env
, op
.loc1
);
637 * Default implementation is conservative: assume we use all of our
638 * inputs, and can't be removed even if our output is unused.
640 * We also assume all the locals in the mayReadLocalSet must be
641 * added to the live local set, and don't remove anything from it.
644 void dce(Env
& env
, const Op
& op
) {
645 addGenSet(env
, env
.flags
.mayReadLocalSet
);
646 env
.dceState
.liveLocals
|= env
.flags
.mayReadLocalSet
;
647 for (auto i
= uint32_t{0}; i
< op
.numPush(); ++i
) {
650 for (auto i
= uint32_t{0}; i
< op
.numPop(); ++i
) {
651 pop(env
, Use::Used
, InstrIdSet
{});
655 void dispatch_dce(Env
& env
, const Bytecode
& op
) {
656 #define O(opcode, ...) case Op::opcode: dce(env, op.opcode); return;
657 switch (op
.op
) { OPCODES
}
662 //////////////////////////////////////////////////////////////////////
664 folly::Optional
<DceState
>
665 dce_visit(const Index
& index
,
667 borrowed_ptr
<const php::Block
> const blk
,
668 const State
& stateIn
,
669 std::bitset
<kMaxTrackedLocals
> liveOut
,
670 std::bitset
<kMaxTrackedLocals
> liveOutExn
) {
671 if (!stateIn
.initialized
) {
673 * Skip unreachable blocks.
675 * For DCE analysis it is ok to assume the transfer function is
676 * the identity on unreachable blocks (i.e. gen and kill sets are
677 * empty). For optimize, we don't need to bother doing anything
678 * to these---another pass is responsible for removing completely
679 * unreachable blocks.
684 auto const states
= locally_propagated_states(index
, ctx
, blk
, stateIn
);
686 auto dceState
= DceState
{};
687 dceState
.func
= ctx
.func
;
688 dceState
.markedDead
.resize(blk
->hhbcs
.size());
689 dceState
.liveLocals
= liveOut
;
690 dceState
.stack
.resize(states
.back().first
.stack
.size());
691 dceState
.usedLocals
= liveOut
;
692 for (auto& s
: dceState
.stack
) {
693 s
= UseInfo
{ Use::Used
, InstrIdSet
{} };
696 for (auto idx
= blk
->hhbcs
.size(); idx
-- > 0;) {
697 auto const& op
= blk
->hhbcs
[idx
];
699 FTRACE(2, " == #{} {}\n", idx
, show(ctx
.func
, op
));
701 auto visit_env
= Env
{
707 dispatch_dce(visit_env
, op
);
710 * When we see a PEI, we need to start over on the killBeforePEI
711 * set, and the local-liveness must take into account the fact
712 * that we could take an exception edge here (or'ing in the
715 if (states
[idx
].second
.wasPEI
) {
716 FTRACE(2, " <-- exceptions\n");
717 dceState
.liveLocals
|= liveOutExn
;
718 dceState
.killBeforePEI
.reset();
721 dceState
.usedLocals
|= dceState
.liveLocals
;
723 FTRACE(4, " dce stack: {}\n",
725 using namespace folly::gen
;
726 return from(dceState
.stack
)
727 | map([&] (const UseInfo
& ui
) { return show(ui
); })
728 | unsplit
<std::string
>(" ");
731 FTRACE(4, " interp stack: {}\n",
733 using namespace folly::gen
;
734 return from(states
[idx
].first
.stack
)
735 | map([&] (const StackElem
& e
) { return show(e
.type
); })
736 | unsplit
<std::string
>(" ");
740 // We're now at the state before this instruction, so the stack
741 // sizes must line up.
742 assert(dceState
.stack
.size() == states
[idx
].first
.stack
.size());
// NOTE(review): the enclosing struct declaration (the per-block
// transfer-function result consumed by global_dce — presumably
// "struct DceAnalysis {") is not visible in this chunk.

// Locals with upward-exposed uses in the block.
std::bitset<kMaxTrackedLocals> gen;
// Locals definitely re-defined, ignoring exceptional control flow.
std::bitset<kMaxTrackedLocals> kill;
// Locals killed before any PEI (safe across factored exit edges).
std::bitset<kMaxTrackedLocals> killExn;
754 DceAnalysis
analyze_dce(const Index
& index
,
756 borrowed_ptr
<php::Block
> const blk
,
757 const State
& stateIn
) {
758 // During this analysis pass, we have to assume everything could be
759 // live out, so we set allLive here. (Later we'll determine the
760 // real liveOut sets.)
761 auto allLive
= std::bitset
<kMaxTrackedLocals
>{};
763 if (auto dceState
= dce_visit(index
, ctx
, blk
, stateIn
, allLive
, allLive
)) {
767 dceState
->killBeforePEI
770 return DceAnalysis
{};
773 std::bitset
<kMaxTrackedLocals
>
774 optimize_dce(const Index
& index
,
776 borrowed_ptr
<php::Block
> const blk
,
777 const State
& stateIn
,
778 std::bitset
<kMaxTrackedLocals
> liveOut
,
779 std::bitset
<kMaxTrackedLocals
> liveOutExn
) {
780 auto const dceState
= dce_visit(index
, ctx
, blk
,
781 stateIn
, liveOut
, liveOutExn
);
782 if (!dceState
) return std::bitset
<kMaxTrackedLocals
>{};
784 // Remove all instructions that were marked dead, and replace
785 // instructions that can be replaced with pops but aren't dead.
786 for (auto idx
= blk
->hhbcs
.size(); idx
-- > 0;) {
787 if (!dceState
->markedDead
.test(idx
)) continue;
788 blk
->hhbcs
.erase(begin(blk
->hhbcs
) + idx
);
791 // Blocks must be non-empty. Make sure we don't change that.
792 if (blk
->hhbcs
.empty()) {
793 blk
->hhbcs
.push_back(bc::Nop
{});
796 return dceState
->usedLocals
;
799 //////////////////////////////////////////////////////////////////////
801 void remove_unused_locals(Context
const ctx
,
802 std::bitset
<kMaxTrackedLocals
> usedLocals
) {
803 if (!options
.RemoveUnusedLocals
) return;
804 auto const func
= ctx
.func
;
807 * Removing unused locals in closures requires checking which ones
808 * are captured variables so we can remove the relevant properties,
809 * and then we'd have to mutate the CreateCl callsite, so we don't
812 * Note: many closure bodies have unused $this local, because of
813 * some emitter quirk, so this might be worthwhile.
815 if (func
->isClosureBody
) return;
817 for (auto loc
= func
->locals
.begin() + func
->params
.size();
818 loc
!= func
->locals
.end(); ++loc
) {
820 assert(loc
->id
< kMaxTrackedLocals
&& !usedLocals
.test(loc
->id
));
823 if (loc
->id
< kMaxTrackedLocals
&& !usedLocals
.test(loc
->id
)) {
824 FTRACE(2, " killing: {}\n", local_string(*func
, loc
->id
));
825 const_cast<php::Local
&>(*loc
).killed
= true;
830 //////////////////////////////////////////////////////////////////////
834 void local_dce(const Index
& index
,
835 const FuncAnalysis
& ainfo
,
836 borrowed_ptr
<php::Block
> const blk
,
837 const State
& stateIn
) {
838 Trace::Bump bumper
{Trace::hhbbc_dce
, kSystemLibBump
,
839 is_systemlib_part(*ainfo
.ctx
.unit
)};
841 // For local DCE, we have to assume all variables are in the
842 // live-out set for the block.
843 auto allLive
= std::bitset
<kMaxTrackedLocals
>();
845 optimize_dce(index
, ainfo
.ctx
, blk
, stateIn
, allLive
, allLive
);
848 //////////////////////////////////////////////////////////////////////
850 void global_dce(const Index
& index
, const FuncAnalysis
& ai
) {
851 Trace::Bump bumper
{Trace::hhbbc_dce
, kSystemLibBump
,
852 is_systemlib_part(*ai
.ctx
.unit
)};
854 auto rpoId
= [&] (borrowed_ptr
<php::Block
> blk
) {
855 return ai
.bdata
[blk
->id
].rpoId
;
858 FTRACE(1, "|---- global DCE analyze ({})\n", show(ai
.ctx
));
859 FTRACE(2, "{}", [&] {
860 using namespace folly::gen
;
861 auto i
= uint32_t{0};
862 return from(ai
.ctx
.func
->locals
)
864 [&] (const php::Local
& l
) {
865 return folly::sformat(" {} {}\n",
866 i
++, local_string(*ai
.ctx
.func
, l
.id
));
868 | unsplit
<std::string
>("");
872 * Create a DceAnalysis for each block, indexed by rpo id.
874 * Here we want to pre-compute the transfer function for each block,
875 * so we don't need to visit each instruction repeatedly during the
876 * fixed point computation. The transfer function is a function of
877 * the set of locals read and killed in the block, and does not
878 * depend on the final live out state, so we can compute it here.
880 auto blockAnalysis
= std::vector
<DceAnalysis
>{};
881 for (auto& b
: ai
.rpoBlocks
) {
882 FTRACE(2, "block #{}\n", b
->id
);
883 auto const dinfo
= analyze_dce(
887 ai
.bdata
[b
->id
].stateIn
889 blockAnalysis
.push_back(dinfo
);
893 * States for each block, indexed by RPO id.
895 * The liveOut state is the union of liveIn states of each normal
896 * successor, and liveOutExn is the union of liveIn states of each
897 * exceptional successor.
900 std::bitset
<kMaxTrackedLocals
> liveOut
;
901 std::bitset
<kMaxTrackedLocals
> liveOutExn
;
903 std::vector
<BlockState
> blockStates(ai
.rpoBlocks
.size());
906 * Set of block reverse post order ids that still need to be
907 * visited. This is ordered by std::greater, so we'll generally
908 * visit blocks before their predecessors. (The algorithm doesn't
909 * need this for correctness, but it might make it iterate less.)
911 * Every block must be visited at least once, so we throw them all
914 auto incompleteQ
= dataflow_worklist
<uint32_t,std::less
<uint32_t>>(
917 for (auto& b
: ai
.rpoBlocks
) incompleteQ
.push(rpoId(b
));
919 auto const normalPreds
= computeNormalPreds(ai
.rpoBlocks
);
920 auto const factoredPreds
= computeFactoredPreds(ai
.rpoBlocks
);
923 * Iterate on live out states until we reach a fixed point.
925 * This algorithm treats the exceptional live-out states differently
926 * from the live-out states during normal control flow. The
927 * liveOutExn sets only take part in the liveIn computation when the
928 * block has factored exits.
930 while (!incompleteQ
.empty()) {
931 auto const blk
= ai
.rpoBlocks
[incompleteQ
.pop()];
933 FTRACE(2, "block #{}\n", blk
->id
);
935 auto const liveOut
= blockStates
[rpoId(blk
)].liveOut
;
936 auto const liveOutExn
= blockStates
[rpoId(blk
)].liveOutExn
;
937 auto const transfer
= blockAnalysis
[rpoId(blk
)];
938 auto const liveIn
= transfer
.gen
| (liveOut
& ~transfer
.kill
)
939 | (liveOutExn
& ~transfer
.killExn
);
941 FTRACE(2, "live out : {}\n"
947 bits_string(ai
.ctx
.func
, liveOut
),
948 bits_string(ai
.ctx
.func
, liveOutExn
),
949 bits_string(ai
.ctx
.func
, transfer
.gen
),
950 bits_string(ai
.ctx
.func
, transfer
.kill
),
951 bits_string(ai
.ctx
.func
, transfer
.killExn
),
952 bits_string(ai
.ctx
.func
, liveIn
));
954 // Merge the liveIn into the liveOut of each normal predecessor.
955 // If the set changes, reschedule that predecessor.
956 for (auto& pred
: normalPreds
[blk
->id
]) {
957 FTRACE(2, " -> {}\n", pred
->id
);
958 auto& predState
= blockStates
[rpoId(pred
)].liveOut
;
959 auto const oldPredState
= predState
;
961 if (predState
!= oldPredState
) {
962 incompleteQ
.push(rpoId(pred
));
966 // Merge the liveIn into the liveOutExn state for each exceptional
967 // precessor. The liveIn computation also depends on the
968 // liveOutExn state, so again reschedule if it changes.
969 for (auto& pred
: factoredPreds
[blk
->id
]) {
970 FTRACE(2, " => {}\n", pred
->id
);
971 auto& predState
= blockStates
[rpoId(pred
)].liveOutExn
;
972 auto const oldPredState
= predState
;
974 if (predState
!= oldPredState
) {
975 incompleteQ
.push(rpoId(pred
));
981 * Now that we're at a fixed point, use the propagated states to
982 * remove instructions that don't need to be there.
984 FTRACE(1, "|---- global DCE optimize ({})\n", show(ai
.ctx
));
985 std::bitset
<kMaxTrackedLocals
> usedLocals
;
986 for (auto& b
: ai
.rpoBlocks
) {
987 FTRACE(2, "block #{}\n", b
->id
);
988 usedLocals
|= optimize_dce(
992 ai
.bdata
[b
->id
].stateIn
,
993 blockStates
[rpoId(b
)].liveOut
,
994 blockStates
[rpoId(b
)].liveOutExn
998 FTRACE(1, " used locals: {}\n", bits_string(ai
.ctx
.func
, usedLocals
));
999 remove_unused_locals(ai
.ctx
, usedLocals
);
1002 //////////////////////////////////////////////////////////////////////
1004 void remove_unreachable_blocks(const Index
& index
, const FuncAnalysis
& ainfo
) {
1005 auto reachable
= [&](BlockId id
) {
1006 return ainfo
.bdata
[id
].stateIn
.initialized
;
1009 for (auto& blk
: ainfo
.rpoBlocks
) {
1010 if (reachable(blk
->id
)) continue;
1011 auto const srcLoc
= blk
->hhbcs
.front().srcLoc
;
1013 bc_with_loc(srcLoc
, bc::String
{ s_unreachable
.get() }),
1014 bc_with_loc(srcLoc
, bc::Fatal
{ FatalOp::Runtime
})
1016 blk
->fallthrough
= NoBlockId
;
1019 if (!options
.RemoveDeadBlocks
) return;
1021 for (auto& blk
: ainfo
.rpoBlocks
) {
1022 auto reachableTargets
= false;
1023 forEachTakenEdge(blk
->hhbcs
.back(), [&] (BlockId id
) {
1024 if (reachable(id
)) reachableTargets
= true;
1026 if (reachableTargets
) continue;
1027 switch (blk
->hhbcs
.back().op
) {
1030 blk
->hhbcs
.back() = bc_with_loc(blk
->hhbcs
.back().srcLoc
, bc::PopC
{});
1038 bool merge_blocks(const FuncAnalysis
& ainfo
) {
1039 auto& func
= *ainfo
.ctx
.func
;
1040 FTRACE(2, "merge_blocks: {}\n", func
.name
);
1042 boost::dynamic_bitset
<> hasPred(func
.blocks
.size());
1043 boost::dynamic_bitset
<> multiplePreds(func
.blocks
.size());
1044 boost::dynamic_bitset
<> multipleSuccs(func
.blocks
.size());
1045 boost::dynamic_bitset
<> removed(func
.blocks
.size());
1046 auto reachable
= [&](php::Block
& b
) {
1047 auto const& state
= ainfo
.bdata
[b
.id
].stateIn
;
1048 return state
.initialized
&& !state
.unreachable
;
1050 // find all the blocks with multiple preds; they can't be merged
1051 // into their predecessors
1052 for (auto& blk
: func
.blocks
) {
1053 if (blk
->id
== NoBlockId
) continue;
1055 if (!reachable(*blk
)) multiplePreds
[blk
->id
] = true;
1056 forEachSuccessor(*blk
, [&](BlockId succId
) {
1057 if (hasPred
[succId
]) {
1058 multiplePreds
[succId
] = true;
1060 hasPred
[succId
] = true;
1064 if (numSucc
> 1) multipleSuccs
[blk
->id
] = true;
1066 multiplePreds
[func
.mainEntry
] = true;
1067 for (auto& blkId
: func
.dvEntries
) {
1068 if (blkId
!= NoBlockId
) {
1069 multiplePreds
[blkId
] = true;
1073 bool removedAny
= false;
1074 for (auto& blk
: func
.blocks
) {
1075 if (blk
->id
== NoBlockId
) continue;
1076 while (blk
->fallthrough
!= NoBlockId
) {
1077 auto nxt
= borrow(func
.blocks
[blk
->fallthrough
]);
1078 if (multipleSuccs
[blk
->id
] ||
1079 multiplePreds
[nxt
->id
] ||
1080 blk
->exnNode
!= nxt
->exnNode
||
1081 blk
->section
!= nxt
->section
) {
1085 FTRACE(1, "merging: {} into {}\n", (void*)nxt
, (void*)blk
.get());
1086 multipleSuccs
[blk
->id
] = multipleSuccs
[nxt
->id
];
1087 blk
->fallthrough
= nxt
->fallthrough
;
1088 blk
->fallthroughNS
= nxt
->fallthroughNS
;
1089 if (nxt
->factoredExits
.size()) {
1090 if (blk
->factoredExits
.size()) {
1091 std::set
<BlockId
> exitSet
;
1092 std::copy(begin(blk
->factoredExits
), end(blk
->factoredExits
),
1093 std::inserter(exitSet
, begin(exitSet
)));
1094 std::copy(nxt
->factoredExits
.begin(), nxt
->factoredExits
.end(),
1095 std::inserter(exitSet
, begin(exitSet
)));
1096 blk
->factoredExits
.resize(exitSet
.size());
1097 std::copy(begin(exitSet
), end(exitSet
), blk
->factoredExits
.begin());
1098 nxt
->factoredExits
= decltype(nxt
->factoredExits
) {};
1100 blk
->factoredExits
= std::move(nxt
->factoredExits
);
1103 std::copy(nxt
->hhbcs
.begin(), nxt
->hhbcs
.end(),
1104 std::back_inserter(blk
->hhbcs
));
1105 nxt
->fallthrough
= NoBlockId
;
1106 nxt
->id
= NoBlockId
;
1107 nxt
->hhbcs
= { bc::Nop
{} };
1115 //////////////////////////////////////////////////////////////////////