adding pieces of the translation function from nast to hhbc ast
[hiphop-php.git] / hphp / hhbbc / dce.cpp
blob67b33d1e7f334092d21dcd41c39c5f69e6f1c314
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/hhbbc/dce.h"
18 #include <vector>
19 #include <string>
20 #include <utility>
21 #include <bitset>
22 #include <sstream>
23 #include <algorithm>
24 #include <set>
26 #include <boost/dynamic_bitset.hpp>
28 #include <folly/gen/Base.h>
29 #include <folly/gen/String.h>
31 #include "hphp/util/trace.h"
32 #include "hphp/util/dataflow-worklist.h"
34 #include "hphp/hhbbc/representation.h"
35 #include "hphp/hhbbc/analyze.h"
36 #include "hphp/hhbbc/interp-state.h"
37 #include "hphp/hhbbc/interp.h"
38 #include "hphp/hhbbc/type-system.h"
39 #include "hphp/hhbbc/unit-util.h"
40 #include "hphp/hhbbc/cfg.h"
42 namespace HPHP { namespace HHBBC {
44 TRACE_SET_MOD(hhbbc_dce);
46 //////////////////////////////////////////////////////////////////////
49 * This module contains code to perform both local DCE and global DCE.
51 * The local DCE algorithm addresses dead eval stack manipulations and
52 * dead stores to locals that are visible within a single basic block.
54 * The global DCE performs a liveness analysis and then uses this
55 * information to allow dead stores to locals to be eliminated across
56 * blocks. It does not attempt to remove unnecessary evaluation stack
57 * manipulation spanning basic blocks, but it uses the same local DCE
58 * code and will eliminate intra-block stack manipulations.
60 * Both types of DCE here need to be type-aware, but they must visit
61 * blocks backward. They accomplish this by forward-propagating the
62 * block input states from a FuncAnalysis to each instruction in the
63 * block prior to doing the backward iteration.
65 * Eval stack:
67 * During backward traversal of a block, we maintain a "backwards"
68 * stack, indicating which eval stack slots are going to be required
69 * in the future or not.
71 * During this traversal, each instruction that pops when going
72 * forward instead "pushes" information about whether that input
73 * will be required. If it is not required, it also pushes an
74 * accumulating set of instruction ids that must be removed if the
75 * instruction which produces the stack slot is removed. (All
76 * instructions in these sets must be removed if any are, in order
77 * to keep the stack depths correct.)
79 * Similarly, each instruction that would push a value going forward
80 * instead "pops" the information about whether its stack output is
81 * going to be needed. If not, the instruction can mark itself (and
82 * all downstream instructions that depended on it) as removable.
84 * Locals:
86 * While a block is iterated backward, the set of live locals is
87 * tracked. The initial state of this live set depends on whether
88 * we are performing global or local DCE, and in the local case
89 * includes all locals in the function.
91 * When a local may be read, it is added to the live set. When a
92 * local is definitely-written, it is removed from the set.
 * If an instruction may write to a local that is not live, it can be
95 * marked as removable if it is known to have no other side-effects.
96 * Currently this is only hooked up to SetL.
98 * Liveness analysis:
100 * The global algorithm first performs a liveness analysis to
101 * propagate live out sets to each block.
103 * This analysis is basically normal, but slightly modified from the
104 * usual in order to deal with the way exceptional control flow is
105 * represented in our CFG (with factored edges).
107 * It essentially is the liveness analysis algorithm described at
108 * http://dl.acm.org/citation.cfm?id=316171, except that we don't
109 * need to track kill sets for each PEI because we don't have a
110 * means of determining which factored edges may be traversed by any
 * given PEI. (That information might be more readily available in the
 * context of a language with declared exception clauses...)
114 * Since we only deal with the most pessimistic exception case, this
115 * means for each block we just determine a gen and kill set, along
116 * with a subset of the latter set that is the locals that must be
117 * killed before any PEI. (See killBeforePEI below.)
119 * Final note about types:
121 * Global DCE can change the types of locals in a way that spans
122 * basic blocks. For a simple example, take the following code:
124 * // $foo :: Uninit here.
125 * $foo = 12;
126 * // ... code that breaks a block ...
127 * $foo = 100;
129 * If the first store to $foo is removed, the type of $foo before
130 * the SetL in the later block is now Uninit, instead of Int.
132 * This means that after calling global_dce on a function, the type
133 * information in the block input states in the associated
134 * FuncAnalysis can no longer be considered accurate.
136 * Moreover, since global DCE makes use of type information to
137 * determine whether a store is dead, we need to be careful that
138 * this never changes whether the assumptions used to perform DCE
139 * were correct.
141 * This is ok right now: DCE only uses the types to figure out which
142 * values can either have lifetimes extended or shortened without
143 * visible side-effects, or which values may be refs (so we can't
144 * omit stores to them). If we omit a store that changes the type
145 * of a local globally, this means the new type cannot be different
146 * with regard to these features (or we wouldn't have omitted it),
147 * which means we won't have made different decisions elsewhere in
148 * the algorithm based on the new type.
150 * Specifically: we will never omit a store where the old local type
151 * was something that could've had side effects, and if a new value
152 * is stored into a local where destroying it could have
153 * side-effects, some point along the path to the function exit (if
154 * nothing else, the RetC) will have added it to the gen set and the
155 * store also won't be removable.
157 * In contrast, note that local DCE can not change types across
158 * block boundaries.
162 namespace {
164 //////////////////////////////////////////////////////////////////////
166 // Returns whether decrefing a type could run a destructor.
167 bool couldRunDestructor(const Type& t) {
168 // We could check for specialized objects to see if they don't
169 // declare a user-defined destructor, but currently don't.
170 return t.couldBe(TObj) || t.couldBe(TCArr) || t.couldBe(TRef);
173 // Returns whether a set on something containing type t could have
174 // side-effects (running destuctors, or modifying arbitrary things via
175 // a Ref).
176 bool setCouldHaveSideEffects(const Type& t) {
177 return t.couldBe(TObj) || t.couldBe(TCArr) || t.couldBe(TRef);
180 // Some reads could raise warnings and run arbitrary code.
181 bool readCouldHaveSideEffects(const Type& t) {
182 return t.couldBe(TUninit);
185 //////////////////////////////////////////////////////////////////////
/*
 * Use information of a stack cell.
 */
enum class Use {
  // Indicates that the cell is (unconditionally) not used.
  Not,

  // Indicates that the cell is (possibly) used.
  Used,

  /*
   * Indicates that the cell is only used if it was the last reference alive.
   * For instance, a PopC will call the destructor of the top-of-stack object
   * if it was the last reference alive, and this counts as an example of
   * 'UsedIfLastRef'.
   *
   * If the producer of the cell knows that it is not the last reference, then
   * it can treat Use::UsedIfLastRef as being equivalent to Use::Not.
   */
  UsedIfLastRef,
};

// Index of an instruction within the block currently being walked.
using InstrId = size_t;
// Set of instruction ids that must be removed together (to keep the eval
// stack balanced).
using InstrIdSet = std::set<InstrId>;
// Use of a stack slot, paired with the set of instructions that must be
// removed if the slot's producer is removed.
using UseInfo = std::pair<Use,InstrIdSet>;
213 //////////////////////////////////////////////////////////////////////
// Mutable state threaded through a single backward DCE walk of one block.
struct DceState {
  // The function being analyzed (used for tracing/debug output).
  borrowed_ptr<const php::Func> func;

  /*
   * Eval stack use information. Stacks slots are marked as being
   * needed or not needed. If they aren't needed, they carry a set of
   * instructions that must be removed if the instruction that
   * produces the stack value is also removable.
   */
  std::vector<UseInfo> stack;

  /*
   * Locals known to be live at a point in a DCE walk. This is used
   * when we're actually acting on information we discovered during
   * liveness analysis.
   */
  std::bitset<kMaxTrackedLocals> liveLocals;

  /*
   * These variable sets are used to compute the transfer function for
   * the global liveness analysis in global_dce.
   *
   * The gen set accumulates the set of variables in the block with
   * upward-exposed uses. The kill set is the set of variables the
   * block will re-define, ignoring exceptional control flow.
   *
   * The killBeforePEI set is the set of variables killed before a
   * PEI. Propagation of liveness needs to use this (always more
   * conservative) set instead of kill when crossing a factored exit
   * edge.
   */
  std::bitset<kMaxTrackedLocals> gen;
  std::bitset<kMaxTrackedLocals> kill;
  std::bitset<kMaxTrackedLocals> killBeforePEI;

  /*
   * Instructions marked in this set are dead. If any of them are
   * removed, however, they must all be removed, because of the need
   * to keep eval stack consumers and producers balanced.
   */
  boost::dynamic_bitset<> markedDead{};

  /*
   * The set of locals that were ever live in this block. (This
   * includes locals that were live going out of this block.) This
   * set is used by global DCE to remove locals that are completely
   * unused in the entire function.
   */
  std::bitset<kMaxTrackedLocals> usedLocals;
};
266 //////////////////////////////////////////////////////////////////////
268 const char* show(Use u) {
269 switch (u) {
270 case Use::Not: return "0";
271 case Use::Used: return "U";
272 case Use::UsedIfLastRef: return "UL";
274 not_reached();
277 std::string show(const InstrIdSet& set) {
278 using namespace folly::gen;
279 return from(set)
280 | eachTo<std::string>()
281 | unsplit<std::string>(";")
285 std::string DEBUG_ONLY show(const UseInfo& ui) {
286 return folly::format("{}@{}", show(ui.first), show(ui.second)).str();
289 std::string DEBUG_ONLY bits_string(borrowed_ptr<const php::Func> func,
290 std::bitset<kMaxTrackedLocals> locs) {
291 std::ostringstream out;
292 if (func->locals.size() < kMaxTrackedLocals) {
293 for (auto i = func->locals.size(); i-- > 0;) {
294 out << (locs.test(i) ? '1' : '0');
296 } else {
297 out << locs;
299 return out.str();
302 //////////////////////////////////////////////////////////////////////
// Per-instruction context handed to each dce() visitor.
struct Env {
  DceState& dceState;
  // Index of the current instruction within the block (used as InstrId).
  InstrId id;
  // Interpreter state immediately before this instruction executes.
  const State& stateBefore;
  // Step flags produced by the forward interpretation of this instruction.
  const StepFlags& flags;
};
311 void markSetDead(Env& env, const InstrIdSet& set) {
312 env.dceState.markedDead[env.id] = 1;
313 FTRACE(2, " marking {} {}\n", env.id, show(set));
314 for (auto& i : set) env.dceState.markedDead[i] = 1;
317 void markDead(Env& env) {
318 env.dceState.markedDead[env.id] = 1;
319 FTRACE(2, " marking {}\n", env.id);
322 //////////////////////////////////////////////////////////////////////
323 // eval stack
325 void pop(Env& env, Use u, InstrIdSet set) {
326 FTRACE(2, " pop({})\n", show(u));
327 env.dceState.stack.emplace_back(u, std::move(set));
329 void pop(Env& env) { pop(env, Use::Used, InstrIdSet{}); }
331 Type topT(Env& env, uint32_t idx = 0) {
332 assert(idx < env.stateBefore.stack.size());
333 return env.stateBefore.stack[env.stateBefore.stack.size() - idx - 1].type;
336 Type topC(Env& env, uint32_t idx = 0) {
337 auto const t = topT(env, idx);
338 assert(t.subtypeOf(TInitCell));
339 return t;
342 void discard(Env& env) {
343 pop(env, Use::Not, InstrIdSet{env.id});
346 bool allUnused() { return true; }
347 template<class... Args>
348 bool allUnused(const UseInfo& ui, Args&&... args) {
349 return ui.first == Use::Not &&
350 allUnused(std::forward<Args>(args)...);
353 void combineSets(InstrIdSet&) {}
354 template<class... Args>
355 void combineSets(InstrIdSet& accum, const UseInfo& ui, Args&&... args) {
356 accum.insert(begin(ui.second), end(ui.second));
357 combineSets(accum, std::forward<Args>(args)...);
360 // If all the supplied UseInfos represent unused stack slots, make a
361 // pop that is considered unused. Otherwise pop as a Use::Used.
362 template<class... Args>
363 void popCond(Env& env, Args&&... args) {
364 bool unused = allUnused(std::forward<Args>(args)...);
365 if (!unused) return pop(env, Use::Used, InstrIdSet{});
366 auto accum = InstrIdSet{env.id};
367 combineSets(accum, std::forward<Args>(args)...);
368 pop(env, Use::Not, accum);
372 * It may be ok to remove pops on objects with destructors in some scenarios
373 * (where it won't change the observable point at which a destructor runs). We
374 * could also look at the object type and see if it is known that it can't have
375 * a user-defined destructor.
377 * For now, we mark the cell popped with a Use::UsedIfLastRef. This indicates
378 * to the producer of the cell that the it is considered used if it could be
379 * the last reference alive (in which case the destructor would be run on
380 * Pop). If the producer knows that the cell is not the last reference (e.g. if
381 * it is a Dup), then Use:UsedIfLastRef is equivalent to Use::Not.
383 void discardNonDtors(Env& env) {
384 auto const t = topC(env);
385 if (couldRunDestructor(t)) {
386 return pop(env, Use::UsedIfLastRef, InstrIdSet{env.id});
388 discard(env);
391 UseInfo push(Env& env) {
392 always_assert(!env.dceState.stack.empty());
393 auto ret = env.dceState.stack.back();
394 env.dceState.stack.pop_back();
395 FTRACE(2, " {}@{} = push()\n", show(ret.first), show(ret.second));
396 return ret;
399 void pushRemovable(Env& env) {
400 auto const ui = push(env);
401 switch (ui.first) {
402 case Use::Not:
403 markSetDead(env, ui.second);
404 break;
405 case Use::Used:
406 case Use::UsedIfLastRef:
407 break;
411 //////////////////////////////////////////////////////////////////////
412 // locals
414 void addGenSet(Env& env, std::bitset<kMaxTrackedLocals> locs) {
415 FTRACE(4, " conservative: {}\n", bits_string(env.dceState.func, locs));
416 env.dceState.liveLocals |= locs;
417 env.dceState.gen |= locs;
418 env.dceState.kill &= ~locs;
419 env.dceState.killBeforePEI &= ~locs;
422 void addGen(Env& env, uint32_t id) {
423 FTRACE(2, " gen: {}\n", id);
424 if (id >= kMaxTrackedLocals) return;
425 env.dceState.liveLocals[id] = 1;
426 env.dceState.gen[id] = 1;
427 env.dceState.kill[id] = 0;
428 env.dceState.killBeforePEI[id] = 0;
431 void addKill(Env& env, uint32_t id) {
432 FTRACE(2, " kill: {}\n", id);
433 if (id >= kMaxTrackedLocals) return;
434 env.dceState.liveLocals[id] = 0;
435 env.dceState.gen[id] = 0;
436 env.dceState.kill[id] = 1;
437 env.dceState.killBeforePEI[id] = 1;
440 bool isLive(Env& env, uint32_t id) {
441 if (id >= kMaxTrackedLocals) {
442 // Conservatively assume it's potentially live.
443 return true;
445 return env.dceState.liveLocals[id];
448 Type locRaw(Env& env, LocalId loc) {
449 return env.stateBefore.locals[loc];
452 void readDtorLocs(Env& env) {
453 for (auto i = size_t{0}; i < env.stateBefore.locals.size(); ++i) {
454 if (couldRunDestructor(env.stateBefore.locals[i])) {
455 addGen(env, i);
460 //////////////////////////////////////////////////////////////////////
/*
 * Note that the instructions with popConds are relying on the consumer of the
 * values they push to check whether lifetime changes can have side-effects.
 *
 * For example, in bytecode like this, assuming $x is an object with a
 * destructor:
 *
 *   CGetL $x
 *   UnsetL $x
 *   // ...
 *   PopC $x // dtor should be here.
 *
 * The PopC will decide it can't be eliminated, which prevents us from
 * eliminating the CGetL.
 */

// PopC may run a destructor on the popped cell; discardNonDtors accounts
// for that with Use::UsedIfLastRef.
void dce(Env& env, const bc::PopC&) { discardNonDtors(env); }
// For PopV and PopR currently we never know if can't run a
// destructor.
// PopA/PopU pop values that can never have destructors, so a plain
// discard is safe.
void dce(Env& env, const bc::PopA&) { discard(env); }
void dce(Env& env, const bc::PopU&) { discard(env); }
// The following push side-effect-free constants: removable whenever the
// pushed value turns out to be unused.
void dce(Env& env, const bc::Int&) { pushRemovable(env); }
void dce(Env& env, const bc::String&) { pushRemovable(env); }
void dce(Env& env, const bc::Array&) { pushRemovable(env); }
void dce(Env& env, const bc::Dict&) { pushRemovable(env); }
void dce(Env& env, const bc::Vec&) { pushRemovable(env); }
void dce(Env& env, const bc::Keyset&) { pushRemovable(env); }
void dce(Env& env, const bc::Double&) { pushRemovable(env); }
void dce(Env& env, const bc::True&) { pushRemovable(env); }
void dce(Env& env, const bc::False&) { pushRemovable(env); }
void dce(Env& env, const bc::Null&) { pushRemovable(env); }
void dce(Env& env, const bc::NullUninit&) { pushRemovable(env); }
void dce(Env& env, const bc::File&) { pushRemovable(env); }
void dce(Env& env, const bc::Dir&) { pushRemovable(env); }
// NameA/AGetC transform the top of stack: their pop is unused only if
// their push is unused.
void dce(Env& env, const bc::NameA&) { popCond(env, push(env)); }
void dce(Env& env, const bc::NewArray&) { pushRemovable(env); }
void dce(Env& env, const bc::NewCol&) { pushRemovable(env); }
void dce(Env& env, const bc::AGetC&) { popCond(env, push(env)); }
// Dup duplicates the top of stack; u1 is the (forward-order) first output,
// u2 the second.
void dce(Env& env, const bc::Dup&) {
  auto const u1 = push(env);
  auto const u2 = push(env);
  // Dup pushes a cell that is guaranteed to be not the last reference.
  // So, it can be eliminated if the cell it pushes is used as either
  // Use::Not or Use::UsedIfLastRef.
  // The cell it pops can be marked Use::Not only if Dup itself
  // can be eliminated.
  switch (u1.first) {
  case Use::Not:
  case Use::UsedIfLastRef:
    // It is ok to eliminate the Dup even if its second output u2
    // is used, because eliminating the Dup still leaves the second
    // output u2 on stack.
    markSetDead(env, u1.second);
    switch (u2.first) {
    case Use::Not:
      // Both outputs unused: the popped input is unused too, and its
      // producer inherits u2's removal set.
      pop(env, Use::Not, u2.second);
      break;
    case Use::Used:
    case Use::UsedIfLastRef:
      pop(env, Use::Used, InstrIdSet{});
      break;
    }
    break;
  case Use::Used:
    // First output is used, so Dup must stay; its input is used.
    pop(env, Use::Used, InstrIdSet{});
    break;
  }
}
532 void dce(Env& env, const bc::CGetL& op) {
533 auto const ty = locRaw(env, op.loc1);
534 addGen(env, op.loc1);
535 if (readCouldHaveSideEffects(ty)) {
536 push(env);
537 } else {
538 pushRemovable(env);
542 void dce(Env& env, const bc::CGetL2& op) {
543 auto const ty = locRaw(env, op.loc1);
544 addGen(env, op.loc1);
545 auto const u1 = push(env);
546 auto const u2 = push(env);
547 if (readCouldHaveSideEffects(ty)) {
548 pop(env);
549 } else {
550 popCond(env, u1, u2);
554 void dce(Env& env, const bc::CGetL3& op) {
555 auto const ty = locRaw(env, op.loc1);
556 addGen(env, op.loc1);
557 auto const u1 = push(env);
558 auto const u2 = push(env);
559 auto const u3 = push(env);
560 if (readCouldHaveSideEffects(ty)) {
561 pop(env);
562 pop(env);
563 } else {
564 popCond(env, u1, u2, u3);
565 popCond(env, u1, u2, u3);
// Function-exit-like instructions: their input is always used, and leaving
// the function runs destructors for any dtor-bearing locals, so those
// locals count as read here.
void dce(Env& env, const bc::RetC&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Throw&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Fatal&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Exit&) { push(env); pop(env); readDtorLocs(env); }
574 void dce(Env& env, const bc::SetL& op) {
575 auto const oldTy = locRaw(env, op.loc1);
576 auto const effects = setCouldHaveSideEffects(oldTy);
577 if (!isLive(env, op.loc1) && !effects) return markDead(env);
578 push(env);
579 pop(env);
580 if (effects) {
581 addGen(env, op.loc1);
582 } else {
583 addKill(env, op.loc1);
587 void dce(Env& env, const bc::UnsetL& op) {
588 auto const oldTy = locRaw(env, op.loc1);
589 auto const effects = setCouldHaveSideEffects(oldTy);
590 if (oldTy.subtypeOf(TUninit)) return markDead(env);
591 if (!isLive(env, op.loc1) && !effects) return markDead(env);
592 if (effects) {
593 addGen(env, op.loc1);
594 } else {
595 addKill(env, op.loc1);
600 * IncDecL is a read-modify-write: can be removed if the local isn't live, the
601 * set can't have side effects, and no one reads the value it pushes. If the
602 * instruction is not dead, always add the local to the set of upward exposed
603 * uses.
605 void dce(Env& env, const bc::IncDecL& op) {
606 auto const oldTy = locRaw(env, op.loc1);
607 auto const effects = setCouldHaveSideEffects(oldTy) ||
608 readCouldHaveSideEffects(oldTy);
609 auto const u1 = push(env);
610 if (!isLive(env, op.loc1) && !effects && allUnused(u1)) {
611 return markSetDead(env, u1.second);
613 addGen(env, op.loc1);
617 * SetOpL is like IncDecL, but with the complication that we don't know if we
618 * can mark it dead when visiting it, because it is going to pop an input but
619 * unlike SetL doesn't push the value it popped. For the current scheme we
620 * just add the local to gen even if we're doing a removable push, which is
621 * correct but could definitely fail to eliminate some earlier stores.
623 void dce(Env& env, const bc::SetOpL& op) {
624 auto const oldTy = locRaw(env, op.loc1);
625 auto const effects = setCouldHaveSideEffects(oldTy) ||
626 readCouldHaveSideEffects(oldTy);
627 if (!isLive(env, op.loc1) && !effects) {
628 popCond(env, push(env));
629 } else {
630 push(env);
631 pop(env);
633 addGen(env, op.loc1);
637 * Default implementation is conservative: assume we use all of our
638 * inputs, and can't be removed even if our output is unused.
640 * We also assume all the locals in the mayReadLocalSet must be
641 * added to the live local set, and don't remove anything from it.
643 template<class Op>
644 void dce(Env& env, const Op& op) {
645 addGenSet(env, env.flags.mayReadLocalSet);
646 env.dceState.liveLocals |= env.flags.mayReadLocalSet;
647 for (auto i = uint32_t{0}; i < op.numPush(); ++i) {
648 push(env);
650 for (auto i = uint32_t{0}; i < op.numPop(); ++i) {
651 pop(env, Use::Used, InstrIdSet{});
// Dispatch to the dce() overload matching the bytecode's opcode. The O()
// macro expands to one switch case per opcode via the OPCODES x-macro.
void dispatch_dce(Env& env, const Bytecode& op) {
#define O(opcode, ...) case Op::opcode: dce(env, op.opcode); return;
  switch (op.op) { OPCODES }
#undef O
  not_reached();
}
662 //////////////////////////////////////////////////////////////////////
/*
 * Walk one block backward, computing use information for every instruction.
 *
 * Forward-propagates the block input state to each instruction first (via
 * locally_propagated_states), then iterates the instructions in reverse,
 * dispatching to the dce() visitors. Returns folly::none for unreachable
 * blocks. liveOut/liveOutExn seed the local-liveness tracking (all-ones for
 * the analysis pass and for local DCE; real live-out sets for global DCE's
 * optimize pass).
 */
folly::Optional<DceState>
dce_visit(const Index& index,
          Context const ctx,
          borrowed_ptr<const php::Block> const blk,
          const State& stateIn,
          std::bitset<kMaxTrackedLocals> liveOut,
          std::bitset<kMaxTrackedLocals> liveOutExn) {
  if (!stateIn.initialized) {
    /*
     * Skip unreachable blocks.
     *
     * For DCE analysis it is ok to assume the transfer function is
     * the identity on unreachable blocks (i.e. gen and kill sets are
     * empty). For optimize, we don't need to bother doing anything
     * to these---another pass is responsible for removing completely
     * unreachable blocks.
     */
    return folly::none;
  }

  auto const states = locally_propagated_states(index, ctx, blk, stateIn);

  auto dceState = DceState{};
  dceState.func = ctx.func;
  dceState.markedDead.resize(blk->hhbcs.size());
  dceState.liveLocals = liveOut;
  dceState.stack.resize(states.back().first.stack.size());
  dceState.usedLocals = liveOut;
  // Everything still on the stack at the end of the block is conservatively
  // considered used by later blocks.
  for (auto& s : dceState.stack) {
    s = UseInfo { Use::Used, InstrIdSet{} };
  }

  for (auto idx = blk->hhbcs.size(); idx-- > 0;) {
    auto const& op = blk->hhbcs[idx];

    FTRACE(2, " == #{} {}\n", idx, show(ctx.func, op));

    auto visit_env = Env {
      dceState,
      idx,
      states[idx].first,   // state before this instruction
      states[idx].second   // step flags from forward interpretation
    };
    dispatch_dce(visit_env, op);

    /*
     * When we see a PEI, we need to start over on the killBeforePEI
     * set, and the local-liveness must take into account the fact
     * that we could take an exception edge here (or'ing in the
     * liveOutExn set).
     */
    if (states[idx].second.wasPEI) {
      FTRACE(2, " <-- exceptions\n");
      dceState.liveLocals |= liveOutExn;
      dceState.killBeforePEI.reset();
    }

    dceState.usedLocals |= dceState.liveLocals;

    FTRACE(4, " dce stack: {}\n",
      [&] {
        using namespace folly::gen;
        return from(dceState.stack)
          | map([&] (const UseInfo& ui) { return show(ui); })
          | unsplit<std::string>(" ");
      }()
    );
    FTRACE(4, " interp stack: {}\n",
      [&] {
        using namespace folly::gen;
        return from(states[idx].first.stack)
          | map([&] (const StackElem& e) { return show(e.type); })
          | unsplit<std::string>(" ");
      }()
    );

    // We're now at the state before this instruction, so the stack
    // sizes must line up.
    assert(dceState.stack.size() == states[idx].first.stack.size());
  }

  return dceState;
}
// Per-block transfer function for the global liveness analysis:
// upward-exposed uses (gen), definite kills ignoring exceptional flow
// (kill), and kills that happen before any PEI (killExn).
struct DceAnalysis {
  std::bitset<kMaxTrackedLocals> gen;
  std::bitset<kMaxTrackedLocals> kill;
  std::bitset<kMaxTrackedLocals> killExn;
};
754 DceAnalysis analyze_dce(const Index& index,
755 Context const ctx,
756 borrowed_ptr<php::Block> const blk,
757 const State& stateIn) {
758 // During this analysis pass, we have to assume everything could be
759 // live out, so we set allLive here. (Later we'll determine the
760 // real liveOut sets.)
761 auto allLive = std::bitset<kMaxTrackedLocals>{};
762 allLive.set();
763 if (auto dceState = dce_visit(index, ctx, blk, stateIn, allLive, allLive)) {
764 return DceAnalysis {
765 dceState->gen,
766 dceState->kill,
767 dceState->killBeforePEI
770 return DceAnalysis {};
773 std::bitset<kMaxTrackedLocals>
774 optimize_dce(const Index& index,
775 Context const ctx,
776 borrowed_ptr<php::Block> const blk,
777 const State& stateIn,
778 std::bitset<kMaxTrackedLocals> liveOut,
779 std::bitset<kMaxTrackedLocals> liveOutExn) {
780 auto const dceState = dce_visit(index, ctx, blk,
781 stateIn, liveOut, liveOutExn);
782 if (!dceState) return std::bitset<kMaxTrackedLocals>{};
784 // Remove all instructions that were marked dead, and replace
785 // instructions that can be replaced with pops but aren't dead.
786 for (auto idx = blk->hhbcs.size(); idx-- > 0;) {
787 if (!dceState->markedDead.test(idx)) continue;
788 blk->hhbcs.erase(begin(blk->hhbcs) + idx);
791 // Blocks must be non-empty. Make sure we don't change that.
792 if (blk->hhbcs.empty()) {
793 blk->hhbcs.push_back(bc::Nop {});
796 return dceState->usedLocals;
799 //////////////////////////////////////////////////////////////////////
// Mark every non-parameter local that global DCE proved unused as killed,
// so later passes can drop it from the function.
void remove_unused_locals(Context const ctx,
                          std::bitset<kMaxTrackedLocals> usedLocals) {
  if (!options.RemoveUnusedLocals) return;
  auto const func = ctx.func;

  /*
   * Removing unused locals in closures requires checking which ones
   * are captured variables so we can remove the relevant properties,
   * and then we'd have to mutate the CreateCl callsite, so we don't
   * bother for now.
   *
   * Note: many closure bodies have unused $this local, because of
   * some emitter quirk, so this might be worthwhile.
   */
  if (func->isClosureBody) return;

  // Skip parameters: only locals after the params are candidates.
  for (auto loc = func->locals.begin() + func->params.size();
       loc != func->locals.end(); ++loc) {
    if (loc->killed) {
      // Already killed by an earlier pass; it must really be unused.
      assert(loc->id < kMaxTrackedLocals && !usedLocals.test(loc->id));
      continue;
    }
    if (loc->id < kMaxTrackedLocals && !usedLocals.test(loc->id)) {
      FTRACE(2, " killing: {}\n", local_string(*func, loc->id));
      const_cast<php::Local&>(*loc).killed = true;
    }
  }
}
830 //////////////////////////////////////////////////////////////////////
834 void local_dce(const Index& index,
835 const FuncAnalysis& ainfo,
836 borrowed_ptr<php::Block> const blk,
837 const State& stateIn) {
838 Trace::Bump bumper{Trace::hhbbc_dce, kSystemLibBump,
839 is_systemlib_part(*ainfo.ctx.unit)};
841 // For local DCE, we have to assume all variables are in the
842 // live-out set for the block.
843 auto allLive = std::bitset<kMaxTrackedLocals>();
844 allLive.set();
845 optimize_dce(index, ainfo.ctx, blk, stateIn, allLive, allLive);
848 //////////////////////////////////////////////////////////////////////
850 void global_dce(const Index& index, const FuncAnalysis& ai) {
851 Trace::Bump bumper{Trace::hhbbc_dce, kSystemLibBump,
852 is_systemlib_part(*ai.ctx.unit)};
854 auto rpoId = [&] (borrowed_ptr<php::Block> blk) {
855 return ai.bdata[blk->id].rpoId;
858 FTRACE(1, "|---- global DCE analyze ({})\n", show(ai.ctx));
859 FTRACE(2, "{}", [&] {
860 using namespace folly::gen;
861 auto i = uint32_t{0};
862 return from(ai.ctx.func->locals)
863 | mapped(
864 [&] (const php::Local& l) {
865 return folly::sformat(" {} {}\n",
866 i++, local_string(*ai.ctx.func, l.id));
868 | unsplit<std::string>("");
869 }());
872 * Create a DceAnalysis for each block, indexed by rpo id.
874 * Here we want to pre-compute the transfer function for each block,
875 * so we don't need to visit each instruction repeatedly during the
876 * fixed point computation. The transfer function is a function of
877 * the set of locals read and killed in the block, and does not
878 * depend on the final live out state, so we can compute it here.
880 auto blockAnalysis = std::vector<DceAnalysis>{};
881 for (auto& b : ai.rpoBlocks) {
882 FTRACE(2, "block #{}\n", b->id);
883 auto const dinfo = analyze_dce(
884 index,
885 ai.ctx,
887 ai.bdata[b->id].stateIn
889 blockAnalysis.push_back(dinfo);
893 * States for each block, indexed by RPO id.
895 * The liveOut state is the union of liveIn states of each normal
896 * successor, and liveOutExn is the union of liveIn states of each
897 * exceptional successor.
899 struct BlockState {
900 std::bitset<kMaxTrackedLocals> liveOut;
901 std::bitset<kMaxTrackedLocals> liveOutExn;
903 std::vector<BlockState> blockStates(ai.rpoBlocks.size());
906 * Set of block reverse post order ids that still need to be
907 * visited. This is ordered by std::greater, so we'll generally
908 * visit blocks before their predecessors. (The algorithm doesn't
909 * need this for correctness, but it might make it iterate less.)
911 * Every block must be visited at least once, so we throw them all
912 * in to start.
914 auto incompleteQ = dataflow_worklist<uint32_t,std::less<uint32_t>>(
915 ai.rpoBlocks.size()
917 for (auto& b : ai.rpoBlocks) incompleteQ.push(rpoId(b));
919 auto const normalPreds = computeNormalPreds(ai.rpoBlocks);
920 auto const factoredPreds = computeFactoredPreds(ai.rpoBlocks);
923 * Iterate on live out states until we reach a fixed point.
925 * This algorithm treats the exceptional live-out states differently
926 * from the live-out states during normal control flow. The
927 * liveOutExn sets only take part in the liveIn computation when the
928 * block has factored exits.
930 while (!incompleteQ.empty()) {
931 auto const blk = ai.rpoBlocks[incompleteQ.pop()];
933 FTRACE(2, "block #{}\n", blk->id);
935 auto const liveOut = blockStates[rpoId(blk)].liveOut;
936 auto const liveOutExn = blockStates[rpoId(blk)].liveOutExn;
937 auto const transfer = blockAnalysis[rpoId(blk)];
938 auto const liveIn = transfer.gen | (liveOut & ~transfer.kill)
939 | (liveOutExn & ~transfer.killExn);
941 FTRACE(2, "live out : {}\n"
942 "out exn : {}\n"
943 "gen : {}\n"
944 "kill : {}\n"
945 "kill exn : {}\n"
946 "live in : {}\n",
947 bits_string(ai.ctx.func, liveOut),
948 bits_string(ai.ctx.func, liveOutExn),
949 bits_string(ai.ctx.func, transfer.gen),
950 bits_string(ai.ctx.func, transfer.kill),
951 bits_string(ai.ctx.func, transfer.killExn),
952 bits_string(ai.ctx.func, liveIn));
954 // Merge the liveIn into the liveOut of each normal predecessor.
955 // If the set changes, reschedule that predecessor.
956 for (auto& pred : normalPreds[blk->id]) {
957 FTRACE(2, " -> {}\n", pred->id);
958 auto& predState = blockStates[rpoId(pred)].liveOut;
959 auto const oldPredState = predState;
960 predState |= liveIn;
961 if (predState != oldPredState) {
962 incompleteQ.push(rpoId(pred));
966 // Merge the liveIn into the liveOutExn state for each exceptional
 967 // predecessor. The liveIn computation also depends on the
968 // liveOutExn state, so again reschedule if it changes.
969 for (auto& pred : factoredPreds[blk->id]) {
970 FTRACE(2, " => {}\n", pred->id);
971 auto& predState = blockStates[rpoId(pred)].liveOutExn;
972 auto const oldPredState = predState;
973 predState |= liveIn;
974 if (predState != oldPredState) {
975 incompleteQ.push(rpoId(pred));
981 * Now that we're at a fixed point, use the propagated states to
982 * remove instructions that don't need to be there.
984 FTRACE(1, "|---- global DCE optimize ({})\n", show(ai.ctx));
985 std::bitset<kMaxTrackedLocals> usedLocals;
986 for (auto& b : ai.rpoBlocks) {
987 FTRACE(2, "block #{}\n", b->id);
988 usedLocals |= optimize_dce(
989 index,
990 ai.ctx,
992 ai.bdata[b->id].stateIn,
993 blockStates[rpoId(b)].liveOut,
994 blockStates[rpoId(b)].liveOutExn
998 FTRACE(1, " used locals: {}\n", bits_string(ai.ctx.func, usedLocals));
999 remove_unused_locals(ai.ctx, usedLocals);
1002 //////////////////////////////////////////////////////////////////////
1004 void remove_unreachable_blocks(const Index& index, const FuncAnalysis& ainfo) {
// Replaces the body of every block the analysis proved unreachable with a
// marker String + Fatal sequence, then (optionally, under
// options.RemoveDeadBlocks) rewrites conditional jumps whose taken targets
// are all unreachable so the branch disappears.
// NOTE(review): `index` is unused in the visible portion of this function --
// confirm against the full file before removing the parameter.
// A block is reachable iff the interpreter ever initialized its input state.
1005 auto reachable = [&](BlockId id) {
1006 return ainfo.bdata[id].stateIn.initialized;
// First pass: stub out unreachable blocks. The original source location of
// the block's first bytecode is kept so the synthetic fatal still maps back
// to real code in stack traces.
1009 for (auto& blk : ainfo.rpoBlocks) {
1010 if (reachable(blk->id)) continue;
1011 auto const srcLoc = blk->hhbcs.front().srcLoc;
1012 blk->hhbcs = {
1013 bc_with_loc(srcLoc, bc::String { s_unreachable.get() }),
1014 bc_with_loc(srcLoc, bc::Fatal { FatalOp::Runtime })
// Fatal never falls through, so sever the fallthrough edge.
1016 blk->fallthrough = NoBlockId;
1019 if (!options.RemoveDeadBlocks) return;
// Second pass: if every taken edge out of a block's final instruction leads
// to an unreachable target, the branch can never actually be taken.
1021 for (auto& blk : ainfo.rpoBlocks) {
1022 auto reachableTargets = false;
1023 forEachTakenEdge(blk->hhbcs.back(), [&] (BlockId id) {
1024 if (reachable(id)) reachableTargets = true;
1026 if (reachableTargets) continue;
1027 switch (blk->hhbcs.back().op) {
// JmpNZ/JmpZ consume a stack cell; replace the dead branch with a PopC so
// the eval stack stays balanced and control simply falls through.
1028 case Op::JmpNZ:
1029 case Op::JmpZ:
1030 blk->hhbcs.back() = bc_with_loc(blk->hhbcs.back().srcLoc, bc::PopC {});
1031 break;
1032 default:
1033 break;
1038 bool merge_blocks(const FuncAnalysis& ainfo) {
// Coalesces straight-line fallthrough chains: a block absorbs its
// fallthrough successor when it is that successor's only predecessor, it
// has no other successors itself, and both blocks share the same exception
// region (exnNode) and section. Returns true if any block was merged away.
1039 auto& func = *ainfo.ctx.func;
1040 FTRACE(2, "merge_blocks: {}\n", func.name);
// One bit per block id: hasPred marks "seen at least one predecessor",
// multiplePreds/multipleSuccs mark blocks that cannot take part in a merge.
1042 boost::dynamic_bitset<> hasPred(func.blocks.size());
1043 boost::dynamic_bitset<> multiplePreds(func.blocks.size());
1044 boost::dynamic_bitset<> multipleSuccs(func.blocks.size());
// NOTE(review): `removed` is never referenced in the visible portion of
// this function -- looks vestigial; confirm against the full file.
1045 boost::dynamic_bitset<> removed(func.blocks.size());
1046 auto reachable = [&](php::Block& b) {
1047 auto const& state = ainfo.bdata[b.id].stateIn;
1048 return state.initialized && !state.unreachable;
1050 // find all the blocks with multiple preds; they can't be merged
1051 // into their predecessors
1052 for (auto& blk : func.blocks) {
// Blocks already marked dead (id == NoBlockId) are skipped everywhere.
1053 if (blk->id == NoBlockId) continue;
1054 int numSucc = 0;
// Pretend unreachable blocks have multiple preds so nothing merges into
// them.
1055 if (!reachable(*blk)) multiplePreds[blk->id] = true;
1056 forEachSuccessor(*blk, [&](BlockId succId) {
1057 if (hasPred[succId]) {
1058 multiplePreds[succId] = true;
1059 } else {
1060 hasPred[succId] = true;
1062 numSucc++;
1064 if (numSucc > 1) multipleSuccs[blk->id] = true;
// Entry points (main entry and DV entries) are reached "from outside" the
// CFG, so they must never be merged into a predecessor either.
1066 multiplePreds[func.mainEntry] = true;
1067 for (auto& blkId: func.dvEntries) {
1068 if (blkId != NoBlockId) {
1069 multiplePreds[blkId] = true;
1073 bool removedAny = false;
1074 for (auto& blk : func.blocks) {
1075 if (blk->id == NoBlockId) continue;
// Keep absorbing fallthrough successors until the chain is blocked.
1076 while (blk->fallthrough != NoBlockId) {
1077 auto nxt = borrow(func.blocks[blk->fallthrough]);
// Merging is only legal when control provably flows blk -> nxt and nowhere
// else, and both blocks agree on exception region and section.
1078 if (multipleSuccs[blk->id] ||
1079 multiplePreds[nxt->id] ||
1080 blk->exnNode != nxt->exnNode ||
1081 blk->section != nxt->section) {
1082 break;
1085 FTRACE(1, "merging: {} into {}\n", (void*)nxt, (void*)blk.get());
// blk inherits nxt's outgoing CFG shape.
1086 multipleSuccs[blk->id] = multipleSuccs[nxt->id];
1087 blk->fallthrough = nxt->fallthrough;
1088 blk->fallthroughNS = nxt->fallthroughNS;
// Union the factored (exceptional) exits of both blocks; a std::set is
// used to dedup and order the combined list before copying it back.
1089 if (nxt->factoredExits.size()) {
1090 if (blk->factoredExits.size()) {
1091 std::set<BlockId> exitSet;
1092 std::copy(begin(blk->factoredExits), end(blk->factoredExits),
1093 std::inserter(exitSet, begin(exitSet)));
1094 std::copy(nxt->factoredExits.begin(), nxt->factoredExits.end(),
1095 std::inserter(exitSet, begin(exitSet)));
1096 blk->factoredExits.resize(exitSet.size());
1097 std::copy(begin(exitSet), end(exitSet), blk->factoredExits.begin());
1098 nxt->factoredExits = decltype(nxt->factoredExits) {};
1099 } else {
1100 blk->factoredExits = std::move(nxt->factoredExits);
// Append nxt's bytecode to blk, then leave nxt as a dead Nop shell
// (id = NoBlockId) so later passes ignore it.
1103 std::copy(nxt->hhbcs.begin(), nxt->hhbcs.end(),
1104 std::back_inserter(blk->hhbcs));
1105 nxt->fallthrough = NoBlockId;
1106 nxt->id = NoBlockId;
1107 nxt->hhbcs = { bc::Nop {} };
1108 removedAny = true;
1112 return removedAny;
1115 //////////////////////////////////////////////////////////////////////