hphp/hhbbc/dce.cpp

   1 /*
   2    +----------------------------------------------------------------------+
   3    | HipHop for PHP                                                       |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15 */
  16 #include "hphp/hhbbc/dce.h"
  17
  18 #include <vector>
  19 #include <string>
  20 #include <utility>
  21 #include <bitset>
  22 #include <sstream>
  23 #include <algorithm>
  24 #include <set>
  25
  26 #include <boost/dynamic_bitset.hpp>
  27 #include <boost/container/flat_map.hpp>
  28
  29 #include <folly/gen/Base.h>
  30 #include <folly/gen/String.h>
  31
  32 #include "hphp/util/bitops.h"
  33 #include "hphp/util/dataflow-worklist.h"
  34 #include "hphp/util/trace.h"
  35
  36 #include "hphp/hhbbc/analyze.h"
  37 #include "hphp/hhbbc/cfg-opts.h"
  38 #include "hphp/hhbbc/cfg.h"
  39 #include "hphp/hhbbc/interp-state.h"
  40 #include "hphp/hhbbc/interp.h"
  41 #include "hphp/hhbbc/representation.h"
  42 #include "hphp/hhbbc/type-system.h"
  43 #include "hphp/hhbbc/unit-util.h"
  44
  45 namespace HPHP { namespace HHBBC {
  46
  47 TRACE_SET_MOD(hhbbc_dce);
  48
  49 //////////////////////////////////////////////////////////////////////
  50
  51 /*
  52  * This module contains code to perform both local DCE and global DCE.
  53  *
  54  * The local DCE algorithm addresses dead eval stack manipulations and
  55  * dead stores to locals that are visible within a single basic block.
  56  *
  57  * The global DCE performs a liveness analysis and then uses this
  58  * information to allow dead stores to locals to be eliminated across
  59  * blocks.
  60  *
  61  * Both types of DCE here need to be type-aware, but they must visit
  62  * blocks backward.  They accomplish this by forward-propagating the
  63  * block input states from a FuncAnalysis to each instruction in the
  64  * block prior to doing the backward iteration.
  65  *
  66  * Eval stack:
  67  *
  68  *   During backward traversal of a block, we maintain a "backwards"
  69  *   stack, indicating which eval stack slots are going to be required
  70  *   in the future or not.
  71  *
  72  *   During this traversal, each instruction that pops when going
  73  *   forward instead "pushes" information about whether that input
  74  *   will be required.  If it is not required, it also pushes an
  75  *   accumulating set of instruction ids that must be removed if the
  76  *   instruction which produces the stack slot is removed.  (All
  77  *   instructions in these sets must be removed if any are, in order
  78  *   to keep the stack depths correct.)
  79  *
  80  *   Similarly, each instruction that would push a value going forward
  81  *   instead "pops" the information about whether its stack output is
  82  *   going to be needed.  If not, the instruction can mark itself (and
  83  *   all downstream instructions that depended on it) as removable.
  84  *
  85  * Locals:
  86  *
  87  *   While a block is iterated backward, the set of live locals is
  88  *   tracked.  The initial state of this live set depends on whether
  89  *   we are performing global or local DCE, and in the local case
  90  *   includes all locals in the function.
  91  *
  92  *   When a local may be read, it is added to the live set.  When a
  93  *   local is definitely-written, it is removed from the set.
  94  *
  95  *   If a instruction may write to a local that is not live, it can be
  96  *   marked as removable if it is known to have no other side-effects.
  97  *   Currently this is only hooked up to SetL.
  98  *
  99  * Liveness analysis:
 100  *
 101  *   The global algorithm first performs a liveness analysis to
 102  *   propagate live out sets to each block.
 103  *
 104  *   This analysis is basically normal, but slightly modified from the
 105  *   usual in order to deal with the way exceptional control flow is
 106  *   represented in our CFG (with factored edges).
 107  *
 108  *   It essentially is the liveness analysis algorithm described at
 109  *   http://dl.acm.org/citation.cfm?id=316171, except that we don't
 110  *   need to track kill sets for each PEI because we don't have a
 111  *   means of determining which factored edges may be traversed by any
 112  *   given PEI.  (Maybe that may be more usual to have in the context
 113  *   of a language with declared exception clauses...)
 114  *
 115  *   Since we only deal with the most pessimistic exception case, this
 116  *   means for each block we just determine a gen and kill set, along
 117  *   with a subset of the latter set that is the locals that must be
 118  *   killed before any PEI.  (See killBeforePEI below.)
 119  *
 120  * Final note about types:
 121  *
 122  *   Global DCE can change the types of locals in a way that spans
 123  *   basic blocks.  For a simple example, take the following code:
 124  *
 125  *      // $foo :: Uninit here.
 126  *      $foo = 12;
 127  *      // ... code that breaks a block ...
 128  *      $foo = 100;
 129  *
 130  *   If the first store to $foo is removed, the type of $foo before
 131  *   the SetL in the later block is now Uninit, instead of Int.
 132  *
 133  *   In addition, by killing dead eval stack slots across basic
 134  *   blocks, the saved entry stacks in the FuncAnalysis no longer
 135  *   match the actual stack depths.
 136  *
 137  *   This means that after calling global_dce on a function, its
 138  *   FuncAnalysis can no longer be considered accurate.
 139  *
 140  *   Moreover, since global DCE makes use of type information to
 141  *   determine whether a store is dead, we need to be careful that
 142  *   this never changes whether the assumptions used to perform DCE
 143  *   were correct.
 144  *
 145  *   This is ok right now: DCE only uses the types to figure out which
 146  *   values can either have lifetimes extended or shortened without
 147  *   visible side-effects, or which values may be refs (so we can't
 148  *   omit stores to them).  If we omit a store that changes the type
 149  *   of a local globally, this means the new type cannot be different
 150  *   with regard to these features (or we wouldn't have omitted it),
 151  *   which means we won't have made different decisions elsewhere in
 152  *   the algorithm based on the new type.
 153  *
 154  *   Specifically: we will never omit a store where the old local type
 155  *   was something that could've had side effects, and if a new value
 156  *   is stored into a local where destroying it could have
 157  *   side-effects, some point along the path to the function exit (if
 158  *   nothing else, the RetC) will have added it to the gen set and the
 159  *   store also won't be removable.
 160  *
 161  *   In contrast, note that local DCE can not change types across
 162  *   block boundaries.
 163  *
 164  */
 165
 166 namespace {
 167
 168 //////////////////////////////////////////////////////////////////////
 169
 170 // Returns whether decrefing a type could run a destructor.
 171 bool couldRunDestructor(const Type& t) {
 172   // We could check for specialized objects to see if they don't
 173   // declare a user-defined destructor, but currently don't.
 174   return
 175     t.couldBe(TObj) ||
 176     t.couldBe(TRef) ||
 177     t.couldBe(TCArrN) ||
 178     t.couldBe(TCVecN) ||
 179     t.couldBe(TCDictN);
 180 }
 181
 182 // Returns whether a set on something containing type t could have
 183 // side-effects (running destuctors, or modifying arbitrary things via
 184 // a Ref).
 185 bool setCouldHaveSideEffects(const Type& t) {
 186   return
 187     t.couldBe(TObj) ||
 188     t.couldBe(TRef) ||
 189     t.couldBe(TCArrN) ||
 190     t.couldBe(TCVecN) ||
 191     t.couldBe(TCDictN);
 192 }
 193
 194 // Some reads could raise warnings and run arbitrary code.
 195 bool readCouldHaveSideEffects(const Type& t) {
 196   return t.couldBe(TUninit);
 197 }
 198
 199 //////////////////////////////////////////////////////////////////////
 200
 201 /*
 202  * Use information of a stack cell.
 203  */
 204 enum class Use {
 205   // Indicates that the cell is (unconditionally) not used.
 206   Not,
 207
 208   // Indicates that the cell is (possibly) used.
 209   Used,
 210
 211   /*
 212    * Indicates that the cell is only used if it was the last reference alive.
 213    * For instance, a PopC will call the destructor of the top-of-stack object
 214    * if it was the last reference alive, and this counts as an example of
 215    * 'UsedIfLastRef'.
 216    *
 217    * If the producer of the cell knows that it is not the last reference, then
 218    * it can treat Use::UsedIfLastRef as being equivalent to Use::Not.
 219    */
 220   UsedIfLastRef,
 221 };
 222
 223 struct InstrId {
 224   BlockId blk;
 225   uint32_t idx;
 226 };
 227
 228 using StkId = InstrId;
 229
 230 bool operator<(const InstrId& i1, const InstrId& i2) {
 231   if (i1.blk != i2.blk) return i1.blk < i2.blk;
 232   // sort by decreasing idx so that kill_marked_instrs works
 233   return i1.idx > i2.idx;
 234 }
 235
 236 enum class DceAction {
 237   Kill,
 238   PopOutputs
 239 };
 240
 241 const char* show(DceAction action) {
 242   switch (action) {
 243     case DceAction::Kill: return "Kill";
 244     case DceAction::PopOutputs: return "PopOutputs";
 245   };
 246   not_reached();
 247 }
 248
 249 using DceActionMap = std::map<InstrId, DceAction>;
 250
 251 struct UseInfo {
 252   explicit UseInfo(Use u) : usage(u) {}
 253   UseInfo(Use u, DceActionMap&& ids) : usage(u), actions(std::move(ids)) {}
 254
 255   Use usage;
 256   /*
 257    * Set of instructions that should be removed if we decide to
 258    * discard the corresponding stack slot.
 259    */
 260   DceActionMap actions;
 261   /*
 262    * Used for stack slots that are live across blocks to indicate a
 263    * stack slot that should be considered Used at the entry to blk.
 264    *
 265    * If its not live across blocks, popper.blk will stay NoBlockId.
 266    */
 267   StkId popper { NoBlockId, 0 };
 268 };
 269
 270 std::string show(InstrId id) {
 271   return folly::sformat("{}:{}", id.blk, id.idx);
 272 }
 273
 274 //////////////////////////////////////////////////////////////////////
 275
 276 struct DceState {
 277   explicit DceState(const FuncAnalysis& ainfo) : ainfo(ainfo) {}
 278   const FuncAnalysis& ainfo;
 279   /*
 280    * Used to accumulate a set of blk/stack-slot pairs that
 281    * should be marked used
 282    */
 283   std::set<StkId> usedStackSlots;
 284
 285   /*
 286    * Eval stack use information.  Stacks slots are marked as being
 287    * needed or not needed.  If they aren't needed, they carry a set of
 288    * instructions that must be removed if the instruction that
 289    * produces the stack value is also removable.
 290    */
 291   std::vector<UseInfo> stack;
 292
 293   /*
 294    * Locals known to be live at a point in a DCE walk.  This is used
 295    * when we're actually acting on information we discovered during
 296    * liveness analysis.
 297    */
 298   std::bitset<kMaxTrackedLocals> liveLocals;
 299
 300   /*
 301    * Class-ref slots known to be live at a point in a DCE walk.  This is used
 302    * when we're actually acting on information we discovered during liveness
 303    * analysis.
 304    */
 305   std::bitset<kMaxTrackedClsRefSlots> liveSlots;
 306
 307   /*
 308    * These variable sets are used to compute the transfer function for
 309    * the global liveness analysis in global_dce.
 310    *
 311    * The gen set accumulates the set of variables in the block with
 312    * upward-exposed uses.  The kill set is the set of variables the
 313    * block will re-define, ignoring exceptional control flow.
 314    *
 315    * The killBeforePEI set is the set of variables killed before a
 316    * PEI.  Propagation of liveness needs to use this (always more
 317    * conservative) set instead of kill when crossing a factored exit
 318    * edge.
 319    */
 320   std::bitset<kMaxTrackedLocals> locGen;
 321   std::bitset<kMaxTrackedLocals> locKill;
 322   std::bitset<kMaxTrackedLocals> locKillBeforePEI;
 323
 324   /*
 325    * Instructions marked in this set will be processed by dce_perform.
 326    * They must all be processed together to keep eval stack consumers
 327    * and producers balanced.
 328    */
 329   DceActionMap actionMap;
 330
 331   /*
 332    * The set of locals and class-ref slots that were ever live in this block.
 333    * (This includes ones that were live going out of this block.)  This set is
 334    * used by global DCE to remove locals and class-ref slots that are completely
 335    * unused in the entire function.
 336    */
 337   std::bitset<kMaxTrackedLocals> usedLocals;
 338   std::bitset<kMaxTrackedClsRefSlots> usedSlots;
 339
 340   /*
 341    * Mapping of class-ref slots to instruction sets. If the usage of a class-ref
 342    * slot is removed, all the instructions currently in the set must also be
 343    * removed.
 344    */
 345   std::vector<DceActionMap> slotDependentActions;
 346 };
 347
 348 //////////////////////////////////////////////////////////////////////
 349
 350 const char* show(Use u) {
 351   switch (u) {
 352   case Use::Not:              return "0";
 353   case Use::Used:             return "U";
 354   case Use::UsedIfLastRef:    return "UL";
 355   }
 356   not_reached();
 357 }
 358
 359 std::string show(const DceActionMap::value_type& elm) {
 360   return folly::sformat("{}@{}", show(elm.first), show(elm.second));
 361 }
 362
 363 std::string show(const DceActionMap& actions) {
 364   using namespace folly::gen;
 365   return from(actions)
 366     | map([](const DceActionMap::value_type& elm) { return show(elm); })
 367     | unsplit<std::string>(";")
 368     ;
 369 }
 370
 371 std::string DEBUG_ONLY show(const UseInfo& ui) {
 372   return folly::sformat("{}@{}", show(ui.usage), show(ui.actions));
 373 }
 374
 375 std::string DEBUG_ONLY loc_bits_string(borrowed_ptr<const php::Func> func,
 376                                        std::bitset<kMaxTrackedLocals> locs) {
 377   std::ostringstream out;
 378   if (func->locals.size() < kMaxTrackedLocals) {
 379     for (auto i = func->locals.size(); i-- > 0;) {
 380       out << (locs.test(i) ? '1' : '0');
 381     }
 382   } else {
 383     out << locs;
 384   }
 385   return out.str();
 386 }
 387
 388 std::string DEBUG_ONLY
 389 slot_bits_string(borrowed_ptr<const php::Func> func,
 390                  std::bitset<kMaxTrackedClsRefSlots> slots) {
 391   std::ostringstream out;
 392   if (func->numClsRefSlots < kMaxTrackedClsRefSlots) {
 393     for (auto i = 0; i < func->numClsRefSlots; ++i) {
 394       out << (slots.test(i) ? '1' : '0');
 395     }
 396   } else {
 397     out << slots;
 398   }
 399   return out.str();
 400 }
 401
 402 //////////////////////////////////////////////////////////////////////
 403
 404 struct Env {
 405   DceState& dceState;
 406   const Bytecode& op;
 407   InstrId id;
 408   const State& stateBefore;
 409   const StepFlags& flags;
 410 };
 411
 412 //////////////////////////////////////////////////////////////////////
 413
 414 void markSetDead(Env& env, const DceActionMap& actions) {
 415   env.dceState.actionMap.emplace(env.id, DceAction::Kill);
 416   FTRACE(2, "     marking {} {}\n", show(env.id), show(actions));
 417   for (auto& i : actions) env.dceState.actionMap.insert(i);
 418 }
 419
 420 void markUisDead(Env& env, const UseInfo& ui) {
 421   markSetDead(env, ui.actions);
 422 }
 423
 424 template<typename... Args>
 425 void markUisDead(Env& env, const UseInfo& ui, Args&&... args) {
 426   markUisDead(env, ui);
 427   markUisDead(env, std::forward<Args>(args)...);
 428 }
 429
 430 /*
 431  * During global dce, a particular stack element could be pushed
 432  * on multiple paths. eg:
 433  *
 434  *   $a ? f() : 42
 435  *
 436  * If f() is known to return a non-counted type, we have UnboxRNop ->
 437  * PopC on one path, and Int 42 -> PopC on another, and the PopC marks
 438  * its value Use::Not. When we get to the Int 42 it thinkgs both
 439  * instructions can be killed; but when we get to the UnboxRNop it
 440  * does nothing. So any time we decide to ignore a Use::Not or
 441  * Use::UsedIfLastRef, we have to record that fact so we can prevent
 442  * the other paths from trying to use that information. We communicate
 443  * this via the ui.popper field, and the usedStackSlots set.
 444  *
 445  * Note that a more sophisticated system would insert a PopC after the
 446  * UnboxRNop, and allow the 42/PopC to be removed (work in progress).
 447  */
 448 void markUisLive(Env& env, const UseInfo& ui) {
 449   if (ui.usage != Use::Used && ui.popper.blk != NoBlockId) {
 450     env.dceState.usedStackSlots.insert(ui.popper);
 451   }
 452 }
 453
 454 template<typename... Args>
 455 void markUisLive(Env& env, const UseInfo& ui, Args&&... args) {
 456   markUisLive(env, ui);
 457   markUisLive(env, std::forward<Args>(args)...);
 458 }
 459
 460 void markDead(Env& env) {
 461   env.dceState.actionMap.emplace(env.id, DceAction::Kill);
 462   FTRACE(2, "     marking {}\n", show(env.id));
 463 }
 464
 465 //////////////////////////////////////////////////////////////////////
 466 // eval stack
 467
 468 void pop(Env& env, UseInfo&& ui) {
 469   FTRACE(2, "      pop({})\n", show(ui.usage));
 470   env.dceState.stack.push_back(std::move(ui));
 471 }
 472
 473 void pop(Env& env, Use u, DceActionMap actions) {
 474   pop(env, {u, std::move(actions)});
 475 }
 476
 477 void pop(Env& env, Use u, InstrId id) {
 478   pop(env, {u, {{id, DceAction::Kill}}});
 479 }
 480
 481 void pop(Env& env) { pop(env, Use::Used, DceActionMap{}); }
 482
 483 Type topT(Env& env, uint32_t idx = 0) {
 484   assert(idx < env.stateBefore.stack.size());
 485   return env.stateBefore.stack[env.stateBefore.stack.size() - idx - 1].type;
 486 }
 487
 488 Type topC(Env& env, uint32_t idx = 0) {
 489   auto const t = topT(env, idx);
 490   assert(t.subtypeOf(TInitCell));
 491   return t;
 492 }
 493
 494 void discard(Env& env) {
 495   pop(env, Use::Not, env.id);
 496 }
 497
 498 bool allUnused() { return true; }
 499 template<class... Args>
 500 bool allUnused(const UseInfo& ui, Args&&... args) {
 501   return ui.usage == Use::Not &&
 502     allUnused(std::forward<Args>(args)...);
 503 }
 504
 505 bool allUnusedIfNotLastRef() { return true; }
 506 template<class... Args>
 507 bool allUnusedIfNotLastRef(const UseInfo& ui, Args&&... args) {
 508   return (ui.usage == Use::Not || ui.usage == Use::UsedIfLastRef) &&
 509     allUnusedIfNotLastRef(std::forward<Args>(args)...);
 510 }
 511
 512 void combineSets(UseInfo&) {}
 513 template<class... Args>
 514 void combineSets(UseInfo& accum, const UseInfo& ui, Args&&... args) {
 515   accum.actions.insert(begin(ui.actions), end(ui.actions));
 516   if (accum.popper.blk == NoBlockId ||
 517       accum.popper < ui.popper) {
 518     accum.popper = ui.popper;
 519   }
 520   combineSets(accum, std::forward<Args>(args)...);
 521 }
 522
 523 // The instruction wants to be killed, as long as its inputs can be
 524 // killed; combine the UseInfo sets from its outputs, and make an
 525 // appropriate number of Use::Not pops.
 526 template<class... Args>
 527 void passThrough(Env& env, Args&&... args) {
 528   UseInfo ui { Use::Not, {{ env.id, DceAction::Kill }} };
 529   combineSets(ui, std::forward<Args>(args)...);
 530
 531   for (auto i = 1; i < env.op.numPop(); ++i) {
 532     pop(env, UseInfo{ui});
 533   }
 534   pop(env, std::move(ui));
 535 }
 536
 537 bool popCouldRunDestructor(Env& env) {
 538   // If there's an equivLocal, we know that it's not the last
 539   // reference, so popping the stack won't run any destructors.
 540   return
 541     env.stateBefore.stack.back().equivLocal == NoLocalId &&
 542     couldRunDestructor(topC(env));
 543 }
 544
 545 /*
 546  * It may be ok to remove pops on objects with destructors in some scenarios
 547  * (where it won't change the observable point at which a destructor runs).  We
 548  * could also look at the object type and see if it is known that it can't have
 549  * a user-defined destructor.
 550  *
 551  * For now, we mark the cell popped with a Use::UsedIfLastRef. This indicates
 552  * to the producer of the cell that the it is considered used if it could be
 553  * the last reference alive (in which case the destructor would be run on
 554  * Pop). If the producer knows that the cell is not the last reference (e.g. if
 555  * it is a Dup), then Use:UsedIfLastRef is equivalent to Use::Not.
 556  */
 557 void discardNonDtors(Env& env) {
 558   if (popCouldRunDestructor(env)) {
 559     return pop(env, Use::UsedIfLastRef, env.id);
 560   }
 561   discard(env);
 562 }
 563
 564 enum class PushFlags {
 565   MarkLive,
 566   MarkDead,
 567   PassThrough,
 568   PopOutputs
 569 };
 570
 571 bool alwaysPop(const UseInfo& ui) {
 572   return
 573     ui.popper.blk != NoBlockId ||
 574     ui.actions.size() > 1;
 575 }
 576
 577 template<typename... Args>
 578 bool alwaysPop(const UseInfo& ui, Args&&... args) {
 579   return alwaysPop(ui) || alwaysPop(std::forward<Args>(args)...);
 580 }
 581
 582 bool maybePop(Env& env, const UseInfo& ui) {
 583   return
 584     (ui.actions.size() == 1 &&
 585      ui.actions.begin()->first.idx > env.id.idx + 1);
 586 }
 587
 588 template<typename... Args>
 589 bool maybePop(Env& env, const UseInfo& ui, Args&&... args) {
 590   return maybePop(env, ui) || maybePop(env, std::forward<Args>(args)...);
 591 }
 592
 593 template<typename... Args>
 594 bool shouldPopOutputs(Env& env, Args&&... args) {
 595   if (alwaysPop(std::forward<Args>(args)...)) return true;
 596   return maybePop(env, std::forward<Args>(args)...);
 597 }
 598
 599 template<typename... Args>
 600 void popOutputs(Env& env, Args&&... args) {
 601   if (shouldPopOutputs(env, std::forward<Args>(args)...)) {
 602     markUisDead(env, std::forward<Args>(args)...);
 603     env.dceState.actionMap[env.id] = DceAction::PopOutputs;
 604     return;
 605   }
 606   markUisLive(env, std::forward<Args>(args)...);
 607 }
 608
 609 template<typename... Args>
 610 void handle_push(Env& env, PushFlags pf, Args&&... uis) {
 611   switch (pf) {
 612     case PushFlags::MarkLive:
 613       markUisLive(env, std::forward<Args>(uis)...);
 614       return;
 615     case PushFlags::MarkDead:
 616       markUisDead(env, std::forward<Args>(uis)...);
 617       return;
 618     case PushFlags::PassThrough:
 619       passThrough(env, std::move(uis)...);
 620       return;
 621     case PushFlags::PopOutputs:
 622       popOutputs(env, std::forward<Args>(uis)...);
 623       return;
 624   }
 625 }
 626
 627 template<typename F>
 628 void push(Env& env, F fun) {
 629   always_assert(!env.dceState.stack.empty());
 630   auto ui = std::move(env.dceState.stack.back());
 631   env.dceState.stack.pop_back();
 632   FTRACE(2, "      {}@{} = push()\n", show(ui.usage), show(ui.actions));
 633   auto const f = fun(ui);
 634   handle_push(env, f, std::move(ui));
 635 }
 636
 637 void push(Env& env) {
 638   push(env, [](const UseInfo&) { return PushFlags::MarkLive; });
 639 }
 640
 641 template<typename F>
 642 void push2(Env& env, F fun) {
 643   always_assert(env.dceState.stack.size() >= 2);
 644   auto u1 = std::move(env.dceState.stack.back());
 645   env.dceState.stack.pop_back();
 646   FTRACE(2, "      {}@{} = push()\n", show(u1.usage), show(u1.actions));
 647   auto u2 = std::move(env.dceState.stack.back());
 648   env.dceState.stack.pop_back();
 649   FTRACE(2, "      {}@{} = push()\n", show(u1.usage), show(u2.actions));
 650   auto const f = fun(u1, u2);
 651   handle_push(env, f, std::move(u1), std::move(u2));
 652 }
 653
 654 void pushRemovable(Env& env) {
 655   push(env,[] (const UseInfo& ui) {
 656       return ui.usage == Use::Not ? PushFlags::MarkDead : PushFlags::MarkLive;
 657     });
 658 }
 659
 660 //////////////////////////////////////////////////////////////////////
 661 // locals
 662
 663 void addLocGenSet(Env& env, std::bitset<kMaxTrackedLocals> locs) {
 664   FTRACE(4, "      loc-conservative: {}\n",
 665          loc_bits_string(env.dceState.ainfo.ctx.func, locs));
 666   env.dceState.liveLocals |= locs;
 667   env.dceState.locGen |= locs;
 668   env.dceState.locKill &= ~locs;
 669   env.dceState.locKillBeforePEI &= ~locs;
 670 }
 671
 672 void addLocGen(Env& env, uint32_t id) {
 673   FTRACE(2, "      loc-gen: {}\n", id);
 674   if (id >= kMaxTrackedLocals) return;
 675   env.dceState.liveLocals[id] = 1;
 676   env.dceState.locGen[id] = 1;
 677   env.dceState.locKill[id] = 0;
 678   env.dceState.locKillBeforePEI[id] = 0;
 679 }
 680
 681 void addLocKill(Env& env, uint32_t id) {
 682   FTRACE(2, "     loc-kill: {}\n", id);
 683   if (id >= kMaxTrackedLocals) return;
 684   env.dceState.liveLocals[id] = 0;
 685   env.dceState.locGen[id] = 0;
 686   env.dceState.locKill[id] = 1;
 687   env.dceState.locKillBeforePEI[id] = 1;
 688 }
 689
 690 bool isLocLive(Env& env, uint32_t id) {
 691   if (id >= kMaxTrackedLocals) {
 692     // Conservatively assume it's potentially live.
 693     return true;
 694   }
 695   return env.dceState.liveLocals[id];
 696 }
 697
 698 Type locRaw(Env& env, LocalId loc) {
 699   return env.stateBefore.locals[loc];
 700 }
 701
 702 bool setLocCouldHaveSideEffects(Env& env, LocalId loc, bool forExit = false) {
 703   // Normally, if there's an equivLocal this isn't the last reference,
 704   // so overwriting it won't run any destructors. But if we're
 705   // destroying all the locals (eg RetC) they can't all protect each
 706   // other; in that case we require a lower equivLoc to ensure that
 707   // one local from each equivalence set will still be marked as
 708   // having effects (choosing lower numbers also means we mark the
 709   // params as live, which makes it more likely that we can eliminate
 710   // a local).
 711   static_assert(NoLocalId == std::numeric_limits<LocalId>::max(),
 712                 "NoLocalId must be greater than all valid local ids");
 713   if (env.stateBefore.equivLocals.size() > loc) {
 714     auto l = env.stateBefore.equivLocals[loc];
 715     if (l != NoLocalId) {
 716       if (!forExit) return false;
 717       do {
 718         if (l < loc) return false;
 719         l = env.stateBefore.equivLocals[l];
 720       } while (l != loc);
 721     }
 722   }
 723
 724   // If this local is bound to a static, its type will be TRef, but we
 725   // may know the type of the static - but statics *are* live out of
 726   // the function, so don't do this when forExit is set.
 727   if (!forExit &&
 728       env.stateBefore.localStaticBindings.size() > loc &&
 729       env.stateBefore.localStaticBindings[loc] == LocalStaticBinding::Bound &&
 730       !setCouldHaveSideEffects(env.dceState.ainfo.localStaticTypes[loc])) {
 731     return false;
 732   }
 733
 734   return setCouldHaveSideEffects(locRaw(env, loc));
 735 }
 736
 737 void readDtorLocs(Env& env) {
 738   for (auto i = LocalId{0}; i < env.stateBefore.locals.size(); ++i) {
 739     if (setLocCouldHaveSideEffects(env, i, true)) {
 740       addLocGen(env, i);
 741     }
 742   }
 743 }
 744
 745 //////////////////////////////////////////////////////////////////////
 746 // class-ref slots
 747
 748 void readSlot(Env& env, uint32_t id) {
 749   FTRACE(2, "     read-slot: {}\n", id);
 750   if (id >= kMaxTrackedClsRefSlots) return;
 751   env.dceState.liveSlots[id] = 1;
 752   env.dceState.usedSlots[id] = 1;
 753   env.dceState.slotDependentActions[id].clear();
 754 }
 755
 756 // Read a slot, but in a usage that is discardable. If this read actually is
 757 // discarded, then also discard the given instructions.
 758 void readSlotDiscardable(Env& env, uint32_t id, DceActionMap actions) {
 759   FTRACE(2, "     read-slot (discardable): {}\n", id);
 760   if (id >= kMaxTrackedClsRefSlots) return;
 761   env.dceState.liveSlots[id] = 0;
 762   env.dceState.usedSlots[id] = 1;
 763   actions.insert({env.id, DceAction::Kill});
 764   env.dceState.slotDependentActions[id] = std::move(actions);
 765 }
 766
 767 void writeSlot(Env& env, uint32_t id) {
 768   FTRACE(2, "     write-slot: {}\n", id);
 769   if (id >= kMaxTrackedClsRefSlots) return;
 770   env.dceState.liveSlots[id] = 0;
 771   env.dceState.usedSlots[id] = 1;
 772   env.dceState.slotDependentActions[id].clear();
 773 }
 774
 775 bool isSlotLive(Env& env, uint32_t id) {
 776   if (id >= kMaxTrackedClsRefSlots) return true;
 777   return env.dceState.liveSlots[id];
 778 }
 779
 780 DceActionMap slotDependentActions(Env& env, uint32_t id) {
 781   return std::move(env.dceState.slotDependentActions[id]);
 782 }
 783
 784 //////////////////////////////////////////////////////////////////////
 785
 786 /*
 787  * Note that the instructions with popConds are relying on the consumer of the
 788  * values they push to check whether lifetime changes can have side-effects.
 789  *
 790  * For example, in bytecode like this, assuming $x is an object with a
 791  * destructor:
 792  *
 793  *   CGetL $x
 794  *   UnsetL $x
 795  *   // ...
 796  *   PopC $x // dtor should be here.
 797  *
 798  * The PopC will decide it can't be eliminated, which prevents us from
 799  * eliminating the CGetL.
 800  */
 801
 802 void dce(Env& env, const bc::PopC&)          { discardNonDtors(env); }
 803 // For PopV and PopR currently we never know if can't run a
 804 // destructor.
 805 void dce(Env& env, const bc::PopU&)          { discard(env); }
 806 void dce(Env& env, const bc::Int&)           { pushRemovable(env); }
 807 void dce(Env& env, const bc::String&)        { pushRemovable(env); }
 808 void dce(Env& env, const bc::Array&)         { pushRemovable(env); }
 809 void dce(Env& env, const bc::Dict&)          { pushRemovable(env); }
 810 void dce(Env& env, const bc::Vec&)           { pushRemovable(env); }
 811 void dce(Env& env, const bc::Keyset&)        { pushRemovable(env); }
 812 void dce(Env& env, const bc::Double&)        { pushRemovable(env); }
 813 void dce(Env& env, const bc::True&)          { pushRemovable(env); }
 814 void dce(Env& env, const bc::False&)         { pushRemovable(env); }
 815 void dce(Env& env, const bc::Null&)          { pushRemovable(env); }
 816 void dce(Env& env, const bc::NullUninit&)    { pushRemovable(env); }
 817 void dce(Env& env, const bc::File&)          { pushRemovable(env); }
 818 void dce(Env& env, const bc::Dir&)           { pushRemovable(env); }
 819 void dce(Env& env, const bc::NewArray&)      { pushRemovable(env); }
 820 void dce(Env& env, const bc::NewDictArray&)  { pushRemovable(env); }
 821 void dce(Env& env, const bc::NewMixedArray&) { pushRemovable(env); }
 822 void dce(Env& env, const bc::NewCol&)        { pushRemovable(env); }
 823 void dce(Env& env, const bc::CheckProp&)     { pushRemovable(env); }
 824
 825 void dce(Env& env, const bc::UnboxRNop&) {
 826   push(env, [&] (const UseInfo& ui) {
 827       pop(env);
 828       return allUnused(ui) ?
 829         PushFlags::PopOutputs : PushFlags::MarkLive;
 830     });
 831 }
 832
 833 void dce(Env& env, const bc::ClsRefName& op) {
 834   // If the usage of the name is discardable, then so is this read of the
 835   // class-ref.
 836   push(env, [&] (UseInfo& ui) {
 837       if (ui.popper.blk == NoBlockId && allUnused(ui)) {
 838         // we don't yet support cross-block dce of classrefs
 839         readSlotDiscardable(env, op.slot, std::move(ui.actions));
 840       } else {
 841         readSlot(env, op.slot);
 842       }
 843       return PushFlags::MarkLive;
 844     });
 845 }
 846
 847 void dce(Env& env, const bc::ClsRefGetC& op) {
 848   // If the usage of this class-ref slot is dead, then it can be potentially be
 849   // removed if the source is dead as well.
 850   if (!isSlotLive(env, op.slot)) {
 851     auto actions = slotDependentActions(env, op.slot);
 852     actions.insert({env.id, DceAction::Kill});
 853     writeSlot(env, op.slot);
 854     return pop(
 855       env,
 856       popCouldRunDestructor(env) ? Use::UsedIfLastRef : Use::Not,
 857       std::move(actions)
 858     );
 859   }
 860   writeSlot(env, op.slot);
 861   pop(env);
 862 }
 863
 864 void dce(Env& env, const bc::ClsRefGetL& op) {
 865   auto const ty = locRaw(env, op.loc1);
 866
 867   if (!isSlotLive(env, op.slot) && !readCouldHaveSideEffects(ty)) {
 868     markSetDead(env, slotDependentActions(env, op.slot));
 869     return;
 870   }
 871
 872   addLocGen(env, op.loc1);
 873   writeSlot(env, op.slot);
 874 }
 875
 876 void dce(Env& env, const bc::DiscardClsRef& op) {
 877   readSlotDiscardable(env, op.slot, {});
 878 }
 879
 880 void dce(Env& env, const bc::Dup&) {
 881   // Various cases:
 882   //   - If the duplicated value is not used, delete the dup and all
 883   //     its consumers, then
 884   //     o If the original value is unused pass that on to let the
 885   //       instruction which pushed it decide whether to kill it
 886   //     o Otherwise we're done.
 887   //   - Otherwise, if the original value is unused, kill the dup
 888   //     and all dependent instructions.
 889   push(env, [&] (const UseInfo& u1) {
 890       // Dup pushes a cell that is guaranteed to be not the last reference.
 891       // So, it can be eliminated if the cell it pushes is used as either
 892       // Use::Not or Use::UsedIfLastRef.
 893       auto u1_unused = allUnusedIfNotLastRef(u1);
 894       push(env, [&] (UseInfo& u2) {
 895           if (allUnused(u2)) {
 896             if (u1_unused) {
 897               return PushFlags::PassThrough;
 898             }
 899             pop(env);
 900             return PushFlags::MarkDead;
 901           }
 902           pop(env);
 903           return PushFlags::MarkLive;
 904         });
 905       return u1_unused ? PushFlags::MarkDead : PushFlags::MarkLive;
 906     });
 907 }
 908
 909 void dce(Env& env, const bc::CGetL& op) {
 910   push(env, [&] (const UseInfo& ui) {
 911       if (allUnused(ui) && !readCouldHaveSideEffects(locRaw(env, op.loc1))) {
 912         return PushFlags::MarkDead;
 913       }
 914       addLocGen(env, op.loc1);
 915       return PushFlags::MarkLive;
 916     });
 917 }
 918
 919 void dce(Env& env, const bc::CGetL2& op) {
 920   auto const ty = locRaw(env, op.loc1);
 921   addLocGen(env, op.loc1);
 922
 923   push2(env, [&] (const UseInfo& u1, const UseInfo& u2) {
 924       if (readCouldHaveSideEffects(ty) || !allUnused(u1, u2)) {
 925         pop(env);
 926         return PushFlags::MarkLive;
 927       } else {
 928         return PushFlags::PassThrough;
 929       }
 930     });
 931 }
 932
 933 void dce(Env& env, const bc::RetC&)  { pop(env); readDtorLocs(env); }
 934 void dce(Env& env, const bc::Throw&) { pop(env); readDtorLocs(env); }
 935 void dce(Env& env, const bc::Fatal&) { pop(env); readDtorLocs(env); }
 936 void dce(Env& env, const bc::Exit&)  { push(env); pop(env); readDtorLocs(env); }
 937
 938 void dce(Env& env, const bc::SetL& op) {
 939   auto const effects = setLocCouldHaveSideEffects(env, op.loc1);
 940   if (!isLocLive(env, op.loc1) && !effects) {
 941     assert(!locRaw(env, op.loc1).couldBe(TRef) ||
 942            env.stateBefore.localStaticBindings[op.loc1] ==
 943            LocalStaticBinding::Bound);
 944     return markDead(env);
 945   }
 946   push(env);
 947   pop(env);
 948   if (effects || locRaw(env, op.loc1).couldBe(TRef)) {
 949     addLocGen(env, op.loc1);
 950   } else {
 951     addLocKill(env, op.loc1);
 952   }
 953 }
 954
 955 void dce(Env& env, const bc::UnsetL& op) {
 956   auto const oldTy   = locRaw(env, op.loc1);
 957   if (oldTy.subtypeOf(TUninit)) return markDead(env);
 958
 959   // Unsetting a local bound to a static never has side effects
 960   // because the static itself has a reference to the value.
 961   auto const effects =
 962     setLocCouldHaveSideEffects(env, op.loc1) &&
 963     (env.stateBefore.localStaticBindings.size() <= op.loc1 ||
 964      env.stateBefore.localStaticBindings[op.loc1] != LocalStaticBinding::Bound);
 965
 966   if (!isLocLive(env, op.loc1) && !effects) return markDead(env);
 967   if (effects) {
 968     addLocGen(env, op.loc1);
 969   } else {
 970     addLocKill(env, op.loc1);
 971   }
 972 }
 973
 974 /*
 975  * IncDecL is a read-modify-write: can be removed if the local isn't live, the
 976  * set can't have side effects, and no one reads the value it pushes.  If the
 977  * instruction is not dead, always add the local to the set of upward exposed
 978  * uses.
 979  */
 980 void dce(Env& env, const bc::IncDecL& op) {
 981   auto const oldTy   = locRaw(env, op.loc1);
 982   auto const effects = setLocCouldHaveSideEffects(env, op.loc1) ||
 983                          readCouldHaveSideEffects(oldTy);
 984   push(env, [&] (const UseInfo& ui) {
 985       if (!isLocLive(env, op.loc1) && !effects && allUnused(ui)) {
 986         return PushFlags::MarkDead;
 987       }
 988       addLocGen(env, op.loc1);
 989       return PushFlags::MarkLive;
 990     });
 991 }
 992
 993 /*
 994  * SetOpL is like IncDecL, but with the complication that we don't know if we
 995  * can mark it dead when visiting it, because it is going to pop an input but
 996  * unlike SetL doesn't push the value it popped.  For the current scheme we
 997  * just add the local to gen even if we're doing a removable push, which is
 998  * correct but could definitely fail to eliminate some earlier stores.
 999  */
1000 void dce(Env& env, const bc::SetOpL& op) {
1001   auto const oldTy   = locRaw(env, op.loc1);
1002   auto const effects = setLocCouldHaveSideEffects(env, op.loc1) ||
1003                          readCouldHaveSideEffects(oldTy);
1004
1005   push(env, [&] (const UseInfo& ui) {
1006       if (!isLocLive(env, op.loc1) && !effects && allUnused(ui)) {
1007         return PushFlags::PassThrough;
1008       }
1009       pop(env);
1010       return PushFlags::MarkLive;
1011     });
1012
1013   addLocGen(env, op.loc1);
1014 }
1015
1016 /*
1017  * Default implementation is conservative: assume we use all of our
1018  * inputs, and can't be removed even if our output is unused.
1019  *
1020  * We also assume all the locals in the mayReadLocalSet must be
1021  * added to the live local set, and don't remove anything from it.
1022  */
1023
1024 template<typename Op>
1025 typename std::enable_if<has_car<Op>::value,void>::type
1026 dce_slot_default(Env& env, const Op& op) {
1027   readSlot(env, op.slot);
1028 }
1029
1030 template<typename Op>
1031 typename std::enable_if<has_caw<Op>::value,void>::type
1032 dce_slot_default(Env& env, const Op& op) {
1033   writeSlot(env, op.slot);
1034 }
1035
1036 template<typename Op>
1037 typename std::enable_if<!has_car<Op>::value && !has_caw<Op>::value,void>::type
1038 dce_slot_default(Env& env, const Op& op) {}
1039
1040 template<class Op>
1041 void dce(Env& env, const Op& op) {
1042   addLocGenSet(env, env.flags.mayReadLocalSet);
1043   env.dceState.liveLocals |= env.flags.mayReadLocalSet;
1044   for (auto i = uint32_t{0}; i < op.numPush(); ++i) {
1045     push(env);
1046   }
1047   for (auto i = uint32_t{0}; i < op.numPop(); ++i) {
1048     pop(env);
1049   }
1050   dce_slot_default(env, op);
1051 }
1052
1053 void dispatch_dce(Env& env, const Bytecode& op) {
1054 #define O(opcode, ...) case Op::opcode: dce(env, op.opcode); return;
1055   switch (op.op) { OPCODES }
1056 #undef O
1057   not_reached();
1058 }
1059
1060 //////////////////////////////////////////////////////////////////////
1061
1062 folly::Optional<DceState>
1063 dce_visit(const Index& index,
1064           const FuncAnalysis& fa,
1065           borrowed_ptr<const php::Block> const blk,
1066           const State& stateIn,
1067           std::bitset<kMaxTrackedLocals> locLiveOut,
1068           std::bitset<kMaxTrackedLocals> locLiveOutExn,
1069           const folly::Optional<std::vector<UseInfo>>& dceStack) {
1070   if (!stateIn.initialized) {
1071     /*
1072      * Skip unreachable blocks.
1073      *
1074      * For DCE analysis it is ok to assume the transfer function is
1075      * the identity on unreachable blocks (i.e. gen and kill sets are
1076      * empty).  For optimize, we don't need to bother doing anything
1077      * to these---another pass is responsible for removing completely
1078      * unreachable blocks.
1079      */
1080     return folly::none;
1081   }
1082
1083   auto const states = locally_propagated_states(index, fa, blk, stateIn);
1084
1085   auto dceState = DceState{ fa };
1086   dceState.liveLocals = locLiveOut;
1087   dceState.liveSlots.set();
1088   dceState.usedLocals = locLiveOut;
1089
1090   if (dceStack) {
1091     dceState.stack = *dceStack;
1092   } else {
1093     dceState.stack.resize(states.back().first.stack.size(),
1094                           UseInfo { Use::Used });
1095   }
1096   dceState.slotDependentActions.resize(states.back().first.clsRefSlots.size());
1097
1098   for (uint32_t idx = blk->hhbcs.size(); idx-- > 0;) {
1099     auto const& op = blk->hhbcs[idx];
1100
1101     FTRACE(2, "  == #{} {}\n", idx, show(fa.ctx.func, op));
1102
1103     auto visit_env = Env {
1104       dceState,
1105       op,
1106       { blk->id, idx },
1107       states[idx].first,
1108       states[idx].second
1109     };
1110     dispatch_dce(visit_env, op);
1111
1112     /*
1113      * When we see a PEI, we need to start over on the killBeforePEI
1114      * set, and the local-liveness must take into account the fact
1115      * that we could take an exception edge here (or'ing in the
1116      * liveOutExn set).
1117      */
1118     if (states[idx].second.wasPEI) {
1119       FTRACE(2, "    <-- exceptions\n");
1120       dceState.liveLocals |= locLiveOutExn;
1121       dceState.liveSlots.set();
1122       dceState.locKillBeforePEI.reset();
1123     }
1124
1125     dceState.usedLocals |= dceState.liveLocals;
1126
1127     FTRACE(4, "    dce stack: {}\n",
1128       [&] {
1129         using namespace folly::gen;
1130         return from(dceState.stack)
1131           | map([&] (const UseInfo& ui) { return show(ui); })
1132           | unsplit<std::string>(" ");
1133       }()
1134     );
1135     FTRACE(4, "    interp stack: {}\n",
1136       [&] {
1137         using namespace folly::gen;
1138         return from(states[idx].first.stack)
1139           | map([&] (const StackElem& e) { return show(e.type); })
1140           | unsplit<std::string>(" ");
1141       }()
1142     );
1143
1144     FTRACE(4, "    cls-ref slots: {}\n{}\n",
1145       slot_bits_string(dceState.ainfo.ctx.func, dceState.liveSlots),
1146       [&]{
1147         using namespace folly::gen;
1148         auto i = uint32_t{0};
1149         return from(dceState.slotDependentActions)
1150           | mapped(
1151             [&] (const DceActionMap& s) {
1152               if (s.empty()) return std::string{};
1153               return folly::sformat("  {}: [{}]\n",
1154                                     i++, show(s));
1155             })
1156           | unsplit<std::string>("");
1157       }()
1158     );
1159
1160     // We're now at the state before this instruction, so the stack
1161     // sizes must line up.
1162     assert(dceState.stack.size() == states[idx].first.stack.size());
1163   }
1164
1165   return dceState;
1166 }
1167
1168 struct DceAnalysis {
1169   std::bitset<kMaxTrackedLocals> locGen;
1170   std::bitset<kMaxTrackedLocals> locKill;
1171   std::bitset<kMaxTrackedLocals> locKillExn;
1172   std::vector<UseInfo>           stack;
1173   std::set<InstrId>              usedStackSlots;
1174 };
1175
1176 DceAnalysis analyze_dce(const Index& index,
1177                         const FuncAnalysis& fa,
1178                         borrowed_ptr<php::Block> const blk,
1179                         const State& stateIn,
1180                         const folly::Optional<std::vector<UseInfo>>& dceStack) {
1181   // During this analysis pass, we have to assume everything could be
1182   // live out, so we set allLive here.  (Later we'll determine the
1183   // real liveOut sets.)
1184   auto allLocLive = std::bitset<kMaxTrackedLocals>{};
1185   allLocLive.set();
1186   if (auto dceState = dce_visit(index, fa, blk, stateIn,
1187                                 allLocLive, allLocLive, dceStack)) {
1188     return DceAnalysis {
1189       dceState->locGen,
1190       dceState->locKill,
1191       dceState->locKillBeforePEI,
1192       dceState->stack,
1193       dceState->usedStackSlots
1194     };
1195   }
1196   return DceAnalysis {};
1197 }
1198
1199 /*
1200  * Do the actual updates to the bytecodes.
1201  */
1202 void dce_perform(const php::Func& func, const DceActionMap& actionMap) {
1203   for (auto const& elm : actionMap) {
1204     auto const& id = elm.first;
1205     auto const b = borrow(func.blocks[id.blk]);
1206     FTRACE(1, "{}: {}:{} {}\n", show(elm.second),
1207            id.blk, id.idx, show(func, b->hhbcs[id.idx]));
1208     switch (elm.second) {
1209       case DceAction::Kill:
1210         if (b->hhbcs.size() == 1) {
1211           // we don't allow empty blocks
1212           b->hhbcs[0] = bc::Nop {};
1213         } else {
1214           b->hhbcs.erase(b->hhbcs.begin() + id.idx);
1215         }
1216         break;
1217       case DceAction::PopOutputs:
1218       {
1219         auto const numPop = b->hhbcs[id.idx].numPush();
1220         b->hhbcs.insert(b->hhbcs.begin() + id.idx + 1,
1221                         numPop,
1222                         bc::PopC {});
1223         break;
1224       }
1225     }
1226   }
1227 }
1228
1229 struct DceOptResult {
1230   std::bitset<kMaxTrackedLocals> usedLocals;
1231   std::bitset<kMaxTrackedClsRefSlots> usedSlots;
1232   DceActionMap actionMap;
1233 };
1234
1235 DceOptResult
1236 optimize_dce(const Index& index,
1237              const FuncAnalysis& fa,
1238              borrowed_ptr<php::Block> const blk,
1239              const State& stateIn,
1240              std::bitset<kMaxTrackedLocals> locLiveOut,
1241              std::bitset<kMaxTrackedLocals> locLiveOutExn,
1242              const folly::Optional<std::vector<UseInfo>>& dceStack) {
1243   auto dceState = dce_visit(
1244     index, fa, blk,
1245     stateIn,
1246     locLiveOut,
1247     locLiveOutExn,
1248     dceStack
1249   );
1250
1251   if (!dceState) {
1252     return {std::bitset<kMaxTrackedLocals>{},
1253             std::bitset<kMaxTrackedClsRefSlots>{}};
1254   }
1255
1256   return {
1257     std::move(dceState->usedLocals),
1258     std::move(dceState->usedSlots),
1259     std::move(dceState->actionMap)
1260   };
1261 }
1262
1263 //////////////////////////////////////////////////////////////////////
1264
1265 void remove_unused_locals(Context const ctx,
1266                           std::bitset<kMaxTrackedLocals> usedLocals) {
1267   if (!options.RemoveUnusedLocals) return;
1268   auto const func = ctx.func;
1269
1270   /*
1271    * Removing unused locals in closures requires checking which ones
1272    * are captured variables so we can remove the relevant properties,
1273    * and then we'd have to mutate the CreateCl callsite, so we don't
1274    * bother for now.
1275    *
1276    * Note: many closure bodies have unused $this local, because of
1277    * some emitter quirk, so this might be worthwhile.
1278    */
1279   if (func->isClosureBody) return;
1280
1281   for (auto loc = func->locals.begin() + func->params.size();
1282        loc != func->locals.end(); ++loc) {
1283     if (loc->killed) {
1284       assert(loc->id < kMaxTrackedLocals && !usedLocals.test(loc->id));
1285       continue;
1286     }
1287     if (loc->id < kMaxTrackedLocals && !usedLocals.test(loc->id)) {
1288       FTRACE(2, "  killing: {}\n", local_string(*func, loc->id));
1289       const_cast<php::Local&>(*loc).killed = true;
1290     }
1291   }
1292 }
1293
1294 //////////////////////////////////////////////////////////////////////
1295
1296 namespace {
1297
1298 struct WritableClsRefSlotVisitor : boost::static_visitor<ClsRefSlotId*> {
1299   template<class T>
1300   typename std::enable_if<
1301     !has_car<T>::value && !has_caw<T>::value, ClsRefSlotId*
1302   >::type
1303   operator()(T& t) const { return nullptr; }
1304
1305   template<class T>
1306   typename std::enable_if<
1307     has_car<T>::value || has_caw<T>::value, ClsRefSlotId*
1308   >::type
1309   operator()(T& t) const { return &t.slot; }
1310 };
1311
1312 }
1313
1314 // Remove totally unused class-ref slots. Shift any usages downward so we can
1315 // reduce the total number of class-ref slots needed.
1316 void remove_unused_clsref_slots(Context const ctx,
1317                                 std::bitset<kMaxTrackedClsRefSlots> usedSlots) {
1318   if (!options.RemoveUnusedClsRefSlots) return;
1319   auto func = ctx.func;
1320
1321   auto numSlots = func->numClsRefSlots;
1322   if (numSlots > kMaxTrackedClsRefSlots) return;
1323
1324   auto unusedSlots = usedSlots;
1325   unusedSlots.flip();
1326
1327   // Construct a mapping of class-ref slots that should be rewritten to a
1328   // different one. For each totally unused class-ref slot, rewrite a higher
1329   // (used) class-ref to it.
1330   boost::container::flat_map<size_t, size_t> rewrites;
1331   while (numSlots > 0) {
1332     if (!unusedSlots[numSlots - 1]) {
1333       auto const unused = bitset_find_first(unusedSlots);
1334       if (unused >= numSlots) break;
1335       unusedSlots[unused] = false;
1336       unusedSlots[numSlots - 1] = true;
1337       rewrites[numSlots - 1] = unused;
1338     }
1339     --numSlots;
1340   }
1341
1342   FTRACE(2, "    cls-ref rewrites: {}\n",
1343     [&]{
1344       using namespace folly::gen;
1345       return from(rewrites)
1346         | mapped(
1347           [&] (const std::pair<size_t, size_t>& p) {
1348             return folly::sformat("{}->{}", p.first, p.second);
1349           })
1350         | unsplit<std::string>(";");
1351     }()
1352   );
1353
1354   // Do the actually rewriting
1355   if (!rewrites.empty()) {
1356     for (auto& block : func->blocks) {
1357       for (auto& bcop : block->hhbcs) {
1358         if (auto* slot = visit(bcop, WritableClsRefSlotVisitor{})) {
1359           auto const iter = rewrites.find(*slot);
1360           if (iter == rewrites.end()) continue;
1361           auto const oldOp = bcop;
1362           *slot = iter->second;
1363           FTRACE(4, "    rewriting {} to {}\n",
1364                  show(func, oldOp), show(func, bcop));
1365         }
1366       }
1367     }
1368   }
1369
1370   func->numClsRefSlots = numSlots;
1371 }
1372
1373 //////////////////////////////////////////////////////////////////////
1374
1375 }
1376
1377 void local_dce(const Index& index,
1378                const FuncAnalysis& ainfo,
1379                borrowed_ptr<php::Block> const blk,
1380                const State& stateIn) {
1381   Trace::Bump bumper{Trace::hhbbc_dce, kSystemLibBump,
1382     is_systemlib_part(*ainfo.ctx.unit)};
1383
1384   // For local DCE, we have to assume all variables are in the
1385   // live-out set for the block.
1386   auto allLocLive = std::bitset<kMaxTrackedLocals>();
1387   allLocLive.set();
1388   auto const ret = optimize_dce(index, ainfo, blk, stateIn,
1389                                 allLocLive, allLocLive, folly::none);
1390
1391   dce_perform(*ainfo.ctx.func, ret.actionMap);
1392 }
1393
1394 //////////////////////////////////////////////////////////////////////
1395
1396 void global_dce(const Index& index, const FuncAnalysis& ai) {
1397   Trace::Bump bumper{Trace::hhbbc_dce, kSystemLibBump,
1398     is_systemlib_part(*ai.ctx.unit)};
1399
1400   auto rpoId = [&] (borrowed_ptr<php::Block> blk) {
1401     return ai.bdata[blk->id].rpoId;
1402   };
1403
1404   FTRACE(1, "|---- global DCE analyze ({})\n", show(ai.ctx));
1405   FTRACE(2, "{}", [&] {
1406     using namespace folly::gen;
1407     auto i = uint32_t{0};
1408     return from(ai.ctx.func->locals)
1409       | mapped(
1410         [&] (const php::Local& l) {
1411           return folly::sformat("  {} {}\n",
1412                                 i++, local_string(*ai.ctx.func, l.id));
1413         })
1414       | unsplit<std::string>("");
1415   }());
1416
1417   /*
1418    * States for each block, indexed by block id.
1419    *
1420    * The liveOut state is the union of liveIn states of each normal
1421    * successor, and liveOutExn is the union of liveIn states of each
1422    * exceptional successor.
1423    *
1424    * The dceStack is the state of the dceStack at the end of the
1425    * block; it contains a UseInfo for each stack slot, which is the
1426    * union of the corresponding UseInfo's from the start of the
1427    * successor blocks (its folly::none if it has no successor blocks,
1428    * or if the successor blocks have not yet been visited).
1429    */
1430   struct BlockState {
1431     std::bitset<kMaxTrackedLocals>        locLiveOut;
1432     std::bitset<kMaxTrackedLocals>        locLiveOutExn;
1433     folly::Optional<std::vector<UseInfo>> dceStack;
1434   };
1435   std::vector<BlockState> blockStates(ai.ctx.func->blocks.size());
1436
1437   /*
1438    * Set of block reverse post order ids that still need to be
1439    * visited.  This is ordered by std::greater, so we'll generally
1440    * visit blocks before their predecessors.  The algorithm does not
1441    * depend on this for correctness, but it will reduce iteration, and
1442    * the optimality of mergeDceStack depends on visiting at least one
1443    * successor prior to visiting any given block.
1444    *
1445    * Every block must be visited at least once, so we throw them all
1446    * in to start.
1447    */
1448   auto incompleteQ = dataflow_worklist<uint32_t,std::less<uint32_t>>(
1449     ai.rpoBlocks.size()
1450   );
1451   for (auto& b : ai.rpoBlocks) incompleteQ.push(rpoId(b));
1452
1453   auto const normalPreds   = computeNormalPreds(ai.rpoBlocks);
1454   auto const factoredPreds = computeFactoredPreds(ai.rpoBlocks);
1455
1456   /*
1457    * Suppose a stack slot isn't used, but it was pushed on two separate
1458    * paths, one of which could be killed, but the other can't. We have
1459    * to prevent either, so any time we find a non-used stack slot that
1460    * can't be killed, we add it to this set (via markUisLive, and the
1461    * DceAnalysis).
1462    */
1463   std::set<StkId> usedStackSlots;
1464
1465   auto mergeDceStack = [&] (folly::Optional<std::vector<UseInfo>>& stkOut,
1466                             const std::vector<UseInfo>& stkIn,
1467                             BlockId blk) {
1468     if (!stkOut) {
1469       stkOut = stkIn;
1470       for (uint32_t i = 0; i < stkIn.size(); i++) {
1471         auto& out = stkOut.value()[i];
1472         if (out.popper.blk == NoBlockId) out.popper = { blk, i };
1473         if (usedStackSlots.count({blk, i})) {
1474           out.usage = Use::Used;
1475           out.actions.clear();
1476         }
1477       }
1478       return true;
1479     }
1480
1481     auto ret = false;
1482     assert(stkOut->size() == stkIn.size());
1483     for (uint32_t i = 0; i < stkIn.size(); i++) {
1484       auto& out = stkOut.value()[i];
1485       if (out.usage == Use::Used) {
1486         continue;
1487       }
1488       if (stkIn[i].usage == Use::Used ||
1489           usedStackSlots.count({blk, i})) {
1490         out.usage = Use::Used;
1491         out.actions.clear();
1492         ret = true;
1493         continue;
1494       }
1495       if (out.usage == stkIn[i].usage) {
1496         for (auto const& id : stkIn[i].actions) {
1497           ret |= out.actions.insert(id).second;
1498         }
1499         auto popper = stkIn[i].popper;
1500         if (popper.blk == NoBlockId) popper = { blk, i };
1501         if (out.popper < stkIn[i].popper) {
1502           ret = true;
1503           out.popper = stkIn[i].popper;
1504         }
1505       } else {
1506         out.usage = Use::Used;
1507         out.actions.clear();
1508         ret = true;
1509       }
1510     }
1511
1512     return ret;
1513   };
1514
1515   /*
1516    * Iterate on live out states until we reach a fixed point.
1517    *
1518    * This algorithm treats the exceptional live-out states differently
1519    * from the live-out states during normal control flow.  The
1520    * liveOutExn sets only take part in the liveIn computation when the
1521    * block has factored exits.
1522    */
1523   while (!incompleteQ.empty()) {
1524     auto const blk = ai.rpoBlocks[incompleteQ.pop()];
1525
1526     // skip unreachable blocks
1527     if (!ai.bdata[blk->id].stateIn.initialized) continue;
1528
1529     FTRACE(2, "block #{}\n", blk->id);
1530
1531     auto const locLiveOut    = blockStates[blk->id].locLiveOut;
1532     auto const locLiveOutExn = blockStates[blk->id].locLiveOutExn;
1533     auto const transfer      = analyze_dce(
1534       index,
1535       ai,
1536       blk,
1537       ai.bdata[blk->id].stateIn,
1538       blockStates[blk->id].dceStack
1539     );
1540
1541     auto const liveIn        = transfer.locGen |
1542                                (locLiveOut & ~transfer.locKill) |
1543                                (locLiveOutExn & ~transfer.locKillExn);
1544
1545     FTRACE(2, "live out : {}\n"
1546               "out exn  : {}\n"
1547               "gen      : {}\n"
1548               "kill     : {}\n"
1549               "kill exn : {}\n"
1550               "live in  : {}\n",
1551               loc_bits_string(ai.ctx.func, locLiveOut),
1552               loc_bits_string(ai.ctx.func, locLiveOutExn),
1553               loc_bits_string(ai.ctx.func, transfer.locGen),
1554               loc_bits_string(ai.ctx.func, transfer.locKill),
1555               loc_bits_string(ai.ctx.func, transfer.locKillExn),
1556               loc_bits_string(ai.ctx.func, liveIn));
1557
1558     /*
1559      * If we weren't able to take advantage of any unused entries
1560      * on the dceStack that originated from another block, mark
1561      * them used to prevent other paths from trying to delete them.
1562      *
1563      * Also, merge the entries into our global usedStackSlots, to
1564      * make sure they always get marked Used.
1565      */
1566     for (auto const& id : transfer.usedStackSlots) {
1567       if (usedStackSlots.insert(id).second) {
1568         for (auto& pred : normalPreds[id.blk]) {
1569           FTRACE(2, "  {} force-use {}:{}\n", id.blk, pred->id, id.idx);
1570           auto& dceStack = blockStates[pred->id].dceStack;
1571           if (!dceStack) continue;
1572           auto& ui = dceStack.value()[id.idx];
1573           if (ui.usage != Use::Used) {
1574             ui.actions.clear();
1575             ui.usage = Use::Used;
1576             // no need to reprocess *this* block, since this is the
1577             // one that reported the problem.
1578             if (pred != blk) {
1579               incompleteQ.push(rpoId(pred));
1580             }
1581           }
1582         }
1583       }
1584     }
1585
1586     // Merge the liveIn into the liveOut of each normal predecessor.
1587     // If the set changes, reschedule that predecessor.
1588     for (auto& pred : normalPreds[blk->id]) {
1589       FTRACE(2, "  -> {}\n", pred->id);
1590       auto& predState = blockStates[pred->id].locLiveOut;
1591       auto const oldPredState = predState;
1592       predState |= liveIn;
1593       if (mergeDceStack(blockStates[pred->id].dceStack,
1594                         transfer.stack, blk->id) ||
1595           predState != oldPredState) {
1596         incompleteQ.push(rpoId(pred));
1597       }
1598     }
1599
1600     // Merge the liveIn into the liveOutExn state for each exceptional
1601     // precessor.  The liveIn computation also depends on the
1602     // liveOutExn state, so again reschedule if it changes.
1603     for (auto& pred : factoredPreds[blk->id]) {
1604       FTRACE(2, "  => {}\n", pred->id);
1605       auto& predState = blockStates[pred->id].locLiveOutExn;
1606       auto const oldPredState = predState;
1607       predState |= liveIn;
1608       if (predState != oldPredState) {
1609         incompleteQ.push(rpoId(pred));
1610       }
1611     }
1612   }
1613
1614   /*
1615    * Now that we're at a fixed point, use the propagated states to
1616    * remove instructions that don't need to be there.
1617    */
1618   FTRACE(1, "|---- global DCE optimize ({})\n", show(ai.ctx));
1619   std::bitset<kMaxTrackedLocals> usedLocals;
1620   std::bitset<kMaxTrackedClsRefSlots> usedSlots;
1621   DceActionMap actionMap;
1622   for (auto& b : ai.rpoBlocks) {
1623     FTRACE(2, "block #{}\n", b->id);
1624     auto ret = optimize_dce(
1625       index,
1626       ai,
1627       b,
1628       ai.bdata[b->id].stateIn,
1629       blockStates[b->id].locLiveOut,
1630       blockStates[b->id].locLiveOutExn,
1631       blockStates[b->id].dceStack
1632     );
1633     usedLocals |= ret.usedLocals;
1634     usedSlots  |= ret.usedSlots;
1635     if (ret.actionMap.size()) {
1636       if (!actionMap.size()) {
1637         actionMap = std::move(ret.actionMap);
1638       } else {
1639         for (auto const& elm : ret.actionMap) {
1640           actionMap.insert(elm);
1641         }
1642       }
1643     }
1644   }
1645
1646   dce_perform(*ai.ctx.func, actionMap);
1647
1648   FTRACE(1, "  used locals: {}\n", loc_bits_string(ai.ctx.func, usedLocals));
1649   remove_unused_locals(ai.ctx, usedLocals);
1650
1651   FTRACE(1, "  used slots: {}\n", slot_bits_string(ai.ctx.func, usedSlots));
1652   remove_unused_clsref_slots(ai.ctx, usedSlots);
1653 }
1654
1655 //////////////////////////////////////////////////////////////////////
1656
1657 }}