2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/hhbbc/dce.h"
26 #include <boost/dynamic_bitset.hpp>
28 #include <folly/gen/Base.h>
29 #include <folly/gen/String.h>
31 #include "hphp/util/trace.h"
32 #include "hphp/util/dataflow-worklist.h"
34 #include "hphp/hhbbc/representation.h"
35 #include "hphp/hhbbc/analyze.h"
36 #include "hphp/hhbbc/interp-state.h"
37 #include "hphp/hhbbc/interp.h"
38 #include "hphp/hhbbc/type-system.h"
39 #include "hphp/hhbbc/unit-util.h"
40 #include "hphp/hhbbc/cfg.h"
42 namespace HPHP
{ namespace HHBBC
{
44 TRACE_SET_MOD(hhbbc_dce
);
46 //////////////////////////////////////////////////////////////////////
49 * This module contains code to perform both local DCE and global DCE.
51 * The local DCE algorithm addresses dead eval stack manipulations and
52 * dead stores to locals that are visible within a single basic block.
54 * The global DCE performs a liveness analysis and then uses this
55 * information to allow dead stores to locals to be eliminated across
56 * blocks. It does not attempt to remove unnecessary evaluation stack
57 * manipulation spanning basic blocks, but it uses the same local DCE
58 * code and will eliminate intra-block stack manipulations.
60 * Both types of DCE here need to be type-aware, but they must visit
61 * blocks backward. They accomplish this by forward-propagating the
62 * block input states from a FuncAnalysis to each instruction in the
63 * block prior to doing the backward iteration.
67 * During backward traversal of a block, we maintain a "backwards"
68 * stack, indicating which eval stack slots are going to be required
69 * in the future or not.
71 * During this traversal, each instruction that pops when going
72 * forward instead "pushes" information about whether that input
73 * will be required. If it is not required, it also pushes an
74 * accumulating set of instruction ids that must be removed if the
75 * instruction which produces the stack slot is removed. (All
76 * instructions in these sets must be removed if any are, in order
77 * to keep the stack depths correct.)
79 * Similarly, each instruction that would push a value going forward
80 * instead "pops" the information about whether its stack output is
81 * going to be needed. If not, the instruction can mark itself (and
82 * all downstream instructions that depended on it) as removable.
86 * While a block is iterated backward, the set of live locals is
87 * tracked. The initial state of this live set depends on whether
88 * we are performing global or local DCE, and in the local case
89 * includes all locals in the function.
91 * When a local may be read, it is added to the live set. When a
92 * local is definitely-written, it is removed from the set.
94 * If a instruction may write to a local that is not live, it can be
95 * marked as removable if it is known to have no other side-effects.
96 * Currently this is only hooked up to SetL.
100 * The global algorithm first performs a liveness analysis to
101 * propagate live out sets to each block.
103 * This analysis is basically normal, but slightly modified from the
104 * usual in order to deal with the way exceptional control flow is
105 * represented in our CFG (with factored edges).
107 * It essentially is the liveness analysis algorithm described at
108 * http://dl.acm.org/citation.cfm?id=316171, except that we don't
109 * need to track kill sets for each PEI because we don't have a
110 * means of determining which factored edges may be traversed by any
 * given PEI. (That might be more usual to have in the context
112 * of a language with declared exception clauses...)
114 * Since we only deal with the most pessimistic exception case, this
115 * means for each block we just determine a gen and kill set, along
116 * with a subset of the latter set that is the locals that must be
117 * killed before any PEI. (See killBeforePEI below.)
119 * Final note about types:
121 * Global DCE can change the types of locals in a way that spans
122 * basic blocks. For a simple example, take the following code:
124 * // $foo :: Uninit here.
126 * // ... code that breaks a block ...
129 * If the first store to $foo is removed, the type of $foo before
130 * the SetL in the later block is now Uninit, instead of Int.
132 * This means that after calling global_dce on a function, the type
133 * information in the block input states in the associated
134 * FuncAnalysis can no longer be considered accurate.
136 * Moreover, since global DCE makes use of type information to
137 * determine whether a store is dead, we need to be careful that
138 * this never changes whether the assumptions used to perform DCE
141 * This is ok right now: DCE only uses the types to figure out which
142 * values can either have lifetimes extended or shortened without
143 * visible side-effects, or which values may be refs (so we can't
144 * omit stores to them). If we omit a store that changes the type
145 * of a local globally, this means the new type cannot be different
146 * with regard to these features (or we wouldn't have omitted it),
147 * which means we won't have made different decisions elsewhere in
148 * the algorithm based on the new type.
150 * Specifically: we will never omit a store where the old local type
151 * was something that could've had side effects, and if a new value
152 * is stored into a local where destroying it could have
153 * side-effects, some point along the path to the function exit (if
154 * nothing else, the RetC) will have added it to the gen set and the
155 * store also won't be removable.
 * In contrast, note that local DCE can not change types across block boundaries.
164 //////////////////////////////////////////////////////////////////////
166 // Returns whether decrefing a type could run a destructor.
167 bool couldRunDestructor(const Type
& t
) {
168 // We could check for specialized objects to see if they don't
169 // declare a user-defined destructor, but currently don't.
170 return t
.couldBe(TObj
) || t
.couldBe(TCArr
) || t
.couldBe(TRef
);
173 // Returns whether a set on something containing type t could have
174 // side-effects (running destuctors, or modifying arbitrary things via
176 bool setCouldHaveSideEffects(const Type
& t
) {
177 return t
.couldBe(TObj
) || t
.couldBe(TCArr
) || t
.couldBe(TRef
);
180 // Some reads could raise warnings and run arbitrary code.
181 bool readCouldHaveSideEffects(const Type
& t
) {
182 return t
.couldBe(TUninit
);
185 //////////////////////////////////////////////////////////////////////
188 * Use information of a stack cell.
191 // Indicates that the cell is (unconditionally) not used.
194 // Indicates that the cell is (possibly) used.
198 * Indicates that the cell is only used if it was the last reference alive.
199 * For instance, a PopC will call the destructor of the top-of-stack object
200 * if it was the last reference alive, and this counts as an example of
203 * If the producer of the cell knows that it is not the last reference, then
204 * it can treat Use::UsedIfLastRef as being equivalent to Use::Not.
// Identifies an instruction by its index within the block being walked.
using InstrId    = size_t;
// A set of instruction ids that must all be removed together (to keep
// stack depths balanced).
using InstrIdSet = std::set<InstrId>;
// Use information for one eval-stack slot, plus the dependent
// instruction ids that must go away with the slot's producer.
using UseInfo    = std::pair<Use,InstrIdSet>;
213 //////////////////////////////////////////////////////////////////////
// NOTE(review): the enclosing struct declaration (presumably
// "struct DceState {") is not visible in this chunk; these are its
// data members.

// The function being analyzed (used for tracing/debug output).
borrowed_ptr<const php::Func> func;

/*
 * Eval stack use information. Stack slots are marked as being
 * needed or not needed. If they aren't needed, they carry a set of
 * instructions that must be removed if the instruction that
 * produces the stack value is also removable.
 */
std::vector<UseInfo> stack;

/*
 * Locals known to be live at a point in a DCE walk. This is used
 * when we're actually acting on information we discovered during
 * the analysis.
 */
std::bitset<kMaxTrackedLocals> liveLocals;

/*
 * These variable sets are used to compute the transfer function for
 * the global liveness analysis in global_dce.
 *
 * The gen set accumulates the set of variables in the block with
 * upward-exposed uses. The kill set is the set of variables the
 * block will re-define, ignoring exceptional control flow.
 *
 * The killBeforePEI set is the set of variables killed before a
 * PEI. Propagation of liveness needs to use this (always more
 * conservative) set instead of kill when crossing a factored exit
 * edge.
 */
std::bitset<kMaxTrackedLocals> gen;
std::bitset<kMaxTrackedLocals> kill;
std::bitset<kMaxTrackedLocals> killBeforePEI;

/*
 * Instructions marked in this set are dead. If any of them are
 * removed, however, they must all be removed, because of the need
 * to keep eval stack consumers and producers balanced.
 */
boost::dynamic_bitset<> markedDead{};

/*
 * The set of locals that were ever live in this block. (This
 * includes locals that were live going out of this block.) This
 * set is used by global DCE to remove locals that are completely
 * unused in the entire function.
 */
std::bitset<kMaxTrackedLocals> usedLocals;
266 //////////////////////////////////////////////////////////////////////
268 const char* show(Use u
) {
270 case Use::Not
: return "0";
271 case Use::Used
: return "U";
272 case Use::UsedIfLastRef
: return "UL";
277 std::string
show(const InstrIdSet
& set
) {
278 using namespace folly::gen
;
280 | eachTo
<std::string
>()
281 | unsplit
<std::string
>(";")
285 std::string DEBUG_ONLY
show(const UseInfo
& ui
) {
286 return folly::format("{}@{}", show(ui
.first
), show(ui
.second
)).str();
289 std::string DEBUG_ONLY
bits_string(borrowed_ptr
<const php::Func
> func
,
290 std::bitset
<kMaxTrackedLocals
> locs
) {
291 std::ostringstream out
;
292 if (func
->locals
.size() < kMaxTrackedLocals
) {
293 for (auto i
= func
->locals
.size(); i
-- > 0;) {
294 out
<< (locs
.test(i
) ? '1' : '0');
302 //////////////////////////////////////////////////////////////////////
// NOTE(review): the enclosing "struct Env {" line and its other
// members (a DceState reference `dceState` and instruction id `id`,
// both referenced by the helpers below) are not visible in this chunk.

// Interpreter state before the current instruction.
const State& stateBefore;
// Interpreter step flags for the current instruction.
const StepFlags& flags;
311 void markSetDead(Env
& env
, const InstrIdSet
& set
) {
312 env
.dceState
.markedDead
[env
.id
] = 1;
313 FTRACE(2, " marking {} {}\n", env
.id
, show(set
));
314 for (auto& i
: set
) env
.dceState
.markedDead
[i
] = 1;
317 void markDead(Env
& env
) {
318 env
.dceState
.markedDead
[env
.id
] = 1;
319 FTRACE(2, " marking {}\n", env
.id
);
322 //////////////////////////////////////////////////////////////////////
325 void pop(Env
& env
, Use u
, InstrIdSet set
) {
326 FTRACE(2, " pop({})\n", show(u
));
327 env
.dceState
.stack
.emplace_back(u
, std::move(set
));
329 void pop(Env
& env
) { pop(env
, Use::Used
, InstrIdSet
{}); }
331 Type
topT(Env
& env
, uint32_t idx
= 0) {
332 assert(idx
< env
.stateBefore
.stack
.size());
333 return env
.stateBefore
.stack
[env
.stateBefore
.stack
.size() - idx
- 1].type
;
336 Type
topC(Env
& env
, uint32_t idx
= 0) {
337 auto const t
= topT(env
, idx
);
338 assert(t
.subtypeOf(TInitCell
));
342 void discard(Env
& env
) {
343 pop(env
, Use::Not
, InstrIdSet
{env
.id
});
346 bool allUnused() { return true; }
347 template<class... Args
>
348 bool allUnused(const UseInfo
& ui
, Args
&&... args
) {
349 return ui
.first
== Use::Not
&&
350 allUnused(std::forward
<Args
>(args
)...);
353 void combineSets(InstrIdSet
&) {}
354 template<class... Args
>
355 void combineSets(InstrIdSet
& accum
, const UseInfo
& ui
, Args
&&... args
) {
356 accum
.insert(begin(ui
.second
), end(ui
.second
));
357 combineSets(accum
, std::forward
<Args
>(args
)...);
360 // If all the supplied UseInfos represent unused stack slots, make a
361 // pop that is considered unused. Otherwise pop as a Use::Used.
362 template<class... Args
>
363 void popCond(Env
& env
, Args
&&... args
) {
364 bool unused
= allUnused(std::forward
<Args
>(args
)...);
365 if (!unused
) return pop(env
, Use::Used
, InstrIdSet
{});
366 auto accum
= InstrIdSet
{env
.id
};
367 combineSets(accum
, std::forward
<Args
>(args
)...);
368 pop(env
, Use::Not
, accum
);
372 * It may be ok to remove pops on objects with destructors in some scenarios
373 * (where it won't change the observable point at which a destructor runs). We
374 * could also look at the object type and see if it is known that it can't have
375 * a user-defined destructor.
377 * For now, we mark the cell popped with a Use::UsedIfLastRef. This indicates
378 * to the producer of the cell that the it is considered used if it could be
379 * the last reference alive (in which case the destructor would be run on
380 * Pop). If the producer knows that the cell is not the last reference (e.g. if
381 * it is a Dup), then Use:UsedIfLastRef is equivalent to Use::Not.
383 void discardNonDtors(Env
& env
) {
384 auto const t
= topC(env
);
385 if (couldRunDestructor(t
)) {
386 return pop(env
, Use::UsedIfLastRef
, InstrIdSet
{env
.id
});
391 UseInfo
push(Env
& env
) {
392 always_assert(!env
.dceState
.stack
.empty());
393 auto ret
= env
.dceState
.stack
.back();
394 env
.dceState
.stack
.pop_back();
395 FTRACE(2, " {}@{} = push()\n", show(ret
.first
), show(ret
.second
));
399 void pushRemovable(Env
& env
) {
400 auto const ui
= push(env
);
403 markSetDead(env
, ui
.second
);
406 case Use::UsedIfLastRef
:
411 //////////////////////////////////////////////////////////////////////
414 void addGenSet(Env
& env
, std::bitset
<kMaxTrackedLocals
> locs
) {
415 FTRACE(4, " conservative: {}\n", bits_string(env
.dceState
.func
, locs
));
416 env
.dceState
.liveLocals
|= locs
;
417 env
.dceState
.gen
|= locs
;
418 env
.dceState
.kill
&= ~locs
;
419 env
.dceState
.killBeforePEI
&= ~locs
;
422 void addGen(Env
& env
, uint32_t id
) {
423 FTRACE(2, " gen: {}\n", id
);
424 if (id
>= kMaxTrackedLocals
) return;
425 env
.dceState
.liveLocals
[id
] = 1;
426 env
.dceState
.gen
[id
] = 1;
427 env
.dceState
.kill
[id
] = 0;
428 env
.dceState
.killBeforePEI
[id
] = 0;
431 void addKill(Env
& env
, uint32_t id
) {
432 FTRACE(2, " kill: {}\n", id
);
433 if (id
>= kMaxTrackedLocals
) return;
434 env
.dceState
.liveLocals
[id
] = 0;
435 env
.dceState
.gen
[id
] = 0;
436 env
.dceState
.kill
[id
] = 1;
437 env
.dceState
.killBeforePEI
[id
] = 1;
440 bool isLive(Env
& env
, uint32_t id
) {
441 if (id
>= kMaxTrackedLocals
) {
442 // Conservatively assume it's potentially live.
445 return env
.dceState
.liveLocals
[id
];
448 Type
locRaw(Env
& env
, LocalId loc
) {
449 return env
.stateBefore
.locals
[loc
];
452 void readDtorLocs(Env
& env
) {
453 for (auto i
= size_t{0}; i
< env
.stateBefore
.locals
.size(); ++i
) {
454 if (couldRunDestructor(env
.stateBefore
.locals
[i
])) {
460 //////////////////////////////////////////////////////////////////////
/*
 * Note that the instructions with popConds are relying on the consumer of the
 * values they push to check whether lifetime changes can have side-effects.
 *
 * For example, in bytecode like this, assuming $x is an object with a
 * destructor:
 *
 *   CGetL $x
 *   ...
 *   PopC $x // dtor should be here.
 *
 * The PopC will decide it can't be eliminated, which prevents us from
 * eliminating the CGetL.
 */

// PopC may run a destructor on its input, so only discard when no
// destructor could run (see discardNonDtors).
void dce(Env& env, const bc::PopC&) { discardNonDtors(env); }

// For PopV and PopR currently we never know if can't run a
// destructor.  (NOTE(review): the PopV/PopR overloads themselves are
// not visible in this chunk.)
void dce(Env& env, const bc::PopA&) { discard(env); }
void dce(Env& env, const bc::PopU&) { discard(env); }

// Pure literal/constructor pushes: removable whenever the pushed
// value is unused.
void dce(Env& env, const bc::Int&)        { pushRemovable(env); }
void dce(Env& env, const bc::String&)     { pushRemovable(env); }
void dce(Env& env, const bc::Array&)      { pushRemovable(env); }
void dce(Env& env, const bc::Dict&)       { pushRemovable(env); }
void dce(Env& env, const bc::Vec&)        { pushRemovable(env); }
void dce(Env& env, const bc::Keyset&)     { pushRemovable(env); }
void dce(Env& env, const bc::Double&)     { pushRemovable(env); }
void dce(Env& env, const bc::True&)       { pushRemovable(env); }
void dce(Env& env, const bc::False&)      { pushRemovable(env); }
void dce(Env& env, const bc::Null&)       { pushRemovable(env); }
void dce(Env& env, const bc::NullUninit&) { pushRemovable(env); }
void dce(Env& env, const bc::File&)       { pushRemovable(env); }
void dce(Env& env, const bc::Dir&)        { pushRemovable(env); }

// NameA/AGetC consume their input only if their output is needed.
void dce(Env& env, const bc::NameA&)      { popCond(env, push(env)); }
void dce(Env& env, const bc::NewArray&)   { pushRemovable(env); }
void dce(Env& env, const bc::NewCol&)     { pushRemovable(env); }
void dce(Env& env, const bc::AGetC&)      { popCond(env, push(env)); }
501 void dce(Env
& env
, const bc::Dup
&) {
502 auto const u1
= push(env
);
503 auto const u2
= push(env
);
504 // Dup pushes a cell that is guaranteed to be not the last reference.
505 // So, it can be eliminated if the cell it pushes is used as either
506 // Use::Not or Use::UsedIfLastRef.
507 // The cell it pops can be marked Use::Not only if Dup itself
508 // can be eliminated.
511 case Use::UsedIfLastRef
:
512 // It is ok to eliminate the Dup even if its second output u2
513 // is used, because eliminating the Dup still leaves the second
514 // output u2 on stack.
515 markSetDead(env
, u1
.second
);
518 pop(env
, Use::Not
, u2
.second
);
521 case Use::UsedIfLastRef
:
522 pop(env
, Use::Used
, InstrIdSet
{});
527 pop(env
, Use::Used
, InstrIdSet
{});
532 void dce(Env
& env
, const bc::CGetL
& op
) {
533 auto const ty
= locRaw(env
, op
.loc1
);
534 addGen(env
, op
.loc1
);
535 if (readCouldHaveSideEffects(ty
)) {
542 void dce(Env
& env
, const bc::CGetL2
& op
) {
543 auto const ty
= locRaw(env
, op
.loc1
);
544 addGen(env
, op
.loc1
);
545 auto const u1
= push(env
);
546 auto const u2
= push(env
);
547 if (readCouldHaveSideEffects(ty
)) {
550 popCond(env
, u1
, u2
);
554 void dce(Env
& env
, const bc::CGetL3
& op
) {
555 auto const ty
= locRaw(env
, op
.loc1
);
556 addGen(env
, op
.loc1
);
557 auto const u1
= push(env
);
558 auto const u2
= push(env
);
559 auto const u3
= push(env
);
560 if (readCouldHaveSideEffects(ty
)) {
564 popCond(env
, u1
, u2
, u3
);
565 popCond(env
, u1
, u2
, u3
);
// Function-exit instructions: consume their input and conservatively
// "read" every local whose value could run a destructor (the dtors
// run at exit, so those values must stay alive).
void dce(Env& env, const bc::RetC&)  { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Throw&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Fatal&) { pop(env); readDtorLocs(env); }
void dce(Env& env, const bc::Exit&)  { push(env); pop(env); readDtorLocs(env); }
574 void dce(Env
& env
, const bc::SetL
& op
) {
575 auto const oldTy
= locRaw(env
, op
.loc1
);
576 auto const effects
= setCouldHaveSideEffects(oldTy
);
577 if (!isLive(env
, op
.loc1
) && !effects
) return markDead(env
);
581 addGen(env
, op
.loc1
);
583 addKill(env
, op
.loc1
);
587 void dce(Env
& env
, const bc::UnsetL
& op
) {
588 auto const oldTy
= locRaw(env
, op
.loc1
);
589 auto const effects
= setCouldHaveSideEffects(oldTy
);
590 if (oldTy
.subtypeOf(TUninit
)) return markDead(env
);
591 if (!isLive(env
, op
.loc1
) && !effects
) return markDead(env
);
593 addGen(env
, op
.loc1
);
595 addKill(env
, op
.loc1
);
600 * IncDecL is a read-modify-write: can be removed if the local isn't live, the
601 * set can't have side effects, and no one reads the value it pushes. If the
602 * instruction is not dead, always add the local to the set of upward exposed
605 void dce(Env
& env
, const bc::IncDecL
& op
) {
606 auto const oldTy
= locRaw(env
, op
.loc1
);
607 auto const effects
= setCouldHaveSideEffects(oldTy
) ||
608 readCouldHaveSideEffects(oldTy
);
609 auto const u1
= push(env
);
610 if (!isLive(env
, op
.loc1
) && !effects
&& allUnused(u1
)) {
611 return markSetDead(env
, u1
.second
);
613 addGen(env
, op
.loc1
);
617 * SetOpL is like IncDecL, but with the complication that we don't know if we
618 * can mark it dead when visiting it, because it is going to pop an input but
619 * unlike SetL doesn't push the value it popped. For the current scheme we
620 * just add the local to gen even if we're doing a removable push, which is
621 * correct but could definitely fail to eliminate some earlier stores.
623 void dce(Env
& env
, const bc::SetOpL
& op
) {
624 auto const oldTy
= locRaw(env
, op
.loc1
);
625 auto const effects
= setCouldHaveSideEffects(oldTy
) ||
626 readCouldHaveSideEffects(oldTy
);
627 if (!isLive(env
, op
.loc1
) && !effects
) {
628 popCond(env
, push(env
));
633 addGen(env
, op
.loc1
);
637 * Default implementation is conservative: assume we use all of our
638 * inputs, and can't be removed even if our output is unused.
640 * We also assume all the locals in the mayReadLocalSet must be
641 * added to the live local set, and don't remove anything from it.
644 void dce(Env
& env
, const Op
& op
) {
645 addGenSet(env
, env
.flags
.mayReadLocalSet
);
646 env
.dceState
.liveLocals
|= env
.flags
.mayReadLocalSet
;
647 for (auto i
= uint32_t{0}; i
< op
.numPush(); ++i
) {
650 for (auto i
= uint32_t{0}; i
< op
.numPop(); ++i
) {
651 pop(env
, Use::Used
, InstrIdSet
{});
655 void dispatch_dce(Env
& env
, const Bytecode
& op
) {
656 #define O(opcode, ...) case Op::opcode: dce(env, op.opcode); return;
657 switch (op
.op
) { OPCODES
}
662 //////////////////////////////////////////////////////////////////////
664 folly::Optional
<DceState
>
665 dce_visit(const Index
& index
,
667 borrowed_ptr
<const php::Block
> const blk
,
668 const State
& stateIn
,
669 std::bitset
<kMaxTrackedLocals
> liveOut
,
670 std::bitset
<kMaxTrackedLocals
> liveOutExn
) {
671 if (!stateIn
.initialized
) {
673 * Skip unreachable blocks.
675 * For DCE analysis it is ok to assume the transfer function is
676 * the identity on unreachable blocks (i.e. gen and kill sets are
677 * empty). For optimize, we don't need to bother doing anything
678 * to these---another pass is responsible for removing completely
679 * unreachable blocks.
684 auto const states
= locally_propagated_states(index
, ctx
, blk
, stateIn
);
686 auto dceState
= DceState
{};
687 dceState
.func
= ctx
.func
;
688 dceState
.markedDead
.resize(blk
->hhbcs
.size());
689 dceState
.liveLocals
= liveOut
;
690 dceState
.stack
.resize(states
.back().first
.stack
.size());
691 dceState
.usedLocals
= liveOut
;
692 for (auto& s
: dceState
.stack
) {
693 s
= UseInfo
{ Use::Used
, InstrIdSet
{} };
696 for (auto idx
= blk
->hhbcs
.size(); idx
-- > 0;) {
697 auto const& op
= blk
->hhbcs
[idx
];
699 FTRACE(2, " == #{} {}\n", idx
, show(ctx
.func
, op
));
701 auto visit_env
= Env
{
707 dispatch_dce(visit_env
, op
);
710 * When we see a PEI, we need to start over on the killBeforePEI
711 * set, and the local-liveness must take into account the fact
712 * that we could take an exception edge here (or'ing in the
715 if (states
[idx
].second
.wasPEI
) {
716 FTRACE(2, " <-- exceptions\n");
717 dceState
.liveLocals
|= liveOutExn
;
718 dceState
.killBeforePEI
.reset();
721 dceState
.usedLocals
|= dceState
.liveLocals
;
723 FTRACE(4, " dce stack: {}\n",
725 using namespace folly::gen
;
726 return from(dceState
.stack
)
727 | map([&] (const UseInfo
& ui
) { return show(ui
); })
728 | unsplit
<std::string
>(" ");
731 FTRACE(4, " interp stack: {}\n",
733 using namespace folly::gen
;
734 return from(states
[idx
].first
.stack
)
735 | map([&] (const StackElem
& e
) { return show(e
.type
); })
736 | unsplit
<std::string
>(" ");
740 // We're now at the state before this instruction, so the stack
741 // sizes must line up.
742 assert(dceState
.stack
.size() == states
[idx
].first
.stack
.size());
// NOTE(review): the enclosing struct declaration (the per-block
// transfer-function result consumed by global_dce — presumably
// "struct DceAnalysis {") is not visible in this chunk.

// Locals with upward-exposed uses in the block.
std::bitset<kMaxTrackedLocals> gen;
// Locals definitely re-defined, ignoring exceptional control flow.
std::bitset<kMaxTrackedLocals> kill;
// Locals killed before any PEI (safe across factored exit edges).
std::bitset<kMaxTrackedLocals> killExn;
754 DceAnalysis
analyze_dce(const Index
& index
,
756 borrowed_ptr
<php::Block
> const blk
,
757 const State
& stateIn
) {
758 // During this analysis pass, we have to assume everything could be
759 // live out, so we set allLive here. (Later we'll determine the
760 // real liveOut sets.)
761 auto allLive
= std::bitset
<kMaxTrackedLocals
>{};
763 if (auto dceState
= dce_visit(index
, ctx
, blk
, stateIn
, allLive
, allLive
)) {
767 dceState
->killBeforePEI
770 return DceAnalysis
{};
773 std::bitset
<kMaxTrackedLocals
>
774 optimize_dce(const Index
& index
,
776 borrowed_ptr
<php::Block
> const blk
,
777 const State
& stateIn
,
778 std::bitset
<kMaxTrackedLocals
> liveOut
,
779 std::bitset
<kMaxTrackedLocals
> liveOutExn
) {
780 auto const dceState
= dce_visit(index
, ctx
, blk
,
781 stateIn
, liveOut
, liveOutExn
);
782 if (!dceState
) return std::bitset
<kMaxTrackedLocals
>{};
784 // Remove all instructions that were marked dead, and replace
785 // instructions that can be replaced with pops but aren't dead.
786 for (auto idx
= blk
->hhbcs
.size(); idx
-- > 0;) {
787 if (!dceState
->markedDead
.test(idx
)) continue;
788 blk
->hhbcs
.erase(begin(blk
->hhbcs
) + idx
);
791 // Blocks must be non-empty. Make sure we don't change that.
792 if (blk
->hhbcs
.empty()) {
793 blk
->hhbcs
.push_back(bc::Nop
{});
796 return dceState
->usedLocals
;
799 //////////////////////////////////////////////////////////////////////
801 void remove_unused_locals(Context
const ctx
,
802 std::bitset
<kMaxTrackedLocals
> usedLocals
) {
803 if (!options
.RemoveUnusedLocals
) return;
804 auto const func
= ctx
.func
;
807 * Removing unused locals in closures requires checking which ones
808 * are captured variables so we can remove the relevant properties,
809 * and then we'd have to mutate the CreateCl callsite, so we don't
812 * Note: many closure bodies have unused $this local, because of
813 * some emitter quirk, so this might be worthwhile.
815 if (func
->isClosureBody
) return;
817 for (auto loc
= func
->locals
.begin() + func
->params
.size();
818 loc
!= func
->locals
.end(); ++loc
) {
820 assert(loc
->id
< kMaxTrackedLocals
&& !usedLocals
.test(loc
->id
));
823 if (loc
->id
< kMaxTrackedLocals
&& !usedLocals
.test(loc
->id
)) {
824 FTRACE(2, " killing: {}\n", local_string(*func
, loc
->id
));
825 const_cast<php::Local
&>(*loc
).killed
= true;
830 //////////////////////////////////////////////////////////////////////
834 void local_dce(const Index
& index
,
835 const FuncAnalysis
& ainfo
,
836 borrowed_ptr
<php::Block
> const blk
,
837 const State
& stateIn
) {
838 Trace::Bump bumper
{Trace::hhbbc_dce
, kSystemLibBump
,
839 is_systemlib_part(*ainfo
.ctx
.unit
)};
841 // For local DCE, we have to assume all variables are in the
842 // live-out set for the block.
843 auto allLive
= std::bitset
<kMaxTrackedLocals
>();
845 optimize_dce(index
, ainfo
.ctx
, blk
, stateIn
, allLive
, allLive
);
848 //////////////////////////////////////////////////////////////////////
850 void global_dce(const Index
& index
, const FuncAnalysis
& ai
) {
851 Trace::Bump bumper
{Trace::hhbbc_dce
, kSystemLibBump
,
852 is_systemlib_part(*ai
.ctx
.unit
)};
854 auto rpoId
= [&] (borrowed_ptr
<php::Block
> blk
) {
855 return ai
.bdata
[blk
->id
].rpoId
;
858 FTRACE(1, "|---- global DCE analyze ({})\n", show(ai
.ctx
));
859 FTRACE(2, "{}", [&] {
860 using namespace folly::gen
;
861 auto i
= uint32_t{0};
862 return from(ai
.ctx
.func
->locals
)
864 [&] (const php::Local
& l
) {
865 return folly::sformat(" {} {}\n",
866 i
++, local_string(*ai
.ctx
.func
, l
.id
));
868 | unsplit
<std::string
>("");
872 * Create a DceAnalysis for each block, indexed by rpo id.
874 * Here we want to pre-compute the transfer function for each block,
875 * so we don't need to visit each instruction repeatedly during the
876 * fixed point computation. The transfer function is a function of
877 * the set of locals read and killed in the block, and does not
878 * depend on the final live out state, so we can compute it here.
880 auto blockAnalysis
= std::vector
<DceAnalysis
>{};
881 for (auto& b
: ai
.rpoBlocks
) {
882 FTRACE(2, "block #{}\n", b
->id
);
883 auto const dinfo
= analyze_dce(
887 ai
.bdata
[b
->id
].stateIn
889 blockAnalysis
.push_back(dinfo
);
893 * States for each block, indexed by RPO id.
895 * The liveOut state is the union of liveIn states of each normal
896 * successor, and liveOutExn is the union of liveIn states of each
897 * exceptional successor.
900 std::bitset
<kMaxTrackedLocals
> liveOut
;
901 std::bitset
<kMaxTrackedLocals
> liveOutExn
;
903 std::vector
<BlockState
> blockStates(ai
.rpoBlocks
.size());
906 * Set of block reverse post order ids that still need to be
907 * visited. This is ordered by std::greater, so we'll generally
908 * visit blocks before their predecessors. (The algorithm doesn't
909 * need this for correctness, but it might make it iterate less.)
911 * Every block must be visited at least once, so we throw them all
914 auto incompleteQ
= dataflow_worklist
<uint32_t,std::less
<uint32_t>>(
917 for (auto& b
: ai
.rpoBlocks
) incompleteQ
.push(rpoId(b
));
919 auto const normalPreds
= computeNormalPreds(ai
.rpoBlocks
);
920 auto const factoredPreds
= computeFactoredPreds(ai
.rpoBlocks
);
923 * Iterate on live out states until we reach a fixed point.
925 * This algorithm treats the exceptional live-out states differently
926 * from the live-out states during normal control flow. The
927 * liveOutExn sets only take part in the liveIn computation when the
928 * block has factored exits.
930 while (!incompleteQ
.empty()) {
931 auto const blk
= ai
.rpoBlocks
[incompleteQ
.pop()];
933 FTRACE(2, "block #{}\n", blk
->id
);
935 auto const liveOut
= blockStates
[rpoId(blk
)].liveOut
;
936 auto const liveOutExn
= blockStates
[rpoId(blk
)].liveOutExn
;
937 auto const transfer
= blockAnalysis
[rpoId(blk
)];
938 auto const liveIn
= transfer
.gen
| (liveOut
& ~transfer
.kill
)
939 | (liveOutExn
& ~transfer
.killExn
);
941 FTRACE(2, "live out : {}\n"
947 bits_string(ai
.ctx
.func
, liveOut
),
948 bits_string(ai
.ctx
.func
, liveOutExn
),
949 bits_string(ai
.ctx
.func
, transfer
.gen
),
950 bits_string(ai
.ctx
.func
, transfer
.kill
),
951 bits_string(ai
.ctx
.func
, transfer
.killExn
),
952 bits_string(ai
.ctx
.func
, liveIn
));
954 // Merge the liveIn into the liveOut of each normal predecessor.
955 // If the set changes, reschedule that predecessor.
956 for (auto& pred
: normalPreds
[blk
->id
]) {
957 FTRACE(2, " -> {}\n", pred
->id
);
958 auto& predState
= blockStates
[rpoId(pred
)].liveOut
;
959 auto const oldPredState
= predState
;
961 if (predState
!= oldPredState
) {
962 incompleteQ
.push(rpoId(pred
));
966 // Merge the liveIn into the liveOutExn state for each exceptional
967 // precessor. The liveIn computation also depends on the
968 // liveOutExn state, so again reschedule if it changes.
969 for (auto& pred
: factoredPreds
[blk
->id
]) {
970 FTRACE(2, " => {}\n", pred
->id
);
971 auto& predState
= blockStates
[rpoId(pred
)].liveOutExn
;
972 auto const oldPredState
= predState
;
974 if (predState
!= oldPredState
) {
975 incompleteQ
.push(rpoId(pred
));
981 * Now that we're at a fixed point, use the propagated states to
982 * remove instructions that don't need to be there.
984 FTRACE(1, "|---- global DCE optimize ({})\n", show(ai
.ctx
));
985 std::bitset
<kMaxTrackedLocals
> usedLocals
;
986 for (auto& b
: ai
.rpoBlocks
) {
987 FTRACE(2, "block #{}\n", b
->id
);
988 usedLocals
|= optimize_dce(
992 ai
.bdata
[b
->id
].stateIn
,
993 blockStates
[rpoId(b
)].liveOut
,
994 blockStates
[rpoId(b
)].liveOutExn
998 FTRACE(1, " used locals: {}\n", bits_string(ai
.ctx
.func
, usedLocals
));
999 remove_unused_locals(ai
.ctx
, usedLocals
);
1002 //////////////////////////////////////////////////////////////////////
1004 void remove_unreachable_blocks(const Index
& index
, const FuncAnalysis
& ainfo
) {
1005 auto reachable
= [&](BlockId id
) {
1006 return ainfo
.bdata
[id
].stateIn
.initialized
;
1009 for (auto& blk
: ainfo
.rpoBlocks
) {
1010 if (reachable(blk
->id
)) continue;
1011 auto const srcLoc
= blk
->hhbcs
.front().srcLoc
;
1013 bc_with_loc(srcLoc
, bc::String
{ s_unreachable
.get() }),
1014 bc_with_loc(srcLoc
, bc::Fatal
{ FatalOp::Runtime
})
1016 blk
->fallthrough
= NoBlockId
;
1019 if (!options
.RemoveDeadBlocks
) return;
1021 for (auto& blk
: ainfo
.rpoBlocks
) {
1022 auto reachableTargets
= false;
1023 forEachTakenEdge(blk
->hhbcs
.back(), [&] (BlockId id
) {
1024 if (reachable(id
)) reachableTargets
= true;
1026 if (reachableTargets
) continue;
1027 switch (blk
->hhbcs
.back().op
) {
1030 blk
->hhbcs
.back() = bc_with_loc(blk
->hhbcs
.back().srcLoc
, bc::PopC
{});
1038 bool merge_blocks(const FuncAnalysis
& ainfo
) {
1039 auto& func
= *ainfo
.ctx
.func
;
1040 FTRACE(2, "merge_blocks: {}\n", func
.name
);
1042 boost::dynamic_bitset
<> hasPred(func
.blocks
.size());
1043 boost::dynamic_bitset
<> multiplePreds(func
.blocks
.size());
1044 boost::dynamic_bitset
<> multipleSuccs(func
.blocks
.size());
1045 boost::dynamic_bitset
<> removed(func
.blocks
.size());
1046 auto reachable
= [&](php::Block
& b
) {
1047 auto const& state
= ainfo
.bdata
[b
.id
].stateIn
;
1048 return state
.initialized
&& !state
.unreachable
;
1050 // find all the blocks with multiple preds; they can't be merged
1051 // into their predecessors
1052 for (auto& blk
: func
.blocks
) {
1053 if (blk
->id
== NoBlockId
) continue;
1055 if (!reachable(*blk
)) multiplePreds
[blk
->id
] = true;
1056 forEachSuccessor(*blk
, [&](BlockId succId
) {
1057 if (hasPred
[succId
]) {
1058 multiplePreds
[succId
] = true;
1060 hasPred
[succId
] = true;
1064 if (numSucc
> 1) multipleSuccs
[blk
->id
] = true;
1066 multiplePreds
[func
.mainEntry
] = true;
1067 for (auto& blkId
: func
.dvEntries
) {
1068 if (blkId
!= NoBlockId
) {
1069 multiplePreds
[blkId
] = true;
1073 bool removedAny
= false;
1074 for (auto& blk
: func
.blocks
) {
1075 if (blk
->id
== NoBlockId
) continue;
1076 while (blk
->fallthrough
!= NoBlockId
) {
1077 auto nxt
= borrow(func
.blocks
[blk
->fallthrough
]);
1078 if (multipleSuccs
[blk
->id
] ||
1079 multiplePreds
[nxt
->id
] ||
1080 blk
->exnNode
!= nxt
->exnNode
||
1081 blk
->section
!= nxt
->section
) {
1085 FTRACE(1, "merging: {} into {}\n", (void*)nxt
, (void*)blk
.get());
1086 multipleSuccs
[blk
->id
] = multipleSuccs
[nxt
->id
];
1087 blk
->fallthrough
= nxt
->fallthrough
;
1088 blk
->fallthroughNS
= nxt
->fallthroughNS
;
1089 if (nxt
->factoredExits
.size()) {
1090 if (blk
->factoredExits
.size()) {
1091 std::set
<BlockId
> exitSet
;
1092 std::copy(begin(blk
->factoredExits
), end(blk
->factoredExits
),
1093 std::inserter(exitSet
, begin(exitSet
)));
1094 std::copy(nxt
->factoredExits
.begin(), nxt
->factoredExits
.end(),
1095 std::inserter(exitSet
, begin(exitSet
)));
1096 blk
->factoredExits
.resize(exitSet
.size());
1097 std::copy(begin(exitSet
), end(exitSet
), blk
->factoredExits
.begin());
1098 nxt
->factoredExits
= decltype(nxt
->factoredExits
) {};
1100 blk
->factoredExits
= std::move(nxt
->factoredExits
);
1103 std::copy(nxt
->hhbcs
.begin(), nxt
->hhbcs
.end(),
1104 std::back_inserter(blk
->hhbcs
));
1105 nxt
->fallthrough
= NoBlockId
;
1106 nxt
->id
= NoBlockId
;
1107 nxt
->hhbcs
= { bc::Nop
{} };
1115 //////////////////////////////////////////////////////////////////////