hphp/runtime/vm/jit/refcount-opts.cpp

   1 /*
   2    +----------------------------------------------------------------------+
   3    | HipHop for PHP                                                       |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15 */
  16
  17 #include "hphp/runtime/vm/jit/opt.h"
  18 #include <algorithm>
  19 #include <utility>
  20
  21 #include "folly/Lazy.h"
  22 #include "folly/Optional.h"
  23 #include "folly/MapUtil.h"
  24
  25 #include "hphp/runtime/vm/jit/containers.h"
  26 #include "hphp/runtime/vm/jit/block.h"
  27 #include "hphp/runtime/vm/jit/cfg.h"
  28 #include "hphp/runtime/vm/jit/frame-state.h"
  29 #include "hphp/runtime/vm/jit/ir.h"
  30 #include "hphp/runtime/vm/jit/ir-instruction.h"
  31 #include "hphp/runtime/vm/jit/ir-unit.h"
  32 #include "hphp/runtime/vm/jit/mc-generator.h"
  33 #include "hphp/runtime/vm/jit/print.h"
  34 #include "hphp/runtime/vm/jit/simplifier.h"
  35 #include "hphp/runtime/vm/jit/ssa-tmp.h"
  36 #include "hphp/runtime/vm/jit/state-vector.h"
  37 #include "hphp/runtime/vm/jit/timer.h"
  38 #include "hphp/runtime/vm/jit/translator.h"
  39 #include "hphp/util/trace.h"
  40
  41 namespace HPHP { namespace jit {
  42
  43 TRACE_SET_MOD(hhir_refcount);
  44
  45 namespace {
  46
  47 // These are used a lot in this file.
  48 using HPHP::Trace::Indent;
  49 using HPHP::Trace::indent;
  50
  51 // A Point is an id representing a program point, determined by IdMap.
  52 typedef uint32_t Point;
  53
  54 /*
  55  * IdMap stores a linear position id for program points before (even numbers)
  56  * and after (odd numbers) each instruction. The distinction is necessary so we
  57  * can insert sink points after the last instruction of a block.
  58  */
  59 struct IdMap {
  60   IdMap(const BlockList& blocks, const IRUnit& unit)
  61     : m_ids(unit, -1)
  62   {
  63     Point nextId = 0;
  64     for (auto* block : blocks) {
  65       for (auto& inst : *block) {
  66         m_ids[inst] = nextId;
  67         nextId += 2;
  68         m_insts.push_back(&inst);
  69       }
  70     }
  71   }
  72
  73   IRInstruction* inst(Point id) const { return m_insts.at(id / 2); }
  74
  75   static bool isBefore(Point id) { return !(id % 2); }
  76   static bool isAfter(Point id)  { return id % 2; }
  77
  78   Point before(const IRInstruction* inst) const { return m_ids[inst]; }
  79   Point before(const IRInstruction& inst) const { return before(&inst); }
  80
  81   Point after(const IRInstruction* inst) const { return m_ids[inst] + 1; }
  82   Point after(const IRInstruction& inst) const { return after(&inst); }
  83
  84  private:
  85   StateVector<IRInstruction, Point> m_ids;
  86   jit::vector<IRInstruction*> m_insts;
  87 };
  88
  89 /*
  90  * IncSet holds a set of ids of IncRef instructions, representing a pending
  91  * reference in a Value (defined below). We have to use a set of ids instead of
  92  * just a single id because it's possible for the same pending reference to
  93  * come from different instructions in different control flow paths.
  94  */
  95 typedef jit::flat_set<Point> IncSet;
  96 std::string show(const IncSet& incs, const IdMap& ids) {
  97   std::string ret;
  98   auto* separator = "";
  99   for (auto id : incs) {
 100     folly::toAppend(separator, ids.inst(id)->toString(), &ret);
 101     separator = "\n";
 102   }
 103   return ret;
 104 }
 105
 106 /*
 107  * Value represents what we know about the current state of a php value with a
 108  * refcount. It holds lower bounds on the real refcount and optimized refcount
 109  * of an object. As the analysis pass progresses, it uses this information to
 110  * ensure that any optimizations will not affect what observers of the refcount
 111  * see.
 112  */
 113 struct Value {
 114   explicit Value(int c = 0, bool load = false)
 115     : realCount(c)
 116     , fromLoad(load)
 117   {}
 118
 119   int optCount() const { return realCount - optDelta(); }
 120   int optDelta() const { return pendingIncs.size(); }
 121   bool empty() const {
 122     return pendingIncs.empty() && realCount == 0 && !fromLoad;
 123   }
 124
 125   /*
 126    * Merge other's state with this state.
 127    *
 128    * Precondition:
 129    * If realCounts do not match, the lesser value must be fromLoad.  The idea
 130    * is that fromLoad allows us to make more aggressive assumptions, so we
 131    * shouldn't have both a higher count and fromLoad.
 132    *
 133    * For an example of where this is OK, consider:
 134    *
 135    * B0:
 136    *  t1 = LdLoc<0>
 137    *       IncRef t1
 138    *       DecRef t2
 139    *       JmpNZ B2
 140    * B1:
 141    *       IncRef t1
 142    *       SpillStack t1
 143    *       TakeStack t1
 144    *       DecRef t1
 145    * B2:
 146    *       ...
 147    *
 148    * The count of t1 at B2 will be 1 if coming from B0, and 0 if coming from
 149    * B1, due to the SpillStack.  However, the TakeStack marks t1 as fromLoad,
 150    * which lets us know that there's at least one surviving reference, so we
 151    * can move forward assuming the max of the two.
 152    *
 153    * Postconditions:
 154    * 1. realCount becomes the max of the incoming value;
 155    * 2. fromLoad is set if either incoming value has it;
 156    * 3. pendingIncs is truncated to the size of the smaller of the two, then
 157    *    the sets at each index are merged. The two can differ when a value's
 158    *    count is observed in one branch of a conditional but not the other, and
 159    *    it's safe because any differences are resolved in mergeStates().
 160    */
 161   void merge(const Value& other, const IRUnit& unit) {
 162     auto showFailure = [&] {
 163       return folly::format(
 164         "Failed to merge values in unit:\n{}\n{}\n{}\n",
 165         show(*this), show(other), unit
 166       ).str();
 167     };
 168
 169     always_assert_log(
 170       IMPLIES(realCount > other.realCount, other.fromLoad) &&
 171       IMPLIES(realCount < other.realCount, fromLoad),
 172       showFailure);
 173
 174     fromLoad = fromLoad || other.fromLoad;
 175     realCount = std::max(realCount, other.realCount);
 176
 177     auto minSize = std::min(pendingIncs.size(), other.pendingIncs.size());
 178     pendingIncs.resize(minSize);
 179     for (unsigned i = 0; i < minSize; ++i) {
 180       for (auto id : other.pendingIncs[i]) {
 181         pendingIncs[i].insert(id);
 182       }
 183     }
 184   }
 185
 186   void pushInc(Point id) {
 187     ++realCount;
 188     pendingIncs.emplace_back();
 189     pendingIncs.back().insert(id);
 190   }
 191
 192   IncSet popRef() {
 193     assert(!pendingIncs.empty());
 194     auto incs = std::move(pendingIncs.back());
 195     pendingIncs.pop_back();
 196     return incs;
 197   }
 198
 199   void clearPendingIncs() {
 200     pendingIncs.clear();
 201   }
 202
 203   /* realCount is the number of live references currently owned by the
 204    * jit. Since it represents references with lifetimes we can reliably track,
 205    * it's also used as a lower bound of the refcount of the object. */
 206   int realCount;
 207
 208   /* fromLoad represents whether or not the value came from a load
 209    * instruction. This optimization is based on the idea of producing and
 210    * consuming references, and most of our load instructions sometimes produce
 211    * a reference and sometimes don't. Rather than splitting up all the loads
 212    * into two flavors, we allow consumption of a value from a load even if
 213    * there appear to be no live references. */
 214   bool fromLoad;
 215
 216  private:
 217   friend std::string show(const Value&);
 218
 219   /* pendingIncs contains ids of IncRef instructions that have yet to be placed
 220    * before an observer. The size of pendingIncs represents the delta between
 221    * the real count and the optimized count. */
 222   jit::vector<IncSet> pendingIncs;
 223 };
 224
 225 std::string show(const Value& state) {
 226   std::string incs;
 227   auto* sep = "";
 228   for (auto const& set : state.pendingIncs) {
 229     folly::toAppend(sep, '{', folly::join(", ", set), '}', &incs);
 230     sep = ", ";
 231   }
 232
 233   return folly::format(
 234     "(real, opt) = ({:>2}, {:>2}) - [{}]{}",
 235     state.realCount, state.optCount(),
 236     incs,
 237     state.fromLoad ? " - from load" : "").str();
 238 }
 239
 240 /*
 241  * Frame represents a call frame and is used to track references to the current
 242  * $this pointer.
 243  */
 244 struct Frame {
 245   explicit Frame(SSATmp* main = nullptr, SSATmp* current = nullptr)
 246     : mainThis(main)
 247     , currentThis(current)
 248   {}
 249
 250   /* The canonical form of $this. For the outermost frame of the trace, this
 251    * will be nullptr if we haven't loaded $this yet, or a LdThis if we
 252    * have. For inlined frames, this will be the canonical SSATmp* for the $this
 253    * pointer given to the SpillFrame. */
 254   SSATmp* mainThis;
 255
 256   /* Latest live temp holding $this. This is updated whenever we see a new
 257    * LdThis in a frame, and it's set to nullptr after any instructions that
 258    * kill all live temps (calls, mostly). */
 259   SSATmp* currentThis;
 260
 261   bool operator==(const Frame b) const {
 262     return mainThis == b.mainThis && currentThis == b.currentThis;
 263   }
 264 };
 265
 266 std::string show(const Frame frame) {
 267   if (!frame.mainThis && !frame.currentThis) {
 268     return "<no $this>";
 269   }
 270   std::string ret;
 271   if (frame.mainThis) {
 272     folly::toAppend("main this: ", frame.mainThis->toString(), &ret);
 273     if (frame.currentThis) ret += " ";
 274   }
 275   if (frame.currentThis) {
 276     folly::toAppend("current this: ", frame.currentThis->toString(), &ret);
 277   }
 278   return ret;
 279 }
 280
 281 /*
 282  * FrameStack contains a Frame struct for all live and pre-live frames.
 283  */
 284 struct FrameStack {
 285   /* Push a possibly empty Frame representing a pre-live ActRec. We don't yet
 286    * know if the call with be inlined or not. */
 287   void pushPreLive(Frame f = Frame()) {
 288     preLive.emplace_back(f);
 289   }
 290
 291   /* Push a new frame representing a newly defined FramePtr for an inlined
 292    * call. */
 293   void pushInline(const IRInstruction* fpInst) {
 294     assert(!preLive.empty());
 295     assert(fpInst->is(DefInlineFP));
 296     live[fpInst] = std::move(preLive.back());
 297     preLive.pop_back();
 298   }
 299
 300   /* Pop an inlined frame to represent an InlineReturn, forgetting what we know
 301    * about its $this pointer. */
 302   void popInline(const IRInstruction* fpInst) {
 303     assert(fpInst->is(DefInlineFP));
 304     assert(live.size() >= 2);
 305     auto it = live.find(fpInst);
 306     assert(it != live.end());
 307
 308     assert(dead.count(fpInst) == 0);
 309     dead[fpInst] = std::move(it->second);
 310     live.erase(it);
 311   }
 312
 313   /* Pop a non-inlined frame. This is only called if a trace includes a RetC in
 314    * the outermost function. */
 315   void pop() {
 316     assert(live.size() == 1);
 317     live.erase(live.begin());
 318   }
 319
 320   bool operator==(const FrameStack& b) const {
 321     return live == b.live && preLive == b.preLive;
 322   }
 323
 324   bool operator!=(const FrameStack& b) const {
 325     return !(*this == b);
 326   }
 327
 328   /* Map from the instruction defining the frame to a Frame object. */
 329   jit::flat_map<const IRInstruction*, Frame> live;
 330
 331   /* Similar to live, but for frames that have been popped. We keep track of
 332    * these because we often refer to a LdThis from an inlined function after
 333    * it's returned. */
 334   jit::flat_map<const IRInstruction*, Frame> dead;
 335
 336   /* Frames that have been pushed but not activated. */
 337   jit::vector<Frame> preLive;
 338 };
 339
 340 typedef jit::flat_map<SSATmp*, SSATmp*> CanonMap;
 341
 342 /*
 343  * State holds all the information we care about during the analysis pass, and
 344  * is used to propagate and merge state across control flow edges.
 345  */
 346 struct State {
 347   /* values maps from live SSATmps to the currently known state about the
 348    * value. */
 349   jit::flat_map<SSATmp*, Value> values;
 350
 351   /* canon keeps track of values that have been through passthrough
 352    * instructions, like CheckType and AssertType. */
 353   CanonMap canon;
 354
 355   /* frames keeps track of live $this pointers in call frames. */
 356   FrameStack frames;
 357 };
 358
 359 std::string show(const State& state) {
 360   std::string ret;
 361
 362   for (auto const& val : state.values) {
 363     if (val.second.empty()) continue;
 364
 365     folly::toAppend(indent(), show(val.second), " | ",
 366                     val.first->inst()->toString(), '\n', &ret);
 367   }
 368
 369   if (!state.frames.live.empty()) {
 370     folly::toAppend(indent(), "live frames (unordered):\n", &ret);
 371     Indent _i;
 372     for (auto const& pair : state.frames.live) {
 373       folly::toAppend(indent(), pair.first->toString(), ": ",
 374                       show(pair.second), '\n', &ret);
 375     }
 376   }
 377
 378   if (!state.frames.preLive.empty()) {
 379     folly::toAppend(indent(), "pre-live frames:\n", &ret);
 380     Indent _i;
 381     for (auto const& frame : state.frames.preLive) {
 382       folly::toAppend(indent(), show(frame), '\n', &ret);
 383     }
 384   }
 385
 386   if (!state.canon.empty()) {
 387     folly::toAppend(indent(), "replaced values:\n", &ret);
 388     Indent _i;
 389     for (auto const& pair : state.canon) {
 390       ret += folly::format("{}canon[{}] = {}\n",
 391                            indent(), *pair.first, *pair.second).str();
 392     }
 393   }
 394
 395   return ret;
 396 }
 397
 398 struct IncomingState {
 399   IncomingState(const Block* f, const State& s)
 400     : from(f)
 401     , state(s)
 402   {}
 403
 404   IncomingState(const Block* f, State&& s)
 405     : from(f)
 406     , state(std::move(s))
 407   {}
 408
 409   const Block* from;
 410   State state;
 411 };
 412 typedef jit::vector<IncomingState> IncomingStateVec;
 413 typedef jit::hash_map<const Block*, IncomingStateVec> SavedStates;
 414
 415 /*
 416  * One SinkPoint exists for each optimizable IncRef in each control flow path
 417  * dominated by it.
 418  */
 419 struct SinkPoint {
 420   SinkPoint(Point i, SSATmp* v, bool erase)
 421     : id(i)
 422     , eraseOnly(erase)
 423     , value(v)
 424   {}
 425
 426   /* The position of the sink point, from IdMap. */
 427   Point id;
 428
 429   /* In some very rare situations we may know that we want to insert a
 430    * SinkPoint for a value without having a live SSATmp for that value. This
 431    * means it's not safe to sink the IncRef to that point, but if the
 432    * instruction immediately after the SinkPoint is a DecRef of the same value,
 433    * it's safe to eliminate both instructions. */
 434   bool eraseOnly;
 435
 436   /* The current version of the value, if it has been through a passthrough
 437    * instruction. Keeping track of this allows us to sink an IncRef of a Cell
 438    * past a CheckType and turn it into a cheaper IncRef. */
 439   SSATmp* value;
 440 };
 441
 442 typedef jit::flat_multimap<IncSet, SinkPoint> SinkPoints;
 443
 444 struct SinkPointsMap {
 445   // Maps values to SinkPoints for their IncRefs
 446   jit::hash_map<SSATmp*, SinkPoints> points;
 447
 448   // Maps ids of DecRef instructions to the incoming lower bound of the object's
 449   // refcount. Only DecRefs that cannot go to zero are in this map, so there
 450   // will be no entries with a value of less than 2.
 451   jit::hash_map<Point, int> decRefs;
 452 };
 453
 454 /*
 455  * We often want to insert sink points on control flow edges. idForEdge returns
 456  * the appropriate id to use for the given edge, assuming it is not a critical
 457  * edge.
 458  */
 459 Point idForEdge(const Block* from, const Block* to, const IdMap& ids) {
 460   auto* next = from->next();
 461   auto* taken = from->taken();
 462   assert(next || taken);
 463
 464   auto before = [&](const IRInstruction& inst) {
 465     ITRACE(6, "id for B{} -> B{} is before {}\n", from->id(), to->id(), inst);
 466     return ids.before(inst);
 467   };
 468   auto after = [&](const IRInstruction& inst) {
 469     ITRACE(6, "id for B{} -> B{} is after {}\n", from->id(), to->id(), inst);
 470     return ids.after(inst);
 471   };
 472
 473   if (next && taken) {
 474     // from has two outgoing edges. Use the beginning of to.
 475     assert(to->numPreds() == 1 && !to->empty());
 476     auto it = to->begin();
 477     assert(it != to->end() && !it->is(DefLabel, BeginCatch));
 478     return before(*it);
 479   } else {
 480     // from has one outgoing edges. Use the end of from.
 481     assert(!from->empty());
 482     auto it = from->end();
 483     --it;
 484     if (it->isControlFlow()) {
 485       assert(it->isTerminal());
 486       return before(*it);
 487     } else {
 488       return after(*it);
 489     }
 490   }
 491 }
 492
 493 const StaticString s_get_defined_vars("get_defined_vars"),
 494   s_get_defined_vars_SystemLib("__SystemLib\\get_defined_vars");
 495
 496 /*
 497  * SinkPointAnalyzer is responsible for inspecting a trace and determining two
 498  * things: the latest safe point to sink each IncRef to, and which DecRefs
 499  * cannot take their src to 0.
 500  */
 501 struct SinkPointAnalyzer : private LocalStateHook {
 502   SinkPointAnalyzer(const BlockList* blocks, const IdMap* ids,
 503                     IRUnit& unit, FrameState&& frameState)
 504     : m_unit(unit)
 505     , m_blocks(*blocks)
 506     , m_block(nullptr)
 507     , m_inst(nullptr)
 508     , m_ids(*ids)
 509     , m_takenState(nullptr)
 510     , m_frameState(std::move(frameState))
 511   {}
 512
 513   SinkPointsMap find() {
 514     assert(!m_blocks.empty());
 515     auto& fpInst = m_blocks.front()->front();
 516     assert(fpInst.is(DefFP));
 517     m_state.frames.live[&fpInst] = Frame();
 518
 519     for (auto* block : m_blocks) {
 520       ITRACE(3, "entering B{}\n", block->id());
 521       Indent _i;
 522
 523       m_block = block;
 524       m_frameState.startBlock(block, block->front().marker());
 525
 526       if (block != m_blocks.front()) {
 527         assert(m_savedStates.count(block) == 1);
 528         m_state = mergeStates(std::move(m_savedStates[block]));
 529         m_savedStates.erase(block);
 530       }
 531
 532       for (auto& inst : *block) {
 533         m_inst = &inst;
 534         processInst();
 535       }
 536       m_inst = nullptr;
 537
 538       auto* next = block->next();
 539       auto* taken = block->taken();
 540       if (!next && !taken) {
 541         // This block is terminal. Ensure that all live references have been
 542         // accounted for.
 543         ITRACE(3, "finishing terminal B{}\n", block->id());
 544         Indent _i;
 545         FTRACE(3, "{}", show(m_state));
 546
 547         auto showFailure = [&]{
 548           std::string ret;
 549           ret += show(*(mcg->tx().region())) + "\n";
 550           ret += folly::format("Unconsumed reference(s) leaving B{}\n",
 551                                block->id()).str();
 552           ret += show(m_state);
 553           ret += folly::format("{:-^80}\n{}{:-^80}\n",
 554                                " unit ", m_unit.toString(), "").str();
 555           return ret;
 556         };
 557
 558         // Terminal block. Everything must be resolved.
 559         for (auto const& valState : m_state.values) {
 560           always_assert_log(valState.second.realCount == 0 &&
 561                             valState.second.optDelta() == 0,
 562                             showFailure);
 563         }
 564       }
 565
 566       // Propagate current state to the next block, if one exists. If we have a
 567       // taken branch, that information will be propagated in processInst().
 568       if (next) {
 569         ITRACE(3, "propagating B{}'s state to next - B{}\n",
 570                m_block->id(), next->id());
 571         Indent _i;
 572         FTRACE(3, "{}", show(m_state));
 573         m_savedStates[next].emplace_back(block, std::move(m_state));
 574       }
 575       FTRACE(3, "\n");
 576
 577       m_frameState.finishBlock(block);
 578     }
 579
 580     return std::move(m_ret);
 581   }
 582
 583  private:
 584   struct IncomingBranch {
 585     IncomingBranch(const Block* b, const Value& v)
 586       : from(b)
 587       , value(v)
 588     {}
 589
 590     const Block* from;
 591     Value value;
 592   };
 593   struct IncomingValue {
 594     Value value;
 595     jit::vector<IncomingBranch> inBlocks;
 596   };
 597
 598   State mergeStates(IncomingStateVec&& states) {
 599     DEBUG_ONLY auto doTrace = [&] {
 600       ITRACE(3, "merging {} state(s) into B{}\n", states.size(), m_block->id());
 601       for (DEBUG_ONLY auto const& inState : states) {
 602         Indent _i;
 603         ITRACE(3, "from B{}\n", inState.from->id());
 604         Indent __i;
 605         FTRACE(3, "{}", show(inState.state));
 606       }
 607     };
 608     ONTRACE(3, doTrace());
 609
 610     assert(!states.empty());
 611     /*
 612      * Short circuit the easy, common case: one incoming state.  (This
 613      * is just to save JIT time.)
 614      *
 615      * Except if it's a DefLabel---there could still be unconsumed
 616      * references that need to be forwarded through the DefLabel (for
 617      * example in the case of a control flow diamond where one side
 618      * was optimized away because the branch turned into a Nop).
 619      */
 620     if (states.size() == 1 && !m_block->front().is(DefLabel)) {
 621       return std::move(states.front().state);
 622     }
 623
 624     auto const& firstFrames = states.front().state.frames;
 625     State retState;
 626     retState.canon = mergeCanons(states);
 627     retState.frames = firstFrames;
 628
 629     // If the current block begins with a DefLabel, we start by taking
 630     // unconsumed references for each dest of the label that produces a
 631     // reference.
 632     if (m_block->front().is(DefLabel)) {
 633       auto& label = m_block->front();
 634       auto& refsVec = m_unit.labelRefs().at(&label);
 635       always_assert(refsVec.size() == label.numDsts());
 636
 637       ITRACE(3, "producing refs for dests of {}\n", label);
 638       Indent _i;
 639       for (auto i = 0U, n = label.numDsts(); i < n; ++i) {
 640         if (label.dst(i)->type().notCounted()) continue;
 641
 642         auto refs = refsVec[i];
 643         if (refs == 0) continue;
 644
 645         ITRACE(3, "dest {} produces {} ref(s)\n", i, refs);
 646         Indent _i;
 647         m_block->forEachSrc(
 648           i, [&](IRInstruction* jmp, SSATmp* src) {
 649             if (src->type().notCounted()) return;
 650
 651             auto it = find_if(states.begin(), states.end(),
 652                               [jmp](const IncomingState& s) {
 653                                 return s.from == jmp->block();
 654                               });
 655             always_assert(it != states.end());
 656             src = canonical(src);
 657             auto& valState = it->state.values[src];
 658             always_assert(valState.optDelta() == 0);
 659             ITRACE(3, "consuming refs from {} | {}\n",
 660                    show(valState), *src->inst());
 661             if (valState.realCount >= refs) {
 662               valState.realCount -= refs;
 663             } else {
 664               always_assert(valState.fromLoad);
 665               valState.realCount = 0;
 666             }
 667           });
 668
 669         retState.values[label.dst(i)].realCount = refs;
 670       }
 671     }
 672
 673     // Now, we build a map from values to their merged incoming state and which
 674     // blocks provide information about the value.
 675     jit::hash_map<SSATmp*, IncomingValue> mergedValues;
 676     for (auto const& inState : states) {
 677       if (inState.state.frames != firstFrames) {
 678         // Task #5216936: add support for merging states with
 679         // different FrameStacks, and get rid of the TRACE_PUNT below.
 680         if (RuntimeOption::EvalHHIRBytecodeControlFlow) {
 681           TRACE_PUNT("refcount-opts needs support for merging states with "
 682                      "different FrameStacks");
 683         }
 684         always_assert(false &&
 685           "merging states with different FrameStacks is not supported");
 686       }
 687
 688       for (auto const& inPair : inState.state.values) {
 689         if (inPair.second.empty()) continue;
 690
 691         auto* value = inPair.first;
 692         assert(!value->inst()->isPassthrough());
 693         const bool existed = mergedValues.count(value);
 694         auto& mergedState = mergedValues[value];
 695         if (existed) {
 696           mergedState.value.merge(inPair.second, m_unit);
 697         } else {
 698           mergedState.value = inPair.second;
 699         }
 700
 701         // Register this block as an incoming provider of the value.
 702         mergedState.inBlocks.emplace_back(inState.from, inPair.second);
 703       }
 704     }
 705
 706     // Now, for each incoming value, insert it into the resulting state. If
 707     // there is a difference in a value's state between incoming branches,
 708     // resolve it by inserting sink points on the appropriate incoming edges.
 709     for (auto& pair : mergedValues) {
 710       auto mergedState = pair.second.value;
 711
 712       // If the value wasn't provided by every incoming branch, we're hosed
 713       // unless it was just fromLoad in some branches.
 714       if (pair.second.inBlocks.size() < states.size()) {
 715         for (auto& inBlock : pair.second.inBlocks) {
 716           always_assert_flog(
 717             inBlock.value.realCount == 0 && inBlock.value.optDelta() == 0,
 718             "While merging incoming states for B{}, value {} not provided by"
 719             " all preds but has nontrivial state `{}' from B{}."
 720             "\n\n{:-^80}\n{}{:-^80}\n",
 721             m_block->id(), *pair.first->inst(),
 722             show(inBlock.value), inBlock.from->id(),
 723             " unit ", m_unit, ""
 724           );
 725         }
 726       }
 727
 728       auto* incVal =
 729         folly::get_default(retState.canon, pair.first, pair.first);
 730       auto const mergedDelta = mergedState.optDelta();
 731       for (auto& inBlock : pair.second.inBlocks) {
 732         auto& inState = inBlock.value;
 733         assert(inState.optDelta() >= mergedDelta);
 734
 735         Point insertId = idForEdge(inBlock.from, m_block, m_ids);
 736         while (inState.optDelta() > mergedDelta) {
 737           ITRACE(3, "Inserting sink point on edge B{} -> B{}\n",
 738                  inBlock.from->id(), m_block->id());
 739           m_ret.points[pair.first].insert(
 740             std::make_pair(inState.popRef(),
 741                            SinkPoint(insertId, incVal, false)));
 742         }
 743       }
 744
 745       // Only put the merged state in the result if it provides useful
 746       // information.
 747       assert(mergedState.optCount() >= 0);
 748       if (mergedState.realCount || mergedState.fromLoad) {
 749         retState.values[pair.first] = mergedState;
 750       }
 751     }
 752
 753     ITRACE(3, "returning state:\n");
 754     Indent _i;
 755     FTRACE(3, "{}", show(retState));
 756     return retState;
 757   }
 758
 759   /* If control flow after something like a CheckType rejoins, we need to make
 760    * sure we only reference values that dominate the join. In the example
 761    * below, t4 is not defined in B3 and therefore cannot be used in B4:
 762    *
 763    * B1:
 764    *   t4:Obj = CheckType<Obj> t3:Cell -> B3
 765    *  fallthrough to B2
 766    *
 767    * B2:
 768    *   IncRef t4:Obj
 769    *   Jmp B4
 770    *
 771    * B3:
 772    *   IncRef t3:Cell
 773    *   Jmp B4
 774    *
 775    * B2's incoming state will have a mapping from t3 -> t4, and B3 will have no
 776    * mapping for t3. We need to look at all the incoming mappings and pick the
 777    * most specific value defined in all incoming paths (t3 in this case).
 778    */
 779   CanonMap mergeCanons(const IncomingStateVec& states) const {
 780     CanonMap retMap;
 781
 782     auto passthrough = [](SSATmp*& val) -> void {
 783       val = val->inst()->getPassthroughValue();
 784     };
 785
 786     // First, build a map from canonical values to the values they map to and
 787     // how many incoming branches have the mapped value available.
 788     jit::flat_map<SSATmp*, jit::flat_map<SSATmp*, int>> mergedCanon;
 789
 790     for (auto const& inState : states) {
 791       for (auto const& pair : inState.state.canon) {
 792         for (auto* val = pair.second; val->inst()->isPassthrough();
 793              passthrough(val)) {
 794           ++mergedCanon[pair.first][val];
 795         }
 796       }
 797     }
 798
 799     for (auto const& pair : mergedCanon) {
 800       auto* trueRoot = pair.first;
 801       // Build a list of versions of this value that exist in all incoming
 802       // branches.
 803       jit::flat_set<SSATmp*> tmps;
 804       for (auto const& countPair : pair.second) {
 805         if (countPair.first != trueRoot && countPair.second == states.size()) {
 806           tmps.insert(countPair.first);
 807         }
 808       }
 809
 810       if (tmps.empty()) {
 811         // None of the derived values exist, so use trueRoot. This is
 812         // equivalent to not having an entry in the map.
 813         continue;
 814       } else if (tmps.size() == 1) {
 815         // Only one derived value exists in all incoming branches, so use that
 816         // value.
 817         retMap[trueRoot] = *tmps.begin();
 818         continue;
 819       }
 820
 821       // We found more than one value coming in from all branches, so find the
 822       // most derived one. This is the one temp that doesn't appear as an
 823       // ancestor of any of the others.
 824       jit::flat_set<SSATmp*> ancestors;
 825       for (auto* val : tmps) {
 826         // trueRoot shouldn't be in the set
 827         assert(val->inst()->isPassthrough());
 828         passthrough(val);
 829
 830         // For each ancestor of val, if it's not yet in ancestors, add it and
 831         // continue. If it is in ancestors we've already visited it and all of
 832         // its ancestors, so do nothing more.
 833         for (; val->inst()->isPassthrough(); passthrough(val)) {
 834           auto it = ancestors.find(val);
 835           if (it == ancestors.end()) {
 836             ancestors.insert(val);
 837           } else {
 838             break;
 839           }
 840         }
 841       }
 842
 843       // There should now be exactly one value in tmps that isn't
 844       // ancestors. This is the value to use in the merge map. The sets are
 845       // sorted, so walk them in parallel to find the discrepancy.
 846       assert(ancestors.size() == tmps.size() - 1);
 847       SSATmp* mostDerived = nullptr;
 848
 849       for (auto tIt = tmps.begin(), aIt = ancestors.begin(); ; ++tIt, ++aIt) {
 850         assert(tIt != tmps.end());
 851         if (aIt == ancestors.end() || *tIt != *aIt) {
 852           mostDerived = *tIt;
 853           break;
 854         }
 855       }
 856       retMap[trueRoot] = mostDerived;
 857     }
 858
 859     return retMap;
 860   }
 861
 862   void processInst() {
 863     ITRACE(3, "processing instruction id {:>3}: {}\n",
 864            m_ids.before(m_inst), *m_inst);
 865     Indent _i;
 866
 867     auto const nSrcs = m_inst->numSrcs();
 868     m_frameState.setMarker(m_inst->marker());
 869
 870     if (auto* taken = m_inst->taken()) {
 871       // If an instruction's branch is ever taken, none of its sources will be
 872       // consumed. This means we should propagate state across this edge before
 873       // consuming any values and modifying the current state.
 874       ITRACE(3, "propagating B{}'s state to taken - B{}\n",
 875              m_block->id(), taken->id());
 876       Indent _i;
 877       FTRACE(3, "{}", show(m_state));
 878       auto& takenStates = m_savedStates[taken];
 879       takenStates.emplace_back(m_block, m_state);
 880
 881       // Since we can't insert sink points after the branch but before the
 882       // instruction consumes its sources, we have to insert them before the
 883       // whole instruction. This happens after we've already propagated the
 884       // state to taken, so we keep a pointer to the propagated state so we can
 885       // update it when needed.
 886       m_takenState = &takenStates.back().state;
 887     } else {
 888       m_takenState = nullptr;
 889     }
 890
 891     if (m_inst->is(IncRef, IncRefCtx, TakeRef)) {
 892       auto* src = m_inst->src(0);
 893
 894       // We only consider an IncRef optimizable if it's not an IncRefCtx/TakeRef
 895       // and the value doesn't have an optimized count of 0. This prevents any
 896       // subsequent instructions from taking in a source with count 0.
 897       if (src->type().maybeCounted()) {
 898         auto& valState = m_state.values[canonical(src)];
 899         if (valState.optCount() > 0 && m_inst->is(IncRef)) {
 900           auto const id = m_ids.before(m_inst);
 901           m_state.values[canonical(src)].pushInc(id);
 902         } else {
 903           // Even if the IncRef isn't optimizable, it still needs to be tracked
 904           // as a live reference.
 905           ++valState.realCount;
 906         }
 907         ITRACE(3, "{}\n", show(m_state.values[canonical(src)]));
 908       }
 909     } else if (m_inst->is(TakeStack)) {
 910       // TakeStack is used to indicate that we've logically popped a value off
 911       // the stack, in place of a LdStack.
 912       auto* src = m_inst->src(0);
 913       if (src->type().maybeCounted()) {
 914         m_state.values[canonical(src)].fromLoad = true;
 915       }
 916     } else if (m_inst->is(Jmp)) {
 917       // Completely resolve the deficit for any sources to Jmp. This isn't a
 918       // perfect solution but it's vastly simpler than other options and
 919       // probably good enough for our purposes.
 920       for (uint32_t i = 0; i < nSrcs; ++i) {
 921         resolveValue(m_inst->src(i));
 922       }
 923     } else if (m_inst == &m_block->back() && m_block->isExit() &&
 924                // Make sure it's not a RetCtrl from Ret{C,V}
 925                (!m_inst->is(RetCtrl) ||
 926                 m_inst->extra<RetCtrlData>()->suspendingResumed) &&
 927                // The EndCatch in Function{Suspend,Return}Hook's catch block is
 928                // special: it happens after locals and $this have been
 929                // decreffed, so we don't want to do the normal cleanup
 930                !(m_inst->is(EndCatch) &&
 931                  m_block->preds().front().inst()
 932                    ->is(FunctionSuspendHook, FunctionReturnHook) &&
 933                  !m_block->preds().front().inst()
 934                    ->extra<RetCtrlData>()->suspendingResumed)) {
 935       // When leaving a trace, we need to account for all live references in
 936       // locals and $this pointers.
 937       consumeAllLocals();
 938       consumeAllFrames();
 939     } else if (m_inst->is(GenericRetDecRefs, NativeImpl)) {
 940       consumeCurrentLocals();
 941     } else if (m_inst->is(CreateCont, CreateAFWH)) {
 942       consumeInputs();
 943       consumeCurrentLocals();
 944       auto frame = m_inst->src(0)->inst();
 945       consumeFrame(m_state.frames.live.at(frame));
 946       defineOutputs();
 947     } else if (m_inst->is(DecRefLoc)) {
 948       consumeLocal(m_inst->extra<DecRefLoc>()->locId);
 949     } else if (m_inst->is(DecRefThis)) {
 950       // This only happens during a RetC, and it happens instead of a normal
 951       // DecRef on $this.
 952       auto frame = m_inst->src(0)->inst();
 953       consumeFrame(m_state.frames.live.at(frame));
 954     } else {
 955       // All other instructions take the generic path.
 956       consumeInputs();
 957       defineOutputs();
 958     }
 959
 960     m_frameState.update(m_inst);
 961   }
 962
 963   /* jit::canonical() traces through passthrough instructions to get the root
 964    * of a value. Since we're tracking the state of inlined frames in the trace,
 965    * there are often cases where the root value for a LdThis is really a value
 966    * up in some enclosing frame. */
 967   SSATmp* canonical(SSATmp* value) {
 968     auto* root = jit::canonical(value);
 969     auto* inst = root->inst();
 970     if (!inst->is(LdThis)) return root;
 971
 972     auto* fpInst = inst->src(0)->inst();
 973     auto it = m_state.frames.live.find(fpInst);
 974     if (it == m_state.frames.live.end()) {
 975       it = m_state.frames.dead.find(fpInst);
 976       assert(it != m_state.frames.dead.end());
 977     }
 978     return it->second.mainThis;
 979   }
 980
 981   /*
 982    * Consumes all local values, including those in callers if we're inlined.
 983    */
 984   void consumeAllLocals() {
 985     ITRACE(3, "consuming all locals\n");
 986     Indent _i;
 987     m_frameState.forEachLocal(
 988       [&](uint32_t id, SSATmp* value) {
 989         if (value) consumeValue(value);
 990       }
 991     );
 992   }
 993
 994   /*
 995    * Consumes all locals in the current frame, not including inline callers.
 996    */
 997   void consumeCurrentLocals() {
 998     ITRACE(3, "consume current frame locals\n");
 999     Indent _i;
1000     for (uint32_t i = 0, n = m_frameState.func()->numLocals(); i < n; ++i) {
1001       if (auto value = m_frameState.localValue(i)) {
1002         consumeValue(value);
1003       }
1004     }
1005   }
1006
1007   void consumeInputs() {
1008     if (m_inst->is(DecRef, DecRefNZ)) {
1009       auto* src = m_inst->src(0);
1010
1011       if (src->type().notCounted()) return;
1012       src = canonical(src);
1013
1014       auto const& valState = m_state.values[src];
1015       // Record DecRefs that won't go to 0
1016       assert(IMPLIES(m_inst->is(DecRefNZ), valState.realCount > 1));
1017       if (valState.realCount > 1) {
1018         m_ret.decRefs[m_ids.before(m_inst)] = valState.realCount;
1019       }
1020     } else if (m_inst->is(SpillFrame)) {
1021       auto* this_ = m_inst->src(2);
1022       if (this_->isA(Type::Obj)) {
1023         m_state.frames.pushPreLive(Frame(canonical(this_), this_));
1024         // When spilling an Object to a pre-live ActRec, we can reliably track
1025         // the reference in the frame. This allows us to optimize away many
1026         // refcounting operations on $this in inlined calls.
1027         ITRACE(3, "{}({}) becoming $this in an ActRec; not consuming "
1028                "reference\n",
1029                *this_, *canonical(this_));
1030         Indent _i;
1031
1032         auto& valState = m_state.values[canonical(this_)];
1033         if (valState.realCount == 0) {
1034           assert(valState.fromLoad);
1035           ++valState.realCount;
1036         }
1037         ITRACE(3, " after: {}\n", show(valState));
1038         return;
1039       }
1040       m_state.frames.pushPreLive();
1041     } else if (m_inst->is(DefInlineFP)) {
1042       m_state.frames.pushInline(m_inst);
1043     } else if (m_inst->is(InlineReturn)) {
1044       FTRACE(3, "{}", show(m_state));
1045       m_state.frames.popInline(m_inst->src(0)->inst());
1046     } else if (m_inst->is(RetAdjustStack)) {
1047       m_state.frames.pop();
1048     } else if (m_inst->is(Call, CallArray)) {
1049       resolveAllFrames();
1050       callPreLiveFrame();
1051     } else if (m_inst->is(ContEnter)) {
1052       resolveAllFrames();
1053     } else if (m_inst->is(CallBuiltin)) {
1054       const auto callee = m_inst->extra<CallBuiltinData>()->callee->fullName();
1055       if (callee->isame(s_get_defined_vars.get()) ||
1056           callee->isame(s_get_defined_vars_SystemLib.get())) {
1057         observeLocalRefs();
1058       }
1059     } else if (m_inst->is(InterpOne, InterpOneCF)) {
1060       // InterpOne can push and pop ActRecs.
1061       auto const op = m_inst->extra<InterpOneData>()->opcode;
1062       if (jit::getInstrInfo(op).type == jit::InstrFlags::OutFDesc) {
1063         m_state.frames.pushPreLive();
1064       } else if (isFCallStar(op)) {
1065         resolveAllFrames();
1066         callPreLiveFrame();
1067       } else if (op == OpContEnter) {
1068         resolveAllFrames();
1069       } else if (op == OpFCallBuiltin) {
1070         // This isn't optimal, but InterpOne of FCallBuiltin is rare enough
1071         // that it doesn't matter.
1072         observeLocalRefs();
1073       }
1074
1075       if (m_inst->is(InterpOneCF)) {
1076         // InterpOneCF's normal path leaves the trace, so consume all live
1077         // references.
1078         consumeAllLocals();
1079         consumeAllFrames();
1080       }
1081     }
1082
1083     ITRACE(3, "consuming normal inputs\n");
1084     for (uint32_t i = 0; i < m_inst->numSrcs(); ++i) {
1085       Indent _i;
1086       auto src = m_inst->src(i);
1087
1088       if (src->isA(Type::StkPtr) && src->inst()->is(SpillFrame) &&
1089           !m_inst->is(DefInlineFP,
1090                       Call,
1091                       CallArray,
1092                       ContEnter)) {
1093         bool const beginInlDefSP = m_inst->is(ReDefSP) &&
1094                                      m_inst->src(1)->inst()->is(DefInlineFP);
1095         if (!beginInlDefSP) {
1096           // If the StkPtr being consumed points to a pre-live ActRec, observe
1097           // its $this pointer since many of our helper functions decref it.
1098           auto this_ = src->inst()->src(2);
1099           if (this_->isA(Type::Obj)) {
1100             auto const sinkPoint = SinkPoint(m_ids.before(m_inst), this_,
1101               false);
1102             this_ = canonical(this_);
1103             observeValue(this_, m_state.values[this_], sinkPoint);
1104           }
1105         }
1106       }
1107
1108       if (src->type().notCounted()) continue;
1109
1110       auto& valState = m_state.values[canonical(src)];
1111       const SinkPoint sp(m_ids.before(m_inst), src, false);
1112       assert(IMPLIES(valState.optCount() == 0, valState.realCount == 0));
1113       if (m_inst->consumesReference(i)) {
1114         consumeValue(canonical(src), valState, sp);
1115       }
1116     }
1117
1118     // Many instructions have complicated effects on the state of the
1119     // locals. Get this information from FrameState.
1120     ITRACE(3, "getting local effects from FrameState\n");
1121     Indent _i;
1122     m_frameState.getLocalEffects(m_inst, *this);
1123   }
1124
1125   /*
1126    * Process a normal call to a prelive frame. Consume $this, if any, and pop
1127    * the prelive frame.
1128    */
1129   void callPreLiveFrame() {
1130     // If we have no prelive frames the FPush* was in a different trace.
1131     if (m_state.frames.preLive.empty()) return;
1132
1133     consumeFrame(m_state.frames.preLive.back());
1134     m_state.frames.preLive.pop_back();
1135   }
1136
1137   template<typename L>
1138   void forEachFrame(L body) {
1139     for (auto& pair : m_state.frames.live) body(pair.second);
1140     for (auto& frame : m_state.frames.preLive) body(frame);
1141   }
1142
1143   /*
1144    * Call, CallArray, and ContEnter destroy all live values. They also might
1145    * throw and we can't attach catch traces to them. Resolve the opt delta for
1146    * any live $this pointers and forget the "current" this pointer so we don't
1147    * reuse it after the call.
1148    */
1149   void resolveAllFrames() {
1150     forEachFrame([this](Frame& f) {
1151       if (f.currentThis) {
1152         resolveValue(f.currentThis);
1153         f.currentThis = nullptr;
1154       } else if (f.mainThis) {
1155         resolveValueEraseOnly(f.mainThis);
1156       }
1157     });
1158   }
1159
1160   /*
1161    * Consume the $this pointer for a Frame, if it has one. If currentThis is
1162    * nullptr but mainThis exists we use an "erase-only" sink point, which means
1163    * it's not safe to actually sink the IncRef but it's safe to erase the
1164    * IncRef/DecRef pair (usually because the value has been smashed by a Call).
1165    */
1166   void consumeFrame(Frame& f) {
1167     if (f.currentThis) {
1168       consumeValue(f.currentThis);
1169     } else if (f.mainThis) {
1170       consumeValueEraseOnly(f.mainThis);
1171     }
1172   }
1173
1174   /*
1175    * When we leave a trace, we have to account for all the references we're
1176    * tracking in pre-live frames and inlined frames.
1177    */
1178   void consumeAllFrames() {
1179     forEachFrame([this](Frame& f) {
1180       consumeFrame(f);
1181     });
1182   }
1183
1184   /* When an instruction consumes a reference to a value, two things in our
1185    * tracked state must be updated. We first ensure that the behavior of DecRef
1186    * and COW will not be affected by the difference between the real count and
1187    * the optimized count. This is done by inserting as many IncRef sink points
1188    * as needed before the instruction. Second, the real count of the value is
1189    * decremented to reflect the consumption of the reference. If there is no
1190    * reference to consume, this is either a bug in this analysis pass or the
1191    * incoming IR, so abort with a (hopefully) helpful error. */
1192   void consumeValueImpl(SSATmp* value, bool eraseOnly) {
1193     assert(value);
1194     if (value->type().notCounted()) return;
1195
1196     auto* root = canonical(value);
1197     consumeValue(root, m_state.values[root],
1198                  SinkPoint(m_ids.before(m_inst), value, eraseOnly));
1199   }
1200   void consumeValue(SSATmp* value)          { consumeValueImpl(value, false); }
1201   void consumeValueEraseOnly(SSATmp* value) { consumeValueImpl(value, true); }
1202
1203   void consumeValue(SSATmp* value, Value& valState, SinkPoint sinkPoint) {
1204     ITRACE(3, "consuming value {}\n", *value->inst());
1205     assert(value == canonical(value));
1206     Indent _i;
1207
1208     ITRACE(3, "before: {}\n", show(valState));
1209     assert(valState.realCount >= 0);
1210     assert(valState.optCount() >= 0);
1211
1212     assertCanConsume(value);
1213
1214     // Note that we're treating consumers and observers the same here, which is
1215     // necessary until we have better alias analysis.
1216     observeValue(value, valState, sinkPoint);
1217     if (valState.realCount) --valState.realCount;
1218
1219     ITRACE(3, " after: {}\n", show(valState));
1220   }
1221
1222   /* DecRef and COW both care about the same thing: is _count == 1 coming into
1223    * the operation? */
1224   struct ConsumeObserver {
1225     bool operator()(int count) { return count == 1; }
1226   };
1227   /* When deciding if something is a user-visible reference, we need to know if
1228    * _count >= 2. */
1229   struct RefObserver {
1230     bool operator()(int count) { return count >= 2; }
1231   };
1232
1233   template<typename T = ConsumeObserver>
1234   void observeValue(SSATmp* value, Value& valState, SinkPoint sp,
1235                     T pred = T()) {
1236     while (pred(valState.realCount) != pred(valState.optCount())) {
1237       placeSinkPoint(value, valState, sp);
1238     }
1239   }
1240
1241   void placeSinkPoint(SSATmp* value, Value& valState, SinkPoint sp) {
1242     m_ret.points[value].insert(std::make_pair(valState.popRef(), sp));
1243     ITRACE(3, "placing IncRef at id {}\n", sp.id);
1244
1245     // If the current instruction has a taken branch, we need to tell the
1246     // propagated state that we used one of the pending IncRefs.
1247     if (m_takenState && sp.id == m_ids.before(m_inst)) {
1248       ITRACE(3, "updating taken state for {}\n", *value);
1249       m_takenState->values[value].popRef();
1250     }
1251   }
1252
1253   void assertCanConsumeImpl(SSATmp* value, bool checkConsume) {
1254     auto const& valState = m_state.values[value];
1255     auto showState = [&](const std::string& what) {
1256       std::string ret;
1257       ret += folly::format("'{}' wants to consume {} but {}\n",
1258                            *m_inst, *value, what).str();
1259       ret += show(m_state);
1260       ret += folly::format("{:-^80}\n{}{:-^80}\n",
1261                            " unit ", m_unit.toString(), "").str();
1262       return ret;
1263     };
1264
1265     if (valState.realCount == 0) {
1266       auto showFailure = [&] {
1267         return showState("it has no unconsumed references");
1268       };
1269
1270       // This is ok as long as the value came from a load (see the Value struct
1271       // for why).
1272       always_assert_log((valState.fromLoad && valState.optCount() == 0),
1273                         showFailure);
1274     } else if (checkConsume) {
1275       auto showFailure = [&] {
1276         return showState(folly::format("it has an optCount of {}\n",
1277                                        valState.optCount()).str());
1278       };
1279       always_assert_log(valState.optCount() >= 1, showFailure);
1280     }
1281   }
1282
1283   void assertHasUnconsumedReference(SSATmp* value) {
1284     assertCanConsumeImpl(value, false);
1285   }
1286
1287   void assertCanConsume(SSATmp* value) {
1288     assertCanConsumeImpl(value, true);
1289   }
1290
1291   void observeLocalRefs() {
1292     // If any locals are RefDatas, the behavior of get_defined_vars can
1293     // depend on whether or not the count is >= 2.
1294     m_frameState.forEachLocal(
1295       [&](uint32_t id, SSATmp* value) {
1296         if (!value) return;
1297
1298         auto const sp = SinkPoint(m_ids.before(m_inst), value, false);
1299         value = canonical(value);
1300         observeValue(value, m_state.values[value], sp, RefObserver());
1301       }
1302     );
1303   }
1304
1305   void consumeLocal(Point id) {
1306     if (auto* val = m_frameState.localValue(id)) {
1307       consumeValue(val);
1308     }
1309   }
1310
1311   /* Completely resolve the delta between realCount and optCount. */
1312   void resolveValueImpl(SSATmp* const origVal, bool eraseOnly) {
1313     assert(origVal);
1314
1315     if (origVal->type().notCounted()) return;
1316     auto* value = canonical(origVal);
1317
1318     ITRACE(3, "resolving value {}\n", *value->inst());
1319     Indent _i;
1320
1321     auto& valState = m_state.values[value];
1322     if (!valState.optDelta()) return;
1323
1324     const SinkPoint sp(m_ids.before(m_inst), origVal, eraseOnly);
1325     while (valState.optDelta()) {
1326       placeSinkPoint(value, valState, sp);
1327     }
1328   }
1329   void resolveValue(SSATmp* val)          { resolveValueImpl(val, false); }
1330   void resolveValueEraseOnly(SSATmp* val) { resolveValueImpl(val, true); }
1331
1332   /* Remember that oldVal has been replace by newVal, either because of a
1333    * passthrough instruction or something from FrameState. */
1334   void replaceValue(SSATmp* oldVal, SSATmp* newVal) {
1335     ITRACE(3, "replacing {} with {}\n", *oldVal, *newVal);
1336
1337     assert(canonical(oldVal) == canonical(newVal) &&
1338            oldVal != newVal);
1339     m_state.canon[canonical(newVal)] = newVal;
1340   }
1341
1342   void defineOutputs() {
1343     if (m_inst->is(LdLoc)) {
1344       // LdLoc's output is the new value of the local we loaded, and this has
1345       // already been tracked in setLocalValue().
1346       return;
1347     } else if (m_inst->is(LdThis)) {
1348       if (!m_inst->marker().func()->mayHaveThis()) {
1349         // If this function can't have a $this pointer everything after the
1350         // LdThis is unreachable. More importantly, we aren't going to decref
1351         // the non-existent $this pointer in RetC, so don't track a reference
1352         // to it.
1353         return;
1354       }
1355
1356       auto* fpInst = m_inst->src(0)->inst();
1357       assert(m_state.frames.live.count(fpInst));
1358       auto& frame = m_state.frames.live[fpInst];
1359       frame.currentThis = m_inst->dst();
1360       if (frame.mainThis == nullptr) {
1361         // Nobody has tried to load $this in the current frame yet. Set it as
1362         // the main $this for the frame and indicate that we're tracking a
1363         // reference to it.
1364         frame.mainThis = m_inst->dst();
1365         m_state.values[frame.mainThis].realCount = 1;
1366       }
1367       return;
1368     }
1369
1370     if (m_inst->isPassthrough()) {
1371       assert(m_inst->numDsts() == 1);
1372       auto* dst = m_inst->dst();
1373       auto* src = m_inst->src(0);
1374       if (dst->type().maybeCounted()) {
1375         replaceValue(src, dst);
1376       }
1377
1378       if (m_inst->is(CheckType, AssertType)) {
1379         // If we're doing CheckType or AssertType from a counted type to an
1380         // uncounted type, we need to do something with the pending references
1381         // that won't be consumed now that the type is uncounted. Since we now
1382         // know that for its entire lifetime, the value has been an uncounted
1383         // type, we consume a live reference and drop all pending IncRefs on
1384         // the floor. This will result in any IncRefs of this value being sunk
1385         // to the CheckType's taken branch, and erased completely from the path
1386         // falling through the CheckType.
1387         if (dst->type().notCounted() && src->type().maybeCounted()) {
1388           auto* src = canonical(dst);
1389           auto& valState = m_state.values[src];
1390
1391           ITRACE(3, "consuming reference to {}: {} and dropping "
1392                  "opt delta of {}\n",
1393                  *src, show(valState), valState.optDelta());
1394           if (valState.realCount) --valState.realCount;
1395           while (valState.optDelta()) valState.popRef();
1396         } else if (dst->type().maybeCounted() && src->type().notCounted()) {
1397           // Going from notCounted to maybeCounted. This sounds silly, but it's
1398           // possible so we have to handle it "correctly". Treat it like a
1399           // load instruction.
1400           ITRACE(3, "producing fake reference to {}\n", *dst);
1401           m_state.values[canonical(dst)].fromLoad = true;
1402         }
1403       }
1404
1405       return;
1406     }
1407
1408     // Define any values produced by the instruction.
1409     for (uint32_t i = 0; i < m_inst->numDsts(); ++i) {
1410       auto dst = m_inst->dst(i);
1411
1412       if (dst->type().notCounted()) continue;
1413
1414       if (m_inst->producesReference(i) || m_inst->isRawLoad()) {
1415         assert(!m_state.values.count(dst));
1416         ITRACE(3, "defining value {}\n", *m_inst);
1417         Indent _i;
1418
1419         auto& state = m_state.values[dst];
1420         if (m_inst->producesReference(i)) state.realCount = 1;
1421         if (m_inst->isRawLoad()) state.fromLoad = true;
1422
1423         ITRACE(3, "{}\n", show(state));
1424       }
1425     }
1426   }
1427
1428   ///// LocalStateHook overrides /////
1429   void setLocalValue(uint32_t id, SSATmp* newVal) override {
1430     // TrackLoc is only used for the dests of labels, and those references are
1431     // handled in mergeStates.
1432     if (m_inst->is(TrackLoc)) {
1433       assert(newVal->inst()->is(DefLabel));
1434       return;
1435     }
1436
1437     // When a local's value is updated by StLoc(NT), the consumption of the old
1438     // value should've been visible to us, so we ignore that here.
1439     if (!m_inst->is(StLoc, StLocNT)) {
1440       ITRACE(3, "consuming local {} for setLocalValue\n", id);
1441       Indent _i;
1442       consumeLocal(id);
1443     }
1444
1445     if (newVal && newVal->type().maybeCounted()) {
1446       ITRACE(3, "{} is now in a local, adding tracked reference\n",
1447              *newVal);
1448       Indent _i;
1449       auto& valState = m_state.values[canonical(newVal)];
1450       ++valState.realCount;
1451       if (m_inst->is(LdLoc)) valState.fromLoad = true;
1452       ITRACE(3, " after: {}\n", show(valState));
1453     }
1454   }
1455
1456   void refineLocalValue(uint32_t id, unsigned inlineIdx,
1457                         SSATmp* oldVal, SSATmp* newVal) override {
1458     if (oldVal->type().maybeCounted() && newVal->type().notCounted()) {
1459       assert(newVal->inst()->is(CheckType, AssertType));
1460       assert(newVal->inst()->src(0) == oldVal);
1461       // Similar to what we do when processing the CheckType directly, we
1462       // "consume" the value on behalf of the CheckType.
1463       oldVal = canonical(oldVal);
1464       auto& valState = m_state.values[oldVal];
1465       ITRACE(2, "'consuming' reference to {} for refineLocalValue\n",
1466              *oldVal->inst());
1467       if (valState.realCount) --valState.realCount;
1468     }
1469   }
1470
1471   void setLocalType(uint32_t id, Type) override {
1472     ITRACE(3, "consuming local {} for setLocalType\n", id);
1473     Indent _i;
1474     consumeLocal(id);
1475   }
1476
1477   void killLocalForCall(uint32_t id, unsigned inlineIdx,
1478                         SSATmp* value) override {
1479     ITRACE(3, "consuming local {} at inline level {} for killLocalForCall\n",
1480            id, inlineIdx);
1481     Indent _i;
1482     consumeValue(value);
1483   }
1484
1485   void updateLocalRefValue(uint32_t id, unsigned,
1486                            SSATmp* oldVal, SSATmp* newVal) override {
1487     ITRACE(3, "replacing {} with {} in local {} for updateLocalRefValue\n",
1488            *oldVal, *newVal, id);
1489     replaceValue(oldVal, newVal);
1490   }
1491
1492   /* The IRUnit being processed and its blocks */
1493   const IRUnit& m_unit;
1494   const BlockList& m_blocks;
1495
1496   /* Current block and current instruction */
1497   const Block* m_block;
1498   const IRInstruction* m_inst;
1499
1500   /* Map between linear ids and IRInstruction* */
1501   const IdMap& m_ids;
1502
1503   /* Current state of the world */
1504   State m_state;
1505
1506   /* Sometimes present pointer to state propagated to m_inst's taken
1507    * branch. See consumeValue() for usage. */
1508   State* m_takenState;
1509   SavedStates m_savedStates;
1510
1511   /* Analysis results to be returned to caller */
1512   SinkPointsMap m_ret;
1513
1514   /* Used to track local state and other information about the trace */
1515   FrameState m_frameState;
1516 };
1517
1518 ////////// Refcount validation pass //////////
1519 typedef jit::hash_map<SSATmp*, double> TmpDelta;
1520 typedef jit::hash_map<Block*, TmpDelta> BlockMap;
1521
1522 /*
1523  * Simulate the effect of block b on refcounts of SSATmps.
1524  */
1525 void updateCounts(Block* b, TmpDelta& delta) {
1526   for (auto& inst: *b) {
1527     if (inst.is(IncRef, DecRef, DecRefNZ)) {
1528       SSATmp* src = canonical(inst.src(0));
1529       if (src->type().notCounted()) continue;
1530
1531       if (inst.is(IncRef)) {
1532         delta[src]++;
1533       } else {
1534         delta[src]--;
1535       }
1536     }
1537   }
1538 }
1539
1540 /*
1541  * Populate each block in BlockMap with a map of the refcount delta for each
1542  * SSATmp. Ideally, we want to maintain the map for every path through the
1543  * trace. However, to avoid the exponential cost, we use a linear-time
1544  * approximation here using an idea similar to Random Interpretation (Gulwani
1545  * and Necula, POPL 2003). The logic for phi-nodes in the approximiation
1546  * considers each predecessor equally likely, and uses the average as the
1547  * incoming ref-count for the phi-node.
1548  */
1549 void getRefDeltas(IRUnit& unit, BlockMap& map) {
1550   auto blocks = rpoSortCfg(unit);
1551   for (auto* block : blocks) {
1552     TmpDelta& delta = map[block];
1553     int numPreds = block->numPreds();
1554     block->forEachPred([&](Block* from) {
1555       TmpDelta& predMap = map[from];
1556       for (auto it = predMap.begin(), end = predMap.end(); it != end; ) {
1557         SSATmp* src = it->first;
1558         delta[src] += it->second / numPreds;
1559         ++it;
1560       }
1561     });
1562     updateCounts(block, delta);
1563   }
1564 }
1565
1566 const double deltaThreshold = 1E-15;
1567
1568 /*
1569  * Validate that orig and opt have the same ref-count deltas for SSATmps in
1570  * each exit block.
1571  */
1572 bool validateDeltas(BlockMap& orig, BlockMap& opt) {
1573   for (auto it = opt.begin(), end = opt.end(); it != end; ++it) {
1574     if (!it->first->isExit()) {
1575       continue;
1576     }
1577     TmpDelta& deltaOpt = it->second;
1578     TmpDelta& deltaOrig = orig[it->first];
1579     for (auto it2 = deltaOpt.begin(), end2 = deltaOpt.end(); it2 != end2;
1580            it2++) {
1581       SSATmp* src = it2->first;
1582       double delta = deltaOrig[src];
1583       if (fabs(delta - it2->second) > deltaThreshold) {
1584         if (src->inst()->is(LdThis)) {
1585           // The optimization does some nontrivial state tracking to keep track
1586           // of $this pointers, so this is probably a false positive.
1587           FTRACE(1, "possible ");
1588         }
1589         FTRACE(1, "refcount mismatch in {}: orig: {} opt: {}, block {}\n",
1590                    src->toString(),
1591                    deltaOrig[src],
1592                    it2->second,
1593                    it->first->id());
1594         return src->inst()->is(LdThis);
1595       }
1596     }
1597   }
1598   return true;
1599 }
1600
1601 std::string show(const SinkPointsMap& sinkPoints, const IdMap& ids) DEBUG_ONLY;
1602 std::string show(const SinkPointsMap& sinkPoints, const IdMap& ids) {
1603   std::string ret;
1604
1605   typedef std::pair<SSATmp*, SinkPoints> SinkPair;
1606   jit::vector<SinkPair> sortedPoints(
1607     sinkPoints.points.begin(), sinkPoints.points.end());
1608   std::sort(sortedPoints.begin(), sortedPoints.end(),
1609             [&](const SinkPair& a, const SinkPair& b) {
1610               return ids.before(a.first->inst()) < ids.before(b.first->inst());
1611             });
1612
1613   for (auto const& pair : sortedPoints) {
1614     auto* value = pair.first;
1615     auto& points = pair.second;
1616     ret += folly::format("  {}\n", *value->inst()).str();
1617
1618     const IncSet* lastIncs = nullptr;
1619     for (auto const& point : points) {
1620       auto* thisIncs = &point.first;
1621       if (!lastIncs || *lastIncs != *thisIncs) {
1622         ret += folly::format("    {}\n", show(*thisIncs, ids)).str();
1623         lastIncs = thisIncs;
1624       }
1625       ret += folly::format("      {} {} (using {}){}\n",
1626                            (point.second.id % 2) ? "after " : "before",
1627                            *ids.inst(point.second.id),
1628                            *point.second.value,
1629                            point.second.eraseOnly ? ", erase only" : "").str();
1630
1631     }
1632     ret += '\n';
1633   }
1634
1635   typedef std::pair<Point, int> DecRefPair;
1636   jit::vector<DecRefPair> sortedDecs(
1637     sinkPoints.decRefs.begin(), sinkPoints.decRefs.end());
1638   std::sort(sortedDecs.begin(), sortedDecs.end(),
1639             [](const DecRefPair& a, const DecRefPair& b) {
1640               return a.first < b.first;
1641             });
1642   for (auto const& pair : sortedDecs) {
1643     ret += folly::format("  {} has incoming count >= {}\n",
1644                          *ids.inst(pair.first), pair.second).str();
1645   }
1646
1647   return ret;
1648 }
1649
1650 /* Using the information from SinkPointAnalyzer, sink IncRefs when and where
1651  * appropriate. This pass only removes and inserts IncRef instructions. */
1652 void sinkIncRefs(IRUnit& unit, const SinkPointsMap& info, const IdMap& ids) {
1653   ITRACE(3, "optimizing refcounts\n");
1654   Indent _i;
1655
1656   for (auto const& pair : info.points) {
1657     auto& incRefs = pair.second;
1658     ITRACE(3, "looking at value {}, with {} IncRefs\n",
1659            *pair.first, incRefs.size());
1660     Indent _i;
1661     auto it = incRefs.begin();
1662     while (it != incRefs.end()) {
1663       auto const& incs = it->first;
1664       auto const range = incRefs.equal_range(incs);
1665       auto const end = range.second;
1666       ITRACE(3, "processing {}\n", show(incs, ids));
1667       Indent _i;
1668
1669       // Eval.HHIRRefcountOptsAlwaysSink allows us to stress test the sinking
1670       // logic.
1671       auto doSink = RuntimeOption::EvalHHIRRefcountOptsAlwaysSink;
1672       if (it == end) {
1673         // There are no sink points for this IncRef. This can only happen if
1674         // the value started out as a maybeCounted type and was refined to a
1675         // notCounted type by an AssertType (if it was refined by a CheckType
1676         // there should be a sink point on the taken branch). In this case
1677         // we'll intentionally delete the IncRef down below without sinking any
1678         // copies anywhere.
1679         doSink = true;
1680       }
1681
1682       // For each sink point of this IncRef, check if doing the sink would
1683       // allow us to optimize it. If we find no optimizable sink points, don't
1684       // bother sinking.
1685       for (; it != end; ++it) {
1686         auto const& sp = it->second;
1687         if (sp.eraseOnly) {
1688           // An "erase only" sink point means we've proven that it's safe to
1689           // sink the IncRef to this point but we don't have access to the
1690           // value, probably because the SSATmp was killed by a Call. For now
1691           // we refuse to optimize this case, but it's possible to use this
1692           // sink point as long as we've proven that doing so will eliminte the
1693           // Inc/Dec pair.
1694           ITRACE(2, "found erase-only SinkPoint; not optimizing\n");
1695           doSink = false;
1696           break;
1697         }
1698
1699         if (IdMap::isAfter(sp.id)) continue;
1700
1701         auto canOptimize = [&](IRInstruction* inst) {
1702           // If inst is a DecRef that won't go to zero of the same value as the
1703           // IncRef, we're good.
1704           if (info.decRefs.count(ids.before(inst)) &&
1705               canonical(sp.value) == canonical(inst->src(0))) {
1706             return true;
1707           }
1708
1709           // If the current type of the value is more specific than the
1710           // original source of the IncRef, sink it to get a possibly cheaper
1711           // IncRef.
1712           if (sp.value->type() < ids.inst(*incs.begin())->src(0)->type()) {
1713             return true;
1714           }
1715
1716           return false;
1717         };
1718
1719         if (canOptimize(ids.inst(sp.id))) {
1720           doSink = true;
1721         }
1722       }
1723
1724       if (!doSink) {
1725         ITRACE(3, "no optimizable sink points\n");
1726         it = end;
1727         continue;
1728       }
1729
1730       ITRACE(3, "sinking {}\n", show(incs, ids));
1731       Indent __i;
1732       for (auto id : incs) {
1733         auto* incRef = ids.inst(id);
1734         incRef->block()->erase(incRef);
1735       }
1736
1737       for (it = range.first; it != end; ++it) {
1738         auto const& point = it->second;
1739         auto destId = point.id;
1740         auto* destInst = ids.inst(destId);
1741         auto* destBlock = destInst->block();
1742         auto destIt = destBlock->iteratorTo(destInst);
1743         if (IdMap::isAfter(point.id)) ++destIt;
1744
1745         auto* newInc = unit.gen(IncRef, destInst->marker(), point.value);
1746         ITRACE(2, "inserting {} {} {}\n",
1747                *newInc,
1748                IdMap::isBefore(point.id) ? "before" : "after",
1749                *destInst);
1750         destBlock->insert(destIt, newInc);
1751       }
1752     }
1753   }
1754   ITRACE(2, "\n");
1755 }
1756
1757 /* If an IncRef of a value is followed immediately by a DecRef of the same
1758  * value that is guaranteed to not destroy the value, erase both
1759  * instructions. */
1760 void eliminateRefcounts(IRUnit& unit, const SinkPointsMap& info,
1761                         const IdMap& ids) {
1762   for (auto const& pair : info.decRefs) {
1763     auto* decRef = ids.inst(pair.first);
1764     auto* decBlock = decRef->block();
1765     auto decIt = decBlock->iteratorTo(decRef);
1766     if (decIt == decBlock->begin()) continue;
1767
1768     auto incIt = decIt;
1769     --incIt;
1770     auto* incRef = &*incIt;
1771     if (!incRef->is(IncRef) ||
1772         canonical(incRef->src(0)) != canonical(decRef->src(0))) {
1773       // We can't erase an IncRef/DecRef pair but we can turn the DecRef into
1774       // the cheaper DecRefNZ.
1775       ITRACE(2, "changing {} into DecRefNZ\n", *decRef);
1776       decRef->setOpcode(DecRefNZ);
1777       continue;
1778     }
1779
1780     ITRACE(2, "erasing {} and {}\n", *incIt, *decIt);
1781     incRef->convertToNop();
1782     decRef->convertToNop();
1783   }
1784   ITRACE(2, "\n");
1785 }
1786
1787 /* After this pass completes, we don't need the TakeStack/TakeRef instructions
1788  * anymore. This pass converts them to Nop, and dce removes them. */
1789 void eliminateTakes(const BlockList& blocks) {
1790   for (auto b : blocks) {
1791     for (auto& inst : *b) {
1792       if (inst.is(TakeStack, TakeRef)) {
1793         inst.convertToNop();
1794       }
1795     }
1796   }
1797 }
1798
1799 }
1800
1801 /* optimizeRefcounts attempts to remove IncRef/DecRef pairs when we can prove
1802  * that doing so won't alter the visible behavior of the program. It does so in
1803  * three phases:
1804  *
1805  * - Analysis: Each reachable instruction is inspected to determine its effect
1806  *   on the refcount of its sources and dests. We keep track of two values for
1807  *   each SSATmp: the real count and the optimized count. The real count is a
1808  *   lower bound on the refcount of the object, assuming all IncRefs and
1809  *   DecRefs are left as is. The optimized count is a lower bound with as many
1810  *   IncRefs delayed as possible. When we get to an instruction that consumes a
1811  *   reference to one of its sources, we ensure that the delta between
1812  *   realCount and optCount won't cause a behavioral difference by logically
1813  *   inserting as many IncRefs as needed. This phase doesn't modify the code;
1814  *   it just records where the IncRefs may be sunk to.
1815  *
1816  * - IncRef sinking: Using the result of the previous phase, any IncRefs for
1817  *   which sinking would enable optimizations are moved to their sink
1818  *   points. Depending on the cfg being optimized, this may change the number
1819  *   of IncRef instructions present in the trace. The number of IncRefs
1820  *   executed in each control flow path will not be modified.
1821  *
1822  * - IncRef/DecRef removal: All of the hard work was done in the first two
1823  *   passes, leaving a simple peephole optimization. When an IncRef has been
1824  *   pushed down to right before the DecRef consuming its reference and that
1825  *   DecRef is guaranteed to not destroy the object, we can delete both
1826  *   instructions.
1827  *
1828  * SinkPointAnalyzer verifies that each produced reference is consumed exactly
1829  * once in all paths as it walks through the trace. After the optimization is
1830  * complete, a separate validation pass is run to ensure the net effect on the
1831  * refcount of each object has not changed.
1832  */
1833 void optimizeRefcounts(IRUnit& unit, FrameState&& fs) {
1834   Timer _t(Timer::optimize_refcountOpts);
1835   FTRACE(2, "vvvvvvvvvv refcount opts vvvvvvvvvv\n");
1836   auto const changed = splitCriticalEdges(unit);
1837   if (changed) {
1838     printUnit(6, unit, "after splitting critical edges for refcount opts");
1839   }
1840
1841   auto const blocks = rpoSortCfg(unit);
1842   IdMap ids(blocks, unit);
1843
1844   Indent _i;
1845   auto const sinkPoints =
1846     SinkPointAnalyzer(&blocks, &ids, unit, std::move(fs)).find();
1847   ITRACE(2, "Found sink points:\n{}\n", show(sinkPoints, ids));
1848
1849   BlockMap before;
1850   if (RuntimeOption::EvalHHIRValidateRefCount) {
1851     getRefDeltas(unit, before);
1852   }
1853
1854   sinkIncRefs(unit, sinkPoints, ids);
1855   eliminateRefcounts(unit, sinkPoints, ids);
1856   eliminateTakes(blocks);
1857
1858   if (RuntimeOption::EvalHHIRValidateRefCount) {
1859     BlockMap after;
1860     getRefDeltas(unit, after);
1861     if (!validateDeltas(before, after)) {
1862       printUnit(0, unit, "after refcount optimization");
1863       always_assert(false && "refcount validation failed");
1864     }
1865   }
1866
1867   FTRACE(2, "^^^^^^^^^^ refcount opts ^^^^^^^^^^\n");
1868 }
1869
1870 } }