TRACE_PUNT when refcount optimizer can't handle a region
[hiphop-php.git] / hphp / runtime / vm / jit / refcount-opts.cpp
blob d8321079b72e9d11fb5a6fc6968b353f0696d76c
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/opt.h"
18 #include <algorithm>
19 #include <utility>
21 #include "folly/Lazy.h"
22 #include "folly/Optional.h"
23 #include "folly/MapUtil.h"
25 #include "hphp/runtime/vm/jit/containers.h"
26 #include "hphp/runtime/vm/jit/block.h"
27 #include "hphp/runtime/vm/jit/cfg.h"
28 #include "hphp/runtime/vm/jit/frame-state.h"
29 #include "hphp/runtime/vm/jit/ir.h"
30 #include "hphp/runtime/vm/jit/ir-instruction.h"
31 #include "hphp/runtime/vm/jit/ir-unit.h"
32 #include "hphp/runtime/vm/jit/mc-generator.h"
33 #include "hphp/runtime/vm/jit/print.h"
34 #include "hphp/runtime/vm/jit/simplifier.h"
35 #include "hphp/runtime/vm/jit/ssa-tmp.h"
36 #include "hphp/runtime/vm/jit/state-vector.h"
37 #include "hphp/runtime/vm/jit/timer.h"
38 #include "hphp/runtime/vm/jit/translator.h"
39 #include "hphp/util/trace.h"
41 namespace HPHP { namespace jit {
43 TRACE_SET_MOD(hhir_refcount);
45 namespace {
47 // These are used a lot in this file.
48 using HPHP::Trace::Indent;
49 using HPHP::Trace::indent;
51 // A Point is an id representing a program point, determined by IdMap.
52 typedef uint32_t Point;
55 * IdMap stores a linear position id for program points before (even numbers)
56 * and after (odd numbers) each instruction. The distinction is necessary so we
57 * can insert sink points after the last instruction of a block.
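// Illustrative example (not from the original file): for a block
//
//   t1 = LdLoc<0>   // before id 0, after id 1
//   IncRef t1       // before id 2, after id 3
//   DecRef t1       // before id 4, after id 5
//
// ids.before() returns the even id and ids.after() the odd id of each
// instruction, so a sink point can still be placed after the block's last
// instruction (id 5 above).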
59 struct IdMap {
60 IdMap(const BlockList& blocks, const IRUnit& unit)
61 : m_ids(unit, -1)
63 Point nextId = 0;
64 for (auto* block : blocks) {
65 for (auto& inst : *block) {
66 m_ids[inst] = nextId;
67 nextId += 2;
68 m_insts.push_back(&inst);
73 IRInstruction* inst(Point id) const { return m_insts.at(id / 2); }
75 static bool isBefore(Point id) { return !(id % 2); }
76 static bool isAfter(Point id) { return id % 2; }
78 Point before(const IRInstruction* inst) const { return m_ids[inst]; }
79 Point before(const IRInstruction& inst) const { return before(&inst); }
81 Point after(const IRInstruction* inst) const { return m_ids[inst] + 1; }
82 Point after(const IRInstruction& inst) const { return after(&inst); }
84 private:
85 StateVector<IRInstruction, Point> m_ids;
86 jit::vector<IRInstruction*> m_insts;
90 * IncSet holds a set of ids of IncRef instructions, representing a pending
91 * reference in a Value (defined below). We have to use a set of ids instead of
92 * just a single id because it's possible for the same pending reference to
93 * come from different instructions in different control flow paths.
95 typedef jit::flat_set<Point> IncSet;
96 std::string show(const IncSet& incs, const IdMap& ids) {
97 std::string ret;
98 auto* separator = "";
99 for (auto id : incs) {
100 folly::toAppend(separator, ids.inst(id)->toString(), &ret);
101 separator = "\n";
103 return ret;
107 * Value represents what we know about the current state of a php value with a
108 * refcount. It holds lower bounds on the real refcount and optimized refcount
109 * of an object. As the analysis pass progresses, it uses this information to
110 * ensure that any optimizations will not affect what observers of the refcount
111 * see.
113 struct Value {
114 explicit Value(int c = 0, bool load = false)
115 : realCount(c)
116 , fromLoad(load)
119 int optCount() const { return realCount - optDelta(); }
120 int optDelta() const { return pendingIncs.size(); }
121 bool empty() const {
122 return pendingIncs.empty() && realCount == 0 && !fromLoad;
126 * Merge other's state with this state.
128 * Precondition:
129 * If realCounts do not match, the lesser value must be fromLoad. The idea
130 * is that fromLoad allows us to make more aggressive assumptions, so we
131 * shouldn't have both a higher count and fromLoad.
133 * For an example of where this is OK, consider:
135 * B0:
136 * t1 = LdLoc<0>
137 * IncRef t1
138 * DecRef t2
139 * JmpNZ B2
140 * B1:
141 * IncRef t1
142 * SpillStack t1
143 * TakeStack t1
144 * DecRef t1
145 * B2:
146 * ...
148 * The count of t1 at B2 will be 1 if coming from B0, and 0 if coming from
149 * B1, due to the SpillStack. However, the TakeStack marks t1 as fromLoad,
150 * which lets us know that there's at least one surviving reference, so we
151 * can move forward assuming the max of the two.
153 * Postconditions:
154 * 1. realCount becomes the max of the incoming values;
155 * 2. fromLoad is set if either incoming value has it;
156 * 3. pendingIncs is truncated to the size of the smaller of the two, then
157 * the sets at each index are merged. The two can differ when a value's
158 * count is observed in one branch of a conditional but not the other, and
159 * it's safe because any differences are resolved in mergeStates().
161 void merge(const Value& other, const IRUnit& unit) {
162 auto showFailure = [&] {
163 return folly::format(
164 "Failed to merge values in unit:\n{}\n{}\n{}\n",
165 show(*this), show(other), unit
166 ).str();
169 always_assert_log(
170 IMPLIES(realCount > other.realCount, other.fromLoad) &&
171 IMPLIES(realCount < other.realCount, fromLoad),
172 showFailure);
174 fromLoad = fromLoad || other.fromLoad;
175 realCount = std::max(realCount, other.realCount);
177 auto minSize = std::min(pendingIncs.size(), other.pendingIncs.size());
178 pendingIncs.resize(minSize);
179 for (unsigned i = 0; i < minSize; ++i) {
180 for (auto id : other.pendingIncs[i]) {
181 pendingIncs[i].insert(id);
186 void pushInc(Point id) {
187 ++realCount;
188 pendingIncs.emplace_back();
189 pendingIncs.back().insert(id);
192 IncSet popRef() {
193 assert(!pendingIncs.empty());
194 auto incs = std::move(pendingIncs.back());
195 pendingIncs.pop_back();
196 return incs;
199 void clearPendingIncs() {
200 pendingIncs.clear();
203 /* realCount is the number of live references currently owned by the
204 * jit. Since it represents references with lifetimes we can reliably track,
205 * it's also used as a lower bound of the refcount of the object. */
206 int realCount;
208 /* fromLoad represents whether or not the value came from a load
209 * instruction. This optimization is based on the idea of producing and
210 * consuming references, and most of our load instructions sometimes produce
211 * a reference and sometimes don't. Rather than splitting up all the loads
212 * into two flavors, we allow consumption of a value from a load even if
213 * there appear to be no live references. */
214 bool fromLoad;
216 private:
217 friend std::string show(const Value&);
219 /* pendingIncs contains ids of IncRef instructions that have yet to be placed
220 * before an observer. The size of pendingIncs represents the delta between
221 * the real count and the optimized count. */
222 jit::vector<IncSet> pendingIncs;
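// Worked example (illustrative): for a value with realCount == 1 and no
// pending IncRefs, pushInc() for an IncRef at point 6 makes realCount == 2
// while optCount() stays at 1, since the IncRef is still pending. When an
// observer forces the reference, popRef() returns the IncSet {6} to become a
// sink point and optDelta() drops back to 0.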
225 std::string show(const Value& state) {
226 std::string incs;
227 auto* sep = "";
228 for (auto const& set : state.pendingIncs) {
229 folly::toAppend(sep, '{', folly::join(", ", set), '}', &incs);
230 sep = ", ";
233 return folly::format(
234 "(real, opt) = ({:>2}, {:>2}) - [{}]{}",
235 state.realCount, state.optCount(),
236 incs,
237 state.fromLoad ? " - from load" : "").str();
241 * Frame represents a call frame and is used to track references to the current
242 * $this pointer.
244 struct Frame {
245 explicit Frame(SSATmp* main = nullptr, SSATmp* current = nullptr)
246 : mainThis(main)
247 , currentThis(current)
250 /* The canonical form of $this. For the outermost frame of the trace, this
251 * will be nullptr if we haven't loaded $this yet, or a LdThis if we
252 * have. For inlined frames, this will be the canonical SSATmp* for the $this
253 * pointer given to the SpillFrame. */
254 SSATmp* mainThis;
256 /* Latest live temp holding $this. This is updated whenever we see a new
257 * LdThis in a frame, and it's set to nullptr after any instructions that
258 * kill all live temps (calls, mostly). */
259 SSATmp* currentThis;
261 bool operator==(const Frame b) const {
262 return mainThis == b.mainThis && currentThis == b.currentThis;
266 std::string show(const Frame frame) {
267 if (!frame.mainThis && !frame.currentThis) {
268 return "<no $this>";
270 std::string ret;
271 if (frame.mainThis) {
272 folly::toAppend("main this: ", frame.mainThis->toString(), &ret);
273 if (frame.currentThis) ret += " ";
275 if (frame.currentThis) {
276 folly::toAppend("current this: ", frame.currentThis->toString(), &ret);
278 return ret;
282 * FrameStack contains a Frame struct for all live and pre-live frames.
284 struct FrameStack {
285 /* Push a possibly empty Frame representing a pre-live ActRec. We don't yet
286 * know if the call will be inlined or not. */
287 void pushPreLive(Frame f = Frame()) {
288 preLive.emplace_back(f);
291 /* Push a new frame representing a newly defined FramePtr for an inlined
292 * call. */
293 void pushInline(const IRInstruction* fpInst) {
294 assert(!preLive.empty());
295 assert(fpInst->is(DefInlineFP));
296 live[fpInst] = std::move(preLive.back());
297 preLive.pop_back();
300 /* Pop an inlined frame to represent an InlineReturn, forgetting what we know
301 * about its $this pointer. */
302 void popInline(const IRInstruction* fpInst) {
303 assert(fpInst->is(DefInlineFP));
304 assert(live.size() >= 2);
305 auto it = live.find(fpInst);
306 assert(it != live.end());
308 assert(dead.count(fpInst) == 0);
309 dead[fpInst] = std::move(it->second);
310 live.erase(it);
313 /* Pop a non-inlined frame. This is only called if a trace includes a RetC in
314 * the outermost function. */
315 void pop() {
316 assert(live.size() == 1);
317 live.erase(live.begin());
320 bool operator==(const FrameStack& b) const {
321 return live == b.live && preLive == b.preLive;
324 bool operator!=(const FrameStack& b) const {
325 return !(*this == b);
328 /* Map from the instruction defining the frame to a Frame object. */
329 jit::flat_map<const IRInstruction*, Frame> live;
331 /* Similar to live, but for frames that have been popped. We keep track of
332 * these because we often refer to a LdThis from an inlined function after
333 * it's returned. */
334 jit::flat_map<const IRInstruction*, Frame> dead;
336 /* Frames that have been pushed but not activated. */
337 jit::vector<Frame> preLive;
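// Rough lifecycle of a frame through this structure (illustrative):
//
//   SpillFrame ...       -> pushPreLive()   (pre-live ActRec)
//   t2 = DefInlineFP ... -> pushInline()    (pre-live frame becomes live)
//   InlineReturn t2      -> popInline()     (live frame moves to dead)
//
// A Call/CallArray consumes the pre-live frame instead, via
// callPreLiveFrame() in SinkPointAnalyzer below.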
340 typedef jit::flat_map<SSATmp*, SSATmp*> CanonMap;
343 * State holds all the information we care about during the analysis pass, and
344 * is used to propagate and merge state across control flow edges.
346 struct State {
347 /* values maps from live SSATmps to the currently known state about the
348 * value. */
349 jit::flat_map<SSATmp*, Value> values;
351 /* canon keeps track of values that have been through passthrough
352 * instructions, like CheckType and AssertType. */
353 CanonMap canon;
355 /* frames keeps track of live $this pointers in call frames. */
356 FrameStack frames;
359 std::string show(const State& state) {
360 std::string ret;
362 for (auto const& val : state.values) {
363 if (val.second.empty()) continue;
365 folly::toAppend(indent(), show(val.second), " | ",
366 val.first->inst()->toString(), '\n', &ret);
369 if (!state.frames.live.empty()) {
370 folly::toAppend(indent(), "live frames (unordered):\n", &ret);
371 Indent _i;
372 for (auto const& pair : state.frames.live) {
373 folly::toAppend(indent(), pair.first->toString(), ": ",
374 show(pair.second), '\n', &ret);
378 if (!state.frames.preLive.empty()) {
379 folly::toAppend(indent(), "pre-live frames:\n", &ret);
380 Indent _i;
381 for (auto const& frame : state.frames.preLive) {
382 folly::toAppend(indent(), show(frame), '\n', &ret);
386 if (!state.canon.empty()) {
387 folly::toAppend(indent(), "replaced values:\n", &ret);
388 Indent _i;
389 for (auto const& pair : state.canon) {
390 ret += folly::format("{}canon[{}] = {}\n",
391 indent(), *pair.first, *pair.second).str();
395 return ret;
398 struct IncomingState {
399 IncomingState(const Block* f, const State& s)
400 : from(f)
401 , state(s)
404 IncomingState(const Block* f, State&& s)
405 : from(f)
406 , state(std::move(s))
409 const Block* from;
410 State state;
412 typedef jit::vector<IncomingState> IncomingStateVec;
413 typedef jit::hash_map<const Block*, IncomingStateVec> SavedStates;
416 * One SinkPoint exists for each optimizable IncRef in each control flow path
417 * dominated by it.
419 struct SinkPoint {
420 SinkPoint(Point i, SSATmp* v, bool erase)
421 : id(i)
422 , eraseOnly(erase)
423 , value(v)
426 /* The position of the sink point, from IdMap. */
427 Point id;
429 /* In some very rare situations we may know that we want to insert a
430 * SinkPoint for a value without having a live SSATmp for that value. This
431 * means it's not safe to sink the IncRef to that point, but if the
432 * instruction immediately after the SinkPoint is a DecRef of the same value,
433 * it's safe to eliminate both instructions. */
434 bool eraseOnly;
436 /* The current version of the value, if it has been through a passthrough
437 * instruction. Keeping track of this allows us to sink an IncRef of a Cell
438 * past a CheckType and turn it into a cheaper IncRef. */
439 SSATmp* value;
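// For instance (illustrative): an IncRef whose source is typed Cell, when
// sunk past a CheckType<Obj>, can be re-issued against the Obj-typed tmp and
// skip the generic "is this counted?" check at runtime.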
442 typedef jit::flat_multimap<IncSet, SinkPoint> SinkPoints;
444 struct SinkPointsMap {
445 // Maps values to SinkPoints for their IncRefs
446 jit::hash_map<SSATmp*, SinkPoints> points;
448 // Maps ids of DecRef instructions to the incoming lower bound of the object's
449 // refcount. Only DecRefs that cannot go to zero are in this map, so there
450 // will be no entries with a value of less than 2.
451 jit::hash_map<Point, int> decRefs;
455 * We often want to insert sink points on control flow edges. idForEdge returns
456 * the appropriate id to use for the given edge, assuming it is not a critical
457 * edge.
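// For example (illustrative): if B1 ends in a conditional branch with both a
// next and a taken edge to B2, the id for B1 -> B2 is "before" the first
// instruction of B2 (which must be B1's only predecessor, since critical
// edges have been split). If B4 has a single successor B5, the id is taken
// from the end of B4: "before" its terminal control-flow instruction, or
// "after" its last instruction otherwise.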
459 Point idForEdge(const Block* from, const Block* to, const IdMap& ids) {
460 auto* next = from->next();
461 auto* taken = from->taken();
462 assert(next || taken);
464 auto before = [&](const IRInstruction& inst) {
465 ITRACE(6, "id for B{} -> B{} is before {}\n", from->id(), to->id(), inst);
466 return ids.before(inst);
468 auto after = [&](const IRInstruction& inst) {
469 ITRACE(6, "id for B{} -> B{} is after {}\n", from->id(), to->id(), inst);
470 return ids.after(inst);
473 if (next && taken) {
474 // from has two outgoing edges. Use the beginning of to.
475 assert(to->numPreds() == 1 && !to->empty());
476 auto it = to->begin();
477 assert(it != to->end() && !it->is(DefLabel, BeginCatch));
478 return before(*it);
479 } else {
480 // from has one outgoing edge. Use the end of from.
481 assert(!from->empty());
482 auto it = from->end();
483 --it;
484 if (it->isControlFlow()) {
485 assert(it->isTerminal());
486 return before(*it);
487 } else {
488 return after(*it);
493 const StaticString s_get_defined_vars("get_defined_vars"),
494 s_get_defined_vars_SystemLib("__SystemLib\\get_defined_vars");
497 * SinkPointAnalyzer is responsible for inspecting a trace and determining two
498 * things: the latest safe point to sink each IncRef to, and which DecRefs
499 * cannot take their src to 0.
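// Typical use, as in optimizeRefcounts() at the bottom of this file:
//
//   auto const sinkPoints =
//     SinkPointAnalyzer(&blocks, &ids, unit, std::move(fs)).find();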
501 struct SinkPointAnalyzer : private LocalStateHook {
502 SinkPointAnalyzer(const BlockList* blocks, const IdMap* ids,
503 IRUnit& unit, FrameState&& frameState)
504 : m_unit(unit)
505 , m_blocks(*blocks)
506 , m_block(nullptr)
507 , m_inst(nullptr)
508 , m_ids(*ids)
509 , m_takenState(nullptr)
510 , m_frameState(std::move(frameState))
513 SinkPointsMap find() {
514 assert(!m_blocks.empty());
515 auto& fpInst = m_blocks.front()->front();
516 assert(fpInst.is(DefFP));
517 m_state.frames.live[&fpInst] = Frame();
519 for (auto* block : m_blocks) {
520 ITRACE(3, "entering B{}\n", block->id());
521 Indent _i;
523 m_block = block;
524 m_frameState.startBlock(block, block->front().marker());
526 if (block != m_blocks.front()) {
527 assert(m_savedStates.count(block) == 1);
528 m_state = mergeStates(std::move(m_savedStates[block]));
529 m_savedStates.erase(block);
532 for (auto& inst : *block) {
533 m_inst = &inst;
534 processInst();
536 m_inst = nullptr;
538 auto* next = block->next();
539 auto* taken = block->taken();
540 if (!next && !taken) {
541 // This block is terminal. Ensure that all live references have been
542 // accounted for.
543 ITRACE(3, "finishing terminal B{}\n", block->id());
544 Indent _i;
545 FTRACE(3, "{}", show(m_state));
547 auto showFailure = [&]{
548 std::string ret;
549 ret += show(*(mcg->tx().region())) + "\n";
550 ret += folly::format("Unconsumed reference(s) leaving B{}\n",
551 block->id()).str();
552 ret += show(m_state);
553 ret += folly::format("{:-^80}\n{}{:-^80}\n",
554 " unit ", m_unit.toString(), "").str();
555 return ret;
558 // Terminal block. Everything must be resolved.
559 for (auto const& valState : m_state.values) {
560 always_assert_log(valState.second.realCount == 0 &&
561 valState.second.optDelta() == 0,
562 showFailure);
566 // Propagate current state to the next block, if one exists. If we have a
567 // taken branch, that information will be propagated in processInst().
568 if (next) {
569 ITRACE(3, "propagating B{}'s state to next - B{}\n",
570 m_block->id(), next->id());
571 Indent _i;
572 FTRACE(3, "{}", show(m_state));
573 m_savedStates[next].emplace_back(block, std::move(m_state));
575 FTRACE(3, "\n");
577 m_frameState.finishBlock(block);
580 return std::move(m_ret);
583 private:
584 struct IncomingBranch {
585 IncomingBranch(const Block* b, const Value& v)
586 : from(b)
587 , value(v)
590 const Block* from;
591 Value value;
593 struct IncomingValue {
594 Value value;
595 jit::vector<IncomingBranch> inBlocks;
598 State mergeStates(IncomingStateVec&& states) {
599 DEBUG_ONLY auto doTrace = [&] {
600 ITRACE(3, "merging {} state(s) into B{}\n", states.size(), m_block->id());
601 for (DEBUG_ONLY auto const& inState : states) {
602 Indent _i;
603 ITRACE(3, "from B{}\n", inState.from->id());
604 Indent __i;
605 FTRACE(3, "{}", show(inState.state));
608 ONTRACE(3, doTrace());
610 assert(!states.empty());
612 * Short circuit the easy, common case: one incoming state. (This
613 * is just to save JIT time.)
615 * Except if it's a DefLabel---there could still be unconsumed
616 * references that need to be forwarded through the DefLabel (for
617 * example in the case of a control flow diamond where one side
618 * was optimized away because the branch turned into a Nop).
620 if (states.size() == 1 && !m_block->front().is(DefLabel)) {
621 return std::move(states.front().state);
624 auto const& firstFrames = states.front().state.frames;
625 State retState;
626 retState.canon = mergeCanons(states);
627 retState.frames = firstFrames;
629 // If the current block begins with a DefLabel, we start by taking
630 // unconsumed references for each dest of the label that produces a
631 // reference.
632 if (m_block->front().is(DefLabel)) {
633 auto& label = m_block->front();
634 auto& refsVec = m_unit.labelRefs().at(&label);
635 always_assert(refsVec.size() == label.numDsts());
637 ITRACE(3, "producing refs for dests of {}\n", label);
638 Indent _i;
639 for (auto i = 0U, n = label.numDsts(); i < n; ++i) {
640 if (label.dst(i)->type().notCounted()) continue;
642 auto refs = refsVec[i];
643 if (refs == 0) continue;
645 ITRACE(3, "dest {} produces {} ref(s)\n", i, refs);
646 Indent _i;
647 m_block->forEachSrc(
648 i, [&](IRInstruction* jmp, SSATmp* src) {
649 if (src->type().notCounted()) return;
651 auto it = find_if(states.begin(), states.end(),
652 [jmp](const IncomingState& s) {
653 return s.from == jmp->block();
655 always_assert(it != states.end());
656 src = canonical(src);
657 auto& valState = it->state.values[src];
658 always_assert(valState.optDelta() == 0);
659 ITRACE(3, "consuming refs from {} | {}\n",
660 show(valState), *src->inst());
661 if (valState.realCount >= refs) {
662 valState.realCount -= refs;
663 } else {
664 always_assert(valState.fromLoad);
665 valState.realCount = 0;
669 retState.values[label.dst(i)].realCount = refs;
673 // Now, we build a map from values to their merged incoming state and which
674 // blocks provide information about the value.
675 jit::hash_map<SSATmp*, IncomingValue> mergedValues;
676 for (auto const& inState : states) {
677 if (inState.state.frames != firstFrames) {
678 // Task #5216936: add support for merging states with
679 // different FrameStacks, and get rid of the TRACE_PUNT below.
680 if (RuntimeOption::EvalHHIRBytecodeControlFlow) {
681 TRACE_PUNT("refcount-opts needs support for merging states with "
682 "different FrameStacks");
684 always_assert(false &&
685 "merging states with different FrameStacks is not supported");
688 for (auto const& inPair : inState.state.values) {
689 if (inPair.second.empty()) continue;
691 auto* value = inPair.first;
692 assert(!value->inst()->isPassthrough());
693 const bool existed = mergedValues.count(value);
694 auto& mergedState = mergedValues[value];
695 if (existed) {
696 mergedState.value.merge(inPair.second, m_unit);
697 } else {
698 mergedState.value = inPair.second;
701 // Register this block as an incoming provider of the value.
702 mergedState.inBlocks.emplace_back(inState.from, inPair.second);
706 // Now, for each incoming value, insert it into the resulting state. If
707 // there is a difference in a value's state between incoming branches,
708 // resolve it by inserting sink points on the appropriate incoming edges.
709 for (auto& pair : mergedValues) {
710 auto mergedState = pair.second.value;
712 // If the value wasn't provided by every incoming branch, we're hosed
713 // unless it was just fromLoad in some branches.
714 if (pair.second.inBlocks.size() < states.size()) {
715 for (auto& inBlock : pair.second.inBlocks) {
716 always_assert_flog(
717 inBlock.value.realCount == 0 && inBlock.value.optDelta() == 0,
718 "While merging incoming states for B{}, value {} not provided by"
719 " all preds but has nontrivial state `{}' from B{}."
720 "\n\n{:-^80}\n{}{:-^80}\n",
721 m_block->id(), *pair.first->inst(),
722 show(inBlock.value), inBlock.from->id(),
723 " unit ", m_unit, ""
728 auto* incVal =
729 folly::get_default(retState.canon, pair.first, pair.first);
730 auto const mergedDelta = mergedState.optDelta();
731 for (auto& inBlock : pair.second.inBlocks) {
732 auto& inState = inBlock.value;
733 assert(inState.optDelta() >= mergedDelta);
735 Point insertId = idForEdge(inBlock.from, m_block, m_ids);
736 while (inState.optDelta() > mergedDelta) {
737 ITRACE(3, "Inserting sink point on edge B{} -> B{}\n",
738 inBlock.from->id(), m_block->id());
739 m_ret.points[pair.first].insert(
740 std::make_pair(inState.popRef(),
741 SinkPoint(insertId, incVal, false)));
745 // Only put the merged state in the result if it provides useful
746 // information.
747 assert(mergedState.optCount() >= 0);
748 if (mergedState.realCount || mergedState.fromLoad) {
749 retState.values[pair.first] = mergedState;
753 ITRACE(3, "returning state:\n");
754 Indent _i;
755 FTRACE(3, "{}", show(retState));
756 return retState;
759 /* If control flow after something like a CheckType rejoins, we need to make
760 * sure we only reference values that dominate the join. In the example
761 * below, t4 is not defined in B3 and therefore cannot be used in B4:
763 * B1:
764 * t4:Obj = CheckType<Obj> t3:Cell -> B3
765 * fallthrough to B2
767 * B2:
768 * IncRef t4:Obj
769 * Jmp B4
771 * B3:
772 * IncRef t3:Cell
773 * Jmp B4
775 * B2's incoming state will have a mapping from t3 -> t4, and B3 will have no
776 * mapping for t3. We need to look at all the incoming mappings and pick the
777 * most specific value defined in all incoming paths (t3 in this case).
779 CanonMap mergeCanons(const IncomingStateVec& states) const {
780 CanonMap retMap;
782 auto passthrough = [](SSATmp*& val) -> void {
783 val = val->inst()->getPassthroughValue();
786 // First, build a map from canonical values to the values they map to and
787 // how many incoming branches have the mapped value available.
788 jit::flat_map<SSATmp*, jit::flat_map<SSATmp*, int>> mergedCanon;
790 for (auto const& inState : states) {
791 for (auto const& pair : inState.state.canon) {
792 for (auto* val = pair.second; val->inst()->isPassthrough();
793 passthrough(val)) {
794 ++mergedCanon[pair.first][val];
799 for (auto const& pair : mergedCanon) {
800 auto* trueRoot = pair.first;
801 // Build a list of versions of this value that exist in all incoming
802 // branches.
803 jit::flat_set<SSATmp*> tmps;
804 for (auto const& countPair : pair.second) {
805 if (countPair.first != trueRoot && countPair.second == states.size()) {
806 tmps.insert(countPair.first);
810 if (tmps.empty()) {
811 // None of the derived values exist, so use trueRoot. This is
812 // equivalent to not having an entry in the map.
813 continue;
814 } else if (tmps.size() == 1) {
815 // Only one derived value exists in all incoming branches, so use that
816 // value.
817 retMap[trueRoot] = *tmps.begin();
818 continue;
821 // We found more than one value coming in from all branches, so find the
822 // most derived one. This is the one temp that doesn't appear as an
823 // ancestor of any of the others.
824 jit::flat_set<SSATmp*> ancestors;
825 for (auto* val : tmps) {
826 // trueRoot shouldn't be in the set
827 assert(val->inst()->isPassthrough());
828 passthrough(val);
830 // For each ancestor of val, if it's not yet in ancestors, add it and
831 // continue. If it is in ancestors we've already visited it and all of
832 // its ancestors, so do nothing more.
833 for (; val->inst()->isPassthrough(); passthrough(val)) {
834 auto it = ancestors.find(val);
835 if (it == ancestors.end()) {
836 ancestors.insert(val);
837 } else {
838 break;
843 // There should now be exactly one value in tmps that isn't in
844 // ancestors. This is the value to use in the merge map. The sets are
845 // sorted, so walk them in parallel to find the discrepancy.
846 assert(ancestors.size() == tmps.size() - 1);
847 SSATmp* mostDerived = nullptr;
849 for (auto tIt = tmps.begin(), aIt = ancestors.begin(); ; ++tIt, ++aIt) {
850 assert(tIt != tmps.end());
851 if (aIt == ancestors.end() || *tIt != *aIt) {
852 mostDerived = *tIt;
853 break;
856 retMap[trueRoot] = mostDerived;
859 return retMap;
862 void processInst() {
863 ITRACE(3, "processing instruction id {:>3}: {}\n",
864 m_ids.before(m_inst), *m_inst);
865 Indent _i;
867 auto const nSrcs = m_inst->numSrcs();
868 m_frameState.setMarker(m_inst->marker());
870 if (auto* taken = m_inst->taken()) {
871 // If an instruction's branch is ever taken, none of its sources will be
872 // consumed. This means we should propagate state across this edge before
873 // consuming any values and modifying the current state.
874 ITRACE(3, "propagating B{}'s state to taken - B{}\n",
875 m_block->id(), taken->id());
876 Indent _i;
877 FTRACE(3, "{}", show(m_state));
878 auto& takenStates = m_savedStates[taken];
879 takenStates.emplace_back(m_block, m_state);
881 // Since we can't insert sink points after the branch but before the
882 // instruction consumes its sources, we have to insert them before the
883 // whole instruction. This happens after we've already propagated the
884 // state to taken, so we keep a pointer to the propagated state so we can
885 // update it when needed.
886 m_takenState = &takenStates.back().state;
887 } else {
888 m_takenState = nullptr;
891 if (m_inst->is(IncRef, IncRefCtx, TakeRef)) {
892 auto* src = m_inst->src(0);
894 // We only consider an IncRef optimizable if it's not an IncRefCtx/TakeRef
895 // and the value doesn't have an optimized count of 0. This prevents any
896 // subsequent instructions from taking in a source with count 0.
897 if (src->type().maybeCounted()) {
898 auto& valState = m_state.values[canonical(src)];
899 if (valState.optCount() > 0 && m_inst->is(IncRef)) {
900 auto const id = m_ids.before(m_inst);
901 m_state.values[canonical(src)].pushInc(id);
902 } else {
903 // Even if the IncRef isn't optimizable, it still needs to be tracked
904 // as a live reference.
905 ++valState.realCount;
907 ITRACE(3, "{}\n", show(m_state.values[canonical(src)]));
909 } else if (m_inst->is(TakeStack)) {
910 // TakeStack is used to indicate that we've logically popped a value off
911 // the stack, in place of a LdStack.
912 auto* src = m_inst->src(0);
913 if (src->type().maybeCounted()) {
914 m_state.values[canonical(src)].fromLoad = true;
916 } else if (m_inst->is(Jmp)) {
917 // Completely resolve the deficit for any sources to Jmp. This isn't a
918 // perfect solution but it's vastly simpler than other options and
919 // probably good enough for our purposes.
920 for (uint32_t i = 0; i < nSrcs; ++i) {
921 resolveValue(m_inst->src(i));
923 } else if (m_inst == &m_block->back() && m_block->isExit() &&
924 // Make sure it's not a RetCtrl from Ret{C,V}
925 (!m_inst->is(RetCtrl) ||
926 m_inst->extra<RetCtrlData>()->suspendingResumed) &&
927 // The EndCatch in Function{Suspend,Return}Hook's catch block is
928 // special: it happens after locals and $this have been
929 // decreffed, so we don't want to do the normal cleanup
930 !(m_inst->is(EndCatch) &&
931 m_block->preds().front().inst()
932 ->is(FunctionSuspendHook, FunctionReturnHook) &&
933 !m_block->preds().front().inst()
934 ->extra<RetCtrlData>()->suspendingResumed)) {
935 // When leaving a trace, we need to account for all live references in
936 // locals and $this pointers.
937 consumeAllLocals();
938 consumeAllFrames();
939 } else if (m_inst->is(GenericRetDecRefs, NativeImpl)) {
940 consumeCurrentLocals();
941 } else if (m_inst->is(CreateCont, CreateAFWH)) {
942 consumeInputs();
943 consumeCurrentLocals();
944 auto frame = m_inst->src(0)->inst();
945 consumeFrame(m_state.frames.live.at(frame));
946 defineOutputs();
947 } else if (m_inst->is(DecRefLoc)) {
948 consumeLocal(m_inst->extra<DecRefLoc>()->locId);
949 } else if (m_inst->is(DecRefThis)) {
950 // This only happens during a RetC, and it happens instead of a normal
951 // DecRef on $this.
952 auto frame = m_inst->src(0)->inst();
953 consumeFrame(m_state.frames.live.at(frame));
954 } else {
955 // All other instructions take the generic path.
956 consumeInputs();
957 defineOutputs();
960 m_frameState.update(m_inst);
963 /* jit::canonical() traces through passthrough instructions to get the root
964 * of a value. Since we're tracking the state of inlined frames in the trace,
965 * there are often cases where the root value for a LdThis is really a value
966 * up in some enclosing frame. */
967 SSATmp* canonical(SSATmp* value) {
968 auto* root = jit::canonical(value);
969 auto* inst = root->inst();
970 if (!inst->is(LdThis)) return root;
972 auto* fpInst = inst->src(0)->inst();
973 auto it = m_state.frames.live.find(fpInst);
974 if (it == m_state.frames.live.end()) {
975 it = m_state.frames.dead.find(fpInst);
976 assert(it != m_state.frames.dead.end());
978 return it->second.mainThis;
982 * Consumes all local values, including those in callers if we're inlined.
984 void consumeAllLocals() {
985 ITRACE(3, "consuming all locals\n");
986 Indent _i;
987 m_frameState.forEachLocal(
988 [&](uint32_t id, SSATmp* value) {
989 if (value) consumeValue(value);
995 * Consumes all locals in the current frame, not including inline callers.
997 void consumeCurrentLocals() {
998 ITRACE(3, "consume current frame locals\n");
999 Indent _i;
1000 for (uint32_t i = 0, n = m_frameState.func()->numLocals(); i < n; ++i) {
1001 if (auto value = m_frameState.localValue(i)) {
1002 consumeValue(value);
1007 void consumeInputs() {
1008 if (m_inst->is(DecRef, DecRefNZ)) {
1009 auto* src = m_inst->src(0);
1011 if (src->type().notCounted()) return;
1012 src = canonical(src);
1014 auto const& valState = m_state.values[src];
1015 // Record DecRefs that won't go to 0
1016 assert(IMPLIES(m_inst->is(DecRefNZ), valState.realCount > 1));
1017 if (valState.realCount > 1) {
1018 m_ret.decRefs[m_ids.before(m_inst)] = valState.realCount;
1020 } else if (m_inst->is(SpillFrame)) {
1021 auto* this_ = m_inst->src(2);
1022 if (this_->isA(Type::Obj)) {
1023 m_state.frames.pushPreLive(Frame(canonical(this_), this_));
1024 // When spilling an Object to a pre-live ActRec, we can reliably track
1025 // the reference in the frame. This allows us to optimize away many
1026 // refcounting operations on $this in inlined calls.
1027 ITRACE(3, "{}({}) becoming $this in an ActRec; not consuming "
1028 "reference\n",
1029 *this_, *canonical(this_));
1030 Indent _i;
1032 auto& valState = m_state.values[canonical(this_)];
1033 if (valState.realCount == 0) {
1034 assert(valState.fromLoad);
1035 ++valState.realCount;
1037 ITRACE(3, " after: {}\n", show(valState));
1038 return;
1040 m_state.frames.pushPreLive();
1041 } else if (m_inst->is(DefInlineFP)) {
1042 m_state.frames.pushInline(m_inst);
1043 } else if (m_inst->is(InlineReturn)) {
1044 FTRACE(3, "{}", show(m_state));
1045 m_state.frames.popInline(m_inst->src(0)->inst());
1046 } else if (m_inst->is(RetAdjustStack)) {
1047 m_state.frames.pop();
1048 } else if (m_inst->is(Call, CallArray)) {
1049 resolveAllFrames();
1050 callPreLiveFrame();
1051 } else if (m_inst->is(ContEnter)) {
1052 resolveAllFrames();
1053 } else if (m_inst->is(CallBuiltin)) {
1054 const auto callee = m_inst->extra<CallBuiltinData>()->callee->fullName();
1055 if (callee->isame(s_get_defined_vars.get()) ||
1056 callee->isame(s_get_defined_vars_SystemLib.get())) {
1057 observeLocalRefs();
1059 } else if (m_inst->is(InterpOne, InterpOneCF)) {
1060 // InterpOne can push and pop ActRecs.
1061 auto const op = m_inst->extra<InterpOneData>()->opcode;
1062 if (jit::getInstrInfo(op).type == jit::InstrFlags::OutFDesc) {
1063 m_state.frames.pushPreLive();
1064 } else if (isFCallStar(op)) {
1065 resolveAllFrames();
1066 callPreLiveFrame();
1067 } else if (op == OpContEnter) {
1068 resolveAllFrames();
1069 } else if (op == OpFCallBuiltin) {
1070 // This isn't optimal, but InterpOne of FCallBuiltin is rare enough
1071 // that it doesn't matter.
1072 observeLocalRefs();
1075 if (m_inst->is(InterpOneCF)) {
1076 // InterpOneCF's normal path leaves the trace, so consume all live
1077 // references.
1078 consumeAllLocals();
1079 consumeAllFrames();
1083 ITRACE(3, "consuming normal inputs\n");
1084 for (uint32_t i = 0; i < m_inst->numSrcs(); ++i) {
1085 Indent _i;
1086 auto src = m_inst->src(i);
1088 if (src->isA(Type::StkPtr) && src->inst()->is(SpillFrame) &&
1089 !m_inst->is(DefInlineFP,
1090 Call,
1091 CallArray,
1092 ContEnter)) {
1093 bool const beginInlDefSP = m_inst->is(ReDefSP) &&
1094 m_inst->src(1)->inst()->is(DefInlineFP);
1095 if (!beginInlDefSP) {
1096 // If the StkPtr being consumed points to a pre-live ActRec, observe
1097 // its $this pointer since many of our helper functions decref it.
1098 auto this_ = src->inst()->src(2);
1099 if (this_->isA(Type::Obj)) {
1100 auto const sinkPoint = SinkPoint(m_ids.before(m_inst), this_,
1101 false);
1102 this_ = canonical(this_);
1103 observeValue(this_, m_state.values[this_], sinkPoint);
1108 if (src->type().notCounted()) continue;
1110 auto& valState = m_state.values[canonical(src)];
1111 const SinkPoint sp(m_ids.before(m_inst), src, false);
1112 assert(IMPLIES(valState.optCount() == 0, valState.realCount == 0));
1113 if (m_inst->consumesReference(i)) {
1114 consumeValue(canonical(src), valState, sp);
1118 // Many instructions have complicated effects on the state of the
1119 // locals. Get this information from FrameState.
1120 ITRACE(3, "getting local effects from FrameState\n");
1121 Indent _i;
1122 m_frameState.getLocalEffects(m_inst, *this);
1126 * Process a normal call to a prelive frame. Consume $this, if any, and pop
1127 * the prelive frame.
1129 void callPreLiveFrame() {
1130 // If we have no prelive frames the FPush* was in a different trace.
1131 if (m_state.frames.preLive.empty()) return;
1133 consumeFrame(m_state.frames.preLive.back());
1134 m_state.frames.preLive.pop_back();
1137 template<typename L>
1138 void forEachFrame(L body) {
1139 for (auto& pair : m_state.frames.live) body(pair.second);
1140 for (auto& frame : m_state.frames.preLive) body(frame);
1144 * Call, CallArray, and ContEnter destroy all live values. They also might
1145 * throw and we can't attach catch traces to them. Resolve the opt delta for
1146 * any live $this pointers and forget the "current" this pointer so we don't
1147 * reuse it after the call.
1149 void resolveAllFrames() {
1150 forEachFrame([this](Frame& f) {
1151 if (f.currentThis) {
1152 resolveValue(f.currentThis);
1153 f.currentThis = nullptr;
1154 } else if (f.mainThis) {
1155 resolveValueEraseOnly(f.mainThis);
1161 * Consume the $this pointer for a Frame, if it has one. If currentThis is
1162 * nullptr but mainThis exists we use an "erase-only" sink point, which means
1163 * it's not safe to actually sink the IncRef but it's safe to erase the
1164 * IncRef/DecRef pair (usually because the value has been smashed by a Call).
1166 void consumeFrame(Frame& f) {
1167 if (f.currentThis) {
1168 consumeValue(f.currentThis);
1169 } else if (f.mainThis) {
1170 consumeValueEraseOnly(f.mainThis);
1175 * When we leave a trace, we have to account for all the references we're
1176 * tracking in pre-live frames and inlined frames.
1178 void consumeAllFrames() {
1179 forEachFrame([this](Frame& f) {
1180 consumeFrame(f);
1184 /* When an instruction consumes a reference to a value, two things in our
1185 * tracked state must be updated. We first ensure that the behavior of DecRef
1186 * and COW will not be affected by the difference between the real count and
1187 * the optimized count. This is done by inserting as many IncRef sink points
1188 * as needed before the instruction. Second, the real count of the value is
1189 * decremented to reflect the consumption of the reference. If there is no
1190 * reference to consume, this is either a bug in this analysis pass or the
1191 * incoming IR, so abort with a (hopefully) helpful error. */
1192 void consumeValueImpl(SSATmp* value, bool eraseOnly) {
1193 assert(value);
1194 if (value->type().notCounted()) return;
1196 auto* root = canonical(value);
1197 consumeValue(root, m_state.values[root],
1198 SinkPoint(m_ids.before(m_inst), value, eraseOnly));
1200 void consumeValue(SSATmp* value) { consumeValueImpl(value, false); }
1201 void consumeValueEraseOnly(SSATmp* value) { consumeValueImpl(value, true); }
1203 void consumeValue(SSATmp* value, Value& valState, SinkPoint sinkPoint) {
1204 ITRACE(3, "consuming value {}\n", *value->inst());
1205 assert(value == canonical(value));
1206 Indent _i;
1208 ITRACE(3, "before: {}\n", show(valState));
1209 assert(valState.realCount >= 0);
1210 assert(valState.optCount() >= 0);
1212 assertCanConsume(value);
1214 // Note that we're treating consumers and observers the same here, which is
1215 // necessary until we have better alias analysis.
1216 observeValue(value, valState, sinkPoint);
1217 if (valState.realCount) --valState.realCount;
1219 ITRACE(3, " after: {}\n", show(valState));
1222 /* DecRef and COW both care about the same thing: is _count == 1 coming into
1223 * the operation? */
1224 struct ConsumeObserver {
1225 bool operator()(int count) { return count == 1; }
1227 /* When deciding if something is a user-visible reference, we need to know if
1228 * _count >= 2. */
1229 struct RefObserver {
1230 bool operator()(int count) { return count >= 2; }
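// Example (illustrative): with realCount == 2 and one pending IncRef,
// optCount() == 1. ConsumeObserver gives pred(2) == false but pred(1) ==
// true, so observeValue() below must place a sink point for the pending
// IncRef before the consumer; once both counts agree the loop stops.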
1233 template<typename T = ConsumeObserver>
1234 void observeValue(SSATmp* value, Value& valState, SinkPoint sp,
1235 T pred = T()) {
1236 while (pred(valState.realCount) != pred(valState.optCount())) {
1237 placeSinkPoint(value, valState, sp);
1241 void placeSinkPoint(SSATmp* value, Value& valState, SinkPoint sp) {
1242 m_ret.points[value].insert(std::make_pair(valState.popRef(), sp));
1243 ITRACE(3, "placing IncRef at id {}\n", sp.id);
1245 // If the current instruction has a taken branch, we need to tell the
1246 // propagated state that we used one of the pending IncRefs.
1247 if (m_takenState && sp.id == m_ids.before(m_inst)) {
1248 ITRACE(3, "updating taken state for {}\n", *value);
1249 m_takenState->values[value].popRef();
1253 void assertCanConsumeImpl(SSATmp* value, bool checkConsume) {
1254 auto const& valState = m_state.values[value];
1255 auto showState = [&](const std::string& what) {
1256 std::string ret;
1257 ret += folly::format("'{}' wants to consume {} but {}\n",
1258 *m_inst, *value, what).str();
1259 ret += show(m_state);
1260 ret += folly::format("{:-^80}\n{}{:-^80}\n",
1261 " unit ", m_unit.toString(), "").str();
1262 return ret;
1265 if (valState.realCount == 0) {
1266 auto showFailure = [&] {
1267 return showState("it has no unconsumed references");
1270 // This is ok as long as the value came from a load (see the Value struct
1271 // for why).
1272 always_assert_log((valState.fromLoad && valState.optCount() == 0),
1273 showFailure);
1274 } else if (checkConsume) {
1275 auto showFailure = [&] {
1276 return showState(folly::format("it has an optCount of {}\n",
1277 valState.optCount()).str());
1279 always_assert_log(valState.optCount() >= 1, showFailure);
1283 void assertHasUnconsumedReference(SSATmp* value) {
1284 assertCanConsumeImpl(value, false);
1287 void assertCanConsume(SSATmp* value) {
1288 assertCanConsumeImpl(value, true);
1291 void observeLocalRefs() {
1292 // If any locals are RefDatas, the behavior of get_defined_vars can
1293 // depend on whether or not the count is >= 2.
1294 m_frameState.forEachLocal(
1295 [&](uint32_t id, SSATmp* value) {
1296 if (!value) return;
1298 auto const sp = SinkPoint(m_ids.before(m_inst), value, false);
1299 value = canonical(value);
1300 observeValue(value, m_state.values[value], sp, RefObserver());
1305 void consumeLocal(Point id) {
1306 if (auto* val = m_frameState.localValue(id)) {
1307 consumeValue(val);
1311 /* Completely resolve the delta between realCount and optCount. */
1312 void resolveValueImpl(SSATmp* const origVal, bool eraseOnly) {
1313 assert(origVal);
1315 if (origVal->type().notCounted()) return;
1316 auto* value = canonical(origVal);
1318 ITRACE(3, "resolving value {}\n", *value->inst());
1319 Indent _i;
1321 auto& valState = m_state.values[value];
1322 if (!valState.optDelta()) return;
1324 const SinkPoint sp(m_ids.before(m_inst), origVal, eraseOnly);
1325 while (valState.optDelta()) {
1326 placeSinkPoint(value, valState, sp);
1329 void resolveValue(SSATmp* val) { resolveValueImpl(val, false); }
1330 void resolveValueEraseOnly(SSATmp* val) { resolveValueImpl(val, true); }
1332 /* Remember that oldVal has been replaced by newVal, either because of a
1333 * passthrough instruction or something from FrameState. */
1334 void replaceValue(SSATmp* oldVal, SSATmp* newVal) {
1335 ITRACE(3, "replacing {} with {}\n", *oldVal, *newVal);
1337 assert(canonical(oldVal) == canonical(newVal) &&
1338 oldVal != newVal);
1339 m_state.canon[canonical(newVal)] = newVal;
1342 void defineOutputs() {
1343 if (m_inst->is(LdLoc)) {
1344 // LdLoc's output is the new value of the local we loaded, and this has
1345 // already been tracked in setLocalValue().
1346 return;
1347 } else if (m_inst->is(LdThis)) {
1348 if (!m_inst->marker().func()->mayHaveThis()) {
1349 // If this function can't have a $this pointer everything after the
1350 // LdThis is unreachable. More importantly, we aren't going to decref
1351 // the non-existent $this pointer in RetC, so don't track a reference
1352 // to it.
1353 return;
1356 auto* fpInst = m_inst->src(0)->inst();
1357 assert(m_state.frames.live.count(fpInst));
1358 auto& frame = m_state.frames.live[fpInst];
1359 frame.currentThis = m_inst->dst();
1360 if (frame.mainThis == nullptr) {
1361 // Nobody has tried to load $this in the current frame yet. Set it as
1362 // the main $this for the frame and indicate that we're tracking a
1363 // reference to it.
1364 frame.mainThis = m_inst->dst();
1365 m_state.values[frame.mainThis].realCount = 1;
1367 return;
1370 if (m_inst->isPassthrough()) {
1371 assert(m_inst->numDsts() == 1);
1372 auto* dst = m_inst->dst();
1373 auto* src = m_inst->src(0);
1374 if (dst->type().maybeCounted()) {
1375 replaceValue(src, dst);
1378 if (m_inst->is(CheckType, AssertType)) {
1379 // If we're doing CheckType or AssertType from a counted type to an
1380 // uncounted type, we need to do something with the pending references
1381 // that won't be consumed now that the type is uncounted. Since we now
1382 // know that for its entire lifetime, the value has been an uncounted
1383 // type, we consume a live reference and drop all pending IncRefs on
1384 // the floor. This will result in any IncRefs of this value being sunk
1385 // to the CheckType's taken branch, and erased completely from the path
1386 // falling through the CheckType.
1387 if (dst->type().notCounted() && src->type().maybeCounted()) {
1388 auto* src = canonical(dst);
1389 auto& valState = m_state.values[src];
1391 ITRACE(3, "consuming reference to {}: {} and dropping "
1392 "opt delta of {}\n",
1393 *src, show(valState), valState.optDelta());
1394 if (valState.realCount) --valState.realCount;
1395 while (valState.optDelta()) valState.popRef();
1396 } else if (dst->type().maybeCounted() && src->type().notCounted()) {
1397 // Going from notCounted to maybeCounted. This sounds silly, but it's
1398 // possible so we have to handle it "correctly". Treat it like a
1399 // load instruction.
1400 ITRACE(3, "producing fake reference to {}\n", *dst);
1401 m_state.values[canonical(dst)].fromLoad = true;
1405 return;
1408 // Define any values produced by the instruction.
1409 for (uint32_t i = 0; i < m_inst->numDsts(); ++i) {
1410 auto dst = m_inst->dst(i);
1412 if (dst->type().notCounted()) continue;
1414 if (m_inst->producesReference(i) || m_inst->isRawLoad()) {
1415 assert(!m_state.values.count(dst));
1416 ITRACE(3, "defining value {}\n", *m_inst);
1417 Indent _i;
1419 auto& state = m_state.values[dst];
1420 if (m_inst->producesReference(i)) state.realCount = 1;
1421 if (m_inst->isRawLoad()) state.fromLoad = true;
1423 ITRACE(3, "{}\n", show(state));
1428 ///// LocalStateHook overrides /////
1429 void setLocalValue(uint32_t id, SSATmp* newVal) override {
1430 // TrackLoc is only used for the dests of labels, and those references are
1431 // handled in mergeStates.
1432 if (m_inst->is(TrackLoc)) {
1433 assert(newVal->inst()->is(DefLabel));
1434 return;
1437 // When a local's value is updated by StLoc(NT), the consumption of the old
1438 // value should've been visible to us, so we ignore that here.
1439 if (!m_inst->is(StLoc, StLocNT)) {
1440 ITRACE(3, "consuming local {} for setLocalValue\n", id);
1441 Indent _i;
1442 consumeLocal(id);
1445 if (newVal && newVal->type().maybeCounted()) {
1446 ITRACE(3, "{} is now in a local, adding tracked reference\n",
1447 *newVal);
1448 Indent _i;
1449 auto& valState = m_state.values[canonical(newVal)];
1450 ++valState.realCount;
1451 if (m_inst->is(LdLoc)) valState.fromLoad = true;
1452 ITRACE(3, " after: {}\n", show(valState));
1456 void refineLocalValue(uint32_t id, unsigned inlineIdx,
1457 SSATmp* oldVal, SSATmp* newVal) override {
1458 if (oldVal->type().maybeCounted() && newVal->type().notCounted()) {
1459 assert(newVal->inst()->is(CheckType, AssertType));
1460 assert(newVal->inst()->src(0) == oldVal);
1461 // Similar to what we do when processing the CheckType directly, we
1462 // "consume" the value on behalf of the CheckType.
1463 oldVal = canonical(oldVal);
1464 auto& valState = m_state.values[oldVal];
1465 ITRACE(2, "'consuming' reference to {} for refineLocalValue\n",
1466 *oldVal->inst());
1467 if (valState.realCount) --valState.realCount;
1471 void setLocalType(uint32_t id, Type) override {
1472 ITRACE(3, "consuming local {} for setLocalType\n", id);
1473 Indent _i;
1474 consumeLocal(id);
1477 void killLocalForCall(uint32_t id, unsigned inlineIdx,
1478 SSATmp* value) override {
1479 ITRACE(3, "consuming local {} at inline level {} for killLocalForCall\n",
1480 id, inlineIdx);
1481 Indent _i;
1482 consumeValue(value);
1485 void updateLocalRefValue(uint32_t id, unsigned,
1486 SSATmp* oldVal, SSATmp* newVal) override {
1487 ITRACE(3, "replacing {} with {} in local {} for updateLocalRefValue\n",
1488 *oldVal, *newVal, id);
1489 replaceValue(oldVal, newVal);
1492 /* The IRUnit being processed and its blocks */
1493 const IRUnit& m_unit;
1494 const BlockList& m_blocks;
1496 /* Current block and current instruction */
1497 const Block* m_block;
1498 const IRInstruction* m_inst;
1500 /* Map between linear ids and IRInstruction* */
1501 const IdMap& m_ids;
1503 /* Current state of the world */
1504 State m_state;
1506 /* Sometimes present pointer to state propagated to m_inst's taken
1507 * branch. See consumeValue() for usage. */
1508 State* m_takenState;
1509 SavedStates m_savedStates;
1511 /* Analysis results to be returned to caller */
1512 SinkPointsMap m_ret;
1514 /* Used to track local state and other information about the trace */
1515 FrameState m_frameState;
1518 ////////// Refcount validation pass //////////
1519 typedef jit::hash_map<SSATmp*, double> TmpDelta;
1520 typedef jit::hash_map<Block*, TmpDelta> BlockMap;
1523 * Simulate the effect of block b on refcounts of SSATmps.
1525 void updateCounts(Block* b, TmpDelta& delta) {
1526 for (auto& inst: *b) {
1527 if (inst.is(IncRef, DecRef, DecRefNZ)) {
1528 SSATmp* src = canonical(inst.src(0));
1529 if (src->type().notCounted()) continue;
1531 if (inst.is(IncRef)) {
1532 delta[src]++;
1533 } else {
1534 delta[src]--;
1541 * Populate each block in BlockMap with a map of the refcount delta for each
1542 * SSATmp. Ideally, we want to maintain the map for every path through the
1543 * trace. However, to avoid the exponential cost, we use a linear-time
1544 * approximation here using an idea similar to Random Interpretation (Gulwani
1545 * and Necula, POPL 2003). The logic for phi-nodes in the approximation
1546 * considers each predecessor equally likely, and uses the average as the
1547 * incoming ref-count for the phi-node.
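// For example (illustrative): if a block has two predecessors whose deltas
// for some SSATmp are +1 and 0, the block starts with an incoming delta of
// 0.5 for that tmp, and its own IncRefs/DecRefs are then added on top by
// updateCounts().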
1549 void getRefDeltas(IRUnit& unit, BlockMap& map) {
1550 auto blocks = rpoSortCfg(unit);
1551 for (auto* block : blocks) {
1552 TmpDelta& delta = map[block];
1553 int numPreds = block->numPreds();
1554 block->forEachPred([&](Block* from) {
1555 TmpDelta& predMap = map[from];
1556 for (auto it = predMap.begin(), end = predMap.end(); it != end; ) {
1557 SSATmp* src = it->first;
1558 delta[src] += it->second / numPreds;
1559 ++it;
1562 updateCounts(block, delta);
1566 const double deltaThreshold = 1E-15;
1569 * Validate that orig and opt have the same ref-count deltas for SSATmps in
1570 * each exit block.
1572 bool validateDeltas(BlockMap& orig, BlockMap& opt) {
1573 for (auto it = opt.begin(), end = opt.end(); it != end; ++it) {
1574 if (!it->first->isExit()) {
1575 continue;
1577 TmpDelta& deltaOpt = it->second;
1578 TmpDelta& deltaOrig = orig[it->first];
1579 for (auto it2 = deltaOpt.begin(), end2 = deltaOpt.end(); it2 != end2;
1580 it2++) {
1581 SSATmp* src = it2->first;
1582 double delta = deltaOrig[src];
1583 if (fabs(delta - it2->second) > deltaThreshold) {
1584 if (src->inst()->is(LdThis)) {
1585 // The optimization does some nontrivial tracking of $this pointers,
1586 // so this is probably a false positive.
1587 FTRACE(1, "possible ");
1589 FTRACE(1, "refcount mismatch in {}: orig: {} opt: {}, block {}\n",
1590 src->toString(),
1591 deltaOrig[src],
1592 it2->second,
1593 it->first->id());
1594 return src->inst()->is(LdThis);
1598 return true;
1601 std::string show(const SinkPointsMap& sinkPoints, const IdMap& ids) DEBUG_ONLY;
1602 std::string show(const SinkPointsMap& sinkPoints, const IdMap& ids) {
1603 std::string ret;
1605 typedef std::pair<SSATmp*, SinkPoints> SinkPair;
1606 jit::vector<SinkPair> sortedPoints(
1607 sinkPoints.points.begin(), sinkPoints.points.end());
1608 std::sort(sortedPoints.begin(), sortedPoints.end(),
1609 [&](const SinkPair& a, const SinkPair& b) {
1610 return ids.before(a.first->inst()) < ids.before(b.first->inst());
1613 for (auto const& pair : sortedPoints) {
1614 auto* value = pair.first;
1615 auto& points = pair.second;
1616 ret += folly::format(" {}\n", *value->inst()).str();
1618 const IncSet* lastIncs = nullptr;
1619 for (auto const& point : points) {
1620 auto* thisIncs = &point.first;
1621 if (!lastIncs || *lastIncs != *thisIncs) {
1622 ret += folly::format(" {}\n", show(*thisIncs, ids)).str();
1623 lastIncs = thisIncs;
1625 ret += folly::format(" {} {} (using {}){}\n",
1626 (point.second.id % 2) ? "after " : "before",
1627 *ids.inst(point.second.id),
1628 *point.second.value,
1629 point.second.eraseOnly ? ", erase only" : "").str();
1632 ret += '\n';
1635 typedef std::pair<Point, int> DecRefPair;
1636 jit::vector<DecRefPair> sortedDecs(
1637 sinkPoints.decRefs.begin(), sinkPoints.decRefs.end());
1638 std::sort(sortedDecs.begin(), sortedDecs.end(),
1639 [](const DecRefPair& a, const DecRefPair& b) {
1640 return a.first < b.first;
1642 for (auto const& pair : sortedDecs) {
1643 ret += folly::format(" {} has incoming count >= {}\n",
1644 *ids.inst(pair.first), pair.second).str();
1647 return ret;
1650 /* Using the information from SinkPointAnalyzer, sink IncRefs when and where
1651 * appropriate. This pass only removes and inserts IncRef instructions. */
1652 void sinkIncRefs(IRUnit& unit, const SinkPointsMap& info, const IdMap& ids) {
1653 ITRACE(3, "optimizing refcounts\n");
1654 Indent _i;
1656 for (auto const& pair : info.points) {
1657 auto& incRefs = pair.second;
1658 ITRACE(3, "looking at value {}, with {} IncRefs\n",
1659 *pair.first, incRefs.size());
1660 Indent _i;
1661 auto it = incRefs.begin();
1662 while (it != incRefs.end()) {
1663 auto const& incs = it->first;
1664 auto const range = incRefs.equal_range(incs);
1665 auto const end = range.second;
1666 ITRACE(3, "processing {}\n", show(incs, ids));
1667 Indent _i;
1669 // Eval.HHIRRefcountOptsAlwaysSink allows us to stress test the sinking
1670 // logic.
1671 auto doSink = RuntimeOption::EvalHHIRRefcountOptsAlwaysSink;
1672 if (it == end) {
1673 // There are no sink points for this IncRef. This can only happen if
1674 // the value started out as a maybeCounted type and was refined to a
1675 // notCounted type by an AssertType (if it was refined by a CheckType
1676 // there should be a sink point on the taken branch). In this case
1677 // we'll intentionally delete the IncRef down below without sinking any
1678 // copies anywhere.
1679 doSink = true;
1682 // For each sink point of this IncRef, check if doing the sink would
1683 // allow us to optimize it. If we find no optimizable sink points, don't
1684 // bother sinking.
1685 for (; it != end; ++it) {
1686 auto const& sp = it->second;
1687 if (sp.eraseOnly) {
1688 // An "erase only" sink point means we've proven that it's safe to
1689 // sink the IncRef to this point but we don't have access to the
1690 // value, probably because the SSATmp was killed by a Call. For now
1691 // we refuse to optimize this case, but it's possible to use this
1692 * sink point as long as we've proven that doing so will eliminate the
1693 // Inc/Dec pair.
1694 ITRACE(2, "found erase-only SinkPoint; not optimizing\n");
1695 doSink = false;
1696 break;
1699 if (IdMap::isAfter(sp.id)) continue;
1701 auto canOptimize = [&](IRInstruction* inst) {
1702 // If inst is a DecRef that won't go to zero of the same value as the
1703 // IncRef, we're good.
1704 if (info.decRefs.count(ids.before(inst)) &&
1705 canonical(sp.value) == canonical(inst->src(0))) {
1706 return true;
1709 // If the current type of the value is more specific than the
1710 // original source of the IncRef, sink it to get a possibly cheaper
1711 // IncRef.
1712 if (sp.value->type() < ids.inst(*incs.begin())->src(0)->type()) {
1713 return true;
1716 return false;
1719 if (canOptimize(ids.inst(sp.id))) {
1720 doSink = true;
1724 if (!doSink) {
1725 ITRACE(3, "no optimizable sink points\n");
1726 it = end;
1727 continue;
1730 ITRACE(3, "sinking {}\n", show(incs, ids));
1731 Indent __i;
1732 for (auto id : incs) {
1733 auto* incRef = ids.inst(id);
1734 incRef->block()->erase(incRef);
1737 for (it = range.first; it != end; ++it) {
1738 auto const& point = it->second;
1739 auto destId = point.id;
1740 auto* destInst = ids.inst(destId);
1741 auto* destBlock = destInst->block();
1742 auto destIt = destBlock->iteratorTo(destInst);
1743 if (IdMap::isAfter(point.id)) ++destIt;
1745 auto* newInc = unit.gen(IncRef, destInst->marker(), point.value);
1746 ITRACE(2, "inserting {} {} {}\n",
1747 *newInc,
1748 IdMap::isBefore(point.id) ? "before" : "after",
1749 *destInst);
1750 destBlock->insert(destIt, newInc);
1754 ITRACE(2, "\n");
1757 /* If an IncRef of a value is followed immediately by a DecRef of the same
1758 * value that is guaranteed to not destroy the value, erase both
1759 * instructions. */
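// When the DecRef is not immediately preceded by a matching IncRef, it is
// still known not to reach zero (it appears in info.decRefs), so the pass
// downgrades it to the cheaper DecRefNZ instead of erasing anything.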
1760 void eliminateRefcounts(IRUnit& unit, const SinkPointsMap& info,
1761 const IdMap& ids) {
1762 for (auto const& pair : info.decRefs) {
1763 auto* decRef = ids.inst(pair.first);
1764 auto* decBlock = decRef->block();
1765 auto decIt = decBlock->iteratorTo(decRef);
1766 if (decIt == decBlock->begin()) continue;
1768 auto incIt = decIt;
1769 --incIt;
1770 auto* incRef = &*incIt;
1771 if (!incRef->is(IncRef) ||
1772 canonical(incRef->src(0)) != canonical(decRef->src(0))) {
1773 // We can't erase an IncRef/DecRef pair but we can turn the DecRef into
1774 // the cheaper DecRefNZ.
1775 ITRACE(2, "changing {} into DecRefNZ\n", *decRef);
1776 decRef->setOpcode(DecRefNZ);
1777 continue;
1780 ITRACE(2, "erasing {} and {}\n", *incIt, *decIt);
1781 incRef->convertToNop();
1782 decRef->convertToNop();
1784 ITRACE(2, "\n");
1787 /* After this pass completes, we don't need the TakeStack/TakeRef instructions
1788 * anymore. This pass converts them to Nop, and dce removes them. */
1789 void eliminateTakes(const BlockList& blocks) {
1790 for (auto b : blocks) {
1791 for (auto& inst : *b) {
1792 if (inst.is(TakeStack, TakeRef)) {
1793 inst.convertToNop();
1801 /* optimizeRefcounts attempts to remove IncRef/DecRef pairs when we can prove
1802 * that doing so won't alter the visible behavior of the program. It does so in
1803 * three phases:
1805 * - Analysis: Each reachable instruction is inspected to determine its effect
1806 * on the refcount of its sources and dests. We keep track of two values for
1807 * each SSATmp: the real count and the optimized count. The real count is a
1808 * lower bound on the refcount of the object, assuming all IncRefs and
1809 * DecRefs are left as is. The optimized count is a lower bound with as many
1810 * IncRefs delayed as possible. When we get to an instruction that consumes a
1811 * reference to one of its sources, we ensure that the delta between
1812 * realCount and optCount won't cause a behavioral difference by logically
1813 * inserting as many IncRefs as needed. This phase doesn't modify the code;
1814 * it just records where the IncRefs may be sunk to.
1816 * - IncRef sinking: Using the result of the previous phase, any IncRefs for
1817 * which sinking would enable optimizations are moved to their sink
1818 * points. Depending on the cfg being optimized, this may change the number
1819 * of IncRef instructions present in the trace. The number of IncRefs
1820 * executed in each control flow path will not be modified.
1822 * - IncRef/DecRef removal: All of the hard work was done in the first two
1823 * passes, leaving a simple peephole optimization. When an IncRef has been
1824 * pushed down to right before the DecRef consuming its reference and that
1825 * DecRef is guaranteed to not destroy the object, we can delete both
1826 * instructions.
1828 * SinkPointAnalyzer verifies that each produced reference is consumed exactly
1829 * once in all paths as it walks through the trace. After the optimization is
1830 * complete, a separate validation pass is run to ensure the net effect on the
1831 * refcount of each object has not changed.
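// A minimal sketch of the end-to-end effect (illustrative, not taken from a
// real trace):
//
//   IncRef t1              ...
//   ...              =>    ...
//   DecRef t1              ...
//
// The analysis records that the IncRef may sink to just before the DecRef
// and that the DecRef cannot reach zero; sinking makes the pair adjacent,
// and the final phase then erases both instructions.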
1833 void optimizeRefcounts(IRUnit& unit, FrameState&& fs) {
1834 Timer _t(Timer::optimize_refcountOpts);
1835 FTRACE(2, "vvvvvvvvvv refcount opts vvvvvvvvvv\n");
1836 auto const changed = splitCriticalEdges(unit);
1837 if (changed) {
1838 printUnit(6, unit, "after splitting critical edges for refcount opts");
1841 auto const blocks = rpoSortCfg(unit);
1842 IdMap ids(blocks, unit);
1844 Indent _i;
1845 auto const sinkPoints =
1846 SinkPointAnalyzer(&blocks, &ids, unit, std::move(fs)).find();
1847 ITRACE(2, "Found sink points:\n{}\n", show(sinkPoints, ids));
1849 BlockMap before;
1850 if (RuntimeOption::EvalHHIRValidateRefCount) {
1851 getRefDeltas(unit, before);
1854 sinkIncRefs(unit, sinkPoints, ids);
1855 eliminateRefcounts(unit, sinkPoints, ids);
1856 eliminateTakes(blocks);
1858 if (RuntimeOption::EvalHHIRValidateRefCount) {
1859 BlockMap after;
1860 getRefDeltas(unit, after);
1861 if (!validateDeltas(before, after)) {
1862 printUnit(0, unit, "after refcount optimization");
1863 always_assert(false && "refcount validation failed");
1867 FTRACE(2, "^^^^^^^^^^ refcount opts ^^^^^^^^^^\n");