2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/opt.h"
21 #include "folly/Lazy.h"
22 #include "folly/Optional.h"
23 #include "folly/MapUtil.h"
25 #include "hphp/runtime/vm/jit/containers.h"
26 #include "hphp/runtime/vm/jit/block.h"
27 #include "hphp/runtime/vm/jit/cfg.h"
28 #include "hphp/runtime/vm/jit/frame-state.h"
29 #include "hphp/runtime/vm/jit/ir.h"
30 #include "hphp/runtime/vm/jit/ir-instruction.h"
31 #include "hphp/runtime/vm/jit/ir-unit.h"
32 #include "hphp/runtime/vm/jit/mc-generator.h"
33 #include "hphp/runtime/vm/jit/print.h"
34 #include "hphp/runtime/vm/jit/simplifier.h"
35 #include "hphp/runtime/vm/jit/ssa-tmp.h"
36 #include "hphp/runtime/vm/jit/state-vector.h"
37 #include "hphp/runtime/vm/jit/timer.h"
38 #include "hphp/runtime/vm/jit/translator.h"
39 #include "hphp/util/trace.h"
41 namespace HPHP
{ namespace jit
{
43 TRACE_SET_MOD(hhir_refcount
);
47 // These are used a lot in this file.
48 using HPHP::Trace::Indent
;
49 using HPHP::Trace::indent
;
51 // A Point is an id representing a program point, determined by IdMap.
// Even ids are "before" points, odd ids are "after" points (see IdMap).
using Point = uint32_t;
55 * IdMap stores a linear position id for program points before (even numbers)
56 * and after (odd numbers) each instruction. The distinction is necessary so we
57 * can insert sink points after the last instruction of a block.
60 IdMap(const BlockList
& blocks
, const IRUnit
& unit
)
64 for (auto* block
: blocks
) {
65 for (auto& inst
: *block
) {
68 m_insts
.push_back(&inst
);
73 IRInstruction
* inst(Point id
) const { return m_insts
.at(id
/ 2); }
75 static bool isBefore(Point id
) { return !(id
% 2); }
76 static bool isAfter(Point id
) { return id
% 2; }
78 Point
before(const IRInstruction
* inst
) const { return m_ids
[inst
]; }
79 Point
before(const IRInstruction
& inst
) const { return before(&inst
); }
81 Point
after(const IRInstruction
* inst
) const { return m_ids
[inst
] + 1; }
82 Point
after(const IRInstruction
& inst
) const { return after(&inst
); }
85 StateVector
<IRInstruction
, Point
> m_ids
;
86 jit::vector
<IRInstruction
*> m_insts
;
90 * IncSet holds a set of ids of IncRef instructions, representing a pending
91 * reference in a Value (defined below). We have to use a set of ids instead of
92 * just a single id because it's possible for the same pending reference to
93 * come from different instructions in different control flow paths.
95 typedef jit::flat_set
<Point
> IncSet
;
96 std::string
show(const IncSet
& incs
, const IdMap
& ids
) {
99 for (auto id
: incs
) {
100 folly::toAppend(separator
, ids
.inst(id
)->toString(), &ret
);
107 * Value represents what we know about the current state of a php value with a
108 * refcount. It holds lower bounds on the real refcount and optimized refcount
109 * of an object. As the analysis pass progresses, it uses this information to
110 * ensure that any optimizations will not affect what observers of the refcount
114 explicit Value(int c
= 0, bool load
= false)
119 int optCount() const { return realCount
- optDelta(); }
120 int optDelta() const { return pendingIncs
.size(); }
122 return pendingIncs
.empty() && realCount
== 0 && !fromLoad
;
126 * Merge other's state with this state.
129 * If realCounts do not match, the lesser value must be fromLoad. The idea
130 * is that fromLoad allows us to make more aggressive assumptions, so we
131 * shouldn't have both a higher count and fromLoad.
133 * For an example of where this is OK, consider:
148 * The count of t1 at B2 will be 1 if coming from B0, and 0 if coming from
149 * B1, due to the SpillStack. However, the TakeStack marks t1 as fromLoad,
150 * which lets us know that there's at least one surviving reference, so we
151 * can move forward assuming the max of the two.
154 * 1. realCount becomes the max of the incoming value;
155 * 2. fromLoad is set if either incoming value has it;
156 * 3. pendingIncs is truncated to the size of the smaller of the two, then
157 * the sets at each index are merged. The two can differ when a value's
158 * count is observed in one branch of a conditional but not the other, and
159 * it's safe because any differences are resolved in mergeStates().
161 void merge(const Value
& other
, const IRUnit
& unit
) {
162 auto showFailure
= [&] {
163 return folly::format(
164 "Failed to merge values in unit:\n{}\n{}\n{}\n",
165 show(*this), show(other
), unit
170 IMPLIES(realCount
> other
.realCount
, other
.fromLoad
) &&
171 IMPLIES(realCount
< other
.realCount
, fromLoad
),
174 fromLoad
= fromLoad
|| other
.fromLoad
;
175 realCount
= std::max(realCount
, other
.realCount
);
177 auto minSize
= std::min(pendingIncs
.size(), other
.pendingIncs
.size());
178 pendingIncs
.resize(minSize
);
179 for (unsigned i
= 0; i
< minSize
; ++i
) {
180 for (auto id
: other
.pendingIncs
[i
]) {
181 pendingIncs
[i
].insert(id
);
186 void pushInc(Point id
) {
188 pendingIncs
.emplace_back();
189 pendingIncs
.back().insert(id
);
193 assert(!pendingIncs
.empty());
194 auto incs
= std::move(pendingIncs
.back());
195 pendingIncs
.pop_back();
199 void clearPendingIncs() {
203 /* realCount is the number of live references currently owned by the
204 * jit. Since it represents references with lifetimes we can reliably track,
205 * it's also used as a lower bound of the refcount of the object. */
208 /* fromLoad represents whether or not the value came from a load
209 * instruction. This optimization is based on the idea of producing and
210 * consuming references, and most of our load instructions sometimes produce
211 * a reference and sometimes don't. Rather than splitting up all the loads
212 * into two flavors, we allow consumption of a value from a load even if
213 * there appear to be no live references. */
217 friend std::string
show(const Value
&);
219 /* pendingIncs contains ids of IncRef instructions that have yet to be placed
220 * before an observer. The size of pendingIncs represents the delta between
221 * the real count and the optimized count. */
222 jit::vector
<IncSet
> pendingIncs
;
225 std::string
show(const Value
& state
) {
228 for (auto const& set
: state
.pendingIncs
) {
229 folly::toAppend(sep
, '{', folly::join(", ", set
), '}', &incs
);
233 return folly::format(
234 "(real, opt) = ({:>2}, {:>2}) - [{}]{}",
235 state
.realCount
, state
.optCount(),
237 state
.fromLoad
? " - from load" : "").str();
241 * Frame represents a call frame and is used to track references to the current
245 explicit Frame(SSATmp
* main
= nullptr, SSATmp
* current
= nullptr)
247 , currentThis(current
)
250 /* The canonical form of $this. For the outermost frame of the trace, this
251 * will be nullptr if we haven't loaded $this yet, or a LdThis if we
252 * have. For inlined frames, this will be the canonical SSATmp* for the $this
253 * pointer given to the SpillFrame. */
256 /* Latest live temp holding $this. This is updated whenever we see a new
257 * LdThis in a frame, and it's set to nullptr after any instructions that
258 * kill all live temps (calls, mostly). */
261 bool operator==(const Frame b
) const {
262 return mainThis
== b
.mainThis
&& currentThis
== b
.currentThis
;
266 std::string
show(const Frame frame
) {
267 if (!frame
.mainThis
&& !frame
.currentThis
) {
271 if (frame
.mainThis
) {
272 folly::toAppend("main this: ", frame
.mainThis
->toString(), &ret
);
273 if (frame
.currentThis
) ret
+= " ";
275 if (frame
.currentThis
) {
276 folly::toAppend("current this: ", frame
.currentThis
->toString(), &ret
);
282 * FrameStack contains a Frame struct for all live and pre-live frames.
285 /* Push a possibly empty Frame representing a pre-live ActRec. We don't yet
286 * know if the call with be inlined or not. */
287 void pushPreLive(Frame f
= Frame()) {
288 preLive
.emplace_back(f
);
291 /* Push a new frame representing a newly defined FramePtr for an inlined
293 void pushInline(const IRInstruction
* fpInst
) {
294 assert(!preLive
.empty());
295 assert(fpInst
->is(DefInlineFP
));
296 live
[fpInst
] = std::move(preLive
.back());
300 /* Pop an inlined frame to represent an InlineReturn, forgetting what we know
301 * about its $this pointer. */
302 void popInline(const IRInstruction
* fpInst
) {
303 assert(fpInst
->is(DefInlineFP
));
304 assert(live
.size() >= 2);
305 auto it
= live
.find(fpInst
);
306 assert(it
!= live
.end());
308 assert(dead
.count(fpInst
) == 0);
309 dead
[fpInst
] = std::move(it
->second
);
313 /* Pop a non-inlined frame. This is only called if a trace includes a RetC in
314 * the outermost function. */
316 assert(live
.size() == 1);
317 live
.erase(live
.begin());
320 bool operator==(const FrameStack
& b
) const {
321 return live
== b
.live
&& preLive
== b
.preLive
;
324 bool operator!=(const FrameStack
& b
) const {
325 return !(*this == b
);
328 /* Map from the instruction defining the frame to a Frame object. */
329 jit::flat_map
<const IRInstruction
*, Frame
> live
;
331 /* Similar to live, but for frames that have been popped. We keep track of
332 * these because we often refer to a LdThis from an inlined function after
334 jit::flat_map
<const IRInstruction
*, Frame
> dead
;
336 /* Frames that have been pushed but not activated. */
337 jit::vector
<Frame
> preLive
;
340 typedef jit::flat_map
<SSATmp
*, SSATmp
*> CanonMap
;
343 * State holds all the information we care about during the analysis pass, and
344 * is used to propagate and merge state across control flow edges.
347 /* values maps from live SSATmps to the currently known state about the
349 jit::flat_map
<SSATmp
*, Value
> values
;
351 /* canon keeps track of values that have been through passthrough
352 * instructions, like CheckType and AssertType. */
355 /* frames keeps track of live $this pointers in call frames. */
359 std::string
show(const State
& state
) {
362 for (auto const& val
: state
.values
) {
363 if (val
.second
.empty()) continue;
365 folly::toAppend(indent(), show(val
.second
), " | ",
366 val
.first
->inst()->toString(), '\n', &ret
);
369 if (!state
.frames
.live
.empty()) {
370 folly::toAppend(indent(), "live frames (unordered):\n", &ret
);
372 for (auto const& pair
: state
.frames
.live
) {
373 folly::toAppend(indent(), pair
.first
->toString(), ": ",
374 show(pair
.second
), '\n', &ret
);
378 if (!state
.frames
.preLive
.empty()) {
379 folly::toAppend(indent(), "pre-live frames:\n", &ret
);
381 for (auto const& frame
: state
.frames
.preLive
) {
382 folly::toAppend(indent(), show(frame
), '\n', &ret
);
386 if (!state
.canon
.empty()) {
387 folly::toAppend(indent(), "replaced values:\n", &ret
);
389 for (auto const& pair
: state
.canon
) {
390 ret
+= folly::format("{}canon[{}] = {}\n",
391 indent(), *pair
.first
, *pair
.second
).str();
398 struct IncomingState
{
399 IncomingState(const Block
* f
, const State
& s
)
404 IncomingState(const Block
* f
, State
&& s
)
406 , state(std::move(s
))
412 typedef jit::vector
<IncomingState
> IncomingStateVec
;
413 typedef jit::hash_map
<const Block
*, IncomingStateVec
> SavedStates
;
416 * One SinkPoint exists for each optimizable IncRef in each control flow path
420 SinkPoint(Point i
, SSATmp
* v
, bool erase
)
426 /* The position of the sink point, from IdMap. */
429 /* In some very rare situations we may know that we want to insert a
430 * SinkPoint for a value without having a live SSATmp for that value. This
431 * means it's not safe to sink the IncRef to that point, but if the
432 * instruction immediately after the SinkPoint is a DecRef of the same value,
433 * it's safe to eliminate both instructions. */
436 /* The current version of the value, if it has been through a passthrough
437 * instruction. Keeping track of this allows us to sink an IncRef of a Cell
438 * past a CheckType and turn it into a cheaper IncRef. */
442 typedef jit::flat_multimap
<IncSet
, SinkPoint
> SinkPoints
;
444 struct SinkPointsMap
{
445 // Maps values to SinkPoints for their IncRefs
446 jit::hash_map
<SSATmp
*, SinkPoints
> points
;
448 // Maps ids of DecRef instructions to the incoming lower bound of the object's
449 // refcount. Only DecRefs that cannot go to zero are in this map, so there
450 // will be no entries with a value of less than 2.
451 jit::hash_map
<Point
, int> decRefs
;
455 * We often want to insert sink points on control flow edges. idForEdge returns
456 * the appropriate id to use for the given edge, assuming it is not a critical
459 Point
idForEdge(const Block
* from
, const Block
* to
, const IdMap
& ids
) {
460 auto* next
= from
->next();
461 auto* taken
= from
->taken();
462 assert(next
|| taken
);
464 auto before
= [&](const IRInstruction
& inst
) {
465 ITRACE(6, "id for B{} -> B{} is before {}\n", from
->id(), to
->id(), inst
);
466 return ids
.before(inst
);
468 auto after
= [&](const IRInstruction
& inst
) {
469 ITRACE(6, "id for B{} -> B{} is after {}\n", from
->id(), to
->id(), inst
);
470 return ids
.after(inst
);
474 // from has two outgoing edges. Use the beginning of to.
475 assert(to
->numPreds() == 1 && !to
->empty());
476 auto it
= to
->begin();
477 assert(it
!= to
->end() && !it
->is(DefLabel
, BeginCatch
));
480 // from has one outgoing edges. Use the end of from.
481 assert(!from
->empty());
482 auto it
= from
->end();
484 if (it
->isControlFlow()) {
485 assert(it
->isTerminal());
493 const StaticString
s_get_defined_vars("get_defined_vars"),
494 s_get_defined_vars_SystemLib("__SystemLib\\get_defined_vars");
497 * SinkPointAnalyzer is responsible for inspecting a trace and determining two
498 * things: the latest safe point to sink each IncRef to, and which DecRefs
499 * cannot take their src to 0.
501 struct SinkPointAnalyzer
: private LocalStateHook
{
502 SinkPointAnalyzer(const BlockList
* blocks
, const IdMap
* ids
,
503 IRUnit
& unit
, FrameState
&& frameState
)
509 , m_takenState(nullptr)
510 , m_frameState(std::move(frameState
))
513 SinkPointsMap
find() {
514 assert(!m_blocks
.empty());
515 auto& fpInst
= m_blocks
.front()->front();
516 assert(fpInst
.is(DefFP
));
517 m_state
.frames
.live
[&fpInst
] = Frame();
519 for (auto* block
: m_blocks
) {
520 ITRACE(3, "entering B{}\n", block
->id());
524 m_frameState
.startBlock(block
, block
->front().marker());
526 if (block
!= m_blocks
.front()) {
527 assert(m_savedStates
.count(block
) == 1);
528 m_state
= mergeStates(std::move(m_savedStates
[block
]));
529 m_savedStates
.erase(block
);
532 for (auto& inst
: *block
) {
538 auto* next
= block
->next();
539 auto* taken
= block
->taken();
540 if (!next
&& !taken
) {
541 // This block is terminal. Ensure that all live references have been
543 ITRACE(3, "finishing terminal B{}\n", block
->id());
545 FTRACE(3, "{}", show(m_state
));
547 auto showFailure
= [&]{
549 ret
+= show(*(mcg
->tx().region())) + "\n";
550 ret
+= folly::format("Unconsumed reference(s) leaving B{}\n",
552 ret
+= show(m_state
);
553 ret
+= folly::format("{:-^80}\n{}{:-^80}\n",
554 " unit ", m_unit
.toString(), "").str();
558 // Terminal block. Everything must be resolved.
559 for (auto const& valState
: m_state
.values
) {
560 always_assert_log(valState
.second
.realCount
== 0 &&
561 valState
.second
.optDelta() == 0,
566 // Propagate current state to the next block, if one exists. If we have a
567 // taken branch, that information will be propagated in processInst().
569 ITRACE(3, "propagating B{}'s state to next - B{}\n",
570 m_block
->id(), next
->id());
572 FTRACE(3, "{}", show(m_state
));
573 m_savedStates
[next
].emplace_back(block
, std::move(m_state
));
577 m_frameState
.finishBlock(block
);
580 return std::move(m_ret
);
584 struct IncomingBranch
{
585 IncomingBranch(const Block
* b
, const Value
& v
)
593 struct IncomingValue
{
595 jit::vector
<IncomingBranch
> inBlocks
;
598 State
mergeStates(IncomingStateVec
&& states
) {
599 DEBUG_ONLY
auto doTrace
= [&] {
600 ITRACE(3, "merging {} state(s) into B{}\n", states
.size(), m_block
->id());
601 for (DEBUG_ONLY
auto const& inState
: states
) {
603 ITRACE(3, "from B{}\n", inState
.from
->id());
605 FTRACE(3, "{}", show(inState
.state
));
608 ONTRACE(3, doTrace());
610 assert(!states
.empty());
612 * Short circuit the easy, common case: one incoming state. (This
613 * is just to save JIT time.)
615 * Except if it's a DefLabel---there could still be unconsumed
616 * references that need to be forwarded through the DefLabel (for
617 * example in the case of a control flow diamond where one side
618 * was optimized away because the branch turned into a Nop).
620 if (states
.size() == 1 && !m_block
->front().is(DefLabel
)) {
621 return std::move(states
.front().state
);
624 auto const& firstFrames
= states
.front().state
.frames
;
626 retState
.canon
= mergeCanons(states
);
627 retState
.frames
= firstFrames
;
629 // If the current block begins with a DefLabel, we start by taking
630 // unconsumed references for each dest of the label that produces a
632 if (m_block
->front().is(DefLabel
)) {
633 auto& label
= m_block
->front();
634 auto& refsVec
= m_unit
.labelRefs().at(&label
);
635 always_assert(refsVec
.size() == label
.numDsts());
637 ITRACE(3, "producing refs for dests of {}\n", label
);
639 for (auto i
= 0U, n
= label
.numDsts(); i
< n
; ++i
) {
640 if (label
.dst(i
)->type().notCounted()) continue;
642 auto refs
= refsVec
[i
];
643 if (refs
== 0) continue;
645 ITRACE(3, "dest {} produces {} ref(s)\n", i
, refs
);
648 i
, [&](IRInstruction
* jmp
, SSATmp
* src
) {
649 if (src
->type().notCounted()) return;
651 auto it
= find_if(states
.begin(), states
.end(),
652 [jmp
](const IncomingState
& s
) {
653 return s
.from
== jmp
->block();
655 always_assert(it
!= states
.end());
656 src
= canonical(src
);
657 auto& valState
= it
->state
.values
[src
];
658 always_assert(valState
.optDelta() == 0);
659 ITRACE(3, "consuming refs from {} | {}\n",
660 show(valState
), *src
->inst());
661 if (valState
.realCount
>= refs
) {
662 valState
.realCount
-= refs
;
664 always_assert(valState
.fromLoad
);
665 valState
.realCount
= 0;
669 retState
.values
[label
.dst(i
)].realCount
= refs
;
673 // Now, we build a map from values to their merged incoming state and which
674 // blocks provide information about the value.
675 jit::hash_map
<SSATmp
*, IncomingValue
> mergedValues
;
676 for (auto const& inState
: states
) {
677 if (inState
.state
.frames
!= firstFrames
) {
678 // Task #5216936: add support for merging states with
679 // different FrameStacks, and get rid of the TRACE_PUNT below.
680 if (RuntimeOption::EvalHHIRBytecodeControlFlow
) {
681 TRACE_PUNT("refcount-opts needs support for merging states with "
682 "different FrameStacks");
684 always_assert(false &&
685 "merging states with different FrameStacks is not supported");
688 for (auto const& inPair
: inState
.state
.values
) {
689 if (inPair
.second
.empty()) continue;
691 auto* value
= inPair
.first
;
692 assert(!value
->inst()->isPassthrough());
693 const bool existed
= mergedValues
.count(value
);
694 auto& mergedState
= mergedValues
[value
];
696 mergedState
.value
.merge(inPair
.second
, m_unit
);
698 mergedState
.value
= inPair
.second
;
701 // Register this block as an incoming provider of the value.
702 mergedState
.inBlocks
.emplace_back(inState
.from
, inPair
.second
);
706 // Now, for each incoming value, insert it into the resulting state. If
707 // there is a difference in a value's state between incoming branches,
708 // resolve it by inserting sink points on the appropriate incoming edges.
709 for (auto& pair
: mergedValues
) {
710 auto mergedState
= pair
.second
.value
;
712 // If the value wasn't provided by every incoming branch, we're hosed
713 // unless it was just fromLoad in some branches.
714 if (pair
.second
.inBlocks
.size() < states
.size()) {
715 for (auto& inBlock
: pair
.second
.inBlocks
) {
717 inBlock
.value
.realCount
== 0 && inBlock
.value
.optDelta() == 0,
718 "While merging incoming states for B{}, value {} not provided by"
719 " all preds but has nontrivial state `{}' from B{}."
720 "\n\n{:-^80}\n{}{:-^80}\n",
721 m_block
->id(), *pair
.first
->inst(),
722 show(inBlock
.value
), inBlock
.from
->id(),
729 folly::get_default(retState
.canon
, pair
.first
, pair
.first
);
730 auto const mergedDelta
= mergedState
.optDelta();
731 for (auto& inBlock
: pair
.second
.inBlocks
) {
732 auto& inState
= inBlock
.value
;
733 assert(inState
.optDelta() >= mergedDelta
);
735 Point insertId
= idForEdge(inBlock
.from
, m_block
, m_ids
);
736 while (inState
.optDelta() > mergedDelta
) {
737 ITRACE(3, "Inserting sink point on edge B{} -> B{}\n",
738 inBlock
.from
->id(), m_block
->id());
739 m_ret
.points
[pair
.first
].insert(
740 std::make_pair(inState
.popRef(),
741 SinkPoint(insertId
, incVal
, false)));
745 // Only put the merged state in the result if it provides useful
747 assert(mergedState
.optCount() >= 0);
748 if (mergedState
.realCount
|| mergedState
.fromLoad
) {
749 retState
.values
[pair
.first
] = mergedState
;
753 ITRACE(3, "returning state:\n");
755 FTRACE(3, "{}", show(retState
));
759 /* If control flow after something like a CheckType rejoins, we need to make
760 * sure we only reference values that dominate the join. In the example
761 * below, t4 is not defined in B3 and therefore cannot be used in B4:
764 * t4:Obj = CheckType<Obj> t3:Cell -> B3
775 * B2's incoming state will have a mapping from t3 -> t4, and B3 will have no
776 * mapping for t3. We need to look at all the incoming mappings and pick the
777 * most specific value defined in all incoming paths (t3 in this case).
779 CanonMap
mergeCanons(const IncomingStateVec
& states
) const {
782 auto passthrough
= [](SSATmp
*& val
) -> void {
783 val
= val
->inst()->getPassthroughValue();
786 // First, build a map from canonical values to the values they map to and
787 // how many incoming branches have the mapped value available.
788 jit::flat_map
<SSATmp
*, jit::flat_map
<SSATmp
*, int>> mergedCanon
;
790 for (auto const& inState
: states
) {
791 for (auto const& pair
: inState
.state
.canon
) {
792 for (auto* val
= pair
.second
; val
->inst()->isPassthrough();
794 ++mergedCanon
[pair
.first
][val
];
799 for (auto const& pair
: mergedCanon
) {
800 auto* trueRoot
= pair
.first
;
801 // Build a list of versions of this value that exist in all incoming
803 jit::flat_set
<SSATmp
*> tmps
;
804 for (auto const& countPair
: pair
.second
) {
805 if (countPair
.first
!= trueRoot
&& countPair
.second
== states
.size()) {
806 tmps
.insert(countPair
.first
);
811 // None of the derived values exist, so use trueRoot. This is
812 // equivalent to not having an entry in the map.
814 } else if (tmps
.size() == 1) {
815 // Only one derived value exists in all incoming branches, so use that
817 retMap
[trueRoot
] = *tmps
.begin();
821 // We found more than one value coming in from all branches, so find the
822 // most derived one. This is the one temp that doesn't appear as an
823 // ancestor of any of the others.
824 jit::flat_set
<SSATmp
*> ancestors
;
825 for (auto* val
: tmps
) {
826 // trueRoot shouldn't be in the set
827 assert(val
->inst()->isPassthrough());
830 // For each ancestor of val, if it's not yet in ancestors, add it and
831 // continue. If it is in ancestors we've already visited it and all of
832 // its ancestors, so do nothing more.
833 for (; val
->inst()->isPassthrough(); passthrough(val
)) {
834 auto it
= ancestors
.find(val
);
835 if (it
== ancestors
.end()) {
836 ancestors
.insert(val
);
843 // There should now be exactly one value in tmps that isn't
844 // ancestors. This is the value to use in the merge map. The sets are
845 // sorted, so walk them in parallel to find the discrepancy.
846 assert(ancestors
.size() == tmps
.size() - 1);
847 SSATmp
* mostDerived
= nullptr;
849 for (auto tIt
= tmps
.begin(), aIt
= ancestors
.begin(); ; ++tIt
, ++aIt
) {
850 assert(tIt
!= tmps
.end());
851 if (aIt
== ancestors
.end() || *tIt
!= *aIt
) {
856 retMap
[trueRoot
] = mostDerived
;
863 ITRACE(3, "processing instruction id {:>3}: {}\n",
864 m_ids
.before(m_inst
), *m_inst
);
867 auto const nSrcs
= m_inst
->numSrcs();
868 m_frameState
.setMarker(m_inst
->marker());
870 if (auto* taken
= m_inst
->taken()) {
871 // If an instruction's branch is ever taken, none of its sources will be
872 // consumed. This means we should propagate state across this edge before
873 // consuming any values and modifying the current state.
874 ITRACE(3, "propagating B{}'s state to taken - B{}\n",
875 m_block
->id(), taken
->id());
877 FTRACE(3, "{}", show(m_state
));
878 auto& takenStates
= m_savedStates
[taken
];
879 takenStates
.emplace_back(m_block
, m_state
);
881 // Since we can't insert sink points after the branch but before the
882 // instruction consumes its sources, we have to insert them before the
883 // whole instruction. This happens after we've already propagated the
884 // state to taken, so we keep a pointer to the propagated state so we can
885 // update it when needed.
886 m_takenState
= &takenStates
.back().state
;
888 m_takenState
= nullptr;
891 if (m_inst
->is(IncRef
, IncRefCtx
, TakeRef
)) {
892 auto* src
= m_inst
->src(0);
894 // We only consider an IncRef optimizable if it's not an IncRefCtx/TakeRef
895 // and the value doesn't have an optimized count of 0. This prevents any
896 // subsequent instructions from taking in a source with count 0.
897 if (src
->type().maybeCounted()) {
898 auto& valState
= m_state
.values
[canonical(src
)];
899 if (valState
.optCount() > 0 && m_inst
->is(IncRef
)) {
900 auto const id
= m_ids
.before(m_inst
);
901 m_state
.values
[canonical(src
)].pushInc(id
);
903 // Even if the IncRef isn't optimizable, it still needs to be tracked
904 // as a live reference.
905 ++valState
.realCount
;
907 ITRACE(3, "{}\n", show(m_state
.values
[canonical(src
)]));
909 } else if (m_inst
->is(TakeStack
)) {
910 // TakeStack is used to indicate that we've logically popped a value off
911 // the stack, in place of a LdStack.
912 auto* src
= m_inst
->src(0);
913 if (src
->type().maybeCounted()) {
914 m_state
.values
[canonical(src
)].fromLoad
= true;
916 } else if (m_inst
->is(Jmp
)) {
917 // Completely resolve the deficit for any sources to Jmp. This isn't a
918 // perfect solution but it's vastly simpler than other options and
919 // probably good enough for our purposes.
920 for (uint32_t i
= 0; i
< nSrcs
; ++i
) {
921 resolveValue(m_inst
->src(i
));
923 } else if (m_inst
== &m_block
->back() && m_block
->isExit() &&
924 // Make sure it's not a RetCtrl from Ret{C,V}
925 (!m_inst
->is(RetCtrl
) ||
926 m_inst
->extra
<RetCtrlData
>()->suspendingResumed
) &&
927 // The EndCatch in Function{Suspend,Return}Hook's catch block is
928 // special: it happens after locals and $this have been
929 // decreffed, so we don't want to do the normal cleanup
930 !(m_inst
->is(EndCatch
) &&
931 m_block
->preds().front().inst()
932 ->is(FunctionSuspendHook
, FunctionReturnHook
) &&
933 !m_block
->preds().front().inst()
934 ->extra
<RetCtrlData
>()->suspendingResumed
)) {
935 // When leaving a trace, we need to account for all live references in
936 // locals and $this pointers.
939 } else if (m_inst
->is(GenericRetDecRefs
, NativeImpl
)) {
940 consumeCurrentLocals();
941 } else if (m_inst
->is(CreateCont
, CreateAFWH
)) {
943 consumeCurrentLocals();
944 auto frame
= m_inst
->src(0)->inst();
945 consumeFrame(m_state
.frames
.live
.at(frame
));
947 } else if (m_inst
->is(DecRefLoc
)) {
948 consumeLocal(m_inst
->extra
<DecRefLoc
>()->locId
);
949 } else if (m_inst
->is(DecRefThis
)) {
950 // This only happens during a RetC, and it happens instead of a normal
952 auto frame
= m_inst
->src(0)->inst();
953 consumeFrame(m_state
.frames
.live
.at(frame
));
955 // All other instructions take the generic path.
960 m_frameState
.update(m_inst
);
963 /* jit::canonical() traces through passthrough instructions to get the root
964 * of a value. Since we're tracking the state of inlined frames in the trace,
965 * there are often cases where the root value for a LdThis is really a value
966 * up in some enclosing frame. */
967 SSATmp
* canonical(SSATmp
* value
) {
968 auto* root
= jit::canonical(value
);
969 auto* inst
= root
->inst();
970 if (!inst
->is(LdThis
)) return root
;
972 auto* fpInst
= inst
->src(0)->inst();
973 auto it
= m_state
.frames
.live
.find(fpInst
);
974 if (it
== m_state
.frames
.live
.end()) {
975 it
= m_state
.frames
.dead
.find(fpInst
);
976 assert(it
!= m_state
.frames
.dead
.end());
978 return it
->second
.mainThis
;
982 * Consumes all local values, including those in callers if we're inlined.
984 void consumeAllLocals() {
985 ITRACE(3, "consuming all locals\n");
987 m_frameState
.forEachLocal(
988 [&](uint32_t id
, SSATmp
* value
) {
989 if (value
) consumeValue(value
);
995 * Consumes all locals in the current frame, not including inline callers.
997 void consumeCurrentLocals() {
998 ITRACE(3, "consume current frame locals\n");
1000 for (uint32_t i
= 0, n
= m_frameState
.func()->numLocals(); i
< n
; ++i
) {
1001 if (auto value
= m_frameState
.localValue(i
)) {
1002 consumeValue(value
);
1007 void consumeInputs() {
1008 if (m_inst
->is(DecRef
, DecRefNZ
)) {
1009 auto* src
= m_inst
->src(0);
1011 if (src
->type().notCounted()) return;
1012 src
= canonical(src
);
1014 auto const& valState
= m_state
.values
[src
];
1015 // Record DecRefs that won't go to 0
1016 assert(IMPLIES(m_inst
->is(DecRefNZ
), valState
.realCount
> 1));
1017 if (valState
.realCount
> 1) {
1018 m_ret
.decRefs
[m_ids
.before(m_inst
)] = valState
.realCount
;
1020 } else if (m_inst
->is(SpillFrame
)) {
1021 auto* this_
= m_inst
->src(2);
1022 if (this_
->isA(Type::Obj
)) {
1023 m_state
.frames
.pushPreLive(Frame(canonical(this_
), this_
));
1024 // When spilling an Object to a pre-live ActRec, we can reliably track
1025 // the reference in the frame. This allows us to optimize away many
1026 // refcounting operations on $this in inlined calls.
1027 ITRACE(3, "{}({}) becoming $this in an ActRec; not consuming "
1029 *this_
, *canonical(this_
));
1032 auto& valState
= m_state
.values
[canonical(this_
)];
1033 if (valState
.realCount
== 0) {
1034 assert(valState
.fromLoad
);
1035 ++valState
.realCount
;
1037 ITRACE(3, " after: {}\n", show(valState
));
1040 m_state
.frames
.pushPreLive();
1041 } else if (m_inst
->is(DefInlineFP
)) {
1042 m_state
.frames
.pushInline(m_inst
);
1043 } else if (m_inst
->is(InlineReturn
)) {
1044 FTRACE(3, "{}", show(m_state
));
1045 m_state
.frames
.popInline(m_inst
->src(0)->inst());
1046 } else if (m_inst
->is(RetAdjustStack
)) {
1047 m_state
.frames
.pop();
1048 } else if (m_inst
->is(Call
, CallArray
)) {
1051 } else if (m_inst
->is(ContEnter
)) {
1053 } else if (m_inst
->is(CallBuiltin
)) {
1054 const auto callee
= m_inst
->extra
<CallBuiltinData
>()->callee
->fullName();
1055 if (callee
->isame(s_get_defined_vars
.get()) ||
1056 callee
->isame(s_get_defined_vars_SystemLib
.get())) {
1059 } else if (m_inst
->is(InterpOne
, InterpOneCF
)) {
1060 // InterpOne can push and pop ActRecs.
1061 auto const op
= m_inst
->extra
<InterpOneData
>()->opcode
;
1062 if (jit::getInstrInfo(op
).type
== jit::InstrFlags::OutFDesc
) {
1063 m_state
.frames
.pushPreLive();
1064 } else if (isFCallStar(op
)) {
1067 } else if (op
== OpContEnter
) {
1069 } else if (op
== OpFCallBuiltin
) {
1070 // This isn't optimal, but InterpOne of FCallBuiltin is rare enough
1071 // that it doesn't matter.
1075 if (m_inst
->is(InterpOneCF
)) {
1076 // InterpOneCF's normal path leaves the trace, so consume all live
1083 ITRACE(3, "consuming normal inputs\n");
1084 for (uint32_t i
= 0; i
< m_inst
->numSrcs(); ++i
) {
1086 auto src
= m_inst
->src(i
);
1088 if (src
->isA(Type::StkPtr
) && src
->inst()->is(SpillFrame
) &&
1089 !m_inst
->is(DefInlineFP
,
1093 bool const beginInlDefSP
= m_inst
->is(ReDefSP
) &&
1094 m_inst
->src(1)->inst()->is(DefInlineFP
);
1095 if (!beginInlDefSP
) {
1096 // If the StkPtr being consumed points to a pre-live ActRec, observe
1097 // its $this pointer since many of our helper functions decref it.
1098 auto this_
= src
->inst()->src(2);
1099 if (this_
->isA(Type::Obj
)) {
1100 auto const sinkPoint
= SinkPoint(m_ids
.before(m_inst
), this_
,
1102 this_
= canonical(this_
);
1103 observeValue(this_
, m_state
.values
[this_
], sinkPoint
);
1108 if (src
->type().notCounted()) continue;
1110 auto& valState
= m_state
.values
[canonical(src
)];
1111 const SinkPoint
sp(m_ids
.before(m_inst
), src
, false);
1112 assert(IMPLIES(valState
.optCount() == 0, valState
.realCount
== 0));
1113 if (m_inst
->consumesReference(i
)) {
1114 consumeValue(canonical(src
), valState
, sp
);
1118 // Many instructions have complicated effects on the state of the
1119 // locals. Get this information from FrameState.
1120 ITRACE(3, "getting local effects from FrameState\n");
1122 m_frameState
.getLocalEffects(m_inst
, *this);
1126 * Process a normal call to a prelive frame. Consume $this, if any, and pop
1127 * the prelive frame.
1129 void callPreLiveFrame() {
1130 // If we have no prelive frames the FPush* was in a different trace.
1131 if (m_state
.frames
.preLive
.empty()) return;
1133 consumeFrame(m_state
.frames
.preLive
.back());
1134 m_state
.frames
.preLive
.pop_back();
1137 template<typename L
>
1138 void forEachFrame(L body
) {
1139 for (auto& pair
: m_state
.frames
.live
) body(pair
.second
);
1140 for (auto& frame
: m_state
.frames
.preLive
) body(frame
);
1144 * Call, CallArray, and ContEnter destroy all live values. They also might
1145 * throw and we can't attach catch traces to them. Resolve the opt delta for
1146 * any live $this pointers and forget the "current" this pointer so we don't
1147 * reuse it after the call.
1149 void resolveAllFrames() {
1150 forEachFrame([this](Frame
& f
) {
1151 if (f
.currentThis
) {
1152 resolveValue(f
.currentThis
);
1153 f
.currentThis
= nullptr;
1154 } else if (f
.mainThis
) {
1155 resolveValueEraseOnly(f
.mainThis
);
1161 * Consume the $this pointer for a Frame, if it has one. If currentThis is
1162 * nullptr but mainThis exists we use an "erase-only" sink point, which means
1163 * it's not safe to actually sink the IncRef but it's safe to erase the
1164 * IncRef/DecRef pair (usually because the value has been smashed by a Call).
1166 void consumeFrame(Frame
& f
) {
1167 if (f
.currentThis
) {
1168 consumeValue(f
.currentThis
);
1169 } else if (f
.mainThis
) {
1170 consumeValueEraseOnly(f
.mainThis
);
1175 * When we leave a trace, we have to account for all the references we're
1176 * tracking in pre-live frames and inlined frames.
1178 void consumeAllFrames() {
1179 forEachFrame([this](Frame
& f
) {
// NOTE(review): the lambda body and closing braces are not visible in this
// extract; presumably each tracked frame is consumed (consumeFrame(f)) --
// confirm against the full source file.
1184 /* When an instruction consumes a reference to a value, two things in our
1185 * tracked state must be updated. We first ensure that the behavior of DecRef
1186 * and COW will not be affected by the difference between the real count and
1187 * the optimized count. This is done by inserting as many IncRef sink points
1188 * as needed before the instruction. Second, the real count of the value is
1189 * decremented to reflect the consumption of the reference. If there is no
1190 * reference to consume, this is either a bug in this analysis pass or the
1191 * incoming IR, so abort with a (hopefully) helpful error. */
1192 void consumeValueImpl(SSATmp
* value
, bool eraseOnly
) {
1194 if (value
->type().notCounted()) return;
1196 auto* root
= canonical(value
);
1197 consumeValue(root
, m_state
.values
[root
],
1198 SinkPoint(m_ids
.before(m_inst
), value
, eraseOnly
));
1200 void consumeValue(SSATmp
* value
) { consumeValueImpl(value
, false); }
1201 void consumeValueEraseOnly(SSATmp
* value
) { consumeValueImpl(value
, true); }
// Consume one reference to `value`: first make DecRef/COW behavior safe by
// sinking any needed IncRefs (observeValue), then drop one real reference.
1203 void consumeValue(SSATmp
* value
, Value
& valState
, SinkPoint sinkPoint
) {
1204 ITRACE(3, "consuming value {}\n", *value
->inst());
// Callers must pass the canonical SSATmp for the value.
1205 assert(value
== canonical(value
));
1208 ITRACE(3, "before: {}\n", show(valState
));
1209 assert(valState
.realCount
>= 0);
1210 assert(valState
.optCount() >= 0);
// Aborts with a diagnostic dump if there is no reference to consume.
1212 assertCanConsume(value
);
1214 // Note that we're treating consumers and observers the same here, which is
1215 // necessary until we have better alias analysis.
1216 observeValue(value
, valState
, sinkPoint
);
// The real count never drops below zero here (zero is legal for loads).
1217 if (valState
.realCount
) --valState
.realCount
;
1219 ITRACE(3, " after: {}\n", show(valState
));
/* DecRef and COW both care about the same thing: whether the count is
 * exactly 1 coming into the instruction. */
struct ConsumeObserver {
  bool operator()(int count) { return count == 1; }
};

/* When deciding if something is a user-visible reference, what matters is
 * whether two or more references exist. */
struct RefObserver {
  bool operator()(int count) { return count >= 2; }
};
1233 template<typename T
= ConsumeObserver
>
1234 void observeValue(SSATmp
* value
, Value
& valState
, SinkPoint sp
,
1236 while (pred(valState
.realCount
) != pred(valState
.optCount())) {
1237 placeSinkPoint(value
, valState
, sp
);
1241 void placeSinkPoint(SSATmp
* value
, Value
& valState
, SinkPoint sp
) {
1242 m_ret
.points
[value
].insert(std::make_pair(valState
.popRef(), sp
));
1243 ITRACE(3, "placing IncRef at id {}\n", sp
.id
);
1245 // If the current instruction has a taken branch, we need to tell the
1246 // propagated state that we used one of the pending IncRefs.
1247 if (m_takenState
&& sp
.id
== m_ids
.before(m_inst
)) {
1248 ITRACE(3, "updating taken state for {}\n", *value
);
1249 m_takenState
->values
[value
].popRef();
1253 void assertCanConsumeImpl(SSATmp
* value
, bool checkConsume
) {
1254 auto const& valState
= m_state
.values
[value
];
1255 auto showState
= [&](const std::string
& what
) {
1257 ret
+= folly::format("'{}' wants to consume {} but {}\n",
1258 *m_inst
, *value
, what
).str();
1259 ret
+= show(m_state
);
1260 ret
+= folly::format("{:-^80}\n{}{:-^80}\n",
1261 " unit ", m_unit
.toString(), "").str();
1265 if (valState
.realCount
== 0) {
1266 auto showFailure
= [&] {
1267 return showState("it has no unconsumed references");
1270 // This is ok as long as the value came from a load (see the Value struct
1272 always_assert_log((valState
.fromLoad
&& valState
.optCount() == 0),
1274 } else if (checkConsume
) {
1275 auto showFailure
= [&] {
1276 return showState(folly::format("it has an optCount of {}\n",
1277 valState
.optCount()).str());
1279 always_assert_log(valState
.optCount() >= 1, showFailure
);
1283 void assertHasUnconsumedReference(SSATmp
* value
) {
1284 assertCanConsumeImpl(value
, false);
1287 void assertCanConsume(SSATmp
* value
) {
1288 assertCanConsumeImpl(value
, true);
// Observe every tracked local as a user-visible reference (RefObserver),
// since get_defined_vars' behavior can depend on a RefData's count.
1291 void observeLocalRefs() {
1292 // If any locals are RefDatas, the behavior of get_defined_vars can
1293 // depend on whether or not the count is >= 2.
1294 m_frameState
.forEachLocal(
1295 [&](uint32_t id
, SSATmp
* value
) {
// NOTE(review): one or two guard lines of this lambda are not visible in the
// extract (presumably skipping locals that can't be refs) -- confirm.
1298 auto const sp
= SinkPoint(m_ids
.before(m_inst
), value
, false);
1299 value
= canonical(value
);
1300 observeValue(value
, m_state
.values
[value
], sp
, RefObserver());
// Consume the reference held by the local identified by `id`, if any.
1305 void consumeLocal(Point id
) {
1306 if (auto* val
= m_frameState
.localValue(id
)) {
// NOTE(review): the body of this `if` is not visible here; presumably the
// local's value is consumed (consumeValue(val)) -- confirm.
1311 /* Completely resolve the delta between realCount and optCount. */
1312 void resolveValueImpl(SSATmp
* const origVal
, bool eraseOnly
) {
// Uncounted values never have pending IncRefs.
1315 if (origVal
->type().notCounted()) return;
1316 auto* value
= canonical(origVal
);
1318 ITRACE(3, "resolving value {}\n", *value
->inst());
1321 auto& valState
= m_state
.values
[value
];
// Nothing to do when no IncRefs are pending.
1322 if (!valState
.optDelta()) return;
1324 const SinkPoint
sp(m_ids
.before(m_inst
), origVal
, eraseOnly
);
// Sink every pending IncRef at this program point.
1325 while (valState
.optDelta()) {
1326 placeSinkPoint(value
, valState
, sp
);
1329 void resolveValue(SSATmp
* val
) { resolveValueImpl(val
, false); }
1330 void resolveValueEraseOnly(SSATmp
* val
) { resolveValueImpl(val
, true); }
1332 /* Remember that oldVal has been replaced by newVal, either because of a
1333 * passthrough instruction or something from FrameState. */
1334 void replaceValue(SSATmp
* oldVal
, SSATmp
* newVal
) {
1335 ITRACE(3, "replacing {} with {}\n", *oldVal
, *newVal
);
// NOTE(review): the assert's message string (orig line 1338) is not visible
// in this extract.
1337 assert(canonical(oldVal
) == canonical(newVal
) &&
// newVal becomes the preferred representative for the canonical value.
1339 m_state
.canon
[canonical(newVal
)] = newVal
;
// Track the values produced by the current instruction, with special
// handling for LdLoc, LdThis, passthroughs, and CheckType/AssertType.
1342 void defineOutputs() {
1343 if (m_inst
->is(LdLoc
)) {
1344 // LdLoc's output is the new value of the local we loaded, and this has
1345 // already been tracked in setLocalValue().
1347 } else if (m_inst
->is(LdThis
)) {
1348 if (!m_inst
->marker().func()->mayHaveThis()) {
1349 // If this function can't have a $this pointer everything after the
1350 // LdThis is unreachable. More importantly, we aren't going to decref
1351 // the non-existent $this pointer in RetC, so don't track a reference
// The frame pointer's defining instruction keys the live-frame table.
1356 auto* fpInst
= m_inst
->src(0)->inst();
1357 assert(m_state
.frames
.live
.count(fpInst
));
1358 auto& frame
= m_state
.frames
.live
[fpInst
];
1359 frame
.currentThis
= m_inst
->dst();
1360 if (frame
.mainThis
== nullptr) {
1361 // Nobody has tried to load $this in the current frame yet. Set it as
1362 // the main $this for the frame and indicate that we're tracking a
1364 frame
.mainThis
= m_inst
->dst();
1365 m_state
.values
[frame
.mainThis
].realCount
= 1;
1370 if (m_inst
->isPassthrough()) {
1371 assert(m_inst
->numDsts() == 1);
1372 auto* dst
= m_inst
->dst();
1373 auto* src
= m_inst
->src(0);
1374 if (dst
->type().maybeCounted()) {
1375 replaceValue(src
, dst
);
1378 if (m_inst
->is(CheckType
, AssertType
)) {
1379 // If we're doing CheckType or AssertType from a counted type to an
1380 // uncounted type, we need to do something with the pending references
1381 // that won't be consumed now that the type is uncounted. Since we now
1382 // know that for its entire lifetime, the value has been an uncounted
1383 // type, we consume a live reference and drop all pending IncRefs on
1384 // the floor. This will result in any IncRefs of this value being sunk
1385 // to the CheckType's taken branch, and erased completely from the path
1386 // falling through the CheckType.
1387 if (dst
->type().notCounted() && src
->type().maybeCounted()) {
1388 auto* src
= canonical(dst
);
1389 auto& valState
= m_state
.values
[src
];
1391 ITRACE(3, "consuming reference to {}: {} and dropping "
1392 "opt delta of {}\n",
1393 *src
, show(valState
), valState
.optDelta());
1394 if (valState
.realCount
) --valState
.realCount
;
1395 while (valState
.optDelta()) valState
.popRef();
1396 } else if (dst
->type().maybeCounted() && src
->type().notCounted()) {
1397 // Going from notCounted to maybeCounted. This sounds silly, but it's
1398 // possible so we have to handle it "correctly". Treat it like a
1399 // load instruction.
1400 ITRACE(3, "producing fake reference to {}\n", *dst
);
1401 m_state
.values
[canonical(dst
)].fromLoad
= true;
1408 // Define any values produced by the instruction.
1409 for (uint32_t i
= 0; i
< m_inst
->numDsts(); ++i
) {
1410 auto dst
= m_inst
->dst(i
);
1412 if (dst
->type().notCounted()) continue;
1414 if (m_inst
->producesReference(i
) || m_inst
->isRawLoad()) {
1415 assert(!m_state
.values
.count(dst
));
1416 ITRACE(3, "defining value {}\n", *m_inst
);
1419 auto& state
= m_state
.values
[dst
];
// A produced reference starts with a real count of one; raw loads are
// marked fromLoad so a zero real count is tolerated later.
1420 if (m_inst
->producesReference(i
)) state
.realCount
= 1;
1421 if (m_inst
->isRawLoad()) state
.fromLoad
= true;
1423 ITRACE(3, "{}\n", show(state
));
1428 ///// LocalStateHook overrides /////
// A local received a new value: consume the old reference (unless the store
// itself made that visible) and start tracking a reference for newVal.
1429 void setLocalValue(uint32_t id
, SSATmp
* newVal
) override
{
1430 // TrackLoc is only used for the dests of labels, and those references are
1431 // handled in mergeStates.
1432 if (m_inst
->is(TrackLoc
)) {
1433 assert(newVal
->inst()->is(DefLabel
));
1437 // When a local's value is updated by StLoc(NT), the consumption of the old
1438 // value should've been visible to us, so we ignore that here.
1439 if (!m_inst
->is(StLoc
, StLocNT
)) {
1440 ITRACE(3, "consuming local {} for setLocalValue\n", id
);
// NOTE(review): the consume call for the old local value (orig lines
// 1441-1444) is not visible in this extract -- confirm against full source.
1445 if (newVal
&& newVal
->type().maybeCounted()) {
1446 ITRACE(3, "{} is now in a local, adding tracked reference\n",
1449 auto& valState
= m_state
.values
[canonical(newVal
)];
1450 ++valState
.realCount
;
1451 if (m_inst
->is(LdLoc
)) valState
.fromLoad
= true;
1452 ITRACE(3, " after: {}\n", show(valState
));
// A local's value was refined by CheckType/AssertType; if the type went
// from counted to uncounted, consume a reference on the check's behalf.
1456 void refineLocalValue(uint32_t id
, unsigned inlineIdx
,
1457 SSATmp
* oldVal
, SSATmp
* newVal
) override
{
1458 if (oldVal
->type().maybeCounted() && newVal
->type().notCounted()) {
1459 assert(newVal
->inst()->is(CheckType
, AssertType
));
1460 assert(newVal
->inst()->src(0) == oldVal
);
1461 // Similar to what we do when processing the CheckType directly, we
1462 // "consume" the value on behalf of the CheckType.
1463 oldVal
= canonical(oldVal
);
1464 auto& valState
= m_state
.values
[oldVal
];
1465 ITRACE(2, "'consuming' reference to {} for refineLocalValue\n",
1467 if (valState
.realCount
) --valState
.realCount
;
// The local's type was overwritten; its old value gets consumed.
1471 void setLocalType(uint32_t id
, Type
) override
{
1472 ITRACE(3, "consuming local {} for setLocalType\n", id
);
// A call killed this local; consume its value.
1477 void killLocalForCall(uint32_t id
, unsigned inlineIdx
,
1478 SSATmp
* value
) override
{
1479 ITRACE(3, "consuming local {} at inline level {} for killLocalForCall\n",
1482 consumeValue(value
);
// Swap tracked values when FrameState reports a replacement (presumably a
// boxed local's inner value changed -- confirm against full source).
1485 void updateLocalRefValue(uint32_t id
, unsigned,
1486 SSATmp
* oldVal
, SSATmp
* newVal
) override
{
1487 ITRACE(3, "replacing {} with {} in local {} for updateLocalRefValue\n",
1488 *oldVal
, *newVal
, id
);
1489 replaceValue(oldVal
, newVal
);
1492 /* The IRUnit being processed and its blocks */
1493 const IRUnit
& m_unit
;
1494 const BlockList
& m_blocks
;
1496 /* Current block and current instruction */
1497 const Block
* m_block
;
1498 const IRInstruction
* m_inst
;
1500 /* Map between linear ids and IRInstruction* */
// NOTE(review): the declarations for the id-map and current-state members
// (orig lines 1501-1505) are not visible in this extract.
1503 /* Current state of the world */
1506 /* Sometimes present pointer to state propagated to m_inst's taken
1507 * branch. See consumeValue() for usage. */
1508 State
* m_takenState
;
1509 SavedStates m_savedStates
;
1511 /* Analysis results to be returned to caller */
1512 SinkPointsMap m_ret
;
1514 /* Used to track local state and other information about the trace */
1515 FrameState m_frameState
;
1518 ////////// Refcount validation pass //////////
// Per-SSATmp approximate net refcount delta, and its per-block map.
1519 typedef jit::hash_map
<SSATmp
*, double> TmpDelta
;
1520 typedef jit::hash_map
<Block
*, TmpDelta
> BlockMap
;
1523 * Simulate the effect of block b on refcounts of SSATmps.
1525 void updateCounts(Block
* b
, TmpDelta
& delta
) {
1526 for (auto& inst
: *b
) {
1527 if (inst
.is(IncRef
, DecRef
, DecRefNZ
)) {
1528 SSATmp
* src
= canonical(inst
.src(0));
1529 if (src
->type().notCounted()) continue;
1531 if (inst
.is(IncRef
)) {
// NOTE(review): the actual increment/decrement of delta[src] (orig lines
// 1532-1539) is not visible in this extract -- confirm against full source.
1541 * Populate each block in BlockMap with a map of the refcount delta for each
1542 * SSATmp. Ideally, we want to maintain the map for every path through the
1543 * trace. However, to avoid the exponential cost, we use a linear-time
1544 * approximation here using an idea similar to Random Interpretation (Gulwani
1545 * and Necula, POPL 2003). The logic for phi-nodes in the approximation
1546 * considers each predecessor equally likely, and uses the average as the
1547 * incoming ref-count for the phi-node.
1549 void getRefDeltas(IRUnit
& unit
, BlockMap
& map
) {
1550 auto blocks
= rpoSortCfg(unit
);
1551 for (auto* block
: blocks
) {
1552 TmpDelta
& delta
= map
[block
];
1553 int numPreds
= block
->numPreds();
// Seed this block's deltas with the average of its predecessors' deltas.
1554 block
->forEachPred([&](Block
* from
) {
1555 TmpDelta
& predMap
= map
[from
];
1556 for (auto it
= predMap
.begin(), end
= predMap
.end(); it
!= end
; ) {
1557 SSATmp
* src
= it
->first
;
1558 delta
[src
] += it
->second
/ numPreds
;
// NOTE(review): the loop's iterator advance (orig lines 1559-1561) is not
// visible in this extract.
1562 updateCounts(block
, delta
);
// Comparison tolerance for the floating-point averaged deltas.
1566 const double deltaThreshold
= 1E-15;
1569 * Validate that orig and opt have the same ref-count deltas for SSATmps in
1572 bool validateDeltas(BlockMap
& orig
, BlockMap
& opt
) {
1573 for (auto it
= opt
.begin(), end
= opt
.end(); it
!= end
; ++it
) {
// Non-exit blocks are presumably skipped here (body truncated) -- confirm.
1574 if (!it
->first
->isExit()) {
1577 TmpDelta
& deltaOpt
= it
->second
;
1578 TmpDelta
& deltaOrig
= orig
[it
->first
];
1579 for (auto it2
= deltaOpt
.begin(), end2
= deltaOpt
.end(); it2
!= end2
;
1581 SSATmp
* src
= it2
->first
;
1582 double delta
= deltaOrig
[src
];
1583 if (fabs(delta
- it2
->second
) > deltaThreshold
) {
1584 if (src
->inst()->is(LdThis
)) {
1585 // The optimization does some nontrivial state tracking to keep track
1586 // of $this pointers, so this is probably a false positive.
1587 FTRACE(1, "possible ");
1589 FTRACE(1, "refcount mismatch in {}: orig: {} opt: {}, block {}\n",
// Mismatches on LdThis values are tolerated as likely false positives.
1594 return src
->inst()->is(LdThis
);
// Debug-only pretty-printer for the analysis results: dumps each value's
// sunk IncRefs with their sink points, then the optimizable DecRefs.
1601 std::string
show(const SinkPointsMap
& sinkPoints
, const IdMap
& ids
) DEBUG_ONLY
;
1602 std::string
show(const SinkPointsMap
& sinkPoints
, const IdMap
& ids
) {
// NOTE(review): the declaration of the accumulator `ret` (orig lines
// 1603-1604) and the final `return ret;` are not visible in this extract.
1605 typedef std::pair
<SSATmp
*, SinkPoints
> SinkPair
;
// Sort values by the linear id of their defining instruction.
1606 jit::vector
<SinkPair
> sortedPoints(
1607 sinkPoints
.points
.begin(), sinkPoints
.points
.end());
1608 std::sort(sortedPoints
.begin(), sortedPoints
.end(),
1609 [&](const SinkPair
& a
, const SinkPair
& b
) {
1610 return ids
.before(a
.first
->inst()) < ids
.before(b
.first
->inst());
1613 for (auto const& pair
: sortedPoints
) {
1614 auto* value
= pair
.first
;
1615 auto& points
= pair
.second
;
1616 ret
+= folly::format(" {}\n", *value
->inst()).str();
// Print each IncSet header once, followed by all of its sink points.
1618 const IncSet
* lastIncs
= nullptr;
1619 for (auto const& point
: points
) {
1620 auto* thisIncs
= &point
.first
;
1621 if (!lastIncs
|| *lastIncs
!= *thisIncs
) {
1622 ret
+= folly::format(" {}\n", show(*thisIncs
, ids
)).str();
1623 lastIncs
= thisIncs
;
1625 ret
+= folly::format(" {} {} (using {}){}\n",
1626 (point
.second
.id
% 2) ? "after " : "before",
1627 *ids
.inst(point
.second
.id
),
1628 *point
.second
.value
,
1629 point
.second
.eraseOnly
? ", erase only" : "").str();
// DecRefs with a known incoming count, sorted by program point.
1635 typedef std::pair
<Point
, int> DecRefPair
;
1636 jit::vector
<DecRefPair
> sortedDecs(
1637 sinkPoints
.decRefs
.begin(), sinkPoints
.decRefs
.end());
1638 std::sort(sortedDecs
.begin(), sortedDecs
.end(),
1639 [](const DecRefPair
& a
, const DecRefPair
& b
) {
1640 return a
.first
< b
.first
;
1642 for (auto const& pair
: sortedDecs
) {
1643 ret
+= folly::format(" {} has incoming count >= {}\n",
1644 *ids
.inst(pair
.first
), pair
.second
).str();
1650 /* Using the information from SinkPointAnalyzer, sink IncRefs when and where
1651 * appropriate. This pass only removes and inserts IncRef instructions. */
1652 void sinkIncRefs(IRUnit
& unit
, const SinkPointsMap
& info
, const IdMap
& ids
) {
1653 ITRACE(3, "optimizing refcounts\n");
1656 for (auto const& pair
: info
.points
) {
1657 auto& incRefs
= pair
.second
;
1658 ITRACE(3, "looking at value {}, with {} IncRefs\n",
1659 *pair
.first
, incRefs
.size());
// Walk groups of sink points that share the same IncSet (multimap
// equal_range groups them).
1661 auto it
= incRefs
.begin();
1662 while (it
!= incRefs
.end()) {
1663 auto const& incs
= it
->first
;
1664 auto const range
= incRefs
.equal_range(incs
);
1665 auto const end
= range
.second
;
1666 ITRACE(3, "processing {}\n", show(incs
, ids
));
1669 // Eval.HHIRRefcountOptsAlwaysSink allows us to stress test the sinking
1671 auto doSink
= RuntimeOption::EvalHHIRRefcountOptsAlwaysSink
;
1673 // There are no sink points for this IncRef. This can only happen if
1674 // the value started out as a maybeCounted type and was refined to a
1675 // notCounted type by an AssertType (if it was refined by a CheckType
1676 // there should be a sink point on the taken branch). In this case
1677 // we'll intentionally delete the IncRef down below without sinking any
1682 // For each sink point of this IncRef, check if doing the sink would
1683 // allow us to optimize it. If we find no optimizable sink points, don't
1685 for (; it
!= end
; ++it
) {
1686 auto const& sp
= it
->second
;
1688 // An "erase only" sink point means we've proven that it's safe to
1689 // sink the IncRef to this point but we don't have access to the
1690 // value, probably because the SSATmp was killed by a Call. For now
1691 // we refuse to optimize this case, but it's possible to use this
1692 // sink point as long as we've proven that doing so will eliminate the
1694 ITRACE(2, "found erase-only SinkPoint; not optimizing\n");
// "After" sink points cannot enable the pair optimization; skip them.
1699 if (IdMap::isAfter(sp
.id
)) continue;
1701 auto canOptimize
= [&](IRInstruction
* inst
) {
1702 // If inst is a DecRef that won't go to zero of the same value as the
1703 // IncRef, we're good.
1704 if (info
.decRefs
.count(ids
.before(inst
)) &&
1705 canonical(sp
.value
) == canonical(inst
->src(0))) {
1709 // If the current type of the value is more specific than the
1710 // original source of the IncRef, sink it to get a possibly cheaper
1712 if (sp
.value
->type() < ids
.inst(*incs
.begin())->src(0)->type()) {
1719 if (canOptimize(ids
.inst(sp
.id
))) {
1725 ITRACE(3, "no optimizable sink points\n");
// Sinking: delete the original IncRefs, then insert a new IncRef at each
// sink point of the group.
1730 ITRACE(3, "sinking {}\n", show(incs
, ids
));
1732 for (auto id
: incs
) {
1733 auto* incRef
= ids
.inst(id
);
1734 incRef
->block()->erase(incRef
);
1737 for (it
= range
.first
; it
!= end
; ++it
) {
1738 auto const& point
= it
->second
;
1739 auto destId
= point
.id
;
1740 auto* destInst
= ids
.inst(destId
);
1741 auto* destBlock
= destInst
->block();
1742 auto destIt
= destBlock
->iteratorTo(destInst
);
// "After" points insert past the destination instruction.
1743 if (IdMap::isAfter(point
.id
)) ++destIt
;
1745 auto* newInc
= unit
.gen(IncRef
, destInst
->marker(), point
.value
);
1746 ITRACE(2, "inserting {} {} {}\n",
1748 IdMap::isBefore(point
.id
) ? "before" : "after",
1750 destBlock
->insert(destIt
, newInc
);
1757 /* If an IncRef of a value is followed immediately by a DecRef of the same
1758 * value that is guaranteed to not destroy the value, erase both
1760 void eliminateRefcounts(IRUnit
& unit
, const SinkPointsMap
& info
,
// Walk every DecRef known to have an incoming count >= 2.
1762 for (auto const& pair
: info
.decRefs
) {
1763 auto* decRef
= ids
.inst(pair
.first
);
1764 auto* decBlock
= decRef
->block();
1765 auto decIt
= decBlock
->iteratorTo(decRef
);
// A DecRef at the top of its block has no preceding IncRef to pair with.
1766 if (decIt
== decBlock
->begin()) continue;
// NOTE(review): the initialization of incIt (orig lines 1767-1769;
// presumably the instruction immediately preceding the DecRef) is not
// visible in this extract -- confirm against full source.
1770 auto* incRef
= &*incIt
;
1771 if (!incRef
->is(IncRef
) ||
1772 canonical(incRef
->src(0)) != canonical(decRef
->src(0))) {
1773 // We can't erase an IncRef/DecRef pair but we can turn the DecRef into
1774 // the cheaper DecRefNZ.
1775 ITRACE(2, "changing {} into DecRefNZ\n", *decRef
);
1776 decRef
->setOpcode(DecRefNZ
);
// Adjacent IncRef/DecRef of the same value with count >= 2: drop both.
1780 ITRACE(2, "erasing {} and {}\n", *incIt
, *decIt
);
1781 incRef
->convertToNop();
1782 decRef
->convertToNop();
1787 /* After this pass completes, we don't need the TakeStack/TakeRef instructions
1788 * anymore. This pass converts them to Nop, and dce removes them. */
1789 void eliminateTakes(const BlockList
& blocks
) {
1790 for (auto b
: blocks
) {
1791 for (auto& inst
: *b
) {
1792 if (inst
.is(TakeStack
, TakeRef
)) {
1793 inst
.convertToNop();
1801 /* optimizeRefcounts attempts to remove IncRef/DecRef pairs when we can prove
1802 * that doing so won't alter the visible behavior of the program. It does so in
1805 * - Analysis: Each reachable instruction is inspected to determine its effect
1806 * on the refcount of its sources and dests. We keep track of two values for
1807 * each SSATmp: the real count and the optimized count. The real count is a
1808 * lower bound on the refcount of the object, assuming all IncRefs and
1809 * DecRefs are left as is. The optimized count is a lower bound with as many
1810 * IncRefs delayed as possible. When we get to an instruction that consumes a
1811 * reference to one of its sources, we ensure that the delta between
1812 * realCount and optCount won't cause a behavioral difference by logically
1813 * inserting as many IncRefs as needed. This phase doesn't modify the code;
1814 * it just records where the IncRefs may be sunk to.
1816 * - IncRef sinking: Using the result of the previous phase, any IncRefs for
1817 * which sinking would enable optimizations are moved to their sink
1818 * points. Depending on the cfg being optimized, this may change the number
1819 * of IncRef instructions present in the trace. The number of IncRefs
1820 * executed in each control flow path will not be modified.
1822 * - IncRef/DecRef removal: All of the hard work was done in the first two
1823 * passes, leaving a simple peephole optimization. When an IncRef has been
1824 * pushed down to right before the DecRef consuming its reference and that
1825 * DecRef is guaranteed to not destroy the object, we can delete both
1828 * SinkPointAnalyzer verifies that each produced reference is consumed exactly
1829 * once in all paths as it walks through the trace. After the optimization is
1830 * complete, a separate validation pass is run to ensure the net effect on the
1831 * refcount of each object has not changed.
1833 void optimizeRefcounts(IRUnit
& unit
, FrameState
&& fs
) {
1834 Timer
_t(Timer::optimize_refcountOpts
);
1835 FTRACE(2, "vvvvvvvvvv refcount opts vvvvvvvvvv\n");
1836 auto const changed
= splitCriticalEdges(unit
);
1838 printUnit(6, unit
, "after splitting critical edges for refcount opts");
1841 auto const blocks
= rpoSortCfg(unit
);
1842 IdMap
ids(blocks
, unit
);
1845 auto const sinkPoints
=
1846 SinkPointAnalyzer(&blocks
, &ids
, unit
, std::move(fs
)).find();
1847 ITRACE(2, "Found sink points:\n{}\n", show(sinkPoints
, ids
));
1850 if (RuntimeOption::EvalHHIRValidateRefCount
) {
1851 getRefDeltas(unit
, before
);
1854 sinkIncRefs(unit
, sinkPoints
, ids
);
1855 eliminateRefcounts(unit
, sinkPoints
, ids
);
1856 eliminateTakes(blocks
);
1858 if (RuntimeOption::EvalHHIRValidateRefCount
) {
1860 getRefDeltas(unit
, after
);
1861 if (!validateDeltas(before
, after
)) {
1862 printUnit(0, unit
, "after refcount optimization");
1863 always_assert(false && "refcount validation failed");
1867 FTRACE(2, "^^^^^^^^^^ refcount opts ^^^^^^^^^^\n");