2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/vm/jit/opt.h"
18 #include "hphp/runtime/vm/jit/analysis.h"
19 #include "hphp/runtime/vm/jit/block.h"
20 #include "hphp/runtime/vm/jit/cfg.h"
21 #include "hphp/runtime/vm/jit/containers.h"
22 #include "hphp/runtime/vm/jit/ir-instruction.h"
23 #include "hphp/runtime/vm/jit/ssa-tmp.h"
24 #include "hphp/runtime/vm/jit/state-vector.h"
25 #include "hphp/runtime/vm/jit/pass-tracer.h"
26 #include "hphp/runtime/vm/jit/timer.h"
28 #include "hphp/util/dataflow-worklist.h"
31 #include <folly/small_vector.h>
33 namespace HPHP
{ namespace jit
{
35 TRACE_SET_MOD(hhir_gvn
);
37 //////////////////////////////////////////////////////////////////////
// Per-SSATmp value-numbering record. NOTE(review): the struct's members are
// not visible in this chunk — judging by usage below (metadata.key,
// metadata.value) it pairs a tmp with the canonical SSATmp it is congruent
// to; confirm against the full file.
41 struct ValueNumberMetadata
{
// Table mapping every SSATmp in the unit to its value-numbering metadata,
// indexed by tmp id via the JIT's StateVector.
46 using ValueNumberTable
= StateVector
<SSATmp
, ValueNumberMetadata
>;
// Index of one dst within a (possibly multi-dst) instruction; a hash/name
// key is an (instruction, dst-index) pair so each dst numbers independently.
47 using DstIndex
= int32_t;
// Hash functor for the congruence (name) table: two instruction/dst pairs
// that hash equal are candidates for being the same value. Hashes the
// opcode, the canonical value numbers of all sources, the extra-data (if
// any) and the type parameter (if any).
// NOTE(review): several closing braces / return statements are missing from
// this chunk of the paste; code bytes are preserved as-is.
49 struct CongruenceHasher
{
50 using KeyType
= std::pair
<IRInstruction
*, DstIndex
>;
// Keeps a pointer (not a copy) to the global table so hashing always sees
// the value numbers computed so far.
52 explicit CongruenceHasher(const ValueNumberTable
& globalTable
)
53 : m_globalTable(&globalTable
)
// Folds extra-data and type-param into the running hash; shared by the
// main operator() path.
57 size_t hashSharedImpl(IRInstruction
* inst
, size_t result
) const {
58 if (inst
->hasExtra()) {
59 result
= folly::hash::hash_128_to_64(
61 hashExtra(inst
->op(), inst
->rawExtra())
65 if (inst
->hasTypeParam()) {
66 result
= folly::hash::hash_128_to_64(result
, inst
->typeParam().hash());
// Folds the canonical value number of each source into the hash. Sources
// are canonicalized first so passthrough tmps hash the same as their
// origin; the asserts require that every source already has a value number.
72 size_t hashSrcs(KeyType key
, size_t result
) const {
73 auto& table
= *m_globalTable
;
74 auto inst
= key
.first
;
75 for (uint32_t i
= 0; i
< inst
->numSrcs(); ++i
) {
76 auto src
= canonical(inst
->src(i
));
77 assertx(table
[src
].value
);
78 result
= folly::hash::hash_128_to_64(
// Hashing the SSATmp pointer identity of the value number, not its
// contents — congruent tmps share the same canonical pointer.
80 reinterpret_cast<size_t>(table
[src
].value
)
86 size_t operator()(KeyType key
) const {
87 auto inst
= key
.first
;
89 // Note: this doesn't take commutativity or associativity into account, but
90 // it might be nice to do so for the opcodes where it makes sense.
91 size_t result
= static_cast<size_t>(inst
->op());
92 result
= hashSrcs(key
, result
);
93 return hashSharedImpl(inst
, result
);
97 const ValueNumberTable
* m_globalTable
;
// Equality functor paired with CongruenceHasher: two keys compare equal
// when their instructions have the same opcode, source count, type
// parameter, extra-data, and pairwise-congruent sources. Must agree with
// the hasher (equal keys hash equal).
// NOTE(review): some early-return/closing lines are missing from this
// chunk of the paste; code bytes are preserved as-is.
100 struct CongruenceComparator
{
101 using KeyType
= std::pair
<IRInstruction
*, DstIndex
>;
103 explicit CongruenceComparator(const ValueNumberTable
& globalTable
)
104 : m_globalTable(&globalTable
)
// True when every source of A is congruent to the corresponding source of
// B, i.e. their canonical value numbers are the same SSATmp pointer.
108 bool compareSrcs(KeyType keyA
, KeyType keyB
) const {
109 auto& table
= *m_globalTable
;
110 auto instA
= keyA
.first
;
111 auto instB
= keyB
.first
;
113 for (uint32_t i
= 0; i
< instA
->numSrcs(); ++i
) {
114 auto srcA
= canonical(instA
->src(i
));
115 auto srcB
= canonical(instB
->src(i
));
116 assertx(table
[srcA
].value
);
117 assertx(table
[srcB
].value
);
118 if (table
[srcA
].value
!= table
[srcB
].value
) return false;
124 bool operator()(KeyType keyA
, KeyType keyB
) const {
125 auto instA
= keyA
.first
;
126 auto instB
= keyB
.first
;
128 // Note: this doesn't take commutativity or associativity into account, but
129 // it might be nice to do so for the opcodes where it makes sense.
// Cheap structural checks first: opcode, arity, type-param presence and
// value must all match before the per-source comparison runs.
130 if (instA
->op() != instB
->op()) return false;
131 if (instA
->numSrcs() != instB
->numSrcs()) return false;
132 if (instA
->hasTypeParam() != instB
->hasTypeParam()) return false;
133 if (instA
->hasTypeParam() && instA
->typeParam() != instB
->typeParam()) {
// Same opcode implies hasExtra() agrees, hence the assert on B.
137 if (instA
->hasExtra()) {
138 assertx(instB
->hasExtra());
139 if (!equalsExtra(instA
->op(), instA
->rawExtra(), instB
->rawExtra())) {
144 if (!compareSrcs(keyA
, keyB
)) {
152 const ValueNumberTable
* m_globalTable
;
// Maps (instruction, dst-index) to the canonical SSATmp chosen for that
// congruence class, using the hasher/comparator above. NOTE(review): the
// hasher/comparator template arguments appear on elided lines of this paste.
155 using NameTable
= jit::hash_map
<
156 std::pair
<IRInstruction
*, DstIndex
>, SSATmp
*,
// Pass-wide state. NOTE(review): the enclosing struct header (presumably
// `struct GVNState {`) is missing from this chunk — confirm against the
// full file. localTable buffers this iteration's updates, globalTable is
// the fixed-point value-number table, nameTable names congruence classes.
162 ValueNumberTable
* localTable
{nullptr};
163 ValueNumberTable
* globalTable
{nullptr};
164 NameTable
* nameTable
{nullptr};
// Allow-list of opcodes that GVN may deduplicate: instructions whose result
// depends only on their sources/extra/type-param (no side effects that
// would make re-using an earlier dst unsound). NOTE(review): the vast
// majority of the case labels are elided from this chunk of the paste;
// only a fragment of the switch is visible below.
167 bool supportsGVN(const IRInstruction
* inst
) {
168 switch (inst
->op()) {
261 case InstanceOfIface
:
262 case InstanceOfIfaceVtable
:
264 case InstanceOfBitmask
:
265 case NInstanceOfBitmask
:
266 case InterfaceSupportsArr
:
267 case InterfaceSupportsVec
:
268 case InterfaceSupportsDict
:
269 case InterfaceSupportsKeyset
:
270 case InterfaceSupportsStr
:
271 case InterfaceSupportsInt
:
272 case InterfaceSupportsDbl
:
290 case LdClsFromClsMeth
:
291 case LdSmashableFunc
:
292 case LdFuncFromClsMeth
:
297 case LdClsPropAddrOrNull
:
298 case LdClsPropAddrOrRaise
:
305 case LdPackedArrayDataElemAddr
:
315 case StrictlyIntegerConv
:
316 case LookupSPropSlot
:
// Presumably guards a group of comparison opcodes whose dedup would hide
// the HackArr compat notices they can raise — only GVN-able when the
// notice option is off. TODO confirm which cases this return belongs to.
322 return !RuntimeOption::EvalHackArrCompatDVCmpNotices
;
329 void initWithInstruction(IRInstruction
* inst
, ValueNumberTable
& table
) {
330 // Each SSATmp starts out as the canonical name for itself.
331 for (auto dst
: inst
->dsts()) {
332 table
[dst
] = ValueNumberMetadata
{ dst
, dst
};
335 for (auto src
: inst
->srcs()) {
336 table
[src
] = ValueNumberMetadata
{ src
, src
};
// Visit one instruction during the fixed-point analysis: look up (or
// insert) its congruence-class name and, if a different canonical tmp
// already names the class, record the new value number for this dst in the
// per-iteration local table. Returns whether anything changed.
// NOTE(review): the parameter list and several lines (FTRACE, returns,
// braces) are elided from this chunk of the paste.
340 bool visitInstruction(
344 auto& globalTable
= *env
.globalTable
;
345 auto& localTable
= *env
.localTable
;
346 auto& nameTable
= *env
.nameTable
;
// Calls can invalidate assumptions the name table depends on, so it is
// conservatively cleared at every call.
348 if (isCallOp(inst
->op())) nameTable
.clear();
350 if (!supportsGVN(inst
)) return false;
352 bool changed
= false;
353 for (auto dstIdx
= 0; dstIdx
< inst
->numDsts(); ++dstIdx
) {
354 auto dst
= inst
->dst(dstIdx
);
// emplace either names this class with dst (result.second == true) or
// finds the existing canonical tmp for it.
357 auto result
= nameTable
.emplace(std::make_pair(inst
, dstIdx
), dst
);
358 SSATmp
* temp
= result
.second
? dst
: result
.first
->second
;
361 assertx(globalTable
[dst
].value
);
// Updates go to localTable, not globalTable: mutating globalTable here
// would change hashes mid-iteration (see comment in runAnalysis).
362 if (temp
!= globalTable
[dst
].value
) {
363 localTable
[dst
] = ValueNumberMetadata
{ dst
, temp
};
366 "updated value number for dst to dst of {}\n",
// visitBlock: fold visitInstruction over every instruction in the block,
// OR-ing the changed flags. NOTE(review): its signature line is elided.
380 bool changed
= false;
381 for (auto& inst
: *block
) {
382 changed
= visitInstruction(env
, &inst
) || changed
;
387 void applyLocalUpdates(ValueNumberTable
& local
, ValueNumberTable
& global
) {
388 for (auto metadata
: local
) {
389 if (!metadata
.key
) continue;
390 global
[metadata
.key
] = metadata
;
// Fixed-point driver: seed every tmp as its own value number, then repeat
// block visits (buffering updates in a local table and applying them after
// each sweep) until no value number changes. NOTE(review): the function
// header, the outer do/while, and several braces are elided from this
// chunk of the paste; code bytes are preserved as-is.
397 const BlockList
& blocks
// Seed phase: every tmp starts as its own canonical name.
399 for (auto block
: blocks
) {
400 for (auto& inst
: *block
) {
401 initWithInstruction(&inst
, *env
.globalTable
);
407 // We need a temporary table of updates which we apply after running this
408 // iteration of the fixed point. If we change the global ValueNumberTable
409 // during the pass, the hash values of the SSATmps will change which is
410 // apparently a no-no for unordered_map.
411 ValueNumberTable
localTable(unit
, ValueNumberMetadata
{});
412 env
.localTable
= &localTable
;
413 SCOPE_EXIT
{ env
.localTable
= nullptr; };
// Hasher/comparator reference the global table so congruence is judged
// against the value numbers of the previous sweep.
415 CongruenceHasher
hash(*env
.globalTable
);
416 CongruenceComparator
pred(*env
.globalTable
);
417 NameTable
nameTable(0, hash
, pred
);
418 env
.nameTable
= &nameTable
;
419 SCOPE_EXIT
{ env
.nameTable
= nullptr; };
422 for (auto block
: blocks
) {
423 changed
= visitBlock(env
, block
) || changed
;
// Apply this sweep's buffered updates only after the sweep completes.
426 applyLocalUpdates(localTable
, *env
.globalTable
);
431 * When gvn replaces an instruction that produces a refcount, we need
432 * to IncRef the replacement value. Its not enough to IncRef it where
433 * the replacement occurs (where a refcount would originally have been
434 * produced), because the replacement value might not survive that
435 * long. Instead, we need to IncRef it right after the canonical
436 * instruction. However, in general there will be paths from the
437 * canonical instruction that don't reach the replaced instruction, so
438 * we'll need to insert DecRefs on those paths.
440 * As an example, given:
464 * Where Orig is a PRc instruction, and Rep1 and Rep2 are identical
465 * instructions that are going to be replaced, we need to insert an
466 * IncRef after Orig (to ensure its dst lives until Rep1), a DecRef at
467 * exit1 (to keep the RefCounts balanced), an IncRef after Rep1 (to
468 * ensure Orig's dst lives until Rep2), and a DecRef in exit2 (to keep
469 * the RefCounts balanced).
471 * We do this by computing Availability, Anticipability and
472 * Partial-Anticipability of the candidate instructions (Orig, Rep1
473 * and Rep2 in the example). After each candidate instruction we
474 * insert an IncRef if a candidate is Partially-Anticipated-out. If a
475 * candidate is Available-in and not Partially-Anticipated-in, we
476 * insert a DecRef if its Partially-Anticipated out of all
477 * predecessors. This could fail to insert DecRefs if we don't split
478 * critical edges - but in that case there simply wouldn't be anywhere
479 * to insert the DecRef.
// Cap on how many producer-of-refcount (PRc) candidates the bitset-based
// dataflow below can track per pass.
481 constexpr uint32_t kMaxTrackedPrcs
= 64;
// One bit per tracked candidate. NOTE(review): the enclosing `struct
// PrcState {` header and the Bits member declarations themselves are
// elided from this chunk; only the field comments survive below.
484 using Bits
= std::bitset
<kMaxTrackedPrcs
>;
488 /* local is the set of candidates in this block */
490 /* antIn = local | antOut */
492 /* pantIn = local | pantOut */
494 /* antOut = Intersection<succs>(antIn) (empty if numSucc==0) */
496 /* pantOut = Union<succs>(pantIn) */
498 /* avlIn = Intersection<preds>(avlOut) (empty if numPred==0) */
500 /* avlOut = local | avlIn */
// Debug pretty-printers for the PRc dataflow state. NOTE(review): both
// bodies are almost entirely elided from this chunk of the paste.
504 std::string
show(const PrcState::Bits
& bits
) {
505 std::ostringstream out
;
// Formats one block's full lattice state (local/ant/pant/avl sets).
516 std::string
show(const PrcState
& state
) {
517 return folly::sformat(
// Environment for the IncRef/DecRef insertion phase: the unit, its RPO
// block order, the candidate map, and per-block dataflow states.
// NOTE(review): the struct header and the `IRUnit& unit;` member line are
// elided from this chunk of the paste.
535 PrcEnv(IRUnit
& unit
, const BlockList
& rpoBlocks
) :
536 unit
{unit
}, rpoBlocks
{rpoBlocks
} {}
539 const BlockList
& rpoBlocks
;
540 // The first element of each inner vector is the canonical SSATmp
541 // and the remainder are the SSATmps that will be replaced.
542 std::vector
<std::vector
<SSATmp
*>> insertMap
;
// Indexed by block id; one PrcState per block.
543 std::vector
<PrcState
> states
;
// Runs the Availability / (Partial-)Anticipability dataflow described in
// the comment above, then inserts the IncRefs/DecRefs needed to keep
// refcounts balanced after PRc instructions are deduplicated.
// NOTE(review): many lines (worklist variable declarations, loop headers,
// braces, FTRACE bodies) are elided from this chunk of the paste; code
// bytes are preserved as-is.
546 void insertIncRefs(PrcEnv
& env
) {
// Two worklists: forward order (std::less) for availability, reverse
// order (std::greater) for anticipability.
548 dataflow_worklist
<uint32_t, std::less
<uint32_t>>(env
.rpoBlocks
.size());
550 dataflow_worklist
<uint32_t, std::greater
<uint32_t>>(env
.rpoBlocks
.size());
552 env
.states
.resize(env
.unit
.numBlocks());
// Initialize the lattice: antOut starts full for blocks with successors
// (intersection identity) and avlIn full for blocks with predecessors.
553 for (uint32_t i
= 0; i
< env
.rpoBlocks
.size(); i
++) {
554 auto blk
= env
.rpoBlocks
[i
];
555 auto& state
= env
.states
[blk
->id()];
557 if (blk
->numSuccs()) state
.antOut
.set();
558 if (blk
->numPreds()) state
.avlIn
.set();
// Mark each block containing a candidate tmp in its `local` set.
564 for (auto& v
: env
.insertMap
) {
565 for (auto const tmp
: v
) {
566 auto const blk
= tmp
->inst()->block();
567 auto& state
= env
.states
[blk
->id()];
568 if (!state
.local
.test(id
)) {
576 using Bits
= PrcState::Bits
;
577 // compute anticipated
// Backward fixed point: antIn/pantIn from this block, then narrow/widen
// each predecessor's antOut/pantOut and requeue on change.
579 auto const blk
= env
.rpoBlocks
[antQ
.pop()];
580 auto& state
= env
.states
[blk
->id()];
581 state
.antIn
= state
.antOut
| state
.local
;
582 state
.pantIn
= state
.pantOut
| state
.local
;
585 auto& s
= env
.states
[b
->id()];
586 auto const antOut
= s
.antOut
& state
.antIn
;
587 auto const pantOut
= s
.pantOut
| state
.pantIn
;
588 if (antOut
!= s
.antOut
|| pantOut
!= s
.pantOut
) {
595 } while (!antQ
.empty());
// Forward fixed point for availability, mirroring the loop above.
599 auto const blk
= env
.rpoBlocks
[avlQ
.pop()];
600 auto& state
= env
.states
[blk
->id()];
601 state
.avlOut
= state
.avlIn
| state
.local
;
604 auto& s
= env
.states
[b
->id()];
605 auto const avlIn
= s
.avlIn
& state
.avlOut
;
606 if (avlIn
!= s
.avlIn
) {
611 } while (!avlQ
.empty());
// Insertion sweep: walk every block and materialize IncRefs/DecRefs from
// the converged lattice.
613 for (auto blk
: env
.rpoBlocks
) {
614 auto& state
= env
.states
[blk
->id()];
616 "InsertIncDecs: Blk(B{}) <- {}\n"
622 blk
->forEachPred([&] (Block
* pred
) {
623 folly::format(&ret
, " B{}", pred
->id());
630 blk
->forEachSucc([&] (Block
* succ
) {
631 folly::format(&ret
, " B{}", succ
->id());
// Iterate the set bits of `local` by shifting; inc_id tracks which
// candidate the current low bit corresponds to.
636 auto inc
= state
.local
;
637 for (auto inc_id
= 0; inc
.any(); inc
>>= 1, inc_id
++) {
639 auto const& tmps
= env
.insertMap
[inc_id
];
640 auto insert
= [&] (IRInstruction
* inst
) {
641 FTRACE(3, "Inserting IncRef into B{}\n", blk
->id());
642 auto const iter
= std::next(blk
->iteratorTo(inst
));
643 blk
->insert(iter
, env
.unit
.gen(IncRef
, inst
->bcctx(), tmps
[0]));
645 SSATmp
* last
= nullptr;
646 // Insert an IncRef after every candidate in this block except
647 // the last one (since we know for all but the last that its
648 // successor is anticipated). Note that entries in tmps from
649 // the same block are guaranteed to be in program order.
650 for (auto const tmp
: tmps
) {
651 if (tmp
->inst()->block() != blk
) continue;
652 if (last
) insert(last
->inst());
655 // If it's partially anticipated out, insert an inc after the
658 if (state
.pantOut
.test(inc_id
)) insert(last
->inst());
// DecRefs: candidates available-in but not partially-anticipated-in,
// restricted to those partially-anticipated out of every predecessor.
661 auto dec
= state
.avlIn
& ~state
.pantIn
;
665 auto& pstate
= env
.states
[pred
->id()];
666 dec
&= pstate
.pantOut
;
669 for (auto dec_id
= 0; dec
.any(); dec
>>= 1, dec_id
++) {
671 FTRACE(3, "Inserting DecRef into B{}\n", blk
->id());
672 auto const tmp
= env
.insertMap
[dec_id
][0];
673 blk
->prepend(env
.unit
.gen(DecRef
, tmp
->inst()->bcctx(),
// Maps a canonical (kept) SSATmp to the list of PRc tmps whose uses it
// will replace; consumed later by insertIncRefs via PrcEnv::insertMap.
681 using ActionMap
= jit::fast_map
<SSATmp
*, std::vector
<SSATmp
*>>;
// Rewrites one instruction against the converged value-number table:
// replaces a redundant dst with its canonical tmp (via AssertType when the
// canonical tmp's type is less refined) and substitutes canonical tmps for
// sources where dominance allows. NOTE(review): parts of the parameter
// list, FTRACE calls and closing braces are elided from this chunk.
683 void tryReplaceInstruction(
685 const IdomVector
& idoms
,
687 ValueNumberTable
& table
,
690 if (inst
->hasDst()) {
691 auto const dst
= inst
->dst();
692 auto const valueNumber
= table
[dst
].value
;
// Only replace when a different canonical tmp exists and it dominates
// (is usable at) this instruction's block.
694 valueNumber
!= dst
&&
695 is_tmp_usable(idoms
, valueNumber
, inst
->block())) {
// PRc instructions need refcount fixups; queue them in actionMap with
// the canonical tmp as the first element of its vector.
696 if (inst
->producesReference()) {
697 auto& v
= actionMap
[valueNumber
];
698 if (!v
.size()) v
.push_back(valueNumber
);
701 if (!(valueNumber
->type() <= dst
->type())) {
703 "replacing {} with AssertType({})\n",
705 dst
->type().toString()
// Keep dst's more refined type by asserting it onto the canonical tmp.
707 unit
.replace(inst
, AssertType
, dst
->type(), valueNumber
);
712 for (uint32_t i
= 0; i
< inst
->numSrcs(); ++i
) {
713 auto s
= inst
->src(i
);
714 auto valueNumber
= table
[s
].value
;
715 if (valueNumber
== s
) continue;
716 if (!valueNumber
) continue;
717 if (!is_tmp_usable(idoms
, valueNumber
, inst
->block())) continue;
719 // If the replacement is at least as refined as the source type,
720 // just substitute it directly
721 if (valueNumber
->type() <= s
->type()) {
724 "replacing src {} with dst of {}\n",
729 inst
->setSrc(i
, valueNumber
);
// Second phase of GVN: rewrite every instruction against the final value
// numbers, then balance refcounts for the replaced PRc instructions —
// either with the full dataflow in insertIncRefs (tracked candidates) or
// with immediate, possibly conservative, IncRefs once kMaxTrackedPrcs is
// exceeded. NOTE(review): the unit parameter, actionMap declaration, and
// trailing insertIncRefs call are elided from this chunk of the paste.
734 void replaceRedundantComputations(
736 const IdomVector
& idoms
,
737 const BlockList
& blocks
,
738 ValueNumberTable
& table
741 for (auto block
: blocks
) {
742 for (auto& inst
: *block
) {
743 tryReplaceInstruction(unit
, idoms
, &inst
, table
, actionMap
);
746 if (actionMap
.empty()) return;
747 PrcEnv
env(unit
, blocks
);
748 for (auto& elm
: actionMap
) {
// Overflow path: too many candidates to track bits for, so IncRef
// immediately after the canonical instruction, once per replacement.
749 if (env
.insertMap
.size() == kMaxTrackedPrcs
) {
750 // This pretty much doesn't happen; when it does, we might be
751 // over-increffing here - but thats not a big deal.
752 auto const newTmp
= elm
.second
[0];
753 auto const block
= newTmp
->inst()->block();
754 auto const iter
= std::next(block
->iteratorTo(newTmp
->inst()));
755 for (auto i
= 1; i
< elm
.second
.size(); i
++) {
756 block
->insert(iter
, unit
.gen(IncRef
, newTmp
->inst()->bcctx(), newTmp
));
760 env
.insertMap
.push_back(std::move(elm
.second
));
767 /////////////////////////////////////////////////////////////////////////
// Pass entry point: split critical edges (so DecRef insertion has a place
// to land), run the RPO value-numbering fixed point, then replace
// redundant computations using dominator information.
// NOTE(review): the declaration of `state` (presumably a GVNState local)
// and some findDominators arguments are elided from this chunk.
769 void gvn(IRUnit
& unit
) {
770 PassTracer tracer
{&unit
, Trace::hhir_gvn
, "gvn"};
771 Timer
t(Timer::optimize_gvn
, unit
.logEntry().get_pointer());
772 splitCriticalEdges(unit
);
775 auto const rpoBlocks
= rpoSortCfg(unit
);
776 auto const idoms
= findDominators(
779 numberBlocks(unit
, rpoBlocks
)
782 ValueNumberTable
globalTable(unit
, ValueNumberMetadata
{});
783 state
.globalTable
= &globalTable
;
785 // This is an implementation of the RPO version of the global value numbering
786 // algorithm presented in the 1996 paper "SCC-based Value Numbering" by
787 // Cooper and Simpson.
788 runAnalysis(state
, unit
, rpoBlocks
);
789 replaceRedundantComputations(unit
, idoms
, rpoBlocks
, globalTable
);
// Clear the dangling pointer to the stack-local table before returning.
790 state
.globalTable
= nullptr;