2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/vm/jit/opt.h"
18 #include "hphp/runtime/vm/jit/analysis.h"
19 #include "hphp/runtime/vm/jit/block.h"
20 #include "hphp/runtime/vm/jit/cfg.h"
21 #include "hphp/runtime/vm/jit/containers.h"
22 #include "hphp/runtime/vm/jit/ir-instruction.h"
23 #include "hphp/runtime/vm/jit/ssa-tmp.h"
24 #include "hphp/runtime/vm/jit/state-vector.h"
25 #include "hphp/runtime/vm/jit/pass-tracer.h"
26 #include "hphp/runtime/vm/jit/timer.h"
28 #include "hphp/util/dataflow-worklist.h"
31 #include <folly/small_vector.h>
// Open the HPHP::jit namespace and register this file's trace module
// (hhir_gvn) for FTRACE/TRACE logging.
33 namespace HPHP
{ namespace jit
{
35 TRACE_SET_MOD(hhir_gvn
);
37 //////////////////////////////////////////////////////////////////////
// Per-SSATmp value-numbering record. NOTE(review): the struct body
// (original lines 42-45) was dropped by the extraction; from later uses
// it evidently has at least `key` and `value` SSATmp* members — confirm
// against the original file.
41 struct ValueNumberMetadata
{
// Map from each SSATmp in the unit to its value-numbering metadata.
46 using ValueNumberTable
= StateVector
<SSATmp
, ValueNumberMetadata
>;
// Index of a dst within a multi-dst instruction.
47 using DstIndex
= int32_t;
// Hash functor for the congruence (name) table: hashes an
// (instruction, dst-index) key by opcode, the value numbers of the
// canonicalized sources, the extra-data hash, and the type parameter.
// NOTE(review): several interior lines (e.g. original 54-56, 60,
// 62-64, 67-71, 79, 81-85, 94-96, 98) are missing from this extraction.
49 struct CongruenceHasher
{
50 using KeyType
= std::pair
<IRInstruction
*, DstIndex
>;
// Keeps a non-owning pointer to the global value-number table so the
// hash reflects current value numbers.
52 explicit CongruenceHasher(const ValueNumberTable
& globalTable
)
53 : m_globalTable(&globalTable
)
// Folds the instruction's extra-data hash and type-param hash into
// `result` via folly's 128->64 hash combiner.
57 size_t hashSharedImpl(IRInstruction
* inst
, size_t result
) const {
58 if (inst
->hasExtra()) {
59 result
= folly::hash::hash_128_to_64(
61 hashExtra(inst
->op(), inst
->rawExtra())
65 if (inst
->hasTypeParam()) {
66 result
= folly::hash::hash_128_to_64(result
, inst
->typeParam().hash());
// Folds the value number (canonical SSATmp pointer) of each source
// into `result`; asserts every source already has a value number.
72 size_t hashSrcs(KeyType key
, size_t result
) const {
73 auto& table
= *m_globalTable
;
74 auto inst
= key
.first
;
75 for (uint32_t i
= 0; i
< inst
->numSrcs(); ++i
) {
76 auto src
= canonical(inst
->src(i
));
77 assertx(table
[src
].value
);
78 result
= folly::hash::hash_128_to_64(
80 reinterpret_cast<size_t>(table
[src
].value
)
// Main entry point: seed with the opcode, then mix in srcs and the
// shared (extra/type-param) components.
86 size_t operator()(KeyType key
) const {
87 auto inst
= key
.first
;
89 // Note: this doesn't take commutativity or associativity into account, but
90 // it might be nice to do so for the opcodes where it makes sense.
91 size_t result
= static_cast<size_t>(inst
->op());
92 result
= hashSrcs(key
, result
);
93 return hashSharedImpl(inst
, result
);
// Non-owning; the table outlives this functor.
97 const ValueNumberTable
* m_globalTable
;
// Equality functor paired with CongruenceHasher: two (instruction,
// dst-index) keys are congruent when opcode, src count, type param,
// extra data, and the value numbers of all sources match.
// NOTE(review): interior lines (e.g. original 105-107, 112, 119-123,
// 127, 134-136, 140-151, 153) are missing from this extraction.
100 struct CongruenceComparator
{
101 using KeyType
= std::pair
<IRInstruction
*, DstIndex
>;
103 explicit CongruenceComparator(const ValueNumberTable
& globalTable
)
104 : m_globalTable(&globalTable
)
// True iff every corresponding (canonicalized) source of the two
// instructions has the same value number.
108 bool compareSrcs(KeyType keyA
, KeyType keyB
) const {
109 auto& table
= *m_globalTable
;
110 auto instA
= keyA
.first
;
111 auto instB
= keyB
.first
;
113 for (uint32_t i
= 0; i
< instA
->numSrcs(); ++i
) {
114 auto srcA
= canonical(instA
->src(i
));
115 auto srcB
= canonical(instB
->src(i
));
116 assertx(table
[srcA
].value
);
117 assertx(table
[srcB
].value
);
118 if (table
[srcA
].value
!= table
[srcB
].value
) return false;
// Cheap structural checks first (opcode, arity, type param, extra
// data), then the per-source value-number comparison.
124 bool operator()(KeyType keyA
, KeyType keyB
) const {
125 auto instA
= keyA
.first
;
126 auto instB
= keyB
.first
;
128 // Note: this doesn't take commutativity or associativity into account, but
129 // it might be nice to do so for the opcodes where it makes sense.
130 if (instA
->op() != instB
->op()) return false;
131 if (instA
->numSrcs() != instB
->numSrcs()) return false;
132 if (instA
->hasTypeParam() != instB
->hasTypeParam()) return false;
133 if (instA
->hasTypeParam() && instA
->typeParam() != instB
->typeParam()) {
137 if (instA
->hasExtra()) {
138 assertx(instB
->hasExtra());
139 if (!equalsExtra(instA
->op(), instA
->rawExtra(), instB
->rawExtra())) {
144 if (!compareSrcs(keyA
, keyB
)) {
// Non-owning; the table outlives this functor.
152 const ValueNumberTable
* m_globalTable
;
// Hash map from (instruction, dst-index) to the canonical SSATmp for
// that congruence class, keyed via the congruence hasher/comparator.
// NOTE(review): the trailing template arguments (original lines
// 157-158) and the enclosing state-struct header (original ~159-161)
// are missing from this extraction.
155 using NameTable
= jit::hash_map
<
156 std::pair
<IRInstruction
*, DstIndex
>, SSATmp
*,
// Per-pass state, populated/cleared by runAnalysis: a temporary table
// of this-iteration updates, the global value-number table, and the
// congruence name table. All non-owning.
162 ValueNumberTable
* localTable
{nullptr};
163 ValueNumberTable
* globalTable
{nullptr};
164 NameTable
* nameTable
{nullptr};
// Whitelist of opcodes that are safe to value-number (pure or
// effectively pure for GVN purposes). NOTE(review): the vast majority
// of the switch's case labels, its `return true`/`default` arms, and
// the closing braces (original lines 169-260, 263, 273-287, 291-294,
// 297-302, 304-311, 314-318, 320-323) are missing from this
// extraction; only a fragment is visible here.
167 bool supportsGVN(const IRInstruction
* inst
) {
168 switch (inst
->op()) {
261 case InstanceOfIface
:
262 case InstanceOfIfaceVtable
:
264 case InstanceOfBitmask
:
265 case NInstanceOfBitmask
:
266 case InterfaceSupportsArr
:
267 case InterfaceSupportsVec
:
268 case InterfaceSupportsDict
:
269 case InterfaceSupportsKeyset
:
270 case InterfaceSupportsStr
:
271 case InterfaceSupportsInt
:
272 case InterfaceSupportsDbl
:
// Some comparison ops are only GVN-able when HackArr compat notices
// are disabled, since notices are an observable side effect.
319 return !RuntimeOption::EvalHackArrCompatDVCmpNotices
;
// Seed the value-number table for one instruction: every dst and src
// SSATmp starts as its own canonical value. NOTE(review): the loop and
// function closing braces (original lines 330-331, 334-336) are
// missing from this extraction.
326 void initWithInstruction(IRInstruction
* inst
, ValueNumberTable
& table
) {
327 // Each SSATmp starts out as the canonical name for itself.
328 for (auto dst
: inst
->dsts()) {
329 table
[dst
] = ValueNumberMetadata
{ dst
, dst
};
// Srcs are seeded too, so tmps defined outside the visited region
// (e.g. entry args) still have metadata.
332 for (auto src
: inst
->srcs()) {
333 table
[src
] = ValueNumberMetadata
{ src
, src
};
// One step of the fixed-point analysis: look the instruction's dsts up
// in the congruence name table and record a new value number in the
// local (deferred) table when the canonical name changed. Returns true
// if any value number changed. NOTE(review): the parameter list
// (original lines 338-340), several interior lines (344, 346, 348,
// 352-353, 356-357, 361-362, 364-373), and the closers are missing
// from this extraction.
337 bool visitInstruction(
341 auto& globalTable
= *env
.globalTable
;
342 auto& localTable
= *env
.localTable
;
343 auto& nameTable
= *env
.nameTable
;
// Calls can have arbitrary effects, so congruences discovered before a
// call must not be reused across it.
345 if (isCallOp(inst
->op())) nameTable
.clear();
347 if (!supportsGVN(inst
)) return false;
349 bool changed
= false;
350 for (auto dstIdx
= 0; dstIdx
< inst
->numDsts(); ++dstIdx
) {
351 auto dst
= inst
->dst(dstIdx
);
// emplace either inserts dst as the canonical name for this
// congruence class, or finds the previously-recorded canonical tmp.
354 auto result
= nameTable
.emplace(std::make_pair(inst
, dstIdx
), dst
);
355 SSATmp
* temp
= result
.second
? dst
: result
.first
->second
;
358 assertx(globalTable
[dst
].value
);
// Record the change in the local table only; the global table is
// updated after the whole iteration (see runAnalysis).
359 if (temp
!= globalTable
[dst
].value
) {
360 localTable
[dst
] = ValueNumberMetadata
{ dst
, temp
};
363 "updated value number for dst to dst of {}\n",
// Body fragment of visitBlock: visit each instruction in the block and
// report whether any value number changed. NOTE(review): the function
// signature (original ~374-376) and closing lines are missing from
// this extraction.
377 bool changed
= false;
378 for (auto& inst
: *block
) {
379 changed
= visitInstruction(env
, &inst
) || changed
;
// Copy the iteration's deferred value-number updates from the local
// table into the global table. Entries with a null key are unused
// slots and are skipped. NOTE(review): the closing braces (original
// 388-390) are missing from this extraction.
384 void applyLocalUpdates(ValueNumberTable
& local
, ValueNumberTable
& global
) {
385 for (auto metadata
: local
) {
386 if (!metadata
.key
) continue;
387 global
[metadata
.key
] = metadata
;
// Fragment of runAnalysis: initialize the global table from every
// instruction, then iterate visitBlock over the RPO block list to a
// fixed point, applying each iteration's local updates afterwards.
// NOTE(review): the function header (original ~391-393), the loop
// driving the fixed point, and various closers (e.g. 395, 399-403,
// 411, 417-418, 421-422, 424-425) are missing from this extraction.
394 const BlockList
& blocks
396 for (auto block
: blocks
) {
397 for (auto& inst
: *block
) {
398 initWithInstruction(&inst
, *env
.globalTable
);
404 // We need a temporary table of updates which we apply after running this
405 // iteration of the fixed point. If we change the global ValueNumberTable
406 // during the pass, the hash values of the SSATmps will change which is
407 // apparently a no-no for unordered_map.
408 ValueNumberTable
localTable(unit
, ValueNumberMetadata
{});
409 env
.localTable
= &localTable
;
410 SCOPE_EXIT
{ env
.localTable
= nullptr; };
// The hasher/comparator read the global table, so they see the value
// numbers as of the start of this iteration.
412 CongruenceHasher
hash(*env
.globalTable
);
413 CongruenceComparator
pred(*env
.globalTable
);
414 NameTable
nameTable(0, hash
, pred
);
415 env
.nameTable
= &nameTable
;
416 SCOPE_EXIT
{ env
.nameTable
= nullptr; };
419 for (auto block
: blocks
) {
420 changed
= visitBlock(env
, block
) || changed
;
423 applyLocalUpdates(localTable
, *env
.globalTable
);
428 * When gvn replaces an instruction that produces a refcount, we need
429 * to IncRef the replacement value. It's not enough to IncRef it where
430 * the replacement occurs (where a refcount would originally have been
431 * produced), because the replacement value might not survive that
432 * long. Instead, we need to IncRef it right after the canonical
433 * instruction. However, in general there will be paths from the
434 * canonical instruction that don't reach the replaced instruction, so
435 * we'll need to insert DecRefs on those paths.
437 * As an example, given:
461 * Where Orig is a PRc instruction, and Rep1 and Rep2 are identical
462 * instructions that are going to be replaced, we need to insert an
463 * IncRef after Orig (to ensure its dst lives until Rep1), a DecRef at
464 * exit1 (to keep the RefCounts balanced), an IncRef after Rep1 (to
465 * ensure Orig's dst lives until Rep2), and a DecRef in exit2 (to keep
466 * the RefCounts balanced).
468 * We do this by computing Availability, Anticipability and
469 * Partial-Anticipability of the candidate instructions (Orig, Rep1
470 * and Rep2 in the example). After each candidate instruction we
471 * insert an IncRef if a candidate is Partially-Anticipated-out. If a
472 * candidate is Available-in and not Partially-Anticipated-in, we
473 * insert a DecRef if it's Partially-Anticipated-out of all
474 * predecessors. This could fail to insert DecRefs if we don't split
475 * critical edges - but in that case there simply wouldn't be anywhere
476 * to insert the DecRef.
// Cap on the number of produces-reference (PRc) candidate groups the
// bitset-based dataflow below can track per pass.
478 constexpr uint32_t kMaxTrackedPrcs
= 64;
// Fragment of the PrcState struct: one bit per tracked candidate.
// NOTE(review): the struct header and the member declarations that
// these comments annotate (original lines 479-480, 482-484, 486, 488,
// 490, 492, 494, 496, 498-499) are missing from this extraction.
481 using Bits
= std::bitset
<kMaxTrackedPrcs
>;
485 /* local is the set of candidates in this block */
487 /* antIn = local | antOut */
489 /* pantIn = local | pantOut */
491 /* antOut = Intersection<succs>(antIn) (empty if numSucc==0) */
493 /* pantOut = Union<succs>(pantIn) */
495 /* avlIn = Intersection<preds>(avlOut) (empty if numPred==0) */
497 /* avlOut = local | avlIn */
// Debug pretty-printer for a candidate bitset. NOTE(review): the body
// past the ostringstream declaration (original lines 503-511) is
// missing from this extraction.
501 std::string
show(const PrcState::Bits
& bits
) {
502 std::ostringstream out
;
// Debug pretty-printer for a whole PrcState. NOTE(review): the
// sformat arguments and closers (original lines 515-529) are missing
// from this extraction.
513 std::string
show(const PrcState
& state
) {
514 return folly::sformat(
// Fragment of the PrcEnv struct: environment for the IncRef/DecRef
// insertion pass. NOTE(review): the struct header (original ~531) and
// the `unit` member declaration (original ~535) are missing from this
// extraction.
532 PrcEnv(IRUnit
& unit
, const BlockList
& rpoBlocks
) :
533 unit
{unit
}, rpoBlocks
{rpoBlocks
} {}
536 const BlockList
& rpoBlocks
;
537 // The first element of each inner vector is the canonical SSATmp
538 // and the remainder are the SSATmps that will be replaced.
539 std::vector
<std::vector
<SSATmp
*>> insertMap
;
// Per-block dataflow state, indexed by block id.
540 std::vector
<PrcState
> states
;
// Balance refcounts for replaced PRc instructions: compute
// availability / (partial) anticipability of each candidate group via
// two worklist dataflow passes, then insert IncRefs after candidates
// that are partially-anticipated-out and DecRefs at blocks where a
// candidate is available but no longer anticipated (see the long
// comment above for the full rationale). NOTE(review): many interior
// lines are missing from this extraction (e.g. the worklist variable
// declarations at original 544/546, loop/push bodies at 553, 556-560,
// 566-572, 574, 579-580, 585-590, 592-594, 598-599, 603-606, 608,
// 611-617, 620-625, 628-631, 634, 640, 649-650, 652-653, 655-656,
// 658-660, 663-664, 666, 670-675), so control flow here is only
// partially visible.
543 void insertIncRefs(PrcEnv
& env
) {
// Forward worklist (ascending RPO) for availability; backward
// worklist (descending RPO) for anticipability.
545 dataflow_worklist
<uint32_t, std::less
<uint32_t>>(env
.rpoBlocks
.size());
547 dataflow_worklist
<uint32_t, std::greater
<uint32_t>>(env
.rpoBlocks
.size());
549 env
.states
.resize(env
.unit
.numBlocks());
550 for (uint32_t i
= 0; i
< env
.rpoBlocks
.size(); i
++) {
551 auto blk
= env
.rpoBlocks
[i
];
552 auto& state
= env
.states
[blk
->id()];
// Initialize the "intersection" sets to all-ones so the meet over
// successors/predecessors can only shrink them; blocks with no
// succs/preds keep the empty-set identity instead.
554 if (blk
->numSuccs()) state
.antOut
.set();
555 if (blk
->numPreds()) state
.avlIn
.set();
// Mark each candidate in the `local` set of its defining block.
561 for (auto& v
: env
.insertMap
) {
562 for (auto const tmp
: v
) {
563 auto const blk
= tmp
->inst()->block();
564 auto& state
= env
.states
[blk
->id()];
565 if (!state
.local
.test(id
)) {
573 // compute anticipated
575 auto const blk
= env
.rpoBlocks
[antQ
.pop()];
576 auto& state
= env
.states
[blk
->id()];
577 state
.antIn
= state
.antOut
| state
.local
;
578 state
.pantIn
= state
.pantOut
| state
.local
;
// Propagate this block's in-sets into each predecessor's out-sets:
// antOut is the meet (intersection), pantOut the join (union).
581 auto& s
= env
.states
[b
->id()];
582 auto const antOut
= s
.antOut
& state
.antIn
;
583 auto const pantOut
= s
.pantOut
| state
.pantIn
;
584 if (antOut
!= s
.antOut
|| pantOut
!= s
.pantOut
) {
591 } while (!antQ
.empty());
// Compute availability forward over the CFG.
595 auto const blk
= env
.rpoBlocks
[avlQ
.pop()];
596 auto& state
= env
.states
[blk
->id()];
597 state
.avlOut
= state
.avlIn
| state
.local
;
600 auto& s
= env
.states
[b
->id()];
601 auto const avlIn
= s
.avlIn
& state
.avlOut
;
602 if (avlIn
!= s
.avlIn
) {
607 } while (!avlQ
.empty());
// Insertion phase: walk every block and apply the computed sets.
609 for (auto blk
: env
.rpoBlocks
) {
610 auto& state
= env
.states
[blk
->id()];
612 "InsertIncDecs: Blk(B{}) <- {}\n"
618 blk
->forEachPred([&] (Block
* pred
) {
619 folly::format(&ret
, " B{}", pred
->id());
626 blk
->forEachSucc([&] (Block
* succ
) {
627 folly::format(&ret
, " B{}", succ
->id());
// Iterate over the set bits of `local` by shifting; inc_id tracks the
// candidate-group index of the current low bit.
632 auto inc
= state
.local
;
633 for (auto inc_id
= 0; inc
.any(); inc
>>= 1, inc_id
++) {
635 auto const& tmps
= env
.insertMap
[inc_id
];
636 auto insert
= [&] (IRInstruction
* inst
) {
637 FTRACE(3, "Inserting IncRef into B{}\n", blk
->id());
638 auto const iter
= std::next(blk
->iteratorTo(inst
));
639 blk
->insert(iter
, env
.unit
.gen(IncRef
, inst
->bcctx(), tmps
[0]));
641 SSATmp
* last
= nullptr;
642 // Insert an IncRef after every candidate in this block except
643 // the last one (since we know for all but the last that its
644 // successor is anticipated). Note that entries in tmps from
645 // the same block are guaranteed to be in program order.
646 for (auto const tmp
: tmps
) {
647 if (tmp
->inst()->block() != blk
) continue;
648 if (last
) insert(last
->inst());
651 // If it's partially anticipated out, insert an inc after the
654 if (state
.pantOut
.test(inc_id
)) insert(last
->inst());
// DecRef candidates: available on entry but not (even partially)
// anticipated here...
657 auto dec
= state
.avlIn
& ~state
.pantIn
;
// ...and partially anticipated out of every predecessor (so a
// balancing IncRef was actually emitted upstream).
661 auto& pstate
= env
.states
[pred
->id()];
662 dec
&= pstate
.pantOut
;
665 for (auto dec_id
= 0; dec
.any(); dec
>>= 1, dec_id
++) {
667 FTRACE(3, "Inserting DecRef into B{}\n", blk
->id());
668 auto const tmp
= env
.insertMap
[dec_id
][0];
669 blk
->prepend(env
.unit
.gen(DecRef
, tmp
->inst()->bcctx(),
// Map from a canonical (replacement) SSATmp to the list of SSATmps it
// replaces; consumed by replaceRedundantComputations / insertIncRefs.
677 using ActionMap
= jit::fast_map
<SSATmp
*, std::vector
<SSATmp
*>>;
// Replace an instruction (or its sources) with the canonical value
// number when the canonical tmp is usable (dominates this block). A
// dst whose value number is different and usable becomes an AssertType
// when the canonical type is not at least as refined as the dst's.
// NOTE(review): parts of the parameter list (original 680, 682,
// 684-685), several branch bodies/closers (689, 695-696, 698, 700,
// 702, 704-707, 714, 718-724, 726-728), and the FTRACE argument lines
// are missing from this extraction.
679 void tryReplaceInstruction(
681 const IdomVector
& idoms
,
683 ValueNumberTable
& table
,
686 if (inst
->hasDst()) {
687 auto const dst
= inst
->dst();
688 auto const valueNumber
= table
[dst
].value
;
690 valueNumber
!= dst
&&
691 is_tmp_usable(idoms
, valueNumber
, inst
->block())) {
// PRc instructions need refcount fixups; record the replacement in
// the action map (seeded with the canonical tmp as element 0).
692 if (inst
->producesReference()) {
693 auto& v
= actionMap
[valueNumber
];
694 if (!v
.size()) v
.push_back(valueNumber
);
697 if (!(valueNumber
->type() <= dst
->type())) {
699 "replacing {} with AssertType({})\n",
701 dst
->type().toString()
703 unit
.replace(inst
, AssertType
, dst
->type(), valueNumber
);
// Source replacement: substitute each src with its value number when
// it is usable and at least as refined as the original src's type.
708 for (uint32_t i
= 0; i
< inst
->numSrcs(); ++i
) {
709 auto s
= inst
->src(i
);
710 auto valueNumber
= table
[s
].value
;
711 if (valueNumber
== s
) continue;
712 if (!valueNumber
) continue;
713 if (!is_tmp_usable(idoms
, valueNumber
, inst
->block())) continue;
715 // If the replacement is at least as refined as the source type,
716 // just substitute it directly
717 if (valueNumber
->type() <= s
->type()) {
720 "replacing src {} with dst of {}\n",
725 inst
->setSrc(i
, valueNumber
);
// Walk every instruction and apply tryReplaceInstruction, then hand
// the collected PRc replacements to the refcount-balancing machinery
// via a PrcEnv. NOTE(review): parts of the parameter list (original
// 731, 735-736), loop closers (740-741), branch closers (753-755,
// 757-762), and the final insertIncRefs call are missing from this
// extraction.
730 void replaceRedundantComputations(
732 const IdomVector
& idoms
,
733 const BlockList
& blocks
,
734 ValueNumberTable
& table
737 for (auto block
: blocks
) {
738 for (auto& inst
: *block
) {
739 tryReplaceInstruction(unit
, idoms
, &inst
, table
, actionMap
);
742 if (actionMap
.empty()) return;
743 PrcEnv
env(unit
, blocks
);
744 for (auto& elm
: actionMap
) {
// Overflow path: more than kMaxTrackedPrcs candidate groups — emit
// the IncRefs directly after the canonical instruction instead of
// tracking this group in the dataflow.
745 if (env
.insertMap
.size() == kMaxTrackedPrcs
) {
746 // This pretty much doesn't happen; when it does, we might be
747 // over-increffing here - but that's not a big deal.
748 auto const newTmp
= elm
.second
[0];
749 auto const block
= newTmp
->inst()->block();
750 auto const iter
= std::next(block
->iteratorTo(newTmp
->inst()));
751 for (auto i
= 1; i
< elm
.second
.size(); i
++) {
752 block
->insert(iter
, unit
.gen(IncRef
, newTmp
->inst()->bcctx(), newTmp
));
756 env
.insertMap
.push_back(std::move(elm
.second
));
763 /////////////////////////////////////////////////////////////////////////
// Entry point of the GVN pass: split critical edges, compute RPO order
// and dominators, run the value-numbering analysis to a fixed point,
// then rewrite redundant computations. NOTE(review): a few interior
// lines (original 769-770, 773-774, 776-777, 780) and the function's
// closing brace (past the visible end of this chunk) are missing from
// this extraction.
765 void gvn(IRUnit
& unit
) {
766 PassTracer tracer
{&unit
, Trace::hhir_gvn
, "gvn"};
767 Timer
t(Timer::optimize_gvn
, unit
.logEntry().get_pointer());
// Critical edges are split first so DecRefs have a place to go (see
// the long comment above insertIncRefs).
768 splitCriticalEdges(unit
);
771 auto const rpoBlocks
= rpoSortCfg(unit
);
772 auto const idoms
= findDominators(
775 numberBlocks(unit
, rpoBlocks
)
778 ValueNumberTable
globalTable(unit
, ValueNumberMetadata
{});
779 state
.globalTable
= &globalTable
;
781 // This is an implementation of the RPO version of the global value numbering
782 // algorithm presented in the 1996 paper "SCC-based Value Numbering" by
783 // Cooper and Simpson.
784 runAnalysis(state
, unit
, rpoBlocks
);
785 replaceRedundantComputations(unit
, idoms
, rpoBlocks
, globalTable
);
786 state
.globalTable
= nullptr;