2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/opt.h"
19 #include "hphp/runtime/vm/jit/analysis.h"
20 #include "hphp/runtime/vm/jit/cfg.h"
21 #include "hphp/runtime/vm/jit/dce.h"
22 #include "hphp/runtime/vm/jit/ir-opcode.h"
23 #include "hphp/runtime/vm/jit/ir-unit.h"
24 #include "hphp/runtime/vm/jit/mutation.h"
25 #include "hphp/runtime/vm/jit/pass-tracer.h"
26 #include "hphp/runtime/vm/jit/timer.h"
28 #include "hphp/util/dataflow-worklist.h"
32 TRACE_SET_MOD(hhir_checkTypes
);
34 //////////////////////////////////////////////////////////////////////
38 //////////////////////////////////////////////////////////////////////
41 * This optimization reorders chained, back-to-back CheckType instructions where
42 * the second check is for a subtype of the first one. More specifically, this
43 * optimization transforms this pattern:
46 * tmp1:type1 = CheckType<type1> tmp0 -> taken1 [ct1]
48 * tmp2:type2 = CheckType<type2> tmp1 -> taken2 [ct2]
52 * vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
53 * under the condition that type2 < type1, into the following one:
54 * vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
57 * tmp2:type2 = CheckType<type2> tmp0 -> fallthru1 [ct1]
62 * tmp1:type1 = CheckType<type1> tmp0 -> taken1 [ct2]
66 bool reorderCheckTypes(IRUnit
& unit
) {
67 Timer
timer(Timer::optimize_reorderCheckTypes
, unit
.logEntry().get_pointer());
68 PassTracer tracer
{&unit
, Trace::hhir_checkTypes
, "reorderCheckTypes"};
70 FTRACE(5, "ReorderCheckTypes:vvvvvvvvvvvvvvvvvvvvv\n");
71 SCOPE_EXIT
{ FTRACE(5, "ReorderCheckTypes:^^^^^^^^^^^^^^^^^^^^^\n"); };
75 auto const blocks
= rpoSortCfg(unit
);
76 for (auto& block
: blocks
) {
77 auto& ct1
= block
->back();
78 if (!ct1
.is(CheckType
)) continue;
79 auto fallthru1
= ct1
.next();
80 auto& ct2
= fallthru1
->front();
81 if (!ct2
.is(CheckType
)) continue;
82 if (ct1
.dst() != ct2
.src(0)) continue;
83 auto const type1
= ct1
.typeParam();
84 auto const type2
= ct2
.typeParam();
85 if (!(type2
< type1
)) continue;
88 // This transformation assumes that any use of tmp1 that is
89 // dominated by tmp2 actually uses tmp2 (which provides a more
90 // refined type) and not tmp1. Thus we run refineTmps to make
91 // sure that this invariant applies.
93 // This can be expensive, so we defer it until we know we're
94 // going to make a change. This does not modify the CFG, so it's
100 FTRACE(5, " - reordering these 2 instructions:\n - {}\n - {}\n",
103 auto tmp0
= ct1
.src(0);
104 auto tmp1
= ct1
.dst(0);
105 auto tmp2
= ct2
.dst(0);
106 auto taken1
= ct1
.taken();
107 auto taken2
= ct2
.taken();
108 auto fallthru2
= ct2
.next();
110 // Fix block, ct1 and tmp2.
111 ct1
.setTaken(fallthru1
);
112 ct1
.setNext(fallthru2
);
114 ct1
.setTypeParam(type2
);
116 tmp2
->setInstruction(&ct1
);
118 // Fix fallthru1, ct2 and tmp1.
119 ct2
.setTaken(taken1
);
122 ct2
.setTypeParam(type1
);
124 tmp1
->setInstruction(&ct2
);
125 fallthru1
->setProfCount(taken2
->profCount());
126 fallthru1
->setHint(taken2
->hint());
132 //////////////////////////////////////////////////////////////////////
135 * Check Type Hoisting:
137 * This pass attempts to hoist CheckTypes further up the CFG past
138 * DefLabels. This will allow the CheckTypes be optimized away (in
139 * some cases). It tries to convert the following pattern:
143 * Jmp t1:Str, t123:Cell -> B3
147 * Jmp t2:Int, t124:Cell -> B3
150 * t3:Str|Int, t125:Cell = DefLabel B1,B2
152 * t4:Str|Int = Passthrough t3:Str|Int
154 * t7:Int = CheckType<Int> t4:Str|Int -> B5
161 * t11:Bottom = CheckType<Int> t1:Str -> B6
166 * t15:Int = CheckType<Int> t2:Int -> B8
170 * Jmp t1:Str, t123:Cell -> B10
173 * Jmp t11:Bottom, t123:Cell -> B11
176 * Jmp t2:Int, t124:Cell -> B10
179 * Jmp t15:Int, t124:Cell -> B11
182 * t16:Str|Int, t126:Cell = DefLabel B6, B8
184 * t18:Str|Int = Passthrough t16:Str|Int
189 * t22:Int, t127:Cell = DefLabel B7, B9
191 * t21:Int = Passthrough t22:Int
196 * Starting at B5 t3 -> t16, t4 -> t18, t5 -> t17 ->, t125 -> t126
197 * Starting at B4 t3 -> t22, t4 -> t21, t5 -> t20, t7 -> t21, t125 -> t127
199 * Note that the CheckTypes in B1 and B2 can now be optimized away
200 * (and the entire CFG simplified).
203 // Clone the given block (and all instructions within). The block is assumed to
204 // end with a CheckType. The new block's CheckType will be replaced with a Jmp
205 // to dest. Refuse to clone a block defing a FramePtr as that may lead to a
206 // frame pointer phi, which vasm does not handle well.
207 Block
* cloneBlock(IRUnit
& unit
,
210 jit::fast_map
<SSATmp
*, SSATmp
*>& rewrites
) {
211 assertx(block
.back().is(CheckType
));
212 auto const newBlock
= unit
.defBlock(dest
.profCount(), dest
.hint());
214 // Walk over the block, cloning each instruction and recording the
215 // new SSATmps defined.
216 for (auto const& inst
: block
) {
217 auto const newInst
= [&] {
218 if (inst
.is(DefLabel
)) {
219 // We don't setup the types for the DefLabel dests here. Instead we
220 // retype them after hooking up the predecessor Jmps to this block.
221 return unit
.defLabel(inst
.numDsts(), newBlock
, inst
.bcctx());
222 } else if (inst
.is(CheckType
)) {
223 auto const jmp
= unit
.gen(Jmp
, inst
.bcctx(), &dest
);
224 newBlock
->append(jmp
);
227 auto const cloned
= unit
.clone(&inst
);
228 newBlock
->append(cloned
);
232 // Rewrite any uses of the old SSATmps with the new SSATmps within
234 for (uint32_t i
= 0; i
< newInst
->numSrcs(); ++i
) {
235 auto const src
= newInst
->src(i
);
236 auto const it
= rewrites
.find(src
);
237 if (it
== rewrites
.end()) continue;
238 newInst
->setSrc(i
, it
->second
);
241 if (inst
.is(CheckType
)) continue;
242 assertx(inst
.numDsts() == newInst
->numDsts());
243 for (uint32_t i
= 0; i
< inst
.numDsts(); ++i
) {
244 if (inst
.dst(i
)->isA(TFramePtr
)) return nullptr;
245 rewrites
.emplace(inst
.dst(i
), newInst
->dst(i
));
251 // Calculate per-block liveness of SSATmps
252 struct BlockLiveness
{
253 explicit BlockLiveness(const IRUnit
& unit
)
254 : live
{unit
.numTmps()}
255 , defs
{unit
.numTmps()}
256 , uses
{unit
.numTmps()} {}
258 boost::dynamic_bitset
<> live
;
259 boost::dynamic_bitset
<> defs
;
260 boost::dynamic_bitset
<> uses
;
263 // Starting at the given block, rewrite all uses of the rewrite map
264 // keys to their associated values, inserting DefLabels as
265 // necessary. Any created DefLabels are stored in the `phi' output
266 // parameter. No DefLabels are created where the `phi' set says they
267 // have already been created.
268 void rewriteUses(IRUnit
& unit
,
270 const jit::fast_map
<SSATmp
*, SSATmp
*>& rewrites
,
271 const StateVector
<Block
, BlockLiveness
>& liveness
,
272 const IdomVector
& idoms
,
273 jit::fast_set
<Block
*>& phis
) {
275 jit::stack
<Block
*> worklist
;
277 // Visit each block reachable from the start block. For each one,
278 // visit each instruction, doing the necessary rewrites. We stop if
279 // all possible rewrite instructions are dead.
280 visited
.emplace(&start
);
281 worklist
.push(&start
);
284 auto block
= worklist
.top();
287 // Check if all of the rewrites are dead. If so, we can stop.
288 auto const& live
= liveness
[block
].live
;
289 auto const anyLive
= std::any_of(
290 rewrites
.begin(), rewrites
.end(),
291 [&] (auto const& rewrite
) {
292 return live
[rewrite
.first
->id()];
295 if (!anyLive
) continue;
297 if (!dominates(&start
, block
, idoms
)) {
298 // This block is not dominated by the start block. We cannot
299 // perform the rewrites here because the targets are not
300 // available. We need to create a DefLabel. If we already have,
301 // stop (nothing more to do).
302 if (phis
.count(block
)) continue;
304 // All of SSATmps in the rewrite map will be inputs to the
305 // DefLabel. We want these to be in consistent order, so sort
307 jit::vector
<SSATmp
*> sorted
;
308 for (auto const& rewrite
: rewrites
) sorted
.emplace_back(rewrite
.first
);
310 sorted
.begin(), sorted
.end(),
311 [&] (SSATmp
* s1
, SSATmp
* s2
) { return s1
->id() < s2
->id(); }
314 jit::fast_map
<SSATmp
*, SSATmp
*> newRewrites
;
315 for (auto const from
: sorted
) {
316 // If it's dead, we can ignore it
317 if (!live
[from
->id()]) continue;
319 // Determine what to use for DefLabel inputs. If the pred is
320 // dominated by the start, we can use the rewrite map. If not,
321 // the DefLabel input should just be the original SSATmp (it
322 // might be rewritten by other calls to rewriteUses).
323 jit::hash_map
<Block
*, SSATmp
*> inputs
;
326 if (dominates(&start
, pred
, idoms
)) {
327 auto const it
= rewrites
.find(from
);
328 assertx(it
!= rewrites
.end());
329 inputs
.emplace(pred
, it
->second
);
331 inputs
.emplace(pred
, from
);
335 // Create the DefLabel
336 auto const newTmp
= insertPhi(unit
, block
, inputs
);
337 newRewrites
.emplace(from
, newTmp
);
340 // Record that we created the DefLabel, then recurse to rewrite
341 // uses from the created phi. The rewrites are now using the new
342 // SSATmps that the DefLabel defines.
343 assertx(!newRewrites
.empty());
345 rewriteUses(unit
, *block
, newRewrites
, liveness
, idoms
, phis
);
349 // This block is dominated by the start block. This means the
350 // rewrite targets are available, so just perform the rewrites.
351 for (auto& inst
: *block
) {
352 bool changed
= false;
353 for (uint32_t i
= 0; i
< inst
.numSrcs(); ++i
) {
354 auto const it
= rewrites
.find(inst
.src(i
));
355 if (it
== rewrites
.end()) continue;
356 inst
.setSrc(i
, it
->second
);
359 if (changed
) retypeDests(&inst
, &unit
);
362 // Schedule each successor for processing, unless it's already been
366 if (visited
.count(succ
)) return;
367 visited
.emplace(succ
);
371 } while (!worklist
.empty());
379 jit::fast_map
<SSATmp
*, SSATmp
*> next
;
380 jit::fast_map
<SSATmp
*, SSATmp
*> taken
;
383 StateVector
<Block
, BlockLiveness
>
384 calcLiveness(const IRUnit
& unit
,
385 const BlockList
& blocks
,
386 const jit::vector
<Rewrite
>& rewrites
) {
387 StateVector
<Block
, BlockLiveness
> liveness
{unit
, BlockLiveness
{unit
}};
389 // For rewrites, we need liveness information, so gather it. We only
390 // gather it for the SSATmps involved in the rewrite.
391 boost::dynamic_bitset
<> relevant
{unit
.numTmps()};
392 for (auto const& rewrite
: rewrites
) {
393 for (auto const& p
: rewrite
.next
) {
394 liveness
[rewrite
.nextJoin
].defs
.set(p
.first
->id());
395 relevant
.set(p
.first
->id());
397 for (auto const& p
: rewrite
.taken
) {
398 liveness
[rewrite
.takenJoin
].defs
.set(p
.first
->id());
399 relevant
.set(p
.first
->id());
403 dataflow_worklist
<size_t, std::less
<size_t>> worklist
{blocks
.size()};
404 for (size_t i
= 0; i
< blocks
.size(); ++i
) {
405 auto const block
= blocks
[i
];
406 auto& state
= liveness
[block
];
410 for (auto const& inst
: *block
) {
411 for (auto const src
: inst
.srcs()) state
.uses
.set(src
->id());
413 state
.uses
&= relevant
;
416 // Typical liveness dataflow:
417 boost::dynamic_bitset
<> temp
{unit
.numTmps()};
419 auto const block
= blocks
[worklist
.pop()];
420 auto& state
= liveness
[block
];
424 [&] (Block
* succ
) { temp
|= liveness
[succ
].live
; }
429 if (temp
!= state
.live
) {
431 [&] (Block
* pred
) { worklist
.push(liveness
[pred
].id
); }
433 std::swap(state
.live
, temp
);
435 } while (!worklist
.empty());
440 bool hoistCheckTypes(IRUnit
& unit
) {
441 Timer
timer(Timer::optimize_hoistCheckTypes
, unit
.logEntry().get_pointer());
442 PassTracer tracer
{&unit
, Trace::hhir_checkTypes
, "hoistCheckTypes"};
444 FTRACE(4, "hoist check types:vvvvvvvvvvvvvvvvvvvvv\n");
445 SCOPE_EXIT
{ FTRACE(4, "hoist check types:^^^^^^^^^^^^^^^^^^^^^\n"); };
447 jit::vector
<Rewrite
> rewrites
;
449 // Look for blocks which end in a CheckType and start with a
450 // DefLabel. If the CheckType uses a SSATmp defined by that
451 // DefLabel, we can hoist the CheckType above the DefLabel.
452 for (auto const block
: rpoSortCfg(unit
)) {
453 auto& checkType
= block
->back();
454 if (!checkType
.is(CheckType
)) continue;
456 auto const& defLabel
= block
->front();
457 if (!defLabel
.is(DefLabel
)) continue;
459 if (block
->numPreds() <= 1) continue;
461 auto const defIdx
= defLabel
.findDst(canonical(checkType
.src(0)));
462 if (defIdx
== defLabel
.numDsts()) continue;
464 // Only run this optimization if we know we'll eliminate at least
466 auto worthwhile
= false;
469 [&] (const IRInstruction
*, const SSATmp
* s
) {
470 if (worthwhile
) return;
471 auto const refined
= s
->type() & checkType
.src(0)->type();
472 auto const checkTypeParam
= checkType
.typeParam();
473 if (refined
<= checkTypeParam
|| !refined
.maybe(checkTypeParam
)) {
478 if (!worthwhile
) continue;
480 // Record information to rewrite the uses of the old SSATmps
481 // defined in the block with their new ones in rewrite.
483 auto const ctNext
= checkType
.next();
484 auto const ctTaken
= checkType
.taken();
486 auto const nextJoin
= cloneBlock(unit
, *block
, *ctNext
, rewrite
.next
);
487 if (!nextJoin
) continue;
488 auto const takenJoin
= cloneBlock(unit
, *block
, *ctTaken
, rewrite
.taken
);
489 if (!takenJoin
) continue;
491 // At this point we know we have a block we're going to hoist:
492 FTRACE(4, "Hoisting check-type {} in block B{}\n",
493 checkType
.toString(), block
->id());
495 rewrite
.nextStart
= ctNext
;
496 rewrite
.takenStart
= ctTaken
;
497 rewrite
.nextJoin
= nextJoin
;
498 rewrite
.takenJoin
= takenJoin
;
500 // For each predecessor replace its terminal phijmp with a copy of the
501 // CheckType we are hoisting, and generate stub blocks for its next and
502 // taken edges. The phijmp we replaced is copied into the stub blocks and
503 // retargeted to jump to the join blocks we defined earlier. In the copied
504 // phijumps we modify the src corresponding the the CheckType tmp to the
505 // appropriate value (for the next block the checked tmp; for the taken
506 // block then unchecked tmp).
507 block
->forEachPred([&] (Block
* pred
) {
508 auto const next
= unit
.defBlock(pred
->profCount(), pred
->hint());
509 auto const taken
= unit
.defBlock(pred
->profCount(), pred
->hint());
511 auto& jmp
= pred
->back();
512 next
->append(unit
.clone(&jmp
));
513 taken
->append(unit
.clone(&jmp
));
515 next
->back().setTaken(nextJoin
);
516 taken
->back().setTaken(takenJoin
);
518 auto const uncheckedTmp
= jmp
.src(defIdx
);
520 auto const newCheckType
= unit
.clone(&checkType
);
521 newCheckType
->setSrc(0, uncheckedTmp
);
522 pred
->append(newCheckType
);
523 retypeDests(newCheckType
, &unit
);
525 next
->back().setSrc(defIdx
, newCheckType
->dst());
526 taken
->back().setSrc(defIdx
, uncheckedTmp
);
528 newCheckType
->setNext(next
);
529 newCheckType
->setTaken(taken
);
532 // While walking the next join block we track any passthrough operations on
533 // the result of the CheckType.
534 auto checkTypeTmp
= nextJoin
->front().dst(defIdx
);
536 // Update the now fully formed DefLabel's dst types and any operations that
537 // might depend on them
538 for (auto& inst
: *nextJoin
) {
539 retypeDests(&inst
, &unit
);
540 if (inst
.isPassthrough() && inst
.getPassthroughValue() == checkTypeTmp
) {
541 assertx(!inst
.isControlFlow());
542 checkTypeTmp
= inst
.dst();
545 for (auto& inst
: *takenJoin
) {
546 retypeDests(&inst
, &unit
);
549 // Add an extra rewrite for the next block to rewrite the CheckType result.
550 rewrite
.next
.emplace(checkType
.dst(), checkTypeTmp
);
552 // The old block is now unreachable, so unlink it from the CFG.
553 checkType
.setNext(nullptr);
554 checkType
.setTaken(nullptr);
556 rewrites
.emplace_back(std::move(rewrite
));
559 // Nothing to rewrite, which means we didn't do anything
560 if (rewrites
.empty()) return false;
562 auto const blocks
= rpoSortCfg(unit
);
563 auto const liveness
= calcLiveness(unit
, blocks
, rewrites
);
565 // Rewrite the uses. The next and taken branches have the created
566 // phis in common, as they should not revisit ones the other
568 auto const idoms
= findDominators(unit
, blocks
, numberBlocks(unit
, blocks
));
569 for (auto const& rewrite
: rewrites
) {
570 jit::fast_set
<Block
*> phis
;
571 rewriteUses(unit
, *rewrite
.nextStart
, rewrite
.next
, liveness
, idoms
, phis
);
573 unit
, *rewrite
.takenStart
, rewrite
.taken
, liveness
, idoms
, phis
577 // At this point the unit is now valid, though might be a bit
578 // convoluted. Rely on other passes to clean it up.
582 //////////////////////////////////////////////////////////////////////
586 //////////////////////////////////////////////////////////////////////
588 bool optimizeCheckTypes(IRUnit
& unit
) {
589 splitCriticalEdges(unit
);
590 auto changed
= reorderCheckTypes(unit
);
591 changed
|= hoistCheckTypes(unit
);
595 //////////////////////////////////////////////////////////////////////