Store num args instead of offset in prologue and func entry SrcKeys
[hiphop-php.git] / hphp / runtime / vm / jit / check-type-opts.cpp
blob4f624f1da700ae17c11e870d3f890efc50303e75
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/opt.h"
19 #include "hphp/runtime/vm/jit/analysis.h"
20 #include "hphp/runtime/vm/jit/cfg.h"
21 #include "hphp/runtime/vm/jit/dce.h"
22 #include "hphp/runtime/vm/jit/ir-opcode.h"
23 #include "hphp/runtime/vm/jit/ir-unit.h"
24 #include "hphp/runtime/vm/jit/mutation.h"
25 #include "hphp/runtime/vm/jit/pass-tracer.h"
26 #include "hphp/runtime/vm/jit/timer.h"
28 #include "hphp/util/dataflow-worklist.h"
30 namespace HPHP::jit {
32 TRACE_SET_MOD(hhir_checkTypes);
34 //////////////////////////////////////////////////////////////////////
36 namespace {
38 //////////////////////////////////////////////////////////////////////
41 * This optimization reorders chained, back-to-back CheckType instructions where
42 * the second check is for a subtype of the first one. More specifically, this
43 * optimization transforms this pattern:
45 * [block]
46 * tmp1:type1 = CheckType<type1> tmp0 -> taken1 [ct1]
47 * [fallthru1]
48 * tmp2:type2 = CheckType<type2> tmp1 -> taken2 [ct2]
49 * [fallthru2]
50 * ...
52 * vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
53 * under the condition that type2 < type1, into the following one:
54 * vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
56 * [block]
57 * tmp2:type2 = CheckType<type2> tmp0 -> fallthru1 [ct1]
58 * [fallthru2]
59 * ...
61 * [fallthru1]
62 * tmp1:type1 = CheckType<type1> tmp0 -> taken1 [ct2]
63 * [taken2]
66 bool reorderCheckTypes(IRUnit& unit) {
67 Timer timer(Timer::optimize_reorderCheckTypes, unit.logEntry().get_pointer());
68 PassTracer tracer{&unit, Trace::hhir_checkTypes, "reorderCheckTypes"};
70 FTRACE(5, "ReorderCheckTypes:vvvvvvvvvvvvvvvvvvvvv\n");
71 SCOPE_EXIT { FTRACE(5, "ReorderCheckTypes:^^^^^^^^^^^^^^^^^^^^^\n"); };
73 auto changed = false;
75 auto const blocks = rpoSortCfg(unit);
76 for (auto& block : blocks) {
77 auto& ct1 = block->back();
78 if (!ct1.is(CheckType)) continue;
79 auto fallthru1 = ct1.next();
80 auto& ct2 = fallthru1->front();
81 if (!ct2.is(CheckType)) continue;
82 if (ct1.dst() != ct2.src(0)) continue;
83 auto const type1 = ct1.typeParam();
84 auto const type2 = ct2.typeParam();
85 if (!(type2 < type1)) continue;
87 if (!changed) {
88 // This transformation assumes that any use of tmp1 that is
89 // dominated by tmp2 actually uses tmp2 (which provides a more
90 // refined type) and not tmp1. Thus we run refineTmps to make
91 // sure that this invariant applies.
93 // This can be expensive, so we defer it until we know we're
94 // going to make a change. This does not modify the CFG, so it's
95 // safe to do so.
96 refineTmps(unit);
98 changed = true;
100 FTRACE(5, " - reordering these 2 instructions:\n - {}\n - {}\n",
101 ct1, ct2);
103 auto tmp0 = ct1.src(0);
104 auto tmp1 = ct1.dst(0);
105 auto tmp2 = ct2.dst(0);
106 auto taken1 = ct1.taken();
107 auto taken2 = ct2.taken();
108 auto fallthru2 = ct2.next();
110 // Fix block, ct1 and tmp2.
111 ct1.setTaken(fallthru1);
112 ct1.setNext(fallthru2);
113 ct1.setDst(tmp2);
114 ct1.setTypeParam(type2);
115 ct1.setSrc(0, tmp0);
116 tmp2->setInstruction(&ct1);
118 // Fix fallthru1, ct2 and tmp1.
119 ct2.setTaken(taken1);
120 ct2.setNext(taken2);
121 ct2.setDst(tmp1);
122 ct2.setTypeParam(type1);
123 ct2.setSrc(0, tmp0);
124 tmp1->setInstruction(&ct2);
125 fallthru1->setProfCount(taken2->profCount());
126 fallthru1->setHint(taken2->hint());
129 return changed;
132 //////////////////////////////////////////////////////////////////////
135 * Check Type Hoisting:
137 * This pass attempts to hoist CheckTypes further up the CFG past
138 * DefLabels. This will allow the CheckTypes be optimized away (in
139 * some cases). It tries to convert the following pattern:
141 * B1:
142 * .....
143 * Jmp t1:Str, t123:Cell -> B3
145 * B2:
146 * .....
147 * Jmp t2:Int, t124:Cell -> B3
149 * B3:
150 * t3:Str|Int, t125:Cell = DefLabel B1,B2
151 * t5:Cell = Conjure
152 * t4:Str|Int = Passthrough t3:Str|Int
153 * .....
154 * t7:Int = CheckType<Int> t4:Str|Int -> B5
155 * -> B4
157 * Into:
159 * B1:
160 * .....
161 * t11:Bottom = CheckType<Int> t1:Str -> B6
162 * -> B7
164 * B2:
165 * .....
166 * t15:Int = CheckType<Int> t2:Int -> B8
167 * -> B9
169 * B6:
170 * Jmp t1:Str, t123:Cell -> B10
172 * B7:
173 * Jmp t11:Bottom, t123:Cell -> B11
175 * B8:
176 * Jmp t2:Int, t124:Cell -> B10
178 * B9:
179 * Jmp t15:Int, t124:Cell -> B11
181 * B10:
182 * t16:Str|Int, t126:Cell = DefLabel B6, B8
183 * t17:Cell = Conjure
184 * t18:Str|Int = Passthrough t16:Str|Int
185 * .....
186 * Jmp B5
188 * B11:
189 * t22:Int, t127:Cell = DefLabel B7, B9
190 * t20:Cell = Conjure
191 * t21:Int = Passthrough t22:Int
192 * .....
193 * Jmp B4
195 * Then rewrite:
196 * Starting at B5 t3 -> t16, t4 -> t18, t5 -> t17 ->, t125 -> t126
197 * Starting at B4 t3 -> t22, t4 -> t21, t5 -> t20, t7 -> t21, t125 -> t127
199 * Note that the CheckTypes in B1 and B2 can now be optimized away
200 * (and the entire CFG simplified).
203 // Clone the given block (and all instructions within). The block is assumed to
204 // end with a CheckType. The new block's CheckType will be replaced with a Jmp
205 // to dest. Refuse to clone a block defing a FramePtr as that may lead to a
206 // frame pointer phi, which vasm does not handle well.
207 Block* cloneBlock(IRUnit& unit,
208 Block& block,
209 Block& dest,
210 jit::fast_map<SSATmp*, SSATmp*>& rewrites) {
211 assertx(block.back().is(CheckType));
212 auto const newBlock = unit.defBlock(dest.profCount(), dest.hint());
214 // Walk over the block, cloning each instruction and recording the
215 // new SSATmps defined.
216 for (auto const& inst : block) {
217 auto const newInst = [&] {
218 if (inst.is(DefLabel)) {
219 // We don't setup the types for the DefLabel dests here. Instead we
220 // retype them after hooking up the predecessor Jmps to this block.
221 return unit.defLabel(inst.numDsts(), newBlock, inst.bcctx());
222 } else if (inst.is(CheckType)) {
223 auto const jmp = unit.gen(Jmp, inst.bcctx(), &dest);
224 newBlock->append(jmp);
225 return jmp;
227 auto const cloned = unit.clone(&inst);
228 newBlock->append(cloned);
229 return cloned;
230 }();
232 // Rewrite any uses of the old SSATmps with the new SSATmps within
233 // the same block.
234 for (uint32_t i = 0; i < newInst->numSrcs(); ++i) {
235 auto const src = newInst->src(i);
236 auto const it = rewrites.find(src);
237 if (it == rewrites.end()) continue;
238 newInst->setSrc(i, it->second);
241 if (inst.is(CheckType)) continue;
242 assertx(inst.numDsts() == newInst->numDsts());
243 for (uint32_t i = 0; i < inst.numDsts(); ++i) {
244 if (inst.dst(i)->isA(TFramePtr)) return nullptr;
245 rewrites.emplace(inst.dst(i), newInst->dst(i));
248 return newBlock;
251 // Calculate per-block liveness of SSATmps
252 struct BlockLiveness {
253 explicit BlockLiveness(const IRUnit& unit)
254 : live{unit.numTmps()}
255 , defs{unit.numTmps()}
256 , uses{unit.numTmps()} {}
257 size_t id;
258 boost::dynamic_bitset<> live;
259 boost::dynamic_bitset<> defs;
260 boost::dynamic_bitset<> uses;
263 // Starting at the given block, rewrite all uses of the rewrite map
264 // keys to their associated values, inserting DefLabels as
265 // necessary. Any created DefLabels are stored in the `phi' output
266 // parameter. No DefLabels are created where the `phi' set says they
267 // have already been created.
268 void rewriteUses(IRUnit& unit,
269 Block& start,
270 const jit::fast_map<SSATmp*, SSATmp*>& rewrites,
271 const StateVector<Block, BlockLiveness>& liveness,
272 const IdomVector& idoms,
273 jit::fast_set<Block*>& phis) {
274 BlockSet visited;
275 jit::stack<Block*> worklist;
277 // Visit each block reachable from the start block. For each one,
278 // visit each instruction, doing the necessary rewrites. We stop if
279 // all possible rewrite instructions are dead.
280 visited.emplace(&start);
281 worklist.push(&start);
283 do {
284 auto block = worklist.top();
285 worklist.pop();
287 // Check if all of the rewrites are dead. If so, we can stop.
288 auto const& live = liveness[block].live;
289 auto const anyLive = std::any_of(
290 rewrites.begin(), rewrites.end(),
291 [&] (auto const& rewrite) {
292 return live[rewrite.first->id()];
295 if (!anyLive) continue;
297 if (!dominates(&start, block, idoms)) {
298 // This block is not dominated by the start block. We cannot
299 // perform the rewrites here because the targets are not
300 // available. We need to create a DefLabel. If we already have,
301 // stop (nothing more to do).
302 if (phis.count(block)) continue;
304 // All of SSATmps in the rewrite map will be inputs to the
305 // DefLabel. We want these to be in consistent order, so sort
306 // them.
307 jit::vector<SSATmp*> sorted;
308 for (auto const& rewrite : rewrites) sorted.emplace_back(rewrite.first);
309 std::sort(
310 sorted.begin(), sorted.end(),
311 [&] (SSATmp* s1, SSATmp* s2) { return s1->id() < s2->id(); }
314 jit::fast_map<SSATmp*, SSATmp*> newRewrites;
315 for (auto const from : sorted) {
316 // If it's dead, we can ignore it
317 if (!live[from->id()]) continue;
319 // Determine what to use for DefLabel inputs. If the pred is
320 // dominated by the start, we can use the rewrite map. If not,
321 // the DefLabel input should just be the original SSATmp (it
322 // might be rewritten by other calls to rewriteUses).
323 jit::hash_map<Block*, SSATmp*> inputs;
324 block->forEachPred(
325 [&] (Block* pred) {
326 if (dominates(&start, pred, idoms)) {
327 auto const it = rewrites.find(from);
328 assertx(it != rewrites.end());
329 inputs.emplace(pred, it->second);
330 } else {
331 inputs.emplace(pred, from);
335 // Create the DefLabel
336 auto const newTmp = insertPhi(unit, block, inputs);
337 newRewrites.emplace(from, newTmp);
340 // Record that we created the DefLabel, then recurse to rewrite
341 // uses from the created phi. The rewrites are now using the new
342 // SSATmps that the DefLabel defines.
343 assertx(!newRewrites.empty());
344 phis.emplace(block);
345 rewriteUses(unit, *block, newRewrites, liveness, idoms, phis);
346 continue;
349 // This block is dominated by the start block. This means the
350 // rewrite targets are available, so just perform the rewrites.
351 for (auto& inst : *block) {
352 bool changed = false;
353 for (uint32_t i = 0; i < inst.numSrcs(); ++i) {
354 auto const it = rewrites.find(inst.src(i));
355 if (it == rewrites.end()) continue;
356 inst.setSrc(i, it->second);
357 changed = true;
359 if (changed) retypeDests(&inst, &unit);
362 // Schedule each successor for processing, unless it's already been
363 // visited.
364 block->forEachSucc(
365 [&] (Block* succ) {
366 if (visited.count(succ)) return;
367 visited.emplace(succ);
368 worklist.push(succ);
371 } while (!worklist.empty());
374 struct Rewrite {
375 Block* nextStart;
376 Block* takenStart;
377 Block* nextJoin;
378 Block* takenJoin;
379 jit::fast_map<SSATmp*, SSATmp*> next;
380 jit::fast_map<SSATmp*, SSATmp*> taken;
383 StateVector<Block, BlockLiveness>
384 calcLiveness(const IRUnit& unit,
385 const BlockList& blocks,
386 const jit::vector<Rewrite>& rewrites) {
387 StateVector<Block, BlockLiveness> liveness{unit, BlockLiveness{unit}};
389 // For rewrites, we need liveness information, so gather it. We only
390 // gather it for the SSATmps involved in the rewrite.
391 boost::dynamic_bitset<> relevant{unit.numTmps()};
392 for (auto const& rewrite : rewrites) {
393 for (auto const& p : rewrite.next) {
394 liveness[rewrite.nextJoin].defs.set(p.first->id());
395 relevant.set(p.first->id());
397 for (auto const& p : rewrite.taken) {
398 liveness[rewrite.takenJoin].defs.set(p.first->id());
399 relevant.set(p.first->id());
403 dataflow_worklist<size_t, std::less<size_t>> worklist{blocks.size()};
404 for (size_t i = 0; i < blocks.size(); ++i) {
405 auto const block = blocks[i];
406 auto& state = liveness[block];
407 state.id = i;
408 worklist.push(i);
410 for (auto const& inst : *block) {
411 for (auto const src : inst.srcs()) state.uses.set(src->id());
413 state.uses &= relevant;
416 // Typical liveness dataflow:
417 boost::dynamic_bitset<> temp{unit.numTmps()};
418 do {
419 auto const block = blocks[worklist.pop()];
420 auto& state = liveness[block];
422 temp.reset();
423 block->forEachSucc(
424 [&] (Block* succ) { temp |= liveness[succ].live; }
426 temp |= state.uses;
427 temp -= state.defs;
429 if (temp != state.live) {
430 block->forEachPred(
431 [&] (Block* pred) { worklist.push(liveness[pred].id); }
433 std::swap(state.live, temp);
435 } while (!worklist.empty());
437 return liveness;
440 bool hoistCheckTypes(IRUnit& unit) {
441 Timer timer(Timer::optimize_hoistCheckTypes, unit.logEntry().get_pointer());
442 PassTracer tracer{&unit, Trace::hhir_checkTypes, "hoistCheckTypes"};
444 FTRACE(4, "hoist check types:vvvvvvvvvvvvvvvvvvvvv\n");
445 SCOPE_EXIT { FTRACE(4, "hoist check types:^^^^^^^^^^^^^^^^^^^^^\n"); };
447 jit::vector<Rewrite> rewrites;
449 // Look for blocks which end in a CheckType and start with a
450 // DefLabel. If the CheckType uses a SSATmp defined by that
451 // DefLabel, we can hoist the CheckType above the DefLabel.
452 for (auto const block : rpoSortCfg(unit)) {
453 auto& checkType = block->back();
454 if (!checkType.is(CheckType)) continue;
456 auto const& defLabel = block->front();
457 if (!defLabel.is(DefLabel)) continue;
459 if (block->numPreds() <= 1) continue;
461 auto const defIdx = defLabel.findDst(canonical(checkType.src(0)));
462 if (defIdx == defLabel.numDsts()) continue;
464 // Only run this optimization if we know we'll eliminate at least
465 // one CheckType.
466 auto worthwhile = false;
467 block->forEachSrc(
468 defIdx,
469 [&] (const IRInstruction*, const SSATmp* s) {
470 if (worthwhile) return;
471 auto const refined = s->type() & checkType.src(0)->type();
472 auto const checkTypeParam = checkType.typeParam();
473 if (refined <= checkTypeParam || !refined.maybe(checkTypeParam)) {
474 worthwhile = true;
478 if (!worthwhile) continue;
480 // Record information to rewrite the uses of the old SSATmps
481 // defined in the block with their new ones in rewrite.
482 Rewrite rewrite;
483 auto const ctNext = checkType.next();
484 auto const ctTaken = checkType.taken();
486 auto const nextJoin = cloneBlock(unit, *block, *ctNext, rewrite.next);
487 if (!nextJoin) continue;
488 auto const takenJoin = cloneBlock(unit, *block, *ctTaken, rewrite.taken);
489 if (!takenJoin) continue;
491 // At this point we know we have a block we're going to hoist:
492 FTRACE(4, "Hoisting check-type {} in block B{}\n",
493 checkType.toString(), block->id());
495 rewrite.nextStart = ctNext;
496 rewrite.takenStart = ctTaken;
497 rewrite.nextJoin = nextJoin;
498 rewrite.takenJoin = takenJoin;
500 // For each predecessor replace its terminal phijmp with a copy of the
501 // CheckType we are hoisting, and generate stub blocks for its next and
502 // taken edges. The phijmp we replaced is copied into the stub blocks and
503 // retargeted to jump to the join blocks we defined earlier. In the copied
504 // phijumps we modify the src corresponding the the CheckType tmp to the
505 // appropriate value (for the next block the checked tmp; for the taken
506 // block then unchecked tmp).
507 block->forEachPred([&] (Block* pred) {
508 auto const next = unit.defBlock(pred->profCount(), pred->hint());
509 auto const taken = unit.defBlock(pred->profCount(), pred->hint());
511 auto& jmp = pred->back();
512 next->append(unit.clone(&jmp));
513 taken->append(unit.clone(&jmp));
515 next->back().setTaken(nextJoin);
516 taken->back().setTaken(takenJoin);
518 auto const uncheckedTmp = jmp.src(defIdx);
519 jmp.convertToNop();
520 auto const newCheckType = unit.clone(&checkType);
521 newCheckType->setSrc(0, uncheckedTmp);
522 pred->append(newCheckType);
523 retypeDests(newCheckType, &unit);
525 next->back().setSrc(defIdx, newCheckType->dst());
526 taken->back().setSrc(defIdx, uncheckedTmp);
528 newCheckType->setNext(next);
529 newCheckType->setTaken(taken);
532 // While walking the next join block we track any passthrough operations on
533 // the result of the CheckType.
534 auto checkTypeTmp = nextJoin->front().dst(defIdx);
536 // Update the now fully formed DefLabel's dst types and any operations that
537 // might depend on them
538 for (auto& inst : *nextJoin) {
539 retypeDests(&inst, &unit);
540 if (inst.isPassthrough() && inst.getPassthroughValue() == checkTypeTmp) {
541 assertx(!inst.isControlFlow());
542 checkTypeTmp = inst.dst();
545 for (auto& inst : *takenJoin) {
546 retypeDests(&inst, &unit);
549 // Add an extra rewrite for the next block to rewrite the CheckType result.
550 rewrite.next.emplace(checkType.dst(), checkTypeTmp);
552 // The old block is now unreachable, so unlink it from the CFG.
553 checkType.setNext(nullptr);
554 checkType.setTaken(nullptr);
556 rewrites.emplace_back(std::move(rewrite));
559 // Nothing to rewrite, which means we didn't do anything
560 if (rewrites.empty()) return false;
562 auto const blocks = rpoSortCfg(unit);
563 auto const liveness = calcLiveness(unit, blocks, rewrites);
565 // Rewrite the uses. The next and taken branches have the created
566 // phis in common, as they should not revisit ones the other
567 // created.
568 auto const idoms = findDominators(unit, blocks, numberBlocks(unit, blocks));
569 for (auto const& rewrite : rewrites) {
570 jit::fast_set<Block*> phis;
571 rewriteUses(unit, *rewrite.nextStart, rewrite.next, liveness, idoms, phis);
572 rewriteUses(
573 unit, *rewrite.takenStart, rewrite.taken, liveness, idoms, phis
577 // At this point the unit is now valid, though might be a bit
578 // convoluted. Rely on other passes to clean it up.
579 return true;
582 //////////////////////////////////////////////////////////////////////
586 //////////////////////////////////////////////////////////////////////
588 bool optimizeCheckTypes(IRUnit& unit) {
589 splitCriticalEdges(unit);
590 auto changed = reorderCheckTypes(unit);
591 changed |= hoistCheckTypes(unit);
592 return changed;
595 //////////////////////////////////////////////////////////////////////