hphp/runtime/vm/jit/check-type-opts.cpp

   1 /*
   2    +----------------------------------------------------------------------+
   3    | HipHop for PHP                                                       |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15 */
  16
  17 #include "hphp/runtime/vm/jit/opt.h"
  18
  19 #include "hphp/runtime/vm/jit/analysis.h"
  20 #include "hphp/runtime/vm/jit/cfg.h"
  21 #include "hphp/runtime/vm/jit/dce.h"
  22 #include "hphp/runtime/vm/jit/ir-opcode.h"
  23 #include "hphp/runtime/vm/jit/ir-unit.h"
  24 #include "hphp/runtime/vm/jit/mutation.h"
  25 #include "hphp/runtime/vm/jit/pass-tracer.h"
  26 #include "hphp/runtime/vm/jit/timer.h"
  27
  28 #include "hphp/util/dataflow-worklist.h"
  29
  30 namespace HPHP::jit {
  31
  32 TRACE_SET_MOD(hhir_checkTypes);
  33
  34 //////////////////////////////////////////////////////////////////////
  35
  36 namespace {
  37
  38 //////////////////////////////////////////////////////////////////////
  39
  40 /*
  41  * This optimization reorders chained, back-to-back CheckType instructions where
  42  * the second check is for a subtype of the first one.  More specifically, this
  43  * optimization transforms this pattern:
  44  *
  45  *   [block]
  46  *     tmp1:type1 = CheckType<type1> tmp0 -> taken1  [ct1]
  47  *   [fallthru1]
  48  *     tmp2:type2 = CheckType<type2> tmp1 -> taken2  [ct2]
  49  *   [fallthru2]
  50  *     ...
  51  *
  52  *   vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
  53  *   under the condition that type2 < type1, into the following one:
  54  *   vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
  55  *
  56  *   [block]
  57  *     tmp2:type2 = CheckType<type2> tmp0 -> fallthru1 [ct1]
  58  *   [fallthru2]
  59  *     ...
  60  *
  61  *   [fallthru1]
  62  *     tmp1:type1 = CheckType<type1> tmp0 -> taken1  [ct2]
  63  *   [taken2]
  64  *
  65  */
  66 bool reorderCheckTypes(IRUnit& unit) {
  67   Timer timer(Timer::optimize_reorderCheckTypes, unit.logEntry().get_pointer());
  68   PassTracer tracer{&unit, Trace::hhir_checkTypes, "reorderCheckTypes"};
  69
  70   FTRACE(5, "ReorderCheckTypes:vvvvvvvvvvvvvvvvvvvvv\n");
  71   SCOPE_EXIT { FTRACE(5, "ReorderCheckTypes:^^^^^^^^^^^^^^^^^^^^^\n"); };
  72
  73   auto changed = false;
  74
  75   auto const blocks = rpoSortCfg(unit);
  76   for (auto& block : blocks) {
  77     auto& ct1 = block->back();
  78     if (!ct1.is(CheckType)) continue;
  79     auto fallthru1 = ct1.next();
  80     auto& ct2 = fallthru1->front();
  81     if (!ct2.is(CheckType)) continue;
  82     if (ct1.dst() != ct2.src(0)) continue;
  83     auto const type1 = ct1.typeParam();
  84     auto const type2 = ct2.typeParam();
  85     if (!(type2 < type1)) continue;
  86
  87     if (!changed) {
  88       // This transformation assumes that any use of tmp1 that is
  89       // dominated by tmp2 actually uses tmp2 (which provides a more
  90       // refined type) and not tmp1. Thus we run refineTmps to make
  91       // sure that this invariant applies.
  92       //
  93       // This can be expensive, so we defer it until we know we're
  94       // going to make a change. This does not modify the CFG, so it's
  95       // safe to do so.
  96       refineTmps(unit);
  97     }
  98     changed = true;
  99
 100     FTRACE(5, "  - reordering these 2 instructions:\n   - {}\n   - {}\n",
 101            ct1, ct2);
 102
 103     auto tmp0 = ct1.src(0);
 104     auto tmp1 = ct1.dst(0);
 105     auto tmp2 = ct2.dst(0);
 106     auto taken1 = ct1.taken();
 107     auto taken2 = ct2.taken();
 108     auto fallthru2 = ct2.next();
 109
 110     // Fix block, ct1 and tmp2.
 111     ct1.setTaken(fallthru1);
 112     ct1.setNext(fallthru2);
 113     ct1.setDst(tmp2);
 114     ct1.setTypeParam(type2);
 115     ct1.setSrc(0, tmp0);
 116     tmp2->setInstruction(&ct1);
 117
 118     // Fix fallthru1, ct2 and tmp1.
 119     ct2.setTaken(taken1);
 120     ct2.setNext(taken2);
 121     ct2.setDst(tmp1);
 122     ct2.setTypeParam(type1);
 123     ct2.setSrc(0, tmp0);
 124     tmp1->setInstruction(&ct2);
 125     fallthru1->setProfCount(taken2->profCount());
 126     fallthru1->setHint(taken2->hint());
 127   }
 128
 129   return changed;
 130 }
 131
 132 //////////////////////////////////////////////////////////////////////
 133
 134 /*
 135  * Check Type Hoisting:
 136  *
 137  * This pass attempts to hoist CheckTypes further up the CFG past
 138  * DefLabels. This will allow the CheckTypes be optimized away (in
 139  * some cases). It tries to convert the following pattern:
 140  *
 141  * B1:
 142  *   .....
 143  *   Jmp t1:Str, t123:Cell -> B3
 144  *1
 145  * B2:
 146  *   .....
 147  *   Jmp t2:Int, t124:Cell -> B3
 148  *
 149  * B3:
 150  *   t3:Str|Int, t125:Cell = DefLabel B1,B2
 151  *   t5:Cell = Conjure
 152  *   t4:Str|Int = Passthrough t3:Str|Int
 153  *   .....
 154  *   t7:Int = CheckType<Int> t4:Str|Int -> B5
 155  *     -> B4
 156  *
 157  * Into:
 158  *
 159  * B1:
 160  *   .....
 161  *   t11:Bottom = CheckType<Int> t1:Str -> B6
 162  *     -> B7
 163  *
 164  * B2:
 165  *   .....
 166  *   t15:Int = CheckType<Int> t2:Int -> B8
 167  *     -> B9
 168  *
 169  * B6:
 170  *   Jmp t1:Str, t123:Cell -> B10
 171  *
 172  * B7:
 173  *   Jmp t11:Bottom, t123:Cell -> B11
 174  *
 175  * B8:
 176  *   Jmp t2:Int, t124:Cell -> B10
 177  *
 178  * B9:
 179  *   Jmp t15:Int, t124:Cell -> B11
 180  *
 181  * B10:
 182  *   t16:Str|Int, t126:Cell = DefLabel B6, B8
 183  *   t17:Cell = Conjure
 184  *   t18:Str|Int = Passthrough t16:Str|Int
 185  *   .....
 186  *   Jmp B5
 187  *
 188  * B11:
 189  *   t22:Int, t127:Cell = DefLabel B7, B9
 190  *   t20:Cell = Conjure
 191  *   t21:Int = Passthrough t22:Int
 192  *   .....
 193  *   Jmp B4
 194  *
 195  * Then rewrite:
 196  *   Starting at B5 t3 -> t16, t4 -> t18, t5 -> t17 ->, t125 -> t126
 197  *   Starting at B4 t3 -> t22, t4 -> t21, t5 -> t20, t7 -> t21, t125 -> t127
 198  *
 199  * Note that the CheckTypes in B1 and B2 can now be optimized away
 200  * (and the entire CFG simplified).
 201  */
 202
 203 // Clone the given block (and all instructions within). The block is assumed to
 204 // end with a CheckType. The new block's CheckType will be replaced with a Jmp
 205 // to dest.  Refuse to clone a block defing a FramePtr as that may lead to a
 206 // frame pointer phi, which vasm does not handle well.
 207 Block* cloneBlock(IRUnit& unit,
 208                   Block& block,
 209                   Block& dest,
 210                   jit::fast_map<SSATmp*, SSATmp*>& rewrites) {
 211   assertx(block.back().is(CheckType));
 212   auto const newBlock = unit.defBlock(dest.profCount(), dest.hint());
 213
 214   // Walk over the block, cloning each instruction and recording the
 215   // new SSATmps defined.
 216   for (auto const& inst : block) {
 217     auto const newInst = [&] {
 218       if (inst.is(DefLabel)) {
 219         // We don't setup the types for the DefLabel dests here.  Instead we
 220         // retype them after hooking up the predecessor Jmps to this block.
 221         return unit.defLabel(inst.numDsts(), newBlock, inst.bcctx());
 222       } else if (inst.is(CheckType)) {
 223         auto const jmp = unit.gen(Jmp, inst.bcctx(), &dest);
 224         newBlock->append(jmp);
 225         return jmp;
 226       }
 227       auto const cloned = unit.clone(&inst);
 228       newBlock->append(cloned);
 229       return cloned;
 230     }();
 231
 232     // Rewrite any uses of the old SSATmps with the new SSATmps within
 233     // the same block.
 234     for (uint32_t i = 0; i < newInst->numSrcs(); ++i) {
 235       auto const src = newInst->src(i);
 236       auto const it = rewrites.find(src);
 237       if (it == rewrites.end()) continue;
 238       newInst->setSrc(i, it->second);
 239     }
 240
 241     if (inst.is(CheckType)) continue;
 242     assertx(inst.numDsts() == newInst->numDsts());
 243     for (uint32_t i = 0; i < inst.numDsts(); ++i) {
 244       if (inst.dst(i)->isA(TFramePtr)) return nullptr;
 245       rewrites.emplace(inst.dst(i), newInst->dst(i));
 246     }
 247   }
 248   return newBlock;
 249 }
 250
 251 // Calculate per-block liveness of SSATmps
 252 struct BlockLiveness {
 253   explicit BlockLiveness(const IRUnit& unit)
 254     : live{unit.numTmps()}
 255     , defs{unit.numTmps()}
 256     , uses{unit.numTmps()} {}
 257   size_t id;
 258   boost::dynamic_bitset<> live;
 259   boost::dynamic_bitset<> defs;
 260   boost::dynamic_bitset<> uses;
 261 };
 262
 263 // Starting at the given block, rewrite all uses of the rewrite map
 264 // keys to their associated values, inserting DefLabels as
 265 // necessary. Any created DefLabels are stored in the `phi' output
 266 // parameter. No DefLabels are created where the `phi' set says they
 267 // have already been created.
 268 void rewriteUses(IRUnit& unit,
 269                  Block& start,
 270                  const jit::fast_map<SSATmp*, SSATmp*>& rewrites,
 271                  const StateVector<Block, BlockLiveness>& liveness,
 272                  const IdomVector& idoms,
 273                  jit::fast_set<Block*>& phis) {
 274   BlockSet visited;
 275   jit::stack<Block*> worklist;
 276
 277   // Visit each block reachable from the start block. For each one,
 278   // visit each instruction, doing the necessary rewrites. We stop if
 279   // all possible rewrite instructions are dead.
 280   visited.emplace(&start);
 281   worklist.push(&start);
 282
 283   do {
 284     auto block = worklist.top();
 285     worklist.pop();
 286
 287     // Check if all of the rewrites are dead. If so, we can stop.
 288     auto const& live = liveness[block].live;
 289     auto const anyLive = std::any_of(
 290       rewrites.begin(), rewrites.end(),
 291       [&] (auto const& rewrite) {
 292         return live[rewrite.first->id()];
 293       }
 294     );
 295     if (!anyLive) continue;
 296
 297     if (!dominates(&start, block, idoms)) {
 298       // This block is not dominated by the start block. We cannot
 299       // perform the rewrites here because the targets are not
 300       // available. We need to create a DefLabel. If we already have,
 301       // stop (nothing more to do).
 302       if (phis.count(block)) continue;
 303
 304       // All of SSATmps in the rewrite map will be inputs to the
 305       // DefLabel. We want these to be in consistent order, so sort
 306       // them.
 307       jit::vector<SSATmp*> sorted;
 308       for (auto const& rewrite : rewrites) sorted.emplace_back(rewrite.first);
 309       std::sort(
 310         sorted.begin(), sorted.end(),
 311         [&] (SSATmp* s1, SSATmp* s2) { return s1->id() < s2->id(); }
 312       );
 313
 314       jit::fast_map<SSATmp*, SSATmp*> newRewrites;
 315       for (auto const from : sorted) {
 316         // If it's dead, we can ignore it
 317         if (!live[from->id()]) continue;
 318
 319         // Determine what to use for DefLabel inputs. If the pred is
 320         // dominated by the start, we can use the rewrite map. If not,
 321         // the DefLabel input should just be the original SSATmp (it
 322         // might be rewritten by other calls to rewriteUses).
 323         jit::hash_map<Block*, SSATmp*> inputs;
 324         block->forEachPred(
 325           [&] (Block* pred) {
 326             if (dominates(&start, pred, idoms)) {
 327               auto const it = rewrites.find(from);
 328               assertx(it != rewrites.end());
 329               inputs.emplace(pred, it->second);
 330             } else {
 331               inputs.emplace(pred, from);
 332             }
 333           }
 334         );
 335         // Create the DefLabel
 336         auto const newTmp = insertPhi(unit, block, inputs);
 337         newRewrites.emplace(from, newTmp);
 338       }
 339
 340       // Record that we created the DefLabel, then recurse to rewrite
 341       // uses from the created phi. The rewrites are now using the new
 342       // SSATmps that the DefLabel defines.
 343       assertx(!newRewrites.empty());
 344       phis.emplace(block);
 345       rewriteUses(unit, *block, newRewrites, liveness, idoms, phis);
 346       continue;
 347     }
 348
 349     // This block is dominated by the start block. This means the
 350     // rewrite targets are available, so just perform the rewrites.
 351     for (auto& inst : *block) {
 352       bool changed = false;
 353       for (uint32_t i = 0; i < inst.numSrcs(); ++i) {
 354         auto const it = rewrites.find(inst.src(i));
 355         if (it == rewrites.end()) continue;
 356         inst.setSrc(i, it->second);
 357         changed = true;
 358       }
 359       if (changed) retypeDests(&inst, &unit);
 360     }
 361
 362     // Schedule each successor for processing, unless it's already been
 363     // visited.
 364     block->forEachSucc(
 365       [&] (Block* succ) {
 366         if (visited.count(succ)) return;
 367         visited.emplace(succ);
 368         worklist.push(succ);
 369       }
 370     );
 371   } while (!worklist.empty());
 372 }
 373
 374 struct Rewrite {
 375   Block* nextStart;
 376   Block* takenStart;
 377   Block* nextJoin;
 378   Block* takenJoin;
 379   jit::fast_map<SSATmp*, SSATmp*> next;
 380   jit::fast_map<SSATmp*, SSATmp*> taken;
 381 };
 382
 383 StateVector<Block, BlockLiveness>
 384 calcLiveness(const IRUnit& unit,
 385              const BlockList& blocks,
 386              const jit::vector<Rewrite>& rewrites) {
 387   StateVector<Block, BlockLiveness> liveness{unit, BlockLiveness{unit}};
 388
 389   // For rewrites, we need liveness information, so gather it. We only
 390   // gather it for the SSATmps involved in the rewrite.
 391   boost::dynamic_bitset<> relevant{unit.numTmps()};
 392   for (auto const& rewrite : rewrites) {
 393     for (auto const& p : rewrite.next) {
 394       liveness[rewrite.nextJoin].defs.set(p.first->id());
 395       relevant.set(p.first->id());
 396     }
 397     for (auto const& p : rewrite.taken) {
 398       liveness[rewrite.takenJoin].defs.set(p.first->id());
 399       relevant.set(p.first->id());
 400     }
 401   }
 402
 403   dataflow_worklist<size_t, std::less<size_t>> worklist{blocks.size()};
 404   for (size_t i = 0; i < blocks.size(); ++i) {
 405     auto const block = blocks[i];
 406     auto& state = liveness[block];
 407     state.id = i;
 408     worklist.push(i);
 409
 410     for (auto const& inst : *block) {
 411       for (auto const src : inst.srcs()) state.uses.set(src->id());
 412     }
 413     state.uses &= relevant;
 414   }
 415
 416   // Typical liveness dataflow:
 417   boost::dynamic_bitset<> temp{unit.numTmps()};
 418   do {
 419     auto const block = blocks[worklist.pop()];
 420     auto& state = liveness[block];
 421
 422     temp.reset();
 423     block->forEachSucc(
 424       [&] (Block* succ) { temp |= liveness[succ].live; }
 425     );
 426     temp |= state.uses;
 427     temp -= state.defs;
 428
 429     if (temp != state.live) {
 430       block->forEachPred(
 431         [&] (Block* pred) { worklist.push(liveness[pred].id); }
 432       );
 433       std::swap(state.live, temp);
 434     }
 435   } while (!worklist.empty());
 436
 437   return liveness;
 438 }
 439
 440 bool hoistCheckTypes(IRUnit& unit) {
 441   Timer timer(Timer::optimize_hoistCheckTypes, unit.logEntry().get_pointer());
 442   PassTracer tracer{&unit, Trace::hhir_checkTypes, "hoistCheckTypes"};
 443
 444   FTRACE(4, "hoist check types:vvvvvvvvvvvvvvvvvvvvv\n");
 445   SCOPE_EXIT { FTRACE(4, "hoist check types:^^^^^^^^^^^^^^^^^^^^^\n"); };
 446
 447   jit::vector<Rewrite> rewrites;
 448
 449   // Look for blocks which end in a CheckType and start with a
 450   // DefLabel. If the CheckType uses a SSATmp defined by that
 451   // DefLabel, we can hoist the CheckType above the DefLabel.
 452   for (auto const block : rpoSortCfg(unit)) {
 453     auto& checkType = block->back();
 454     if (!checkType.is(CheckType)) continue;
 455
 456     auto const& defLabel = block->front();
 457     if (!defLabel.is(DefLabel)) continue;
 458
 459     if (block->numPreds() <= 1) continue;
 460
 461     auto const defIdx = defLabel.findDst(canonical(checkType.src(0)));
 462     if (defIdx == defLabel.numDsts()) continue;
 463
 464     // Only run this optimization if we know we'll eliminate at least
 465     // one CheckType.
 466     auto worthwhile = false;
 467     block->forEachSrc(
 468       defIdx,
 469       [&] (const IRInstruction*, const SSATmp* s) {
 470         if (worthwhile) return;
 471         auto const refined = s->type() & checkType.src(0)->type();
 472         auto const checkTypeParam = checkType.typeParam();
 473         if (refined <= checkTypeParam || !refined.maybe(checkTypeParam)) {
 474           worthwhile = true;
 475         }
 476       }
 477     );
 478     if (!worthwhile) continue;
 479
 480     // Record information to rewrite the uses of the old SSATmps
 481     // defined in the block with their new ones in rewrite.
 482     Rewrite rewrite;
 483     auto const ctNext = checkType.next();
 484     auto const ctTaken = checkType.taken();
 485
 486     auto const nextJoin = cloneBlock(unit, *block, *ctNext, rewrite.next);
 487     if (!nextJoin) continue;
 488     auto const takenJoin = cloneBlock(unit, *block, *ctTaken, rewrite.taken);
 489     if (!takenJoin) continue;
 490
 491     // At this point we know we have a block we're going to hoist:
 492     FTRACE(4, "Hoisting check-type {} in block B{}\n",
 493            checkType.toString(), block->id());
 494
 495     rewrite.nextStart = ctNext;
 496     rewrite.takenStart = ctTaken;
 497     rewrite.nextJoin = nextJoin;
 498     rewrite.takenJoin = takenJoin;
 499
 500     // For each predecessor replace its terminal phijmp with a copy of the
 501     // CheckType we are hoisting, and generate stub blocks for its next and
 502     // taken edges.  The phijmp we replaced is copied into the stub blocks and
 503     // retargeted to jump to the join blocks we defined earlier.  In the copied
 504     // phijumps we modify the src corresponding the the CheckType tmp to the
 505     // appropriate value (for the next block the checked tmp; for the taken
 506     // block then unchecked tmp).
 507     block->forEachPred([&] (Block* pred) {
 508       auto const next = unit.defBlock(pred->profCount(), pred->hint());
 509       auto const taken = unit.defBlock(pred->profCount(), pred->hint());
 510
 511       auto& jmp = pred->back();
 512       next->append(unit.clone(&jmp));
 513       taken->append(unit.clone(&jmp));
 514
 515       next->back().setTaken(nextJoin);
 516       taken->back().setTaken(takenJoin);
 517
 518       auto const uncheckedTmp = jmp.src(defIdx);
 519       jmp.convertToNop();
 520       auto const newCheckType = unit.clone(&checkType);
 521       newCheckType->setSrc(0, uncheckedTmp);
 522       pred->append(newCheckType);
 523       retypeDests(newCheckType, &unit);
 524
 525       next->back().setSrc(defIdx, newCheckType->dst());
 526       taken->back().setSrc(defIdx, uncheckedTmp);
 527
 528       newCheckType->setNext(next);
 529       newCheckType->setTaken(taken);
 530     });
 531
 532     // While walking the next join block we track any passthrough operations on
 533     // the result of the CheckType.
 534     auto checkTypeTmp = nextJoin->front().dst(defIdx);
 535
 536     // Update the now fully formed DefLabel's dst types and any operations that
 537     // might depend on them
 538     for (auto& inst : *nextJoin) {
 539       retypeDests(&inst, &unit);
 540       if (inst.isPassthrough() && inst.getPassthroughValue() == checkTypeTmp) {
 541         assertx(!inst.isControlFlow());
 542         checkTypeTmp = inst.dst();
 543       }
 544     }
 545     for (auto& inst : *takenJoin) {
 546       retypeDests(&inst, &unit);
 547     }
 548
 549     // Add an extra rewrite for the next block to rewrite the CheckType result.
 550     rewrite.next.emplace(checkType.dst(), checkTypeTmp);
 551
 552     // The old block is now unreachable, so unlink it from the CFG.
 553     checkType.setNext(nullptr);
 554     checkType.setTaken(nullptr);
 555
 556     rewrites.emplace_back(std::move(rewrite));
 557   }
 558
 559   // Nothing to rewrite, which means we didn't do anything
 560   if (rewrites.empty()) return false;
 561
 562   auto const blocks = rpoSortCfg(unit);
 563   auto const liveness = calcLiveness(unit, blocks, rewrites);
 564
 565   // Rewrite the uses. The next and taken branches have the created
 566   // phis in common, as they should not revisit ones the other
 567   // created.
 568   auto const idoms = findDominators(unit, blocks, numberBlocks(unit, blocks));
 569   for (auto const& rewrite : rewrites) {
 570     jit::fast_set<Block*> phis;
 571     rewriteUses(unit, *rewrite.nextStart, rewrite.next, liveness, idoms, phis);
 572     rewriteUses(
 573       unit, *rewrite.takenStart, rewrite.taken, liveness, idoms, phis
 574     );
 575   }
 576
 577   // At this point the unit is now valid, though might be a bit
 578   // convoluted. Rely on other passes to clean it up.
 579   return true;
 580 }
 581
 582 //////////////////////////////////////////////////////////////////////
 583
 584 }
 585
 586 //////////////////////////////////////////////////////////////////////
 587
 588 bool optimizeCheckTypes(IRUnit& unit) {
 589   splitCriticalEdges(unit);
 590   auto changed = reorderCheckTypes(unit);
 591   changed |= hoistCheckTypes(unit);
 592   return changed;
 593 }
 594
 595 //////////////////////////////////////////////////////////////////////
 596
 597 }