/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/opt.h"

#include <cstdint>
#include <algorithm>

#include <boost/variant.hpp>

#include <folly/Optional.h>
#include <folly/ScopeGuard.h>
#include <folly/Hash.h>
// folly::gen is used by show(const State&) below to join the initRDS set.
#include <folly/gen/Base.h>
#include <folly/gen/String.h>

#include "hphp/util/bitset-utils.h"
#include "hphp/util/functional.h"
#include "hphp/util/either.h"
#include "hphp/util/dataflow-worklist.h"
#include "hphp/util/match.h"
#include "hphp/util/trace.h"

#include "hphp/runtime/base/perf-warning.h"

#include "hphp/runtime/vm/jit/alias-analysis.h"
#include "hphp/runtime/vm/jit/analysis.h"
#include "hphp/runtime/vm/jit/cfg.h"
#include "hphp/runtime/vm/jit/containers.h"
#include "hphp/runtime/vm/jit/dce.h"
#include "hphp/runtime/vm/jit/extra-data.h"
#include "hphp/runtime/vm/jit/ir-instruction.h"
#include "hphp/runtime/vm/jit/memory-effects.h"
#include "hphp/runtime/vm/jit/mutation.h"
#include "hphp/runtime/vm/jit/pass-tracer.h"
#include "hphp/runtime/vm/jit/simplify.h"
#include "hphp/runtime/vm/jit/state-vector.h"
#include "hphp/runtime/vm/jit/timer.h"

namespace HPHP { namespace jit {

namespace {

TRACE_SET_MOD(hhir_load);

//////////////////////////////////////////////////////////////////////

// Reverse post order block ids.
using RpoID = uint32_t;

//////////////////////////////////////////////////////////////////////

/*
 * Information about a value in memory.
 *
 * We either have nullptr, meaning we know nothing, or we know a specific
 * SSATmp* for the value, or we'll track the Block* where different non-null
 * states were merged, which is also a location at which we will be able to
 * insert a phi to make a new available SSATmp.
 *
 * For some discussion on how we know whether the SSATmp* is usable (i.e. its
 * definition dominates code that might want to reuse it), see the bottom of
 * this file.
 */
using ValueInfo = Either<SSATmp*,Block*>;

/*
 * Each tracked location has both a ValueInfo and a separately-tracked type.
 * We may have a known type even without an SSATmp for an available value, or
 * a type that is more refined than the knownValue SSATmp's type. The
 * knownType is always at least as refined as knownValue->type(), if
 * knownValue is an SSATmp.
 */
struct TrackedLoc {
  ValueInfo knownValue{nullptr};
  Type knownType{TTop};
};

/*
 * Abstract state for a program position.
 *
 * This has a set of TrackedLocs, and an availability mask to determine which
 * ones are currently valid. When updating a tracked location, the visitor
 * below will kill any potentially conflicting locations in the availability
 * mask without updating them.
 */
struct State {
  bool initialized = false;  // if false, we'll never reach this block

  /*
   * Currently tracked locations, indexed by their ids.
   */
  jit::vector<TrackedLoc> tracked;

  /*
   * Currently available indexes in tracked.
   */
  ALocBits avail;

  /*
   * If we know whether various RDS entries have been initialized at this
   * position.
   */
  jit::flat_set<rds::Handle> initRDS{};
};

struct BlockInfo {
  RpoID rpoID;
  State stateIn;

  /*
   * Output states for the next and taken edge. These are stored explicitly
   * for two reasons:
   *
   *   o When inserting phis, we need to see the state on each edge coming to
   *     a join point, to know what to phi.
   *
   *   o This lets us re-merge all predecessors into a block's stateIn while
   *     doing the dataflow analysis. This can get better results than just
   *     merging into stateIn in place, basically because of the way we
   *     represent the locations that need phis. If we didn't remerge, it's
   *     possible for the analysis to believe it needs to insert phis even in
   *     blocks with only a single predecessor (e.g. if the first time it saw
   *     the block it had a SSATmp* in some location, but the second time it
   *     tries to merge a Block* for a phi location).
   */
  State stateOutNext;
  State stateOutTaken;
};

struct PhiKey {
  struct Hash {
    size_t operator()(PhiKey k) const {
      return folly::hash::hash_combine(pointer_hash<Block>()(k.block), k.aloc);
    }
  };

  bool operator==(PhiKey o) const {
    return block == o.block && aloc == o.aloc;
  }

  Block* block;
  uint32_t aloc;
};

struct Global {
  explicit Global(IRUnit& unit)
    : unit(unit)
    , rpoBlocks(rpoSortCfg(unit))
    , ainfo(collect_aliases(unit, rpoBlocks))
    , blockInfo(unit, BlockInfo{})
  {}

  IRUnit& unit;
  BlockList rpoBlocks;
  AliasAnalysis ainfo;

  /*
   * Map from each block to its information. Before we've done the fixed
   * point analysis the states in the info structures are not necessarily
   * meaningful.
   */
  StateVector<Block,BlockInfo> blockInfo;

  /*
   * During the optimize pass, we may add phis for the values known to be in
   * memory locations at particular control-flow-joins. To avoid doing it
   * more than once for any given join point, this keeps track of what we've
   * added so if it's needed in more than one place we can reuse it.
   */
  jit::hash_map<PhiKey,SSATmp*,PhiKey::Hash> insertedPhis;

  /*
   * Stats
   */
  size_t instrsReduced = 0;
  size_t loadsRemoved = 0;
  size_t loadsRefined = 0;
  size_t jumpsRemoved = 0;
};

//////////////////////////////////////////////////////////////////////

using HPHP::jit::show;

std::string show(ValueInfo vi) {
  if (vi == nullptr) return "<>";
  return vi.match(
    [&] (SSATmp* t) { return t->toString(); },
    [&] (Block* b) { return folly::sformat("phi@B{}", b->id()); }
  );
}

std::string show(TrackedLoc li) {
  return folly::sformat(
    "{: >12} | {}",
    li.knownType.toString(),
    show(li.knownValue)
  );
}

DEBUG_ONLY std::string show(const State& state) {
  auto ret = std::string{};
  if (!state.initialized) {
    ret = "  unreachable\n";
    return ret;
  }

  folly::format(&ret, "  av: {}\n", show(state.avail));
  for (auto idx = uint32_t{0}; idx < state.tracked.size(); ++idx) {
    if (state.avail.test(idx)) {
      folly::format(&ret, "  {: >3} = {}\n", idx, show(state.tracked[idx]));
    }
  }

  folly::format(
    &ret,
    "  initRDS : {}\n",
    [&] {
      using namespace folly::gen;
      return from(state.initRDS) | unsplit<std::string>(",");
    }()
  );

  return ret;
}

//////////////////////////////////////////////////////////////////////

struct Local {
  const Global& global;
  State state;
};

/*
 * No flags. This means no transformations were possible for the analyzed
 * instruction.
 */
struct FNone {};

/*
 * The instruction was a pure load, and its value was known to be available
 * in `knownValue', with best known type `knownType'.
 */
struct FRedundant { ValueInfo knownValue; Type knownType; uint32_t aloc; };

/*
 * Like FRedundant, but the load is impure---rather than killing it, we can
 * factor out the load.
 */
struct FReducible { ValueInfo knownValue; Type knownType; uint32_t aloc; };

/*
 * The instruction is a load which can be refined to a better type than its
 * type-parameter currently has.
 */
struct FRefinableLoad { Type refinedType; };

/*
 * The instruction can be replaced with a nop.
 */
struct FRemovable {};

/*
 * The instruction can be legally replaced with a Jmp to either its next or
 * taken edge.
 */
struct FJmpNext {};
struct FJmpTaken {};

using Flags = boost::variant<FNone,FRedundant,FReducible,FRefinableLoad,
                             FRemovable,FJmpNext,FJmpTaken>;

//////////////////////////////////////////////////////////////////////

// Conservative list of instructions which have type-parameters safe to
// refine.
bool refinable_load_eligible(const IRInstruction& inst) {
  switch (inst.op()) {
    case LdLoc:
    case LdStk:
    case LdMBase:
    case LdMem:
      assertx(inst.hasTypeParam());
      return true;
    default:
      return false;
  }
}

void clear_everything(Local& env) {
  FTRACE(3, "    clear_everything\n");
  env.state.avail.reset();
}

TrackedLoc* find_tracked(State& state,
                         folly::Optional<ALocMeta> meta) {
  if (!meta) return nullptr;
  return state.avail[meta->index] ? &state.tracked[meta->index]
                                  : nullptr;
}

TrackedLoc* find_tracked(Local& env,
                         folly::Optional<ALocMeta> meta) {
  return find_tracked(env.state, meta);
}

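/*
 * Analyze a pure load from `acls'. Intersect the load's dst type into the
 * tracked state for the location, and report whether the load is redundant
 * (we already know the value) or at least refinable to a better type.
 */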
Flags load(Local& env,
           const IRInstruction& inst,
           AliasClass acls) {
  acls = canonicalize(acls);

  auto const meta = env.global.ainfo.find(acls);
  if (!meta) return FNone{};
  assertx(meta->index < kMaxTrackedALocs);

  auto& tracked = env.state.tracked[meta->index];

  // We can always trust the dst type of the load. Add its knowledge to
  // knownType before doing anything else.
  if (!env.state.avail[meta->index]) {
    tracked.knownValue = nullptr;
    tracked.knownType = inst.dst()->type();
    env.state.avail.set(meta->index);
  } else {
    tracked.knownType &= inst.dst()->type();
  }

  if (tracked.knownType.admitsSingleVal()) {
    tracked.knownValue = env.global.unit.cns(tracked.knownType);

    FTRACE(4, "       {} <- {}\n", show(acls), inst.dst()->toString());
    FTRACE(5, "       av: {}\n", show(env.state.avail));
    return FRedundant {
      tracked.knownValue,
      tracked.knownType,
      kMaxTrackedALocs    // it's a constant, so it is nowhere
    };
  }

  if (tracked.knownValue != nullptr) {
    // Don't reuse knownValue if it comes from a different block and the
    // current block is cold (Hint::Unused, e.g. a catch trace), to avoid
    // extending lifetimes too much.
    auto const block = tracked.knownValue.match(
      [] (SSATmp* tmp) { return tmp->inst()->block(); },
      [] (Block* blk) { return blk; }
    );
    if (inst.block() == block || inst.block()->hint() != Block::Hint::Unused) {
      return FRedundant { tracked.knownValue, tracked.knownType, meta->index };
    }
  } else {
    // Only set a new known value if we previously didn't have one. If we had
    // a known value already, we would have made the load redundant above,
    // unless we're in a Hint::Unused block. In that case, we want to keep
    // any old known value to avoid disturbing the main trace in case we
    // merge back.
    tracked.knownValue = inst.dst();

    FTRACE(4, "       {} <- {}\n", show(acls), inst.dst()->toString());
    FTRACE(5, "       av: {}\n", show(env.state.avail));
  }

  // Even if we can't make this load redundant, we might be able to refine
  // its type parameter.
  if (refinable_load_eligible(inst)) {
    if (tracked.knownType < inst.typeParam()) {
      return FRefinableLoad { tracked.knownType };
    }
  }

  return FNone{};
}

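/*
 * Record a store of `value' (or of an unknown value, if `value' is nullptr)
 * to `acls', killing any tracked locations that may alias it.
 */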
void store(Local& env, AliasClass acls, SSATmp* value) {
  acls = canonicalize(acls);

  auto const meta = env.global.ainfo.find(acls);
  if (!meta) {
    env.state.avail &= ~env.global.ainfo.may_alias(acls);
    return;
  }

  FTRACE(5, "       av: {}\n", show(env.state.avail));
  env.state.avail &= ~meta->conflicts;

  auto& current = env.state.tracked[meta->index];
  current.knownValue = value;
  current.knownType = value ? value->type() : TTop;
  env.state.avail.set(meta->index);

  FTRACE(4, "       {} <- {}\n", show(acls),
         value ? value->toString() : std::string("<>"));
  FTRACE(5, "       av: {}\n", show(env.state.avail));
}

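/*
 * Handle instructions with GeneralEffects. Check-style instructions can
 * become unconditional jumps, or reductions to checks on an SSATmp, when the
 * tracked state already decides them; the RDS init/check/mark instructions
 * use the initRDS set to drop repeated initialization checks. In every
 * fall-through case the instruction's stores are conservatively clobbered.
 */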
Flags handle_general_effects(Local& env,
                             const IRInstruction& inst,
                             GeneralEffects m) {
  auto handleCheck = [&](Type typeParam) -> folly::Optional<Flags> {
    auto const meta = env.global.ainfo.find(canonicalize(m.loads));
    if (!meta) return folly::none;

    auto const tloc = &env.state.tracked[meta->index];
    if (!env.state.avail[meta->index]) {
      // We know nothing about the location. Initialize it with our typeParam.
      tloc->knownValue = nullptr;
      tloc->knownType = typeParam;
      env.state.avail.set(meta->index);
    } else if (tloc->knownType <= typeParam) {
      // We had a type that was good enough to pass the check.
      return Flags{FJmpNext{}};
    } else {
      // We had a type that didn't pass the check. Narrow it using our
      // typeParam.
      tloc->knownType &= typeParam;
    }

    if (tloc->knownType <= TBottom) {
      // i.e., !maybe(typeParam); fail check.
      return Flags{FJmpTaken{}};
    }
    if (tloc->knownValue != nullptr) {
      return Flags{FReducible{tloc->knownValue, tloc->knownType, meta->index}};
    }
    return folly::none;
  };

  switch (inst.op()) {
  case CheckTypeMem:
  case CheckLoc:
  case CheckStk:
  case CheckMBase:
  case CheckRefInner:
    if (auto flags = handleCheck(inst.typeParam())) return *flags;
    break;

  case CheckInitMem:
    if (auto flags = handleCheck(TInitGen)) return *flags;
    break;

  case InitSProps: {
    auto const handle = inst.extra<ClassData>()->cls->sPropInitHandle();
    if (env.state.initRDS.count(handle) > 0) return FJmpNext{};
    env.state.initRDS.insert(handle);
    break;
  }

  case InitProps: {
    auto const handle = inst.extra<ClassData>()->cls->propHandle();
    if (env.state.initRDS.count(handle) > 0) return FJmpNext{};
    env.state.initRDS.insert(handle);
    break;
  }

  case CheckRDSInitialized: {
    auto const handle = inst.extra<CheckRDSInitialized>()->handle;
    if (env.state.initRDS.count(handle) > 0) return FJmpNext{};
    // Set this unconditionally; we record taken state before every
    // instruction, and next state after each instruction.
    env.state.initRDS.insert(handle);
    break;
  }

  case MarkRDSInitialized: {
    auto const handle = inst.extra<MarkRDSInitialized>()->handle;
    if (env.state.initRDS.count(handle) > 0) return FRemovable{};
    env.state.initRDS.insert(handle);
    break;
  }

  default:
    break;
  }

  store(env, m.stores, nullptr);

  return FNone{};
}

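/*
 * Handle a php-level call. We keep only the types (not the values) of the
 * tracked stack and frame locations, and clobber everything the call itself
 * reads, kills, or writes.
 */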
void handle_call_effects(Local& env,
                         const IRInstruction& inst,
                         CallEffects effects) {
  /*
   * Keep types for stack, locals, and iterators, and throw away the values.
   * We are just doing this to avoid extending lifetimes across php calls,
   * which currently always leads to spilling.
   */
  auto const keep = env.global.ainfo.all_stack |
                    env.global.ainfo.all_frame;
  env.state.avail &= keep;
  for (auto aloc = uint32_t{0};
       aloc < env.global.ainfo.locations.size();
       ++aloc) {
    if (!env.state.avail[aloc]) continue;
    env.state.tracked[aloc].knownValue = nullptr;
  }

  // Any stack locations modified by the callee are no longer valid.
  store(env, effects.kills, nullptr);
  store(env, effects.inputs, nullptr);
  store(env, effects.actrec, nullptr);
  store(env, effects.outputs, nullptr);
}

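/*
 * AssertLoc/AssertStk narrow the tracked type for a frame local or a stack
 * slot. If we already know the value there (or the narrowed type admits a
 * single value), the assert can be reduced to an AssertType on that SSATmp.
 */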
Flags handle_assert(Local& env, const IRInstruction& inst) {
  auto const acls = [&] () -> folly::Optional<AliasClass> {
    switch (inst.op()) {
    case AssertLoc:
      return AliasClass {
        AFrame { inst.src(0), inst.extra<AssertLoc>()->locId }
      };
    case AssertStk:
      return AliasClass {
        AStack { inst.src(0), inst.extra<AssertStk>()->offset, 1 }
      };
    default: break;
    }
    return folly::none;
  }();
  if (!acls) return FNone{};

  auto const meta = env.global.ainfo.find(canonicalize(*acls));
  auto const tloc = find_tracked(env, meta);
  if (!tloc) {
    FTRACE(4, "      untracked assert\n");
    return FNone{};
  }

  tloc->knownType &= inst.typeParam();
  if (tloc->knownValue != nullptr || tloc->knownType.admitsSingleVal()) {
    if (tloc->knownValue == nullptr) {
      tloc->knownValue = env.global.unit.cns(tloc->knownType);
    }

    FTRACE(4, "      reducible assert: {}\n", show(*tloc));
    return FReducible { tloc->knownValue, tloc->knownType, meta->index };
  }

  FTRACE(4, "      non-reducible assert: {}\n", show(*tloc));
  return FNone{};
}

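/*
 * A passing CheckType-style instruction gives us a refined SSATmp for a
 * value. Replace any tracked locations known to hold `oldVal' with `newVal'
 * and its (narrower) type.
 */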
void refine_value(Local& env, SSATmp* newVal, SSATmp* oldVal) {
  bitset_for_each_set(
    env.state.avail,
    [&](size_t i) {
      auto& tracked = env.state.tracked[i];
      if (tracked.knownValue != oldVal) return;
      FTRACE(4, "       refining {} to {}\n", i, newVal->toString());
      tracked.knownValue = newVal;
      tracked.knownType  = newVal->type();
    }
  );
}

//////////////////////////////////////////////////////////////////////

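/*
 * Analyze a single instruction: apply its memory effects to the tracked
 * state, then return flags describing any rewrite the optimize pass may
 * perform on it.
 */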
Flags analyze_inst(Local& env, const IRInstruction& inst) {
  auto const effects = memory_effects(inst);
  FTRACE(3, "    {}\n"
            "      {}\n",
         inst.toString(),
         show(effects));

  auto flags = Flags{};
  match<void>(
    effects,
    [&] (IrrelevantEffects) {},
    [&] (UnknownEffects)    { clear_everything(env); },
    [&] (ExitEffects)       { clear_everything(env); },
    [&] (ReturnEffects)     {},

    [&] (PureStore m)       { store(env, m.dst, m.value); },
    [&] (PureLoad m)        { flags = load(env, inst, m.src); },

    [&] (InlineEnterEffects m) { store(env, m.inlFrame, nullptr);
                                 store(env, m.inlStack, nullptr);
                                 store(env, m.actrec, nullptr); },
    [&] (InlineExitEffects m)  { store(env, m.inlFrame, nullptr);
                                 store(env, m.inlMeta, nullptr); },
    [&] (GeneralEffects m)  { flags = handle_general_effects(env, inst, m); },
    [&] (CallEffects x)     { handle_call_effects(env, inst, x); }
  );

  switch (inst.op()) {
  case CheckType:
  case CheckNonNull:
  case CheckVArray:
  case CheckDArray:
    refine_value(env, inst.dst(), inst.src(0));
    break;
  case AssertLoc:
  case AssertStk:
    flags = handle_assert(env, inst);
    break;
  default:
    break;
  }

  return flags;
}

//////////////////////////////////////////////////////////////////////

/*
 * Make sure that every predecessor ends with a Jmp that we can add arguments
 * to, so we can create a new phi'd value. This may involve splitting some
 * edges.
 */
void prepare_predecessors_for_phi(Global& env, Block* block) {
  auto preds = jit::vector<Block*>{};
  block->forEachPred([&] (Block* b) { preds.push_back(b); });

  for (auto& pred : preds) {
    if (pred->back().is(Jmp)) continue;
    auto const middle = splitEdge(env.unit, pred, block);
    ITRACE(3, "      B{} -> B{} to add a Jmp\n", pred->id(), middle->id());
    auto& midInfo = env.blockInfo[middle];
    auto& predInfo = env.blockInfo[pred];
    auto const& srcState = pred->next() == middle
      ? predInfo.stateOutNext
      : predInfo.stateOutTaken;

    // We don't need to set the in state on the new edge-splitting block,
    // because it isn't in our env.rpoBlocks, so we don't ever visit it for
    // the optimize pass.
    midInfo.stateOutTaken = srcState;
  }
}

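/*
 * Create (or reuse) the DefLabel destination that will hold the phi for
 * `alocID' at `block', and hook the incoming value up through each
 * predecessor's Jmp. Recurses when a predecessor's incoming value itself
 * needs a phi in another block.
 */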
SSATmp* resolve_phis_work(Global& env,
                          jit::vector<IRInstruction*>& mutated_labels,
                          Block* block,
                          uint32_t alocID) {
  auto& phi_record = env.insertedPhis[PhiKey { block, alocID }];
  if (phi_record) {
    ITRACE(3, "      resolve_phis: B{} for aloc {}: already made {}\n",
           block->id(),
           alocID,
           phi_record->toString());
    return phi_record;
  }

  // We should never require phis going into a catch block, because catch
  // blocks can't have multiple predecessors in HHIR.
  assertx(!block->isCatch());

  ITRACE(3, "      resolve_phis: B{} for aloc {}\n", block->id(), alocID);
  Trace::Indent indenter;

  prepare_predecessors_for_phi(env, block);

  // Create the new destination of the DefLabel at this block. This has to
  // happen before we start hooking up the Jmps on predecessors, because the
  // block could be one of its own predecessors and it might need to send
  // this new dst into the same phi.
  if (block->front().is(DefLabel)) {
    env.unit.expandLabel(&block->front(), 1);
  } else {
    block->prepend(env.unit.defLabel(1, block->front().bcctx()));
  }
  auto const label = &block->front();
  mutated_labels.push_back(label);
  phi_record = label->dst(label->numDsts() - 1);

  // Now add the new argument to each incoming Jmp. If one of the
  // predecessors has a phi state with this same block, it'll find the dst we
  // just added in phi_record.
  block->forEachPred([&] (Block* pred) {
    auto& predInfo = env.blockInfo[pred];
    auto& state = pred->next() == block ? predInfo.stateOutNext
                                        : predInfo.stateOutTaken;
    assertx(state.initialized);
    auto& incomingLoc = state.tracked[alocID];
    assertx(incomingLoc.knownValue != nullptr);

    auto const incomingTmp = incomingLoc.knownValue.match(
      [&] (SSATmp* t) { return t; },
      [&] (Block* incoming_phi_block) {
        // Note resolve_phis can add blocks, which can cause a resize of
        // env.blockInfo---don't reuse pointers/references to any block state
        // structures across here.
        return resolve_phis_work(
          env,
          mutated_labels,
          incoming_phi_block,
          alocID
        );
      }
    );

    ITRACE(4, "      B{} <~~> {}\n", pred->id(), incomingTmp->toString());
    env.unit.expandJmp(&pred->back(), incomingTmp);
  });

  ITRACE(4, "      dst: {}\n", phi_record->toString());
  return phi_record;
}

void reflow_deflabel_types(const IRUnit& unit,
                           const jit::vector<IRInstruction*>& labels) {
  for (bool changed = true; changed;) {
    changed = false;
    for (auto& l : labels) if (retypeDests(l, &unit)) changed = true;
  }
}

SSATmp* resolve_phis(Global& env, Block* block, uint32_t alocID) {
  auto mutated_labels = jit::vector<IRInstruction*>{};
  auto const ret = resolve_phis_work(env, mutated_labels, block, alocID);

  /*
   * We've potentially created some SSATmps (defined by DefLabels) whose
   * types depend on their own types. This should only be possible with
   * loops.
   *
   * Right now, we can't just wait for the final reflowTypes at the end of
   * this pass to update things, because SSATmp types may be inspected by our
   * simplify() calls while we're still optimizing, so reflow them now.
   */
  reflow_deflabel_types(env.unit, mutated_labels);

  return ret;
}

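/*
 * Turn a ValueInfo from the analysis into an SSATmp usable at `inst',
 * materializing a phi via resolve_phis if necessary. Returns nullptr if the
 * known value can't legally substitute for the instruction's dst.
 */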
template<class Flag>
SSATmp* resolve_value(Global& env, const IRInstruction& inst, Flag flags) {
  if (inst.dst() && !(flags.knownType <= inst.dst()->type())) {
    /*
     * It's possible we could assert the intersection of the types, but it's
     * not entirely clear what situations this would happen in, so let's just
     * not do it in this case for now.
     */
    FTRACE(2, "      knownType wasn't substitutable; not right now\n");
    return nullptr;
  }

  auto const val = flags.knownValue;
  if (val == nullptr) return nullptr;

  return val.match(
    [&] (SSATmp* t) { return t; },
    [&] (Block* b) { return resolve_phis(env, b, flags.aloc); }
  );
}

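/*
 * Replace a check or assert on a memory location with the equivalent
 * check/assert on the SSATmp we already have for that location, turning the
 * original instruction into a nop.
 */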
void reduce_inst(Global& env, IRInstruction& inst, const FReducible& flags) {
  auto const resolved = resolve_value(env, inst, flags);
  if (!resolved) return;

  DEBUG_ONLY Opcode oldOp = inst.op();
  DEBUG_ONLY Opcode newOp;

  auto const reduce_to = [&] (Opcode op, folly::Optional<Type> typeParam) {
    auto const taken = hasEdges(op) ? inst.taken() : nullptr;
    auto const newInst = env.unit.gen(
      op,
      inst.bcctx(),
      taken,
      typeParam,
      resolved
    );
    if (hasEdges(op)) newInst->setNext(inst.next());

    auto const block = inst.block();
    block->insert(++block->iteratorTo(&inst), newInst);
    inst.convertToNop();

    newOp = op;
  };

  switch (inst.op()) {
  case CheckTypeMem:
  case CheckLoc:
  case CheckStk:
  case CheckMBase:
  case CheckRefInner:
    reduce_to(CheckType, inst.typeParam());
    break;

  case CheckInitMem:
    reduce_to(CheckInit, folly::none);
    break;

  case AssertLoc:
  case AssertStk:
    reduce_to(AssertType, flags.knownType);
    break;

  default: always_assert(false);
  }

  FTRACE(2, "      reduced: {} = {} {}\n",
         opcodeName(oldOp),
         opcodeName(newOp),
         resolved->toString());

  ++env.instrsReduced;
}

void refine_load(Global& env,
                 IRInstruction& inst,
                 const FRefinableLoad& flags) {
  assertx(refinable_load_eligible(inst));
  assertx(flags.refinedType < inst.typeParam());

  FTRACE(2, "      refinable: {} :: {} -> {}\n",
         inst.toString(),
         inst.typeParam(),
         flags.refinedType);

  inst.setTypeParam(flags.refinedType);
  retypeDests(&inst, &env.unit);
  ++env.loadsRefined;
}

//////////////////////////////////////////////////////////////////////

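/*
 * Apply the flags computed by analyze_inst to one instruction, performing
 * the actual rewrites: replacing redundant loads, reducing checks and
 * asserts, refining load type-parameters, and folding branches that always
 * or never succeed.
 */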
void optimize_inst(Global& env, IRInstruction& inst, Flags flags) {
  match<void>(
    flags,
    [&] (FNone) {},

    [&] (FRedundant redundantFlags) {
      auto const resolved = resolve_value(env, inst, redundantFlags);
      if (!resolved) return;

      FTRACE(2, "      redundant: {} :: {} = {}\n",
             inst.dst()->toString(),
             redundantFlags.knownType.toString(),
             resolved->toString());

      if (resolved->type().subtypeOfAny(TGen, TCls)) {
        env.unit.replace(&inst, AssertType, redundantFlags.knownType, resolved);
      } else {
        env.unit.replace(&inst, Mov, resolved);
      }

      ++env.loadsRemoved;
    },

    [&] (FReducible reducibleFlags) { reduce_inst(env, inst, reducibleFlags); },

    [&] (FRefinableLoad f) { refine_load(env, inst, f); },

    [&] (FRemovable) {
      FTRACE(2, "      removable\n");
      assertx(!inst.isControlFlow());
      inst.convertToNop();
    },

    [&] (FJmpNext) {
      FTRACE(2, "      unnecessary\n");
      env.unit.replace(&inst, Jmp, inst.next());
      ++env.jumpsRemoved;
    },

    [&] (FJmpTaken) {
      FTRACE(2, "      unnecessary\n");
      env.unit.replace(&inst, Jmp, inst.taken());
      ++env.jumpsRemoved;
    }
  );

  // Re-simplify AssertType if we produced any.
  if (inst.is(AssertType)) simplifyInPlace(env.unit, &inst);
}

void optimize_block(Local& env, Global& genv, Block* blk) {
  if (!env.state.initialized) {
    FTRACE(2, "  unreachable\n");
    return;
  }

  for (auto& inst : *blk) {
    simplifyInPlace(genv.unit, &inst);
    auto const flags = analyze_inst(env, inst);
    optimize_inst(genv, inst, flags);
  }
}

void optimize(Global& genv) {
  /*
   * Simplify() calls can make blocks unreachable as we walk, and visiting
   * the unreachable blocks with simplify calls is not allowed. They may have
   * uses of SSATmps that no longer have defs, which can break how the
   * simplifier chases up to definitions.
   *
   * We use a StateVector because we can add new blocks during optimize().
   */
  StateVector<Block,bool> reachable(genv.unit, false);
  reachable[genv.unit.entry()] = true;
  for (auto& blk : genv.rpoBlocks) {
    FTRACE(1, "B{}:\n", blk->id());

    if (!reachable[blk]) {
      FTRACE(2, "  unreachable\n");
      continue;
    }

    auto env = Local { genv, genv.blockInfo[blk].stateIn };
    optimize_block(env, genv, blk);

    if (auto const x = blk->next())  reachable[x] = true;
    if (auto const x = blk->taken()) reachable[x] = true;
  }
}

//////////////////////////////////////////////////////////////////////

bool operator==(const TrackedLoc& a, const TrackedLoc& b) {
  return a.knownValue == b.knownValue && a.knownType == b.knownType;
}

bool operator==(const State& a, const State& b) {
  if (!a.initialized) return !b.initialized;
  if (!b.initialized) return false;
  return a.tracked == b.tracked && a.avail == b.avail;
}

bool operator!=(const State& a, const State& b) { return !(a == b); }

//////////////////////////////////////////////////////////////////////

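/*
 * Merge the TrackedLoc `src' into `dst' at the join point `target'. Types
 * are unioned; if the two sides know different SSATmps and they don't share
 * a least common ancestor, we record `target' as the block that would need a
 * phi in order to reuse the value.
 */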
void merge_into(Block* target, TrackedLoc& dst, const TrackedLoc& src) {
  dst.knownType |= src.knownType;

  if (dst.knownValue == src.knownValue || dst.knownValue == nullptr) return;
  if (src.knownValue == nullptr) {
    dst.knownValue = nullptr;
    return;
  }

  dst.knownValue.match(
    [&](SSATmp* tdst) {
      src.knownValue.match(
        [&](SSATmp* tsrc) {
          if (auto const lcm = least_common_ancestor(tdst, tsrc)) {
            dst.knownValue = lcm;
            return;
          }
          // We will need a phi at this block if we want to reuse this value.
          dst.knownValue = target;
        },

        [&](Block* /*blk*/) { dst.knownValue = target; });
    },

    [&](Block* /*b*/) { dst.knownValue = target; });
}

void merge_into(Global& genv, Block* target, State& dst, const State& src) {
  always_assert(src.initialized);
  if (!dst.initialized) {
    dst = src;
    return;
  }

  always_assert(dst.tracked.size() == src.tracked.size());
  always_assert(dst.tracked.size() == genv.ainfo.locations.size());

  dst.avail &= src.avail;

  bitset_for_each_set(
    src.avail,
    [&](size_t idx) {
      dst.avail &= ~genv.ainfo.locations_inv[idx].conflicts;
      if (dst.avail[idx]) {
        merge_into(target, dst.tracked[idx], src.tracked[idx]);
      }
    }
  );

  for (auto it = dst.initRDS.begin(); it != dst.initRDS.end();) {
    if (!src.initRDS.count(*it)) {
      it = dst.initRDS.erase(it);
    } else {
      ++it;
    }
  }
}

//////////////////////////////////////////////////////////////////////

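/*
 * Record per-edge output states for a block. The taken state is saved
 * before each instruction is analyzed and the next state after it, so each
 * edge's state reflects exactly what is known along that path.
 */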
void save_taken_state(Global& genv, const IRInstruction& inst,
                      const State& state) {
  if (!inst.taken()) return;

  auto& outState = genv.blockInfo[inst.block()].stateOutTaken = state;

  // CheckInitMem's pointee is TUninit on the taken branch, so update
  // outState.
  if (inst.is(CheckInitMem)) {
    auto const effects = memory_effects(inst);
    auto const ge = boost::get<GeneralEffects>(effects);
    auto const meta = genv.ainfo.find(canonicalize(ge.loads));
    if (auto const tloc = find_tracked(outState, meta)) {
      tloc->knownType &= TUninit;
    } else if (meta) {
      auto tloc = &outState.tracked[meta->index];
      tloc->knownValue = nullptr;
      tloc->knownType = TUninit;
      outState.avail.set(meta->index);
    }
  }
}

void save_next_state(Global& genv, const IRInstruction& inst,
                     const State& state) {
  if (inst.next()) genv.blockInfo[inst.block()].stateOutNext = state;
}

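/*
 * Forward dataflow fixed point: pull blocks off a worklist in RPO order,
 * analyze each instruction, and re-merge predecessor out-states into each
 * successor's in-state, rescheduling successors whose in-state changed.
 */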
void analyze(Global& genv) {
  FTRACE(1, "\nAnalyze:\n");

  /*
   * Scheduled blocks for the fixed point computation. We'll visit blocks
   * with lower reverse post order ids first.
   */
  auto incompleteQ = dataflow_worklist<RpoID>(genv.rpoBlocks.size());
  for (auto rpoID = uint32_t{0}; rpoID < genv.rpoBlocks.size(); ++rpoID) {
    genv.blockInfo[genv.rpoBlocks[rpoID]].rpoID = rpoID;
  }

  auto& entryIn = genv.blockInfo[genv.unit.entry()].stateIn;
  entryIn.initialized = true;
  entryIn.tracked.resize(genv.ainfo.locations.size());

  incompleteQ.push(0);

  do {
    auto propagate = [&] (Block* target) {
      FTRACE(3, "  -> {}\n", target->id());
      auto& targetInfo = genv.blockInfo[target];

      auto const oldState = targetInfo.stateIn;
      targetInfo.stateIn.initialized = false; // re-merge of all pred states
      target->forEachPred([&] (Block* pred) {
        auto const merge = [&](const State& predState) {
          if (predState.initialized) {
            FTRACE(7, "pulling state from pred B{}:\n{}\n",
                   pred->id(), show(predState));
            merge_into(genv, target, targetInfo.stateIn, predState);
          }
        };

        auto const& predInfo = genv.blockInfo[pred];
        if (pred->next() != pred->taken()) {
          auto const& predState = pred->next() == target
            ? predInfo.stateOutNext
            : predInfo.stateOutTaken;
          merge(predState);
        } else {
          // The predecessor jumps to this block along both its next and
          // taken branches. We can't distinguish the two paths, so just
          // merge both in.
          assertx(pred->next() == target);
          merge(predInfo.stateOutNext);
          merge(predInfo.stateOutTaken);
        }
      });

      if (oldState != targetInfo.stateIn) {
        FTRACE(7, "target state now:\n{}\n", show(targetInfo.stateIn));
        incompleteQ.push(targetInfo.rpoID);
      }
    };

    auto const blk = genv.rpoBlocks[incompleteQ.pop()];
    auto& blkInfo = genv.blockInfo[blk];
    FTRACE(2, "B{}:\n", blk->id());

    auto env = Local { genv, blkInfo.stateIn };
    for (auto& inst : *blk) {
      save_taken_state(genv, inst, env.state);
      analyze_inst(env, inst);
      save_next_state(genv, inst, env.state);
    }

    if (auto const t = blk->taken()) propagate(t);
    if (auto const n = blk->next())  propagate(n);
  } while (!incompleteQ.empty());
}

//////////////////////////////////////////////////////////////////////

}

/*
 * This pass does some analysis to try to avoid PureLoad instructions, and
 * avoid "hidden" loads that are part of some instructions like CheckLoc. It
 * also runs the simplify() subroutine on every instruction in the unit, and
 * can eliminate some conditional branches that test types of memory
 * locations.
 *
 *
 * Currently the following things are done:
 *
 *    o Replace PureLoad instructions with new uses of an available value,
 *      if we know it would load that value.
 *
 *    o Remove instructions that check the type of memory locations if we
 *      proved the memory location must hold that type.
 *
 *    o Convert instructions that check types of values in memory to use
 *      values in SSATmp virtual registers, if we know that we have a
 *      register that contains the same value as that memory location.
 *
 *    o Insert phis iff it allows us to do any of the above.
 *
 *    o Run the simplifier on every instruction.
 *
 *
 * Notes about preserving SSA form:
 *
 *   This pass can add new uses of SSATmp*'s to replace accesses to memory.
 *   This means we need to think about whether the SSATmp*'s will be defined
 *   in a location that dominates the places where we want to add new uses,
 *   which is one of the requirements to keep the IR in SSA form.
 *
 *   To understand why this seems to "just work", without any code
 *   specifically dealing with it, consider the following:
 *
 *   Suppose this pass sees a LdLoc at a program point and has determined
 *   that the local being loaded contains an SSATmp `t1'. We want to add a
 *   new use of `t1' and delete the LdLoc.
 *
 *   What this analysis state tells us is that every path through the program
 *   to the point of this LdLoc has provably stored `t1' to that local. This
 *   also means that on every path to this point, `t1' was used in some
 *   instruction that did a store(*), which means `t1' must have been defined
 *   on that path, or else the input program already wasn't in SSA form. But
 *   this is just the dominance condition we wanted: every path through the
 *   program to this load must have defined `t1', so its definition dominates
 *   the load.
 *
 *   (*) `t1' could also be defined by an instruction doing a load from that
 *       location, but it's even more obviously ok in that situation.
 *
 *   One further note: some parts of the code here are structured in
 *   anticipation of allowing the pass to conditionally explore parts of the
 *   CFG, so we don't need to propagate states down unreachable paths. But
 *   this isn't implemented yet: the above will need to be revisited at that
 *   point (essentially we'll have to guarantee we eliminate any jumps to
 *   blocks this pass proved were unreachable, to keep the above working at
 *   that point).
 */

void optimizeLoads(IRUnit& unit) {
  PassTracer tracer{&unit, Trace::hhir_load, "optimizeLoads"};
  Timer t(Timer::optimize_loads, unit.logEntry().get_pointer());

  // Unfortunately load-elim may not reach a fixed point after just one
  // round. This is because the optimization stage can cause the types of
  // SSATmps to change, which then can affect what we infer in the analysis
  // stage. So, loop until we reach a fixed point. Bound the iterations at
  // some max value for safety.
  size_t iters = 0;
  size_t instrsReduced = 0;
  size_t loadsRemoved = 0;
  size_t loadsRefined = 0;
  size_t jumpsRemoved = 0;
  do {
    auto genv = Global { unit };
    if (genv.ainfo.locations.size() == 0) {
      FTRACE(1, "no memory accesses to possibly optimize\n");
      break;
    }

    ++iters;
    FTRACE(1, "\nIteration #{}\n", iters);
    FTRACE(1, "Locations:\n{}\n", show(genv.ainfo));

    analyze(genv);

    FTRACE(1, "\n\nFixed point:\n{}\n",
      [&] () -> std::string {
        auto ret = std::string{};
        for (auto& blk : genv.rpoBlocks) {
          folly::format(&ret, "B{}:\n{}", blk->id(),
                        show(genv.blockInfo[blk].stateIn));
        }
        return ret;
      }()
    );

    FTRACE(1, "\nOptimize:\n");
    optimize(genv);

    if (!genv.instrsReduced &&
        !genv.loadsRemoved &&
        !genv.loadsRefined &&
        !genv.jumpsRemoved) {
      // Nothing changed, so we're done.
      break;
    }

    instrsReduced += genv.instrsReduced;
    loadsRemoved  += genv.loadsRemoved;
    loadsRefined  += genv.loadsRefined;
    jumpsRemoved  += genv.jumpsRemoved;

    FTRACE(2, "reflowing types\n");
    reflowTypes(genv.unit);

    // Restore reachability invariants.
    mandatoryDCE(unit);

    if (iters >= RuntimeOption::EvalHHIRLoadElimMaxIters) {
      // We've iterated way more than usual without reaching a fixed point.
      // Either there's some bug in load-elim, or this unit is especially
      // pathological. Emit a perf warning so we're aware, and stop
      // iterating.
      logPerfWarning(
        "optimize_loads_max_iters", 1,
        [&](StructuredLogEntry& cols) {
          auto const func = unit.context().initSrcKey.func();
          cols.setStr("func", func->fullName()->slice());
          cols.setStr("filename", func->unit()->filepath()->slice());
          cols.setStr("hhir_unit", show(unit));
        }
      );
      break;
    }
  } while (true);

  if (auto& entry = unit.logEntry()) {
    entry->setInt("optimize_loads_iters", iters);
    entry->setInt("optimize_loads_instrs_reduced", instrsReduced);
    entry->setInt("optimize_loads_loads_removed", loadsRemoved);
    entry->setInt("optimize_loads_loads_refined", loadsRefined);
    entry->setInt("optimize_loads_jumps_removed", jumpsRemoved);
  }
}

//////////////////////////////////////////////////////////////////////

}}