/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/opt.h"

#include <cstdint>
#include <algorithm>

#include <boost/variant.hpp>

#include <folly/Optional.h>
#include <folly/ScopeGuard.h>
#include <folly/Hash.h>
// folly::gen is used by show(const State&) below to join the initRDS set.
#include <folly/gen/Base.h>
#include <folly/gen/String.h>

#include "hphp/util/bitset-utils.h"
#include "hphp/util/functional.h"
#include "hphp/util/either.h"
#include "hphp/util/dataflow-worklist.h"
#include "hphp/util/match.h"
#include "hphp/util/trace.h"

#include "hphp/runtime/base/perf-warning.h"

#include "hphp/runtime/vm/jit/alias-analysis.h"
#include "hphp/runtime/vm/jit/analysis.h"
#include "hphp/runtime/vm/jit/cfg.h"
#include "hphp/runtime/vm/jit/containers.h"
#include "hphp/runtime/vm/jit/dce.h"
#include "hphp/runtime/vm/jit/extra-data.h"
#include "hphp/runtime/vm/jit/ir-instruction.h"
#include "hphp/runtime/vm/jit/memory-effects.h"
#include "hphp/runtime/vm/jit/mutation.h"
#include "hphp/runtime/vm/jit/pass-tracer.h"
#include "hphp/runtime/vm/jit/simplify.h"
#include "hphp/runtime/vm/jit/state-vector.h"
#include "hphp/runtime/vm/jit/timer.h"

namespace HPHP { namespace jit {

namespace {

TRACE_SET_MOD(hhir_load);

//////////////////////////////////////////////////////////////////////

// Reverse post order block ids.
using RpoID = uint32_t;

//////////////////////////////////////////////////////////////////////

/*
 * Information about a value in memory.
 *
 * We either have nullptr, meaning we know nothing, or we know a specific
 * SSATmp* for the value, or we'll track the Block* where different non-null
 * states were merged, which is also a location at which we will be able to
 * insert a phi to make a new available SSATmp.
 *
 * For some discussion on how we know whether the SSATmp* is usable (i.e. its
 * definition dominates code that might want to reuse it), see the bottom of
 * this file.
 */
using ValueInfo = Either<SSATmp*,Block*>;

/*
 * Each tracked location has both a ValueInfo and a separately-tracked type.
 * We may have a known type even without an SSATmp for an available value, or
 * a type that is more refined than the knownValue SSATmp's type. The
 * knownType is always at least as refined as knownValue->type(), if
 * knownValue is an SSATmp.
 */
struct TrackedLoc {
  ValueInfo knownValue{nullptr};
  Type knownType{TTop};
};

/*
 * Abstract state for a program position.
 *
 * This has a set of TrackedLocs, and an availability mask to determine which
 * ones are currently valid. When updating a tracked location, the visitor
 * below will kill any potentially conflicting locations in the availability
 * mask without updating them.
 */
struct State {
  bool initialized = false;  // if false, we'll never reach this block

  /*
   * Currently tracked locations, indexed by their ids.
   */
  jit::vector<TrackedLoc> tracked;

  /*
   * Currently available indexes in tracked.
   */
  ALocBits avail;

  /*
   * If we know whether various RDS entries have been initialized at this
   * position.
   */
  jit::flat_set<rds::Handle> initRDS{};
};

struct BlockInfo {
  RpoID rpoID;
  State stateIn;

  /*
   * Output states for the next and taken edge. These are stored explicitly
   * for two reasons:
   *
   *   o When inserting phis, we need to see the state on each edge coming to
   *     a join point, to know what to phi.
   *
   *   o This lets us re-merge all predecessors into a block's stateIn while
   *     doing the dataflow analysis. This can get better results than just
   *     merging into stateIn in place, basically because of the way we
   *     represent the locations that need phis. If we didn't remerge, it's
   *     possible for the analysis to believe it needs to insert phis even in
   *     blocks with only a single predecessor (e.g. if the first time it saw
   *     the block it had a SSATmp* in some location, but the second time it
   *     tries to merge a Block* for a phi location).
   */
  State stateOutNext;
  State stateOutTaken;
};

struct PhiKey {
  struct Hash {
    size_t operator()(PhiKey k) const {
      return folly::hash::hash_combine(pointer_hash<Block>()(k.block), k.aloc);
    }
  };

  bool operator==(PhiKey o) const {
    return block == o.block && aloc == o.aloc;
  }

  Block* block;
  uint32_t aloc;
};

struct Global {
  explicit Global(IRUnit& unit)
    : unit(unit)
    , rpoBlocks(rpoSortCfg(unit))
    , ainfo(collect_aliases(unit, rpoBlocks))
    , blockInfo(unit, BlockInfo{})
  {}

  IRUnit& unit;
  BlockList rpoBlocks;
  AliasAnalysis ainfo;

  /*
   * Map from each block to its information. Before we've done the fixed
   * point analysis the states in the info structures are not necessarily
   * meaningful.
   */
  StateVector<Block,BlockInfo> blockInfo;

  /*
   * During the optimize pass, we may add phis for the values known to be in
   * memory locations at particular control-flow-joins. To avoid doing it
   * more than once for any given join point, this keeps track of what we've
   * added so if it's needed in more than one place we can reuse it.
   */
  jit::hash_map<PhiKey,SSATmp*,PhiKey::Hash> insertedPhis;

  /*
   * Stats
   */
  size_t instrsReduced = 0;
  size_t loadsRemoved = 0;
  size_t loadsRefined = 0;
  size_t jumpsRemoved = 0;
};

//////////////////////////////////////////////////////////////////////

using HPHP::jit::show;

std::string show(ValueInfo vi) {
  if (vi == nullptr) return "<>";
  return vi.match(
    [&] (SSATmp* t) { return t->toString(); },
    [&] (Block* b) { return folly::sformat("phi@B{}", b->id()); }
  );
}

std::string show(TrackedLoc li) {
  return folly::sformat(
    "{: >12} | {}",
    li.knownType.toString(),
    show(li.knownValue)
  );
}

DEBUG_ONLY std::string show(const State& state) {
  auto ret = std::string{};
  if (!state.initialized) {
    ret = "  unreachable\n";
    return ret;
  }

  folly::format(&ret, "  av: {}\n", show(state.avail));
  for (auto idx = uint32_t{0}; idx < state.tracked.size(); ++idx) {
    if (state.avail.test(idx)) {
      folly::format(&ret, "  {: >3} = {}\n", idx, show(state.tracked[idx]));
    }
  }

  folly::format(
    &ret,
    "  initRDS : {}\n",
    [&] {
      using namespace folly::gen;
      return from(state.initRDS) | unsplit<std::string>(",");
    }()
  );

  return ret;
}

//////////////////////////////////////////////////////////////////////

struct Local {
  const Global& global;
  State state;
};

/*
 * No flags. This means no transformations were possible for the analyzed
 * instruction.
 */
struct FNone {};

/*
 * The instruction was a pure load, and its value was known to be available
 * in `knownValue', with best known type `knownType'.
 */
struct FRedundant { ValueInfo knownValue; Type knownType; uint32_t aloc; };

/*
 * Like FRedundant, but the load is impure---rather than killing it, we can
 * factor out the load.
 */
struct FReducible { ValueInfo knownValue; Type knownType; uint32_t aloc; };

/*
 * The instruction is a load which can be refined to a better type than its
 * type-parameter currently has.
 */
struct FRefinableLoad { Type refinedType; };

/*
 * The instruction can be replaced with a nop.
 */
struct FRemovable {};

/*
 * The instruction can be legally replaced with a Jmp to either its next or
 * taken edge.
 */
struct FJmpNext {};
struct FJmpTaken {};

using Flags = boost::variant<FNone,FRedundant,FReducible,FRefinableLoad,
                             FRemovable,FJmpNext,FJmpTaken>;

//////////////////////////////////////////////////////////////////////

// Conservative list of instructions which have type-parameters safe to
// refine.
bool refinable_load_eligible(const IRInstruction& inst) {
  switch (inst.op()) {
    case LdLoc:
    case LdStk:
    case LdMBase:
    case LdMem:
      assertx(inst.hasTypeParam());
      return true;
    default:
      return false;
  }
}

void clear_everything(Local& env) {
  FTRACE(3, "    clear_everything\n");
  env.state.avail.reset();
}

TrackedLoc* find_tracked(State& state,
                         folly::Optional<ALocMeta> meta) {
  if (!meta) return nullptr;
  return state.avail[meta->index] ? &state.tracked[meta->index]
                                  : nullptr;
}

TrackedLoc* find_tracked(Local& env,
                         folly::Optional<ALocMeta> meta) {
  return find_tracked(env.state, meta);
}

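/*
 * Analyze a pure load from `acls'. Intersect the load's dst type into the
 * tracked state for the location, and report whether the load is redundant
 * (we already know the value) or at least refinable to a better type.
 */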
Flags load(Local& env,
           const IRInstruction& inst,
           AliasClass acls) {
  acls = canonicalize(acls);

  auto const meta = env.global.ainfo.find(acls);
  if (!meta) return FNone{};
  assertx(meta->index < kMaxTrackedALocs);

  auto& tracked = env.state.tracked[meta->index];

  // We can always trust the dst type of the load. Add its knowledge to
  // knownType before doing anything else.
  if (!env.state.avail[meta->index]) {
    tracked.knownValue = nullptr;
    tracked.knownType = inst.dst()->type();
    env.state.avail.set(meta->index);
  } else {
    tracked.knownType &= inst.dst()->type();
  }

  if (tracked.knownType.admitsSingleVal()) {
    tracked.knownValue = env.global.unit.cns(tracked.knownType);

    FTRACE(4, "       {} <- {}\n", show(acls), inst.dst()->toString());
    FTRACE(5, "       av: {}\n", show(env.state.avail));
    return FRedundant {
      tracked.knownValue,
      tracked.knownType,
      kMaxTrackedALocs    // it's a constant, so it is nowhere
    };
  }

  if (tracked.knownValue != nullptr) {
    // Don't reuse knownValue if it comes from a different block and the
    // current block is cold (Hint::Unused, e.g. a catch trace), to avoid
    // extending lifetimes too much.
    auto const block = tracked.knownValue.match(
      [] (SSATmp* tmp) { return tmp->inst()->block(); },
      [] (Block* blk) { return blk; }
    );
    if (inst.block() == block || inst.block()->hint() != Block::Hint::Unused) {
      return FRedundant { tracked.knownValue, tracked.knownType, meta->index };
    }
  } else {
    // Only set a new known value if we previously didn't have one. If we had
    // a known value already, we would have made the load redundant above,
    // unless we're in a Hint::Unused block. In that case, we want to keep
    // any old known value to avoid disturbing the main trace in case we
    // merge back.
    tracked.knownValue = inst.dst();

    FTRACE(4, "       {} <- {}\n", show(acls), inst.dst()->toString());
    FTRACE(5, "       av: {}\n", show(env.state.avail));
  }

  // Even if we can't make this load redundant, we might be able to refine
  // its type parameter.
  if (refinable_load_eligible(inst)) {
    if (tracked.knownType < inst.typeParam()) {
      return FRefinableLoad { tracked.knownType };
    }
  }

  return FNone{};
}

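/*
 * Record a store of `value' (or of an unknown value, if `value' is nullptr)
 * to `acls', killing any tracked locations that may alias it.
 */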
void store(Local& env, AliasClass acls, SSATmp* value) {
  acls = canonicalize(acls);

  auto const meta = env.global.ainfo.find(acls);
  if (!meta) {
    env.state.avail &= ~env.global.ainfo.may_alias(acls);
    return;
  }

  FTRACE(5, "       av: {}\n", show(env.state.avail));
  env.state.avail &= ~meta->conflicts;

  auto& current = env.state.tracked[meta->index];
  current.knownValue = value;
  current.knownType = value ? value->type() : TTop;
  env.state.avail.set(meta->index);

  FTRACE(4, "       {} <- {}\n", show(acls),
         value ? value->toString() : std::string("<>"));
  FTRACE(5, "       av: {}\n", show(env.state.avail));
}

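/*
 * Handle instructions with GeneralEffects. Check-style instructions can
 * become unconditional jumps, or reductions to checks on an SSATmp, when the
 * tracked state already decides them; the RDS init/check/mark instructions
 * use the initRDS set to drop repeated initialization checks. In every
 * fall-through case the instruction's stores are conservatively clobbered.
 */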
Flags handle_general_effects(Local& env,
                             const IRInstruction& inst,
                             GeneralEffects m) {
  auto handleCheck = [&](Type typeParam) -> folly::Optional<Flags> {
    auto const meta = env.global.ainfo.find(canonicalize(m.loads));
    if (!meta) return folly::none;

    auto const tloc = &env.state.tracked[meta->index];
    if (!env.state.avail[meta->index]) {
      // We know nothing about the location. Initialize it with our typeParam.
      tloc->knownValue = nullptr;
      tloc->knownType = typeParam;
      env.state.avail.set(meta->index);
    } else if (tloc->knownType <= typeParam) {
      // We had a type that was good enough to pass the check.
      return Flags{FJmpNext{}};
    } else {
      // We had a type that didn't pass the check. Narrow it using our
      // typeParam.
      tloc->knownType &= typeParam;
    }

    if (tloc->knownType <= TBottom) {
      // i.e., !maybe(typeParam); fail check.
      return Flags{FJmpTaken{}};
    }
    if (tloc->knownValue != nullptr) {
      return Flags{FReducible{tloc->knownValue, tloc->knownType, meta->index}};
    }
    return folly::none;
  };

  switch (inst.op()) {
  case CheckTypeMem:
  case CheckLoc:
  case CheckStk:
  case CheckMBase:
  case CheckRefInner:
    if (auto flags = handleCheck(inst.typeParam())) return *flags;
    break;

  case CheckInitMem:
    if (auto flags = handleCheck(TInitGen)) return *flags;
    break;

  case InitSProps: {
    auto const handle = inst.extra<ClassData>()->cls->sPropInitHandle();
    if (env.state.initRDS.count(handle) > 0) return FJmpNext{};
    env.state.initRDS.insert(handle);
    break;
  }

  case InitProps: {
    auto const handle = inst.extra<ClassData>()->cls->propHandle();
    if (env.state.initRDS.count(handle) > 0) return FJmpNext{};
    env.state.initRDS.insert(handle);
    break;
  }

  case CheckRDSInitialized: {
    auto const handle = inst.extra<CheckRDSInitialized>()->handle;
    if (env.state.initRDS.count(handle) > 0) return FJmpNext{};
    // Set this unconditionally; we record taken state before every
    // instruction, and next state after each instruction.
    env.state.initRDS.insert(handle);
    break;
  }

  case MarkRDSInitialized: {
    auto const handle = inst.extra<MarkRDSInitialized>()->handle;
    if (env.state.initRDS.count(handle) > 0) return FRemovable{};
    env.state.initRDS.insert(handle);
    break;
  }

  default:
    break;
  }

  store(env, m.stores, nullptr);

  return FNone{};
}

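/*
 * Handle a php-level call. We keep only the types (not the values) of the
 * tracked stack and frame locations, and clobber everything the call itself
 * reads, kills, or writes.
 */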
void handle_call_effects(Local& env,
                         const IRInstruction& inst,
                         CallEffects effects) {
  /*
   * Keep types for stack, locals, and iterators, and throw away the values.
   * We are just doing this to avoid extending lifetimes across php calls,
   * which currently always leads to spilling.
   */
  auto const keep = env.global.ainfo.all_stack |
                    env.global.ainfo.all_frame;
  env.state.avail &= keep;
  for (auto aloc = uint32_t{0};
       aloc < env.global.ainfo.locations.size();
       ++aloc) {
    if (!env.state.avail[aloc]) continue;
    env.state.tracked[aloc].knownValue = nullptr;
  }

  // Any stack locations modified by the callee are no longer valid.
  store(env, effects.kills, nullptr);
  store(env, effects.inputs, nullptr);
  store(env, effects.actrec, nullptr);
  store(env, effects.outputs, nullptr);
}

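/*
 * AssertLoc/AssertStk narrow the tracked type for a frame local or a stack
 * slot. If we already know the value there (or the narrowed type admits a
 * single value), the assert can be reduced to an AssertType on that SSATmp.
 */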
Flags handle_assert(Local& env, const IRInstruction& inst) {
  auto const acls = [&] () -> folly::Optional<AliasClass> {
    switch (inst.op()) {
    case AssertLoc:
      return AliasClass {
        AFrame { inst.src(0), inst.extra<AssertLoc>()->locId }
      };
    case AssertStk:
      return AliasClass {
        AStack { inst.src(0), inst.extra<AssertStk>()->offset, 1 }
      };
    default: break;
    }
    return folly::none;
  }();
  if (!acls) return FNone{};

  auto const meta = env.global.ainfo.find(canonicalize(*acls));
  auto const tloc = find_tracked(env, meta);
  if (!tloc) {
    FTRACE(4, "      untracked assert\n");
    return FNone{};
  }

  tloc->knownType &= inst.typeParam();
  if (tloc->knownValue != nullptr || tloc->knownType.admitsSingleVal()) {
    if (tloc->knownValue == nullptr) {
      tloc->knownValue = env.global.unit.cns(tloc->knownType);
    }

    FTRACE(4, "      reducible assert: {}\n", show(*tloc));
    return FReducible { tloc->knownValue, tloc->knownType, meta->index };
  }

  FTRACE(4, "      non-reducible assert: {}\n", show(*tloc));
  return FNone{};
}

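/*
 * A passing CheckType-style instruction gives us a refined SSATmp for a
 * value. Replace any tracked locations known to hold `oldVal' with `newVal'
 * and its (narrower) type.
 */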
void refine_value(Local& env, SSATmp* newVal, SSATmp* oldVal) {
  bitset_for_each_set(
    env.state.avail,
    [&](size_t i) {
      auto& tracked = env.state.tracked[i];
      if (tracked.knownValue != oldVal) return;
      FTRACE(4, "       refining {} to {}\n", i, newVal->toString());
      tracked.knownValue = newVal;
      tracked.knownType  = newVal->type();
    }
  );
}

//////////////////////////////////////////////////////////////////////

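/*
 * Analyze a single instruction: apply its memory effects to the tracked
 * state, then return flags describing any rewrite the optimize pass may
 * perform on it.
 */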
Flags analyze_inst(Local& env, const IRInstruction& inst) {
  auto const effects = memory_effects(inst);
  FTRACE(3, "    {}\n"
            "      {}\n",
         inst.toString(),
         show(effects));

  auto flags = Flags{};
  match<void>(
    effects,
    [&] (IrrelevantEffects) {},
    [&] (UnknownEffects)    { clear_everything(env); },
    [&] (ExitEffects)       { clear_everything(env); },
    [&] (ReturnEffects)     {},

    [&] (PureStore m)       { store(env, m.dst, m.value); },
    [&] (PureLoad m)        { flags = load(env, inst, m.src); },

    [&] (InlineEnterEffects m) { store(env, m.inlFrame, nullptr);
                                 store(env, m.inlStack, nullptr);
                                 store(env, m.actrec, nullptr); },
    [&] (InlineExitEffects m)  { store(env, m.inlFrame, nullptr);
                                 store(env, m.inlMeta, nullptr); },
    [&] (GeneralEffects m)  { flags = handle_general_effects(env, inst, m); },
    [&] (CallEffects x)     { handle_call_effects(env, inst, x); }
  );

  switch (inst.op()) {
  case CheckType:
  case CheckNonNull:
  case CheckVArray:
  case CheckDArray:
    refine_value(env, inst.dst(), inst.src(0));
    break;
  case AssertLoc:
  case AssertStk:
    flags = handle_assert(env, inst);
    break;
  default:
    break;
  }

  return flags;
}

//////////////////////////////////////////////////////////////////////

/*
 * Make sure that every predecessor ends with a Jmp that we can add arguments
 * to, so we can create a new phi'd value. This may involve splitting some
 * edges.
 */
void prepare_predecessors_for_phi(Global& env, Block* block) {
  auto preds = jit::vector<Block*>{};
  block->forEachPred([&] (Block* b) { preds.push_back(b); });

  for (auto& pred : preds) {
    if (pred->back().is(Jmp)) continue;
    auto const middle = splitEdge(env.unit, pred, block);
    ITRACE(3, "      B{} -> B{} to add a Jmp\n", pred->id(), middle->id());
    auto& midInfo = env.blockInfo[middle];
    auto& predInfo = env.blockInfo[pred];
    auto const& srcState = pred->next() == middle
      ? predInfo.stateOutNext
      : predInfo.stateOutTaken;

    // We don't need to set the in state on the new edge-splitting block,
    // because it isn't in our env.rpoBlocks, so we don't ever visit it for
    // the optimize pass.
    midInfo.stateOutTaken = srcState;
  }
}

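/*
 * Create (or reuse) the DefLabel destination that will hold the phi for
 * `alocID' at `block', and hook the incoming value up through each
 * predecessor's Jmp. Recurses when a predecessor's incoming value itself
 * needs a phi in another block.
 */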
SSATmp* resolve_phis_work(Global& env,
                          jit::vector<IRInstruction*>& mutated_labels,
                          Block* block,
                          uint32_t alocID) {
  auto& phi_record = env.insertedPhis[PhiKey { block, alocID }];
  if (phi_record) {
    ITRACE(3, "      resolve_phis: B{} for aloc {}: already made {}\n",
           block->id(),
           alocID,
           phi_record->toString());
    return phi_record;
  }

  // We should never require phis going into a catch block, because catch
  // blocks can't have multiple predecessors in HHIR.
  assertx(!block->isCatch());

  ITRACE(3, "      resolve_phis: B{} for aloc {}\n", block->id(), alocID);
  Trace::Indent indenter;

  prepare_predecessors_for_phi(env, block);

  // Create the new destination of the DefLabel at this block. This has to
  // happen before we start hooking up the Jmps on predecessors, because the
  // block could be one of its own predecessors and it might need to send
  // this new dst into the same phi.
  if (block->front().is(DefLabel)) {
    env.unit.expandLabel(&block->front(), 1);
  } else {
    block->prepend(env.unit.defLabel(1, block->front().bcctx()));
  }
  auto const label = &block->front();
  mutated_labels.push_back(label);
  phi_record = label->dst(label->numDsts() - 1);

  // Now add the new argument to each incoming Jmp. If one of the
  // predecessors has a phi state with this same block, it'll find the dst we
  // just added in phi_record.
  block->forEachPred([&] (Block* pred) {
    auto& predInfo = env.blockInfo[pred];
    auto& state = pred->next() == block ? predInfo.stateOutNext
                                        : predInfo.stateOutTaken;
    assertx(state.initialized);
    auto& incomingLoc = state.tracked[alocID];
    assertx(incomingLoc.knownValue != nullptr);

    auto const incomingTmp = incomingLoc.knownValue.match(
      [&] (SSATmp* t) { return t; },
      [&] (Block* incoming_phi_block) {
        // Note resolve_phis can add blocks, which can cause a resize of
        // env.blockInfo---don't reuse pointers/references to any block state
        // structures across here.
        return resolve_phis_work(
          env,
          mutated_labels,
          incoming_phi_block,
          alocID
        );
      }
    );

    ITRACE(4, "      B{} <~~> {}\n", pred->id(), incomingTmp->toString());
    env.unit.expandJmp(&pred->back(), incomingTmp);
  });

  ITRACE(4, "      dst: {}\n", phi_record->toString());
  return phi_record;
}

void reflow_deflabel_types(const IRUnit& unit,
                           const jit::vector<IRInstruction*>& labels) {
  for (bool changed = true; changed;) {
    changed = false;
    for (auto& l : labels) if (retypeDests(l, &unit)) changed = true;
  }
}

SSATmp* resolve_phis(Global& env, Block* block, uint32_t alocID) {
  auto mutated_labels = jit::vector<IRInstruction*>{};
  auto const ret = resolve_phis_work(env, mutated_labels, block, alocID);

  /*
   * We've potentially created some SSATmps (defined by DefLabels) whose
   * types depend on their own types. This should only be possible with
   * loops.
   *
   * Right now, we can't just wait for the final reflowTypes at the end of
   * this pass to update things, because SSATmp types may be inspected by our
   * simplify() calls while we're still optimizing, so reflow them now.
   */
  reflow_deflabel_types(env.unit, mutated_labels);

  return ret;
}

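/*
 * Turn a ValueInfo from the analysis into an SSATmp usable at `inst',
 * materializing a phi via resolve_phis if necessary. Returns nullptr if the
 * known value can't legally substitute for the instruction's dst.
 */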
template<class Flag>
SSATmp* resolve_value(Global& env, const IRInstruction& inst, Flag flags) {
  if (inst.dst() && !(flags.knownType <= inst.dst()->type())) {
    /*
     * It's possible we could assert the intersection of the types, but it's
     * not entirely clear what situations this would happen in, so let's just
     * not do it in this case for now.
     */
    FTRACE(2, "      knownType wasn't substitutable; not right now\n");
    return nullptr;
  }

  auto const val = flags.knownValue;
  if (val == nullptr) return nullptr;

  return val.match(
    [&] (SSATmp* t) { return t; },
    [&] (Block* b) { return resolve_phis(env, b, flags.aloc); }
  );
}

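/*
 * Replace a check or assert on a memory location with the equivalent
 * check/assert on the SSATmp we already have for that location, turning the
 * original instruction into a nop.
 */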
void reduce_inst(Global& env, IRInstruction& inst, const FReducible& flags) {
  auto const resolved = resolve_value(env, inst, flags);
  if (!resolved) return;

  DEBUG_ONLY Opcode oldOp = inst.op();
  DEBUG_ONLY Opcode newOp;

  auto const reduce_to = [&] (Opcode op, folly::Optional<Type> typeParam) {
    auto const taken = hasEdges(op) ? inst.taken() : nullptr;
    auto const newInst = env.unit.gen(
      op,
      inst.bcctx(),
      taken,
      typeParam,
      resolved
    );
    if (hasEdges(op)) newInst->setNext(inst.next());

    auto const block = inst.block();
    block->insert(++block->iteratorTo(&inst), newInst);
    inst.convertToNop();

    newOp = op;
  };

  switch (inst.op()) {
  case CheckTypeMem:
  case CheckLoc:
  case CheckStk:
  case CheckMBase:
  case CheckRefInner:
    reduce_to(CheckType, inst.typeParam());
    break;

  case CheckInitMem:
    reduce_to(CheckInit, folly::none);
    break;

  case AssertLoc:
  case AssertStk:
    reduce_to(AssertType, flags.knownType);
    break;

  default: always_assert(false);
  }

  FTRACE(2, "      reduced: {} = {} {}\n",
         opcodeName(oldOp),
         opcodeName(newOp),
         resolved->toString());

  ++env.instrsReduced;
}

void refine_load(Global& env,
                 IRInstruction& inst,
                 const FRefinableLoad& flags) {
  assertx(refinable_load_eligible(inst));
  assertx(flags.refinedType < inst.typeParam());

  FTRACE(2, "      refinable: {} :: {} -> {}\n",
         inst.toString(),
         inst.typeParam(),
         flags.refinedType);

  inst.setTypeParam(flags.refinedType);
  retypeDests(&inst, &env.unit);
  ++env.loadsRefined;
}

//////////////////////////////////////////////////////////////////////

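/*
 * Apply the flags computed by analyze_inst to one instruction, performing
 * the actual rewrites: replacing redundant loads, reducing checks and
 * asserts, refining load type-parameters, and folding branches that always
 * or never succeed.
 */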
void optimize_inst(Global& env, IRInstruction& inst, Flags flags) {
  match<void>(
    flags,
    [&] (FNone) {},

    [&] (FRedundant redundantFlags) {
      auto const resolved = resolve_value(env, inst, redundantFlags);
      if (!resolved) return;

      FTRACE(2, "      redundant: {} :: {} = {}\n",
             inst.dst()->toString(),
             redundantFlags.knownType.toString(),
             resolved->toString());

      if (resolved->type().subtypeOfAny(TGen, TCls)) {
        env.unit.replace(&inst, AssertType, redundantFlags.knownType, resolved);
      } else {
        env.unit.replace(&inst, Mov, resolved);
      }

      ++env.loadsRemoved;
    },

    [&] (FReducible reducibleFlags) { reduce_inst(env, inst, reducibleFlags); },

    [&] (FRefinableLoad f) { refine_load(env, inst, f); },

    [&] (FRemovable) {
      FTRACE(2, "      removable\n");
      assertx(!inst.isControlFlow());
      inst.convertToNop();
    },

    [&] (FJmpNext) {
      FTRACE(2, "      unnecessary\n");
      env.unit.replace(&inst, Jmp, inst.next());
      ++env.jumpsRemoved;
    },

    [&] (FJmpTaken) {
      FTRACE(2, "      unnecessary\n");
      env.unit.replace(&inst, Jmp, inst.taken());
      ++env.jumpsRemoved;
    }
  );

  // Re-simplify AssertType if we produced any.
  if (inst.is(AssertType)) simplifyInPlace(env.unit, &inst);
}

void optimize_block(Local& env, Global& genv, Block* blk) {
  if (!env.state.initialized) {
    FTRACE(2, "  unreachable\n");
    return;
  }

  for (auto& inst : *blk) {
    simplifyInPlace(genv.unit, &inst);
    auto const flags = analyze_inst(env, inst);
    optimize_inst(genv, inst, flags);
  }
}

void optimize(Global& genv) {
  /*
   * Simplify() calls can make blocks unreachable as we walk, and visiting
   * the unreachable blocks with simplify calls is not allowed. They may have
   * uses of SSATmps that no longer have defs, which can break how the
   * simplifier chases up to definitions.
   *
   * We use a StateVector because we can add new blocks during optimize().
   */
  StateVector<Block,bool> reachable(genv.unit, false);
  reachable[genv.unit.entry()] = true;
  for (auto& blk : genv.rpoBlocks) {
    FTRACE(1, "B{}:\n", blk->id());

    if (!reachable[blk]) {
      FTRACE(2, "  unreachable\n");
      continue;
    }

    auto env = Local { genv, genv.blockInfo[blk].stateIn };
    optimize_block(env, genv, blk);

    if (auto const x = blk->next())  reachable[x] = true;
    if (auto const x = blk->taken()) reachable[x] = true;
  }
}

//////////////////////////////////////////////////////////////////////

bool operator==(const TrackedLoc& a, const TrackedLoc& b) {
  return a.knownValue == b.knownValue && a.knownType == b.knownType;
}

bool operator==(const State& a, const State& b) {
  if (!a.initialized) return !b.initialized;
  if (!b.initialized) return false;
  return a.tracked == b.tracked && a.avail == b.avail;
}

bool operator!=(const State& a, const State& b) { return !(a == b); }

//////////////////////////////////////////////////////////////////////

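/*
 * Merge the TrackedLoc `src' into `dst' at the join point `target'. Types
 * are unioned; if the two sides know different SSATmps and they don't share
 * a least common ancestor, we record `target' as the block that would need a
 * phi in order to reuse the value.
 */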
void merge_into(Block* target, TrackedLoc& dst, const TrackedLoc& src) {
  dst.knownType |= src.knownType;

  if (dst.knownValue == src.knownValue || dst.knownValue == nullptr) return;
  if (src.knownValue == nullptr) {
    dst.knownValue = nullptr;
    return;
  }

  dst.knownValue.match(
    [&](SSATmp* tdst) {
      src.knownValue.match(
        [&](SSATmp* tsrc) {
          if (auto const lcm = least_common_ancestor(tdst, tsrc)) {
            dst.knownValue = lcm;
            return;
          }
          // We will need a phi at this block if we want to reuse this value.
          dst.knownValue = target;
        },

        [&](Block* /*blk*/) { dst.knownValue = target; });
    },

    [&](Block* /*b*/) { dst.knownValue = target; });
}

void merge_into(Global& genv, Block* target, State& dst, const State& src) {
  always_assert(src.initialized);
  if (!dst.initialized) {
    dst = src;
    return;
  }

  always_assert(dst.tracked.size() == src.tracked.size());
  always_assert(dst.tracked.size() == genv.ainfo.locations.size());

  dst.avail &= src.avail;

  bitset_for_each_set(
    src.avail,
    [&](size_t idx) {
      dst.avail &= ~genv.ainfo.locations_inv[idx].conflicts;
      if (dst.avail[idx]) {
        merge_into(target, dst.tracked[idx], src.tracked[idx]);
      }
    }
  );

  for (auto it = dst.initRDS.begin(); it != dst.initRDS.end();) {
    if (!src.initRDS.count(*it)) {
      it = dst.initRDS.erase(it);
    } else {
      ++it;
    }
  }
}

//////////////////////////////////////////////////////////////////////

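/*
 * Record per-edge output states for a block. The taken state is saved
 * before each instruction is analyzed and the next state after it, so each
 * edge's state reflects exactly what is known along that path.
 */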
void save_taken_state(Global& genv, const IRInstruction& inst,
                      const State& state) {
  if (!inst.taken()) return;

  auto& outState = genv.blockInfo[inst.block()].stateOutTaken = state;

  // CheckInitMem's pointee is TUninit on the taken branch, so update
  // outState.
  if (inst.is(CheckInitMem)) {
    auto const effects = memory_effects(inst);
    auto const ge = boost::get<GeneralEffects>(effects);
    auto const meta = genv.ainfo.find(canonicalize(ge.loads));
    if (auto const tloc = find_tracked(outState, meta)) {
      tloc->knownType &= TUninit;
    } else if (meta) {
      auto tloc = &outState.tracked[meta->index];
      tloc->knownValue = nullptr;
      tloc->knownType = TUninit;
      outState.avail.set(meta->index);
    }
  }
}

void save_next_state(Global& genv, const IRInstruction& inst,
                     const State& state) {
  if (inst.next()) genv.blockInfo[inst.block()].stateOutNext = state;
}

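/*
 * Forward dataflow fixed point: pull blocks off a worklist in RPO order,
 * analyze each instruction, and re-merge predecessor out-states into each
 * successor's in-state, rescheduling successors whose in-state changed.
 */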
void analyze(Global& genv) {
  FTRACE(1, "\nAnalyze:\n");

  /*
   * Scheduled blocks for the fixed point computation. We'll visit blocks
   * with lower reverse post order ids first.
   */
  auto incompleteQ = dataflow_worklist<RpoID>(genv.rpoBlocks.size());
  for (auto rpoID = uint32_t{0}; rpoID < genv.rpoBlocks.size(); ++rpoID) {
    genv.blockInfo[genv.rpoBlocks[rpoID]].rpoID = rpoID;
  }

  auto& entryIn = genv.blockInfo[genv.unit.entry()].stateIn;
  entryIn.initialized = true;
  entryIn.tracked.resize(genv.ainfo.locations.size());

  incompleteQ.push(0);

  do {
    auto propagate = [&] (Block* target) {
      FTRACE(3, "  -> {}\n", target->id());
      auto& targetInfo = genv.blockInfo[target];

      auto const oldState = targetInfo.stateIn;
      targetInfo.stateIn.initialized = false; // re-merge of all pred states
      target->forEachPred([&] (Block* pred) {
        auto const merge = [&](const State& predState) {
          if (predState.initialized) {
            FTRACE(7, "pulling state from pred B{}:\n{}\n",
                   pred->id(), show(predState));
            merge_into(genv, target, targetInfo.stateIn, predState);
          }
        };

        auto const& predInfo = genv.blockInfo[pred];
        if (pred->next() != pred->taken()) {
          auto const& predState = pred->next() == target
            ? predInfo.stateOutNext
            : predInfo.stateOutTaken;
          merge(predState);
        } else {
          // The predecessor jumps to this block along both its next and
          // taken branches. We can't distinguish the two paths, so just
          // merge both in.
          assertx(pred->next() == target);
          merge(predInfo.stateOutNext);
          merge(predInfo.stateOutTaken);
        }
      });

      if (oldState != targetInfo.stateIn) {
        FTRACE(7, "target state now:\n{}\n", show(targetInfo.stateIn));
        incompleteQ.push(targetInfo.rpoID);
      }
    };

    auto const blk = genv.rpoBlocks[incompleteQ.pop()];
    auto& blkInfo = genv.blockInfo[blk];
    FTRACE(2, "B{}:\n", blk->id());

    auto env = Local { genv, blkInfo.stateIn };
    for (auto& inst : *blk) {
      save_taken_state(genv, inst, env.state);
      analyze_inst(env, inst);
      save_next_state(genv, inst, env.state);
    }

    if (auto const t = blk->taken()) propagate(t);
    if (auto const n = blk->next())  propagate(n);
  } while (!incompleteQ.empty());
}

//////////////////////////////////////////////////////////////////////

}

/*
 * This pass does some analysis to try to avoid PureLoad instructions, and
 * avoid "hidden" loads that are part of some instructions like CheckLoc. It
 * also runs the simplify() subroutine on every instruction in the unit, and
 * can eliminate some conditional branches that test types of memory
 * locations.
 *
 *
 * Currently the following things are done:
 *
 *    o Replace PureLoad instructions with new uses of an available value,
 *      if we know it would load that value.
 *
 *    o Remove instructions that check the type of memory locations if we
 *      proved the memory location must hold that type.
 *
 *    o Convert instructions that check types of values in memory to use
 *      values in SSATmp virtual registers, if we know that we have a
 *      register that contains the same value as that memory location.
 *
 *    o Insert phis iff it allows us to do any of the above.
 *
 *    o Run the simplifier on every instruction.
 *
 *
 * Notes about preserving SSA form:
 *
 *   This pass can add new uses of SSATmp*'s to replace accesses to memory.
 *   This means we need to think about whether the SSATmp*'s will be defined
 *   in a location that dominates the places where we want to add new uses,
 *   which is one of the requirements to keep the IR in SSA form.
 *
 *   To understand why this seems to "just work", without any code
 *   specifically dealing with it, consider the following:
 *
 *   Suppose this pass sees a LdLoc at a program point and has determined
 *   that the local being loaded contains an SSATmp `t1'. We want to add a
 *   new use of `t1' and delete the LdLoc.
 *
 *   What this analysis state tells us is that every path through the program
 *   to the point of this LdLoc has provably stored `t1' to that local. This
 *   also means that on every path to this point, `t1' was used in some
 *   instruction that did a store(*), which means `t1' must have been defined
 *   on that path, or else the input program already wasn't in SSA form. But
 *   this is just the dominance condition we wanted: every path through the
 *   program to this load must have defined `t1', so its definition dominates
 *   the load.
 *
 *   (*) `t1' could also be defined by an instruction doing a load from that
 *       location, but it's even more obviously ok in that situation.
 *
 *   One further note: some parts of the code here are structured in
 *   anticipation of allowing the pass to conditionally explore parts of the
 *   CFG, so we don't need to propagate states down unreachable paths. But
 *   this isn't implemented yet: the above will need to be revisited at that
 *   point (essentially we'll have to guarantee we eliminate any jumps to
 *   blocks this pass proved were unreachable, to keep the above working at
 *   that point).
 */

void optimizeLoads(IRUnit& unit) {
  PassTracer tracer{&unit, Trace::hhir_load, "optimizeLoads"};
  Timer t(Timer::optimize_loads, unit.logEntry().get_pointer());

  // Unfortunately load-elim may not reach a fixed point after just one
  // round. This is because the optimization stage can cause the types of
  // SSATmps to change, which then can affect what we infer in the analysis
  // stage. So, loop until we reach a fixed point. Bound the iterations at
  // some max value for safety.
  size_t iters = 0;
  size_t instrsReduced = 0;
  size_t loadsRemoved = 0;
  size_t loadsRefined = 0;
  size_t jumpsRemoved = 0;
  do {
    auto genv = Global { unit };
    if (genv.ainfo.locations.size() == 0) {
      FTRACE(1, "no memory accesses to possibly optimize\n");
      break;
    }

    ++iters;
    FTRACE(1, "\nIteration #{}\n", iters);
    FTRACE(1, "Locations:\n{}\n", show(genv.ainfo));

    analyze(genv);

    FTRACE(1, "\n\nFixed point:\n{}\n",
      [&] () -> std::string {
        auto ret = std::string{};
        for (auto& blk : genv.rpoBlocks) {
          folly::format(&ret, "B{}:\n{}", blk->id(),
                        show(genv.blockInfo[blk].stateIn));
        }
        return ret;
      }()
    );

    FTRACE(1, "\nOptimize:\n");
    optimize(genv);

    if (!genv.instrsReduced &&
        !genv.loadsRemoved &&
        !genv.loadsRefined &&
        !genv.jumpsRemoved) {
      // Nothing changed, so we're done.
      break;
    }

    instrsReduced += genv.instrsReduced;
    loadsRemoved  += genv.loadsRemoved;
    loadsRefined  += genv.loadsRefined;
    jumpsRemoved  += genv.jumpsRemoved;

    FTRACE(2, "reflowing types\n");
    reflowTypes(genv.unit);

    // Restore reachability invariants.
    mandatoryDCE(unit);

    if (iters >= RuntimeOption::EvalHHIRLoadElimMaxIters) {
      // We've iterated way more than usual without reaching a fixed point.
      // Either there's some bug in load-elim, or this unit is especially
      // pathological. Emit a perf warning so we're aware, and stop
      // iterating.
      logPerfWarning(
        "optimize_loads_max_iters", 1,
        [&](StructuredLogEntry& cols) {
          auto const func = unit.context().initSrcKey.func();
          cols.setStr("func", func->fullName()->slice());
          cols.setStr("filename", func->unit()->filepath()->slice());
          cols.setStr("hhir_unit", show(unit));
        }
      );
      break;
    }
  } while (true);

  if (auto& entry = unit.logEntry()) {
    entry->setInt("optimize_loads_iters", iters);
    entry->setInt("optimize_loads_instrs_reduced", instrsReduced);
    entry->setInt("optimize_loads_loads_removed", loadsRemoved);
    entry->setInt("optimize_loads_loads_refined", loadsRefined);
    entry->setInt("optimize_loads_jumps_removed", jumpsRemoved);
  }
}

//////////////////////////////////////////////////////////////////////

}}