hphp/runtime/vm/jit/load-elim.cpp
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/opt.h"
#include <cstdint>

#include <folly/Format.h>
#include <folly/ScopeGuard.h>

#include "hphp/util/match.h"
#include "hphp/util/trace.h"
#include "hphp/util/dataflow-worklist.h"
#include "hphp/runtime/vm/jit/pass-tracer.h"
#include "hphp/runtime/vm/jit/containers.h"
#include "hphp/runtime/vm/jit/cfg.h"
#include "hphp/runtime/vm/jit/memory-effects.h"
#include "hphp/runtime/vm/jit/state-vector.h"
#include "hphp/runtime/vm/jit/analysis.h"
#include "hphp/runtime/vm/jit/alias-analysis.h"

namespace HPHP { namespace jit {

namespace {

TRACE_SET_MOD(hhir_load);
//////////////////////////////////////////////////////////////////////

// Reverse post order block ids.
using RpoId = uint32_t;

//////////////////////////////////////////////////////////////////////

struct TrackedLoc {
  /*
   * If we have a known value in a location, this will be non-null and point
   * to it.
   */
  SSATmp* knownValue{nullptr};

  /*
   * We may have a known type even without an available value, or one that is
   * more refined than knownValue->type().
   */
  Type knownType{Type::Top};
};
/*
 * Abstract state for a program position.
 *
 * This has a set of TrackedLocs, and an availability mask to determine which
 * ones are currently valid.  When updating a tracked location, the visitor
 * below will kill any potentially conflicting locations in the availability
 * mask without updating them.
 */
struct State {
  bool initialized = false;  // if false, we never reached this block

  /*
   * Currently tracked locations, indexed by their ids.
   */
  jit::vector<TrackedLoc> tracked;

  /*
   * Currently available indexes in tracked.
   */
  ALocBits avail;
};
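/*
 * An illustrative sketch of the availability mask (the location ids here are
 * hypothetical, not taken from a real unit): suppose locations 0 and 1 may
 * alias each other.  Starting from a state where both are valid, a store to
 * location 0 kills the conflicting bit instead of updating tracked[1]:
 *
 *   avail = {0,1}    // both tracked entries are meaningful
 *   store to loc 0   // loc 1 is in loc 0's conflict set
 *   avail = {0}      // tracked[1] is now stale and will be ignored
 */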
struct BlockInfo {
  RpoId rpoId;
  State stateIn;
};
struct Global {
  explicit Global(IRUnit& unit, BlocksWithIds&& sortedBlocks)
    : unit(unit)
    , idoms(findDominators(unit, sortedBlocks))
    , rpoBlocks(std::move(sortedBlocks.blocks))
    , ainfo(collect_aliases(unit, rpoBlocks))
    , blockInfo(unit, BlockInfo{})
  {}

  IRUnit& unit;
  IdomVector idoms;
  BlockList rpoBlocks;
  AliasAnalysis ainfo;

  /*
   * Map from each block to its information.  Before we've done the fixed
   * point analysis, the states in the info structures are not necessarily
   * meaningful.
   */
  StateVector<Block,BlockInfo> blockInfo;
};
//////////////////////////////////////////////////////////////////////

using HPHP::jit::show;

std::string show(TrackedLoc li) {
  return folly::sformat("{} :: {}",
    li.knownValue ? li.knownValue->toString() : std::string("<>"),
    li.knownType.toString()
  );
}

std::string show(const State& state) {
  auto ret = std::string{};
  if (!state.initialized) {
    ret = "  unreachable\n";
    return ret;
  }

  folly::format(&ret, "  av: {}\n", show(state.avail));
  for (auto idx = uint32_t{0}; idx < state.tracked.size(); ++idx) {
    if (state.avail.test(idx)) {
      folly::format(&ret, "  {: >3} = {}\n", idx, show(state.tracked[idx]));
    }
  }
  return ret;
}
//////////////////////////////////////////////////////////////////////

struct Local {
  Global& global;
  State state;
};

struct Flags {
  /*
   * If set, the instruction was a pure load and the value was known to be
   * available in `replaceable'.
   */
  SSATmp* replaceable{nullptr};

  /*
   * If true, the instruction was a conditional jump that can be converted to
   * an unconditional jump to its next() edge.
   */
  bool convertToJmp{false};
};
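/*
 * A usage sketch, condensed from optimize_block below: analyze_inst returns
 * these flags, and the optimizer rewrites the instruction based on them:
 *
 *   if (flags.replaceable)  -> unit.replace(&inst, Mov, flags.replaceable)
 *   if (flags.convertToJmp) -> unit.replace(&inst, Jmp, inst.next())
 */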
//////////////////////////////////////////////////////////////////////

void clear_everything(Local& env) {
  FTRACE(3, "      clear_everything\n");
  env.state.avail.reset();
}

TrackedLoc* find_tracked(Local& env,
                         const IRInstruction& inst,
                         AliasClass acls) {
  auto const meta = env.global.ainfo.find(canonicalize(acls));
  if (!meta) return nullptr;
  assert(meta->index < kMaxTrackedALocs);
  return env.state.avail[meta->index] ? &env.state.tracked[meta->index]
                                      : nullptr;
}
SSATmp* load(Local& env, const IRInstruction& inst, AliasClass acls) {
  acls = canonicalize(acls);
  auto const meta = env.global.ainfo.find(acls);
  if (!meta) return nullptr;
  assert(meta->index < kMaxTrackedALocs);
  auto& tracked = env.state.tracked[meta->index];

  if (env.state.avail[meta->index]) {
    if (tracked.knownValue) return tracked.knownValue;
    /*
     * We didn't have a value, but we had an available type.  This can happen
     * at control flow joins right now (since we don't have support for
     * inserting phis for available values).  The old type is still valid,
     * and whatever information the load instruction knows is also valid, so
     * we can keep their intersection.
     */
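    /*
     * A hypothetical example: one predecessor stored t1:Int and another
     * stored t2:Int to this location, so at the join only the type Int
     * survived.  If this load's dst is a wider t3:Cell, intersecting gives
     * Int & Cell = Int, preserving the more precise fact.
     */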
    tracked.knownType &= inst.dst()->type();
  } else {
    tracked.knownType = inst.dst()->type();
    env.state.avail.set(meta->index);
  }
  tracked.knownValue = inst.dst();

  FTRACE(4, "       {} <- {}\n", show(acls), inst.dst()->toString());
  FTRACE(5, "       av: {}\n", show(env.state.avail));
  return nullptr;
}
void store(Local& env, AliasClass acls, SSATmp* value) {
  acls = canonicalize(acls);

  auto const meta = env.global.ainfo.find(acls);
  if (!meta) {
    env.state.avail &= ~env.global.ainfo.may_alias(acls);
    return;
  }

  FTRACE(5, "       av: {}\n", show(env.state.avail));
  env.state.avail &= ~meta->conflicts;

  auto& current = env.state.tracked[meta->index];
  current.knownValue = value;
  current.knownType = value ? value->type() : Type::Top;
  env.state.avail.set(meta->index);
  FTRACE(4, "       {} <- {}\n", show(acls),
         value ? value->toString() : std::string("<>"));
  FTRACE(5, "       av: {}\n", show(env.state.avail));
}
void refine_value(Local& env, SSATmp* newVal, SSATmp* oldVal) {
  for (auto i = uint32_t{0}; i < kMaxTrackedALocs; ++i) {
    if (!env.state.avail[i]) continue;
    auto& tracked = env.state.tracked[i];
    if (tracked.knownValue != oldVal) continue;
    FTRACE(4, "       refining {} to {}\n", i, newVal->toString());
    tracked.knownValue = newVal;
    tracked.knownType  = newVal->type();
  }
}
//////////////////////////////////////////////////////////////////////

template<class Propagate>
Flags analyze_inst(Local& env,
                   const IRInstruction& inst,
                   Propagate propagate) {
  if (auto const taken = inst.taken()) propagate(taken, env.state);

  auto const effects = memory_effects(inst);
  FTRACE(3, "    {: <30} -- {}\n", show(effects), inst.toString());
  auto flags = Flags{};
  match<void>(
    effects,
    [&] (IrrelevantEffects) {},
    [&] (UnknownEffects)    { clear_everything(env); },
    [&] (InterpOneEffects)  { clear_everything(env); },
    [&] (KillFrameLocals l) {},
    [&] (ReturnEffects l)   {},
    [&] (CallEffects l)     { // Note: shouldn't need to give up types for
                              // locals, but it doesn't matter right now.
                              clear_everything(env); },
    [&] (IterEffects)       { clear_everything(env); },
    [&] (IterEffects2)      { clear_everything(env); },

    [&] (PureLoad m)        { flags.replaceable = load(env, inst, m.src); },
    [&] (PureStore m)       { store(env, m.dst, m.value); },
    [&] (PureStoreNT m)     { store(env, m.dst, m.value); },

    [&] (MayLoadStore m) {
      store(env, m.stores, nullptr);

      switch (inst.op()) {
      /*
       * We could handle CheckLoc, but right now ir-builder does all that, so
       * it's not here yet.
       */
      case CheckTypeMem:
      case CheckTypePackedArrayElem:
        if (auto const tloc = find_tracked(env, inst, m.loads)) {
          if (tloc->knownType <= inst.typeParam()) {
            flags.convertToJmp = true;
            return;
          }
          tloc->knownType &= inst.typeParam();
        }
        break;
      default:
        break;
      }
    }
  );

  switch (inst.op()) {
  case CheckType:
    refine_value(env, inst.dst(), inst.src(0));
    break;
  default:
    break;
  }

  if (auto const next = inst.next()) propagate(next, env.state);

  return flags;
}
//////////////////////////////////////////////////////////////////////

void optimize_block(Local& env, Block* blk) {
  if (!env.state.initialized) {
    FTRACE(2, "  unreachable\n");
    return;
  }

  for (auto& inst : *blk) {
    auto const flags = analyze_inst(env, inst, [&] (Block*, const State&) {});

    auto const can_replace = [&](SSATmp* what) -> bool {
      if (!(what->type() <= inst.dst()->type())) {
        /*
         * TODO(#5664026): For now we can't replace values that have types
         * that aren't at least as narrow as the current tmp we are
         * replacing.  If we wanted to, we could insert them as AssertType
         * opcodes here, but codegen-x64 can't handle those kinds of assert
         * types right now.
         */
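        /*
         * For example (hypothetical tmps): if the available value is t1:Cell
         * but this load produces t2:Int, rewriting it as t2 = Mov t1 would
         * widen t2's type, so we decline the replacement.
         */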
        FTRACE(2, "      type would change too much; not right now\n");
        return false;
      }
      if (what->inst()->is(DefConst)) return true;
      auto const defBlock = findDefiningBlock(what);
      if (!defBlock) return false;
      return dominates(defBlock, inst.block(), env.global.idoms);
    };

    if (flags.replaceable && can_replace(flags.replaceable)) {
      FTRACE(2, "      redundant: {} = Mov {}\n", inst.dst()->toString(),
             flags.replaceable->toString());
      env.global.unit.replace(&inst, Mov, flags.replaceable);
      continue;
    }

    if (flags.convertToJmp) {
      FTRACE(2, "      unnecessary\n");
      env.global.unit.replace(&inst, Jmp, inst.next());
      continue;
    }
  }
}

//////////////////////////////////////////////////////////////////////
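/*
 * Merge `src' into `dst' at a control flow join, returning whether `dst'
 * changed.  When the two predecessors disagree on the value we fall back to
 * their least common ancestor (which may be nullptr), and the merged type
 * must cover both incoming types, i.e. their union.
 */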
bool merge_into(TrackedLoc& dst, const TrackedLoc& src) {
  auto changed = false;

  if (dst.knownValue != src.knownValue) {
    dst.knownValue = least_common_ancestor(
      dst.knownValue,
      src.knownValue
    );
    changed = true;
  }

  // Even when the values differ we still merge the types: the location may
  // hold either predecessor's value at the join, so take the type union.
  auto const newType = dst.knownType | src.knownType;
  if (newType != dst.knownType) {
    dst.knownType = newType;
    changed = true;
  }

  return changed;
}
bool merge_into(Global& genv, State& dst, const State& src) {
  always_assert(src.initialized);
  if (!dst.initialized) {
    dst = src;
    return true;
  }

  always_assert(dst.tracked.size() == src.tracked.size());
  always_assert(dst.tracked.size() == genv.ainfo.locations.size());

  auto changed = false;

  auto mod_avail = [&] (ALocBits mask) {
    auto const new_avail = dst.avail & mask;
    if (new_avail != dst.avail) changed = true;
    dst.avail = new_avail;
  };

  mod_avail(src.avail);

  for (auto idx = uint32_t{0}; idx < kMaxTrackedALocs; ++idx) {
    if (!src.avail[idx]) continue;
    mod_avail(~genv.ainfo.locations_inv[idx].conflicts);

    if (dst.avail[idx]) {
      if (merge_into(dst.tracked[idx], src.tracked[idx])) {
        changed = true;
      }
    }
  }

  return changed;
}
//////////////////////////////////////////////////////////////////////

void analyze(Global& genv) {
  FTRACE(1, "\nAnalyze:\n");

  /*
   * Scheduled blocks for the fixed point computation.  We'll visit blocks
   * with lower reverse post order ids first.
   */
  auto incompleteQ = dataflow_worklist<RpoId>(genv.rpoBlocks.size());
  for (auto rpoId = uint32_t{0}; rpoId < genv.rpoBlocks.size(); ++rpoId) {
    genv.blockInfo[genv.rpoBlocks[rpoId]].rpoId = rpoId;
  }

  auto& entryIn = genv.blockInfo[genv.unit.entry()].stateIn;
  entryIn.initialized = true;
  entryIn.tracked.resize(genv.ainfo.locations.size());

  incompleteQ.push(0);

  while (!incompleteQ.empty()) {
    auto const blk = genv.rpoBlocks[incompleteQ.pop()];

    auto propagate = [&] (Block* target, const State& state) {
      auto& targetInfo = genv.blockInfo[target];
      FTRACE(3, "  -> {}\n", target->id());
      if (merge_into(genv, targetInfo.stateIn, state)) {
        FTRACE(7, "target state now:\n{}\n", show(targetInfo.stateIn));
        incompleteQ.push(targetInfo.rpoId);
      }
    };

    FTRACE(2, "B{}:\n", blk->id());
    auto env = Local { genv, genv.blockInfo[blk].stateIn };
    for (auto& inst : *blk) analyze_inst(env, inst, propagate);
  }
}
//////////////////////////////////////////////////////////////////////

}

/*
 * This pass does some analysis to try to avoid PureLoad instructions, or
 * remove unnecessary CheckTypeMem instructions.
 *
 * The way it works is to do an abstract interpretation of the IRUnit, where
 * we're tracking changes to abstract memory locations (TrackedLoc).  This
 * pass propagates input states to each block, and is repeated until every
 * reachable block's input state stops changing.
 *
 * Then we make a final walk over the CFG and try to perform some
 * optimizations.  Currently only a few things are done:
 *
 *   o Replace PureLoad instructions with new uses of an available value, if
 *     we know it would load that value.
 *
 *   o Remove instructions that check the type of memory locations, if we
 *     proved the memory location must hold that type.
 */
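/*
 * A hypothetical before/after sketch (the HHIR below is illustrative only;
 * opcode details are simplified):
 *
 *   before:                          after:
 *     t1:Int = LdLoc<Int,0> fp         t1:Int = LdLoc<Int,0> fp
 *     ...                              ...
 *     t2:Int = LdLoc<Int,0> fp         t2:Int = Mov t1
 *
 * A CheckTypeMem<Int> guarding a location whose tracked type is already a
 * subtype of Int is likewise rewritten to an unconditional Jmp to its
 * next() edge.
 */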
void optimizeLoads(IRUnit& unit) {
  if (RuntimeOption::EnableArgsInBacktraces) {
    // We want to be able to run this pass with this flag on, but need to
    // teach memory_effects about it first.
    return;
  }
  PassTracer tracer{&unit, Trace::hhir_load, "optimizeLoads"};

  auto genv = Global { unit, rpoSortCfgWithIds(unit) };
  if (genv.ainfo.locations.size() == 0) {
    FTRACE(1, "no memory accesses to possibly optimize\n");
    return;
  }
  FTRACE(1, "\nLocations:\n{}\n", show(genv.ainfo));
  analyze(genv);

  FTRACE(1, "\n\nFixed point:\n{}\n",
    [&] () -> std::string {
      auto ret = std::string{};
      for (auto& blk : genv.rpoBlocks) {
        folly::format(&ret, "B{}:\n{}", blk->id(),
                      show(genv.blockInfo[blk].stateIn));
      }
      return ret;
    }()
  );

  FTRACE(1, "\nOptimize:\n");
  for (auto& blk : genv.rpoBlocks) {
    FTRACE(1, "B{}:\n", blk->id());
    auto env = Local { genv, genv.blockInfo[blk].stateIn };
    optimize_block(env, blk);
  }
}

//////////////////////////////////////////////////////////////////////

}}