Switch-related cleanup
[hiphop-php.git] / hphp / runtime / vm / jit / alias-analysis.cpp
blob129d70df6219b8bc20545241975bfdfb8b4f1f52
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/vm/jit/alias-analysis.h"
18 #include <utility>
19 #include <sstream>
21 #include "hphp/util/match.h"
22 #include "hphp/runtime/vm/jit/ir-unit.h"
23 #include "hphp/runtime/vm/jit/cfg.h"
24 #include "hphp/runtime/vm/jit/memory-effects.h"
25 #include "hphp/runtime/vm/jit/alias-class.h"
27 namespace HPHP { namespace jit {
29 TRACE_SET_MOD(hhir_alias);
31 namespace {
33 //////////////////////////////////////////////////////////////////////
35 // Locations that refer to ranges of the eval stack are expanded into
36 // individual stack slots only if smaller than this threshold.
37 constexpr int kMaxExpandedStackRange = 16;
39 template<class Visit>
40 void visit_locations(const BlockList& blocks, Visit visit) {
41 for (auto& blk : blocks) {
42 FTRACE(1, "B{}:\n", blk->id());
43 for (auto& inst : *blk) {
44 auto const effects = canonicalize(memory_effects(inst));
45 FTRACE(1, " {: <30} -- {}\n", show(effects), inst.toString());
46 match<void>(
47 effects,
48 [&] (IrrelevantEffects) {},
49 [&] (UnknownEffects) {},
50 [&] (ReturnEffects x) { visit(x.kills); },
51 [&] (CallEffects x) { visit(x.kills); visit(x.stack); },
52 [&] (GeneralEffects x) { visit(x.loads);
53 visit(x.stores);
54 visit(x.moves);
55 visit(x.kills); },
56 [&] (PureLoad x) { visit(x.src); },
57 [&] (PureStore x) { visit(x.dst); },
58 [&] (PureSpillFrame x) { visit(x.stk); visit(x.ctx); },
59 [&] (ExitEffects x) { visit(x.live); visit(x.kills); }
65 folly::Optional<uint32_t> add_class(AliasAnalysis& ret, AliasClass acls) {
66 auto const ins = ret.locations.insert(std::make_pair(acls, ALocMeta{}));
67 if (!ins.second) return ins.first->second.index;
68 if (ret.locations.size() > kMaxTrackedALocs) {
69 always_assert(ret.locations.size() == kMaxTrackedALocs + 1);
70 ret.locations.erase(acls);
71 return folly::none;
73 FTRACE(1, " new: {}\n", show(acls));
74 auto& meta = ins.first->second;
75 meta.index = ret.locations.size() - 1;
76 always_assert(meta.index < kMaxTrackedALocs);
77 return meta.index;
80 template<class T>
81 ALocBits may_alias_part(const AliasAnalysis& aa,
82 AliasClass acls,
83 folly::Optional<T> proj,
84 AliasClass any,
85 ALocBits pessimistic) {
86 if (proj) {
87 if (auto const meta = aa.find(*proj)) {
88 return ALocBits{meta->conflicts}.set(meta->index);
90 assertx(acls.maybe(any));
91 return pessimistic;
93 return acls.maybe(any) ? pessimistic : ALocBits{};
96 template<class T>
97 ALocBits expand_part(const AliasAnalysis& aa,
98 AliasClass acls,
99 folly::Optional<T> proj,
100 AliasClass any,
101 ALocBits all) {
102 auto ret = ALocBits{};
103 if (proj) {
104 if (auto const meta = aa.find(*proj)) {
105 return ret.set(meta->index); // A single tracked location.
107 assertx(acls <= any);
108 return ret;
110 return any <= acls ? all : ret;
113 //////////////////////////////////////////////////////////////////////
117 AliasAnalysis::AliasAnalysis(const IRUnit& unit)
118 : per_frame_bits(unit.numTmps())
121 folly::Optional<ALocMeta> AliasAnalysis::find(AliasClass acls) const {
122 auto const it = locations.find(acls);
123 if (it == end(locations)) return folly::none;
124 return it->second;
127 ALocBits AliasAnalysis::may_alias(AliasClass acls) const {
128 if (auto const meta = find(acls)) {
129 return ALocBits{meta->conflicts}.set(meta->index);
132 auto ret = ALocBits{};
134 if (auto const stk = acls.stack()) {
135 if (stk->size == 1) {
136 if (auto const meta = find(*stk)) {
137 // Different stack slots never alias each other
138 assertx(meta->conflicts.none());
139 ret.set(meta->index);
141 // Otherwise the stack slot is untracked, no need to pessimize.
142 } else {
143 auto const it = stack_ranges.find(*stk);
144 // Since individual stack slots are canonicalized, we could be less
145 // pessimistic even if the entry is not in `stack_ranges', but that
146 // would require iterating over `all_stack'.
147 ret |= it != end(stack_ranges) ? it->second : all_stack;
149 } else if (acls.maybe(AStackAny)) {
150 ret |= all_stack;
153 // Individual frame locals are canonicalized so that different `AFrame's
154 // cannot alias each other.
155 if (auto const frame = acls.frame()) {
156 // For now it is a single local, but this will change soon.
157 if (auto const meta = find(*frame)) {
158 ret.set(meta->index);
160 // Otherwise the slot is untracked.
161 } else if (acls.maybe(AFrameAny)) {
162 ret |= all_frame;
165 ret |= may_alias_part(*this, acls, acls.prop(), APropAny, all_props);
166 ret |= may_alias_part(*this, acls, acls.elemI(), AElemIAny, all_elemIs);
167 ret |= may_alias_part(*this, acls, acls.mis(), AMIStateAny, all_mistate);
168 ret |= may_alias_part(*this, acls, acls.ref(), ARefAny, all_refs);
170 return ret;
173 ALocBits AliasAnalysis::expand(AliasClass acls) const {
174 if (auto const info = find(acls)) return ALocBits{}.set(info->index);
176 auto ret = ALocBits{};
178 if (auto const stack = acls.stack()) {
179 if (auto const info = find(*stack)) {
180 ret.set(info->index);
181 } else {
182 auto const it = stack_ranges.find(*stack);
183 if (it != end(stack_ranges)) {
184 ret |= it->second;
186 // We could iterate over `all_stack' and check individually, but since we
187 // don't combine stack slots in `AliasClass::precise_union()', doing so
188 // would not really help us much for now.
190 } else if (AStackAny <= acls) {
191 ret |= all_stack;
194 ret |= expand_part(*this, acls, acls.frame(), AFrameAny, all_frame);
195 ret |= expand_part(*this, acls, acls.prop(), APropAny, all_props);
196 ret |= expand_part(*this, acls, acls.elemI(), AElemIAny, all_elemIs);
197 ret |= expand_part(*this, acls, acls.mis(), AMIStateAny, all_mistate);
198 ret |= expand_part(*this, acls, acls.ref(), ARefAny, all_refs);
200 return ret;
203 AliasAnalysis collect_aliases(const IRUnit& unit, const BlockList& blocks) {
204 FTRACE(1, "collect_aliases:vvvvvvvvvvvvvvvvvvvv\n");
205 SCOPE_EXIT { FTRACE(1, "collect_aliases:^^^^^^^^^^^^^^^^^^^^\n"); };
207 auto ret = AliasAnalysis{unit};
210 * Right now we compute the conflict sets for object properties based only on
211 * object property offsets, and for arrays based only on index. Everything
212 * colliding in that regard is assumed to possibly alias.
214 auto conflict_prop_offset = jit::hash_map<uint32_t,ALocBits>{};
215 auto conflict_array_index = jit::hash_map<int64_t,ALocBits>{};
217 visit_locations(blocks, [&] (AliasClass acls) {
218 if (auto const prop = acls.is_prop()) {
219 if (auto const index = add_class(ret, acls)) {
220 conflict_prop_offset[prop->offset].set(*index);
222 return;
225 if (auto const elemI = acls.is_elemI()) {
226 if (auto const index = add_class(ret, acls)) {
227 conflict_array_index[elemI->idx].set(*index);
229 return;
232 if (auto const ref = acls.is_ref()) {
233 if (auto const index = add_class(ret, acls)) {
234 ret.all_refs.set(*index);
236 return;
239 if (acls.is_frame() || acls.is_mis()) {
240 add_class(ret, acls);
241 return;
245 * Note that unlike the above we're going to assign location ids to the
246 * individual stack slots in AStack portions of AliasClasses that are
247 * unions of AStack ranges with other classes. (I.e. basically we're using
248 * stack() instead of is_stack() here, so it will match things that are
249 * only partially stacks.)
251 * The reason for this is that many instructions can have effects like
252 * that, when they can re-enter and do things to the stack in some range
253 * (below the re-entry depth, for example), but also affect some other type
254 * of memory (CastStk, for example). In particular this means we want that
255 * AliasClass to have an entry in the stack_ranges, so we'll populate
256 * it later. Currently most of these situations should probably bail at
257 * kMaxExpandedStackRange, although there are some situations that won't
258 * (e.g. instructions like CoerceStk, which will have an AHeapAny (from
259 * re-entry) unioned with a single stack slot).
261 if (auto const stk = acls.stack()) {
262 if (stk->size > 1) {
263 ret.stack_ranges[AliasClass { *stk }];
265 if (stk->size > kMaxExpandedStackRange) return;
267 for (auto stkidx = int32_t{0}; stkidx < stk->size; ++stkidx) {
268 AliasClass single = AStack { stk->offset - stkidx, 1 };
269 add_class(ret, single);
272 return;
276 always_assert(ret.locations.size() <= kMaxTrackedALocs);
277 if (ret.locations.size() == kMaxTrackedALocs) {
278 FTRACE(1, "max locations limit was reached\n");
281 auto make_conflict_set = [&] (AliasClass acls, ALocMeta& meta) {
282 if (auto const prop = acls.is_prop()) {
283 meta.conflicts = conflict_prop_offset[prop->offset];
284 meta.conflicts.reset(meta.index);
285 ret.all_props.set(meta.index);
286 return;
289 if (auto const elemI = acls.is_elemI()) {
290 meta.conflicts = conflict_array_index[elemI->idx];
291 meta.conflicts.reset(meta.index);
292 ret.all_elemIs.set(meta.index);
293 return;
296 if (auto const frame = acls.is_frame()) {
297 ret.all_frame.set(meta.index);
298 ret.per_frame_bits[frame->fp].set(meta.index);
299 return;
302 if (auto const stk = acls.is_stack()) {
303 ret.all_stack.set(meta.index);
304 return;
307 if (auto const mis = acls.is_mis()) {
308 ret.all_mistate.set(meta.index);
309 return;
312 if (auto const ref = acls.is_ref()) {
313 meta.conflicts = ret.all_refs;
314 meta.conflicts.reset(meta.index);
315 return;
318 always_assert_flog(0, "AliasAnalysis assigned an AliasClass an id "
319 "but it didn't match a situation we undestood: {}\n", show(acls));
322 ret.locations_inv.resize(ret.locations.size());
323 for (auto& kv : ret.locations) {
324 make_conflict_set(kv.first, kv.second);
325 ret.locations_inv[kv.second.index] = kv.second;
328 * Note: this is probably more complex than it needs to be, because we're
329 * iterating the stack_ranges for each location. Since kMaxTrackedALocs
330 * is bounded by a constant, it's kinda O(stack_ranges), but not in a
331 * good way. The number of locations is currently generally small, so this
332 * is probably ok for now---but if we remove the limit we may need to
333 * revisit this so it can't blow up.
335 if (kv.first.is_stack()) {
336 for (auto& ent : ret.stack_ranges) {
337 if (kv.first <= ent.first) {
338 FTRACE(2, " ({}) {} <= {}\n",
339 kv.second.index,
340 show(kv.first),
341 show(ent.first));
342 ent.second.set(kv.second.index);
348 return ret;
351 //////////////////////////////////////////////////////////////////////
353 std::string show(ALocBits bits) {
354 std::ostringstream out;
355 if (bits.none()) {
356 return "0";
358 if (bits.all()) {
359 return "-1";
361 out << bits;
362 return out.str();
365 std::string show(const AliasAnalysis& linfo) {
366 auto ret = std::string{};
367 for (auto& kv : linfo.locations) {
368 auto conf = kv.second.conflicts;
369 conf.set(kv.second.index);
370 folly::format(&ret, " {: <20} = {: >3} : {}\n",
371 show(kv.first),
372 kv.second.index,
373 show(conf));
375 folly::format(&ret, " {: <20} : {}\n"
376 " {: <20} : {}\n"
377 " {: <20} : {}\n"
378 " {: <20} : {}\n"
379 " {: <20} : {}\n",
380 "all props", show(linfo.all_props),
381 "all elemIs", show(linfo.all_elemIs),
382 "all refs", show(linfo.all_refs),
383 "all frame", show(linfo.all_frame),
384 "all stack", show(linfo.all_stack)
386 for (auto& kv : linfo.stack_ranges) {
387 folly::format(&ret, " ex {: <17} : {}\n",
388 show(kv.first),
389 show(kv.second));
391 return ret;
394 //////////////////////////////////////////////////////////////////////