Improve AliasAnalysis::may_alias
[hiphop-php.git] / hphp / runtime / vm / jit / alias-analysis.cpp
blobfb5480b5b5b51049543c24b0b1fddfcc9a5f6660
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/vm/jit/alias-analysis.h"
18 #include <utility>
19 #include <sstream>
21 #include "hphp/util/match.h"
22 #include "hphp/runtime/vm/jit/ir-unit.h"
23 #include "hphp/runtime/vm/jit/cfg.h"
24 #include "hphp/runtime/vm/jit/memory-effects.h"
25 #include "hphp/runtime/vm/jit/alias-class.h"
27 namespace HPHP { namespace jit {
29 TRACE_SET_MOD(hhir_alias);
31 namespace {
33 //////////////////////////////////////////////////////////////////////
// Locations that refer to ranges of the eval stack are expanded into
// individual stack slots only if smaller than this threshold.  Ranges wider
// than this get no per-slot location ids (collect_aliases bails out early
// for them), so they can only be treated pessimistically as "all stack".
constexpr int kMaxExpandedStackRange = 16;
/*
 * Call `visit' on every AliasClass mentioned by the memory effects of every
 * instruction in `blocks'.  Effects are canonicalized first, and every
 * alias-class field of each effect variant is visited; variants with no
 * location payload (Irrelevant/Unknown) visit nothing.
 */
template<class Visit>
void visit_locations(const BlockList& blocks, Visit visit) {
  for (auto& blk : blocks) {
    FTRACE(1, "B{}:\n", blk->id());
    for (auto& inst : *blk) {
      auto const effects = canonicalize(memory_effects(inst));
      FTRACE(1, " {: <30} -- {}\n", show(effects), inst.toString());
      match<void>(
        effects,
        [&] (IrrelevantEffects) {},
        [&] (UnknownEffects) {},
        [&] (ReturnEffects x) { visit(x.kills); },
        [&] (CallEffects x) { visit(x.kills); visit(x.stack); },
        [&] (GeneralEffects x) { visit(x.loads);
                                 visit(x.stores);
                                 visit(x.moves);
                                 visit(x.kills); },
        [&] (PureLoad x) { visit(x.src); },
        [&] (PureStore x) { visit(x.dst); },
        [&] (PureSpillFrame x) { visit(x.stk); visit(x.ctx); },
        [&] (ExitEffects x) { visit(x.live); visit(x.kills); }
      );
    }
  }
}
/*
 * Try to assign a tracked-location id to `acls' in `ret'.
 *
 * Returns the existing id if the class is already tracked, a fresh dense id
 * (0 .. size-1) if there was room, or folly::none once kMaxTrackedALocs
 * distinct locations exist — in which case the tentative insertion is undone
 * so the table never exceeds the limit.
 */
folly::Optional<uint32_t> add_class(AliasAnalysis& ret, AliasClass acls) {
  auto const ins = ret.locations.insert(std::make_pair(acls, ALocMeta{}));
  if (!ins.second) return ins.first->second.index;
  if (ret.locations.size() > kMaxTrackedALocs) {
    always_assert(ret.locations.size() == kMaxTrackedALocs + 1);
    ret.locations.erase(acls);
    return folly::none;
  }
  FTRACE(1, " new: {}\n", show(acls));
  auto& meta = ins.first->second;
  // Ids are dense: the Nth distinct class gets index N-1.
  meta.index = ret.locations.size() - 1;
  always_assert(meta.index < kMaxTrackedALocs);
  return meta.index;
}
80 template<class T>
81 ALocBits may_alias_part(const AliasAnalysis& aa,
82 AliasClass acls,
83 folly::Optional<T> proj,
84 AliasClass any,
85 ALocBits pessimistic) {
86 if (proj) {
87 if (auto const meta = aa.find(*proj)) {
88 return ALocBits{meta->conflicts}.set(meta->index);
90 assertx(acls.maybe(any));
91 return pessimistic;
93 return acls.maybe(any) ? pessimistic : ALocBits{};
96 //////////////////////////////////////////////////////////////////////
// Size per_frame_bits so it is indexable by any SSATmp id in the unit
// (collect_aliases indexes it with frame-pointer tmps via frame->fp).
AliasAnalysis::AliasAnalysis(const IRUnit& unit)
  : per_frame_bits(unit.numTmps())
{}
104 folly::Optional<ALocMeta> AliasAnalysis::find(AliasClass acls) const {
105 auto const it = locations.find(acls);
106 if (it == end(locations)) return folly::none;
107 return it->second;
/*
 * Return the set of tracked location ids that may alias `acls'.
 *
 * An exact hit on a tracked location yields its own bit plus its
 * precomputed conflict set; otherwise the result is assembled from each
 * projection of `acls' (stack, frame, prop, elemI, mis, ref), falling back
 * to the relevant pessimistic all_* set when a projection isn't tracked.
 */
ALocBits AliasAnalysis::may_alias(AliasClass acls) const {
  if (auto const meta = find(acls)) {
    return ALocBits{meta->conflicts}.set(meta->index);
  }

  auto ret = ALocBits{};

  // We may have some special may-alias sets for multi-slot stack ranges, so
  // this works a little differently from the other projections.
  auto const stk = acls.stack();
  if (stk && stk->size > 1) {
    auto const it = stack_ranges.find(*stk);
    // Precise per-range set if collect_aliases recorded one; else all stack.
    ret |= it != end(stack_ranges) ? it->second : all_stack;
  } else {
    ret |= may_alias_part(*this, acls, stk, AStackAny, all_stack);
  }

  ret |= may_alias_part(*this, acls, acls.frame(), AFrameAny, all_frame);
  ret |= may_alias_part(*this, acls, acls.prop(), APropAny, all_props);
  ret |= may_alias_part(*this, acls, acls.elemI(), AElemIAny, all_elemIs);
  ret |= may_alias_part(*this, acls, acls.mis(), AMIStateAny, all_mistate);
  ret |= may_alias_part(*this, acls, acls.ref(), ARefAny, all_refs);

  return ret;
}
/*
 * Return the set of tracked location ids contained within `acls' — i.e. the
 * ids `acls' expands to, rather than the ids it merely may overlap (for
 * that, see may_alias).  Containment is tested with `<=' on alias classes.
 */
ALocBits AliasAnalysis::expand(AliasClass acls) const {
  // An exactly-tracked class expands to just its own bit.
  if (auto const info = find(acls)) return ALocBits{}.set(info->index);

  auto ret = ALocBits{};

  // Multi-slot stack classes may have a precomputed expansion; otherwise a
  // class containing all of the stack expands to every stack bit.
  auto const it = stk_expand_map.find(acls);
  if (it != end(stk_expand_map)) {
    ret |= it->second;
  } else if (AStackAny <= acls) {
    ret |= all_stack;
  }

  if (APropAny <= acls) ret |= all_props;
  if (AElemIAny <= acls) ret |= all_elemIs;
  if (AFrameAny <= acls) ret |= all_frame;
  if (AMIStateAny <= acls) ret |= all_mistate;
  if (ARefAny <= acls) ret |= all_refs;

  return ret;
}
/*
 * Build an AliasAnalysis for `unit': pass 1 assigns ids (at most
 * kMaxTrackedALocs) to the concrete locations mentioned by the memory
 * effects of the instructions in `blocks'; pass 2 turns the recorded
 * conflict keys into per-location conflict bitsets, the all_* summary sets,
 * and the stk_expand_map/stack_ranges tables.
 */
AliasAnalysis collect_aliases(const IRUnit& unit, const BlockList& blocks) {
  FTRACE(1, "collect_aliases:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(1, "collect_aliases:^^^^^^^^^^^^^^^^^^^^\n"); };

  auto ret = AliasAnalysis{unit};

  /*
   * Right now we compute the conflict sets for object properties based only on
   * object property offsets, and for arrays based only on index.  Everything
   * colliding in that regard is assumed to possibly alias.
   */
  auto conflict_prop_offset = jit::hash_map<uint32_t,ALocBits>{};
  auto conflict_array_index = jit::hash_map<int64_t,ALocBits>{};

  /*
   * Stack offset conflict sets: any stack alias class based off a different
   * StkPtr base is presumed to potentially alias.  See the comments above
   * AStack in alias-class.h.
   */
  auto conflict_stkptrs = jit::hash_map<SSATmp*,ALocBits>{};

  // Pass 1: assign ids and record which locations share a conflict key.
  visit_locations(blocks, [&] (AliasClass acls) {
    if (auto const prop = acls.is_prop()) {
      if (auto const index = add_class(ret, acls)) {
        conflict_prop_offset[prop->offset].set(*index);
      }
      return;
    }

    if (auto const elemI = acls.is_elemI()) {
      if (auto const index = add_class(ret, acls)) {
        conflict_array_index[elemI->idx].set(*index);
      }
      return;
    }

    if (auto const ref = acls.is_ref()) {
      if (auto const index = add_class(ret, acls)) {
        ret.all_refs.set(*index);
      }
      return;
    }

    if (acls.is_frame() || acls.is_mis()) {
      add_class(ret, acls);
      return;
    }

    /*
     * Note that unlike the above we're going to assign location ids to the
     * individual stack slots in AStack portions of AliasClasses that are
     * unions of AStack ranges with other classes.  (I.e. basically we're using
     * stack() instead of is_stack() here, so it will match things that are
     * only partially stacks.)
     *
     * The reason for this is that many instructions can have effects like
     * that, when they can re-enter and do things to the stack in some range
     * (below the re-entry depth, for example), but also affect some other type
     * of memory (CastStk, for example).  In particular this means we want that
     * AliasClass to have an entry in the stk_expand_map, so we'll populate
     * it later.  Currently most of these situations should probably bail at
     * kMaxExpandedStackRange, although there are some situations that won't
     * (e.g. instructions like CoerceStk, which will have an AHeapAny (from
     * re-entry) unioned with a single stack slot).
     */
    if (auto const stk = acls.stack()) {
      if (stk->size > 1) {
        // Make sure the multi-slot range has a stk_expand_map entry; its
        // bit set is filled in during pass 2 below.
        ret.stk_expand_map[AliasClass { *stk }];
      }
      if (stk->size > kMaxExpandedStackRange) return;

      ALocBits conf_set;
      bool complete = true;
      for (auto stkidx = int32_t{0}; stkidx < stk->size; ++stkidx) {
        AliasClass single = AStack { stk->base, stk->offset - stkidx, 1 };
        if (auto const index = add_class(ret, single)) {
          conf_set.set(*index);
          conflict_stkptrs[stk->base].set(*index);
        } else {
          complete = false;
        }
      }

      // Only record a precise may-alias set for the range if every one of
      // its slots got an id.
      if (stk->size > 1 && complete) {
        FTRACE(2, " range {}: {}\n", show(acls), show(conf_set));
        ret.stack_ranges[acls] = conf_set;
      }

      return;
    }
  });

  always_assert(ret.locations.size() <= kMaxTrackedALocs);
  if (ret.locations.size() == kMaxTrackedALocs) {
    FTRACE(1, "max locations limit was reached\n");
  }

  // Pass 2: build each location's conflict bitset and the all_* summaries.
  auto make_conflict_set = [&] (AliasClass acls, ALocMeta& meta) {
    if (auto const prop = acls.is_prop()) {
      meta.conflicts = conflict_prop_offset[prop->offset];
      meta.conflicts.reset(meta.index);  // a location never conflicts with itself
      ret.all_props.set(meta.index);
      return;
    }

    if (auto const elemI = acls.is_elemI()) {
      meta.conflicts = conflict_array_index[elemI->idx];
      meta.conflicts.reset(meta.index);
      ret.all_elemIs.set(meta.index);
      return;
    }

    if (auto const frame = acls.is_frame()) {
      ret.all_frame.set(meta.index);
      ret.per_frame_bits[frame->fp].set(meta.index);
      return;
    }

    if (auto const stk = acls.is_stack()) {
      ret.all_stack.set(meta.index);
      // Slots hanging off a different base pointer conflict, unless neither
      // base can be a StkPtr.
      for (auto& kv : conflict_stkptrs) {
        if (kv.first != stk->base) {
          if (kv.first->type() <= TStkPtr ||
              stk->base->type() <= TStkPtr) {
            meta.conflicts |= kv.second;
          }
        }
      }
      return;
    }

    if (auto const mis = acls.is_mis()) {
      ret.all_mistate.set(meta.index);
      return;
    }

    if (auto const ref = acls.is_ref()) {
      // Any ref may alias any other ref.
      meta.conflicts = ret.all_refs;
      meta.conflicts.reset(meta.index);
      return;
    }

    // NOTE(review): "undestood" in the message below is a typo for
    // "understood"; left untouched here since this is a comment-only change.
    always_assert_flog(0, "AliasAnalysis assigned an AliasClass an id "
      "but it didn't match a situation we undestood: {}\n", show(acls));
  };

  ret.locations_inv.resize(ret.locations.size());
  for (auto& kv : ret.locations) {
    make_conflict_set(kv.first, kv.second);
    ret.locations_inv[kv.second.index] = kv.second;

    /*
     * Note: this is probably more complex than it needs to be, because we're
     * iterating the stk_expand_map for each location.  Since kMaxTrackedALocs
     * is bounded by a constant, it's kinda O(stk_expand_map), but not in a
     * good way.  The number of locations is currently generally small, so this
     * is probably ok for now---but if we remove the limit we may need to
     * revisit this so it can't blow up.
     */
    if (kv.first.is_stack()) {
      for (auto& ent : ret.stk_expand_map) {
        if (kv.first <= ent.first) {
          FTRACE(2, " ({}) {} <= {}\n",
            kv.second.index,
            show(kv.first),
            show(ent.first));
          ent.second.set(kv.second.index);
        }
      }
    }
  }

  return ret;
}
334 //////////////////////////////////////////////////////////////////////
336 std::string show(ALocBits bits) {
337 std::ostringstream out;
338 if (bits.none()) {
339 return "0";
341 if (bits.all()) {
342 return "-1";
344 out << bits;
345 return out.str();
/*
 * Debug-printable summary of an AliasAnalysis: each tracked location with
 * its id and full may-alias set, the all_* summary sets, and the expansion
 * bit sets recorded for multi-slot stack classes.
 */
std::string show(const AliasAnalysis& linfo) {
  auto ret = std::string{};
  for (auto& kv : linfo.locations) {
    auto conf = kv.second.conflicts;
    conf.set(kv.second.index);  // include the location's own bit in the display
    folly::format(&ret, " {: <20} = {: >3} : {}\n",
      show(kv.first),
      kv.second.index,
      show(conf));
  }
  folly::format(&ret, " {: <20} : {}\n"
    " {: <20} : {}\n"
    " {: <20} : {}\n"
    " {: <20} : {}\n"
    " {: <20} : {}\n",
    "all props", show(linfo.all_props),
    "all elemIs", show(linfo.all_elemIs),
    "all refs", show(linfo.all_refs),
    "all frame", show(linfo.all_frame),
    "all stack", show(linfo.all_stack)
  );
  for (auto& kv : linfo.stk_expand_map) {
    folly::format(&ret, " ex {: <17} : {}\n",
      show(kv.first),
      show(kv.second));
  }
  return ret;
}
377 //////////////////////////////////////////////////////////////////////