/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/alias-analysis.h"

#include <cstdint>
#include <sstream>
#include <string>
#include <utility>

#include "hphp/util/match.h"

#include "hphp/runtime/vm/jit/alias-class.h"
#include "hphp/runtime/vm/jit/cfg.h"
#include "hphp/runtime/vm/jit/ir-unit.h"
#include "hphp/runtime/vm/jit/memory-effects.h"
27 namespace HPHP
{ namespace jit
{
29 TRACE_SET_MOD(hhir_alias
);
//////////////////////////////////////////////////////////////////////

// Locations that refer to ranges of the eval stack are expanded into
// individual stack slots only if smaller than this threshold.
constexpr int kMaxExpandedStackRange = 16;
40 void visit_locations(const BlockList
& blocks
, Visit visit
) {
41 for (auto& blk
: blocks
) {
42 FTRACE(1, "B{}:\n", blk
->id());
43 for (auto& inst
: *blk
) {
44 auto const effects
= canonicalize(memory_effects(inst
));
45 FTRACE(1, " {: <30} -- {}\n", show(effects
), inst
.toString());
48 [&] (IrrelevantEffects
) {},
49 [&] (UnknownEffects
) {},
50 [&] (ReturnEffects x
) { visit(x
.kills
); },
51 [&] (CallEffects x
) { visit(x
.kills
); visit(x
.stack
); },
52 [&] (GeneralEffects x
) { visit(x
.loads
);
56 [&] (PureLoad x
) { visit(x
.src
); },
57 [&] (PureStore x
) { visit(x
.dst
); },
58 [&] (PureSpillFrame x
) { visit(x
.stk
); visit(x
.ctx
); },
59 [&] (ExitEffects x
) { visit(x
.live
); visit(x
.kills
); }
65 folly::Optional
<uint32_t> add_class(AliasAnalysis
& ret
, AliasClass acls
) {
66 auto const ins
= ret
.locations
.insert(std::make_pair(acls
, ALocMeta
{}));
67 if (!ins
.second
) return ins
.first
->second
.index
;
68 if (ret
.locations
.size() > kMaxTrackedALocs
) {
69 always_assert(ret
.locations
.size() == kMaxTrackedALocs
+ 1);
70 ret
.locations
.erase(acls
);
73 FTRACE(1, " new: {}\n", show(acls
));
74 auto& meta
= ins
.first
->second
;
75 meta
.index
= ret
.locations
.size() - 1;
76 always_assert(meta
.index
< kMaxTrackedALocs
);
81 ALocBits
may_alias_part(const AliasAnalysis
& aa
,
83 folly::Optional
<T
> proj
,
85 ALocBits pessimistic
) {
87 if (auto const meta
= aa
.find(*proj
)) {
88 return ALocBits
{meta
->conflicts
}.set(meta
->index
);
90 assertx(acls
.maybe(any
));
93 return acls
.maybe(any
) ? pessimistic
: ALocBits
{};
97 ALocBits
expand_part(const AliasAnalysis
& aa
,
99 folly::Optional
<T
> proj
,
102 auto ret
= ALocBits
{};
104 if (auto const meta
= aa
.find(*proj
)) {
105 return ret
.set(meta
->index
); // A single tracked location.
107 assertx(acls
<= any
);
110 return any
<= acls
? all
: ret
;
113 //////////////////////////////////////////////////////////////////////
117 AliasAnalysis::AliasAnalysis(const IRUnit
& unit
)
118 : per_frame_bits(unit
.numTmps())
121 folly::Optional
<ALocMeta
> AliasAnalysis::find(AliasClass acls
) const {
122 auto const it
= locations
.find(acls
);
123 if (it
== end(locations
)) return folly::none
;
127 ALocBits
AliasAnalysis::may_alias(AliasClass acls
) const {
128 if (auto const meta
= find(acls
)) {
129 return ALocBits
{meta
->conflicts
}.set(meta
->index
);
132 auto ret
= ALocBits
{};
134 if (auto const stk
= acls
.stack()) {
135 if (stk
->size
== 1) {
136 if (auto const meta
= find(*stk
)) {
137 // Different stack slots never alias each other
138 assertx(meta
->conflicts
.none());
139 ret
.set(meta
->index
);
141 // Otherwise the stack slot is untracked, no need to pessimize.
143 auto const it
= stack_ranges
.find(*stk
);
144 // Since individual stack slots are canonicalized, we could be less
145 // pessimistic even if the entry is not in `stack_ranges', but that
146 // would require iterating over `all_stack'.
147 ret
|= it
!= end(stack_ranges
) ? it
->second
: all_stack
;
149 } else if (acls
.maybe(AStackAny
)) {
153 // Individual frame locals are canonicalized so that different `AFrame's
154 // cannot alias each other.
155 if (auto const frame
= acls
.frame()) {
156 // For now it is a single local, but this will change soon.
157 if (auto const meta
= find(*frame
)) {
158 ret
.set(meta
->index
);
160 // Otherwise the slot is untracked.
161 } else if (acls
.maybe(AFrameAny
)) {
165 ret
|= may_alias_part(*this, acls
, acls
.prop(), APropAny
, all_props
);
166 ret
|= may_alias_part(*this, acls
, acls
.elemI(), AElemIAny
, all_elemIs
);
167 ret
|= may_alias_part(*this, acls
, acls
.mis(), AMIStateAny
, all_mistate
);
168 ret
|= may_alias_part(*this, acls
, acls
.ref(), ARefAny
, all_refs
);
173 ALocBits
AliasAnalysis::expand(AliasClass acls
) const {
174 if (auto const info
= find(acls
)) return ALocBits
{}.set(info
->index
);
176 auto ret
= ALocBits
{};
178 if (auto const stack
= acls
.stack()) {
179 if (auto const info
= find(*stack
)) {
180 ret
.set(info
->index
);
182 auto const it
= stack_ranges
.find(*stack
);
183 if (it
!= end(stack_ranges
)) {
186 // We could iterate over `all_stack' and check individually, but since we
187 // don't combine stack slots in `AliasClass::precise_union()', doing so
188 // would not really help us much for now.
190 } else if (AStackAny
<= acls
) {
194 ret
|= expand_part(*this, acls
, acls
.frame(), AFrameAny
, all_frame
);
195 ret
|= expand_part(*this, acls
, acls
.prop(), APropAny
, all_props
);
196 ret
|= expand_part(*this, acls
, acls
.elemI(), AElemIAny
, all_elemIs
);
197 ret
|= expand_part(*this, acls
, acls
.mis(), AMIStateAny
, all_mistate
);
198 ret
|= expand_part(*this, acls
, acls
.ref(), ARefAny
, all_refs
);
203 AliasAnalysis
collect_aliases(const IRUnit
& unit
, const BlockList
& blocks
) {
204 FTRACE(1, "collect_aliases:vvvvvvvvvvvvvvvvvvvv\n");
205 SCOPE_EXIT
{ FTRACE(1, "collect_aliases:^^^^^^^^^^^^^^^^^^^^\n"); };
207 auto ret
= AliasAnalysis
{unit
};
210 * Right now we compute the conflict sets for object properties based only on
211 * object property offsets, and for arrays based only on index. Everything
212 * colliding in that regard is assumed to possibly alias.
214 auto conflict_prop_offset
= jit::hash_map
<uint32_t,ALocBits
>{};
215 auto conflict_array_index
= jit::hash_map
<int64_t,ALocBits
>{};
217 visit_locations(blocks
, [&] (AliasClass acls
) {
218 if (auto const prop
= acls
.is_prop()) {
219 if (auto const index
= add_class(ret
, acls
)) {
220 conflict_prop_offset
[prop
->offset
].set(*index
);
225 if (auto const elemI
= acls
.is_elemI()) {
226 if (auto const index
= add_class(ret
, acls
)) {
227 conflict_array_index
[elemI
->idx
].set(*index
);
232 if (auto const ref
= acls
.is_ref()) {
233 if (auto const index
= add_class(ret
, acls
)) {
234 ret
.all_refs
.set(*index
);
239 if (acls
.is_frame() || acls
.is_mis()) {
240 add_class(ret
, acls
);
245 * Note that unlike the above we're going to assign location ids to the
246 * individual stack slots in AStack portions of AliasClasses that are
247 * unions of AStack ranges with other classes. (I.e. basically we're using
248 * stack() instead of is_stack() here, so it will match things that are
249 * only partially stacks.)
251 * The reason for this is that many instructions can have effects like
252 * that, when they can re-enter and do things to the stack in some range
253 * (below the re-entry depth, for example), but also affect some other type
254 * of memory (CastStk, for example). In particular this means we want that
255 * AliasClass to have an entry in the stack_ranges, so we'll populate
256 * it later. Currently most of these situations should probably bail at
257 * kMaxExpandedStackRange, although there are some situations that won't
258 * (e.g. instructions like CoerceStk, which will have an AHeapAny (from
259 * re-entry) unioned with a single stack slot).
261 if (auto const stk
= acls
.stack()) {
263 ret
.stack_ranges
[AliasClass
{ *stk
}];
265 if (stk
->size
> kMaxExpandedStackRange
) return;
267 for (auto stkidx
= int32_t{0}; stkidx
< stk
->size
; ++stkidx
) {
268 AliasClass single
= AStack
{ stk
->offset
- stkidx
, 1 };
269 add_class(ret
, single
);
276 always_assert(ret
.locations
.size() <= kMaxTrackedALocs
);
277 if (ret
.locations
.size() == kMaxTrackedALocs
) {
278 FTRACE(1, "max locations limit was reached\n");
281 auto make_conflict_set
= [&] (AliasClass acls
, ALocMeta
& meta
) {
282 if (auto const prop
= acls
.is_prop()) {
283 meta
.conflicts
= conflict_prop_offset
[prop
->offset
];
284 meta
.conflicts
.reset(meta
.index
);
285 ret
.all_props
.set(meta
.index
);
289 if (auto const elemI
= acls
.is_elemI()) {
290 meta
.conflicts
= conflict_array_index
[elemI
->idx
];
291 meta
.conflicts
.reset(meta
.index
);
292 ret
.all_elemIs
.set(meta
.index
);
296 if (auto const frame
= acls
.is_frame()) {
297 ret
.all_frame
.set(meta
.index
);
298 ret
.per_frame_bits
[frame
->fp
].set(meta
.index
);
302 if (auto const stk
= acls
.is_stack()) {
303 ret
.all_stack
.set(meta
.index
);
307 if (auto const mis
= acls
.is_mis()) {
308 ret
.all_mistate
.set(meta
.index
);
312 if (auto const ref
= acls
.is_ref()) {
313 meta
.conflicts
= ret
.all_refs
;
314 meta
.conflicts
.reset(meta
.index
);
318 always_assert_flog(0, "AliasAnalysis assigned an AliasClass an id "
319 "but it didn't match a situation we undestood: {}\n", show(acls
));
322 ret
.locations_inv
.resize(ret
.locations
.size());
323 for (auto& kv
: ret
.locations
) {
324 make_conflict_set(kv
.first
, kv
.second
);
325 ret
.locations_inv
[kv
.second
.index
] = kv
.second
;
328 * Note: this is probably more complex than it needs to be, because we're
329 * iterating the stack_ranges for each location. Since kMaxTrackedALocs
330 * is bounded by a constant, it's kinda O(stack_ranges), but not in a
331 * good way. The number of locations is currently generally small, so this
332 * is probably ok for now---but if we remove the limit we may need to
333 * revisit this so it can't blow up.
335 if (kv
.first
.is_stack()) {
336 for (auto& ent
: ret
.stack_ranges
) {
337 if (kv
.first
<= ent
.first
) {
338 FTRACE(2, " ({}) {} <= {}\n",
342 ent
.second
.set(kv
.second
.index
);
351 //////////////////////////////////////////////////////////////////////
353 std::string
show(ALocBits bits
) {
354 std::ostringstream out
;
365 std::string
show(const AliasAnalysis
& linfo
) {
366 auto ret
= std::string
{};
367 for (auto& kv
: linfo
.locations
) {
368 auto conf
= kv
.second
.conflicts
;
369 conf
.set(kv
.second
.index
);
370 folly::format(&ret
, " {: <20} = {: >3} : {}\n",
375 folly::format(&ret
, " {: <20} : {}\n"
380 "all props", show(linfo
.all_props
),
381 "all elemIs", show(linfo
.all_elemIs
),
382 "all refs", show(linfo
.all_refs
),
383 "all frame", show(linfo
.all_frame
),
384 "all stack", show(linfo
.all_stack
)
386 for (auto& kv
: linfo
.stack_ranges
) {
387 folly::format(&ret
, " ex {: <17} : {}\n",
394 //////////////////////////////////////////////////////////////////////