2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #ifndef incl_HHBBC_INTERP_STATE_H_
17 #define incl_HHBBC_INTERP_STATE_H_
23 #include <boost/variant.hpp>
25 #include <folly/Optional.h>
27 #include "hphp/hhbbc/index.h"
28 #include "hphp/hhbbc/misc.h"
29 #include "hphp/hhbbc/type-system.h"
30 #include "hphp/hhbbc/bc.h"
32 namespace HPHP
{ namespace HHBBC
{
34 //////////////////////////////////////////////////////////////////////
39 //////////////////////////////////////////////////////////////////////
42 * State of an iterator in the program.
44 * We track iterator liveness precisely, so if an iterator is DeadIter, its
45 * definitely dead and vice-versa. We only track "normal" iterators (non-weak,
46 * non-mutable), so iterators not of those type are considered "dead".
48 * We use this state to check if the "local iterator" optimization is valid.
49 * In this optimization, we leave the iterator base in a local slot instead of
50 * inc-ref-ing it and storing it in the iter.
52 * Local iteration is only possible if the positions of the keys of the base
53 * iterator are unchanged. We call an update that leaves these key positions
54 * unchanged a "safe" update; the only one that we can account for is updating
55 * the value of the current iteration key, as in:
57 * foreach ($base as $key => $value) {
58 * $base[$key] = $value + 1;
61 * Finally, we also track a flag that is set whenever we see *any* update to
62 * the base (including "safe" updates). If we know the base is unchanged during
63 * iteration, we can further optimize the iterator in HHIR.
68 // If the base came from a local, and all updates to it have been "safe",
69 // this field will be the id of that local. Otherwise, it will be NoLocalId.
70 LocalId baseLocal
= NoLocalId
;
71 // The local that is known to be equivalent to the current key in the iter.
72 // Used to detect "safe" updates to the base.
73 LocalId keyLocal
= NoLocalId
;
74 // The block id where this iterator was initialized. If there's more than one
75 // such block, it will be NoBlockId.
76 BlockId initBlock
= NoBlockId
;
77 // Set whenever we see any mutation, even "safe" ones that don't affect keys.
78 bool baseUpdated
= false;
79 // Set whenever the base of the iterator cannot be an iterator
80 bool baseCannotBeObject
= false;
82 using Iter
= boost::variant
<DeadIter
, LiveIter
>;
85 * Tag indicating what sort of thing contains the current member base.
87 * Note that if we're in an array-chain, the base location always reflects the
88 * location of the array which started the array-chain.
94 * Base is in a number of possible places after an Elem op. It
95 * cannot possibly be in an object property (although it certainly
96 * may alias one). See miElem for details.
98 * This is only used if its unclear if its actually in an array. If it is
99 * definitely in an array, then arrayChain will be non-empty and the base
100 * location will reflect where the array is located.
105 * Base is in possible locations after a Prop op. This means it
106 * possibly lives in a property on an object, but possibly not
107 * (e.g. it could be a null in tvScratch). See miProp for
110 * If it is definitely known to be a property in an object, the
111 * locTy in the Base will be a subtype of TObj.
116 * Known to be a static property on an object. This is only
117 * possible as an initial base.
122 * Known to be contained in the current frame as a local, as the
123 * frame $this, by the evaluation stack, or inside $GLOBALS. Only
124 * possible as initial bases.
133 * Information about the current member base's type and location.
136 explicit Base(Type type
= {},
137 BaseLoc loc
= BaseLoc::None
,
139 SString locName
= {},
140 LocalId locLocal
= NoLocalId
,
141 uint32_t locSlot
= 0)
142 : type
{std::move(type
)}
144 , locTy
{std::move(locTy
)}
147 , locSlot
{locSlot
} {}
153 * We also need to track effects of intermediate dims on the type of the base.
154 * So we have a type, name, and possibly associated local or stack slot for
155 * the base's container.
157 * For StaticProp, locName is the name of the property if known, or nullptr,
158 * and locTy is the type of the class containing the static property.
160 * For Prop, locName is the name of the property if it was known, and locTy
161 * gives as much information about the object type it is in. (If we actually
162 * *know* it is in an object, locTy will be a subtype of TObj.)
164 * For Local, locName is the name of the local if known, or nullptr, and
165 * locLocal is the LocalId corresponding to the local (or NoLocalId if not
168 * For Stack, locSlot is the stack index of the corresponding stack slot.
176 // An element on the eval stack
178 static auto constexpr NoId
= std::numeric_limits
<uint32_t>::max();
181 // A location which is known to have an equivalent value to this
182 // stack value. This could be a valid LocalId, the special value
183 // StackDupId to indicate that its equivalent to the stack element
184 // below it, the special value StackThisId to indicate that its the
185 // value of $this, or NoLocalId if it has no known equivalents.
186 // Note that the location may not match the stack value wrt Uninit.
196 friend struct InterpStack
;
197 Iterator(S
* owner
, uint32_t idx
) :
198 owner(owner
), idx(idx
) {
199 assertx(idx
<= owner
->size());
202 assertx(idx
< owner
->size());
209 Iterator
operator-(ssize_t off
) {
210 return Iterator(owner
, idx
- off
);
212 Iterator
operator+(ssize_t off
) {
213 return Iterator(owner
, idx
+ off
);
215 auto& operator*() const {
216 assertx(idx
< owner
->index
.size());
217 return owner
->elems
[owner
->index
[idx
]];
219 auto* operator->() const {
222 // very special helper for use with Add*ElemC. To prevent
223 // quadratic time/space when appending to an array, we need to
224 // ensure we're appending to an array with a single reference; but
225 // popping from an InterpStack doesn't actually remove the
226 // element. This allows us to drop the type's datatag. It means
227 // that rewinding wouldn't see the original type; but Add*ElemC is
228 // very carefully coded anyway.
229 auto unspecialize() const {
230 auto t
= std::move((*this)->type
);
231 (*this)->type
= loosen_values(t
);
234 StackElem
* next_elem(ssize_t off
) const {
235 const size_t i
= owner
->index
[idx
] + off
;
236 if (i
> owner
->elems
.size()) return nullptr;
237 return &owner
->elems
[i
];
240 bool operator==(const Iterator
<T
>& other
) const {
241 return owner
== other
.owner
&& idx
== other
.idx
;
244 bool operator!=(const Iterator
<T
>& other
) const {
245 return !operator==(other
);
248 const Iterator
& operator=(const Iterator
<T
>& other
) const {
258 using iterator
= Iterator
<InterpStack
>;
259 using const_iterator
= Iterator
<const InterpStack
>;
260 auto begin() { return iterator(this, 0); }
261 auto end() { return iterator(this, index
.size()); }
262 auto begin() const { return const_iterator(this, 0); }
263 auto end() const { return const_iterator(this, index
.size()); }
264 auto& operator[](size_t idx
) { return *iterator(this, idx
); }
265 auto& operator[](size_t idx
) const { return *const_iterator(this, idx
); }
266 auto& back() { return *iterator(this, index
.size() - 1); }
267 auto& back() const { return *const_iterator(this, index
.size() - 1); }
268 void push_elem(const StackElem
& elm
) {
269 assertx(elm
.index
== index
.size());
270 index
.push_back(elems
.size());
271 elems
.push_back(elm
);
273 void push_elem(StackElem
&& elm
) {
274 assertx(elm
.index
== index
.size());
275 index
.push_back(elems
.size());
276 elems
.push_back(std::move(elm
));
278 void push_elem(const Type
& t
, LocalId equivLoc
,
279 uint32_t id
= StackElem::NoId
) {
280 uint32_t isize
= index
.size();
281 push_elem({t
, equivLoc
, isize
, id
});
283 void push_elem(Type
&& t
, LocalId equivLoc
, uint32_t id
= StackElem::NoId
) {
284 uint32_t isize
= index
.size();
285 push_elem({std::move(t
), equivLoc
, isize
, id
});
290 void erase(iterator i1
, iterator i2
) {
291 assertx(i1
.owner
== i2
.owner
);
292 assertx(i1
.idx
< i2
.idx
);
293 i1
.owner
->index
.erase(i1
.owner
->index
.begin() + i1
.idx
,
294 i1
.owner
->index
.begin() + i2
.idx
);
296 bool empty() const { return index
.empty(); }
297 size_t size() const { return index
.size(); }
304 for (auto& ix
: index
) {
307 std::swap(elems
[i
], elems
[ix
]);
314 // rewind the stack to the state it was in before the last
315 // instruction ran (which is known to have popped numPop items and
316 // pushed numPush items).
317 void rewind(int numPop
, int numPush
);
318 void peek(int numPop
, const StackElem
** values
, int numPush
) const;
319 void kill(int numPop
, int numPush
, uint32_t id
);
320 void insert_after(int numPop
, int numPush
, const Type
* types
,
321 uint32_t numInst
, uint32_t id
);
323 void refill(size_t elemIx
, size_t indexLow
, int numPop
, int numPush
);
324 CompactVector
<uint32_t> index
;
325 CompactVector
<StackElem
> elems
;
329 * A program state at a position in a php::Block.
331 * The `initialized' flag indicates whether the state knows anything. All
332 * other fields are invalid if a state is not initialized, and notably, after
333 * all analysis has run, any blocks that still don't have initialized input
334 * states are not reachable.
336 * The `unreachable' flag means we've produced this state from analysis, but
337 * the program cannot reach this program position. This flag has two uses:
339 * o It allows us to determine arbitrary mid-block positions where code
340 * becomes unreachable, and eliminate that code in optimize.cpp.
342 * o We may still do abstract interpretation of the unreachable code, but
343 * this flag is used when merging states to allow the interpreter to
344 * analyze blocks that are unreachable without pessimizing states for
345 * reachable blocks that would've been their successors.
347 * TODO: having the interpreter visit blocks when they are unreachable still
348 * potentially merges types into object properties that aren't possible at
351 * We split off a base class from State as a convenience to enable the use
352 * default copy construction and assignment.
357 initialized
= unreachable
= false;
359 StateBase(const StateBase
&) = default;
360 StateBase(StateBase
&&) = default;
361 StateBase
& operator=(const StateBase
&) = default;
362 StateBase
& operator=(StateBase
&&) = default;
364 uint8_t initialized
: 1;
365 uint8_t unreachable
: 1;
367 LocalId thisLoc
= NoLocalId
;
369 CompactVector
<Type
> locals
;
370 CompactVector
<Iter
> iters
;
373 * Mapping of a local to other locals which are known to have
374 * equivalent values. This equivalence ignores Uninit; users should
375 * compare types if they care.
377 CompactVector
<LocalId
> equivLocals
;
380 struct State
: StateBase
{
382 State(const State
&) = default;
383 State(State
&&) = default;
385 enum class Compact
{};
386 State(const State
& src
, Compact
) : StateBase(src
) {
387 for (auto const& elm
: src
.stack
) {
388 stack
.push_elem(elm
.type
, elm
.equivLoc
);
392 // delete assignment operator, so we have to explicitly choose what
393 // we want to do from amongst the various copies.
394 State
& operator=(const State
&) = delete;
395 State
& operator=(State
&&) = delete;
397 void copy_from(const State
& src
) {
398 *static_cast<StateBase
*>(this) = src
;
402 void copy_from(State
&& src
) {
403 *static_cast<StateBase
*>(this) = std::move(src
);
404 stack
= std::move(src
.stack
);
407 void copy_and_compact(const State
& src
) {
408 *static_cast<StateBase
*>(this) = src
;
410 for (auto const& elm
: src
.stack
) {
411 stack
.push_elem(elm
.type
, elm
.equivLoc
);
415 void swap(State
& other
) {
416 std::swap(static_cast<StateBase
&>(*this), static_cast<StateBase
&>(other
));
417 std::swap(stack
, other
.stack
);
424 * Return a copy of a State without copying the evaluation stack, pushing
425 * Throwable on the stack.
427 State
with_throwable_only(const Index
& env
, const State
&);
429 //////////////////////////////////////////////////////////////////////
432 * PropertiesInfo packages the PropState for private instance and
433 * static properties, which is cross-block information collected in
436 * During analysis the ClassAnalysis* is available and the PropState is
437 * retrieved from there. However during optimization the ClassAnalysis is
438 * not available and the PropState has to be retrieved off the Class in
439 * the Index. In that case cls is nullptr and the PropState fields are
442 struct PropertiesInfo
{
443 PropertiesInfo(const Index
&, Context
, ClassAnalysis
*);
445 PropState
& privateProperties();
446 PropState
& privateStatics();
447 const PropState
& privateProperties() const;
448 const PropState
& privateStatics() const;
450 void setBadPropInitialValues();
453 ClassAnalysis
* const m_cls
;
454 PropState m_privateProperties
;
455 PropState m_privateStatics
;
458 //////////////////////////////////////////////////////////////////////
461 * Map from closure classes to types for each of their used vars.
462 * Shows up in a few different interpreter structures.
464 using ClosureUseVarMap
= hphp_fast_map
<
470 * Merge the types in the vector as possible use vars for the closure
471 * `clo' into the destination map.
473 void merge_closure_use_vars_into(ClosureUseVarMap
& dst
,
475 CompactVector
<Type
>);
477 //////////////////////////////////////////////////////////////////////
479 enum class CollectionOpts
{
486 inline CollectionOpts
operator|(CollectionOpts o1
, CollectionOpts o2
) {
487 return static_cast<CollectionOpts
>(
488 static_cast<int>(o1
) | static_cast<int>(o2
)
492 inline CollectionOpts
operator&(CollectionOpts o1
, CollectionOpts o2
) {
493 return static_cast<CollectionOpts
>(
494 static_cast<int>(o1
) & static_cast<int>(o2
)
498 inline CollectionOpts
operator-(CollectionOpts o1
, CollectionOpts o2
) {
499 return static_cast<CollectionOpts
>(
500 static_cast<int>(o1
) & ~static_cast<int>(o2
)
504 inline bool any(CollectionOpts o
) { return static_cast<int>(o
); }
507 * Area used for writing down any information that is collected across
508 * a series of step operations (possibly cross block).
510 struct CollectedInfo
{
511 explicit CollectedInfo(const Index
& index
,
515 const FuncAnalysis
* fa
= nullptr);
517 ClosureUseVarMap closureUseTypes
;
518 PropertiesInfo props
;
519 hphp_fast_set
<std::pair
<const php::Func
*, BlockId
>>
521 bool mayUseVV
{false};
522 bool effectFree
{true};
523 bool hasInvariantIterBase
{false};
524 CollectionOpts opts
{};
525 bool (*propagate_constants
)(const Bytecode
& bc
, State
& state
,
526 BytecodeVec
& out
) = nullptr;
528 * See FuncAnalysisResult for details.
530 std::bitset
<64> usedParams
;
532 PublicSPropMutations publicSPropMutations
;
536 * The current member base. Updated as we move through bytecodes
537 * representing the operation.
542 bool extraPop
{false};
545 * Chains of member operations on array elements will affect the type of
546 * something further back in the member instruction. This vector tracks the
547 * base,key type pair that was used at each stage. See
548 * interp-minstr.cpp:resolveArrayChain().
550 struct ArrayChainEnt
{
555 using ArrayChain
= CompactVector
<ArrayChainEnt
>;
556 ArrayChain arrayChain
;
559 base
.loc
= BaseLoc::None
;
563 MInstrState mInstrState
;
566 //////////////////////////////////////////////////////////////////////
569 * State merging functions, based on the union_of operation for types.
571 * These return true if the destination state changed.
573 bool merge_into(State
&, const State
&);
576 * State merging functions, based on the widening_union operation.
577 * See analyze.cpp for details on when this is needed.
579 bool widen_into(State
&, const State
&);
580 void widen_props(PropState
&);
582 //////////////////////////////////////////////////////////////////////
585 * Functions to show various aspects of interpreter state as strings.
587 std::string
show(const php::Func
&, const Base
& b
);
588 std::string
show(const php::Func
&, const CollectedInfo::MInstrState
&);
589 std::string
show(const php::Func
&, const Iter
&);
590 std::string
property_state_string(const PropertiesInfo
&);
591 std::string
state_string(const php::Func
&, const State
&, const CollectedInfo
&);
593 //////////////////////////////////////////////////////////////////////