Index file line lengths
[hiphop-php.git] / hphp / hhbbc / interp-state.h
blobccf4e57299864c5672b693cde8b6dc88eb42f83c
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #ifndef incl_HHBBC_INTERP_STATE_H_
17 #define incl_HHBBC_INTERP_STATE_H_
19 #include <vector>
20 #include <string>
21 #include <map>
23 #include <boost/variant.hpp>
25 #include <folly/Optional.h>
27 #include "hphp/hhbbc/index.h"
28 #include "hphp/hhbbc/misc.h"
29 #include "hphp/hhbbc/type-system.h"
30 #include "hphp/hhbbc/bc.h"
32 namespace HPHP { namespace HHBBC {
34 //////////////////////////////////////////////////////////////////////
36 struct ClassAnalysis;
37 struct FuncAnalysis;
39 //////////////////////////////////////////////////////////////////////
42 * State of an iterator in the program.
44 * We track iterator liveness precisely, so if an iterator is DeadIter, its
45 * definitely dead and vice-versa. We only track "normal" iterators (non-weak,
46 * non-mutable), so iterators not of those type are considered "dead".
48 * We use this state to check if the "local iterator" optimization is valid.
49 * In this optimization, we leave the iterator base in a local slot instead of
50 * inc-ref-ing it and storing it in the iter.
52 * Local iteration is only possible if the positions of the keys of the base
53 * iterator are unchanged. We call an update that leaves these key positions
54 * unchanged a "safe" update; the only one that we can account for is updating
55 * the value of the current iteration key, as in:
57 * foreach ($base as $key => $value) {
58 * $base[$key] = $value + 1;
59 * }
61 * Finally, we also track a flag that is set whenever we see *any* update to
62 * the base (including "safe" updates). If we know the base is unchanged during
63 * iteration, we can further optimize the iterator in HHIR.
65 struct DeadIter {};
66 struct LiveIter {
67 IterTypes types;
68 // If the base came from a local, and all updates to it have been "safe",
69 // this field will be the id of that local. Otherwise, it will be NoLocalId.
70 LocalId baseLocal = NoLocalId;
71 // The local that is known to be equivalent to the current key in the iter.
72 // Used to detect "safe" updates to the base.
73 LocalId keyLocal = NoLocalId;
74 // The block id where this iterator was initialized. If there's more than one
75 // such block, it will be NoBlockId.
76 BlockId initBlock = NoBlockId;
77 // Set whenever we see any mutation, even "safe" ones that don't affect keys.
78 bool baseUpdated = false;
79 // Set whenever the base of the iterator cannot be an iterator
80 bool baseCannotBeObject = false;
82 using Iter = boost::variant<DeadIter, LiveIter>;
85 * Tag indicating what sort of thing contains the current member base.
87 * Note that if we're in an array-chain, the base location always reflects the
88 * location of the array which started the array-chain.
90 enum class BaseLoc {
91 None,
94 * Base is in a number of possible places after an Elem op. It
95 * cannot possibly be in an object property (although it certainly
96 * may alias one). See miElem for details.
98 * This is only used if its unclear if its actually in an array. If it is
99 * definitely in an array, then arrayChain will be non-empty and the base
100 * location will reflect where the array is located.
102 Elem,
105 * Base is in possible locations after a Prop op. This means it
106 * possibly lives in a property on an object, but possibly not
107 * (e.g. it could be a null in tvScratch). See miProp for
108 * details.
110 * If it is definitely known to be a property in an object, the
111 * locTy in the Base will be a subtype of TObj.
113 Prop,
116 * Known to be a static property on an object. This is only
117 * possible as an initial base.
119 StaticProp,
122 * Known to be contained in the current frame as a local, as the
123 * frame $this, by the evaluation stack, or inside $GLOBALS. Only
124 * possible as initial bases.
126 Local,
127 This,
128 Stack,
129 Global,
133 * Information about the current member base's type and location.
135 struct Base {
136 explicit Base(Type type = {},
137 BaseLoc loc = BaseLoc::None,
138 Type locTy = {},
139 SString locName = {},
140 LocalId locLocal = NoLocalId,
141 uint32_t locSlot = 0)
142 : type{std::move(type)}
143 , loc{loc}
144 , locTy{std::move(locTy)}
145 , locName{locName}
146 , locLocal{locLocal}
147 , locSlot{locSlot} {}
149 Type type;
150 BaseLoc loc;
153 * We also need to track effects of intermediate dims on the type of the base.
154 * So we have a type, name, and possibly associated local or stack slot for
155 * the base's container.
157 * For StaticProp, locName is the name of the property if known, or nullptr,
158 * and locTy is the type of the class containing the static property.
160 * For Prop, locName is the name of the property if it was known, and locTy
161 * gives as much information about the object type it is in. (If we actually
162 * *know* it is in an object, locTy will be a subtype of TObj.)
164 * For Local, locName is the name of the local if known, or nullptr, and
165 * locLocal is the LocalId corresponding to the local (or NoLocalId if not
166 * known).
168 * For Stack, locSlot is the stack index of the corresponding stack slot.
170 Type locTy;
171 SString locName;
172 LocalId locLocal;
173 uint32_t locSlot;
176 // An element on the eval stack
177 struct StackElem {
178 static auto constexpr NoId = std::numeric_limits<uint32_t>::max();
180 Type type;
181 // A location which is known to have an equivalent value to this
182 // stack value. This could be a valid LocalId, the special value
183 // StackDupId to indicate that its equivalent to the stack element
184 // below it, the special value StackThisId to indicate that its the
185 // value of $this, or NoLocalId if it has no known equivalents.
186 // Note that the location may not match the stack value wrt Uninit.
187 LocalId equivLoc;
188 uint32_t index;
189 uint32_t id;
192 struct InterpStack {
193 private:
194 template<typename S>
195 struct Iterator {
196 friend struct InterpStack;
197 Iterator(S* owner, uint32_t idx) :
198 owner(owner), idx(idx) {
199 assertx(idx <= owner->size());
201 void operator++() {
202 assertx(idx < owner->size());
203 ++idx;
205 void operator--() {
206 assertx(idx);
207 --idx;
209 Iterator operator-(ssize_t off) {
210 return Iterator(owner, idx - off);
212 Iterator operator+(ssize_t off) {
213 return Iterator(owner, idx + off);
215 auto& operator*() const {
216 assertx(idx < owner->index.size());
217 return owner->elems[owner->index[idx]];
219 auto* operator->() const {
220 return &operator*();
222 // very special helper for use with Add*ElemC. To prevent
223 // quadratic time/space when appending to an array, we need to
224 // ensure we're appending to an array with a single reference; but
225 // popping from an InterpStack doesn't actually remove the
226 // element. This allows us to drop the type's datatag. It means
227 // that rewinding wouldn't see the original type; but Add*ElemC is
228 // very carefully coded anyway.
229 auto unspecialize() const {
230 auto t = std::move((*this)->type);
231 (*this)->type = loosen_values(t);
232 return t;
234 StackElem* next_elem(ssize_t off) const {
235 const size_t i = owner->index[idx] + off;
236 if (i > owner->elems.size()) return nullptr;
237 return &owner->elems[i];
239 template<typename T>
240 bool operator==(const Iterator<T>& other) const {
241 return owner == other.owner && idx == other.idx;
243 template<typename T>
244 bool operator!=(const Iterator<T>& other) const {
245 return !operator==(other);
247 template<typename T>
248 const Iterator& operator=(const Iterator<T>& other) const {
249 owner = other.owner;
250 idx = other.idx;
251 return *this;
253 private:
254 S* owner;
255 uint32_t idx;
257 public:
258 using iterator = Iterator<InterpStack>;
259 using const_iterator = Iterator<const InterpStack>;
260 auto begin() { return iterator(this, 0); }
261 auto end() { return iterator(this, index.size()); }
262 auto begin() const { return const_iterator(this, 0); }
263 auto end() const { return const_iterator(this, index.size()); }
264 auto& operator[](size_t idx) { return *iterator(this, idx); }
265 auto& operator[](size_t idx) const { return *const_iterator(this, idx); }
266 auto& back() { return *iterator(this, index.size() - 1); }
267 auto& back() const { return *const_iterator(this, index.size() - 1); }
268 void push_elem(const StackElem& elm) {
269 assertx(elm.index == index.size());
270 index.push_back(elems.size());
271 elems.push_back(elm);
273 void push_elem(StackElem&& elm) {
274 assertx(elm.index == index.size());
275 index.push_back(elems.size());
276 elems.push_back(std::move(elm));
278 void push_elem(const Type& t, LocalId equivLoc,
279 uint32_t id = StackElem::NoId) {
280 uint32_t isize = index.size();
281 push_elem({t, equivLoc, isize, id});
283 void push_elem(Type&& t, LocalId equivLoc, uint32_t id = StackElem::NoId) {
284 uint32_t isize = index.size();
285 push_elem({std::move(t), equivLoc, isize, id});
287 void pop_elem() {
288 index.pop_back();
290 void erase(iterator i1, iterator i2) {
291 assertx(i1.owner == i2.owner);
292 assertx(i1.idx < i2.idx);
293 i1.owner->index.erase(i1.owner->index.begin() + i1.idx,
294 i1.owner->index.begin() + i2.idx);
296 bool empty() const { return index.empty(); }
297 size_t size() const { return index.size(); }
298 void clear() {
299 index.clear();
300 elems.clear();
302 void compact() {
303 uint32_t i = 0;
304 for (auto& ix : index) {
305 if (ix != i) {
306 assertx(ix > i);
307 std::swap(elems[i], elems[ix]);
308 ix = i;
310 ++i;
312 elems.resize(i);
314 // rewind the stack to the state it was in before the last
315 // instruction ran (which is known to have popped numPop items and
316 // pushed numPush items).
317 void rewind(int numPop, int numPush);
318 void peek(int numPop, const StackElem** values, int numPush) const;
319 void kill(int numPop, int numPush, uint32_t id);
320 void insert_after(int numPop, int numPush, const Type* types,
321 uint32_t numInst, uint32_t id);
322 private:
323 void refill(size_t elemIx, size_t indexLow, int numPop, int numPush);
324 CompactVector<uint32_t> index;
325 CompactVector<StackElem> elems;
329 * A program state at a position in a php::Block.
331 * The `initialized' flag indicates whether the state knows anything. All
332 * other fields are invalid if a state is not initialized, and notably, after
333 * all analysis has run, any blocks that still don't have initialized input
334 * states are not reachable.
336 * The `unreachable' flag means we've produced this state from analysis, but
337 * the program cannot reach this program position. This flag has two uses:
339 * o It allows us to determine arbitrary mid-block positions where code
340 * becomes unreachable, and eliminate that code in optimize.cpp.
342 * o We may still do abstract interpretation of the unreachable code, but
343 * this flag is used when merging states to allow the interpreter to
344 * analyze blocks that are unreachable without pessimizing states for
345 * reachable blocks that would've been their successors.
347 * TODO: having the interpreter visit blocks when they are unreachable still
348 * potentially merges types into object properties that aren't possible at
349 * runtime.
351 * We split off a base class from State as a convenience to enable the use
352 * default copy construction and assignment.
355 struct StateBase {
356 StateBase() {
357 initialized = unreachable = false;
359 StateBase(const StateBase&) = default;
360 StateBase(StateBase&&) = default;
361 StateBase& operator=(const StateBase&) = default;
362 StateBase& operator=(StateBase&&) = default;
364 uint8_t initialized : 1;
365 uint8_t unreachable : 1;
367 LocalId thisLoc = NoLocalId;
368 Type thisType;
369 CompactVector<Type> locals;
370 CompactVector<Iter> iters;
373 * Mapping of a local to other locals which are known to have
374 * equivalent values. This equivalence ignores Uninit; users should
375 * compare types if they care.
377 CompactVector<LocalId> equivLocals;
380 struct State : StateBase {
381 State() = default;
382 State(const State&) = default;
383 State(State&&) = default;
385 enum class Compact {};
386 State(const State& src, Compact) : StateBase(src) {
387 for (auto const& elm : src.stack) {
388 stack.push_elem(elm.type, elm.equivLoc);
392 // delete assignment operator, so we have to explicitly choose what
393 // we want to do from amongst the various copies.
394 State& operator=(const State&) = delete;
395 State& operator=(State&&) = delete;
397 void copy_from(const State& src) {
398 *static_cast<StateBase*>(this) = src;
399 stack = src.stack;
402 void copy_from(State&& src) {
403 *static_cast<StateBase*>(this) = std::move(src);
404 stack = std::move(src.stack);
407 void copy_and_compact(const State& src) {
408 *static_cast<StateBase*>(this) = src;
409 stack.clear();
410 for (auto const& elm : src.stack) {
411 stack.push_elem(elm.type, elm.equivLoc);
415 void swap(State& other) {
416 std::swap(static_cast<StateBase&>(*this), static_cast<StateBase&>(other));
417 std::swap(stack, other.stack);
420 InterpStack stack;
424 * Return a copy of a State without copying the evaluation stack, pushing
425 * Throwable on the stack.
427 State with_throwable_only(const Index& env, const State&);
429 //////////////////////////////////////////////////////////////////////
432 * PropertiesInfo packages the PropState for private instance and
433 * static properties, which is cross-block information collected in
434 * CollectedInfo.
436 * During analysis the ClassAnalysis* is available and the PropState is
437 * retrieved from there. However during optimization the ClassAnalysis is
438 * not available and the PropState has to be retrieved off the Class in
439 * the Index. In that case cls is nullptr and the PropState fields are
440 * populated.
442 struct PropertiesInfo {
443 PropertiesInfo(const Index&, Context, ClassAnalysis*);
445 PropState& privateProperties();
446 PropState& privateStatics();
447 const PropState& privateProperties() const;
448 const PropState& privateStatics() const;
450 void setBadPropInitialValues();
452 private:
453 ClassAnalysis* const m_cls;
454 PropState m_privateProperties;
455 PropState m_privateStatics;
458 //////////////////////////////////////////////////////////////////////
461 * Map from closure classes to types for each of their used vars.
462 * Shows up in a few different interpreter structures.
464 using ClosureUseVarMap = hphp_fast_map<
465 php::Class*,
466 CompactVector<Type>
470 * Merge the types in the vector as possible use vars for the closure
471 * `clo' into the destination map.
473 void merge_closure_use_vars_into(ClosureUseVarMap& dst,
474 php::Class* clo,
475 CompactVector<Type>);
477 //////////////////////////////////////////////////////////////////////
479 enum class CollectionOpts {
480 Speculating = 1,
481 Inlining = 2,
482 EffectFreeOnly = 4,
483 Optimizing = 8,
486 inline CollectionOpts operator|(CollectionOpts o1, CollectionOpts o2) {
487 return static_cast<CollectionOpts>(
488 static_cast<int>(o1) | static_cast<int>(o2)
492 inline CollectionOpts operator&(CollectionOpts o1, CollectionOpts o2) {
493 return static_cast<CollectionOpts>(
494 static_cast<int>(o1) & static_cast<int>(o2)
498 inline CollectionOpts operator-(CollectionOpts o1, CollectionOpts o2) {
499 return static_cast<CollectionOpts>(
500 static_cast<int>(o1) & ~static_cast<int>(o2)
504 inline bool any(CollectionOpts o) { return static_cast<int>(o); }
507 * Area used for writing down any information that is collected across
508 * a series of step operations (possibly cross block).
510 struct CollectedInfo {
511 explicit CollectedInfo(const Index& index,
512 Context ctx,
513 ClassAnalysis* cls,
514 CollectionOpts opts,
515 const FuncAnalysis* fa = nullptr);
517 ClosureUseVarMap closureUseTypes;
518 PropertiesInfo props;
519 hphp_fast_set<std::pair<const php::Func*, BlockId>>
520 unfoldableFuncs;
521 bool mayUseVV{false};
522 bool effectFree{true};
523 bool hasInvariantIterBase{false};
524 CollectionOpts opts{};
525 bool (*propagate_constants)(const Bytecode& bc, State& state,
526 BytecodeVec& out) = nullptr;
528 * See FuncAnalysisResult for details.
530 std::bitset<64> usedParams;
532 PublicSPropMutations publicSPropMutations;
534 struct MInstrState {
536 * The current member base. Updated as we move through bytecodes
537 * representing the operation.
539 Base base{};
541 bool noThrow{false};
542 bool extraPop{false};
545 * Chains of member operations on array elements will affect the type of
546 * something further back in the member instruction. This vector tracks the
547 * base,key type pair that was used at each stage. See
548 * interp-minstr.cpp:resolveArrayChain().
550 struct ArrayChainEnt {
551 Type base;
552 Type key;
553 LocalId keyLoc;
555 using ArrayChain = CompactVector<ArrayChainEnt>;
556 ArrayChain arrayChain;
558 void clear() {
559 base.loc = BaseLoc::None;
560 arrayChain.clear();
563 MInstrState mInstrState;
566 //////////////////////////////////////////////////////////////////////
569 * State merging functions, based on the union_of operation for types.
571 * These return true if the destination state changed.
573 bool merge_into(State&, const State&);
576 * State merging functions, based on the widening_union operation.
577 * See analyze.cpp for details on when this is needed.
579 bool widen_into(State&, const State&);
580 void widen_props(PropState&);
582 //////////////////////////////////////////////////////////////////////
585 * Functions to show various aspects of interpreter state as strings.
587 std::string show(const php::Func&, const Base& b);
588 std::string show(const php::Func&, const CollectedInfo::MInstrState&);
589 std::string show(const php::Func&, const Iter&);
590 std::string property_state_string(const PropertiesInfo&);
591 std::string state_string(const php::Func&, const State&, const CollectedInfo&);
593 //////////////////////////////////////////////////////////////////////
596 #endif