Optimize minstr sequences in interp
[hiphop-php.git] / hphp / hhbbc / interp-state.h
blobc71ef3c7c6ec710399b4162cee3c66ae7d676216
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #ifndef incl_HHBBC_INTERP_STATE_H_
17 #define incl_HHBBC_INTERP_STATE_H_
19 #include <vector>
20 #include <string>
21 #include <map>
23 #include <boost/variant.hpp>
25 #include <folly/Optional.h>
27 #include "hphp/hhbbc/index.h"
28 #include "hphp/hhbbc/misc.h"
29 #include "hphp/hhbbc/type-system.h"
30 #include "hphp/hhbbc/bc.h"
32 namespace HPHP { namespace HHBBC {
34 //////////////////////////////////////////////////////////////////////
36 struct ClassAnalysis;
37 struct FuncAnalysis;
39 //////////////////////////////////////////////////////////////////////
42 * Types of a FPI regions. (What sort of function call is being
43 * made.)
45 enum class FPIKind {
46 Unknown, // Nothing is known.
47 CallableArr, // May be an ObjMeth or a ClsMeth.
48 Func, // Definitely a non-member function.
49 Ctor, // Definitely a constructor for an object.
50 ObjMeth, // Definitely a method on an object (possibly __call).
51 ObjMethNS, // ObjMeth, but allows obj to be null.
52 ClsMeth, // Definitely a static method on a class.
53 ObjInvoke, // Closure invoke or __invoke on an object.
54 Builtin, // Resolved builtin call; we will convert params and FCall as
55 // we go
59 * Information about a pre-live ActRec. Part of state tracked in
60 * State.
62 struct ActRec {
63 explicit ActRec(FPIKind kind,
64 Type calledOn,
65 folly::Optional<res::Class> c = folly::none,
66 folly::Optional<res::Func> f = folly::none,
67 folly::Optional<res::Func> f2 = folly::none)
68 : kind(kind)
69 , cls(std::move(c))
70 , func(std::move(f))
71 , fallbackFunc(std::move(f2))
72 , context(std::move(calledOn))
75 FPIKind kind;
76 bool foldable{false};
77 BlockId pushBlk{NoBlockId};
78 folly::Optional<res::Class> cls;
79 folly::Optional<res::Func> func;
80 // Possible fallback func if we cannot determine which will be called.
81 folly::Optional<res::Func> fallbackFunc;
82 // isCtx of context is whether it matches caller's context
83 Type context;
87 * State of an iterator in the program.
89 * We track iterator liveness precisely, so if an iterator is DeadIter, its
90 * definitely dead and vice-versa. We only track "normal" iterators (non-weak,
91 * non-mutable), so iterators not of those type are considered "dead".
93 struct DeadIter {};
94 struct LiveIter {
95 IterTypes types;
96 // The local that an iterator was initialized with (and which has not been
97 // changed since).
98 LocalId baseLocal = NoLocalId;
99 // The local that is known to be equivalent to the current key in the
100 // iterator. Used to detect "safe" writes.
101 LocalId keyLocal = NoLocalId;
102 // The block id where this iterator was initialized. If there's more than one
103 // such block, NoBlockId.
104 BlockId initBlock = NoBlockId;
106 using Iter = boost::variant<DeadIter, LiveIter>;
109 * Tag indicating what sort of thing contains the current member base.
111 * The base is always the unboxed version of the type, and its location could be
112 * inside of a Ref. So, for example, a base with BaseLoc::Frame could be located
113 * inside of a Ref that is pointed to by the Frame. (We may want to distinguish
114 * these two cases at some point if we start trying to track real information
115 * about Refs, but not yet.)
117 * Note that if we're in an array-chain, the base location always reflects the
118 * location of the array which started the array-chain.
120 enum class BaseLoc {
121 None,
124 * Base is in a number of possible places after an Elem op. It
125 * cannot possibly be in an object property (although it certainly
126 * may alias one). See miElem for details.
128 * This is only used if its unclear if its actually in an array. If it is
129 * definitely in an array, then arrayChain will be non-empty and the base
130 * location will reflect where the array is located.
132 Elem,
135 * Base is in possible locations after a Prop op. This means it
136 * possibly lives in a property on an object, but possibly not
137 * (e.g. it could be a null in tvScratch). See miProp for
138 * details.
140 * If it is definitely known to be a property in an object, the
141 * locTy in the Base will be a subtype of TObj.
143 Prop,
146 * Known to be a static property on an object. This is only
147 * possible as an initial base.
149 StaticProp,
152 * Known to be contained in the current frame as a local, as the
153 * frame $this, by the evaluation stack, or inside $GLOBALS. Only
154 * possible as initial bases.
156 Local,
157 This,
158 Stack,
159 Global,
163 * Information about the current member base's type and location.
165 struct Base {
166 explicit Base(Type type = {},
167 BaseLoc loc = BaseLoc::None,
168 Type locTy = {},
169 SString locName = {},
170 LocalId locLocal = NoLocalId,
171 uint32_t locSlot = 0)
172 : type{std::move(type)}
173 , loc{loc}
174 , locTy{std::move(locTy)}
175 , locName{locName}
176 , locLocal{locLocal}
177 , locSlot{locSlot} {}
179 Type type;
180 BaseLoc loc;
183 * We also need to track effects of intermediate dims on the type of the base.
184 * So we have a type, name, and possibly associated local or stack slot for
185 * the base's container.
187 * For StaticProp, locName is the name of the property if known, or nullptr,
188 * and locTy is the type of the class containing the static property.
190 * For Prop, locName is the name of the property if it was known, and locTy
191 * gives as much information about the object type it is in. (If we actually
192 * *know* it is in an object, locTy will be a subtype of TObj.)
194 * For Local, locName is the name of the local if known, or nullptr, and
195 * locLocal is the LocalId corresponding to the local (or NoLocalId if not
196 * known).
198 * For Stack, locSlot is the stack index of the corresponding stack slot.
200 Type locTy;
201 SString locName;
202 LocalId locLocal;
203 uint32_t locSlot;
206 // An element on the eval stack
207 struct StackElem {
208 static auto constexpr NoId = std::numeric_limits<uint32_t>::max();
210 Type type;
211 // A location which is known to have an equivalent value to this
212 // stack value. This could be a valid LocalId, the special value
213 // StackDupId to indicate that its equivalent to the stack element
214 // below it, the special value StackThisId to indicate that its the
215 // value of $this, or NoLocalId if it has no known equivalents.
216 // Note that the location may not match the stack value wrt Uninit.
217 LocalId equivLoc;
218 uint32_t index;
219 uint32_t id;
222 struct InterpStack {
223 private:
224 template<typename S>
225 struct Iterator {
226 friend struct InterpStack;
227 Iterator(S* owner, uint32_t idx) :
228 owner(owner), idx(idx) {
229 assertx(idx <= owner->size());
231 void operator++() {
232 assertx(idx < owner->size());
233 ++idx;
235 void operator--() {
236 assertx(idx);
237 --idx;
239 Iterator operator-(ssize_t off) {
240 return Iterator(owner, idx - off);
242 Iterator operator+(ssize_t off) {
243 return Iterator(owner, idx + off);
245 auto& operator*() const {
246 assertx(idx < owner->index.size());
247 return owner->elems[owner->index[idx]];
249 auto* operator->() const {
250 return &operator*();
252 // very special helper for use with Add*ElemC. To prevent
253 // quadratic time/space when appending to an array, we need to
254 // ensure we're appending to an array with a single reference; but
255 // popping from an InterpStack doesn't actually remove the
256 // element. This allows us to drop the type's datatag. It means
257 // that rewinding wouldn't see the original type; but Add*ElemC is
258 // very carefully coded anyway.
259 auto unspecialize() const {
260 auto t = std::move((*this)->type);
261 (*this)->type = loosen_values(t);
262 return t;
264 StackElem* next_elem(ssize_t off) const {
265 const size_t i = owner->index[idx] + off;
266 if (i > owner->elems.size()) return nullptr;
267 return &owner->elems[i];
269 template<typename T>
270 bool operator==(const Iterator<T>& other) const {
271 return owner == other.owner && idx == other.idx;
273 template<typename T>
274 bool operator!=(const Iterator<T>& other) const {
275 return !operator==(other);
277 template<typename T>
278 const Iterator& operator=(const Iterator<T>& other) const {
279 owner = other.owner;
280 idx = other.idx;
281 return *this;
283 private:
284 S* owner;
285 uint32_t idx;
287 public:
288 using iterator = Iterator<InterpStack>;
289 using const_iterator = Iterator<const InterpStack>;
290 auto begin() { return iterator(this, 0); }
291 auto end() { return iterator(this, index.size()); }
292 auto begin() const { return const_iterator(this, 0); }
293 auto end() const { return const_iterator(this, index.size()); }
294 auto& operator[](size_t idx) { return *iterator(this, idx); }
295 auto& operator[](size_t idx) const { return *const_iterator(this, idx); }
296 auto& back() { return *iterator(this, index.size() - 1); }
297 auto& back() const { return *const_iterator(this, index.size() - 1); }
298 void push_elem(const StackElem& elm) {
299 assertx(elm.index == index.size());
300 index.push_back(elems.size());
301 elems.push_back(elm);
303 void push_elem(StackElem&& elm) {
304 assertx(elm.index == index.size());
305 index.push_back(elems.size());
306 elems.push_back(std::move(elm));
308 void push_elem(const Type& t, LocalId equivLoc,
309 uint32_t id = StackElem::NoId) {
310 uint32_t isize = index.size();
311 push_elem({t, equivLoc, isize, id});
313 void push_elem(Type&& t, LocalId equivLoc, uint32_t id = StackElem::NoId) {
314 uint32_t isize = index.size();
315 push_elem({std::move(t), equivLoc, isize, id});
317 void pop_elem() {
318 index.pop_back();
320 void erase(iterator i1, iterator i2) {
321 assertx(i1.owner == i2.owner);
322 assertx(i1.idx < i2.idx);
323 i1.owner->index.erase(i1.owner->index.begin() + i1.idx,
324 i1.owner->index.begin() + i2.idx);
326 bool empty() const { return index.empty(); }
327 size_t size() const { return index.size(); }
328 void clear() {
329 index.clear();
330 elems.clear();
332 void compact() {
333 uint32_t i = 0;
334 for (auto& ix : index) {
335 if (ix != i) {
336 assertx(ix > i);
337 std::swap(elems[i], elems[ix]);
338 ix = i;
340 ++i;
342 elems.resize(i);
344 // rewind the stack to the state it was in before the last
345 // instruction ran (which is known to have popped numPop items and
346 // pushed numPush items).
347 void rewind(int numPop, int numPush);
348 void peek(int numPop, const StackElem** values, int numPush) const;
349 void kill(int numPop, int numPush, uint32_t id);
350 void insert_after(int numPop, int numPush, const Type* types,
351 uint32_t numInst, uint32_t id);
352 private:
353 void refill(size_t elemIx, size_t indexLow, int numPop, int numPush);
354 CompactVector<uint32_t> index;
355 CompactVector<StackElem> elems;
359 * A program state at a position in a php::Block.
361 * The `initialized' flag indicates whether the state knows anything. All
362 * other fields are invalid if a state is not initialized, and notably, after
363 * all analysis has run, any blocks that still don't have initialized input
364 * states are not reachable.
366 * The `unreachable' flag means we've produced this state from analysis, but
367 * the program cannot reach this program position. This flag has two uses:
369 * o It allows us to determine arbitrary mid-block positions where code
370 * becomes unreachable, and eliminate that code in optimize.cpp.
372 * o HHBC invariants can complicate removing unreachable code in FPI
373 * regions---see the rules in bytecode.specification. Inside FPI regions,
374 * we still do abstract interpretation of the unreachable code, but this
375 * flag is used when merging states to allow the interpreter to analyze
376 * blocks that are unreachable without pessimizing states for reachable
377 * blocks that would've been their successors.
379 * One other note: having the interpreter visit blocks when they are
380 * unreachable still potentially merges types into object properties that
381 * aren't possible at runtime. We're only doing this to handle FPI regions for
382 * now, but it's not ideal.
384 * We split off a base class from State as a convenience to enable the use
385 * default copy construction and assignment.
388 struct StateBase {
389 StateBase() {
390 initialized = unreachable = false;
392 StateBase(const StateBase&) = default;
393 StateBase(StateBase&&) = default;
394 StateBase& operator=(const StateBase&) = default;
395 StateBase& operator=(StateBase&&) = default;
397 uint8_t initialized : 1;
398 uint8_t unreachable : 1;
400 LocalId thisLoc = NoLocalId;
401 Type thisType;
402 CompactVector<Type> locals;
403 CompactVector<Iter> iters;
404 CompactVector<Type> clsRefSlots;
405 CompactVector<ActRec> fpiStack;
408 * Mapping of a local to other locals which are known to have
409 * equivalent values. This equivalence ignores Uninit; users should
410 * compare types if they care.
412 CompactVector<LocalId> equivLocals;
415 struct State : StateBase {
416 State() = default;
417 State(const State&) = default;
418 State(State&&) = default;
420 enum class Compact {};
421 State(const State& src, Compact) : StateBase(src) {
422 for (auto const& elm : src.stack) {
423 stack.push_elem(elm.type, elm.equivLoc);
427 // delete assignment operator, so we have to explicitly choose what
428 // we want to do from amongst the various copies.
429 State& operator=(const State&) = delete;
430 State& operator=(State&&) = delete;
432 void copy_from(const State& src) {
433 *static_cast<StateBase*>(this) = src;
434 stack = src.stack;
437 void copy_from(State&& src) {
438 *static_cast<StateBase*>(this) = std::move(src);
439 stack = std::move(src.stack);
442 void copy_and_compact(const State& src) {
443 *static_cast<StateBase*>(this) = src;
444 stack.clear();
445 for (auto const& elm : src.stack) {
446 stack.push_elem(elm.type, elm.equivLoc);
450 void swap(State& other) {
451 std::swap(static_cast<StateBase&>(*this), static_cast<StateBase&>(other));
452 std::swap(stack, other.stack);
455 InterpStack stack;
459 * States are EqualityComparable (provided they are in-states for the
460 * same block).
462 bool operator==(const ActRec&, const ActRec&);
463 bool operator!=(const ActRec&, const ActRec&);
466 * Return a copy of a State without copying either the evaluation
467 * stack or FPI stack, pushing Throwable on the stack.
469 State with_throwable_only(const Index& env, const State&);
471 //////////////////////////////////////////////////////////////////////
474 * PropertiesInfo packages the PropState for private instance and
475 * static properties, which is cross-block information collected in
476 * CollectedInfo.
478 * During analysis the ClassAnalysis* is available and the PropState is
479 * retrieved from there. However during optimization the ClassAnalysis is
480 * not available and the PropState has to be retrieved off the Class in
481 * the Index. In that case cls is nullptr and the PropState fields are
482 * populated.
484 struct PropertiesInfo {
485 PropertiesInfo(const Index&, Context, ClassAnalysis*);
487 PropState& privateProperties();
488 PropState& privateStatics();
489 const PropState& privateProperties() const;
490 const PropState& privateStatics() const;
492 void setBadPropInitialValues();
494 private:
495 ClassAnalysis* const m_cls;
496 PropState m_privateProperties;
497 PropState m_privateStatics;
500 //////////////////////////////////////////////////////////////////////
503 * Map from closure classes to types for each of their used vars.
504 * Shows up in a few different interpreter structures.
506 using ClosureUseVarMap = hphp_fast_map<
507 php::Class*,
508 CompactVector<Type>
512 * Merge the types in the vector as possible use vars for the closure
513 * `clo' into the destination map.
515 void merge_closure_use_vars_into(ClosureUseVarMap& dst,
516 php::Class* clo,
517 CompactVector<Type>);
519 //////////////////////////////////////////////////////////////////////
521 enum class CollectionOpts {
522 Speculating = 1,
523 Inlining = 2,
524 EffectFreeOnly = 4,
525 Optimizing = 8,
528 inline CollectionOpts operator|(CollectionOpts o1, CollectionOpts o2) {
529 return static_cast<CollectionOpts>(
530 static_cast<int>(o1) | static_cast<int>(o2)
534 inline CollectionOpts operator&(CollectionOpts o1, CollectionOpts o2) {
535 return static_cast<CollectionOpts>(
536 static_cast<int>(o1) & static_cast<int>(o2)
540 inline CollectionOpts operator-(CollectionOpts o1, CollectionOpts o2) {
541 return static_cast<CollectionOpts>(
542 static_cast<int>(o1) & ~static_cast<int>(o2)
546 inline bool any(CollectionOpts o) { return static_cast<int>(o); }
549 * Area used for writing down any information that is collected across
550 * a series of step operations (possibly cross block).
552 struct CollectedInfo {
553 explicit CollectedInfo(const Index& index,
554 Context ctx,
555 ClassAnalysis* cls,
556 CollectionOpts opts,
557 const FuncAnalysis* fa = nullptr);
559 ClosureUseVarMap closureUseTypes;
560 PropertiesInfo props;
561 ConstantMap cnsMap;
562 hphp_fast_set<std::pair<const php::Func*, BlockId>>
563 unfoldableFuncs;
564 bool mayUseVV{false};
565 bool effectFree{true};
566 bool hasInvariantIterBase{false};
567 bool readsUntrackedConstants{false};
568 CollectionOpts opts{};
569 bool (*propagate_constants)(const Bytecode& bc, State& state,
570 BytecodeVec& out) = nullptr;
572 * See FuncAnalysisResult for details.
574 std::bitset<64> usedParams;
576 PublicSPropMutations publicSPropMutations;
578 struct MInstrState {
580 * The current member base. Updated as we move through bytecodes
581 * representing the operation.
583 Base base{};
585 bool noThrow{false};
586 bool extraPop{false};
589 * Chains of member operations on array elements will affect the type of
590 * something further back in the member instruction. This vector tracks the
591 * base,key type pair that was used at each stage. See
592 * interp-minstr.cpp:resolveArrayChain().
594 struct ArrayChainEnt {
595 Type base;
596 Type key;
597 LocalId keyLoc;
599 using ArrayChain = CompactVector<ArrayChainEnt>;
600 ArrayChain arrayChain;
602 void clear() {
603 base.loc = BaseLoc::None;
604 arrayChain.clear();
607 MInstrState mInstrState;
610 //////////////////////////////////////////////////////////////////////
613 * State merging functions, based on the union_of operation for types.
615 * These return true if the destination state changed.
617 bool merge_into(ActRec&, const ActRec&);
618 bool merge_into(State&, const State&);
621 * State merging functions, based on the widening_union operation.
622 * See analyze.cpp for details on when this is needed.
624 bool widen_into(State&, const State&);
625 void widen_props(PropState&);
627 //////////////////////////////////////////////////////////////////////
630 * Functions to show various aspects of interpreter state as strings.
632 std::string show(const ActRec& a);
633 std::string show(const php::Func&, const Base& b);
634 std::string show(const php::Func&, const CollectedInfo::MInstrState&);
635 std::string show(const php::Func&, const Iter&);
636 std::string property_state_string(const PropertiesInfo&);
637 std::string state_string(const php::Func&, const State&, const CollectedInfo&);
639 //////////////////////////////////////////////////////////////////////
642 #endif