Deshim VirtualExecutor in folly
[hiphop-php.git] / hphp / hhbbc / interp-state.h
blobddf76c44643bcf045ac71d81a188afddf1cff989
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #pragma once
18 #include <vector>
19 #include <string>
20 #include <map>
22 #include <boost/variant.hpp>
24 #include "hphp/hhbbc/index.h"
25 #include "hphp/hhbbc/interp.h"
26 #include "hphp/hhbbc/misc.h"
27 #include "hphp/hhbbc/type-system.h"
28 #include "hphp/hhbbc/bc.h"
30 namespace HPHP::HHBBC {
32 //////////////////////////////////////////////////////////////////////
34 struct ClassAnalysis;
35 struct FuncAnalysis;
37 struct ClsConstantWork;
39 //////////////////////////////////////////////////////////////////////
42 * State of an iterator in the program.
44 * We track iterator liveness precisely, so if an iterator is DeadIter, its
45 * definitely dead and vice-versa. We only track "normal" iterators (non-weak,
46 * non-mutable), so iterators not of those type are considered "dead".
48 * We use this state to check if the "local iterator" optimization is valid.
49 * In this optimization, we leave the iterator base in a local slot instead of
50 * inc-ref-ing it and storing it in the iter.
52 * Local iteration is only possible if the positions of the keys of the base
53 * iterator are unchanged. We call an update that leaves these key positions
54 * unchanged a "safe" update; the only one that we can account for is updating
55 * the value of the current iteration key, as in:
57 * foreach ($base as $key => $value) {
58 * $base[$key] = $value + 1;
59 * }
61 * Finally, we also track a flag that is set whenever we see *any* update to
62 * the base (including "safe" updates). If we know the base is unchanged during
63 * iteration, we can further optimize the iterator in HHIR.
65 struct DeadIter {};
66 struct LiveIter {
67 IterTypes types;
68 // If the base came from a local, and all updates to it have been "safe",
69 // this field will be the id of that local. Otherwise, it will be NoLocalId.
70 LocalId baseLocal = NoLocalId;
71 // The local that is known to be equivalent to the current key in the iter.
72 // Used to detect "safe" updates to the base.
73 LocalId keyLocal = NoLocalId;
74 // The block id where this iterator was initialized. If there's more than one
75 // such block, it will be NoBlockId.
76 BlockId initBlock = NoBlockId;
77 // Set whenever we see any mutation, even "safe" ones that don't affect keys.
78 bool baseUpdated = false;
80 using Iter = boost::variant<DeadIter, LiveIter>;
83 * Tag indicating what sort of thing contains the current member base.
85 * Note that if we're in an array-chain, the base location always reflects the
86 * location of the array which started the array-chain.
88 enum class BaseLoc {
89 None,
92 * Base is in a number of possible places after an Elem op. It
93 * cannot possibly be in an object property (although it certainly
94 * may alias one). See miElem for details.
96 * This is only used if its unclear if its actually in an array. If it is
97 * definitely in an array, then arrayChain will be non-empty and the base
98 * location will reflect where the array is located.
100 Elem,
103 * Base is in possible locations after a Prop op. This means it
104 * possibly lives in a property on an object, but possibly not
105 * (e.g. it could be a null in tvScratch). See miProp for
106 * details.
108 * If it is definitely known to be a property in an object, the
109 * locTy in the Base will be a subtype of TObj.
111 Prop,
114 * Known to be a static property on an object. This is only
115 * possible as an initial base.
117 StaticProp,
120 * Known to be contained in the current frame as a local, as the
121 * frame $this, by the evaluation stack, or inside $GLOBALS. Only
122 * possible as initial bases.
124 Local,
125 This,
126 Stack,
127 Global,
131 * Information about the current member base's type and location.
133 struct Base {
134 explicit Base(Type type = TCell,
135 BaseLoc loc = BaseLoc::None,
136 Type locTy = TCell,
137 SString locName = {},
138 LocalId locLocal = NoLocalId,
139 uint32_t locSlot = 0)
140 : type{std::move(type)}
141 , loc{loc}
142 , locTy{std::move(locTy)}
143 , locName{locName}
144 , locLocal{locLocal}
145 , locSlot{locSlot} {}
147 Type type;
148 BaseLoc loc;
151 * We also need to track effects of intermediate dims on the type of the base.
152 * So we have a type, name, and possibly associated local or stack slot for
153 * the base's container.
155 * For StaticProp, locName is the name of the property if known, or nullptr,
156 * and locTy is the type of the class containing the static property.
158 * For Prop, locName is the name of the property if it was known, and locTy
159 * gives as much information about the object type it is in. (If we actually
160 * *know* it is in an object, locTy will be a subtype of TObj.)
162 * For Local, locName is the name of the local if known, or nullptr, and
163 * locLocal is the LocalId corresponding to the local (or NoLocalId if not
164 * known).
166 * For Stack, locSlot is the stack index of the corresponding stack slot.
168 Type locTy;
169 SString locName;
170 LocalId locLocal;
171 uint32_t locSlot;
174 // An element on the eval stack
175 struct StackElem {
176 static auto constexpr NoId = std::numeric_limits<uint32_t>::max();
178 Type type;
179 // A location which is known to have an equivalent value to this
180 // stack value. This could be a valid LocalId, the special value
181 // StackDupId to indicate that its equivalent to the stack element
182 // below it, the special value StackThisId to indicate that its the
183 // value of $this, or NoLocalId if it has no known equivalents.
184 // Note that the location may not match the stack value wrt Uninit.
185 LocalId equivLoc;
186 uint32_t index;
187 uint32_t id;
190 struct InterpStack {
191 private:
192 template<typename S>
193 struct Iterator {
194 friend struct InterpStack;
195 Iterator(S* owner, uint32_t idx) :
196 owner(owner), idx(idx) {
197 assertx(idx <= owner->size());
199 void operator++() {
200 assertx(idx < owner->size());
201 ++idx;
203 void operator--() {
204 assertx(idx);
205 --idx;
207 Iterator operator-(ssize_t off) {
208 return Iterator(owner, idx - off);
210 Iterator operator+(ssize_t off) {
211 return Iterator(owner, idx + off);
213 auto& operator*() const {
214 assertx(idx < owner->index.size());
215 return owner->elems[owner->index[idx]];
217 auto* operator->() const {
218 return &operator*();
220 // very special helper for use with Add*ElemC. To prevent
221 // quadratic time/space when appending to an array, we need to
222 // ensure we're appending to an array with a single reference; but
223 // popping from an InterpStack doesn't actually remove the
224 // element. This allows us to drop the type's datatag. It means
225 // that rewinding wouldn't see the original type; but Add*ElemC is
226 // very carefully coded anyway.
227 auto unspecialize() const {
228 auto t = std::move((*this)->type);
229 (*this)->type = loosen_values(t);
230 return t;
232 StackElem* next_elem(ssize_t off) const {
233 const size_t i = owner->index[idx] + off;
234 if (i > owner->elems.size()) return nullptr;
235 return &owner->elems[i];
237 template<typename T>
238 bool operator==(const Iterator<T>& other) const {
239 return owner == other.owner && idx == other.idx;
241 template<typename T>
242 bool operator!=(const Iterator<T>& other) const {
243 return !operator==(other);
245 template<typename T>
246 const Iterator& operator=(const Iterator<T>& other) const {
247 owner = other.owner;
248 idx = other.idx;
249 return *this;
251 private:
252 S* owner;
253 uint32_t idx;
255 public:
256 using iterator = Iterator<InterpStack>;
257 using const_iterator = Iterator<const InterpStack>;
258 auto begin() { return iterator(this, 0); }
259 auto end() { return iterator(this, index.size()); }
260 auto begin() const { return const_iterator(this, 0); }
261 auto end() const { return const_iterator(this, index.size()); }
262 auto& operator[](size_t idx) { return *iterator(this, idx); }
263 auto& operator[](size_t idx) const { return *const_iterator(this, idx); }
264 auto& back() { return *iterator(this, index.size() - 1); }
265 auto& back() const { return *const_iterator(this, index.size() - 1); }
266 void push_elem(const StackElem& elm) {
267 assertx(elm.index == index.size());
268 index.push_back(elems.size());
269 elems.push_back(elm);
271 void push_elem(StackElem&& elm) {
272 assertx(elm.index == index.size());
273 index.push_back(elems.size());
274 elems.push_back(std::move(elm));
276 void push_elem(const Type& t, LocalId equivLoc,
277 uint32_t id = StackElem::NoId) {
278 uint32_t isize = index.size();
279 push_elem({t, equivLoc, isize, id});
281 void push_elem(Type&& t, LocalId equivLoc, uint32_t id = StackElem::NoId) {
282 uint32_t isize = index.size();
283 push_elem({std::move(t), equivLoc, isize, id});
285 void pop_elem() {
286 index.pop_back();
288 void erase(iterator i1, iterator i2) {
289 assertx(i1.owner == i2.owner);
290 assertx(i1.idx < i2.idx);
291 i1.owner->index.erase(i1.owner->index.begin() + i1.idx,
292 i1.owner->index.begin() + i2.idx);
294 bool empty() const { return index.empty(); }
295 size_t size() const { return index.size(); }
296 void clear() {
297 index.clear();
298 elems.clear();
300 void compact() {
301 uint32_t i = 0;
302 for (auto& ix : index) {
303 if (ix != i) {
304 assertx(ix > i);
305 std::swap(elems[i], elems[ix]);
306 ix = i;
308 ++i;
310 elems.resize(i);
312 // rewind the stack to the state it was in before the last
313 // instruction ran (which is known to have popped numPop items and
314 // pushed numPush items).
315 void rewind(int numPop, int numPush);
316 void peek(int numPop, const StackElem** values, int numPush) const;
317 void kill(int numPop, int numPush, uint32_t id);
318 void insert_after(int numPop, int numPush, const Type* types,
319 uint32_t numInst, uint32_t id);
320 private:
321 void refill(size_t elemIx, size_t indexLow, int numPop, int numPush);
322 CompactVector<uint32_t> index;
323 CompactVector<StackElem> elems;
327 * A program state at a position in a php::Block.
329 * The `initialized' flag indicates whether the state knows anything. All
330 * other fields are invalid if a state is not initialized, and notably, after
331 * all analysis has run, any blocks that still don't have initialized input
332 * states are not reachable.
334 * The `unreachable' flag means we've produced this state from analysis, but
335 * the program cannot reach this program position. This flag has two uses:
337 * o It allows us to determine arbitrary mid-block positions where code
338 * becomes unreachable, and eliminate that code in optimize.cpp.
340 * o We may still do abstract interpretation of the unreachable code, but
341 * this flag is used when merging states to allow the interpreter to
342 * analyze blocks that are unreachable without pessimizing states for
343 * reachable blocks that would've been their successors.
345 * TODO: having the interpreter visit blocks when they are unreachable still
346 * potentially merges types into object properties that aren't possible at
347 * runtime.
349 * We split off a base class from State as a convenience to enable the use
350 * default copy construction and assignment.
353 struct StateBase {
354 StateBase() {
355 initialized = unreachable = false;
357 StateBase(const StateBase&) = default;
358 StateBase(StateBase&&) = default;
359 StateBase& operator=(const StateBase&) = default;
360 StateBase& operator=(StateBase&&) = default;
362 uint8_t initialized : 1;
363 uint8_t unreachable : 1;
365 LocalId thisLoc = NoLocalId;
366 Type thisType;
367 CompactVector<Type> locals;
368 CompactVector<Iter> iters;
371 * Mapping of a local to other locals which are known to have
372 * equivalent values. This equivalence ignores Uninit; users should
373 * compare types if they care.
375 CompactVector<LocalId> equivLocals;
378 struct State : StateBase {
379 State() = default;
380 State(const State&) = default;
381 State(State&&) = default;
383 enum class Compact {};
384 State(const State& src, Compact) : StateBase(src) {
385 for (auto const& elm : src.stack) {
386 stack.push_elem(elm.type, elm.equivLoc);
390 // delete assignment operator, so we have to explicitly choose what
391 // we want to do from amongst the various copies.
392 State& operator=(const State&) = delete;
393 State& operator=(State&&) = delete;
395 void copy_from(const State& src) {
396 *static_cast<StateBase*>(this) = src;
397 stack = src.stack;
400 void copy_from(State&& src) {
401 *static_cast<StateBase*>(this) = std::move(src);
402 stack = std::move(src.stack);
405 void copy_and_compact(const State& src) {
406 *static_cast<StateBase*>(this) = src;
407 stack.clear();
408 for (auto const& elm : src.stack) {
409 stack.push_elem(elm.type, elm.equivLoc);
413 void swap(State& other) {
414 std::swap(static_cast<StateBase&>(*this), static_cast<StateBase&>(other));
415 std::swap(stack, other.stack);
418 InterpStack stack;
422 * Return a copy of a State without copying the evaluation stack, pushing
423 * Throwable on the stack.
425 State with_throwable_only(const IIndex& env, const State&);
427 //////////////////////////////////////////////////////////////////////
430 * Undo log for mutations to the interp state. If mutating state, this
431 * records enough information to undo that mutation and restore the
432 * state to the previous values.
434 struct StateMutationUndo {
435 // Marks an instruction boundary, along with the flags during
436 // interp.
437 struct Mark {
438 bool wasPEI = true;
439 bool unreachable = false;
440 decltype(StepFlags::mayReadLocalSet) mayReadLocalSet;
442 // Push to the stack (undone by a pop)
443 struct Push {};
444 // Pop from the stack (undone by pushing the recorded type)
445 struct Pop { Type t; };
446 // Location modification (undone by changing the local slot to the
447 // recorded type)
448 struct Local { LocalId id; Type t; };
449 // Stack modification (undone by changing the stack slot to the
450 // recorded type).
451 struct Stack { size_t idx; Type t; };
452 using Events = boost::variant<Push, Pop, Local, Stack, Mark>;
454 std::vector<Events> events;
456 void onPush() { events.emplace_back(Push{}); }
457 void onPop(Type old) { events.emplace_back(Pop{ std::move(old) }); }
458 void onStackWrite(size_t idx, Type old) {
459 events.emplace_back(Stack{ idx, std::move(old) });
461 void onLocalWrite(LocalId l, Type old) {
462 events.emplace_back(Local{ l, std::move(old) });
466 //////////////////////////////////////////////////////////////////////
469 * PropertiesInfo packages the PropState for private instance and
470 * static properties, which is cross-block information collected in
471 * CollectedInfo.
473 * During analysis the ClassAnalysis* is available and the PropState is
474 * retrieved from there. However during optimization the ClassAnalysis is
475 * not available and the PropState has to be retrieved off the Class in
476 * the Index. In that case cls is nullptr and the PropState fields are
477 * populated.
479 struct PropertiesInfo {
480 PropertiesInfo(const IIndex&, Context, ClassAnalysis*);
482 const PropStateElem* readPrivateProp(SString name) const;
483 const PropStateElem* readPrivateStatic(SString name) const;
485 void mergeInPrivateProp(const IIndex& index,
486 SString name,
487 const Type& t);
488 void mergeInPrivateStatic(const IIndex& index,
489 SString name,
490 const Type& t,
491 bool ignoreConst,
492 bool mustBeReadOnly);
494 void mergeInPrivateStaticPreAdjusted(SString name, const Type& t);
496 void mergeInAllPrivateProps(const IIndex&, const Type&);
497 void mergeInAllPrivateStatics(const IIndex&, const Type&,
498 bool ignoreConst,
499 bool mustBeReadOnly);
501 void setInitialValue(const php::Prop&, TypedValue, bool, bool);
503 bool hasInitialValues() const { return !m_inits.empty(); }
504 const PropInitInfo* getInitialValue(const php::Prop&) const;
506 const PropState& privatePropertiesRaw() const;
507 const PropState& privateStaticsRaw() const;
509 private:
510 ClassAnalysis* const m_cls;
511 PropState m_privateProperties;
512 PropState m_privateStatics;
513 hphp_fast_map<const php::Prop*, PropInitInfo> m_inits;
514 const php::Func* m_func;
517 //////////////////////////////////////////////////////////////////////
520 * Encapsulates information about the current class's methods during
521 * class-at-a-time analysis. This might be more refined than the
522 * information in the Index.
524 struct MethodsInfo {
525 MethodsInfo(Context, ClassAnalysis*);
527 // Look up the best known return type for the current class's
528 // method, return std::nullopt if not known, or if the Func is not a
529 // method of the current class.
530 Optional<Index::ReturnType> lookupReturnType(const php::Func&);
532 private:
533 ClassAnalysis* m_cls;
534 const php::Func* m_func;
537 //////////////////////////////////////////////////////////////////////
540 * Map from closure classes to types for each of their used vars.
541 * Shows up in a few different interpreter structures.
543 using ClosureUseVarMap = hphp_fast_map<
544 const php::Class*,
545 CompactVector<Type>
549 * Merge the types in the vector as possible use vars for the closure
550 * `clo' into the destination map.
552 void merge_closure_use_vars_into(ClosureUseVarMap& dst,
553 const php::Class& clo,
554 CompactVector<Type>);
556 //////////////////////////////////////////////////////////////////////
558 enum class CollectionOpts {
559 Speculating = 1,
560 Inlining = 2,
561 EffectFreeOnly = 4,
562 Optimizing = 8,
565 inline CollectionOpts operator|(CollectionOpts o1, CollectionOpts o2) {
566 return static_cast<CollectionOpts>(
567 static_cast<int>(o1) | static_cast<int>(o2)
571 inline CollectionOpts operator&(CollectionOpts o1, CollectionOpts o2) {
572 return static_cast<CollectionOpts>(
573 static_cast<int>(o1) & static_cast<int>(o2)
577 inline CollectionOpts operator-(CollectionOpts o1, CollectionOpts o2) {
578 return static_cast<CollectionOpts>(
579 static_cast<int>(o1) & ~static_cast<int>(o2)
583 inline bool any(CollectionOpts o) { return static_cast<int>(o); }
586 * Area used for writing down any information that is collected across
587 * a series of step operations (possibly cross block).
589 struct CollectedInfo {
590 CollectedInfo(const IIndex& index,
591 Context ctx,
592 ClassAnalysis* cls,
593 CollectionOpts opts,
594 ClsConstantWork* clsCns = nullptr,
595 const FuncAnalysis* fa = nullptr);
597 ClosureUseVarMap closureUseTypes;
598 PropertiesInfo props;
599 MethodsInfo methods;
600 ClsConstantWork* clsCns;
601 hphp_fast_set<CallContext, CallContextHasher> unfoldableFuncs;
602 bool effectFree{true};
603 bool hasInvariantIterBase{false};
604 CollectionOpts opts{};
606 PublicSPropMutations publicSPropMutations;
608 struct MInstrState {
610 * The current member base. Updated as we move through bytecodes
611 * representing the operation.
613 Base base{};
616 * Used to track whether a member op sequence is effect free. We use
617 * this information to replace member op sequences with constants.
619 bool effectFree{false};
620 bool extraPop{false};
623 * Chains of member operations on array elements will affect the type of
624 * something further back in the member instruction. This vector tracks the
625 * base,key type pair that was used at each stage. See
626 * interp-minstr.cpp:resolveArrayChain().
628 struct ArrayChainEnt {
629 Type base;
630 Type key;
631 LocalId keyLoc;
633 using ArrayChain = CompactVector<ArrayChainEnt>;
634 ArrayChain arrayChain;
636 void clear() {
637 base.loc = BaseLoc::None;
638 arrayChain.clear();
641 MInstrState mInstrState;
644 * All blocks encountered (so far) which contain a MemoGet
645 * instruction.
647 folly::sorted_vector_set<BlockId> allMemoGets;
649 * The union of all types used as inputs to a MemoSet instruction,
650 * and whether those types come from an effect-free function.
652 Index::ReturnType allMemoSets{TBottom, true};
655 //////////////////////////////////////////////////////////////////////
658 * State merging functions, based on the union_of operation for types.
660 * These return true if the destination state changed.
662 bool merge_into(State&, const State&);
665 * State merging functions, based on the widening_union operation.
666 * See analyze.cpp for details on when this is needed.
668 bool widen_into(State&, const State&);
670 //////////////////////////////////////////////////////////////////////
673 * Functions to show various aspects of interpreter state as strings.
675 std::string show(const php::Func&, const Base& b);
676 std::string show(const php::Func&, const CollectedInfo::MInstrState&);
677 std::string show(const php::Func&, const Iter&);
678 std::string property_state_string(const PropertiesInfo&);
679 std::string state_string(const php::Func&, const State&, const CollectedInfo&);
681 //////////////////////////////////////////////////////////////////////