Implement full-fidelity parsing for unknown shape fields
[hiphop-php.git] / hphp / hhbbc / index.h
blob65a79fbc43f59d1bef8f8d04620c40182e251b25
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #ifndef incl_HHBBC_INDEX_H_
17 #define incl_HHBBC_INDEX_H_
19 #include <memory>
20 #include <mutex>
21 #include <tuple>
22 #include <vector>
23 #include <map>
25 #include <boost/variant.hpp>
26 #include <tbb/concurrent_hash_map.h>
28 #include <folly/Optional.h>
29 #include <folly/Hash.h>
31 #include "hphp/util/compact-vector.h"
32 #include "hphp/util/either.h"
33 #include "hphp/runtime/base/repo-auth-type-array.h"
34 #include "hphp/runtime/vm/type-constraint.h"
36 #include "hphp/hhbbc/hhbbc.h"
37 #include "hphp/hhbbc/misc.h"
39 namespace HPHP { namespace HHBBC {
41 //////////////////////////////////////////////////////////////////////
43 struct Type;
44 struct Index;
45 struct PublicSPropIndexer;
46 struct FuncAnalysis;
48 namespace php {
49 struct Class;
50 struct Func;
51 struct Unit;
52 struct Program;
55 //////////////////////////////////////////////////////////////////////
58 * This module contains functions for building and querying an Index
59 * of data relating to "resolved" versions of the names in a
60 * php::Program. It also records dependencies so it is possible to
61 * tell which parts of the program may be interested in new inferred
62 * information about other parts of the program.
64 * The main entry point here is the Index class. The Index is built
65 * after parse time, and then analysis can query it for information.
68 //////////////////////////////////////////////////////////////////////
71 * A Context is a (unit, func, class) triple, where cls and func
72 * fields may be null in some situations. Most queries to the Index
73 * need a "context", to allow recording dependencies.
75 struct Context {
76 borrowed_ptr<const php::Unit> unit;
77 borrowed_ptr<php::Func> func;
78 borrowed_ptr<const php::Class> cls;
80 struct Hash {
81 size_t operator()(const Context& c) const {
82 return pointer_hash<void>{}(c.func ? (void*)c.func :
83 c.cls ? (void*)c.cls : (void*)c.unit);
88 inline bool operator==(Context a, Context b) {
89 return a.unit == b.unit && a.func == b.func && a.cls == b.cls;
92 inline bool operator<(Context a, Context b) {
93 return std::make_tuple(a.unit, a.func, a.cls) <
94 std::make_tuple(b.unit, b.func, b.cls);
// Unordered set of Context values, hashed with Context::Hash.
97 using ContextSet = std::unordered_set<Context, Context::Hash>;
// Renders a Context as a std::string.
// NOTE(review): presumably a trace-able/debug rendering like the res::
// show() overloads declared later in this header -- confirm against the
// definition.
99 std::string show(Context);
// Unordered map keyed by SString with Cell values.
// NOTE(review): presumably constant name -> constant value; confirm
// against the users of this alias.
101 using ConstantMap = std::unordered_map<SString, Cell>;
103 * Context for a call to a function. This means the types and number
104 * of arguments, and where it is being called from.
106 * TODO(#3788877): add type of $this if it is going to be an object
107 * method, and the LSB class type if static.
109 struct CallContext {
110 Context caller;
111 std::vector<Type> args;
114 inline bool operator==(const CallContext& a, const CallContext& b) {
115 return a.caller == b.caller &&
116 a.args == b.args;
120 * State of properties on a class. Map from property name to its
121 * Type.
123 using PropState = std::map<LSString,Type>;
125 //////////////////////////////////////////////////////////////////////
127 // private types
128 struct IndexData;
129 struct FuncFamily;
130 struct FuncInfo;
131 struct ClassInfo;
133 //////////////////////////////////////////////////////////////////////
136 * References to "resolved" entities with information in the index are
137 * in the res:: namespace.
139 * These represent handles to program entities that may have variable
140 * amounts of information. For example, we may know the name of a
141 * class in a res::Class, but do not know for sure which php::Class
142 * struct is actually associated with it.
144 namespace res {
147 * A resolved runtime Class, for a particular php::Class.
149 * Provides various lookup tables that allow querying the Class'
150 * information.
152 struct Class {
154 * Returns whether two classes are definitely the same at runtime. If
155 * this function returns false, they still *may* be the same at
156 * runtime.
158 bool same(const Class&) const;
161 * Returns true if this class is definitely going to be a subtype
162 * of `o' at runtime. If this function returns false, this may
163 * still be a subtype of `o' at runtime, it just may not be known.
164 * A typical example is with "non unique" classes.
166 bool subtypeOf(const Class& o) const;
169 * If this function returns false, it is known that this class
170 * is in no subtype relationship with the argument Class 'o'.
171 * Returns true if this class could be a subtype of `o' at runtime.
172 * When true is returned the two classes may still be unrelated but it is
173 * not possible to tell. A typical example is with "non unique" classes.
175 bool couldBe(const Class& o) const;
178 * Returns the name of this class. Non-null guarantee.
180 SString name() const;
183 * Whether this class could possibly be an interface or a trait.
185 * When returning false, it is known that this class is not an interface
186 * or a trait. When returning true, it's possible that this class is not
187 * an interface or trait but the system cannot tell.
189 bool couldBeInterfaceOrTrait() const;
192 * Returns whether this type has the no override attribute, that is, if it
193 * is a final class (explicitly marked by the user or known by the static
194 * analysis).
196 * When returning false the class is guaranteed to be final. When returning
197 * true the system cannot tell though the class may still be final.
199 bool couldBeOverriden() const;
202 * Whether this class (or its subtypes) could possibly have
203 * certain magic methods.
205 bool couldHaveMagicGet() const;
208 * Returns the Class that is the first common ancestor between 'this' and 'o'.
209 * If there is no common ancestor, folly::none is returned.
211 folly::Optional<Class> commonAncestor(const Class& o) const;
214 * Returns the res::Class for this Class's parent if there is one,
215 * or folly::none.
217 folly::Optional<Class> parent() const;
220 * Returns true if we have a ClassInfo for this Class.
222 bool resolved() const {
223 return val.right() != nullptr;
227 * Returns the php::Class for this Class if there is one, or
228 * nullptr.
230 borrowed_ptr<const php::Class> cls() const;
232 private:
233 Class(borrowed_ptr<const Index>, Either<SString,borrowed_ptr<ClassInfo>>);
235 private:
236 friend std::string show(const Class&);
237 friend struct ::HPHP::HHBBC::Index;
238 friend struct ::HPHP::HHBBC::PublicSPropIndexer;
239 borrowed_ptr<const Index> index;
240 Either<SString,borrowed_ptr<ClassInfo>> val;
244 * This is an abstraction layer to represent possible runtime function
245 * resolutions.
247 * Internally, this may only know the name of the function (or method), or we
248 * may know exactly which source-code-level function it refers to, or we may
249 * only have narrowed it down to one of a few functions in a class hierarchy. The
250 * interpreter can treat all these cases the same way using this.
252 struct Func {
254 * Returns whether two res::Funcs definitely mean the same func at
255 * runtime.
257 * Note: this is potentially pessimistic for its use in ActRec state
258 * merging right now, but not incorrect.
260 bool same(const Func&) const;
263 * Returns the name of this function. Non-null guarantee.
265 SString name() const;
268 * If this resolved function represents exactly one php::Func, return it.
270 borrowed_ptr<const php::Func> exactFunc() const;
273 * Returns whether this resolved function could possibly be going through a
274 * magic call, in the magic way.
276 * That is, if it was resolved as part of a direct call to an __call method,
277 * this will say true. If it was resolved as part of some normal method
278 * call, and we haven't proven that there's no way an __call dispatch could
279 * be involved, this will say false.
281 bool cantBeMagicCall() const;
284 * Returns whether this resolved function could possibly read or write to the
285 * caller's frame.
287 bool mightReadCallerFrame() const;
288 bool mightWriteCallerFrame() const;
289 bool mightAccessCallerFrame() const {
290 return mightReadCallerFrame() || mightWriteCallerFrame();
294 * Returns whether this resolved function is definitely safe to constant fold.
296 bool isFoldable() const;
299 * Returns whether this resolved function could possibly be skipped when
300 * looking for a caller's frame.
302 bool mightBeSkipFrame() const;
303 private:
304 friend struct ::HPHP::HHBBC::Index;
305 struct FuncName {
306 bool operator==(FuncName o) const { return name == o.name; }
307 SString name;
309 struct MethodName {
310 bool operator==(MethodName o) const { return name == o.name; }
311 SString name;
313 using Rep = boost::variant< FuncName
314 , MethodName
315 , borrowed_ptr<FuncInfo>
316 , borrowed_ptr<FuncFamily>
319 private:
320 Func(borrowed_ptr<const Index>, Rep);
321 friend std::string show(const Func&);
323 private:
324 borrowed_ptr<const Index> index;
325 Rep val;
329 * Produce a trace-able string for a res::Func or res::Class.
331 std::string show(const Func&);
332 std::string show(const Class&);
336 //////////////////////////////////////////////////////////////////////
339 * This class encapsulates the known facts about the program, with a
340 * whole-program view.
342 * This structure contains unowned pointers into the php::Program it
343 * was created for. It should not out-live the Program.
345 * The const member functions of this class are thread safe for
346 * concurrent reads and writes. The non-const functions should be
347 * called in a single threaded context only (they are used during the
348 * "update" step in between whole program analysis rounds).
350 struct Index {
352 * Create an Index for a php::Program. Performs some initial
353 * analysis of the program.
355 explicit Index(borrowed_ptr<php::Program>);
358 * This class must not be destructed after its associated
359 * php::Program.
361 ~Index();
364 * The index operates in two modes: frozen, and unfrozen.
366 * Conceptually, the index is mutable and may acquire new
367 * information until it has been frozen, and once frozen, it retains
368 * the information it had at the point it was frozen.
370 * The reason this exists is because certain functions on the index
371 * may cause it to need to consult information in the bodies of
372 * functions other than the Context passed in. Specifically, if the
373 * interpreter tries to look up the return type for a callee in a
374 * given CallContext, the index may choose to recursively invoke
375 * type inference on that callee's function body to see if more
376 * precise information can be determined, unless it is frozen.
378 * This is fine until the final pass, because all bytecode is
379 * read-only at that stage. However, in the final pass, other
380 * threads might be optimizing a callee's bytecode and changing it,
381 * so we should not be reading from it to perform type inference
382 * concurrently. Freezing the index tells it it can't do that
383 * anymore.
385 * These are the functions to query and transition to frozen state.
387 bool frozen() const;
388 void freeze();
391 * The Index contains a Builder for an ArrayTypeTable.
393 * If we're creating assert types with options.InsertAssertions, we
394 * need to keep track of which array types exist in the whole
395 * program in order to include it in the repo.
397 std::unique_ptr<ArrayTypeTable::Builder>& array_table_builder() const;
400 * Find all the closures created inside the context of a given
401 * php::Class.
403 const CompactVector<borrowed_ptr<const php::Class>>*
404 lookup_closures(borrowed_ptr<const php::Class>) const;
407 * Try to resolve which class will be the class named `name' from a
408 * given context, if we can resolve it to a single class.
410 * Note, the returned class may or may not be *defined* at the
411 * program point you care about (it could be non-hoistable, even
412 * though it's unique, for example).
414 * Returns folly::none if we can't prove the supplied name must be an
415 * object type. (E.g. if there are type aliases.)
417 folly::Optional<res::Class> resolve_class(Context, SString name) const;
420 * Resolve a closure class.
422 * Returns both a resolved Class, and the actual php::Class for the
423 * closure.
425 std::pair<res::Class,borrowed_ptr<php::Class>>
426 resolve_closure_class(Context ctx, int32_t idx) const;
429 * Return a resolved class for a builtin class.
431 * Pre: `name' must be the name of a class defined in a systemlib.
433 res::Class builtin_class(SString name) const;
436 * Try to resolve a function named `name' from a given context.
438 * Note, the returned function may or may not be defined at the
439 * program point (it could require a function autoload that might
440 * fail).
442 res::Func resolve_func(Context, SString name) const;
445 * Try to resolve a function using namespace-style fallback lookup.
447 * The name `name' is tried first, and `fallback' is used if this
448 * isn't found. Both names must already be namespace-normalized.
449 * If we don't know which will be called at runtime, both will be
450 * returned.
452 * Note: the returned function may or may not be defined at the
453 * program point (it could require a function autoload that might
454 * fail).
456 std::pair<res::Func, folly::Optional<res::Func>>
457 resolve_func_fallback(Context,
458 SString name,
459 SString fallback) const;
462 * Try to resolve a class method named `name' with a given Context
463 * and class type.
465 * Pre: clsType.subtypeOf(TCls)
467 res::Func resolve_method(Context, Type clsType, SString name) const;
470 * Try to resolve a class constructor for the supplied class.
472 * Returns: folly::none if we can't figure out which constructor
473 * this would call.
475 folly::Optional<res::Func> resolve_ctor(Context, res::Class) const;
478 * Give the Type in our type system that matches an hhvm
479 * TypeConstraint, subject to the information in this Index.
481 * This function returns a subtype of Cell, although TypeConstraints
482 * at runtime can match reference parameters. The caller should
483 * make sure to handle that case.
485 * For soft constraints (@), this function returns Cell.
487 * For some non-soft constraints (such as "Stringish"), this
488 * function may return a Type that is a strict supertype of the
489 * constraint's type.
491 Type lookup_constraint(Context, const TypeConstraint&) const;
494 * If this function returns true, it is safe to assume that Type t
495 * will always satisfy TypeConstraint tc at run time.
497 bool satisfies_constraint(Context, Type t, const TypeConstraint& tc) const;
500 * Lookup what the best known Type for a class constant would be,
501 * using a given Index and Context, if a class of that name were
502 * loaded.
504 Type lookup_class_constant(Context, res::Class, SString cns) const;
507 * Lookup what the best known Type for a constant would be, using a
508 * given Index and Context, if a constant of that name were defined.
510 * Returns folly::none if the constant isn't in the index.
512 folly::Optional<Type> lookup_constant(Context ctx, SString cnsName) const;
515 * Return the best known return type for a resolved function, in a
516 * context insensitive way. Returns TInitGen at worst.
518 Type lookup_return_type(Context, res::Func) const;
521 * Return the best known return type for a resolved function, given
522 * the supplied calling context. Returns TInitGen at worst.
524 * During analyze phases, this function may re-enter analyze in
525 * order to interpret the callee with these argument types.
527 Type lookup_return_type(CallContext, res::Func) const;
530 * Look up the return type for an unresolved function. The
531 * interpreter should not use this routine---it's for stats or debug
532 * dumps.
534 * Nothing may be writing to the index when this function is used,
535 * but concurrent readers are allowed.
537 Type lookup_return_type_raw(borrowed_ptr<const php::Func>) const;
540 * Return the best known types of a closure's used variables (on
541 * entry to the closure). The function is the closure body.
543 std::vector<Type>
544 lookup_closure_use_vars(borrowed_ptr<const php::Func>) const;
547 * Return the availability of $this on entry to the provided method.
548 * If the Func provided is not a method of a class false is
549 * returned.
551 bool lookup_this_available(borrowed_ptr<const php::Func>) const;
554 * Returns the parameter preparation kind (if known) for parameter
555 * `paramId' on the given resolved Func.
557 PrepKind lookup_param_prep(Context, res::Func, uint32_t paramId) const;
560 * Returns the control-flow insensitive inferred private instance
561 * property types for a Class. The Class doesn't need to be
562 * resolved, because private properties don't depend on the
563 * inheritance hierarchy.
565 * The Index tracks the largest types for private properties that
566 * are guaranteed to hold at any program point.
568 PropState lookup_private_props(borrowed_ptr<const php::Class>) const;
571 * Returns the control-flow insensitive inferred private static
572 * property types for a Class. The class doesn't need to be
573 * resolved for the same reasons as for instance properties.
575 * The Index tracks the largest types for private static properties
576 * that are guaranteed to hold at any program point.
578 PropState lookup_private_statics(borrowed_ptr<const php::Class>) const;
581 * Lookup the best known type for a public static property, with a given
582 * class and name.
584 * This function will always return TInitGen before refine_public_statics has
585 * been called, or if the AnalyzePublicStatics option is off.
587 Type lookup_public_static(const Type& cls, const Type& name) const;
588 Type lookup_public_static(borrowed_ptr<const php::Class>, SString name) const;
591 * If we resolve a public static initializer to a constant, and eliminate the
592 * 86pinit, we need to update the initializer in the index.
594 * Note that this is called from code that runs in parallel, and
595 * consequently isn't normally allowed to modify the index. It's safe
596 * in this case, because for any given property there can only be
597 * one InitProp which sets it, and all we do is modify an existing
598 * element of a map.
600 void fixup_public_static(borrowed_ptr<const php::Class>, SString name,
601 const Type& ty) const;
603 * Returns whether a public static property is known to be immutable. This
604 * is used to add AttrPersistent flags to static properties, and relies on
605 * AnalyzePublicStatics (without this flag it will always return false).
607 bool lookup_public_static_immutable(borrowed_ptr<const php::Class>,
608 SString name) const;
611 * Returns the computed vtable slot for the given class, if it's an interface
612 * that was given a vtable slot. No two interfaces implemented by the same
613 * class will share the same vtable slot. May return kInvalidSlot, if the
614 * given class isn't an interface or if it wasn't assigned a slot.
616 Slot lookup_iface_vtable_slot(borrowed_ptr<const php::Class>) const;
619 * Refine the types of the class constants defined by an 86cinit,
620 * based on a round of analysis.
622 * Constants not defined by a pseudomain are considered unknowable.
624 * No other threads should be calling functions on this Index when
625 * this function is called.
627 * Merges the set of Contexts that depended on the constants defined
628 * by this 86cinit into deps.
630 void refine_class_constants(const Context& ctx, ContextSet& deps);
633 * Refine the types of the constants defined by a function, based on
634 * a round of analysis.
636 * Constants not defined by a pseudomain are considered unknowable.
638 * No other threads should be calling functions on this Index when
639 * this function is called.
641 * Merges the set of Contexts that depended on the constants defined
642 * by this php::Func into deps.
644 void refine_constants(const FuncAnalysis& fa, ContextSet& deps);
647 * Refine the types of the local statics owned by the function.
649 void refine_local_static_types(borrowed_ptr<const php::Func> func,
650 const CompactVector<Type>& localStaticTypes);
653 * Refine the return type for a function, based on a round of
654 * analysis.
656 * No other threads should be calling functions on this Index when
657 * this function is called.
659 * Merges the set of Contexts that depended on the return type of
660 * this php::Func into deps.
662 void refine_return_type(borrowed_ptr<const php::Func>, Type,
663 ContextSet& deps);
666 * Refine the used var types for a closure, based on a round of
667 * analysis.
669 * No other threads should be calling functions on this Index when
670 * this function is called.
672 * Returns: true if the types have changed.
674 bool refine_closure_use_vars(borrowed_ptr<const php::Class>,
675 const std::vector<Type>&);
678 * Refine the private property types for a class, based on a round
679 * of analysis.
681 * No other threads should be calling functions on this Index when
682 * this function is called.
684 void refine_private_props(borrowed_ptr<const php::Class> cls,
685 const PropState&);
688 * Refine the static private property types for a class, based on a
689 * round of analysis.
691 * No other threads should be calling functions on this Index when
692 * this function is called.
694 void refine_private_statics(borrowed_ptr<const php::Class> cls,
695 const PropState&);
698 * After a whole program pass using PublicSPropIndexer, the types can be
699 * reflected into the index for use during another type inference pass.
701 * No other threads should be calling functions on this Index or on the
702 * provided PublicSPropIndexer when this function is called.
704 void refine_public_statics(const PublicSPropIndexer&);
707 * Identify the persistent classes, functions and typeAliases.
709 void mark_persistent_classes_and_functions(php::Program& program);
712 * Return true if the resolved function is an async
713 * function.
715 bool is_async_func(res::Func rfunc) const;
718 * Return true if there are any interceptable functions
720 bool any_interceptable_functions() const;
721 private:
722 Index(const Index&) = delete;
723 Index& operator=(Index&&) = delete;
725 private:
726 template<class FuncRange>
727 res::Func resolve_func_helper(const FuncRange&, SString) const;
728 res::Func do_resolve(borrowed_ptr<const php::Func>) const;
729 bool must_be_derived_from(borrowed_ptr<const php::Class>,
730 borrowed_ptr<const php::Class>) const;
731 bool could_be_related(borrowed_ptr<const php::Class>,
732 borrowed_ptr<const php::Class>) const;
733 Type satisfies_constraint_helper(Context, const TypeConstraint&) const;
735 private:
736 std::unique_ptr<IndexData> const m_data;
739 //////////////////////////////////////////////////////////////////////
742 * Indexer object used for collecting information about public static property
743 * types. See analyze_public_statics in whole-program.cpp for details about
744 * how it is used.
746 struct PublicSPropIndexer {
747 explicit PublicSPropIndexer(borrowed_ptr<const Index> index)
748 : m_index(index)
752 * Called by the interpreter during analyze_func_collect when a
753 * PublicSPropIndexer is active. This function must be called anywhere the
754 * interpreter does something that could change the type of public static
755 * properties named `name' on classes of type `cls' to `val'.
757 * Note that if cls and name are both too generic this object will have to
758 * give up all information it knows about any public static properties.
760 * This routine may be safely called concurrently by multiple analysis
761 * threads.
763 void merge(Context ctx, Type cls, Type name, Type val);
765 private:
766 friend struct Index;
768 struct KnownKey {
769 bool operator==(KnownKey o) const {
770 return cinfo == o.cinfo && prop == o.prop;
773 friend size_t tbb_hasher(KnownKey k) {
774 return folly::hash::hash_combine(k.cinfo, k.prop);
777 borrowed_ptr<ClassInfo> cinfo;
778 SString prop;
781 using UnknownMap = tbb::concurrent_hash_map<SString,Type>;
782 using KnownMap = tbb::concurrent_hash_map<KnownKey,Type>;
784 private:
785 borrowed_ptr<const Index> m_index;
786 std::atomic<bool> m_everything_bad{false};
787 UnknownMap m_unknown;
788 KnownMap m_known;
791 //////////////////////////////////////////////////////////////////////
795 #endif