Refactor back ends into implementations of BackEnd.
[hiphop-php.git] / hphp / runtime / vm / unit.h
blobc376ed757ae4cd0c4385868a0efe3ca63b3f8392
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_VM_UNIT_H_
18 #define incl_HPHP_VM_UNIT_H_
20 #include "hphp/parser/location.h"
22 #include "hphp/runtime/base/runtime-option.h"
23 #include "hphp/runtime/base/type-array.h"
24 #include "hphp/runtime/base/type-string.h"
26 #include "hphp/runtime/vm/class.h"
27 #include "hphp/runtime/vm/hhbc.h"
28 #include "hphp/runtime/vm/named-entity.h"
29 #include "hphp/runtime/vm/repo-helpers.h"
30 #include "hphp/runtime/vm/type-alias.h"
32 #include "hphp/util/md5.h"
33 #include "hphp/util/range.h"
34 #include "hphp/util/tiny-vector.h"
36 #include <memory>
38 namespace HPHP {
39 // Forward declarations.
40 namespace Compiler { class Peephole; }
41 struct ActRec;
43 class Func;
44 class FuncEmitter;
45 class Repo;
46 class FuncDict;
47 class Unit;
48 class PreClassEmitter;
// Origin tag for a unit.  Both enumerators are pinned to explicit values
// (File = 0, Eval = 1).
// NOTE(review): presumably distinguishes units loaded from a source file
// from units produced by eval -- confirm against the callers.
50 enum class UnitOrigin {
51 File = 0,
52 Eval = 1
// Kind tag for a mergeable entry.  Values are assigned explicitly and, per
// the note at the end of the list, must fit in 3 bits (0..7); value 5 is
// currently unassigned.
55 enum UnitMergeKind {
56 // UnitMergeKindClass is required to be 0 for correctness.
57 UnitMergeKindClass = 0,
58 UnitMergeKindUniqueDefinedClass = 1,
59 // Top level, scalar defines in the unit
60 UnitMergeKindDefine = 2,
61 // Top level, scalar defines that will be loaded once
62 // and preserved from request to request
63 UnitMergeKindPersistentDefine = 3,
64 UnitMergeKindGlobal = 4,
65 // 5 is available
66 UnitMergeKindReqDoc = 6,
67 UnitMergeKindDone = 7,
68 // We cannot add more kinds here; this has to fit in 3 bits.
// Merge-state values stored in Unit::m_mergeState (a uint8_t).
// Unit::isEmpty() tests UnitMergeStateEmpty with a bitwise AND, so at least
// that value is used as a flag bit.  Note that 16 is not assigned.
71 enum UnitMergeState {
72 UnitMergeStateUnmerged = 0,
73 UnitMergeStateMerging = 1,
74 UnitMergeStateMerged = 2,
75 UnitMergeStateUniqueFuncs = 4,
76 UnitMergeStateNeedsCompact = 8,
77 UnitMergeStateEmpty = 32
80 ALWAYS_INLINE
81 bool isMergeKindReq(UnitMergeKind k) {
82 return k == UnitMergeKindReqDoc;
85 struct UnitMergeInfo {
86 typedef IterRange<Func* const*> FuncRange;
87 typedef IterRange<Func**> MutableFuncRange;
89 unsigned m_firstHoistableFunc;
90 unsigned m_firstHoistablePreClass;
91 unsigned m_firstMergeablePreClass;
92 unsigned m_mergeablesSize;
93 void* m_mergeables[1];
95 static UnitMergeInfo* alloc(size_t num);
97 Func** funcBegin() const {
98 return (Func**)m_mergeables;
100 Func** funcEnd() const {
101 return funcBegin() + m_firstHoistablePreClass;
103 Func** funcHoistableBegin() const {
104 return funcBegin() + m_firstHoistableFunc;
106 MutableFuncRange nonMainFuncs() const {
107 return MutableFuncRange(funcBegin() + 1, funcEnd());
109 MutableFuncRange hoistableFuncs() const {
110 return MutableFuncRange(funcHoistableBegin(), funcEnd());
112 FuncRange funcs() const {
113 return FuncRange(funcBegin(), funcEnd());
115 MutableFuncRange mutableFuncs() {
116 return MutableFuncRange(funcBegin(), funcEnd());
118 void*& mergeableObj(int ix) { return ((void**)m_mergeables)[ix]; }
119 void* mergeableData(int ix) { return (char*)m_mergeables + ix*sizeof(void*); }
122 // Exception handler table entry.
123 class EHEnt {
124 public:
125 enum class Type {
126 Catch,
127 Fault
129 Type m_type;
130 Offset m_base;
131 Offset m_past;
132 int m_iterId;
133 bool m_itRef;
134 int m_parentIndex;
135 Offset m_fault;
136 typedef std::vector<std::pair<Id, Offset> > CatchVec;
137 CatchVec m_catches;
139 template<class SerDe> void serde(SerDe& sd) {
140 sd(m_type)
141 (m_base)
142 (m_past)
143 (m_iterId)
144 (m_fault)
145 (m_itRef)
146 (m_parentIndex)
148 if (m_type == Type::Catch) {
149 sd(m_catches);
154 // Function parameter info region table entry.
155 class FPIEnt {
156 public:
157 Offset m_fpushOff;
158 Offset m_fcallOff;
159 Offset m_fpOff; // evaluation stack depth to current frame pointer
160 int m_parentIndex;
161 int m_fpiDepth;
163 template<class SerDe> void serde(SerDe& sd) {
164 sd(m_fpushOff)(m_fcallOff)(m_fpOff);
165 // These fields are recomputed by sortFPITab:
166 // m_parentIndex;
167 // m_fpiDepth;
171 class SourceLoc {
172 public:
173 SourceLoc() : line0(1), char0(1), line1(1), char1(1) {}
174 explicit SourceLoc(const Location& l) { setLoc(&l); }
176 int line0;
177 int char0;
178 int line1;
179 int char1;
181 // {1, 1, 1, 1} is a special "invalid" value.
182 void reset() {
183 line0 = char0 = line1 = char1 = 1;
186 bool valid() {
187 return line0 != 1 || char0 != 1 || line1 != 1 || char1 != 1;
190 void setLoc(const Location *l) {
191 line0 = l->line0;
192 char0 = l->char0;
193 line1 = l->line1;
194 char1 = l->char1;
197 bool same(const SourceLoc *l) const {
198 return (this == l) ||
199 (line0 == l->line0 && char0 == l->char0 &&
200 line1 == l->line1 && char1 == l->char1);
203 bool operator==(const SourceLoc &l) const {
204 return same(&l);
208 class OffsetRange {
209 public:
210 OffsetRange() : m_base(0), m_past(0) {}
211 OffsetRange(Offset base, Offset past) : m_base(base), m_past(past) {}
212 Offset m_base;
213 Offset m_past;
215 typedef std::vector<OffsetRange> OffsetRangeVec;
217 template<typename T>
218 class TableEntry {
219 public:
220 TableEntry() : m_pastOffset(0) {}
222 TableEntry(Offset pastOffset, T val)
223 : m_pastOffset(pastOffset), m_val(val) {}
224 Offset pastOffset() const { return m_pastOffset; }
225 T val() const { return m_val; }
226 bool operator <(const TableEntry& other) const {
227 return m_pastOffset < other.m_pastOffset;
230 template<class SerDe> void serde(SerDe& sd) { sd(m_pastOffset)(m_val); }
232 private:
233 Offset m_pastOffset;
234 T m_val;
237 typedef TableEntry<int> LineEntry;
238 typedef std::vector<LineEntry> LineTable;
239 typedef TableEntry<SourceLoc> SourceLocEntry;
240 typedef std::vector<SourceLocEntry> SourceLocTable;
241 typedef std::map<int, OffsetRangeVec> LineToOffsetRangeVecMap;
242 typedef TableEntry<const Func*> FuncEntry;
243 typedef std::vector<FuncEntry> FuncTable;
245 //==============================================================================
246 // (const StringData*) versus (StringData*)
248 // All (const StringData*) values are static strings that came from e.g.
249 // makeStaticString(). Therefore no reference counting is required.
251 //==============================================================================
253 // Functions for differentiating global litstrId's from unit-local Id's.
254 const int kGlobalLitstrOffset = 0x40000000;
255 inline bool isGlobalLitstrId(Id id) { return id >= kGlobalLitstrOffset; }
256 inline Id encodeGlobalLitstrId(Id id) { return id + kGlobalLitstrOffset; }
257 inline Id decodeGlobalLitstrId(Id id) { return id - kGlobalLitstrOffset; }
260 * Global table of literal strings. This can only be safely used when
261 * the repo is built in WholeProgram mode and run in RepoAuthoritative
262 * mode.
264 class LitstrTable {
265 private:
266 static LitstrTable* s_litstrTable;
268 public:
269 static void init() {
270 LitstrTable::s_litstrTable = new LitstrTable();
273 static LitstrTable& get() {
274 return *LitstrTable::s_litstrTable;
277 ~LitstrTable() {}
278 Id mergeLitstr(const StringData* litstr);
279 size_t numLitstrs() { return m_namedInfo.size(); }
280 StringData* lookupLitstrId(Id id) const;
281 const NamedEntity* lookupNamedEntityId(Id id) const;
282 const NamedEntityPair& lookupNamedEntityPairId(Id id) const;
283 void insert(RepoTxn& txn, UnitOrigin uo);
284 Mutex& mutex() { return m_mutex; }
286 void setReading() { m_safeToRead = true; }
287 void setWriting() { m_safeToRead = false; }
289 private:
290 LitstrTable() {}
291 typedef hphp_hash_map<const StringData*, Id,
292 string_data_hash, string_data_same> LitstrMap;
294 LitstrMap m_litstr2id;
295 std::vector<const StringData*> m_litstrs;
296 std::vector<NamedEntityPair> m_namedInfo;
297 Mutex m_mutex;
298 std::atomic<bool> m_safeToRead;
302 * Metadata about a compilation unit.
304 * Contains the list of PreClasses and global functions, along with a
305 * special function called the 'pseudo-main', which is logically
306 * invoked (modulo optimizations that avoid it) during execution when
307 * the unit is included/required.
309 struct Unit {
310 friend class UnitEmitter;
311 friend class UnitRepoProxy;
312 friend class FuncDict;
314 typedef UnitMergeInfo::FuncRange FuncRange;
315 typedef UnitMergeInfo::MutableFuncRange MutableFuncRange;
317 typedef hphp_hash_map<const Class*, Func*,
318 pointer_hash<Class> > PseudoMainCacheMap;
320 class MetaInfo {
321 public:
322 enum class Kind {
323 None,
324 Class,
327 * Marks types that are proven to be a particular type by static
328 * analysis. Guards are not needed in these cases.
330 DataTypeInferred,
333 * Marks types that are predicted by static analysis. Guards
334 * will still be needed in case the prediction is wrong.
336 DataTypePredicted,
338 GuardedThis,
339 GuardedCls,
342 * Information about the known class of a property base in the
343 * middle of a vector instruction.
345 * In this case, m_arg is the index of the member code for the
346 * relevant property dim. (Unlike other cases, m_arg is not an
347 * index into the instruction inputs in NormalizedInstruction.)
349 * Whatever the base is when processing that member code will be
350 * an object of the supplied class type (or a null).
352 MVecPropClass,
356 * This flag is used to mark that m_arg is an index into an
357 * MVector input list. (We need to know this so we can bump the
358 * indexes different amounts depending on the instruction type;
359 * see applyInputMetaData.)
361 static const int VectorArg = 1 << 7;
363 MetaInfo(Kind k, int a, Id d) : m_kind(k), m_arg(a), m_data(d) {
364 assert((int)m_arg == a);
366 MetaInfo() : m_kind(Kind::None), m_arg(-1), m_data(0) {}
369 * m_arg indicates which input the MetaInfo applies to.
371 * For instructions taking vector immediates, it is an index into
372 * the immediate elements, excluding any MW members (and including
373 * the base). (This is currently even if the instruction takes
374 * other stack arguments.)
376 Kind m_kind;
377 uint8_t m_arg;
378 Id m_data;
381 class MetaHandle {
383 The meta-data in Unit::m_bc_meta is stored as:
385 Offset <num entries>
386 Offset byte-code-offset-1
387 Offset byte-code-offset-2
389 Offset byte-code_offset-n
390 Offset INT_MAX # sentinel
391 Offset data-offset-1
392 Offset data-offset-2
394 Offset data-offset-n
395 Offset m_bc_meta_len # sentinel
396 uint8 m_kind1
397 uint8 m_arg1
398 VSI m_data1
400 uint8 m_kind-n
401 uint8 m_arg-n
402 VSI m_data-n
404 public:
405 MetaHandle() : index(nullptr), cur(0) {}
406 bool findMeta(const Unit* unit, Offset offset);
407 bool nextArg(MetaInfo& info);
408 private:
409 const Offset* index;
410 unsigned cur;
411 const uint8_t *ptr;
414 Unit();
415 ~Unit();
416 void* operator new(size_t sz);
417 void operator delete(void* p, size_t sz);
419 int repoId() const { return m_repoId; }
420 int64_t sn() const { return m_sn; }
422 PC entry() const { return m_bc; }
423 Offset bclen() const { return m_bclen; }
425 PC at(Offset off) const {
426 assert(off >= 0 && off <= Offset(m_bclen));
427 return m_bc + off;
430 Offset offsetOf(PC pc) const {
431 assert(contains(pc));
432 return pc - m_bc;
435 bool contains(PC pc) const {
436 return pc >= m_bc && pc <= m_bc + m_bclen;
439 const StringData* filepath() const {
440 assert(m_filepath);
441 return m_filepath;
443 const String& filepathRef() const {
444 assert(m_filepath);
445 return *(String*)(&m_filepath);
447 const StringData* dirpath() const {
448 assert(m_dirpath);
449 return m_dirpath;
452 MD5 md5() const { return m_md5; }
454 static NamedEntity* GetNamedEntity(const StringData *str,
455 bool allowCreate = true,
456 String* normStr = nullptr) FLATTEN;
458 static size_t GetNamedEntityTableSize();
459 static Array getClassesInfo();
460 static Array getInterfacesInfo();
461 static Array getTraitsInfo();
462 static Array getClassesWithAttrInfo(HPHP::Attr attrs, bool inverse = false);
463 static Array getUserFunctions() { return getFunctions(false); }
464 static Array getSystemFunctions() { return getFunctions(true); }
466 private:
467 static Array getFunctions(bool system);
469 public:
470 size_t numLitstrs() const {
471 return m_namedInfo.size();
474 StringData* lookupLitstrId(Id id) const {
475 if (isGlobalLitstrId(id)) {
476 return LitstrTable::get().lookupLitstrId(decodeGlobalLitstrId(id));
478 assert(id >= 0 && id < Id(m_namedInfo.size()));
479 return const_cast<StringData*>(m_namedInfo[id].first);
482 const NamedEntity* lookupNamedEntityId(Id id) const {
483 if (isGlobalLitstrId(id)) {
484 return LitstrTable::get().lookupNamedEntityId(decodeGlobalLitstrId(id));
486 return lookupNamedEntityPairId(id).second;
489 const NamedEntityPair& lookupNamedEntityPairId(Id id) const {
490 if (isGlobalLitstrId(id)) {
491 auto decodedId = decodeGlobalLitstrId(id);
492 return LitstrTable::get().lookupNamedEntityPairId(decodedId);
494 assert(id < Id(m_namedInfo.size()));
495 const NamedEntityPair &ne = m_namedInfo[id];
496 assert(ne.first);
497 assert(ne.first->data()[ne.first->size()] == 0);
498 assert(ne.first->data()[0] != '\\');
499 if (UNLIKELY(!ne.second)) {
500 const_cast<const NamedEntity*&>(ne.second) = GetNamedEntity(ne.first);
502 return ne;
505 bool checkStringId(Id id) const;
507 size_t numArrays() const {
508 return m_arrays.size();
510 ArrayData* lookupArrayId(Id id) const {
511 return const_cast<ArrayData*>(m_arrays.at(id));
514 static Func* lookupFunc(const NamedEntity *ne);
515 static Func* lookupFunc(const StringData *funcName);
516 static Func* loadFunc(const NamedEntity *ne, const StringData* name);
517 static Func* loadFunc(const StringData* name);
519 static Class* defClass(const HPHP::PreClass* preClass,
520 bool failIsFatal = true);
521 static bool aliasClass(Class* original, const StringData* alias);
522 void defTypeAlias(Id id);
524 static const Cell* lookupCns(const StringData* cnsName);
525 static const Cell* lookupPersistentCns(const StringData* cnsName);
526 static const Cell* loadCns(const StringData* cnsName);
527 static bool defCns(const StringData* cnsName, const TypedValue* value,
528 bool persistent = false);
529 static uint64_t defCnsHelper(uint64_t ch,
530 const TypedValue* value,
531 const StringData* cnsName);
532 static void defDynamicSystemConstant(const StringData* cnsName,
533 const void* data);
534 static bool defCnsDynamic(const StringData* cnsName, TypedValue* value);
537 * Find the Class* for a defined class corresponding to the name
538 * `clsName'.
540 * Returns: nullptr if the class of the given name is not yet
541 * defined in this request.
543 static Class *lookupClass(const StringData *clsName) {
544 return lookupClass(GetNamedEntity(clsName));
548 * Find the Class* for a defined class with name mapped to the
549 * supplied NamedEntity.
551 * Returns: nullptr if the class is not yet defined in this request.
553 static Class *lookupClass(const NamedEntity *ne) {
554 Class* cls;
555 if (LIKELY((cls = ne->getCachedClass()) != nullptr)) {
556 return cls;
558 return nullptr;
562 * Same as lookupClass, except if it's not defined *and* is unique,
563 * return the Class* anyway.
565 * The point of this is that when jitting code before a unique class
566 * is defined, we can often still burn the Class* into the TC, since
567 * it will be defined by the time the code that needs the Class*
568 * runs (via autoload or whatnot).
570 static Class *lookupUniqueClass(const NamedEntity *ne) {
571 Class* cls = ne->clsList();
572 if (LIKELY(cls != nullptr)) {
573 if (cls->attrs() & AttrUnique && RuntimeOption::RepoAuthoritative) {
574 return cls;
576 return cls->getCached();
578 return nullptr;
581 static Class *lookupUniqueClass(const StringData *clsName) {
582 return lookupUniqueClass(GetNamedEntity(clsName));
585 static Class *loadClass(const NamedEntity *ne,
586 const StringData *name);
588 static Class *loadClass(const StringData *name) {
589 String normStr;
590 auto ne = GetNamedEntity(name, true, &normStr);
591 if (normStr) {
592 name = normStr.get();
594 return loadClass(ne, name);
597 static Class *loadMissingClass(const NamedEntity *ne,
598 const StringData *name);
600 static Class* getClass(const StringData* name, bool tryAutoload) {
601 String normStr;
602 auto ne = GetNamedEntity(name, true, &normStr);
603 if (normStr) {
604 name = normStr.get();
606 return getClass(ne, name, tryAutoload);
609 static Class* getClass(const NamedEntity *ne, const StringData *name,
610 bool tryAutoload);
611 static bool classExists(const StringData* name, bool autoload,
612 Attr typeAttrs);
614 bool compileTimeFatal(const StringData*& msg, int& line) const;
615 bool parseFatal(const StringData*& msg, int& line) const;
616 const TypedValue *getMainReturn() const {
617 assert(isMergeOnly());
618 return &m_mainReturn;
621 private:
622 template <bool debugger>
623 void mergeImpl(void* tcbase, UnitMergeInfo* mi);
624 public:
625 Func* firstHoistable() const {
626 return *m_mergeInfo->funcHoistableBegin();
628 Func* getMain(Class* cls = nullptr) const;
629 // Ranges for iterating over functions.
630 MutableFuncRange nonMainFuncs() const {
631 return m_mergeInfo->nonMainFuncs();
633 MutableFuncRange hoistableFuncs() const {
634 return m_mergeInfo->hoistableFuncs();
636 void renameFunc(const StringData* oldName, const StringData* newName);
637 static void loadFunc(const Func *func);
638 FuncRange funcs() const {
639 return m_mergeInfo->funcs();
641 MutableFuncRange mutableFuncs() {
642 return m_mergeInfo->mutableFuncs();
644 Func* lookupFuncId(Id id) const {
645 assert(id < Id(m_mergeInfo->m_firstHoistablePreClass));
646 return m_mergeInfo->funcBegin()[id];
648 size_t numPreClasses() const {
649 return (size_t)m_preClasses.size();
651 PreClass* lookupPreClassId(Id id) const {
652 assert(id < Id(m_preClasses.size()));
653 return m_preClasses[id].get();
655 typedef std::vector<PreClassPtr> PreClassPtrVec;
656 typedef Range<PreClassPtrVec> PreClassRange;
657 void initialMerge();
658 void merge();
659 PreClassRange preclasses() const {
660 return PreClassRange(m_preClasses);
662 bool mergeClasses() const;
664 int getLineNumber(Offset pc) const;
665 bool getSourceLoc(Offset pc, SourceLoc& sLoc) const;
666 bool getOffsetRanges(int line, OffsetRangeVec& offsets) const;
667 bool getOffsetRange(Offset pc, OffsetRange& range) const;
669 Op getOpcode(size_t instrOffset) const {
670 assert(instrOffset < m_bclen);
671 return static_cast<Op>(m_bc[instrOffset]);
675 * Return the Func* for the code at offset off.
677 * Returns nullptr if the offset is not in a func body (but this
678 * should be impossible).
680 const Func* getFunc(Offset pc) const;
682 void setCacheId(unsigned id) {
683 m_cacheOffset = id >> 3;
684 m_cacheMask = 1 << (id & 7);
686 bool isInterpretOnly() const { return m_interpretOnly; }
687 void setInterpretOnly() { m_interpretOnly = true; }
688 bool isMergeOnly() const { return m_mergeOnly; }
689 bool isEmpty() const { return m_mergeState & UnitMergeStateEmpty; }
690 void* replaceUnit() const;
692 public:
693 static Mutex s_classesMutex;
695 struct PrintOpts {
696 PrintOpts()
697 : startOffset(kInvalidOffset)
698 , stopOffset(kInvalidOffset)
699 , showLines(true)
700 , showFuncs(true)
701 , indentSize(1)
704 PrintOpts& range(Offset start, Offset stop) {
705 startOffset = start;
706 stopOffset = stop;
707 return *this;
710 PrintOpts& noLineNumbers() {
711 showLines = false;
712 return *this;
715 PrintOpts& noFuncs() {
716 showFuncs = false;
717 return *this;
720 PrintOpts& indent(int i) {
721 indentSize = i;
722 return *this;
725 Offset startOffset;
726 Offset stopOffset;
727 bool showLines;
728 bool showFuncs;
729 int indentSize;
732 void prettyPrint(std::ostream&, PrintOpts = PrintOpts()) const;
733 std::string toString() const;
735 public: // Translator field access
736 static size_t bcOff() { return offsetof(Unit, m_bc); }
738 private:
739 // List of (offset, sourceLoc) where offset is the offset of the first byte
740 // code of the next source location if there is one, m_bclen otherwise.
741 // Sorted by offset. sourceLocs are not assumed to be unique.
742 SourceLocTable getSourceLocTable() const;
743 // A map from all source lines that correspond to one or more byte codes.
744 // The result from the map is a list of offset ranges, so a single line
745 // with several sub-statements may correspond to the byte codes of all
746 // of the sub-statements.
747 LineToOffsetRangeVecMap getLineToOffsetRangeVecMap() const;
750 Frequently used fields.
751 Do not reorder without good reason
753 unsigned char const* m_bc{nullptr};
754 size_t m_bclen{0};
755 const StringData* m_filepath{nullptr};
756 // List of (line, offset) where offset is the offset of the first byte code
757 // of the next line if there is one, m_bclen otherwise.
758 // Sorted by offset. line values are not assumed to be unique.
759 LineTable m_lineTable;
760 UnitMergeInfo* m_mergeInfo{nullptr};
761 unsigned m_cacheOffset{0};
762 int8_t m_repoId{-1};
763 uint8_t m_mergeState{UnitMergeStateUnmerged};
764 uint8_t m_cacheMask{0};
765 bool m_mergeOnly{false};
// Read by isInterpretOnly() and raised by setInterpretOnly().  Given a
// default member initializer like the neighbouring flags, so the value is
// defined even if a constructor does not set it.
bool m_interpretOnly{false};
767 // pseudoMain's return value, or KindOfUninit if it's not known.
768 TypedValue m_mainReturn;
769 PreClassPtrVec m_preClasses;
770 FixedVector<TypeAlias> m_typeAliases;
772 End of frequently used fields
775 int64_t m_sn{-1};
776 unsigned char const* m_bc_meta{nullptr};
777 size_t m_bc_meta_len{0};
778 const StringData* m_dirpath{nullptr};
779 MD5 m_md5;
780 std::vector<NamedEntityPair> m_namedInfo;
781 std::vector<const ArrayData*> m_arrays;
782 SourceLocTable m_sourceLocTable;
783 LineToOffsetRangeVecMap m_lineToOffsetRangeVecMap;
784 FuncTable m_funcTable;
785 mutable PseudoMainCacheMap *m_pseudoMainCache{nullptr};
788 int getLineNumber(const LineTable& table, Offset pc);
789 bool getSourceLoc(const SourceLocTable& table, Offset pc, SourceLoc& sLoc);
791 class UnitEmitter {
792 friend class UnitRepoProxy;
793 friend class ::HPHP::Compiler::Peephole;
794 public:
795 explicit UnitEmitter(const MD5& md5);
796 ~UnitEmitter();
798 bool isASystemLib() const {
799 static const char systemlib_prefix[] = "/:systemlib";
800 return !strncmp(getFilepath()->data(),
801 systemlib_prefix,
802 sizeof systemlib_prefix - 1);
805 void addTrivialPseudoMain();
806 int repoId() const { return m_repoId; }
807 void setRepoId(int repoId) { m_repoId = repoId; }
808 int64_t sn() const { return m_sn; }
809 void setSn(int64_t sn) { m_sn = sn; }
810 const unsigned char* bc() const { return m_bc; }
811 Offset bcPos() const { return (Offset)m_bclen; }
812 void setBc(const unsigned char* bc, size_t bclen);
813 void setBcMeta(const unsigned char* bc_meta, size_t bc_meta_len);
814 const StringData* getFilepath() const { return m_filepath; }
815 void setFilepath(const StringData* filepath) { m_filepath = filepath; }
816 void setMainReturn(const TypedValue* v) { m_mainReturn = *v; }
817 void setMergeOnly(bool b) { m_mergeOnly = b; }
818 const MD5& md5() const { return m_md5; }
819 Id addTypeAlias(const TypeAlias& td);
820 Id mergeLitstr(const StringData* litstr);
821 Id mergeUnitLitstr(const StringData* litstr);
822 Id mergeArray(const ArrayData* a);
823 Id mergeArray(const ArrayData* a, const std::string& key);
824 const StringData* lookupLitstr(Id id) const;
825 const ArrayData* lookupArray(Id id) const;
826 FuncEmitter* getMain();
827 void initMain(int line1, int line2);
828 FuncEmitter* newFuncEmitter(const StringData* n);
829 void appendTopEmitter(FuncEmitter* func);
830 FuncEmitter* newMethodEmitter(const StringData* n, PreClassEmitter* pce);
831 PreClassEmitter* newPreClassEmitter(const StringData* n,
832 PreClass::Hoistable hoistable);
833 PreClassEmitter* pce(Id preClassId) { return m_pceVec[preClassId]; }
834 const PreClassEmitter* pce(Id preClassId) const {
835 return m_pceVec[preClassId];
837 size_t numPreClasses() const { return m_pceVec.size(); }
838 const std::vector<FuncEmitter*>& fevec() const { return m_fes; }
839 const std::vector<TypeAlias>& typeAliases() const { return m_typeAliases; }
842 * Record source location information for the last chunk of bytecode
843 * added to this UnitEmitter. Adjacent regions associated with the
844 * same source line will be collapsed as this is created.
846 void recordSourceLocation(const Location *sLoc, Offset start);
849 * Return the SrcLocTable for this unit emitter, if it has one.
850 * Otherwise an empty table is returned.
852 SourceLocTable createSourceLocTable() const;
855 * Returns whether this unit emitter contains full SourceLoc
856 * information.
858 bool hasSourceLocInfo() const { return !m_sourceLocTab.empty(); }
861 * Returns access to this UnitEmitter's LineTable. Generally
862 * UnitEmitters loaded from a production repo will have a line table
863 * instead of a full SourceLocTable.
865 const LineTable& lineTable() const { return m_lineTable; }
868 * Adds a new FuncEmitter to the unit. You can only do this once
869 * for the FuncEmitter (after you are done setting it up). Also,
870 * all FuncEmitter's added to the unit must not overlap.
872 * Takes ownership of `fe'.
874 void recordFunction(FuncEmitter *fe);
876 private:
877 template<class T>
878 void emitImpl(T n, int64_t pos) {
879 auto *c = (unsigned char*)&n;
880 if (pos == -1) {
881 // Make sure m_bc is large enough.
882 while (m_bclen + sizeof(T) > m_bcmax) {
883 m_bc = (unsigned char*)realloc(m_bc, m_bcmax << 1);
884 m_bcmax <<= 1;
886 memcpy(&m_bc[m_bclen], c, sizeof(T));
887 m_bclen += sizeof(T);
888 } else {
889 assert(pos + sizeof(T) <= m_bclen);
890 for (uint i = 0; i < sizeof(T); ++i) {
891 m_bc[pos + i] = c[i];
895 public:
896 void emitOp(Op op, int64_t pos = -1) {
897 emitByte((unsigned char)op, pos);
899 void emitByte(unsigned char n, int64_t pos = -1) { emitImpl(n, pos); }
900 void emitInt32(int n, int64_t pos = -1) { emitImpl(n, pos); }
901 template<typename T> void emitIVA(T n) {
902 if (LIKELY((n & 0x7f) == n)) {
903 emitByte((unsigned char)n << 1);
904 } else {
905 assert((n & 0x7fffffff) == n);
906 emitInt32((n << 1) | 0x1);
909 void emitInt64(int64_t n, int64_t pos = -1) { emitImpl(n, pos); }
910 void emitDouble(double n, int64_t pos = -1) { emitImpl(n, pos); }
911 bool insert(UnitOrigin unitOrigin, RepoTxn& txn);
912 void commit(UnitOrigin unitOrigin);
913 Func* newFunc(const FuncEmitter* fe, Unit& unit, Id id, PreClass* preClass,
914 int line1, int line2, Offset base, Offset past,
915 const StringData* name, Attr attrs, bool top,
916 const StringData* docComment, int numParams,
917 bool needsNextClonedClosure);
918 Unit* create();
919 void returnSeen() { m_returnSeen = true; }
920 void pushMergeableClass(PreClassEmitter* e);
921 void pushMergeableInclude(UnitMergeKind kind, const StringData* unitName);
922 void insertMergeableInclude(int ix, UnitMergeKind kind, Id id);
923 void pushMergeableDef(UnitMergeKind kind,
924 const StringData* name, const TypedValue& tv);
925 void insertMergeableDef(int ix, UnitMergeKind kind,
926 Id id, const TypedValue& tv);
927 private:
928 void setLines(const LineTable& lines);
930 private:
931 int m_repoId;
932 int64_t m_sn;
933 static const size_t BCMaxInit = 4096; // Initial bytecode size.
934 size_t m_bcmax;
935 unsigned char* m_bc;
936 size_t m_bclen;
937 unsigned char* m_bc_meta;
938 size_t m_bc_meta_len;
939 TypedValue m_mainReturn;
940 const StringData* m_filepath;
941 MD5 m_md5;
942 typedef hphp_hash_map<const StringData*, Id,
943 string_data_hash, string_data_same> LitstrMap;
944 LitstrMap m_litstr2id;
945 std::vector<const StringData*> m_litstrs;
946 typedef hphp_hash_map<std::string, Id, string_hash> ArrayIdMap;
947 ArrayIdMap m_array2id;
948 struct ArrayVecElm {
949 std::string serialized;
950 const ArrayData* array;
952 typedef std::vector<ArrayVecElm> ArrayVec;
953 ArrayVec m_arrays;
954 int m_nextFuncSn;
955 bool m_mergeOnly;
956 typedef std::vector<FuncEmitter*> FeVec;
957 FeVec m_fes;
958 typedef hphp_hash_map<const FuncEmitter*, const Func*,
959 pointer_hash<FuncEmitter> > FMap;
960 FMap m_fMap;
961 typedef std::vector<PreClassEmitter*> PceVec;
962 typedef std::list<Id> IdList;
963 PceVec m_pceVec;
964 typedef hphp_hash_set<const StringData*, string_data_hash,
965 string_data_isame> HoistedPreClassSet;
966 HoistedPreClassSet m_hoistablePreClassSet;
967 IdList m_hoistablePceIdList;
968 typedef std::vector<std::pair<UnitMergeKind, Id> > MergeableStmtVec;
969 MergeableStmtVec m_mergeableStmts;
970 std::vector<std::pair<Id,TypedValue> > m_mergeableValues;
971 bool m_allClassesHoistable;
972 bool m_returnSeen;
974 * m_sourceLocTab and m_feTab are interval maps. Each entry encodes
975 * an open-closed range of bytecode offsets.
977 * The m_sourceLocTab is keyed by the start of each half-open range.
978 * This is to allow appending new bytecode offsets that are part of
979 * the same range to coalesce.
981 * The m_feTab is keyed by the past-the-end offset. This is the
982 * format we'll want it in when we go to create a Unit.
984 std::vector<std::pair<Offset,SourceLoc> > m_sourceLocTab;
985 std::vector<std::pair<Offset,const FuncEmitter*> > m_feTab;
986 LineTable m_lineTable;
987 std::vector<TypeAlias> m_typeAliases;
990 //////////////////////////////////////////////////////////////////////
993 * Member functions of LitstrTable inlined for perf. Must come after
994 * Unit definition to break circular dependences.
996 inline
997 StringData* LitstrTable::lookupLitstrId(Id id) const {
998 assert(m_safeToRead);
999 assert(id >= 0 && id < Id(s_litstrTable->m_litstrs.size()));
1000 return const_cast<StringData*>(s_litstrTable->m_litstrs[id]);
1003 inline
1004 const NamedEntity* LitstrTable::lookupNamedEntityId(Id id) const {
1005 assert(m_safeToRead);
1006 return lookupNamedEntityPairId(id).second;
1009 inline
1010 const NamedEntityPair& LitstrTable::lookupNamedEntityPairId(Id id) const {
1011 assert(m_safeToRead);
1012 assert(id >= 0 && id < Id(s_litstrTable->m_namedInfo.size()));
1013 const NamedEntityPair& ne = s_litstrTable->m_namedInfo[id];
1014 assert(ne.first);
1015 assert(ne.first->data()[ne.first->size()] == 0);
1016 assert(ne.first->data()[0] != '\\');
1017 if (UNLIKELY(!ne.second)) {
1018 const_cast<const NamedEntity*&>(ne.second) = Unit::GetNamedEntity(ne.first);
1020 return ne;
1023 //////////////////////////////////////////////////////////////////////
// Repo proxy for Unit data.  Besides the loaders declared first, the class
// declares one nested RepoProxy::Stmt subclass per entry of URP_OPS and, via
// the URP_OP expansion at the end, an accessor plus storage for each of them.
// All non-inline member functions are defined outside this header.
1025 class UnitRepoProxy : public RepoProxy {
1026 friend class Unit;
1027 friend class UnitEmitter;
1028 public:
1029 explicit UnitRepoProxy(Repo& repo);
1030 ~UnitRepoProxy();
1031 void createSchema(int repoId, RepoTxn& txn);
// Both loaders take a name string and an MD5.  load() returns a raw Unit*;
// loadEmitter() returns the UnitEmitter wrapped in a std::unique_ptr.
1032 Unit* load(const std::string& name, const MD5& md5);
1033 std::unique_ptr<UnitEmitter> loadEmitter(const std::string& name,
1034 const MD5& md5);
// URP_IOP(X) -> URP_OP(InsertX, insertX); URP_GOP(X) -> URP_OP(GetX, getX).
// URP_OP itself is defined near the end of the class and appends "Stmt" to
// the first argument to form the statement class name.
1036 #define URP_IOP(o) URP_OP(Insert##o, insert##o)
1037 #define URP_GOP(o) URP_OP(Get##o, get##o)
// URP_OPS lists every statement of this proxy.  It is expanded once, at the
// end of the class, after URP_OP has been defined.  Each entry must have a
// matching <Insert|Get><Name>Stmt class declared below.
// NOTE(review): only URP_OP is #undef'd in this class; URP_IOP, URP_GOP and
// URP_OPS are not undefined anywhere in the visible remainder of the header.
1038 #define URP_OPS \
1039 URP_IOP(Unit) \
1040 URP_GOP(Unit) \
1041 URP_IOP(UnitLitstr) \
1042 URP_GOP(UnitLitstrs) \
1043 URP_IOP(UnitArray) \
1044 URP_GOP(UnitArrays) \
1045 URP_IOP(UnitMergeable) \
1046 URP_GOP(UnitMergeables) \
1047 URP_IOP(UnitSourceLoc) \
1048 URP_GOP(SourceLoc) \
1049 URP_GOP(SourceLocTab) \
1050 URP_GOP(SourceLocPastOffsets) \
1051 URP_GOP(SourceLocBaseOffset) \
1052 URP_GOP(BaseOffsetAtPCLoc) \
1053 URP_GOP(BaseOffsetAfterPCLoc)
// Every statement class below has the same shape: a constructor forwarding
// (repo, repoId) to RepoProxy::Stmt, and a single insert() or get() member
// whose definition is out of line.
//
// InsertUnitStmt: unitSn is taken by non-const reference, unlike the other
// insert() overloads which take it by value (presumably it receives the
// serial number assigned to the new unit -- TODO confirm in the definition).
1054 class InsertUnitStmt : public RepoProxy::Stmt {
1055 public:
1056 InsertUnitStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1057 void insert(RepoTxn& txn, int64_t& unitSn, const MD5& md5,
1058 const unsigned char* bc,
1059 size_t bclen, const unsigned char* bc_meta,
1060 size_t bc_meta_len,
1061 const TypedValue* mainReturn, bool mergeOnly,
1062 const LineTable& lines,
1063 const std::vector<TypeAlias>&);
// GetUnitStmt: looks a unit up by MD5 and works on the supplied UnitEmitter.
// The meaning of the bool result is set by the out-of-line definition.
1065 class GetUnitStmt : public RepoProxy::Stmt {
1066 public:
1067 GetUnitStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1068 bool get(UnitEmitter& ue, const MD5& md5);
// InsertUnitLitstrStmt: one (litstrId, litstr) entry for unit unitSn.
1070 class InsertUnitLitstrStmt : public RepoProxy::Stmt {
1071 public:
1072 InsertUnitLitstrStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1073 void insert(RepoTxn& txn, int64_t unitSn, Id litstrId,
1074 const StringData* litstr);
// GetUnitLitstrsStmt: operates on the given UnitEmitter; no result value.
1076 class GetUnitLitstrsStmt : public RepoProxy::Stmt {
1077 public:
1078 GetUnitLitstrsStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1079 void get(UnitEmitter& ue);
// InsertUnitArrayStmt: one (arrayId, array) entry for unit unitSn; the array
// is passed as a std::string (presumably a serialized form -- TODO confirm).
1081 class InsertUnitArrayStmt : public RepoProxy::Stmt {
1082 public:
1083 InsertUnitArrayStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1084 void insert(RepoTxn& txn, int64_t unitSn, Id arrayId,
1085 const std::string& array);
// GetUnitArraysStmt: operates on the given UnitEmitter; no result value.
1087 class GetUnitArraysStmt : public RepoProxy::Stmt {
1088 public:
1089 GetUnitArraysStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1090 void get(UnitEmitter& ue);
// InsertUnitMergeableStmt: one mergeable entry (index ix, kind, id, value)
// for unit unitSn.  Note that value is a non-const TypedValue pointer.
1092 class InsertUnitMergeableStmt : public RepoProxy::Stmt {
1093 public:
1094 InsertUnitMergeableStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1095 void insert(RepoTxn& txn, int64_t unitSn,
1096 int ix, UnitMergeKind kind,
1097 Id id, TypedValue *value);
// GetUnitMergeablesStmt: operates on the given UnitEmitter; no result value.
1099 class GetUnitMergeablesStmt : public RepoProxy::Stmt {
1100 public:
1101 GetUnitMergeablesStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1102 void get(UnitEmitter& ue);
// InsertUnitSourceLocStmt: one source-location row for unit unitSn, made of
// pastOffset and the (line0, char0) / (line1, char1) pair.
1104 class InsertUnitSourceLocStmt : public RepoProxy::Stmt {
1105 public:
1106 InsertUnitSourceLocStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1107 void insert(RepoTxn& txn, int64_t unitSn, Offset pastOffset, int line0,
1108 int char0, int line1, int char1);
// The remaining Get*Stmt classes all take unitSn plus a lookup key and a
// non-const reference argument (presumably the output -- TODO confirm), and
// return bool; what true means is set by the out-of-line definitions.
//
// GetSourceLocStmt: keyed by bytecode offset pc; reference arg is a SourceLoc.
1110 class GetSourceLocStmt : public RepoProxy::Stmt {
1111 public:
1112 GetSourceLocStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1113 bool get(int64_t unitSn, Offset pc, SourceLoc& sLoc);
// GetSourceLocTabStmt: keyed by unit only; reference arg is a SourceLocTable.
1115 class GetSourceLocTabStmt : public RepoProxy::Stmt {
1116 public:
1117 GetSourceLocTabStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1118 bool get(int64_t unitSn, SourceLocTable& sourceLocTab);
// GetSourceLocPastOffsetsStmt: keyed by line; reference arg is an
// OffsetRangeVec.
1120 class GetSourceLocPastOffsetsStmt : public RepoProxy::Stmt {
1121 public:
1122 GetSourceLocPastOffsetsStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1123 bool get(int64_t unitSn, int line, OffsetRangeVec& ranges);
// GetSourceLocBaseOffsetStmt: reference arg is a single OffsetRange.
// NOTE(review): `range' is the only non-unitSn argument, so it may serve as
// both input and output -- confirm in the definition.
1125 class GetSourceLocBaseOffsetStmt : public RepoProxy::Stmt {
1126 public:
1127 GetSourceLocBaseOffsetStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1128 bool get(int64_t unitSn, OffsetRange& range);
// GetBaseOffsetAtPCLocStmt: keyed by pc; reference arg is an Offset.
1130 class GetBaseOffsetAtPCLocStmt : public RepoProxy::Stmt {
1131 public:
1132 GetBaseOffsetAtPCLocStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1133 bool get(int64_t unitSn, Offset pc, Offset& offset);
// GetBaseOffsetAfterPCLocStmt: same signature as GetBaseOffsetAtPCLocStmt.
1135 class GetBaseOffsetAfterPCLocStmt : public RepoProxy::Stmt {
1136 public:
1137 GetBaseOffsetAfterPCLocStmt(Repo& repo, int repoId) : Stmt(repo, repoId) {}
1138 bool get(int64_t unitSn, Offset pc, Offset& offset);
1141 private:
// Shared helper taking the same (name, MD5) pair as load()/loadEmitter()
// plus the UnitEmitter to work on; presumably used by both -- TODO confirm.
1142 bool loadHelper(UnitEmitter& ue, const std::string&, const MD5&);
// For each URP_OP(c, o) entry of URP_OPS this emits:
//   - a public accessor  c##Stmt& o(int repoId), returning *m_##o[repoId];
//   - two private statement objects, m_##o##Local and m_##o##Central;
//   - a private array m_##o[RepoIdCount] of pointers to c##Stmt.
// The accessor dereferences the array without a bounds or null check.
// NOTE(review): the array entries are presumably pointed at the Local and
// Central members by the constructor -- confirm in the .cpp.
// The expansion leaves the class in its private section.
1144 #define URP_OP(c, o) \
1145 public: \
1146 c##Stmt& o(int repoId) { return *m_##o[repoId]; } \
1147 private: \
1148 c##Stmt m_##o##Local; \
1149 c##Stmt m_##o##Central; \
1150 c##Stmt* m_##o[RepoIdCount];
1151 URP_OPS
1152 #undef URP_OP
1155 //////////////////////////////////////////////////////////////////////
1157 struct ConstPreClassMethodRanger {
1158 typedef Func* const* Iter;
1159 typedef const Func* Value;
1160 static Iter get(PreClassPtr pc) {
1161 return pc->methods();
1165 struct MutablePreClassMethodRanger {
1166 typedef Func** Iter;
1167 typedef Func* Value;
1168 static Func** get(PreClassPtr pc) {
1169 return pc->mutableMethods();
1173 template<typename FuncRange,
1174 typename GetMethods>
1175 struct AllFuncsImpl {
1176 explicit AllFuncsImpl(const Unit* unit)
1177 : fr(unit->funcs())
1178 , mr(0, 0)
1179 , cr(unit->preclasses())
1181 if (fr.empty()) skip();
1183 bool empty() const { return fr.empty() && mr.empty() && cr.empty(); }
1184 typedef typename GetMethods::Value FuncPtr;
1185 FuncPtr front() const {
1186 assert(!empty());
1187 if (!fr.empty()) return fr.front();
1188 assert(!mr.empty());
1189 return mr.front();
1191 FuncPtr popFront() {
1192 FuncPtr f = !fr.empty() ? fr.popFront() :
1193 !mr.empty() ? mr.popFront() : 0;
1194 assert(f);
1195 if (fr.empty() && mr.empty()) skip();
1196 return f;
1199 private:
1200 void skip() {
1201 assert(fr.empty());
1202 while (!cr.empty() && mr.empty()) {
1203 PreClassPtr c = cr.popFront();
1204 mr = Unit::FuncRange(GetMethods::get(c),
1205 GetMethods::get(c) + c->numMethods());
1209 Unit::FuncRange fr;
1210 Unit::FuncRange mr;
1211 Unit::PreClassRange cr;
1214 typedef AllFuncsImpl<Unit::FuncRange,ConstPreClassMethodRanger> AllFuncs;
1215 typedef AllFuncsImpl<Unit::MutableFuncRange,MutablePreClassMethodRanger>
1216 MutableAllFuncs;
1219 * Range over all defined classes.
1221 class AllClasses {
1222 NamedEntityMap::iterator m_next, m_end;
1223 Class* m_current;
1224 void next();
1225 void skip();
1227 public:
1228 AllClasses();
1229 bool empty() const;
1230 Class* front() const;
1231 Class* popFront();
1234 //////////////////////////////////////////////////////////////////////
1237 #endif