2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
18 * This module contains an assembler implementation for HHBC. It is
19 * probably fairly close to allowing you to access most of the
20 * metadata associated with hhvm's compiled unit format, although it's
21 * possible something has been overlooked.
23 * To use it, run hhvm with -v Eval.AllowHhas=true on a file with a
24 * ".hhas" extension. The syntax is probably easiest to understand by
25 * looking at some examples (or the semi-BNF markup around some of the
26 * parse functions here). For examples, see hphp/tests/vm/asm_*.
31 * - You can crash hhvm very easily with this.
33 * Using this module, you can emit pretty much any sort of not
34 * trivially-illegal bytecode stream, and many trivially-illegal
35 * ones as well. You can also easily create Units with illegal
36 * metadata. Generally this will crash the VM. In other cases
37 * (especially if you don't bother to DefCls your classes in your
38 * .main) you'll just get mysterious "class not defined" errors
41 * - Whitespace is not normally significant, but newlines may not
42 * be in the middle of a list of opcode arguments. (After the
43 * newline, the next thing seen is expected to be either a
44 * mnemonic for the next opcode in the stream or some sort of
45 * directive.) However, newlines (and comments) may appear
46 * *inside* certain opcode arguments (e.g. string literals or
49 * Rationale: this is partially intended to make it trivial to
50 * catch wrong-number-of-arguments errors, although it probably
51 * could be done without this if you feel like changing it.
56 * - It might be nice if you could refer to iterators by name
57 * instead of by index.
59 * - DefCls by name would be nice.
61 * Missing features (partial list):
63 * - while class/function names can contain ':', '$', and ';',
64 * .use declarations can't handle those names because of syntax
67 * @author Jordan DeLong <delong.j@fb.com>
70 #include "hphp/runtime/vm/as.h"
79 #include <boost/algorithm/string.hpp>
80 #include <boost/scoped_ptr.hpp>
81 #include <boost/bind.hpp>
83 #include <folly/Conv.h>
84 #include <folly/MapUtil.h>
85 #include <folly/Memory.h>
86 #include <folly/Range.h>
87 #include <folly/String.h>
89 #include "hphp/util/sha1.h"
91 #include "hphp/runtime/base/builtin-functions.h"
92 #include "hphp/runtime/base/memory-manager-defs.h"
93 #include "hphp/runtime/base/repo-auth-type-codec.h"
94 #include "hphp/runtime/base/repo-auth-type.h"
95 #include "hphp/runtime/base/variable-serializer.h"
96 #include "hphp/runtime/base/tv-type.h"
97 #include "hphp/runtime/vm/as-shared.h"
98 #include "hphp/runtime/vm/bc-pattern.h"
99 #include "hphp/runtime/vm/extern-compiler.h"
100 #include "hphp/runtime/vm/func-emitter.h"
101 #include "hphp/runtime/vm/hhbc.h"
102 #include "hphp/runtime/vm/native.h"
103 #include "hphp/runtime/vm/preclass-emitter.h"
104 #include "hphp/runtime/vm/record-emitter.h"
105 #include "hphp/runtime/vm/rx.h"
106 #include "hphp/runtime/vm/unit.h"
107 #include "hphp/runtime/vm/unit-emitter.h"
108 #include "hphp/system/systemlib.h"
109 #include "hphp/zend/zend-string.h"
115 AssemblerError::AssemblerError(int where
, const std::string
& what
)
116 : std::runtime_error(
117 folly::sformat("Assembler Error: line {}: {}", where
, what
))
120 //////////////////////////////////////////////////////////////////////
124 StringData
* makeDocComment(const String
& s
) {
125 if (RuntimeOption::EvalGenerateDocComments
) return makeStaticString(s
);
126 return staticEmptyString();
130 typedef void (*ParserFunc
)(AsmState
& as
);
133 explicit Input(std::istream
& in
)
137 int peek() { return m_in
.peek(); }
140 int ret
= m_in
.get();
143 } else if (ret
== '\n') {
149 void ungetc(char c
) {
150 if (c
== '\n') --m_lineNumber
;
156 error(folly::sformat("expected character `{}'", char(c
)));
161 * Expect `c' after possible whitespace/comments. When convenient,
162 * preferable to doing skipWhitespace/expect manually to keep the
163 * line number in the error prior to the whitespace skipped.
165 void expectWs(int c
) {
166 const int currentLine
= m_lineNumber
;
169 throw AssemblerError(currentLine
,
170 folly::sformat("expected character `{}'", char(c
)));
174 int getLineNumber() const {
178 // Skips whitespace, then populates word with valid bareword
179 // characters. Returns true if we read any characters into word.
180 bool readword(std::string
& word
) {
183 consumePred(is_bareword(), std::back_inserter(word
));
184 return !word
.empty();
186 // Skips whitespace, then populates name with valid extname
187 // characters. Returns true if we read any characters into name.
188 bool readname(std::string
& name
) {
191 consumePred(is_extname(), std::back_inserter(name
));
192 return !name
.empty();
194 // Try to consume a bareword. Skips whitespace. If we can't
195 // consume the specified word, returns false.
196 bool tryConsume(const std::string
& what
) {
198 if (!readword(word
)) {
202 std::for_each(word
.rbegin(), word
.rend(),
203 boost::bind(&Input::ungetc
, this, _1
));
211 if (peek() == '-') buf
+= (char)getc();
212 consumePred(isdigit
, std::back_inserter(buf
));
213 if (buf
.empty() || buf
== "-") {
214 throw AssemblerError(m_lineNumber
, "expected integral value");
216 return folly::to
<int32_t>(buf
);
219 // C-style character escapes, no support for unicode escapes or
221 template<class OutCont
>
222 void escapeChar(OutCont
& out
) {
223 auto is_oct
= [&] (int i
) { return i
>= '0' && i
<= '7'; };
224 auto is_hex
= [&] (int i
) {
225 return (i
>= '0' && i
<= '9') ||
226 (i
>= 'a' && i
<= 'f') ||
227 (i
>= 'A' && i
<= 'F');
229 auto hex_val
= [&] (int i
) -> uint32_t {
231 return i
>= '0' && i
<= '9' ? i
- '0' :
232 i
>= 'a' && i
<= 'f' ? i
- 'a' + 10 : i
- 'A' + 10;
237 case EOF
: error("EOF in string literal");
238 case 'a': out
.push_back('\a'); break;
239 case 'b': out
.push_back('\b'); break;
240 case 'f': out
.push_back('\f'); break;
241 case 'n': out
.push_back('\n'); break;
242 case 'r': out
.push_back('\r'); break;
243 case 't': out
.push_back('\t'); break;
244 case 'v': out
.push_back('\v'); break;
245 case '\'': out
.push_back('\''); break;
246 case '\"': out
.push_back('\"'); break;
247 case '\?': out
.push_back('\?'); break;
248 case '\\': out
.push_back('\\'); break;
249 case '\r': /* ignore */ break;
250 case '\n': /* ignore */ break;
253 auto val
= int64_t{src
} - '0';
254 for (auto i
= int{1}; i
< 3; ++i
) {
256 if (!is_oct(src
)) { ungetc(src
); break; }
260 if (val
> std::numeric_limits
<uint8_t>::max()) {
261 error("octal escape sequence overflowed");
263 out
.push_back(static_cast<uint8_t>(val
));
267 if (src
== 'x' || src
== 'X') {
268 auto val
= uint64_t{0};
269 if (!is_hex(peek())) error("\\x used without no following hex digits");
270 for (auto i
= int{0}; i
< 2; ++i
) {
272 if (!is_hex(src
)) { ungetc(src
); break; }
276 if (val
> std::numeric_limits
<uint8_t>::max()) {
277 error("hex escape sequence overflowed");
279 out
.push_back(static_cast<uint8_t>(val
));
283 error("unrecognized character escape");
287 // Reads a quoted string with typical escaping rules. Does not skip
288 // any whitespace. Returns true if we successfully read one, or
289 // false. EOF during the string throws.
290 bool readQuotedStr(std::string
& str
) {
292 if (peek() != '\"') {
298 while ((c
= getc()) != EOF
) {
300 case '\"': return true;
301 case '\\': escapeChar(str
); break;
302 default: str
.push_back(c
); break;
305 error("EOF in string literal");
311 * Reads a python-style longstring, or returns false if we don't
312 * have one. Does not skip any whitespace before looking for the
315 * Python longstrings start with \"\"\", and can contain any bytes
316 * other than \"\"\". A '\\' character introduces C-style escapes,
317 * but there's no need to escape single quote characters.
319 bool readLongString(std::vector
<char>& buffer
) {
320 if (peek() != '\"') return false;
322 if (peek() != '\"') { ungetc('\"'); return false; }
324 if (peek() != '\"') { ungetc('\"');
325 ungetc('\"'); return false; }
329 while ((c
= getc()) != EOF
) {
337 buffer
.push_back('"');
343 buffer
.push_back('"');
344 buffer
.push_back('"');
353 error("EOF in \"\"\"-string literal");
358 // Skips whitespace (including newlines and comments).
359 void skipWhitespace() {
360 while (skipPred(boost::is_any_of(" \t\r\n"))) {
362 skipPred(!boost::is_any_of("\n"));
370 // Skip spaces and tabs, but other whitespace (such as comments or
371 // newlines) stop the skip.
372 void skipSpaceTab() {
373 skipPred(boost::is_any_of(" \t"));
376 template<class Predicate
>
377 bool skipPred(Predicate pred
) {
378 while (pred(peek())) {
387 template<class Predicate
, class OutputIterator
>
388 bool consumePred(Predicate pred
, OutputIterator out
) {
390 while (pred(c
= peek())) {
402 // whether a character is a valid part of the extended sorts of
403 // names that HHVM uses for certain generated constructs
404 // (closures, __Memoize implementations, etc)
406 bool operator()(int i
) const {
408 return is_bw(i
) || i
== ':' || i
== ';' || i
== '#' || i
=='@' ||
409 (i
>= 0x7f && i
<= 0xff) /* see hphp.ll :( */;
413 void error(const std::string
& what
) {
414 throw AssemblerError(getLineNumber(), what
);
417 void io_error_if_bad() {
419 error("I/O error reading stream: " +
420 folly::errnoStr(errno
).toStdString());
432 * Tracks the depth of the stack in a given block of instructions.
434 * This structure is linked to a block of instructions (usually starting at a
435 * label), and tracks the current stack depth in this block. This tracking can
437 * - Absolute depth: the depth of the stack is exactly known for this block
438 * - Relative depth: the depth of the stack is unknown for now. We keep track
439 * of an offset, relative to the depth of the stack at the first instruction
445 * Tracks the max depth of elem stack + desc stack offset inside a region
446 * where baseValue is unknown.
450 * Tracks the min depth of the elem stack inside a region where baseValue
451 * is unknown, and the line where the min occurred.
455 folly::Optional
<int> baseValue
;
458 * During the parsing process, when a Jmp instruction is encountered, the
459 * StackDepth structure for this jump becomes linked to the StackDepth
460 * structure of the label (which is added to the listeners list).
462 * Once the absolute depth at the jump becomes known, its StackDepth
463 * instance calls the setBase method of the StackDepth instance of the label.
464 * The absolute depth at the label can then be inferred from the
465 * absolute depth at the jump.
467 std::vector
<std::pair
<StackDepth
*, int> > listeners
;
475 void adjust(AsmState
& as
, int delta
);
476 void addListener(AsmState
& as
, StackDepth
* target
);
477 void setBase(AsmState
& as
, int stackDepth
);
478 int absoluteDepth() {
479 assertx(baseValue
.hasValue());
480 return baseValue
.value() + currentOffset
;
484 * Sets the baseValue such that the current stack depth matches the
487 * If the base value is already known, it may conflict with the
488 * parameter of this function. In this case, an error will be raised.
490 void setCurrentAbsolute(AsmState
& as
, int stackDepth
);
496 StackDepth stackDepth
;
499 * Each label source has an Offset where the jmp should be
500 * patched up, and an Offset from which the jump delta should be
501 * computed. (The second Offset is basically to the actual
502 * jump/switch/etc instruction, while the first points to the
505 std::vector
<std::pair
<Offset
,Offset
>> sources
;
508 * List of parameter ids that use this label for their DV
511 std::vector
<Id
> dvInits
;
514 * List of EHEnts that have m_handler pointing to this label.
516 std::vector
<size_t> ehEnts
;
519 struct HashSymbolRef
{
520 size_t operator()(SymbolRef s
) const {
521 return static_cast<size_t>(s
);
526 explicit AsmState(std::istream
& in
, bool wants_symbol_refs
= false)
528 , wants_symbol_refs
{wants_symbol_refs
}
530 currentStackDepth
->setBase(*this, 0);
533 AsmState(const AsmState
&) = delete;
534 AsmState
& operator=(const AsmState
&) = delete;
536 template<typename
... Args
>
537 void error(const std::string
& fmt
, Args
&&... args
) {
538 throw AssemblerError(in
.getLineNumber(),
539 folly::sformat(fmt
, std::forward
<Args
>(args
)...));
543 void adjustStack(int delta
) {
544 if (currentStackDepth
== nullptr) {
545 // Instruction is unreachable, nothing to do here!
549 currentStackDepth
->adjust(*this, delta
);
552 void adjustStackHighwater(int depth
) {
554 fe
->maxStackCells
= std::max(fe
->maxStackCells
, depth
);
558 std::string
displayStackDepth() {
559 std::ostringstream stack
;
561 if (currentStackDepth
== nullptr) {
563 } else if (currentStackDepth
->baseValue
) {
564 stack
<< *currentStackDepth
->baseValue
+
565 currentStackDepth
->currentOffset
;
567 stack
<< "?" << currentStackDepth
->currentOffset
;
573 void addLabelTarget(const std::string
& name
) {
574 auto& label
= labelMap
[name
];
576 error("Duplicate label " + name
);
579 label
.target
= ue
->bcPos();
581 StackDepth
* newStack
= &label
.stackDepth
;
583 if (currentStackDepth
== nullptr) {
584 // Previous instruction was unreachable
585 currentStackDepth
= newStack
;
589 // The stack depth at the label depends on the current depth
590 currentStackDepth
->addListener(*this, newStack
);
591 currentStackDepth
= newStack
;
594 void addLabelJump(const std::string
& name
, Offset immOff
, Offset opcodeOff
) {
595 auto& label
= labelMap
[name
];
597 if (currentStackDepth
!= nullptr) {
598 // The stack depth at the target must be the same as the current depth
599 // (whatever this may be: it may still be unknown)
600 currentStackDepth
->addListener(*this, &label
.stackDepth
);
603 label
.sources
.emplace_back(immOff
, opcodeOff
);
606 void enforceStackDepth(int stackDepth
) {
607 if (currentStackDepth
== nullptr) {
608 // Current instruction is unreachable, thus the constraint
609 // on the stack depth will never be violated
613 currentStackDepth
->setCurrentAbsolute(*this, stackDepth
);
616 bool isUnreachable() {
617 return currentStackDepth
== nullptr;
620 void enterUnreachableRegion() {
621 currentStackDepth
= nullptr;
624 void enterReachableRegion(int stackDepth
) {
625 unnamedStackDepths
.emplace_back(std::make_unique
<StackDepth
>());
626 currentStackDepth
= unnamedStackDepths
.back().get();
627 currentStackDepth
->setBase(*this, stackDepth
);
630 void addLabelDVInit(const std::string
& name
, int paramId
) {
631 labelMap
[name
].dvInits
.push_back(paramId
);
633 // Stack depth should be 0 when entering a DV init
634 labelMap
[name
].stackDepth
.setBase(*this, 0);
637 void addLabelEHEnt(const std::string
& name
, size_t ehIdx
) {
638 labelMap
[name
].ehEnts
.push_back(ehIdx
);
640 // Stack depth should be 0 when entering a fault funclet
641 labelMap
[name
].stackDepth
.setBase(*this, 0);
646 ue
->addPreClassEmitter(pce
);
651 void finishRecord() {
652 assertx(!fe
&& !pce
);
653 ue
->addRecordEmitter(re
);
657 void patchLabelOffsets(const Label
& label
) {
658 for (auto const& source
: label
.sources
) {
659 ue
->emitInt32(label
.target
- source
.second
, source
.first
);
662 for (auto const& dvinit
: label
.dvInits
) {
663 fe
->params
[dvinit
].funcletOff
= label
.target
;
666 for (auto const& ehEnt
: label
.ehEnts
) {
667 fe
->ehtab
[ehEnt
].m_handler
= label
.target
;
671 void finishSection() {
672 for (auto const& label
: labelMap
) {
673 if (!label
.second
.bound
) {
674 error("Undefined label " + label
.first
);
676 if (label
.second
.target
>= ue
->bcPos()) {
677 error("label " + label
.first
+ " falls of the end of the function");
680 patchLabelOffsets(label
.second
);
684 void finishFunction() {
687 // Stack depth should be 0 at the end of a function body
688 enforceStackDepth(0);
690 // Bump up the unnamed local count
691 const int numLocals
= maxUnnamed
+ 1;
692 while (fe
->numLocals() < numLocals
) {
693 fe
->allocUnnamedLocal();
698 fe
->numIterators() * kNumIterCells
;
700 fe
->finish(ue
->bcPos());
705 initStackDepth
= StackDepth();
706 initStackDepth
.setBase(*this, 0);
707 currentStackDepth
= &initStackDepth
;
708 unnamedStackDepths
.clear();
712 int getLocalId(const std::string
& name
) {
713 if (name
[0] == '_') {
714 int id
= folly::to
<int>(name
.substr(1));
715 if (id
> maxUnnamed
) maxUnnamed
= id
;
719 if (name
[0] != '$') {
720 error("local variables must be prefixed with $ or _");
723 const StringData
* sd
= makeStaticString(name
.c_str() + 1);
725 return fe
->lookupVarId(sd
);
728 int getIterId(int32_t id
) {
729 if (id
>= fe
->numIterators()) {
730 error("iterator id exceeded number of iterators in the function");
737 bool emittedPseudoMain
{false};
738 bool emittedTopLevelFunc
{false};
741 * Map of adata identifiers to their serialized contents
742 * Needed because, when instrumenting array provenance, we're unable
743 * to initialize their static arrays until the adata is first referenced
745 * There's also some painful maneuvering around keeping either the serialized
746 * or unserialized array in request heap until it can be made static since
747 * this could potentially confusingly OOM a request that autoloads a large
750 std::unordered_map
<std::string
, std::vector
<char>> adataDecls
;
752 // Map of adata identifiers to their associated static arrays and potential DV
756 std::pair
<ArrayData
*,VariableSerializer::DVOverrides
>
758 // Map of array immediates to their adata identifiers.
759 std::map
<Offset
, std::string
> adataUses
;
761 // In whole program mode it isn't possible to lookup a litstr in the global
762 // table while emitting, so keep a lookaside of litstrs seen by the assembler.
763 std::unordered_map
<Id
, const StringData
*> litstrMap
;
765 // When inside a class, this state is active.
766 PreClassEmitter
* pce
{nullptr};
768 // When inside a record, this state is active.
769 RecordEmitter
* re
{nullptr};
771 // When we're doing a function or method body, this state is active.
772 FuncEmitter
* fe
{nullptr};
773 std::map
<std::string
,Label
> labelMap
;
774 bool numItersSet
{false};
775 bool enumTySet
{false};
776 StackDepth initStackDepth
;
777 StackDepth
* currentStackDepth
{&initStackDepth
};
778 std::vector
<std::unique_ptr
<StackDepth
>> unnamedStackDepths
;
779 int minStackDepth
{0};
781 std::set
<std::string
,stdltistr
> hoistables
;
782 std::unordered_map
<uint32_t,Offset
> defClsOffsets
;
783 Location::Range srcLoc
{-1,-1,-1,-1};
784 hphp_fast_map
<SymbolRef
,
785 CompactVector
<std::string
>,
786 HashSymbolRef
> symbol_refs
;
787 bool wants_symbol_refs
;
790 void StackDepth::adjust(AsmState
& as
, int delta
) {
791 currentOffset
+= delta
;
794 // The absolute stack depth is unknown. We only store the min
795 // and max offsets, and we will make a decision later, once the
796 // base value is known.
797 maxOffset
= std::max(currentOffset
, maxOffset
);
798 if (currentOffset
< minOffset
) {
799 minOffsetLine
= as
.in
.getLineNumber();
800 minOffset
= currentOffset
;
805 if (*baseValue
+ currentOffset
< 0) {
806 as
.error("opcode sequence caused stack depth to go negative");
809 as
.adjustStackHighwater(*baseValue
+ currentOffset
);
812 void StackDepth::addListener(AsmState
& as
, StackDepth
* target
) {
814 target
->setBase(as
, *baseValue
+ currentOffset
);
816 listeners
.emplace_back(target
, currentOffset
);
820 void StackDepth::setBase(AsmState
& as
, int stackDepth
) {
821 if (baseValue
&& stackDepth
!= *baseValue
) {
822 as
.error("stack depth {} does not match base value {}",
823 stackDepth
, *baseValue
);
826 baseValue
= stackDepth
;
828 // We finally know the base value. Update AsmState accordingly.
829 if (*baseValue
+ minOffset
< 0) {
830 throw AssemblerError(
832 "opcode sequence caused stack depth to go negative"
835 as
.adjustStackHighwater(*baseValue
+ maxOffset
);
837 // Update the listeners
838 auto l
= std::move(listeners
);
839 // We won't need them anymore
842 kv
.first
->setBase(as
, *baseValue
+ kv
.second
);
846 void StackDepth::setCurrentAbsolute(AsmState
& as
, int stackDepth
) {
847 setBase(as
, stackDepth
- currentOffset
);
851 decltype(auto) suppressOOM(F func
) {
852 MemoryManager::SuppressOOM
so(*tl_heap
);
856 //////////////////////////////////////////////////////////////////////
859 * Opcode arguments must be on the same line as the opcode itself,
860 * although certain argument types may contain internal newlines (see,
861 * for example, read_jmpvector or string literals).
863 template<class Target
> Target
read_opcode_arg(AsmState
& as
) {
864 as
.in
.skipSpaceTab();
866 as
.in
.consumePred(!boost::is_any_of(" \t\r\n#;>"),
867 std::back_inserter(strVal
));
868 if (strVal
.empty()) {
869 as
.error("expected opcode or directive argument");
872 return folly::to
<Target
>(strVal
);
873 } catch (std::range_error
&) {
874 as
.error("couldn't convert input argument (" + strVal
+ ") to "
880 template<class SubOpType
>
881 uint8_t read_subop(AsmState
& as
) {
882 auto const str
= read_opcode_arg
<std::string
>(as
);
883 if (auto const ty
= nameToSubop
<SubOpType
>(str
.c_str())) {
884 return static_cast<uint8_t>(*ty
);
886 as
.error("unknown subop name");
890 const StringData
* read_litstr(AsmState
& as
) {
891 as
.in
.skipSpaceTab();
893 if (!as
.in
.readQuotedStr(strVal
)) {
894 as
.error("expected quoted string literal");
896 return makeStaticString(strVal
);
900 * maybe-string-literal : N
904 const StringData
* read_maybe_litstr(AsmState
& as
) {
905 as
.in
.skipSpaceTab();
906 if (as
.in
.peek() == 'N') {
910 return read_litstr(as
);
913 std::vector
<std::string
> read_strvector(AsmState
& as
) {
914 std::vector
<std::string
> ret
;
915 as
.in
.skipSpaceTab();
918 while (as
.in
.skipSpaceTab(), as
.in
.readQuotedStr(name
)) {
921 as
.in
.skipSpaceTab();
926 Variant
parse_php_serialized(folly::StringPiece
,
927 VariableSerializer::DVOverrides
*);
929 std::pair
<ArrayData
*, std::string
> read_litarray(AsmState
& as
) {
930 as
.in
.skipSpaceTab();
931 if (as
.in
.getc() != '@') {
932 as
.error("expecting an `@foo' array literal reference");
935 if (!as
.in
.readword(name
)) {
936 as
.error("expected name of .adata literal");
939 auto adata
= [&]() -> ArrayData
* {
940 auto const it
= as
.adataMap
.find(name
);
941 if (it
!= as
.adataMap
.end()) return it
->second
.first
;
942 auto const decl
= as
.adataDecls
.find(name
);
943 if (decl
== as
.adataDecls
.end()) return nullptr;
944 auto& buf
= decl
->second
;
945 return suppressOOM([&] {
946 VariableSerializer::DVOverrides overrides
;
947 auto var
= parse_php_serialized(
949 RuntimeOption::EvalHackArrDVArrs
? &overrides
: nullptr
951 if (!var
.isArray()) {
952 as
.error(".adata only supports serialized arrays");
955 auto data
= var
.detach().m_data
.parr
;
956 ArrayData::GetScalarArray(&data
);
957 as
.adataMap
[name
] = std::make_pair(data
, std::move(overrides
));
958 as
.adataDecls
.erase(decl
);
963 if (!adata
) as
.error("unknown array data literal name " + name
);
965 return {adata
, std::move(name
)};
968 RepoAuthType
read_repo_auth_type(AsmState
& as
) {
969 auto const str
= read_opcode_arg
<std::string
>(as
);
970 folly::StringPiece
parse(str
);
973 * Note: no support for reading array types. (The assembler only
974 * emits a single unit, so it can't really be involved in creating a
978 using T
= RepoAuthType::Tag
;
980 #define X(what, tag) \
981 if (parse.startsWith(what)) return RepoAuthType{tag}
983 #define Y(what, tag) \
984 if (parse.startsWith(what)) { \
985 parse.removePrefix(what); \
986 auto const cls = makeStaticString(parse.data()); \
987 as.ue->mergeLitstr(cls); \
988 return RepoAuthType{tag, cls}; \
991 Y("Obj=", T::ExactObj
);
992 Y("?Obj=", T::OptExactObj
);
993 Y("?Obj<=", T::OptSubObj
);
994 Y("Obj<=", T::SubObj
);
995 Y("Cls=", T::ExactCls
);
996 Y("?Cls=", T::OptExactCls
);
997 Y("?Cls<=", T::OptSubCls
);
998 Y("Cls<=", T::SubCls
);
1000 X("?Arr", T::OptArr
);
1002 X("?VArr", T::OptVArr
);
1004 X("?DArr", T::OptDArr
);
1006 X("?Vec", T::OptVec
);
1008 X("?Dict", T::OptDict
);
1009 X("Keyset", T::Keyset
);
1010 X("?Keyset", T::OptKeyset
);
1012 X("?Bool", T::OptBool
);
1015 X("?Dbl", T::OptDbl
);
1017 X("InitCell", T::InitCell
);
1018 X("InitGen", T::InitGen
);
1019 X("InitNull", T::InitNull
);
1020 X("InitUnc", T::InitUnc
);
1022 X("?Int", T::OptInt
);
1025 X("?Obj", T::OptObj
);
1027 X("?Func", T::OptFunc
);
1029 X("?Cls", T::OptCls
);
1030 X("ClsMeth", T::ClsMeth
);
1031 X("?ClsMeth", T::OptClsMeth
);
1032 X("Record", T::Record
);
1033 X("?Record", T::OptRecord
);
1035 X("?Res", T::OptRes
);
1037 X("?SArr", T::OptSArr
);
1039 X("?SVArr", T::OptSVArr
);
1040 X("SVArr", T::SVArr
);
1041 X("?SDArr", T::OptSDArr
);
1042 X("SDArr", T::SDArr
);
1043 X("?SVec", T::OptSVec
);
1045 X("?SDict", T::OptSDict
);
1046 X("SDict", T::SDict
);
1047 X("?SKeyset", T::OptSKeyset
);
1048 X("SKeyset", T::SKeyset
);
1049 X("?SStr", T::OptSStr
);
1051 X("?Str", T::OptStr
);
1054 X("?UncArrKey", T::OptUncArrKey
);
1055 X("?ArrKey", T::OptArrKey
);
1056 X("UncArrKey",T::UncArrKey
);
1057 X("ArrKey", T::ArrKey
);
1058 X("?UncStrLike",T::OptUncStrLike
);
1059 X("?StrLike",T::OptStrLike
);
1060 X("UncStrLike",T::UncStrLike
);
1061 X("StrLike",T::StrLike
);
1062 X("Uninit", T::Uninit
);
1067 // Make sure the above parsing code is revisited when new tags are
1068 // added (we'll get a warning for a missing case label):
1069 if (debug
) switch (RepoAuthType
{}.tag()) {
1121 case T::OptUncArrKey
:
1125 case T::OptUncStrLike
:
1136 case T::OptExactObj
:
1140 case T::OptExactCls
:
1145 as
.error("unrecognized RepoAuthType format");
1149 // Read a vector of IVAs, with format <int, int, int, ...>. The vector may be
1150 // omitted entirely if it is empty.
1151 std::vector
<uint32_t> read_argv32(AsmState
& as
) {
1152 as
.in
.skipSpaceTab();
1153 if (as
.in
.peek() != '<') return {};
1156 std::vector
<uint32_t> result
;
1158 auto const num
= as
.in
.readint();
1159 if (num
< 0) as
.error("Was expecting a positive integer");
1160 result
.push_back(num
);
1161 as
.in
.skipWhitespace();
1162 if (as
.in
.peek() == '>') break;
1163 as
.in
.expectWs(',');
1165 as
.in
.expectWs('>');
1170 // Read in a vector of iterators. The format for this vector is:
1171 // <(TYPE) ID LOCAL?, (TYPE) ID LOCAL?, ...>
1172 // Where TYPE := Iter | LIter
1173 // and ID := Integer
1174 // and LOCAL := String (only valid when TYPE = LIter)
1175 IterTable
read_iter_table(AsmState
& as
) {
1178 as
.in
.skipSpaceTab();
1185 as
.in
.expectWs('(');
1186 if (!as
.in
.readword(word
)) as
.error("Was expecting Iterator type.");
1187 if (!word
.compare("Iter")) ent
.kind
= KindOfIter
;
1188 else if (!word
.compare("LIter")) ent
.kind
= KindOfLIter
;
1189 else as
.error("Unknown iterator type `" + word
+ "'");
1190 as
.in
.expectWs(')');
1192 as
.in
.skipSpaceTab();
1194 if (!as
.in
.readword(word
)) as
.error("Was expecting iterator id.");
1195 ent
.id
= as
.getIterId(folly::to
<uint32_t>(word
));
1197 if (ent
.kind
== KindOfLIter
) {
1198 as
.in
.skipSpaceTab();
1199 if (!as
.in
.readword(word
)) as
.error("Was expecting local.");
1200 ent
.local
= as
.getLocalId(word
);
1202 ent
.local
= kInvalidId
;
1205 ret
.push_back(std::move(ent
));
1207 if (!isdigit(word
.back())) {
1208 if (word
.back() == '>') break;
1209 if (word
.back() != ',') as
.error("Was expecting `,'.");
1211 as
.in
.skipSpaceTab();
1212 if (as
.in
.peek() == '>') { as
.in
.getc(); break; }
1220 // Jump tables are lists of labels.
1221 std::vector
<std::string
> read_jmpvector(AsmState
& as
) {
1222 std::vector
<std::string
> ret
;
1224 as
.in
.skipSpaceTab();
1228 while (as
.in
.readword(word
)) {
1229 ret
.push_back(word
);
1231 as
.in
.expectWs('>');
1236 typedef std::vector
<std::pair
<Id
, std::string
>> SSwitchJmpVector
;
1238 SSwitchJmpVector
read_sswitch_jmpvector(AsmState
& as
) {
1239 SSwitchJmpVector ret
;
1241 as
.in
.skipSpaceTab();
1244 std::string defLabel
;
1246 std::string caseStr
;
1247 if (!as
.in
.readQuotedStr(caseStr
)) {
1248 as
.error("expected quoted string literal");
1253 as
.in
.readword(defLabel
);
1256 as
.ue
->mergeLitstr(makeStaticString(caseStr
)),
1260 as
.in
.skipWhitespace();
1261 } while (as
.in
.peek() != '-');
1265 as
.in
.readword(defLabel
);
1267 // -1 stands for the default case.
1268 ret
.emplace_back(-1, defLabel
);
1275 MemberKey
read_member_key(AsmState
& as
) {
1276 as
.in
.skipWhitespace();
1279 if (!as
.in
.readword(word
)) as
.error("expected member code");
1281 auto optMcode
= parseMemberCode(word
.c_str());
1282 if (!optMcode
) as
.error("unrecognized member code `" + word
+ "'");
1284 auto const mcode
= *optMcode
;
1285 if (mcode
!= MW
&& as
.in
.getc() != ':') {
1286 as
.error("expected `:' after member code `" + word
+ "'");
1292 case MEL
: case MPL
: {
1294 if (!as
.in
.readword(name
)) {
1295 as
.error("couldn't read name for local variable in member key");
1297 return MemberKey
{mcode
, as
.getLocalId(name
)};
1300 return MemberKey
{mcode
, read_opcode_arg
<int32_t>(as
)};
1302 return MemberKey
{mcode
, read_opcode_arg
<int64_t>(as
)};
1303 case MET
: case MPT
: case MQT
:
1304 return MemberKey
{mcode
, read_litstr(as
)};
1309 LocalRange
read_local_range(AsmState
& as
) {
1310 auto first
= read_opcode_arg
<std::string
>(as
);
1311 if (first
.size() > 2 && first
[0] == 'L' && first
[1] == ':') {
1312 first
= "_" + first
.substr(2);
1314 auto const pos
= first
.find('+');
1315 if (pos
== std::string::npos
) as
.error("expecting `+' in local range");
1316 auto const rest
= first
.substr(pos
+ 1);
1317 first
= first
.substr(0, pos
);
1318 auto const count
= folly::to
<uint32_t>(rest
);
1319 if (!count
) return LocalRange
{0, 0};
1320 auto const firstLoc
= as
.getLocalId(first
);
1321 if (firstLoc
+ count
- 1 > as
.maxUnnamed
) {
1322 as
.maxUnnamed
= firstLoc
+ count
- 1;
1324 return LocalRange
{uint32_t(firstLoc
), count
};
1327 std::pair
<FCallArgs::Flags
, bool>
1328 read_fcall_flags(AsmState
& as
, Op thisOpcode
) {
1330 bool lockWhileUnwinding
= false;
1332 as
.in
.skipSpaceTab();
1336 while (as
.in
.readword(flag
)) {
1337 if (flag
== "SupportsAER") {
1338 if (thisOpcode
== Op::FCallCtor
) {
1339 as
.error("FCall flag SupportsAER is not valid for FCallCtor");
1341 flags
|= FCallArgs::SupportsAsyncEagerReturn
;
1345 if (flag
== "LockWhileUnwinding") {
1346 if (thisOpcode
== Op::FCallCtor
) {
1347 lockWhileUnwinding
= true;
1350 as
.error("FCall flag LockWhileUnwinding is only valid for FCallCtor");
1353 if (flag
== "Unpack") { flags
|= FCallArgs::HasUnpack
; continue; }
1354 if (flag
== "Generics") { flags
|= FCallArgs::HasGenerics
; continue; }
1355 as
.error("unrecognized FCall flag `" + flag
+ "'");
1357 as
.in
.expectWs('>');
1359 return std::make_pair(static_cast<FCallArgs::Flags
>(flags
),
1360 lockWhileUnwinding
);
1363 // Read a vector of booleans formatted as a quoted string of '0' and '1'.
1364 std::unique_ptr
<uint8_t[]> read_by_refs(AsmState
& as
, uint32_t numArgs
) {
1365 as
.in
.skipSpaceTab();
1367 if (!as
.in
.readQuotedStr(strVal
)) {
1368 as
.error("expected quoted string literal");
1371 if (strVal
.empty()) return nullptr;
1372 if (strVal
.length() != numArgs
) {
1373 as
.error("reffiness vector must be either empty or match number of args");
1376 auto result
= std::make_unique
<uint8_t[]>((numArgs
+ 7) / 8);
1377 for (auto i
= 0; i
< numArgs
; ++i
) {
1378 auto const c
= strVal
[i
];
1379 if (c
!= '0' && c
!= '1') as
.error("Was expecting a boolean (0 or 1)");
1380 result
[i
/ 8] |= (c
== '1' ? 1 : 0) << (i
% 8);
1386 std::tuple
<FCallArgsBase
, std::unique_ptr
<uint8_t[]>, std::string
>
1387 read_fcall_args(AsmState
& as
, Op thisOpcode
) {
1388 FCallArgs::Flags flags
;
1389 bool lockWhileUnwinding
;
1390 std::tie(flags
, lockWhileUnwinding
) = read_fcall_flags(as
, thisOpcode
);
1391 auto const numArgs
= read_opcode_arg
<uint32_t>(as
);
1392 auto const numRets
= read_opcode_arg
<uint32_t>(as
);
1393 auto byRefs
= read_by_refs(as
, numArgs
);
1394 auto asyncEagerLabel
= read_opcode_arg
<std::string
>(as
);
1395 return std::make_tuple(
1396 FCallArgsBase(flags
, numArgs
, numRets
, lockWhileUnwinding
),
1398 std::move(asyncEagerLabel
)
1402 Id
create_litstr_id(AsmState
& as
) {
1403 auto const sd
= read_litstr(as
);
1404 auto const id
= as
.ue
->mergeLitstr(sd
);
1405 as
.litstrMap
.emplace(id
, sd
);
1409 //////////////////////////////////////////////////////////////////////
1411 std::map
<std::string
,ParserFunc
> opcode_parsers
;
1414 #define IMM_ONE(t) IMM_##t
1415 #define IMM_TWO(t1, t2) IMM_ONE(t1); ++immIdx; IMM_##t2
1416 #define IMM_THREE(t1, t2, t3) IMM_TWO(t1, t2); ++immIdx; IMM_##t3
1417 #define IMM_FOUR(t1, t2, t3, t4) IMM_THREE(t1, t2, t3); ++immIdx; IMM_##t4
1418 #define IMM_FIVE(t1, t2, t3, t4, t5) IMM_FOUR(t1, t2, t3, t4); ++immIdx; IMM_##t5
1419 #define IMM_SIX(t1, t2, t3, t4, t5, t6) IMM_FIVE(t1, t2, t3, t4, t5); ++immIdx; IMM_##t6
1421 // Some bytecodes need to know an iva imm for (PUSH|POP)_*.
1422 #define IMM_IVA do { \
1423 auto imm = read_opcode_arg<uint32_t>(as); \
1424 as.ue->emitIVA(imm); \
1425 immIVA[immIdx] = imm; \
1429 std::vector<std::string> vecImm = read_strvector(as); \
1430 auto const vecImmStackValues = vecImm.size(); \
1431 as.ue->emitIVA(vecImmStackValues); \
1432 for (size_t i = 0; i < vecImmStackValues; ++i) { \
1433 as.ue->emitInt32(as.ue->mergeLitstr(String(vecImm[i]).get())); \
1436 #define IMM_SA as.ue->emitInt32(create_litstr_id(as))
1437 #define IMM_RATA encodeRAT(*as.ue, read_repo_auth_type(as))
1438 #define IMM_I64A as.ue->emitInt64(read_opcode_arg<int64_t>(as))
1439 #define IMM_DA as.ue->emitDouble(read_opcode_arg<double>(as))
1440 #define IMM_LA as.ue->emitIVA(as.getLocalId( \
1441 read_opcode_arg<std::string>(as)))
1442 #define IMM_IA as.ue->emitIVA(as.getIterId( \
1443 read_opcode_arg<int32_t>(as)))
1444 #define IMM_OA(ty) as.ue->emitByte(read_subop<ty>(as));
1445 #define IMM_LAR encodeLocalRange(*as.ue, read_local_range(as))
1446 #define IMM_FCA do { \
1447 auto const fca = read_fcall_args(as, thisOpcode); \
1449 *as.ue, std::get<0>(fca), std::get<1>(fca).get(), \
1450 std::get<2>(fca) != "-", \
1452 labelJumps.emplace_back(std::get<2>(fca), as.ue->bcPos()); \
1453 as.ue->emitInt32(0); \
1456 immFCA = std::get<0>(fca); \
1459 // Record the offset of the immediate so that we can correlate it with its
1460 // associated adata later.
1461 #define IMM_AA do { \
1462 auto const p = read_litarray(as); \
1463 auto const pos = as.ue->bcPos(); \
1464 as.ue->emitInt32(as.ue->mergeArray(p.first)); \
1465 as.adataUses[pos] = std::move(p.second); \
1469 * There can currently be no more than one immvector per instruction,
1470 * and we need access to the size of the immediate vector for
1471 * NUM_POP_*, so the member vector guy exposes a vecImmStackValues
1474 #define IMM_ILA do { \
1475 auto const immTable = read_iter_table(as); \
1476 as.ue->emitIVA(immTable.size()); \
1477 for (auto const& it : immTable) { \
1478 as.ue->emitIVA(it.kind); \
1479 as.ue->emitIVA(it.id); \
1480 if (it.kind == KindOfLIter) { \
1481 as.ue->emitIVA(it.local); \
1486 #define IMM_I32LA do { \
1487 std::vector<uint32_t> vecImm = read_argv32(as); \
1488 as.ue->emitIVA(vecImm.size()); \
1489 for (auto i : vecImm) { \
1490 as.ue->emitInt32(i); \
1494 #define IMM_BLA do { \
1495 std::vector<std::string> vecImm = read_jmpvector(as); \
1496 as.ue->emitIVA(vecImm.size()); \
1497 for (auto const& imm : vecImm) { \
1498 labelJumps.emplace_back(imm, as.ue->bcPos()); \
1499 as.ue->emitInt32(0); /* to be patched */ \
1503 #define IMM_SLA do { \
1504 auto vecImm = read_sswitch_jmpvector(as); \
1505 as.ue->emitIVA(vecImm.size()); \
1506 for (auto const& pair : vecImm) { \
1507 as.ue->emitInt32(pair.first); \
1508 labelJumps.emplace_back(pair.second, as.ue->bcPos()); \
1509 as.ue->emitInt32(0); /* to be patched */ \
1513 #define IMM_BA do { \
1514 labelJumps.emplace_back( \
1515 read_opcode_arg<std::string>(as), \
1518 as.ue->emitInt32(0); \
1521 #define IMM_KA encode_member_key(read_member_key(as), *as.ue)
1523 #define NUM_PUSH_NOV 0
1524 #define NUM_PUSH_ONE(a) 1
1525 #define NUM_PUSH_TWO(a,b) 2
1526 #define NUM_PUSH_THREE(a,b,c) 3
1527 #define NUM_PUSH_CMANY immIVA[0]
1528 #define NUM_PUSH_FCALL immFCA.numRets
1529 #define NUM_PUSH_CALLNATIVE (immIVA[2] + 1)
1530 #define NUM_POP_NOV 0
1531 #define NUM_POP_ONE(a) 1
1532 #define NUM_POP_TWO(a,b) 2
1533 #define NUM_POP_THREE(a,b,c) 3
1534 #define NUM_POP_MFINAL immIVA[0]
1535 #define NUM_POP_C_MFINAL(n) (immIVA[0] + n)
1536 #define NUM_POP_CUMANY immIVA[0] /* number of arguments */
1537 #define NUM_POP_CMANY_U3 immIVA[0] + 3
1538 #define NUM_POP_CALLNATIVE (immIVA[0] + immIVA[2]) /* number of args + nout */
1539 #define NUM_POP_FCALL(nin, nobj) (nin + immFCA.numInputs() + 2 + immFCA.numRets)
1540 #define NUM_POP_CMANY immIVA[0] /* number of arguments */
1541 #define NUM_POP_SMANY vecImmStackValues
1543 #define O(name, imm, pop, push, flags) \
1544 void parse_opcode_##name(AsmState& as) { \
1545 UNUSED auto immFCA = FCallArgsBase(FCallArgsBase::None, -1, -1, \
1547 UNUSED uint32_t immIVA[kMaxHhbcImms]; \
1548 UNUSED auto const thisOpcode = Op::name; \
1549 UNUSED const Offset curOpcodeOff = as.ue->bcPos(); \
1550 std::vector<std::pair<std::string, Offset> > labelJumps; \
1555 as.in.getLineNumber(), \
1556 as.displayStackDepth().c_str(), \
1560 /* Pretend the stack is reachable and empty, same as hphpc */ \
1561 if (as.currentStackDepth == nullptr) { \
1562 as.enterReachableRegion(0); \
1565 as.ue->emitOp(Op##name); \
1567 UNUSED size_t immIdx = 0; \
1570 as.adjustStack(-NUM_POP_##pop); \
1572 if (thisOpcode == OpMemoGet) { \
1573 /* MemoGet pushes after branching */ \
1574 assertx(labelJumps.size() == 1); \
1576 labelJumps[0].first, labelJumps[0].second, curOpcodeOff \
1578 as.adjustStack(NUM_PUSH_##push); \
1579 } else if (thisOpcode == OpMemoGetEager) { \
1580 /* MemoGetEager pushes on its second branch only */ \
1581 assertx(labelJumps.size() == 2); \
1583 labelJumps[0].first, labelJumps[0].second, curOpcodeOff \
1585 as.adjustStack(NUM_PUSH_##push); \
1587 labelJumps[1].first, labelJumps[1].second, curOpcodeOff \
1590 /* Everything else pushes before branching */ \
1591 as.adjustStack(NUM_PUSH_##push); \
1592 for (auto& kv : labelJumps) { \
1593 as.addLabelJump(kv.first, kv.second, curOpcodeOff); \
1597 /* FCalls with unpack perform their own bounds checking. */ \
1598 if (isFCall(Op##name) && !immFCA.hasUnpack()) { \
1599 as.fe->containsCalls = true; \
1602 /* Stack depth should be 0 after RetC or RetM. */ \
1603 if (thisOpcode == OpRetC || thisOpcode == OpRetCSuspended || \
1604 thisOpcode == OpRetM) { \
1605 as.enforceStackDepth(0); \
1608 /* Stack depth should be 1 after resume from suspend. */ \
1609 if (thisOpcode == OpCreateCont || thisOpcode == OpAwait || \
1610 thisOpcode == OpYield || thisOpcode == OpYieldK || \
1611 thisOpcode == OpYieldFromDelegate) { \
1612 as.enforceStackDepth(1); \
1615 /* Record source location. */ \
1616 as.ue->recordSourceLocation(as.srcLoc, curOpcodeOff); \
1618 if (Op##name == OpDefCls || Op##name == OpDefClsNop) { \
1619 as.defClsOffsets.emplace(immIVA[0], curOpcodeOff); \
1622 /* Retain stack depth after calls to exit */ \
1623 if ((instrFlags(thisOpcode) & InstrFlags::TF) && \
1624 (Op##name != OpExit)) { \
1625 as.enterUnreachableRegion(); \
1655 #undef NUM_PUSH_THREE
1656 #undef NUM_PUSH_CMANY
1657 #undef NUM_PUSH_FCALL
1658 #undef NUM_PUSH_CALLNATIVE
1662 #undef NUM_POP_THREE
1663 #undef NUM_POP_MFINAL
1664 #undef NUM_POP_C_MFINAL
1665 #undef NUM_POP_CUMANY
1666 #undef NUM_POP_CMANY_U3
1667 #undef NUM_POP_CALLNATIVE
1668 #undef NUM_POP_FCALL
1669 #undef NUM_POP_CMANY
1670 #undef NUM_POP_SMANY
1672 void initialize_opcode_map() {
1673 #define O(name, imm, pop, push, flags) \
1674 opcode_parsers[#name] = parse_opcode_##name;
1679 struct Initializer
{
1680 Initializer() { initialize_opcode_map(); }
1683 //////////////////////////////////////////////////////////////////////
1685 std::vector
<char> parse_long_string_raw(AsmState
& as
) {
1686 as
.in
.skipWhitespace();
1688 std::vector
<char> buffer
;
1689 if (!as
.in
.readLongString(buffer
)) {
1690 as
.error("expected \"\"\"-string of serialized php data");
1692 if (buffer
.empty()) {
1693 as
.error("empty php serialized data is not a valid php object");
1696 // String wants a null, and dereferences one past the size we give
1698 buffer
.push_back('\0');
1704 * long-string-literal: <string>
1706 * `long-string-literal' is a python-style longstring. See
1707 * readLongString for more details.
1709 String
parse_long_string(AsmState
& as
) {
1710 auto buffer
= parse_long_string_raw(as
);
1711 return String(&buffer
[0], buffer
.size() - 1, CopyString
);
1715 * maybe-long-string-literal : long-string-literal
1719 String
parse_maybe_long_string(AsmState
& as
) {
1720 as
.in
.skipWhitespace();
1722 std::vector
<char> buffer
;
1723 if (!as
.in
.readLongString(buffer
)) {
1724 return StrNR(staticEmptyString());
1726 if (buffer
.empty()) {
1727 return StrNR(staticEmptyString());
1730 // String wants a null, and dereferences one past the size we give
1732 buffer
.push_back('\0');
1733 return String(&buffer
[0], buffer
.size() - 1, CopyString
);
1736 void checkSize(TypedValue tv
, size_t& available
) {
1737 auto const update
= [&] (size_t sz
) {
1738 if (sz
> available
) {
1739 throw AssemblerFatal("Maximum allowable size of scalar exceeded");
1744 if (isArrayLikeType(type(tv
))) {
1745 update(allocSize(val(tv
).parr
));
1747 IterateKVNoInc(val(tv
).parr
, [&] (Cell k
, TypedValue v
) {
1748 if (isStringType(type(k
))) {
1749 update(val(k
).pstr
->heapSize());
1751 checkSize(v
, available
);
1755 if (isStringType(type(tv
))) {
1756 update(val(tv
).pstr
->heapSize());
1760 Variant
checkSize(Variant val
) {
1761 size_t avail
= RuntimeOption::EvalAssemblerMaxScalarSize
;
1762 checkSize(*val
.asTypedValue(), avail
);
1767 * php-serialized : long-string-literal
1770 * `long-string-literal' is a python-style longstring. See
1771 * readLongString for more details.
1773 * Returns a Variant representing the serialized data. It's up to the
1774 * caller to make sure it is a legal literal.
1776 Variant
parse_php_serialized(
1777 folly::StringPiece str
,
1778 VariableSerializer::DVOverrides
* overrides
= nullptr
1780 VariableUnserializer
vu(
1783 VariableUnserializer::Type::Internal
,
1786 if (overrides
) vu
.setDVOverrides(overrides
);
1788 return checkSize(vu
.unserialize());
1789 } catch (const FatalErrorException
&) {
1791 } catch (const AssemblerFatal
&) {
1793 } catch (const std::exception
& e
) {
1795 folly::sformat("AssemblerUnserializationError: {}", e
.what());
1796 throw AssemblerUnserializationError(msg
);
1800 Variant
parse_php_serialized(
1802 VariableSerializer::DVOverrides
* overrides
= nullptr
1804 auto str
= parse_long_string(as
);
1805 return parse_php_serialized(str
.slice(), overrides
);
1809 * maybe-php-serialized : maybe-long-string-literal
1812 Variant
parse_maybe_php_serialized(AsmState
& as
) {
1813 auto s
= parse_maybe_long_string(as
);
1816 return unserialize_from_string(s
, VariableUnserializer::Type::Internal
);
1817 } catch (const FatalErrorException
&) {
1819 } catch (const AssemblerFatal
&) {
1821 } catch (const std::exception
& e
) {
1823 folly::sformat("AssemblerUnserializationError: {}", e
.what());
1824 throw AssemblerUnserializationError(msg
);
1831 * directive-numiters : integer ';'
1834 void parse_numiters(AsmState
& as
) {
1835 if (as
.numItersSet
) {
1836 as
.error("only one .numiters directive may appear in a given function");
1838 int32_t count
= read_opcode_arg
<int32_t>(as
);
1839 as
.numItersSet
= true;
1840 as
.fe
->setNumIterators(count
);
1841 as
.in
.expectWs(';');
1845 * directive-declvars : var-name* ';'
1848 * Variables are usually allocated when first seen, but
1849 * declvars can be used to preallocate variables for when
1850 * the exact assignment matters (like for closures).
1852 void parse_declvars(AsmState
& as
) {
1854 as
.in
.skipWhitespace();
1856 if (as
.in
.readQuotedStr(var
) || as
.in
.readword(var
)) {
1863 as
.in
.expectWs(';');
1866 void parse_function_body(AsmState
&, int nestLevel
= 0);
1869 * directive-catch : identifier integer? '{' function-body
1872 void parse_catch(AsmState
& as
, int nestLevel
) {
1873 const Offset start
= as
.ue
->bcPos();
1876 if (!as
.in
.readword(label
)) {
1877 as
.error("expected label name after .try_catch");
1880 as
.in
.skipWhitespace();
1881 if (as
.in
.peek() != '{') {
1882 iterId
= read_opcode_arg
<int32_t>(as
);
1884 as
.in
.expectWs('{');
1885 parse_function_body(as
, nestLevel
+ 1);
1887 auto& eh
= as
.fe
->addEHEnt();
1889 eh
.m_past
= as
.ue
->bcPos();
1890 eh
.m_iterId
= iterId
;
1891 eh
.m_end
= kInvalidOffset
;
1893 as
.addLabelEHEnt(label
, as
.fe
->ehtab
.size() - 1);
1897 * directive-try-catch : integer? '{' function-body ".catch" '{' function-body
1900 void parse_try_catch(AsmState
& as
, int nestLevel
) {
1901 const Offset start
= as
.ue
->bcPos();
1904 as
.in
.skipWhitespace();
1905 if (as
.in
.peek() != '{') {
1906 iterId
= read_opcode_arg
<int32_t>(as
);
1910 as
.in
.expectWs('{');
1911 parse_function_body(as
, nestLevel
+ 1);
1912 if (!as
.isUnreachable()) {
1913 as
.error("expected .try region to not fall-thru");
1916 const Offset handler
= as
.ue
->bcPos();
1919 as
.enterReachableRegion(0);
1921 as
.enforceStackDepth(1);
1924 as
.in
.skipWhitespace();
1925 if (!as
.in
.readword(word
) || word
!= ".catch") {
1926 as
.error("expected .catch directive after .try");
1928 as
.in
.skipWhitespace();
1929 as
.in
.expectWs('{');
1930 parse_function_body(as
, nestLevel
+ 1);
1932 const Offset end
= as
.ue
->bcPos();
1934 auto& eh
= as
.fe
->addEHEnt();
1936 eh
.m_past
= handler
;
1937 eh
.m_iterId
= iterId
;
1938 eh
.m_handler
= handler
;
1943 * directive-srcloc : line_no ':' chr_no ',' line_no ':' chr_no ';'
1950 * Record that subsequent bytecodes are at the source location indicated by the
1951 * range of inline numbers and character positions specified.
1953 void parse_srcloc(AsmState
& as
, int /*nestLevel*/) {
1954 auto const line0
= as
.in
.readint();
1955 as
.in
.expectWs(':');
1956 auto const char0
= as
.in
.readint();
1957 as
.in
.expectWs(',');
1958 auto const line1
= as
.in
.readint();
1959 as
.in
.expectWs(':');
1960 auto const char1
= as
.in
.readint();
1961 as
.in
.expectWs(';');
1963 as
.srcLoc
= Location::Range(line0
, char0
, line1
, char1
);
1967 * directive-doccomment : long-string-literal ';'
1971 void parse_func_doccomment(AsmState
& as
) {
1972 auto const doc
= parse_long_string(as
);
1973 as
.in
.expectWs(';');
1975 as
.fe
->docComment
= makeDocComment(doc
);
1979 * fixup_default_values: This function does a *rough* match of the default value
1980 * initializers for a function and attempts to construct corresponding default
1981 * TypedValues for them. It will also attempt to normalize the phpCode using a
1982 * variable serializer.
1984 void fixup_default_values(AsmState
& as
, FuncEmitter
* fe
) {
1985 using Atom
= BCPattern::Atom
;
1986 using Captures
= BCPattern::CaptureVec
;
1988 auto end
= as
.ue
->bc() + fe
->past
;
1989 for (uint32_t paramIdx
= 0; paramIdx
< fe
->params
.size(); ++paramIdx
) {
1990 auto& pi
= fe
->params
[paramIdx
];
1991 if (!pi
.hasDefaultValue() || pi
.funcletOff
== kInvalidOffset
) continue;
1992 auto inst
= as
.ue
->bc() + pi
.funcletOff
;
1994 // Check that the DV initializer is actually setting the local for the
1995 // parameter being initialized.
1996 auto checkloc
= [&] (PC pc
, const Captures
&) {
1997 auto const UNUSED op
= decode_op(pc
);
1998 assertx(op
== OpSetL
|| op
== OpPopL
);
1999 auto const loc
= decode_iva(pc
);
2000 return loc
== paramIdx
;
2003 // Look for DV initializers which push a primitive value onto the stack and
2004 // then immediately use it to set the parameter local and pop it from the
2005 // stack. Currently the following relatively limited sequences are accepted:
2007 // Int | String | Double | Null | True | False | Array | Dict | Keyset | Vec
2008 // SetL loc, PopC | PopL loc
2009 auto result
= BCPattern
{
2011 Atom(OpInt
), Atom(OpString
), Atom(OpDouble
), Atom(OpNull
), Atom(OpTrue
),
2012 Atom(OpFalse
), Atom(OpArray
), Atom(OpDict
), Atom(OpVec
), Atom(OpKeyset
)
2015 Atom(OpPopL
).onlyif(checkloc
),
2016 Atom::seq(Atom(OpSetL
).onlyif(checkloc
), Atom(OpPopC
))
2018 }.ignore({OpAssertRATL
, OpAssertRATStk
}).matchAnchored(inst
, end
);
2020 // Verify that the pattern we matched is either for the last DV initializer,
2021 // in which case it must end with a JmpNS that targets the function entry,
2022 // or is immediately followed by the next DV initializer.
2023 if (!result
.found() || result
.getEnd() >= end
) continue;
2024 auto pc
= result
.getEnd();
2025 auto off
= pc
- as
.ue
->bc();
2026 auto const valid
= [&] {
2027 for (uint32_t next
= paramIdx
+ 1; next
< fe
->params
.size(); ++next
) {
2028 auto& npi
= fe
->params
[next
];
2029 if (!npi
.hasDefaultValue() || npi
.funcletOff
== kInvalidOffset
) {
2032 return npi
.funcletOff
== off
;
2034 auto const orig
= pc
;
2035 auto const base
= as
.ue
->bc() + fe
->base
;
2036 return decode_op(pc
) == OpJmpNS
&& orig
+ decode_raw
<Offset
>(pc
) == base
;
2038 if (!valid
) continue;
2040 // Use the captured initializer bytecode to construct the default value for
2042 auto capture
= result
.getCapture(0);
2045 TypedValue dv
= make_tv
<KindOfUninit
>();
2046 const VariableSerializer::DVOverrides
* overrides
= nullptr;
2047 SCOPE_EXIT
{ overrides
= nullptr; };
2048 auto decode_array
= [&] (DataType dt
) {
2049 auto const captureCopy
= capture
;
2050 if (auto arr
= as
.ue
->lookupArray(decode_raw
<uint32_t>(capture
))) {
2052 dv
.m_data
.parr
= const_cast<ArrayData
*>(arr
);
2053 if (RuntimeOption::EvalHackArrDVArrs
) {
2054 auto const litOffset
= captureCopy
- as
.ue
->bc();
2055 auto const it
= as
.adataUses
.find(litOffset
);
2056 assertx(it
!= as
.adataUses
.end());
2057 overrides
= &as
.adataMap
[it
->second
].second
;
2062 switch (decode_op(capture
)) {
2063 case OpNull
: dv
= make_tv
<KindOfNull
>(); break;
2064 case OpTrue
: dv
= make_tv
<KindOfBoolean
>(true); break;
2065 case OpFalse
: dv
= make_tv
<KindOfBoolean
>(false); break;
2066 case OpArray
: decode_array(KindOfPersistentArray
); break;
2067 case OpVec
: decode_array(KindOfPersistentVec
); break;
2068 case OpDict
: decode_array(KindOfPersistentDict
); break;
2069 case OpKeyset
: decode_array(KindOfPersistentKeyset
); break;
2071 dv
= make_tv
<KindOfInt64
>(decode_raw
<int64_t>(capture
));
2074 dv
= make_tv
<KindOfDouble
>(decode_raw
<double>(capture
));
2077 if (auto str
= as
.litstrMap
[decode_raw
<uint32_t>(capture
)]) {
2078 dv
= make_tv
<KindOfPersistentString
>(str
);
2082 always_assert(false);
2085 // Use the variable serializer to construct a serialized version of the
2086 // default value, matching the behavior of hphpc.
2087 if (dv
.m_type
!= KindOfUninit
) {
2088 VariableSerializer
vs(VariableSerializer::Type::PHPOutput
);
2089 if (RuntimeOption::EvalHackArrDVArrs
&& overrides
) {
2090 vs
.setDVOverrides(overrides
);
2092 auto str
= vs
.serialize(tvAsCVarRef(&dv
), true);
2093 pi
.defaultValue
= dv
;
2094 pi
.phpCode
= makeStaticString(str
.get());
2100 * function-body : fbody-line* '}'
2103 * fbody-line : ".numiters" directive-numiters
2104 * | ".declvars" directive-declvars
2105 * | ".try_fault" directive-fault
2106 * | ".try_catch" directive-catch
2107 * | ".try" directive-try-catch
2108 * | ".ismemoizewrapper"
2109 * | ".ismemoizewrapperlsb"
2110 * | ".srcloc" directive-srcloc
2111 * | ".doc" directive-doccomment
2116 * label-name : identifier ':'
2119 * opcode-line : opcode-mnemonic <junk that depends on opcode> '\n'
2122 void parse_function_body(AsmState
& as
, int nestLevel
/* = 0 */) {
2125 as
.in
.skipWhitespace();
2126 if (as
.in
.peek() == '}') {
2129 as
.finishFunction();
2134 if (!as
.in
.readword(word
)) {
2135 as
.error("unexpected directive or opcode line in function body");
2137 if (word
[0] == '.') {
2138 if (word
== ".ismemoizewrapper") {
2139 as
.fe
->isMemoizeWrapper
= true;
2140 as
.in
.expectWs(';');
2143 if (word
== ".ismemoizewrapperlsb") {
2144 as
.fe
->isMemoizeWrapper
= true;
2145 as
.fe
->isMemoizeWrapperLSB
= true;
2146 as
.in
.expectWs(';');
2149 if (word
== ".numiters") { parse_numiters(as
); continue; }
2150 if (word
== ".declvars") { parse_declvars(as
); continue; }
2151 if (word
== ".try_catch") { parse_catch(as
, nestLevel
); continue; }
2152 if (word
== ".try") { parse_try_catch(as
, nestLevel
); continue; }
2153 if (word
== ".srcloc") { parse_srcloc(as
, nestLevel
); continue; }
2154 if (word
== ".doc") { parse_func_doccomment(as
); continue; }
2155 as
.error("unrecognized directive `" + word
+ "' in function");
2157 if (as
.in
.peek() == ':') {
2159 as
.addLabelTarget(word
);
2163 // Ok, it better be an opcode now.
2164 auto it
= opcode_parsers
.find(word
);
2165 if (it
== opcode_parsers
.end()) {
2166 as
.error("unrecognized opcode `" + word
+ "'");
2170 as
.in
.skipSpaceTab();
2171 if (as
.in
.peek() != '\n' &&
2172 as
.in
.peek() != '\r' &&
2173 as
.in
.peek() != '#' &&
2174 as
.in
.peek() != EOF
) {
2175 as
.error("too many arguments for opcode `" + word
+ "'");
2180 void parse_user_attribute(AsmState
& as
,
2181 UserAttributeMap
& userAttrs
) {
2183 auto name
= read_litstr(as
);
2184 as
.in
.expectWs('(');
2186 auto var
= parse_php_serialized(as
);
2188 as
.in
.expectWs(')');
2190 if (!var
.isArray()) {
2191 as
.error("user attribute values must be arrays");
2195 RuntimeOption::EvalHackArrDVArrs
2196 ? make_tv
<KindOfVec
>(ArrayData::GetScalarArray(std::move(var
)))
2197 : make_tv
<KindOfArray
>(ArrayData::GetScalarArray(std::move(var
)));
2202 * attribute : attribute-name
2203 * | string-literal '(' long-string-literal ')'
2206 * attribute-list : empty
2207 * | '[' attribute* ']'
2210 * The `attribute-name' rule is context-sensitive; see as-shared.cpp.
2211 * The second attribute form is for user attributes and only applies
2212 * if attributeMap is non null.
2214 Attr
parse_attribute_list(AsmState
& as
, AttrContext ctx
,
2215 UserAttributeMap
*userAttrs
= nullptr,
2216 bool* isTop
= nullptr) {
2217 as
.in
.skipWhitespace();
2219 if (as
.in
.peek() != '[') return Attr(ret
);
2223 auto seen_rxl
= false;
2225 as
.in
.skipWhitespace();
2226 if (as
.in
.peek() == ']') break;
2227 if (as
.in
.peek() == '"' && userAttrs
) {
2228 parse_user_attribute(as
, *userAttrs
);
2231 if (!as
.in
.readword(word
)) break;
2233 auto const abit
= string_to_attr(ctx
, word
);
2238 if (isTop
&& word
== "nontop") {
2242 auto const rxAttrs
= rxAttrsFromAttrString(word
);
2244 if (seen_rxl
) as
.error("multiple rx attributes");
2250 as
.error("unrecognized attribute `" + word
+ "' in this context");
2258 * | '<' maybe-string-literal maybe-string-literal
2261 * type-constraint : empty
2262 * | '<' maybe-string-literal
2265 * This parses type-info if noUserType is false, type-constraint if true
2267 std::pair
<const StringData
*, TypeConstraint
> parse_type_info(
2268 AsmState
& as
, bool noUserType
= false) {
2269 as
.in
.skipWhitespace();
2270 if (as
.in
.peek() != '<') return {};
2273 const StringData
*userType
= noUserType
? nullptr : read_maybe_litstr(as
);
2274 const StringData
*typeName
= read_maybe_litstr(as
);
2277 auto flags
= TypeConstraint::NoFlags
;
2279 as
.in
.skipWhitespace();
2280 if (as
.in
.peek() == '>') break;
2281 if (!as
.in
.readword(word
)) break;
2283 auto const abit
= string_to_type_flag(word
);
2285 flags
= flags
| *abit
;
2289 as
.error("unrecognized type flag `" + word
+ "' in this context");
2292 return std::make_pair(userType
, TypeConstraint
{typeName
, flags
});
2294 TypeConstraint
parse_type_constraint(AsmState
& as
) {
2295 return parse_type_info(as
, true).second
;
2300 * parameter-list : '(' param-name-list ')'
2303 * param-name-list : empty
2304 * | param-name ',' param-name-list
2307 * param-name : '$' identifier dv-initializer
2308 * | '&' '$' identifier dv-initializer
2311 * dv-initializer : empty
2312 * | '=' identifier arg-default
2315 * arg-default : empty
2316 * | '(' long-string-literal ')'
2319 void parse_parameter_list(AsmState
& as
) {
2320 as
.in
.skipWhitespace();
2321 if (as
.in
.peek() != '(') return;
2324 bool seenVariadic
= false;
2325 bool seenRef
= false;
2328 FuncEmitter::ParamInfo param
;
2329 param
.byRef
= false;
2330 param
.inout
= false;
2332 as
.in
.skipWhitespace();
2333 int ch
= as
.in
.peek();
2334 if (ch
== ')') { as
.in
.getc(); break; } // allow empty param lists
2337 as
.error("functions can only have one variadic argument");
2340 parse_attribute_list(as
, AttrContext::Parameter
, ¶m
.userAttributes
);
2344 if (as
.in
.getc() != '.' ||
2345 as
.in
.getc() != '.') {
2346 as
.error("expecting '...'");
2349 seenVariadic
= true;
2350 param
.variadic
= true;
2351 as
.fe
->attrs
|= AttrVariadicParam
;
2354 if (as
.in
.tryConsume("inout")) {
2356 as
.error("inout parameters cannot be variadic");
2359 as
.error("functions cannot contain both inout and ref parameters");
2362 as
.fe
->attrs
|= AttrTakesInOutParams
;
2365 std::tie(param
.userType
, param
.typeConstraint
) = parse_type_info(as
);
2367 as
.in
.skipWhitespace();
2371 if (param
.variadic
) {
2372 as
.error("ref parameters cannot be variadic");
2375 as
.error("parameters cannot be marked both inout and ref");
2377 if (as
.fe
->attrs
& AttrTakesInOutParams
) {
2378 as
.error("functions cannot contain both inout and ref parameters");
2385 as
.error("function parameters must have a $ prefix");
2388 if (!as
.in
.readword(name
)) {
2389 as
.error("expected parameter name after $");
2392 as
.in
.skipWhitespace();
2396 as
.error("variadic parameter cannot have dv-initializer");
2400 if (!as
.in
.readword(label
)) {
2401 as
.error("expected label name for dv-initializer");
2403 as
.addLabelDVInit(label
, as
.fe
->params
.size());
2405 as
.in
.skipWhitespace();
2408 String str
= parse_long_string(as
);
2409 param
.phpCode
= makeStaticString(str
);
2412 if (str
.size() == 4) {
2413 if (!strcasecmp("null", str
.data())) {
2415 } else if (!strcasecmp("true", str
.data())) {
2416 tv
= make_tv
<KindOfBoolean
>(true);
2418 } else if (str
.size() == 5 && !strcasecmp("false", str
.data())) {
2419 tv
= make_tv
<KindOfBoolean
>(false);
2421 auto utype
= param
.typeConstraint
.underlyingDataType();
2422 if (tv
.m_type
== KindOfUninit
&&
2423 (!utype
|| *utype
== KindOfInt64
|| *utype
== KindOfDouble
)) {
2427 auto dt
= str
.get()->isNumericWithVal(ival
, dval
, false, &overflow
);
2428 if (overflow
== 0) {
2429 if (dt
== KindOfInt64
) {
2430 if (utype
== KindOfDouble
) tv
= make_tv
<KindOfDouble
>(ival
);
2431 else tv
= make_tv
<KindOfInt64
>(ival
);
2432 } else if (dt
== KindOfDouble
&&
2433 (!utype
|| utype
== KindOfDouble
)) {
2434 tv
= make_tv
<KindOfDouble
>(dval
);
2438 if (tv
.m_type
!= KindOfUninit
) {
2439 param
.defaultValue
= tv
;
2441 as
.in
.expectWs(')');
2442 as
.in
.skipWhitespace();
2447 as
.fe
->appendParam(makeStaticString(name
), param
);
2449 if (ch
== ')') break;
2450 if (ch
!= ',') as
.error("expected , between parameter names");
2454 void parse_function_flags(AsmState
& as
) {
2455 as
.in
.skipWhitespace();
2458 if (as
.in
.peek() == '{') break;
2459 if (!as
.in
.readword(flag
)) break;
2461 if (flag
== "isGenerator") {
2462 as
.fe
->isGenerator
= true;
2463 } else if (flag
== "isAsync") {
2464 as
.fe
->isAsync
= true;
2465 } else if (flag
== "isClosureBody") {
2466 as
.fe
->isClosureBody
= true;
2467 } else if (flag
== "isPairGenerator") {
2468 as
.fe
->isPairGenerator
= true;
2469 } else if (flag
== "isRxDisabled") {
2470 // this relies on attributes being parsed before flags
2471 if (!funcAttrIsAnyRx(as
.fe
->attrs
)) {
2472 as
.error("isRxDisabled on non-rx func");
2474 as
.fe
->isRxDisabled
= true;
2476 as
.error("Unexpected function flag \"" + flag
+ "\"");
2482 * line-range : "(" integer "," integer ")"
2485 bool parse_line_range(AsmState
& as
, int& line0
, int& line1
) {
2486 as
.in
.skipWhitespace();
2487 if (as
.in
.peek() != '(') {
2488 line0
= as
.in
.getLineNumber();
2489 line1
= as
.in
.getLineNumber() + 1;
2493 line0
= as
.in
.readint();
2494 as
.in
.expectWs(',');
2495 line1
= as
.in
.readint();
2496 as
.in
.expectWs(')');
2501 * If we haven't seen a pseudomain and we are compiling systemlib,
2502 * add a pseudomain and return true
2503 * If we haven't seen a pseudomain and we are not compiling systemlib,
2504 * return false so that the caller can give an assembler error
2505 * Otherwise, return true
2507 bool ensure_pseudomain(AsmState
& as
) {
2508 if (!as
.emittedPseudoMain
) {
2509 if (!SystemLib::s_inited
) {
2511 * The SystemLib::s_hhas_unit is required to be merge-only,
2512 * and we create the source by concatenating separate .hhas files
2513 * Rather than choosing one to have the .main directive, we just
2514 * generate a trivial pseudoMain automatically.
2516 as
.ue
->addTrivialPseudoMain();
2517 as
.emittedPseudoMain
= true;
2525 static StaticString
s_native("__Native");
2527 MaybeDataType
type_constraint_to_data_type(LowStringPtr user_type
,
2528 const TypeConstraint
& tc
) {
2529 if (auto type
= tc
.typeName()) {
2530 // in type_annotation.cpp this code uses m_typeArgs
2531 // as indicator that type can represent one of collection types
2532 // when we extract data from the constraint we know if type is one of
2533 // collection types but we don't have direct way to figure out if
2534 // type used to have type arguments - do it indirectly by checking
2535 // if name of user type contains '<'
2536 auto has_type_args
=
2537 user_type
&& user_type
->slice().str().find('<') != std::string::npos
;
2538 return get_datatype(
2539 type
->toCppString(),
2541 false, // no syntactic functions in type annotations
2542 false, // no xhp type annotation
2543 false, // no tuples in type annotation
2551 * Checks whether the current function is native by looking at the user
2552 * attribute map and sets the isNative flag accordingly
2553 * If the given function is an op code implementation, then isNative is not set
2555 void check_native(AsmState
& as
, bool is_construct
) {
2556 if (as
.fe
->userAttributes
.count(s_native
.get())) {
2557 as
.fe
->hniReturnType
= is_construct
2559 : type_constraint_to_data_type(as
.fe
->retUserType
,
2560 as
.fe
->retTypeConstraint
);
2563 !(as
.fe
->parseNativeAttributes(as
.fe
->attrs
) & Native::AttrOpCodeImpl
);
2565 if (as
.fe
->isNative
) {
2566 auto info
= as
.fe
->getNativeInfo();
2568 if (SystemLib::s_inited
) {
2569 // non-builtin native functions must have a valid binding
2570 as
.error("No NativeFunctionInfo for function {}",
2571 as
.fe
->nativeFullname());
2573 // Allow builtins to have missing NativeFunctionInfo, to support
2574 // conditional compilation. Calling such a function will Fatal.
2578 // was AttrOpCodeImpl
2581 // set extra attributes for builtin native functions
2582 if (!SystemLib::s_inited
) {
2583 as
.fe
->attrs
|= AttrBuiltin
| AttrMayUseVV
;
2586 for (auto& pi
: as
.fe
->params
) {
2588 type_constraint_to_data_type(pi
.userType
, pi
.typeConstraint
);
2594 * directive-function : attribute-list ?line-range type-info identifier
2595 * parameter-list function-flags '{' function-body
// Parses one `.function` directive and emits it into the unit.
// Visible steps: attribute list, line range, return type info, function name,
// creation and init of a new FuncEmitter (stored in as.fe), parameter list,
// function flags, native check, then the '{' and the function body.
// Errors are reported through as.error(): when ensure_pseudomain(as) is false,
// when a non-top function follows an emitted top-level one, and when no name
// can be read.
// NOTE(review): declarations of isTop, line0, line1 and name are on lines not
// visible in this listing.
2598 void parse_function(AsmState
& as
) {
2599 if (!ensure_pseudomain(as
)) {
2600 as
.error(".function blocks must all follow the .main block");
2603 as
.in
.skipWhitespace();
2607 UserAttributeMap userAttrs
;
2608 Attr attrs
= parse_attribute_list(as
, AttrContext::Func
, &userAttrs
, &isTop
);
// Before SystemLib is initialized, extra attributes are forced onto the function.
2610 if (!SystemLib::s_inited
) {
2611 attrs
|= AttrUnique
| AttrPersistent
| AttrBuiltin
;
2614 // Be conservative by default. HHBBC can clear it where appropriate.
2615 attrs
|= AttrMayUseVV
;
2617 if(!isTop
&& as
.emittedTopLevelFunc
) {
2618 as
.error("All top level functions must be defined after any "
2619 "non-top functions");
2622 as
.emittedTopLevelFunc
|= isTop
;
2626 parse_line_range(as
, line0
, line1
);
2628 auto typeInfo
= parse_type_info(as
);
2630 if (!as
.in
.readname(name
)) {
2631 as
.error(".function must have a name");
// From here on as.fe refers to the emitter of the function being parsed.
2634 as
.fe
= as
.ue
->newFuncEmitter(makeStaticString(name
));
2635 as
.fe
->init(line0
, line1
, as
.ue
->bcPos(), attrs
, isTop
, 0);
2636 std::tie(as
.fe
->retUserType
, as
.fe
->retTypeConstraint
) = typeInfo
;
2637 as
.fe
->userAttributes
= userAttrs
;
2639 parse_parameter_list(as
);
2640 // parse_function_flags relies on as.fe already having valid attrs
2641 parse_function_flags(as
);
// Free functions are never constructors, hence is_construct == false.
2643 check_native(as
, false);
2645 as
.in
.expectWs('{');
2647 as
.srcLoc
= Location::Range
{-1,-1,-1,-1};
2648 parse_function_body(as
);
2652 * directive-method : attribute-list ?line-range type-info identifier
2653 * parameter-list function-flags '{' function-body
// Parses one `.method` directive inside a class and emits it.
// Visible steps: attribute list, line range, return type info, method name,
// duplicate-name check against as.pce, creation of a method emitter that is
// added to as.pce and stored in as.fe, parameter list, function flags, native
// check, then the '{' and the body.
// Errors are reported through as.error() when no name can be read or when the
// class already has a method with that name.
// NOTE(review): declarations of line0, line1 and name are on lines not visible
// in this listing.
2656 void parse_method(AsmState
& as
) {
2657 as
.in
.skipWhitespace();
2659 UserAttributeMap userAttrs
;
2660 Attr attrs
= parse_attribute_list(as
, AttrContext::Func
, &userAttrs
);
2662 if (!SystemLib::s_inited
) attrs
|= AttrBuiltin
;
2666 parse_line_range(as
, line0
, line1
);
2668 auto typeInfo
= parse_type_info(as
);
2670 if (!as
.in
.readname(name
)) {
2671 as
.error(".method requires a method name");
2674 auto const sname
= makeStaticString(name
);
2675 if (as
.pce
->hasMethod(sname
)) {
2676 as
.error("duplicate method name " + sname
->toCppString());
// Create the emitter, register it with the class, then initialize it.
2679 as
.fe
= as
.ue
->newMethodEmitter(sname
, as
.pce
);
2680 as
.pce
->addMethod(as
.fe
);
2681 as
.fe
->init(line0
, line1
,
2682 as
.ue
->bcPos(), attrs
, false, 0);
2683 std::tie(as
.fe
->retUserType
, as
.fe
->retTypeConstraint
) = typeInfo
;
2684 as
.fe
->userAttributes
= userAttrs
;
2686 parse_parameter_list(as
);
2687 // parse_function_flags relies on as.fe already having valid attrs
2688 parse_function_flags(as
);
// is_construct is true exactly for a method named "__construct".
2690 check_native(as
, name
== "__construct");
2692 as
.in
.expectWs('{');
2694 as
.srcLoc
= Location::Range
{-1,-1,-1,-1};
2695 parse_function_body(as
);
2699 * member-tv-initializer : '=' php-serialized ';'
// Parses the initializer that follows a constant/property name and produces
// the value as a TypedValue (tvInit).
// Visible behaviour: one character is read into `what`; a value starting with
// '"' is read with parse_php_serialized() and must be neither an object nor a
// resource; otherwise the keyword "uninit" is required and tvInit is written
// as uninit. A `what` of ';' is handled separately, and anything else ends in
// the "expected '=' or ';'" error.
// NOTE(review): the declaration of tvInit, the first branch condition and the
// return statement are on lines not visible in this listing.
2704 TypedValue
parse_member_tv_initializer(AsmState
& as
) {
2705 as
.in
.skipWhitespace();
2708 tvWriteNull(tvInit
); // Don't confuse Variant with uninit data
// First character after the member name; it selects the branch taken below.
2710 int what
= as
.in
.getc();
2712 as
.in
.skipWhitespace();
2714 if (as
.in
.peek() != '\"') {
2715 // It might be an uninitialized property/constant.
2716 if (!as
.in
.tryConsume("uninit")) {
2717 as
.error("Expected \"\"\" or \"uninit\" after '=' in "
2718 "const/property initializer");
2720 as
.in
.expectWs(';');
2721 tvWriteUninit(tvInit
);
// Quoted form: the php-serialized payload becomes the initial value.
2726 tvAsVariant(&tvInit
) = parse_php_serialized(as
);
2727 if (tvInit
.m_type
== KindOfObject
) {
2728 as
.error("property initializer can't be an object");
2729 } else if (tvInit
.m_type
== KindOfResource
) {
2730 as
.error("property initializer can't be a resource");
2732 tvAsVariant(&tvInit
).setEvalScalar();
2735 as
.in
.expectWs(';');
2736 } else if (what
== ';') {
2739 as
.error("expected '=' or ';' after property name");
// Shared parser for class properties and record fields.
// Visible steps: attribute list (AttrContext::Prop), optional long-string doc
// comment, type info, member name, initializer; the collected pieces are then
// handed to `add`, starting with the static-string name.
//   validate - attribute validator supplied by the caller; its invocation is
//              not visible in this listing (presumably called with `attrs`).
//   add      - callable that receives the parsed member description.
// NOTE(review): the declaration of `name`, the emptiness check guarding the
// error below, and the remaining arguments to `add` are on lines not shown.
2745 template<typename AttrValidator
, typename Adder
>
2746 void parse_prop_or_field_impl(AsmState
& as
, AttrValidator validate
, Adder add
) {
2747 as
.in
.skipWhitespace();
2749 UserAttributeMap userAttributes
;
2750 Attr attrs
= parse_attribute_list(as
, AttrContext::Prop
, &userAttributes
);
2753 auto const heredoc
= makeDocComment(parse_maybe_long_string(as
));
2755 const StringData
* userTy
;
2756 TypeConstraint typeConstraint
;
2757 std::tie(userTy
, typeConstraint
) = parse_type_info(as
, false);
// Fall back to the empty static string when no user type was given.
2758 auto const userTyStr
= userTy
? userTy
: staticEmptyString();
// The name is everything up to whitespace, '#', ';' or '='.
2761 as
.in
.skipSpaceTab();
2762 as
.in
.consumePred(!boost::is_any_of(" \t\r\n#;="),
2763 std::back_inserter(name
));
2765 as
.error("expected name for property or field");
2768 TypedValue tvInit
= parse_member_tv_initializer(as
);
2769 add(makeStaticString(name
),
2780 * directive-property : attribute-list maybe-long-string-literal type-info
2781 * identifier member-tv-initializer
2784 * Define a property with an associated type and heredoc.
// Parses a `.property` directive of a class via parse_prop_or_field_impl().
// The attribute checks shown reject a property that is both const and late
// init, and reject a non-static, non-const property when `class_is_const` is
// true. The parsed property is forwarded unchanged to as.pce->addProperty().
// NOTE(review): the first call argument and the validator lambda's header are
// on lines not visible in this listing.
2786 void parse_property(AsmState
& as
, bool class_is_const
) {
2787 parse_prop_or_field_impl(
2790 if (attrs
& AttrIsConst
) {
2791 if (attrs
& AttrLateInit
) {
2792 as
.error("const properties may not also be late init");
2794 } else if (class_is_const
&& !(attrs
& AttrStatic
)) {
2795 as
.error("all instance properties of a const class must be const");
// Adder: perfect-forwards every parsed piece to the class emitter.
2798 [&](auto&&... args
) {
2799 as
.pce
->addProperty(std::forward
<decltype(args
)>(args
)...);
// Parses a record field via parse_prop_or_field_impl() and forwards the parsed
// pieces unchanged to as.re->addField().
// NOTE(review): the first call argument and the attribute validator are on
// lines not visible in this listing.
2804 void parse_record_field(AsmState
& as
) {
2805 parse_prop_or_field_impl(
2808 [&](auto&&... args
) {
2809 as
.re
->addField(std::forward
<decltype(args
)>(args
)...);
2816 * const-flags : isType
2819 * directive-const : identifier const-flags member-tv-initializer
2820 * | identifier const-flags ';'
// Parses a `.const` directive of a class.
// Reads the constant name, then the optional "isType" flag. If the next
// character is ';' the constant is added through addAbstractConstant();
// otherwise its initializer is parsed and it is added through addConstant().
// Reports "expected name for constant" through as.error() when no name is read.
// NOTE(review): the declaration of `name` and the trailing arguments of both
// add calls are on lines not visible in this listing.
2823 void parse_constant(AsmState
& as
) {
2824 as
.in
.skipWhitespace();
2827 if (!as
.in
.readword(name
)) {
2828 as
.error("expected name for constant");
2831 bool isType
= as
.in
.tryConsume("isType");
2832 as
.in
.skipWhitespace();
// A bare ';' right after the flags means there is no initializer.
2834 if (as
.in
.peek() == ';') {
2836 as
.pce
->addAbstractConstant(makeStaticString(name
),
2837 staticEmptyString(),
2842 TypedValue tvInit
= parse_member_tv_initializer(as
);
2843 as
.pce
->addConstant(makeStaticString(name
),
2844 staticEmptyString(), &tvInit
,
2845 staticEmptyString(),
2850 * directive-default-ctor : ';'
2853 * No-op, for backward compat
2855 void parse_default_ctor(AsmState
& as
) {
2856 assertx(!as
.fe
&& as
.pce
);
2857 as
.in
.expectWs(';');
2861 * directive-use : identifier+ ';'
2862 * | identifier+ '{' use-line* '}'
2865 * use-line : use-name-ref "insteadof" identifier+ ';'
2866 * | use-name-ref "as" attribute-list identifier ';'
2867 * | use-name-ref "as" attribute-list ';'
// Parses a `.use` directive of a class.
// First collects one or more trait names and registers each through
// as.pce->addUsedTrait(). If a '{' follows, each line in the block is either
// an `as` rule (registered with addTraitAliasRule) or an `insteadof` rule
// (registered with addTraitPrecRule), each terminated by ';'.
// Errors are reported through as.error() for a missing trait name, a missing
// identifier, a missing alias/attribute list, an `insteadof` without a
// TraitName:: qualifier or without other trait names, and an unknown keyword.
// NOTE(review): loop headers, several declarations (name, alias, whom) and the
// tail of the alias-rule construction are on lines not visible in this listing.
2870 void parse_use(AsmState
& as
) {
2871 std::vector
<std::string
> usedTraits
;
2874 if (!as
.in
.readword(name
)) break;
2875 usedTraits
.push_back(name
);
2877 if (usedTraits
.empty()) {
2878 as
.error(".use requires a trait name");
2881 for (size_t i
= 0; i
< usedTraits
.size(); ++i
) {
2882 as
.pce
->addUsedTrait(makeStaticString(usedTraits
[i
]));
2884 as
.in
.skipWhitespace();
// No '{' after the trait list: short form (its handling is not shown here).
2885 if (as
.in
.peek() != '{') {
2892 as
.in
.skipWhitespace();
2893 if (as
.in
.peek() == '}') break;
2895 std::string traitName
;
2896 std::string identifier
;
2897 if (!as
.in
.readword(traitName
)) {
2898 as
.error("expected identifier for line in .use block");
2900 as
.in
.skipWhitespace();
// A ':' here means the rule was written in the qualified TraitName::name form.
2901 if (as
.in
.peek() == ':') {
2904 if (!as
.in
.readword(identifier
)) {
2905 as
.error("expected identifier after ::");
// Unqualified form: the single word that was read is the identifier.
2908 identifier
= traitName
;
2912 if (as
.in
.tryConsume("as")) {
2913 bool strict
= as
.in
.tryConsume("strict");
2914 bool async
= as
.in
.tryConsume("async");
2916 Attr attrs
= parse_attribute_list(as
, AttrContext::TraitImport
);
2918 if (!as
.in
.readword(alias
)) {
2919 if (attrs
!= AttrNone
) {
2922 as
.error("expected identifier or attribute list after "
2923 "`as' in .use block");
2927 as
.pce
->addTraitAliasRule(PreClass::TraitAliasRule(
2928 makeStaticString(traitName
),
2929 makeStaticString(identifier
),
2930 makeStaticString(alias
),
2934 } else if (as
.in
.tryConsume("insteadof")) {
2935 if (traitName
.empty()) {
2936 as
.error("Must specify TraitName::name when using a trait insteadof");
2939 PreClass::TraitPrecRule
precRule(
2940 makeStaticString(traitName
),
2941 makeStaticString(identifier
));
// At least one "other" trait name must follow `insteadof`.
2943 bool addedOtherTraits
= false;
2945 while (as
.in
.readword(whom
)) {
2946 precRule
.addOtherTraitName(makeStaticString(whom
));
2947 addedOtherTraits
= true;
2949 if (!addedOtherTraits
) {
2950 as
.error("one or more trait names expected after `insteadof'");
2953 as
.pce
->addTraitPrecRule(precRule
);
2955 as
.error("expected `as' or `insteadof' in .use block");
2958 as
.in
.expectWs(';');
2965 * directive-enum-ty : type-constraint ';'
// Parses a `.enum_ty` directive: a type constraint followed by ';'. The parsed
// constraint becomes the enum base type of the current class (as.pce).
// as.enumTySet is set to true here; the error below concerns a second
// `.enum_ty` in the same class.
// NOTE(review): the condition guarding that error is on a line not visible in
// this listing (presumably a test of as.enumTySet).
2969 void parse_enum_ty(AsmState
& as
) {
2971 as
.error("only one .enum_ty directive may appear in a given class");
2973 as
.enumTySet
= true;
2975 as
.pce
->setEnumBaseTy(parse_type_constraint(as
));
2977 as
.in
.expectWs(';');
2981 * directive-require : 'extends' '<' identifier '>' ';'
2982 * | 'implements' '<' identifier '>' ';'
// Parses a `.require` directive: the keyword "extends" or "implements", a name
// in angle brackets, and ';'. The requirement is added to the current class
// through as.pce->addClassRequirement(), with `extends` recording which
// keyword was used.
// Errors are reported through as.error() when neither keyword is present or
// when no name can be read.
// NOTE(review): the declaration of `name` and the end of the requirement
// construction are on lines not visible in this listing.
2986 void parse_require(AsmState
& as
) {
2987 as
.in
.skipWhitespace();
// `extends` is false when the keyword turns out to be "implements".
2989 bool extends
= as
.in
.tryConsume("extends");
2990 if (!extends
&& !as
.in
.tryConsume("implements")) {
2991 as
.error(".require should be extends or implements");
2994 as
.in
.expectWs('<');
2996 if (!as
.in
.readname(name
)) {
2997 as
.error(".require expects a class or interface name");
2999 as
.in
.expectWs('>');
3001 as
.pce
->addClassRequirement(PreClass::ClassRequirement(
3002 makeStaticString(name
), extends
3005 as
.in
.expectWs(';');
3009 * directive-doccomment : long-string-literal ';'
// Parses a class-level `.doc` directive: a long string literal followed by
// ';'. The string is converted with makeDocComment() and stored as the doc
// comment of the current class (as.pce).
3013 void parse_cls_doccomment(AsmState
& as
) {
3014 auto const doc
= parse_long_string(as
);
3015 as
.in
.expectWs(';');
3017 as
.pce
->setDocComment(makeDocComment(doc
));
3021 * class-body : class-body-line* '}'
3024 * class-body-line : ".method" directive-method
3025 * | ".property" directive-property
3026 * | ".const" directive-const
3027 * | ".use" directive-use
3028 * | ".default_ctor" directive-default-ctor
3029 * | ".enum_ty" directive-enum-ty
3030 * | ".require" directive-require
3031 * | ".doc" directive-doccomment
// Parses the directives inside a `.class { ... }` body.
// Each word read is matched against the known class-level directives and
// dispatched to its parser; any other word is reported through as.error() as
// an unrecognized directive. `class_is_const` is only passed on to
// parse_property().
// Also reports an error when ensure_pseudomain(as) is false.
// NOTE(review): the end of the `.property` branch and the code after the loop
// are on lines not visible in this listing.
3034 void parse_class_body(AsmState
& as
, bool class_is_const
) {
3035 if (!ensure_pseudomain(as
)) {
3036 as
.error(".class blocks must all follow the .main block");
3039 std::string directive
;
3040 while (as
.in
.readword(directive
)) {
3041 if (directive
== ".property") {
3042 parse_property(as
, class_is_const
);
3045 if (directive
== ".method") { parse_method(as
); continue; }
3046 if (directive
== ".const") { parse_constant(as
); continue; }
3047 if (directive
== ".use") { parse_use(as
); continue; }
3048 if (directive
== ".default_ctor") { parse_default_ctor(as
); continue; }
3049 if (directive
== ".enum_ty") { parse_enum_ty(as
); continue; }
3050 if (directive
== ".require") { parse_require(as
); continue; }
3051 if (directive
== ".doc") { parse_cls_doccomment(as
); continue; }
// Reached only when none of the directives above matched.
3053 as
.error("unrecognized directive `" + directive
+ "' in class");
3059 * record-body : record-body-line* '}'
3062 * record-body-line : ".property" directive-property
// Parses the directives inside a `.record { ... }` body. The only directive
// accepted is `.property`, handled by parse_record_field(); any other word is
// reported through as.error() as an unrecognized directive.
// Also reports an error when ensure_pseudomain(as) is false.
// NOTE(review): the code after the loop is on lines not visible in this listing.
3065 void parse_record_body(AsmState
& as
) {
3066 if (!ensure_pseudomain(as
)) {
3067 as
.error(".record blocks must all follow the .main block");
3070 std::string directive
;
3071 while (as
.in
.readword(directive
)) {
3072 if (directive
== ".property") { parse_record_field(as
); continue; }
3074 as
.error(folly::to
<std::string
>("unrecognized directive `",
3075 directive
, "` in record"));
// Chooses the PreClass::Hoistable category for the class currently held in
// as.pce. Visible outcomes:
//   - NotHoistable when the class has exactly one method and it is a closure
//     body;
//   - Mergeable when it has interfaces, used traits, requirements, or AttrEnum;
//   - MaybeHoistable when it names a parent not yet recorded in as.hoistables;
//   - otherwise `name` is recorded in as.hoistables and the result is
//     AlwaysHoistable for AttrUnique classes, MaybeHoistable for the rest.
// NOTE(review): `system` is computed but not used on any visible line; a guard
// that uses it is presumably on a line missing from this listing - confirm
// against the full file.
3080 PreClass::Hoistable
compute_hoistable(AsmState
& as
,
3081 const std::string
&name
,
3082 const std::string
&parentName
) {
3083 auto &pce
= *as
.pce
;
3084 bool system
= pce
.attrs() & AttrBuiltin
;
3086 if (pce
.methods().size() == 1 && pce
.methods()[0]->isClosureBody
) {
3087 return PreClass::NotHoistable
;
3090 if (!pce
.interfaces().empty() ||
3091 !pce
.usedTraits().empty() ||
3092 !pce
.requirements().empty() ||
3093 (pce
.attrs() & AttrEnum
)) {
3094 return PreClass::Mergeable
;
3096 if (!parentName
.empty() && !as
.hoistables
.count(parentName
)) {
3097 return PreClass::MaybeHoistable
;
// Remember this class so that later classes extending it pass the parent check.
3100 as
.hoistables
.insert(name
);
3102 return pce
.attrs() & AttrUnique
?
3103 PreClass::AlwaysHoistable
: PreClass::MaybeHoistable
;
3107 * directive-class : ?"top" attribute-list identifier ?line-range
3108 * extension-clause implements-clause '{' class-body
3111 * extension-clause : empty
3112 * | "extends" identifier
3115 * implements-clause : empty
3116 * | "implements" '(' identifier* ')'
// Parses one `.class` directive and emits a PreClassEmitter for it.
// Visible steps: attribute list (with const-class validation), class name
// (anonymous class names get a freshly generated name and must not contain
// ';'), line range, optional "extends" parent, optional "implements" list,
// creation of the bare pre-class emitter (stored in as.pce), interfaces and
// user attributes, the class body, and finally the hoistability setting.
// Errors are reported through as.error() for invalid const-class attribute
// combinations, a missing class name, a pre-numbered anonymous name, and a
// missing parent name after `extends'.
// NOTE(review): declarations of isTop, name, line0, line1 and word, and the
// emitter's init call header, are on lines not visible in this listing.
3120 void parse_class(AsmState
& as
) {
3121 as
.in
.skipWhitespace();
3125 UserAttributeMap userAttrs
;
3126 Attr attrs
= parse_attribute_list(as
, AttrContext::Class
, &userAttrs
, &isTop
);
3127 if (!SystemLib::s_inited
) {
3128 attrs
|= AttrUnique
| AttrPersistent
| AttrBuiltin
;
// Const classes: not allowed on enums/interfaces/traits, and must carry
// AttrForbidDynamicProps.
3130 if (attrs
& AttrIsConst
) {
3131 if (attrs
& (AttrEnum
| AttrInterface
| AttrTrait
)) {
3132 as
.error("interfaces, traits and enums may not be const");
3134 if (!(attrs
& AttrForbidDynamicProps
)) {
3135 as
.error("const class missing ForbidDynamicProps attribute");
3140 if (!as
.in
.readname(name
)) {
3141 as
.error(".class must have a name");
3143 if (PreClassEmitter::IsAnonymousClassName(name
)) {
3144 // assign unique numbers to anonymous classes
3145 // they must not be pre-numbered in the hhas
3146 auto p
= name
.find(';');
3147 if (p
!= std::string::npos
) {
3148 as
.error("anonymous class and closure names may not contain ids in hhas");
3150 name
= HPHP::NewAnonymousClassName(name
);
3155 parse_line_range(as
, line0
, line1
);
3157 std::string parentName
;
3158 if (as
.in
.tryConsume("extends")) {
3159 if (!as
.in
.readname(parentName
)) {
3160 as
.error("expected parent class name after `extends'");
3164 std::vector
<std::string
> ifaces
;
3165 if (as
.in
.tryConsume("implements")) {
3166 as
.in
.expectWs('(');
3168 while (as
.in
.readname(word
)) {
3169 ifaces
.push_back(word
);
// Look up as.defClsOffsets using the current pre-class count as the key.
3174 auto off
= folly::get_default(as
.defClsOffsets
, as
.ue
->numPreClasses(),
// The emitter starts as MaybeHoistable; the final value is set after the body.
3177 as
.pce
= as
.ue
->newBarePreClassEmitter(name
, PreClass::MaybeHoistable
);
3182 makeStaticString(parentName
),
3183 staticEmptyString());
3184 for (auto const& iface
: ifaces
) {
3185 as
.pce
->addInterface(makeStaticString(iface
));
3187 as
.pce
->setUserAttributes(userAttrs
);
3189 as
.in
.expectWs('{');
3190 parse_class_body(as
, attrs
& AttrIsConst
);
// Non-top classes are never hoistable; top ones are classified by compute_hoistable().
3192 as
.pce
->setHoistable(
3193 isTop
? compute_hoistable(as
, name
, parentName
) : PreClass::NotHoistable
3200 * directive-record : attribute-list identifier ?line-range
3201 * extension-clause '{' record-body
3204 * extension-clause : empty
3205 * | "extends" identifier
// Parses one `.record` directive and emits a record emitter for it (as.re).
// Records are rejected with "Records not supported" unless one of the runtime
// options EvalHackRecords / EvalHackRecordArrays is enabled.
// A record without AttrFinal is marked AttrAbstract; a record carrying both
// AttrFinal and AttrAbstract is an error. Errors are also reported through
// as.error() for a missing record name or a missing parent name after
// `extends'.
// NOTE(review): declarations of name, line0 and line1 and the emitter's init
// call header are on lines not visible in this listing.
3208 void parse_record(AsmState
& as
) {
3209 if (!RuntimeOption::EvalHackRecords
&& !RuntimeOption::EvalHackRecordArrays
) {
3210 as
.error("Records not supported");
3213 as
.in
.skipWhitespace();
3215 Attr attrs
= parse_attribute_list(as
, AttrContext::Class
);
3216 if (!(attrs
& AttrFinal
)) {
3217 // parser only sets the final flag. If the final flag is not set,
3218 // the record is abstract.
3219 attrs
|= AttrAbstract
;
3220 } else if (attrs
& AttrAbstract
) {
3221 as
.error("A record cannot be both final and abstract");
3226 if (!as
.in
.readname(name
)) {
3227 as
.error(".record must have a name");
3232 parse_line_range(as
, line0
, line1
);
3234 std::string parentName
;
3235 if (as
.in
.tryConsume("extends")) {
3236 if (!as
.in
.readname(parentName
)) {
3237 as
.error("expected parent record name after `extends'");
3241 as
.re
= as
.ue
->newBareRecordEmitter(name
);
3245 makeStaticString(parentName
),
3246 staticEmptyString());
3248 as
.in
.expectWs('{');
3249 parse_record_body(as
);
3255 * directive-filepath : quoted-string-literal ';'
3258 void parse_filepath(AsmState
& as
) {
3259 auto const str
= read_litstr(as
);
3260 if (nullptr == g_hhas_handler
) {
3261 // We don't want to use file path from cached HHAS
3262 as
.ue
->m_filepath
= str
;
3264 as
.in
.expectWs(';');
3268 * directive-main : ?line-range '{' function-body
// Parses the `.main` directive: an optional line range, '{', and the function
// body of the pseudo-main. Initializes the unit's main through
// as.ue->initMain(), makes it the current function (as.fe) and sets
// as.emittedPseudoMain.
// A second `.main` is reported through as.error().
// NOTE(review): declarations of line0/line1 and the condition choosing between
// the two as.srcLoc assignments are on lines not visible in this listing
// (presumably the choice depends on fromSrcLoc).
3271 void parse_main(AsmState
& as
) {
3272 if (as
.emittedPseudoMain
) {
3273 as
.error("Multiple .main directives found");
3278 bool fromSrcLoc
= parse_line_range(as
, line0
, line1
);
3280 as
.in
.expectWs('{');
3282 as
.ue
->initMain(line0
, line1
);
3283 as
.fe
= as
.ue
->getMain();
3284 as
.emittedPseudoMain
= true;
3286 as
.srcLoc
= Location::Range
{line0
,0,line1
,0};
3288 as
.srcLoc
= Location::Range
{-1,-1,-1,-1};
3290 parse_function_body(as
);
3294 * directive-adata : identifier '=' php-serialized ';'
// Parses an `.adata` directive: a label, '=', a long string, and ';'.
// The raw string is stored in as.adataDecls under the label.
// Errors are reported through as.error() when no label can be read or when the
// label is already present in as.adataMap.
// NOTE(review): the duplicate check reads as.adataMap while the declaration is
// written to as.adataDecls. Whether a repeated label that is only in
// adataDecls is caught depends on how adataMap is populated elsewhere -
// confirm, otherwise a repeated label would silently overwrite the earlier
// declaration.
3297 void parse_adata(AsmState
& as
) {
3298 as
.in
.skipWhitespace();
3299 std::string dataLabel
;
3300 if (!as
.in
.readword(dataLabel
)) {
3301 as
.error("expected name for .adata");
3303 if (as
.adataMap
.count(dataLabel
)) {
3304 as
.error("duplicate adata label name " + dataLabel
);
3307 as
.in
.expectWs('=');
3308 as
.adataDecls
[dataLabel
] = parse_long_string_raw(as
);
3309 as
.in
.expectWs(';');
3313 * directive-alias : attribute-list identifier '=' type-constraint
3314 * maybe-php-serialized ';'
3317 * We represent alias type information using the syntax for
3318 * TypeConstraints. We populate the name and nullable field of the
3319 * alias directly from the specified type constraint and derive the
3320 * AnnotType from the computed AnnotType in the constraint.
3322 * Following the type-constraint we encode the serialized type structure
3323 * corresponding to this alias.
// Parses an `.alias` directive and registers the type alias with the unit.
// Visible steps: attribute list (user attributes go into record.userAttrs),
// alias name, '=', a type constraint, an optional php-serialized type
// structure, then population of `record` and registration through
// as.ue->addTypeAlias() / pushMergeableTypeAlias(), and the final ';'.
// Errors are reported through as.error() when no name can be read or when a
// type structure is present but is not an array.
// NOTE(review): declarations of `record` and `name` are on lines not visible
// in this listing.
3325 void parse_alias(AsmState
& as
) {
3326 as
.in
.skipWhitespace();
3329 Attr attrs
= parse_attribute_list(as
, AttrContext::Alias
, &record
.userAttrs
);
3330 if (!SystemLib::s_inited
) {
3331 attrs
|= AttrPersistent
;
3334 if (!as
.in
.readname(name
)) {
3335 as
.error(".alias must have a name");
3337 as
.in
.expectWs('=');
3339 TypeConstraint ty
= parse_type_constraint(as
);
3340 Variant ts
= parse_maybe_php_serialized(as
);
3342 if (ts
.isInitialized() && !ts
.isArray()) {
3343 as
.error(".alias must have an array type structure");
// A constraint without a type name is treated as the empty string.
3346 const StringData
* typeName
= ty
.typeName();
3347 if (!typeName
) typeName
= staticEmptyString();
3348 const StringData
* sname
= makeStaticString(name
);
3349 // Merge to ensure namedentity creation, according to
3350 // emitTypedef in emitter.cpp
3351 as
.ue
->mergeLitstr(sname
);
3352 as
.ue
->mergeLitstr(typeName
);
3354 record
.name
= sname
;
3355 record
.value
= typeName
;
// An empty type name maps to AnnotType::Mixed instead of the constraint's type.
3356 record
.type
= typeName
->empty() ? AnnotType::Mixed
: ty
.type();
3357 record
.nullable
= (ty
.flags() & TypeConstraint::Nullable
) != 0;
3358 record
.attrs
= attrs
;
3359 if (ts
.isInitialized()) {
3360 record
.typeStructure
= ArrNR(ArrayData::GetScalarArray(std::move(ts
)));
3362 auto aliasId
= as
.ue
->addTypeAlias(record
);
3363 as
.ue
->pushMergeableTypeAlias(aliasId
);
3365 as
.in
.expectWs(';');
3369 * directive-hh-file : '1' ';'
// Parses a `.hh_file` directive: a single word followed by ';'.
// as.ue->m_isHHFile becomes true exactly when the word is "1"; the only other
// accepted word is "0". A missing word or any other value is reported through
// as.error().
// NOTE(review): the declaration of `word` is on a line not visible in this
// listing.
3373 void parse_hh_file(AsmState
& as
) {
3374 as
.in
.skipWhitespace();
3376 if (!as
.in
.readword(word
)) {
3377 as
.error(".hh_file must have a value");
3379 as
.ue
->m_isHHFile
= word
== "1";
// Anything that is neither "1" nor "0" is rejected.
3381 if (!as
.ue
->m_isHHFile
&& word
!= "0") {
3382 as
.error(".hh_file must be either 1 or 0");
3385 as
.in
.expectWs(';');
3389 * directive-symbols : '{' identifier identifier* '}'
// Parses a `{ symbol symbol ... }` block of symbol references of the given
// kind. When as.wants_symbol_refs is set, each symbol (a run of characters up
// to whitespace, '#' or '}') is appended to as.symbol_refs[symbol_kind].
// The second loop shown skips input until a '}' is peeked without recording
// anything.
// NOTE(review): loop headers, the declaration of `symbol`, and the handling of
// an empty symbol and of the closing '}' are on lines not visible in this
// listing.
3391 void parse_symbol_refs(AsmState
& as
, SymbolRef symbol_kind
) {
3392 as
.in
.expectWs('{');
3394 if (as
.wants_symbol_refs
) {
3396 as
.in
.skipWhitespace();
3398 as
.in
.consumePred(!boost::is_any_of(" \t\r\n#}"),
3399 std::back_inserter(symbol
));
3400 if (symbol
.empty()) {
3403 as
.symbol_refs
[symbol_kind
].push_back(symbol
);
3406 while (as
.in
.peek() != '}') {
3407 as
.in
.skipWhitespace();
3408 if (!as
.in
.skipPred(!boost::is_any_of("#}"))) break;
3416 * directive-filepaths : '{' string string* '}'
3418 void parse_includes(AsmState
& as
) {
3419 parse_symbol_refs(as
, SymbolRef::Include
);
3422 void parse_constant_refs(AsmState
& as
) {
3423 parse_symbol_refs(as
, SymbolRef::Constant
);
3426 void parse_function_refs(AsmState
& as
) {
3427 parse_symbol_refs(as
, SymbolRef::Function
);
3430 void parse_class_refs(AsmState
& as
) {
3431 parse_symbol_refs(as
, SymbolRef::Class
);
3435 * directive-metadata : identifier = identifier ';'
3436 * | identifier = quoted-string-literal ';'
3437 * | identifier = long-string-literal ';'
// Parses a `.metadata` directive of the form `key = value`.
// The value is taken from a long string if one is present and non-empty,
// otherwise from a quoted string or a bare word. The pair is stored in
// as.ue->m_metaData as a persistent-string TypedValue keyed by the static
// string of `key`.
// Reports ".metadata expects a key = value pair" through as.error().
// NOTE(review): declarations of `key` and `tmp`, the lambda's fall-through
// return, and the control flow leading to the final error are on lines not
// visible in this listing.
3440 void parse_metadata(AsmState
& as
) {
3442 if (as
.in
.readname(key
)) {
3443 as
.in
.expectWs('=');
3444 as
.in
.skipWhitespace();
// Immediately-invoked lambda: tries the long-string form first, then
// a quoted string or a bare word.
3445 auto const value
= [&] () -> const StringData
* {
3446 auto ret
= parse_maybe_long_string(as
);
3447 if (!ret
.empty()) return makeStaticString(ret
);
3449 if (as
.in
.readQuotedStr(tmp
) || as
.in
.readword(tmp
)) {
3450 return makeStaticString(tmp
);
3456 as
.ue
->m_metaData
.emplace(
3457 makeStaticString(key
),
3458 make_tv
<KindOfPersistentString
>(value
)
3463 as
.error(".metadata expects a key = value pair");
3467 * directive-file-attributes : attribute-list ';'
// Parses a `.file_attributes` directive: an attribute list followed by ';'.
// User attributes are written into as.ue->m_fileAttributes; the Attr value
// returned by parse_attribute_list() is discarded. The list is parsed with
// AttrContext::Func.
3470 void parse_file_attributes(AsmState
& as
) {
3471 as
.in
.skipWhitespace();
3473 parse_attribute_list(as
, AttrContext::Func
, &(as
.ue
->m_fileAttributes
));
3475 as
.in
.expectWs(';');
3479 * asm-file : asm-tld* <EOF>
3482 * asm-tld : ".filepath" directive-filepath
3483 * | ".main" directive-main
3484 * | ".function" directive-function
3485 * | ".adata" directive-adata
3486 * | ".class" directive-class
3487 * | ".alias" directive-alias
3488 * | ".hh_file" directive-hh-file
3489 * | ".includes" directive-filepaths
3490 * | ".constant_refs" directive-symbols
3491 * | ".function_refs" directive-symbols
3492 * | ".class_refs" directive-symbols
3493 * | ".metadata" directive-metadata
3494 * | ".file_attributes" directive-file-attributes
// Top-level driver: reads directive words until none can be read and
// dispatches each to its parser. Besides the directives listed in the grammar
// comment above, `.record` is also dispatched (to parse_record). Any other
// word is reported through as.error() as an unrecognized top-level directive.
// After the loop:
//   - an error is reported if ensure_pseudomain(as) is false;
//   - collected symbol refs are moved into as.ue->m_symbol_refs;
//   - when RuntimeOption::EvalAssemblerFoldDefaultValues is set,
//     fixup_default_values() is run on every function and every class method.
3497 void parse(AsmState
& as
) {
3498 as
.in
.skipWhitespace();
3499 std::string directive
;
3501 while (as
.in
.readword(directive
)) {
3502 if (directive
== ".filepath") { parse_filepath(as
) ; continue; }
3503 if (directive
== ".main") { parse_main(as
) ; continue; }
3504 if (directive
== ".function") { parse_function(as
) ; continue; }
3505 if (directive
== ".adata") { parse_adata(as
) ; continue; }
3506 if (directive
== ".class") { parse_class(as
) ; continue; }
3507 if (directive
== ".record") { parse_record(as
) ; continue; }
3508 if (directive
== ".alias") { parse_alias(as
) ; continue; }
3509 if (directive
== ".hh_file") { parse_hh_file(as
) ; continue; }
3510 if (directive
== ".includes") { parse_includes(as
) ; continue; }
3511 if (directive
== ".constant_refs") { parse_constant_refs(as
) ; continue; }
3512 if (directive
== ".function_refs") { parse_function_refs(as
) ; continue; }
3513 if (directive
== ".class_refs") { parse_class_refs(as
) ; continue; }
3514 if (directive
== ".metadata") { parse_metadata(as
) ; continue; }
3515 if (directive
== ".file_attributes") { parse_file_attributes(as
); continue;}
// Reached only when none of the directives above matched.
3517 as
.error("unrecognized top-level directive `" + directive
+ "'");
3520 if (!ensure_pseudomain(as
)) {
3521 as
.error("no .main found in hhas unit");
3524 if (as
.symbol_refs
.size()) {
3525 for (auto& ent
: as
.symbol_refs
) {
3526 as
.ue
->m_symbol_refs
.push_back(std::move(ent
));
3530 if (RuntimeOption::EvalAssemblerFoldDefaultValues
) {
3531 for (auto& fe
: as
.ue
->fevec()) fixup_default_values(as
, fe
.get());
3532 for (size_t n
= 0; n
< as
.ue
->numPreClasses(); ++n
) {
3533 for (auto fe
: as
.ue
->pce(n
)->methods()) fixup_default_values(as
, fe
);
3540 //////////////////////////////////////////////////////////////////////
// Assembles hhas text into a UnitEmitter.
// Visible behaviour: a SHA1 of the input text is computed, a UnitEmitter is
// created from sha1/bcSha1/nativeFuncs, its file path is set from `filename`,
// and an AsmState is built over an in-memory stream of the text.
// If a FatalErrorException, AssemblerError, AssemblerFatal or any other
// std::exception is caught and `swallowErrors` is set, `ue` is replaced by a
// fatal unit carrying the exception message. Otherwise the first three are
// rethrown and any other std::exception is rethrown as an AssemblerError.
// NOTE(review): several parameters (code, codeLen, sha1, swallowErrors), the
// try block that runs the parser, and the return statement are on lines not
// visible in this listing.
3542 std::unique_ptr
<UnitEmitter
> assemble_string(
3545 const char* filename
,
3547 const Native::FuncTable
& nativeFuncs
,
3549 bool wantsSymbolRefs
3551 auto const bcSha1
= SHA1
{string_sha1(folly::StringPiece(code
, codeLen
))};
3552 auto ue
= std::make_unique
<UnitEmitter
>(sha1
, bcSha1
, nativeFuncs
, false);
3553 if (!SystemLib::s_inited
) {
3554 ue
->m_mergeOnly
= true;
3556 StringData
* sd
= makeStaticString(filename
);
3557 ue
->m_filepath
= sd
;
// The source text is copied into a binary, input-only string stream.
3560 auto const mode
= std::istringstream::binary
| std::istringstream::in
;
3561 std::istringstream
instr(std::string(code
, codeLen
), mode
);
3562 AsmState
as(instr
, wantsSymbolRefs
);
3565 } catch (const FatalErrorException
& e
) {
3566 if (!swallowErrors
) throw;
3567 ue
= createFatalUnit(sd
, sha1
, FatalOp::Runtime
,
3568 makeStaticString(e
.what()));
3569 } catch (const AssemblerError
& e
) {
3570 if (!swallowErrors
) throw;
3571 ue
= createFatalUnit(sd
, sha1
, FatalOp::Runtime
, makeStaticString(e
.what()));
3572 } catch (const AssemblerFatal
& e
) {
3573 if (!swallowErrors
) throw;
3574 ue
= createFatalUnit(sd
, sha1
, FatalOp::Runtime
, makeStaticString(e
.what()));
3575 } catch (const std::exception
& e
) {
3576 if (!swallowErrors
) {
3577 // assembler should throw only AssemblerErrors and FatalErrorExceptions
3578 throw AssemblerError(folly::sformat("AssemblerError: {}", e
.what()));
3580 ue
= createFatalUnit(sd
, sha1
, FatalOp::Runtime
,
3581 makeStaticString(e
.what()));
3587 //////////////////////////////////////////////////////////////////////