2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
18 * This module contains an assembler implementation for HHBC. It is
19 * probably fairly close to allowing you to access most of the
20 * metadata associated with hhvm's compiled unit format, although it's
21 * possible something has been overlooked.
23 * To use it, run hhvm with -v Eval.AllowHhas=true on a file with a
24 * ".hhas" extension. The syntax is probably easiest to understand by
25 * looking at some examples (or the semi-BNF markup around some of the
26 * parse functions here). For examples, see hphp/tests/vm/asm_*.
31 * - You can crash hhvm very easily with this.
33 * Using this module, you can emit pretty much any sort of not
34 * trivially-illegal bytecode stream, and many trivially-illegal
35 * ones as well. You can also easily create Units with illegal
36 * metadata. Generally this will crash the VM. In other cases
37 * (especially if you don't bother to DefCls your classes in your
38 * .main) you'll just get mysterious "class not defined" errors
41 * - Whitespace is not normally significant, but newlines may not
42 * be in the middle of a list of opcode arguments. (After the
43 * newline, the next thing seen is expected to be either a
44 * mnemonic for the next opcode in the stream or some sort of
45 * directive.) However, newlines (and comments) may appear
46 * *inside* certain opcode arguments (e.g. string literals or
49 * Rationale: this is partially intended to make it trivial to
50 * catch wrong-number-of-arguments errors, although it probably
51 * could be done without this if you feel like changing it.
56 * - It might be nice if you could refer to iterators by name
57 * instead of by index.
59 * - DefCls by name would be nice.
61 * Missing features (partial list):
63 * - builtinType (for native funcs) field on ParamInfo
65 * - while class/function names can contain ':', '$', and ';',
66 * .use declarations can't handle those names because of syntax
69 * @author Jordan DeLong <delong.j@fb.com>
72 #include "hphp/runtime/vm/as.h"
80 #include <boost/algorithm/string.hpp>
81 #include <boost/scoped_ptr.hpp>
82 #include <boost/bind.hpp>
84 #include <folly/Conv.h>
85 #include <folly/MapUtil.h>
86 #include <folly/Memory.h>
87 #include <folly/Range.h>
88 #include <folly/String.h>
90 #include "hphp/util/md5.h"
92 #include "hphp/parser/parser.h"
93 #include "hphp/runtime/base/builtin-functions.h"
94 #include "hphp/runtime/base/repo-auth-type-codec.h"
95 #include "hphp/runtime/base/repo-auth-type.h"
96 #include "hphp/runtime/base/tv-type.h"
97 #include "hphp/runtime/vm/as-shared.h"
98 #include "hphp/runtime/vm/func-emitter.h"
99 #include "hphp/runtime/vm/hhbc.h"
100 #include "hphp/runtime/vm/preclass-emitter.h"
101 #include "hphp/runtime/vm/unit.h"
102 #include "hphp/runtime/vm/unit-emitter.h"
103 #include "hphp/system/systemlib.h"
109 //////////////////////////////////////////////////////////////////////
114 typedef void (*ParserFunc
)(AsmState
& as
);
116 struct Error
: std::runtime_error
{
117 explicit Error(int where
, const std::string
& what
)
118 : std::runtime_error(folly::sformat(
119 "Assembler Error: line {}: {}", where
, what
))
124 explicit Input(std::istream
& in
)
128 int peek() { return m_in
.peek(); }
131 int ret
= m_in
.get();
134 } else if (ret
== '\n') {
140 void ungetc(char c
) {
141 if (c
== '\n') --m_lineNumber
;
147 error(folly::sformat("expected character `{}'", char(c
)));
152 * Expect `c' after possible whitespace/comments. When convenient,
153 * preferable to doing skipWhitespace/expect manually to keep the
154 * line number in the error prior to the whitespace skipped.
156 void expectWs(int c
) {
157 const int currentLine
= m_lineNumber
;
160 throw Error(currentLine
,
161 folly::sformat("expected character `{}'", char(c
)));
165 int getLineNumber() const {
169 // Skips whitespace, then populates word with valid bareword
170 // characters. Returns true if we read any characters into word.
171 bool readword(std::string
& word
) {
174 consumePred(is_bareword(), std::back_inserter(word
));
175 return !word
.empty();
177 // Skips whitespace, then populates name with valid extname
178 // characters. Returns true if we read any characters into name.
179 bool readname(std::string
& name
) {
182 consumePred(is_extname(), std::back_inserter(name
));
183 return !name
.empty();
185 // Try to consume a bareword. Skips whitespace. If we can't
186 // consume the specified word, returns false.
187 bool tryConsume(const std::string
& what
) {
189 if (!readword(word
)) {
193 std::for_each(word
.rbegin(), word
.rend(),
194 boost::bind(&Input::ungetc
, this, _1
));
202 if (peek() == '-') buf
+= (char)getc();
203 consumePred(isdigit
, std::back_inserter(buf
));
204 if (buf
.empty() || buf
== "-") {
205 throw Error(m_lineNumber
, "expected integral value");
207 return folly::to
<int32_t>(buf
);
210 // C-style character escapes, no support for unicode escapes or
212 template<class OutCont
>
213 void escapeChar(OutCont
& out
) {
214 auto is_oct
= [&] (int i
) { return i
>= '0' && i
<= '7'; };
215 auto is_hex
= [&] (int i
) {
216 return (i
>= '0' && i
<= '9') ||
217 (i
>= 'a' && i
<= 'f') ||
218 (i
>= 'A' && i
<= 'F');
220 auto hex_val
= [&] (int i
) -> uint32_t {
222 return i
>= '0' && i
<= '9' ? i
- '0' :
223 i
>= 'a' && i
<= 'f' ? i
- 'a' + 10 : i
- 'A' + 10;
228 case EOF
: error("EOF in string literal");
229 case 'a': out
.push_back('\a'); break;
230 case 'b': out
.push_back('\b'); break;
231 case 'f': out
.push_back('\f'); break;
232 case 'n': out
.push_back('\n'); break;
233 case 'r': out
.push_back('\r'); break;
234 case 't': out
.push_back('\t'); break;
235 case 'v': out
.push_back('\v'); break;
236 case '\'': out
.push_back('\''); break;
237 case '\"': out
.push_back('\"'); break;
238 case '\?': out
.push_back('\?'); break;
239 case '\\': out
.push_back('\\'); break;
240 case '\r': /* ignore */ break;
241 case '\n': /* ignore */ break;
244 auto val
= int64_t{src
} - '0';
245 for (auto i
= int{1}; i
< 3; ++i
) {
247 if (!is_oct(src
)) { ungetc(src
); break; }
251 if (val
> std::numeric_limits
<uint8_t>::max()) {
252 error("octal escape sequence overflowed");
254 out
.push_back(static_cast<uint8_t>(val
));
258 if (src
== 'x' || src
== 'X') {
259 auto val
= uint64_t{0};
260 if (!is_hex(peek())) error("\\x used without no following hex digits");
261 for (auto i
= int{0}; i
< 2; ++i
) {
263 if (!is_hex(src
)) { ungetc(src
); break; }
267 if (val
> std::numeric_limits
<uint8_t>::max()) {
268 error("hex escape sequence overflowed");
270 out
.push_back(static_cast<uint8_t>(val
));
274 error("unrecognized character escape");
278 // Reads a quoted string with typical escaping rules. Does not skip
279 // any whitespace. Returns true if we successfully read one, or
280 // false. EOF during the string throws.
281 bool readQuotedStr(std::string
& str
) {
283 if (peek() != '\"') {
289 while ((c
= getc()) != EOF
) {
291 case '\"': return true;
292 case '\\': escapeChar(str
); break;
293 default: str
.push_back(c
); break;
296 error("EOF in string literal");
302 * Reads a python-style longstring, or returns false if we don't
303 * have one. Does not skip any whitespace before looking for the
306 * Python longstrings start with \"\"\", and can contain any bytes
307 * other than \"\"\". A '\\' character introduces C-style escapes,
308 * but there's no need to escape single quote characters.
310 bool readLongString(std::vector
<char>& buffer
) {
311 if (peek() != '\"') return false;
313 if (peek() != '\"') { ungetc('\"'); return false; }
315 if (peek() != '\"') { ungetc('\"');
316 ungetc('\"'); return false; }
320 while ((c
= getc()) != EOF
) {
328 buffer
.push_back('"');
334 buffer
.push_back('"');
335 buffer
.push_back('"');
344 error("EOF in \"\"\"-string literal");
349 // Skips whitespace (including newlines and comments).
350 void skipWhitespace() {
351 while (skipPred(boost::is_any_of(" \t\r\n"))) {
353 skipPred(!boost::is_any_of("\n"));
361 // Skip spaces and tabs, but other whitespace (such as comments or
362 // newlines) stop the skip.
363 void skipSpaceTab() {
364 skipPred(boost::is_any_of(" \t"));
367 template<class Predicate
>
368 bool skipPred(Predicate pred
) {
369 while (pred(peek())) {
378 template<class Predicate
, class OutputIterator
>
379 bool consumePred(Predicate pred
, OutputIterator out
) {
381 while (pred(c
= peek())) {
393 // whether a character is a valid part of the extended sorts of
394 // names that HHVM uses for certain generated constructs
395 // (closures, __Memoize implementations, etc)
397 bool operator()(int i
) const {
399 return is_bw(i
) || i
== ':' || i
== ';' || i
== '#' || i
=='@' ||
400 (i
>= 0x7f && i
<= 0xff) /* see hphp.ll :( */;
404 void error(const std::string
& what
) {
405 throw Error(getLineNumber(), what
);
408 void io_error_if_bad() {
410 error("I/O error reading stream: " +
411 folly::errnoStr(errno
).toStdString());
424 StackDepth
* stackDepth
;
429 * Tracks the depth of the stack in a given block of instructions.
431 * This structure is linked to a block of instructions (usually starting at a
432 * label), and tracks the current stack depth in this block. This tracking can
434 * - Absolute depth: the depth of the stack is exactly known for this block
435 * - Relative depth: the depth of the stack is unknown for now. We keep track
436 * of an offset, relative to the depth of the stack at the first instruction
442 * Tracks the max depth of elem stack + desc stack offset inside a region
443 * where baseValue is unknown.
447 * Tracks the min depth of the elem stack inside a region where baseValue
448 * is unknown, and the line where the min occurred.
452 folly::Optional
<int> baseValue
;
455 * During the parsing process, when a Jmp instruction is encountered, the
456 * StackDepth structure for this jump becomes linked to the StackDepth
457 * structure of the label (which is added to the listeners list).
459 * Once the absolute depth at the jump becomes known, its StackDepth
460 * instance calls the setBase method of the StackDepth instance of the label.
461 * The absolute depth at the label can then be inferred from the
462 * absolute depth at the jump.
464 std::vector
<std::pair
<StackDepth
*, int> > listeners
;
472 void adjust(AsmState
& as
, int delta
);
473 void addListener(AsmState
& as
, StackDepth
* target
);
474 void setBase(AsmState
& as
, int stackDepth
);
475 int absoluteDepth() {
476 assert(baseValue
.hasValue());
477 return baseValue
.value() + currentOffset
;
481 * Sets the baseValue such that the current stack depth matches the
484 * If the base value is already known, it may conflict with the
485 * parameter of this function. In this case, an error will be raised.
487 void setCurrentAbsolute(AsmState
& as
, int stackDepth
);
493 StackDepth stackDepth
;
496 * Each label source has an Offset where the jmp should be
497 * patched up, and an Offset from which the jump delta should be
498 * computed. (The second Offset is basically to the actual
499 * jump/switch/etc instruction, while the first points to the
502 std::vector
<std::pair
<Offset
,Offset
>> sources
;
505 * List of parameter ids that use this label for their DV
508 std::vector
<Id
> dvInits
;
511 * List of EHEnts that have m_handler pointing to this label.
513 std::vector
<size_t> ehEnts
;
517 explicit AsmState(std::istream
& in
, AsmCallbacks
* callbacks
= nullptr)
519 , callbacks(callbacks
)
521 currentStackDepth
->setBase(*this, 0);
524 AsmState(const AsmState
&) = delete;
525 AsmState
& operator=(const AsmState
&) = delete;
527 template<typename
... Args
>
528 void error(const std::string
& fmt
, Args
&&... args
) {
529 throw Error(in
.getLineNumber(),
530 folly::sformat(fmt
, std::forward
<Args
>(args
)...));
534 void adjustStack(int delta
) {
535 if (currentStackDepth
== nullptr) {
536 // Instruction is unreachable, nothing to do here!
540 currentStackDepth
->adjust(*this, delta
);
543 void adjustStackHighwater(int depth
) {
545 fe
->maxStackCells
= std::max(fe
->maxStackCells
, depth
);
549 std::string
displayStackDepth() {
550 std::ostringstream stack
;
552 if (currentStackDepth
== nullptr) {
554 } else if (currentStackDepth
->baseValue
) {
555 stack
<< *currentStackDepth
->baseValue
+
556 currentStackDepth
->currentOffset
;
558 stack
<< "?" << currentStackDepth
->currentOffset
;
564 void addLabelTarget(const std::string
& name
) {
565 auto& label
= labelMap
[name
];
567 error("Duplicate label " + name
);
570 label
.target
= ue
->bcPos();
572 StackDepth
* newStack
= &label
.stackDepth
;
574 if (currentStackDepth
== nullptr) {
575 // Previous instruction was unreachable
576 currentStackDepth
= newStack
;
580 // The stack depth at the label depends on the current depth
581 currentStackDepth
->addListener(*this, newStack
);
582 currentStackDepth
= newStack
;
585 void addLabelJump(const std::string
& name
, Offset immOff
, Offset opcodeOff
) {
586 auto& label
= labelMap
[name
];
588 if (currentStackDepth
!= nullptr) {
589 // The stack depth at the target must be the same as the current depth
590 // (whatever this may be: it may still be unknown)
591 currentStackDepth
->addListener(*this, &label
.stackDepth
);
594 label
.sources
.emplace_back(immOff
, opcodeOff
);
597 void enforceStackDepth(int stackDepth
) {
598 if (currentStackDepth
== nullptr) {
599 // Current instruction is unreachable, thus the constraint
600 // on the stack depth will never be violated
604 currentStackDepth
->setCurrentAbsolute(*this, stackDepth
);
607 bool isUnreachable() {
608 return currentStackDepth
== nullptr;
611 void enterUnreachableRegion() {
612 currentStackDepth
= nullptr;
615 void enterReachableRegion(int stackDepth
) {
616 unnamedStackDepths
.emplace_back(std::make_unique
<StackDepth
>());
617 currentStackDepth
= unnamedStackDepths
.back().get();
618 currentStackDepth
->setBase(*this, stackDepth
);
621 void addLabelDVInit(const std::string
& name
, int paramId
) {
622 labelMap
[name
].dvInits
.push_back(paramId
);
624 // Stack depth should be 0 when entering a DV init
625 labelMap
[name
].stackDepth
.setBase(*this, 0);
628 void addLabelEHEnt(const std::string
& name
, size_t ehIdx
) {
629 labelMap
[name
].ehEnts
.push_back(ehIdx
);
631 // Stack depth should be 0 when entering a fault funclet
632 labelMap
[name
].stackDepth
.setBase(*this, 0);
635 void beginFpi(Offset fpushOff
) {
636 fpiRegs
.push_back(FPIReg
{
639 currentStackDepth
->currentOffset
641 fdescDepth
+= kNumActRecCells
;
642 currentStackDepth
->adjust(*this, 0);
646 if (fpiRegs
.empty()) {
647 error("endFpi called with no active fpi region");
650 auto& ent
= fe
->addFPIEnt();
651 const auto& reg
= fpiRegs
.back();
652 ent
.m_fpushOff
= reg
.fpushOff
;
653 ent
.m_fpiEndOff
= ue
->bcPos();
654 ent
.m_fpOff
= reg
.fpOff
;
655 if (reg
.stackDepth
->baseValue
) {
656 ent
.m_fpOff
+= *reg
.stackDepth
->baseValue
;
658 // Base value still unknown, this will need to be updated later.
660 // Store the FPIEnt's index in the FuncEmitter's entry table.
661 assert(&fe
->fpitab
[fe
->fpitab
.size()-1] == &ent
);
662 fpiToUpdate
.emplace_back(fe
->fpitab
.size() - 1, reg
.stackDepth
);
666 always_assert(fdescDepth
>= kNumActRecCells
);
667 fdescDepth
-= kNumActRecCells
;
672 ue
->addPreClassEmitter(pce
);
677 void patchLabelOffsets(const Label
& label
) {
678 for (auto const& source
: label
.sources
) {
679 ue
->emitInt32(label
.target
- source
.second
, source
.first
);
682 for (auto const& dvinit
: label
.dvInits
) {
683 fe
->params
[dvinit
].funcletOff
= label
.target
;
686 for (auto const& ehEnt
: label
.ehEnts
) {
687 fe
->ehtab
[ehEnt
].m_handler
= label
.target
;
691 void finishSection() {
692 for (auto const& label
: labelMap
) {
693 if (!label
.second
.bound
) {
694 error("Undefined label " + label
.first
);
696 if (label
.second
.target
>= ue
->bcPos()) {
697 error("label " + label
.first
+ " falls of the end of the function");
700 patchLabelOffsets(label
.second
);
703 // Patch the FPI structures
704 for (auto& kv
: fpiToUpdate
) {
705 if (!kv
.second
->baseValue
) {
706 error("created a FPI from an unreachable instruction");
709 fe
->fpitab
[kv
.first
].m_fpOff
+= *kv
.second
->baseValue
;
713 void finishFunction() {
716 // Stack depth should be 0 at the end of a function body
717 enforceStackDepth(0);
719 // Bump up the unnamed local count
720 const int numLocals
= maxUnnamed
+ 1;
721 while (fe
->numLocals() < numLocals
) {
722 fe
->allocUnnamedLocal();
727 fe
->numIterators() * kNumIterCells
+
728 clsRefCountToCells(fe
->numClsRefSlots());
730 fe
->finish(ue
->bcPos(), false);
731 ue
->recordFunction(fe
);
737 numClsRefSlotsSet
= false;
738 initStackDepth
= StackDepth();
739 initStackDepth
.setBase(*this, 0);
740 currentStackDepth
= &initStackDepth
;
741 unnamedStackDepths
.clear();
747 int getLocalId(const std::string
& name
) {
748 if (name
[0] == '_') {
749 int id
= folly::to
<int>(name
.substr(1));
750 if (id
> maxUnnamed
) maxUnnamed
= id
;
754 if (name
[0] != '$') {
755 error("local variables must be prefixed with $ or _");
758 const StringData
* sd
= makeStaticString(name
.c_str() + 1);
760 return fe
->lookupVarId(sd
);
763 int getIterId(int32_t id
) {
764 if (id
>= fe
->numIterators()) {
765 error("iterator id exceeded number of iterators in the function");
770 int getClsRefSlot(int32_t slot
) {
771 if (slot
>= fe
->numClsRefSlots()) {
772 error("class-ref slot id exceeded number of class-ref "
773 "slots in the function");
780 bool emittedPseudoMain
{false};
781 bool emittedTopLevelFunc
{false};
783 std::map
<std::string
,ArrayData
*> adataMap
;
785 // When inside a class, this state is active.
786 PreClassEmitter
* pce
;
788 // When we're doing a function or method body, this state is active.
789 FuncEmitter
* fe
{nullptr};
790 std::vector
<FPIReg
> fpiRegs
;
791 std::map
<std::string
,Label
> labelMap
;
792 bool numItersSet
{false};
793 bool numClsRefSlotsSet
{false};
794 bool enumTySet
{false};
795 StackDepth initStackDepth
;
796 StackDepth
* currentStackDepth
{&initStackDepth
};
797 std::vector
<std::unique_ptr
<StackDepth
>> unnamedStackDepths
;
799 int minStackDepth
{0};
801 std::vector
<std::pair
<size_t, StackDepth
*>> fpiToUpdate
;
802 std::set
<std::string
,stdltistr
> hoistables
;
803 std::unordered_map
<uint32_t,Offset
> defClsOffsets
;
804 Location::Range srcLoc
{-1,-1,-1,-1};
805 AsmCallbacks
* callbacks
{ nullptr };
808 void StackDepth::adjust(AsmState
& as
, int delta
) {
809 currentOffset
+= delta
;
812 // The absolute stack depth is unknown. We only store the min
813 // and max offsets, and we will take a decision later, when the
814 // base value will be known.
815 maxOffset
= std::max(currentOffset
+ as
.fdescDepth
, maxOffset
);
816 if (currentOffset
< minOffset
) {
817 minOffsetLine
= as
.in
.getLineNumber();
818 minOffset
= currentOffset
;
823 if (*baseValue
+ currentOffset
< 0) {
824 as
.error("opcode sequence caused stack depth to go negative");
827 as
.adjustStackHighwater(*baseValue
+ currentOffset
+ as
.fdescDepth
);
830 void StackDepth::addListener(AsmState
& as
, StackDepth
* target
) {
832 target
->setBase(as
, *baseValue
+ currentOffset
);
834 listeners
.emplace_back(target
, currentOffset
);
838 void StackDepth::setBase(AsmState
& as
, int stackDepth
) {
839 if (baseValue
&& stackDepth
!= *baseValue
) {
840 as
.error("stack depth {} does not match base value {}",
841 stackDepth
, *baseValue
);
844 baseValue
= stackDepth
;
846 // We finally know the base value. Update AsmState accordingly.
847 if (*baseValue
+ minOffset
< 0) {
850 "opcode sequence caused stack depth to go negative"
853 as
.adjustStackHighwater(*baseValue
+ maxOffset
);
855 // Update the listeners
856 auto l
= std::move(listeners
);
857 // We won't need them anymore
860 kv
.first
->setBase(as
, *baseValue
+ kv
.second
);
864 void StackDepth::setCurrentAbsolute(AsmState
& as
, int stackDepth
) {
865 setBase(as
, stackDepth
- currentOffset
);
868 //////////////////////////////////////////////////////////////////////
871 * Opcode arguments must be on the same line as the opcode itself,
872 * although certain argument types may contain internal newlines (see,
873 * for example, read_jmpvector or string literals).
875 template<class Target
> Target
read_opcode_arg(AsmState
& as
) {
876 as
.in
.skipSpaceTab();
878 as
.in
.consumePred(!boost::is_any_of(" \t\r\n#;>"),
879 std::back_inserter(strVal
));
880 if (strVal
.empty()) {
881 as
.error("expected opcode or directive argument");
884 return folly::to
<Target
>(strVal
);
885 } catch (std::range_error
&) {
886 as
.error("couldn't convert input argument (" + strVal
+ ") to "
892 template<class SubOpType
>
893 uint8_t read_subop(AsmState
& as
) {
894 auto const str
= read_opcode_arg
<std::string
>(as
);
895 if (auto const ty
= nameToSubop
<SubOpType
>(str
.c_str())) {
896 return static_cast<uint8_t>(*ty
);
898 as
.error("unknown subop name");
902 const StringData
* read_litstr(AsmState
& as
) {
903 as
.in
.skipSpaceTab();
905 if (!as
.in
.readQuotedStr(strVal
)) {
906 as
.error("expected quoted string literal");
908 return makeStaticString(strVal
);
912 * maybe-string-literal : N
916 const StringData
* read_maybe_litstr(AsmState
& as
) {
917 as
.in
.skipSpaceTab();
918 if (as
.in
.peek() == 'N') {
922 return read_litstr(as
);
925 std::vector
<std::string
> read_strvector(AsmState
& as
) {
926 std::vector
<std::string
> ret
;
927 as
.in
.skipSpaceTab();
930 while (as
.in
.skipSpaceTab(), as
.in
.readQuotedStr(name
)) {
933 as
.in
.skipSpaceTab();
938 ArrayData
* read_litarray(AsmState
& as
) {
939 as
.in
.skipSpaceTab();
940 if (as
.in
.getc() != '@') {
941 as
.error("expecting an `@foo' array literal reference");
944 if (!as
.in
.readword(name
)) {
945 as
.error("expected name of .adata literal");
948 auto const it
= as
.adataMap
.find(name
);
949 if (it
== as
.adataMap
.end()) {
950 as
.error("unknown array data literal name " + name
);
955 RepoAuthType
read_repo_auth_type(AsmState
& as
) {
956 auto const str
= read_opcode_arg
<std::string
>(as
);
957 folly::StringPiece
parse(str
);
960 * Note: no support for reading array types. (The assembler only
961 * emits a single unit, so it can't really be involved in creating a
965 using T
= RepoAuthType::Tag
;
967 #define X(what, tag) \
968 if (parse.startsWith(what)) return RepoAuthType{tag}
970 #define Y(what, tag) \
971 if (parse.startsWith(what)) { \
972 parse.removePrefix(what); \
973 auto const cls = makeStaticString(parse.data()); \
974 as.ue->mergeLitstr(cls); \
975 return RepoAuthType{tag, cls}; \
978 Y("Obj=", T::ExactObj
);
979 Y("?Obj=", T::OptExactObj
);
980 Y("?Obj<=", T::OptSubObj
);
981 Y("Obj<=", T::SubObj
);
983 X("?Arr", T::OptArr
);
985 X("?VArr", T::OptVArr
);
987 X("?DArr", T::OptDArr
);
989 X("?Vec", T::OptVec
);
991 X("?Dict", T::OptDict
);
992 X("Keyset", T::Keyset
);
993 X("?Keyset", T::OptKeyset
);
995 X("?Bool", T::OptBool
);
998 X("?Dbl", T::OptDbl
);
1000 X("InitCell", T::InitCell
);
1001 X("InitGen", T::InitGen
);
1002 X("InitNull", T::InitNull
);
1003 X("InitUnc", T::InitUnc
);
1005 X("?Int", T::OptInt
);
1008 X("?Obj", T::OptObj
);
1010 X("?Res", T::OptRes
);
1012 X("?SArr", T::OptSArr
);
1014 X("?SVArr", T::OptSVArr
);
1015 X("SVArr", T::SVArr
);
1016 X("?SDArr", T::OptSDArr
);
1017 X("SDArr", T::SDArr
);
1018 X("?SVec", T::OptSVec
);
1020 X("?SDict", T::OptSDict
);
1021 X("SDict", T::SDict
);
1022 X("?SKeyset", T::OptSKeyset
);
1023 X("SKeyset", T::SKeyset
);
1024 X("?SStr", T::OptSStr
);
1026 X("?Str", T::OptStr
);
1029 X("?UncArrKey", T::OptUncArrKey
);
1030 X("?ArrKey", T::OptArrKey
);
1031 X("UncArrKey",T::UncArrKey
);
1032 X("ArrKey", T::ArrKey
);
1033 X("Uninit", T::Uninit
);
1038 // Make sure the above parsing code is revisited when new tags are
1039 // added (we'll get a warning for a missing case label):
1040 if (debug
) switch (RepoAuthType
{}.tag()) {
1084 case T::OptUncArrKey
:
1095 case T::OptExactObj
:
1100 as
.error("unrecognized RepoAuthType format");
1104 // Read a vector of IVAs, with format <int, int, int, ...>. The vector may be
1105 // omitted entirely if it is empty.
1106 std::vector
<uint32_t> read_argv(AsmState
& as
) {
1107 as
.in
.skipSpaceTab();
1108 if (as
.in
.peek() != '<') return {};
1111 std::vector
<uint32_t> result
;
1113 auto const num
= as
.in
.readint();
1114 if (num
< 0) as
.error("Was expecting a positive integer");
1115 result
.push_back(num
);
1116 as
.in
.skipWhitespace();
1117 if (as
.in
.peek() == '>') break;
1118 as
.in
.expectWs(',');
1120 as
.in
.expectWs('>');
1125 // Read in a vector of iterators. The format for this vector is:
1126 // <(TYPE) ID, (TYPE) ID, ...>
1127 // Where TYPE := Iter | MIter | CIter
1128 // and ID := Integer
1129 std::vector
<uint32_t> read_itervec(AsmState
& as
) {
1130 std::vector
<uint32_t> ret
;
1132 as
.in
.skipSpaceTab();
1138 as
.in
.expectWs('(');
1139 if (!as
.in
.readword(word
)) as
.error("Was expecting Iterator type.");
1140 if (!word
.compare("Iter")) ret
.push_back(KindOfIter
);
1141 else if (!word
.compare("MIter")) ret
.push_back(KindOfMIter
);
1142 else if (!word
.compare("CIter")) ret
.push_back(KindOfCIter
);
1143 else as
.error("Unknown iterator type `" + word
+ "'");
1144 as
.in
.expectWs(')');
1146 as
.in
.skipSpaceTab();
1148 if (!as
.in
.readword(word
)) as
.error("Was expecting iterator id.");
1149 ret
.push_back(folly::to
<uint32_t>(word
));
1151 if (!isdigit(word
.back())) {
1152 if (word
.back() == '>') break;
1153 if (word
.back() != ',') as
.error("Was expecting `,'.");
1155 as
.in
.skipSpaceTab();
1156 if (as
.in
.peek() == '>') { as
.in
.getc(); break; }
1164 // Jump tables are lists of labels.
1165 std::vector
<std::string
> read_jmpvector(AsmState
& as
) {
1166 std::vector
<std::string
> ret
;
1168 as
.in
.skipSpaceTab();
1172 while (as
.in
.readword(word
)) {
1173 ret
.push_back(word
);
1175 as
.in
.expectWs('>');
1180 typedef std::vector
<std::pair
<Id
, std::string
>> SSwitchJmpVector
;
1182 SSwitchJmpVector
read_sswitch_jmpvector(AsmState
& as
) {
1183 SSwitchJmpVector ret
;
1185 as
.in
.skipSpaceTab();
1188 std::string defLabel
;
1190 std::string caseStr
;
1191 if (!as
.in
.readQuotedStr(caseStr
)) {
1192 as
.error("expected quoted string literal");
1197 as
.in
.readword(defLabel
);
1200 as
.ue
->mergeLitstr(makeStaticString(caseStr
)),
1204 as
.in
.skipWhitespace();
1205 } while (as
.in
.peek() != '-');
1209 as
.in
.readword(defLabel
);
1211 // -1 stands for the default case.
1212 ret
.emplace_back(-1, defLabel
);
1219 MemberKey
read_member_key(AsmState
& as
) {
1220 as
.in
.skipWhitespace();
1223 if (!as
.in
.readword(word
)) as
.error("expected member code");
1225 auto optMcode
= parseMemberCode(word
.c_str());
1226 if (!optMcode
) as
.error("unrecognized member code `" + word
+ "'");
1228 auto const mcode
= *optMcode
;
1229 if (mcode
!= MW
&& as
.in
.getc() != ':') {
1230 as
.error("expected `:' after member code `" + word
+ "'");
1236 case MEL
: case MPL
: {
1238 if (!as
.in
.readword(name
)) {
1239 as
.error("couldn't read name for local variable in member key");
1241 return MemberKey
{mcode
, as
.getLocalId(name
)};
1244 return MemberKey
{mcode
, read_opcode_arg
<int32_t>(as
)};
1246 return MemberKey
{mcode
, read_opcode_arg
<int64_t>(as
)};
1247 case MET
: case MPT
: case MQT
:
1248 return MemberKey
{mcode
, read_litstr(as
)};
1253 LocalRange
read_local_range(AsmState
& as
) {
1254 auto first
= read_opcode_arg
<std::string
>(as
);
1255 if (first
.size() > 2 && first
[0] == 'L' && first
[1] == ':') {
1256 first
= "_" + first
.substr(2);
1258 auto const pos
= first
.find('+');
1259 if (pos
== std::string::npos
) as
.error("expecting `+' in local range");
1260 auto const rest
= first
.substr(pos
+ 1);
1261 first
= first
.substr(0, pos
);
1262 auto const firstLoc
= as
.getLocalId(first
);
1263 auto const restCount
= folly::to
<uint32_t>(rest
);
1264 if (firstLoc
+ restCount
> as
.maxUnnamed
) {
1265 as
.maxUnnamed
= firstLoc
+ restCount
;
1267 return LocalRange
{uint32_t(firstLoc
), restCount
};
1270 //////////////////////////////////////////////////////////////////////
1272 std::map
<std::string
,ParserFunc
> opcode_parsers
;
1275 #define IMM_ONE(t) IMM_##t
1276 #define IMM_TWO(t1, t2) IMM_ONE(t1); ++immIdx; IMM_##t2
1277 #define IMM_THREE(t1, t2, t3) IMM_TWO(t1, t2); ++immIdx; IMM_##t3
1278 #define IMM_FOUR(t1, t2, t3, t4) IMM_THREE(t1, t2, t3); ++immIdx; IMM_##t4
1280 // Some bytecodes need to know an iva imm for (PUSH|POP)_*.
1281 #define IMM_IVA do { \
1282 auto imm = read_opcode_arg<uint32_t>(as); \
1283 as.ue->emitIVA(imm); \
1284 immIVA[immIdx] = imm; \
1288 std::vector<std::string> vecImm = read_strvector(as); \
1289 auto const vecImmStackValues = vecImm.size(); \
1290 as.ue->emitInt32(vecImmStackValues); \
1291 for (size_t i = 0; i < vecImmStackValues; ++i) { \
1292 as.ue->emitInt32(as.ue->mergeLitstr(String(vecImm[i]).get())); \
1295 #define IMM_SA as.ue->emitInt32(as.ue->mergeLitstr(read_litstr(as)))
1296 #define IMM_RATA encodeRAT(*as.ue, read_repo_auth_type(as))
1297 #define IMM_I64A as.ue->emitInt64(read_opcode_arg<int64_t>(as))
1298 #define IMM_DA as.ue->emitDouble(read_opcode_arg<double>(as))
1299 #define IMM_LA as.ue->emitIVA(as.getLocalId( \
1300 read_opcode_arg<std::string>(as)))
1301 #define IMM_IA as.ue->emitIVA(as.getIterId( \
1302 read_opcode_arg<int32_t>(as)))
1303 #define IMM_CAR as.ue->emitIVA(as.getClsRefSlot( \
1304 read_opcode_arg<int32_t>(as)))
1305 #define IMM_CAW as.ue->emitIVA(as.getClsRefSlot( \
1306 read_opcode_arg<int32_t>(as)))
1307 #define IMM_OA(ty) as.ue->emitByte(read_subop<ty>(as));
1308 #define IMM_AA as.ue->emitInt32(as.ue->mergeArray(read_litarray(as)))
1309 #define IMM_LAR encodeLocalRange(*as.ue, read_local_range(as))
1312 * There can currently be no more than one immvector per instruction,
1313 * and we need access to the size of the immediate vector for
1314 * NUM_POP_*, so the member vector guy exposes a vecImmStackValues
1317 #define IMM_ILA do { \
1318 std::vector<uint32_t> vecImm = read_itervec(as); \
1319 as.ue->emitInt32(vecImm.size() / 2); \
1320 for (auto& i : vecImm) { \
1321 as.ue->emitInt32(i); \
1325 #define IMM_I32LA do { \
1326 std::vector<uint32_t> vecImm = read_argv(as); \
1327 as.ue->emitInt32(vecImm.size()); \
1328 for (auto i : vecImm) { \
1329 as.ue->emitInt32(i); \
1333 #define IMM_BLA do { \
1334 std::vector<std::string> vecImm = read_jmpvector(as); \
1335 as.ue->emitInt32(vecImm.size()); \
1336 for (auto const& imm : vecImm) { \
1337 labelJumps.emplace_back(imm, as.ue->bcPos()); \
1338 as.ue->emitInt32(0); /* to be patched */ \
1342 #define IMM_SLA do { \
1343 auto vecImm = read_sswitch_jmpvector(as); \
1344 as.ue->emitInt32(vecImm.size()); \
1345 for (auto const& pair : vecImm) { \
1346 as.ue->emitInt32(pair.first); \
1347 labelJumps.emplace_back(pair.second, as.ue->bcPos()); \
1348 as.ue->emitInt32(0); /* to be patched */ \
1352 #define IMM_BA do { \
1353 labelJumps.emplace_back( \
1354 read_opcode_arg<std::string>(as), \
1357 as.ue->emitInt32(0); \
1360 #define IMM_KA encode_member_key(read_member_key(as), *as.ue)
// Stack-effect helpers. The O() macro computes an opcode's stack delta as
// NUM_PUSH_##push - NUM_POP_##pop, so each push/pop descriptor used in the
// opcode table needs a matching macro here.
//
// Fixed-arity push descriptors: the arguments are ignored and only the
// count matters.
1362 #define NUM_PUSH_NOV 0
1363 #define NUM_PUSH_ONE(a) 1
1364 #define NUM_PUSH_TWO(a,b) 2
1365 #define NUM_PUSH_THREE(a,b,c) 3
1366 #define NUM_PUSH_INS_1(a) 1
// Fixed-arity pop descriptors.
1367 #define NUM_POP_NOV 0
1368 #define NUM_POP_ONE(a) 1
1369 #define NUM_POP_TWO(a,b) 2
1370 #define NUM_POP_THREE(a,b,c) 3
// Variable-arity pop descriptors: the count comes from the immIVA array
// declared in parse_opcode_##name (F_MFINAL reads slot 1, the others
// slot 0; C_MFINAL and V_MFINAL add one to the slot-0 count).
1371 #define NUM_POP_MFINAL immIVA[0]
1372 #define NUM_POP_F_MFINAL immIVA[1]
1373 #define NUM_POP_C_MFINAL (immIVA[0] + 1)
1374 #define NUM_POP_V_MFINAL NUM_POP_C_MFINAL
1375 #define NUM_POP_FMANY immIVA[0] /* number of arguments */
1376 #define NUM_POP_CVUMANY immIVA[0] /* number of arguments */
1377 #define NUM_POP_CMANY immIVA[0] /* number of arguments */
// SMANY takes its count from vecImmStackValues rather than from immIVA.
1378 #define NUM_POP_SMANY vecImmStackValues
// O(name, imm, pop, push, flags) defines parse_opcode_<name>(AsmState&),
// the per-opcode parser. From the lines visible here, the generated
// function:
//  - treats a null currentStackDepth as a reachable, empty stack;
//  - sets as.fe->containsCalls for FCall, FCallD and FCallAwait;
//  - emits the opcode, then adjusts the tracked stack depth by
//    NUM_PUSH_##push - NUM_POP_##pop;
//  - opens an FPI region at the opcode's offset for FPush* opcodes;
//  - registers every label collected in labelJumps via addLabelJump;
//  - enforces stack depth 0 after RetC/RetV, and depth 1 after CreateCont,
//    Await, Yield, YieldK and YieldFromDelegate;
//  - records the current source location against the opcode's offset;
//  - remembers the offset of DefCls/DefClsNop keyed by immIVA[0];
//  - enters an unreachable region after any TF-flagged opcode except Exit.
// No comments may be inserted between the continuation lines below.
1380 #define O(name, imm, pop, push, flags) \
1381 void parse_opcode_##name(AsmState& as) { \
1382 UNUSED uint32_t immIVA[4]; \
1383 UNUSED auto const thisOpcode = Op::name; \
1384 UNUSED const Offset curOpcodeOff = as.ue->bcPos(); \
1385 std::vector<std::pair<std::string, Offset> > labelJumps; \
1390 as.in.getLineNumber(), \
1391 as.displayStackDepth().c_str(), \
1395 /* Pretend the stack is reachable and empty, same as hphpc */ \
1396 if (as.currentStackDepth == nullptr) { \
1397 as.enterReachableRegion(0); \
1400 if (isFCallStar(Op##name)) { \
1404 /* Other FCall* functions perform their own bounds checking. */ \
1405 if (Op##name == OpFCall || Op##name == OpFCallD || \
1406 Op##name == OpFCallAwait) { \
1407 as.fe->containsCalls = true; \
1410 as.ue->emitOp(Op##name); \
1412 UNUSED size_t immIdx = 0; \
1415 int stackDelta = NUM_PUSH_##push - NUM_POP_##pop; \
1416 as.adjustStack(stackDelta); \
1418 if (isFPush(Op##name)) { \
1419 as.beginFpi(curOpcodeOff); \
1422 for (auto& kv : labelJumps) { \
1423 as.addLabelJump(kv.first, kv.second, curOpcodeOff); \
1426 /* Stack depth should be 0 after RetC or RetV. */ \
1427 if (thisOpcode == OpRetC || thisOpcode == OpRetV) { \
1428 as.enforceStackDepth(0); \
1431 /* Stack depth should be 1 after resume from suspend. */ \
1432 if (thisOpcode == OpCreateCont || thisOpcode == OpAwait || \
1433 thisOpcode == OpYield || thisOpcode == OpYieldK || \
1434 thisOpcode == OpYieldFromDelegate) { \
1435 as.enforceStackDepth(1); \
1438 /* Record source location. */ \
1439 as.ue->recordSourceLocation(as.srcLoc, curOpcodeOff); \
1441 if (Op##name == OpDefCls || Op##name == OpDefClsNop) { \
1442 as.defClsOffsets.emplace(immIVA[0], curOpcodeOff); \
1445 /* Retain stack depth after calls to exit */ \
1446 if ((instrFlags(thisOpcode) & InstrFlags::TF) && \
1447 (Op##name != OpExit)) { \
1448 as.enterUnreachableRegion(); \
1477 #undef NUM_PUSH_THREE
1478 #undef NUM_PUSH_POS_N
1479 #undef NUM_PUSH_INS_1
1483 #undef NUM_POP_THREE
1484 #undef NUM_POP_POS_N
1485 #undef NUM_POP_MFINAL
1486 #undef NUM_POP_F_MFINAL
1487 #undef NUM_POP_C_MFINAL
1488 #undef NUM_POP_V_MFINAL
1489 #undef NUM_POP_FMANY
1490 #undef NUM_POP_CVUMANY
1491 #undef NUM_POP_CMANY
1492 #undef NUM_POP_SMANY
// Fills opcode_parsers: the local O() definition maps each opcode's
// mnemonic string (#name) to its generated parse_opcode_<name> function.
1494 void initialize_opcode_map() {
1495 #define O(name, imm, pop, push, flags) \
1496 opcode_parsers[#name] = parse_opcode_##name;
// Helper type whose constructor calls initialize_opcode_map(), so building
// an instance populates the opcode parser table.
1501 struct Initializer
{
1502 Initializer() { initialize_opcode_map(); }
1505 //////////////////////////////////////////////////////////////////////
1508 * long-string-literal: <string>
1510 * `long-string-literal' is a python-style longstring. See
1511 * readLongString for more details.
// Reads a long-string literal at the current input position (leading
// whitespace is skipped) and returns a String holding a copy of its bytes.
// Reports an error through as.error() when no long string can be read or
// when the string read is empty.
1513 String
parse_long_string(AsmState
& as
) {
1514 as
.in
.skipWhitespace();
1516 std::vector
<char> buffer
;
1517 if (!as
.in
.readLongString(buffer
)) {
1518 as
.error("expected \"\"\"-string of serialized php data");
1520 if (buffer
.empty()) {
1521 as
.error("empty php serialized data is not a valid php object");
1524 // String wants a null, and dereferences one past the size we give
// The terminator is appended to the buffer but excluded from the length
// handed to String (size() - 1); CopyString makes String copy the bytes.
1526 buffer
.push_back('\0');
1527 return String(&buffer
[0], buffer
.size() - 1, CopyString
);
1531 * maybe-long-string-literal : long-string-literal
// Lenient counterpart of parse_long_string: if no long string can be read,
// or the one read is empty, the static empty string is returned instead of
// raising an error. Otherwise a copy of the literal's bytes is returned.
1535 String
parse_maybe_long_string(AsmState
& as
) {
1536 as
.in
.skipWhitespace();
1538 std::vector
<char> buffer
;
1539 if (!as
.in
.readLongString(buffer
)) {
1540 return StrNR(staticEmptyString());
1542 if (buffer
.empty()) {
1543 return StrNR(staticEmptyString());
1546 // String wants a null, and dereferences one past the size we give
// The appended terminator is not counted in the length passed to String.
1548 buffer
.push_back('\0');
1549 return String(&buffer
[0], buffer
.size() - 1, CopyString
);
1553 * php-serialized : long-string-literal
1556 * `long-string-literal' is a python-style longstring. See
1557 * readLongString for more details.
1559 * Returns a Variant representing the serialized data. It's up to the
1560 * caller to make sure it is a legal literal.
// Reads a mandatory long-string literal with parse_long_string and
// unserializes it using VariableUnserializer::Type::Internal.
1562 Variant
parse_php_serialized(AsmState
& as
) {
1563 return unserialize_from_string(
1564 parse_long_string(as
),
1565 VariableUnserializer::Type::Internal
1570 * maybe-php-serialized : maybe-long-string-literal
// Reads an optional long-string literal with parse_maybe_long_string and
// unserializes it using VariableUnserializer::Type::Internal.
// NOTE(review): how an empty string is handled cannot be confirmed from
// the lines visible here - verify before relying on it.
1573 Variant
parse_maybe_php_serialized(AsmState
& as
) {
1574 auto s
= parse_maybe_long_string(as
);
1576 return unserialize_from_string(s
, VariableUnserializer::Type::Internal
);
1582 * directive-numiters : integer ';'
// Handles the `.numiters` directive: reports an error if the directive was
// already seen for the current function, then reads an int32 count, marks
// the directive as seen, stores the count on the function emitter, and
// expects the terminating ';'.
1585 void parse_numiters(AsmState
& as
) {
1586 if (as
.numItersSet
) {
1587 as
.error("only one .numiters directive may appear in a given function");
1589 int32_t count
= read_opcode_arg
<int32_t>(as
);
1590 as
.numItersSet
= true;
1591 as
.fe
->setNumIterators(count
);
1592 as
.in
.expectWs(';');
1596 * directive-numclsrefslots : integer ';'
// Handles the `.numclsrefslots` directive. Mirrors parse_numiters: at most
// one occurrence per function, an int32 count stored through
// setNumClsRefSlots, and a terminating ';'.
1599 void parse_numclsrefslots(AsmState
& as
) {
1600 if (as
.numClsRefSlotsSet
) {
1601 as
.error("only one .numclsrefslots directive may appear "
1602 "in a given function");
1604 int32_t count
= read_opcode_arg
<int32_t>(as
);
1605 as
.numClsRefSlotsSet
= true;
1606 as
.fe
->setNumClsRefSlots(count
);
1607 as
.in
.expectWs(';');
1611 * directive-declvars : var-name* ';'
1614 * Variables are usually allocated when first seen, but
1615 * declvars can be used to preallocate variables for when
1616 * the exact assignment matters (like for closures).
// Handles the `.declvars` directive. A variable name is accepted either as
// a quoted string or as a bare word (the quoted form is tried first), and
// the directive is terminated by ';'.
1618 void parse_declvars(AsmState
& as
) {
1620 as
.in
.skipWhitespace();
1622 if (as
.in
.readQuotedStr(var
) || as
.in
.readword(var
)) {
1629 as
.in
.expectWs(';');
1632 void parse_function_body(AsmState
&, int nestLevel
= 0);
1635 * directive-fault : identifier integer? '{' function-body
// Handles the `.try_fault` directive: a label name, an optional integer
// iterator id, and a brace-delimited nested function body. After the body
// has been parsed, a Fault-type EH entry is added to the function emitter
// and associated with the label.
1638 void parse_fault(AsmState
& as
, int nestLevel
) {
// Bytecode offset at which the protected region begins.
1639 const Offset start
= as
.ue
->bcPos();
1642 if (!as
.in
.readword(label
)) {
1643 as
.error("expected label name after .try_fault");
1646 as
.in
.skipWhitespace();
// Anything other than '{' at this point is read as the iterator id.
1647 if (as
.in
.peek() != '{') {
1648 iterId
= read_opcode_arg
<int32_t>(as
);
1650 as
.in
.expectWs('{');
1651 parse_function_body(as
, nestLevel
+ 1);
1653 auto& eh
= as
.fe
->addEHEnt();
1654 eh
.m_type
= EHEnt::Type::Fault
;
// The region ends at the current bytecode position, i.e. just past the
// nested body.
1656 eh
.m_past
= as
.ue
->bcPos();
1657 eh
.m_iterId
= iterId
;
1658 eh
.m_end
= kInvalidOffset
;
// Associate the label with the entry just added (last index in ehtab).
1660 as
.addLabelEHEnt(label
, as
.fe
->ehtab
.size() - 1);
1664 * directive-catch : identifier integer? '{' function-body
// Handles the `.try_catch` directive. Same shape as parse_fault (label,
// optional integer iterator id, nested body) but the EH entry added is of
// type Catch.
1667 void parse_catch(AsmState
& as
, int nestLevel
) {
// Bytecode offset at which the protected region begins.
1668 const Offset start
= as
.ue
->bcPos();
1671 if (!as
.in
.readword(label
)) {
1672 as
.error("expected label name after .try_catch");
1675 as
.in
.skipWhitespace();
// Anything other than '{' at this point is read as the iterator id.
1676 if (as
.in
.peek() != '{') {
1677 iterId
= read_opcode_arg
<int32_t>(as
);
1679 as
.in
.expectWs('{');
1680 parse_function_body(as
, nestLevel
+ 1);
1682 auto& eh
= as
.fe
->addEHEnt();
1683 eh
.m_type
= EHEnt::Type::Catch
;
1685 eh
.m_past
= as
.ue
->bcPos();
1686 eh
.m_iterId
= iterId
;
1687 eh
.m_end
= kInvalidOffset
;
// Associate the label with the entry just added (last index in ehtab).
1689 as
.addLabelEHEnt(label
, as
.fe
->ehtab
.size() - 1);
1693 * directive-try-catch : integer? '{' function-body ".catch" '{' function-body
// Handles the `.try` directive: an optional integer iterator id, a
// brace-delimited protected body, the word `.catch`, and a brace-delimited
// handler body. The protected body must not fall through. An OpCatch is
// emitted at the start of the handler, and a Catch-type EH entry is added
// whose m_past and m_handler are both the handler's offset.
1696 void parse_try_catch(AsmState
& as
, int nestLevel
) {
1697 const Offset start
= as
.ue
->bcPos();
1700 as
.in
.skipWhitespace();
1701 if (as
.in
.peek() != '{') {
1702 iterId
= read_opcode_arg
<int32_t>(as
);
1706 as
.in
.expectWs('{');
1707 parse_function_body(as
, nestLevel
+ 1);
// Falling off the end of the protected body is rejected.
1708 if (!as
.isUnreachable()) {
1709 as
.error("expected .try region to not fall-thru");
// The handler starts at the current position.
1712 const Offset handler
= as
.ue
->bcPos();
// The handler is entered with an empty stack; after the emitted Catch
// opcode the tracked depth is required to be 1.
1715 as
.enterReachableRegion(0);
1716 as
.ue
->emitOp(OpCatch
);
1718 as
.enforceStackDepth(1);
1721 as
.in
.skipWhitespace();
1722 if (!as
.in
.readword(word
) || word
!= ".catch") {
1723 as
.error("expected .catch directive after .try");
1725 as
.in
.skipWhitespace();
1726 as
.in
.expectWs('{');
1727 parse_function_body(as
, nestLevel
+ 1);
1729 const Offset end
= as
.ue
->bcPos();
1731 auto& eh
= as
.fe
->addEHEnt();
1732 eh
.m_type
= EHEnt::Type::Catch
;
1734 eh
.m_past
= handler
;
1735 eh
.m_iterId
= iterId
;
1736 eh
.m_handler
= handler
;
1741 * directive-srcloc : line_no ':' chr_no ',' line_no ':' chr_no ';'
1748 * Record that subsequent bytecodes are at the source location indicated by the
1749 * range of line numbers and character positions specified.
// Handles the `.srcloc` directive. Reads four integers in the form
// line0 ':' char0 ',' line1 ':' char1 ';' and stores them in as.srcLoc,
// which parse_opcode_<name> passes to recordSourceLocation for each
// opcode it emits. The nestLevel parameter is unused.
1751 void parse_srcloc(AsmState
& as
, int /*nestLevel*/) {
1752 auto const line0
= as
.in
.readint();
1753 as
.in
.expectWs(':');
1754 auto const char0
= as
.in
.readint();
1755 as
.in
.expectWs(',');
1756 auto const line1
= as
.in
.readint();
1757 as
.in
.expectWs(':');
1758 auto const char1
= as
.in
.readint();
1759 as
.in
.expectWs(';');
1761 as
.srcLoc
= Location::Range(line0
, char0
, line1
, char1
);
1765 * directive-static : '$' local_name = long-string-literal ';'
1768 * Record that the function contains a static named local_name along with an
1769 * associated initializer.
// Handles the `.static` directive: expects '$' followed by the static's
// name, records an SVInfo carrying that name in the function emitter's
// staticVars list, and expects the terminating ';'.
1771 void parse_static(AsmState
& as
) {
1772 Func::SVInfo svInfo
;
1776 as
.in
.expectWs('$');
1777 if (!as
.in
.readword(name
)) {
1778 as
.error("Statics must be named");
1780 svInfo
.name
= makeStaticString(name
);
1781 as
.fe
->staticVars
.push_back(svInfo
);
1783 as
.in
.expectWs(';');
1787 * directive-doccomment : long-string-literal ';'
// Handles the `.doc` directive inside a function: reads a long-string
// literal followed by ';' and stores it as the function's doc comment.
1791 void parse_func_doccomment(AsmState
& as
) {
1792 auto const doc
= parse_long_string(as
);
1793 as
.in
.expectWs(';');
1795 as
.fe
->docComment
= makeStaticString(doc
);
1799 * function-body : fbody-line* '}'
1802 * fbody-line : ".numiters" directive-numiters
1803 * | ".numclsrefslots" directive-numclsrefslots
1804 * | ".declvars" directive-declvars
1805 * | ".try_fault" directive-fault
1806 * | ".try_catch" directive-catch
1807 * | ".try" directive-try-catch
1808 * | ".ismemoizewrapper"
1809 * | ".srcloc" directive-srcloc
1810 * | ".doc" directive-doccomment
1815 * label-name : identifier ':'
1818 * opcode-line : opcode-mnemonic <junk that depends on opcode> '\n'
// Parses the lines of a function body up to the closing '}'. Each line is
// classified by its first word:
//  - a word starting with '.' is a directive and is dispatched to the
//    matching parse_* helper; unknown directives are an error;
//  - a word immediately followed by ':' is a label definition;
//  - anything else must be a mnemonic registered in opcode_parsers.
// After an opcode's operands, only a newline, carriage return, '#' or EOF
// may follow on the same line. nestLevel is forwarded (incremented by the
// callee) to the directives that contain nested bodies.
1821 void parse_function_body(AsmState
& as
, int nestLevel
/* = 0 */) {
1824 as
.in
.skipWhitespace();
1825 if (as
.in
.peek() == '}') {
1828 as
.finishFunction();
1833 if (!as
.in
.readword(word
)) {
1834 as
.error("unexpected directive or opcode line in function body");
// Directives.
1836 if (word
[0] == '.') {
1837 if (word
== ".ismemoizewrapper") {
1838 as
.fe
->isMemoizeWrapper
= true;
1839 as
.in
.expectWs(';');
1842 if (word
== ".numiters") { parse_numiters(as
); continue; }
1843 if (word
== ".declvars") { parse_declvars(as
); continue; }
1844 if (word
== ".numclsrefslots") { parse_numclsrefslots(as
); continue; }
1845 if (word
== ".try_fault") { parse_fault(as
, nestLevel
); continue; }
1846 if (word
== ".try_catch") { parse_catch(as
, nestLevel
); continue; }
1847 if (word
== ".try") { parse_try_catch(as
, nestLevel
); continue; }
1848 if (word
== ".srcloc") { parse_srcloc(as
, nestLevel
); continue; }
1849 if (word
== ".static") { parse_static(as
); continue; }
1850 if (word
== ".doc") { parse_func_doccomment(as
); continue; }
1851 as
.error("unrecognized directive `" + word
+ "' in function");
// Label definitions.
1853 if (as
.in
.peek() == ':') {
1855 as
.addLabelTarget(word
);
1859 // Ok, it better be an opcode now.
1860 auto it
= opcode_parsers
.find(word
);
1861 if (it
== opcode_parsers
.end()) {
1862 as
.error("unrecognized opcode `" + word
+ "'");
// Only spaces and tabs are skipped here, so a line break still ends the
// operand list.
1866 as
.in
.skipSpaceTab();
1867 if (as
.in
.peek() != '\n' &&
1868 as
.in
.peek() != '\r' &&
1869 as
.in
.peek() != '#' &&
1870 as
.in
.peek() != EOF
) {
1871 as
.error("too many arguments for opcode `" + word
+ "'");
// Parses one user attribute of the form  "name" '(' php-serialized ')'.
// The unserialized value must be a PHP array; it is converted to a scalar
// array and wrapped in a KindOfArray typed value.
1876 void parse_user_attribute(AsmState
& as
,
1877 UserAttributeMap
& userAttrs
) {
1878 auto name
= read_litstr(as
);
1879 as
.in
.expectWs('(');
1881 auto var
= parse_php_serialized(as
);
1883 as
.in
.expectWs(')');
1885 if (!var
.isPHPArray()) {
1886 as
.error("user attribute values must be arrays");
1890 make_tv
<KindOfArray
>(ArrayData::GetScalarArray(std::move(var
)));
1894 * attribute : attribute-name
1895 * | string-literal '(' long-string-literal ')'
1898 * attribute-list : empty
1899 * | '[' attribute* ']'
1902 * The `attribute-name' rule is context-sensitive; see as-shared.cpp.
1903 * The second attribute form is for user attributes and only applies
1904 * if attributeMap is non null.
// Parses an optional '[' ... ']' attribute list for the given context and
// returns the accumulated attribute bits.
//   ctx       - selects which attribute names string_to_attr accepts.
//   userAttrs - when non-null, entries starting with '"' are parsed as
//               user attributes into this map.
//   isTop     - optional; when non-null the word "nontop" is given special
//               handling (its effect on *isTop is not visible here).
// For Class and Func contexts, AttrUnique | AttrPersistent | AttrBuiltin
// are added while SystemLib::s_inited is false. If the input does not
// start with '[', the attributes accumulated so far are returned.
1906 Attr
parse_attribute_list(AsmState
& as
, AttrContext ctx
,
1907 UserAttributeMap
*userAttrs
= nullptr,
1908 bool* isTop
= nullptr) {
1909 as
.in
.skipWhitespace();
1911 if (ctx
== AttrContext::Class
|| ctx
== AttrContext::Func
) {
1912 if (!SystemLib::s_inited
) {
1913 ret
|= AttrUnique
| AttrPersistent
| AttrBuiltin
;
1916 if (as
.in
.peek() != '[') return Attr(ret
);
1921 as
.in
.skipWhitespace();
1922 if (as
.in
.peek() == ']') break;
1923 if (as
.in
.peek() == '"' && userAttrs
) {
1924 parse_user_attribute(as
, *userAttrs
);
1927 if (!as
.in
.readword(word
)) break;
1929 auto const abit
= string_to_attr(ctx
, word
);
1934 if (isTop
&& word
== "nontop") {
1939 as
.error("unrecognized attribute `" + word
+ "' in this context");
1947 * | '<' maybe-string-literal maybe-string-literal
1950 * type-constraint : empty
1951 * | '<' maybe-string-literal
1954 * This parses type-info if noUserType is false, type-constraint if true
// Parses an optional '<' ... '>' type annotation and returns the pair
// (user type string, TypeConstraint). When the input does not start with
// '<', a value-initialized pair is returned. With noUserType set, the user
// type string is not read and is returned as nullptr. The words following
// the type name are type flags, OR-ed together up to the closing '>'; an
// unknown flag is an error.
1956 std::pair
<const StringData
*, TypeConstraint
> parse_type_info(
1957 AsmState
& as
, bool noUserType
= false) {
1958 as
.in
.skipWhitespace();
1959 if (as
.in
.peek() != '<') return {};
1962 const StringData
*userType
= noUserType
? nullptr : read_maybe_litstr(as
);
1963 const StringData
*typeName
= read_maybe_litstr(as
);
1966 auto flags
= TypeConstraint::NoFlags
;
1968 as
.in
.skipWhitespace();
1969 if (as
.in
.peek() == '>') break;
1970 if (!as
.in
.readword(word
)) break;
1972 auto const abit
= string_to_type_flag(word
);
1974 flags
= flags
| *abit
;
1978 as
.error("unrecognized type flag `" + word
+ "' in this context");
1981 return std::make_pair(userType
, TypeConstraint
{typeName
, flags
});
// Parses a type constraint: calls parse_type_info with noUserType = true
// and returns only the TypeConstraint half of the result.
1983 TypeConstraint
parse_type_constraint(AsmState
& as
) {
1984 return parse_type_info(as
, true).second
;
1989 * parameter-list : '(' param-name-list ')'
1992 * param-name-list : empty
1993 * | param-name ',' param-name-list
1996 * param-name : '$' identifier dv-initializer
1997 * | '&' '$' identifier dv-initializer
2000 * dv-initializer : empty
2001 * | '=' identifier arg-default
2004 * arg-default : empty
2005 * | '(' long-string-literal ')'
// Parses an optional parenthesised parameter list and appends each
// parameter to the current function emitter. Returns immediately when the
// input does not start with '('. Rules enforced by the visible checks:
//  - at most one variadic parameter, introduced by "...";
//  - a parameter cannot be both inout and variadic, nor both inout and
//    by-ref, and a function cannot mix inout and by-ref parameters;
//  - parameter names must carry a '$' prefix;
//  - a variadic parameter cannot have a dv-initializer.
// A dv-initializer names a label and may carry the default's PHP source
// text in parentheses; the texts "null", "true" and "false" (compared
// case-insensitively) are additionally recorded as a constant default.
2008 void parse_parameter_list(AsmState
& as
) {
2009 as
.in
.skipWhitespace();
2010 if (as
.in
.peek() != '(') return;
2013 bool seenVariadic
= false;
2014 bool seenRef
= false;
2017 FuncEmitter::ParamInfo param
;
2018 param
.byRef
= false;
2019 param
.inout
= false;
2021 as
.in
.skipWhitespace();
2022 int ch
= as
.in
.peek();
2023 if (ch
== ')') { as
.in
.getc(); break; } // allow empty param lists
2026 as
.error("functions can only have one variadic argument");
2029 parse_attribute_list(as
, AttrContext::Parameter
, ¶m
.userAttributes
);
2033 if (as
.in
.getc() != '.' ||
2034 as
.in
.getc() != '.') {
2035 as
.error("expecting '...'");
2038 seenVariadic
= true;
2039 param
.variadic
= true;
2040 as
.fe
->attrs
|= AttrVariadicParam
;
2043 if (as
.in
.tryConsume("inout")) {
2045 as
.error("inout parameters cannot be variadic");
2048 as
.error("functions cannot contain both inout and ref parameters");
2051 as
.fe
->attrs
|= AttrTakesInOutParams
;
2054 std::tie(param
.userType
, param
.typeConstraint
) = parse_type_info(as
);
2056 as
.in
.skipWhitespace();
2061 as
.error("parameters cannot be marked both inout and ref");
2063 if (as
.fe
->attrs
& AttrTakesInOutParams
) {
2064 as
.error("functions cannot contain both inout and ref parameters");
2071 as
.error("function parameters must have a $ prefix");
2074 if (!as
.in
.readword(name
)) {
2075 as
.error("expected parameter name after $");
2078 as
.in
.skipWhitespace();
2082 as
.error("variadic parameter cannot have dv-initializer");
2086 if (!as
.in
.readword(label
)) {
2087 as
.error("expected label name for dv-initializer");
// The label is tied to the index this parameter will get once appended.
2089 as
.addLabelDVInit(label
, as
.fe
->params
.size());
2091 as
.in
.skipWhitespace();
2094 String str
= parse_long_string(as
);
2095 param
.phpCode
= makeStaticString(str
);
// The length checks below limit the case-insensitive comparisons to
// texts that are exactly "null", "true" or "false".
2098 if (str
.size() == 4) {
2099 if (!strcasecmp("null", str
.data())) {
2101 } else if (!strcasecmp("true", str
.data())) {
2102 tv
= make_tv
<KindOfBoolean
>(true);
2104 } else if (str
.size() == 5 && !strcasecmp("false", str
.data())) {
2105 tv
= make_tv
<KindOfBoolean
>(false);
2107 if (tv
.m_type
!= KindOfUninit
) {
2108 param
.defaultValue
= tv
;
2110 as
.in
.expectWs(')');
2111 as
.in
.skipWhitespace();
2116 as
.fe
->appendParam(makeStaticString(name
), param
);
2118 if (ch
== ')') break;
2119 if (ch
!= ',') as
.error("expected , between parameter names");
// Parses the flag words that may appear between a function's parameter
// list and its opening '{'. Recognised flags are isGenerator, isAsync,
// isClosureBody and isPairGenerator, each setting the same-named field on
// the function emitter; any other word is an error.
2123 void parse_function_flags(AsmState
& as
) {
2124 as
.in
.skipWhitespace();
2127 if (as
.in
.peek() == '{') break;
2128 if (!as
.in
.readword(flag
)) break;
2130 if (flag
== "isGenerator") {
2131 as
.fe
->isGenerator
= true;
2132 } else if (flag
== "isAsync") {
2133 as
.fe
->isAsync
= true;
2134 } else if (flag
== "isClosureBody") {
2135 as
.fe
->isClosureBody
= true;
2136 } else if (flag
== "isPairGenerator") {
2137 as
.fe
->isPairGenerator
= true;
2139 as
.error("Unexpected function flag \"" + flag
+ "\"");
2145 * line-range : "(" integer "," integer ")"
// Parses an optional "(line0, line1)" range into the two out-parameters.
// When no '(' is present, line0 is set to the assembler input's current
// line number and line1 to that number plus one.
// NOTE(review): the return statements are not visible here; parse_main
// stores the result in `fromSrcLoc`, so the bool presumably reports
// whether an explicit range was present - confirm.
2148 bool parse_line_range(AsmState
& as
, int& line0
, int& line1
) {
2149 as
.in
.skipWhitespace();
2150 if (as
.in
.peek() != '(') {
2151 line0
= as
.in
.getLineNumber();
2152 line1
= as
.in
.getLineNumber() + 1;
2156 line0
= as
.in
.readint();
2157 as
.in
.expectWs(',');
2158 line1
= as
.in
.readint();
2159 as
.in
.expectWs(')');
2164 * directive-function : attribute-list ?line-range type-info identifier
2165 * parameter-list function-flags '{' function-body
// Handles the `.function` directive. In order it reads the attribute list,
// optional line range, return type info and name, then creates and
// initialises a new function emitter before parsing the parameter list,
// function flags and body. Two ordering rules are enforced: `.function`
// must follow `.main`, and a non-top function may not appear after a
// top-level one.
2168 void parse_function(AsmState
& as
) {
2169 if (!as
.emittedPseudoMain
) {
2170 as
.error(".function blocks must all follow the .main block");
2173 as
.in
.skipWhitespace();
2177 UserAttributeMap userAttrs
;
2178 Attr attrs
= parse_attribute_list(as
, AttrContext::Func
, &userAttrs
, &isTop
);
2180 if(!isTop
&& as
.emittedTopLevelFunc
) {
2181 as
.error("All top level functions must be defined after any "
2182 "non-top functions");
2185 as
.emittedTopLevelFunc
|= isTop
;
2189 parse_line_range(as
, line0
, line1
);
2191 auto typeInfo
= parse_type_info(as
);
2193 if (!as
.in
.readname(name
)) {
2194 as
.error(".function must have a name");
2197 as
.fe
= as
.ue
->newFuncEmitter(makeStaticString(name
));
2198 as
.fe
->init(line0
, line1
, as
.ue
->bcPos(), attrs
, isTop
, 0);
2199 std::tie(as
.fe
->retUserType
, as
.fe
->retTypeConstraint
) = typeInfo
;
2200 as
.fe
->userAttributes
= userAttrs
;
2202 parse_parameter_list(as
);
2203 parse_function_flags(as
);
2205 as
.in
.expectWs('{');
// Reset the source location to all -1 before parsing the body.
2207 as
.srcLoc
= Location::Range
{-1,-1,-1,-1};
2208 parse_function_body(as
);
2212 * directive-method : attribute-list ?line-range type-info identifier
2213 * parameter-list function-flags '{' function-body
// Handles the `.method` directive inside a class. Follows the same steps
// as parse_function, except that the emitter is created with
// newMethodEmitter for the current class emitter (as.pce), is added to
// that class, and is initialised with `false` where parse_function passes
// isTop.
2216 void parse_method(AsmState
& as
) {
2217 as
.in
.skipWhitespace();
2219 UserAttributeMap userAttrs
;
2220 Attr attrs
= parse_attribute_list(as
, AttrContext::Func
, &userAttrs
);
2224 parse_line_range(as
, line0
, line1
);
2226 auto typeInfo
= parse_type_info(as
);
2228 if (!as
.in
.readname(name
)) {
2229 as
.error(".method requires a method name");
2232 as
.fe
= as
.ue
->newMethodEmitter(makeStaticString(name
), as
.pce
);
2233 as
.pce
->addMethod(as
.fe
);
2234 as
.fe
->init(line0
, line1
,
2235 as
.ue
->bcPos(), attrs
, false, 0);
2236 std::tie(as
.fe
->retUserType
, as
.fe
->retTypeConstraint
) = typeInfo
;
2237 as
.fe
->userAttributes
= userAttrs
;
2239 parse_parameter_list(as
);
2240 parse_function_flags(as
);
2242 as
.in
.expectWs('{');
// Reset the source location to all -1 before parsing the body.
2244 as
.srcLoc
= Location::Range
{-1,-1,-1,-1};
2245 parse_function_body(as
);
2249 * member-tv-initializer : '=' php-serialized ';'
// Parses the initializer that follows a property or constant name and
// returns it as a TypedValue. The value starts out as null. After the
// leading character is read:
//  - if the next token is not a '"', it must be the word `uninit`
//    followed by ';', which yields an uninit value;
//  - otherwise a php-serialized literal followed by ';' is read; objects
//    and resources are rejected and the value is converted with
//    setEvalScalar();
//  - a bare ';' is also accepted; any other leading character is an error.
2254 TypedValue
parse_member_tv_initializer(AsmState
& as
) {
2255 as
.in
.skipWhitespace();
2258 tvWriteNull(tvInit
); // Don't confuse Variant with uninit data
2260 int what
= as
.in
.getc();
2262 as
.in
.skipWhitespace();
2264 if (as
.in
.peek() != '\"') {
2265 // It might be an uninitialized property/constant.
2266 if (!as
.in
.tryConsume("uninit")) {
2267 as
.error("Expected \"\"\" or \"uninit\" after '=' in "
2268 "const/property initializer");
2270 as
.in
.expectWs(';');
2271 tvWriteUninit(tvInit
);
2275 tvAsVariant(&tvInit
) = parse_php_serialized(as
);
2276 if (tvInit
.m_type
== KindOfObject
) {
2277 as
.error("property initializer can't be an object");
2278 } else if (tvInit
.m_type
== KindOfResource
) {
2279 as
.error("property initializer can't be a resource");
2281 tvAsVariant(&tvInit
).setEvalScalar();
2283 as
.in
.expectWs(';');
2284 } else if (what
== ';') {
2287 as
.error("expected '=' or ';' after property name");
2294 * directive-property : attribute-list maybe-long-string-literal type-info
2295 * identifier member-tv-initializer
2298 * Define a property with an associated type and heredoc.
// Handles the `.property` directive: reads the attribute list, an optional
// doc-comment long string, the type info (only the user-type string is
// kept, defaulting to the static empty string), the property name and its
// initializer, then adds the property to the current class emitter.
2300 void parse_property(AsmState
& as
) {
2301 as
.in
.skipWhitespace();
2303 Attr attrs
= parse_attribute_list(as
, AttrContext::Prop
);
2305 auto const heredoc
= makeStaticString(parse_maybe_long_string(as
));
2306 auto const userTy
= parse_type_info(as
, false).first
;
2307 auto const userTyStr
= userTy
? userTy
: staticEmptyString();
2310 if (!as
.in
.readword(name
)) {
2311 as
.error("expected name for property");
2314 TypedValue tvInit
= parse_member_tv_initializer(as
);
2315 as
.pce
->addProperty(makeStaticString(name
),
2324 * const-flags : isType
2327 * directive-const : identifier const-flags member-tv-initializer
2328 * | identifier const-flags ';'
// Handles the `.const` directive: reads the constant's name and an
// optional `isType` flag. If a ';' follows directly, an abstract constant
// is added to the class; otherwise the initializer is parsed and a
// constant with that value is added.
2331 void parse_constant(AsmState
& as
) {
2332 as
.in
.skipWhitespace();
2335 if (!as
.in
.readword(name
)) {
2336 as
.error("expected name for constant");
2339 bool isType
= as
.in
.tryConsume("isType");
2340 as
.in
.skipWhitespace();
// No initializer: abstract constant.
2342 if (as
.in
.peek() == ';') {
2344 as
.pce
->addAbstractConstant(makeStaticString(name
),
2345 staticEmptyString(),
2350 TypedValue tvInit
= parse_member_tv_initializer(as
);
2351 as
.pce
->addConstant(makeStaticString(name
),
2352 staticEmptyString(), &tvInit
,
2353 staticEmptyString(),
2358 * directive-default-ctor : ';'
2361 * No-op, for backward compat
// Handles the `.default_ctor` directive: asserts that a class emitter is
// active and no function emitter is, then consumes the terminating ';'.
2363 void parse_default_ctor(AsmState
& as
) {
2364 assert(!as
.fe
&& as
.pce
);
2365 as
.in
.expectWs(';');
2369 * directive-use : identifier+ ';'
2370 * | identifier+ '{' use-line* '}'
2373 * use-line : use-name-ref "insteadof" identifier+ ';'
2374 * | use-name-ref "as" attribute-list identifier ';'
2375 * | use-name-ref "as" attribute-list ';'
// Handles the `.use` directive. Reads one or more trait names (at least
// one is required) and registers each with the class emitter. If a '{'
// follows, each line up to '}' is a rule of the form
//   name [ "::" identifier ] ( "as" ... | "insteadof" trait+ ) ';'
// When the first word is not followed by ':', it is used as the
// identifier. An `as` rule adds a trait alias rule and must supply an
// alias and/or attributes. An `insteadof` rule requires an explicit trait
// name and at least one other trait, and adds a trait precedence rule.
2378 void parse_use(AsmState
& as
) {
2379 std::vector
<std::string
> usedTraits
;
2382 if (!as
.in
.readword(name
)) break;
2383 usedTraits
.push_back(name
);
2385 if (usedTraits
.empty()) {
2386 as
.error(".use requires a trait name");
2389 for (size_t i
= 0; i
< usedTraits
.size(); ++i
) {
2390 as
.pce
->addUsedTrait(makeStaticString(usedTraits
[i
]));
2392 as
.in
.skipWhitespace();
2393 if (as
.in
.peek() != '{') {
2400 as
.in
.skipWhitespace();
2401 if (as
.in
.peek() == '}') break;
2403 std::string traitName
;
2404 std::string identifier
;
2405 if (!as
.in
.readword(traitName
)) {
2406 as
.error("expected identifier for line in .use block");
2408 as
.in
.skipWhitespace();
2409 if (as
.in
.peek() == ':') {
2412 if (!as
.in
.readword(identifier
)) {
2413 as
.error("expected identifier after ::");
2416 identifier
= traitName
;
2420 if (as
.in
.tryConsume("as")) {
2421 Attr attrs
= parse_attribute_list(as
, AttrContext::TraitImport
);
2423 if (!as
.in
.readword(alias
)) {
2424 if (attrs
!= AttrNone
) {
2427 as
.error("expected identifier or attribute list after "
2428 "`as' in .use block");
2432 as
.pce
->addTraitAliasRule(PreClass::TraitAliasRule(
2433 makeStaticString(traitName
),
2434 makeStaticString(identifier
),
2435 makeStaticString(alias
),
2437 } else if (as
.in
.tryConsume("insteadof")) {
2438 if (traitName
.empty()) {
2439 as
.error("Must specify TraitName::name when using a trait insteadof");
2442 PreClass::TraitPrecRule
precRule(
2443 makeStaticString(traitName
),
2444 makeStaticString(identifier
));
2446 bool addedOtherTraits
= false;
2448 while (as
.in
.readword(whom
)) {
2449 precRule
.addOtherTraitName(makeStaticString(whom
));
2450 addedOtherTraits
= true;
2452 if (!addedOtherTraits
) {
2453 as
.error("one or more trait names expected after `insteadof'");
2456 as
.pce
->addTraitPrecRule(precRule
);
2458 as
.error("expected `as' or `insteadof' in .use block");
2461 as
.in
.expectWs(';');
2468 * directive-enum_ty : type-constraint ';'
// Handles the `.enum_ty` directive: may appear only once per class; parses
// a type constraint, stores it as the class's enum base type, and expects
// the terminating ';'.
2472 void parse_enum_ty(AsmState
& as
) {
2474 as
.error("only one .enum_ty directive may appear in a given class");
2476 as
.enumTySet
= true;
2478 as
.pce
->setEnumBaseTy(parse_type_constraint(as
));
2480 as
.in
.expectWs(';');
2484 * directive-require : 'extends' '<' identifier '>' ';'
2485 * | 'implements' '<' identifier '>' ';'
// Handles the `.require` directive: the keyword `extends` or `implements`,
// then a name in angle brackets, then ';'. The requirement is added to the
// class emitter, with `extends` recording which keyword was used.
2489 void parse_require(AsmState
& as
) {
2490 as
.in
.skipWhitespace();
// `implements` is only tried when `extends` did not match.
2492 bool extends
= as
.in
.tryConsume("extends");
2493 if (!extends
&& !as
.in
.tryConsume("implements")) {
2494 as
.error(".require should be extends or implements");
2497 as
.in
.expectWs('<');
2499 if (!as
.in
.readname(name
)) {
2500 as
.error(".require expects a class or interface name");
2502 as
.in
.expectWs('>');
2504 as
.pce
->addClassRequirement(PreClass::ClassRequirement(
2505 makeStaticString(name
), extends
2508 as
.in
.expectWs(';');
2512 * directive-doccomment : long-string-literal ';'
// Handles the `.doc` directive inside a class: reads a long-string literal
// followed by ';' and stores it as the class's doc comment.
2516 void parse_cls_doccomment(AsmState
& as
) {
2517 auto const doc
= parse_long_string(as
);
2518 as
.in
.expectWs(';');
2520 as
.pce
->setDocComment(makeStaticString(doc
));
2524 * class-body : class-body-line* '}'
2527 * class-body-line : ".method" directive-method
2528 * | ".property" directive-property
2529 * | ".const" directive-const
2530 * | ".use" directive-use
2531 * | ".default_ctor" directive-default-ctor
2532 * | ".enum_ty" directive-enum-ty
2533 * | ".require" directive-require
2534 * | ".doc" directive-doccomment
// Parses the directives inside a class body, dispatching each recognised
// directive word to its parse_* helper. `.class` must follow `.main`, and
// an unrecognised directive is an error.
2537 void parse_class_body(AsmState
& as
) {
2538 if (!as
.emittedPseudoMain
) {
2539 as
.error(".class blocks must all follow the .main block");
2542 std::string directive
;
2543 while (as
.in
.readword(directive
)) {
2544 if (directive
== ".method") { parse_method(as
); continue; }
2545 if (directive
== ".property") { parse_property(as
); continue; }
2546 if (directive
== ".const") { parse_constant(as
); continue; }
2547 if (directive
== ".use") { parse_use(as
); continue; }
2548 if (directive
== ".default_ctor") { parse_default_ctor(as
); continue; }
2549 if (directive
== ".enum_ty") { parse_enum_ty(as
); continue; }
2550 if (directive
== ".require") { parse_require(as
); continue; }
2551 if (directive
== ".doc") { parse_cls_doccomment(as
); continue; }
2553 as
.error("unrecognized directive `" + directive
+ "' in class");
// Decides the hoistability of the class currently held in as.pce. The
// visible checks are applied in this order:
//  1. a class whose only method is a closure body -> NotHoistable;
//  2. any interfaces, used traits or requirements, or AttrEnum
//     -> Mergeable;
//  3. a parent that is not in as.hoistables -> MaybeHoistable;
//  4. otherwise `name` is added to as.hoistables and the result is
//     AlwaysHoistable if the class has AttrUnique, else MaybeHoistable.
// NOTE(review): `system` (AttrBuiltin) is computed, but no use of it is
// visible here.
2558 PreClass::Hoistable
compute_hoistable(AsmState
& as
,
2559 const std::string
&name
,
2560 const std::string
&parentName
) {
2561 auto &pce
= *as
.pce
;
2562 bool system
= pce
.attrs() & AttrBuiltin
;
2564 if (pce
.methods().size() == 1 && pce
.methods()[0]->isClosureBody
) {
2565 return PreClass::NotHoistable
;
2568 if (!pce
.interfaces().empty() ||
2569 !pce
.usedTraits().empty() ||
2570 !pce
.requirements().empty() ||
2571 (pce
.attrs() & AttrEnum
)) {
2572 return PreClass::Mergeable
;
2574 if (!parentName
.empty() && !as
.hoistables
.count(parentName
)) {
2575 return PreClass::MaybeHoistable
;
2578 as
.hoistables
.insert(name
);
2580 return pce
.attrs() & AttrUnique
?
2581 PreClass::AlwaysHoistable
: PreClass::MaybeHoistable
;
2585 * directive-class : ?"top" attribute-list identifier ?line-range
2586 * extension-clause implements-clause '{' class-body
2589 * extension-clause : empty
2590 * | "extends" identifier
2593 * implements-clause : empty
2594 * | "implements" '(' identifier* ')'
// Handles the `.class` directive: reads the attribute list, class name,
// optional line range, optional `extends` parent and optional
// `implements (...)` interface list, creates a bare PreClass emitter, adds
// the interfaces and user attributes, and parses the class body. Finally
// the hoistability is set: computed by compute_hoistable for top-level
// classes, NotHoistable otherwise. Anonymous class names containing ';'
// are truncated at the ';' and regenerated with NewAnonymousClassName.
2598 void parse_class(AsmState
& as
) {
2599 as
.in
.skipWhitespace();
2603 UserAttributeMap userAttrs
;
2604 Attr attrs
= parse_attribute_list(as
, AttrContext::Class
, &userAttrs
, &isTop
);
2606 if (!as
.in
.readname(name
)) {
2607 as
.error(".class must have a name");
2609 if (ParserBase::IsAnonymousClassName(name
)) {
2610 // refresh names of anonymous classes
2611 // to make sure they are unique
2612 auto p
= name
.find(';');
2613 if (p
!= std::string::npos
) {
2614 name
= name
.substr(0, p
);
2615 name
= HPHP::NewAnonymousClassName(name
);
2621 parse_line_range(as
, line0
, line1
);
2623 std::string parentName
;
2624 if (as
.in
.tryConsume("extends")) {
2625 if (!as
.in
.readname(parentName
)) {
2626 as
.error("expected parent class name after `extends'");
2630 std::vector
<std::string
> ifaces
;
2631 if (as
.in
.tryConsume("implements")) {
2632 as
.in
.expectWs('(');
2634 while (as
.in
.readname(word
)) {
2635 ifaces
.push_back(word
);
// Look up a DefCls offset previously recorded for the index this class
// is about to receive.
2640 auto off
= folly::get_default(as
.defClsOffsets
, as
.ue
->numPreClasses(),
// Created as MaybeHoistable; the final value is set after the body.
2643 as
.pce
= as
.ue
->newBarePreClassEmitter(name
, PreClass::MaybeHoistable
);
2648 makeStaticString(parentName
),
2649 staticEmptyString());
2650 for (auto const& iface
: ifaces
) {
2651 as
.pce
->addInterface(makeStaticString(iface
));
2653 as
.pce
->setUserAttributes(userAttrs
);
2655 as
.in
.expectWs('{');
2656 parse_class_body(as
);
2658 as
.pce
->setHoistable(
2659 isTop
? compute_hoistable(as
, name
, parentName
) : PreClass::NotHoistable
2666 * directive-filepath : quoted-string-literal ';'
// Handles the `.filepath` directive: reads a quoted string literal, stores
// it as the unit's m_filepath, and expects the terminating ';'.
2669 void parse_filepath(AsmState
& as
) {
2670 auto const str
= read_litstr(as
);
2671 as
.ue
->m_filepath
= str
;
2672 as
.in
.expectWs(';');
2676 * directive-main : ?line-range '{' function-body
// Handles the `.main` directive: rejects a second `.main` (with a
// dedicated message while SystemLib::s_inited is false), reads the
// optional line range, initialises the unit's pseudo-main and makes it the
// current function emitter, sets the initial source location, and parses
// the body.
// NOTE(review): the branch choosing between the two srcLoc assignments is
// not visible; it presumably tests fromSrcLoc - confirm.
2679 void parse_main(AsmState
& as
) {
2680 if (as
.emittedPseudoMain
) {
2681 if (!SystemLib::s_inited
) {
2682 as
.error(".main found in systemlib");
2684 as
.error("Multiple .main directives found");
2690 bool fromSrcLoc
= parse_line_range(as
, line0
, line1
);
2692 as
.in
.expectWs('{');
2694 as
.ue
->initMain(line0
, line1
);
2695 as
.fe
= as
.ue
->getMain();
2696 as
.emittedPseudoMain
= true;
2698 as
.srcLoc
= Location::Range
{line0
,0,line1
,0};
2700 as
.srcLoc
= Location::Range
{-1,-1,-1,-1};
2702 parse_function_body(as
);
2706 * directive-adata : identifier '=' php-serialized ';'
// Handles the `.adata` directive:  label '=' php-serialized ';'.
// The label must not already exist in as.adataMap and the unserialized
// value must be an array. The array is converted to a scalar array, merged
// into the unit, and recorded in as.adataMap under its label.
2709 void parse_adata(AsmState
& as
) {
2710 as
.in
.skipWhitespace();
2711 std::string dataLabel
;
2712 if (!as
.in
.readword(dataLabel
)) {
2713 as
.error("expected name for .adata");
2715 if (as
.adataMap
.count(dataLabel
)) {
2716 as
.error("duplicate adata label name " + dataLabel
);
2719 as
.in
.expectWs('=');
2720 auto var
= parse_php_serialized(as
);
2721 if (!var
.isArray()) {
2722 as
.error(".adata only supports serialized arrays");
2724 auto const data
= ArrayData::GetScalarArray(std::move(var
));
2725 as
.ue
->mergeArray(data
);
2726 as
.adataMap
[dataLabel
] = data
;
2728 as
.in
.expectWs(';');
2732 * directive-alias : attribute-list identifier '=' type-constraint
2733 * maybe-php-serialized ';'
2736 * We represent alias type information using the syntax for
2737 * TypeConstraints. We populate the name and nullable field of the
2738 * alias directly from the specified type constraint and derive the
2739 * AnnotType from the computed AnnotType in the constraint.
2741 * Following the type-constraint we encode the serialized type structure
2742 * corresponding to this alias.
// Handles the `.alias` directive: reads the attribute list, alias name,
// '=', a type constraint and an optional serialized type structure, then
// fills in a type-alias record and adds it to the unit. A type structure,
// when present, must be an array. A constraint without a type name is
// recorded with an empty name and AnnotType::Mixed.
2744 void parse_alias(AsmState
& as
) {
2745 as
.in
.skipWhitespace();
2748 Attr attrs
= parse_attribute_list(as
, AttrContext::Alias
, &record
.userAttrs
);
2750 if (!as
.in
.readname(name
)) {
2751 as
.error(".alias must have a name");
2753 as
.in
.expectWs('=');
2755 TypeConstraint ty
= parse_type_constraint(as
);
2756 Variant ts
= parse_maybe_php_serialized(as
);
2758 if (ts
.isInitialized() && !ts
.isArray()) {
2759 as
.error(".alias must have an array type structure");
// Substitute the static empty string for a missing type name.
2762 const StringData
* typeName
= ty
.typeName();
2763 if (!typeName
) typeName
= staticEmptyString();
2764 const StringData
* sname
= makeStaticString(name
);
2765 // Merge to ensure namedentity creation, according to
2766 // emitTypedef in emitter.cpp
2767 as
.ue
->mergeLitstr(sname
);
2768 as
.ue
->mergeLitstr(typeName
);
2770 record
.name
= sname
;
2771 record
.value
= typeName
;
2772 record
.type
= typeName
->empty() ? AnnotType::Mixed
: ty
.type();
2773 record
.nullable
= (ty
.flags() & TypeConstraint::Nullable
) != 0;
2774 record
.attrs
= attrs
;
2775 if (ts
.isInitialized()) {
2776 record
.typeStructure
= ArrNR(ArrayData::GetScalarArray(std::move(ts
)));
2778 as
.ue
->addTypeAlias(record
);
2780 as
.in
.expectWs(';');
2783 void parse_hh_file(AsmState
& as
) {
2784 as
.in
.skipWhitespace();
2786 if (!as
.in
.readword(word
)) {
2787 as
.error(".hh_file must have a value");
2789 as
.ue
->m_isHHFile
= word
== "1";
2791 if (!as
.ue
->m_isHHFile
&& word
!= "0") {
2792 as
.error(".hh_file must be either 1 or 0");
2795 as
.in
.expectWs(';');
2798 void parse_strict(AsmState
& as
) {
2799 as
.in
.skipWhitespace();
2801 if (!as
.in
.readword(word
)) {
2802 as
.error(".strict must have a value");
2804 if (!RuntimeOption::PHP7_ScalarTypes
) {
2805 as
.error("Cannot set .strict without PHP7 ScalarTypes");
2808 as
.ue
->m_useStrictTypes
= as
.ue
->m_useStrictTypesForBuiltins
= word
== "1";
2810 if (!as
.ue
->m_useStrictTypes
&& word
!= "0") {
2811 as
.error("Strict types must be either 1 or 0");
2814 as
.in
.expectWs(';');
// Parses a list of symbol references that is introduced by '{'.
// Two scanning strategies are visible below: one collects each symbol and
// passes it to the AsmCallbacks member function `onSymbol` on as.callbacks,
// the other only skips over input while the next character is not '}'.
// NOTE(review): a parameter line, the lines choosing between the two
// strategies, and the handling of the closing '}' are not visible in this
// excerpt -- presumably the choice depends on whether as.callbacks is set;
// confirm against the full file.
2817 void parse_symbol_refs(
// Pointer to the AsmCallbacks member function invoked with each symbol.
2819 void (AsmCallbacks::*onSymbol
)(const std::string
&)
2821 as
.in
.expectWs('{');
// Collecting strategy: skip whitespace, then append characters to `symbol`
// for as long as they are not a space, tab, CR, LF, '#' or '}'.
2825 as
.in
.skipWhitespace();
2827 as
.in
.consumePred(!boost::is_any_of(" \t\r\n#}"),
2828 std::back_inserter(symbol
));
// Nothing consumed.  NOTE(review): the body of this branch is elided here;
// presumably it ends the scan -- confirm.
2829 if (symbol
.empty()) {
2832 (as
.callbacks
->*onSymbol
)(symbol
);
// Skipping strategy: advance over everything that is neither '#' nor '}',
// stopping early when skipPred reports false.
2835 while (as
.in
.peek() != '}') {
2836 as
.in
.skipWhitespace();
2837 if (!as
.in
.skipPred(!boost::is_any_of("#}"))) break;
2844 void parse_includes(AsmState
& as
) {
2845 parse_symbol_refs(as
, &AsmCallbacks::onInclude
);
2848 void parse_constant_refs(AsmState
& as
) {
2849 parse_symbol_refs(as
, &AsmCallbacks::onConstantRef
);
2852 void parse_function_refs(AsmState
& as
) {
2853 parse_symbol_refs(as
, &AsmCallbacks::onFunctionRef
);
2856 void parse_class_refs(AsmState
& as
) {
2857 parse_symbol_refs(as
, &AsmCallbacks::onClassRef
);
2861 * directive-metadata : name '=' bareword ';'
2862 * | name '=' quoted-string-literal ';'
2863 * | name '=' long-string-literal ';'
// Parses a `.metadata` entry of the form key = value and records it in
// as.ue->m_metaData as a persistent-string typed value keyed by the static
// string for `key`.  If the entry cannot be parsed, the error at the end of
// the function is raised.
// NOTE(review): the declarations of `key` and `tmp`, the end of the lambda
// and parts of the success path are not visible in this excerpt -- confirm
// against the full file.
2866 void parse_metadata(AsmState
& as
) {
2868 if (as
.in
.readname(key
)) {
2869 as
.in
.expectWs('=');
2870 as
.in
.skipWhitespace();
// The value is tried in order as a long string, a quoted string and a bare
// word; whichever form matches is returned as a static string.
2871 auto const value
= [&] () -> const StringData
* {
2872 auto ret
= parse_maybe_long_string(as
);
// A non-empty result means a long string literal was present.
2873 if (!ret
.empty()) return makeStaticString(ret
);
2875 if (as
.in
.readQuotedStr(tmp
) || as
.in
.readword(tmp
)) {
2876 return makeStaticString(tmp
);
// Store the pair in the unit's metadata table.
2882 as
.ue
->m_metaData
.emplace(
2883 makeStaticString(key
),
2884 make_tv
<KindOfPersistentString
>(value
)
// Reached when no key = value pair was recognised.
2889 as
.error(".metadata expects a key = value pair");
2893 * asm-file : asm-tld* <EOF>
2896 * asm-tld : ".filepath" directive-filepath
2897 * | ".main" directive-main
2898 * | ".function" directive-function
2899 * | ".adata" directive-adata
2900 * | ".class" directive-class
2901 * | ".alias" directive-alias
2902 * | ".strict" directive-strict
2903 * | ".includes directive-filepaths
2904 * | ".constant_refs directive-symbols
2905 * | ".function_refs directive-symbols
2906 * | ".class_refs directive-symbols
2907 * | ".metadata directive-meta-data
2910 void parse(AsmState
& as
) {
2911 as
.in
.skipWhitespace();
2912 std::string directive
;
2913 if (!SystemLib::s_inited
) {
2915 * The SystemLib::s_hhas_unit is required to be merge-only,
2916 * and we create the source by concatenating separate .hhas files
2917 * Rather than choosing one to have the .main directive, we just
2918 * generate a trivial pseudoMain automatically.
2920 as
.ue
->addTrivialPseudoMain();
2921 as
.emittedPseudoMain
= true;
2924 while (as
.in
.readword(directive
)) {
2925 if (directive
== ".filepath") { parse_filepath(as
) ; continue; }
2926 if (directive
== ".main") { parse_main(as
) ; continue; }
2927 if (directive
== ".function") { parse_function(as
) ; continue; }
2928 if (directive
== ".adata") { parse_adata(as
) ; continue; }
2929 if (directive
== ".class") { parse_class(as
) ; continue; }
2930 if (directive
== ".alias") { parse_alias(as
) ; continue; }
2931 if (directive
== ".strict") { parse_strict(as
) ; continue; }
2932 if (directive
== ".hh_file") { parse_hh_file(as
) ; continue; }
2933 if (directive
== ".includes") { parse_includes(as
) ; continue; }
2934 if (directive
== ".constant_refs") { parse_constant_refs(as
) ; continue; }
2935 if (directive
== ".function_refs") { parse_function_refs(as
) ; continue; }
2936 if (directive
== ".class_refs") { parse_class_refs(as
) ; continue; }
2937 if (directive
== ".metadata") { parse_metadata(as
) ; continue; }
2939 as
.error("unrecognized top-level directive `" + directive
+ "'");
2942 if (!as
.emittedPseudoMain
) {
2943 as
.error("no .main found in hhas unit");
2949 //////////////////////////////////////////////////////////////////////
// Assembles hhas source text into a freshly created UnitEmitter.
// The emitter is constructed from `md5`, its m_filepath is set to the static
// string for `filename`, and an AsmState reading from an in-memory stream
// over `code`/`codeLen` is created with the supplied callbacks.
// If a std::exception is caught, it is rethrown unless `swallowErrors` is
// set, in which case the emitter is replaced by the result of
// createFatalUnit carrying the exception message.
// NOTE(review): several parameter lines, the opening of the try block, the
// parse invocation and the return statement are not visible in this
// excerpt -- confirm against the full file.
2951 std::unique_ptr
<UnitEmitter
> assemble_string(
2954 const char* filename
,
2957 AsmCallbacks
* callbacks
2959 auto ue
= std::make_unique
<UnitEmitter
>(md5
);
// The static filename string is reused for the fatal unit in the handler.
2960 StringData
* sd
= makeStaticString(filename
);
2961 ue
->m_filepath
= sd
;
// Initial strict-types setting, derived from the runtime options.
2962 ue
->m_useStrictTypes
= RuntimeOption::EnableHipHopSyntax
||
2963 !RuntimeOption::PHP7_ScalarTypes
;
// The source bytes are copied into a std::string backing a binary-mode
// input stream.
2966 auto const mode
= std::istringstream::binary
| std::istringstream::in
;
2967 std::istringstream
instr(std::string(code
, codeLen
), mode
);
2968 AsmState
as(instr
, callbacks
);
// Error path: rethrow, or turn the failure into a runtime fatal unit.
2971 } catch (const std::exception
& e
) {
2972 if (!swallowErrors
) throw;
2973 ue
= createFatalUnit(sd
, md5
, FatalOp::Runtime
, makeStaticString(e
.what()));
// Assembles an inline-assembly expression `expr` and classifies its effect
// on the evaluation stack relative to `incomingStackDepth`:
//   - AsmResult::Unreachable when as.currentStackDepth is unset on exit,
//   - AsmResult::ValuePushed when the depth is incomingStackDepth + 1,
//   - AsmResult::NoResult otherwise (any depth other than incomingStackDepth
//     is first reported through as.error).
// Unnamed locals (as.maxUnnamed >= 0) and an exit depth without a baseValue
// are also reported through as.error.
// NOTE(review): the construction of `as` and some other statements are not
// visible in this excerpt -- confirm how `ue` and `fe` are attached to it.
2979 AsmResult
assemble_expression(UnitEmitter
& ue
, FuncEmitter
* fe
,
2980 int incomingStackDepth
,
2981 const std::string
& expr
) {
2982 auto const mode
= std::istringstream::binary
| std::istringstream::in
;
// A '}' is appended to the expression text before parsing; presumably it
// terminates the body for parse_function_body -- confirm.
2983 std::stringstream
sstr(expr
+ '}', mode
);
// Start from the caller's stack depth.
2987 as
.initStackDepth
.adjust(as
, incomingStackDepth
);
// NOTE(review): the meaning of the second argument is not visible here.
2988 parse_function_body(as
, 1);
2990 if (as
.maxUnnamed
>= 0) {
2991 as
.error("Unnamed locals are not allowed in inline assembly");
// No current stack depth on exit is reported as Unreachable.
2994 if (!as
.currentStackDepth
) return AsmResult::Unreachable
;
2996 // If we fall off the end of the inline assembly, we're expected to
2997 // leave a single value on the stack, or leave the stack unchanged.
2998 if (!as
.currentStackDepth
->baseValue
) {
2999 as
.error("Unknown stack offset on exit from inline assembly");
3001 auto curStackDepth
= as
.currentStackDepth
->absoluteDepth();
// Exactly one extra cell on the stack.
3002 if (curStackDepth
== incomingStackDepth
+ 1) {
3003 return AsmResult::ValuePushed
;
// Any other change in depth is an error.
3005 if (curStackDepth
!= incomingStackDepth
) {
3006 as
.error("Inline assembly expressions should leave the stack unchanged, "
3007 "or push exactly one cell onto the stack.");
3010 return AsmResult::NoResult
;
3013 //////////////////////////////////////////////////////////////////////