2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
18 * This module contains an assembler implementation for HHBC. It is
19 * probably fairly close to allowing you to access most of the
20 * metadata associated with hhvm's compiled unit format, although it's
21 * possible something has been overlooked.
23 * To use it, run hhvm with -v Eval.AllowHhas=true on a file with a
24 * ".hhas" extension. The syntax is probably easiest to understand by
25 * looking at some examples (or the semi-BNF markup around some of the
26 * parse functions here). For examples, see hphp/tests/vm/asm_*.
31 * - You can crash hhvm very easily with this.
33 * Using this module, you can emit pretty much any sort of not
34 * trivially-illegal bytecode stream, and many trivially-illegal
35 * ones as well. You can also easily create Units with illegal
36 * metadata. Generally this will crash the VM. In other cases
37 * (especially if you don't bother to DefCls your classes in your
38 * .main) you'll just get mysterious "class not defined" errors
41 * - Whitespace is not normally significant, but newlines may not
42 * be in the middle of a list of opcode arguments. (After the
43 * newline, the next thing seen is expected to be either a
44 * mnemonic for the next opcode in the stream or some sort of
45 * directive.) However, newlines (and comments) may appear
46 * *inside* certain opcode arguments (e.g. string literals or
49 * Rationale: this is partially intended to make it trivial to
50 * catch wrong-number-of-arguments errors, although it probably
51 * could be done without this if you feel like changing it.
56 * - It might be nice if you could refer to iterators by name
57 * instead of by index.
59 * - DefCls by name would be nice.
61 * Missing features (partial list):
63 * - while class/function names can contain ':', '$', and ';',
64 * .use declarations can't handle those names because of syntax
67 * @author Jordan DeLong <delong.j@fb.com>
70 #include "hphp/runtime/vm/as.h"
78 #include <boost/algorithm/string.hpp>
79 #include <boost/scoped_ptr.hpp>
80 #include <boost/bind.hpp>
82 #include <folly/Conv.h>
83 #include <folly/MapUtil.h>
84 #include <folly/Memory.h>
85 #include <folly/Range.h>
86 #include <folly/String.h>
88 #include "hphp/util/md5.h"
90 #include "hphp/parser/parser.h"
91 #include "hphp/runtime/base/builtin-functions.h"
92 #include "hphp/runtime/base/repo-auth-type-codec.h"
93 #include "hphp/runtime/base/repo-auth-type.h"
94 #include "hphp/runtime/base/tv-type.h"
95 #include "hphp/runtime/vm/as-shared.h"
96 #include "hphp/runtime/vm/func-emitter.h"
97 #include "hphp/runtime/vm/hhbc.h"
98 #include "hphp/runtime/vm/native.h"
99 #include "hphp/runtime/vm/preclass-emitter.h"
100 #include "hphp/runtime/vm/unit.h"
101 #include "hphp/runtime/vm/unit-emitter.h"
102 #include "hphp/system/systemlib.h"
108 //////////////////////////////////////////////////////////////////////
113 typedef void (*ParserFunc
)(AsmState
& as
);
115 struct Error
: std::runtime_error
{
116 explicit Error(int where
, const std::string
& what
)
117 : std::runtime_error(folly::sformat(
118 "Assembler Error: line {}: {}", where
, what
))
123 explicit Input(std::istream
& in
)
127 int peek() { return m_in
.peek(); }
130 int ret
= m_in
.get();
133 } else if (ret
== '\n') {
139 void ungetc(char c
) {
140 if (c
== '\n') --m_lineNumber
;
146 error(folly::sformat("expected character `{}'", char(c
)));
151 * Expect `c' after possible whitespace/comments. When convenient,
152 * preferable to doing skipWhitespace/expect manually to keep the
153 * line number in the error prior to the whitespace skipped.
155 void expectWs(int c
) {
156 const int currentLine
= m_lineNumber
;
159 throw Error(currentLine
,
160 folly::sformat("expected character `{}'", char(c
)));
164 int getLineNumber() const {
168 // Skips whitespace, then populates word with valid bareword
169 // characters. Returns true if we read any characters into word.
170 bool readword(std::string
& word
) {
173 consumePred(is_bareword(), std::back_inserter(word
));
174 return !word
.empty();
176 // Skips whitespace, then populates name with valid extname
177 // characters. Returns true if we read any characters into name.
178 bool readname(std::string
& name
) {
181 consumePred(is_extname(), std::back_inserter(name
));
182 return !name
.empty();
184 // Try to consume a bareword. Skips whitespace. If we can't
185 // consume the specified word, returns false.
186 bool tryConsume(const std::string
& what
) {
188 if (!readword(word
)) {
192 std::for_each(word
.rbegin(), word
.rend(),
193 boost::bind(&Input::ungetc
, this, _1
));
201 if (peek() == '-') buf
+= (char)getc();
202 consumePred(isdigit
, std::back_inserter(buf
));
203 if (buf
.empty() || buf
== "-") {
204 throw Error(m_lineNumber
, "expected integral value");
206 return folly::to
<int32_t>(buf
);
209 // C-style character escapes, no support for unicode escapes or
211 template<class OutCont
>
212 void escapeChar(OutCont
& out
) {
213 auto is_oct
= [&] (int i
) { return i
>= '0' && i
<= '7'; };
214 auto is_hex
= [&] (int i
) {
215 return (i
>= '0' && i
<= '9') ||
216 (i
>= 'a' && i
<= 'f') ||
217 (i
>= 'A' && i
<= 'F');
219 auto hex_val
= [&] (int i
) -> uint32_t {
221 return i
>= '0' && i
<= '9' ? i
- '0' :
222 i
>= 'a' && i
<= 'f' ? i
- 'a' + 10 : i
- 'A' + 10;
227 case EOF
: error("EOF in string literal");
228 case 'a': out
.push_back('\a'); break;
229 case 'b': out
.push_back('\b'); break;
230 case 'f': out
.push_back('\f'); break;
231 case 'n': out
.push_back('\n'); break;
232 case 'r': out
.push_back('\r'); break;
233 case 't': out
.push_back('\t'); break;
234 case 'v': out
.push_back('\v'); break;
235 case '\'': out
.push_back('\''); break;
236 case '\"': out
.push_back('\"'); break;
237 case '\?': out
.push_back('\?'); break;
238 case '\\': out
.push_back('\\'); break;
239 case '\r': /* ignore */ break;
240 case '\n': /* ignore */ break;
243 auto val
= int64_t{src
} - '0';
244 for (auto i
= int{1}; i
< 3; ++i
) {
246 if (!is_oct(src
)) { ungetc(src
); break; }
250 if (val
> std::numeric_limits
<uint8_t>::max()) {
251 error("octal escape sequence overflowed");
253 out
.push_back(static_cast<uint8_t>(val
));
257 if (src
== 'x' || src
== 'X') {
258 auto val
= uint64_t{0};
259 if (!is_hex(peek())) error("\\x used without no following hex digits");
260 for (auto i
= int{0}; i
< 2; ++i
) {
262 if (!is_hex(src
)) { ungetc(src
); break; }
266 if (val
> std::numeric_limits
<uint8_t>::max()) {
267 error("hex escape sequence overflowed");
269 out
.push_back(static_cast<uint8_t>(val
));
273 error("unrecognized character escape");
277 // Reads a quoted string with typical escaping rules. Does not skip
278 // any whitespace. Returns true if we successfully read one, or
279 // false. EOF during the string throws.
280 bool readQuotedStr(std::string
& str
) {
282 if (peek() != '\"') {
288 while ((c
= getc()) != EOF
) {
290 case '\"': return true;
291 case '\\': escapeChar(str
); break;
292 default: str
.push_back(c
); break;
295 error("EOF in string literal");
301 * Reads a python-style longstring, or returns false if we don't
302 * have one. Does not skip any whitespace before looking for the
305 * Python longstrings start with \"\"\", and can contain any bytes
306 * other than \"\"\". A '\\' character introduces C-style escapes,
307 * but there's no need to escape single quote characters.
309 bool readLongString(std::vector
<char>& buffer
) {
310 if (peek() != '\"') return false;
312 if (peek() != '\"') { ungetc('\"'); return false; }
314 if (peek() != '\"') { ungetc('\"');
315 ungetc('\"'); return false; }
319 while ((c
= getc()) != EOF
) {
327 buffer
.push_back('"');
333 buffer
.push_back('"');
334 buffer
.push_back('"');
343 error("EOF in \"\"\"-string literal");
348 // Skips whitespace (including newlines and comments).
349 void skipWhitespace() {
350 while (skipPred(boost::is_any_of(" \t\r\n"))) {
352 skipPred(!boost::is_any_of("\n"));
360 // Skip spaces and tabs, but other whitespace (such as comments or
361 // newlines) stops the skip.
362 void skipSpaceTab() {
363 skipPred(boost::is_any_of(" \t"));
366 template<class Predicate
>
367 bool skipPred(Predicate pred
) {
368 while (pred(peek())) {
377 template<class Predicate
, class OutputIterator
>
378 bool consumePred(Predicate pred
, OutputIterator out
) {
380 while (pred(c
= peek())) {
392 // whether a character is a valid part of the extended sorts of
393 // names that HHVM uses for certain generated constructs
394 // (closures, __Memoize implementations, etc)
396 bool operator()(int i
) const {
398 return is_bw(i
) || i
== ':' || i
== ';' || i
== '#' || i
=='@' ||
399 (i
>= 0x7f && i
<= 0xff) /* see hphp.ll :( */;
403 void error(const std::string
& what
) {
404 throw Error(getLineNumber(), what
);
407 void io_error_if_bad() {
409 error("I/O error reading stream: " +
410 folly::errnoStr(errno
).toStdString());
423 StackDepth
* stackDepth
;
428 * Tracks the depth of the stack in a given block of instructions.
430 * This structure is linked to a block of instructions (usually starting at a
431 * label), and tracks the current stack depth in this block. This tracking can
433 * - Absolute depth: the depth of the stack is exactly known for this block
434 * - Relative depth: the depth of the stack is unknown for now. We keep track
435 * of an offset, relative to the depth of the stack at the first instruction
441 * Tracks the max depth of elem stack + desc stack offset inside a region
442 * where baseValue is unknown.
446 * Tracks the min depth of the elem stack inside a region where baseValue
447 * is unknown, and the line where the min occurred.
451 folly::Optional
<int> baseValue
;
454 * During the parsing process, when a Jmp instruction is encountered, the
455 * StackDepth structure for this jump becomes linked to the StackDepth
456 * structure of the label (which is added to the listeners list).
458 * Once the absolute depth at the jump becomes known, its StackDepth
459 * instance calls the setBase method of the StackDepth instance of the label.
460 * The absolute depth at the label can then be inferred from the
461 * absolute depth at the jump.
463 std::vector
<std::pair
<StackDepth
*, int> > listeners
;
471 void adjust(AsmState
& as
, int delta
);
472 void addListener(AsmState
& as
, StackDepth
* target
);
473 void setBase(AsmState
& as
, int stackDepth
);
474 int absoluteDepth() {
475 assertx(baseValue
.hasValue());
476 return baseValue
.value() + currentOffset
;
480 * Sets the baseValue such that the current stack depth matches the
483 * If the base value is already known, it may conflict with the
484 * parameter of this function. In this case, an error will be raised.
486 void setCurrentAbsolute(AsmState
& as
, int stackDepth
);
492 StackDepth stackDepth
;
495 * Each label source has an Offset at which the jmp should be
496 * patched up, and an Offset from which the jump delta should be
497 * computed. (The second Offset is basically to the actual
498 * jump/switch/etc instruction, while the first points to the
501 std::vector
<std::pair
<Offset
,Offset
>> sources
;
504 * List of parameter ids that use this label for their DV
507 std::vector
<Id
> dvInits
;
510 * List of EHEnts that have m_handler pointing to this label.
512 std::vector
<size_t> ehEnts
;
516 explicit AsmState(std::istream
& in
, AsmCallbacks
* callbacks
= nullptr)
518 , callbacks(callbacks
)
520 currentStackDepth
->setBase(*this, 0);
523 AsmState(const AsmState
&) = delete;
524 AsmState
& operator=(const AsmState
&) = delete;
526 template<typename
... Args
>
527 void error(const std::string
& fmt
, Args
&&... args
) {
528 throw Error(in
.getLineNumber(),
529 folly::sformat(fmt
, std::forward
<Args
>(args
)...));
533 void adjustStack(int delta
) {
534 if (currentStackDepth
== nullptr) {
535 // Instruction is unreachable, nothing to do here!
539 currentStackDepth
->adjust(*this, delta
);
542 void adjustStackHighwater(int depth
) {
544 fe
->maxStackCells
= std::max(fe
->maxStackCells
, depth
);
548 std::string
displayStackDepth() {
549 std::ostringstream stack
;
551 if (currentStackDepth
== nullptr) {
553 } else if (currentStackDepth
->baseValue
) {
554 stack
<< *currentStackDepth
->baseValue
+
555 currentStackDepth
->currentOffset
;
557 stack
<< "?" << currentStackDepth
->currentOffset
;
563 void addLabelTarget(const std::string
& name
) {
564 auto& label
= labelMap
[name
];
566 error("Duplicate label " + name
);
569 label
.target
= ue
->bcPos();
571 StackDepth
* newStack
= &label
.stackDepth
;
573 if (currentStackDepth
== nullptr) {
574 // Previous instruction was unreachable
575 currentStackDepth
= newStack
;
579 // The stack depth at the label depends on the current depth
580 currentStackDepth
->addListener(*this, newStack
);
581 currentStackDepth
= newStack
;
584 void addLabelJump(const std::string
& name
, Offset immOff
, Offset opcodeOff
) {
585 auto& label
= labelMap
[name
];
587 if (currentStackDepth
!= nullptr) {
588 // The stack depth at the target must be the same as the current depth
589 // (whatever this may be: it may still be unknown)
590 currentStackDepth
->addListener(*this, &label
.stackDepth
);
593 label
.sources
.emplace_back(immOff
, opcodeOff
);
596 void enforceStackDepth(int stackDepth
) {
597 if (currentStackDepth
== nullptr) {
598 // Current instruction is unreachable, thus the constraint
599 // on the stack depth will never be violated
603 currentStackDepth
->setCurrentAbsolute(*this, stackDepth
);
606 bool isUnreachable() {
607 return currentStackDepth
== nullptr;
610 void enterUnreachableRegion() {
611 currentStackDepth
= nullptr;
614 void enterReachableRegion(int stackDepth
) {
615 unnamedStackDepths
.emplace_back(std::make_unique
<StackDepth
>());
616 currentStackDepth
= unnamedStackDepths
.back().get();
617 currentStackDepth
->setBase(*this, stackDepth
);
620 void addLabelDVInit(const std::string
& name
, int paramId
) {
621 labelMap
[name
].dvInits
.push_back(paramId
);
623 // Stack depth should be 0 when entering a DV init
624 labelMap
[name
].stackDepth
.setBase(*this, 0);
627 void addLabelEHEnt(const std::string
& name
, size_t ehIdx
) {
628 labelMap
[name
].ehEnts
.push_back(ehIdx
);
630 // Stack depth should be 0 when entering a fault funclet
631 labelMap
[name
].stackDepth
.setBase(*this, 0);
634 void beginFpi(Offset fpushOff
) {
635 fpiRegs
.push_back(FPIReg
{
638 currentStackDepth
->currentOffset
640 fdescDepth
+= kNumActRecCells
;
641 currentStackDepth
->adjust(*this, 0);
645 if (fpiRegs
.empty()) {
646 error("endFpi called with no active fpi region");
649 auto& ent
= fe
->addFPIEnt();
650 const auto& reg
= fpiRegs
.back();
651 ent
.m_fpushOff
= reg
.fpushOff
;
652 ent
.m_fpiEndOff
= ue
->bcPos();
653 ent
.m_fpOff
= reg
.fpOff
;
654 if (reg
.stackDepth
->baseValue
) {
655 ent
.m_fpOff
+= *reg
.stackDepth
->baseValue
;
657 // Base value still unknown, this will need to be updated later.
659 // Store the FPIEnt's index in the FuncEmitter's entry table.
660 assertx(&fe
->fpitab
[fe
->fpitab
.size()-1] == &ent
);
661 fpiToUpdate
.emplace_back(fe
->fpitab
.size() - 1, reg
.stackDepth
);
665 always_assert(fdescDepth
>= kNumActRecCells
);
666 fdescDepth
-= kNumActRecCells
;
671 ue
->addPreClassEmitter(pce
);
676 void patchLabelOffsets(const Label
& label
) {
677 for (auto const& source
: label
.sources
) {
678 ue
->emitInt32(label
.target
- source
.second
, source
.first
);
681 for (auto const& dvinit
: label
.dvInits
) {
682 fe
->params
[dvinit
].funcletOff
= label
.target
;
685 for (auto const& ehEnt
: label
.ehEnts
) {
686 fe
->ehtab
[ehEnt
].m_handler
= label
.target
;
690 void finishSection() {
691 for (auto const& label
: labelMap
) {
692 if (!label
.second
.bound
) {
693 error("Undefined label " + label
.first
);
695 if (label
.second
.target
>= ue
->bcPos()) {
696 error("label " + label
.first
+ " falls of the end of the function");
699 patchLabelOffsets(label
.second
);
702 // Patch the FPI structures
703 for (auto& kv
: fpiToUpdate
) {
704 if (!kv
.second
->baseValue
) {
705 error("created a FPI from an unreachable instruction");
708 fe
->fpitab
[kv
.first
].m_fpOff
+= *kv
.second
->baseValue
;
712 void finishFunction() {
715 // Stack depth should be 0 at the end of a function body
716 enforceStackDepth(0);
718 // Bump up the unnamed local count
719 const int numLocals
= maxUnnamed
+ 1;
720 while (fe
->numLocals() < numLocals
) {
721 fe
->allocUnnamedLocal();
726 fe
->numIterators() * kNumIterCells
+
727 clsRefCountToCells(fe
->numClsRefSlots());
729 fe
->finish(ue
->bcPos(), false);
730 ue
->recordFunction(fe
);
736 numClsRefSlotsSet
= false;
737 initStackDepth
= StackDepth();
738 initStackDepth
.setBase(*this, 0);
739 currentStackDepth
= &initStackDepth
;
740 unnamedStackDepths
.clear();
746 int getLocalId(const std::string
& name
) {
747 if (name
[0] == '_') {
748 int id
= folly::to
<int>(name
.substr(1));
749 if (id
> maxUnnamed
) maxUnnamed
= id
;
753 if (name
[0] != '$') {
754 error("local variables must be prefixed with $ or _");
757 const StringData
* sd
= makeStaticString(name
.c_str() + 1);
759 return fe
->lookupVarId(sd
);
762 int getIterId(int32_t id
) {
763 if (id
>= fe
->numIterators()) {
764 error("iterator id exceeded number of iterators in the function");
769 int getClsRefSlot(int32_t slot
) {
770 if (slot
>= fe
->numClsRefSlots()) {
771 error("class-ref slot id exceeded number of class-ref "
772 "slots in the function");
779 bool emittedPseudoMain
{false};
780 bool emittedTopLevelFunc
{false};
782 std::map
<std::string
,ArrayData
*> adataMap
;
784 // When inside a class, this state is active.
785 PreClassEmitter
* pce
;
787 // When we're doing a function or method body, this state is active.
788 FuncEmitter
* fe
{nullptr};
789 std::vector
<FPIReg
> fpiRegs
;
790 std::map
<std::string
,Label
> labelMap
;
791 bool numItersSet
{false};
792 bool numClsRefSlotsSet
{false};
793 bool enumTySet
{false};
794 StackDepth initStackDepth
;
795 StackDepth
* currentStackDepth
{&initStackDepth
};
796 std::vector
<std::unique_ptr
<StackDepth
>> unnamedStackDepths
;
798 int minStackDepth
{0};
800 std::vector
<std::pair
<size_t, StackDepth
*>> fpiToUpdate
;
801 std::set
<std::string
,stdltistr
> hoistables
;
802 std::unordered_map
<uint32_t,Offset
> defClsOffsets
;
803 Location::Range srcLoc
{-1,-1,-1,-1};
804 AsmCallbacks
* callbacks
{ nullptr };
807 void StackDepth::adjust(AsmState
& as
, int delta
) {
808 currentOffset
+= delta
;
811 // The absolute stack depth is unknown. We only store the min
812 // and max offsets, and we will take a decision later, when the
813 // base value will be known.
814 maxOffset
= std::max(currentOffset
+ as
.fdescDepth
, maxOffset
);
815 if (currentOffset
< minOffset
) {
816 minOffsetLine
= as
.in
.getLineNumber();
817 minOffset
= currentOffset
;
822 if (*baseValue
+ currentOffset
< 0) {
823 as
.error("opcode sequence caused stack depth to go negative");
826 as
.adjustStackHighwater(*baseValue
+ currentOffset
+ as
.fdescDepth
);
829 void StackDepth::addListener(AsmState
& as
, StackDepth
* target
) {
831 target
->setBase(as
, *baseValue
+ currentOffset
);
833 listeners
.emplace_back(target
, currentOffset
);
837 void StackDepth::setBase(AsmState
& as
, int stackDepth
) {
838 if (baseValue
&& stackDepth
!= *baseValue
) {
839 as
.error("stack depth {} does not match base value {}",
840 stackDepth
, *baseValue
);
843 baseValue
= stackDepth
;
845 // We finally know the base value. Update AsmState accordingly.
846 if (*baseValue
+ minOffset
< 0) {
849 "opcode sequence caused stack depth to go negative"
852 as
.adjustStackHighwater(*baseValue
+ maxOffset
);
854 // Update the listeners
855 auto l
= std::move(listeners
);
856 // We won't need them anymore
859 kv
.first
->setBase(as
, *baseValue
+ kv
.second
);
863 void StackDepth::setCurrentAbsolute(AsmState
& as
, int stackDepth
) {
864 setBase(as
, stackDepth
- currentOffset
);
867 //////////////////////////////////////////////////////////////////////
870 * Opcode arguments must be on the same line as the opcode itself,
871 * although certain argument types may contain internal newlines (see,
872 * for example, read_jmpvector or string literals).
874 template<class Target
> Target
read_opcode_arg(AsmState
& as
) {
875 as
.in
.skipSpaceTab();
877 as
.in
.consumePred(!boost::is_any_of(" \t\r\n#;>"),
878 std::back_inserter(strVal
));
879 if (strVal
.empty()) {
880 as
.error("expected opcode or directive argument");
883 return folly::to
<Target
>(strVal
);
884 } catch (std::range_error
&) {
885 as
.error("couldn't convert input argument (" + strVal
+ ") to "
891 template<class SubOpType
>
892 uint8_t read_subop(AsmState
& as
) {
893 auto const str
= read_opcode_arg
<std::string
>(as
);
894 if (auto const ty
= nameToSubop
<SubOpType
>(str
.c_str())) {
895 return static_cast<uint8_t>(*ty
);
897 as
.error("unknown subop name");
901 const StringData
* read_litstr(AsmState
& as
) {
902 as
.in
.skipSpaceTab();
904 if (!as
.in
.readQuotedStr(strVal
)) {
905 as
.error("expected quoted string literal");
907 return makeStaticString(strVal
);
911 * maybe-string-literal : N
915 const StringData
* read_maybe_litstr(AsmState
& as
) {
916 as
.in
.skipSpaceTab();
917 if (as
.in
.peek() == 'N') {
921 return read_litstr(as
);
924 std::vector
<std::string
> read_strvector(AsmState
& as
) {
925 std::vector
<std::string
> ret
;
926 as
.in
.skipSpaceTab();
929 while (as
.in
.skipSpaceTab(), as
.in
.readQuotedStr(name
)) {
932 as
.in
.skipSpaceTab();
937 ArrayData
* read_litarray(AsmState
& as
) {
938 as
.in
.skipSpaceTab();
939 if (as
.in
.getc() != '@') {
940 as
.error("expecting an `@foo' array literal reference");
943 if (!as
.in
.readword(name
)) {
944 as
.error("expected name of .adata literal");
947 auto const it
= as
.adataMap
.find(name
);
948 if (it
== as
.adataMap
.end()) {
949 as
.error("unknown array data literal name " + name
);
954 RepoAuthType
read_repo_auth_type(AsmState
& as
) {
955 auto const str
= read_opcode_arg
<std::string
>(as
);
956 folly::StringPiece
parse(str
);
959 * Note: no support for reading array types. (The assembler only
960 * emits a single unit, so it can't really be involved in creating a
964 using T
= RepoAuthType::Tag
;
966 #define X(what, tag) \
967 if (parse.startsWith(what)) return RepoAuthType{tag}
969 #define Y(what, tag) \
970 if (parse.startsWith(what)) { \
971 parse.removePrefix(what); \
972 auto const cls = makeStaticString(parse.data()); \
973 as.ue->mergeLitstr(cls); \
974 return RepoAuthType{tag, cls}; \
977 Y("Obj=", T::ExactObj
);
978 Y("?Obj=", T::OptExactObj
);
979 Y("?Obj<=", T::OptSubObj
);
980 Y("Obj<=", T::SubObj
);
982 X("?Arr", T::OptArr
);
984 X("?VArr", T::OptVArr
);
986 X("?DArr", T::OptDArr
);
988 X("?Vec", T::OptVec
);
990 X("?Dict", T::OptDict
);
991 X("Keyset", T::Keyset
);
992 X("?Keyset", T::OptKeyset
);
994 X("?Bool", T::OptBool
);
997 X("?Dbl", T::OptDbl
);
999 X("InitCell", T::InitCell
);
1000 X("InitGen", T::InitGen
);
1001 X("InitNull", T::InitNull
);
1002 X("InitUnc", T::InitUnc
);
1004 X("?Int", T::OptInt
);
1007 X("?Obj", T::OptObj
);
1009 X("?Res", T::OptRes
);
1011 X("?SArr", T::OptSArr
);
1013 X("?SVArr", T::OptSVArr
);
1014 X("SVArr", T::SVArr
);
1015 X("?SDArr", T::OptSDArr
);
1016 X("SDArr", T::SDArr
);
1017 X("?SVec", T::OptSVec
);
1019 X("?SDict", T::OptSDict
);
1020 X("SDict", T::SDict
);
1021 X("?SKeyset", T::OptSKeyset
);
1022 X("SKeyset", T::SKeyset
);
1023 X("?SStr", T::OptSStr
);
1025 X("?Str", T::OptStr
);
1028 X("?UncArrKey", T::OptUncArrKey
);
1029 X("?ArrKey", T::OptArrKey
);
1030 X("UncArrKey",T::UncArrKey
);
1031 X("ArrKey", T::ArrKey
);
1032 X("Uninit", T::Uninit
);
1037 // Make sure the above parsing code is revisited when new tags are
1038 // added (we'll get a warning for a missing case label):
1039 if (debug
) switch (RepoAuthType
{}.tag()) {
1083 case T::OptUncArrKey
:
1094 case T::OptExactObj
:
1099 as
.error("unrecognized RepoAuthType format");
1103 // Read a vector of IVAs, with format <int, int, int, ...>. The vector may be
1104 // omitted entirely if it is empty.
1105 std::vector
<uint32_t> read_argv(AsmState
& as
) {
1106 as
.in
.skipSpaceTab();
1107 if (as
.in
.peek() != '<') return {};
1110 std::vector
<uint32_t> result
;
1112 auto const num
= as
.in
.readint();
1113 if (num
< 0) as
.error("Was expecting a positive integer");
1114 result
.push_back(num
);
1115 as
.in
.skipWhitespace();
1116 if (as
.in
.peek() == '>') break;
1117 as
.in
.expectWs(',');
1119 as
.in
.expectWs('>');
1124 // Read in a vector of iterators. The format for this vector is:
1125 // <(TYPE) ID, (TYPE) ID, ...>
1126 // Where TYPE := Iter | MIter | CIter
1127 // and ID := Integer
1128 std::vector
<uint32_t> read_itervec(AsmState
& as
) {
1129 std::vector
<uint32_t> ret
;
1131 as
.in
.skipSpaceTab();
1137 as
.in
.expectWs('(');
1138 if (!as
.in
.readword(word
)) as
.error("Was expecting Iterator type.");
1139 if (!word
.compare("Iter")) ret
.push_back(KindOfIter
);
1140 else if (!word
.compare("MIter")) ret
.push_back(KindOfMIter
);
1141 else if (!word
.compare("CIter")) ret
.push_back(KindOfCIter
);
1142 else as
.error("Unknown iterator type `" + word
+ "'");
1143 as
.in
.expectWs(')');
1145 as
.in
.skipSpaceTab();
1147 if (!as
.in
.readword(word
)) as
.error("Was expecting iterator id.");
1148 ret
.push_back(folly::to
<uint32_t>(word
));
1150 if (!isdigit(word
.back())) {
1151 if (word
.back() == '>') break;
1152 if (word
.back() != ',') as
.error("Was expecting `,'.");
1154 as
.in
.skipSpaceTab();
1155 if (as
.in
.peek() == '>') { as
.in
.getc(); break; }
1163 // Jump tables are lists of labels.
1164 std::vector
<std::string
> read_jmpvector(AsmState
& as
) {
1165 std::vector
<std::string
> ret
;
1167 as
.in
.skipSpaceTab();
1171 while (as
.in
.readword(word
)) {
1172 ret
.push_back(word
);
1174 as
.in
.expectWs('>');
1179 typedef std::vector
<std::pair
<Id
, std::string
>> SSwitchJmpVector
;
1181 SSwitchJmpVector
read_sswitch_jmpvector(AsmState
& as
) {
1182 SSwitchJmpVector ret
;
1184 as
.in
.skipSpaceTab();
1187 std::string defLabel
;
1189 std::string caseStr
;
1190 if (!as
.in
.readQuotedStr(caseStr
)) {
1191 as
.error("expected quoted string literal");
1196 as
.in
.readword(defLabel
);
1199 as
.ue
->mergeLitstr(makeStaticString(caseStr
)),
1203 as
.in
.skipWhitespace();
1204 } while (as
.in
.peek() != '-');
1208 as
.in
.readword(defLabel
);
1210 // -1 stands for the default case.
1211 ret
.emplace_back(-1, defLabel
);
1218 MemberKey
read_member_key(AsmState
& as
) {
1219 as
.in
.skipWhitespace();
1222 if (!as
.in
.readword(word
)) as
.error("expected member code");
1224 auto optMcode
= parseMemberCode(word
.c_str());
1225 if (!optMcode
) as
.error("unrecognized member code `" + word
+ "'");
1227 auto const mcode
= *optMcode
;
1228 if (mcode
!= MW
&& as
.in
.getc() != ':') {
1229 as
.error("expected `:' after member code `" + word
+ "'");
1235 case MEL
: case MPL
: {
1237 if (!as
.in
.readword(name
)) {
1238 as
.error("couldn't read name for local variable in member key");
1240 return MemberKey
{mcode
, as
.getLocalId(name
)};
1243 return MemberKey
{mcode
, read_opcode_arg
<int32_t>(as
)};
1245 return MemberKey
{mcode
, read_opcode_arg
<int64_t>(as
)};
1246 case MET
: case MPT
: case MQT
:
1247 return MemberKey
{mcode
, read_litstr(as
)};
1252 LocalRange
read_local_range(AsmState
& as
) {
1253 auto first
= read_opcode_arg
<std::string
>(as
);
1254 if (first
.size() > 2 && first
[0] == 'L' && first
[1] == ':') {
1255 first
= "_" + first
.substr(2);
1257 auto const pos
= first
.find('+');
1258 if (pos
== std::string::npos
) as
.error("expecting `+' in local range");
1259 auto const rest
= first
.substr(pos
+ 1);
1260 first
= first
.substr(0, pos
);
1261 auto const firstLoc
= as
.getLocalId(first
);
1262 auto const restCount
= folly::to
<uint32_t>(rest
);
1263 if (firstLoc
+ restCount
> as
.maxUnnamed
) {
1264 as
.maxUnnamed
= firstLoc
+ restCount
;
1266 return LocalRange
{uint32_t(firstLoc
), restCount
};
1269 //////////////////////////////////////////////////////////////////////
// Lookup table from a name to the ParserFunc that handles it.
// NOTE(review): presumably keyed by opcode mnemonic and populated elsewhere
// in this file -- confirm against the registration code.
1271 std::map
<std::string
,ParserFunc
> opcode_parsers
;
// Combinators for an opcode's immediate list.  IMM_ONE expands the per-type
// IMM_<type> macro for a single immediate; IMM_TWO/THREE/FOUR expand the
// immediates in order and increment immIdx between consecutive ones, so that
// IMM_IVA can store each value it reads into immIVA[immIdx].
1274 #define IMM_ONE(t) IMM_##t
1275 #define IMM_TWO(t1, t2) IMM_ONE(t1); ++immIdx; IMM_##t2
1276 #define IMM_THREE(t1, t2, t3) IMM_TWO(t1, t2); ++immIdx; IMM_##t3
1277 #define IMM_FOUR(t1, t2, t3, t4) IMM_THREE(t1, t2, t3); ++immIdx; IMM_##t4
1279 // Some bytecodes need to know an iva imm for (PUSH|POP)_*.
1280 #define IMM_IVA do { \
1281 auto imm = read_opcode_arg<uint32_t>(as); \
1282 as.ue->emitIVA(imm); \
1283 immIVA[immIdx] = imm; \
1287 std::vector<std::string> vecImm = read_strvector(as); \
1288 auto const vecImmStackValues = vecImm.size(); \
1289 as.ue->emitInt32(vecImmStackValues); \
1290 for (size_t i = 0; i < vecImmStackValues; ++i) { \
1291 as.ue->emitInt32(as.ue->mergeLitstr(String(vecImm[i]).get())); \
// Per-type immediate readers.  Each expands to an expression that parses one
// argument from the input held in `as` and emits it through as.ue:
//   IMM_SA   - quoted string literal (read_litstr), merged with mergeLitstr
//              and emitted as an Int32.
//   IMM_RATA - repo auth type (read_repo_auth_type), written via encodeRAT.
//   IMM_I64A - int64_t argument, emitted with emitInt64.
//   IMM_DA   - double argument, emitted with emitDouble.
//   IMM_LA   - local name, resolved by as.getLocalId and emitted as an IVA.
//   IMM_IA   - iterator id, passed through as.getIterId and emitted as an IVA.
//   IMM_CAR / IMM_CAW - class-ref slot, passed through as.getClsRefSlot and
//              emitted as an IVA; the two expansions are identical here.
//   IMM_OA(ty) - sub-op name, converted by read_subop<ty> and emitted as one
//              byte.  Unlike its siblings this expansion ends in a semicolon.
//   IMM_AA   - array literal reference (read_litarray), merged with
//              mergeArray and emitted as an Int32.
//   IMM_LAR  - local range (read_local_range), written via encodeLocalRange.
1294 #define IMM_SA as.ue->emitInt32(as.ue->mergeLitstr(read_litstr(as)))
1295 #define IMM_RATA encodeRAT(*as.ue, read_repo_auth_type(as))
1296 #define IMM_I64A as.ue->emitInt64(read_opcode_arg<int64_t>(as))
1297 #define IMM_DA as.ue->emitDouble(read_opcode_arg<double>(as))
1298 #define IMM_LA as.ue->emitIVA(as.getLocalId( \
1299 read_opcode_arg<std::string>(as)))
1300 #define IMM_IA as.ue->emitIVA(as.getIterId( \
1301 read_opcode_arg<int32_t>(as)))
1302 #define IMM_CAR as.ue->emitIVA(as.getClsRefSlot( \
1303 read_opcode_arg<int32_t>(as)))
1304 #define IMM_CAW as.ue->emitIVA(as.getClsRefSlot( \
1305 read_opcode_arg<int32_t>(as)))
1306 #define IMM_OA(ty) as.ue->emitByte(read_subop<ty>(as));
1307 #define IMM_AA as.ue->emitInt32(as.ue->mergeArray(read_litarray(as)))
1308 #define IMM_LAR encodeLocalRange(*as.ue, read_local_range(as))
1311 * There can currently be no more than one immvector per instruction,
1312 * and we need access to the size of the immediate vector for
1313 * NUM_POP_*, so the member vector guy exposes a vecImmStackValues
1316 #define IMM_ILA do { \
1317 std::vector<uint32_t> vecImm = read_itervec(as); \
1318 as.ue->emitInt32(vecImm.size() / 2); \
1319 for (auto& i : vecImm) { \
1320 as.ue->emitInt32(i); \
1324 #define IMM_I32LA do { \
1325 std::vector<uint32_t> vecImm = read_argv(as); \
1326 as.ue->emitInt32(vecImm.size()); \
1327 for (auto i : vecImm) { \
1328 as.ue->emitInt32(i); \
1332 #define IMM_BLA do { \
1333 std::vector<std::string> vecImm = read_jmpvector(as); \
1334 as.ue->emitInt32(vecImm.size()); \
1335 for (auto const& imm : vecImm) { \
1336 labelJumps.emplace_back(imm, as.ue->bcPos()); \
1337 as.ue->emitInt32(0); /* to be patched */ \
1341 #define IMM_SLA do { \
1342 auto vecImm = read_sswitch_jmpvector(as); \
1343 as.ue->emitInt32(vecImm.size()); \
1344 for (auto const& pair : vecImm) { \
1345 as.ue->emitInt32(pair.first); \
1346 labelJumps.emplace_back(pair.second, as.ue->bcPos()); \
1347 as.ue->emitInt32(0); /* to be patched */ \
1351 #define IMM_BA do { \
1352 labelJumps.emplace_back( \
1353 read_opcode_arg<std::string>(as), \
1356 as.ue->emitInt32(0); \
// Member-key immediate: parses a member key with read_member_key and writes
// it to the unit emitter via encode_member_key.
1359 #define IMM_KA encode_member_key(read_member_key(as), *as.ue)
// Stack-effect tables.  NUM_PUSH_<kind> and NUM_POP_<kind> expand to the
// number of values an opcode pushes and pops; the O() opcode-parser macro
// combines them as `NUM_PUSH_##push - NUM_POP_##pop` to get the stack delta.
// The fixed-arity forms ignore their arguments and expand to a constant.
// The variable forms read immIVA[], which IMM_IVA fills in while the
// opcode's immediates are parsed.
1361 #define NUM_PUSH_NOV 0
1362 #define NUM_PUSH_ONE(a) 1
1363 #define NUM_PUSH_TWO(a,b) 2
1364 #define NUM_PUSH_THREE(a,b,c) 3
1365 #define NUM_PUSH_INS_1(a) 1
1366 #define NUM_PUSH_CMANY immIVA[1] /* number of outputs */
// Pop counts.  NUM_POP_SMANY relies on a local named vecImmStackValues being
// in scope at the point of expansion.
1367 #define NUM_POP_NOV 0
1368 #define NUM_POP_ONE(a) 1
1369 #define NUM_POP_TWO(a,b) 2
1370 #define NUM_POP_THREE(a,b,c) 3
1371 #define NUM_POP_MFINAL immIVA[0]
1372 #define NUM_POP_F_MFINAL immIVA[1]
1373 #define NUM_POP_C_MFINAL (immIVA[0] + 1)
1374 #define NUM_POP_V_MFINAL NUM_POP_C_MFINAL
1375 #define NUM_POP_FMANY immIVA[0] /* number of arguments */
1376 #define NUM_POP_UFMANY (immIVA[0] + immIVA[1] - 1) /* number of arguments */
1377 #define NUM_POP_CVUMANY immIVA[0] /* number of arguments */
1378 #define NUM_POP_CMANY immIVA[0] /* number of arguments */
1379 #define NUM_POP_SMANY vecImmStackValues
// O(name, imm, pop, push, flags) generates parse_opcode_<name>(AsmState&),
// the parser/emitter for a single opcode.  Steps visible in this listing:
//  - if there is no current stack depth, enter a reachable region of depth 0;
//  - mark the function as containing calls for FCall, FCallD, FCallAwait,
//    FCallM and FCallDM;
//  - emit the opcode, then adjust the tracked stack depth by
//    NUM_PUSH_<push> - NUM_POP_<pop>;
//  - begin an FPI region for FPush* opcodes;
//  - register every label jump collected in labelJumps;
//  - enforce stack depth 0 after RetC/RetV/RetM and 1 after
//    CreateCont/Await/Yield/YieldK/YieldFromDelegate;
//  - record the source location for the opcode offset;
//  - remember the offset of DefCls/DefClsNop keyed by immIVA[0];
//  - enter an unreachable region after TF-flagged opcodes other than Exit.
// The #undef lines that follow retire the helper macros.
// NOTE(review): a number of lines of this macro (tracing, immediate
// expansion, closing braces) are not present in this listing.
1381 #define O(name, imm, pop, push, flags) \
1382 void parse_opcode_##name(AsmState& as) { \
1383 UNUSED uint32_t immIVA[4]; \
1384 UNUSED auto const thisOpcode = Op::name; \
1385 UNUSED const Offset curOpcodeOff = as.ue->bcPos(); \
1386 std::vector<std::pair<std::string, Offset> > labelJumps; \
1391 as.in.getLineNumber(), \
1392 as.displayStackDepth().c_str(), \
1396 /* Pretend the stack is reachable and empty, same as hphpc */ \
1397 if (as.currentStackDepth == nullptr) { \
1398 as.enterReachableRegion(0); \
1401 if (isFCallStar(Op##name)) { \
1405 /* Other FCall* functions perform their own bounds checking. */ \
1406 if (Op##name == OpFCall || Op##name == OpFCallD || \
1407 Op##name == OpFCallAwait || Op##name == OpFCallM || \
1408 Op##name == OpFCallDM) { \
1409 as.fe->containsCalls = true; \
1412 as.ue->emitOp(Op##name); \
1414 UNUSED size_t immIdx = 0; \
1417 int stackDelta = NUM_PUSH_##push - NUM_POP_##pop; \
1418 as.adjustStack(stackDelta); \
1420 if (isFPush(Op##name)) { \
1421 as.beginFpi(curOpcodeOff); \
1424 for (auto& kv : labelJumps) { \
1425 as.addLabelJump(kv.first, kv.second, curOpcodeOff); \
1428 /* Stack depth should be 0 after RetC, RetV or RetM. */ \
1429 if (thisOpcode == OpRetC || thisOpcode == OpRetV || \
1430 thisOpcode == OpRetM) { \
1431 as.enforceStackDepth(0); \
1434 /* Stack depth should be 1 after resume from suspend. */ \
1435 if (thisOpcode == OpCreateCont || thisOpcode == OpAwait || \
1436 thisOpcode == OpYield || thisOpcode == OpYieldK || \
1437 thisOpcode == OpYieldFromDelegate) { \
1438 as.enforceStackDepth(1); \
1441 /* Record source location. */ \
1442 as.ue->recordSourceLocation(as.srcLoc, curOpcodeOff); \
1444 if (Op##name == OpDefCls || Op##name == OpDefClsNop) { \
1445 as.defClsOffsets.emplace(immIVA[0], curOpcodeOff); \
1448 /* Retain stack depth after calls to exit */ \
1449 if ((instrFlags(thisOpcode) & InstrFlags::TF) && \
1450 (Op##name != OpExit)) { \
1451 as.enterUnreachableRegion(); \
1480 #undef NUM_PUSH_THREE
1481 #undef NUM_PUSH_POS_N
1482 #undef NUM_PUSH_INS_1
1483 #undef NUM_PUSH_CMANY
1487 #undef NUM_POP_THREE
1488 #undef NUM_POP_POS_N
1489 #undef NUM_POP_MFINAL
1490 #undef NUM_POP_F_MFINAL
1491 #undef NUM_POP_C_MFINAL
1492 #undef NUM_POP_V_MFINAL
1493 #undef NUM_POP_FMANY
1494 #undef NUM_POP_UFMANY
1495 #undef NUM_POP_CVUMANY
1496 #undef NUM_POP_CMANY
1497 #undef NUM_POP_SMANY
// Fills opcode_parsers: the local O() definition maps each opcode mnemonic
// (the stringified name) to its generated parse_opcode_<name> function.
// NOTE(review): the lines that expand O over the opcode table and close the
// function are not present in this listing.
1499 void initialize_opcode_map() {
1500 #define O(name, imm, pop, push, flags) \
1501 opcode_parsers[#name] = parse_opcode_##name;
// Helper type whose constructor populates the opcode map by calling
// initialize_opcode_map().
// NOTE(review): the closing line of the struct is not present in this
// listing; presumably an instance is declared there — confirm.
1506 struct Initializer
{
1507 Initializer() { initialize_opcode_map(); }
1510 //////////////////////////////////////////////////////////////////////
1513 * long-string-literal: <string>
1515 * `long-string-literal' is a python-style longstring. See
1516 * readLongString for more details.
1518 String
parse_long_string(AsmState
& as
) {
1519 as
.in
.skipWhitespace();
1521 std::vector
<char> buffer
;
1522 if (!as
.in
.readLongString(buffer
)) {
1523 as
.error("expected \"\"\"-string of serialized php data");
1525 if (buffer
.empty()) {
1526 as
.error("empty php serialized data is not a valid php object");
1529 // String wants a null, and dereferences one past the size we give
1531 buffer
.push_back('\0');
1532 return String(&buffer
[0], buffer
.size() - 1, CopyString
);
1536 * maybe-long-string-literal : long-string-literal
1540 String
parse_maybe_long_string(AsmState
& as
) {
1541 as
.in
.skipWhitespace();
1543 std::vector
<char> buffer
;
1544 if (!as
.in
.readLongString(buffer
)) {
1545 return StrNR(staticEmptyString());
1547 if (buffer
.empty()) {
1548 return StrNR(staticEmptyString());
1551 // String wants a null, and dereferences one past the size we give
1553 buffer
.push_back('\0');
1554 return String(&buffer
[0], buffer
.size() - 1, CopyString
);
1558 * php-serialized : long-string-literal
1561 * `long-string-literal' is a python-style longstring. See
1562 * readLongString for more details.
1564 * Returns a Variant representing the serialized data. It's up to the
1565 * caller to make sure it is a legal literal.
1567 Variant
parse_php_serialized(AsmState
& as
) {
1568 return unserialize_from_string(
1569 parse_long_string(as
),
1570 VariableUnserializer::Type::Internal
1575 * maybe-php-serialized : maybe-long-string-literal
// Reads an optional long string via parse_maybe_long_string and unserializes
// it with VariableUnserializer::Type::Internal.
// NOTE(review): the lines surrounding the return are not present in this
// listing, so any handling of an empty string cannot be seen here — confirm
// against the full source.
1578 Variant
parse_maybe_php_serialized(AsmState
& as
) {
1579 auto s
= parse_maybe_long_string(as
);
1581 return unserialize_from_string(s
, VariableUnserializer::Type::Internal
);
1587 * directive-numiters : integer ';'
1590 void parse_numiters(AsmState
& as
) {
1591 if (as
.numItersSet
) {
1592 as
.error("only one .numiters directive may appear in a given function");
1594 int32_t count
= read_opcode_arg
<int32_t>(as
);
1595 as
.numItersSet
= true;
1596 as
.fe
->setNumIterators(count
);
1597 as
.in
.expectWs(';');
1601 * directive-numclsrefslots : integer ';'
1604 void parse_numclsrefslots(AsmState
& as
) {
1605 if (as
.numClsRefSlotsSet
) {
1606 as
.error("only one .numclsrefslots directive may appear "
1607 "in a given function");
1609 int32_t count
= read_opcode_arg
<int32_t>(as
);
1610 as
.numClsRefSlotsSet
= true;
1611 as
.fe
->setNumClsRefSlots(count
);
1612 as
.in
.expectWs(';');
1616 * directive-declvars : var-name* ';'
1619 * Variables are usually allocated when first seen, but
1620 * declvars can be used to preallocate variables for when
1621 * the exact assignment matters (like for closures).
// Handles the operand list of a .declvars directive.  Each variable name is
// read either as a quoted string or as a bare word, and the directive must
// be terminated by ';'.
// NOTE(review): the loop header, the declaration of `var` and the handling
// of each name are not present in this listing.
1623 void parse_declvars(AsmState
& as
) {
1625 as
.in
.skipWhitespace();
1627 if (as
.in
.readQuotedStr(var
) || as
.in
.readword(var
)) {
1634 as
.in
.expectWs(';');
// Forward declaration: the .try_fault / .try_catch / .try directive parsers
// call parse_function_body recursively with nestLevel + 1.
1637 void parse_function_body(AsmState
&, int nestLevel
= 0);
1640 * directive-fault : identifier integer? '{' function-body
// Parses a .try_fault region: a label name, an optional integer iterator id
// (read only when the next character is not '{'), then '{' and a nested
// function body parsed at nestLevel + 1.  Afterwards a new EHEnt of type
// Fault is appended to the function's EH table; its m_past is the bytecode
// position after the body, m_iterId the parsed iterator id, and m_end is
// kInvalidOffset.  The new entry's index is registered against the label via
// addLabelEHEnt.  `start` is the bytecode position before the region.
// NOTE(review): the declarations of `label` / `iterId` and some EHEnt field
// assignments are not present in this listing.
1643 void parse_fault(AsmState
& as
, int nestLevel
) {
1644 const Offset start
= as
.ue
->bcPos();
1647 if (!as
.in
.readword(label
)) {
1648 as
.error("expected label name after .try_fault");
1651 as
.in
.skipWhitespace();
1652 if (as
.in
.peek() != '{') {
1653 iterId
= read_opcode_arg
<int32_t>(as
);
1655 as
.in
.expectWs('{');
1656 parse_function_body(as
, nestLevel
+ 1);
1658 auto& eh
= as
.fe
->addEHEnt();
1659 eh
.m_type
= EHEnt::Type::Fault
;
1661 eh
.m_past
= as
.ue
->bcPos();
1662 eh
.m_iterId
= iterId
;
1663 eh
.m_end
= kInvalidOffset
;
1665 as
.addLabelEHEnt(label
, as
.fe
->ehtab
.size() - 1);
1669 * directive-catch : identifier integer? '{' function-body
// Parses a .try_catch region: a label name, an optional integer iterator id
// (read only when the next character is not '{'), then '{' and a nested
// function body parsed at nestLevel + 1.  Afterwards a new EHEnt of type
// Catch is appended to the function's EH table; its m_past is the bytecode
// position after the body, m_iterId the parsed iterator id, and m_end is
// kInvalidOffset.  The new entry's index is registered against the label via
// addLabelEHEnt.  `start` is the bytecode position before the region.
// NOTE(review): the declarations of `label` / `iterId` and some EHEnt field
// assignments are not present in this listing.
1672 void parse_catch(AsmState
& as
, int nestLevel
) {
1673 const Offset start
= as
.ue
->bcPos();
1676 if (!as
.in
.readword(label
)) {
1677 as
.error("expected label name after .try_catch");
1680 as
.in
.skipWhitespace();
1681 if (as
.in
.peek() != '{') {
1682 iterId
= read_opcode_arg
<int32_t>(as
);
1684 as
.in
.expectWs('{');
1685 parse_function_body(as
, nestLevel
+ 1);
1687 auto& eh
= as
.fe
->addEHEnt();
1688 eh
.m_type
= EHEnt::Type::Catch
;
1690 eh
.m_past
= as
.ue
->bcPos();
1691 eh
.m_iterId
= iterId
;
1692 eh
.m_end
= kInvalidOffset
;
1694 as
.addLabelEHEnt(label
, as
.fe
->ehtab
.size() - 1);
1698 * directive-try-catch : integer? '{' function-body ".catch" '{' function-body
// Parses `.try [iterId] { body } .catch { handler }`.
//  - The optional iterator id is read only when the next character is not
//    '{'.
//  - The try body is parsed at nestLevel + 1 and must end unreachable;
//    falling through is an assembler error.
//  - The handler begins at the bytecode position right after the try body:
//    a reachable region of depth 0 is entered, a Catch opcode is emitted and
//    the stack depth is then enforced to be 1.
//  - The word ".catch" followed by '{' must come next; the handler body is
//    parsed at nestLevel + 1.
//  - Finally an EHEnt of type Catch is appended, with m_past and m_handler
//    both set to the handler offset and m_iterId to the parsed iterator id.
// NOTE(review): declarations of `iterId` / `word` and some EHEnt field
// assignments are not present in this listing.
1701 void parse_try_catch(AsmState
& as
, int nestLevel
) {
1702 const Offset start
= as
.ue
->bcPos();
1705 as
.in
.skipWhitespace();
1706 if (as
.in
.peek() != '{') {
1707 iterId
= read_opcode_arg
<int32_t>(as
);
1711 as
.in
.expectWs('{');
1712 parse_function_body(as
, nestLevel
+ 1);
1713 if (!as
.isUnreachable()) {
1714 as
.error("expected .try region to not fall-thru");
1717 const Offset handler
= as
.ue
->bcPos();
1720 as
.enterReachableRegion(0);
1721 as
.ue
->emitOp(OpCatch
);
1723 as
.enforceStackDepth(1);
1726 as
.in
.skipWhitespace();
1727 if (!as
.in
.readword(word
) || word
!= ".catch") {
1728 as
.error("expected .catch directive after .try");
1730 as
.in
.skipWhitespace();
1731 as
.in
.expectWs('{');
1732 parse_function_body(as
, nestLevel
+ 1);
1734 const Offset end
= as
.ue
->bcPos();
1736 auto& eh
= as
.fe
->addEHEnt();
1737 eh
.m_type
= EHEnt::Type::Catch
;
1739 eh
.m_past
= handler
;
1740 eh
.m_iterId
= iterId
;
1741 eh
.m_handler
= handler
;
1746 * directive-srcloc : line_no ':' chr_no ',' line_no ':' chr_no ';'
1753 * Record that subsequent bytecodes are at the source location indicated by the
1754 * range of line numbers and character positions specified.
1756 void parse_srcloc(AsmState
& as
, int /*nestLevel*/) {
1757 auto const line0
= as
.in
.readint();
1758 as
.in
.expectWs(':');
1759 auto const char0
= as
.in
.readint();
1760 as
.in
.expectWs(',');
1761 auto const line1
= as
.in
.readint();
1762 as
.in
.expectWs(':');
1763 auto const char1
= as
.in
.readint();
1764 as
.in
.expectWs(';');
1766 as
.srcLoc
= Location::Range(line0
, char0
, line1
, char1
);
1770 * directive-static : '$' local_name = long-string-literal ';'
1773 * Record that the function contains a static named local_name along with an
1774 * associated initializer.
// Handles the operand of a .static directive: '$' followed by the static's
// name, terminated by ';'.  A missing name is an assembler error.  The name
// is interned and stored in an SVInfo that is appended to the function
// emitter's staticVars.
// NOTE(review): the lines between the SVInfo declaration and the '$' check
// (including the declaration of `name`) are not present in this listing.
1776 void parse_static(AsmState
& as
) {
1777 Func::SVInfo svInfo
;
1781 as
.in
.expectWs('$');
1782 if (!as
.in
.readword(name
)) {
1783 as
.error("Statics must be named");
1785 svInfo
.name
= makeStaticString(name
);
1786 as
.fe
->staticVars
.push_back(svInfo
);
1788 as
.in
.expectWs(';');
1792 * directive-doccomment : long-string-literal ';'
1796 void parse_func_doccomment(AsmState
& as
) {
1797 auto const doc
= parse_long_string(as
);
1798 as
.in
.expectWs(';');
1800 as
.fe
->docComment
= makeStaticString(doc
);
1804 * function-body : fbody-line* '}'
1807 * fbody-line : ".numiters" directive-numiters
1808 * | ".numclsrefslots" directive-numclsrefslots
1809 * | ".declvars" directive-declvars
1810 * | ".try_fault" directive-fault
1811 * | ".try_catch" directive-catch
1812 * | ".try" directive-try-catch
1813 * | ".ismemoizewrapper"
1814 * | ".dynamicallycallable"
1815 * | ".srcloc" directive-srcloc
1816 * | ".doc" directive-doccomment
1821 * label-name : identifier ':'
1824 * opcode-line : opcode-mnemonic <junk that depends on opcode> '\n'
// Parses the lines of a function body until the closing '}'.
//  - On '}' the visible code calls as.finishFunction().
//  - A word beginning with '.' is a directive: .ismemoizewrapper sets the
//    flag on the function emitter and expects ';'; the remaining directives
//    are dispatched to their parse_* helpers (the .try* and .srcloc helpers
//    receive nestLevel); any other directive is an assembler error.
//  - A word followed by ':' is recorded as a label target.
//  - Anything else must be an opcode mnemonic found in opcode_parsers.
//    After the opcode's operands only spaces/tabs may precede a newline,
//    carriage return, '#' or EOF; otherwise "too many arguments" is raised.
// NOTE(review): the loop header, the declaration of `word`, the handling of
// nestLevel around '}' and the call through the opcode parser are not
// present in this listing.
1827 void parse_function_body(AsmState
& as
, int nestLevel
/* = 0 */) {
1830 as
.in
.skipWhitespace();
1831 if (as
.in
.peek() == '}') {
1834 as
.finishFunction();
1839 if (!as
.in
.readword(word
)) {
1840 as
.error("unexpected directive or opcode line in function body");
1842 if (word
[0] == '.') {
1843 if (word
== ".ismemoizewrapper") {
1844 as
.fe
->isMemoizeWrapper
= true;
1845 as
.in
.expectWs(';');
1848 if (word
== ".numiters") { parse_numiters(as
); continue; }
1849 if (word
== ".declvars") { parse_declvars(as
); continue; }
1850 if (word
== ".numclsrefslots") { parse_numclsrefslots(as
); continue; }
1851 if (word
== ".try_fault") { parse_fault(as
, nestLevel
); continue; }
1852 if (word
== ".try_catch") { parse_catch(as
, nestLevel
); continue; }
1853 if (word
== ".try") { parse_try_catch(as
, nestLevel
); continue; }
1854 if (word
== ".srcloc") { parse_srcloc(as
, nestLevel
); continue; }
1855 if (word
== ".static") { parse_static(as
); continue; }
1856 if (word
== ".doc") { parse_func_doccomment(as
); continue; }
1857 as
.error("unrecognized directive `" + word
+ "' in function");
1859 if (as
.in
.peek() == ':') {
1861 as
.addLabelTarget(word
);
1865 // Ok, it better be an opcode now.
1866 auto it
= opcode_parsers
.find(word
);
1867 if (it
== opcode_parsers
.end()) {
1868 as
.error("unrecognized opcode `" + word
+ "'");
1872 as
.in
.skipSpaceTab();
1873 if (as
.in
.peek() != '\n' &&
1874 as
.in
.peek() != '\r' &&
1875 as
.in
.peek() != '#' &&
1876 as
.in
.peek() != EOF
) {
1877 as
.error("too many arguments for opcode `" + word
+ "'");
// Parses one user attribute of the form  "name" '(' php-serialized ')'.
// The serialized value must unserialize to a PHP array, otherwise an
// assembler error is raised; the array is converted to a scalar array and
// wrapped in a KindOfArray TypedValue.
// NOTE(review): the line that stores the value into userAttrs is not
// present in this listing.
1882 void parse_user_attribute(AsmState
& as
,
1883 UserAttributeMap
& userAttrs
) {
1884 auto name
= read_litstr(as
);
1885 as
.in
.expectWs('(');
1887 auto var
= parse_php_serialized(as
);
1889 as
.in
.expectWs(')');
1891 if (!var
.isPHPArray()) {
1892 as
.error("user attribute values must be arrays");
1896 make_tv
<KindOfArray
>(ArrayData::GetScalarArray(std::move(var
)));
1900 * attribute : attribute-name
1901 * | string-literal '(' long-string-literal ')'
1904 * attribute-list : empty
1905 * | '[' attribute* ']'
1908 * The `attribute-name' rule is context-sensitive; see as-shared.cpp.
1909 * The second attribute form is for user attributes and only applies
1910 * if attributeMap is non null.
// Parses an optional '[' attribute* ']' list and returns the combined Attr.
//  - If the next character is not '[', returns the initial value at once.
//  - Inside the list, an entry starting with '"' is parsed as a user
//    attribute, but only when userAttrs is non-null.
//  - Other words are looked up with string_to_attr(ctx, word).
//  - The word "nontop" is special-cased when isTop is supplied.
//  - Any other unrecognized word is an assembler error.
// NOTE(review): the declarations of `ret` / `word`, the loop header, the
// accumulation of attribute bits and the isTop update are not present in
// this listing.
1912 Attr
parse_attribute_list(AsmState
& as
, AttrContext ctx
,
1913 UserAttributeMap
*userAttrs
= nullptr,
1914 bool* isTop
= nullptr) {
1915 as
.in
.skipWhitespace();
1917 if (as
.in
.peek() != '[') return Attr(ret
);
1922 as
.in
.skipWhitespace();
1923 if (as
.in
.peek() == ']') break;
1924 if (as
.in
.peek() == '"' && userAttrs
) {
1925 parse_user_attribute(as
, *userAttrs
);
1928 if (!as
.in
.readword(word
)) break;
1930 auto const abit
= string_to_attr(ctx
, word
);
1935 if (isTop
&& word
== "nontop") {
1940 as
.error("unrecognized attribute `" + word
+ "' in this context");
1948 * | '<' maybe-string-literal maybe-string-literal
1951 * type-constraint : empty
1952 * | '<' maybe-string-literal
1955 * This parses type-info if noUserType is false, type-constraint if true
// Parses an optional '<' ... '>' type annotation and returns the pair
// (user type string, TypeConstraint).
//  - If the next character is not '<', a value-initialized pair is returned.
//  - The user type is read with read_maybe_litstr unless noUserType is true,
//    in which case it is nullptr; the type name is always read.
//  - Flag words are then read until '>'; each one recognized by
//    string_to_type_flag is OR-ed into the flags, starting from
//    TypeConstraint::NoFlags.  An unrecognized flag is an assembler error.
// NOTE(review): consumption of '<' / '>', the loop header and the
// declaration of `word` are not present in this listing.
1957 std::pair
<const StringData
*, TypeConstraint
> parse_type_info(
1958 AsmState
& as
, bool noUserType
= false) {
1959 as
.in
.skipWhitespace();
1960 if (as
.in
.peek() != '<') return {};
1963 const StringData
*userType
= noUserType
? nullptr : read_maybe_litstr(as
);
1964 const StringData
*typeName
= read_maybe_litstr(as
);
1967 auto flags
= TypeConstraint::NoFlags
;
1969 as
.in
.skipWhitespace();
1970 if (as
.in
.peek() == '>') break;
1971 if (!as
.in
.readword(word
)) break;
1973 auto const abit
= string_to_type_flag(word
);
1975 flags
= flags
| *abit
;
1979 as
.error("unrecognized type flag `" + word
+ "' in this context");
1982 return std::make_pair(userType
, TypeConstraint
{typeName
, flags
});
1984 TypeConstraint
parse_type_constraint(AsmState
& as
) {
1985 return parse_type_info(as
, true).second
;
1990 * parameter-list : '(' param-name-list ')'
1993 * param-name-list : empty
1994 * | param-name ',' param-name-list
1997 * param-name : '$' identifier dv-initializer
1998 * | '&' '$' identifier dv-initializer
2001 * dv-initializer : empty
2002 * | '=' identifier arg-default
2005 * arg-default : empty
2006 * | '(' long-string-literal ')'
// Parses an optional parenthesised parameter list and appends each parameter
// to the current function emitter.  Returns at once when the next character
// is not '('; an immediately following ')' yields an empty list.
//
// For every parameter the visible code handles, in order:
//  - an attribute list in AttrContext::Parameter, collecting user attributes
//    into param.userAttributes;
//  - an optional "..." variadic marker, which sets param.variadic and
//    AttrVariadicParam on the function; only one variadic is allowed;
//  - an optional "inout" marker, which sets AttrTakesInOutParams; it may not
//    be combined with variadic, nor with ref parameters in the same function;
//  - the type info (user type and type constraint);
//  - a mandatory '$'-prefixed parameter name;
//  - an optional dv-initializer label (not allowed on a variadic parameter),
//    registered with addLabelDVInit, optionally followed by a parenthesised
//    long string holding the default's php code.
//
// The default's text is compared case-insensitively against "null", "true"
// and "false", and otherwise tested with isNumericWithVal when the
// constraint has no underlying data type or it is Int64/Double; when a value
// results, it is stored in param.defaultValue.
//
// Parameters are separated by ',' and the list ends at ')'.
// NOTE(review): the loop header, several declarations (name, label, tv,
// ival, dval, overflow), the by-ref ('&') detection and a number of
// closing/branch lines are not present in this listing.
2009 void parse_parameter_list(AsmState
& as
) {
2010 as
.in
.skipWhitespace();
2011 if (as
.in
.peek() != '(') return;
2014 bool seenVariadic
= false;
2015 bool seenRef
= false;
2018 FuncEmitter::ParamInfo param
;
2019 param
.byRef
= false;
2020 param
.inout
= false;
2022 as
.in
.skipWhitespace();
2023 int ch
= as
.in
.peek();
2024 if (ch
== ')') { as
.in
.getc(); break; } // allow empty param lists
2027 as
.error("functions can only have one variadic argument");
2030 parse_attribute_list(as
, AttrContext::Parameter
, ¶m
.userAttributes
);
2034 if (as
.in
.getc() != '.' ||
2035 as
.in
.getc() != '.') {
2036 as
.error("expecting '...'");
2039 seenVariadic
= true;
2040 param
.variadic
= true;
2041 as
.fe
->attrs
|= AttrVariadicParam
;
2044 if (as
.in
.tryConsume("inout")) {
2046 as
.error("inout parameters cannot be variadic");
2049 as
.error("functions cannot contain both inout and ref parameters");
2052 as
.fe
->attrs
|= AttrTakesInOutParams
;
2055 std::tie(param
.userType
, param
.typeConstraint
) = parse_type_info(as
);
2057 as
.in
.skipWhitespace();
2062 as
.error("parameters cannot be marked both inout and ref");
2064 if (as
.fe
->attrs
& AttrTakesInOutParams
) {
2065 as
.error("functions cannot contain both inout and ref parameters");
2072 as
.error("function parameters must have a $ prefix");
2075 if (!as
.in
.readword(name
)) {
2076 as
.error("expected parameter name after $");
2079 as
.in
.skipWhitespace();
2083 as
.error("variadic parameter cannot have dv-initializer");
2087 if (!as
.in
.readword(label
)) {
2088 as
.error("expected label name for dv-initializer");
2090 as
.addLabelDVInit(label
, as
.fe
->params
.size());
2092 as
.in
.skipWhitespace();
2095 String str
= parse_long_string(as
);
2096 param
.phpCode
= makeStaticString(str
);
2099 if (str
.size() == 4) {
2100 if (!strcasecmp("null", str
.data())) {
2102 } else if (!strcasecmp("true", str
.data())) {
2103 tv
= make_tv
<KindOfBoolean
>(true);
2105 } else if (str
.size() == 5 && !strcasecmp("false", str
.data())) {
2106 tv
= make_tv
<KindOfBoolean
>(false);
2108 auto utype
= param
.typeConstraint
.underlyingDataType();
2109 if (tv
.m_type
== KindOfUninit
&&
2110 (!utype
|| *utype
== KindOfInt64
|| *utype
== KindOfDouble
)) {
2114 auto dt
= str
.get()->isNumericWithVal(ival
, dval
, false, &overflow
);
2115 if (overflow
== 0) {
2116 if (dt
== KindOfInt64
) {
2117 if (utype
== KindOfDouble
) tv
= make_tv
<KindOfDouble
>(ival
);
2118 else tv
= make_tv
<KindOfInt64
>(ival
);
2119 } else if (dt
== KindOfDouble
&&
2120 (!utype
|| utype
== KindOfDouble
)) {
2121 tv
= make_tv
<KindOfDouble
>(dval
);
2125 if (tv
.m_type
!= KindOfUninit
) {
2126 param
.defaultValue
= tv
;
2128 as
.in
.expectWs(')');
2129 as
.in
.skipWhitespace();
2134 as
.fe
->appendParam(makeStaticString(name
), param
);
2136 if (ch
== ')') break;
2137 if (ch
!= ',') as
.error("expected , between parameter names");
// Reads function flag words that precede the opening '{' of a body.
// isGenerator, isAsync, isClosureBody and isPairGenerator set the matching
// boolean on the function emitter; any other word is an assembler error.
// Reading stops at '{' or when no further word can be read.
// NOTE(review): the loop header and the declaration of `flag` are not
// present in this listing.
2141 void parse_function_flags(AsmState
& as
) {
2142 as
.in
.skipWhitespace();
2145 if (as
.in
.peek() == '{') break;
2146 if (!as
.in
.readword(flag
)) break;
2148 if (flag
== "isGenerator") {
2149 as
.fe
->isGenerator
= true;
2150 } else if (flag
== "isAsync") {
2151 as
.fe
->isAsync
= true;
2152 } else if (flag
== "isClosureBody") {
2153 as
.fe
->isClosureBody
= true;
2154 } else if (flag
== "isPairGenerator") {
2155 as
.fe
->isPairGenerator
= true;
2157 as
.error("Unexpected function flag \"" + flag
+ "\"");
2163 * line-range : "(" integer "," integer ")"
// Parses an optional "(" integer "," integer ")" line range into line0 and
// line1.  When the next character is not '(', both values are derived from
// the current input line number instead (line0 = current, line1 = current
// + 1).
// NOTE(review): the return statements and the consumption of '(' are not
// present in this listing, so the meaning of the bool result cannot be
// confirmed from here.
2166 bool parse_line_range(AsmState
& as
, int& line0
, int& line1
) {
2167 as
.in
.skipWhitespace();
2168 if (as
.in
.peek() != '(') {
2169 line0
= as
.in
.getLineNumber();
2170 line1
= as
.in
.getLineNumber() + 1;
2174 line0
= as
.in
.readint();
2175 as
.in
.expectWs(',');
2176 line1
= as
.in
.readint();
2177 as
.in
.expectWs(')');
2182 * If we haven't seen a pseudomain and we are compiling systemlib,
2183 * add a pseudomain and return true
2184 * If we haven't seen a pseudomain and we are not compiling systemlib,
2185 * return false so that the caller can give an assembler error
2186 * Otherwise, return true
// Makes sure a pseudomain exists before a .function or .class block.
// If none has been emitted yet and systemlib is still being initialised
// (SystemLib::s_inited is false), a trivial pseudomain is added to the unit
// and emittedPseudoMain is set.
// NOTE(review): the return statements and the non-systemlib branch are not
// present in this listing.
2188 bool ensure_pseudomain(AsmState
& as
) {
2189 if (!as
.emittedPseudoMain
) {
2190 if (!SystemLib::s_inited
) {
2192 * The SystemLib::s_hhas_unit is required to be merge-only,
2193 * and we create the source by concatenating separate .hhas files
2194 * Rather than choosing one to have the .main directive, we just
2195 * generate a trivial pseudoMain automatically.
2197 as
.ue
->addTrivialPseudoMain();
2198 as
.emittedPseudoMain
= true;
// Name of the user attribute that check_native() looks up to detect native
// functions.
2206 static StaticString
s_native("__Native");
// Derives a data type from a type constraint via get_datatype(), using the
// constraint's type name when it has one.  Whether the user type carried
// type arguments is inferred from the presence of '<' in its text.
// NOTE(review): one get_datatype argument line and the function's fallback
// return are not present in this listing.
2208 MaybeDataType
type_constraint_to_data_type(LowStringPtr user_type
,
2209 const TypeConstraint
& tc
) {
2210 if (auto type
= tc
.typeName()) {
2211 // in type_annotation.cpp this code uses m_typeArgs
2212 // as indicator that type can represent one of collection types
2213 // when we extract data from the constraint we know if type is one of
2214 // collection types but we don't have direct way to figure out if
2215 // type used to have type arguments - do it indirectly by checking
2216 // if name of user type contains '<'
2217 auto has_type_args
=
2218 user_type
&& user_type
->slice().str().find('<') != std::string::npos
;
2219 return get_datatype(
2220 type
->toCppString(),
2222 false, // no syntactic functions in type annotations
2223 false, // no xhp type annotation
2224 false, // no tuples in type annotation
// User-attribute names that check_native() looks up on native functions.
2231 static const StaticString
2232 s_AllowStatic("__AllowStatic"),
2233 s_ParamCoerceModeNull("__ParamCoerceModeNull"),
2234 s_ParamCoerceModeFalse("__ParamCoerceModeFalse");
2238 * Checks whether the current function is native by looking at the user
2239 * attribute map and sets the isNative flag accordingly
2240 * If the given function is an opcode implementation, then isNative is not set
// Applies native-function handling to the current function emitter when its
// user attributes contain "__Native".
//  - Native functions outside systemlib (SystemLib::s_inited already true)
//    are an assembler error.
//  - hniReturnType is chosen based on is_construct_or_destruct; the visible
//    alternative derives it from the return type constraint.
//  - parseNativeAttributes is consulted for Native::AttrOpCodeImpl.
//  - AttrBuiltin, AttrSkipFrame and AttrMayUseVV are added.
//  - AttrRequiresThis is added under a condition that includes the function
//    being non-static and lacking "__AllowStatic".
//  - The param-coerce mode comes from the "__ParamCoerceModeFalse" /
//    "__ParamCoerceModeNull" attributes, defaulting to Null when neither bit
//    ends up set.
//  - Every parameter's type constraint is converted with
//    type_constraint_to_data_type.
// NOTE(review): several assignment targets and condition fragments are not
// present in this listing.
2242 void check_native(AsmState
& as
, bool is_construct_or_destruct
) {
2243 if (as
.fe
->userAttributes
.count(s_native
.get())) {
2244 if (SystemLib::s_inited
) {
2245 as
.error("Native function may only appear in systemlib");
2248 as
.fe
->hniReturnType
= is_construct_or_destruct
2250 : type_constraint_to_data_type(as
.fe
->retUserType
,
2251 as
.fe
->retTypeConstraint
);
2253 !(as
.fe
->parseNativeAttributes(as
.fe
->attrs
) & Native::AttrOpCodeImpl
);
2255 // set extra attributes
2256 as
.fe
->attrs
|= AttrBuiltin
| AttrSkipFrame
| AttrMayUseVV
;
2259 !(as
.fe
->attrs
& AttrStatic
) &&
2260 !as
.fe
->userAttributes
.count(s_AllowStatic
.get())) {
2261 as
.fe
->attrs
|= AttrRequiresThis
;
2263 if (as
.fe
->userAttributes
.count(s_ParamCoerceModeFalse
.get())) {
2264 as
.fe
->attrs
|= AttrParamCoerceModeFalse
;
2266 if (as
.fe
->userAttributes
.count(s_ParamCoerceModeNull
.get())) {
2267 as
.fe
->attrs
|= AttrParamCoerceModeNull
;
2269 if (!(as
.fe
->attrs
&
2270 (AttrParamCoerceModeFalse
| AttrParamCoerceModeNull
))) {
2271 as
.fe
->attrs
|= AttrParamCoerceModeNull
;
2274 for (auto& pi
: as
.fe
->params
) {
2276 type_constraint_to_data_type(pi
.userType
, pi
.typeConstraint
);
2282 * directive-function : attribute-list ?line-range type-info identifier
2283 * parameter-list function-flags '{' function-body
// Parses a .function block.
//  - A pseudomain must already exist (or be creatable); otherwise an error.
//  - The attribute list is parsed in AttrContext::Func, collecting user
//    attributes and the top-level flag.
//  - While systemlib is initialising, AttrUnique | AttrPersistent |
//    AttrBuiltin are added; AttrMayUseVV is always added.
//  - A non-top function after a top-level one is an assembler error.
//  - Line range, return type info and the function name are read, then a
//    new FuncEmitter is created and initialised at the current bytecode
//    position with the return type info and user attributes.
//  - Parameter list and function flags are parsed, native handling is
//    applied, and finally '{' plus the function body is parsed with the
//    source location reset to {-1,-1,-1,-1}.
// NOTE(review): the declarations of isTop, line0/line1 and name are not
// present in this listing.
2286 void parse_function(AsmState
& as
) {
2287 if (!ensure_pseudomain(as
)) {
2288 as
.error(".function blocks must all follow the .main block");
2291 as
.in
.skipWhitespace();
2295 UserAttributeMap userAttrs
;
2296 Attr attrs
= parse_attribute_list(as
, AttrContext::Func
, &userAttrs
, &isTop
);
2298 if (!SystemLib::s_inited
) {
2299 attrs
|= AttrUnique
| AttrPersistent
| AttrBuiltin
;
2302 // Be conservative by default. HHBBC can clear it where appropriate.
2303 attrs
|= AttrMayUseVV
;
2305 if(!isTop
&& as
.emittedTopLevelFunc
) {
2306 as
.error("All top level functions must be defined after any "
2307 "non-top functions");
2310 as
.emittedTopLevelFunc
|= isTop
;
2314 parse_line_range(as
, line0
, line1
);
2316 auto typeInfo
= parse_type_info(as
);
2318 if (!as
.in
.readname(name
)) {
2319 as
.error(".function must have a name");
2322 as
.fe
= as
.ue
->newFuncEmitter(makeStaticString(name
));
2323 as
.fe
->init(line0
, line1
, as
.ue
->bcPos(), attrs
, isTop
, 0);
2324 std::tie(as
.fe
->retUserType
, as
.fe
->retTypeConstraint
) = typeInfo
;
2325 as
.fe
->userAttributes
= userAttrs
;
2327 parse_parameter_list(as
);
2328 parse_function_flags(as
);
2330 check_native(as
, false);
2332 as
.in
.expectWs('{');
2334 as
.srcLoc
= Location::Range
{-1,-1,-1,-1};
2335 parse_function_body(as
);
2339 * directive-method : attribute-list ?line-range type-info identifier
2340 * parameter-list function-flags '{' function-body
// Parses a .method block inside a class.
//  - The attribute list is parsed in AttrContext::Func, collecting user
//    attributes; AttrBuiltin is added while systemlib is initialising.
//  - Line range, return type info and the method name are read.
//  - A method emitter is created for the current class emitter, added to
//    it, and initialised at the current bytecode position (never top-level).
//  - Parameter list and function flags are parsed; native handling treats
//    "__construct" and "__destruct" specially.
//  - Finally '{' plus the body is parsed with the source location reset to
//    {-1,-1,-1,-1}.
// NOTE(review): the declarations of line0/line1 and name are not present
// in this listing.
2343 void parse_method(AsmState
& as
) {
2344 as
.in
.skipWhitespace();
2346 UserAttributeMap userAttrs
;
2347 Attr attrs
= parse_attribute_list(as
, AttrContext::Func
, &userAttrs
);
2349 if (!SystemLib::s_inited
) {
2350 attrs
|= AttrBuiltin
;
2355 parse_line_range(as
, line0
, line1
);
2357 auto typeInfo
= parse_type_info(as
);
2359 if (!as
.in
.readname(name
)) {
2360 as
.error(".method requires a method name");
2363 as
.fe
= as
.ue
->newMethodEmitter(makeStaticString(name
), as
.pce
);
2364 as
.pce
->addMethod(as
.fe
);
2365 as
.fe
->init(line0
, line1
,
2366 as
.ue
->bcPos(), attrs
, false, 0);
2367 std::tie(as
.fe
->retUserType
, as
.fe
->retTypeConstraint
) = typeInfo
;
2368 as
.fe
->userAttributes
= userAttrs
;
2370 parse_parameter_list(as
);
2371 parse_function_flags(as
);
2373 check_native(as
, name
== "__construct" || name
== "__destruct");
2375 as
.in
.expectWs('{');
2377 as
.srcLoc
= Location::Range
{-1,-1,-1,-1};
2378 parse_function_body(as
);
2382 * member-tv-initializer : '=' php-serialized ';'
// Parses the initializer that follows a property or constant name and
// returns it as a TypedValue (initialised to null first).
//  - After '=', either the word "uninit" followed by ';' (yielding an uninit
//    value), or a php-serialized long string followed by ';'.
//  - Object and resource initializers are assembler errors; other values
//    are converted with setEvalScalar().
//  - A bare ';' is also accepted; any other leading character is an error.
// NOTE(review): the declaration of tvInit, the '=' branch header and the
// return statements are not present in this listing.
2387 TypedValue
parse_member_tv_initializer(AsmState
& as
) {
2388 as
.in
.skipWhitespace();
2391 tvWriteNull(tvInit
); // Don't confuse Variant with uninit data
2393 int what
= as
.in
.getc();
2395 as
.in
.skipWhitespace();
2397 if (as
.in
.peek() != '\"') {
2398 // It might be an uninitialized property/constant.
2399 if (!as
.in
.tryConsume("uninit")) {
2400 as
.error("Expected \"\"\" or \"uninit\" after '=' in "
2401 "const/property initializer");
2403 as
.in
.expectWs(';');
2404 tvWriteUninit(tvInit
);
2408 tvAsVariant(&tvInit
) = parse_php_serialized(as
);
2409 if (tvInit
.m_type
== KindOfObject
) {
2410 as
.error("property initializer can't be an object");
2411 } else if (tvInit
.m_type
== KindOfResource
) {
2412 as
.error("property initializer can't be a resource");
2414 tvAsVariant(&tvInit
).setEvalScalar();
2416 as
.in
.expectWs(';');
2417 } else if (what
== ';') {
2420 as
.error("expected '=' or ';' after property name");
2427 * directive-property : attribute-list maybe-long-string-literal type-info
2428 * identifier member-tv-initializer
2431 * Define a property with an associated type and heredoc.
// Parses a .property directive: attribute list (AttrContext::Prop), an
// optional long string (the heredoc), type info (only the user type is
// kept, defaulting to the static empty string), the property name and its
// initializer; the property is then added to the current class emitter.
// NOTE(review): the declaration of `name` and all addProperty arguments
// after the name are not present in this listing.
2433 void parse_property(AsmState
& as
) {
2434 as
.in
.skipWhitespace();
2436 Attr attrs
= parse_attribute_list(as
, AttrContext::Prop
);
2438 auto const heredoc
= makeStaticString(parse_maybe_long_string(as
));
2439 auto const userTy
= parse_type_info(as
, false).first
;
2440 auto const userTyStr
= userTy
? userTy
: staticEmptyString();
2443 if (!as
.in
.readword(name
)) {
2444 as
.error("expected name for property");
2447 TypedValue tvInit
= parse_member_tv_initializer(as
);
2448 as
.pce
->addProperty(makeStaticString(name
),
2457 * const-flags : isType
2460 * directive-const : identifier const-flags member-tv-initializer
2461 * | identifier const-flags ';'
// Parses a .const directive: the constant's name, an optional "isType"
// flag, and then either ';' (the constant is added as an abstract constant)
// or an initializer (the constant is added with that value).
// NOTE(review): the declaration of `name` and the trailing arguments of
// both add* calls are not present in this listing.
2464 void parse_constant(AsmState
& as
) {
2465 as
.in
.skipWhitespace();
2468 if (!as
.in
.readword(name
)) {
2469 as
.error("expected name for constant");
2472 bool isType
= as
.in
.tryConsume("isType");
2473 as
.in
.skipWhitespace();
2475 if (as
.in
.peek() == ';') {
2477 as
.pce
->addAbstractConstant(makeStaticString(name
),
2478 staticEmptyString(),
2483 TypedValue tvInit
= parse_member_tv_initializer(as
);
2484 as
.pce
->addConstant(makeStaticString(name
),
2485 staticEmptyString(), &tvInit
,
2486 staticEmptyString(),
2491 * directive-default-ctor : ';'
2494 * No-op, for backward compat
2496 void parse_default_ctor(AsmState
& as
) {
2497 assertx(!as
.fe
&& as
.pce
);
2498 as
.in
.expectWs(';');
2502 * directive-use : identifier+ ';'
2503 * | identifier+ '{' use-line* '}'
2506 * use-line : use-name-ref "insteadof" identifier+ ';'
2507 * | use-name-ref "as" attribute-list identifier ';'
2508 * | use-name-ref "as" attribute-list ';'
// Parses a .use directive.
//  - One or more trait names are read (none is an assembler error) and each
//    is added to the class emitter as a used trait.
//  - If a '{' block follows, each line in it starts with an identifier; when
//    a ':' follows, a second identifier is read after "::", otherwise the
//    first word is taken as the identifier itself.
//  - "as" lines: an attribute list (AttrContext::TraitImport) and an
//    optional alias; at least one of the two is required.  A trait alias
//    rule is added.
//  - "insteadof" lines: the trait name must be non-empty and at least one
//    other trait name must follow.  A trait precedence rule is added.
//  - Any other keyword is an assembler error; every line ends with ';'.
// NOTE(review): loop headers, the declarations of name/alias/whom, the
// handling when no '{' follows, and the manipulation of traitName around
// "::" are not present in this listing.
2511 void parse_use(AsmState
& as
) {
2512 std::vector
<std::string
> usedTraits
;
2515 if (!as
.in
.readword(name
)) break;
2516 usedTraits
.push_back(name
);
2518 if (usedTraits
.empty()) {
2519 as
.error(".use requires a trait name");
2522 for (size_t i
= 0; i
< usedTraits
.size(); ++i
) {
2523 as
.pce
->addUsedTrait(makeStaticString(usedTraits
[i
]));
2525 as
.in
.skipWhitespace();
2526 if (as
.in
.peek() != '{') {
2533 as
.in
.skipWhitespace();
2534 if (as
.in
.peek() == '}') break;
2536 std::string traitName
;
2537 std::string identifier
;
2538 if (!as
.in
.readword(traitName
)) {
2539 as
.error("expected identifier for line in .use block");
2541 as
.in
.skipWhitespace();
2542 if (as
.in
.peek() == ':') {
2545 if (!as
.in
.readword(identifier
)) {
2546 as
.error("expected identifier after ::");
2549 identifier
= traitName
;
2553 if (as
.in
.tryConsume("as")) {
2554 Attr attrs
= parse_attribute_list(as
, AttrContext::TraitImport
);
2556 if (!as
.in
.readword(alias
)) {
2557 if (attrs
!= AttrNone
) {
2560 as
.error("expected identifier or attribute list after "
2561 "`as' in .use block");
2565 as
.pce
->addTraitAliasRule(PreClass::TraitAliasRule(
2566 makeStaticString(traitName
),
2567 makeStaticString(identifier
),
2568 makeStaticString(alias
),
2570 } else if (as
.in
.tryConsume("insteadof")) {
2571 if (traitName
.empty()) {
2572 as
.error("Must specify TraitName::name when using a trait insteadof");
2575 PreClass::TraitPrecRule
precRule(
2576 makeStaticString(traitName
),
2577 makeStaticString(identifier
));
2579 bool addedOtherTraits
= false;
2581 while (as
.in
.readword(whom
)) {
2582 precRule
.addOtherTraitName(makeStaticString(whom
));
2583 addedOtherTraits
= true;
2585 if (!addedOtherTraits
) {
2586 as
.error("one or more trait names expected after `insteadof'");
2589 as
.pce
->addTraitPrecRule(precRule
);
2591 as
.error("expected `as' or `insteadof' in .use block");
2594 as
.in
.expectWs(';');
2601 * directive-enum_ty : type-constraint ';'
// Handles a .enum_ty directive: parses a type constraint, stores it as the
// enum base type of the current class emitter, and expects ';'.  The
// enumTySet flag is set so a repeated directive can be diagnosed.
// NOTE(review): the condition guarding the "only one .enum_ty" error is not
// present in this listing.
2605 void parse_enum_ty(AsmState
& as
) {
2607 as
.error("only one .enum_ty directive may appear in a given class");
2609 as
.enumTySet
= true;
2611 as
.pce
->setEnumBaseTy(parse_type_constraint(as
));
2613 as
.in
.expectWs(';');
2617 * directive-require : 'extends' '<' identifier '>' ';'
2618 * | 'implements' '<' identifier '>' ';'
// Handles a .require directive: the keyword "extends" or "implements"
// (anything else is an assembler error), then '<' name '>' and ';'.  A
// class requirement for the name is added to the current class emitter;
// its second argument depends on whether "extends" was seen.
// NOTE(review): the declaration of `name` and the remainder of the
// ClassRequirement arguments are not present in this listing.
2622 void parse_require(AsmState
& as
) {
2623 as
.in
.skipWhitespace();
2625 bool extends
= as
.in
.tryConsume("extends");
2626 if (!extends
&& !as
.in
.tryConsume("implements")) {
2627 as
.error(".require should be extends or implements");
2630 as
.in
.expectWs('<');
2632 if (!as
.in
.readname(name
)) {
2633 as
.error(".require expects a class or interface name");
2635 as
.in
.expectWs('>');
2637 as
.pce
->addClassRequirement(PreClass::ClassRequirement(
2638 makeStaticString(name
), extends
2641 as
.in
.expectWs(';');
2645 * directive-doccomment : long-string-literal ';'
2649 void parse_cls_doccomment(AsmState
& as
) {
2650 auto const doc
= parse_long_string(as
);
2651 as
.in
.expectWs(';');
2653 as
.pce
->setDocComment(makeStaticString(doc
));
2657 * class-body : class-body-line* '}'
2660 * class-body-line : ".method" directive-method
2661 * | ".property" directive-property
2662 * | ".const" directive-const
2663 * | ".use" directive-use
2664 * | ".default_ctor" directive-default-ctor
2665 * | ".enum_ty" directive-enum-ty
2666 * | ".require" directive-require
2667 * | ".doc" directive-doccomment
// Parses the directives inside a class body.  A pseudomain must exist (or
// be creatable) first.  Each word read is dispatched to the matching
// directive parser (.method, .property, .const, .use, .default_ctor,
// .enum_ty, .require, .doc); any other word is an assembler error.
// NOTE(review): the lines after the loop (closing of the body) are not
// present in this listing.
2670 void parse_class_body(AsmState
& as
) {
2671 if (!ensure_pseudomain(as
)) {
2672 as
.error(".class blocks must all follow the .main block");
2675 std::string directive
;
2676 while (as
.in
.readword(directive
)) {
2677 if (directive
== ".method") { parse_method(as
); continue; }
2678 if (directive
== ".property") { parse_property(as
); continue; }
2679 if (directive
== ".const") { parse_constant(as
); continue; }
2680 if (directive
== ".use") { parse_use(as
); continue; }
2681 if (directive
== ".default_ctor") { parse_default_ctor(as
); continue; }
2682 if (directive
== ".enum_ty") { parse_enum_ty(as
); continue; }
2683 if (directive
== ".require") { parse_require(as
); continue; }
2684 if (directive
== ".doc") { parse_cls_doccomment(as
); continue; }
2686 as
.error("unrecognized directive `" + directive
+ "' in class");
// Decides the hoistability of the class currently being emitted.
//  - A class whose only method is a closure body is NotHoistable.
//  - A class with interfaces, used traits, requirements or AttrEnum yields
//    Mergeable.
//  - A class whose parent is named but not yet recorded in as.hoistables
//    yields MaybeHoistable.
//  - Otherwise the class name is recorded in as.hoistables and the result is
//    AlwaysHoistable when the class has AttrUnique, else MaybeHoistable.
// NOTE(review): `system` (AttrBuiltin) is computed but its use is not
// visible; an original line between the closure check and the interface
// check is missing from this listing and may gate the two middle rules —
// confirm against the full source.
2691 PreClass::Hoistable
compute_hoistable(AsmState
& as
,
2692 const std::string
&name
,
2693 const std::string
&parentName
) {
2694 auto &pce
= *as
.pce
;
2695 bool system
= pce
.attrs() & AttrBuiltin
;
2697 if (pce
.methods().size() == 1 && pce
.methods()[0]->isClosureBody
) {
2698 return PreClass::NotHoistable
;
2701 if (!pce
.interfaces().empty() ||
2702 !pce
.usedTraits().empty() ||
2703 !pce
.requirements().empty() ||
2704 (pce
.attrs() & AttrEnum
)) {
2705 return PreClass::Mergeable
;
2707 if (!parentName
.empty() && !as
.hoistables
.count(parentName
)) {
2708 return PreClass::MaybeHoistable
;
2711 as
.hoistables
.insert(name
);
2713 return pce
.attrs() & AttrUnique
?
2714 PreClass::AlwaysHoistable
: PreClass::MaybeHoistable
;
2718 * directive-class : ?"top" attribute-list identifier ?line-range
2719 * extension-clause implements-clause '{' class-body
2722 * extension-clause : empty
2723 * | "extends" identifier
2726 * implements-clause : empty
2727 * | "implements" '(' identifier* ')'
// Parses a .class directive: the attribute list, the class name, a
// line range, an optional "extends" clause, an optional "implements"
// list, and finally the '{'-introduced class body.
//
// A bare PreClassEmitter is created through
// as.ue->newBarePreClassEmitter and stored in as.pce; interfaces and
// user attributes are attached to it before the body is parsed, and
// its hoistability is set after the body has been parsed.
//
// Reports an error if the class name is missing or if "extends" is not
// followed by a name.
2731 void parse_class(AsmState
& as
) {
2732 as
.in
.skipWhitespace();
2736 UserAttributeMap userAttrs
;
// parse_attribute_list also receives &isTop, which is consulted when
// hoistability is decided at the end of this function.
2737 Attr attrs
= parse_attribute_list(as
, AttrContext::Class
, &userAttrs
, &isTop
);
// While SystemLib is not yet initialized, every class is additionally
// marked unique, persistent and builtin.
2738 if (!SystemLib::s_inited
) {
2739 attrs
|= AttrUnique
| AttrPersistent
| AttrBuiltin
;
2743 if (!as
.in
.readname(name
)) {
2744 as
.error(".class must have a name");
2746 if (ParserBase::IsAnonymousClassName(name
)) {
2747 // refresh names of anonymous classes
2748 // to make sure they are unique
// Everything from the first ';' onward is dropped before a new
// anonymous class name is generated from the remaining prefix.
2749 auto p
= name
.find(';');
2750 if (p
!= std::string::npos
) {
2751 name
= name
.substr(0, p
);
2752 name
= HPHP::NewAnonymousClassName(name
);
2758 parse_line_range(as
, line0
, line1
);
// Optional parent class.
2760 std::string parentName
;
2761 if (as
.in
.tryConsume("extends")) {
2762 if (!as
.in
.readname(parentName
)) {
2763 as
.error("expected parent class name after `extends'");
// Optional interface list: names are collected until readname fails.
2767 std::vector
<std::string
> ifaces
;
2768 if (as
.in
.tryConsume("implements")) {
2769 as
.in
.expectWs('(');
2771 while (as
.in
.readname(word
)) {
2772 ifaces
.push_back(word
);
// Lookup in as.defClsOffsets keyed by the current pre-class count.
// NOTE(review): the remainder of this statement is not among the lines
// shown here.
2777 auto off
= folly::get_default(as
.defClsOffsets
, as
.ue
->numPreClasses(),
// The emitter starts out as MaybeHoistable; the final value is set
// after the body has been parsed.
2780 as
.pce
= as
.ue
->newBarePreClassEmitter(name
, PreClass::MaybeHoistable
);
2785 makeStaticString(parentName
),
2786 staticEmptyString());
2787 for (auto const& iface
: ifaces
) {
2788 as
.pce
->addInterface(makeStaticString(iface
));
2790 as
.pce
->setUserAttributes(userAttrs
);
2792 as
.in
.expectWs('{');
2793 parse_class_body(as
);
// Only classes flagged isTop go through compute_hoistable; all others
// are NotHoistable.
2795 as
.pce
->setHoistable(
2796 isTop
? compute_hoistable(as
, name
, parentName
) : PreClass::NotHoistable
2803 * directive-filepath : quoted-string-literal ';'
// Parses a .filepath directive: reads a string literal with
// read_litstr, stores it as the unit emitter's m_filepath, and then
// expects the terminating ';'.
2806 void parse_filepath(AsmState
& as
) {
2807 auto const str
= read_litstr(as
);
2808 as
.ue
->m_filepath
= str
;
2809 as
.in
.expectWs(';');
2813 * directive-main : ?line-range '{' function-body
// Parses the .main directive, which defines the unit's pseudo-main.
//
// Reports an error if a .main has already been emitted. Otherwise it
// reads the line range, expects '{', initializes main on the unit
// emitter, makes it the current function emitter (as.fe), marks the
// pseudo-main as emitted, and parses the function body.
2816 void parse_main(AsmState
& as
) {
2817 if (as
.emittedPseudoMain
) {
2818 as
.error("Multiple .main directives found");
2823 bool fromSrcLoc
= parse_line_range(as
, line0
, line1
);
2825 as
.in
.expectWs('{');
2827 as
.ue
->initMain(line0
, line1
);
2828 as
.fe
= as
.ue
->getMain();
2829 as
.emittedPseudoMain
= true;
// as.srcLoc becomes either the parsed line range (columns 0) or an
// all -1 range.
// NOTE(review): the branch choosing between the two assignments is not
// among the lines shown here; presumably it tests fromSrcLoc -- confirm.
2831 as
.srcLoc
= Location::Range
{line0
,0,line1
,0};
2833 as
.srcLoc
= Location::Range
{-1,-1,-1,-1};
2835 parse_function_body(as
);
2839 * directive-adata : identifier '=' php-serialized ';'
// Parses an .adata directive: a label, '=', a php-serialized value and
// a terminating ';'.
//
// The value must be an array. It is converted with
// ArrayData::GetScalarArray, merged into the unit emitter, and
// recorded in as.adataMap under the label.
//
// Reports an error when the label is missing, when the label is
// already present in as.adataMap, or when the value is not an array.
2842 void parse_adata(AsmState
& as
) {
2843 as
.in
.skipWhitespace();
2844 std::string dataLabel
;
2845 if (!as
.in
.readword(dataLabel
)) {
2846 as
.error("expected name for .adata");
2848 if (as
.adataMap
.count(dataLabel
)) {
2849 as
.error("duplicate adata label name " + dataLabel
);
2852 as
.in
.expectWs('=');
2853 auto var
= parse_php_serialized(as
);
2854 if (!var
.isArray()) {
2855 as
.error(".adata only supports serialized arrays");
// The scalar array is both merged into the unit and remembered by
// label in as.adataMap.
2857 auto const data
= ArrayData::GetScalarArray(std::move(var
));
2858 as
.ue
->mergeArray(data
);
2859 as
.adataMap
[dataLabel
] = data
;
2861 as
.in
.expectWs(';');
2865 * directive-alias : attribute-list identifier '=' type-constraint
2866 * maybe-php-serialized ';'
2869 * We represent alias type information using the syntax for
2870 * TypeConstraints. We populate the name and nullable field of the
2871 * alias directly from the specified type constraint and derive the
2872 * AnnotType from the computed AnnotType in the constraint.
2874 * Following the type-constraint we encode the serialized type structure
2875 * corresponding to this alias.
// Parses an .alias directive: attribute list, alias name, '=', a type
// constraint, an optional php-serialized type structure and a
// terminating ';'.
//
// The fields of `record` are filled from the parsed pieces, the record
// is registered with as.ue->addTypeAlias, and the returned id is
// pushed as a mergeable of kind Unit::MergeKind::TypeAlias.
//
// Reports an error when the name is missing or when a type structure
// is present but is not an array.
2877 void parse_alias(AsmState
& as
) {
2878 as
.in
.skipWhitespace();
2881 Attr attrs
= parse_attribute_list(as
, AttrContext::Alias
, &record
.userAttrs
);
// While SystemLib is not yet initialized, aliases are additionally
// marked persistent.
2882 if (!SystemLib::s_inited
) {
2883 attrs
|= AttrPersistent
;
2886 if (!as
.in
.readname(name
)) {
2887 as
.error(".alias must have a name");
2889 as
.in
.expectWs('=');
2891 TypeConstraint ty
= parse_type_constraint(as
);
2892 Variant ts
= parse_maybe_php_serialized(as
);
// An uninitialized ts is accepted; an initialized one must be an array.
2894 if (ts
.isInitialized() && !ts
.isArray()) {
2895 as
.error(".alias must have an array type structure");
// A constraint without a type name is represented by the empty static
// string.
2898 const StringData
* typeName
= ty
.typeName();
2899 if (!typeName
) typeName
= staticEmptyString();
2900 const StringData
* sname
= makeStaticString(name
);
2901 // Merge to ensure namedentity creation, according to
2902 // emitTypedef in emitter.cpp
2903 as
.ue
->mergeLitstr(sname
);
2904 as
.ue
->mergeLitstr(typeName
);
2906 record
.name
= sname
;
2907 record
.value
= typeName
;
// An empty type name maps to AnnotType::Mixed; otherwise the
// constraint's own type is used.
2908 record
.type
= typeName
->empty() ? AnnotType::Mixed
: ty
.type();
2909 record
.nullable
= (ty
.flags() & TypeConstraint::Nullable
) != 0;
2910 record
.attrs
= attrs
;
// The type structure is stored only when one was supplied.
2911 if (ts
.isInitialized()) {
2912 record
.typeStructure
= ArrNR(ArrayData::GetScalarArray(std::move(ts
)));
2914 auto aliasId
= as
.ue
->addTypeAlias(record
);
2915 as
.ue
->pushMergeableTypeAlias(Unit::MergeKind::TypeAlias
, aliasId
);
2917 as
.in
.expectWs(';');
2921 * directive-hh-file : '1' ';'
// Parses an .hh_file directive: a single word followed by ';'.
//
// The unit emitter's m_isHHFile is set to true exactly when the word
// is "1". Reports an error when no word can be read, or when the word
// is neither "1" nor "0".
2925 void parse_hh_file(AsmState
& as
) {
2926 as
.in
.skipWhitespace();
2928 if (!as
.in
.readword(word
)) {
2929 as
.error(".hh_file must have a value");
2931 as
.ue
->m_isHHFile
= word
== "1";
// The flag is false for every word other than "1"; of those, only "0"
// is accepted.
2933 if (!as
.ue
->m_isHHFile
&& word
!= "0") {
2934 as
.error(".hh_file must be either 1 or 0");
2937 as
.in
.expectWs(';');
2941 * directive-strict : '1' ';'
// Parses a .strict directive: a single word followed by ';'.
//
// Both m_useStrictTypes and m_useStrictTypesForBuiltins on the unit
// emitter are set to true exactly when the word is "1".
//
// Reports an error when no word can be read, when
// RuntimeOption::PHP7_ScalarTypes is off, or when the word is neither
// "1" nor "0".
2945 void parse_strict(AsmState
& as
) {
2946 as
.in
.skipWhitespace();
2948 if (!as
.in
.readword(word
)) {
2949 as
.error(".strict must have a value");
2951 if (!RuntimeOption::PHP7_ScalarTypes
) {
2952 as
.error("Cannot set .strict without PHP7 ScalarTypes");
// Chained assignment: both flags receive the same value.
2955 as
.ue
->m_useStrictTypes
= as
.ue
->m_useStrictTypesForBuiltins
= word
== "1";
2957 if (!as
.ue
->m_useStrictTypes
&& word
!= "0") {
2958 as
.error("Strict types must be either 1 or 0");
2961 as
.in
.expectWs(';');
2965 * directive-symbols : '{' identifier identifier* '}'
// Parses a '{'-introduced list of symbols. Each symbol is the run of
// characters up to the next space, tab, CR, LF, '#' or '}', and is
// passed to the AsmCallbacks member function given by onSymbol,
// invoked on as.callbacks.
//
// NOTE(review): several lines of this function (part of the parameter
// list, the loop headers and the branch between the two code paths
// below) are not among the lines shown here -- confirm details against
// the full source.
2967 void parse_symbol_refs(
2969 void (AsmCallbacks::*onSymbol
)(const std::string
&)
2971 as
.in
.expectWs('{');
2975 as
.in
.skipWhitespace();
// Accumulate one symbol, stopping at whitespace, '#' or '}'.
2977 as
.in
.consumePred(!boost::is_any_of(" \t\r\n#}"),
2978 std::back_inserter(symbol
));
2979 if (symbol
.empty()) {
2982 (as
.callbacks
->*onSymbol
)(symbol
);
// Alternate path: consume input up to the closing '}' without invoking
// the callback.
2985 while (as
.in
.peek() != '}') {
2986 as
.in
.skipWhitespace();
2987 if (!as
.in
.skipPred(!boost::is_any_of("#}"))) break;
2995 * directive-filepaths : '{' string string* '}'
// Parses an .includes list, reporting each entry through
// AsmCallbacks::onInclude.
2997 void parse_includes(AsmState
& as
) {
2998 parse_symbol_refs(as
, &AsmCallbacks::onInclude
);
// Parses a .constant_refs list, reporting each entry through
// AsmCallbacks::onConstantRef.
3001 void parse_constant_refs(AsmState
& as
) {
3002 parse_symbol_refs(as
, &AsmCallbacks::onConstantRef
);
// Parses a .function_refs list, reporting each entry through
// AsmCallbacks::onFunctionRef.
3005 void parse_function_refs(AsmState
& as
) {
3006 parse_symbol_refs(as
, &AsmCallbacks::onFunctionRef
);
// Parses a .class_refs list, reporting each entry through
// AsmCallbacks::onClassRef.
3009 void parse_class_refs(AsmState
& as
) {
3010 parse_symbol_refs(as
, &AsmCallbacks::onClassRef
);
3014 * directive-metadata : identifier '=' identifier ';'
3015 * | identifier '=' quoted-string-literal ';'
3016 * | identifier '=' long-string-literal ';'
// Parses a .metadata directive of the form key '=' value and stores
// the pair in the unit emitter's m_metaData, with the value held as a
// KindOfPersistentString.
//
// The value is tried first as a long string (parse_maybe_long_string),
// then as a quoted string, then as a bare word.
//
// NOTE(review): the error at the end is presumably reached only when
// the key cannot be read; the early exit after a successful parse is
// not among the lines shown here -- confirm.
3019 void parse_metadata(AsmState
& as
) {
3021 if (as
.in
.readname(key
)) {
3022 as
.in
.expectWs('=');
3023 as
.in
.skipWhitespace();
// Immediately-invoked lambda producing the static string for the
// value.
3024 auto const value
= [&] () -> const StringData
* {
3025 auto ret
= parse_maybe_long_string(as
);
3026 if (!ret
.empty()) return makeStaticString(ret
);
// An empty long-string result falls back to a quoted string or a word.
3028 if (as
.in
.readQuotedStr(tmp
) || as
.in
.readword(tmp
)) {
3029 return makeStaticString(tmp
);
3035 as
.ue
->m_metaData
.emplace(
3036 makeStaticString(key
),
3037 make_tv
<KindOfPersistentString
>(value
)
3042 as
.error(".metadata expects a key = value pair");
3046 * asm-file : asm-tld* <EOF>
3049 * asm-tld : ".filepath" directive-filepath
3050 * | ".main" directive-main
3051 * | ".function" directive-function
3052 * | ".adata" directive-adata
3053 * | ".class" directive-class
3054 * | ".alias" directive-alias
3055 * | ".strict" directive-strict
3056 * | ".hh_file" directive-hh-file
3057 * | ".includes" directive-filepaths
3058 * | ".constant_refs" directive-symbols
3059 * | ".function_refs" directive-symbols
3060 * | ".class_refs" directive-symbols
3061 * | ".metadata" directive-metadata
// Top-level driver for an hhas file: reads directive words one at a
// time and dispatches each to the matching parse_* routine.
//
// Reports an error for any directive word that is not recognized, and
// reports "no .main found in hhas unit" when ensure_pseudomain(as)
// returns false after the directive loop.
3064 void parse(AsmState
& as
) {
3065 as
.in
.skipWhitespace();
3066 std::string directive
;
// A recognized directive runs its parser and continues with the next
// word; falling through every comparison reaches the error below.
3068 while (as
.in
.readword(directive
)) {
3069 if (directive
== ".filepath") { parse_filepath(as
) ; continue; }
3070 if (directive
== ".main") { parse_main(as
) ; continue; }
3071 if (directive
== ".function") { parse_function(as
) ; continue; }
3072 if (directive
== ".adata") { parse_adata(as
) ; continue; }
3073 if (directive
== ".class") { parse_class(as
) ; continue; }
3074 if (directive
== ".alias") { parse_alias(as
) ; continue; }
3075 if (directive
== ".strict") { parse_strict(as
) ; continue; }
3076 if (directive
== ".hh_file") { parse_hh_file(as
) ; continue; }
3077 if (directive
== ".includes") { parse_includes(as
) ; continue; }
3078 if (directive
== ".constant_refs") { parse_constant_refs(as
) ; continue; }
3079 if (directive
== ".function_refs") { parse_function_refs(as
) ; continue; }
3080 if (directive
== ".class_refs") { parse_class_refs(as
) ; continue; }
3081 if (directive
== ".metadata") { parse_metadata(as
) ; continue; }
3083 as
.error("unrecognized top-level directive `" + directive
+ "'");
3086 if (!ensure_pseudomain(as
)) {
3087 as
.error("no .main found in hhas unit");
3093 //////////////////////////////////////////////////////////////////////
// Assembles hhas source text into a UnitEmitter.
//
// A new UnitEmitter is created from md5, its file path is set from
// filename, and an AsmState is built over a binary input string stream
// holding codeLen bytes of code.
//
// If a std::exception is caught and swallowErrors is false, it is
// rethrown; otherwise the result is replaced by a fatal unit built by
// createFatalUnit carrying the exception's message.
//
// NOTE(review): part of the parameter list and the opening of the try
// block are not among the lines shown here.
3095 std::unique_ptr
<UnitEmitter
> assemble_string(
3098 const char* filename
,
3101 AsmCallbacks
* callbacks
3103 auto ue
= std::make_unique
<UnitEmitter
>(md5
);
// While SystemLib is not yet initialized, the unit is marked
// merge-only.
3104 if (!SystemLib::s_inited
) {
3105 ue
->m_mergeOnly
= true;
3107 StringData
* sd
= makeStaticString(filename
);
3108 ue
->m_filepath
= sd
;
// Initial strict-types setting: on when HipHop syntax is enabled or
// when PHP7 scalar types are disabled.
3109 ue
->m_useStrictTypes
= RuntimeOption::EnableHipHopSyntax
||
3110 !RuntimeOption::PHP7_ScalarTypes
;
3113 auto const mode
= std::istringstream::binary
| std::istringstream::in
;
3114 std::istringstream
instr(std::string(code
, codeLen
), mode
);
3115 AsmState
as(instr
, callbacks
);
// A unit flagged as an HH file always uses strict types.
3118 if (ue
->m_isHHFile
) {
3119 ue
->m_useStrictTypes
= true;
3121 } catch (const std::exception
& e
) {
3122 if (!swallowErrors
) throw;
3123 ue
= createFatalUnit(sd
, md5
, FatalOp::Runtime
, makeStaticString(e
.what()));
// Assembles an inline assembly expression into the given function
// emitter and reports its effect on the evaluation stack.
//
// Returns:
//   AsmResult::Unreachable  when as.currentStackDepth is unset after
//                           parsing;
//   AsmResult::ValuePushed  when the final depth is
//                           incomingStackDepth + 1;
//   AsmResult::NoResult     when the final depth equals
//                           incomingStackDepth.
//
// Reports an error when unnamed locals were used (as.maxUnnamed >= 0),
// when the final stack depth has no baseValue, or when the final depth
// is neither incomingStackDepth nor incomingStackDepth + 1.
3129 AsmResult
assemble_expression(UnitEmitter
& ue
, FuncEmitter
* fe
,
3130 int incomingStackDepth
,
3131 const std::string
& expr
) {
3132 auto const mode
= std::istringstream::binary
| std::istringstream::in
;
// A '}' is appended to the expression text before parsing.
// NOTE(review): presumably this supplies the terminator that
// parse_function_body looks for -- confirm.
3133 std::stringstream
sstr(expr
+ '}', mode
);
// Start from the caller's current stack depth.
3137 as
.initStackDepth
.adjust(as
, incomingStackDepth
);
3138 parse_function_body(as
, 1);
3140 if (as
.maxUnnamed
>= 0) {
3141 as
.error("Unnamed locals are not allowed in inline assembly");
3144 if (!as
.currentStackDepth
) return AsmResult::Unreachable
;
3146 // If we fall off the end of the inline assembly, we're expected to
3147 // leave a single value on the stack, or leave the stack unchanged.
3148 if (!as
.currentStackDepth
->baseValue
) {
3149 as
.error("Unknown stack offset on exit from inline assembly");
3151 auto curStackDepth
= as
.currentStackDepth
->absoluteDepth();
3152 if (curStackDepth
== incomingStackDepth
+ 1) {
3153 return AsmResult::ValuePushed
;
3155 if (curStackDepth
!= incomingStackDepth
) {
3156 as
.error("Inline assembly expressions should leave the stack unchanged, "
3157 "or push exactly one cell onto the stack.");
3160 return AsmResult::NoResult
;
3163 //////////////////////////////////////////////////////////////////////