2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
18 * This module contains an assembler implementation for HHBC. It is
19 * probably fairly close to allowing you to access most of the
20 * metadata associated with hhvm's compiled unit format, although it's
21 * possible something has been overlooked.
23 * To use it, run hhvm with -v Eval.AllowHhas=true on a file with a
24 * ".hhas" extension. The syntax is probably easiest to understand by
25 * looking at some examples (or the semi-BNF markup around some of the
26 * parse functions here). For examples, see hphp/tests/vm/asm_*.
31 * - You can crash hhvm very easily with this.
33 * Using this module, you can emit pretty much any sort of not
34 * trivially-illegal bytecode stream, and many trivially-illegal
35 * ones as well. You can also easily create Units with illegal
36 * metadata. Generally this will crash the VM. In other cases
37 * (especially if you don't bother to DefCls your classes in your
38 * .main) you'll just get mysterious "class not defined" errors
41 * - Whitespace is not normally significant, but newlines may not
42 * be in the middle of a list of opcode arguments. (After the
43 * newline, the next thing seen is expected to be either a
44 * mnemonic for the next opcode in the stream or some sort of
45 * directive.) However, newlines (and comments) may appear
46 * *inside* certain opcode arguments (e.g. string literals or
49 * Rationale: this is partially intended to make it trivial to
50 * catch wrong-number-of-arguments errors, although it probably
51 * could be done without this if you feel like changing it.
56 * - It might be nice if you could refer to iterators by name
57 * instead of by index.
59 * - DefCls by name would be nice.
61 * - Line number information can't be propagated to the various Unit
62 * structures. (It might make sense to do this via something like
63 * a .line directive at some point.)
65 * - You can't currently create non-top functions or non-hoistable
68 * - Missing support for static variables in a function/method.
70 * @author Jordan DeLong <delong.j@fb.com>
73 #include "hphp/runtime/vm/as.h"
80 #include <boost/algorithm/string.hpp>
81 #include <boost/format.hpp>
82 #include <boost/lexical_cast.hpp>
83 #include <boost/scoped_ptr.hpp>
84 #include <boost/noncopyable.hpp>
85 #include <boost/bind.hpp>
87 #include "folly/String.h"
88 #include "folly/Range.h"
90 #include "hphp/util/md5.h"
92 #include "hphp/runtime/base/builtin-functions.h"
93 #include "hphp/runtime/base/repo-auth-type-codec.h"
94 #include "hphp/runtime/base/repo-auth-type.h"
95 #include "hphp/runtime/vm/as-shared.h"
96 #include "hphp/runtime/vm/func-emitter.h"
97 #include "hphp/runtime/vm/hhbc.h"
98 #include "hphp/runtime/vm/preclass-emitter.h"
99 #include "hphp/runtime/vm/unit.h"
100 #include "hphp/runtime/vm/unit-emitter.h"
101 #include "hphp/system/systemlib.h"
107 //////////////////////////////////////////////////////////////////////
// Signature shared by the parser routines in this file: a routine receives
// the assembler state by reference and returns nothing. The opcode_parsers
// map defined later in this file stores one such pointer per opcode name.
112 typedef void (*ParserFunc
)(AsmState
& as
);
114 struct Error
: std::runtime_error
{
115 explicit Error(int where
, const std::string
& what
)
116 : std::runtime_error(str(
117 boost::format("Assembler Error: line %1%: %2%") % where
% what
))
122 explicit Input(std::istream
& in
)
// Returns whatever m_in.peek() reports, without calling getc(), so the
// line-number bookkeeping done on reads is not affected.
// NOTE(review): m_in is presumably the std::istream handed to the
// constructor, in which case the result is the next character or EOF
// and nothing is consumed -- confirm against the member declaration.
127 int peek() { return m_in
.peek(); }
130 int ret
= m_in
.get();
133 } else if (ret
== '\n') {
139 void ungetc(char c
) {
140 if (c
== '\n') --m_lineNumber
;
146 error(str(boost::format("expected character `%1%'") % char(c
)));
151 * Expect `c' after possible whitespace/comments. When convenient,
152 * preferable to doing skipWhitespace/expect manually to keep the
153 * line number in the error prior to the whitespace skipped.
155 void expectWs(int c
) {
156 const int currentLine
= m_lineNumber
;
159 throw Error(currentLine
,
160 str(boost::format("expected character `%1%'") % char(c
)));
164 int getLineNumber() const {
168 // Skips whitespace, then populates word with valid bareword
169 // characters. Returns true if we read any characters into word.
170 bool readword(std::string
& word
) {
173 consumePred(is_bareword(), std::back_inserter(word
));
174 return !word
.empty();
177 // Try to consume a bareword. Skips whitespace. If we can't
178 // consume the specified word, returns false.
179 bool tryConsume(const std::string
& what
) {
181 if (!readword(word
)) {
185 std::for_each(word
.rbegin(), word
.rend(),
186 boost::bind(&Input::ungetc
, this, _1
));
192 // C-style character escapes, no support for unicode escapes or
194 template<class OutCont
>
195 void escapeChar(OutCont
& out
) {
196 auto is_oct
= [&] (int i
) { return i
>= '0' && i
<= '7'; };
197 auto is_hex
= [&] (int i
) {
198 return (i
>= '0' && i
<= '9') ||
199 (i
>= 'a' && i
<= 'f') ||
200 (i
>= 'A' && i
<= 'F');
202 auto hex_val
= [&] (int i
) -> uint32_t {
204 return i
>= '0' && i
<= '9' ? i
- '0' :
205 i
>= 'a' && i
<= 'f' ? i
- 'a' + 10 : i
- 'A' + 10;
210 case EOF
: error("EOF in string literal");
211 case 'a': out
.push_back('\a'); break;
212 case 'b': out
.push_back('\b'); break;
213 case 'f': out
.push_back('\f'); break;
214 case 'n': out
.push_back('\n'); break;
215 case 'r': out
.push_back('\r'); break;
216 case 't': out
.push_back('\t'); break;
217 case 'v': out
.push_back('\v'); break;
218 case '\'': out
.push_back('\''); break;
219 case '\"': out
.push_back('\"'); break;
220 case '\?': out
.push_back('\?'); break;
221 case '\\': out
.push_back('\\'); break;
222 case '\n': /* ignore */ break;
225 auto val
= int64_t{src
} - '0';
226 for (auto i
= int{0}; i
< 3; ++i
) {
228 if (!is_oct(src
)) { ungetc(src
); break; }
232 if (val
> std::numeric_limits
<uint8_t>::max()) {
233 error("octal escape sequence overflowed");
235 out
.push_back(static_cast<uint8_t>(val
));
239 if (src
== 'x' || src
== 'X') {
240 auto val
= uint64_t{0};
241 if (!is_hex(peek())) error("\\x used without no following hex digits");
246 } while (is_hex(peek()));
247 if (val
> std::numeric_limits
<uint8_t>::max()) {
248 error("hex escape sequence overflowed");
250 out
.push_back(static_cast<uint8_t>(val
));
254 error("unrecognized character escape");
258 // Reads a quoted string with typical escaping rules. Does not skip
259 // any whitespace. Returns true if we successfully read one, or
260 // false. EOF during the string throws.
261 bool readQuotedStr(std::string
& str
) {
263 if (peek() != '\"') {
269 while ((c
= getc()) != EOF
) {
271 case '\"': return true;
272 case '\\': escapeChar(str
); break;
273 default: str
.push_back(c
); break;
276 error("EOF in string literal");
282 * Reads a python-style longstring, or returns false if we don't
283 * have one. Does not skip any whitespace before looking for the
286 * Python longstrings start with \"\"\", and can contain any bytes
287 * other than \"\"\". A '\\' character introduces C-style escapes,
288 * but there's no need to escape single quote characters.
290 bool readLongString(std::vector
<char>& buffer
) {
291 if (peek() != '\"') return false;
293 if (peek() != '\"') { ungetc('\"'); return false; }
295 if (peek() != '\"') { ungetc('\"');
296 ungetc('\"'); return false; }
300 while ((c
= getc()) != EOF
) {
308 buffer
.push_back('"');
314 buffer
.push_back('"');
315 buffer
.push_back('"');
324 error("EOF in \"\"\"-string literal");
329 // Skips whitespace (including newlines and comments).
330 void skipWhitespace() {
332 skipPred(boost::is_any_of(" \t\n"));
334 skipPred(!boost::is_any_of("\n"));
342 // Skip spaces and tabs, but other whitespace (such as comments or
343 // newlines) stop the skip.
344 void skipSpaceTab() {
345 skipPred(boost::is_any_of(" \t"));
348 template<class Predicate
>
349 void skipPred(Predicate pred
) {
351 while (pred(c
= peek())) { getc(); }
354 template<class Predicate
, class OutputIterator
>
355 void consumePred(Predicate pred
, OutputIterator out
) {
357 while (pred(c
= peek())) { *out
++ = getc(); }
362 bool operator()(int i
) const {
363 return isalnum(i
) || i
== '_' || i
== '.' || i
== '$';
367 void error(const std::string
& what
) {
368 throw Error(getLineNumber(), what
);
371 void io_error_if_bad() {
373 error("I/O error reading stream: " +
374 folly::errnoStr(errno
).toStdString());
387 StackDepth
* stackDepth
;
392 * Tracks the depth of the stack in a given block of instructions.
394 * This structure is linked to a block of instructions (usually starting at a
395 * label), and tracks the current stack depth in this block. This tracking can
397 * - Absolute depth: the depth of the stack is exactly known for this block
398 * - Relative depth: the depth of the stack is unknown for now. We keep track
399 * of an offset, relative to the depth of the stack at the first instruction
407 folly::Optional
<int> baseValue
;
410 * During the parsing process, when a Jmp instruction is encountered, the
411 * StackDepth structure for this jump becomes linked to the StackDepth
412 * structure of the label (which is added to the listeners list).
414 * Once the absolute depth at the jump becomes known, its StackDepth
415 * instance calls the setBase method of the StackDepth instance of the label.
416 * The absolute depth at the label can then be inferred from the
417 * absolute depth at the jump.
419 std::vector
<std::pair
<StackDepth
*, int> > listeners
;
427 void adjust(AsmState
& as
, int delta
);
428 void addListener(AsmState
& as
, StackDepth
* target
);
429 void setBase(AsmState
& as
, int stackDepth
);
432 * Sets the baseValue such that the current stack depth matches the
435 * If the base value is already known, it may conflict with the
436 * parameter of this function. In this case, an error will be raised.
438 void setCurrentAbsolute(AsmState
& as
, int stackDepth
);
// A freshly constructed label starts out unbound (bound == false).
// finishFunction() reports "Undefined label" for any label that is
// still unbound when the function body ends.
442 Label() : bound(false) {}
446 StackDepth stackDepth
;
449 * Each label source has an Offset where the jmp should be
450 * patched up, and an Offset from which the jump delta should be
451 * computed. (The second Offset is basically to the actual
452 * jump/switch/etc instruction, while the first points to the
455 typedef std::vector
<std::pair
<Offset
,Offset
> > SourcesVec
;
459 * List of parameter ids that use this label for their DV
462 std::vector
<Id
> dvInits
;
465 * List of EHEnt's that should have m_fault bound to the Offset of
468 std::vector
<size_t> ehFaults
;
471 * Map from exception names to the list of EHEnt's that have a catch
472 * block jumping to this label for that name.
474 typedef std::map
<std::string
,std::vector
<size_t> > CatchesMap
;
475 CatchesMap ehCatches
;
478 struct AsmState
: private boost::noncopyable
{
479 explicit AsmState(std::istream
& in
)
481 , emittedPseudoMain(false)
484 , currentStackDepth(&initStackDepth
)
489 currentStackDepth
->setBase(*this, 0);
492 void error(const std::string
& what
) {
493 throw Error(in
.getLineNumber(), what
);
496 void adjustStack(int delta
) {
497 if (currentStackDepth
== nullptr) {
498 // Instruction is unreachable, nothing to do here!
502 currentStackDepth
->adjust(*this, delta
);
505 void adjustStackHighwater(int depth
) {
506 stackHighWater
= std::max(stackHighWater
, depth
);
509 std::string
displayStackDepth() {
510 std::ostringstream stack
;
512 if (currentStackDepth
== nullptr) {
514 } else if (currentStackDepth
->baseValue
) {
515 stack
<< *currentStackDepth
->baseValue
+
516 currentStackDepth
->currentOffset
;
518 stack
<< "?" << currentStackDepth
->currentOffset
;
524 void addLabelTarget(const std::string
& name
) {
525 Label
& label
= labelMap
[name
];
527 error("Duplicate label " + name
);
530 label
.target
= ue
->bcPos();
532 StackDepth
* newStack
= &label
.stackDepth
;
534 if (currentStackDepth
== nullptr) {
535 // Previous instruction was unreachable
536 currentStackDepth
= newStack
;
540 // The stack depth at the label depends on the current depth
541 currentStackDepth
->addListener(*this, newStack
);
542 currentStackDepth
= newStack
;
545 void addLabelJump(const std::string
& name
, Offset immOff
, Offset opcodeOff
) {
546 Label
& label
= labelMap
[name
];
548 if (currentStackDepth
== nullptr) {
549 // Jump is unreachable, nothing to do here
553 // The stack depth at the target must be the same as the current depth
554 // (whatever this may be: it may still be unknown)
555 currentStackDepth
->addListener(*this, &label
.stackDepth
);
557 label
.sources
.push_back(std::make_pair(immOff
, opcodeOff
));
560 void enforceStackDepth(int stackDepth
) {
561 if (currentStackDepth
== nullptr) {
562 // Current instruction is unreachable, thus the constraint
563 // on the stack depth will never be violated
567 currentStackDepth
->setCurrentAbsolute(*this, stackDepth
);
570 void enterUnreachableRegion() {
571 currentStackDepth
= nullptr;
574 void addLabelDVInit(const std::string
& name
, int paramId
) {
575 labelMap
[name
].dvInits
.push_back(paramId
);
577 // Stack depth should be 0 when entering a DV init
578 labelMap
[name
].stackDepth
.setBase(*this, 0);
581 void addLabelEHFault(const std::string
& name
, size_t ehIdx
) {
582 labelMap
[name
].ehFaults
.push_back(ehIdx
);
584 // Stack depth should be 0 when entering a fault funclet
585 labelMap
[name
].stackDepth
.setBase(*this, 0);
588 void addLabelEHCatch(const std::string
& what
,
589 const std::string
& label
,
591 labelMap
[label
].ehCatches
[what
].push_back(ehIdx
);
593 // Stack depth should be 0 when entering a catch block
594 labelMap
[label
].stackDepth
.setBase(*this, 0);
597 void beginFpi(Offset fpushOff
) {
598 if (currentStackDepth
== nullptr) {
599 error("beginFpi called from unreachable instruction");
602 fpiRegs
.push_back(FPIReg());
603 FPIReg
& fpi
= fpiRegs
.back();
604 fpi
.fpushOff
= fpushOff
;
605 fpi
.stackDepth
= currentStackDepth
;
606 fpi
.fpOff
= currentStackDepth
->currentOffset
;
607 fdescDepth
+= kNumActRecCells
;
608 fdescHighWater
= std::max(fdescDepth
, fdescHighWater
);
612 assert(!fpiRegs
.empty());
614 FPIEnt
& ent
= fe
->addFPIEnt();
615 FPIReg
& reg
= fpiRegs
.back();
616 ent
.m_fpushOff
= reg
.fpushOff
;
617 ent
.m_fcallOff
= ue
->bcPos();
618 ent
.m_fpOff
= reg
.fpOff
;
619 if (reg
.stackDepth
->baseValue
) {
620 ent
.m_fpOff
+= *reg
.stackDepth
->baseValue
;
622 // base value still unknown, this will need to be updated later
623 fpiToUpdate
.push_back(std::make_pair(&ent
, reg
.stackDepth
));
627 fdescDepth
-= kNumActRecCells
;
635 void patchLabelOffsets(const Label
& label
) {
636 for (Label::SourcesVec::const_iterator it
= label
.sources
.begin();
637 it
!= label
.sources
.end();
639 ue
->emitInt32(label
.target
- it
->second
, it
->first
);
642 for (std::vector
<Id
>::const_iterator it
= label
.dvInits
.begin();
643 it
!= label
.dvInits
.end();
645 fe
->params
[*it
].funcletOff
= label
.target
;
648 for (std::vector
<size_t>::const_iterator it
= label
.ehFaults
.begin();
649 it
!= label
.ehFaults
.end();
651 fe
->ehtab
[*it
].m_fault
= label
.target
;
654 for (Label::CatchesMap::const_iterator it
= label
.ehCatches
.begin();
655 it
!= label
.ehCatches
.end();
657 Id exId
= ue
->mergeLitstr(makeStaticString(it
->first
));
658 for (std::vector
<size_t>::const_iterator idx_it
= it
->second
.begin();
659 idx_it
!= it
->second
.end();
661 fe
->ehtab
[*idx_it
].m_catches
.push_back(
662 std::make_pair(exId
, label
.target
));
667 void finishFunction() {
668 for (LabelMap::const_iterator it
= labelMap
.begin();
669 it
!= labelMap
.end();
671 if (!it
->second
.bound
) {
672 error("Undefined label " + it
->first
);
674 if (it
->second
.target
>= ue
->bcPos()) {
675 error("label " + it
->first
+ " falls of the end of the function");
678 patchLabelOffsets(it
->second
);
681 // Patch the FPI structures
682 for (auto& kv
: fpiToUpdate
) {
683 if (!kv
.second
->baseValue
) {
684 error("created a FPI from an unreachable instruction");
687 kv
.first
->m_fpOff
+= *kv
.second
->baseValue
;
690 // Stack depth should be 0 at the end of a function body
691 enforceStackDepth(0);
693 fe
->maxStackCells
= fe
->numLocals() +
694 fe
->numIterators() * kNumIterCells
+
696 fdescHighWater
; // in units of cells already
697 fe
->finish(ue
->bcPos(), false);
698 ue
->recordFunction(fe
);
704 initStackDepth
= StackDepth();
705 initStackDepth
.setBase(*this, 0);
706 currentStackDepth
= &initStackDepth
;
713 int getLocalId(const std::string
& name
) {
714 if (name
[0] != '$') {
715 error("local variables must be prefixed with $");
718 const StringData
* sd
= makeStaticString(name
.c_str() + 1);
720 return fe
->lookupVarId(sd
);
723 int getIterId(int32_t id
) {
724 if (id
>= fe
->numIterators()) {
725 error("iterator id exceeded number of iterators in the function");
732 bool emittedPseudoMain
;
734 typedef std::map
<std::string
,ArrayData
*> ADataMap
;
737 // When inside a class, this state is active.
738 PreClassEmitter
* pce
;
740 // When we're doing a function or method body, this state is active.
742 std::vector
<FPIReg
> fpiRegs
;
743 typedef std::map
<std::string
,Label
> LabelMap
;
744 std::map
<std::string
,Label
> labelMap
;
746 StackDepth initStackDepth
;
747 StackDepth
* currentStackDepth
;
751 std::vector
<std::pair
<FPIEnt
*, StackDepth
*> > fpiToUpdate
;
755 void StackDepth::adjust(AsmState
& as
, int delta
) {
756 currentOffset
+= delta
;
759 // The absolute stack depth is unknown. We only store the min
760 // and max offsets, and we will take a decision later, when the
761 // base value will be known.
762 maxOffset
= std::max(currentOffset
, maxOffset
);
763 if (currentOffset
< minOffset
) {
764 minOffsetLine
= as
.in
.getLineNumber();
765 minOffset
= currentOffset
;
770 if (*baseValue
+ currentOffset
< 0) {
771 as
.error("opcode sequence caused stack depth to go negative");
774 as
.adjustStackHighwater(*baseValue
+ currentOffset
);
777 void StackDepth::addListener(AsmState
& as
, StackDepth
* target
) {
779 target
->setBase(as
, *baseValue
+ currentOffset
);
781 listeners
.push_back(std::make_pair(target
, currentOffset
));
785 void StackDepth::setBase(AsmState
& as
, int stackDepth
) {
786 if (baseValue
&& stackDepth
!= *baseValue
) {
787 as
.error("stack depth do not match");
790 baseValue
= stackDepth
;
792 // We finally know the base value. Update AsmState accordingly.
793 if (*baseValue
+ minOffset
< 0) {
796 "opcode sequence caused stack depth to go negative"
799 as
.adjustStackHighwater(*baseValue
+ maxOffset
);
801 // Update the listeners
802 for (auto& kv
: listeners
) {
803 kv
.first
->setBase(as
, *baseValue
+ kv
.second
);
806 // We won't need them anymore
810 void StackDepth::setCurrentAbsolute(AsmState
& as
, int stackDepth
) {
811 setBase(as
, stackDepth
- currentOffset
);
814 //////////////////////////////////////////////////////////////////////
817 * Opcode arguments must be on the same line as the opcode itself,
818 * although certain argument types may contain internal newlines (see,
819 * for example, read_immvector, read_jmpvector, or string literals).
821 template<class Target
> Target
read_opcode_arg(AsmState
& as
) {
822 as
.in
.skipSpaceTab();
824 as
.in
.consumePred(!boost::is_any_of(" \t\n#;>"),
825 std::back_inserter(strVal
));
826 if (strVal
.empty()) {
827 as
.error("expected opcode or directive argument");
830 return boost::lexical_cast
<Target
>(strVal
);
831 } catch (boost::bad_lexical_cast
&) {
832 as
.error("couldn't convert input argument (" + strVal
+ ") to "
838 template<class SubOpType
>
839 uint8_t read_subop(AsmState
& as
) {
840 auto const str
= read_opcode_arg
<std::string
>(as
);
841 if (auto const ty
= nameToSubop
<SubOpType
>(str
.c_str())) {
842 return static_cast<uint8_t>(*ty
);
844 as
.error("unknown subop name");
848 const StringData
* read_litstr(AsmState
& as
) {
849 as
.in
.skipSpaceTab();
851 if (!as
.in
.readQuotedStr(strVal
)) {
852 as
.error("expected quoted string literal");
854 return makeStaticString(strVal
);
857 std::vector
<std::string
> read_strvector(AsmState
& as
) {
858 std::vector
<std::string
> ret
;
859 as
.in
.skipSpaceTab();
862 while (as
.in
.skipSpaceTab(), as
.in
.readQuotedStr(name
)) {
865 as
.in
.skipSpaceTab();
870 ArrayData
* read_litarray(AsmState
& as
) {
871 as
.in
.skipSpaceTab();
872 if (as
.in
.getc() != '@') {
873 as
.error("expecting an `@foo' array literal reference");
876 if (!as
.in
.readword(name
)) {
877 as
.error("expected name of .adata literal");
880 AsmState::ADataMap::const_iterator it
= as
.adataMap
.find(name
);
881 if (it
== as
.adataMap
.end()) {
882 as
.error("unknown array data literal name " + name
);
887 void read_immvector_immediate(AsmState
& as
, std::vector
<unsigned char>& ret
,
888 MemberCode mcode
= InvalidMemberCode
) {
889 if (memberCodeImmIsLoc(mcode
) || mcode
== InvalidMemberCode
) {
890 if (as
.in
.getc() != '$') {
891 as
.error("*L member code in vector immediate must be followed by "
892 "a local variable name");
895 if (!as
.in
.readword(name
)) {
896 as
.error("couldn't read name for local variable in vector immediate");
898 encodeIvaToVector(ret
, as
.getLocalId("$" + name
));
899 } else if (memberCodeImmIsString(mcode
)) {
900 encodeToVector
<int32_t>(ret
, as
.ue
->mergeLitstr(read_litstr(as
)));
901 } else if (memberCodeImmIsInt(mcode
)) {
902 encodeToVector
<int64_t>(ret
, read_opcode_arg
<int64_t>(as
));
904 as
.error(std::string("don't understand immediate for member code ") +
905 memberCodeString(mcode
));
909 std::vector
<unsigned char> read_immvector(AsmState
& as
, int& stackCount
) {
910 std::vector
<unsigned char> ret
;
912 as
.in
.skipSpaceTab();
916 if (!as
.in
.readword(word
)) {
917 as
.error("expected location code in immediate vector");
920 LocationCode lcode
= parseLocationCode(word
.c_str());
921 if (lcode
== InvalidLocationCode
) {
922 as
.error("expected location code, saw `" + word
+ "'");
924 ret
.push_back(uint8_t(lcode
));
925 if (word
[word
.size() - 1] == 'L') {
926 if (as
.in
.getc() != ':') {
927 as
.error("expected `:' after location code `" + word
+ "'");
930 for (int i
= 0; i
< numLocationCodeImms(lcode
); ++i
) {
931 read_immvector_immediate(as
, ret
);
933 stackCount
= numLocationCodeStackVals(lcode
);
935 // Read all the member entries.
937 as
.in
.skipWhitespace();
938 if (as
.in
.peek() == '>') { as
.in
.getc(); break; }
940 if (!as
.in
.readword(word
)) {
941 as
.error("expected member code in immediate vector");
943 MemberCode mcode
= parseMemberCode(word
.c_str());
944 if (mcode
== InvalidMemberCode
) {
945 as
.error("unrecognized member code `" + word
+ "'");
947 ret
.push_back(uint8_t(mcode
));
948 if (memberCodeHasImm(mcode
)) {
949 if (as
.in
.getc() != ':') {
950 as
.error("expected `:' after member code `" + word
+ "'");
952 read_immvector_immediate(as
, ret
, mcode
);
953 } else if (mcode
!= MW
) {
961 RepoAuthType
read_repo_auth_type(AsmState
& as
) {
962 auto const str
= read_opcode_arg
<std::string
>(as
);
963 folly::StringPiece
parse(str
);
966 * Note: no support for reading array types. (The assembler only
967 * emits a single unit, so it can't really be involved in creating a
971 using T
= RepoAuthType::Tag
;
973 #define X(what, tag) \
974 if (parse.startsWith(what)) return RepoAuthType{tag}
976 #define Y(what, tag) \
977 if (parse.startsWith(what)) { \
978 parse.removePrefix(what); \
979 auto const cls = makeStaticString(parse.data()); \
980 as.ue->mergeLitstr(cls); \
981 return RepoAuthType{tag, cls}; \
984 Y("Obj=", T::ExactObj
);
985 Y("?Obj=", T::OptExactObj
);
986 Y("?Obj<=", T::OptSubObj
);
987 Y("Obj<=", T::SubObj
);
990 X("?Arr", T::OptArr
);
992 X("?Bool", T::OptBool
);
995 X("?Dbl", T::OptDbl
);
997 X("InitCell", T::InitCell
);
998 X("InitGen", T::InitGen
);
999 X("InitNull", T::InitNull
);
1000 X("InitUnc", T::InitUnc
);
1002 X("?Int", T::OptInt
);
1005 X("?Obj", T::OptObj
);
1007 X("?Res", T::OptRes
);
1009 X("?SArr", T::OptSArr
);
1011 X("?SStr", T::OptSStr
);
1013 X("?Str", T::OptStr
);
1016 X("Uninit", T::Uninit
);
1021 // Make sure the above parsing code is revisited when new tags are
1022 // added (we'll get a warning for a missing case label):
1023 if (debug
) switch (RepoAuthType
{}.tag()) {
1054 case T::OptExactObj
:
1059 as
.error("unrecognized RepoAuthType format");
1063 // Read in a vector of iterators. The format for this vector is:
1064 // <(TYPE) ID, (TYPE) ID, ...>
1065 // Where TYPE := Iter | MIter | CIter
1066 // and ID := Integer
1067 std::vector
<uint32_t> read_itervec(AsmState
& as
) {
1068 std::vector
<uint32_t> ret
;
1070 as
.in
.skipSpaceTab();
1076 as
.in
.expectWs('(');
1077 if (!as
.in
.readword(word
)) as
.error("Was expecting Iterator type.");
1078 if (!word
.compare("Iter")) ret
.push_back(KindOfIter
);
1079 else if (!word
.compare("MIter")) ret
.push_back(KindOfMIter
);
1080 else if (!word
.compare("CIter")) ret
.push_back(KindOfCIter
);
1081 else as
.error("Unknown iterator type `" + word
+ "'");
1082 as
.in
.expectWs(')');
1084 as
.in
.skipSpaceTab();
1086 if (!as
.in
.readword(word
)) as
.error("Was expecting iterator id.");
1087 uint32_t iterId
= folly::to
<uint32_t>(word
);
1088 ret
.push_back(iterId
);
1090 if (!isdigit(word
.back())) {
1091 if (word
.back() == '>') break;
1092 if (word
.back() != ',') as
.error("Was expecting `,'.");
1094 as
.in
.skipSpaceTab();
1095 if (as
.in
.peek() == '>') { as
.in
.getc(); break; }
1103 // Jump tables are lists of labels.
1104 std::vector
<std::string
> read_jmpvector(AsmState
& as
) {
1105 std::vector
<std::string
> ret
;
1107 as
.in
.skipSpaceTab();
1111 while (as
.in
.readword(word
)) {
1112 ret
.push_back(word
);
1114 as
.in
.expectWs('>');
1119 typedef std::vector
<std::pair
<Id
, std::string
>> SSwitchJmpVector
;
1121 SSwitchJmpVector
read_sswitch_jmpvector(AsmState
& as
) {
1122 SSwitchJmpVector ret
;
1124 as
.in
.skipSpaceTab();
1127 std::string defLabel
;
1129 std::string caseStr
;
1130 if (!as
.in
.readQuotedStr(caseStr
)) {
1131 as
.error("expected quoted string literal");
1136 as
.in
.readword(defLabel
);
1138 ret
.push_back(std::make_pair(
1139 as
.ue
->mergeLitstr(makeStaticString(caseStr
)),
1143 as
.in
.skipWhitespace();
1144 } while (as
.in
.peek() != '-');
1148 as
.in
.readword(defLabel
);
1150 // -1 stands for the default case.
1151 ret
.push_back(std::make_pair(-1, defLabel
));
1158 //////////////////////////////////////////////////////////////////////
1160 typedef std::map
<std::string
,ParserFunc
> OpcodeParserMap
;
1161 OpcodeParserMap opcode_parsers
;
// Arity helpers for an opcode's immediate list: each one pastes its
// arguments onto the IMM_ prefix, so IMM_TWO(SA, IVA) expands to
// `IMM_SA; IMM_IVA`. The resulting IMM_<type> macros are the ones that
// actually read an argument and emit it. Multiple immediates are joined
// with ';', so the expansion is a statement sequence, not an expression.
1164 #define IMM_ONE(t) IMM_##t
1165 #define IMM_TWO(t1, t2) IMM_##t1; IMM_##t2
1166 #define IMM_THREE(t1, t2, t3) IMM_##t1; IMM_##t2; IMM_##t3
1167 #define IMM_FOUR(t1, t2, t3, t4) IMM_##t1; IMM_##t2; IMM_##t3; IMM_##t4
1169 // FCall and NewPackedArray need to know the first imm to do POP_*MANY.
1170 #define IMM_IVA do { \
1171 int imm = read_opcode_arg<int64_t>(as); \
1172 as.ue->emitIVA(imm); \
1173 if (immIVA < 0) immIVA = imm; \
1177 std::vector<std::string> vecImm = read_strvector(as); \
1178 auto const vecImmStackValues = vecImm.size(); \
1179 as.ue->emitInt32(vecImmStackValues); \
1180 for (size_t i = 0; i < vecImmStackValues; ++i) { \
1181 as.ue->emitInt32(as.ue->mergeLitstr(String(vecImm[i]).get())); \
// Single-immediate readers. Each expands to code that reads one argument
// from the assembler input and emits its encoding through the unit
// emitter; all of them require an AsmState named `as` in scope.
//   IMM_SA     quoted string literal -> merged litstr id, emitted as int32
//   IMM_RATA   repo auth type        -> encoded with encodeRAT
//   IMM_I64A   integer argument      -> emitted as int64
//   IMM_DA     floating argument     -> emitted as double
//   IMM_LA     local variable name   -> id from as.getLocalId, emitted as IVA
//   IMM_IA     iterator id           -> checked by as.getIterId, emitted as IVA
//   IMM_OA(ty) subop name of type ty -> emitted as a single byte
//   IMM_AA     array literal reference -> merged array id, emitted as int32
// Note that IMM_OA, unlike the others, carries its own trailing semicolon.
1184 #define IMM_SA as.ue->emitInt32(as.ue->mergeLitstr(read_litstr(as)))
1185 #define IMM_RATA encodeRAT(*as.ue, read_repo_auth_type(as))
1186 #define IMM_I64A as.ue->emitInt64(read_opcode_arg<int64_t>(as))
1187 #define IMM_DA as.ue->emitDouble(read_opcode_arg<double>(as))
1188 #define IMM_LA as.ue->emitIVA(as.getLocalId( \
1189 read_opcode_arg<std::string>(as)))
1190 #define IMM_IA as.ue->emitIVA(as.getIterId( \
1191 read_opcode_arg<int32_t>(as)))
1192 #define IMM_OA(ty) as.ue->emitByte(read_subop<ty>(as));
1193 #define IMM_AA as.ue->emitInt32(as.ue->mergeArray(read_litarray(as)))
1196 * There can currently be no more than one immvector per instruction,
1197 * and we need access to the size of the immediate vector for
1198 * NUM_POP_*, so the member vector guy exposes a vecImmStackValues
1202 int vecImmStackValues = 0; \
1203 auto vecImm = read_immvector(as, vecImmStackValues); \
1204 as.ue->emitInt32(vecImm.size()); \
1205 as.ue->emitInt32(vecImmStackValues); \
1206 for (size_t i = 0; i < vecImm.size(); ++i) { \
1207 as.ue->emitByte(vecImm[i]); \
1210 #define IMM_ILA do { \
1211 std::vector<uint32_t> vecImm = read_itervec(as); \
1212 as.ue->emitInt32(vecImm.size() / 2); \
1213 for (auto& i : vecImm) { \
1214 as.ue->emitInt32(i); \
1218 #define IMM_BLA do { \
1219 std::vector<std::string> vecImm = read_jmpvector(as); \
1220 as.ue->emitInt32(vecImm.size()); \
1221 for (size_t i = 0; i < vecImm.size(); ++i) { \
1222 labelJumps.push_back( \
1223 std::make_pair(vecImm[i], as.ue->bcPos())); \
1224 as.ue->emitInt32(0); /* to be patched */ \
1228 #define IMM_SLA do { \
1229 SSwitchJmpVector vecImm = read_sswitch_jmpvector(as); \
1230 as.ue->emitInt32(vecImm.size()); \
1231 for (auto const& pair : vecImm) { \
1232 as.ue->emitInt32(pair.first); \
1233 labelJumps.push_back( \
1234 std::make_pair(pair.second, as.ue->bcPos())); \
1235 as.ue->emitInt32(0); /* to be patched */ \
1239 #define IMM_BA do { \
1240 labelJumps.push_back(std::make_pair( \
1241 read_opcode_arg<std::string>(as), \
1244 as.ue->emitInt32(0); \
// Stack-effect helpers. NUM_PUSH_<kind> / NUM_POP_<kind> expand to the
// number of stack values an opcode pushes / pops; the O() opcode-parser
// macro combines them as `NUM_PUSH_##push - NUM_POP_##pop` to obtain the
// stack delta passed to as.adjustStack().
//
// The fixed-arity forms (NOV, ONE, TWO, THREE, INS_1, INS_2) ignore their
// arguments and expand to a constant; both INS_1 and INS_2 count as one
// pushed value.
//
// The *MANY forms depend on locals of the generated parser function:
//   - MMANY / SMANY use vecImmStackValues, which the vector-immediate
//     macros define; V_MMANY, R_MMANY and C_MMANY add one extra value.
//   - FMANY, CVMANY, CVUMANY and CMANY use immIVA, which IMM_IVA sets to
//     the first IVA immediate it reads (the argument count).
1247 #define NUM_PUSH_NOV 0
1248 #define NUM_PUSH_ONE(a) 1
1249 #define NUM_PUSH_TWO(a,b) 2
1250 #define NUM_PUSH_THREE(a,b,c) 3
1251 #define NUM_PUSH_INS_1(a) 1
1252 #define NUM_PUSH_INS_2(a) 1
1253 #define NUM_POP_NOV 0
1254 #define NUM_POP_ONE(a) 1
1255 #define NUM_POP_TWO(a,b) 2
1256 #define NUM_POP_THREE(a,b,c) 3
1257 #define NUM_POP_MMANY vecImmStackValues
1258 #define NUM_POP_V_MMANY (1 + vecImmStackValues)
1259 #define NUM_POP_R_MMANY (1 + vecImmStackValues)
1260 #define NUM_POP_C_MMANY (1 + vecImmStackValues)
1261 #define NUM_POP_FMANY immIVA /* number of arguments */
1262 #define NUM_POP_CVMANY immIVA /* number of arguments */
1263 #define NUM_POP_CVUMANY immIVA /* number of arguments */
1264 #define NUM_POP_CMANY immIVA /* number of arguments */
1265 #define NUM_POP_SMANY vecImmStackValues
// O(name, imm, pop, push, flags) defines parse_opcode_<name>(AsmState&),
// the per-opcode parser. The generated function:
//   - remembers the bytecode offset of the instruction (curOpcodeOff) and
//     collects (label, offset) pairs for jump immediates in labelJumps;
//   - emits the opcode byte through as.ue->emitOp();
//   - adjusts the tracked stack depth by NUM_PUSH_<push> - NUM_POP_<pop>;
//   - opens an FPI region for FPush* opcodes;
//   - registers every collected label jump relative to curOpcodeOff;
//   - enforces a stack depth of 0 after RetC/RetV and of 1 after
//     CreateCont/Await/Yield/YieldK;
//   - marks the following code unreachable when the opcode has the TF flag.
// The NUM_PUSH_*/NUM_POP_* helper macros are #undef'd once the parsers
// have been generated.
1267 #define O(name, imm, pop, push, flags) \
1268 void parse_opcode_##name(AsmState& as) { \
1269 UNUSED int64_t immIVA = -1; \
1270 UNUSED auto const thisOpcode = Op::name; \
1271 UNUSED const Offset curOpcodeOff = as.ue->bcPos(); \
1272 std::vector<std::pair<std::string, Offset> > labelJumps; \
1277 as.in.getLineNumber(), \
1278 as.displayStackDepth().c_str(), \
1282 if (isFCallStar(Op##name)) { \
1286 as.ue->emitOp(Op##name); \
1290 int stackDelta = NUM_PUSH_##push - NUM_POP_##pop; \
1291 as.adjustStack(stackDelta); \
1293 if (isFPush(Op##name)) { \
1294 as.beginFpi(curOpcodeOff); \
1297 for (auto& kv : labelJumps) { \
1298 as.addLabelJump(kv.first, kv.second, curOpcodeOff); \
1301 /* Stack depth should be 0 after RetC or RetV. */ \
1302 if (thisOpcode == OpRetC || thisOpcode == OpRetV) { \
1303 as.enforceStackDepth(0); \
1306 /* Stack depth should be 1 after resume from suspend. */ \
1307 if (thisOpcode == OpCreateCont || thisOpcode == OpAwait || \
1308 thisOpcode == OpYield || thisOpcode == OpYieldK) { \
1309 as.enforceStackDepth(1); \
1312 if (instrFlags(thisOpcode) & InstrFlags::TF) { \
1313 as.enterUnreachableRegion(); \
1338 #undef NUM_PUSH_THREE
1339 #undef NUM_PUSH_POS_N
1340 #undef NUM_PUSH_INS_1
1344 #undef NUM_POP_THREE
1345 #undef NUM_POP_POS_N
1346 #undef NUM_POP_MMANY
1347 #undef NUM_POP_V_MMANY
1348 #undef NUM_POP_R_MMANY
1349 #undef NUM_POP_C_MMANY
1350 #undef NUM_POP_FMANY
1351 #undef NUM_POP_CVMANY
1352 #undef NUM_POP_CVUMANY
1353 #undef NUM_POP_CMANY
1354 #undef NUM_POP_SMANY
// Fills opcode_parsers: O() is redefined here so that each opcode entry
// maps its mnemonic string (#name) to the matching parse_opcode_<name>
// function.
1356 void initialize_opcode_map() {
1357 #define O(name, imm, pop, push, flags) \
1358 opcode_parsers[#name] = parse_opcode_##name;
// Helper type whose constructor populates the opcode parser table by
// calling initialize_opcode_map().
1363 struct Initializer
{
1364 Initializer() { initialize_opcode_map(); }
1367 //////////////////////////////////////////////////////////////////////
1370 * long-string-literal: <string>
1372 * `long-string-literal' is a python-style longstring. See
1373 * readLongString for more details.
1375 String
parse_long_string(AsmState
& as
) {
1376 as
.in
.skipWhitespace();
1378 std::vector
<char> buffer
;
1379 if (!as
.in
.readLongString(buffer
)) {
1380 as
.error("expected \"\"\"-string of serialized php data");
1382 if (buffer
.empty()) {
1383 as
.error("empty php serialized data is not a valid php object");
1386 // String wants a null, and dereferences one past the size we give
1388 buffer
.push_back('\0');
1389 return String(&buffer
[0], buffer
.size() - 1, CopyString
);
1393 * php-serialized : long-string-literal
1396 * `long-string-literal' is a python-style longstring. See
1397 * readLongString for more details.
1399 * Returns a Variant representing the serialized data. It's up to the
1400 * caller to make sure it is a legal literal.
1402 Variant
parse_php_serialized(AsmState
& as
) {
1403 return unserialize_from_string(parse_long_string(as
));
1407 * directive-numiters : integer ';'
1410 void parse_numiters(AsmState
& as
) {
1411 if (as
.numItersSet
) {
1412 as
.error("only one .numiters directive may appear in a given function");
1414 int32_t count
= read_opcode_arg
<int32_t>(as
);
1415 as
.numItersSet
= true;
1416 as
.fe
->setNumIterators(count
);
1417 as
.in
.expectWs(';');
// Forward declaration: parse_fault() and parse_catch() call
// parse_function_body() for their nested bodies, and it in turn
// dispatches back to them, so it has to be declared first.
1420 void parse_function_body(AsmState
&, int nestLevel
= 0);
1423 * directive-fault : identifier integer? '{' function-body
// Parses a `.try_fault` directive: a handler label, an optional integer
// (stored as the EH entry's iterator id), then '{' and a nested function
// body. Afterwards a Fault-type EH entry is added to the current function
// and the label is registered against it.
// nestLevel is the nesting depth of the enclosing body; the nested body
// is parsed at nestLevel + 1.
1426 void parse_fault(AsmState
& as
, int nestLevel
) {
// Bytecode position before the nested body is assembled.
1427 const Offset start
= as
.ue
->bcPos();
1430 if (!as
.in
.readword(label
)) {
1431 as
.error("expected label name after .try_fault");
1434 as
.in
.skipWhitespace();
// Anything other than '{' here is read as the integer iterator id.
1435 if (as
.in
.peek() != '{') {
1436 iterId
= read_opcode_arg
<int32_t>(as
);
1438 as
.in
.expectWs('{');
1439 parse_function_body(as
, nestLevel
+ 1);
// The EH entry is created only after the protected body has been parsed,
// so m_past is the bytecode position just past that body.
1441 EHEnt
& eh
= as
.fe
->addEHEnt();
1442 eh
.m_type
= EHEnt::Type::Fault
;
1444 eh
.m_past
= as
.ue
->bcPos();
1445 eh
.m_iterId
= iterId
;
// ehtab.size() - 1 is the last index of ehtab, presumably the entry
// added above.
1447 as
.addLabelEHFault(label
, as
.fe
->ehtab
.size() - 1);
1451 * directive-catch : catch-spec+ '{' function-body
1454 * catch-spec : '(' identifier identifier ')'
// Parses a `.try_catch` directive: one or more `(ExceptionType label)`
// pairs followed by a nested function body. Afterwards a Catch-type EH
// entry is added to the current function and every collected pair is
// registered against it via addLabelEHCatch().
// nestLevel is the nesting depth of the enclosing body; the nested body
// is parsed at nestLevel + 1.
1457 void parse_catch(AsmState
& as
, int nestLevel
) {
// Bytecode position before the nested body is assembled.
1458 const Offset start
= as
.ue
->bcPos();
1460 std::vector
<std::pair
<std::string
,std::string
> > catches
;
1461 size_t numCatches
= 0;
1462 as
.in
.skipWhitespace();
// One iteration per catch-spec; the loop ends at the first character
// that is not '('.
1463 for (; as
.in
.peek() == '('; ++numCatches
) {
1466 std::string except
, label
;
1467 if (!as
.in
.readword(except
) || !as
.in
.readword(label
)) {
1468 as
.error("expected (ExceptionType label) after .try_catch");
1471 as
.in
.expectWs(')');
1473 catches
.push_back(std::make_pair(except
, label
));
1474 as
.in
.skipWhitespace();
1476 if (catches
.empty()) {
1477 as
.error("expected at least one (ExceptionType label) pair "
1478 "after .try_catch");
1482 parse_function_body(as
, nestLevel
+ 1);
// The EH entry is created only after the protected body has been parsed,
// so m_past is the bytecode position just past that body.
1484 EHEnt
& eh
= as
.fe
->addEHEnt();
1485 eh
.m_type
= EHEnt::Type::Catch
;
1487 eh
.m_past
= as
.ue
->bcPos();
// Every catch-spec refers to the same EH entry: the last index of ehtab.
1490 for (size_t i
= 0; i
< catches
.size(); ++i
) {
1491 as
.addLabelEHCatch(catches
[i
].first
,
1493 as
.fe
->ehtab
.size() - 1);
1498 * function-body : fbody-line* '}'
1501 * fbody-line : ".numiters" directive-numiters
1502 * | ".try_fault" directive-fault
1503 * | ".try_catch" directive-catch
1508 * label-name : identifier ':'
1511 * opcode-line : opcode-mnemonic <junk that depends on opcode> '\n'
// Parses the lines of a function body up to its closing '}'.
// Each line is one of:
//   - a directive starting with '.': .numiters, .try_fault or .try_catch
//     (anything else starting with '.' is an error);
//   - a label definition, i.e. a word immediately followed by ':';
//   - an opcode mnemonic, looked up in opcode_parsers.
// nestLevel is forwarded to the .try_fault/.try_catch parsers, which
// recurse into this function with nestLevel + 1.
1514 void parse_function_body(AsmState
& as
, int nestLevel
/* = 0 */) {
1517 as
.in
.skipWhitespace();
// A '}' ends this body.
1518 if (as
.in
.peek() == '}') {
1521 as
.finishFunction();
1526 if (!as
.in
.readword(word
)) {
1527 as
.error("unexpected directive or opcode line in function body");
// readword() succeeded above, so `word` is expected to be non-empty here.
1529 if (word
[0] == '.') {
1530 if (word
== ".numiters") { parse_numiters(as
); continue; }
1531 if (word
== ".try_fault") { parse_fault(as
, nestLevel
); continue; }
1532 if (word
== ".try_catch") { parse_catch(as
, nestLevel
); continue; }
1533 as
.error("unrecognized directive `" + word
+ "' in function");
// A ':' directly after the word makes it a label definition.
1535 if (as
.in
.peek() == ':') {
1537 as
.addLabelTarget(word
);
1541 // Ok, it better be an opcode now.
1542 OpcodeParserMap::const_iterator it
= opcode_parsers
.find(word
);
1543 if (it
== opcode_parsers
.end()) {
1544 as
.error("unrecognized opcode `" + word
+ "'");
// The rest of the line must be empty: after spaces/tabs only a newline,
// a '#', or EOF may follow.
1548 as
.in
.skipSpaceTab();
1549 if (as
.in
.peek() != '\n' && as
.in
.peek() != '#' && as
.in
.peek() != EOF
) {
1550 as
.error("too many arguments for opcode `" + word
+ "'");
1556 * attribute-list : empty
1557 * | '[' attribute-name* ']'
1560 * The `attribute-name' rule is context-sensitive; see as-shared.cpp.
// Parses an optional `[ attribute-name* ]` list and returns the resulting
// Attr bits. ctx selects which attribute names string_to_attr() accepts.
// For the Class and Func contexts, AttrUnique | AttrPersistent |
// AttrBuiltin are added up front while SystemLib::s_inited is false.
// When no '[' follows, the bits collected so far are returned as-is.
// A word that is not a known attribute for ctx is an assembler error.
1562 Attr
parse_attribute_list(AsmState
& as
, AttrContext ctx
) {
1563 as
.in
.skipWhitespace();
1565 if (ctx
== AttrContext::Class
|| ctx
== AttrContext::Func
) {
1566 if (!SystemLib::s_inited
) {
1567 ret
|= AttrUnique
| AttrPersistent
| AttrBuiltin
;
// No attribute list present.
1570 if (as
.in
.peek() != '[') return Attr(ret
);
// The word loop stops at ']' or when no further word can be read.
1575 as
.in
.skipWhitespace();
1576 if (as
.in
.peek() == ']') break;
1577 if (!as
.in
.readword(word
)) break;
1579 auto const abit
= string_to_attr(ctx
, word
);
1585 as
.error("unrecognized attribute `" + word
+ "' in this context");
1592 * parameter-list : '(' param-name-list ')'
1595 * param-name-list : empty
1596 * | param-name ',' param-name-list
1599 * param-name : '$' identifier dv-initializer
1600 * | '&' '$' identifier dv-initializer
1603 * dv-initializer : empty
1604 * | '=' identifier arg-default
1607 * arg-default : empty
1608 * | '(' long-string-literal ')'
// Parses an optional parenthesized parameter list and appends each
// parameter to the current FuncEmitter (as.fe). Returns immediately when
// the next non-whitespace character is not '('.
// Per parameter it handles a `$name`, an optional `= label`
// dv-initializer, and an optional `( long-string )` with the default's
// PHP source. A literal "..." is also accepted: it must be followed by
// ')' and sets AttrMayUseVV on the function.
1611 void parse_parameter_list(AsmState
& as
) {
1612 as
.in
.skipWhitespace();
1613 if (as
.in
.peek() != '(') return;
1616 // Once we see one dv-initializer, every parameter after that must
1617 // have a dv-initializer.
1618 bool inDVInits
= false;
1621 FuncEmitter::ParamInfo param
;
1623 as
.in
.skipWhitespace();
1624 int ch
= as
.in
.getc();
1625 if (ch
== ')') break; // allow empty param lists
// Two more '.' characters are required here to complete "...".
1627 if (as
.in
.getc() != '.' ||
1628 as
.in
.getc() != '.') {
1629 as
.error("expecting '...'");
1631 as
.in
.expectWs(')');
1632 as
.fe
->attrs
|= AttrMayUseVV
;
1640 as
.error("function parameters must have a $ prefix");
1643 if (!as
.in
.readword(name
)) {
1644 as
.error("expected parameter name after $");
1647 as
.in
.skipWhitespace();
// dv-initializer: the label is registered against the index this
// parameter will get, i.e. the current size of as.fe->params.
1653 if (!as
.in
.readword(label
)) {
1654 as
.error("expected label name for dv-initializer");
1656 as
.addLabelDVInit(label
, as
.fe
->params
.size());
1658 as
.in
.skipWhitespace();
// arg-default: the long string is kept as the parameter's phpCode.
1661 String str
= parse_long_string(as
);
1662 param
.phpCode
= makeStaticString(str
);
// The text is compared case-insensitively against "null", "true" and
// "false"; "true"/"false" produce boolean TypedValues.
1665 if (str
.size() == 4) {
1666 if (!strcasecmp("null", str
.data())) {
1668 } else if (!strcasecmp("true", str
.data())) {
1669 tv
= make_tv
<KindOfBoolean
>(true);
1671 } else if (str
.size() == 5 && !strcasecmp("false", str
.data())) {
1672 tv
= make_tv
<KindOfBoolean
>(false);
// defaultValue is set only when tv was given a type other than Uninit.
1674 if (tv
.m_type
!= KindOfUninit
) {
1675 param
.defaultValue
= tv
;
1677 as
.in
.expectWs(')');
1678 as
.in
.skipWhitespace();
1683 as
.error("all parameters after the first with a dv-initializer "
1684 "must have a dv-initializer");
1688 as
.fe
->appendParam(makeStaticString(name
), param
);
// After a parameter only ')' (end of list) or ',' is accepted.
1690 if (ch
== ')') break;
1691 if (ch
!= ',') as
.error("expected , between parameter names");
// Reads function flag words that precede the opening '{' of a function
// and sets the matching boolean on the current FuncEmitter:
// isGenerator, isAsync, isClosureBody or isPairGenerator.
// Reading stops at '{' or when no word can be read; any other word is
// reported as an unexpected function flag.
1695 void parse_function_flags(AsmState
& as
) {
1696 as
.in
.skipWhitespace();
1699 if (as
.in
.peek() == '{') break;
1700 if (!as
.in
.readword(flag
)) break;
1702 if (flag
== "isGenerator") {
1703 as
.fe
->isGenerator
= true;
1704 } else if (flag
== "isAsync") {
1705 as
.fe
->isAsync
= true;
1706 } else if (flag
== "isClosureBody") {
1707 as
.fe
->isClosureBody
= true;
1708 } else if (flag
== "isPairGenerator") {
1709 as
.fe
->isPairGenerator
= true;
1711 as
.error("Unexpected function flag \"" + flag
+ "\"");
1717 * directive-function : attribute-list identifier parameter-list function-flags
// Parses a top-level `.function` directive: attribute list, function
// name, parameter list, function flags, then '{' and the function body.
// A new FuncEmitter is created on the unit and becomes as.fe.
// It is an error for a .function to appear before the pseudo-main has
// been emitted.
1721 void parse_function(AsmState
& as
) {
1722 if (!as
.emittedPseudoMain
) {
1723 as
.error(".function blocks must all follow the .main block");
1726 Attr attrs
= parse_attribute_list(as
, AttrContext::Func
);
1728 if (!as
.in
.readword(name
)) {
1729 as
.error(".function must have a name");
1732 as
.fe
= as
.ue
->newFuncEmitter(makeStaticString(name
));
// The second line number passed to init() is a placeholder (current
// line + 1, marked XXX), not a value derived from the parsed body.
1733 as
.fe
->init(as
.in
.getLineNumber(), as
.in
.getLineNumber() + 1 /* XXX */,
1734 as
.ue
->bcPos(), attrs
, true, 0);
1736 parse_parameter_list(as
);
1737 parse_function_flags(as
);
1739 as
.in
.expectWs('{');
1741 parse_function_body(as
);
1745 * directive-method : attribute-list identifier parameter-list
// Parses a `.method` directive inside a class body: attribute list,
// method name, parameter list, then '{' and the function body.
// The new method emitter is created for the current class emitter
// (as.pce), added to it, and becomes as.fe.
1749 void parse_method(AsmState
& as
) {
1750 as
.in
.skipWhitespace();
1752 Attr attrs
= parse_attribute_list(as
, AttrContext::Func
);
1754 if (!as
.in
.readword(name
)) {
1755 as
.error(".method requires a method name");
1758 as
.fe
= as
.ue
->newMethodEmitter(makeStaticString(name
), as
.pce
);
1759 as
.pce
->addMethod(as
.fe
);
// The second line number passed to init() is a placeholder (current
// line + 1, marked XXX).
1760 as
.fe
->init(as
.in
.getLineNumber(), as
.in
.getLineNumber() + 1 /* XXX */,
1761 as
.ue
->bcPos(), attrs
, true, 0);
1763 parse_parameter_list(as
);
1764 as
.in
.expectWs('{');
1766 parse_function_body(as
);
1770 * member-tv-initializer : '=' php-serialized ';'
// Parses the initializer that follows a property or constant name and
// returns it as a TypedValue. Accepted forms:
//   `= """<php-serialized>""" ;`  the unserialized value,
//   `= uninit ;`                  an Uninit TypedValue,
//   `;`                           no initializer.
// The result starts out as null. String values are replaced by their
// static string and merged into the unit's litstr table; array values
// are replaced by their scalar array and merged into the unit.
// Object and resource values are rejected with an assembler error.
1775 TypedValue
parse_member_tv_initializer(AsmState
& as
) {
1776 as
.in
.skipWhitespace();
1779 tvWriteNull(&tvInit
); // Don't confuse Variant with uninit data
// `what` is the character after the member name ('=' or ';' expected).
1781 int what
= as
.in
.getc();
1783 as
.in
.skipWhitespace();
1785 if (as
.in
.peek() != '\"') {
1786 // It might be an uninitialized property/constant.
1787 if (!as
.in
.tryConsume("uninit")) {
1788 as
.error("Expected \"\"\" or \"uninit\" after '=' in "
1789 "const/property initializer");
1791 as
.in
.expectWs(';');
1792 tvWriteUninit(&tvInit
);
// Serialized literal: unserialize it into tvInit.
1796 tvAsVariant(&tvInit
) = parse_php_serialized(as
);
1797 if (IS_STRING_TYPE(tvInit
.m_type
)) {
1798 tvInit
.m_data
.pstr
= makeStaticString(tvInit
.m_data
.pstr
);
1799 as
.ue
->mergeLitstr(tvInit
.m_data
.pstr
);
1800 } else if (IS_ARRAY_TYPE(tvInit
.m_type
)) {
1801 tvInit
.m_data
.parr
= ArrayData::GetScalarArray(tvInit
.m_data
.parr
);
1802 as
.ue
->mergeArray(tvInit
.m_data
.parr
);
1803 } else if (tvInit
.m_type
== KindOfObject
) {
1804 as
.error("property initializer can't be an object");
1805 } else if (tvInit
.m_type
== KindOfResource
) {
1806 as
.error("property initializer can't be a resource");
1808 as
.in
.expectWs(';');
1809 } else if (what
== ';') {
1812 as
.error("expected '=' or ';' after property name");
1819 * directive-property : attribute-list identifier member-tv-initializer
// Parses a `.property` directive: attribute list (Prop context),
// property name and member initializer, then adds the property to the
// current class emitter (as.pce). Two of the addProperty() arguments
// are passed as static empty strings.
1823 void parse_property(AsmState
& as
) {
1824 as
.in
.skipWhitespace();
1826 Attr attrs
= parse_attribute_list(as
, AttrContext::Prop
);
1828 if (!as
.in
.readword(name
)) {
1829 as
.error("expected name for property");
1832 TypedValue tvInit
= parse_member_tv_initializer(as
);
1833 as
.pce
->addProperty(makeStaticString(name
),
1834 attrs
, staticEmptyString(),
1835 staticEmptyString(),
1841 * directive-const : identifier member-tv-initializer
// Parses a `.const` directive: constant name followed by a member
// initializer, then adds the constant to the current class emitter
// (as.pce). The second and fourth addConstant() arguments are passed as
// static empty strings.
1844 void parse_constant(AsmState
& as
) {
1845 as
.in
.skipWhitespace();
1848 if (!as
.in
.readword(name
)) {
1849 as
.error("expected name for constant");
1852 TypedValue tvInit
= parse_member_tv_initializer(as
);
1853 as
.pce
->addConstant(makeStaticString(name
),
1854 staticEmptyString(), &tvInit
,
1855 staticEmptyString());
1859 * directive-default-ctor : ';'
1862 * Creates an 86ctor stub for the class.
1864 void parse_default_ctor(AsmState
& as
) {
1865 assert(!as
.fe
&& as
.pce
);
1867 as
.fe
= as
.ue
->newMethodEmitter(
1868 makeStaticString("86ctor"), as
.pce
);
1869 as
.pce
->addMethod(as
.fe
);
1870 as
.fe
->init(as
.in
.getLineNumber(), as
.in
.getLineNumber(),
1871 as
.ue
->bcPos(), AttrPublic
, true, 0);
1872 as
.ue
->emitOp(OpNull
);
1873 as
.ue
->emitOp(OpRetC
);
1874 as
.stackHighWater
= 1;
1875 as
.finishFunction();
1877 as
.in
.expectWs(';');
1881 * directive-use : identifier+ ';'
1882 * | identifier+ '{' use-line* '}'
1885 * use-line : use-name-ref "insteadof" identifier+ ';'
1886 * | use-name-ref "as" attribute-list identifier ';'
1887 * | use-name-ref "as" attribute-list ';'
// Parses a `.use` directive inside a class body.
// First a list of trait names is read (at least one is required) and
// each is added to the current class emitter as a used trait. If a '{'
// follows, the block holds rule lines, each terminated by ';':
//   <name-ref> as [attrs] [alias]     -> addTraitAliasRule()
//   <name-ref> insteadof <trait>+     -> addTraitPrecRule()
// where <name-ref> is either `Trait::name` or a bare name.
1890 void parse_use(AsmState
& as
) {
1891 std::vector
<std::string
> usedTraits
;
1894 if (!as
.in
.readword(name
)) break;
1895 usedTraits
.push_back(name
);
1897 if (usedTraits
.empty()) {
1898 as
.error(".use requires a trait name");
1901 for (size_t i
= 0; i
< usedTraits
.size(); ++i
) {
1902 as
.pce
->addUsedTrait(makeStaticString(usedTraits
[i
]));
1904 as
.in
.skipWhitespace();
1905 if (as
.in
.peek() != '{') {
// Rule lines: the block ends at '}'.
1912 as
.in
.skipWhitespace();
1913 if (as
.in
.peek() == '}') break;
1915 std::string traitName
;
1916 std::string identifier
;
1917 if (!as
.in
.readword(traitName
)) {
1918 as
.error("expected identifier for line in .use block");
1920 as
.in
.skipWhitespace();
// A ':' after the first word introduces the `::name` part of the
// reference; otherwise the first word itself is used as the identifier.
1921 if (as
.in
.peek() == ':') {
1924 if (!as
.in
.readword(identifier
)) {
1925 as
.error("expected identifier after ::");
1928 identifier
= traitName
;
// `as` rule: an attribute list (TraitImport context) and/or an alias.
1932 if (as
.in
.tryConsume("as")) {
1933 Attr attrs
= parse_attribute_list(as
, AttrContext::TraitImport
);
1935 if (!as
.in
.readword(alias
)) {
1936 if (attrs
!= AttrNone
) {
1939 as
.error("expected identifier or attribute list after "
1940 "`as' in .use block");
1944 as
.pce
->addTraitAliasRule(PreClass::TraitAliasRule(
1945 makeStaticString(traitName
),
1946 makeStaticString(identifier
),
1947 makeStaticString(alias
),
// `insteadof` rule: needs an explicit trait name and at least one other
// trait to take precedence over.
1949 } else if (as
.in
.tryConsume("insteadof")) {
1950 if (traitName
.empty()) {
1951 as
.error("Must specify TraitName::name when using a trait insteadof");
1954 PreClass::TraitPrecRule
precRule(
1955 makeStaticString(traitName
),
1956 makeStaticString(identifier
));
1958 bool addedOtherTraits
= false;
1960 while (as
.in
.readword(whom
)) {
1961 precRule
.addOtherTraitName(makeStaticString(whom
));
1962 addedOtherTraits
= true;
1964 if (!addedOtherTraits
) {
1965 as
.error("one or more trait names expected after `insteadof'");
1968 as
.pce
->addTraitPrecRule(precRule
);
1970 as
.error("expected `as' or `insteadof' in .use block");
1973 as
.in
.expectWs(';');
1980 * class-body : class-body-line* '}'
1983 * class-body-line : ".method" directive-method
1984 * | ".property" directive-property
1985 * | ".const" directive-const
1986 * | ".use" directive-use
1987 * | ".default_ctor" directive-default-ctor
// Parses the directives inside a class body, dispatching each word to
// the matching parser: .method, .property, .const, .use, .default_ctor.
// Any other word is reported as an unrecognized directive. The loop ends
// when no further word can be read.
// It is an error for a class to appear before the pseudo-main has been
// emitted.
1990 void parse_class_body(AsmState
& as
) {
1991 if (!as
.emittedPseudoMain
) {
1992 as
.error(".class blocks must all follow the .main block");
1995 std::string directive
;
1996 while (as
.in
.readword(directive
)) {
1997 if (directive
== ".method") { parse_method(as
); continue; }
1998 if (directive
== ".property") { parse_property(as
); continue; }
1999 if (directive
== ".const") { parse_constant(as
); continue; }
2000 if (directive
== ".use") { parse_use(as
); continue; }
2001 if (directive
== ".default_ctor") { parse_default_ctor(as
); continue; }
2003 as
.error("unrecognized directive `" + directive
+ "' in class");
2010 * directive-class : attribute-list identifier extension-clause
2011 * implements-clause '{' class-body
2014 * extension-clause : empty
2015 * | "extends" identifier
2018 * implements-clause : empty
2019 * | "implements" '(' identifier* ')'
// Parses a top-level `.class` directive: attribute list (Class context),
// class name, optional `extends <parent>`, optional
// `implements ( <interface>* )`, then '{' and the class body.
// A PreClassEmitter (MaybeHoistable) is created on the unit and becomes
// as.pce; the collected interfaces are added to it before the body is
// parsed. With no `extends` clause the parent name stays empty.
2023 void parse_class(AsmState
& as
) {
2024 as
.in
.skipWhitespace();
2026 Attr attrs
= parse_attribute_list(as
, AttrContext::Class
);
2028 if (!as
.in
.readword(name
)) {
2029 as
.error(".class must have a name");
2032 std::string parentName
;
2033 if (as
.in
.tryConsume("extends")) {
2034 if (!as
.in
.readword(parentName
)) {
2035 as
.error("expected parent class name after `extends'");
2039 std::vector
<std::string
> ifaces
;
2040 if (as
.in
.tryConsume("implements")) {
2041 as
.in
.expectWs('(');
2043 while (as
.in
.readword(word
)) {
2044 ifaces
.push_back(word
);
2049 as
.pce
= as
.ue
->newPreClassEmitter(makeStaticString(name
),
2050 PreClass::MaybeHoistable
);
// The second line number passed to init() is a placeholder (current
// line + 1, marked XXX).
2051 as
.pce
->init(as
.in
.getLineNumber(),
2052 as
.in
.getLineNumber() + 1, // XXX
2055 makeStaticString(parentName
),
2056 staticEmptyString());
2057 for (size_t i
= 0; i
< ifaces
.size(); ++i
) {
2058 as
.pce
->addInterface(makeStaticString(ifaces
[i
]));
2061 as
.in
.expectWs('{');
2062 parse_class_body(as
);
2066 * directive-filepath : quoted-string-literal ';'
2069 void parse_filepath(AsmState
& as
) {
2070 auto const str
= read_litstr(as
);
2071 as
.ue
->m_filepath
= str
;
2072 as
.in
.expectWs(';');
2076 * directive-main : '{' function-body
// Parses the `.main` directive: '{' followed by the pseudo-main's
// function body. The unit's main FuncEmitter becomes as.fe and
// as.emittedPseudoMain is set.
// If a pseudo-main has already been emitted this is an error. That
// includes the case where SystemLib::s_inited is false, because parse()
// emits a trivial pseudo-main up front there.
2079 void parse_main(AsmState
& as
) {
2080 if (as
.emittedPseudoMain
) {
2081 if (!SystemLib::s_inited
) {
2082 as
.error(".main found in systemlib");
2084 as
.error("Multiple .main directives found");
2087 as
.in
.expectWs('{');
// The second line number passed to initMain() is a placeholder (current
// line + 1, marked XXX).
2089 as
.ue
->initMain(as
.in
.getLineNumber(),
2090 as
.in
.getLineNumber() + 1 /* XXX */);
2091 as
.fe
= as
.ue
->getMain();
2092 as
.emittedPseudoMain
= true;
2093 parse_function_body(as
);
2097 * directive-adata : identifier '=' php-serialized ';'
2100 void parse_adata(AsmState
& as
) {
2101 as
.in
.skipWhitespace();
2102 std::string dataLabel
;
2103 if (!as
.in
.readword(dataLabel
)) {
2104 as
.error("expected name for .adata");
2106 if (as
.adataMap
.count(dataLabel
)) {
2107 as
.error("duplicate adata label name " + dataLabel
);
2110 as
.in
.expectWs('=');
2111 Variant var
= parse_php_serialized(as
);
2112 if (!var
.isArray()) {
2113 as
.error(".adata only supports serialized arrays");
2115 Array
arr(var
.toArray());
2116 ArrayData
* data
= ArrayData::GetScalarArray(arr
.get());
2117 as
.ue
->mergeArray(data
);
2118 as
.adataMap
[dataLabel
] = data
;
2120 as
.in
.expectWs(';');
2124 * asm-file : asm-tld* <EOF>
2127 * asm-tld : ".filepath" directive-filepath
2128 * | ".main" directive-main
2129 * | ".function" directive-function
2130 * | ".adata" directive-adata
2131 * | ".class" directive-class
2134 void parse(AsmState
& as
) {
2135 as
.in
.skipWhitespace();
2136 std::string directive
;
2137 if (!SystemLib::s_inited
) {
2139 * The SystemLib::s_hhas_unit is required to be merge-only,
2140 * and we create the source by concatenating separate .hhas files
2141 * Rather than choosing one to have the .main directive, we just
2142 * generate a trivial pseudoMain automatically.
2144 as
.ue
->addTrivialPseudoMain();
2145 as
.emittedPseudoMain
= true;
2148 while (as
.in
.readword(directive
)) {
2149 if (directive
== ".filepath") { parse_filepath(as
); continue; }
2150 if (directive
== ".main") { parse_main(as
); continue; }
2151 if (directive
== ".function") { parse_function(as
); continue; }
2152 if (directive
== ".adata") { parse_adata(as
); continue; }
2153 if (directive
== ".class") { parse_class(as
); continue; }
2155 as
.error("unrecognized top-level directive `" + directive
+ "'");
2158 if (!as
.emittedPseudoMain
) {
2159 as
.error("no .main found in hhas unit");
2165 //////////////////////////////////////////////////////////////////////
// Assembles hhas source text into a new UnitEmitter.
//   code, codeLen - the source buffer and its length in bytes
//   filename      - stored (as a static string) as the unit's file path
//   md5           - passed to the UnitEmitter constructor
// If a std::exception is caught, the partially built unit is discarded
// and replaced by a fresh one whose main emits a String with the
// exception message followed by a Fatal (FatalOp::Runtime), so the
// error surfaces when the unit is run.
// The returned pointer is released from the local unique_ptr; the caller
// owns it.
2167 UnitEmitter
* assemble_string(const char* code
, int codeLen
,
2168 const char* filename
, const MD5
& md5
) {
2169 std::unique_ptr
<UnitEmitter
> ue(new UnitEmitter(md5
));
2170 StringData
* sd
= makeStaticString(filename
);
2171 ue
->m_filepath
= sd
;
// The source is copied into a std::string-backed input stream.
2174 std::istringstream
instr(std::string(code
, codeLen
));
// Error path: start over with an empty unit that only reports the error.
2178 } catch (const std::exception
& e
) {
2179 ue
.reset(new UnitEmitter(md5
));
2180 ue
->m_filepath
= sd
;
2182 ue
->emitOp(OpString
);
2183 ue
->emitInt32(ue
->mergeLitstr(makeStaticString(e
.what())));
2184 ue
->emitOp(OpFatal
);
2185 ue
->emitByte(static_cast<uint8_t>(FatalOp::Runtime
));
2186 FuncEmitter
* fe
= ue
->getMain();
// Only the message string is pushed, so one stack cell suffices.
2187 fe
->maxStackCells
= 1;
2188 // XXX line numbers are bogus
2189 fe
->finish(ue
->bcPos(), false);
2190 ue
->recordFunction(fe
);
2193 return ue
.release();
2196 //////////////////////////////////////////////////////////////////////