Add a .metadata directive to hhas
[hiphop-php.git] / hphp / runtime / vm / as.cpp
blob4a8f809507e65f45946a556ad3b72ac211f167be
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
18 * This module contains an assembler implementation for HHBC. It is
19 * probably fairly close to allowing you to access most of the
20 * metadata associated with hhvm's compiled unit format, although it's
21 * possible something has been overlooked.
23 * To use it, run hhvm with -v Eval.AllowHhas=true on a file with a
24 * ".hhas" extension. The syntax is probably easiest to understand by
25 * looking at some examples (or the semi-BNF markup around some of the
26 * parse functions here). For examples, see hphp/tests/vm/asm_*.
29 * Notes:
31 * - You can crash hhvm very easily with this.
33 * Using this module, you can emit pretty much any sort of not
34 * trivially-illegal bytecode stream, and many trivially-illegal
35 * ones as well. You can also easily create Units with illegal
36 * metadata. Generally this will crash the VM. In other cases
37 * (especially if you don't bother to DefCls your classes in your
38 * .main) you'll just get mysterious "class not defined" errors
39 * or weird behavior.
41 * - Whitespace is not normally significant, but newlines may not
42 * be in the middle of a list of opcode arguments. (After the
43 * newline, the next thing seen is expected to be either a
44 * mnemonic for the next opcode in the stream or some sort of
45 * directive.) However, newlines (and comments) may appear
46 * *inside* certain opcode arguments (e.g. string literals or
47 * vector immediates).
49 * Rationale: this is partially intended to make it trivial to
50 * catch wrong-number-of-arguments errors, although it probably
51 * could be done without this if you feel like changing it.
54 * Wishlist:
56 * - It might be nice if you could refer to iterators by name
57 * instead of by index.
59 * - DefCls by name would be nice.
61 * Missing features (partial list):
63 * - builtinType (for native funcs) field on ParamInfo
65 * - while class/function names can contain ':', '$', and ';',
66 * .use declarations can't handle those names because of syntax
67 * conflicts
69 * @author Jordan DeLong <delong.j@fb.com>
72 #include "hphp/runtime/vm/as.h"
74 #include <algorithm>
75 #include <cstdio>
76 #include <iostream>
77 #include <iterator>
78 #include <vector>
80 #include <boost/algorithm/string.hpp>
81 #include <boost/scoped_ptr.hpp>
82 #include <boost/bind.hpp>
84 #include <folly/Conv.h>
85 #include <folly/MapUtil.h>
86 #include <folly/Memory.h>
87 #include <folly/Range.h>
88 #include <folly/String.h>
90 #include "hphp/util/md5.h"
92 #include "hphp/parser/parser.h"
93 #include "hphp/runtime/base/builtin-functions.h"
94 #include "hphp/runtime/base/repo-auth-type-codec.h"
95 #include "hphp/runtime/base/repo-auth-type.h"
96 #include "hphp/runtime/base/tv-type.h"
97 #include "hphp/runtime/vm/as-shared.h"
98 #include "hphp/runtime/vm/func-emitter.h"
99 #include "hphp/runtime/vm/hhbc.h"
100 #include "hphp/runtime/vm/preclass-emitter.h"
101 #include "hphp/runtime/vm/unit.h"
102 #include "hphp/runtime/vm/unit-emitter.h"
103 #include "hphp/system/systemlib.h"
105 TRACE_SET_MOD(hhas);
107 namespace HPHP {
109 //////////////////////////////////////////////////////////////////////
111 namespace {
struct AsmState;

// Signature shared by parser entry points: a free function that takes the
// assembler state by reference and returns nothing.
using ParserFunc = void (*)(AsmState&);
116 struct Error : std::runtime_error {
117 explicit Error(int where, const std::string& what)
118 : std::runtime_error(folly::sformat(
119 "Assembler Error: line {}: {}", where, what))
123 struct Input {
124 explicit Input(std::istream& in)
125 : m_in(in)
128 int peek() { return m_in.peek(); }
130 int getc() {
131 int ret = m_in.get();
132 if (ret == EOF) {
133 io_error_if_bad();
134 } else if (ret == '\n') {
135 ++m_lineNumber;
137 return ret;
140 void ungetc(char c) {
141 if (c == '\n') --m_lineNumber;
142 m_in.putback(c);
145 void expect(int c) {
146 if (getc() != c) {
147 error(folly::sformat("expected character `{}'", char(c)));
152 * Expect `c' after possible whitespace/comments. When convenient,
153 * preferable to doing skipWhitespace/expect manually to keep the
154 * line number in the error prior to the whitespace skipped.
156 void expectWs(int c) {
157 const int currentLine = m_lineNumber;
158 skipWhitespace();
159 if (getc() != c) {
160 throw Error(currentLine,
161 folly::sformat("expected character `{}'", char(c)));
165 int getLineNumber() const {
166 return m_lineNumber;
169 // Skips whitespace, then populates word with valid bareword
170 // characters. Returns true if we read any characters into word.
171 bool readword(std::string& word) {
172 word.clear();
173 skipWhitespace();
174 consumePred(is_bareword(), std::back_inserter(word));
175 return !word.empty();
177 // Skips whitespace, then populates name with valid extname
178 // characters. Returns true if we read any characters into name.
179 bool readname(std::string& name) {
180 name.clear();
181 skipWhitespace();
182 consumePred(is_extname(), std::back_inserter(name));
183 return !name.empty();
185 // Try to consume a bareword. Skips whitespace. If we can't
186 // consume the specified word, returns false.
187 bool tryConsume(const std::string& what) {
188 std::string word;
189 if (!readword(word)) {
190 return false;
192 if (word != what) {
193 std::for_each(word.rbegin(), word.rend(),
194 boost::bind(&Input::ungetc, this, _1));
195 return false;
197 return true;
199 int32_t readint() {
200 std::string buf;
201 skipWhitespace();
202 if (peek() == '-') buf += (char)getc();
203 consumePred(isdigit, std::back_inserter(buf));
204 if (buf.empty() || buf == "-") {
205 throw Error(m_lineNumber, "expected integral value");
207 return folly::to<int32_t>(buf);
210 // C-style character escapes, no support for unicode escapes or
211 // whatnot.
212 template<class OutCont>
213 void escapeChar(OutCont& out) {
214 auto is_oct = [&] (int i) { return i >= '0' && i <= '7'; };
215 auto is_hex = [&] (int i) {
216 return (i >= '0' && i <= '9') ||
217 (i >= 'a' && i <= 'f') ||
218 (i >= 'A' && i <= 'F');
220 auto hex_val = [&] (int i) -> uint32_t {
221 assert(is_hex(i));
222 return i >= '0' && i <= '9' ? i - '0' :
223 i >= 'a' && i <= 'f' ? i - 'a' + 10 : i - 'A' + 10;
226 auto src = getc();
227 switch (src) {
228 case EOF: error("EOF in string literal");
229 case 'a': out.push_back('\a'); break;
230 case 'b': out.push_back('\b'); break;
231 case 'f': out.push_back('\f'); break;
232 case 'n': out.push_back('\n'); break;
233 case 'r': out.push_back('\r'); break;
234 case 't': out.push_back('\t'); break;
235 case 'v': out.push_back('\v'); break;
236 case '\'': out.push_back('\''); break;
237 case '\"': out.push_back('\"'); break;
238 case '\?': out.push_back('\?'); break;
239 case '\\': out.push_back('\\'); break;
240 case '\r': /* ignore */ break;
241 case '\n': /* ignore */ break;
242 default:
243 if (is_oct(src)) {
244 auto val = int64_t{src} - '0';
245 for (auto i = int{1}; i < 3; ++i) {
246 src = getc();
247 if (!is_oct(src)) { ungetc(src); break; }
248 val *= 8;
249 val += src - '0';
251 if (val > std::numeric_limits<uint8_t>::max()) {
252 error("octal escape sequence overflowed");
254 out.push_back(static_cast<uint8_t>(val));
255 return;
258 if (src == 'x' || src == 'X') {
259 auto val = uint64_t{0};
260 if (!is_hex(peek())) error("\\x used without no following hex digits");
261 for (auto i = int{0}; i < 2; ++i) {
262 src = getc();
263 if (!is_hex(src)) { ungetc(src); break; }
264 val *= 0x10;
265 val += hex_val(src);
267 if (val > std::numeric_limits<uint8_t>::max()) {
268 error("hex escape sequence overflowed");
270 out.push_back(static_cast<uint8_t>(val));
271 return;
274 error("unrecognized character escape");
278 // Reads a quoted string with typical escaping rules. Does not skip
279 // any whitespace. Returns true if we successfully read one, or
280 // false. EOF during the string throws.
281 bool readQuotedStr(std::string& str) {
282 str.clear();
283 if (peek() != '\"') {
284 return false;
286 getc();
288 int c;
289 while ((c = getc()) != EOF) {
290 switch (c) {
291 case '\"': return true;
292 case '\\': escapeChar(str); break;
293 default: str.push_back(c); break;
296 error("EOF in string literal");
297 not_reached();
298 return false;
302 * Reads a python-style longstring, or returns false if we don't
303 * have one. Does not skip any whitespace before looking for the
304 * string.
306 * Python longstrings start with \"\"\", and can contain any bytes
307 * other than \"\"\". A '\\' character introduces C-style escapes,
308 * but there's no need to escape single quote characters.
310 bool readLongString(std::vector<char>& buffer) {
311 if (peek() != '\"') return false;
312 getc();
313 if (peek() != '\"') { ungetc('\"'); return false; }
314 getc();
315 if (peek() != '\"') { ungetc('\"');
316 ungetc('\"'); return false; }
317 getc();
319 int c;
320 while ((c = getc()) != EOF) {
321 if (c == '\\') {
322 escapeChar(buffer);
323 continue;
325 if (c == '"') {
326 c = getc();
327 if (c != '"') {
328 buffer.push_back('"');
329 ungetc(c);
330 continue;
332 c = getc();
333 if (c != '"') {
334 buffer.push_back('"');
335 buffer.push_back('"');
336 ungetc(c);
337 continue;
339 return true;
342 buffer.push_back(c);
344 error("EOF in \"\"\"-string literal");
345 not_reached();
346 return false;
349 // Skips whitespace (including newlines and comments).
350 void skipWhitespace() {
351 while (skipPred(boost::is_any_of(" \t\r\n"))) {
352 if (peek() == '#') {
353 skipPred(!boost::is_any_of("\n"));
354 expect('\n');
355 } else {
356 break;
361 // Skip spaces and tabs, but other whitespace (such as comments or
362 // newlines) stop the skip.
363 void skipSpaceTab() {
364 skipPred(boost::is_any_of(" \t"));
367 template<class Predicate>
368 bool skipPred(Predicate pred) {
369 while (pred(peek())) {
370 if (getc() == EOF) {
371 return false;
375 return true;
378 template<class Predicate, class OutputIterator>
379 bool consumePred(Predicate pred, OutputIterator out) {
380 int c;
381 while (pred(c = peek())) {
382 if (getc() == EOF) {
383 return false;
386 *out++ = c;
389 return true;
392 private:
393 // whether a character is a valid part of the extended sorts of
394 // names that HHVM uses for certain generated constructs
395 // (closures, __Memoize implementations, etc)
396 struct is_extname {
397 bool operator()(int i) const {
398 is_bareword is_bw;
399 return is_bw(i) || i == ':' || i == ';' || i == '#' || i =='@' ||
400 (i >= 0x7f && i <= 0xff) /* see hphp.ll :( */;
404 void error(const std::string& what) {
405 throw Error(getLineNumber(), what);
408 void io_error_if_bad() {
409 if (m_in.bad()) {
410 error("I/O error reading stream: " +
411 folly::errnoStr(errno).toStdString());
415 private:
416 std::istream& m_in;
417 int m_lineNumber{1};
420 struct StackDepth;
422 struct FPIReg {
423 Offset fpushOff;
424 StackDepth* stackDepth;
425 int fpOff;
429 * Tracks the depth of the stack in a given block of instructions.
431 * This structure is linked to a block of instructions (usually starting at a
432 * label), and tracks the current stack depth in this block. This tracking can
433 * take two forms:
434 * - Absolute depth: the depth of the stack is exactly known for this block
435 * - Relative depth: the depth of the stack is unknown for now. We keep track
436 * of an offset, relative to the depth of the stack at the first instruction
437 * of the block
439 struct StackDepth {
440 int currentOffset;
442 * Tracks the max depth of elem stack + desc stack offset inside a region
443 * where baseValue is unknown.
445 int maxOffset;
447 * Tracks the min depth of the elem stack inside a region where baseValue
448 * is unknown, and the line where the min occurred.
450 int minOffset;
451 int minOffsetLine;
452 folly::Optional<int> baseValue;
455 * During the parsing process, when a Jmp instruction is encountered, the
456 * StackDepth structure for this jump becomes linked to the StackDepth
457 * structure of the label (which is added to the listeners list).
459 * Once the absolute depth at the jump becomes known, its StackDepth
460 * instance calls the setBase method of the StackDepth instance of the label.
461 * The absolute depth at the label can then be inferred from the
462 * absolute depth at the jump.
464 std::vector<std::pair<StackDepth*, int> > listeners;
466 StackDepth()
467 : currentOffset(0)
468 , maxOffset(0)
469 , minOffset(0)
472 void adjust(AsmState& as, int delta);
473 void addListener(AsmState& as, StackDepth* target);
474 void setBase(AsmState& as, int stackDepth);
475 int absoluteDepth() {
476 assert(baseValue.hasValue());
477 return baseValue.value() + currentOffset;
481 * Sets the baseValue such as the current stack depth matches the
482 * parameter.
484 * If the base value is already known, it may conflict with the
485 * parameter of this function. In this case, an error will be raised.
487 void setCurrentAbsolute(AsmState& as, int stackDepth);
490 struct Label {
491 bool bound{false};
492 Offset target;
493 StackDepth stackDepth;
496 * Each label source source has an Offset where the jmp should be
497 * patched up is, and an Offset from which the jump delta should be
498 * computed. (The second Offset is basically to the actual
499 * jump/switch/etc instruction, while the first points to the
500 * immediate.)
502 std::vector<std::pair<Offset,Offset>> sources;
505 * List of a parameter ids that use this label for its DV
506 * initializer.
508 std::vector<Id> dvInits;
511 * List of EHEnts that have m_handler pointing to this label.
513 std::vector<size_t> ehEnts;
516 struct AsmState {
517 explicit AsmState(std::istream& in, AsmCallbacks* callbacks = nullptr)
518 : in(in)
519 , callbacks(callbacks)
521 currentStackDepth->setBase(*this, 0);
524 AsmState(const AsmState&) = delete;
525 AsmState& operator=(const AsmState&) = delete;
527 template<typename... Args>
528 void error(const std::string& fmt, Args&&... args) {
529 throw Error(in.getLineNumber(),
530 folly::sformat(fmt, std::forward<Args>(args)...));
534 void adjustStack(int delta) {
535 if (currentStackDepth == nullptr) {
536 // Instruction is unreachable, nothing to do here!
537 return;
540 currentStackDepth->adjust(*this, delta);
543 void adjustStackHighwater(int depth) {
544 if (depth) {
545 fe->maxStackCells = std::max(fe->maxStackCells, depth);
549 std::string displayStackDepth() {
550 std::ostringstream stack;
552 if (currentStackDepth == nullptr) {
553 stack << "/";
554 } else if (currentStackDepth->baseValue) {
555 stack << *currentStackDepth->baseValue +
556 currentStackDepth->currentOffset;
557 } else {
558 stack << "?" << currentStackDepth->currentOffset;
561 return stack.str();
564 void addLabelTarget(const std::string& name) {
565 auto& label = labelMap[name];
566 if (label.bound) {
567 error("Duplicate label " + name);
569 label.bound = true;
570 label.target = ue->bcPos();
572 StackDepth* newStack = &label.stackDepth;
574 if (currentStackDepth == nullptr) {
575 // Previous instruction was unreachable
576 currentStackDepth = newStack;
577 return;
580 // The stack depth at the label depends on the current depth
581 currentStackDepth->addListener(*this, newStack);
582 currentStackDepth = newStack;
585 void addLabelJump(const std::string& name, Offset immOff, Offset opcodeOff) {
586 auto& label = labelMap[name];
588 if (currentStackDepth != nullptr) {
589 // The stack depth at the target must be the same as the current depth
590 // (whatever this may be: it may still be unknown)
591 currentStackDepth->addListener(*this, &label.stackDepth);
594 label.sources.emplace_back(immOff, opcodeOff);
597 void enforceStackDepth(int stackDepth) {
598 if (currentStackDepth == nullptr) {
599 // Current instruction is unreachable, thus the constraint
600 // on the stack depth will never be violated
601 return;
604 currentStackDepth->setCurrentAbsolute(*this, stackDepth);
607 bool isUnreachable() {
608 return currentStackDepth == nullptr;
611 void enterUnreachableRegion() {
612 currentStackDepth = nullptr;
615 void enterReachableRegion(int stackDepth) {
616 unnamedStackDepths.emplace_back(std::make_unique<StackDepth>());
617 currentStackDepth = unnamedStackDepths.back().get();
618 currentStackDepth->setBase(*this, stackDepth);
621 void addLabelDVInit(const std::string& name, int paramId) {
622 labelMap[name].dvInits.push_back(paramId);
624 // Stack depth should be 0 when entering a DV init
625 labelMap[name].stackDepth.setBase(*this, 0);
628 void addLabelEHEnt(const std::string& name, size_t ehIdx) {
629 labelMap[name].ehEnts.push_back(ehIdx);
631 // Stack depth should be 0 when entering a fault funclet
632 labelMap[name].stackDepth.setBase(*this, 0);
635 void beginFpi(Offset fpushOff) {
636 fpiRegs.push_back(FPIReg{
637 fpushOff,
638 currentStackDepth,
639 currentStackDepth->currentOffset
641 fdescDepth += kNumActRecCells;
642 currentStackDepth->adjust(*this, 0);
645 void endFpi() {
646 if (fpiRegs.empty()) {
647 error("endFpi called with no active fpi region");
650 auto& ent = fe->addFPIEnt();
651 const auto& reg = fpiRegs.back();
652 ent.m_fpushOff = reg.fpushOff;
653 ent.m_fpiEndOff = ue->bcPos();
654 ent.m_fpOff = reg.fpOff;
655 if (reg.stackDepth->baseValue) {
656 ent.m_fpOff += *reg.stackDepth->baseValue;
657 } else {
658 // Base value still unknown, this will need to be updated later.
660 // Store the FPIEnt's index in the FuncEmitter's entry table.
661 assert(&fe->fpitab[fe->fpitab.size()-1] == &ent);
662 fpiToUpdate.emplace_back(fe->fpitab.size() - 1, reg.stackDepth);
665 fpiRegs.pop_back();
666 always_assert(fdescDepth >= kNumActRecCells);
667 fdescDepth -= kNumActRecCells;
670 void finishClass() {
671 assert(!fe);
672 ue->addPreClassEmitter(pce);
673 pce = 0;
674 enumTySet = false;
677 void patchLabelOffsets(const Label& label) {
678 for (auto const& source : label.sources) {
679 ue->emitInt32(label.target - source.second, source.first);
682 for (auto const& dvinit : label.dvInits) {
683 fe->params[dvinit].funcletOff = label.target;
686 for (auto const& ehEnt : label.ehEnts) {
687 fe->ehtab[ehEnt].m_handler = label.target;
691 void finishSection() {
692 for (auto const& label : labelMap) {
693 if (!label.second.bound) {
694 error("Undefined label " + label.first);
696 if (label.second.target >= ue->bcPos()) {
697 error("label " + label.first + " falls of the end of the function");
700 patchLabelOffsets(label.second);
703 // Patch the FPI structures
704 for (auto& kv : fpiToUpdate) {
705 if (!kv.second->baseValue) {
706 error("created a FPI from an unreachable instruction");
709 fe->fpitab[kv.first].m_fpOff += *kv.second->baseValue;
713 void finishFunction() {
714 finishSection();
716 // Stack depth should be 0 at the end of a function body
717 enforceStackDepth(0);
719 // Bump up the unnamed local count
720 const int numLocals = maxUnnamed + 1;
721 while (fe->numLocals() < numLocals) {
722 fe->allocUnnamedLocal();
725 fe->maxStackCells +=
726 fe->numLocals() +
727 fe->numIterators() * kNumIterCells +
728 clsRefCountToCells(fe->numClsRefSlots());
730 fe->finish(ue->bcPos(), false);
731 ue->recordFunction(fe);
733 fe = 0;
734 fpiRegs.clear();
735 labelMap.clear();
736 numItersSet = false;
737 numClsRefSlotsSet = false;
738 initStackDepth = StackDepth();
739 initStackDepth.setBase(*this, 0);
740 currentStackDepth = &initStackDepth;
741 unnamedStackDepths.clear();
742 fdescDepth = 0;
743 maxUnnamed = -1;
744 fpiToUpdate.clear();
747 int getLocalId(const std::string& name) {
748 if (name[0] == '_') {
749 int id = folly::to<int>(name.substr(1));
750 if (id > maxUnnamed) maxUnnamed = id;
751 return id;
754 if (name[0] != '$') {
755 error("local variables must be prefixed with $ or _");
758 const StringData* sd = makeStaticString(name.c_str() + 1);
759 fe->allocVarId(sd);
760 return fe->lookupVarId(sd);
763 int getIterId(int32_t id) {
764 if (id >= fe->numIterators()) {
765 error("iterator id exceeded number of iterators in the function");
767 return id;
770 int getClsRefSlot(int32_t slot) {
771 if (slot >= fe->numClsRefSlots()) {
772 error("class-ref slot id exceeded number of class-ref "
773 "slots in the function");
775 return slot;
778 UnitEmitter* ue;
779 Input in;
780 bool emittedPseudoMain{false};
781 bool emittedTopLevelFunc{false};
783 std::map<std::string,ArrayData*> adataMap;
785 // When inside a class, this state is active.
786 PreClassEmitter* pce;
788 // When we're doing a function or method body, this state is active.
789 FuncEmitter* fe{nullptr};
790 std::vector<FPIReg> fpiRegs;
791 std::map<std::string,Label> labelMap;
792 bool numItersSet{false};
793 bool numClsRefSlotsSet{false};
794 bool enumTySet{false};
795 StackDepth initStackDepth;
796 StackDepth* currentStackDepth{&initStackDepth};
797 std::vector<std::unique_ptr<StackDepth>> unnamedStackDepths;
798 int fdescDepth{0};
799 int minStackDepth{0};
800 int maxUnnamed{-1};
801 std::vector<std::pair<size_t, StackDepth*>> fpiToUpdate;
802 std::set<std::string,stdltistr> hoistables;
803 std::unordered_map<uint32_t,Offset> defClsOffsets;
804 Location::Range srcLoc{-1,-1,-1,-1};
805 AsmCallbacks* callbacks{ nullptr };
808 void StackDepth::adjust(AsmState& as, int delta) {
809 currentOffset += delta;
811 if (!baseValue) {
812 // The absolute stack depth is unknown. We only store the min
813 // and max offsets, and we will take a decision later, when the
814 // base value will be known.
815 maxOffset = std::max(currentOffset + as.fdescDepth, maxOffset);
816 if (currentOffset < minOffset) {
817 minOffsetLine = as.in.getLineNumber();
818 minOffset = currentOffset;
820 return;
823 if (*baseValue + currentOffset < 0) {
824 as.error("opcode sequence caused stack depth to go negative");
827 as.adjustStackHighwater(*baseValue + currentOffset + as.fdescDepth);
830 void StackDepth::addListener(AsmState& as, StackDepth* target) {
831 if (baseValue) {
832 target->setBase(as, *baseValue + currentOffset);
833 } else {
834 listeners.emplace_back(target, currentOffset);
838 void StackDepth::setBase(AsmState& as, int stackDepth) {
839 if (baseValue && stackDepth != *baseValue) {
840 as.error("stack depth {} does not match base value {}",
841 stackDepth, *baseValue);
844 baseValue = stackDepth;
846 // We finally know the base value. Update AsmState accordingly.
847 if (*baseValue + minOffset < 0) {
848 throw Error(
849 minOffsetLine,
850 "opcode sequence caused stack depth to go negative"
853 as.adjustStackHighwater(*baseValue + maxOffset);
855 // Update the listeners
856 auto l = std::move(listeners);
857 // We won't need them anymore
858 listeners.clear();
859 for (auto& kv : l) {
860 kv.first->setBase(as, *baseValue + kv.second);
864 void StackDepth::setCurrentAbsolute(AsmState& as, int stackDepth) {
865 setBase(as, stackDepth - currentOffset);
868 //////////////////////////////////////////////////////////////////////
871 * Opcode arguments must be on the same line as the opcode itself,
872 * although certain argument types may contain internal newlines (see,
873 * for example, read_jmpvector or string literals).
875 template<class Target> Target read_opcode_arg(AsmState& as) {
876 as.in.skipSpaceTab();
877 std::string strVal;
878 as.in.consumePred(!boost::is_any_of(" \t\r\n#;>"),
879 std::back_inserter(strVal));
880 if (strVal.empty()) {
881 as.error("expected opcode or directive argument");
883 try {
884 return folly::to<Target>(strVal);
885 } catch (std::range_error&) {
886 as.error("couldn't convert input argument (" + strVal + ") to "
887 "proper type");
888 not_reached();
892 template<class SubOpType>
893 uint8_t read_subop(AsmState& as) {
894 auto const str = read_opcode_arg<std::string>(as);
895 if (auto const ty = nameToSubop<SubOpType>(str.c_str())) {
896 return static_cast<uint8_t>(*ty);
898 as.error("unknown subop name");
899 not_reached();
902 const StringData* read_litstr(AsmState& as) {
903 as.in.skipSpaceTab();
904 std::string strVal;
905 if (!as.in.readQuotedStr(strVal)) {
906 as.error("expected quoted string literal");
908 return makeStaticString(strVal);
912 * maybe-string-literal : N
913 * | string-literal
916 const StringData* read_maybe_litstr(AsmState& as) {
917 as.in.skipSpaceTab();
918 if (as.in.peek() == 'N') {
919 as.in.getc();
920 return nullptr;
922 return read_litstr(as);
925 std::vector<std::string> read_strvector(AsmState& as) {
926 std::vector<std::string> ret;
927 as.in.skipSpaceTab();
928 as.in.expect('<');
929 std::string name;
930 while (as.in.skipSpaceTab(), as.in.readQuotedStr(name)) {
931 ret.push_back(name);
933 as.in.skipSpaceTab();
934 as.in.expectWs('>');
935 return ret;
938 ArrayData* read_litarray(AsmState& as) {
939 as.in.skipSpaceTab();
940 if (as.in.getc() != '@') {
941 as.error("expecting an `@foo' array literal reference");
943 std::string name;
944 if (!as.in.readword(name)) {
945 as.error("expected name of .adata literal");
948 auto const it = as.adataMap.find(name);
949 if (it == as.adataMap.end()) {
950 as.error("unknown array data literal name " + name);
952 return it->second;
955 RepoAuthType read_repo_auth_type(AsmState& as) {
956 auto const str = read_opcode_arg<std::string>(as);
957 folly::StringPiece parse(str);
960 * Note: no support for reading array types. (The assembler only
961 * emits a single unit, so it can't really be involved in creating a
962 * ArrayTypeTable.)
965 using T = RepoAuthType::Tag;
967 #define X(what, tag) \
968 if (parse.startsWith(what)) return RepoAuthType{tag}
970 #define Y(what, tag) \
971 if (parse.startsWith(what)) { \
972 parse.removePrefix(what); \
973 auto const cls = makeStaticString(parse.data()); \
974 as.ue->mergeLitstr(cls); \
975 return RepoAuthType{tag, cls}; \
978 Y("Obj=", T::ExactObj);
979 Y("?Obj=", T::OptExactObj);
980 Y("?Obj<=", T::OptSubObj);
981 Y("Obj<=", T::SubObj);
982 X("Arr", T::Arr);
983 X("?Arr", T::OptArr);
984 X("VArr", T::VArr);
985 X("?VArr", T::OptVArr);
986 X("DArr", T::DArr);
987 X("?DArr", T::OptDArr);
988 X("Vec", T::Vec);
989 X("?Vec", T::OptVec);
990 X("Dict", T::Dict);
991 X("?Dict", T::OptDict);
992 X("Keyset", T::Keyset);
993 X("?Keyset", T::OptKeyset);
994 X("Bool", T::Bool);
995 X("?Bool", T::OptBool);
996 X("Cell", T::Cell);
997 X("Dbl", T::Dbl);
998 X("?Dbl", T::OptDbl);
999 X("Gen", T::Gen);
1000 X("InitCell", T::InitCell);
1001 X("InitGen", T::InitGen);
1002 X("InitNull", T::InitNull);
1003 X("InitUnc", T::InitUnc);
1004 X("Int", T::Int);
1005 X("?Int", T::OptInt);
1006 X("Null", T::Null);
1007 X("Obj", T::Obj);
1008 X("?Obj", T::OptObj);
1009 X("Ref", T::Ref);
1010 X("?Res", T::OptRes);
1011 X("Res", T::Res);
1012 X("?SArr", T::OptSArr);
1013 X("SArr", T::SArr);
1014 X("?SVArr", T::OptSVArr);
1015 X("SVArr", T::SVArr);
1016 X("?SDArr", T::OptSDArr);
1017 X("SDArr", T::SDArr);
1018 X("?SVec", T::OptSVec);
1019 X("SVec", T::SVec);
1020 X("?SDict", T::OptSDict);
1021 X("SDict", T::SDict);
1022 X("?SKeyset", T::OptSKeyset);
1023 X("SKeyset", T::SKeyset);
1024 X("?SStr", T::OptSStr);
1025 X("SStr", T::SStr);
1026 X("?Str", T::OptStr);
1027 X("Str", T::Str);
1028 X("Unc", T::Unc);
1029 X("?UncArrKey", T::OptUncArrKey);
1030 X("?ArrKey", T::OptArrKey);
1031 X("UncArrKey",T::UncArrKey);
1032 X("ArrKey", T::ArrKey);
1033 X("Uninit", T::Uninit);
1035 #undef X
1036 #undef Y
1038 // Make sure the above parsing code is revisited when new tags are
1039 // added (we'll get a warning for a missing case label):
1040 if (debug) switch (RepoAuthType{}.tag()) {
1041 case T::Uninit:
1042 case T::InitNull:
1043 case T::Null:
1044 case T::Int:
1045 case T::OptInt:
1046 case T::Dbl:
1047 case T::OptDbl:
1048 case T::Res:
1049 case T::OptRes:
1050 case T::Bool:
1051 case T::OptBool:
1052 case T::SStr:
1053 case T::OptSStr:
1054 case T::Str:
1055 case T::OptStr:
1056 case T::SArr:
1057 case T::OptSArr:
1058 case T::Arr:
1059 case T::OptArr:
1060 case T::SVArr:
1061 case T::OptSVArr:
1062 case T::VArr:
1063 case T::OptVArr:
1064 case T::SDArr:
1065 case T::OptSDArr:
1066 case T::DArr:
1067 case T::OptDArr:
1068 case T::SVec:
1069 case T::OptSVec:
1070 case T::Vec:
1071 case T::OptVec:
1072 case T::SDict:
1073 case T::OptSDict:
1074 case T::Dict:
1075 case T::OptDict:
1076 case T::SKeyset:
1077 case T::OptSKeyset:
1078 case T::Keyset:
1079 case T::OptKeyset:
1080 case T::Obj:
1081 case T::OptObj:
1082 case T::InitUnc:
1083 case T::Unc:
1084 case T::OptUncArrKey:
1085 case T::OptArrKey:
1086 case T::UncArrKey:
1087 case T::ArrKey:
1088 case T::InitCell:
1089 case T::Cell:
1090 case T::Ref:
1091 case T::InitGen:
1092 case T::Gen:
1093 case T::ExactObj:
1094 case T::SubObj:
1095 case T::OptExactObj:
1096 case T::OptSubObj:
1097 break;
1100 as.error("unrecognized RepoAuthType format");
1101 not_reached();
1104 // Read a vector of IVAs, with format <int, int, int, ...>, the vector may be
1105 // excluded entirely if it is empty.
1106 std::vector<uint32_t> read_argv(AsmState& as) {
1107 as.in.skipSpaceTab();
1108 if (as.in.peek() != '<') return {};
1109 as.in.getc();
1111 std::vector<uint32_t> result;
1112 for (;;) {
1113 auto const num = as.in.readint();
1114 if (num < 0) as.error("Was expecting a positive integer");
1115 result.push_back(num);
1116 as.in.skipWhitespace();
1117 if (as.in.peek() == '>') break;
1118 as.in.expectWs(',');
1120 as.in.expectWs('>');
1122 return result;
1125 // Read in a vector of iterators the format for this vector is:
1126 // <(TYPE) ID, (TYPE) ID, ...>
1127 // Where TYPE := Iter | MIter | CIter
1128 // and ID := Integer
1129 std::vector<uint32_t> read_itervec(AsmState& as) {
1130 std::vector<uint32_t> ret;
1132 as.in.skipSpaceTab();
1133 as.in.expect('<');
1135 std::string word;
1137 for (;;) {
1138 as.in.expectWs('(');
1139 if (!as.in.readword(word)) as.error("Was expecting Iterator type.");
1140 if (!word.compare("Iter")) ret.push_back(KindOfIter);
1141 else if (!word.compare("MIter")) ret.push_back(KindOfMIter);
1142 else if (!word.compare("CIter")) ret.push_back(KindOfCIter);
1143 else as.error("Unknown iterator type `" + word + "'");
1144 as.in.expectWs(')');
1146 as.in.skipSpaceTab();
1148 if (!as.in.readword(word)) as.error("Was expecting iterator id.");
1149 ret.push_back(folly::to<uint32_t>(word));
1151 if (!isdigit(word.back())) {
1152 if (word.back() == '>') break;
1153 if (word.back() != ',') as.error("Was expecting `,'.");
1154 } else {
1155 as.in.skipSpaceTab();
1156 if (as.in.peek() == '>') { as.in.getc(); break; }
1157 as.in.expect(',');
1161 return ret;
1164 // Jump tables are lists of labels.
1165 std::vector<std::string> read_jmpvector(AsmState& as) {
1166 std::vector<std::string> ret;
1168 as.in.skipSpaceTab();
1169 as.in.expect('<');
1171 std::string word;
1172 while (as.in.readword(word)) {
1173 ret.push_back(word);
1175 as.in.expectWs('>');
1177 return ret;
1180 typedef std::vector<std::pair<Id, std::string>> SSwitchJmpVector;
1182 SSwitchJmpVector read_sswitch_jmpvector(AsmState& as) {
1183 SSwitchJmpVector ret;
1185 as.in.skipSpaceTab();
1186 as.in.expect('<');
1188 std::string defLabel;
1189 do {
1190 std::string caseStr;
1191 if (!as.in.readQuotedStr(caseStr)) {
1192 as.error("expected quoted string literal");
1195 as.in.expect(':');
1197 as.in.readword(defLabel);
1199 ret.emplace_back(
1200 as.ue->mergeLitstr(makeStaticString(caseStr)),
1201 defLabel
1204 as.in.skipWhitespace();
1205 } while (as.in.peek() != '-');
1207 as.in.expect('-');
1208 as.in.expect(':');
1209 as.in.readword(defLabel);
1211 // -1 stand for default case.
1212 ret.emplace_back(-1, defLabel);
1214 as.in.expect('>');
1216 return ret;
1219 MemberKey read_member_key(AsmState& as) {
1220 as.in.skipWhitespace();
1222 std::string word;
1223 if (!as.in.readword(word)) as.error("expected member code");
1225 auto optMcode = parseMemberCode(word.c_str());
1226 if (!optMcode) as.error("unrecognized member code `" + word + "'");
1228 auto const mcode = *optMcode;
1229 if (mcode != MW && as.in.getc() != ':') {
1230 as.error("expected `:' after member code `" + word + "'");
1233 switch (mcode) {
1234 case MW:
1235 return MemberKey{};
1236 case MEL: case MPL: {
1237 std::string name;
1238 if (!as.in.readword(name)) {
1239 as.error("couldn't read name for local variable in member key");
1241 return MemberKey{mcode, as.getLocalId(name)};
1243 case MEC: case MPC:
1244 return MemberKey{mcode, read_opcode_arg<int32_t>(as)};
1245 case MEI:
1246 return MemberKey{mcode, read_opcode_arg<int64_t>(as)};
1247 case MET: case MPT: case MQT:
1248 return MemberKey{mcode, read_litstr(as)};
1250 not_reached();
1253 LocalRange read_local_range(AsmState& as) {
1254 auto first = read_opcode_arg<std::string>(as);
1255 if (first.size() > 2 && first[0] == 'L' && first[1] == ':') {
1256 first = "_" + first.substr(2);
1258 auto const pos = first.find('+');
1259 if (pos == std::string::npos) as.error("expecting `+' in local range");
1260 auto const rest = first.substr(pos + 1);
1261 first = first.substr(0, pos);
1262 auto const firstLoc = as.getLocalId(first);
1263 auto const restCount = folly::to<uint32_t>(rest);
1264 if (firstLoc + restCount > as.maxUnnamed) {
1265 as.maxUnnamed = firstLoc + restCount;
1267 return LocalRange{uint32_t(firstLoc), restCount};
1270 //////////////////////////////////////////////////////////////////////
// Opcode mnemonic -> parser function.  Filled in by initialize_opcode_map()
// and consulted by parse_function_body() for every opcode line.
1272 std::map<std::string,ParserFunc> opcode_parsers;
// Immediate handling for the O() macro below.  IMM_ONE..IMM_FOUR expand to
// one IMM_<type> handler per immediate and bump immIdx between them.  Each
// IMM_<type> macro reads one argument from the input and emits its encoding
// into as.ue.  IMM_VSA additionally declares vecImmStackValues, which
// NUM_POP_SMANY refers to.
1274 #define IMM_NA
1275 #define IMM_ONE(t) IMM_##t
1276 #define IMM_TWO(t1, t2) IMM_ONE(t1); ++immIdx; IMM_##t2
1277 #define IMM_THREE(t1, t2, t3) IMM_TWO(t1, t2); ++immIdx; IMM_##t3
1278 #define IMM_FOUR(t1, t2, t3, t4) IMM_THREE(t1, t2, t3); ++immIdx; IMM_##t4
1280 // Some bytecodes need to know an iva imm for (PUSH|POP)_*.
1281 #define IMM_IVA do { \
1282 auto imm = read_opcode_arg<uint32_t>(as); \
1283 as.ue->emitIVA(imm); \
1284 immIVA[immIdx] = imm; \
1285 } while (0)
// String vector: emits the element count, then one litstr id per element.
1287 #define IMM_VSA \
1288 std::vector<std::string> vecImm = read_strvector(as); \
1289 auto const vecImmStackValues = vecImm.size(); \
1290 as.ue->emitInt32(vecImmStackValues); \
1291 for (size_t i = 0; i < vecImmStackValues; ++i) { \
1292 as.ue->emitInt32(as.ue->mergeLitstr(String(vecImm[i]).get())); \
1295 #define IMM_SA as.ue->emitInt32(as.ue->mergeLitstr(read_litstr(as)))
1296 #define IMM_RATA encodeRAT(*as.ue, read_repo_auth_type(as))
1297 #define IMM_I64A as.ue->emitInt64(read_opcode_arg<int64_t>(as))
1298 #define IMM_DA as.ue->emitDouble(read_opcode_arg<double>(as))
1299 #define IMM_LA as.ue->emitIVA(as.getLocalId( \
1300 read_opcode_arg<std::string>(as)))
1301 #define IMM_IA as.ue->emitIVA(as.getIterId( \
1302 read_opcode_arg<int32_t>(as)))
1303 #define IMM_CAR as.ue->emitIVA(as.getClsRefSlot( \
1304 read_opcode_arg<int32_t>(as)))
1305 #define IMM_CAW as.ue->emitIVA(as.getClsRefSlot( \
1306 read_opcode_arg<int32_t>(as)))
1307 #define IMM_OA(ty) as.ue->emitByte(read_subop<ty>(as));
1308 #define IMM_AA as.ue->emitInt32(as.ue->mergeArray(read_litarray(as)))
1309 #define IMM_LAR encodeLocalRange(*as.ue, read_local_range(as))
// Vector and label immediates.  Label-taking macros emit a zero placeholder
// and remember (label, position) in labelJumps so the O() macro can register
// the jump for later patching.
1312 * There can currently be no more than one immvector per instruction,
1313 * and we need access to the size of the immediate vector for
1314 * NUM_POP_*, so the member vector guy exposes a vecImmStackValues
1315 * integer.
1317 #define IMM_ILA do { \
1318 std::vector<uint32_t> vecImm = read_itervec(as); \
1319 as.ue->emitInt32(vecImm.size() / 2); \
1320 for (auto& i : vecImm) { \
1321 as.ue->emitInt32(i); \
1323 } while (0)
// Integer vector: element count followed by each element.
1325 #define IMM_I32LA do { \
1326 std::vector<uint32_t> vecImm = read_argv(as); \
1327 as.ue->emitInt32(vecImm.size()); \
1328 for (auto i : vecImm) { \
1329 as.ue->emitInt32(i); \
1331 } while (0)
// Jump table: label count followed by one placeholder offset per label.
1333 #define IMM_BLA do { \
1334 std::vector<std::string> vecImm = read_jmpvector(as); \
1335 as.ue->emitInt32(vecImm.size()); \
1336 for (auto const& imm : vecImm) { \
1337 labelJumps.emplace_back(imm, as.ue->bcPos()); \
1338 as.ue->emitInt32(0); /* to be patched */ \
1340 } while (0)
// String-switch table: per entry, the case id then a placeholder offset.
1342 #define IMM_SLA do { \
1343 auto vecImm = read_sswitch_jmpvector(as); \
1344 as.ue->emitInt32(vecImm.size()); \
1345 for (auto const& pair : vecImm) { \
1346 as.ue->emitInt32(pair.first); \
1347 labelJumps.emplace_back(pair.second, as.ue->bcPos()); \
1348 as.ue->emitInt32(0); /* to be patched */ \
1350 } while(0)
// Single branch target: one placeholder offset.
1352 #define IMM_BA do { \
1353 labelJumps.emplace_back( \
1354 read_opcode_arg<std::string>(as), \
1355 as.ue->bcPos() \
1356 ); \
1357 as.ue->emitInt32(0); \
1358 } while (0)
// Member key immediate.
1360 #define IMM_KA encode_member_key(read_member_key(as), *as.ue)
// Number of stack values pushed / popped for each push/pop descriptor.  The
// O() macro computes NUM_PUSH_<push> - NUM_POP_<pop> as the stack delta.
// Variable pop counts come from the IVA immediates recorded in immIVA, or
// from vecImmStackValues (declared by IMM_VSA).
1362 #define NUM_PUSH_NOV 0
1363 #define NUM_PUSH_ONE(a) 1
1364 #define NUM_PUSH_TWO(a,b) 2
1365 #define NUM_PUSH_THREE(a,b,c) 3
1366 #define NUM_PUSH_INS_1(a) 1
1367 #define NUM_POP_NOV 0
1368 #define NUM_POP_ONE(a) 1
1369 #define NUM_POP_TWO(a,b) 2
1370 #define NUM_POP_THREE(a,b,c) 3
1371 #define NUM_POP_MFINAL immIVA[0]
1372 #define NUM_POP_F_MFINAL immIVA[1]
1373 #define NUM_POP_C_MFINAL (immIVA[0] + 1)
1374 #define NUM_POP_V_MFINAL NUM_POP_C_MFINAL
1375 #define NUM_POP_FMANY immIVA[0] /* number of arguments */
1376 #define NUM_POP_CVUMANY immIVA[0] /* number of arguments */
1377 #define NUM_POP_CMANY immIVA[0] /* number of arguments */
1378 #define NUM_POP_SMANY vecImmStackValues
// Defines parse_opcode_<name>(AsmState&) for every entry of OPCODES.  Each
// generated parser, in order: enters a reachable region if there is no
// current stack depth, ends the FPI region for FCall* opcodes, emits the
// opcode, parses and emits its immediates (IMM_<imm>), adjusts the tracked
// stack depth by NUM_PUSH_<push> - NUM_POP_<pop>, begins an FPI region for
// FPush* opcodes, registers the label jumps collected by the immediates,
// enforces the expected stack depth after returns and suspends, records the
// source location, remembers DefCls/DefClsNop offsets, and enters an
// unreachable region after opcodes flagged TF (other than Exit).
1380 #define O(name, imm, pop, push, flags) \
1381 void parse_opcode_##name(AsmState& as) { \
1382 UNUSED uint32_t immIVA[4]; \
1383 UNUSED auto const thisOpcode = Op::name; \
1384 UNUSED const Offset curOpcodeOff = as.ue->bcPos(); \
1385 std::vector<std::pair<std::string, Offset> > labelJumps; \
1387 TRACE( \
1388 4, \
1389 "%d\t[%s] %s\n", \
1390 as.in.getLineNumber(), \
1391 as.displayStackDepth().c_str(), \
1392 #name \
1393 ); \
1395 /* Pretend the stack is reachable and empty, same as hphpc */ \
1396 if (as.currentStackDepth == nullptr) { \
1397 as.enterReachableRegion(0); \
1400 if (isFCallStar(Op##name)) { \
1401 as.endFpi(); \
1404 /* Other FCall* functions perform their own bounds checking. */ \
1405 if (Op##name == OpFCall || Op##name == OpFCallD || \
1406 Op##name == OpFCallAwait) { \
1407 as.fe->containsCalls = true; \
1410 as.ue->emitOp(Op##name); \
1412 UNUSED size_t immIdx = 0; \
1413 IMM_##imm; \
1415 int stackDelta = NUM_PUSH_##push - NUM_POP_##pop; \
1416 as.adjustStack(stackDelta); \
1418 if (isFPush(Op##name)) { \
1419 as.beginFpi(curOpcodeOff); \
1422 for (auto& kv : labelJumps) { \
1423 as.addLabelJump(kv.first, kv.second, curOpcodeOff); \
1426 /* Stack depth should be 0 after RetC or RetV. */ \
1427 if (thisOpcode == OpRetC || thisOpcode == OpRetV) { \
1428 as.enforceStackDepth(0); \
1431 /* Stack depth should be 1 after resume from suspend. */ \
1432 if (thisOpcode == OpCreateCont || thisOpcode == OpAwait || \
1433 thisOpcode == OpYield || thisOpcode == OpYieldK || \
1434 thisOpcode == OpYieldFromDelegate) { \
1435 as.enforceStackDepth(1); \
1438 /* Record source location. */ \
1439 as.ue->recordSourceLocation(as.srcLoc, curOpcodeOff); \
1441 if (Op##name == OpDefCls || Op##name == OpDefClsNop) { \
1442 as.defClsOffsets.emplace(immIVA[0], curOpcodeOff); \
1445 /* Retain stack depth after calls to exit */ \
1446 if ((instrFlags(thisOpcode) & InstrFlags::TF) && \
1447 (Op##name != OpExit)) { \
1448 as.enterUnreachableRegion(); \
1452 OPCODES
1454 #undef O
// Retire the helper macros used by O().
// NOTE(review): IMM_MA, NUM_PUSH_POS_N and NUM_POP_POS_N are undefined here
// although no definition of them is visible in this part of the file, while
// IMM_NA, IMM_ONE/TWO/THREE/FOUR, IMM_IA, IMM_ILA and IMM_I32LA are defined
// above but not undefined here -- confirm whether that is intentional.
1456 #undef IMM_I64A
1457 #undef IMM_SA
1458 #undef IMM_RATA
1459 #undef IMM_DA
1460 #undef IMM_IVA
1461 #undef IMM_LA
1462 #undef IMM_CAR
1463 #undef IMM_CAW
1464 #undef IMM_BA
1465 #undef IMM_BLA
1466 #undef IMM_SLA
1467 #undef IMM_OA
1468 #undef IMM_MA
1469 #undef IMM_AA
1470 #undef IMM_VSA
1471 #undef IMM_KA
1472 #undef IMM_LAR
1474 #undef NUM_PUSH_NOV
1475 #undef NUM_PUSH_ONE
1476 #undef NUM_PUSH_TWO
1477 #undef NUM_PUSH_THREE
1478 #undef NUM_PUSH_POS_N
1479 #undef NUM_PUSH_INS_1
1480 #undef NUM_POP_NOV
1481 #undef NUM_POP_ONE
1482 #undef NUM_POP_TWO
1483 #undef NUM_POP_THREE
1484 #undef NUM_POP_POS_N
1485 #undef NUM_POP_MFINAL
1486 #undef NUM_POP_F_MFINAL
1487 #undef NUM_POP_C_MFINAL
1488 #undef NUM_POP_V_MFINAL
1489 #undef NUM_POP_FMANY
1490 #undef NUM_POP_CVUMANY
1491 #undef NUM_POP_CMANY
1492 #undef NUM_POP_SMANY
// Registers every generated parse_opcode_<name> function in opcode_parsers
// under its mnemonic.
1494 void initialize_opcode_map() {
1495 #define O(name, imm, pop, push, flags) \
1496 opcode_parsers[#name] = parse_opcode_##name;
1497 OPCODES
1498 #undef O
// The constructor of this object calls initialize_opcode_map(), so the
// table is populated when `initializer' is constructed.
1501 struct Initializer {
1502 Initializer() { initialize_opcode_map(); }
1503 } initializer;
1505 //////////////////////////////////////////////////////////////////////
1508 * long-string-literal: <string>
1510 * `long-string-literal' is a python-style longstring. See
1511 * readLongString for more details.
1513 String parse_long_string(AsmState& as) {
1514 as.in.skipWhitespace();
1516 std::vector<char> buffer;
1517 if (!as.in.readLongString(buffer)) {
1518 as.error("expected \"\"\"-string of serialized php data");
1520 if (buffer.empty()) {
1521 as.error("empty php serialized data is not a valid php object");
1524 // String wants a null, and dereferences one past the size we give
1525 // it.
1526 buffer.push_back('\0');
1527 return String(&buffer[0], buffer.size() - 1, CopyString);
1531 * maybe-long-string-literal : long-string-literal
1535 String parse_maybe_long_string(AsmState& as) {
1536 as.in.skipWhitespace();
1538 std::vector<char> buffer;
1539 if (!as.in.readLongString(buffer)) {
1540 return StrNR(staticEmptyString());
1542 if (buffer.empty()) {
1543 return StrNR(staticEmptyString());
1546 // String wants a null, and dereferences one past the size we give
1547 // it.
1548 buffer.push_back('\0');
1549 return String(&buffer[0], buffer.size() - 1, CopyString);
1553 * php-serialized : long-string-literal
1556 * `long-string-literal' is a python-style longstring. See
1557 * readLongString for more details.
1559 * Returns a Variant representing the serialized data. It's up to the
1560 * caller to make sure it is a legal literal.
1562 Variant parse_php_serialized(AsmState& as) {
1563 return unserialize_from_string(
1564 parse_long_string(as),
1565 VariableUnserializer::Type::Internal
1570 * maybe-php-serialized : maybe-long-string-literal
1573 Variant parse_maybe_php_serialized(AsmState& as) {
1574 auto s = parse_maybe_long_string(as);
1575 if (!s.empty()) {
1576 return unserialize_from_string(s, VariableUnserializer::Type::Internal);
1578 return Variant();
1582 * directive-numiters : integer ';'
1585 void parse_numiters(AsmState& as) {
1586 if (as.numItersSet) {
1587 as.error("only one .numiters directive may appear in a given function");
1589 int32_t count = read_opcode_arg<int32_t>(as);
1590 as.numItersSet = true;
1591 as.fe->setNumIterators(count);
1592 as.in.expectWs(';');
1596 * directive-numclsrefslots : integer ';'
1599 void parse_numclsrefslots(AsmState& as) {
1600 if (as.numClsRefSlotsSet) {
1601 as.error("only one .numclsrefslots directive may appear "
1602 "in a given function");
1604 int32_t count = read_opcode_arg<int32_t>(as);
1605 as.numClsRefSlotsSet = true;
1606 as.fe->setNumClsRefSlots(count);
1607 as.in.expectWs(';');
1611 * directive-declvars : var-name* ';'
1614 * Variables are usually allocated when first seen, but
1615 * declvars can be used to preallocate varibles for when
1616 * the exact assignment matters (like for closures).
1618 void parse_declvars(AsmState& as) {
1619 while (true) {
1620 as.in.skipWhitespace();
1621 std::string var;
1622 if (as.in.readQuotedStr(var) || as.in.readword(var)) {
1623 as.getLocalId(var);
1625 else {
1626 break;
1629 as.in.expectWs(';');
1632 void parse_function_body(AsmState&, int nestLevel = 0);
1635 * directive-fault : identifier integer? '{' function-body
1638 void parse_fault(AsmState& as, int nestLevel) {
1639 const Offset start = as.ue->bcPos();
1641 std::string label;
1642 if (!as.in.readword(label)) {
1643 as.error("expected label name after .try_fault");
1645 int iterId = -1;
1646 as.in.skipWhitespace();
1647 if (as.in.peek() != '{') {
1648 iterId = read_opcode_arg<int32_t>(as);
1650 as.in.expectWs('{');
1651 parse_function_body(as, nestLevel + 1);
1653 auto& eh = as.fe->addEHEnt();
1654 eh.m_type = EHEnt::Type::Fault;
1655 eh.m_base = start;
1656 eh.m_past = as.ue->bcPos();
1657 eh.m_iterId = iterId;
1658 eh.m_end = kInvalidOffset;
1660 as.addLabelEHEnt(label, as.fe->ehtab.size() - 1);
1664 * directive-catch : identifier integer? '{' function-body
1667 void parse_catch(AsmState& as, int nestLevel) {
1668 const Offset start = as.ue->bcPos();
1670 std::string label;
1671 if (!as.in.readword(label)) {
1672 as.error("expected label name after .try_catch");
1674 int iterId = -1;
1675 as.in.skipWhitespace();
1676 if (as.in.peek() != '{') {
1677 iterId = read_opcode_arg<int32_t>(as);
1679 as.in.expectWs('{');
1680 parse_function_body(as, nestLevel + 1);
1682 auto& eh = as.fe->addEHEnt();
1683 eh.m_type = EHEnt::Type::Catch;
1684 eh.m_base = start;
1685 eh.m_past = as.ue->bcPos();
1686 eh.m_iterId = iterId;
1687 eh.m_end = kInvalidOffset;
1689 as.addLabelEHEnt(label, as.fe->ehtab.size() - 1);
1693 * directive-try-catch : integer? '{' function-body ".catch" '{' function-body
1696 void parse_try_catch(AsmState& as, int nestLevel) {
1697 const Offset start = as.ue->bcPos();
1699 int iterId = -1;
1700 as.in.skipWhitespace();
1701 if (as.in.peek() != '{') {
1702 iterId = read_opcode_arg<int32_t>(as);
1705 // Emit try body.
1706 as.in.expectWs('{');
1707 parse_function_body(as, nestLevel + 1);
1708 if (!as.isUnreachable()) {
1709 as.error("expected .try region to not fall-thru");
1712 const Offset handler = as.ue->bcPos();
1714 // Emit catch body.
1715 as.enterReachableRegion(0);
1716 as.ue->emitOp(OpCatch);
1717 as.adjustStack(1);
1718 as.enforceStackDepth(1);
1720 std::string word;
1721 as.in.skipWhitespace();
1722 if (!as.in.readword(word) || word != ".catch") {
1723 as.error("expected .catch directive after .try");
1725 as.in.skipWhitespace();
1726 as.in.expectWs('{');
1727 parse_function_body(as, nestLevel + 1);
1729 const Offset end = as.ue->bcPos();
1731 auto& eh = as.fe->addEHEnt();
1732 eh.m_type = EHEnt::Type::Catch;
1733 eh.m_base = start;
1734 eh.m_past = handler;
1735 eh.m_iterId = iterId;
1736 eh.m_handler = handler;
1737 eh.m_end = end;
1741 * directive-srcloc : line_no ':' chr_no ',' line_no ':' chr_no ';'
1743 * line_no : integer
1745 * chr_no : integer
1748 * Record that subsequent bytecodes are at the source location indicated by the
1749 * range of inline numbers and character positions specified.
1751 void parse_srcloc(AsmState& as, int /*nestLevel*/) {
1752 auto const line0 = as.in.readint();
1753 as.in.expectWs(':');
1754 auto const char0 = as.in.readint();
1755 as.in.expectWs(',');
1756 auto const line1 = as.in.readint();
1757 as.in.expectWs(':');
1758 auto const char1 = as.in.readint();
1759 as.in.expectWs(';');
1761 as.srcLoc = Location::Range(line0, char0, line1, char1);
1765 * directive-static : '$' local_name = long-string-literal ';'
1768 * Record that the function contains a static named local_name along with an
1769 * associated initializer.
1771 void parse_static(AsmState& as) {
1772 Func::SVInfo svInfo;
1773 std::string name;
1774 String init;
1776 as.in.expectWs('$');
1777 if (!as.in.readword(name)) {
1778 as.error("Statics must be named");
1780 svInfo.name = makeStaticString(name);
1781 as.fe->staticVars.push_back(svInfo);
1783 as.in.expectWs(';');
1787 * directive-doccomment : long-string-literal ';'
1791 void parse_func_doccomment(AsmState& as) {
1792 auto const doc = parse_long_string(as);
1793 as.in.expectWs(';');
1795 as.fe->docComment = makeStaticString(doc);
1799 * function-body : fbody-line* '}'
1802 * fbody-line : ".numiters" directive-numiters
1803 * | ".numclsrefslots" directive-numclsrefslots
1804 * | ".declvars" directive-declvars
1805 * | ".try_fault" directive-fault
1806 * | ".try_catch" directive-catch
1807 * | ".try" directive-try-catch
1808 * | ".ismemoizewrapper"
1809 * | ".srcloc" directive-srcloc
1810 * | ".doc" directive-doccomment
1811 * | label-name
1812 * | opcode-line
1815 * label-name : identifier ':'
1818 * opcode-line : opcode-mnemonic <junk that depends on opcode> '\n'
1821 void parse_function_body(AsmState& as, int nestLevel /* = 0 */) {
1822 std::string word;
1823 for (;;) {
1824 as.in.skipWhitespace();
1825 if (as.in.peek() == '}') {
1826 as.in.getc();
1827 if (!nestLevel) {
1828 as.finishFunction();
1830 return;
1833 if (!as.in.readword(word)) {
1834 as.error("unexpected directive or opcode line in function body");
1836 if (word[0] == '.') {
1837 if (word == ".ismemoizewrapper") {
1838 as.fe->isMemoizeWrapper = true;
1839 as.in.expectWs(';');
1840 continue;
1842 if (word == ".numiters") { parse_numiters(as); continue; }
1843 if (word == ".declvars") { parse_declvars(as); continue; }
1844 if (word == ".numclsrefslots") { parse_numclsrefslots(as); continue; }
1845 if (word == ".try_fault") { parse_fault(as, nestLevel); continue; }
1846 if (word == ".try_catch") { parse_catch(as, nestLevel); continue; }
1847 if (word == ".try") { parse_try_catch(as, nestLevel); continue; }
1848 if (word == ".srcloc") { parse_srcloc(as, nestLevel); continue; }
1849 if (word == ".static") { parse_static(as); continue; }
1850 if (word == ".doc") { parse_func_doccomment(as); continue; }
1851 as.error("unrecognized directive `" + word + "' in function");
1853 if (as.in.peek() == ':') {
1854 as.in.getc();
1855 as.addLabelTarget(word);
1856 continue;
1859 // Ok, it better be an opcode now.
1860 auto it = opcode_parsers.find(word);
1861 if (it == opcode_parsers.end()) {
1862 as.error("unrecognized opcode `" + word + "'");
1864 it->second(as);
1866 as.in.skipSpaceTab();
1867 if (as.in.peek() != '\n' &&
1868 as.in.peek() != '\r' &&
1869 as.in.peek() != '#' &&
1870 as.in.peek() != EOF) {
1871 as.error("too many arguments for opcode `" + word + "'");
1876 void parse_user_attribute(AsmState& as,
1877 UserAttributeMap& userAttrs) {
1878 auto name = read_litstr(as);
1879 as.in.expectWs('(');
1881 auto var = parse_php_serialized(as);
1883 as.in.expectWs(')');
1885 if (!var.isPHPArray()) {
1886 as.error("user attribute values must be arrays");
1889 userAttrs[name] =
1890 make_tv<KindOfArray>(ArrayData::GetScalarArray(std::move(var)));
1894 * attribute : attribute-name
1895 * | string-literal '(' long-string-literal ')'
1898 * attribute-list : empty
1899 * | '[' attribute* ']'
1902 * The `attribute-name' rule is context-sensitive; see as-shared.cpp.
1903 * The second attribute form is for user attributes and only applies
1904 * if attributeMap is non null.
1906 Attr parse_attribute_list(AsmState& as, AttrContext ctx,
1907 UserAttributeMap *userAttrs = nullptr,
1908 bool* isTop = nullptr) {
1909 as.in.skipWhitespace();
1910 int ret = AttrNone;
1911 if (ctx == AttrContext::Class || ctx == AttrContext::Func) {
1912 if (!SystemLib::s_inited) {
1913 ret |= AttrUnique | AttrPersistent | AttrBuiltin;
1916 if (as.in.peek() != '[') return Attr(ret);
1917 as.in.getc();
1919 std::string word;
1920 for (;;) {
1921 as.in.skipWhitespace();
1922 if (as.in.peek() == ']') break;
1923 if (as.in.peek() == '"' && userAttrs) {
1924 parse_user_attribute(as, *userAttrs);
1925 continue;
1927 if (!as.in.readword(word)) break;
1929 auto const abit = string_to_attr(ctx, word);
1930 if (abit) {
1931 ret |= *abit;
1932 continue;
1934 if (isTop && word == "nontop") {
1935 *isTop = false;
1936 continue;
1939 as.error("unrecognized attribute `" + word + "' in this context");
1941 as.in.expect(']');
1942 return Attr(ret);
1946 * type-info : empty
1947 * | '<' maybe-string-literal maybe-string-literal
1948 * type-flag* '>'
1950 * type-constraint : empty
1951 * | '<' maybe-string-literal
1952 * type-flag* '>'
1954 * This parses type-info if noUserType is false, type-constraint if true
1956 std::pair<const StringData *, TypeConstraint> parse_type_info(
1957 AsmState& as, bool noUserType = false) {
1958 as.in.skipWhitespace();
1959 if (as.in.peek() != '<') return {};
1960 as.in.getc();
1962 const StringData *userType = noUserType ? nullptr : read_maybe_litstr(as);
1963 const StringData *typeName = read_maybe_litstr(as);
1965 std::string word;
1966 auto flags = TypeConstraint::NoFlags;
1967 for (;;) {
1968 as.in.skipWhitespace();
1969 if (as.in.peek() == '>') break;
1970 if (!as.in.readword(word)) break;
1972 auto const abit = string_to_type_flag(word);
1973 if (abit) {
1974 flags = flags | *abit;
1975 continue;
1978 as.error("unrecognized type flag `" + word + "' in this context");
1980 as.in.expect('>');
1981 return std::make_pair(userType, TypeConstraint{typeName, flags});
1983 TypeConstraint parse_type_constraint(AsmState& as) {
1984 return parse_type_info(as, true).second;
1989 * parameter-list : '(' param-name-list ')'
1992 * param-name-list : empty
1993 * | param-name ',' param-name-list
1996 * param-name : '$' identifier dv-initializer
1997 * | '&' '$' identifier dv-initializer
2000 * dv-initializer : empty
2001 * | '=' identifier arg-default
2004 * arg-default : empty
2005 * | '(' long-string-literal ')'
2008 void parse_parameter_list(AsmState& as) {
2009 as.in.skipWhitespace();
2010 if (as.in.peek() != '(') return;
2011 as.in.getc();
2013 bool seenVariadic = false;
2014 bool seenRef = false;
2016 for (;;) {
2017 FuncEmitter::ParamInfo param;
2018 param.byRef = false;
2019 param.inout = false;
2021 as.in.skipWhitespace();
2022 int ch = as.in.peek();
2023 if (ch == ')') { as.in.getc(); break; } // allow empty param lists
2025 if (seenVariadic) {
2026 as.error("functions can only have one variadic argument");
2029 parse_attribute_list(as, AttrContext::Parameter, &param.userAttributes);
2031 if (ch == '.') {
2032 as.in.getc();
2033 if (as.in.getc() != '.' ||
2034 as.in.getc() != '.') {
2035 as.error("expecting '...'");
2038 seenVariadic = true;
2039 param.variadic = true;
2040 as.fe->attrs |= AttrVariadicParam;
2043 if (as.in.tryConsume("inout")) {
2044 if (seenVariadic) {
2045 as.error("inout parameters cannot be variadic");
2047 if (seenRef) {
2048 as.error("functions cannot contain both inout and ref parameters");
2050 param.inout = true;
2051 as.fe->attrs |= AttrTakesInOutParams;
2054 std::tie(param.userType, param.typeConstraint) = parse_type_info(as);
2056 as.in.skipWhitespace();
2057 ch = as.in.getc();
2059 if (ch == '&') {
2060 if (param.inout) {
2061 as.error("parameters cannot be marked both inout and ref");
2063 if (as.fe->attrs & AttrTakesInOutParams) {
2064 as.error("functions cannot contain both inout and ref parameters");
2066 seenRef = true;
2067 param.byRef = true;
2068 ch = as.in.getc();
2070 if (ch != '$') {
2071 as.error("function parameters must have a $ prefix");
2073 std::string name;
2074 if (!as.in.readword(name)) {
2075 as.error("expected parameter name after $");
2078 as.in.skipWhitespace();
2079 ch = as.in.getc();
2080 if (ch == '=') {
2081 if (seenVariadic) {
2082 as.error("variadic parameter cannot have dv-initializer");
2085 std::string label;
2086 if (!as.in.readword(label)) {
2087 as.error("expected label name for dv-initializer");
2089 as.addLabelDVInit(label, as.fe->params.size());
2091 as.in.skipWhitespace();
2092 ch = as.in.getc();
2093 if (ch == '(') {
2094 String str = parse_long_string(as);
2095 param.phpCode = makeStaticString(str);
2096 TypedValue tv;
2097 tvWriteUninit(tv);
2098 if (str.size() == 4) {
2099 if (!strcasecmp("null", str.data())) {
2100 tvWriteNull(tv);
2101 } else if (!strcasecmp("true", str.data())) {
2102 tv = make_tv<KindOfBoolean>(true);
2104 } else if (str.size() == 5 && !strcasecmp("false", str.data())) {
2105 tv = make_tv<KindOfBoolean>(false);
2107 if (tv.m_type != KindOfUninit) {
2108 param.defaultValue = tv;
2110 as.in.expectWs(')');
2111 as.in.skipWhitespace();
2112 ch = as.in.getc();
2116 as.fe->appendParam(makeStaticString(name), param);
2118 if (ch == ')') break;
2119 if (ch != ',') as.error("expected , between parameter names");
2123 void parse_function_flags(AsmState& as) {
2124 as.in.skipWhitespace();
2125 std::string flag;
2126 for (;;) {
2127 if (as.in.peek() == '{') break;
2128 if (!as.in.readword(flag)) break;
2130 if (flag == "isGenerator") {
2131 as.fe->isGenerator = true;
2132 } else if (flag == "isAsync") {
2133 as.fe->isAsync = true;
2134 } else if (flag == "isClosureBody") {
2135 as.fe->isClosureBody = true;
2136 } else if (flag == "isPairGenerator") {
2137 as.fe->isPairGenerator = true;
2138 } else {
2139 as.error("Unexpected function flag \"" + flag + "\"");
2145 * line-range : "(" integer "," integer ")"
2148 bool parse_line_range(AsmState& as, int& line0, int& line1) {
2149 as.in.skipWhitespace();
2150 if (as.in.peek() != '(') {
2151 line0 = as.in.getLineNumber();
2152 line1 = as.in.getLineNumber() + 1;
2153 return false;
2155 as.in.getc();
2156 line0 = as.in.readint();
2157 as.in.expectWs(',');
2158 line1 = as.in.readint();
2159 as.in.expectWs(')');
2160 return true;
2164 * directive-function : attribute-list ?line-range type-info identifier
2165 * parameter-list function-flags '{' function-body
2168 void parse_function(AsmState& as) {
2169 if (!as.emittedPseudoMain) {
2170 as.error(".function blocks must all follow the .main block");
2173 as.in.skipWhitespace();
2175 bool isTop = true;
2177 UserAttributeMap userAttrs;
2178 Attr attrs = parse_attribute_list(as, AttrContext::Func, &userAttrs, &isTop);
2180 if(!isTop && as.emittedTopLevelFunc) {
2181 as.error("All top level functions must be defined after any "
2182 "non-top functions");
2185 as.emittedTopLevelFunc |= isTop;
2187 int line0;
2188 int line1;
2189 parse_line_range(as, line0, line1);
2191 auto typeInfo = parse_type_info(as);
2192 std::string name;
2193 if (!as.in.readname(name)) {
2194 as.error(".function must have a name");
2197 as.fe = as.ue->newFuncEmitter(makeStaticString(name));
2198 as.fe->init(line0, line1, as.ue->bcPos(), attrs, isTop, 0);
2199 std::tie(as.fe->retUserType, as.fe->retTypeConstraint) = typeInfo;
2200 as.fe->userAttributes = userAttrs;
2202 parse_parameter_list(as);
2203 parse_function_flags(as);
2205 as.in.expectWs('{');
2207 as.srcLoc = Location::Range{-1,-1,-1,-1};
2208 parse_function_body(as);
2212 * directive-method : attribute-list ?line-range type-info identifier
2213 * parameter-list function-flags '{' function-body
2216 void parse_method(AsmState& as) {
2217 as.in.skipWhitespace();
2219 UserAttributeMap userAttrs;
2220 Attr attrs = parse_attribute_list(as, AttrContext::Func, &userAttrs);
2222 int line0;
2223 int line1;
2224 parse_line_range(as, line0, line1);
2226 auto typeInfo = parse_type_info(as);
2227 std::string name;
2228 if (!as.in.readname(name)) {
2229 as.error(".method requires a method name");
2232 as.fe = as.ue->newMethodEmitter(makeStaticString(name), as.pce);
2233 as.pce->addMethod(as.fe);
2234 as.fe->init(line0, line1,
2235 as.ue->bcPos(), attrs, false, 0);
2236 std::tie(as.fe->retUserType, as.fe->retTypeConstraint) = typeInfo;
2237 as.fe->userAttributes = userAttrs;
2239 parse_parameter_list(as);
2240 parse_function_flags(as);
2242 as.in.expectWs('{');
2244 as.srcLoc = Location::Range{-1,-1,-1,-1};
2245 parse_function_body(as);
2249 * member-tv-initializer : '=' php-serialized ';'
2250 * | '=' uninit ';'
2251 * | ';'
2254 TypedValue parse_member_tv_initializer(AsmState& as) {
2255 as.in.skipWhitespace();
2257 TypedValue tvInit;
2258 tvWriteNull(tvInit); // Don't confuse Variant with uninit data
2260 int what = as.in.getc();
2261 if (what == '=') {
2262 as.in.skipWhitespace();
2264 if (as.in.peek() != '\"') {
2265 // It might be an uninitialized property/constant.
2266 if (!as.in.tryConsume("uninit")) {
2267 as.error("Expected \"\"\" or \"uninit\" after '=' in "
2268 "const/property initializer");
2270 as.in.expectWs(';');
2271 tvWriteUninit(tvInit);
2272 return tvInit;
2275 tvAsVariant(&tvInit) = parse_php_serialized(as);
2276 if (tvInit.m_type == KindOfObject) {
2277 as.error("property initializer can't be an object");
2278 } else if (tvInit.m_type == KindOfResource) {
2279 as.error("property initializer can't be a resource");
2280 } else {
2281 tvAsVariant(&tvInit).setEvalScalar();
2283 as.in.expectWs(';');
2284 } else if (what == ';') {
2285 // already null
2286 } else {
2287 as.error("expected '=' or ';' after property name");
2290 return tvInit;
2294 * directive-property : attribute-list maybe-long-string-literal type-info
2295 * identifier member-tv-initializer
2298 * Define a property with an associated type and heredoc.
2300 void parse_property(AsmState& as) {
2301 as.in.skipWhitespace();
2303 Attr attrs = parse_attribute_list(as, AttrContext::Prop);
2305 auto const heredoc = makeStaticString(parse_maybe_long_string(as));
2306 auto const userTy = parse_type_info(as, false).first;
2307 auto const userTyStr = userTy ? userTy : staticEmptyString();
2309 std::string name;
2310 if (!as.in.readword(name)) {
2311 as.error("expected name for property");
2314 TypedValue tvInit = parse_member_tv_initializer(as);
2315 as.pce->addProperty(makeStaticString(name),
2316 attrs,
2317 userTyStr,
2318 heredoc,
2319 &tvInit,
2320 RepoAuthType{});
2324 * const-flags : isType
2327 * directive-const : identifier const-flags member-tv-initializer
2328 * | identifier const-flags ';'
2331 void parse_constant(AsmState& as) {
2332 as.in.skipWhitespace();
2334 std::string name;
2335 if (!as.in.readword(name)) {
2336 as.error("expected name for constant");
2339 bool isType = as.in.tryConsume("isType");
2340 as.in.skipWhitespace();
2342 if (as.in.peek() == ';') {
2343 as.in.getc();
2344 as.pce->addAbstractConstant(makeStaticString(name),
2345 staticEmptyString(),
2346 isType);
2347 return;
2350 TypedValue tvInit = parse_member_tv_initializer(as);
2351 as.pce->addConstant(makeStaticString(name),
2352 staticEmptyString(), &tvInit,
2353 staticEmptyString(),
2354 isType);
2358 * directive-default-ctor : ';'
2361 * No-op, for backward compat
2363 void parse_default_ctor(AsmState& as) {
2364 assert(!as.fe && as.pce);
2365 as.in.expectWs(';');
2369 * directive-use : identifier+ ';'
2370 * | identifier+ '{' use-line* '}'
2373 * use-line : use-name-ref "insteadof" identifier+ ';'
2374 * | use-name-ref "as" attribute-list identifier ';'
2375 * | use-name-ref "as" attribute-list ';'
2378 void parse_use(AsmState& as) {
2379 std::vector<std::string> usedTraits;
2380 for (;;) {
2381 std::string name;
2382 if (!as.in.readword(name)) break;
2383 usedTraits.push_back(name);
2385 if (usedTraits.empty()) {
2386 as.error(".use requires a trait name");
2389 for (size_t i = 0; i < usedTraits.size(); ++i) {
2390 as.pce->addUsedTrait(makeStaticString(usedTraits[i]));
2392 as.in.skipWhitespace();
2393 if (as.in.peek() != '{') {
2394 as.in.expect(';');
2395 return;
2397 as.in.getc();
2399 for (;;) {
2400 as.in.skipWhitespace();
2401 if (as.in.peek() == '}') break;
2403 std::string traitName;
2404 std::string identifier;
2405 if (!as.in.readword(traitName)) {
2406 as.error("expected identifier for line in .use block");
2408 as.in.skipWhitespace();
2409 if (as.in.peek() == ':') {
2410 as.in.getc();
2411 as.in.expect(':');
2412 if (!as.in.readword(identifier)) {
2413 as.error("expected identifier after ::");
2415 } else {
2416 identifier = traitName;
2417 traitName.clear();
2420 if (as.in.tryConsume("as")) {
2421 Attr attrs = parse_attribute_list(as, AttrContext::TraitImport);
2422 std::string alias;
2423 if (!as.in.readword(alias)) {
2424 if (attrs != AttrNone) {
2425 alias = identifier;
2426 } else {
2427 as.error("expected identifier or attribute list after "
2428 "`as' in .use block");
2432 as.pce->addTraitAliasRule(PreClass::TraitAliasRule(
2433 makeStaticString(traitName),
2434 makeStaticString(identifier),
2435 makeStaticString(alias),
2436 attrs));
2437 } else if (as.in.tryConsume("insteadof")) {
2438 if (traitName.empty()) {
2439 as.error("Must specify TraitName::name when using a trait insteadof");
2442 PreClass::TraitPrecRule precRule(
2443 makeStaticString(traitName),
2444 makeStaticString(identifier));
2446 bool addedOtherTraits = false;
2447 std::string whom;
2448 while (as.in.readword(whom)) {
2449 precRule.addOtherTraitName(makeStaticString(whom));
2450 addedOtherTraits = true;
2452 if (!addedOtherTraits) {
2453 as.error("one or more trait names expected after `insteadof'");
2456 as.pce->addTraitPrecRule(precRule);
2457 } else {
2458 as.error("expected `as' or `insteadof' in .use block");
2461 as.in.expectWs(';');
2464 as.in.expect('}');
2468 * directive-enum_ty : type-constraint ';'
2472 void parse_enum_ty(AsmState& as) {
2473 if (as.enumTySet) {
2474 as.error("only one .enum_ty directive may appear in a given class");
2476 as.enumTySet = true;
2478 as.pce->setEnumBaseTy(parse_type_constraint(as));
2480 as.in.expectWs(';');
2484 * directive-require : 'extends' '<' indentifier '>' ';'
2485 * | 'implements' '<' indentifier '>' ';'
2489 void parse_require(AsmState& as) {
2490 as.in.skipWhitespace();
2492 bool extends = as.in.tryConsume("extends");
2493 if (!extends && !as.in.tryConsume("implements")) {
2494 as.error(".require should be extends or implements");
2497 as.in.expectWs('<');
2498 std::string name;
2499 if (!as.in.readname(name)) {
2500 as.error(".require expects a class or interface name");
2502 as.in.expectWs('>');
2504 as.pce->addClassRequirement(PreClass::ClassRequirement(
2505 makeStaticString(name), extends
2508 as.in.expectWs(';');
2512 * directive-doccomment : long-string-literal ';'
2516 void parse_cls_doccomment(AsmState& as) {
2517 auto const doc = parse_long_string(as);
2518 as.in.expectWs(';');
2520 as.pce->setDocComment(makeStaticString(doc));
2524 * class-body : class-body-line* '}'
2527 * class-body-line : ".method" directive-method
2528 * | ".property" directive-property
2529 * | ".const" directive-const
2530 * | ".use" directive-use
2531 * | ".default_ctor" directive-default-ctor
2532 * | ".enum_ty" directive-enum-ty
2533 * | ".require" directive-require
2534 * | ".doc" directive-doccomment
2537 void parse_class_body(AsmState& as) {
2538 if (!as.emittedPseudoMain) {
2539 as.error(".class blocks must all follow the .main block");
2542 std::string directive;
2543 while (as.in.readword(directive)) {
2544 if (directive == ".method") { parse_method(as); continue; }
2545 if (directive == ".property") { parse_property(as); continue; }
2546 if (directive == ".const") { parse_constant(as); continue; }
2547 if (directive == ".use") { parse_use(as); continue; }
2548 if (directive == ".default_ctor") { parse_default_ctor(as); continue; }
2549 if (directive == ".enum_ty") { parse_enum_ty(as); continue; }
2550 if (directive == ".require") { parse_require(as); continue; }
2551 if (directive == ".doc") { parse_cls_doccomment(as); continue; }
2553 as.error("unrecognized directive `" + directive + "' in class");
2555 as.in.expect('}');
2558 PreClass::Hoistable compute_hoistable(AsmState& as,
2559 const std::string &name,
2560 const std::string &parentName) {
2561 auto &pce = *as.pce;
2562 bool system = pce.attrs() & AttrBuiltin;
2564 if (pce.methods().size() == 1 && pce.methods()[0]->isClosureBody) {
2565 return PreClass::NotHoistable;
2567 if (!system) {
2568 if (!pce.interfaces().empty() ||
2569 !pce.usedTraits().empty() ||
2570 !pce.requirements().empty() ||
2571 (pce.attrs() & AttrEnum)) {
2572 return PreClass::Mergeable;
2574 if (!parentName.empty() && !as.hoistables.count(parentName)) {
2575 return PreClass::MaybeHoistable;
2578 as.hoistables.insert(name);
2580 return pce.attrs() & AttrUnique ?
2581 PreClass::AlwaysHoistable : PreClass::MaybeHoistable;
2585 * directive-class : ?"top" attribute-list identifier ?line-range
2586 * extension-clause implements-clause '{' class-body
2589 * extension-clause : empty
2590 * | "extends" identifier
2593 * implements-clause : empty
2594 * | "implements" '(' identifier* ')'
2598 void parse_class(AsmState& as) {
2599 as.in.skipWhitespace();
2601 bool isTop = true;
2603 UserAttributeMap userAttrs;
2604 Attr attrs = parse_attribute_list(as, AttrContext::Class, &userAttrs, &isTop);
2605 std::string name;
2606 if (!as.in.readname(name)) {
2607 as.error(".class must have a name");
2609 if (ParserBase::IsAnonymousClassName(name)) {
2610 // refresh names of anonymous classes
2611 // to make sure they are unique
2612 auto p = name.find(';');
2613 if (p != std::string::npos) {
2614 name = name.substr(0, p);
2615 name = HPHP::NewAnonymousClassName(name);
2619 int line0;
2620 int line1;
2621 parse_line_range(as, line0, line1);
2623 std::string parentName;
2624 if (as.in.tryConsume("extends")) {
2625 if (!as.in.readname(parentName)) {
2626 as.error("expected parent class name after `extends'");
2630 std::vector<std::string> ifaces;
2631 if (as.in.tryConsume("implements")) {
2632 as.in.expectWs('(');
2633 std::string word;
2634 while (as.in.readname(word)) {
2635 ifaces.push_back(word);
2637 as.in.expect(')');
2640 auto off = folly::get_default(as.defClsOffsets, as.ue->numPreClasses(),
2641 as.ue->bcPos());
2643 as.pce = as.ue->newBarePreClassEmitter(name, PreClass::MaybeHoistable);
2644 as.pce->init(line0,
2645 line1,
2646 off,
2647 attrs,
2648 makeStaticString(parentName),
2649 staticEmptyString());
2650 for (auto const& iface : ifaces) {
2651 as.pce->addInterface(makeStaticString(iface));
2653 as.pce->setUserAttributes(userAttrs);
2655 as.in.expectWs('{');
2656 parse_class_body(as);
2658 as.pce->setHoistable(
2659 isTop ? compute_hoistable(as, name, parentName) : PreClass::NotHoistable
2662 as.finishClass();
2666 * directive-filepath : quoted-string-literal ';'
2669 void parse_filepath(AsmState& as) {
2670 auto const str = read_litstr(as);
2671 as.ue->m_filepath = str;
2672 as.in.expectWs(';');
2676 * directive-main : ?line-range '{' function-body
2679 void parse_main(AsmState& as) {
2680 if (as.emittedPseudoMain) {
2681 if (!SystemLib::s_inited) {
2682 as.error(".main found in systemlib");
2683 } else {
2684 as.error("Multiple .main directives found");
2688 int line0;
2689 int line1;
2690 bool fromSrcLoc = parse_line_range(as, line0, line1);
2692 as.in.expectWs('{');
2694 as.ue->initMain(line0, line1);
2695 as.fe = as.ue->getMain();
2696 as.emittedPseudoMain = true;
2697 if (fromSrcLoc) {
2698 as.srcLoc = Location::Range{line0,0,line1,0};
2699 } else {
2700 as.srcLoc = Location::Range{-1,-1,-1,-1};
2702 parse_function_body(as);
2706 * directive-adata : identifier '=' php-serialized ';'
2709 void parse_adata(AsmState& as) {
2710 as.in.skipWhitespace();
2711 std::string dataLabel;
2712 if (!as.in.readword(dataLabel)) {
2713 as.error("expected name for .adata");
2715 if (as.adataMap.count(dataLabel)) {
2716 as.error("duplicate adata label name " + dataLabel);
2719 as.in.expectWs('=');
2720 auto var = parse_php_serialized(as);
2721 if (!var.isArray()) {
2722 as.error(".adata only supports serialized arrays");
2724 auto const data = ArrayData::GetScalarArray(std::move(var));
2725 as.ue->mergeArray(data);
2726 as.adataMap[dataLabel] = data;
2728 as.in.expectWs(';');
2732 * directive-alias : attribute-list identifier '=' type-constraint
2733 * maybe-php-serialized ';'
2736 * We represent alias type information using the syntax for
2737 * TypeConstraints. We populate the name and nullable field of the
2738 * alias directly from the specified type constraint and derive the
2739 * AnnotType from the compute AnnotType in the constraint.
2741 * Following the type-constraint we encode the serialized type structure
2742 * corresponding to this alias.
2744 void parse_alias(AsmState& as) {
2745 as.in.skipWhitespace();
2747 TypeAlias record;
2748 Attr attrs = parse_attribute_list(as, AttrContext::Alias, &record.userAttrs);
2749 std::string name;
2750 if (!as.in.readname(name)) {
2751 as.error(".alias must have a name");
2753 as.in.expectWs('=');
2755 TypeConstraint ty = parse_type_constraint(as);
2756 Variant ts = parse_maybe_php_serialized(as);
2758 if (ts.isInitialized() && !ts.isArray()) {
2759 as.error(".alias must have an array type structure");
2762 const StringData* typeName = ty.typeName();
2763 if (!typeName) typeName = staticEmptyString();
2764 const StringData* sname = makeStaticString(name);
2765 // Merge to ensure namedentity creation, according to
2766 // emitTypedef in emitter.cpp
2767 as.ue->mergeLitstr(sname);
2768 as.ue->mergeLitstr(typeName);
2770 record.name = sname;
2771 record.value = typeName;
2772 record.type = typeName->empty() ? AnnotType::Mixed : ty.type();
2773 record.nullable = (ty.flags() & TypeConstraint::Nullable) != 0;
2774 record.attrs = attrs;
2775 if (ts.isInitialized()) {
2776 record.typeStructure = ArrNR(ArrayData::GetScalarArray(std::move(ts)));
2778 as.ue->addTypeAlias(record);
2780 as.in.expectWs(';');
2783 void parse_hh_file(AsmState& as) {
2784 as.in.skipWhitespace();
2785 std::string word;
2786 if (!as.in.readword(word)) {
2787 as.error(".hh_file must have a value");
2789 as.ue->m_isHHFile = word == "1";
2791 if (!as.ue->m_isHHFile && word != "0") {
2792 as.error(".hh_file must be either 1 or 0");
2795 as.in.expectWs(';');
2798 void parse_strict(AsmState& as) {
2799 as.in.skipWhitespace();
2800 std::string word;
2801 if (!as.in.readword(word)) {
2802 as.error(".strict must have a value");
2804 if (!RuntimeOption::PHP7_ScalarTypes) {
2805 as.error("Cannot set .strict without PHP7 ScalarTypes");
2808 as.ue->m_useStrictTypes = as.ue->m_useStrictTypesForBuiltins = word == "1";
2810 if (!as.ue->m_useStrictTypes && word != "0") {
2811 as.error("Strict types must be either 1 or 0");
2814 as.in.expectWs(';');
2817 void parse_symbol_refs(
2818 AsmState& as,
2819 void (AsmCallbacks::*onSymbol)(const std::string&)
2821 as.in.expectWs('{');
2823 if (as.callbacks) {
2824 while (true) {
2825 as.in.skipWhitespace();
2826 std::string symbol;
2827 as.in.consumePred(!boost::is_any_of(" \t\r\n#}"),
2828 std::back_inserter(symbol));
2829 if (symbol.empty()) {
2830 break;
2832 (as.callbacks->*onSymbol)(symbol);
2834 } else {
2835 while (as.in.peek() != '}') {
2836 as.in.skipWhitespace();
2837 if (!as.in.skipPred(!boost::is_any_of("#}"))) break;
2841 as.in.expect('}');
2844 void parse_includes(AsmState& as) {
2845 parse_symbol_refs(as, &AsmCallbacks::onInclude);
2848 void parse_constant_refs(AsmState& as) {
2849 parse_symbol_refs(as, &AsmCallbacks::onConstantRef);
2852 void parse_function_refs(AsmState& as) {
2853 parse_symbol_refs(as, &AsmCallbacks::onFunctionRef);
2856 void parse_class_refs(AsmState& as) {
2857 parse_symbol_refs(as, &AsmCallbacks::onClassRef);
2861 * directive-metadata : name = bareword ';'
2862 * | name = quoted-string-literal ';'
2863 * | name = long-string-literal ';'
2866 void parse_metadata(AsmState& as) {
2867 std::string key;
2868 if (as.in.readname(key)) {
2869 as.in.expectWs('=');
2870 as.in.skipWhitespace();
2871 auto const value = [&] () -> const StringData* {
2872 auto ret = parse_maybe_long_string(as);
2873 if (!ret.empty()) return makeStaticString(ret);
2874 std::string tmp;
2875 if (as.in.readQuotedStr(tmp) || as.in.readword(tmp)) {
2876 return makeStaticString(tmp);
2878 return nullptr;
2879 }();
2880 if (value) {
2881 as.in.expect(';');
2882 as.ue->m_metaData.emplace(
2883 makeStaticString(key),
2884 make_tv<KindOfPersistentString>(value)
2886 return;
2889 as.error(".metadata expects a key = value pair");
2893 * asm-file : asm-tld* <EOF>
2896 * asm-tld : ".filepath" directive-filepath
2897 * | ".main" directive-main
2898 * | ".function" directive-function
2899 * | ".adata" directive-adata
2900 * | ".class" directive-class
2901 * | ".alias" directive-alias
2902 * | ".strict" directive-strict
2903 * | ".includes directive-filepaths
2904 * | ".constant_refs directive-symbols
2905 * | ".function_refs directive-symbols
2906 * | ".class_refs directive-symbols
2907 * | ".metadata directive-meta-data
2910 void parse(AsmState& as) {
2911 as.in.skipWhitespace();
2912 std::string directive;
2913 if (!SystemLib::s_inited) {
2915 * The SystemLib::s_hhas_unit is required to be merge-only,
2916 * and we create the source by concatenating separate .hhas files
2917 * Rather than choosing one to have the .main directive, we just
2918 * generate a trivial pseudoMain automatically.
2920 as.ue->addTrivialPseudoMain();
2921 as.emittedPseudoMain = true;
2924 while (as.in.readword(directive)) {
2925 if (directive == ".filepath") { parse_filepath(as) ; continue; }
2926 if (directive == ".main") { parse_main(as) ; continue; }
2927 if (directive == ".function") { parse_function(as) ; continue; }
2928 if (directive == ".adata") { parse_adata(as) ; continue; }
2929 if (directive == ".class") { parse_class(as) ; continue; }
2930 if (directive == ".alias") { parse_alias(as) ; continue; }
2931 if (directive == ".strict") { parse_strict(as) ; continue; }
2932 if (directive == ".hh_file") { parse_hh_file(as) ; continue; }
2933 if (directive == ".includes") { parse_includes(as) ; continue; }
2934 if (directive == ".constant_refs") { parse_constant_refs(as) ; continue; }
2935 if (directive == ".function_refs") { parse_function_refs(as) ; continue; }
2936 if (directive == ".class_refs") { parse_class_refs(as) ; continue; }
2937 if (directive == ".metadata") { parse_metadata(as) ; continue; }
2939 as.error("unrecognized top-level directive `" + directive + "'");
2942 if (!as.emittedPseudoMain) {
2943 as.error("no .main found in hhas unit");
2949 //////////////////////////////////////////////////////////////////////
2951 std::unique_ptr<UnitEmitter> assemble_string(
2952 const char* code,
2953 int codeLen,
2954 const char* filename,
2955 const MD5& md5,
2956 bool swallowErrors,
2957 AsmCallbacks* callbacks
2959 auto ue = std::make_unique<UnitEmitter>(md5);
2960 StringData* sd = makeStaticString(filename);
2961 ue->m_filepath = sd;
2962 ue->m_useStrictTypes = RuntimeOption::EnableHipHopSyntax ||
2963 !RuntimeOption::PHP7_ScalarTypes;
2965 try {
2966 auto const mode = std::istringstream::binary | std::istringstream::in;
2967 std::istringstream instr(std::string(code, codeLen), mode);
2968 AsmState as(instr, callbacks);
2969 as.ue = ue.get();
2970 parse(as);
2971 } catch (const std::exception& e) {
2972 if (!swallowErrors) throw;
2973 ue = createFatalUnit(sd, md5, FatalOp::Runtime, makeStaticString(e.what()));
2976 return ue;
2979 AsmResult assemble_expression(UnitEmitter& ue, FuncEmitter* fe,
2980 int incomingStackDepth,
2981 const std::string& expr) {
2982 auto const mode = std::istringstream::binary | std::istringstream::in;
2983 std::stringstream sstr(expr + '}', mode);
2984 AsmState as(sstr);
2985 as.ue = &ue;
2986 as.fe = fe;
2987 as.initStackDepth.adjust(as, incomingStackDepth);
2988 parse_function_body(as, 1);
2989 as.finishSection();
2990 if (as.maxUnnamed >= 0) {
2991 as.error("Unnamed locals are not allowed in inline assembly");
2994 if (!as.currentStackDepth) return AsmResult::Unreachable;
2996 // If we fall off the end of the inline assembly, we're expected to
2997 // leave a single value on the stack, or leave the stack unchanged.
2998 if (!as.currentStackDepth->baseValue) {
2999 as.error("Unknown stack offset on exit from inline assembly");
3001 auto curStackDepth = as.currentStackDepth->absoluteDepth();
3002 if (curStackDepth == incomingStackDepth + 1) {
3003 return AsmResult::ValuePushed;
3005 if (curStackDepth != incomingStackDepth) {
3006 as.error("Inline assembly expressions should leave the stack unchanged, "
3007 "or push exactly one cell onto the stack.");
3010 return AsmResult::NoResult;
3013 //////////////////////////////////////////////////////////////////////