Revert records hhbbc diffs
[hiphop-php.git] / hphp / runtime / vm / as.cpp
blob2ad80ac73006147d39eaa60e5499d49e6b563c5e
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
18 * This module contains an assembler implementation for HHBC. It is
19 * probably fairly close to allowing you to access most of the
20 * metadata associated with hhvm's compiled unit format, although it's
21 * possible something has been overlooked.
23 * To use it, run hhvm with -v Eval.AllowHhas=true on a file with a
24 * ".hhas" extension. The syntax is probably easiest to understand by
25 * looking at some examples (or the semi-BNF markup around some of the
26 * parse functions here). For examples, see hphp/tests/vm/asm_*.
29 * Notes:
31 * - You can crash hhvm very easily with this.
33 * Using this module, you can emit pretty much any sort of not
34 * trivially-illegal bytecode stream, and many trivially-illegal
35 * ones as well. You can also easily create Units with illegal
36 * metadata. Generally this will crash the VM. In other cases
37 * (especially if you don't bother to DefCls your classes in your
38 * .main) you'll just get mysterious "class not defined" errors
39 * or weird behavior.
41 * - Whitespace is not normally significant, but newlines may not
42 * be in the middle of a list of opcode arguments. (After the
43 * newline, the next thing seen is expected to be either a
44 * mnemonic for the next opcode in the stream or some sort of
45 * directive.) However, newlines (and comments) may appear
46 * *inside* certain opcode arguments (e.g. string literals or
47 * vector immediates).
49 * Rationale: this is partially intended to make it trivial to
50 * catch wrong-number-of-arguments errors, although it probably
51 * could be done without this if you feel like changing it.
54 * Wishlist:
56 * - It might be nice if you could refer to iterators by name
57 * instead of by index.
59 * - DefCls by name would be nice.
61 * Missing features (partial list):
63 * - while class/function names can contain ':', '$', and ';',
64 * .use declarations can't handle those names because of syntax
65 * conflicts
67 * @author Jordan DeLong <delong.j@fb.com>
70 #include "hphp/runtime/vm/as.h"
72 #include <algorithm>
73 #include <cstdio>
74 #include <iostream>
75 #include <iterator>
76 #include <sstream>
77 #include <vector>
79 #include <boost/algorithm/string.hpp>
80 #include <boost/scoped_ptr.hpp>
81 #include <boost/bind.hpp>
83 #include <folly/Conv.h>
84 #include <folly/MapUtil.h>
85 #include <folly/Memory.h>
86 #include <folly/Range.h>
87 #include <folly/String.h>
89 #include "hphp/util/sha1.h"
91 #include "hphp/runtime/base/builtin-functions.h"
92 #include "hphp/runtime/base/memory-manager-defs.h"
93 #include "hphp/runtime/base/repo-auth-type-codec.h"
94 #include "hphp/runtime/base/repo-auth-type.h"
95 #include "hphp/runtime/base/variable-serializer.h"
96 #include "hphp/runtime/base/tv-type.h"
97 #include "hphp/runtime/vm/as-shared.h"
98 #include "hphp/runtime/vm/bc-pattern.h"
99 #include "hphp/runtime/vm/extern-compiler.h"
100 #include "hphp/runtime/vm/func-emitter.h"
101 #include "hphp/runtime/vm/hhbc.h"
102 #include "hphp/runtime/vm/native.h"
103 #include "hphp/runtime/vm/preclass-emitter.h"
104 #include "hphp/runtime/vm/record-emitter.h"
105 #include "hphp/runtime/vm/rx.h"
106 #include "hphp/runtime/vm/unit.h"
107 #include "hphp/runtime/vm/unit-emitter.h"
108 #include "hphp/system/systemlib.h"
109 #include "hphp/zend/zend-string.h"
111 TRACE_SET_MOD(hhas);
113 namespace HPHP {
115 AssemblerError::AssemblerError(int where, const std::string& what)
116 : std::runtime_error(
117 folly::sformat("Assembler Error: line {}: {}", where, what))
120 //////////////////////////////////////////////////////////////////////
122 namespace {
124 StringData* makeDocComment(const String& s) {
125 if (RuntimeOption::EvalGenerateDocComments) return makeStaticString(s);
126 return staticEmptyString();
struct AsmState;

// Pointer to a function that takes the assembler state by reference.
using ParserFunc = void (*)(AsmState& as);
132 struct Input {
133 explicit Input(std::istream& in)
134 : m_in(in)
137 int peek() { return m_in.peek(); }
139 int getc() {
140 int ret = m_in.get();
141 if (ret == EOF) {
142 io_error_if_bad();
143 } else if (ret == '\n') {
144 ++m_lineNumber;
146 return ret;
149 void ungetc(char c) {
150 if (c == '\n') --m_lineNumber;
151 m_in.putback(c);
154 void expect(int c) {
155 if (getc() != c) {
156 error(folly::sformat("expected character `{}'", char(c)));
161 * Expect `c' after possible whitespace/comments. When convenient,
162 * preferable to doing skipWhitespace/expect manually to keep the
163 * line number in the error prior to the whitespace skipped.
165 void expectWs(int c) {
166 const int currentLine = m_lineNumber;
167 skipWhitespace();
168 if (getc() != c) {
169 throw AssemblerError(currentLine,
170 folly::sformat("expected character `{}'", char(c)));
174 int getLineNumber() const {
175 return m_lineNumber;
178 // Skips whitespace, then populates word with valid bareword
179 // characters. Returns true if we read any characters into word.
180 bool readword(std::string& word) {
181 word.clear();
182 skipWhitespace();
183 consumePred(is_bareword(), std::back_inserter(word));
184 return !word.empty();
186 // Skips whitespace, then populates name with valid extname
187 // characters. Returns true if we read any characters into name.
188 bool readname(std::string& name) {
189 name.clear();
190 skipWhitespace();
191 consumePred(is_extname(), std::back_inserter(name));
192 return !name.empty();
194 // Try to consume a bareword. Skips whitespace. If we can't
195 // consume the specified word, returns false.
196 bool tryConsume(const std::string& what) {
197 std::string word;
198 if (!readword(word)) {
199 return false;
201 if (word != what) {
202 std::for_each(word.rbegin(), word.rend(),
203 boost::bind(&Input::ungetc, this, _1));
204 return false;
206 return true;
208 int32_t readint() {
209 std::string buf;
210 skipWhitespace();
211 if (peek() == '-') buf += (char)getc();
212 consumePred(isdigit, std::back_inserter(buf));
213 if (buf.empty() || buf == "-") {
214 throw AssemblerError(m_lineNumber, "expected integral value");
216 return folly::to<int32_t>(buf);
219 // C-style character escapes, no support for unicode escapes or
220 // whatnot.
221 template<class OutCont>
222 void escapeChar(OutCont& out) {
223 auto is_oct = [&] (int i) { return i >= '0' && i <= '7'; };
224 auto is_hex = [&] (int i) {
225 return (i >= '0' && i <= '9') ||
226 (i >= 'a' && i <= 'f') ||
227 (i >= 'A' && i <= 'F');
229 auto hex_val = [&] (int i) -> uint32_t {
230 assertx(is_hex(i));
231 return i >= '0' && i <= '9' ? i - '0' :
232 i >= 'a' && i <= 'f' ? i - 'a' + 10 : i - 'A' + 10;
235 auto src = getc();
236 switch (src) {
237 case EOF: error("EOF in string literal");
238 case 'a': out.push_back('\a'); break;
239 case 'b': out.push_back('\b'); break;
240 case 'f': out.push_back('\f'); break;
241 case 'n': out.push_back('\n'); break;
242 case 'r': out.push_back('\r'); break;
243 case 't': out.push_back('\t'); break;
244 case 'v': out.push_back('\v'); break;
245 case '\'': out.push_back('\''); break;
246 case '\"': out.push_back('\"'); break;
247 case '\?': out.push_back('\?'); break;
248 case '\\': out.push_back('\\'); break;
249 case '\r': /* ignore */ break;
250 case '\n': /* ignore */ break;
251 default:
252 if (is_oct(src)) {
253 auto val = int64_t{src} - '0';
254 for (auto i = int{1}; i < 3; ++i) {
255 src = getc();
256 if (!is_oct(src)) { ungetc(src); break; }
257 val *= 8;
258 val += src - '0';
260 if (val > std::numeric_limits<uint8_t>::max()) {
261 error("octal escape sequence overflowed");
263 out.push_back(static_cast<uint8_t>(val));
264 return;
267 if (src == 'x' || src == 'X') {
268 auto val = uint64_t{0};
269 if (!is_hex(peek())) error("\\x used without no following hex digits");
270 for (auto i = int{0}; i < 2; ++i) {
271 src = getc();
272 if (!is_hex(src)) { ungetc(src); break; }
273 val *= 0x10;
274 val += hex_val(src);
276 if (val > std::numeric_limits<uint8_t>::max()) {
277 error("hex escape sequence overflowed");
279 out.push_back(static_cast<uint8_t>(val));
280 return;
283 error("unrecognized character escape");
287 // Reads a quoted string with typical escaping rules. Does not skip
288 // any whitespace. Returns true if we successfully read one, or
289 // false. EOF during the string throws.
290 bool readQuotedStr(std::string& str) {
291 str.clear();
292 if (peek() != '\"') {
293 return false;
295 getc();
297 int c;
298 while ((c = getc()) != EOF) {
299 switch (c) {
300 case '\"': return true;
301 case '\\': escapeChar(str); break;
302 default: str.push_back(c); break;
305 error("EOF in string literal");
306 not_reached();
307 return false;
311 * Reads a python-style longstring, or returns false if we don't
312 * have one. Does not skip any whitespace before looking for the
313 * string.
315 * Python longstrings start with \"\"\", and can contain any bytes
316 * other than \"\"\". A '\\' character introduces C-style escapes,
317 * but there's no need to escape single quote characters.
319 bool readLongString(std::vector<char>& buffer) {
320 if (peek() != '\"') return false;
321 getc();
322 if (peek() != '\"') { ungetc('\"'); return false; }
323 getc();
324 if (peek() != '\"') { ungetc('\"');
325 ungetc('\"'); return false; }
326 getc();
328 int c;
329 while ((c = getc()) != EOF) {
330 if (c == '\\') {
331 escapeChar(buffer);
332 continue;
334 if (c == '"') {
335 c = getc();
336 if (c != '"') {
337 buffer.push_back('"');
338 ungetc(c);
339 continue;
341 c = getc();
342 if (c != '"') {
343 buffer.push_back('"');
344 buffer.push_back('"');
345 ungetc(c);
346 continue;
348 return true;
351 buffer.push_back(c);
353 error("EOF in \"\"\"-string literal");
354 not_reached();
355 return false;
358 // Skips whitespace (including newlines and comments).
359 void skipWhitespace() {
360 while (skipPred(boost::is_any_of(" \t\r\n"))) {
361 if (peek() == '#') {
362 skipPred(!boost::is_any_of("\n"));
363 expect('\n');
364 } else {
365 break;
370 // Skip spaces and tabs, but other whitespace (such as comments or
371 // newlines) stop the skip.
372 void skipSpaceTab() {
373 skipPred(boost::is_any_of(" \t"));
376 template<class Predicate>
377 bool skipPred(Predicate pred) {
378 while (pred(peek())) {
379 if (getc() == EOF) {
380 return false;
384 return true;
387 template<class Predicate, class OutputIterator>
388 bool consumePred(Predicate pred, OutputIterator out) {
389 int c;
390 while (pred(c = peek())) {
391 if (getc() == EOF) {
392 return false;
395 *out++ = c;
398 return true;
401 private:
402 // whether a character is a valid part of the extended sorts of
403 // names that HHVM uses for certain generated constructs
404 // (closures, __Memoize implementations, etc)
405 struct is_extname {
406 bool operator()(int i) const {
407 is_bareword is_bw;
408 return is_bw(i) || i == ':' || i == ';' || i == '#' || i =='@' ||
409 (i >= 0x7f && i <= 0xff) /* see hphp.ll :( */;
413 void error(const std::string& what) {
414 throw AssemblerError(getLineNumber(), what);
417 void io_error_if_bad() {
418 if (m_in.bad()) {
419 error("I/O error reading stream: " +
420 folly::errnoStr(errno).toStdString());
424 private:
425 std::istream& m_in;
426 int m_lineNumber{1};
429 struct StackDepth;
432 * Tracks the depth of the stack in a given block of instructions.
434 * This structure is linked to a block of instructions (usually starting at a
435 * label), and tracks the current stack depth in this block. This tracking can
436 * take two forms:
437 * - Absolute depth: the depth of the stack is exactly known for this block
438 * - Relative depth: the depth of the stack is unknown for now. We keep track
439 * of an offset, relative to the depth of the stack at the first instruction
440 * of the block
442 struct StackDepth {
443 int currentOffset;
445 * Tracks the max depth of elem stack + desc stack offset inside a region
446 * where baseValue is unknown.
448 int maxOffset;
450 * Tracks the min depth of the elem stack inside a region where baseValue
451 * is unknown, and the line where the min occurred.
453 int minOffset;
454 int minOffsetLine;
455 folly::Optional<int> baseValue;
458 * During the parsing process, when a Jmp instruction is encountered, the
459 * StackDepth structure for this jump becomes linked to the StackDepth
460 * structure of the label (which is added to the listeners list).
462 * Once the absolute depth at the jump becomes known, its StackDepth
463 * instance calls the setBase method of the StackDepth instance of the label.
464 * The absolute depth at the label can then be inferred from the
465 * absolute depth at the jump.
467 std::vector<std::pair<StackDepth*, int> > listeners;
469 StackDepth()
470 : currentOffset(0)
471 , maxOffset(0)
472 , minOffset(0)
475 void adjust(AsmState& as, int delta);
476 void addListener(AsmState& as, StackDepth* target);
477 void setBase(AsmState& as, int stackDepth);
478 int absoluteDepth() {
479 assertx(baseValue.hasValue());
480 return baseValue.value() + currentOffset;
484 * Sets the baseValue such as the current stack depth matches the
485 * parameter.
487 * If the base value is already known, it may conflict with the
488 * parameter of this function. In this case, an error will be raised.
490 void setCurrentAbsolute(AsmState& as, int stackDepth);
493 struct Label {
494 bool bound{false};
495 Offset target;
496 StackDepth stackDepth;
499 * Each label source source has an Offset where the jmp should be
500 * patched up is, and an Offset from which the jump delta should be
501 * computed. (The second Offset is basically to the actual
502 * jump/switch/etc instruction, while the first points to the
503 * immediate.)
505 std::vector<std::pair<Offset,Offset>> sources;
508 * List of a parameter ids that use this label for its DV
509 * initializer.
511 std::vector<Id> dvInits;
514 * List of EHEnts that have m_handler pointing to this label.
516 std::vector<size_t> ehEnts;
519 struct HashSymbolRef {
520 size_t operator()(SymbolRef s) const {
521 return static_cast<size_t>(s);
525 struct AsmState {
526 explicit AsmState(std::istream& in, bool wants_symbol_refs = false)
527 : in{in}
528 , wants_symbol_refs{wants_symbol_refs}
530 currentStackDepth->setBase(*this, 0);
533 AsmState(const AsmState&) = delete;
534 AsmState& operator=(const AsmState&) = delete;
536 template<typename... Args>
537 void error(const std::string& fmt, Args&&... args) {
538 throw AssemblerError(in.getLineNumber(),
539 folly::sformat(fmt, std::forward<Args>(args)...));
543 void adjustStack(int delta) {
544 if (currentStackDepth == nullptr) {
545 // Instruction is unreachable, nothing to do here!
546 return;
549 currentStackDepth->adjust(*this, delta);
552 void adjustStackHighwater(int depth) {
553 if (depth) {
554 fe->maxStackCells = std::max(fe->maxStackCells, depth);
558 std::string displayStackDepth() {
559 std::ostringstream stack;
561 if (currentStackDepth == nullptr) {
562 stack << "/";
563 } else if (currentStackDepth->baseValue) {
564 stack << *currentStackDepth->baseValue +
565 currentStackDepth->currentOffset;
566 } else {
567 stack << "?" << currentStackDepth->currentOffset;
570 return stack.str();
573 void addLabelTarget(const std::string& name) {
574 auto& label = labelMap[name];
575 if (label.bound) {
576 error("Duplicate label " + name);
578 label.bound = true;
579 label.target = ue->bcPos();
581 StackDepth* newStack = &label.stackDepth;
583 if (currentStackDepth == nullptr) {
584 // Previous instruction was unreachable
585 currentStackDepth = newStack;
586 return;
589 // The stack depth at the label depends on the current depth
590 currentStackDepth->addListener(*this, newStack);
591 currentStackDepth = newStack;
594 void addLabelJump(const std::string& name, Offset immOff, Offset opcodeOff) {
595 auto& label = labelMap[name];
597 if (currentStackDepth != nullptr) {
598 // The stack depth at the target must be the same as the current depth
599 // (whatever this may be: it may still be unknown)
600 currentStackDepth->addListener(*this, &label.stackDepth);
603 label.sources.emplace_back(immOff, opcodeOff);
606 void enforceStackDepth(int stackDepth) {
607 if (currentStackDepth == nullptr) {
608 // Current instruction is unreachable, thus the constraint
609 // on the stack depth will never be violated
610 return;
613 currentStackDepth->setCurrentAbsolute(*this, stackDepth);
616 bool isUnreachable() {
617 return currentStackDepth == nullptr;
620 void enterUnreachableRegion() {
621 currentStackDepth = nullptr;
624 void enterReachableRegion(int stackDepth) {
625 unnamedStackDepths.emplace_back(std::make_unique<StackDepth>());
626 currentStackDepth = unnamedStackDepths.back().get();
627 currentStackDepth->setBase(*this, stackDepth);
630 void addLabelDVInit(const std::string& name, int paramId) {
631 labelMap[name].dvInits.push_back(paramId);
633 // Stack depth should be 0 when entering a DV init
634 labelMap[name].stackDepth.setBase(*this, 0);
637 void addLabelEHEnt(const std::string& name, size_t ehIdx) {
638 labelMap[name].ehEnts.push_back(ehIdx);
640 // Stack depth should be 0 when entering a fault funclet
641 labelMap[name].stackDepth.setBase(*this, 0);
644 void finishClass() {
645 assertx(!fe && !re);
646 ue->addPreClassEmitter(pce);
647 pce = 0;
648 enumTySet = false;
651 void finishRecord() {
652 assertx(!fe && !pce);
653 ue->addRecordEmitter(re);
654 re = nullptr;
657 void patchLabelOffsets(const Label& label) {
658 for (auto const& source : label.sources) {
659 ue->emitInt32(label.target - source.second, source.first);
662 for (auto const& dvinit : label.dvInits) {
663 fe->params[dvinit].funcletOff = label.target;
666 for (auto const& ehEnt : label.ehEnts) {
667 fe->ehtab[ehEnt].m_handler = label.target;
671 void finishSection() {
672 for (auto const& label : labelMap) {
673 if (!label.second.bound) {
674 error("Undefined label " + label.first);
676 if (label.second.target >= ue->bcPos()) {
677 error("label " + label.first + " falls of the end of the function");
680 patchLabelOffsets(label.second);
684 void finishFunction() {
685 finishSection();
687 // Stack depth should be 0 at the end of a function body
688 enforceStackDepth(0);
690 // Bump up the unnamed local count
691 const int numLocals = maxUnnamed + 1;
692 while (fe->numLocals() < numLocals) {
693 fe->allocUnnamedLocal();
696 fe->maxStackCells +=
697 fe->numLocals() +
698 fe->numIterators() * kNumIterCells;
700 fe->finish(ue->bcPos());
702 fe = 0;
703 labelMap.clear();
704 numItersSet = false;
705 initStackDepth = StackDepth();
706 initStackDepth.setBase(*this, 0);
707 currentStackDepth = &initStackDepth;
708 unnamedStackDepths.clear();
709 maxUnnamed = -1;
712 int getLocalId(const std::string& name) {
713 if (name[0] == '_') {
714 int id = folly::to<int>(name.substr(1));
715 if (id > maxUnnamed) maxUnnamed = id;
716 return id;
719 if (name[0] != '$') {
720 error("local variables must be prefixed with $ or _");
723 const StringData* sd = makeStaticString(name.c_str() + 1);
724 fe->allocVarId(sd);
725 return fe->lookupVarId(sd);
728 int getIterId(int32_t id) {
729 if (id >= fe->numIterators()) {
730 error("iterator id exceeded number of iterators in the function");
732 return id;
735 UnitEmitter* ue;
736 Input in;
737 bool emittedPseudoMain{false};
738 bool emittedTopLevelFunc{false};
741 * Map of adata identifiers to their serialized contents
742 * Needed because, when instrumenting array provenance, we're unable
743 * to initialize their static arrays until the adata is first referenced
745 * There's also some painful maneuvering around keeping either the serialized
746 * or unserialized array in request heap until it can be made static since
747 * this could potentially confusingly OOM a request that autoloads a large
748 * unit
750 std::unordered_map<std::string, std::vector<char>> adataDecls;
752 // Map of adata identifiers to their associated static arrays and potential DV
753 // overrides.
754 std::map<
755 std::string,
756 std::pair<ArrayData*,VariableSerializer::DVOverrides>
757 > adataMap;
758 // Map of array immediates to their adata identifiers.
759 std::map<Offset, std::string> adataUses;
761 // In whole program mode it isn't possible to lookup a litstr in the global
762 // table while emitting, so keep a lookaside of litstrs seen by the assembler.
763 std::unordered_map<Id, const StringData*> litstrMap;
765 // When inside a class, this state is active.
766 PreClassEmitter* pce{nullptr};
768 // When inside a record, this state is active.
769 RecordEmitter* re{nullptr};
771 // When we're doing a function or method body, this state is active.
772 FuncEmitter* fe{nullptr};
773 std::map<std::string,Label> labelMap;
774 bool numItersSet{false};
775 bool enumTySet{false};
776 StackDepth initStackDepth;
777 StackDepth* currentStackDepth{&initStackDepth};
778 std::vector<std::unique_ptr<StackDepth>> unnamedStackDepths;
779 int minStackDepth{0};
780 int maxUnnamed{-1};
781 std::set<std::string,stdltistr> hoistables;
782 std::unordered_map<uint32_t,Offset> defClsOffsets;
783 Location::Range srcLoc{-1,-1,-1,-1};
784 hphp_fast_map<SymbolRef,
785 CompactVector<std::string>,
786 HashSymbolRef> symbol_refs;
787 bool wants_symbol_refs;
790 void StackDepth::adjust(AsmState& as, int delta) {
791 currentOffset += delta;
793 if (!baseValue) {
794 // The absolute stack depth is unknown. We only store the min
795 // and max offsets, and we will take a decision later, when the
796 // base value will be known.
797 maxOffset = std::max(currentOffset, maxOffset);
798 if (currentOffset < minOffset) {
799 minOffsetLine = as.in.getLineNumber();
800 minOffset = currentOffset;
802 return;
805 if (*baseValue + currentOffset < 0) {
806 as.error("opcode sequence caused stack depth to go negative");
809 as.adjustStackHighwater(*baseValue + currentOffset);
812 void StackDepth::addListener(AsmState& as, StackDepth* target) {
813 if (baseValue) {
814 target->setBase(as, *baseValue + currentOffset);
815 } else {
816 listeners.emplace_back(target, currentOffset);
820 void StackDepth::setBase(AsmState& as, int stackDepth) {
821 if (baseValue && stackDepth != *baseValue) {
822 as.error("stack depth {} does not match base value {}",
823 stackDepth, *baseValue);
826 baseValue = stackDepth;
828 // We finally know the base value. Update AsmState accordingly.
829 if (*baseValue + minOffset < 0) {
830 throw AssemblerError(
831 minOffsetLine,
832 "opcode sequence caused stack depth to go negative"
835 as.adjustStackHighwater(*baseValue + maxOffset);
837 // Update the listeners
838 auto l = std::move(listeners);
839 // We won't need them anymore
840 listeners.clear();
841 for (auto& kv : l) {
842 kv.first->setBase(as, *baseValue + kv.second);
846 void StackDepth::setCurrentAbsolute(AsmState& as, int stackDepth) {
847 setBase(as, stackDepth - currentOffset);
850 template<class F>
851 decltype(auto) suppressOOM(F func) {
852 MemoryManager::SuppressOOM so(*tl_heap);
853 return func();
856 //////////////////////////////////////////////////////////////////////
859 * Opcode arguments must be on the same line as the opcode itself,
860 * although certain argument types may contain internal newlines (see,
861 * for example, read_jmpvector or string literals).
863 template<class Target> Target read_opcode_arg(AsmState& as) {
864 as.in.skipSpaceTab();
865 std::string strVal;
866 as.in.consumePred(!boost::is_any_of(" \t\r\n#;>"),
867 std::back_inserter(strVal));
868 if (strVal.empty()) {
869 as.error("expected opcode or directive argument");
871 try {
872 return folly::to<Target>(strVal);
873 } catch (std::range_error&) {
874 as.error("couldn't convert input argument (" + strVal + ") to "
875 "proper type");
876 not_reached();
880 template<class SubOpType>
881 uint8_t read_subop(AsmState& as) {
882 auto const str = read_opcode_arg<std::string>(as);
883 if (auto const ty = nameToSubop<SubOpType>(str.c_str())) {
884 return static_cast<uint8_t>(*ty);
886 as.error("unknown subop name");
887 not_reached();
890 const StringData* read_litstr(AsmState& as) {
891 as.in.skipSpaceTab();
892 std::string strVal;
893 if (!as.in.readQuotedStr(strVal)) {
894 as.error("expected quoted string literal");
896 return makeStaticString(strVal);
900 * maybe-string-literal : N
901 * | string-literal
904 const StringData* read_maybe_litstr(AsmState& as) {
905 as.in.skipSpaceTab();
906 if (as.in.peek() == 'N') {
907 as.in.getc();
908 return nullptr;
910 return read_litstr(as);
913 std::vector<std::string> read_strvector(AsmState& as) {
914 std::vector<std::string> ret;
915 as.in.skipSpaceTab();
916 as.in.expect('<');
917 std::string name;
918 while (as.in.skipSpaceTab(), as.in.readQuotedStr(name)) {
919 ret.push_back(name);
921 as.in.skipSpaceTab();
922 as.in.expectWs('>');
923 return ret;
926 Variant parse_php_serialized(folly::StringPiece,
927 VariableSerializer::DVOverrides*);
// Reads an `@name' array-literal reference and resolves it to its array.
// Returns the ArrayData* paired with the referenced name.  Raises an
// assembler error if the `@' or the name is missing, if the declared
// data does not unserialize to an array, or if the name is unknown.
929 std::pair<ArrayData*, std::string> read_litarray(AsmState& as) {
930 as.in.skipSpaceTab();
931 if (as.in.getc() != '@') {
932 as.error("expecting an `@foo' array literal reference");
934 std::string name;
935 if (!as.in.readword(name)) {
936 as.error("expected name of .adata literal");
// Resolve the name: reuse the array already recorded in adataMap, or
// else unserialize the pending declaration from adataDecls.  Yields
// nullptr when the name is in neither map.
939 auto adata = [&]() -> ArrayData* {
940 auto const it = as.adataMap.find(name);
941 if (it != as.adataMap.end()) return it->second.first;
942 auto const decl = as.adataDecls.find(name);
943 if (decl == as.adataDecls.end()) return nullptr;
944 auto& buf = decl->second;
// The unserialization runs under suppressOOM.
945 return suppressOOM([&] {
946 VariableSerializer::DVOverrides overrides;
// DV overrides are only collected when EvalHackArrDVArrs is set.
947 auto var = parse_php_serialized(
948 buf,
949 RuntimeOption::EvalHackArrDVArrs ? &overrides : nullptr
951 if (!var.isArray()) {
952 as.error(".adata only supports serialized arrays");
// Take the array out of the Variant, pass it through
// ArrayData::GetScalarArray, record the result in adataMap and drop the
// serialized declaration, which is no longer needed.
955 auto data = var.detach().m_data.parr;
956 ArrayData::GetScalarArray(&data);
957 as.adataMap[name] = std::make_pair(data, std::move(overrides));
958 as.adataDecls.erase(decl);
959 return data;
961 }();
963 if (!adata) as.error("unknown array data literal name " + name);
965 return {adata, std::move(name)};
968 RepoAuthType read_repo_auth_type(AsmState& as) {
969 auto const str = read_opcode_arg<std::string>(as);
970 folly::StringPiece parse(str);
973 * Note: no support for reading array types. (The assembler only
974 * emits a single unit, so it can't really be involved in creating a
975 * ArrayTypeTable.)
978 using T = RepoAuthType::Tag;
980 #define X(what, tag) \
981 if (parse.startsWith(what)) return RepoAuthType{tag}
983 #define Y(what, tag) \
984 if (parse.startsWith(what)) { \
985 parse.removePrefix(what); \
986 auto const cls = makeStaticString(parse.data()); \
987 as.ue->mergeLitstr(cls); \
988 return RepoAuthType{tag, cls}; \
991 Y("Obj=", T::ExactObj);
992 Y("?Obj=", T::OptExactObj);
993 Y("?Obj<=", T::OptSubObj);
994 Y("Obj<=", T::SubObj);
995 Y("Cls=", T::ExactCls);
996 Y("?Cls=", T::OptExactCls);
997 Y("?Cls<=", T::OptSubCls);
998 Y("Cls<=", T::SubCls);
999 X("Arr", T::Arr);
1000 X("?Arr", T::OptArr);
1001 X("VArr", T::VArr);
1002 X("?VArr", T::OptVArr);
1003 X("DArr", T::DArr);
1004 X("?DArr", T::OptDArr);
1005 X("Vec", T::Vec);
1006 X("?Vec", T::OptVec);
1007 X("Dict", T::Dict);
1008 X("?Dict", T::OptDict);
1009 X("Keyset", T::Keyset);
1010 X("?Keyset", T::OptKeyset);
1011 X("Bool", T::Bool);
1012 X("?Bool", T::OptBool);
1013 X("Cell", T::Cell);
1014 X("Dbl", T::Dbl);
1015 X("?Dbl", T::OptDbl);
1016 X("Gen", T::Gen);
1017 X("InitCell", T::InitCell);
1018 X("InitGen", T::InitGen);
1019 X("InitNull", T::InitNull);
1020 X("InitUnc", T::InitUnc);
1021 X("Int", T::Int);
1022 X("?Int", T::OptInt);
1023 X("Null", T::Null);
1024 X("Obj", T::Obj);
1025 X("?Obj", T::OptObj);
1026 X("Func", T::Func);
1027 X("?Func", T::OptFunc);
1028 X("Cls", T::Cls);
1029 X("?Cls", T::OptCls);
1030 X("ClsMeth", T::ClsMeth);
1031 X("?ClsMeth", T::OptClsMeth);
1032 X("Record", T::Record);
1033 X("?Record", T::OptRecord);
1034 X("Ref", T::Ref);
1035 X("?Res", T::OptRes);
1036 X("Res", T::Res);
1037 X("?SArr", T::OptSArr);
1038 X("SArr", T::SArr);
1039 X("?SVArr", T::OptSVArr);
1040 X("SVArr", T::SVArr);
1041 X("?SDArr", T::OptSDArr);
1042 X("SDArr", T::SDArr);
1043 X("?SVec", T::OptSVec);
1044 X("SVec", T::SVec);
1045 X("?SDict", T::OptSDict);
1046 X("SDict", T::SDict);
1047 X("?SKeyset", T::OptSKeyset);
1048 X("SKeyset", T::SKeyset);
1049 X("?SStr", T::OptSStr);
1050 X("SStr", T::SStr);
1051 X("?Str", T::OptStr);
1052 X("Str", T::Str);
1053 X("Unc", T::Unc);
1054 X("?UncArrKey", T::OptUncArrKey);
1055 X("?ArrKey", T::OptArrKey);
1056 X("UncArrKey",T::UncArrKey);
1057 X("ArrKey", T::ArrKey);
1058 X("?UncStrLike",T::OptUncStrLike);
1059 X("?StrLike",T::OptStrLike);
1060 X("UncStrLike",T::UncStrLike);
1061 X("StrLike",T::StrLike);
1062 X("Uninit", T::Uninit);
1064 #undef X
1065 #undef Y
1067 // Make sure the above parsing code is revisited when new tags are
1068 // added (we'll get a warning for a missing case label):
1069 if (debug) switch (RepoAuthType{}.tag()) {
1070 case T::Uninit:
1071 case T::InitNull:
1072 case T::Null:
1073 case T::Int:
1074 case T::OptInt:
1075 case T::Dbl:
1076 case T::OptDbl:
1077 case T::Res:
1078 case T::OptRes:
1079 case T::Bool:
1080 case T::OptBool:
1081 case T::SStr:
1082 case T::OptSStr:
1083 case T::Str:
1084 case T::OptStr:
1085 case T::SArr:
1086 case T::OptSArr:
1087 case T::Arr:
1088 case T::OptArr:
1089 case T::SVArr:
1090 case T::OptSVArr:
1091 case T::VArr:
1092 case T::OptVArr:
1093 case T::SDArr:
1094 case T::OptSDArr:
1095 case T::DArr:
1096 case T::OptDArr:
1097 case T::SVec:
1098 case T::OptSVec:
1099 case T::Vec:
1100 case T::OptVec:
1101 case T::SDict:
1102 case T::OptSDict:
1103 case T::Dict:
1104 case T::OptDict:
1105 case T::SKeyset:
1106 case T::OptSKeyset:
1107 case T::Keyset:
1108 case T::OptKeyset:
1109 case T::Obj:
1110 case T::OptObj:
1111 case T::Func:
1112 case T::OptFunc:
1113 case T::Cls:
1114 case T::OptCls:
1115 case T::ClsMeth:
1116 case T::OptClsMeth:
1117 case T::Record:
1118 case T::OptRecord:
1119 case T::InitUnc:
1120 case T::Unc:
1121 case T::OptUncArrKey:
1122 case T::OptArrKey:
1123 case T::UncArrKey:
1124 case T::ArrKey:
1125 case T::OptUncStrLike:
1126 case T::OptStrLike:
1127 case T::UncStrLike:
1128 case T::StrLike:
1129 case T::InitCell:
1130 case T::Cell:
1131 case T::Ref:
1132 case T::InitGen:
1133 case T::Gen:
1134 case T::ExactObj:
1135 case T::SubObj:
1136 case T::OptExactObj:
1137 case T::OptSubObj:
1138 case T::ExactCls:
1139 case T::SubCls:
1140 case T::OptExactCls:
1141 case T::OptSubCls:
1142 break;
1145 as.error("unrecognized RepoAuthType format");
1146 not_reached();
1149 // Read a vector of IVAs, with format <int, int, int, ...>, the vector may be
1150 // excluded entirely if it is empty.
1151 std::vector<uint32_t> read_argv32(AsmState& as) {
1152 as.in.skipSpaceTab();
1153 if (as.in.peek() != '<') return {};
1154 as.in.getc();
1156 std::vector<uint32_t> result;
1157 for (;;) {
1158 auto const num = as.in.readint();
1159 if (num < 0) as.error("Was expecting a positive integer");
1160 result.push_back(num);
1161 as.in.skipWhitespace();
1162 if (as.in.peek() == '>') break;
1163 as.in.expectWs(',');
1165 as.in.expectWs('>');
1167 return result;
1170 // Read in a vector of iterators the format for this vector is:
1171 // <(TYPE) ID LOCAL?, (TYPE) ID LOCAL?, ...>
1172 // Where TYPE := Iter | LIter
1173 // and ID := Integer
1174 // and LOCAL := String (only valid when TYPE = LIter)
// Returns the entries in the order they were read.  Raises an assembler
// error on a missing or unknown type, a missing id or local, or a bad
// separator; the id is validated by AsmState::getIterId.
1175 IterTable read_iter_table(AsmState& as) {
1176 IterTable ret;
1178 as.in.skipSpaceTab();
1179 as.in.expect('<');
1181 std::string word;
1183 for (;;) {
1184 IterTableEnt ent;
// "(TYPE)": the iterator kind, in parentheses.
1185 as.in.expectWs('(');
1186 if (!as.in.readword(word)) as.error("Was expecting Iterator type.");
1187 if (!word.compare("Iter")) ent.kind = KindOfIter;
1188 else if (!word.compare("LIter")) ent.kind = KindOfLIter;
1189 else as.error("Unknown iterator type `" + word + "'");
1190 as.in.expectWs(')');
1192 as.in.skipSpaceTab();
// "ID": the word is converted with folly::to<uint32_t>.
1194 if (!as.in.readword(word)) as.error("Was expecting iterator id.");
1195 ent.id = as.getIterId(folly::to<uint32_t>(word));
// "LOCAL": only read for LIter entries; otherwise the local is invalid.
1197 if (ent.kind == KindOfLIter) {
1198 as.in.skipSpaceTab();
1199 if (!as.in.readword(word)) as.error("Was expecting local.");
1200 ent.local = as.getLocalId(word);
1201 } else {
1202 ent.local = kInvalidId;
1205 ret.push_back(std::move(ent));
// Separator handling looks at the last character of the most recently
// read word.  NOTE(review): the first branch presumably covers readword
// having swallowed a trailing `,' or `>' -- that depends on is_bareword,
// which is not visible here; confirm.
1207 if (!isdigit(word.back())) {
1208 if (word.back() == '>') break;
1209 if (word.back() != ',') as.error("Was expecting `,'.");
1210 } else {
1211 as.in.skipSpaceTab();
1212 if (as.in.peek() == '>') { as.in.getc(); break; }
1213 as.in.expect(',');
1217 return ret;
1220 // Jump tables are lists of labels.
1221 std::vector<std::string> read_jmpvector(AsmState& as) {
1222 std::vector<std::string> ret;
1224 as.in.skipSpaceTab();
1225 as.in.expect('<');
1227 std::string word;
1228 while (as.in.readword(word)) {
1229 ret.push_back(word);
1231 as.in.expectWs('>');
1233 return ret;
1236 typedef std::vector<std::pair<Id, std::string>> SSwitchJmpVector;
1238 SSwitchJmpVector read_sswitch_jmpvector(AsmState& as) {
1239 SSwitchJmpVector ret;
1241 as.in.skipSpaceTab();
1242 as.in.expect('<');
1244 std::string defLabel;
1245 do {
1246 std::string caseStr;
1247 if (!as.in.readQuotedStr(caseStr)) {
1248 as.error("expected quoted string literal");
1251 as.in.expect(':');
1253 as.in.readword(defLabel);
1255 ret.emplace_back(
1256 as.ue->mergeLitstr(makeStaticString(caseStr)),
1257 defLabel
1260 as.in.skipWhitespace();
1261 } while (as.in.peek() != '-');
1263 as.in.expect('-');
1264 as.in.expect(':');
1265 as.in.readword(defLabel);
1267 // -1 stand for default case.
1268 ret.emplace_back(-1, defLabel);
1270 as.in.expect('>');
1272 return ret;
1275 MemberKey read_member_key(AsmState& as) {
1276 as.in.skipWhitespace();
1278 std::string word;
1279 if (!as.in.readword(word)) as.error("expected member code");
1281 auto optMcode = parseMemberCode(word.c_str());
1282 if (!optMcode) as.error("unrecognized member code `" + word + "'");
1284 auto const mcode = *optMcode;
1285 if (mcode != MW && as.in.getc() != ':') {
1286 as.error("expected `:' after member code `" + word + "'");
1289 switch (mcode) {
1290 case MW:
1291 return MemberKey{};
1292 case MEL: case MPL: {
1293 std::string name;
1294 if (!as.in.readword(name)) {
1295 as.error("couldn't read name for local variable in member key");
1297 return MemberKey{mcode, as.getLocalId(name)};
1299 case MEC: case MPC:
1300 return MemberKey{mcode, read_opcode_arg<int32_t>(as)};
1301 case MEI:
1302 return MemberKey{mcode, read_opcode_arg<int64_t>(as)};
1303 case MET: case MPT: case MQT:
1304 return MemberKey{mcode, read_litstr(as)};
1306 not_reached();
1309 LocalRange read_local_range(AsmState& as) {
1310 auto first = read_opcode_arg<std::string>(as);
1311 if (first.size() > 2 && first[0] == 'L' && first[1] == ':') {
1312 first = "_" + first.substr(2);
1314 auto const pos = first.find('+');
1315 if (pos == std::string::npos) as.error("expecting `+' in local range");
1316 auto const rest = first.substr(pos + 1);
1317 first = first.substr(0, pos);
1318 auto const count = folly::to<uint32_t>(rest);
1319 if (!count) return LocalRange{0, 0};
1320 auto const firstLoc = as.getLocalId(first);
1321 if (firstLoc + count - 1 > as.maxUnnamed) {
1322 as.maxUnnamed = firstLoc + count - 1;
1324 return LocalRange{uint32_t(firstLoc), count};
1327 std::pair<FCallArgs::Flags, bool>
1328 read_fcall_flags(AsmState& as, Op thisOpcode) {
1329 uint8_t flags = 0;
1330 bool lockWhileUnwinding = false;
1332 as.in.skipSpaceTab();
1333 as.in.expect('<');
1335 std::string flag;
1336 while (as.in.readword(flag)) {
1337 if (flag == "SupportsAER") {
1338 if (thisOpcode == Op::FCallCtor) {
1339 as.error("FCall flag SupportsAER is not valid for FCallCtor");
1340 } else {
1341 flags |= FCallArgs::SupportsAsyncEagerReturn;
1342 continue;
1345 if (flag == "LockWhileUnwinding") {
1346 if (thisOpcode == Op::FCallCtor) {
1347 lockWhileUnwinding = true;
1348 continue;
1349 } else {
1350 as.error("FCall flag LockWhileUnwinding is only valid for FCallCtor");
1353 if (flag == "Unpack") { flags |= FCallArgs::HasUnpack; continue; }
1354 if (flag == "Generics") { flags |= FCallArgs::HasGenerics; continue; }
1355 as.error("unrecognized FCall flag `" + flag + "'");
1357 as.in.expectWs('>');
1359 return std::make_pair(static_cast<FCallArgs::Flags>(flags),
1360 lockWhileUnwinding);
1363 // Read a vector of booleans formatted as a quoted string of '0' and '1'.
1364 std::unique_ptr<uint8_t[]> read_by_refs(AsmState& as, uint32_t numArgs) {
1365 as.in.skipSpaceTab();
1366 std::string strVal;
1367 if (!as.in.readQuotedStr(strVal)) {
1368 as.error("expected quoted string literal");
1371 if (strVal.empty()) return nullptr;
1372 if (strVal.length() != numArgs) {
1373 as.error("reffiness vector must be either empty or match number of args");
1376 auto result = std::make_unique<uint8_t[]>((numArgs + 7) / 8);
1377 for (auto i = 0; i < numArgs; ++i) {
1378 auto const c = strVal[i];
1379 if (c != '0' && c != '1') as.error("Was expecting a boolean (0 or 1)");
1380 result[i / 8] |= (c == '1' ? 1 : 0) << (i % 8);
1383 return result;
1386 std::tuple<FCallArgsBase, std::unique_ptr<uint8_t[]>, std::string>
1387 read_fcall_args(AsmState& as, Op thisOpcode) {
1388 FCallArgs::Flags flags;
1389 bool lockWhileUnwinding;
1390 std::tie(flags, lockWhileUnwinding) = read_fcall_flags(as, thisOpcode);
1391 auto const numArgs = read_opcode_arg<uint32_t>(as);
1392 auto const numRets = read_opcode_arg<uint32_t>(as);
1393 auto byRefs = read_by_refs(as, numArgs);
1394 auto asyncEagerLabel = read_opcode_arg<std::string>(as);
1395 return std::make_tuple(
1396 FCallArgsBase(flags, numArgs, numRets, lockWhileUnwinding),
1397 std::move(byRefs),
1398 std::move(asyncEagerLabel)
1402 Id create_litstr_id(AsmState& as) {
1403 auto const sd = read_litstr(as);
1404 auto const id = as.ue->mergeLitstr(sd);
1405 as.litstrMap.emplace(id, sd);
1406 return id;
1409 //////////////////////////////////////////////////////////////////////
1411 std::map<std::string,ParserFunc> opcode_parsers;
1413 #define IMM_NA
1414 #define IMM_ONE(t) IMM_##t
1415 #define IMM_TWO(t1, t2) IMM_ONE(t1); ++immIdx; IMM_##t2
1416 #define IMM_THREE(t1, t2, t3) IMM_TWO(t1, t2); ++immIdx; IMM_##t3
1417 #define IMM_FOUR(t1, t2, t3, t4) IMM_THREE(t1, t2, t3); ++immIdx; IMM_##t4
1418 #define IMM_FIVE(t1, t2, t3, t4, t5) IMM_FOUR(t1, t2, t3, t4); ++immIdx; IMM_##t5
1419 #define IMM_SIX(t1, t2, t3, t4, t5, t6) IMM_FIVE(t1, t2, t3, t4, t5); ++immIdx; IMM_##t6
1421 // Some bytecodes need to know an iva imm for (PUSH|POP)_*.
1422 #define IMM_IVA do { \
1423 auto imm = read_opcode_arg<uint32_t>(as); \
1424 as.ue->emitIVA(imm); \
1425 immIVA[immIdx] = imm; \
1426 } while (0)
1428 #define IMM_VSA \
1429 std::vector<std::string> vecImm = read_strvector(as); \
1430 auto const vecImmStackValues = vecImm.size(); \
1431 as.ue->emitIVA(vecImmStackValues); \
1432 for (size_t i = 0; i < vecImmStackValues; ++i) { \
1433 as.ue->emitInt32(as.ue->mergeLitstr(String(vecImm[i]).get())); \
1436 #define IMM_SA as.ue->emitInt32(create_litstr_id(as))
1437 #define IMM_RATA encodeRAT(*as.ue, read_repo_auth_type(as))
1438 #define IMM_I64A as.ue->emitInt64(read_opcode_arg<int64_t>(as))
1439 #define IMM_DA as.ue->emitDouble(read_opcode_arg<double>(as))
1440 #define IMM_LA as.ue->emitIVA(as.getLocalId( \
1441 read_opcode_arg<std::string>(as)))
1442 #define IMM_IA as.ue->emitIVA(as.getIterId( \
1443 read_opcode_arg<int32_t>(as)))
1444 #define IMM_OA(ty) as.ue->emitByte(read_subop<ty>(as));
1445 #define IMM_LAR encodeLocalRange(*as.ue, read_local_range(as))
1446 #define IMM_FCA do { \
1447 auto const fca = read_fcall_args(as, thisOpcode); \
1448 encodeFCallArgs( \
1449 *as.ue, std::get<0>(fca), std::get<1>(fca).get(), \
1450 std::get<2>(fca) != "-", \
1451 [&] { \
1452 labelJumps.emplace_back(std::get<2>(fca), as.ue->bcPos()); \
1453 as.ue->emitInt32(0); \
1455 ); \
1456 immFCA = std::get<0>(fca); \
1457 } while (0)
1459 // Record the offset of the immediate so that we can correlate it with its
1460 // associated adata later.
1461 #define IMM_AA do { \
1462 auto const p = read_litarray(as); \
1463 auto const pos = as.ue->bcPos(); \
1464 as.ue->emitInt32(as.ue->mergeArray(p.first)); \
1465 as.adataUses[pos] = std::move(p.second); \
1466 } while (0)
1469 * There can currently be no more than one immvector per instruction,
1470 * and we need access to the size of the immediate vector for
1471 * NUM_POP_*, so the member vector guy exposes a vecImmStackValues
1472 * integer.
1474 #define IMM_ILA do { \
1475 auto const immTable = read_iter_table(as); \
1476 as.ue->emitIVA(immTable.size()); \
1477 for (auto const& it : immTable) { \
1478 as.ue->emitIVA(it.kind); \
1479 as.ue->emitIVA(it.id); \
1480 if (it.kind == KindOfLIter) { \
1481 as.ue->emitIVA(it.local); \
1484 } while (0)
1486 #define IMM_I32LA do { \
1487 std::vector<uint32_t> vecImm = read_argv32(as); \
1488 as.ue->emitIVA(vecImm.size()); \
1489 for (auto i : vecImm) { \
1490 as.ue->emitInt32(i); \
1492 } while (0)
1494 #define IMM_BLA do { \
1495 std::vector<std::string> vecImm = read_jmpvector(as); \
1496 as.ue->emitIVA(vecImm.size()); \
1497 for (auto const& imm : vecImm) { \
1498 labelJumps.emplace_back(imm, as.ue->bcPos()); \
1499 as.ue->emitInt32(0); /* to be patched */ \
1501 } while (0)
1503 #define IMM_SLA do { \
1504 auto vecImm = read_sswitch_jmpvector(as); \
1505 as.ue->emitIVA(vecImm.size()); \
1506 for (auto const& pair : vecImm) { \
1507 as.ue->emitInt32(pair.first); \
1508 labelJumps.emplace_back(pair.second, as.ue->bcPos()); \
1509 as.ue->emitInt32(0); /* to be patched */ \
1511 } while(0)
1513 #define IMM_BA do { \
1514 labelJumps.emplace_back( \
1515 read_opcode_arg<std::string>(as), \
1516 as.ue->bcPos() \
1517 ); \
1518 as.ue->emitInt32(0); \
1519 } while (0)
1521 #define IMM_KA encode_member_key(read_member_key(as), *as.ue)
/*
 * Stack-effect descriptors.  NUM_PUSH_<kind> / NUM_POP_<kind> expand to the
 * number of values an instruction pushes / pops, using the immediates that
 * the IMM_* snippets left in immIVA[], immFCA and vecImmStackValues.
 *
 * Every expansion that is more than a single primary expression is fully
 * parenthesized, and macro parameters are parenthesized where they are used,
 * because callers apply operators to the result (e.g. -NUM_POP_##pop).
 * Without the parentheses, -NUM_POP_CMANY_U3 would expand to
 * -immIVA[0] + 3 rather than -(immIVA[0] + 3).
 */
#define NUM_PUSH_NOV 0
#define NUM_PUSH_ONE(a) 1
#define NUM_PUSH_TWO(a,b) 2
#define NUM_PUSH_THREE(a,b,c) 3
#define NUM_PUSH_CMANY immIVA[0]
#define NUM_PUSH_FCALL immFCA.numRets
#define NUM_PUSH_CALLNATIVE (immIVA[2] + 1)
#define NUM_POP_NOV 0
#define NUM_POP_ONE(a) 1
#define NUM_POP_TWO(a,b) 2
#define NUM_POP_THREE(a,b,c) 3
#define NUM_POP_MFINAL immIVA[0]
#define NUM_POP_C_MFINAL(n) (immIVA[0] + (n))
#define NUM_POP_CUMANY immIVA[0] /* number of arguments */
#define NUM_POP_CMANY_U3 (immIVA[0] + 3)
#define NUM_POP_CALLNATIVE (immIVA[0] + immIVA[2]) /* number of args + nout */
#define NUM_POP_FCALL(nin, nobj) \
  ((nin) + immFCA.numInputs() + 2 + immFCA.numRets)
#define NUM_POP_CMANY immIVA[0] /* number of arguments */
#define NUM_POP_SMANY vecImmStackValues
1543 #define O(name, imm, pop, push, flags) \
1544 void parse_opcode_##name(AsmState& as) { \
1545 UNUSED auto immFCA = FCallArgsBase(FCallArgsBase::None, -1, -1, \
1546 false); \
1547 UNUSED uint32_t immIVA[kMaxHhbcImms]; \
1548 UNUSED auto const thisOpcode = Op::name; \
1549 UNUSED const Offset curOpcodeOff = as.ue->bcPos(); \
1550 std::vector<std::pair<std::string, Offset> > labelJumps; \
1552 TRACE( \
1553 4, \
1554 "%d\t[%s] %s\n", \
1555 as.in.getLineNumber(), \
1556 as.displayStackDepth().c_str(), \
1557 #name \
1558 ); \
1560 /* Pretend the stack is reachable and empty, same as hphpc */ \
1561 if (as.currentStackDepth == nullptr) { \
1562 as.enterReachableRegion(0); \
1565 as.ue->emitOp(Op##name); \
1567 UNUSED size_t immIdx = 0; \
1568 IMM_##imm; \
1570 as.adjustStack(-NUM_POP_##pop); \
1572 if (thisOpcode == OpMemoGet) { \
1573 /* MemoGet pushes after branching */ \
1574 assertx(labelJumps.size() == 1); \
1575 as.addLabelJump( \
1576 labelJumps[0].first, labelJumps[0].second, curOpcodeOff \
1577 ); \
1578 as.adjustStack(NUM_PUSH_##push); \
1579 } else if (thisOpcode == OpMemoGetEager) { \
1580 /* MemoGetEager pushes on its second branch only */ \
1581 assertx(labelJumps.size() == 2); \
1582 as.addLabelJump( \
1583 labelJumps[0].first, labelJumps[0].second, curOpcodeOff \
1584 ); \
1585 as.adjustStack(NUM_PUSH_##push); \
1586 as.addLabelJump( \
1587 labelJumps[1].first, labelJumps[1].second, curOpcodeOff \
1588 ); \
1589 } else { \
1590 /* Everything else pushes before branching */ \
1591 as.adjustStack(NUM_PUSH_##push); \
1592 for (auto& kv : labelJumps) { \
1593 as.addLabelJump(kv.first, kv.second, curOpcodeOff); \
1597 /* FCalls with unpack perform their own bounds checking. */ \
1598 if (isFCall(Op##name) && !immFCA.hasUnpack()) { \
1599 as.fe->containsCalls = true; \
1602 /* Stack depth should be 0 after RetC or RetM. */ \
1603 if (thisOpcode == OpRetC || thisOpcode == OpRetCSuspended || \
1604 thisOpcode == OpRetM) { \
1605 as.enforceStackDepth(0); \
1608 /* Stack depth should be 1 after resume from suspend. */ \
1609 if (thisOpcode == OpCreateCont || thisOpcode == OpAwait || \
1610 thisOpcode == OpYield || thisOpcode == OpYieldK || \
1611 thisOpcode == OpYieldFromDelegate) { \
1612 as.enforceStackDepth(1); \
1615 /* Record source location. */ \
1616 as.ue->recordSourceLocation(as.srcLoc, curOpcodeOff); \
1618 if (Op##name == OpDefCls || Op##name == OpDefClsNop) { \
1619 as.defClsOffsets.emplace(immIVA[0], curOpcodeOff); \
1622 /* Retain stack depth after calls to exit */ \
1623 if ((instrFlags(thisOpcode) & InstrFlags::TF) && \
1624 (Op##name != OpExit)) { \
1625 as.enterUnreachableRegion(); \
1629 OPCODES
1631 #undef O
// The immediate and stack-effect helpers are only meant for the O() expansion
// above; drop them again.

// Scalar immediates.
#undef IMM_IVA
#undef IMM_I64A
#undef IMM_DA
#undef IMM_SA
#undef IMM_RATA
#undef IMM_LA
#undef IMM_OA
#undef IMM_MA
#undef IMM_AA
#undef IMM_KA
#undef IMM_LAR
#undef IMM_FCA

// Jump targets and vector immediates.
#undef IMM_BA
#undef IMM_BLA
#undef IMM_SLA
#undef IMM_ILA
#undef IMM_I32LA
#undef IMM_VSA

// Push counts.
#undef NUM_PUSH_NOV
#undef NUM_PUSH_ONE
#undef NUM_PUSH_TWO
#undef NUM_PUSH_THREE
#undef NUM_PUSH_CMANY
#undef NUM_PUSH_FCALL
#undef NUM_PUSH_CALLNATIVE

// Pop counts.
#undef NUM_POP_NOV
#undef NUM_POP_ONE
#undef NUM_POP_TWO
#undef NUM_POP_THREE
#undef NUM_POP_MFINAL
#undef NUM_POP_C_MFINAL
#undef NUM_POP_CUMANY
#undef NUM_POP_CMANY_U3
#undef NUM_POP_CALLNATIVE
#undef NUM_POP_FCALL
#undef NUM_POP_CMANY
#undef NUM_POP_SMANY
1672 void initialize_opcode_map() {
1673 #define O(name, imm, pop, push, flags) \
1674 opcode_parsers[#name] = parse_opcode_##name;
1675 OPCODES
1676 #undef O
1679 struct Initializer {
1680 Initializer() { initialize_opcode_map(); }
1681 } initializer;
1683 //////////////////////////////////////////////////////////////////////
1685 std::vector<char> parse_long_string_raw(AsmState& as) {
1686 as.in.skipWhitespace();
1688 std::vector<char> buffer;
1689 if (!as.in.readLongString(buffer)) {
1690 as.error("expected \"\"\"-string of serialized php data");
1692 if (buffer.empty()) {
1693 as.error("empty php serialized data is not a valid php object");
1696 // String wants a null, and dereferences one past the size we give
1697 // it.
1698 buffer.push_back('\0');
1700 return buffer;
1704 * long-string-literal: <string>
1706 * `long-string-literal' is a python-style longstring. See
1707 * readLongString for more details.
1709 String parse_long_string(AsmState& as) {
1710 auto buffer = parse_long_string_raw(as);
1711 return String(&buffer[0], buffer.size() - 1, CopyString);
1715 * maybe-long-string-literal : long-string-literal
1719 String parse_maybe_long_string(AsmState& as) {
1720 as.in.skipWhitespace();
1722 std::vector<char> buffer;
1723 if (!as.in.readLongString(buffer)) {
1724 return StrNR(staticEmptyString());
1726 if (buffer.empty()) {
1727 return StrNR(staticEmptyString());
1730 // String wants a null, and dereferences one past the size we give
1731 // it.
1732 buffer.push_back('\0');
1733 return String(&buffer[0], buffer.size() - 1, CopyString);
1736 void checkSize(TypedValue tv, size_t& available) {
1737 auto const update = [&] (size_t sz) {
1738 if (sz > available) {
1739 throw AssemblerFatal("Maximum allowable size of scalar exceeded");
1741 available -= sz;
1744 if (isArrayLikeType(type(tv))) {
1745 update(allocSize(val(tv).parr));
1747 IterateKVNoInc(val(tv).parr, [&] (Cell k, TypedValue v) {
1748 if (isStringType(type(k))) {
1749 update(val(k).pstr->heapSize());
1751 checkSize(v, available);
1755 if (isStringType(type(tv))) {
1756 update(val(tv).pstr->heapSize());
1760 Variant checkSize(Variant val) {
1761 size_t avail = RuntimeOption::EvalAssemblerMaxScalarSize;
1762 checkSize(*val.asTypedValue(), avail);
1763 return val;
1767 * php-serialized : long-string-literal
1770 * `long-string-literal' is a python-style longstring. See
1771 * readLongString for more details.
1773 * Returns a Variant representing the serialized data. It's up to the
1774 * caller to make sure it is a legal literal.
1776 Variant parse_php_serialized(
1777 folly::StringPiece str,
1778 VariableSerializer::DVOverrides* overrides = nullptr
1780 VariableUnserializer vu(
1781 str.data(),
1782 str.size(),
1783 VariableUnserializer::Type::Internal,
1784 true
1786 if (overrides) vu.setDVOverrides(overrides);
1787 try {
1788 return checkSize(vu.unserialize());
1789 } catch (const FatalErrorException&) {
1790 throw;
1791 } catch (const AssemblerFatal&) {
1792 throw;
1793 } catch (const std::exception& e) {
1794 auto const msg =
1795 folly::sformat("AssemblerUnserializationError: {}", e.what());
1796 throw AssemblerUnserializationError(msg);
1800 Variant parse_php_serialized(
1801 AsmState& as,
1802 VariableSerializer::DVOverrides* overrides = nullptr
1804 auto str = parse_long_string(as);
1805 return parse_php_serialized(str.slice(), overrides);
1809 * maybe-php-serialized : maybe-long-string-literal
1812 Variant parse_maybe_php_serialized(AsmState& as) {
1813 auto s = parse_maybe_long_string(as);
1814 if (!s.empty()) {
1815 try {
1816 return unserialize_from_string(s, VariableUnserializer::Type::Internal);
1817 } catch (const FatalErrorException&) {
1818 throw;
1819 } catch (const AssemblerFatal&) {
1820 throw;
1821 } catch (const std::exception& e) {
1822 auto const msg =
1823 folly::sformat("AssemblerUnserializationError: {}", e.what());
1824 throw AssemblerUnserializationError(msg);
1827 return Variant();
1831 * directive-numiters : integer ';'
1834 void parse_numiters(AsmState& as) {
1835 if (as.numItersSet) {
1836 as.error("only one .numiters directive may appear in a given function");
1838 int32_t count = read_opcode_arg<int32_t>(as);
1839 as.numItersSet = true;
1840 as.fe->setNumIterators(count);
1841 as.in.expectWs(';');
1845 * directive-declvars : var-name* ';'
1848 * Variables are usually allocated when first seen, but
1849 * declvars can be used to preallocate varibles for when
1850 * the exact assignment matters (like for closures).
1852 void parse_declvars(AsmState& as) {
1853 while (true) {
1854 as.in.skipWhitespace();
1855 std::string var;
1856 if (as.in.readQuotedStr(var) || as.in.readword(var)) {
1857 as.getLocalId(var);
1859 else {
1860 break;
1863 as.in.expectWs(';');
1866 void parse_function_body(AsmState&, int nestLevel = 0);
1869 * directive-catch : identifier integer? '{' function-body
1872 void parse_catch(AsmState& as, int nestLevel) {
1873 const Offset start = as.ue->bcPos();
1875 std::string label;
1876 if (!as.in.readword(label)) {
1877 as.error("expected label name after .try_catch");
1879 int iterId = -1;
1880 as.in.skipWhitespace();
1881 if (as.in.peek() != '{') {
1882 iterId = read_opcode_arg<int32_t>(as);
1884 as.in.expectWs('{');
1885 parse_function_body(as, nestLevel + 1);
1887 auto& eh = as.fe->addEHEnt();
1888 eh.m_base = start;
1889 eh.m_past = as.ue->bcPos();
1890 eh.m_iterId = iterId;
1891 eh.m_end = kInvalidOffset;
1893 as.addLabelEHEnt(label, as.fe->ehtab.size() - 1);
1897 * directive-try-catch : integer? '{' function-body ".catch" '{' function-body
1900 void parse_try_catch(AsmState& as, int nestLevel) {
1901 const Offset start = as.ue->bcPos();
1903 int iterId = -1;
1904 as.in.skipWhitespace();
1905 if (as.in.peek() != '{') {
1906 iterId = read_opcode_arg<int32_t>(as);
1909 // Emit try body.
1910 as.in.expectWs('{');
1911 parse_function_body(as, nestLevel + 1);
1912 if (!as.isUnreachable()) {
1913 as.error("expected .try region to not fall-thru");
1916 const Offset handler = as.ue->bcPos();
1918 // Emit catch body.
1919 as.enterReachableRegion(0);
1920 as.adjustStack(1);
1921 as.enforceStackDepth(1);
1923 std::string word;
1924 as.in.skipWhitespace();
1925 if (!as.in.readword(word) || word != ".catch") {
1926 as.error("expected .catch directive after .try");
1928 as.in.skipWhitespace();
1929 as.in.expectWs('{');
1930 parse_function_body(as, nestLevel + 1);
1932 const Offset end = as.ue->bcPos();
1934 auto& eh = as.fe->addEHEnt();
1935 eh.m_base = start;
1936 eh.m_past = handler;
1937 eh.m_iterId = iterId;
1938 eh.m_handler = handler;
1939 eh.m_end = end;
1943 * directive-srcloc : line_no ':' chr_no ',' line_no ':' chr_no ';'
1945 * line_no : integer
1947 * chr_no : integer
1950 * Record that subsequent bytecodes are at the source location indicated by the
1951 * range of inline numbers and character positions specified.
1953 void parse_srcloc(AsmState& as, int /*nestLevel*/) {
1954 auto const line0 = as.in.readint();
1955 as.in.expectWs(':');
1956 auto const char0 = as.in.readint();
1957 as.in.expectWs(',');
1958 auto const line1 = as.in.readint();
1959 as.in.expectWs(':');
1960 auto const char1 = as.in.readint();
1961 as.in.expectWs(';');
1963 as.srcLoc = Location::Range(line0, char0, line1, char1);
1967 * directive-doccomment : long-string-literal ';'
1971 void parse_func_doccomment(AsmState& as) {
1972 auto const doc = parse_long_string(as);
1973 as.in.expectWs(';');
1975 as.fe->docComment = makeDocComment(doc);
1979 * fixup_default_values: This function does a *rough* match of the default value
1980 * initializers for a function and attempts to construct corresponding default
1981 * TypedValues for them. It will also attempt to normalize the phpCode using a
1982 * variable serializer.
1984 void fixup_default_values(AsmState& as, FuncEmitter* fe) {
1985 using Atom = BCPattern::Atom;
1986 using Captures = BCPattern::CaptureVec;
1988 auto end = as.ue->bc() + fe->past;
1989 for (uint32_t paramIdx = 0; paramIdx < fe->params.size(); ++paramIdx) {
1990 auto& pi = fe->params[paramIdx];
1991 if (!pi.hasDefaultValue() || pi.funcletOff == kInvalidOffset) continue;
1992 auto inst = as.ue->bc() + pi.funcletOff;
1994 // Check that the DV intitializer is actually setting the local for the
1995 // parameter being initialized.
1996 auto checkloc = [&] (PC pc, const Captures&) {
1997 auto const UNUSED op = decode_op(pc);
1998 assertx(op == OpSetL || op == OpPopL);
1999 auto const loc = decode_iva(pc);
2000 return loc == paramIdx;
2003 // Look for DV initializers which push a primitive value onto the stack and
2004 // then immediately use it to set the parameter local and pop it from the
2005 // stack. Currently the following relatively limited sequences are accepted:
2007 // Int | String | Double | Null | True | False | Array | Dict | Keyset | Vec
2008 // SetL loc, PopC | PopL loc
2009 auto result = BCPattern {
2010 Atom::alt(
2011 Atom(OpInt), Atom(OpString), Atom(OpDouble), Atom(OpNull), Atom(OpTrue),
2012 Atom(OpFalse), Atom(OpArray), Atom(OpDict), Atom(OpVec), Atom(OpKeyset)
2013 ).capture(),
2014 Atom::alt(
2015 Atom(OpPopL).onlyif(checkloc),
2016 Atom::seq(Atom(OpSetL).onlyif(checkloc), Atom(OpPopC))
2018 }.ignore({OpAssertRATL, OpAssertRATStk}).matchAnchored(inst, end);
2020 // Verify that the pattern we matched is either for the last DV initializer,
2021 // in which case it must end with a JmpNS that targets the function entry,
2022 // or is immediately followed by the next DV initializer.
2023 if (!result.found() || result.getEnd() >= end) continue;
2024 auto pc = result.getEnd();
2025 auto off = pc - as.ue->bc();
2026 auto const valid = [&] {
2027 for (uint32_t next = paramIdx + 1; next < fe->params.size(); ++next) {
2028 auto& npi = fe->params[next];
2029 if (!npi.hasDefaultValue() || npi.funcletOff == kInvalidOffset) {
2030 continue;
2032 return npi.funcletOff == off;
2034 auto const orig = pc;
2035 auto const base = as.ue->bc() + fe->base;
2036 return decode_op(pc) == OpJmpNS && orig + decode_raw<Offset>(pc) == base;
2037 }();
2038 if (!valid) continue;
2040 // Use the captured initializer bytecode to construct the default value for
2041 // this parameter.
2042 auto capture = result.getCapture(0);
2043 assertx(capture);
2045 TypedValue dv = make_tv<KindOfUninit>();
2046 const VariableSerializer::DVOverrides* overrides = nullptr;
2047 SCOPE_EXIT { overrides = nullptr; };
2048 auto decode_array = [&] (DataType dt) {
2049 auto const captureCopy = capture;
2050 if (auto arr = as.ue->lookupArray(decode_raw<uint32_t>(capture))) {
2051 dv.m_type = dt;
2052 dv.m_data.parr = const_cast<ArrayData*>(arr);
2053 if (RuntimeOption::EvalHackArrDVArrs) {
2054 auto const litOffset = captureCopy - as.ue->bc();
2055 auto const it = as.adataUses.find(litOffset);
2056 assertx(it != as.adataUses.end());
2057 overrides = &as.adataMap[it->second].second;
2062 switch (decode_op(capture)) {
2063 case OpNull: dv = make_tv<KindOfNull>(); break;
2064 case OpTrue: dv = make_tv<KindOfBoolean>(true); break;
2065 case OpFalse: dv = make_tv<KindOfBoolean>(false); break;
2066 case OpArray: decode_array(KindOfPersistentArray); break;
2067 case OpVec: decode_array(KindOfPersistentVec); break;
2068 case OpDict: decode_array(KindOfPersistentDict); break;
2069 case OpKeyset: decode_array(KindOfPersistentKeyset); break;
2070 case OpInt:
2071 dv = make_tv<KindOfInt64>(decode_raw<int64_t>(capture));
2072 break;
2073 case OpDouble:
2074 dv = make_tv<KindOfDouble>(decode_raw<double>(capture));
2075 break;
2076 case OpString:
2077 if (auto str = as.litstrMap[decode_raw<uint32_t>(capture)]) {
2078 dv = make_tv<KindOfPersistentString>(str);
2080 break;
2081 default:
2082 always_assert(false);
2085 // Use the variable serializer to construct a serialized version of the
2086 // default value, matching the behavior of hphpc.
2087 if (dv.m_type != KindOfUninit) {
2088 VariableSerializer vs(VariableSerializer::Type::PHPOutput);
2089 if (RuntimeOption::EvalHackArrDVArrs && overrides) {
2090 vs.setDVOverrides(overrides);
2092 auto str = vs.serialize(tvAsCVarRef(&dv), true);
2093 pi.defaultValue = dv;
2094 pi.phpCode = makeStaticString(str.get());
2100 * function-body : fbody-line* '}'
2103 * fbody-line : ".numiters" directive-numiters
2104 * | ".declvars" directive-declvars
2105 * | ".try_fault" directive-fault
2106 * | ".try_catch" directive-catch
2107 * | ".try" directive-try-catch
2108 * | ".ismemoizewrapper"
2109 * | ".ismemoizewrapperlsb"
2110 * | ".srcloc" directive-srcloc
2111 * | ".doc" directive-doccomment
2112 * | label-name
2113 * | opcode-line
2116 * label-name : identifier ':'
2119 * opcode-line : opcode-mnemonic <junk that depends on opcode> '\n'
2122 void parse_function_body(AsmState& as, int nestLevel /* = 0 */) {
2123 std::string word;
2124 for (;;) {
2125 as.in.skipWhitespace();
2126 if (as.in.peek() == '}') {
2127 as.in.getc();
2128 if (!nestLevel) {
2129 as.finishFunction();
2131 return;
2134 if (!as.in.readword(word)) {
2135 as.error("unexpected directive or opcode line in function body");
2137 if (word[0] == '.') {
2138 if (word == ".ismemoizewrapper") {
2139 as.fe->isMemoizeWrapper = true;
2140 as.in.expectWs(';');
2141 continue;
2143 if (word == ".ismemoizewrapperlsb") {
2144 as.fe->isMemoizeWrapper = true;
2145 as.fe->isMemoizeWrapperLSB = true;
2146 as.in.expectWs(';');
2147 continue;
2149 if (word == ".numiters") { parse_numiters(as); continue; }
2150 if (word == ".declvars") { parse_declvars(as); continue; }
2151 if (word == ".try_catch") { parse_catch(as, nestLevel); continue; }
2152 if (word == ".try") { parse_try_catch(as, nestLevel); continue; }
2153 if (word == ".srcloc") { parse_srcloc(as, nestLevel); continue; }
2154 if (word == ".doc") { parse_func_doccomment(as); continue; }
2155 as.error("unrecognized directive `" + word + "' in function");
2157 if (as.in.peek() == ':') {
2158 as.in.getc();
2159 as.addLabelTarget(word);
2160 continue;
2163 // Ok, it better be an opcode now.
2164 auto it = opcode_parsers.find(word);
2165 if (it == opcode_parsers.end()) {
2166 as.error("unrecognized opcode `" + word + "'");
2168 it->second(as);
2170 as.in.skipSpaceTab();
2171 if (as.in.peek() != '\n' &&
2172 as.in.peek() != '\r' &&
2173 as.in.peek() != '#' &&
2174 as.in.peek() != EOF) {
2175 as.error("too many arguments for opcode `" + word + "'");
2180 void parse_user_attribute(AsmState& as,
2181 UserAttributeMap& userAttrs) {
2182 suppressOOM([&] {
2183 auto name = read_litstr(as);
2184 as.in.expectWs('(');
2186 auto var = parse_php_serialized(as);
2188 as.in.expectWs(')');
2190 if (!var.isArray()) {
2191 as.error("user attribute values must be arrays");
2194 userAttrs[name] =
2195 RuntimeOption::EvalHackArrDVArrs
2196 ? make_tv<KindOfVec>(ArrayData::GetScalarArray(std::move(var)))
2197 : make_tv<KindOfArray>(ArrayData::GetScalarArray(std::move(var)));
2202 * attribute : attribute-name
2203 * | string-literal '(' long-string-literal ')'
2206 * attribute-list : empty
2207 * | '[' attribute* ']'
2210 * The `attribute-name' rule is context-sensitive; see as-shared.cpp.
2211 * The second attribute form is for user attributes and only applies
2212 * if attributeMap is non null.
2214 Attr parse_attribute_list(AsmState& as, AttrContext ctx,
2215 UserAttributeMap *userAttrs = nullptr,
2216 bool* isTop = nullptr) {
2217 as.in.skipWhitespace();
2218 int ret = AttrNone;
2219 if (as.in.peek() != '[') return Attr(ret);
2220 as.in.getc();
2222 std::string word;
2223 auto seen_rxl = false;
2224 for (;;) {
2225 as.in.skipWhitespace();
2226 if (as.in.peek() == ']') break;
2227 if (as.in.peek() == '"' && userAttrs) {
2228 parse_user_attribute(as, *userAttrs);
2229 continue;
2231 if (!as.in.readword(word)) break;
2233 auto const abit = string_to_attr(ctx, word);
2234 if (abit) {
2235 ret |= *abit;
2236 continue;
2238 if (isTop && word == "nontop") {
2239 *isTop = false;
2240 continue;
2242 auto const rxAttrs = rxAttrsFromAttrString(word);
2243 if (rxAttrs != 0) {
2244 if (seen_rxl) as.error("multiple rx attributes");
2245 seen_rxl = true;
2246 ret |= rxAttrs;
2247 continue;
2250 as.error("unrecognized attribute `" + word + "' in this context");
2252 as.in.expect(']');
2253 return Attr(ret);
2257 * type-info : empty
2258 * | '<' maybe-string-literal maybe-string-literal
2259 * type-flag* '>'
2261 * type-constraint : empty
2262 * | '<' maybe-string-literal
2263 * type-flag* '>'
2265 * This parses type-info if noUserType is false, type-constraint if true
2267 std::pair<const StringData *, TypeConstraint> parse_type_info(
2268 AsmState& as, bool noUserType = false) {
2269 as.in.skipWhitespace();
2270 if (as.in.peek() != '<') return {};
2271 as.in.getc();
2273 const StringData *userType = noUserType ? nullptr : read_maybe_litstr(as);
2274 const StringData *typeName = read_maybe_litstr(as);
2276 std::string word;
2277 auto flags = TypeConstraint::NoFlags;
2278 for (;;) {
2279 as.in.skipWhitespace();
2280 if (as.in.peek() == '>') break;
2281 if (!as.in.readword(word)) break;
2283 auto const abit = string_to_type_flag(word);
2284 if (abit) {
2285 flags = flags | *abit;
2286 continue;
2289 as.error("unrecognized type flag `" + word + "' in this context");
2291 as.in.expect('>');
2292 return std::make_pair(userType, TypeConstraint{typeName, flags});
/*
 * Parse a type-constraint: runs parse_type_info() with noUserType set and
 * returns only the TypeConstraint half of its result.
 */
TypeConstraint parse_type_constraint(AsmState& as) {
  return parse_type_info(as, true).second;
}
2300 * parameter-list : '(' param-name-list ')'
2303 * param-name-list : empty
2304 * | param-name ',' param-name-list
2307 * param-name : '$' identifier dv-initializer
2308 * | '&' '$' identifier dv-initializer
2311 * dv-initializer : empty
2312 * | '=' identifier arg-default
2315 * arg-default : empty
2316 * | '(' long-string-literal ')'
2319 void parse_parameter_list(AsmState& as) {
2320 as.in.skipWhitespace();
2321 if (as.in.peek() != '(') return;
2322 as.in.getc();
2324 bool seenVariadic = false;
2325 bool seenRef = false;
2327 for (;;) {
2328 FuncEmitter::ParamInfo param;
2329 param.byRef = false;
2330 param.inout = false;
2332 as.in.skipWhitespace();
2333 int ch = as.in.peek();
2334 if (ch == ')') { as.in.getc(); break; } // allow empty param lists
2336 if (seenVariadic) {
2337 as.error("functions can only have one variadic argument");
2340 parse_attribute_list(as, AttrContext::Parameter, &param.userAttributes);
2342 if (ch == '.') {
2343 as.in.getc();
2344 if (as.in.getc() != '.' ||
2345 as.in.getc() != '.') {
2346 as.error("expecting '...'");
2349 seenVariadic = true;
2350 param.variadic = true;
2351 as.fe->attrs |= AttrVariadicParam;
2354 if (as.in.tryConsume("inout")) {
2355 if (seenVariadic) {
2356 as.error("inout parameters cannot be variadic");
2358 if (seenRef) {
2359 as.error("functions cannot contain both inout and ref parameters");
2361 param.inout = true;
2362 as.fe->attrs |= AttrTakesInOutParams;
2365 std::tie(param.userType, param.typeConstraint) = parse_type_info(as);
2367 as.in.skipWhitespace();
2368 ch = as.in.getc();
2370 if (ch == '&') {
2371 if (param.variadic) {
2372 as.error("ref parameters cannot be variadic");
2374 if (param.inout) {
2375 as.error("parameters cannot be marked both inout and ref");
2377 if (as.fe->attrs & AttrTakesInOutParams) {
2378 as.error("functions cannot contain both inout and ref parameters");
2380 seenRef = true;
2381 param.byRef = true;
2382 ch = as.in.getc();
2384 if (ch != '$') {
2385 as.error("function parameters must have a $ prefix");
2387 std::string name;
2388 if (!as.in.readword(name)) {
2389 as.error("expected parameter name after $");
2392 as.in.skipWhitespace();
2393 ch = as.in.getc();
2394 if (ch == '=') {
2395 if (seenVariadic) {
2396 as.error("variadic parameter cannot have dv-initializer");
2399 std::string label;
2400 if (!as.in.readword(label)) {
2401 as.error("expected label name for dv-initializer");
2403 as.addLabelDVInit(label, as.fe->params.size());
2405 as.in.skipWhitespace();
2406 ch = as.in.getc();
2407 if (ch == '(') {
2408 String str = parse_long_string(as);
2409 param.phpCode = makeStaticString(str);
2410 TypedValue tv;
2411 tvWriteUninit(tv);
2412 if (str.size() == 4) {
2413 if (!strcasecmp("null", str.data())) {
2414 tvWriteNull(tv);
2415 } else if (!strcasecmp("true", str.data())) {
2416 tv = make_tv<KindOfBoolean>(true);
2418 } else if (str.size() == 5 && !strcasecmp("false", str.data())) {
2419 tv = make_tv<KindOfBoolean>(false);
2421 auto utype = param.typeConstraint.underlyingDataType();
2422 if (tv.m_type == KindOfUninit &&
2423 (!utype || *utype == KindOfInt64 || *utype == KindOfDouble)) {
2424 int64_t ival;
2425 double dval;
2426 int overflow = 0;
2427 auto dt = str.get()->isNumericWithVal(ival, dval, false, &overflow);
2428 if (overflow == 0) {
2429 if (dt == KindOfInt64) {
2430 if (utype == KindOfDouble) tv = make_tv<KindOfDouble>(ival);
2431 else tv = make_tv<KindOfInt64>(ival);
2432 } else if (dt == KindOfDouble &&
2433 (!utype || utype == KindOfDouble)) {
2434 tv = make_tv<KindOfDouble>(dval);
2438 if (tv.m_type != KindOfUninit) {
2439 param.defaultValue = tv;
2441 as.in.expectWs(')');
2442 as.in.skipWhitespace();
2443 ch = as.in.getc();
2447 as.fe->appendParam(makeStaticString(name), param);
2449 if (ch == ')') break;
2450 if (ch != ',') as.error("expected , between parameter names");
2454 void parse_function_flags(AsmState& as) {
2455 as.in.skipWhitespace();
2456 std::string flag;
2457 for (;;) {
2458 if (as.in.peek() == '{') break;
2459 if (!as.in.readword(flag)) break;
2461 if (flag == "isGenerator") {
2462 as.fe->isGenerator = true;
2463 } else if (flag == "isAsync") {
2464 as.fe->isAsync = true;
2465 } else if (flag == "isClosureBody") {
2466 as.fe->isClosureBody = true;
2467 } else if (flag == "isPairGenerator") {
2468 as.fe->isPairGenerator = true;
2469 } else if (flag == "isRxDisabled") {
2470 // this relies on attributes being parsed before flags
2471 if (!funcAttrIsAnyRx(as.fe->attrs)) {
2472 as.error("isRxDisabled on non-rx func");
2474 as.fe->isRxDisabled = true;
2475 } else {
2476 as.error("Unexpected function flag \"" + flag + "\"");
2482 * line-range : "(" integer "," integer ")"
2485 bool parse_line_range(AsmState& as, int& line0, int& line1) {
2486 as.in.skipWhitespace();
2487 if (as.in.peek() != '(') {
2488 line0 = as.in.getLineNumber();
2489 line1 = as.in.getLineNumber() + 1;
2490 return false;
2492 as.in.getc();
2493 line0 = as.in.readint();
2494 as.in.expectWs(',');
2495 line1 = as.in.readint();
2496 as.in.expectWs(')');
2497 return true;
2501 * If we haven't seen a pseudomain and we are compiling systemlib,
2502 * add a pseudomain and return true
2503 * If we haven't seen a pseudomain and we are not compiling systemlib,
2504 * return false so that the caller can give an assembler error
2505 * Otherwise, return true
2507 bool ensure_pseudomain(AsmState& as) {
2508 if (!as.emittedPseudoMain) {
2509 if (!SystemLib::s_inited) {
2511 * The SystemLib::s_hhas_unit is required to be merge-only,
2512 * and we create the source by concatenating separate .hhas files
2513 * Rather than choosing one to have the .main directive, we just
2514 * generate a trivial pseudoMain automatically.
2516 as.ue->addTrivialPseudoMain();
2517 as.emittedPseudoMain = true;
2518 } else {
2519 return false;
2522 return true;
// Name of the user attribute that check_native() looks up on a function.
static StaticString s_native("__Native");
2527 MaybeDataType type_constraint_to_data_type(LowStringPtr user_type,
2528 const TypeConstraint& tc) {
2529 if (auto type = tc.typeName()) {
2530 // in type_annotation.cpp this code uses m_typeArgs
2531 // as indicator that type can represent one of collection types
2532 // when we extract data from the constraint we know if type is one of
2533 // collection types but we don't have direct way to figure out if
2534 // type used to have type arguments - do it indirectly by checking
2535 // if name of user type contains '<'
2536 auto has_type_args =
2537 user_type && user_type->slice().str().find('<') != std::string::npos;
2538 return get_datatype(
2539 type->toCppString(),
2540 has_type_args,
2541 false, // no syntactic functions in type annotations
2542 false, // no xhp type annotation
2543 false, // no tuples in type annotation
2544 tc.isNullable(),
2545 tc.isSoft());
2547 return folly::none;
2551 * Checks whether the current function is native by looking at the user
2552 * attribute map and sets the isNative flag accoringly
2553 * If the give function is op code implementation, then isNative is not set
2555 void check_native(AsmState& as, bool is_construct) {
2556 if (as.fe->userAttributes.count(s_native.get())) {
2557 as.fe->hniReturnType = is_construct
2558 ? KindOfNull
2559 : type_constraint_to_data_type(as.fe->retUserType,
2560 as.fe->retTypeConstraint);
2562 as.fe->isNative =
2563 !(as.fe->parseNativeAttributes(as.fe->attrs) & Native::AttrOpCodeImpl);
2565 if (as.fe->isNative) {
2566 auto info = as.fe->getNativeInfo();
2567 if (!info) {
2568 if (SystemLib::s_inited) {
2569 // non-builtin native functions must have a valid binding
2570 as.error("No NativeFunctionInfo for function {}",
2571 as.fe->nativeFullname());
2572 } else {
2573 // Allow builtins to have mising NativeFunctionInfo, to support
2574 // conditional compilation. Calling such a function will Fatal.
2577 } else {
2578 // was AttrOpCodeImpl
2581 // set extra attributes for builtin native functions
2582 if (!SystemLib::s_inited) {
2583 as.fe->attrs |= AttrBuiltin | AttrMayUseVV;
2586 for (auto& pi : as.fe->params) {
2587 pi.builtinType =
2588 type_constraint_to_data_type(pi.userType, pi.typeConstraint);
2594 * directive-function : attribute-list ?line-range type-info identifier
2595 * parameter-list function-flags '{' function-body
2598 void parse_function(AsmState& as) {
2599 if (!ensure_pseudomain(as)) {
2600 as.error(".function blocks must all follow the .main block");
2603 as.in.skipWhitespace();
2605 bool isTop = true;
2607 UserAttributeMap userAttrs;
2608 Attr attrs = parse_attribute_list(as, AttrContext::Func, &userAttrs, &isTop);
2610 if (!SystemLib::s_inited) {
2611 attrs |= AttrUnique | AttrPersistent | AttrBuiltin;
2614 // Be conservative by default. HHBBC can clear it where appropriate.
2615 attrs |= AttrMayUseVV;
2617 if(!isTop && as.emittedTopLevelFunc) {
2618 as.error("All top level functions must be defined after any "
2619 "non-top functions");
2622 as.emittedTopLevelFunc |= isTop;
2624 int line0;
2625 int line1;
2626 parse_line_range(as, line0, line1);
2628 auto typeInfo = parse_type_info(as);
2629 std::string name;
2630 if (!as.in.readname(name)) {
2631 as.error(".function must have a name");
2634 as.fe = as.ue->newFuncEmitter(makeStaticString(name));
2635 as.fe->init(line0, line1, as.ue->bcPos(), attrs, isTop, 0);
2636 std::tie(as.fe->retUserType, as.fe->retTypeConstraint) = typeInfo;
2637 as.fe->userAttributes = userAttrs;
2639 parse_parameter_list(as);
2640 // parse_function_flabs relies on as.fe already having valid attrs
2641 parse_function_flags(as);
2643 check_native(as, false);
2645 as.in.expectWs('{');
2647 as.srcLoc = Location::Range{-1,-1,-1,-1};
2648 parse_function_body(as);
2652 * directive-method : attribute-list ?line-range type-info identifier
2653 * parameter-list function-flags '{' function-body
2656 void parse_method(AsmState& as) {
2657 as.in.skipWhitespace();
2659 UserAttributeMap userAttrs;
2660 Attr attrs = parse_attribute_list(as, AttrContext::Func, &userAttrs);
2662 if (!SystemLib::s_inited) attrs |= AttrBuiltin;
2664 int line0;
2665 int line1;
2666 parse_line_range(as, line0, line1);
2668 auto typeInfo = parse_type_info(as);
2669 std::string name;
2670 if (!as.in.readname(name)) {
2671 as.error(".method requires a method name");
2674 auto const sname = makeStaticString(name);
2675 if (as.pce->hasMethod(sname)) {
2676 as.error("duplicate method name " + sname->toCppString());
2679 as.fe = as.ue->newMethodEmitter(sname, as.pce);
2680 as.pce->addMethod(as.fe);
2681 as.fe->init(line0, line1,
2682 as.ue->bcPos(), attrs, false, 0);
2683 std::tie(as.fe->retUserType, as.fe->retTypeConstraint) = typeInfo;
2684 as.fe->userAttributes = userAttrs;
2686 parse_parameter_list(as);
2687 // parse_function_flabs relies on as.fe already having valid attrs
2688 parse_function_flags(as);
2690 check_native(as, name == "__construct");
2692 as.in.expectWs('{');
2694 as.srcLoc = Location::Range{-1,-1,-1,-1};
2695 parse_function_body(as);
2699 * member-tv-initializer : '=' php-serialized ';'
2700 * | '=' uninit ';'
2701 * | ';'
2704 TypedValue parse_member_tv_initializer(AsmState& as) {
2705 as.in.skipWhitespace();
2707 TypedValue tvInit;
2708 tvWriteNull(tvInit); // Don't confuse Variant with uninit data
2710 int what = as.in.getc();
2711 if (what == '=') {
2712 as.in.skipWhitespace();
2714 if (as.in.peek() != '\"') {
2715 // It might be an uninitialized property/constant.
2716 if (!as.in.tryConsume("uninit")) {
2717 as.error("Expected \"\"\" or \"uninit\" after '=' in "
2718 "const/property initializer");
2720 as.in.expectWs(';');
2721 tvWriteUninit(tvInit);
2722 return tvInit;
2725 suppressOOM([&] {
2726 tvAsVariant(&tvInit) = parse_php_serialized(as);
2727 if (tvInit.m_type == KindOfObject) {
2728 as.error("property initializer can't be an object");
2729 } else if (tvInit.m_type == KindOfResource) {
2730 as.error("property initializer can't be a resource");
2731 } else {
2732 tvAsVariant(&tvInit).setEvalScalar();
2735 as.in.expectWs(';');
2736 } else if (what == ';') {
2737 // already null
2738 } else {
2739 as.error("expected '=' or ';' after property name");
2742 return tvInit;
2745 template<typename AttrValidator, typename Adder>
2746 void parse_prop_or_field_impl(AsmState& as, AttrValidator validate, Adder add) {
2747 as.in.skipWhitespace();
2749 UserAttributeMap userAttributes;
2750 Attr attrs = parse_attribute_list(as, AttrContext::Prop, &userAttributes);
2751 validate(attrs);
2753 auto const heredoc = makeDocComment(parse_maybe_long_string(as));
2755 const StringData* userTy;
2756 TypeConstraint typeConstraint;
2757 std::tie(userTy, typeConstraint) = parse_type_info(as, false);
2758 auto const userTyStr = userTy ? userTy : staticEmptyString();
2760 std::string name;
2761 as.in.skipSpaceTab();
2762 as.in.consumePred(!boost::is_any_of(" \t\r\n#;="),
2763 std::back_inserter(name));
2764 if (name.empty()) {
2765 as.error("expected name for property or field");
2768 TypedValue tvInit = parse_member_tv_initializer(as);
2769 add(makeStaticString(name),
2770 attrs,
2771 userTyStr,
2772 typeConstraint,
2773 heredoc,
2774 &tvInit,
2775 RepoAuthType{},
2776 userAttributes);
2780 * directive-property : attribute-list maybe-long-string-literal type-info
2781 * identifier member-tv-initializer
2784 * Define a property with an associated type and heredoc.
2786 void parse_property(AsmState& as, bool class_is_const) {
2787 parse_prop_or_field_impl(
2789 [&](Attr attrs) {
2790 if (attrs & AttrIsConst) {
2791 if (attrs & AttrLateInit) {
2792 as.error("const properties may not also be late init");
2794 } else if (class_is_const && !(attrs & AttrStatic)) {
2795 as.error("all instance properties of a const class must be const");
2798 [&](auto&&... args) {
2799 as.pce->addProperty(std::forward<decltype(args)>(args)...);
2804 void parse_record_field(AsmState& as) {
2805 parse_prop_or_field_impl(
2807 [](Attr attrs) {},
2808 [&](auto&&... args) {
2809 as.re->addField(std::forward<decltype(args)>(args)...);
2816 * const-flags : isType
2819 * directive-const : identifier const-flags member-tv-initializer
2820 * | identifier const-flags ';'
2823 void parse_constant(AsmState& as) {
2824 as.in.skipWhitespace();
2826 std::string name;
2827 if (!as.in.readword(name)) {
2828 as.error("expected name for constant");
2831 bool isType = as.in.tryConsume("isType");
2832 as.in.skipWhitespace();
2834 if (as.in.peek() == ';') {
2835 as.in.getc();
2836 as.pce->addAbstractConstant(makeStaticString(name),
2837 staticEmptyString(),
2838 isType);
2839 return;
2842 TypedValue tvInit = parse_member_tv_initializer(as);
2843 as.pce->addConstant(makeStaticString(name),
2844 staticEmptyString(), &tvInit,
2845 staticEmptyString(),
2846 isType);
/*
 * directive-default-ctor : ';'
 *                        ;
 *
 * No-op, for backward compat: only the terminating ';' is consumed. Must be
 * used inside a class and outside of any function.
 */
void parse_default_ctor(AsmState& as) {
  assertx(!as.fe && as.pce);
  as.in.expectWs(';');
}
2861 * directive-use : identifier+ ';'
2862 * | identifier+ '{' use-line* '}'
2865 * use-line : use-name-ref "insteadof" identifier+ ';'
2866 * | use-name-ref "as" attribute-list identifier ';'
2867 * | use-name-ref "as" attribute-list ';'
2870 void parse_use(AsmState& as) {
2871 std::vector<std::string> usedTraits;
2872 for (;;) {
2873 std::string name;
2874 if (!as.in.readword(name)) break;
2875 usedTraits.push_back(name);
2877 if (usedTraits.empty()) {
2878 as.error(".use requires a trait name");
2881 for (size_t i = 0; i < usedTraits.size(); ++i) {
2882 as.pce->addUsedTrait(makeStaticString(usedTraits[i]));
2884 as.in.skipWhitespace();
2885 if (as.in.peek() != '{') {
2886 as.in.expect(';');
2887 return;
2889 as.in.getc();
2891 for (;;) {
2892 as.in.skipWhitespace();
2893 if (as.in.peek() == '}') break;
2895 std::string traitName;
2896 std::string identifier;
2897 if (!as.in.readword(traitName)) {
2898 as.error("expected identifier for line in .use block");
2900 as.in.skipWhitespace();
2901 if (as.in.peek() == ':') {
2902 as.in.getc();
2903 as.in.expect(':');
2904 if (!as.in.readword(identifier)) {
2905 as.error("expected identifier after ::");
2907 } else {
2908 identifier = traitName;
2909 traitName.clear();
2912 if (as.in.tryConsume("as")) {
2913 bool strict = as.in.tryConsume("strict");
2914 bool async = as.in.tryConsume("async");
2916 Attr attrs = parse_attribute_list(as, AttrContext::TraitImport);
2917 std::string alias;
2918 if (!as.in.readword(alias)) {
2919 if (attrs != AttrNone) {
2920 alias = identifier;
2921 } else {
2922 as.error("expected identifier or attribute list after "
2923 "`as' in .use block");
2927 as.pce->addTraitAliasRule(PreClass::TraitAliasRule(
2928 makeStaticString(traitName),
2929 makeStaticString(identifier),
2930 makeStaticString(alias),
2931 attrs,
2932 strict,
2933 async));
2934 } else if (as.in.tryConsume("insteadof")) {
2935 if (traitName.empty()) {
2936 as.error("Must specify TraitName::name when using a trait insteadof");
2939 PreClass::TraitPrecRule precRule(
2940 makeStaticString(traitName),
2941 makeStaticString(identifier));
2943 bool addedOtherTraits = false;
2944 std::string whom;
2945 while (as.in.readword(whom)) {
2946 precRule.addOtherTraitName(makeStaticString(whom));
2947 addedOtherTraits = true;
2949 if (!addedOtherTraits) {
2950 as.error("one or more trait names expected after `insteadof'");
2953 as.pce->addTraitPrecRule(precRule);
2954 } else {
2955 as.error("expected `as' or `insteadof' in .use block");
2958 as.in.expectWs(';');
2961 as.in.expect('}');
2965 * directive-enum_ty : type-constraint ';'
2969 void parse_enum_ty(AsmState& as) {
2970 if (as.enumTySet) {
2971 as.error("only one .enum_ty directive may appear in a given class");
2973 as.enumTySet = true;
2975 as.pce->setEnumBaseTy(parse_type_constraint(as));
2977 as.in.expectWs(';');
2981 * directive-require : 'extends' '<' indentifier '>' ';'
2982 * | 'implements' '<' indentifier '>' ';'
2986 void parse_require(AsmState& as) {
2987 as.in.skipWhitespace();
2989 bool extends = as.in.tryConsume("extends");
2990 if (!extends && !as.in.tryConsume("implements")) {
2991 as.error(".require should be extends or implements");
2994 as.in.expectWs('<');
2995 std::string name;
2996 if (!as.in.readname(name)) {
2997 as.error(".require expects a class or interface name");
2999 as.in.expectWs('>');
3001 as.pce->addClassRequirement(PreClass::ClassRequirement(
3002 makeStaticString(name), extends
3005 as.in.expectWs(';');
3009 * directive-doccomment : long-string-literal ';'
3013 void parse_cls_doccomment(AsmState& as) {
3014 auto const doc = parse_long_string(as);
3015 as.in.expectWs(';');
3017 as.pce->setDocComment(makeDocComment(doc));
3021 * class-body : class-body-line* '}'
3024 * class-body-line : ".method" directive-method
3025 * | ".property" directive-property
3026 * | ".const" directive-const
3027 * | ".use" directive-use
3028 * | ".default_ctor" directive-default-ctor
3029 * | ".enum_ty" directive-enum-ty
3030 * | ".require" directive-require
3031 * | ".doc" directive-doccomment
3034 void parse_class_body(AsmState& as, bool class_is_const) {
3035 if (!ensure_pseudomain(as)) {
3036 as.error(".class blocks must all follow the .main block");
3039 std::string directive;
3040 while (as.in.readword(directive)) {
3041 if (directive == ".property") {
3042 parse_property(as, class_is_const);
3043 continue;
3045 if (directive == ".method") { parse_method(as); continue; }
3046 if (directive == ".const") { parse_constant(as); continue; }
3047 if (directive == ".use") { parse_use(as); continue; }
3048 if (directive == ".default_ctor") { parse_default_ctor(as); continue; }
3049 if (directive == ".enum_ty") { parse_enum_ty(as); continue; }
3050 if (directive == ".require") { parse_require(as); continue; }
3051 if (directive == ".doc") { parse_cls_doccomment(as); continue; }
3053 as.error("unrecognized directive `" + directive + "' in class");
3055 as.in.expect('}');
3059 * record-body : record-body-line* '}'
3062 * record-body-line : ".property" directive-property
3065 void parse_record_body(AsmState& as) {
3066 if (!ensure_pseudomain(as)) {
3067 as.error(".record blocks must all follow the .main block");
3070 std::string directive;
3071 while (as.in.readword(directive)) {
3072 if (directive == ".property") { parse_record_field(as); continue; }
3074 as.error(folly::to<std::string>("unrecognized directive `",
3075 directive, "` in record"));
3077 as.in.expect('}');
3080 PreClass::Hoistable compute_hoistable(AsmState& as,
3081 const std::string &name,
3082 const std::string &parentName) {
3083 auto &pce = *as.pce;
3084 bool system = pce.attrs() & AttrBuiltin;
3086 if (pce.methods().size() == 1 && pce.methods()[0]->isClosureBody) {
3087 return PreClass::NotHoistable;
3089 if (!system) {
3090 if (!pce.interfaces().empty() ||
3091 !pce.usedTraits().empty() ||
3092 !pce.requirements().empty() ||
3093 (pce.attrs() & AttrEnum)) {
3094 return PreClass::Mergeable;
3096 if (!parentName.empty() && !as.hoistables.count(parentName)) {
3097 return PreClass::MaybeHoistable;
3100 as.hoistables.insert(name);
3102 return pce.attrs() & AttrUnique ?
3103 PreClass::AlwaysHoistable : PreClass::MaybeHoistable;
3107 * directive-class : ?"top" attribute-list identifier ?line-range
3108 * extension-clause implements-clause '{' class-body
3111 * extension-clause : empty
3112 * | "extends" identifier
3115 * implements-clause : empty
3116 * | "implements" '(' identifier* ')'
3120 void parse_class(AsmState& as) {
3121 as.in.skipWhitespace();
3123 bool isTop = true;
3125 UserAttributeMap userAttrs;
3126 Attr attrs = parse_attribute_list(as, AttrContext::Class, &userAttrs, &isTop);
3127 if (!SystemLib::s_inited) {
3128 attrs |= AttrUnique | AttrPersistent | AttrBuiltin;
3130 if (attrs & AttrIsConst) {
3131 if (attrs & (AttrEnum | AttrInterface | AttrTrait)) {
3132 as.error("interfaces, traits and enums may not be const");
3134 if (!(attrs & AttrForbidDynamicProps)) {
3135 as.error("const class missing ForbidDynamicProps attribute");
3139 std::string name;
3140 if (!as.in.readname(name)) {
3141 as.error(".class must have a name");
3143 if (PreClassEmitter::IsAnonymousClassName(name)) {
3144 // assign unique numbers to anonymous classes
3145 // they must not be pre-numbered in the hhas
3146 auto p = name.find(';');
3147 if (p != std::string::npos) {
3148 as.error("anonymous class and closure names may not contain ids in hhas");
3150 name = HPHP::NewAnonymousClassName(name);
3153 int line0;
3154 int line1;
3155 parse_line_range(as, line0, line1);
3157 std::string parentName;
3158 if (as.in.tryConsume("extends")) {
3159 if (!as.in.readname(parentName)) {
3160 as.error("expected parent class name after `extends'");
3164 std::vector<std::string> ifaces;
3165 if (as.in.tryConsume("implements")) {
3166 as.in.expectWs('(');
3167 std::string word;
3168 while (as.in.readname(word)) {
3169 ifaces.push_back(word);
3171 as.in.expect(')');
3174 auto off = folly::get_default(as.defClsOffsets, as.ue->numPreClasses(),
3175 as.ue->bcPos());
3177 as.pce = as.ue->newBarePreClassEmitter(name, PreClass::MaybeHoistable);
3178 as.pce->init(line0,
3179 line1,
3180 off,
3181 attrs,
3182 makeStaticString(parentName),
3183 staticEmptyString());
3184 for (auto const& iface : ifaces) {
3185 as.pce->addInterface(makeStaticString(iface));
3187 as.pce->setUserAttributes(userAttrs);
3189 as.in.expectWs('{');
3190 parse_class_body(as, attrs & AttrIsConst);
3192 as.pce->setHoistable(
3193 isTop ? compute_hoistable(as, name, parentName) : PreClass::NotHoistable
3196 as.finishClass();
3200 * directive-record : attribute identifier ?line-range
3201 * extension-clause '{' record-body
3204 * extension-clause : empty
3205 * | "extends" identifier
3208 void parse_record(AsmState& as) {
3209 if (!RuntimeOption::EvalHackRecords && !RuntimeOption::EvalHackRecordArrays) {
3210 as.error("Records not supported");
3213 as.in.skipWhitespace();
3215 Attr attrs = parse_attribute_list(as, AttrContext::Class);
3216 if (!(attrs & AttrFinal)) {
3217 // parser only sets the final flag. If the final flag is not set,
3218 // the record is abstract.
3219 attrs |= AttrAbstract;
3220 } else if (attrs & AttrAbstract) {
3221 as.error("A record cannot be both final and abstract");
3225 std::string name;
3226 if (!as.in.readname(name)) {
3227 as.error(".record must have a name");
3230 int line0;
3231 int line1;
3232 parse_line_range(as, line0, line1);
3234 std::string parentName;
3235 if (as.in.tryConsume("extends")) {
3236 if (!as.in.readname(parentName)) {
3237 as.error("expected parent record name after `extends'");
3241 as.re = as.ue->newBareRecordEmitter(name);
3242 as.re->init(line0,
3243 line1,
3244 attrs,
3245 makeStaticString(parentName),
3246 staticEmptyString());
3248 as.in.expectWs('{');
3249 parse_record_body(as);
3251 as.finishRecord();
3255 * directive-filepath : quoted-string-literal ';'
3258 void parse_filepath(AsmState& as) {
3259 auto const str = read_litstr(as);
3260 if (nullptr == g_hhas_handler) {
3261 // We don't want to use file path from cached HHAS
3262 as.ue->m_filepath = str;
3264 as.in.expectWs(';');
3268 * directive-main : ?line-range '{' function-body
3271 void parse_main(AsmState& as) {
3272 if (as.emittedPseudoMain) {
3273 as.error("Multiple .main directives found");
3276 int line0;
3277 int line1;
3278 bool fromSrcLoc = parse_line_range(as, line0, line1);
3280 as.in.expectWs('{');
3282 as.ue->initMain(line0, line1);
3283 as.fe = as.ue->getMain();
3284 as.emittedPseudoMain = true;
3285 if (fromSrcLoc) {
3286 as.srcLoc = Location::Range{line0,0,line1,0};
3287 } else {
3288 as.srcLoc = Location::Range{-1,-1,-1,-1};
3290 parse_function_body(as);
3294 * directive-adata : identifier '=' php-serialized ';'
3297 void parse_adata(AsmState& as) {
3298 as.in.skipWhitespace();
3299 std::string dataLabel;
3300 if (!as.in.readword(dataLabel)) {
3301 as.error("expected name for .adata");
3303 if (as.adataMap.count(dataLabel)) {
3304 as.error("duplicate adata label name " + dataLabel);
3307 as.in.expectWs('=');
3308 as.adataDecls[dataLabel] = parse_long_string_raw(as);
3309 as.in.expectWs(';');
3313 * directive-alias : attribute-list identifier '=' type-constraint
3314 * maybe-php-serialized ';'
3317 * We represent alias type information using the syntax for
3318 * TypeConstraints. We populate the name and nullable field of the
3319 * alias directly from the specified type constraint and derive the
3320 * AnnotType from the compute AnnotType in the constraint.
3322 * Following the type-constraint we encode the serialized type structure
3323 * corresponding to this alias.
3325 void parse_alias(AsmState& as) {
3326 as.in.skipWhitespace();
3328 TypeAlias record;
3329 Attr attrs = parse_attribute_list(as, AttrContext::Alias, &record.userAttrs);
3330 if (!SystemLib::s_inited) {
3331 attrs |= AttrPersistent;
3333 std::string name;
3334 if (!as.in.readname(name)) {
3335 as.error(".alias must have a name");
3337 as.in.expectWs('=');
3339 TypeConstraint ty = parse_type_constraint(as);
3340 Variant ts = parse_maybe_php_serialized(as);
3342 if (ts.isInitialized() && !ts.isArray()) {
3343 as.error(".alias must have an array type structure");
3346 const StringData* typeName = ty.typeName();
3347 if (!typeName) typeName = staticEmptyString();
3348 const StringData* sname = makeStaticString(name);
3349 // Merge to ensure namedentity creation, according to
3350 // emitTypedef in emitter.cpp
3351 as.ue->mergeLitstr(sname);
3352 as.ue->mergeLitstr(typeName);
3354 record.name = sname;
3355 record.value = typeName;
3356 record.type = typeName->empty() ? AnnotType::Mixed : ty.type();
3357 record.nullable = (ty.flags() & TypeConstraint::Nullable) != 0;
3358 record.attrs = attrs;
3359 if (ts.isInitialized()) {
3360 record.typeStructure = ArrNR(ArrayData::GetScalarArray(std::move(ts)));
3362 auto aliasId = as.ue->addTypeAlias(record);
3363 as.ue->pushMergeableTypeAlias(aliasId);
3365 as.in.expectWs(';');
3369 * directive-hh-file : '1' ';'
3370 * | '0' ';'
3373 void parse_hh_file(AsmState& as) {
3374 as.in.skipWhitespace();
3375 std::string word;
3376 if (!as.in.readword(word)) {
3377 as.error(".hh_file must have a value");
3379 as.ue->m_isHHFile = word == "1";
3381 if (!as.ue->m_isHHFile && word != "0") {
3382 as.error(".hh_file must be either 1 or 0");
3385 as.in.expectWs(';');
3389 * directive-symbols : '{' identifier identifier* '}'
3391 void parse_symbol_refs(AsmState& as, SymbolRef symbol_kind) {
3392 as.in.expectWs('{');
3394 if (as.wants_symbol_refs) {
3395 while (true) {
3396 as.in.skipWhitespace();
3397 std::string symbol;
3398 as.in.consumePred(!boost::is_any_of(" \t\r\n#}"),
3399 std::back_inserter(symbol));
3400 if (symbol.empty()) {
3401 break;
3403 as.symbol_refs[symbol_kind].push_back(symbol);
3405 } else {
3406 while (as.in.peek() != '}') {
3407 as.in.skipWhitespace();
3408 if (!as.in.skipPred(!boost::is_any_of("#}"))) break;
3412 as.in.expect('}');
/*
 * directive-filepaths : '{' string string* '}'
 *
 * Parses the braced list with parse_symbol_refs(), recording the entries as
 * SymbolRef::Include.
 */
void parse_includes(AsmState& as) {
  parse_symbol_refs(as, SymbolRef::Include);
}
/*
 * Parses a braced symbol list with parse_symbol_refs(), recording the
 * entries as SymbolRef::Constant.
 */
void parse_constant_refs(AsmState& as) {
  parse_symbol_refs(as, SymbolRef::Constant);
}
/*
 * Parses a braced symbol list with parse_symbol_refs(), recording the
 * entries as SymbolRef::Function.
 */
void parse_function_refs(AsmState& as) {
  parse_symbol_refs(as, SymbolRef::Function);
}
/*
 * Parses a braced symbol list with parse_symbol_refs(), recording the
 * entries as SymbolRef::Class.
 */
void parse_class_refs(AsmState& as) {
  parse_symbol_refs(as, SymbolRef::Class);
}
3435 * directive-metadata : identifier = identifier ';'
3436 * | identifier = quoted-string-literal ';'
3437 * | identifier = long-string-literal ';'
3440 void parse_metadata(AsmState& as) {
3441 std::string key;
3442 if (as.in.readname(key)) {
3443 as.in.expectWs('=');
3444 as.in.skipWhitespace();
3445 auto const value = [&] () -> const StringData* {
3446 auto ret = parse_maybe_long_string(as);
3447 if (!ret.empty()) return makeStaticString(ret);
3448 std::string tmp;
3449 if (as.in.readQuotedStr(tmp) || as.in.readword(tmp)) {
3450 return makeStaticString(tmp);
3452 return nullptr;
3453 }();
3454 if (value) {
3455 as.in.expect(';');
3456 as.ue->m_metaData.emplace(
3457 makeStaticString(key),
3458 make_tv<KindOfPersistentString>(value)
3460 return;
3463 as.error(".metadata expects a key = value pair");
/*
 * directive-file-attributes : attribute-list ';'
 *                           ;
 *
 * User attributes in the list are stored in as.ue->m_fileAttributes. The
 * list is parsed in the AttrContext::Func context and the returned attribute
 * bits are discarded.
 */
void parse_file_attributes(AsmState& as) {
  as.in.skipWhitespace();

  parse_attribute_list(as, AttrContext::Func, &(as.ue->m_fileAttributes));

  as.in.expectWs(';');
}
3479 * asm-file : asm-tld* <EOF>
3482 * asm-tld : ".filepath" directive-filepath
3483 * | ".main" directive-main
3484 * | ".function" directive-function
3485 * | ".adata" directive-adata
3486 * | ".class" directive-class
3487 * | ".alias" directive-alias
3488 * | ".hh_file" directive-hh-file
3489 * | ".includes" directive-filepaths
3490 * | ".constant_refs" directive-symbols
3491 * | ".function_refs" directive-symbols
3492 * | ".class_refs" directive-symbols
3493 * | ".metadata" directive-meta-data
3494 * | ".file_attributes" directive-file-attributes
3497 void parse(AsmState& as) {
3498 as.in.skipWhitespace();
3499 std::string directive;
3501 while (as.in.readword(directive)) {
3502 if (directive == ".filepath") { parse_filepath(as) ; continue; }
3503 if (directive == ".main") { parse_main(as) ; continue; }
3504 if (directive == ".function") { parse_function(as) ; continue; }
3505 if (directive == ".adata") { parse_adata(as) ; continue; }
3506 if (directive == ".class") { parse_class(as) ; continue; }
3507 if (directive == ".record") { parse_record(as) ; continue; }
3508 if (directive == ".alias") { parse_alias(as) ; continue; }
3509 if (directive == ".hh_file") { parse_hh_file(as) ; continue; }
3510 if (directive == ".includes") { parse_includes(as) ; continue; }
3511 if (directive == ".constant_refs") { parse_constant_refs(as) ; continue; }
3512 if (directive == ".function_refs") { parse_function_refs(as) ; continue; }
3513 if (directive == ".class_refs") { parse_class_refs(as) ; continue; }
3514 if (directive == ".metadata") { parse_metadata(as) ; continue; }
3515 if (directive == ".file_attributes") { parse_file_attributes(as); continue;}
3517 as.error("unrecognized top-level directive `" + directive + "'");
3520 if (!ensure_pseudomain(as)) {
3521 as.error("no .main found in hhas unit");
3524 if (as.symbol_refs.size()) {
3525 for (auto& ent : as.symbol_refs) {
3526 as.ue->m_symbol_refs.push_back(std::move(ent));
3530 if (RuntimeOption::EvalAssemblerFoldDefaultValues) {
3531 for (auto& fe : as.ue->fevec()) fixup_default_values(as, fe.get());
3532 for (size_t n = 0; n < as.ue->numPreClasses(); ++n) {
3533 for (auto fe : as.ue->pce(n)->methods()) fixup_default_values(as, fe);
3540 //////////////////////////////////////////////////////////////////////
3542 std::unique_ptr<UnitEmitter> assemble_string(
3543 const char* code,
3544 int codeLen,
3545 const char* filename,
3546 const SHA1& sha1,
3547 const Native::FuncTable& nativeFuncs,
3548 bool swallowErrors,
3549 bool wantsSymbolRefs
3551 auto const bcSha1 = SHA1{string_sha1(folly::StringPiece(code, codeLen))};
3552 auto ue = std::make_unique<UnitEmitter>(sha1, bcSha1, nativeFuncs, false);
3553 if (!SystemLib::s_inited) {
3554 ue->m_mergeOnly = true;
3556 StringData* sd = makeStaticString(filename);
3557 ue->m_filepath = sd;
3559 try {
3560 auto const mode = std::istringstream::binary | std::istringstream::in;
3561 std::istringstream instr(std::string(code, codeLen), mode);
3562 AsmState as(instr, wantsSymbolRefs);
3563 as.ue = ue.get();
3564 parse(as);
3565 } catch (const FatalErrorException& e) {
3566 if (!swallowErrors) throw;
3567 ue = createFatalUnit(sd, sha1, FatalOp::Runtime,
3568 makeStaticString(e.what()));
3569 } catch (const AssemblerError& e) {
3570 if (!swallowErrors) throw;
3571 ue = createFatalUnit(sd, sha1, FatalOp::Runtime, makeStaticString(e.what()));
3572 } catch (const AssemblerFatal& e) {
3573 if (!swallowErrors) throw;
3574 ue = createFatalUnit(sd, sha1, FatalOp::Runtime, makeStaticString(e.what()));
3575 } catch (const std::exception& e) {
3576 if (!swallowErrors) {
3577 // assembler should throw only AssemblerErrors and FatalErrorExceptions
3578 throw AssemblerError(folly::sformat("AssemblerError: {}", e.what()));
3580 ue = createFatalUnit(sd, sha1, FatalOp::Runtime,
3581 makeStaticString(e.what()));
3584 return ue;
3587 //////////////////////////////////////////////////////////////////////