Codemod asserts to assertxs in the runtime
[hiphop-php.git] / hphp / runtime / vm / as.cpp
blob5dfe79168a0a156b2070a09bb7b91d886fcd9d7a
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
18 * This module contains an assembler implementation for HHBC. It is
19 * probably fairly close to allowing you to access most of the
20 * metadata associated with hhvm's compiled unit format, although it's
21 * possible something has been overlooked.
23 * To use it, run hhvm with -v Eval.AllowHhas=true on a file with a
24 * ".hhas" extension. The syntax is probably easiest to understand by
25 * looking at some examples (or the semi-BNF markup around some of the
26 * parse functions here). For examples, see hphp/tests/vm/asm_*.
29 * Notes:
31 * - You can crash hhvm very easily with this.
33 * Using this module, you can emit pretty much any sort of not
34 * trivially-illegal bytecode stream, and many trivially-illegal
35 * ones as well. You can also easily create Units with illegal
36 * metadata. Generally this will crash the VM. In other cases
37 * (especially if you don't bother to DefCls your classes in your
38 * .main) you'll just get mysterious "class not defined" errors
39 * or weird behavior.
41 * - Whitespace is not normally significant, but newlines may not
42 * be in the middle of a list of opcode arguments. (After the
43 * newline, the next thing seen is expected to be either a
44 * mnemonic for the next opcode in the stream or some sort of
45 * directive.) However, newlines (and comments) may appear
46 * *inside* certain opcode arguments (e.g. string literals or
47 * vector immediates).
49 * Rationale: this is partially intended to make it trivial to
50 * catch wrong-number-of-arguments errors, although it probably
51 * could be done without this if you feel like changing it.
54 * Wishlist:
56 * - It might be nice if you could refer to iterators by name
57 * instead of by index.
59 * - DefCls by name would be nice.
61 * Missing features (partial list):
63 * - while class/function names can contain ':', '$', and ';',
64 * .use declarations can't handle those names because of syntax
65 * conflicts
67 * @author Jordan DeLong <delong.j@fb.com>
70 #include "hphp/runtime/vm/as.h"
72 #include <algorithm>
73 #include <cstdio>
74 #include <iostream>
75 #include <iterator>
76 #include <vector>
78 #include <boost/algorithm/string.hpp>
79 #include <boost/scoped_ptr.hpp>
80 #include <boost/bind.hpp>
82 #include <folly/Conv.h>
83 #include <folly/MapUtil.h>
84 #include <folly/Memory.h>
85 #include <folly/Range.h>
86 #include <folly/String.h>
88 #include "hphp/util/md5.h"
90 #include "hphp/parser/parser.h"
91 #include "hphp/runtime/base/builtin-functions.h"
92 #include "hphp/runtime/base/repo-auth-type-codec.h"
93 #include "hphp/runtime/base/repo-auth-type.h"
94 #include "hphp/runtime/base/tv-type.h"
95 #include "hphp/runtime/vm/as-shared.h"
96 #include "hphp/runtime/vm/func-emitter.h"
97 #include "hphp/runtime/vm/hhbc.h"
98 #include "hphp/runtime/vm/native.h"
99 #include "hphp/runtime/vm/preclass-emitter.h"
100 #include "hphp/runtime/vm/unit.h"
101 #include "hphp/runtime/vm/unit-emitter.h"
102 #include "hphp/system/systemlib.h"
104 TRACE_SET_MOD(hhas);
106 namespace HPHP {
108 //////////////////////////////////////////////////////////////////////
110 namespace {
struct AsmState;

// Pointer to a parse routine that operates on the assembler state.
using ParserFunc = void (*)(AsmState& as);
115 struct Error : std::runtime_error {
116 explicit Error(int where, const std::string& what)
117 : std::runtime_error(folly::sformat(
118 "Assembler Error: line {}: {}", where, what))
122 struct Input {
123 explicit Input(std::istream& in)
124 : m_in(in)
127 int peek() { return m_in.peek(); }
129 int getc() {
130 int ret = m_in.get();
131 if (ret == EOF) {
132 io_error_if_bad();
133 } else if (ret == '\n') {
134 ++m_lineNumber;
136 return ret;
139 void ungetc(char c) {
140 if (c == '\n') --m_lineNumber;
141 m_in.putback(c);
144 void expect(int c) {
145 if (getc() != c) {
146 error(folly::sformat("expected character `{}'", char(c)));
151 * Expect `c' after possible whitespace/comments. When convenient,
152 * preferable to doing skipWhitespace/expect manually to keep the
153 * line number in the error prior to the whitespace skipped.
155 void expectWs(int c) {
156 const int currentLine = m_lineNumber;
157 skipWhitespace();
158 if (getc() != c) {
159 throw Error(currentLine,
160 folly::sformat("expected character `{}'", char(c)));
164 int getLineNumber() const {
165 return m_lineNumber;
168 // Skips whitespace, then populates word with valid bareword
169 // characters. Returns true if we read any characters into word.
170 bool readword(std::string& word) {
171 word.clear();
172 skipWhitespace();
173 consumePred(is_bareword(), std::back_inserter(word));
174 return !word.empty();
176 // Skips whitespace, then populates name with valid extname
177 // characters. Returns true if we read any characters into name.
178 bool readname(std::string& name) {
179 name.clear();
180 skipWhitespace();
181 consumePred(is_extname(), std::back_inserter(name));
182 return !name.empty();
184 // Try to consume a bareword. Skips whitespace. If we can't
185 // consume the specified word, returns false.
186 bool tryConsume(const std::string& what) {
187 std::string word;
188 if (!readword(word)) {
189 return false;
191 if (word != what) {
192 std::for_each(word.rbegin(), word.rend(),
193 boost::bind(&Input::ungetc, this, _1));
194 return false;
196 return true;
198 int32_t readint() {
199 std::string buf;
200 skipWhitespace();
201 if (peek() == '-') buf += (char)getc();
202 consumePred(isdigit, std::back_inserter(buf));
203 if (buf.empty() || buf == "-") {
204 throw Error(m_lineNumber, "expected integral value");
206 return folly::to<int32_t>(buf);
209 // C-style character escapes, no support for unicode escapes or
210 // whatnot.
211 template<class OutCont>
212 void escapeChar(OutCont& out) {
213 auto is_oct = [&] (int i) { return i >= '0' && i <= '7'; };
214 auto is_hex = [&] (int i) {
215 return (i >= '0' && i <= '9') ||
216 (i >= 'a' && i <= 'f') ||
217 (i >= 'A' && i <= 'F');
219 auto hex_val = [&] (int i) -> uint32_t {
220 assertx(is_hex(i));
221 return i >= '0' && i <= '9' ? i - '0' :
222 i >= 'a' && i <= 'f' ? i - 'a' + 10 : i - 'A' + 10;
225 auto src = getc();
226 switch (src) {
227 case EOF: error("EOF in string literal");
228 case 'a': out.push_back('\a'); break;
229 case 'b': out.push_back('\b'); break;
230 case 'f': out.push_back('\f'); break;
231 case 'n': out.push_back('\n'); break;
232 case 'r': out.push_back('\r'); break;
233 case 't': out.push_back('\t'); break;
234 case 'v': out.push_back('\v'); break;
235 case '\'': out.push_back('\''); break;
236 case '\"': out.push_back('\"'); break;
237 case '\?': out.push_back('\?'); break;
238 case '\\': out.push_back('\\'); break;
239 case '\r': /* ignore */ break;
240 case '\n': /* ignore */ break;
241 default:
242 if (is_oct(src)) {
243 auto val = int64_t{src} - '0';
244 for (auto i = int{1}; i < 3; ++i) {
245 src = getc();
246 if (!is_oct(src)) { ungetc(src); break; }
247 val *= 8;
248 val += src - '0';
250 if (val > std::numeric_limits<uint8_t>::max()) {
251 error("octal escape sequence overflowed");
253 out.push_back(static_cast<uint8_t>(val));
254 return;
257 if (src == 'x' || src == 'X') {
258 auto val = uint64_t{0};
259 if (!is_hex(peek())) error("\\x used without no following hex digits");
260 for (auto i = int{0}; i < 2; ++i) {
261 src = getc();
262 if (!is_hex(src)) { ungetc(src); break; }
263 val *= 0x10;
264 val += hex_val(src);
266 if (val > std::numeric_limits<uint8_t>::max()) {
267 error("hex escape sequence overflowed");
269 out.push_back(static_cast<uint8_t>(val));
270 return;
273 error("unrecognized character escape");
277 // Reads a quoted string with typical escaping rules. Does not skip
278 // any whitespace. Returns true if we successfully read one, or
279 // false. EOF during the string throws.
280 bool readQuotedStr(std::string& str) {
281 str.clear();
282 if (peek() != '\"') {
283 return false;
285 getc();
287 int c;
288 while ((c = getc()) != EOF) {
289 switch (c) {
290 case '\"': return true;
291 case '\\': escapeChar(str); break;
292 default: str.push_back(c); break;
295 error("EOF in string literal");
296 not_reached();
297 return false;
301 * Reads a python-style longstring, or returns false if we don't
302 * have one. Does not skip any whitespace before looking for the
303 * string.
305 * Python longstrings start with \"\"\", and can contain any bytes
306 * other than \"\"\". A '\\' character introduces C-style escapes,
307 * but there's no need to escape single quote characters.
309 bool readLongString(std::vector<char>& buffer) {
310 if (peek() != '\"') return false;
311 getc();
312 if (peek() != '\"') { ungetc('\"'); return false; }
313 getc();
314 if (peek() != '\"') { ungetc('\"');
315 ungetc('\"'); return false; }
316 getc();
318 int c;
319 while ((c = getc()) != EOF) {
320 if (c == '\\') {
321 escapeChar(buffer);
322 continue;
324 if (c == '"') {
325 c = getc();
326 if (c != '"') {
327 buffer.push_back('"');
328 ungetc(c);
329 continue;
331 c = getc();
332 if (c != '"') {
333 buffer.push_back('"');
334 buffer.push_back('"');
335 ungetc(c);
336 continue;
338 return true;
341 buffer.push_back(c);
343 error("EOF in \"\"\"-string literal");
344 not_reached();
345 return false;
348 // Skips whitespace (including newlines and comments).
349 void skipWhitespace() {
350 while (skipPred(boost::is_any_of(" \t\r\n"))) {
351 if (peek() == '#') {
352 skipPred(!boost::is_any_of("\n"));
353 expect('\n');
354 } else {
355 break;
360 // Skip spaces and tabs, but other whitespace (such as comments or
361 // newlines) stop the skip.
362 void skipSpaceTab() {
363 skipPred(boost::is_any_of(" \t"));
366 template<class Predicate>
367 bool skipPred(Predicate pred) {
368 while (pred(peek())) {
369 if (getc() == EOF) {
370 return false;
374 return true;
377 template<class Predicate, class OutputIterator>
378 bool consumePred(Predicate pred, OutputIterator out) {
379 int c;
380 while (pred(c = peek())) {
381 if (getc() == EOF) {
382 return false;
385 *out++ = c;
388 return true;
391 private:
392 // whether a character is a valid part of the extended sorts of
393 // names that HHVM uses for certain generated constructs
394 // (closures, __Memoize implementations, etc)
395 struct is_extname {
396 bool operator()(int i) const {
397 is_bareword is_bw;
398 return is_bw(i) || i == ':' || i == ';' || i == '#' || i =='@' ||
399 (i >= 0x7f && i <= 0xff) /* see hphp.ll :( */;
403 void error(const std::string& what) {
404 throw Error(getLineNumber(), what);
407 void io_error_if_bad() {
408 if (m_in.bad()) {
409 error("I/O error reading stream: " +
410 folly::errnoStr(errno).toStdString());
414 private:
415 std::istream& m_in;
416 int m_lineNumber{1};
419 struct StackDepth;
421 struct FPIReg {
422 Offset fpushOff;
423 StackDepth* stackDepth;
424 int fpOff;
428 * Tracks the depth of the stack in a given block of instructions.
430 * This structure is linked to a block of instructions (usually starting at a
431 * label), and tracks the current stack depth in this block. This tracking can
432 * take two forms:
433 * - Absolute depth: the depth of the stack is exactly known for this block
434 * - Relative depth: the depth of the stack is unknown for now. We keep track
435 * of an offset, relative to the depth of the stack at the first instruction
436 * of the block
438 struct StackDepth {
439 int currentOffset;
441 * Tracks the max depth of elem stack + desc stack offset inside a region
442 * where baseValue is unknown.
444 int maxOffset;
446 * Tracks the min depth of the elem stack inside a region where baseValue
447 * is unknown, and the line where the min occurred.
449 int minOffset;
450 int minOffsetLine;
451 folly::Optional<int> baseValue;
454 * During the parsing process, when a Jmp instruction is encountered, the
455 * StackDepth structure for this jump becomes linked to the StackDepth
456 * structure of the label (which is added to the listeners list).
458 * Once the absolute depth at the jump becomes known, its StackDepth
459 * instance calls the setBase method of the StackDepth instance of the label.
460 * The absolute depth at the label can then be inferred from the
461 * absolute depth at the jump.
463 std::vector<std::pair<StackDepth*, int> > listeners;
465 StackDepth()
466 : currentOffset(0)
467 , maxOffset(0)
468 , minOffset(0)
471 void adjust(AsmState& as, int delta);
472 void addListener(AsmState& as, StackDepth* target);
473 void setBase(AsmState& as, int stackDepth);
474 int absoluteDepth() {
475 assertx(baseValue.hasValue());
476 return baseValue.value() + currentOffset;
480 * Sets the baseValue such as the current stack depth matches the
481 * parameter.
483 * If the base value is already known, it may conflict with the
484 * parameter of this function. In this case, an error will be raised.
486 void setCurrentAbsolute(AsmState& as, int stackDepth);
489 struct Label {
490 bool bound{false};
491 Offset target;
492 StackDepth stackDepth;
495 * Each label source source has an Offset where the jmp should be
496 * patched up is, and an Offset from which the jump delta should be
497 * computed. (The second Offset is basically to the actual
498 * jump/switch/etc instruction, while the first points to the
499 * immediate.)
501 std::vector<std::pair<Offset,Offset>> sources;
504 * List of a parameter ids that use this label for its DV
505 * initializer.
507 std::vector<Id> dvInits;
510 * List of EHEnts that have m_handler pointing to this label.
512 std::vector<size_t> ehEnts;
515 struct AsmState {
516 explicit AsmState(std::istream& in, AsmCallbacks* callbacks = nullptr)
517 : in(in)
518 , callbacks(callbacks)
520 currentStackDepth->setBase(*this, 0);
523 AsmState(const AsmState&) = delete;
524 AsmState& operator=(const AsmState&) = delete;
526 template<typename... Args>
527 void error(const std::string& fmt, Args&&... args) {
528 throw Error(in.getLineNumber(),
529 folly::sformat(fmt, std::forward<Args>(args)...));
533 void adjustStack(int delta) {
534 if (currentStackDepth == nullptr) {
535 // Instruction is unreachable, nothing to do here!
536 return;
539 currentStackDepth->adjust(*this, delta);
542 void adjustStackHighwater(int depth) {
543 if (depth) {
544 fe->maxStackCells = std::max(fe->maxStackCells, depth);
548 std::string displayStackDepth() {
549 std::ostringstream stack;
551 if (currentStackDepth == nullptr) {
552 stack << "/";
553 } else if (currentStackDepth->baseValue) {
554 stack << *currentStackDepth->baseValue +
555 currentStackDepth->currentOffset;
556 } else {
557 stack << "?" << currentStackDepth->currentOffset;
560 return stack.str();
563 void addLabelTarget(const std::string& name) {
564 auto& label = labelMap[name];
565 if (label.bound) {
566 error("Duplicate label " + name);
568 label.bound = true;
569 label.target = ue->bcPos();
571 StackDepth* newStack = &label.stackDepth;
573 if (currentStackDepth == nullptr) {
574 // Previous instruction was unreachable
575 currentStackDepth = newStack;
576 return;
579 // The stack depth at the label depends on the current depth
580 currentStackDepth->addListener(*this, newStack);
581 currentStackDepth = newStack;
584 void addLabelJump(const std::string& name, Offset immOff, Offset opcodeOff) {
585 auto& label = labelMap[name];
587 if (currentStackDepth != nullptr) {
588 // The stack depth at the target must be the same as the current depth
589 // (whatever this may be: it may still be unknown)
590 currentStackDepth->addListener(*this, &label.stackDepth);
593 label.sources.emplace_back(immOff, opcodeOff);
596 void enforceStackDepth(int stackDepth) {
597 if (currentStackDepth == nullptr) {
598 // Current instruction is unreachable, thus the constraint
599 // on the stack depth will never be violated
600 return;
603 currentStackDepth->setCurrentAbsolute(*this, stackDepth);
606 bool isUnreachable() {
607 return currentStackDepth == nullptr;
610 void enterUnreachableRegion() {
611 currentStackDepth = nullptr;
614 void enterReachableRegion(int stackDepth) {
615 unnamedStackDepths.emplace_back(std::make_unique<StackDepth>());
616 currentStackDepth = unnamedStackDepths.back().get();
617 currentStackDepth->setBase(*this, stackDepth);
620 void addLabelDVInit(const std::string& name, int paramId) {
621 labelMap[name].dvInits.push_back(paramId);
623 // Stack depth should be 0 when entering a DV init
624 labelMap[name].stackDepth.setBase(*this, 0);
627 void addLabelEHEnt(const std::string& name, size_t ehIdx) {
628 labelMap[name].ehEnts.push_back(ehIdx);
630 // Stack depth should be 0 when entering a fault funclet
631 labelMap[name].stackDepth.setBase(*this, 0);
634 void beginFpi(Offset fpushOff) {
635 fpiRegs.push_back(FPIReg{
636 fpushOff,
637 currentStackDepth,
638 currentStackDepth->currentOffset
640 fdescDepth += kNumActRecCells;
641 currentStackDepth->adjust(*this, 0);
644 void endFpi() {
645 if (fpiRegs.empty()) {
646 error("endFpi called with no active fpi region");
649 auto& ent = fe->addFPIEnt();
650 const auto& reg = fpiRegs.back();
651 ent.m_fpushOff = reg.fpushOff;
652 ent.m_fpiEndOff = ue->bcPos();
653 ent.m_fpOff = reg.fpOff;
654 if (reg.stackDepth->baseValue) {
655 ent.m_fpOff += *reg.stackDepth->baseValue;
656 } else {
657 // Base value still unknown, this will need to be updated later.
659 // Store the FPIEnt's index in the FuncEmitter's entry table.
660 assertx(&fe->fpitab[fe->fpitab.size()-1] == &ent);
661 fpiToUpdate.emplace_back(fe->fpitab.size() - 1, reg.stackDepth);
664 fpiRegs.pop_back();
665 always_assert(fdescDepth >= kNumActRecCells);
666 fdescDepth -= kNumActRecCells;
669 void finishClass() {
670 assertx(!fe);
671 ue->addPreClassEmitter(pce);
672 pce = 0;
673 enumTySet = false;
676 void patchLabelOffsets(const Label& label) {
677 for (auto const& source : label.sources) {
678 ue->emitInt32(label.target - source.second, source.first);
681 for (auto const& dvinit : label.dvInits) {
682 fe->params[dvinit].funcletOff = label.target;
685 for (auto const& ehEnt : label.ehEnts) {
686 fe->ehtab[ehEnt].m_handler = label.target;
690 void finishSection() {
691 for (auto const& label : labelMap) {
692 if (!label.second.bound) {
693 error("Undefined label " + label.first);
695 if (label.second.target >= ue->bcPos()) {
696 error("label " + label.first + " falls of the end of the function");
699 patchLabelOffsets(label.second);
702 // Patch the FPI structures
703 for (auto& kv : fpiToUpdate) {
704 if (!kv.second->baseValue) {
705 error("created a FPI from an unreachable instruction");
708 fe->fpitab[kv.first].m_fpOff += *kv.second->baseValue;
712 void finishFunction() {
713 finishSection();
715 // Stack depth should be 0 at the end of a function body
716 enforceStackDepth(0);
718 // Bump up the unnamed local count
719 const int numLocals = maxUnnamed + 1;
720 while (fe->numLocals() < numLocals) {
721 fe->allocUnnamedLocal();
724 fe->maxStackCells +=
725 fe->numLocals() +
726 fe->numIterators() * kNumIterCells +
727 clsRefCountToCells(fe->numClsRefSlots());
729 fe->finish(ue->bcPos(), false);
730 ue->recordFunction(fe);
732 fe = 0;
733 fpiRegs.clear();
734 labelMap.clear();
735 numItersSet = false;
736 numClsRefSlotsSet = false;
737 initStackDepth = StackDepth();
738 initStackDepth.setBase(*this, 0);
739 currentStackDepth = &initStackDepth;
740 unnamedStackDepths.clear();
741 fdescDepth = 0;
742 maxUnnamed = -1;
743 fpiToUpdate.clear();
746 int getLocalId(const std::string& name) {
747 if (name[0] == '_') {
748 int id = folly::to<int>(name.substr(1));
749 if (id > maxUnnamed) maxUnnamed = id;
750 return id;
753 if (name[0] != '$') {
754 error("local variables must be prefixed with $ or _");
757 const StringData* sd = makeStaticString(name.c_str() + 1);
758 fe->allocVarId(sd);
759 return fe->lookupVarId(sd);
762 int getIterId(int32_t id) {
763 if (id >= fe->numIterators()) {
764 error("iterator id exceeded number of iterators in the function");
766 return id;
769 int getClsRefSlot(int32_t slot) {
770 if (slot >= fe->numClsRefSlots()) {
771 error("class-ref slot id exceeded number of class-ref "
772 "slots in the function");
774 return slot;
777 UnitEmitter* ue;
778 Input in;
779 bool emittedPseudoMain{false};
780 bool emittedTopLevelFunc{false};
782 std::map<std::string,ArrayData*> adataMap;
784 // When inside a class, this state is active.
785 PreClassEmitter* pce;
787 // When we're doing a function or method body, this state is active.
788 FuncEmitter* fe{nullptr};
789 std::vector<FPIReg> fpiRegs;
790 std::map<std::string,Label> labelMap;
791 bool numItersSet{false};
792 bool numClsRefSlotsSet{false};
793 bool enumTySet{false};
794 StackDepth initStackDepth;
795 StackDepth* currentStackDepth{&initStackDepth};
796 std::vector<std::unique_ptr<StackDepth>> unnamedStackDepths;
797 int fdescDepth{0};
798 int minStackDepth{0};
799 int maxUnnamed{-1};
800 std::vector<std::pair<size_t, StackDepth*>> fpiToUpdate;
801 std::set<std::string,stdltistr> hoistables;
802 std::unordered_map<uint32_t,Offset> defClsOffsets;
803 Location::Range srcLoc{-1,-1,-1,-1};
804 AsmCallbacks* callbacks{ nullptr };
807 void StackDepth::adjust(AsmState& as, int delta) {
808 currentOffset += delta;
810 if (!baseValue) {
811 // The absolute stack depth is unknown. We only store the min
812 // and max offsets, and we will take a decision later, when the
813 // base value will be known.
814 maxOffset = std::max(currentOffset + as.fdescDepth, maxOffset);
815 if (currentOffset < minOffset) {
816 minOffsetLine = as.in.getLineNumber();
817 minOffset = currentOffset;
819 return;
822 if (*baseValue + currentOffset < 0) {
823 as.error("opcode sequence caused stack depth to go negative");
826 as.adjustStackHighwater(*baseValue + currentOffset + as.fdescDepth);
829 void StackDepth::addListener(AsmState& as, StackDepth* target) {
830 if (baseValue) {
831 target->setBase(as, *baseValue + currentOffset);
832 } else {
833 listeners.emplace_back(target, currentOffset);
837 void StackDepth::setBase(AsmState& as, int stackDepth) {
838 if (baseValue && stackDepth != *baseValue) {
839 as.error("stack depth {} does not match base value {}",
840 stackDepth, *baseValue);
843 baseValue = stackDepth;
845 // We finally know the base value. Update AsmState accordingly.
846 if (*baseValue + minOffset < 0) {
847 throw Error(
848 minOffsetLine,
849 "opcode sequence caused stack depth to go negative"
852 as.adjustStackHighwater(*baseValue + maxOffset);
854 // Update the listeners
855 auto l = std::move(listeners);
856 // We won't need them anymore
857 listeners.clear();
858 for (auto& kv : l) {
859 kv.first->setBase(as, *baseValue + kv.second);
863 void StackDepth::setCurrentAbsolute(AsmState& as, int stackDepth) {
864 setBase(as, stackDepth - currentOffset);
867 //////////////////////////////////////////////////////////////////////
870 * Opcode arguments must be on the same line as the opcode itself,
871 * although certain argument types may contain internal newlines (see,
872 * for example, read_jmpvector or string literals).
874 template<class Target> Target read_opcode_arg(AsmState& as) {
875 as.in.skipSpaceTab();
876 std::string strVal;
877 as.in.consumePred(!boost::is_any_of(" \t\r\n#;>"),
878 std::back_inserter(strVal));
879 if (strVal.empty()) {
880 as.error("expected opcode or directive argument");
882 try {
883 return folly::to<Target>(strVal);
884 } catch (std::range_error&) {
885 as.error("couldn't convert input argument (" + strVal + ") to "
886 "proper type");
887 not_reached();
891 template<class SubOpType>
892 uint8_t read_subop(AsmState& as) {
893 auto const str = read_opcode_arg<std::string>(as);
894 if (auto const ty = nameToSubop<SubOpType>(str.c_str())) {
895 return static_cast<uint8_t>(*ty);
897 as.error("unknown subop name");
898 not_reached();
901 const StringData* read_litstr(AsmState& as) {
902 as.in.skipSpaceTab();
903 std::string strVal;
904 if (!as.in.readQuotedStr(strVal)) {
905 as.error("expected quoted string literal");
907 return makeStaticString(strVal);
911 * maybe-string-literal : N
912 * | string-literal
915 const StringData* read_maybe_litstr(AsmState& as) {
916 as.in.skipSpaceTab();
917 if (as.in.peek() == 'N') {
918 as.in.getc();
919 return nullptr;
921 return read_litstr(as);
924 std::vector<std::string> read_strvector(AsmState& as) {
925 std::vector<std::string> ret;
926 as.in.skipSpaceTab();
927 as.in.expect('<');
928 std::string name;
929 while (as.in.skipSpaceTab(), as.in.readQuotedStr(name)) {
930 ret.push_back(name);
932 as.in.skipSpaceTab();
933 as.in.expectWs('>');
934 return ret;
937 ArrayData* read_litarray(AsmState& as) {
938 as.in.skipSpaceTab();
939 if (as.in.getc() != '@') {
940 as.error("expecting an `@foo' array literal reference");
942 std::string name;
943 if (!as.in.readword(name)) {
944 as.error("expected name of .adata literal");
947 auto const it = as.adataMap.find(name);
948 if (it == as.adataMap.end()) {
949 as.error("unknown array data literal name " + name);
951 return it->second;
954 RepoAuthType read_repo_auth_type(AsmState& as) {
955 auto const str = read_opcode_arg<std::string>(as);
956 folly::StringPiece parse(str);
959 * Note: no support for reading array types. (The assembler only
960 * emits a single unit, so it can't really be involved in creating a
961 * ArrayTypeTable.)
964 using T = RepoAuthType::Tag;
966 #define X(what, tag) \
967 if (parse.startsWith(what)) return RepoAuthType{tag}
969 #define Y(what, tag) \
970 if (parse.startsWith(what)) { \
971 parse.removePrefix(what); \
972 auto const cls = makeStaticString(parse.data()); \
973 as.ue->mergeLitstr(cls); \
974 return RepoAuthType{tag, cls}; \
977 Y("Obj=", T::ExactObj);
978 Y("?Obj=", T::OptExactObj);
979 Y("?Obj<=", T::OptSubObj);
980 Y("Obj<=", T::SubObj);
981 X("Arr", T::Arr);
982 X("?Arr", T::OptArr);
983 X("VArr", T::VArr);
984 X("?VArr", T::OptVArr);
985 X("DArr", T::DArr);
986 X("?DArr", T::OptDArr);
987 X("Vec", T::Vec);
988 X("?Vec", T::OptVec);
989 X("Dict", T::Dict);
990 X("?Dict", T::OptDict);
991 X("Keyset", T::Keyset);
992 X("?Keyset", T::OptKeyset);
993 X("Bool", T::Bool);
994 X("?Bool", T::OptBool);
995 X("Cell", T::Cell);
996 X("Dbl", T::Dbl);
997 X("?Dbl", T::OptDbl);
998 X("Gen", T::Gen);
999 X("InitCell", T::InitCell);
1000 X("InitGen", T::InitGen);
1001 X("InitNull", T::InitNull);
1002 X("InitUnc", T::InitUnc);
1003 X("Int", T::Int);
1004 X("?Int", T::OptInt);
1005 X("Null", T::Null);
1006 X("Obj", T::Obj);
1007 X("?Obj", T::OptObj);
1008 X("Ref", T::Ref);
1009 X("?Res", T::OptRes);
1010 X("Res", T::Res);
1011 X("?SArr", T::OptSArr);
1012 X("SArr", T::SArr);
1013 X("?SVArr", T::OptSVArr);
1014 X("SVArr", T::SVArr);
1015 X("?SDArr", T::OptSDArr);
1016 X("SDArr", T::SDArr);
1017 X("?SVec", T::OptSVec);
1018 X("SVec", T::SVec);
1019 X("?SDict", T::OptSDict);
1020 X("SDict", T::SDict);
1021 X("?SKeyset", T::OptSKeyset);
1022 X("SKeyset", T::SKeyset);
1023 X("?SStr", T::OptSStr);
1024 X("SStr", T::SStr);
1025 X("?Str", T::OptStr);
1026 X("Str", T::Str);
1027 X("Unc", T::Unc);
1028 X("?UncArrKey", T::OptUncArrKey);
1029 X("?ArrKey", T::OptArrKey);
1030 X("UncArrKey",T::UncArrKey);
1031 X("ArrKey", T::ArrKey);
1032 X("Uninit", T::Uninit);
1034 #undef X
1035 #undef Y
1037 // Make sure the above parsing code is revisited when new tags are
1038 // added (we'll get a warning for a missing case label):
1039 if (debug) switch (RepoAuthType{}.tag()) {
1040 case T::Uninit:
1041 case T::InitNull:
1042 case T::Null:
1043 case T::Int:
1044 case T::OptInt:
1045 case T::Dbl:
1046 case T::OptDbl:
1047 case T::Res:
1048 case T::OptRes:
1049 case T::Bool:
1050 case T::OptBool:
1051 case T::SStr:
1052 case T::OptSStr:
1053 case T::Str:
1054 case T::OptStr:
1055 case T::SArr:
1056 case T::OptSArr:
1057 case T::Arr:
1058 case T::OptArr:
1059 case T::SVArr:
1060 case T::OptSVArr:
1061 case T::VArr:
1062 case T::OptVArr:
1063 case T::SDArr:
1064 case T::OptSDArr:
1065 case T::DArr:
1066 case T::OptDArr:
1067 case T::SVec:
1068 case T::OptSVec:
1069 case T::Vec:
1070 case T::OptVec:
1071 case T::SDict:
1072 case T::OptSDict:
1073 case T::Dict:
1074 case T::OptDict:
1075 case T::SKeyset:
1076 case T::OptSKeyset:
1077 case T::Keyset:
1078 case T::OptKeyset:
1079 case T::Obj:
1080 case T::OptObj:
1081 case T::InitUnc:
1082 case T::Unc:
1083 case T::OptUncArrKey:
1084 case T::OptArrKey:
1085 case T::UncArrKey:
1086 case T::ArrKey:
1087 case T::InitCell:
1088 case T::Cell:
1089 case T::Ref:
1090 case T::InitGen:
1091 case T::Gen:
1092 case T::ExactObj:
1093 case T::SubObj:
1094 case T::OptExactObj:
1095 case T::OptSubObj:
1096 break;
1099 as.error("unrecognized RepoAuthType format");
1100 not_reached();
1103 // Read a vector of IVAs, with format <int, int, int, ...>, the vector may be
1104 // excluded entirely if it is empty.
1105 std::vector<uint32_t> read_argv(AsmState& as) {
1106 as.in.skipSpaceTab();
1107 if (as.in.peek() != '<') return {};
1108 as.in.getc();
1110 std::vector<uint32_t> result;
1111 for (;;) {
1112 auto const num = as.in.readint();
1113 if (num < 0) as.error("Was expecting a positive integer");
1114 result.push_back(num);
1115 as.in.skipWhitespace();
1116 if (as.in.peek() == '>') break;
1117 as.in.expectWs(',');
1119 as.in.expectWs('>');
1121 return result;
1124 // Read in a vector of iterators the format for this vector is:
1125 // <(TYPE) ID, (TYPE) ID, ...>
1126 // Where TYPE := Iter | MIter | CIter
1127 // and ID := Integer
1128 std::vector<uint32_t> read_itervec(AsmState& as) {
1129 std::vector<uint32_t> ret;
1131 as.in.skipSpaceTab();
1132 as.in.expect('<');
1134 std::string word;
1136 for (;;) {
1137 as.in.expectWs('(');
1138 if (!as.in.readword(word)) as.error("Was expecting Iterator type.");
1139 if (!word.compare("Iter")) ret.push_back(KindOfIter);
1140 else if (!word.compare("MIter")) ret.push_back(KindOfMIter);
1141 else if (!word.compare("CIter")) ret.push_back(KindOfCIter);
1142 else as.error("Unknown iterator type `" + word + "'");
1143 as.in.expectWs(')');
1145 as.in.skipSpaceTab();
1147 if (!as.in.readword(word)) as.error("Was expecting iterator id.");
1148 ret.push_back(folly::to<uint32_t>(word));
1150 if (!isdigit(word.back())) {
1151 if (word.back() == '>') break;
1152 if (word.back() != ',') as.error("Was expecting `,'.");
1153 } else {
1154 as.in.skipSpaceTab();
1155 if (as.in.peek() == '>') { as.in.getc(); break; }
1156 as.in.expect(',');
1160 return ret;
1163 // Jump tables are lists of labels.
1164 std::vector<std::string> read_jmpvector(AsmState& as) {
1165 std::vector<std::string> ret;
1167 as.in.skipSpaceTab();
1168 as.in.expect('<');
1170 std::string word;
1171 while (as.in.readword(word)) {
1172 ret.push_back(word);
1174 as.in.expectWs('>');
1176 return ret;
1179 typedef std::vector<std::pair<Id, std::string>> SSwitchJmpVector;
1181 SSwitchJmpVector read_sswitch_jmpvector(AsmState& as) {
1182 SSwitchJmpVector ret;
1184 as.in.skipSpaceTab();
1185 as.in.expect('<');
1187 std::string defLabel;
1188 do {
1189 std::string caseStr;
1190 if (!as.in.readQuotedStr(caseStr)) {
1191 as.error("expected quoted string literal");
1194 as.in.expect(':');
1196 as.in.readword(defLabel);
1198 ret.emplace_back(
1199 as.ue->mergeLitstr(makeStaticString(caseStr)),
1200 defLabel
1203 as.in.skipWhitespace();
1204 } while (as.in.peek() != '-');
1206 as.in.expect('-');
1207 as.in.expect(':');
1208 as.in.readword(defLabel);
1210 // -1 stand for default case.
1211 ret.emplace_back(-1, defLabel);
1213 as.in.expect('>');
1215 return ret;
1218 MemberKey read_member_key(AsmState& as) {
1219 as.in.skipWhitespace();
1221 std::string word;
1222 if (!as.in.readword(word)) as.error("expected member code");
1224 auto optMcode = parseMemberCode(word.c_str());
1225 if (!optMcode) as.error("unrecognized member code `" + word + "'");
1227 auto const mcode = *optMcode;
1228 if (mcode != MW && as.in.getc() != ':') {
1229 as.error("expected `:' after member code `" + word + "'");
1232 switch (mcode) {
1233 case MW:
1234 return MemberKey{};
1235 case MEL: case MPL: {
1236 std::string name;
1237 if (!as.in.readword(name)) {
1238 as.error("couldn't read name for local variable in member key");
1240 return MemberKey{mcode, as.getLocalId(name)};
1242 case MEC: case MPC:
1243 return MemberKey{mcode, read_opcode_arg<int32_t>(as)};
1244 case MEI:
1245 return MemberKey{mcode, read_opcode_arg<int64_t>(as)};
1246 case MET: case MPT: case MQT:
1247 return MemberKey{mcode, read_litstr(as)};
1249 not_reached();
1252 LocalRange read_local_range(AsmState& as) {
1253 auto first = read_opcode_arg<std::string>(as);
1254 if (first.size() > 2 && first[0] == 'L' && first[1] == ':') {
1255 first = "_" + first.substr(2);
1257 auto const pos = first.find('+');
1258 if (pos == std::string::npos) as.error("expecting `+' in local range");
1259 auto const rest = first.substr(pos + 1);
1260 first = first.substr(0, pos);
1261 auto const firstLoc = as.getLocalId(first);
1262 auto const restCount = folly::to<uint32_t>(rest);
1263 if (firstLoc + restCount > as.maxUnnamed) {
1264 as.maxUnnamed = firstLoc + restCount;
1266 return LocalRange{uint32_t(firstLoc), restCount};
1269 //////////////////////////////////////////////////////////////////////
// Maps an opcode mnemonic to the function that parses that opcode's line.
// Filled in by initialize_opcode_map() and consulted by parse_function_body().
1271 std::map<std::string,ParserFunc> opcode_parsers;
// Immediate-list expanders.  Each one expands to the IMM_<type> reader for
// every immediate in turn, bumping immIdx between consecutive immediates.
1273 #define IMM_NA
1274 #define IMM_ONE(t) IMM_##t
1275 #define IMM_TWO(t1, t2) IMM_ONE(t1); ++immIdx; IMM_##t2
1276 #define IMM_THREE(t1, t2, t3) IMM_TWO(t1, t2); ++immIdx; IMM_##t3
1277 #define IMM_FOUR(t1, t2, t3, t4) IMM_THREE(t1, t2, t3); ++immIdx; IMM_##t4
// Per-type immediate readers.  Each reads one operand from the assembler
// input and emits its encoding into the unit being built (as.ue).
//
// Some bytecodes need to know an iva imm for (PUSH|POP)_*.
// IMM_IVA therefore also records the value in immIVA[immIdx].
1280 #define IMM_IVA do { \
1281 auto imm = read_opcode_arg<uint32_t>(as); \
1282 as.ue->emitIVA(imm); \
1283 immIVA[immIdx] = imm; \
1284 } while (0)
// IMM_VSA is not wrapped in do/while: it leaves vecImmStackValues in scope
// so that NUM_POP_SMANY can refer to it.
1286 #define IMM_VSA \
1287 std::vector<std::string> vecImm = read_strvector(as); \
1288 auto const vecImmStackValues = vecImm.size(); \
1289 as.ue->emitInt32(vecImmStackValues); \
1290 for (size_t i = 0; i < vecImmStackValues; ++i) { \
1291 as.ue->emitInt32(as.ue->mergeLitstr(String(vecImm[i]).get())); \
1294 #define IMM_SA as.ue->emitInt32(as.ue->mergeLitstr(read_litstr(as)))
1295 #define IMM_RATA encodeRAT(*as.ue, read_repo_auth_type(as))
1296 #define IMM_I64A as.ue->emitInt64(read_opcode_arg<int64_t>(as))
1297 #define IMM_DA as.ue->emitDouble(read_opcode_arg<double>(as))
// Locals are named in the source and translated to ids via as.getLocalId().
1298 #define IMM_LA as.ue->emitIVA(as.getLocalId( \
1299 read_opcode_arg<std::string>(as)))
1300 #define IMM_IA as.ue->emitIVA(as.getIterId( \
1301 read_opcode_arg<int32_t>(as)))
// NOTE(review): IMM_CAR and IMM_CAW currently expand to identical code.
1302 #define IMM_CAR as.ue->emitIVA(as.getClsRefSlot( \
1303 read_opcode_arg<int32_t>(as)))
1304 #define IMM_CAW as.ue->emitIVA(as.getClsRefSlot( \
1305 read_opcode_arg<int32_t>(as)))
// NOTE(review): unlike its siblings, IMM_OA carries its own trailing `;'.
1306 #define IMM_OA(ty) as.ue->emitByte(read_subop<ty>(as));
1307 #define IMM_AA as.ue->emitInt32(as.ue->mergeArray(read_litarray(as)))
1308 #define IMM_LAR encodeLocalRange(*as.ue, read_local_range(as))
1311 * There can currently be no more than one immvector per instruction,
1312 * and we need access to the size of the immediate vector for
1313 * NUM_POP_*, so the member vector guy exposes a vecImmStackValues
1314 * integer.
// Iterator vector: emits the number of (kind, id) pairs, then every element
// returned by read_itervec() in order.
1316 #define IMM_ILA do { \
1317 std::vector<uint32_t> vecImm = read_itervec(as); \
1318 as.ue->emitInt32(vecImm.size() / 2); \
1319 for (auto& i : vecImm) { \
1320 as.ue->emitInt32(i); \
1322 } while (0)
// Int32 vector: emits the element count followed by each element.
1324 #define IMM_I32LA do { \
1325 std::vector<uint32_t> vecImm = read_argv(as); \
1326 as.ue->emitInt32(vecImm.size()); \
1327 for (auto i : vecImm) { \
1328 as.ue->emitInt32(i); \
1330 } while (0)
// Jump table: emits the label count, then a zero placeholder per label.  Each
// placeholder's position is queued in labelJumps so it can be patched later.
1332 #define IMM_BLA do { \
1333 std::vector<std::string> vecImm = read_jmpvector(as); \
1334 as.ue->emitInt32(vecImm.size()); \
1335 for (auto const& imm : vecImm) { \
1336 labelJumps.emplace_back(imm, as.ue->bcPos()); \
1337 as.ue->emitInt32(0); /* to be patched */ \
1339 } while (0)
// String-switch jump table: like IMM_BLA, but each entry is preceded by the
// id stored in the first element of the pair.
1341 #define IMM_SLA do { \
1342 auto vecImm = read_sswitch_jmpvector(as); \
1343 as.ue->emitInt32(vecImm.size()); \
1344 for (auto const& pair : vecImm) { \
1345 as.ue->emitInt32(pair.first); \
1346 labelJumps.emplace_back(pair.second, as.ue->bcPos()); \
1347 as.ue->emitInt32(0); /* to be patched */ \
1349 } while(0)
// Single branch target: queues the label and emits a zero placeholder.
1351 #define IMM_BA do { \
1352 labelJumps.emplace_back( \
1353 read_opcode_arg<std::string>(as), \
1354 as.ue->bcPos() \
1355 ); \
1356 as.ue->emitInt32(0); \
1357 } while (0)
1359 #define IMM_KA encode_member_key(read_member_key(as), *as.ue)
// Stack-effect helpers used by the O() macro to compute
// NUM_PUSH_<push> - NUM_POP_<pop>.  The fixed forms are constants; the
// variable forms read the immediates captured in immIVA[] (or, for SMANY,
// the vecImmStackValues left in scope by IMM_VSA).
1361 #define NUM_PUSH_NOV 0
1362 #define NUM_PUSH_ONE(a) 1
1363 #define NUM_PUSH_TWO(a,b) 2
1364 #define NUM_PUSH_THREE(a,b,c) 3
1365 #define NUM_PUSH_INS_1(a) 1
1366 #define NUM_PUSH_CMANY immIVA[1] /* number of outputs */
1367 #define NUM_POP_NOV 0
1368 #define NUM_POP_ONE(a) 1
1369 #define NUM_POP_TWO(a,b) 2
1370 #define NUM_POP_THREE(a,b,c) 3
1371 #define NUM_POP_MFINAL immIVA[0]
1372 #define NUM_POP_F_MFINAL immIVA[1]
1373 #define NUM_POP_C_MFINAL (immIVA[0] + 1)
1374 #define NUM_POP_V_MFINAL NUM_POP_C_MFINAL
1375 #define NUM_POP_FMANY immIVA[0] /* number of arguments */
1376 #define NUM_POP_UFMANY (immIVA[0] + immIVA[1] - 1) /* number of arguments */
1377 #define NUM_POP_CVUMANY immIVA[0] /* number of arguments */
1378 #define NUM_POP_CMANY immIVA[0] /* number of arguments */
1379 #define NUM_POP_SMANY vecImmStackValues
// Defines parse_opcode_<name>(AsmState&) for every entry in OPCODES.  In
// order, each generated parser:
//   - traces the line number, stack depth and mnemonic;
//   - enters a reachable region of depth 0 if there is no current stack depth;
//   - ends the FPI region for FCall* opcodes and flags the function as
//     containing calls for the listed FCall variants;
//   - emits the opcode, then reads and emits its immediates via IMM_<imm>;
//   - adjusts the tracked stack by NUM_PUSH_<push> - NUM_POP_<pop>;
//   - begins an FPI region for FPush* opcodes;
//   - registers every queued label jump against this opcode's offset;
//   - enforces the expected stack depth after returns and suspends;
//   - records the source location and, for DefCls/DefClsNop, the class offset;
//   - enters an unreachable region after any TF-flagged opcode except Exit.
1381 #define O(name, imm, pop, push, flags) \
1382 void parse_opcode_##name(AsmState& as) { \
1383 UNUSED uint32_t immIVA[4]; \
1384 UNUSED auto const thisOpcode = Op::name; \
1385 UNUSED const Offset curOpcodeOff = as.ue->bcPos(); \
1386 std::vector<std::pair<std::string, Offset> > labelJumps; \
1388 TRACE( \
1389 4, \
1390 "%d\t[%s] %s\n", \
1391 as.in.getLineNumber(), \
1392 as.displayStackDepth().c_str(), \
1393 #name \
1394 ); \
1396 /* Pretend the stack is reachable and empty, same as hphpc */ \
1397 if (as.currentStackDepth == nullptr) { \
1398 as.enterReachableRegion(0); \
1401 if (isFCallStar(Op##name)) { \
1402 as.endFpi(); \
1405 /* Other FCall* functions perform their own bounds checking. */ \
1406 if (Op##name == OpFCall || Op##name == OpFCallD || \
1407 Op##name == OpFCallAwait || Op##name == OpFCallM || \
1408 Op##name == OpFCallDM) { \
1409 as.fe->containsCalls = true; \
1412 as.ue->emitOp(Op##name); \
1414 UNUSED size_t immIdx = 0; \
1415 IMM_##imm; \
1417 int stackDelta = NUM_PUSH_##push - NUM_POP_##pop; \
1418 as.adjustStack(stackDelta); \
1420 if (isFPush(Op##name)) { \
1421 as.beginFpi(curOpcodeOff); \
1424 for (auto& kv : labelJumps) { \
1425 as.addLabelJump(kv.first, kv.second, curOpcodeOff); \
1428 /* Stack depth should be 0 after RetC or RetV. */ \
1429 if (thisOpcode == OpRetC || thisOpcode == OpRetV || \
1430 thisOpcode == OpRetM) { \
1431 as.enforceStackDepth(0); \
1434 /* Stack depth should be 1 after resume from suspend. */ \
1435 if (thisOpcode == OpCreateCont || thisOpcode == OpAwait || \
1436 thisOpcode == OpYield || thisOpcode == OpYieldK || \
1437 thisOpcode == OpYieldFromDelegate) { \
1438 as.enforceStackDepth(1); \
1441 /* Record source location. */ \
1442 as.ue->recordSourceLocation(as.srcLoc, curOpcodeOff); \
1444 if (Op##name == OpDefCls || Op##name == OpDefClsNop) { \
1445 as.defClsOffsets.emplace(immIVA[0], curOpcodeOff); \
1448 /* Retain stack depth after calls to exit */ \
1449 if ((instrFlags(thisOpcode) & InstrFlags::TF) && \
1450 (Op##name != OpExit)) { \
1451 as.enterUnreachableRegion(); \
1455 OPCODES
// Tear down the helper macros used to generate the opcode parsers.
// NOTE(review): IMM_MA, NUM_PUSH_POS_N and NUM_POP_POS_N are undefined here
// but are not defined in the section above; conversely IMM_NA, IMM_ONE..FOUR,
// IMM_IA, IMM_ILA and IMM_I32LA are defined above and not undefined here.
// Confirm whether that is intentional.
1457 #undef O
1459 #undef IMM_I64A
1460 #undef IMM_SA
1461 #undef IMM_RATA
1462 #undef IMM_DA
1463 #undef IMM_IVA
1464 #undef IMM_LA
1465 #undef IMM_CAR
1466 #undef IMM_CAW
1467 #undef IMM_BA
1468 #undef IMM_BLA
1469 #undef IMM_SLA
1470 #undef IMM_OA
1471 #undef IMM_MA
1472 #undef IMM_AA
1473 #undef IMM_VSA
1474 #undef IMM_KA
1475 #undef IMM_LAR
1477 #undef NUM_PUSH_NOV
1478 #undef NUM_PUSH_ONE
1479 #undef NUM_PUSH_TWO
1480 #undef NUM_PUSH_THREE
1481 #undef NUM_PUSH_POS_N
1482 #undef NUM_PUSH_INS_1
1483 #undef NUM_PUSH_CMANY
1484 #undef NUM_POP_NOV
1485 #undef NUM_POP_ONE
1486 #undef NUM_POP_TWO
1487 #undef NUM_POP_THREE
1488 #undef NUM_POP_POS_N
1489 #undef NUM_POP_MFINAL
1490 #undef NUM_POP_F_MFINAL
1491 #undef NUM_POP_C_MFINAL
1492 #undef NUM_POP_V_MFINAL
1493 #undef NUM_POP_FMANY
1494 #undef NUM_POP_UFMANY
1495 #undef NUM_POP_CVUMANY
1496 #undef NUM_POP_CMANY
1497 #undef NUM_POP_SMANY
// Registers every generated parse_opcode_<name> function in opcode_parsers
// under its mnemonic, by re-expanding OPCODES with a registration macro.
1499 void initialize_opcode_map() {
1500 #define O(name, imm, pop, push, flags) \
1501 opcode_parsers[#name] = parse_opcode_##name;
1502 OPCODES
1503 #undef O
// The single `initializer' object populates the opcode map from its
// constructor.
1506 struct Initializer {
1507 Initializer() { initialize_opcode_map(); }
1508 } initializer;
1510 //////////////////////////////////////////////////////////////////////
1513 * long-string-literal: <string>
1515 * `long-string-literal' is a python-style longstring. See
1516 * readLongString for more details.
1518 String parse_long_string(AsmState& as) {
1519 as.in.skipWhitespace();
1521 std::vector<char> buffer;
1522 if (!as.in.readLongString(buffer)) {
1523 as.error("expected \"\"\"-string of serialized php data");
1525 if (buffer.empty()) {
1526 as.error("empty php serialized data is not a valid php object");
1529 // String wants a null, and dereferences one past the size we give
1530 // it.
1531 buffer.push_back('\0');
1532 return String(&buffer[0], buffer.size() - 1, CopyString);
1536 * maybe-long-string-literal : long-string-literal
1540 String parse_maybe_long_string(AsmState& as) {
1541 as.in.skipWhitespace();
1543 std::vector<char> buffer;
1544 if (!as.in.readLongString(buffer)) {
1545 return StrNR(staticEmptyString());
1547 if (buffer.empty()) {
1548 return StrNR(staticEmptyString());
1551 // String wants a null, and dereferences one past the size we give
1552 // it.
1553 buffer.push_back('\0');
1554 return String(&buffer[0], buffer.size() - 1, CopyString);
1558 * php-serialized : long-string-literal
1561 * `long-string-literal' is a python-style longstring. See
1562 * readLongString for more details.
1564 * Returns a Variant representing the serialized data. It's up to the
1565 * caller to make sure it is a legal literal.
1567 Variant parse_php_serialized(AsmState& as) {
1568 return unserialize_from_string(
1569 parse_long_string(as),
1570 VariableUnserializer::Type::Internal
1575 * maybe-php-serialized : maybe-long-string-literal
1578 Variant parse_maybe_php_serialized(AsmState& as) {
1579 auto s = parse_maybe_long_string(as);
1580 if (!s.empty()) {
1581 return unserialize_from_string(s, VariableUnserializer::Type::Internal);
1583 return Variant();
1587 * directive-numiters : integer ';'
1590 void parse_numiters(AsmState& as) {
1591 if (as.numItersSet) {
1592 as.error("only one .numiters directive may appear in a given function");
1594 int32_t count = read_opcode_arg<int32_t>(as);
1595 as.numItersSet = true;
1596 as.fe->setNumIterators(count);
1597 as.in.expectWs(';');
1601 * directive-numclsrefslots : integer ';'
1604 void parse_numclsrefslots(AsmState& as) {
1605 if (as.numClsRefSlotsSet) {
1606 as.error("only one .numclsrefslots directive may appear "
1607 "in a given function");
1609 int32_t count = read_opcode_arg<int32_t>(as);
1610 as.numClsRefSlotsSet = true;
1611 as.fe->setNumClsRefSlots(count);
1612 as.in.expectWs(';');
1616 * directive-declvars : var-name* ';'
1619 * Variables are usually allocated when first seen, but
1620 * declvars can be used to preallocate variables for when
1621 * the exact assignment matters (like for closures).
1623 void parse_declvars(AsmState& as) {
1624 while (true) {
1625 as.in.skipWhitespace();
1626 std::string var;
1627 if (as.in.readQuotedStr(var) || as.in.readword(var)) {
1628 as.getLocalId(var);
1630 else {
1631 break;
1634 as.in.expectWs(';');
// Forward declaration: the try/fault/catch directive parsers below recurse
// into nested function bodies, passing nestLevel + 1.
1637 void parse_function_body(AsmState&, int nestLevel = 0);
1640 * directive-fault : identifier integer? '{' function-body
1643 void parse_fault(AsmState& as, int nestLevel) {
1644 const Offset start = as.ue->bcPos();
1646 std::string label;
1647 if (!as.in.readword(label)) {
1648 as.error("expected label name after .try_fault");
1650 int iterId = -1;
1651 as.in.skipWhitespace();
1652 if (as.in.peek() != '{') {
1653 iterId = read_opcode_arg<int32_t>(as);
1655 as.in.expectWs('{');
1656 parse_function_body(as, nestLevel + 1);
1658 auto& eh = as.fe->addEHEnt();
1659 eh.m_type = EHEnt::Type::Fault;
1660 eh.m_base = start;
1661 eh.m_past = as.ue->bcPos();
1662 eh.m_iterId = iterId;
1663 eh.m_end = kInvalidOffset;
1665 as.addLabelEHEnt(label, as.fe->ehtab.size() - 1);
1669 * directive-catch : identifier integer? '{' function-body
1672 void parse_catch(AsmState& as, int nestLevel) {
1673 const Offset start = as.ue->bcPos();
1675 std::string label;
1676 if (!as.in.readword(label)) {
1677 as.error("expected label name after .try_catch");
1679 int iterId = -1;
1680 as.in.skipWhitespace();
1681 if (as.in.peek() != '{') {
1682 iterId = read_opcode_arg<int32_t>(as);
1684 as.in.expectWs('{');
1685 parse_function_body(as, nestLevel + 1);
1687 auto& eh = as.fe->addEHEnt();
1688 eh.m_type = EHEnt::Type::Catch;
1689 eh.m_base = start;
1690 eh.m_past = as.ue->bcPos();
1691 eh.m_iterId = iterId;
1692 eh.m_end = kInvalidOffset;
1694 as.addLabelEHEnt(label, as.fe->ehtab.size() - 1);
1698 * directive-try-catch : integer? '{' function-body ".catch" '{' function-body
1701 void parse_try_catch(AsmState& as, int nestLevel) {
1702 const Offset start = as.ue->bcPos();
1704 int iterId = -1;
1705 as.in.skipWhitespace();
1706 if (as.in.peek() != '{') {
1707 iterId = read_opcode_arg<int32_t>(as);
1710 // Emit try body.
1711 as.in.expectWs('{');
1712 parse_function_body(as, nestLevel + 1);
1713 if (!as.isUnreachable()) {
1714 as.error("expected .try region to not fall-thru");
1717 const Offset handler = as.ue->bcPos();
1719 // Emit catch body.
1720 as.enterReachableRegion(0);
1721 as.ue->emitOp(OpCatch);
1722 as.adjustStack(1);
1723 as.enforceStackDepth(1);
1725 std::string word;
1726 as.in.skipWhitespace();
1727 if (!as.in.readword(word) || word != ".catch") {
1728 as.error("expected .catch directive after .try");
1730 as.in.skipWhitespace();
1731 as.in.expectWs('{');
1732 parse_function_body(as, nestLevel + 1);
1734 const Offset end = as.ue->bcPos();
1736 auto& eh = as.fe->addEHEnt();
1737 eh.m_type = EHEnt::Type::Catch;
1738 eh.m_base = start;
1739 eh.m_past = handler;
1740 eh.m_iterId = iterId;
1741 eh.m_handler = handler;
1742 eh.m_end = end;
1746 * directive-srcloc : line_no ':' chr_no ',' line_no ':' chr_no ';'
1748 * line_no : integer
1750 * chr_no : integer
1753 * Record that subsequent bytecodes are at the source location indicated by the
1754 * range of line numbers and character positions specified.
1756 void parse_srcloc(AsmState& as, int /*nestLevel*/) {
1757 auto const line0 = as.in.readint();
1758 as.in.expectWs(':');
1759 auto const char0 = as.in.readint();
1760 as.in.expectWs(',');
1761 auto const line1 = as.in.readint();
1762 as.in.expectWs(':');
1763 auto const char1 = as.in.readint();
1764 as.in.expectWs(';');
1766 as.srcLoc = Location::Range(line0, char0, line1, char1);
1770 * directive-static : '$' local_name = long-string-literal ';'
1773 * Record that the function contains a static named local_name along with an
1774 * associated initializer.
1776 void parse_static(AsmState& as) {
1777 Func::SVInfo svInfo;
1778 std::string name;
1779 String init;
1781 as.in.expectWs('$');
1782 if (!as.in.readword(name)) {
1783 as.error("Statics must be named");
1785 svInfo.name = makeStaticString(name);
1786 as.fe->staticVars.push_back(svInfo);
1788 as.in.expectWs(';');
1792 * directive-doccomment : long-string-literal ';'
1796 void parse_func_doccomment(AsmState& as) {
1797 auto const doc = parse_long_string(as);
1798 as.in.expectWs(';');
1800 as.fe->docComment = makeStaticString(doc);
1804 * function-body : fbody-line* '}'
1807 * fbody-line : ".numiters" directive-numiters
1808 * | ".numclsrefslots" directive-numclsrefslots
1809 * | ".declvars" directive-declvars
1810 * | ".try_fault" directive-fault
1811 * | ".try_catch" directive-catch
1812 * | ".try" directive-try-catch
1813 * | ".ismemoizewrapper"
1814 * | ".dynamicallycallable"
1815 * | ".srcloc" directive-srcloc
1816 * | ".doc" directive-doccomment
1817 * | label-name
1818 * | opcode-line
1821 * label-name : identifier ':'
1824 * opcode-line : opcode-mnemonic <junk that depends on opcode> '\n'
// Parses the lines of a function body up to and including the closing '}'.
// Each line is a '.'-prefixed directive, a `label:' definition or an opcode.
// nestLevel is 0 for the outermost body and is incremented by the try/fault/
// catch directive parsers for nested bodies.
// NOTE(review): the grammar comment lists ".dynamicallycallable", which is
// not dispatched below, while ".static" is dispatched but not listed there.
1827 void parse_function_body(AsmState& as, int nestLevel /* = 0 */) {
1828 std::string word;
1829 for (;;) {
1830 as.in.skipWhitespace();
// Closing brace: only the outermost body finishes the function.
1831 if (as.in.peek() == '}') {
1832 as.in.getc();
1833 if (!nestLevel) {
1834 as.finishFunction();
1836 return;
1839 if (!as.in.readword(word)) {
1840 as.error("unexpected directive or opcode line in function body");
// Directives all start with '.'; an unknown one is an error.
1842 if (word[0] == '.') {
1843 if (word == ".ismemoizewrapper") {
1844 as.fe->isMemoizeWrapper = true;
1845 as.in.expectWs(';');
1846 continue;
1848 if (word == ".numiters") { parse_numiters(as); continue; }
1849 if (word == ".declvars") { parse_declvars(as); continue; }
1850 if (word == ".numclsrefslots") { parse_numclsrefslots(as); continue; }
1851 if (word == ".try_fault") { parse_fault(as, nestLevel); continue; }
1852 if (word == ".try_catch") { parse_catch(as, nestLevel); continue; }
1853 if (word == ".try") { parse_try_catch(as, nestLevel); continue; }
1854 if (word == ".srcloc") { parse_srcloc(as, nestLevel); continue; }
1855 if (word == ".static") { parse_static(as); continue; }
1856 if (word == ".doc") { parse_func_doccomment(as); continue; }
1857 as.error("unrecognized directive `" + word + "' in function");
// A word immediately followed by ':' defines a label.
1859 if (as.in.peek() == ':') {
1860 as.in.getc();
1861 as.addLabelTarget(word);
1862 continue;
1865 // Ok, it better be an opcode now.
1866 auto it = opcode_parsers.find(word);
1867 if (it == opcode_parsers.end()) {
1868 as.error("unrecognized opcode `" + word + "'");
1870 it->second(as);
// After the operands, only a line end, a '#' or EOF may follow.
1872 as.in.skipSpaceTab();
1873 if (as.in.peek() != '\n' &&
1874 as.in.peek() != '\r' &&
1875 as.in.peek() != '#' &&
1876 as.in.peek() != EOF) {
1877 as.error("too many arguments for opcode `" + word + "'");
1882 void parse_user_attribute(AsmState& as,
1883 UserAttributeMap& userAttrs) {
1884 auto name = read_litstr(as);
1885 as.in.expectWs('(');
1887 auto var = parse_php_serialized(as);
1889 as.in.expectWs(')');
1891 if (!var.isPHPArray()) {
1892 as.error("user attribute values must be arrays");
1895 userAttrs[name] =
1896 make_tv<KindOfArray>(ArrayData::GetScalarArray(std::move(var)));
1900 * attribute : attribute-name
1901 * | string-literal '(' long-string-literal ')'
1904 * attribute-list : empty
1905 * | '[' attribute* ']'
1908 * The `attribute-name' rule is context-sensitive; see as-shared.cpp.
1909 * The second attribute form is for user attributes and only applies
1910 * if userAttrs is non-null.
1912 Attr parse_attribute_list(AsmState& as, AttrContext ctx,
1913 UserAttributeMap *userAttrs = nullptr,
1914 bool* isTop = nullptr) {
1915 as.in.skipWhitespace();
1916 int ret = AttrNone;
1917 if (as.in.peek() != '[') return Attr(ret);
1918 as.in.getc();
1920 std::string word;
1921 for (;;) {
1922 as.in.skipWhitespace();
1923 if (as.in.peek() == ']') break;
1924 if (as.in.peek() == '"' && userAttrs) {
1925 parse_user_attribute(as, *userAttrs);
1926 continue;
1928 if (!as.in.readword(word)) break;
1930 auto const abit = string_to_attr(ctx, word);
1931 if (abit) {
1932 ret |= *abit;
1933 continue;
1935 if (isTop && word == "nontop") {
1936 *isTop = false;
1937 continue;
1940 as.error("unrecognized attribute `" + word + "' in this context");
1942 as.in.expect(']');
1943 return Attr(ret);
1947 * type-info : empty
1948 * | '<' maybe-string-literal maybe-string-literal
1949 * type-flag* '>'
1951 * type-constraint : empty
1952 * | '<' maybe-string-literal
1953 * type-flag* '>'
1955 * This parses type-info if noUserType is false, type-constraint if true
1957 std::pair<const StringData *, TypeConstraint> parse_type_info(
1958 AsmState& as, bool noUserType = false) {
1959 as.in.skipWhitespace();
1960 if (as.in.peek() != '<') return {};
1961 as.in.getc();
1963 const StringData *userType = noUserType ? nullptr : read_maybe_litstr(as);
1964 const StringData *typeName = read_maybe_litstr(as);
1966 std::string word;
1967 auto flags = TypeConstraint::NoFlags;
1968 for (;;) {
1969 as.in.skipWhitespace();
1970 if (as.in.peek() == '>') break;
1971 if (!as.in.readword(word)) break;
1973 auto const abit = string_to_type_flag(word);
1974 if (abit) {
1975 flags = flags | *abit;
1976 continue;
1979 as.error("unrecognized type flag `" + word + "' in this context");
1981 as.in.expect('>');
1982 return std::make_pair(userType, TypeConstraint{typeName, flags});
1984 TypeConstraint parse_type_constraint(AsmState& as) {
1985 return parse_type_info(as, true).second;
1990 * parameter-list : '(' param-name-list ')'
1993 * param-name-list : empty
1994 * | param-name ',' param-name-list
1997 * param-name : '$' identifier dv-initializer
1998 * | '&' '$' identifier dv-initializer
2001 * dv-initializer : empty
2002 * | '=' identifier arg-default
2005 * arg-default : empty
2006 * | '(' long-string-literal ')'
// Parses an optional parenthesized parameter list and appends each parameter
// to the current function (as.fe).  Per parameter, in order: an attribute
// list, an optional `...' variadic marker, an optional `inout' keyword, an
// optional type-info, an optional `&', `$name', and an optional
// `= label [ ( long-string ) ]' default-value initializer.
2009 void parse_parameter_list(AsmState& as) {
2010 as.in.skipWhitespace();
2011 if (as.in.peek() != '(') return;
2012 as.in.getc();
2014 bool seenVariadic = false;
2015 bool seenRef = false;
2017 for (;;) {
2018 FuncEmitter::ParamInfo param;
2019 param.byRef = false;
2020 param.inout = false;
2022 as.in.skipWhitespace();
2023 int ch = as.in.peek();
2024 if (ch == ')') { as.in.getc(); break; } // allow empty param lists
// Reaching here after a variadic parameter means another parameter follows it.
2026 if (seenVariadic) {
2027 as.error("functions can only have one variadic argument");
2030 parse_attribute_list(as, AttrContext::Parameter, &param.userAttributes);
// NOTE(review): `ch' was peeked before the attribute list was consumed, so
// if an attribute list is present this compares '[' rather than the next
// character -- confirm whether `[attrs] ...$x' is meant to be accepted.
2032 if (ch == '.') {
2033 as.in.getc();
2034 if (as.in.getc() != '.' ||
2035 as.in.getc() != '.') {
2036 as.error("expecting '...'");
2039 seenVariadic = true;
2040 param.variadic = true;
2041 as.fe->attrs |= AttrVariadicParam;
2044 if (as.in.tryConsume("inout")) {
2045 if (seenVariadic) {
2046 as.error("inout parameters cannot be variadic");
2048 if (seenRef) {
2049 as.error("functions cannot contain both inout and ref parameters");
2051 param.inout = true;
2052 as.fe->attrs |= AttrTakesInOutParams;
2055 std::tie(param.userType, param.typeConstraint) = parse_type_info(as);
2057 as.in.skipWhitespace();
2058 ch = as.in.getc();
// By-reference marker; mutually exclusive with inout on the whole function.
2060 if (ch == '&') {
2061 if (param.inout) {
2062 as.error("parameters cannot be marked both inout and ref");
2064 if (as.fe->attrs & AttrTakesInOutParams) {
2065 as.error("functions cannot contain both inout and ref parameters");
2067 seenRef = true;
2068 param.byRef = true;
2069 ch = as.in.getc();
2071 if (ch != '$') {
2072 as.error("function parameters must have a $ prefix");
2074 std::string name;
2075 if (!as.in.readword(name)) {
2076 as.error("expected parameter name after $");
2079 as.in.skipWhitespace();
2080 ch = as.in.getc();
// Optional default-value initializer: `= label', optionally followed by the
// PHP source of the default in parentheses.
2081 if (ch == '=') {
2082 if (seenVariadic) {
2083 as.error("variadic parameter cannot have dv-initializer");
2086 std::string label;
2087 if (!as.in.readword(label)) {
2088 as.error("expected label name for dv-initializer");
2090 as.addLabelDVInit(label, as.fe->params.size());
2092 as.in.skipWhitespace();
2093 ch = as.in.getc();
2094 if (ch == '(') {
2095 String str = parse_long_string(as);
2096 param.phpCode = makeStaticString(str);
// Try to pre-compute a constant default: null/true/false (case-insensitive),
// or a numeric literal when the type constraint allows int/double.
2097 TypedValue tv;
2098 tvWriteUninit(tv);
2099 if (str.size() == 4) {
2100 if (!strcasecmp("null", str.data())) {
2101 tvWriteNull(tv);
2102 } else if (!strcasecmp("true", str.data())) {
2103 tv = make_tv<KindOfBoolean>(true);
2105 } else if (str.size() == 5 && !strcasecmp("false", str.data())) {
2106 tv = make_tv<KindOfBoolean>(false);
2108 auto utype = param.typeConstraint.underlyingDataType();
2109 if (tv.m_type == KindOfUninit &&
2110 (!utype || *utype == KindOfInt64 || *utype == KindOfDouble)) {
2111 int64_t ival;
2112 double dval;
2113 int overflow = 0;
2114 auto dt = str.get()->isNumericWithVal(ival, dval, false, &overflow);
2115 if (overflow == 0) {
2116 if (dt == KindOfInt64) {
2117 if (utype == KindOfDouble) tv = make_tv<KindOfDouble>(ival);
2118 else tv = make_tv<KindOfInt64>(ival);
2119 } else if (dt == KindOfDouble &&
2120 (!utype || utype == KindOfDouble)) {
2121 tv = make_tv<KindOfDouble>(dval);
// Only record a default value when one of the cases above matched.
2125 if (tv.m_type != KindOfUninit) {
2126 param.defaultValue = tv;
2128 as.in.expectWs(')');
2129 as.in.skipWhitespace();
2130 ch = as.in.getc();
2134 as.fe->appendParam(makeStaticString(name), param);
// `ch' now holds the separator that followed this parameter.
2136 if (ch == ')') break;
2137 if (ch != ',') as.error("expected , between parameter names");
2141 void parse_function_flags(AsmState& as) {
2142 as.in.skipWhitespace();
2143 std::string flag;
2144 for (;;) {
2145 if (as.in.peek() == '{') break;
2146 if (!as.in.readword(flag)) break;
2148 if (flag == "isGenerator") {
2149 as.fe->isGenerator = true;
2150 } else if (flag == "isAsync") {
2151 as.fe->isAsync = true;
2152 } else if (flag == "isClosureBody") {
2153 as.fe->isClosureBody = true;
2154 } else if (flag == "isPairGenerator") {
2155 as.fe->isPairGenerator = true;
2156 } else {
2157 as.error("Unexpected function flag \"" + flag + "\"");
2163 * line-range : "(" integer "," integer ")"
2166 bool parse_line_range(AsmState& as, int& line0, int& line1) {
2167 as.in.skipWhitespace();
2168 if (as.in.peek() != '(') {
2169 line0 = as.in.getLineNumber();
2170 line1 = as.in.getLineNumber() + 1;
2171 return false;
2173 as.in.getc();
2174 line0 = as.in.readint();
2175 as.in.expectWs(',');
2176 line1 = as.in.readint();
2177 as.in.expectWs(')');
2178 return true;
2182 * If we haven't seen a pseudomain and we are compiling systemlib,
2183 * add a pseudomain and return true
2184 * If we haven't seen a pseudomain and we are not compiling systemlib,
2185 * return false so that the caller can give an assembler error
2186 * Otherwise, return true
2188 bool ensure_pseudomain(AsmState& as) {
2189 if (!as.emittedPseudoMain) {
2190 if (!SystemLib::s_inited) {
2192 * The SystemLib::s_hhas_unit is required to be merge-only,
2193 * and we create the source by concatenating separate .hhas files
2194 * Rather than choosing one to have the .main directive, we just
2195 * generate a trivial pseudoMain automatically.
2197 as.ue->addTrivialPseudoMain();
2198 as.emittedPseudoMain = true;
2199 } else {
2200 return false;
2203 return true;
// Name of the user attribute that check_native() looks for.
2206 static StaticString s_native("__Native");
2208 MaybeDataType type_constraint_to_data_type(LowStringPtr user_type,
2209 const TypeConstraint& tc) {
2210 if (auto type = tc.typeName()) {
2211 // in type_annotation.cpp this code uses m_typeArgs
2212 // as indicator that type can represent one of collection types
2213 // when we extract data from the constraint we know if type is one of
2214 // collection types but we don't have direct way to figure out if
2215 // type used to have type arguments - do it indirectly by checking
2216 // if name of user type contains '<'
2217 auto has_type_args =
2218 user_type && user_type->slice().str().find('<') != std::string::npos;
2219 return get_datatype(
2220 type->toCppString(),
2221 has_type_args,
2222 false, // no syntactic functions in type annotations
2223 false, // no xhp type annotation
2224 false, // no tuples in type annotation
2225 tc.isNullable(),
2226 tc.isSoft());
2228 return folly::none;
// User attribute names that check_native() looks up on native functions.
2231 static const StaticString
2232 s_AllowStatic("__AllowStatic"),
2233 s_ParamCoerceModeNull("__ParamCoerceModeNull"),
2234 s_ParamCoerceModeFalse("__ParamCoerceModeFalse");
2238 * Checks whether the current function is native by looking at the user
2239 * attribute map and sets the isNative flag accordingly
2240 * If the given function is an opcode implementation, then isNative is not set
2242 void check_native(AsmState& as, bool is_construct_or_destruct) {
2243 if (as.fe->userAttributes.count(s_native.get())) {
2244 if (SystemLib::s_inited) {
2245 as.error("Native function may only appear in systemlib");
2248 as.fe->hniReturnType = is_construct_or_destruct
2249 ? KindOfNull
2250 : type_constraint_to_data_type(as.fe->retUserType,
2251 as.fe->retTypeConstraint);
2252 as.fe->isNative =
2253 !(as.fe->parseNativeAttributes(as.fe->attrs) & Native::AttrOpCodeImpl);
2255 // set extra attributes
2256 as.fe->attrs |= AttrBuiltin | AttrSkipFrame | AttrMayUseVV;
2258 if (as.fe->pce() &&
2259 !(as.fe->attrs & AttrStatic) &&
2260 !as.fe->userAttributes.count(s_AllowStatic.get())) {
2261 as.fe->attrs |= AttrRequiresThis;
2263 if (as.fe->userAttributes.count(s_ParamCoerceModeFalse.get())) {
2264 as.fe->attrs |= AttrParamCoerceModeFalse;
2266 if (as.fe->userAttributes.count(s_ParamCoerceModeNull.get())) {
2267 as.fe->attrs |= AttrParamCoerceModeNull;
2269 if (!(as.fe->attrs &
2270 (AttrParamCoerceModeFalse | AttrParamCoerceModeNull))) {
2271 as.fe->attrs |= AttrParamCoerceModeNull;
2274 for (auto& pi : as.fe->params) {
2275 pi.builtinType =
2276 type_constraint_to_data_type(pi.userType, pi.typeConstraint);
2282 * directive-function : attribute-list ?line-range type-info identifier
2283 * parameter-list function-flags '{' function-body
2286 void parse_function(AsmState& as) {
2287 if (!ensure_pseudomain(as)) {
2288 as.error(".function blocks must all follow the .main block");
2291 as.in.skipWhitespace();
2293 bool isTop = true;
2295 UserAttributeMap userAttrs;
2296 Attr attrs = parse_attribute_list(as, AttrContext::Func, &userAttrs, &isTop);
2298 if (!SystemLib::s_inited) {
2299 attrs |= AttrUnique | AttrPersistent | AttrBuiltin;
2302 // Be conservative by default. HHBBC can clear it where appropriate.
2303 attrs |= AttrMayUseVV;
2305 if(!isTop && as.emittedTopLevelFunc) {
2306 as.error("All top level functions must be defined after any "
2307 "non-top functions");
2310 as.emittedTopLevelFunc |= isTop;
2312 int line0;
2313 int line1;
2314 parse_line_range(as, line0, line1);
2316 auto typeInfo = parse_type_info(as);
2317 std::string name;
2318 if (!as.in.readname(name)) {
2319 as.error(".function must have a name");
2322 as.fe = as.ue->newFuncEmitter(makeStaticString(name));
2323 as.fe->init(line0, line1, as.ue->bcPos(), attrs, isTop, 0);
2324 std::tie(as.fe->retUserType, as.fe->retTypeConstraint) = typeInfo;
2325 as.fe->userAttributes = userAttrs;
2327 parse_parameter_list(as);
2328 parse_function_flags(as);
2330 check_native(as, false);
2332 as.in.expectWs('{');
2334 as.srcLoc = Location::Range{-1,-1,-1,-1};
2335 parse_function_body(as);
2339 * directive-method : attribute-list ?line-range type-info identifier
2340 * parameter-list function-flags '{' function-body
2343 void parse_method(AsmState& as) {
2344 as.in.skipWhitespace();
2346 UserAttributeMap userAttrs;
2347 Attr attrs = parse_attribute_list(as, AttrContext::Func, &userAttrs);
2349 if (!SystemLib::s_inited) {
2350 attrs |= AttrBuiltin;
2353 int line0;
2354 int line1;
2355 parse_line_range(as, line0, line1);
2357 auto typeInfo = parse_type_info(as);
2358 std::string name;
2359 if (!as.in.readname(name)) {
2360 as.error(".method requires a method name");
2363 as.fe = as.ue->newMethodEmitter(makeStaticString(name), as.pce);
2364 as.pce->addMethod(as.fe);
2365 as.fe->init(line0, line1,
2366 as.ue->bcPos(), attrs, false, 0);
2367 std::tie(as.fe->retUserType, as.fe->retTypeConstraint) = typeInfo;
2368 as.fe->userAttributes = userAttrs;
2370 parse_parameter_list(as);
2371 parse_function_flags(as);
2373 check_native(as, name == "__construct" || name == "__destruct");
2375 as.in.expectWs('{');
2377 as.srcLoc = Location::Range{-1,-1,-1,-1};
2378 parse_function_body(as);
2382 * member-tv-initializer : '=' php-serialized ';'
2383 * | '=' uninit ';'
2384 * | ';'
2387 TypedValue parse_member_tv_initializer(AsmState& as) {
2388 as.in.skipWhitespace();
2390 TypedValue tvInit;
2391 tvWriteNull(tvInit); // Don't confuse Variant with uninit data
2393 int what = as.in.getc();
2394 if (what == '=') {
2395 as.in.skipWhitespace();
2397 if (as.in.peek() != '\"') {
2398 // It might be an uninitialized property/constant.
2399 if (!as.in.tryConsume("uninit")) {
2400 as.error("Expected \"\"\" or \"uninit\" after '=' in "
2401 "const/property initializer");
2403 as.in.expectWs(';');
2404 tvWriteUninit(tvInit);
2405 return tvInit;
2408 tvAsVariant(&tvInit) = parse_php_serialized(as);
2409 if (tvInit.m_type == KindOfObject) {
2410 as.error("property initializer can't be an object");
2411 } else if (tvInit.m_type == KindOfResource) {
2412 as.error("property initializer can't be a resource");
2413 } else {
2414 tvAsVariant(&tvInit).setEvalScalar();
2416 as.in.expectWs(';');
2417 } else if (what == ';') {
2418 // already null
2419 } else {
2420 as.error("expected '=' or ';' after property name");
2423 return tvInit;
2427 * directive-property : attribute-list maybe-long-string-literal type-info
2428 * identifier member-tv-initializer
2431 * Define a property with an associated type and heredoc.
2433 void parse_property(AsmState& as) {
2434 as.in.skipWhitespace();
2436 Attr attrs = parse_attribute_list(as, AttrContext::Prop);
2438 auto const heredoc = makeStaticString(parse_maybe_long_string(as));
2439 auto const userTy = parse_type_info(as, false).first;
2440 auto const userTyStr = userTy ? userTy : staticEmptyString();
2442 std::string name;
2443 if (!as.in.readword(name)) {
2444 as.error("expected name for property");
2447 TypedValue tvInit = parse_member_tv_initializer(as);
2448 as.pce->addProperty(makeStaticString(name),
2449 attrs,
2450 userTyStr,
2451 heredoc,
2452 &tvInit,
2453 RepoAuthType{});
2457 * const-flags : isType
2460 * directive-const : identifier const-flags member-tv-initializer
2461 * | identifier const-flags ';'
2464 void parse_constant(AsmState& as) {
2465 as.in.skipWhitespace();
2467 std::string name;
2468 if (!as.in.readword(name)) {
2469 as.error("expected name for constant");
2472 bool isType = as.in.tryConsume("isType");
2473 as.in.skipWhitespace();
2475 if (as.in.peek() == ';') {
2476 as.in.getc();
2477 as.pce->addAbstractConstant(makeStaticString(name),
2478 staticEmptyString(),
2479 isType);
2480 return;
2483 TypedValue tvInit = parse_member_tv_initializer(as);
2484 as.pce->addConstant(makeStaticString(name),
2485 staticEmptyString(), &tvInit,
2486 staticEmptyString(),
2487 isType);
2491 * directive-default-ctor : ';'
2494 * No-op, for backward compat
2496 void parse_default_ctor(AsmState& as) {
2497 assertx(!as.fe && as.pce);
2498 as.in.expectWs(';');
2502 * directive-use : identifier+ ';'
2503 * | identifier+ '{' use-line* '}'
2506 * use-line : use-name-ref "insteadof" identifier+ ';'
2507 * | use-name-ref "as" attribute-list identifier ';'
2508 * | use-name-ref "as" attribute-list ';'
2511 void parse_use(AsmState& as) {
2512 std::vector<std::string> usedTraits;
2513 for (;;) {
2514 std::string name;
2515 if (!as.in.readword(name)) break;
2516 usedTraits.push_back(name);
2518 if (usedTraits.empty()) {
2519 as.error(".use requires a trait name");
2522 for (size_t i = 0; i < usedTraits.size(); ++i) {
2523 as.pce->addUsedTrait(makeStaticString(usedTraits[i]));
2525 as.in.skipWhitespace();
2526 if (as.in.peek() != '{') {
2527 as.in.expect(';');
2528 return;
2530 as.in.getc();
2532 for (;;) {
2533 as.in.skipWhitespace();
2534 if (as.in.peek() == '}') break;
2536 std::string traitName;
2537 std::string identifier;
2538 if (!as.in.readword(traitName)) {
2539 as.error("expected identifier for line in .use block");
2541 as.in.skipWhitespace();
2542 if (as.in.peek() == ':') {
2543 as.in.getc();
2544 as.in.expect(':');
2545 if (!as.in.readword(identifier)) {
2546 as.error("expected identifier after ::");
2548 } else {
2549 identifier = traitName;
2550 traitName.clear();
2553 if (as.in.tryConsume("as")) {
2554 Attr attrs = parse_attribute_list(as, AttrContext::TraitImport);
2555 std::string alias;
2556 if (!as.in.readword(alias)) {
2557 if (attrs != AttrNone) {
2558 alias = identifier;
2559 } else {
2560 as.error("expected identifier or attribute list after "
2561 "`as' in .use block");
2565 as.pce->addTraitAliasRule(PreClass::TraitAliasRule(
2566 makeStaticString(traitName),
2567 makeStaticString(identifier),
2568 makeStaticString(alias),
2569 attrs));
2570 } else if (as.in.tryConsume("insteadof")) {
2571 if (traitName.empty()) {
2572 as.error("Must specify TraitName::name when using a trait insteadof");
2575 PreClass::TraitPrecRule precRule(
2576 makeStaticString(traitName),
2577 makeStaticString(identifier));
2579 bool addedOtherTraits = false;
2580 std::string whom;
2581 while (as.in.readword(whom)) {
2582 precRule.addOtherTraitName(makeStaticString(whom));
2583 addedOtherTraits = true;
2585 if (!addedOtherTraits) {
2586 as.error("one or more trait names expected after `insteadof'");
2589 as.pce->addTraitPrecRule(precRule);
2590 } else {
2591 as.error("expected `as' or `insteadof' in .use block");
2594 as.in.expectWs(';');
2597 as.in.expect('}');
2601 * directive-enum_ty : type-constraint ';'
2605 void parse_enum_ty(AsmState& as) {
2606 if (as.enumTySet) {
2607 as.error("only one .enum_ty directive may appear in a given class");
2609 as.enumTySet = true;
2611 as.pce->setEnumBaseTy(parse_type_constraint(as));
2613 as.in.expectWs(';');
2617 * directive-require : 'extends' '<' indentifier '>' ';'
2618 * | 'implements' '<' indentifier '>' ';'
2622 void parse_require(AsmState& as) {
2623 as.in.skipWhitespace();
2625 bool extends = as.in.tryConsume("extends");
2626 if (!extends && !as.in.tryConsume("implements")) {
2627 as.error(".require should be extends or implements");
2630 as.in.expectWs('<');
2631 std::string name;
2632 if (!as.in.readname(name)) {
2633 as.error(".require expects a class or interface name");
2635 as.in.expectWs('>');
2637 as.pce->addClassRequirement(PreClass::ClassRequirement(
2638 makeStaticString(name), extends
2641 as.in.expectWs(';');
2645 * directive-doccomment : long-string-literal ';'
2649 void parse_cls_doccomment(AsmState& as) {
2650 auto const doc = parse_long_string(as);
2651 as.in.expectWs(';');
2653 as.pce->setDocComment(makeStaticString(doc));
2657 * class-body : class-body-line* '}'
2660 * class-body-line : ".method" directive-method
2661 * | ".property" directive-property
2662 * | ".const" directive-const
2663 * | ".use" directive-use
2664 * | ".default_ctor" directive-default-ctor
2665 * | ".enum_ty" directive-enum-ty
2666 * | ".require" directive-require
2667 * | ".doc" directive-doccomment
2670 void parse_class_body(AsmState& as) {
2671 if (!ensure_pseudomain(as)) {
2672 as.error(".class blocks must all follow the .main block");
2675 std::string directive;
2676 while (as.in.readword(directive)) {
2677 if (directive == ".method") { parse_method(as); continue; }
2678 if (directive == ".property") { parse_property(as); continue; }
2679 if (directive == ".const") { parse_constant(as); continue; }
2680 if (directive == ".use") { parse_use(as); continue; }
2681 if (directive == ".default_ctor") { parse_default_ctor(as); continue; }
2682 if (directive == ".enum_ty") { parse_enum_ty(as); continue; }
2683 if (directive == ".require") { parse_require(as); continue; }
2684 if (directive == ".doc") { parse_cls_doccomment(as); continue; }
2686 as.error("unrecognized directive `" + directive + "' in class");
2688 as.in.expect('}');
2691 PreClass::Hoistable compute_hoistable(AsmState& as,
2692 const std::string &name,
2693 const std::string &parentName) {
2694 auto &pce = *as.pce;
2695 bool system = pce.attrs() & AttrBuiltin;
2697 if (pce.methods().size() == 1 && pce.methods()[0]->isClosureBody) {
2698 return PreClass::NotHoistable;
2700 if (!system) {
2701 if (!pce.interfaces().empty() ||
2702 !pce.usedTraits().empty() ||
2703 !pce.requirements().empty() ||
2704 (pce.attrs() & AttrEnum)) {
2705 return PreClass::Mergeable;
2707 if (!parentName.empty() && !as.hoistables.count(parentName)) {
2708 return PreClass::MaybeHoistable;
2711 as.hoistables.insert(name);
2713 return pce.attrs() & AttrUnique ?
2714 PreClass::AlwaysHoistable : PreClass::MaybeHoistable;
2718 * directive-class : ?"top" attribute-list identifier ?line-range
2719 * extension-clause implements-clause '{' class-body
2722 * extension-clause : empty
2723 * | "extends" identifier
2726 * implements-clause : empty
2727 * | "implements" '(' identifier* ')'
2731 void parse_class(AsmState& as) {
2732 as.in.skipWhitespace();
2734 bool isTop = true;
2736 UserAttributeMap userAttrs;
2737 Attr attrs = parse_attribute_list(as, AttrContext::Class, &userAttrs, &isTop);
2738 if (!SystemLib::s_inited) {
2739 attrs |= AttrUnique | AttrPersistent | AttrBuiltin;
2742 std::string name;
2743 if (!as.in.readname(name)) {
2744 as.error(".class must have a name");
2746 if (ParserBase::IsAnonymousClassName(name)) {
2747 // refresh names of anonymous classes
2748 // to make sure they are unique
2749 auto p = name.find(';');
2750 if (p != std::string::npos) {
2751 name = name.substr(0, p);
2752 name = HPHP::NewAnonymousClassName(name);
2756 int line0;
2757 int line1;
2758 parse_line_range(as, line0, line1);
2760 std::string parentName;
2761 if (as.in.tryConsume("extends")) {
2762 if (!as.in.readname(parentName)) {
2763 as.error("expected parent class name after `extends'");
2767 std::vector<std::string> ifaces;
2768 if (as.in.tryConsume("implements")) {
2769 as.in.expectWs('(');
2770 std::string word;
2771 while (as.in.readname(word)) {
2772 ifaces.push_back(word);
2774 as.in.expect(')');
2777 auto off = folly::get_default(as.defClsOffsets, as.ue->numPreClasses(),
2778 as.ue->bcPos());
2780 as.pce = as.ue->newBarePreClassEmitter(name, PreClass::MaybeHoistable);
2781 as.pce->init(line0,
2782 line1,
2783 off,
2784 attrs,
2785 makeStaticString(parentName),
2786 staticEmptyString());
2787 for (auto const& iface : ifaces) {
2788 as.pce->addInterface(makeStaticString(iface));
2790 as.pce->setUserAttributes(userAttrs);
2792 as.in.expectWs('{');
2793 parse_class_body(as);
2795 as.pce->setHoistable(
2796 isTop ? compute_hoistable(as, name, parentName) : PreClass::NotHoistable
2799 as.finishClass();
2803 * directive-filepath : quoted-string-literal ';'
2806 void parse_filepath(AsmState& as) {
2807 auto const str = read_litstr(as);
2808 as.ue->m_filepath = str;
2809 as.in.expectWs(';');
2813 * directive-main : ?line-range '{' function-body
2816 void parse_main(AsmState& as) {
2817 if (as.emittedPseudoMain) {
2818 as.error("Multiple .main directives found");
2821 int line0;
2822 int line1;
2823 bool fromSrcLoc = parse_line_range(as, line0, line1);
2825 as.in.expectWs('{');
2827 as.ue->initMain(line0, line1);
2828 as.fe = as.ue->getMain();
2829 as.emittedPseudoMain = true;
2830 if (fromSrcLoc) {
2831 as.srcLoc = Location::Range{line0,0,line1,0};
2832 } else {
2833 as.srcLoc = Location::Range{-1,-1,-1,-1};
2835 parse_function_body(as);
2839 * directive-adata : identifier '=' php-serialized ';'
2842 void parse_adata(AsmState& as) {
2843 as.in.skipWhitespace();
2844 std::string dataLabel;
2845 if (!as.in.readword(dataLabel)) {
2846 as.error("expected name for .adata");
2848 if (as.adataMap.count(dataLabel)) {
2849 as.error("duplicate adata label name " + dataLabel);
2852 as.in.expectWs('=');
2853 auto var = parse_php_serialized(as);
2854 if (!var.isArray()) {
2855 as.error(".adata only supports serialized arrays");
2857 auto const data = ArrayData::GetScalarArray(std::move(var));
2858 as.ue->mergeArray(data);
2859 as.adataMap[dataLabel] = data;
2861 as.in.expectWs(';');
2865 * directive-alias : attribute-list identifier '=' type-constraint
2866 * maybe-php-serialized ';'
2869 * We represent alias type information using the syntax for
2870 * TypeConstraints. We populate the name and nullable field of the
2871 * alias directly from the specified type constraint and derive the
2872 * AnnotType from the compute AnnotType in the constraint.
2874 * Following the type-constraint we encode the serialized type structure
2875 * corresponding to this alias.
2877 void parse_alias(AsmState& as) {
2878 as.in.skipWhitespace();
2880 TypeAlias record;
2881 Attr attrs = parse_attribute_list(as, AttrContext::Alias, &record.userAttrs);
2882 if (!SystemLib::s_inited) {
2883 attrs |= AttrPersistent;
2885 std::string name;
2886 if (!as.in.readname(name)) {
2887 as.error(".alias must have a name");
2889 as.in.expectWs('=');
2891 TypeConstraint ty = parse_type_constraint(as);
2892 Variant ts = parse_maybe_php_serialized(as);
2894 if (ts.isInitialized() && !ts.isArray()) {
2895 as.error(".alias must have an array type structure");
2898 const StringData* typeName = ty.typeName();
2899 if (!typeName) typeName = staticEmptyString();
2900 const StringData* sname = makeStaticString(name);
2901 // Merge to ensure namedentity creation, according to
2902 // emitTypedef in emitter.cpp
2903 as.ue->mergeLitstr(sname);
2904 as.ue->mergeLitstr(typeName);
2906 record.name = sname;
2907 record.value = typeName;
2908 record.type = typeName->empty() ? AnnotType::Mixed : ty.type();
2909 record.nullable = (ty.flags() & TypeConstraint::Nullable) != 0;
2910 record.attrs = attrs;
2911 if (ts.isInitialized()) {
2912 record.typeStructure = ArrNR(ArrayData::GetScalarArray(std::move(ts)));
2914 auto aliasId = as.ue->addTypeAlias(record);
2915 as.ue->pushMergeableTypeAlias(Unit::MergeKind::TypeAlias, aliasId);
2917 as.in.expectWs(';');
2921 * directive-hh-file : '1' ';'
2922 * | '0' ';'
2925 void parse_hh_file(AsmState& as) {
2926 as.in.skipWhitespace();
2927 std::string word;
2928 if (!as.in.readword(word)) {
2929 as.error(".hh_file must have a value");
2931 as.ue->m_isHHFile = word == "1";
2933 if (!as.ue->m_isHHFile && word != "0") {
2934 as.error(".hh_file must be either 1 or 0");
2937 as.in.expectWs(';');
2941 * directive-strict : '1' ';'
2942 * | '0' ';'
2945 void parse_strict(AsmState& as) {
2946 as.in.skipWhitespace();
2947 std::string word;
2948 if (!as.in.readword(word)) {
2949 as.error(".strict must have a value");
2951 if (!RuntimeOption::PHP7_ScalarTypes) {
2952 as.error("Cannot set .strict without PHP7 ScalarTypes");
2955 as.ue->m_useStrictTypes = as.ue->m_useStrictTypesForBuiltins = word == "1";
2957 if (!as.ue->m_useStrictTypes && word != "0") {
2958 as.error("Strict types must be either 1 or 0");
2961 as.in.expectWs(';');
2965 * directive-symbols : '{' identifier identifier* '}'
2967 void parse_symbol_refs(
2968 AsmState& as,
2969 void (AsmCallbacks::*onSymbol)(const std::string&)
2971 as.in.expectWs('{');
2973 if (as.callbacks) {
2974 while (true) {
2975 as.in.skipWhitespace();
2976 std::string symbol;
2977 as.in.consumePred(!boost::is_any_of(" \t\r\n#}"),
2978 std::back_inserter(symbol));
2979 if (symbol.empty()) {
2980 break;
2982 (as.callbacks->*onSymbol)(symbol);
2984 } else {
2985 while (as.in.peek() != '}') {
2986 as.in.skipWhitespace();
2987 if (!as.in.skipPred(!boost::is_any_of("#}"))) break;
2991 as.in.expect('}');
2995 * directive-filepaths : '{' string string* '}'
2997 void parse_includes(AsmState& as) {
2998 parse_symbol_refs(as, &AsmCallbacks::onInclude);
3001 void parse_constant_refs(AsmState& as) {
3002 parse_symbol_refs(as, &AsmCallbacks::onConstantRef);
3005 void parse_function_refs(AsmState& as) {
3006 parse_symbol_refs(as, &AsmCallbacks::onFunctionRef);
3009 void parse_class_refs(AsmState& as) {
3010 parse_symbol_refs(as, &AsmCallbacks::onClassRef);
3014 * directive-metadata : identifier = identifier ';'
3015 * | identifier = quoted-string-literal ';'
3016 * | identifier = long-string-literal ';'
3019 void parse_metadata(AsmState& as) {
3020 std::string key;
3021 if (as.in.readname(key)) {
3022 as.in.expectWs('=');
3023 as.in.skipWhitespace();
3024 auto const value = [&] () -> const StringData* {
3025 auto ret = parse_maybe_long_string(as);
3026 if (!ret.empty()) return makeStaticString(ret);
3027 std::string tmp;
3028 if (as.in.readQuotedStr(tmp) || as.in.readword(tmp)) {
3029 return makeStaticString(tmp);
3031 return nullptr;
3032 }();
3033 if (value) {
3034 as.in.expect(';');
3035 as.ue->m_metaData.emplace(
3036 makeStaticString(key),
3037 make_tv<KindOfPersistentString>(value)
3039 return;
3042 as.error(".metadata expects a key = value pair");
3046 * asm-file : asm-tld* <EOF>
3049 * asm-tld : ".filepath" directive-filepath
3050 * | ".main" directive-main
3051 * | ".function" directive-function
3052 * | ".adata" directive-adata
3053 * | ".class" directive-class
3054 * | ".alias" directive-alias
3055 * | ".strict" directive-strict
3056 * | ".hh_file" directive-hh-file
3057 * | ".includes directive-filepaths
3058 * | ".constant_refs directive-symbols
3059 * | ".function_refs directive-symbols
3060 * | ".class_refs directive-symbols
3061 * | ".metadata directive-meta-data
3064 void parse(AsmState& as) {
3065 as.in.skipWhitespace();
3066 std::string directive;
3068 while (as.in.readword(directive)) {
3069 if (directive == ".filepath") { parse_filepath(as) ; continue; }
3070 if (directive == ".main") { parse_main(as) ; continue; }
3071 if (directive == ".function") { parse_function(as) ; continue; }
3072 if (directive == ".adata") { parse_adata(as) ; continue; }
3073 if (directive == ".class") { parse_class(as) ; continue; }
3074 if (directive == ".alias") { parse_alias(as) ; continue; }
3075 if (directive == ".strict") { parse_strict(as) ; continue; }
3076 if (directive == ".hh_file") { parse_hh_file(as) ; continue; }
3077 if (directive == ".includes") { parse_includes(as) ; continue; }
3078 if (directive == ".constant_refs") { parse_constant_refs(as) ; continue; }
3079 if (directive == ".function_refs") { parse_function_refs(as) ; continue; }
3080 if (directive == ".class_refs") { parse_class_refs(as) ; continue; }
3081 if (directive == ".metadata") { parse_metadata(as) ; continue; }
3083 as.error("unrecognized top-level directive `" + directive + "'");
3086 if (!ensure_pseudomain(as)) {
3087 as.error("no .main found in hhas unit");
3093 //////////////////////////////////////////////////////////////////////
3095 std::unique_ptr<UnitEmitter> assemble_string(
3096 const char* code,
3097 int codeLen,
3098 const char* filename,
3099 const MD5& md5,
3100 bool swallowErrors,
3101 AsmCallbacks* callbacks
3103 auto ue = std::make_unique<UnitEmitter>(md5);
3104 if (!SystemLib::s_inited) {
3105 ue->m_mergeOnly = true;
3107 StringData* sd = makeStaticString(filename);
3108 ue->m_filepath = sd;
3109 ue->m_useStrictTypes = RuntimeOption::EnableHipHopSyntax ||
3110 !RuntimeOption::PHP7_ScalarTypes;
3112 try {
3113 auto const mode = std::istringstream::binary | std::istringstream::in;
3114 std::istringstream instr(std::string(code, codeLen), mode);
3115 AsmState as(instr, callbacks);
3116 as.ue = ue.get();
3117 parse(as);
3118 if (ue->m_isHHFile) {
3119 ue->m_useStrictTypes = true;
3121 } catch (const std::exception& e) {
3122 if (!swallowErrors) throw;
3123 ue = createFatalUnit(sd, md5, FatalOp::Runtime, makeStaticString(e.what()));
3126 return ue;
3129 AsmResult assemble_expression(UnitEmitter& ue, FuncEmitter* fe,
3130 int incomingStackDepth,
3131 const std::string& expr) {
3132 auto const mode = std::istringstream::binary | std::istringstream::in;
3133 std::stringstream sstr(expr + '}', mode);
3134 AsmState as(sstr);
3135 as.ue = &ue;
3136 as.fe = fe;
3137 as.initStackDepth.adjust(as, incomingStackDepth);
3138 parse_function_body(as, 1);
3139 as.finishSection();
3140 if (as.maxUnnamed >= 0) {
3141 as.error("Unnamed locals are not allowed in inline assembly");
3144 if (!as.currentStackDepth) return AsmResult::Unreachable;
3146 // If we fall off the end of the inline assembly, we're expected to
3147 // leave a single value on the stack, or leave the stack unchanged.
3148 if (!as.currentStackDepth->baseValue) {
3149 as.error("Unknown stack offset on exit from inline assembly");
3151 auto curStackDepth = as.currentStackDepth->absoluteDepth();
3152 if (curStackDepth == incomingStackDepth + 1) {
3153 return AsmResult::ValuePushed;
3155 if (curStackDepth != incomingStackDepth) {
3156 as.error("Inline assembly expressions should leave the stack unchanged, "
3157 "or push exactly one cell onto the stack.");
3160 return AsmResult::NoResult;
3163 //////////////////////////////////////////////////////////////////////