Fix refcounting in arReturn() and stop leaking static strings.
[hiphop-php.git] / hphp / runtime / vm / as.cpp
blobeff0d5feb041bd9e913b28701507e6c2eb36a543
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
18 * This module contains an assembler implementation for HHBC. It is
19 * probably fairly close to allowing you to access most of the
20 * metadata associated with hhvm's compiled unit format, although it's
21 * possible something has been overlooked.
23 * To use it, run hhvm with -v Eval.AllowHhas=true on a file with a
24 * ".hhas" extension. The syntax is probably easiest to understand by
25 * looking at some examples (or the semi-BNF markup around some of the
26 * parse functions here). For examples, see hphp/tests/vm/asm_*.
29 * Notes:
31 * - You can crash hhvm very easily with this.
33 * Using this module, you can emit pretty much any sort of not
34 * trivially-illegal bytecode stream, and many trivially-illegal
35 * ones as well. You can also easily create Units with illegal
36 * metadata. Generally this will crash the VM. In other cases
37 * (especially if you don't bother to DefCls your classes in your
38 * .main) you'll just get mysterious "class not defined" errors
39 * or weird behavior.
41 * - Whitespace is not normally significant, but newlines may not
42 * be in the middle of a list of opcode arguments. (After the
43 * newline, the next thing seen is expected to be either a
44 * mnemonic for the next opcode in the stream or some sort of
45 * directive.) However, newlines (and comments) may appear
46 * *inside* certain opcode arguments (e.g. string literals or
47 * vector immediates).
49 * Rationale: this is partially intended to make it trivial to
50 * catch wrong-number-of-arguments errors, although it probably
51 * could be done without this if you feel like changing it.
54 * Caveats:
56 * - It might be nice if you could refer to iterators by name
57 * instead of by index.
59 * - DefCls by name would be nice.
61 * - Line number information can't be propagated to the various Unit
62 * structures. (It might make sense to do this via something like
63 * a .line directive at some point.)
65 * - You can't currently create non-top functions or non-hoistable
66 * classes.
68 * - Missing support for static variables in a function/method.
70 * @author Jordan DeLong <delong.j@fb.com>
73 #include "hphp/runtime/vm/as.h"
75 #include <cstdio>
76 #include <iostream>
77 #include <algorithm>
78 #include <iterator>
79 #include <vector>
80 #include <boost/algorithm/string.hpp>
81 #include <boost/format.hpp>
82 #include <boost/lexical_cast.hpp>
83 #include <boost/scoped_ptr.hpp>
84 #include <boost/noncopyable.hpp>
85 #include <boost/bind.hpp>
87 #include "folly/String.h"
88 #include "folly/Range.h"
90 #include "hphp/util/md5.h"
92 #include "hphp/runtime/base/builtin-functions.h"
93 #include "hphp/runtime/base/repo-auth-type-codec.h"
94 #include "hphp/runtime/base/repo-auth-type.h"
95 #include "hphp/runtime/vm/as-shared.h"
96 #include "hphp/runtime/vm/func-emitter.h"
97 #include "hphp/runtime/vm/hhbc.h"
98 #include "hphp/runtime/vm/preclass-emitter.h"
99 #include "hphp/runtime/vm/unit.h"
100 #include "hphp/runtime/vm/unit-emitter.h"
101 #include "hphp/system/systemlib.h"
103 TRACE_SET_MOD(hhas);
105 namespace HPHP {
107 //////////////////////////////////////////////////////////////////////
109 namespace {
111 struct AsmState;
112 typedef void (*ParserFunc)(AsmState& as);
114 struct Error : std::runtime_error {
115 explicit Error(int where, const std::string& what)
116 : std::runtime_error(str(
117 boost::format("Assembler Error: line %1%: %2%") % where % what))
121 struct Input {
122 explicit Input(std::istream& in)
123 : m_in(in)
124 , m_lineNumber(1)
127 int peek() { return m_in.peek(); }
129 int getc() {
130 int ret = m_in.get();
131 if (ret == EOF) {
132 io_error_if_bad();
133 } else if (ret == '\n') {
134 ++m_lineNumber;
136 return ret;
139 void ungetc(char c) {
140 if (c == '\n') --m_lineNumber;
141 m_in.putback(c);
144 void expect(int c) {
145 if (getc() != c) {
146 error(str(boost::format("expected character `%1%'") % char(c)));
151 * Expect `c' after possible whitespace/comments. When convenient,
152 * preferable to doing skipWhitespace/expect manually to keep the
153 * line number in the error prior to the whitespace skipped.
155 void expectWs(int c) {
156 const int currentLine = m_lineNumber;
157 skipWhitespace();
158 if (getc() != c) {
159 throw Error(currentLine,
160 str(boost::format("expected character `%1%'") % char(c)));
164 int getLineNumber() const {
165 return m_lineNumber;
168 // Skips whitespace, then populates word with valid bareword
169 // characters. Returns true if we read any characters into word.
170 bool readword(std::string& word) {
171 word.clear();
172 skipWhitespace();
173 consumePred(is_bareword(), std::back_inserter(word));
174 return !word.empty();
177 // Try to consume a bareword. Skips whitespace. If we can't
178 // consume the specified word, returns false.
179 bool tryConsume(const std::string& what) {
180 std::string word;
181 if (!readword(word)) {
182 return false;
184 if (word != what) {
185 std::for_each(word.rbegin(), word.rend(),
186 boost::bind(&Input::ungetc, this, _1));
187 return false;
189 return true;
192 // C-style character escapes, no support for unicode escapes or
193 // whatnot.
194 template<class OutCont>
195 void escapeChar(OutCont& out) {
196 auto is_oct = [&] (int i) { return i >= '0' && i <= '7'; };
197 auto is_hex = [&] (int i) {
198 return (i >= '0' && i <= '9') ||
199 (i >= 'a' && i <= 'f') ||
200 (i >= 'A' && i <= 'F');
202 auto hex_val = [&] (int i) -> uint32_t {
203 assert(is_hex(i));
204 return i >= '0' && i <= '9' ? i - '0' :
205 i >= 'a' && i <= 'f' ? i - 'a' + 10 : i - 'A' + 10;
208 auto src = getc();
209 switch (src) {
210 case EOF: error("EOF in string literal");
211 case 'a': out.push_back('\a'); break;
212 case 'b': out.push_back('\b'); break;
213 case 'f': out.push_back('\f'); break;
214 case 'n': out.push_back('\n'); break;
215 case 'r': out.push_back('\r'); break;
216 case 't': out.push_back('\t'); break;
217 case 'v': out.push_back('\v'); break;
218 case '\'': out.push_back('\''); break;
219 case '\"': out.push_back('\"'); break;
220 case '\?': out.push_back('\?'); break;
221 case '\\': out.push_back('\\'); break;
222 case '\n': /* ignore */ break;
223 default:
224 if (is_oct(src)) {
225 auto val = int64_t{src} - '0';
226 for (auto i = int{0}; i < 3; ++i) {
227 src = getc();
228 if (!is_oct(src)) { ungetc(src); break; }
229 val *= 8;
230 val += src - '0';
232 if (val > std::numeric_limits<uint8_t>::max()) {
233 error("octal escape sequence overflowed");
235 out.push_back(static_cast<uint8_t>(val));
236 return;
239 if (src == 'x' || src == 'X') {
240 auto val = uint64_t{0};
241 if (!is_hex(peek())) error("\\x used without no following hex digits");
242 do {
243 src = getc();
244 val *= 0x10;
245 val += hex_val(src);
246 } while (is_hex(peek()));
247 if (val > std::numeric_limits<uint8_t>::max()) {
248 error("hex escape sequence overflowed");
250 out.push_back(static_cast<uint8_t>(val));
251 return;
254 error("unrecognized character escape");
258 // Reads a quoted string with typical escaping rules. Does not skip
259 // any whitespace. Returns true if we successfully read one, or
260 // false. EOF during the string throws.
261 bool readQuotedStr(std::string& str) {
262 str.clear();
263 if (peek() != '\"') {
264 return false;
266 getc();
268 int c;
269 while ((c = getc()) != EOF) {
270 switch (c) {
271 case '\"': return true;
272 case '\\': escapeChar(str); break;
273 default: str.push_back(c); break;
276 error("EOF in string literal");
277 not_reached();
278 return false;
282 * Reads a python-style longstring, or returns false if we don't
283 * have one. Does not skip any whitespace before looking for the
284 * string.
286 * Python longstrings start with \"\"\", and can contain any bytes
287 * other than \"\"\". A '\\' character introduces C-style escapes,
288 * but there's no need to escape single quote characters.
290 bool readLongString(std::vector<char>& buffer) {
291 if (peek() != '\"') return false;
292 getc();
293 if (peek() != '\"') { ungetc('\"'); return false; }
294 getc();
295 if (peek() != '\"') { ungetc('\"');
296 ungetc('\"'); return false; }
297 getc();
299 int c;
300 while ((c = getc()) != EOF) {
301 if (c == '\\') {
302 escapeChar(buffer);
303 continue;
305 if (c == '"') {
306 c = getc();
307 if (c != '"') {
308 buffer.push_back('"');
309 ungetc(c);
310 continue;
312 c = getc();
313 if (c != '"') {
314 buffer.push_back('"');
315 buffer.push_back('"');
316 ungetc(c);
317 continue;
319 return true;
322 buffer.push_back(c);
324 error("EOF in \"\"\"-string literal");
325 not_reached();
326 return false;
329 // Skips whitespace (including newlines and comments).
330 void skipWhitespace() {
331 for (;;) {
332 skipPred(boost::is_any_of(" \t\n"));
333 if (peek() == '#') {
334 skipPred(!boost::is_any_of("\n"));
335 expect('\n');
336 } else {
337 break;
342 // Skip spaces and tabs, but other whitespace (such as comments or
343 // newlines) stop the skip.
344 void skipSpaceTab() {
345 skipPred(boost::is_any_of(" \t"));
348 template<class Predicate>
349 void skipPred(Predicate pred) {
350 int c;
351 while (pred(c = peek())) { getc(); }
354 template<class Predicate, class OutputIterator>
355 void consumePred(Predicate pred, OutputIterator out) {
356 int c;
357 while (pred(c = peek())) { *out++ = getc(); }
360 private:
361 struct is_bareword {
362 bool operator()(int i) const {
363 return isalnum(i) || i == '_' || i == '.' || i == '$';
367 void error(const std::string& what) {
368 throw Error(getLineNumber(), what);
371 void io_error_if_bad() {
372 if (m_in.bad()) {
373 error("I/O error reading stream: " +
374 folly::errnoStr(errno).toStdString());
378 private:
379 std::istream& m_in;
380 int m_lineNumber;
383 struct StackDepth;
/*
 * Bookkeeping for one FPI region that has been opened (see
 * AsmState::beginFpi) but not yet closed.
 */
struct FPIReg {
  // Offset passed to beginFpi for the instruction opening the region.
  Offset fpushOff;
  // Stack-depth tracker that was current when the region was opened.
  StackDepth* stackDepth;
  // stackDepth->currentOffset at the time the region was opened.
  int fpOff;
};
392 * Tracks the depth of the stack in a given block of instructions.
394 * This structure is linked to a block of instructions (usually starting at a
395 * label), and tracks the current stack depth in this block. This tracking can
396 * take two forms:
397 * - Absolute depth: the depth of the stack is exactly known for this block
398 * - Relative depth: the depth of the stack is unknown for now. We keep track
399 * of an offset, relative to the depth of the stack at the first instruction
400 * of the block
402 struct StackDepth {
403 int currentOffset;
404 int maxOffset;
405 int minOffset;
406 int minOffsetLine;
407 folly::Optional<int> baseValue;
410 * During the parsing process, when a Jmp instruction is encountered, the
411 * StackDepth structure for this jump becomes linked to the StackDepth
412 * structure of the label (which is added to the listeners list).
414 * Once the absolute depth at the jump becomes known, its StackDepth
415 * instance calls the setBase method of the StackDepth instance of the label.
416 * The absolute depth at the label can then be inferred from the
417 * absolute depth at the jump.
419 std::vector<std::pair<StackDepth*, int> > listeners;
421 StackDepth()
422 : currentOffset(0)
423 , maxOffset(0)
424 , minOffset(0)
427 void adjust(AsmState& as, int delta);
428 void addListener(AsmState& as, StackDepth* target);
429 void setBase(AsmState& as, int stackDepth);
432 * Sets the baseValue such as the current stack depth matches the
433 * parameter.
435 * If the base value is already known, it may conflict with the
436 * parameter of this function. In this case, an error will be raised.
438 void setCurrentAbsolute(AsmState& as, int stackDepth);
441 struct Label {
442 Label() : bound(false) {}
444 bool bound;
445 Offset target;
446 StackDepth stackDepth;
449 * Each label source source has an Offset where the jmp should be
450 * patched up is, and an Offset from which the jump delta should be
451 * computed. (The second Offset is basically to the actual
452 * jump/switch/etc instruction, while the first points to the
453 * immediate.)
455 typedef std::vector<std::pair<Offset,Offset> > SourcesVec;
456 SourcesVec sources;
459 * List of a parameter ids that use this label for its DV
460 * initializer.
462 std::vector<Id> dvInits;
465 * List of EHEnt's that should have m_fault bound to the Offset of
466 * this label.
468 std::vector<size_t> ehFaults;
471 * Map from exception names to the list of EHEnt's that have a catch
472 * block jumping to this label for that name.
474 typedef std::map<std::string,std::vector<size_t> > CatchesMap;
475 CatchesMap ehCatches;
478 struct AsmState : private boost::noncopyable {
479 explicit AsmState(std::istream& in)
480 : in(in)
481 , emittedPseudoMain(false)
482 , fe(0)
483 , numItersSet(false)
484 , currentStackDepth(&initStackDepth)
485 , stackHighWater(0)
486 , fdescDepth(0)
487 , fdescHighWater(0)
489 currentStackDepth->setBase(*this, 0);
492 void error(const std::string& what) {
493 throw Error(in.getLineNumber(), what);
496 void adjustStack(int delta) {
497 if (currentStackDepth == nullptr) {
498 // Instruction is unreachable, nothing to do here!
499 return;
502 currentStackDepth->adjust(*this, delta);
505 void adjustStackHighwater(int depth) {
506 stackHighWater = std::max(stackHighWater, depth);
509 std::string displayStackDepth() {
510 std::ostringstream stack;
512 if (currentStackDepth == nullptr) {
513 stack << "/";
514 } else if (currentStackDepth->baseValue) {
515 stack << *currentStackDepth->baseValue +
516 currentStackDepth->currentOffset;
517 } else {
518 stack << "?" << currentStackDepth->currentOffset;
521 return stack.str();
524 void addLabelTarget(const std::string& name) {
525 Label& label = labelMap[name];
526 if (label.bound) {
527 error("Duplicate label " + name);
529 label.bound = true;
530 label.target = ue->bcPos();
532 StackDepth* newStack = &label.stackDepth;
534 if (currentStackDepth == nullptr) {
535 // Previous instruction was unreachable
536 currentStackDepth = newStack;
537 return;
540 // The stack depth at the label depends on the current depth
541 currentStackDepth->addListener(*this, newStack);
542 currentStackDepth = newStack;
545 void addLabelJump(const std::string& name, Offset immOff, Offset opcodeOff) {
546 Label& label = labelMap[name];
548 if (currentStackDepth == nullptr) {
549 // Jump is unreachable, nothing to do here
550 return;
553 // The stack depth at the target must be the same as the current depth
554 // (whatever this may be: it may still be unknown)
555 currentStackDepth->addListener(*this, &label.stackDepth);
557 label.sources.push_back(std::make_pair(immOff, opcodeOff));
560 void enforceStackDepth(int stackDepth) {
561 if (currentStackDepth == nullptr) {
562 // Current instruction is unreachable, thus the constraint
563 // on the stack depth will never be violated
564 return;
567 currentStackDepth->setCurrentAbsolute(*this, stackDepth);
570 void enterUnreachableRegion() {
571 currentStackDepth = nullptr;
574 void addLabelDVInit(const std::string& name, int paramId) {
575 labelMap[name].dvInits.push_back(paramId);
577 // Stack depth should be 0 when entering a DV init
578 labelMap[name].stackDepth.setBase(*this, 0);
581 void addLabelEHFault(const std::string& name, size_t ehIdx) {
582 labelMap[name].ehFaults.push_back(ehIdx);
584 // Stack depth should be 0 when entering a fault funclet
585 labelMap[name].stackDepth.setBase(*this, 0);
588 void addLabelEHCatch(const std::string& what,
589 const std::string& label,
590 size_t ehIdx) {
591 labelMap[label].ehCatches[what].push_back(ehIdx);
593 // Stack depth should be 0 when entering a catch block
594 labelMap[label].stackDepth.setBase(*this, 0);
597 void beginFpi(Offset fpushOff) {
598 if (currentStackDepth == nullptr) {
599 error("beginFpi called from unreachable instruction");
602 fpiRegs.push_back(FPIReg());
603 FPIReg& fpi = fpiRegs.back();
604 fpi.fpushOff = fpushOff;
605 fpi.stackDepth = currentStackDepth;
606 fpi.fpOff = currentStackDepth->currentOffset;
607 fdescDepth += kNumActRecCells;
608 fdescHighWater = std::max(fdescDepth, fdescHighWater);
611 void endFpi() {
612 assert(!fpiRegs.empty());
614 FPIEnt& ent = fe->addFPIEnt();
615 FPIReg& reg = fpiRegs.back();
616 ent.m_fpushOff = reg.fpushOff;
617 ent.m_fcallOff = ue->bcPos();
618 ent.m_fpOff = reg.fpOff;
619 if (reg.stackDepth->baseValue) {
620 ent.m_fpOff += *reg.stackDepth->baseValue;
621 } else {
622 // base value still unknown, this will need to be updated later
623 fpiToUpdate.push_back(std::make_pair(&ent, reg.stackDepth));
626 fpiRegs.pop_back();
627 fdescDepth -= kNumActRecCells;
630 void finishClass() {
631 assert(!fe);
632 pce = 0;
635 void patchLabelOffsets(const Label& label) {
636 for (Label::SourcesVec::const_iterator it = label.sources.begin();
637 it != label.sources.end();
638 ++it) {
639 ue->emitInt32(label.target - it->second, it->first);
642 for (std::vector<Id>::const_iterator it = label.dvInits.begin();
643 it != label.dvInits.end();
644 ++it) {
645 fe->params[*it].funcletOff = label.target;
648 for (std::vector<size_t>::const_iterator it = label.ehFaults.begin();
649 it != label.ehFaults.end();
650 ++it) {
651 fe->ehtab[*it].m_fault = label.target;
654 for (Label::CatchesMap::const_iterator it = label.ehCatches.begin();
655 it != label.ehCatches.end();
656 ++it) {
657 Id exId = ue->mergeLitstr(makeStaticString(it->first));
658 for (std::vector<size_t>::const_iterator idx_it = it->second.begin();
659 idx_it != it->second.end();
660 ++idx_it) {
661 fe->ehtab[*idx_it].m_catches.push_back(
662 std::make_pair(exId, label.target));
667 void finishFunction() {
668 for (LabelMap::const_iterator it = labelMap.begin();
669 it != labelMap.end();
670 ++it) {
671 if (!it->second.bound) {
672 error("Undefined label " + it->first);
674 if (it->second.target >= ue->bcPos()) {
675 error("label " + it->first + " falls of the end of the function");
678 patchLabelOffsets(it->second);
681 // Patch the FPI structures
682 for (auto& kv : fpiToUpdate) {
683 if (!kv.second->baseValue) {
684 error("created a FPI from an unreachable instruction");
687 kv.first->m_fpOff += *kv.second->baseValue;
690 // Stack depth should be 0 at the end of a function body
691 enforceStackDepth(0);
693 fe->maxStackCells = fe->numLocals() +
694 fe->numIterators() * kNumIterCells +
695 stackHighWater +
696 fdescHighWater; // in units of cells already
697 fe->finish(ue->bcPos(), false);
698 ue->recordFunction(fe);
700 fe = 0;
701 fpiRegs.clear();
702 labelMap.clear();
703 numItersSet = false;
704 initStackDepth = StackDepth();
705 initStackDepth.setBase(*this, 0);
706 currentStackDepth = &initStackDepth;
707 stackHighWater = 0;
708 fdescDepth = 0;
709 fdescHighWater = 0;
710 fpiToUpdate.clear();
713 int getLocalId(const std::string& name) {
714 if (name[0] != '$') {
715 error("local variables must be prefixed with $");
718 const StringData* sd = makeStaticString(name.c_str() + 1);
719 fe->allocVarId(sd);
720 return fe->lookupVarId(sd);
723 int getIterId(int32_t id) {
724 if (id >= fe->numIterators()) {
725 error("iterator id exceeded number of iterators in the function");
727 return id;
730 UnitEmitter* ue;
731 Input in;
732 bool emittedPseudoMain;
734 typedef std::map<std::string,ArrayData*> ADataMap;
735 ADataMap adataMap;
737 // When inside a class, this state is active.
738 PreClassEmitter* pce;
740 // When we're doing a function or method body, this state is active.
741 FuncEmitter* fe;
742 std::vector<FPIReg> fpiRegs;
743 typedef std::map<std::string,Label> LabelMap;
744 std::map<std::string,Label> labelMap;
745 bool numItersSet;
746 StackDepth initStackDepth;
747 StackDepth* currentStackDepth;
748 int stackHighWater;
749 int fdescDepth;
750 int fdescHighWater;
751 std::vector<std::pair<FPIEnt*, StackDepth*> > fpiToUpdate;
755 void StackDepth::adjust(AsmState& as, int delta) {
756 currentOffset += delta;
758 if (!baseValue) {
759 // The absolute stack depth is unknown. We only store the min
760 // and max offsets, and we will take a decision later, when the
761 // base value will be known.
762 maxOffset = std::max(currentOffset, maxOffset);
763 if (currentOffset < minOffset) {
764 minOffsetLine = as.in.getLineNumber();
765 minOffset = currentOffset;
767 return;
770 if (*baseValue + currentOffset < 0) {
771 as.error("opcode sequence caused stack depth to go negative");
774 as.adjustStackHighwater(*baseValue + currentOffset);
777 void StackDepth::addListener(AsmState& as, StackDepth* target) {
778 if (baseValue) {
779 target->setBase(as, *baseValue + currentOffset);
780 } else {
781 listeners.push_back(std::make_pair(target, currentOffset));
785 void StackDepth::setBase(AsmState& as, int stackDepth) {
786 if (baseValue && stackDepth != *baseValue) {
787 as.error("stack depth do not match");
790 baseValue = stackDepth;
792 // We finally know the base value. Update AsmState accordingly.
793 if (*baseValue + minOffset < 0) {
794 throw Error(
795 minOffsetLine,
796 "opcode sequence caused stack depth to go negative"
799 as.adjustStackHighwater(*baseValue + maxOffset);
801 // Update the listeners
802 for (auto& kv : listeners) {
803 kv.first->setBase(as, *baseValue + kv.second);
806 // We won't need them anymore
807 listeners.clear();
810 void StackDepth::setCurrentAbsolute(AsmState& as, int stackDepth) {
811 setBase(as, stackDepth - currentOffset);
814 //////////////////////////////////////////////////////////////////////
817 * Opcode arguments must be on the same line as the opcode itself,
818 * although certain argument types may contain internal newlines (see,
819 * for example, read_immvector, read_jmpvector, or string literals).
821 template<class Target> Target read_opcode_arg(AsmState& as) {
822 as.in.skipSpaceTab();
823 std::string strVal;
824 as.in.consumePred(!boost::is_any_of(" \t\n#;>"),
825 std::back_inserter(strVal));
826 if (strVal.empty()) {
827 as.error("expected opcode or directive argument");
829 try {
830 return boost::lexical_cast<Target>(strVal);
831 } catch (boost::bad_lexical_cast&) {
832 as.error("couldn't convert input argument (" + strVal + ") to "
833 "proper type");
834 not_reached();
838 template<class SubOpType>
839 uint8_t read_subop(AsmState& as) {
840 auto const str = read_opcode_arg<std::string>(as);
841 if (auto const ty = nameToSubop<SubOpType>(str.c_str())) {
842 return static_cast<uint8_t>(*ty);
844 as.error("unknown subop name");
845 not_reached();
848 const StringData* read_litstr(AsmState& as) {
849 as.in.skipSpaceTab();
850 std::string strVal;
851 if (!as.in.readQuotedStr(strVal)) {
852 as.error("expected quoted string literal");
854 return makeStaticString(strVal);
857 std::vector<std::string> read_strvector(AsmState& as) {
858 std::vector<std::string> ret;
859 as.in.skipSpaceTab();
860 as.in.expect('<');
861 std::string name;
862 while (as.in.skipSpaceTab(), as.in.readQuotedStr(name)) {
863 ret.push_back(name);
865 as.in.skipSpaceTab();
866 as.in.expectWs('>');
867 return ret;
870 ArrayData* read_litarray(AsmState& as) {
871 as.in.skipSpaceTab();
872 if (as.in.getc() != '@') {
873 as.error("expecting an `@foo' array literal reference");
875 std::string name;
876 if (!as.in.readword(name)) {
877 as.error("expected name of .adata literal");
880 AsmState::ADataMap::const_iterator it = as.adataMap.find(name);
881 if (it == as.adataMap.end()) {
882 as.error("unknown array data literal name " + name);
884 return it->second;
887 void read_immvector_immediate(AsmState& as, std::vector<unsigned char>& ret,
888 MemberCode mcode = InvalidMemberCode) {
889 if (memberCodeImmIsLoc(mcode) || mcode == InvalidMemberCode) {
890 if (as.in.getc() != '$') {
891 as.error("*L member code in vector immediate must be followed by "
892 "a local variable name");
894 std::string name;
895 if (!as.in.readword(name)) {
896 as.error("couldn't read name for local variable in vector immediate");
898 encodeIvaToVector(ret, as.getLocalId("$" + name));
899 } else if (memberCodeImmIsString(mcode)) {
900 encodeToVector<int32_t>(ret, as.ue->mergeLitstr(read_litstr(as)));
901 } else if (memberCodeImmIsInt(mcode)) {
902 encodeToVector<int64_t>(ret, read_opcode_arg<int64_t>(as));
903 } else {
904 as.error(std::string("don't understand immediate for member code ") +
905 memberCodeString(mcode));
909 std::vector<unsigned char> read_immvector(AsmState& as, int& stackCount) {
910 std::vector<unsigned char> ret;
912 as.in.skipSpaceTab();
913 as.in.expect('<');
915 std::string word;
916 if (!as.in.readword(word)) {
917 as.error("expected location code in immediate vector");
920 LocationCode lcode = parseLocationCode(word.c_str());
921 if (lcode == InvalidLocationCode) {
922 as.error("expected location code, saw `" + word + "'");
924 ret.push_back(uint8_t(lcode));
925 if (word[word.size() - 1] == 'L') {
926 if (as.in.getc() != ':') {
927 as.error("expected `:' after location code `" + word + "'");
930 for (int i = 0; i < numLocationCodeImms(lcode); ++i) {
931 read_immvector_immediate(as, ret);
933 stackCount = numLocationCodeStackVals(lcode);
935 // Read all the member entries.
936 for (;;) {
937 as.in.skipWhitespace();
938 if (as.in.peek() == '>') { as.in.getc(); break; }
940 if (!as.in.readword(word)) {
941 as.error("expected member code in immediate vector");
943 MemberCode mcode = parseMemberCode(word.c_str());
944 if (mcode == InvalidMemberCode) {
945 as.error("unrecognized member code `" + word + "'");
947 ret.push_back(uint8_t(mcode));
948 if (memberCodeHasImm(mcode)) {
949 if (as.in.getc() != ':') {
950 as.error("expected `:' after member code `" + word + "'");
952 read_immvector_immediate(as, ret, mcode);
953 } else if (mcode != MW) {
954 ++stackCount;
958 return ret;
961 RepoAuthType read_repo_auth_type(AsmState& as) {
962 auto const str = read_opcode_arg<std::string>(as);
963 folly::StringPiece parse(str);
966 * Note: no support for reading array types. (The assembler only
967 * emits a single unit, so it can't really be involved in creating a
968 * ArrayTypeTable.)
971 using T = RepoAuthType::Tag;
973 #define X(what, tag) \
974 if (parse.startsWith(what)) return RepoAuthType{tag}
976 #define Y(what, tag) \
977 if (parse.startsWith(what)) { \
978 parse.removePrefix(what); \
979 auto const cls = makeStaticString(parse.data()); \
980 as.ue->mergeLitstr(cls); \
981 return RepoAuthType{tag, cls}; \
984 Y("Obj=", T::ExactObj);
985 Y("?Obj=", T::OptExactObj);
986 Y("?Obj<=", T::OptSubObj);
987 Y("Obj<=", T::SubObj);
989 X("Arr", T::Arr);
990 X("?Arr", T::OptArr);
991 X("Bool", T::Bool);
992 X("?Bool", T::OptBool);
993 X("Cell", T::Cell);
994 X("Dbl", T::Dbl);
995 X("?Dbl", T::OptDbl);
996 X("Gen", T::Gen);
997 X("InitCell", T::InitCell);
998 X("InitGen", T::InitGen);
999 X("InitNull", T::InitNull);
1000 X("InitUnc", T::InitUnc);
1001 X("Int", T::Int);
1002 X("?Int", T::OptInt);
1003 X("Null", T::Null);
1004 X("Obj", T::Obj);
1005 X("?Obj", T::OptObj);
1006 X("Ref", T::Ref);
1007 X("?Res", T::OptRes);
1008 X("Res", T::Res);
1009 X("?SArr", T::OptSArr);
1010 X("SArr", T::SArr);
1011 X("?SStr", T::OptSStr);
1012 X("SStr", T::SStr);
1013 X("?Str", T::OptStr);
1014 X("Str", T::Str);
1015 X("Unc", T::Unc);
1016 X("Uninit", T::Uninit);
1018 #undef X
1019 #undef Y
1021 // Make sure the above parsing code is revisited when new tags are
1022 // added (we'll get a warning for a missing case label):
1023 if (debug) switch (RepoAuthType{}.tag()) {
1024 case T::Uninit:
1025 case T::InitNull:
1026 case T::Null:
1027 case T::Int:
1028 case T::OptInt:
1029 case T::Dbl:
1030 case T::OptDbl:
1031 case T::Res:
1032 case T::OptRes:
1033 case T::Bool:
1034 case T::OptBool:
1035 case T::SStr:
1036 case T::OptSStr:
1037 case T::Str:
1038 case T::OptStr:
1039 case T::SArr:
1040 case T::OptSArr:
1041 case T::Arr:
1042 case T::OptArr:
1043 case T::Obj:
1044 case T::OptObj:
1045 case T::InitUnc:
1046 case T::Unc:
1047 case T::InitCell:
1048 case T::Cell:
1049 case T::Ref:
1050 case T::InitGen:
1051 case T::Gen:
1052 case T::ExactObj:
1053 case T::SubObj:
1054 case T::OptExactObj:
1055 case T::OptSubObj:
1056 break;
1059 as.error("unrecognized RepoAuthType format");
1060 not_reached();
1063 // Read in a vector of iterators the format for this vector is:
1064 // <(TYPE) ID, (TYPE) ID, ...>
1065 // Where TYPE := Iter | MIter | CIter
1066 // and ID := Integer
1067 std::vector<uint32_t> read_itervec(AsmState& as) {
1068 std::vector<uint32_t> ret;
1070 as.in.skipSpaceTab();
1071 as.in.expect('<');
1073 std::string word;
1075 for (;;) {
1076 as.in.expectWs('(');
1077 if (!as.in.readword(word)) as.error("Was expecting Iterator type.");
1078 if (!word.compare("Iter")) ret.push_back(KindOfIter);
1079 else if (!word.compare("MIter")) ret.push_back(KindOfMIter);
1080 else if (!word.compare("CIter")) ret.push_back(KindOfCIter);
1081 else as.error("Unknown iterator type `" + word + "'");
1082 as.in.expectWs(')');
1084 as.in.skipSpaceTab();
1086 if (!as.in.readword(word)) as.error("Was expecting iterator id.");
1087 uint32_t iterId = folly::to<uint32_t>(word);
1088 ret.push_back(iterId);
1090 if (!isdigit(word.back())) {
1091 if (word.back() == '>') break;
1092 if (word.back() != ',') as.error("Was expecting `,'.");
1093 } else {
1094 as.in.skipSpaceTab();
1095 if (as.in.peek() == '>') { as.in.getc(); break; }
1096 as.in.expect(',');
1100 return ret;
1103 // Jump tables are lists of labels.
1104 std::vector<std::string> read_jmpvector(AsmState& as) {
1105 std::vector<std::string> ret;
1107 as.in.skipSpaceTab();
1108 as.in.expect('<');
1110 std::string word;
1111 while (as.in.readword(word)) {
1112 ret.push_back(word);
1114 as.in.expectWs('>');
1116 return ret;
1119 typedef std::vector<std::pair<Id, std::string>> SSwitchJmpVector;
1121 SSwitchJmpVector read_sswitch_jmpvector(AsmState& as) {
1122 SSwitchJmpVector ret;
1124 as.in.skipSpaceTab();
1125 as.in.expect('<');
1127 std::string defLabel;
1128 do {
1129 std::string caseStr;
1130 if (!as.in.readQuotedStr(caseStr)) {
1131 as.error("expected quoted string literal");
1134 as.in.expect(':');
1136 as.in.readword(defLabel);
1138 ret.push_back(std::make_pair(
1139 as.ue->mergeLitstr(makeStaticString(caseStr)),
1140 defLabel
1143 as.in.skipWhitespace();
1144 } while (as.in.peek() != '-');
1146 as.in.expect('-');
1147 as.in.expect(':');
1148 as.in.readword(defLabel);
1150 // -1 stand for default case.
1151 ret.push_back(std::make_pair(-1, defLabel));
1153 as.in.expect('>');
1155 return ret;
1158 //////////////////////////////////////////////////////////////////////
1160 typedef std::map<std::string,ParserFunc> OpcodeParserMap;
1161 OpcodeParserMap opcode_parsers;
1163 #define IMM_NA
1164 #define IMM_ONE(t) IMM_##t
1165 #define IMM_TWO(t1, t2) IMM_##t1; IMM_##t2
1166 #define IMM_THREE(t1, t2, t3) IMM_##t1; IMM_##t2; IMM_##t3
1167 #define IMM_FOUR(t1, t2, t3, t4) IMM_##t1; IMM_##t2; IMM_##t3; IMM_##t4
1169 // FCall and NewPackedArray need to know the first imm to do POP_*MANY.
1170 #define IMM_IVA do { \
1171 int imm = read_opcode_arg<int64_t>(as); \
1172 as.ue->emitIVA(imm); \
1173 if (immIVA < 0) immIVA = imm; \
1174 } while (0)
1176 #define IMM_VSA \
1177 std::vector<std::string> vecImm = read_strvector(as); \
1178 auto const vecImmStackValues = vecImm.size(); \
1179 as.ue->emitInt32(vecImmStackValues); \
1180 for (size_t i = 0; i < vecImmStackValues; ++i) { \
1181 as.ue->emitInt32(as.ue->mergeLitstr(String(vecImm[i]).get())); \
1184 #define IMM_SA as.ue->emitInt32(as.ue->mergeLitstr(read_litstr(as)))
1185 #define IMM_RATA encodeRAT(*as.ue, read_repo_auth_type(as))
1186 #define IMM_I64A as.ue->emitInt64(read_opcode_arg<int64_t>(as))
1187 #define IMM_DA as.ue->emitDouble(read_opcode_arg<double>(as))
1188 #define IMM_LA as.ue->emitIVA(as.getLocalId( \
1189 read_opcode_arg<std::string>(as)))
1190 #define IMM_IA as.ue->emitIVA(as.getIterId( \
1191 read_opcode_arg<int32_t>(as)))
1192 #define IMM_OA(ty) as.ue->emitByte(read_subop<ty>(as));
1193 #define IMM_AA as.ue->emitInt32(as.ue->mergeArray(read_litarray(as)))
1196 * There can currently be no more than one immvector per instruction,
1197 * and we need access to the size of the immediate vector for
1198 * NUM_POP_*, so the member vector guy exposes a vecImmStackValues
1199 * integer.
1201 #define IMM_MA \
1202 int vecImmStackValues = 0; \
1203 auto vecImm = read_immvector(as, vecImmStackValues); \
1204 as.ue->emitInt32(vecImm.size()); \
1205 as.ue->emitInt32(vecImmStackValues); \
1206 for (size_t i = 0; i < vecImm.size(); ++i) { \
1207 as.ue->emitByte(vecImm[i]); \
1210 #define IMM_ILA do { \
1211 std::vector<uint32_t> vecImm = read_itervec(as); \
1212 as.ue->emitInt32(vecImm.size() / 2); \
1213 for (auto& i : vecImm) { \
1214 as.ue->emitInt32(i); \
1216 } while (0)
1218 #define IMM_BLA do { \
1219 std::vector<std::string> vecImm = read_jmpvector(as); \
1220 as.ue->emitInt32(vecImm.size()); \
1221 for (size_t i = 0; i < vecImm.size(); ++i) { \
1222 labelJumps.push_back( \
1223 std::make_pair(vecImm[i], as.ue->bcPos())); \
1224 as.ue->emitInt32(0); /* to be patched */ \
1226 } while (0)
1228 #define IMM_SLA do { \
1229 SSwitchJmpVector vecImm = read_sswitch_jmpvector(as); \
1230 as.ue->emitInt32(vecImm.size()); \
1231 for (auto const& pair : vecImm) { \
1232 as.ue->emitInt32(pair.first); \
1233 labelJumps.push_back( \
1234 std::make_pair(pair.second, as.ue->bcPos())); \
1235 as.ue->emitInt32(0); /* to be patched */ \
1237 } while(0)
1239 #define IMM_BA do { \
1240 labelJumps.push_back(std::make_pair( \
1241 read_opcode_arg<std::string>(as), \
1242 as.ue->bcPos() \
1243 )); \
1244 as.ue->emitInt32(0); \
1245 } while (0)
1247 #define NUM_PUSH_NOV 0
1248 #define NUM_PUSH_ONE(a) 1
1249 #define NUM_PUSH_TWO(a,b) 2
1250 #define NUM_PUSH_THREE(a,b,c) 3
1251 #define NUM_PUSH_INS_1(a) 1
1252 #define NUM_PUSH_INS_2(a) 1
1253 #define NUM_POP_NOV 0
1254 #define NUM_POP_ONE(a) 1
1255 #define NUM_POP_TWO(a,b) 2
1256 #define NUM_POP_THREE(a,b,c) 3
1257 #define NUM_POP_MMANY vecImmStackValues
1258 #define NUM_POP_V_MMANY (1 + vecImmStackValues)
1259 #define NUM_POP_R_MMANY (1 + vecImmStackValues)
1260 #define NUM_POP_C_MMANY (1 + vecImmStackValues)
1261 #define NUM_POP_FMANY immIVA /* number of arguments */
1262 #define NUM_POP_CVMANY immIVA /* number of arguments */
1263 #define NUM_POP_CVUMANY immIVA /* number of arguments */
1264 #define NUM_POP_CMANY immIVA /* number of arguments */
1265 #define NUM_POP_SMANY vecImmStackValues
1267 #define O(name, imm, pop, push, flags) \
1268 void parse_opcode_##name(AsmState& as) { \
1269 UNUSED int64_t immIVA = -1; \
1270 UNUSED auto const thisOpcode = Op::name; \
1271 UNUSED const Offset curOpcodeOff = as.ue->bcPos(); \
1272 std::vector<std::pair<std::string, Offset> > labelJumps; \
1274 TRACE( \
1275 4, \
1276 "%d\t[%s] %s\n", \
1277 as.in.getLineNumber(), \
1278 as.displayStackDepth().c_str(), \
1279 #name \
1280 ); \
1282 if (isFCallStar(Op##name)) { \
1283 as.endFpi(); \
1286 as.ue->emitOp(Op##name); \
1288 IMM_##imm; \
1290 int stackDelta = NUM_PUSH_##push - NUM_POP_##pop; \
1291 as.adjustStack(stackDelta); \
1293 if (isFPush(Op##name)) { \
1294 as.beginFpi(curOpcodeOff); \
1297 for (auto& kv : labelJumps) { \
1298 as.addLabelJump(kv.first, kv.second, curOpcodeOff); \
1301 /* Stack depth should be 0 after RetC or RetV. */ \
1302 if (thisOpcode == OpRetC || thisOpcode == OpRetV) { \
1303 as.enforceStackDepth(0); \
1306 /* Stack depth should be 1 after resume from suspend. */ \
1307 if (thisOpcode == OpCreateCont || thisOpcode == OpAwait || \
1308 thisOpcode == OpYield || thisOpcode == OpYieldK) { \
1309 as.enforceStackDepth(1); \
1312 if (instrFlags(thisOpcode) & InstrFlags::TF) { \
1313 as.enterUnreachableRegion(); \
1317 OPCODES
// Tear down every helper macro defined for the opcode parsers above
// so that none of them leak into the rest of this translation unit.
#undef O

#undef IMM_NA
#undef IMM_ONE
#undef IMM_TWO
#undef IMM_THREE
#undef IMM_FOUR

#undef IMM_I64A
#undef IMM_SA
#undef IMM_RATA
#undef IMM_DA
#undef IMM_IVA
#undef IMM_LA
#undef IMM_IA
#undef IMM_BA
#undef IMM_BLA
#undef IMM_SLA
#undef IMM_ILA
#undef IMM_OA
#undef IMM_MA
#undef IMM_AA
#undef IMM_VSA

#undef NUM_PUSH_NOV
#undef NUM_PUSH_ONE
#undef NUM_PUSH_TWO
#undef NUM_PUSH_THREE
#undef NUM_PUSH_POS_N
#undef NUM_PUSH_INS_1
#undef NUM_PUSH_INS_2
#undef NUM_POP_NOV
#undef NUM_POP_ONE
#undef NUM_POP_TWO
#undef NUM_POP_THREE
#undef NUM_POP_POS_N
#undef NUM_POP_MMANY
#undef NUM_POP_V_MMANY
#undef NUM_POP_R_MMANY
#undef NUM_POP_C_MMANY
#undef NUM_POP_FMANY
#undef NUM_POP_CVMANY
#undef NUM_POP_CVUMANY
#undef NUM_POP_CMANY
#undef NUM_POP_SMANY
1356 void initialize_opcode_map() {
1357 #define O(name, imm, pop, push, flags) \
1358 opcode_parsers[#name] = parse_opcode_##name;
1359 OPCODES
1360 #undef O
1363 struct Initializer {
1364 Initializer() { initialize_opcode_map(); }
1365 } initializer;
1367 //////////////////////////////////////////////////////////////////////
1370 * long-string-literal: <string>
1372 * `long-string-literal' is a python-style longstring. See
1373 * readLongString for more details.
1375 String parse_long_string(AsmState& as) {
1376 as.in.skipWhitespace();
1378 std::vector<char> buffer;
1379 if (!as.in.readLongString(buffer)) {
1380 as.error("expected \"\"\"-string of serialized php data");
1382 if (buffer.empty()) {
1383 as.error("empty php serialized data is not a valid php object");
1386 // String wants a null, and dereferences one past the size we give
1387 // it.
1388 buffer.push_back('\0');
1389 return String(&buffer[0], buffer.size() - 1, CopyString);
1393 * php-serialized : long-string-literal
1396 * `long-string-literal' is a python-style longstring. See
1397 * readLongString for more details.
1399 * Returns a Variant representing the serialized data. It's up to the
1400 * caller to make sure it is a legal literal.
1402 Variant parse_php_serialized(AsmState& as) {
1403 return unserialize_from_string(parse_long_string(as));
1407 * directive-numiters : integer ';'
1410 void parse_numiters(AsmState& as) {
1411 if (as.numItersSet) {
1412 as.error("only one .numiters directive may appear in a given function");
1414 int32_t count = read_opcode_arg<int32_t>(as);
1415 as.numItersSet = true;
1416 as.fe->setNumIterators(count);
1417 as.in.expectWs(';');
1420 void parse_function_body(AsmState&, int nestLevel = 0);
1423 * directive-fault : identifier integer? '{' function-body
1426 void parse_fault(AsmState& as, int nestLevel) {
1427 const Offset start = as.ue->bcPos();
1429 std::string label;
1430 if (!as.in.readword(label)) {
1431 as.error("expected label name after .try_fault");
1433 int iterId = -1;
1434 as.in.skipWhitespace();
1435 if (as.in.peek() != '{') {
1436 iterId = read_opcode_arg<int32_t>(as);
1438 as.in.expectWs('{');
1439 parse_function_body(as, nestLevel + 1);
1441 EHEnt& eh = as.fe->addEHEnt();
1442 eh.m_type = EHEnt::Type::Fault;
1443 eh.m_base = start;
1444 eh.m_past = as.ue->bcPos();
1445 eh.m_iterId = iterId;
1447 as.addLabelEHFault(label, as.fe->ehtab.size() - 1);
1451 * directive-catch : catch-spec+ '{' function-body
1454 * catch-spec : '(' identifier identifier ')'
1457 void parse_catch(AsmState& as, int nestLevel) {
1458 const Offset start = as.ue->bcPos();
1460 std::vector<std::pair<std::string,std::string> > catches;
1461 size_t numCatches = 0;
1462 as.in.skipWhitespace();
1463 for (; as.in.peek() == '('; ++numCatches) {
1464 as.in.getc();
1466 std::string except, label;
1467 if (!as.in.readword(except) || !as.in.readword(label)) {
1468 as.error("expected (ExceptionType label) after .try_catch");
1471 as.in.expectWs(')');
1473 catches.push_back(std::make_pair(except, label));
1474 as.in.skipWhitespace();
1476 if (catches.empty()) {
1477 as.error("expected at least one (ExceptionType label) pair "
1478 "after .try_catch");
1481 as.in.expect('{');
1482 parse_function_body(as, nestLevel + 1);
1484 EHEnt& eh = as.fe->addEHEnt();
1485 eh.m_type = EHEnt::Type::Catch;
1486 eh.m_base = start;
1487 eh.m_past = as.ue->bcPos();
1488 eh.m_iterId = -1;
1490 for (size_t i = 0; i < catches.size(); ++i) {
1491 as.addLabelEHCatch(catches[i].first,
1492 catches[i].second,
1493 as.fe->ehtab.size() - 1);
1498 * function-body : fbody-line* '}'
1501 * fbody-line : ".numiters" directive-numiters
1502 * | ".try_fault" directive-fault
1503 * | ".try_catch" directive-catch
1504 * | label-name
1505 * | opcode-line
1508 * label-name : identifier ':'
1511 * opcode-line : opcode-mnemonic <junk that depends on opcode> '\n'
1514 void parse_function_body(AsmState& as, int nestLevel /* = 0 */) {
1515 std::string word;
1516 for (;;) {
1517 as.in.skipWhitespace();
1518 if (as.in.peek() == '}') {
1519 as.in.getc();
1520 if (!nestLevel) {
1521 as.finishFunction();
1523 return;
1526 if (!as.in.readword(word)) {
1527 as.error("unexpected directive or opcode line in function body");
1529 if (word[0] == '.') {
1530 if (word == ".numiters") { parse_numiters(as); continue; }
1531 if (word == ".try_fault") { parse_fault(as, nestLevel); continue; }
1532 if (word == ".try_catch") { parse_catch(as, nestLevel); continue; }
1533 as.error("unrecognized directive `" + word + "' in function");
1535 if (as.in.peek() == ':') {
1536 as.in.getc();
1537 as.addLabelTarget(word);
1538 continue;
1541 // Ok, it better be an opcode now.
1542 OpcodeParserMap::const_iterator it = opcode_parsers.find(word);
1543 if (it == opcode_parsers.end()) {
1544 as.error("unrecognized opcode `" + word + "'");
1546 it->second(as);
1548 as.in.skipSpaceTab();
1549 if (as.in.peek() != '\n' && as.in.peek() != '#' && as.in.peek() != EOF) {
1550 as.error("too many arguments for opcode `" + word + "'");
1556 * attribute-list : empty
1557 * | '[' attribute-name* ']'
1560 * The `attribute-name' rule is context-sensitive; see as-shared.cpp.
1562 Attr parse_attribute_list(AsmState& as, AttrContext ctx) {
1563 as.in.skipWhitespace();
1564 int ret = AttrNone;
1565 if (ctx == AttrContext::Class || ctx == AttrContext::Func) {
1566 if (!SystemLib::s_inited) {
1567 ret |= AttrUnique | AttrPersistent | AttrBuiltin;
1570 if (as.in.peek() != '[') return Attr(ret);
1571 as.in.getc();
1573 std::string word;
1574 for (;;) {
1575 as.in.skipWhitespace();
1576 if (as.in.peek() == ']') break;
1577 if (!as.in.readword(word)) break;
1579 auto const abit = string_to_attr(ctx, word);
1580 if (abit) {
1581 ret |= *abit;
1582 continue;
1585 as.error("unrecognized attribute `" + word + "' in this context");
1587 as.in.expect(']');
1588 return Attr(ret);
1592 * parameter-list : '(' param-name-list ')'
1595 * param-name-list : empty
1596 * | param-name ',' param-name-list
1599 * param-name : '$' identifier dv-initializer
1600 * | '&' '$' identifier dv-initializer
1603 * dv-initializer : empty
1604 * | '=' identifier arg-default
1607 * arg-default : empty
1608 * | '(' long-string-literal ')'
1611 void parse_parameter_list(AsmState& as) {
1612 as.in.skipWhitespace();
1613 if (as.in.peek() != '(') return;
1614 as.in.getc();
1616 // Once we see one dv-initializer, every parameter after that must
1617 // have a dv-initializer.
1618 bool inDVInits = false;
1620 for (;;) {
1621 FuncEmitter::ParamInfo param;
1623 as.in.skipWhitespace();
1624 int ch = as.in.getc();
1625 if (ch == ')') break; // allow empty param lists
1626 if (ch == '.') {
1627 if (as.in.getc() != '.' ||
1628 as.in.getc() != '.') {
1629 as.error("expecting '...'");
1631 as.in.expectWs(')');
1632 as.fe->attrs |= AttrMayUseVV;
1633 break;
1635 if (ch == '&') {
1636 param.byRef = true;
1637 ch = as.in.getc();
1639 if (ch != '$') {
1640 as.error("function parameters must have a $ prefix");
1642 std::string name;
1643 if (!as.in.readword(name)) {
1644 as.error("expected parameter name after $");
1647 as.in.skipWhitespace();
1648 ch = as.in.getc();
1649 if (ch == '=') {
1650 inDVInits = true;
1652 std::string label;
1653 if (!as.in.readword(label)) {
1654 as.error("expected label name for dv-initializer");
1656 as.addLabelDVInit(label, as.fe->params.size());
1658 as.in.skipWhitespace();
1659 ch = as.in.getc();
1660 if (ch == '(') {
1661 String str = parse_long_string(as);
1662 param.phpCode = makeStaticString(str);
1663 TypedValue tv;
1664 tvWriteUninit(&tv);
1665 if (str.size() == 4) {
1666 if (!strcasecmp("null", str.data())) {
1667 tvWriteNull(&tv);
1668 } else if (!strcasecmp("true", str.data())) {
1669 tv = make_tv<KindOfBoolean>(true);
1671 } else if (str.size() == 5 && !strcasecmp("false", str.data())) {
1672 tv = make_tv<KindOfBoolean>(false);
1674 if (tv.m_type != KindOfUninit) {
1675 param.defaultValue = tv;
1677 as.in.expectWs(')');
1678 as.in.skipWhitespace();
1679 ch = as.in.getc();
1681 } else {
1682 if (inDVInits) {
1683 as.error("all parameters after the first with a dv-initializer "
1684 "must have a dv-initializer");
1688 as.fe->appendParam(makeStaticString(name), param);
1690 if (ch == ')') break;
1691 if (ch != ',') as.error("expected , between parameter names");
1695 void parse_function_flags(AsmState& as) {
1696 as.in.skipWhitespace();
1697 std::string flag;
1698 for (;;) {
1699 if (as.in.peek() == '{') break;
1700 if (!as.in.readword(flag)) break;
1702 if (flag == "isGenerator") {
1703 as.fe->isGenerator = true;
1704 } else if (flag == "isAsync") {
1705 as.fe->isAsync = true;
1706 } else if (flag == "isClosureBody") {
1707 as.fe->isClosureBody = true;
1708 } else if (flag == "isPairGenerator") {
1709 as.fe->isPairGenerator = true;
1710 } else {
1711 as.error("Unexpected function flag \"" + flag + "\"");
1717 * directive-function : attribute-list identifier parameter-list function-flags
1718 * '{' function-body
1721 void parse_function(AsmState& as) {
1722 if (!as.emittedPseudoMain) {
1723 as.error(".function blocks must all follow the .main block");
1726 Attr attrs = parse_attribute_list(as, AttrContext::Func);
1727 std::string name;
1728 if (!as.in.readword(name)) {
1729 as.error(".function must have a name");
1732 as.fe = as.ue->newFuncEmitter(makeStaticString(name));
1733 as.fe->init(as.in.getLineNumber(), as.in.getLineNumber() + 1 /* XXX */,
1734 as.ue->bcPos(), attrs, true, 0);
1736 parse_parameter_list(as);
1737 parse_function_flags(as);
1739 as.in.expectWs('{');
1741 parse_function_body(as);
1745 * directive-method : attribute-list identifier parameter-list
1746 * '{' function-body
1749 void parse_method(AsmState& as) {
1750 as.in.skipWhitespace();
1752 Attr attrs = parse_attribute_list(as, AttrContext::Func);
1753 std::string name;
1754 if (!as.in.readword(name)) {
1755 as.error(".method requires a method name");
1758 as.fe = as.ue->newMethodEmitter(makeStaticString(name), as.pce);
1759 as.pce->addMethod(as.fe);
1760 as.fe->init(as.in.getLineNumber(), as.in.getLineNumber() + 1 /* XXX */,
1761 as.ue->bcPos(), attrs, true, 0);
1763 parse_parameter_list(as);
1764 as.in.expectWs('{');
1766 parse_function_body(as);
1770 * member-tv-initializer : '=' php-serialized ';'
1771 * | '=' uninit ';'
1772 * | ';'
1775 TypedValue parse_member_tv_initializer(AsmState& as) {
1776 as.in.skipWhitespace();
1778 TypedValue tvInit;
1779 tvWriteNull(&tvInit); // Don't confuse Variant with uninit data
1781 int what = as.in.getc();
1782 if (what == '=') {
1783 as.in.skipWhitespace();
1785 if (as.in.peek() != '\"') {
1786 // It might be an uninitialized property/constant.
1787 if (!as.in.tryConsume("uninit")) {
1788 as.error("Expected \"\"\" or \"uninit\" after '=' in "
1789 "const/property initializer");
1791 as.in.expectWs(';');
1792 tvWriteUninit(&tvInit);
1793 return tvInit;
1796 tvAsVariant(&tvInit) = parse_php_serialized(as);
1797 if (IS_STRING_TYPE(tvInit.m_type)) {
1798 tvInit.m_data.pstr = makeStaticString(tvInit.m_data.pstr);
1799 as.ue->mergeLitstr(tvInit.m_data.pstr);
1800 } else if (IS_ARRAY_TYPE(tvInit.m_type)) {
1801 tvInit.m_data.parr = ArrayData::GetScalarArray(tvInit.m_data.parr);
1802 as.ue->mergeArray(tvInit.m_data.parr);
1803 } else if (tvInit.m_type == KindOfObject) {
1804 as.error("property initializer can't be an object");
1805 } else if (tvInit.m_type == KindOfResource) {
1806 as.error("property initializer can't be a resource");
1808 as.in.expectWs(';');
1809 } else if (what == ';') {
1810 // already null
1811 } else {
1812 as.error("expected '=' or ';' after property name");
1815 return tvInit;
1819 * directive-property : attribute-list identifier member-tv-initializer
1823 void parse_property(AsmState& as) {
1824 as.in.skipWhitespace();
1826 Attr attrs = parse_attribute_list(as, AttrContext::Prop);
1827 std::string name;
1828 if (!as.in.readword(name)) {
1829 as.error("expected name for property");
1832 TypedValue tvInit = parse_member_tv_initializer(as);
1833 as.pce->addProperty(makeStaticString(name),
1834 attrs, staticEmptyString(),
1835 staticEmptyString(),
1836 &tvInit,
1837 RepoAuthType{});
1841 * directive-const : identifier member-tv-initializer
1844 void parse_constant(AsmState& as) {
1845 as.in.skipWhitespace();
1847 std::string name;
1848 if (!as.in.readword(name)) {
1849 as.error("expected name for constant");
1852 TypedValue tvInit = parse_member_tv_initializer(as);
1853 as.pce->addConstant(makeStaticString(name),
1854 staticEmptyString(), &tvInit,
1855 staticEmptyString());
1859 * directive-default-ctor : ';'
1862 * Creates an 86ctor stub for the class.
1864 void parse_default_ctor(AsmState& as) {
1865 assert(!as.fe && as.pce);
1867 as.fe = as.ue->newMethodEmitter(
1868 makeStaticString("86ctor"), as.pce);
1869 as.pce->addMethod(as.fe);
1870 as.fe->init(as.in.getLineNumber(), as.in.getLineNumber(),
1871 as.ue->bcPos(), AttrPublic, true, 0);
1872 as.ue->emitOp(OpNull);
1873 as.ue->emitOp(OpRetC);
1874 as.stackHighWater = 1;
1875 as.finishFunction();
1877 as.in.expectWs(';');
1881 * directive-use : identifier+ ';'
1882 * | identifier+ '{' use-line* '}'
1885 * use-line : use-name-ref "insteadof" identifier+ ';'
1886 * | use-name-ref "as" attribute-list identifier ';'
1887 * | use-name-ref "as" attribute-list ';'
1890 void parse_use(AsmState& as) {
1891 std::vector<std::string> usedTraits;
1892 for (;;) {
1893 std::string name;
1894 if (!as.in.readword(name)) break;
1895 usedTraits.push_back(name);
1897 if (usedTraits.empty()) {
1898 as.error(".use requires a trait name");
1901 for (size_t i = 0; i < usedTraits.size(); ++i) {
1902 as.pce->addUsedTrait(makeStaticString(usedTraits[i]));
1904 as.in.skipWhitespace();
1905 if (as.in.peek() != '{') {
1906 as.in.expect(';');
1907 return;
1909 as.in.getc();
1911 for (;;) {
1912 as.in.skipWhitespace();
1913 if (as.in.peek() == '}') break;
1915 std::string traitName;
1916 std::string identifier;
1917 if (!as.in.readword(traitName)) {
1918 as.error("expected identifier for line in .use block");
1920 as.in.skipWhitespace();
1921 if (as.in.peek() == ':') {
1922 as.in.getc();
1923 as.in.expect(':');
1924 if (!as.in.readword(identifier)) {
1925 as.error("expected identifier after ::");
1927 } else {
1928 identifier = traitName;
1929 traitName.clear();
1932 if (as.in.tryConsume("as")) {
1933 Attr attrs = parse_attribute_list(as, AttrContext::TraitImport);
1934 std::string alias;
1935 if (!as.in.readword(alias)) {
1936 if (attrs != AttrNone) {
1937 alias = identifier;
1938 } else {
1939 as.error("expected identifier or attribute list after "
1940 "`as' in .use block");
1944 as.pce->addTraitAliasRule(PreClass::TraitAliasRule(
1945 makeStaticString(traitName),
1946 makeStaticString(identifier),
1947 makeStaticString(alias),
1948 attrs));
1949 } else if (as.in.tryConsume("insteadof")) {
1950 if (traitName.empty()) {
1951 as.error("Must specify TraitName::name when using a trait insteadof");
1954 PreClass::TraitPrecRule precRule(
1955 makeStaticString(traitName),
1956 makeStaticString(identifier));
1958 bool addedOtherTraits = false;
1959 std::string whom;
1960 while (as.in.readword(whom)) {
1961 precRule.addOtherTraitName(makeStaticString(whom));
1962 addedOtherTraits = true;
1964 if (!addedOtherTraits) {
1965 as.error("one or more trait names expected after `insteadof'");
1968 as.pce->addTraitPrecRule(precRule);
1969 } else {
1970 as.error("expected `as' or `insteadof' in .use block");
1973 as.in.expectWs(';');
1976 as.in.expect('}');
1980 * class-body : class-body-line* '}'
1983 * class-body-line : ".method" directive-method
1984 * | ".property" directive-property
1985 * | ".const" directive-const
1986 * | ".use" directive-use
1987 * | ".default_ctor" directive-default-ctor
1990 void parse_class_body(AsmState& as) {
1991 if (!as.emittedPseudoMain) {
1992 as.error(".class blocks must all follow the .main block");
1995 std::string directive;
1996 while (as.in.readword(directive)) {
1997 if (directive == ".method") { parse_method(as); continue; }
1998 if (directive == ".property") { parse_property(as); continue; }
1999 if (directive == ".const") { parse_constant(as); continue; }
2000 if (directive == ".use") { parse_use(as); continue; }
2001 if (directive == ".default_ctor") { parse_default_ctor(as); continue; }
2003 as.error("unrecognized directive `" + directive + "' in class");
2005 as.in.expect('}');
2006 as.finishClass();
2010 * directive-class : attribute-list identifier extension-clause
2011 * implements-clause '{' class-body
2014 * extension-clause : empty
2015 * | "extends" identifier
2018 * implements-clause : empty
2019 * | "implements" '(' identifier* ')'
2023 void parse_class(AsmState& as) {
2024 as.in.skipWhitespace();
2026 Attr attrs = parse_attribute_list(as, AttrContext::Class);
2027 std::string name;
2028 if (!as.in.readword(name)) {
2029 as.error(".class must have a name");
2032 std::string parentName;
2033 if (as.in.tryConsume("extends")) {
2034 if (!as.in.readword(parentName)) {
2035 as.error("expected parent class name after `extends'");
2039 std::vector<std::string> ifaces;
2040 if (as.in.tryConsume("implements")) {
2041 as.in.expectWs('(');
2042 std::string word;
2043 while (as.in.readword(word)) {
2044 ifaces.push_back(word);
2046 as.in.expect(')');
2049 as.pce = as.ue->newPreClassEmitter(makeStaticString(name),
2050 PreClass::MaybeHoistable);
2051 as.pce->init(as.in.getLineNumber(),
2052 as.in.getLineNumber() + 1, // XXX
2053 as.ue->bcPos(),
2054 attrs,
2055 makeStaticString(parentName),
2056 staticEmptyString());
2057 for (size_t i = 0; i < ifaces.size(); ++i) {
2058 as.pce->addInterface(makeStaticString(ifaces[i]));
2061 as.in.expectWs('{');
2062 parse_class_body(as);
2066 * directive-filepath : quoted-string-literal ';'
2069 void parse_filepath(AsmState& as) {
2070 auto const str = read_litstr(as);
2071 as.ue->m_filepath = str;
2072 as.in.expectWs(';');
2076 * directive-main : '{' function-body
2079 void parse_main(AsmState& as) {
2080 if (as.emittedPseudoMain) {
2081 if (!SystemLib::s_inited) {
2082 as.error(".main found in systemlib");
2083 } else {
2084 as.error("Multiple .main directives found");
2087 as.in.expectWs('{');
2089 as.ue->initMain(as.in.getLineNumber(),
2090 as.in.getLineNumber() + 1 /* XXX */);
2091 as.fe = as.ue->getMain();
2092 as.emittedPseudoMain = true;
2093 parse_function_body(as);
2097 * directive-adata : identifier '=' php-serialized ';'
2100 void parse_adata(AsmState& as) {
2101 as.in.skipWhitespace();
2102 std::string dataLabel;
2103 if (!as.in.readword(dataLabel)) {
2104 as.error("expected name for .adata");
2106 if (as.adataMap.count(dataLabel)) {
2107 as.error("duplicate adata label name " + dataLabel);
2110 as.in.expectWs('=');
2111 Variant var = parse_php_serialized(as);
2112 if (!var.isArray()) {
2113 as.error(".adata only supports serialized arrays");
2115 Array arr(var.toArray());
2116 ArrayData* data = ArrayData::GetScalarArray(arr.get());
2117 as.ue->mergeArray(data);
2118 as.adataMap[dataLabel] = data;
2120 as.in.expectWs(';');
2124 * asm-file : asm-tld* <EOF>
2127 * asm-tld : ".filepath" directive-filepath
2128 * | ".main" directive-main
2129 * | ".function" directive-function
2130 * | ".adata" directive-adata
2131 * | ".class" directive-class
2134 void parse(AsmState& as) {
2135 as.in.skipWhitespace();
2136 std::string directive;
2137 if (!SystemLib::s_inited) {
2139 * The SystemLib::s_hhas_unit is required to be merge-only,
2140 * and we create the source by concatenating separate .hhas files
2141 * Rather than choosing one to have the .main directive, we just
2142 * generate a trivial pseudoMain automatically.
2144 as.ue->addTrivialPseudoMain();
2145 as.emittedPseudoMain = true;
2148 while (as.in.readword(directive)) {
2149 if (directive == ".filepath") { parse_filepath(as); continue; }
2150 if (directive == ".main") { parse_main(as); continue; }
2151 if (directive == ".function") { parse_function(as); continue; }
2152 if (directive == ".adata") { parse_adata(as); continue; }
2153 if (directive == ".class") { parse_class(as); continue; }
2155 as.error("unrecognized top-level directive `" + directive + "'");
2158 if (!as.emittedPseudoMain) {
2159 as.error("no .main found in hhas unit");
2165 //////////////////////////////////////////////////////////////////////
2167 UnitEmitter* assemble_string(const char* code, int codeLen,
2168 const char* filename, const MD5& md5) {
2169 std::unique_ptr<UnitEmitter> ue(new UnitEmitter(md5));
2170 StringData* sd = makeStaticString(filename);
2171 ue->m_filepath = sd;
2173 try {
2174 std::istringstream instr(std::string(code, codeLen));
2175 AsmState as(instr);
2176 as.ue = ue.get();
2177 parse(as);
2178 } catch (const std::exception& e) {
2179 ue.reset(new UnitEmitter(md5));
2180 ue->m_filepath = sd;
2181 ue->initMain(1, 1);
2182 ue->emitOp(OpString);
2183 ue->emitInt32(ue->mergeLitstr(makeStaticString(e.what())));
2184 ue->emitOp(OpFatal);
2185 ue->emitByte(static_cast<uint8_t>(FatalOp::Runtime));
2186 FuncEmitter* fe = ue->getMain();
2187 fe->maxStackCells = 1;
2188 // XXX line numbers are bogus
2189 fe->finish(ue->bcPos(), false);
2190 ue->recordFunction(fe);
2193 return ue.release();
2196 //////////////////////////////////////////////////////////////////////