Codemod following promotion
[hiphop-php.git] / hphp / hhbbc / parse.cpp
bloba6941a2857e7f620d21a03b517b36d2bd8e32ad6
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
#include "hphp/hhbbc/parse.h"

#include <cstring>
#include <map>
#include <mutex>
#include <thread>
#include <unordered_map>

#include <boost/next_prior.hpp>

#include "folly/gen/Base.h"
#include "folly/gen/String.h"
#include "folly/ScopeGuard.h"
#include "folly/Memory.h"

#include "hphp/runtime/vm/preclass-emit.h"
#include "hphp/runtime/vm/unit.h"
#include "hphp/runtime/vm/func.h"

#include "hphp/hhbbc/representation.h"
#include "hphp/hhbbc/cfg.h"
#include "hphp/hhbbc/unit-util.h"
38 namespace HPHP { namespace HHBBC {
40 TRACE_SET_MOD(hhbbc);
42 namespace {
44 //////////////////////////////////////////////////////////////////////
46 const StaticString s_Closure("Closure");
48 //////////////////////////////////////////////////////////////////////
50 struct ParseUnitState {
52 * This is computed once for each unit and stashed here.
54 SourceLocTable srcLocTable;
57 * Map from class id to the function containing its DefCls
58 * instruction. We use this to compute whether classes are defined
59 * at top-level.
61 * TODO_4: if we don't end up with a use for this, remove it.
63 std::vector<borrowed_ptr<php::Func>> defClsMap;
66 * Map from Closure names to the function(s) containing their
67 * associated CreateCl opcode(s).
69 std::unordered_map<
70 SString,
71 std::unordered_set<borrowed_ptr<php::Func>>,
72 string_data_hash,
73 string_data_isame
74 > createClMap;
77 * Generators come as two functions, "inner" and "outer". Part of
78 * our representation requires that we have the two linked uniquely
79 * by pointer. (See representation.h.)
81 * However, hhbc metadata only loosely connects these with the
82 * "outer" function having a name to the inner function. The
83 * context this name should be looked up in depends on what type of
84 * generator it was---if it is a method, but not a generator from a
85 * closure, it'll be on the class that contains the "outer"
86 * function. These get linked together as part of parse_class.
88 * In the other cases, we need to link them after the whole unit has
89 * been viewed, with information tracked here.
91 * Invariant: we are assuming that generator bodies that aren't on
92 * classes have unique names within a unit (this is asserted as we
93 * add to this map). This should not be hard to maintain, but
94 * there's nowhere to document this invariant in
95 * bytecode.specification right now, which doesn't specify how
96 * generators work.
98 * Right now, multiple definitions of non-top-level generator
99 * functions simply don't work (see #2906383), so this is definitely
100 * true. If we fix that task, we will need to ensure it doesn't
101 * break this invariant.
103 std::vector<std::pair<borrowed_ptr<php::Func>,SString>> generatorsToLink;
104 std::unordered_map<
105 SString,
106 borrowed_ptr<php::Func>,
107 string_data_hash,
108 string_data_isame
109 > innerGenerators;
112 //////////////////////////////////////////////////////////////////////
114 std::set<Offset> findBasicBlocks(const FuncEmitter& fe) {
115 std::set<Offset> blockStarts;
116 auto markBlock = [&] (Offset off) { blockStarts.insert(off); };
118 // Each entry point for a DV funclet is the start of a basic
119 // block.
120 for (auto& param : fe.params()) {
121 if (param.hasDefaultValue()) markBlock(param.funcletOff());
124 // The main entry point is also a basic block start.
125 markBlock(fe.base());
128 * For each instruction, add it to the set if it must be the start
129 * of a block. It is the start of a block if it is:
131 * - A jump target
133 * - Immediatelly following a control flow instruction, other than
134 * a call.
136 auto offset = fe.base();
137 for (;;) {
138 auto const bc = fe.ue().bc();
139 auto const pc = reinterpret_cast<const Op*>(bc + offset);
140 auto const nextOff = offset + instrLen(pc);
141 auto const atLast = nextOff == fe.past();
143 if (instrIsNonCallControlFlow(*pc) && !atLast) {
144 markBlock(nextOff);
147 if (isSwitch(*pc)) {
148 foreachSwitchTarget(pc, [&] (Offset delta) {
149 markBlock(offset + delta);
151 } else {
152 auto const target = instrJumpTarget(
153 reinterpret_cast<const Op*>(bc),
154 offset
156 if (target != InvalidAbsoluteOffset) markBlock(target);
159 offset = nextOff;
160 if (atLast) break;
164 * Find blocks associated with exception handlers.
166 * - The start of each fault-protected region begins a block.
168 * - The instruction immediately after the end of any
169 * fault-protected region begins a block.
171 * - Each fault or catch entry point begins a block.
173 for (auto& eh : fe.ehtab()) {
174 markBlock(eh.m_base);
175 markBlock(eh.m_past);
176 switch (eh.m_type) {
177 case EHEnt::Type::Catch:
178 for (auto& centry : eh.m_catches) markBlock(centry.second);
179 break;
180 case EHEnt::Type::Fault:
181 markBlock(eh.m_fault);
182 break;
186 // Now, each interval in blockStarts delinates a basic block.
187 blockStarts.insert(fe.past());
188 return blockStarts;
191 struct ExnTreeInfo {
193 * Map from EHEnt to the ExnNode that will represent exception
194 * behavior in that region.
196 std::map<const EHEnt*,borrowed_ptr<php::ExnNode>> ehMap;
199 * Fault funclets don't actually fall in the EHEnt region for all of
200 * their parent handlers in HHBC. There may be EHEnt regions
201 * covering the fault funclet, but if an exception occurs in the
202 * funclet it can also propagate to any EH region from the code that
203 * entered the funclet. We want factored exit edges from the fault
204 * funclets to any of these enclosing catch blocks (or other
205 * enclosing funclet blocks).
207 * Moreover, funclet offsets can be entered from multiple protected
208 * regions, so we need to keep a map of all the possible regions
209 * that could have entered a given funclet, so we can add exit edges
210 * to all their parent EHEnt handlers.
212 std::map<borrowed_ptr<php::Block>,std::vector<borrowed_ptr<php::ExnNode>>>
213 funcletNodes;
216 * Keep track of the start offsets for all fault funclets. This is
217 * used to find the extents of each handler for find_fault_funclets.
218 * It is assumed that each fault funclet handler extends from its
219 * entry offset until the next fault funclet entry offset (or end of
220 * the function).
222 * This relies on the following bytecode invariants:
224 * - All fault funclets come after the primary function body.
226 * - Each fault funclet is a contiguous region of bytecode that
227 * does not jump into other fault funclets or into the primary
228 * function body.
230 * - Nothing comes after the fault funclets.
232 std::set<Offset> faultFuncletStarts;
235 template<class FindBlock>
236 ExnTreeInfo build_exn_tree(const FuncEmitter& fe,
237 php::Func& func,
238 FindBlock findBlock) {
239 ExnTreeInfo ret;
240 auto nextExnNode = uint32_t{0};
242 for (auto& eh : fe.ehtab()) {
243 auto node = folly::make_unique<php::ExnNode>();
244 node->id = nextExnNode++;
245 node->parent = nullptr;
247 switch (eh.m_type) {
248 case EHEnt::Type::Fault:
250 auto const fault = findBlock(eh.m_fault);
251 ret.funcletNodes[fault].push_back(borrow(node));
252 ret.faultFuncletStarts.insert(eh.m_fault);
253 node->info = php::FaultRegion { fault, eh.m_iterId, eh.m_itRef };
255 break;
256 case EHEnt::Type::Catch:
258 auto treg = php::TryRegion {};
259 for (auto& centry : eh.m_catches) {
260 auto const catchBlk = findBlock(centry.second);
261 treg.catches.emplace_back(
262 fe.ue().lookupLitstr(centry.first),
263 catchBlk
266 node->info = treg;
268 break;
271 ret.ehMap[&eh] = borrow(node);
273 if (eh.m_parentIndex != -1) {
274 auto it = ret.ehMap.find(&fe.ehtab()[eh.m_parentIndex]);
275 assert(it != end(ret.ehMap));
276 node->parent = it->second;
277 it->second->children.emplace_back(std::move(node));
278 } else {
279 func.exnNodes.emplace_back(std::move(node));
283 ret.faultFuncletStarts.insert(fe.past());
285 return ret;
289 * Instead of breaking blocks on instructions that could throw, we
290 * represent the control flow edges for exception paths as a set of
291 * factored edges at the end of each block.
293 * When we initially add them here, no attempt is made to determine if
294 * the edge is actually possible to traverse.
296 void add_factored_exits(php::Block& blk,
297 borrowed_ptr<const php::ExnNode> node) {
298 for (; node; node = node->parent) {
299 match<void>(
300 node->info,
301 [&] (const php::TryRegion& tr) {
303 * Note: it seems like we should be able to stop adding edges
304 * when we see a catch handler for Exception; however, fatal
305 * errors don't stop there (and still run Fault handlers).
307 * For now we add all the edges, although we might be able to be
308 * less pessimistic later.
310 for (auto& c : tr.catches) {
311 blk.factoredExits.push_back(c.second);
314 [&] (const php::FaultRegion& fr) {
315 blk.factoredExits.push_back(fr.faultEntry);
322 * Locate all the basic blocks associated with fault funclets, and
323 * mark them as such. Also, add factored exit edges for exceptional
324 * control flow through any parent protected regions of the region(s)
325 * that pointed at each fault handler.
327 template<class BlockStarts, class FindBlock>
328 void find_fault_funclets(ExnTreeInfo& tinfo,
329 const php::Func& func,
330 const BlockStarts& blockStarts,
331 FindBlock findBlock) {
332 auto sectionId = uint32_t{1};
334 for (auto funcletStartIt = begin(tinfo.faultFuncletStarts);
335 boost::next(funcletStartIt) != end(tinfo.faultFuncletStarts);
336 ++funcletStartIt, ++sectionId) {
337 auto const nextFunclet = *boost::next(funcletStartIt);
339 auto offIt = blockStarts.find(*funcletStartIt);
340 assert(offIt != end(blockStarts));
342 auto const firstBlk = findBlock(*offIt);
343 auto const funcletIt = tinfo.funcletNodes.find(firstBlk);
344 assert(funcletIt != end(tinfo.funcletNodes));
345 assert(!funcletIt->second.empty());
347 do {
348 auto const blk = findBlock(*offIt);
349 blk->section = static_cast<php::Block::Section>(sectionId);
351 // Propagate the exit edges to the containing fault/try handlers,
352 // if there were any.
353 for (auto& node : funcletIt->second) {
354 add_factored_exits(*blk, node->parent);
357 // Fault funclets can have protected regions which may point to
358 // handlers that are also listed in parents of the EH-region that
359 // targets the funclet. This means we might have duplicate
360 // factored exits now, so we need to remove them.
361 std::sort(begin(blk->factoredExits), end(blk->factoredExits));
362 blk->factoredExits.erase(
363 std::unique(begin(blk->factoredExits), end(blk->factoredExits)),
364 end(blk->factoredExits)
367 ++offIt;
368 } while (offIt != end(blockStarts) && *offIt < nextFunclet);
372 template<class T> T decode(PC& pc) {
373 auto const ret = *reinterpret_cast<const T*>(pc);
374 pc += sizeof ret;
375 return ret;
378 template<class FindBlock>
379 void populate_block(ParseUnitState& puState,
380 const FuncEmitter& fe,
381 php::Func& func,
382 php::Block& blk,
383 PC pc,
384 PC const past,
385 FindBlock findBlock) {
386 auto const& ue = fe.ue();
388 auto decode_minstr = [&] {
389 auto const immVec = ImmVector::createFromStream(pc);
390 pc += immVec.size() + sizeof(int32_t) + sizeof(int32_t);
392 auto ret = MVector {};
393 auto vec = immVec.vec();
395 ret.lcode = static_cast<LocationCode>(*vec++);
396 if (numLocationCodeImms(ret.lcode)) {
397 assert(numLocationCodeImms(ret.lcode) == 1);
398 ret.locBase = borrow(func.locals[decodeVariableSizeImm(&vec)]);
401 while (vec < pc) {
402 auto elm = MElem {};
403 elm.mcode = static_cast<MemberCode>(*vec++);
404 switch (memberCodeImmType(elm.mcode)) {
405 case MCodeImm::None: break;
406 case MCodeImm::Local:
407 elm.immLoc = borrow(func.locals[decodeMemberCodeImm(&vec, elm.mcode)]);
408 break;
409 case MCodeImm::String:
410 elm.immStr = ue.lookupLitstr(decodeMemberCodeImm(&vec, elm.mcode));
411 break;
412 case MCodeImm::Int:
413 elm.immInt = decodeMemberCodeImm(&vec, elm.mcode);
414 break;
416 ret.mcodes.push_back(elm);
418 assert(vec == pc);
420 return ret;
423 auto decode_stringvec = [&] {
424 auto const vecLen = decode<int32_t>(pc);
425 std::vector<SString> keys;
426 for (auto i = size_t{0}; i < vecLen; ++i) {
427 keys.push_back(ue.lookupLitstr(decode<int32_t>(pc)));
429 return keys;
432 auto decode_switch = [&] (PC opPC) {
433 SwitchTab ret;
434 auto const vecLen = decode<int32_t>(pc);
435 for (int32_t i = 0; i < vecLen; ++i) {
436 ret.push_back(findBlock(
437 opPC + decode<Offset>(pc) - ue.bc()
440 return ret;
443 auto decode_sswitch = [&] (PC opPC) {
444 SSwitchTab ret;
446 auto const vecLen = decode<int32_t>(pc);
447 for (int32_t i = 0; i < vecLen - 1; ++i) {
448 auto const id = decode<Id>(pc);
449 auto const offset = decode<Offset>(pc);
450 ret.emplace_back(
451 ue.lookupLitstr(id),
452 findBlock(opPC + offset - ue.bc())
456 // Final case is the default, and must have a litstr id of -1.
457 DEBUG_ONLY auto const defId = decode<Id>(pc);
458 auto const defOff = decode<Offset>(pc);
459 assert(defId == -1);
460 ret.emplace_back(nullptr, findBlock(opPC + defOff - ue.bc()));
461 return ret;
464 auto decode_itertab = [&] {
465 IterTab ret;
466 auto const vecLen = decode<int32_t>(pc);
467 for (int32_t i = 0; i < vecLen; ++i) {
468 auto const kind = static_cast<IterKind>(decode<int32_t>(pc));
469 auto const id = decode<int32_t>(pc);
470 ret.emplace_back(kind, borrow(func.iters[id]));
472 return ret;
475 auto defcls = [&] (const Bytecode& b) {
476 puState.defClsMap[b.DefCls.arg1] = &func;
478 auto nopdefcls = [&] (const Bytecode& b) {
479 puState.defClsMap[b.NopDefCls.arg1] = &func;
481 auto createcl = [&] (const Bytecode& b) {
482 puState.createClMap[b.CreateCl.str2].insert(&func);
485 #define IMM_MA(n) auto mvec = decode_minstr();
486 #define IMM_BLA(n) auto targets = decode_switch(opPC);
487 #define IMM_SLA(n) auto targets = decode_sswitch(opPC);
488 #define IMM_ILA(n) auto iterTab = decode_itertab();
489 #define IMM_IVA(n) auto arg##n = decodeVariableSizeImm(&pc);
490 #define IMM_I64A(n) auto arg##n = decode<int64_t>(pc);
491 #define IMM_LA(n) auto loc##n = [&] { \
492 auto id = decodeVariableSizeImm(&pc); \
493 always_assert(id < func.locals.size()); \
494 return borrow(func.locals[id]); \
495 }();
496 #define IMM_IA(n) auto iter##n = [&] { \
497 auto id = decodeVariableSizeImm(&pc); \
498 always_assert(id < func.iters.size()); \
499 return borrow(func.iters[id]); \
500 }();
501 #define IMM_DA(n) auto dbl##n = decode<double>(pc);
502 #define IMM_SA(n) auto str##n = ue.lookupLitstr(decode<Id>(pc));
503 #define IMM_AA(n) auto arr##n = ue.lookupArray(decode<Id>(pc));
504 #define IMM_BA(n) assert(next == past); \
505 auto target = findBlock( \
506 opPC + decode<Offset>(pc) - ue.bc());
507 #define IMM_OA_IMPL(n) decode<uint8_t>(pc);
508 #define IMM_OA(type) auto subop = (type)IMM_OA_IMPL
509 #define IMM_VSA(n) auto keys = decode_stringvec();
511 #define IMM_NA
512 #define IMM_ONE(x) IMM_##x(1)
513 #define IMM_TWO(x, y) IMM_##x(1) IMM_##y(2)
514 #define IMM_THREE(x, y, z) IMM_TWO(x, y) IMM_##z(3)
515 #define IMM_FOUR(x, y, z, n) IMM_THREE(x, y, z) IMM_##n(4)
517 #define IMM_ARG(which, n) IMM_NAME_##which(n)
518 #define IMM_ARG_NA
519 #define IMM_ARG_ONE(x) IMM_ARG(x, 1)
520 #define IMM_ARG_TWO(x, y) IMM_ARG(x, 1), IMM_ARG(y, 2)
521 #define IMM_ARG_THREE(x, y, z) IMM_ARG(x, 1), IMM_ARG(y, 2), \
522 IMM_ARG(z, 3)
523 #define IMM_ARG_FOUR(x, y, z, l) IMM_ARG(x, 1), IMM_ARG(y, 2), \
524 IMM_ARG(z, 3), IMM_ARG(l, 4)
527 #define O(opcode, imms, inputs, outputs, flags) \
528 case Op::opcode: \
530 ++pc; \
531 auto b = Bytecode {}; \
532 b.op = Op::opcode; \
533 b.srcLoc = srcLoc; \
534 IMM_##imms \
535 new (&b.opcode) bc::opcode { IMM_ARG_##imms }; \
536 if (Op::opcode == Op::DefCls) defcls(b); \
537 if (Op::opcode == Op::NopDefCls) nopdefcls(b); \
538 if (Op::opcode == Op::CreateCl) createcl(b); \
539 blk.hhbcs.push_back(std::move(b)); \
540 assert(pc == next); \
542 break;
544 assert(pc != past);
545 do {
546 auto const opPC = pc;
547 auto const pop = reinterpret_cast<const Op*>(pc);
548 auto const next = pc + instrLen(pop);
549 assert(next <= past);
551 auto const srcLoc = [&] {
552 SourceLoc sloc;
553 if (getSourceLoc(puState.srcLocTable, opPC - ue.bc(), sloc)) {
554 return php::SrcLoc {
555 { static_cast<uint32_t>(sloc.line0),
556 static_cast<uint32_t>(sloc.char0) },
557 { static_cast<uint32_t>(sloc.line1),
558 static_cast<uint32_t>(sloc.char1) }
561 return php::SrcLoc{};
562 }();
564 switch (*pop) { OPCODES }
566 if (next == past) {
567 if (instrAllowsFallThru(*pop)) {
568 blk.fallthrough = findBlock(next - ue.bc());
572 pc = next;
573 } while (pc != past);
575 #undef O
577 #undef IMM_MA
578 #undef IMM_BLA
579 #undef IMM_SLA
580 #undef IMM_ILA
581 #undef IMM_IVA
582 #undef IMM_I64A
583 #undef IMM_LA
584 #undef IMM_IA
585 #undef IMM_DA
586 #undef IMM_SA
587 #undef IMM_AA
588 #undef IMM_BA
589 #undef IMM_OA_IMPL
590 #undef IMM_OA
591 #undef IMM_VSA
593 #undef IMM_NA
594 #undef IMM_ONE
595 #undef IMM_TWO
596 #undef IMM_THREE
597 #undef IMM_FOUR
599 #undef IMM_ARG
600 #undef IMM_ARG_NA
601 #undef IMM_ARG_ONE
602 #undef IMM_ARG_TWO
603 #undef IMM_ARG_THREE
604 #undef IMM_ARG_FOUR
607 * If a block ends with an unconditional jump, change it to a
608 * fallthrough edge.
610 * Just convert the opcode to a Nop, because this could create an
611 * empty block and we have an invariant that no blocks are empty.
614 auto make_fallthrough = [&] {
615 blk.fallthrough = blk.hhbcs.back().Jmp.target;
616 blk.hhbcs.back() = bc_with_loc(blk.hhbcs.back().srcLoc, bc::Nop{});
619 switch (blk.hhbcs.back().op) {
620 case Op::Jmp: make_fallthrough(); break;
621 case Op::JmpNS: make_fallthrough(); blk.fallthroughNS = true; break;
622 default: break;
626 template<class FindBlk>
627 void link_entry_points(php::Func& func,
628 const FuncEmitter& fe,
629 FindBlk findBlock) {
630 func.dvEntries.resize(fe.params().size());
631 for (size_t i = 0, sz = fe.params().size(); i < sz; ++i) {
632 if (fe.params()[i].hasDefaultValue()) {
633 auto const dv = findBlock(fe.params()[i].funcletOff());
634 func.params[i].dvEntryPoint = dv;
635 func.dvEntries[i] = dv;
638 func.mainEntry = findBlock(fe.base());
641 void build_cfg(ParseUnitState& puState,
642 php::Func& func,
643 const FuncEmitter& fe) {
644 auto const blockStarts = findBasicBlocks(fe);
646 FTRACE(3, " blocks are at: {}\n",
647 [&]() -> std::string {
648 using namespace folly::gen;
649 return from(blockStarts)
650 | eachTo<std::string>()
651 | unsplit<std::string>(" ");
655 std::map<Offset,std::unique_ptr<php::Block>> blockMap;
656 auto const bc = fe.ue().bc();
658 auto findBlock = [&] (Offset off) {
659 auto& ptr = blockMap[off];
660 if (!ptr) {
661 ptr = folly::make_unique<php::Block>();
662 ptr->id = func.nextBlockId++;
663 ptr->section = php::Block::Section::Main;
664 ptr->exnNode = nullptr;
666 return borrow(ptr);
669 auto exnTreeInfo = build_exn_tree(fe, func, findBlock);
671 for (auto it = begin(blockStarts);
672 boost::next(it) != end(blockStarts);
673 ++it) {
674 auto const block = findBlock(*it);
675 auto const bcStart = bc + *it;
676 auto const bcStop = bc + *boost::next(it);
678 if (auto const eh = findEH(fe.ehtab(), *it)) {
679 auto it = exnTreeInfo.ehMap.find(eh);
680 assert(it != end(exnTreeInfo.ehMap));
681 block->exnNode = it->second;
682 add_factored_exits(*block, block->exnNode);
685 populate_block(puState, fe, func, *block, bcStart, bcStop, findBlock);
688 link_entry_points(func, fe, findBlock);
689 find_fault_funclets(exnTreeInfo, func, blockStarts, findBlock);
691 for (auto& kv : blockMap) {
692 func.blocks.emplace_back(std::move(kv.second));
696 void add_frame_variables(php::Func& func, const FuncEmitter& fe) {
697 for (auto& param : fe.params()) {
698 func.params.push_back(
699 php::Param {
700 param.defaultValue(),
701 nullptr,
702 param.typeConstraint(),
703 param.userType(),
704 param.phpCode(),
705 param.userAttributes(),
706 param.builtinType(),
707 param.ref()
712 func.locals.resize(fe.numLocals());
713 for (size_t id = 0; id < func.locals.size(); ++id) {
714 auto& loc = func.locals[id];
715 loc = folly::make_unique<php::Local>();
716 loc->id = id;
717 loc->name = nullptr;
719 for (auto& kv : fe.localNameMap()) {
720 func.locals[kv.second]->name = kv.first;
723 func.iters.resize(fe.numIterators());
724 for (uint32_t i = 0; i < func.iters.size(); ++i) {
725 func.iters[i] = folly::make_unique<php::Iter>();
726 func.iters[i]->id = i;
729 func.staticLocals.reserve(fe.svInfo().size());
730 for (auto& sv : fe.svInfo()) {
731 func.staticLocals.push_back(
732 php::StaticLocalInfo { sv.name, sv.phpCode }
737 std::unique_ptr<php::Func> parse_func(ParseUnitState& puState,
738 borrowed_ptr<php::Unit> unit,
739 borrowed_ptr<php::Class> cls,
740 const FuncEmitter& fe) {
741 FTRACE(2, " func: {}\n",
742 fe.name()->data() && *fe.name()->data() ? fe.name()->data()
743 : "pseudomain");
745 auto ret = folly::make_unique<php::Func>();
746 ret->name = fe.name();
747 ret->srcInfo = php::SrcInfo { fe.getLocation(),
748 fe.getDocComment() };
749 ret->unit = unit;
750 ret->cls = cls;
751 ret->nextBlockId = 0;
753 ret->attrs = fe.attrs();
754 ret->userAttributes = fe.getUserAttributes();
755 ret->returnUserType = fe.returnUserType();
756 ret->originalFilename = fe.originalFilename();
758 ret->top = fe.top();
759 ret->isClosureBody = fe.isClosureBody();
760 ret->isGeneratorBody = fe.isGenerator();
761 ret->isGeneratorFromClosure = fe.isGeneratorFromClosure();
762 ret->isPairGenerator = fe.isPairGenerator();
763 ret->isAsync = fe.isAsync();
764 ret->innerGeneratorFunc = nullptr;
765 ret->outerGeneratorFunc = nullptr;
768 * Generators that aren't inside classes (includes generators from
769 * closures) end up with inner generator bodies living as free
770 * functions, not on a class. We track them here to link them after
771 * we've finished parsing the whole unit. parse_methods handles the
772 * within-class generator linking cases.
774 if (auto const innerName = fe.getGeneratorBodyName()) {
775 if (!ret->cls || ret->isClosureBody) {
776 puState.generatorsToLink.emplace_back(borrow(ret), innerName);
779 if (ret->isGeneratorBody && !cls) {
780 always_assert(!puState.innerGenerators.count(ret->name));
781 puState.innerGenerators[ret->name] = borrow(ret);
785 * HNI-style native functions get some extra information.
787 if (fe.isHNINative()) {
788 ret->nativeInfo = folly::make_unique<php::NativeInfo>();
789 ret->nativeInfo->returnType = fe.getReturnType();
792 add_frame_variables(*ret, fe);
793 build_cfg(puState, *ret, fe);
795 return ret;
798 void parse_methods(ParseUnitState& puState,
799 borrowed_ptr<php::Class> ret,
800 borrowed_ptr<php::Unit> unit,
801 const PreClassEmitter& pce) {
802 std::unordered_map<
803 SString,
804 borrowed_ptr<php::Func>,
805 string_data_hash,
806 string_data_isame
807 > innerGenerators;
808 std::vector<std::pair<borrowed_ptr<php::Func>,SString>> generatorsToLink;
810 for (auto& me : pce.methods()) {
811 auto f = parse_func(puState, unit, ret, *me);
813 if (f->isGeneratorBody) {
814 always_assert(!innerGenerators.count(f->name));
815 innerGenerators[f->name] = borrow(f);
817 if (me->getGeneratorBodyName() && !f->isClosureBody) {
818 generatorsToLink.emplace_back(borrow(f), me->getGeneratorBodyName());
821 ret->methods.push_back(std::move(f));
824 for (auto kv : generatorsToLink) {
825 auto const it = innerGenerators.find(kv.second);
826 assert(it != end(innerGenerators));
827 auto const outer = kv.first;
828 auto const inner = it->second;
829 assert(inner->isGeneratorBody);
830 assert(!inner->innerGeneratorFunc && !inner->outerGeneratorFunc);
831 assert(!outer->innerGeneratorFunc && !outer->outerGeneratorFunc);
832 inner->outerGeneratorFunc = outer;
833 outer->innerGeneratorFunc = inner;
837 std::unique_ptr<php::Class> parse_class(ParseUnitState& puState,
838 borrowed_ptr<php::Unit> unit,
839 const PreClassEmitter& pce) {
840 FTRACE(2, " class: {}\n", pce.name()->data());
842 auto ret = folly::make_unique<php::Class>();
843 ret->name = pce.name();
844 ret->srcInfo = php::SrcInfo { pce.getLocation(),
845 pce.docComment() };
846 ret->unit = unit;
847 ret->closureContextCls = nullptr;
848 ret->parentName = pce.parentName()->empty() ? nullptr
849 : pce.parentName();
850 ret->attrs = pce.attrs();
851 ret->hoistability = pce.hoistability();
852 ret->userAttributes = pce.userAttributes();
854 for (auto& iface : pce.interfaces()) {
855 ret->interfaceNames.push_back(iface);
858 ret->usedTraitNames = pce.usedTraits();
859 ret->traitPrecRules = pce.traitPrecRules();
860 ret->traitAliasRules = pce.traitAliasRules();
861 ret->traitRequirements = pce.traitRequirements();
863 parse_methods(puState, borrow(ret), unit, pce);
865 auto& propMap = pce.propMap();
866 for (size_t idx = 0; idx < propMap.size(); ++idx) {
867 auto& prop = propMap[idx];
868 ret->properties.push_back(
869 php::Prop {
870 prop.name(),
871 prop.attrs(),
872 prop.docComment(),
873 prop.typeConstraint(),
874 prop.val()
879 auto& constMap = pce.constMap();
880 for (size_t idx = 0; idx < constMap.size(); ++idx) {
881 auto& cconst = constMap[idx];
882 ret->constants.push_back(
883 php::Const {
884 cconst.name(),
885 cconst.val(),
886 cconst.phpCode(),
887 cconst.typeConstraint()
892 return ret;
895 //////////////////////////////////////////////////////////////////////
897 void assign_closure_context(const ParseUnitState&, borrowed_ptr<php::Class>);
899 borrowed_ptr<php::Class>
900 find_closure_context(const ParseUnitState& puState,
901 borrowed_ptr<php::Func> createClFunc) {
902 if (auto const cls = createClFunc->cls) {
903 if (cls->parentName &&
904 cls->parentName->isame(s_Closure.get())) {
905 // We have a closure created by a closure's invoke method, which
906 // means it should inherit the outer closure's context, so we
907 // have to know that first.
908 assign_closure_context(puState, cls);
909 return cls->closureContextCls;
911 return cls;
914 // If the creating function wasn't in a class, either we have a
915 // closure with no lexical class context, or a CreateCl site for a
916 // generator body that is not part of the class. If it's a
917 // generator from a closure, this could still result in a closure
918 // context.
919 return !createClFunc->isGeneratorBody
920 ? nullptr
921 : find_closure_context(puState, createClFunc->outerGeneratorFunc);
924 void assign_closure_context(const ParseUnitState& puState,
925 borrowed_ptr<php::Class> clo) {
926 if (clo->closureContextCls) return;
928 auto clIt = puState.createClMap.find(clo->name);
929 if (clIt == end(puState.createClMap)) {
930 // Unused closure class. Technically not prohibited by the spec.
931 return;
935 * Any route to the closure context must yield the same class, or
936 * things downstream won't understand. We try every route and
937 * assert they are all the same here.
939 * See bytecode.specification for CreateCl for the relevant
940 * invariants.
942 always_assert(!clIt->second.empty());
943 auto it = begin(clIt->second);
944 auto const representative = find_closure_context(puState, *it);
945 if (debug) {
946 ++it;
947 for (; it != end(clIt->second); ++it) {
948 assert(find_closure_context(puState, *it) == representative);
951 clo->closureContextCls = representative;
954 void find_additional_metadata(const ParseUnitState& puState,
955 borrowed_ptr<php::Unit> unit) {
957 * Before we can assign closure contexts, we need to finish linking
958 * all inner-and-outer generators to each other for a few cases to
959 * work.
961 * Essentially these cases boil down to the fact that a closure in a
962 * class context that is also a generator (including async function
963 * closures) has a generator body that is not part of any class.
964 * The links need to be there so find_context can chase the
965 * non-class-member "inner" generatorFromClosure function to its
966 * outer generator function (__invoke on the Closure subclass), and
967 * from there to the actual class that is the closure class context.
969 for (auto kv : puState.generatorsToLink) {
970 auto const outer = kv.first;
971 always_assert(puState.innerGenerators.count(kv.second));
972 auto const inner = puState.innerGenerators.find(kv.second)->second;
973 assert(!inner->outerGeneratorFunc && !inner->innerGeneratorFunc);
974 assert(!outer->outerGeneratorFunc && !outer->innerGeneratorFunc);
975 inner->outerGeneratorFunc = outer;
976 outer->innerGeneratorFunc = inner;
979 for (auto& c : unit->classes) {
980 if (!c->parentName || !c->parentName->isame(s_Closure.get())) {
981 continue;
983 assign_closure_context(puState, borrow(c));
987 //////////////////////////////////////////////////////////////////////
991 std::unique_ptr<php::Unit> parse_unit(const UnitEmitter& ue) {
992 Trace::Bump bumper{Trace::hhbbc, kSystemLibBump, ue.isASystemLib()};
993 FTRACE(2, "parse_unit {}\n", ue.getFilepath()->data());
995 auto ret = folly::make_unique<php::Unit>();
996 ret->md5 = ue.md5();
997 ret->filename = ue.getFilepath();
999 ParseUnitState puState;
1000 puState.srcLocTable = ue.createSourceLocTable();
1001 puState.defClsMap.resize(ue.numPreClasses(), nullptr);
1003 for (size_t i = 0; i < ue.numPreClasses(); ++i) {
1004 auto cls = parse_class(puState, borrow(ret), *ue.pce(i));
1005 ret->classes.push_back(std::move(cls));
1008 for (auto& fe : ue.fevec()) {
1009 auto func = parse_func(puState, borrow(ret), nullptr, *fe);
1010 assert(!fe->pce());
1011 if (fe->isPseudoMain()) {
1012 ret->pseudomain = std::move(func);
1013 } else {
1014 ret->funcs.push_back(std::move(func));
1018 for (auto& ta : ue.typeAliases()) {
1019 ret->typeAliases.push_back(
1020 folly::make_unique<php::TypeAlias>(ta)
1024 find_additional_metadata(puState, borrow(ret));
1026 return ret;
1029 //////////////////////////////////////////////////////////////////////