From 0f76237b718eda80d506b953773f8dc88266650c Mon Sep 17 00:00:00 2001 From: Joseph Griego Date: Thu, 13 Jul 2017 10:01:12 -0700 Subject: [PATCH] Reorder blocks in hhas to make sense Summary: I used to just dump every block in the order they were allocatd by the compiler. This adds a simple step to put the blocks in a topological ordering (after ignoring backedges.) This isn't perfect but it's a lot better and makes the resulting output at least somewhat more human-readable Reviewed By: swtaarrs Differential Revision: D5356304 fbshipit-source-id: efb803628192cec420fe58e01b8041daf4a8aea0 --- hphp/php7/bytecode.h | 12 ++++++-- hphp/php7/compiler.cpp | 8 ++--- hphp/php7/compiler.h | 10 ++++++ hphp/php7/hhas.cpp | 76 +++++++++++++++++++++++++++++++++++++++------ hphp/php7/unit.cpp | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ hphp/php7/unit.h | 42 +++++++++++++++++++++++-- 6 files changed, 215 insertions(+), 17 deletions(-) diff --git a/hphp/php7/bytecode.h b/hphp/php7/bytecode.h index c1aad171c26..5f411c7e41d 100644 --- a/hphp/php7/bytecode.h +++ b/hphp/php7/bytecode.h @@ -19,15 +19,23 @@ #include "hphp/runtime/vm/hhbc.h" +#include +#include + namespace HPHP { namespace php7 { struct Block; namespace bc { +struct StringOffsetVector { + std::unordered_map branches; + Block* defaultBranch{nullptr}; +}; + // void* immediate types just aren't being used right now -#define IMM_TYPE_BLA void* -#define IMM_TYPE_SLA void* +#define IMM_TYPE_BLA std::vector +#define IMM_TYPE_SLA StringOffsetVector #define IMM_TYPE_ILA void* #define IMM_TYPE_IVA void* #define IMM_TYPE_I64A int64_t diff --git a/hphp/php7/compiler.cpp b/hphp/php7/compiler.cpp index a3ffabaa9d1..c04eda6227e 100644 --- a/hphp/php7/compiler.cpp +++ b/hphp/php7/compiler.cpp @@ -58,7 +58,7 @@ void Compiler::compileProgram(const zend_ast* ast) { if (activeBlock) { activeBlock->emit(Int{-1}); - activeBlock->emit(RetC{}); + activeBlock->exit(RetC{}); } } @@ -240,15 +240,15 @@ void Compiler::compileIf(const zend_ast* ast) { activeBlock = branch; compileStatement(contents); - activeBlock->emit(Jmp{end}); + activeBlock->exit(Jmp{end}); activeBlock = mainBlock; compileExpression(condition); - activeBlock->emit(JmpNZ{branch}); + branchTo(branch); } } - activeBlock->emit(Jmp{end}); + activeBlock->exit(Jmp{end}); activeBlock = end; } diff --git a/hphp/php7/compiler.h b/hphp/php7/compiler.h index ba66ec88f20..3f116431930 100644 --- a/hphp/php7/compiler.h +++ b/hphp/php7/compiler.h @@ -56,6 +56,16 @@ struct Compiler { [[noreturn]] void panic(const std::string& msg); + template + void branchTo(Block* target) { + auto continuation = activeFunction->allocateBlock(); + + activeBlock->exit(Branch{target}); + activeBlock->exit(bc::Jmp{continuation}); + + activeBlock = continuation; + } + std::unique_ptr unit; Function* activeFunction; diff --git a/hphp/php7/hhas.cpp b/hphp/php7/hhas.cpp index 7c2d5b2339b..1fd7e1d60e9 100644 --- a/hphp/php7/hhas.cpp +++ b/hphp/php7/hhas.cpp @@ -23,7 +23,7 @@ namespace HPHP { namespace php7 { namespace { std::string dump_pseudomain(const Function& func); - std::string dump_block(const Block& func); + std::string dump_blocks(std::vector blocks); } // namespace std::string dump_asm(const Unit& unit) { @@ -41,6 +41,7 @@ struct InstrVisitor { template void bytecode(const Bytecode& bc) { + out.append(" "); out.append(Bytecode::name()); bc.visit_imms(*this); out.append("\n"); @@ -75,24 +76,81 @@ struct InstrVisitor { std::string& out; }; +// This is just a visitor for instructions and exits that will omit a jump +// (Jmp, JmpNS) iff the block that is the jump target follows immediately after +// the jump instruction +struct CFGVisitor : public boost::static_visitor { + explicit CFGVisitor(std::string& out) + : out(out) + , instr(out) + {} + + void beginBlock(Block* blk) { + // if there was an unconditional jump and its target was *not* this block + // actually emit the instruction + if (nextUnconditionalDestination + && nextUnconditionalDestination != blk) { + bytecode(bc::Jmp{nextUnconditionalDestination}); + } + nextUnconditionalDestination = nullptr; + folly::format(&out, "L{}:\n", blk->id); + } + + void end() { + if (nextUnconditionalDestination) { + instr.bytecode(bc::Jmp{nextUnconditionalDestination}); + } + } + + void operator()(const bc::Jmp& j) { + nextUnconditionalDestination = j.imm1; + } + + void operator()(const bc::JmpNS& j) { + nextUnconditionalDestination = j.imm1; + } + + template + void operator()(const Exit& e) { + bytecode(e); + } + + void bytecode(const Bytecode& bc) { + bc.visit(instr); + } + + void exit(const Block::ExitOp& exit) { + boost::apply_visitor(*this, exit); + } + + std::string& out; + InstrVisitor instr; + Block* nextUnconditionalDestination{nullptr}; +}; + std::string dump_pseudomain(const Function& func) { std::string out; out.append(".main {\n"); - for (const auto& blk : func.blocks) { - out.append(dump_block(*blk)); - } + out.append(dump_blocks(serializeControlFlowGraph(func.entry))); out.append("}"); return out; } -std::string dump_block(const Block& blk) { +std::string dump_blocks(std::vector blocks) { std::string out; - folly::format(&out, "L{}:\n", blk.id); - for (const auto& bc : blk.code) { - out.append(" "); - bc.visit(InstrVisitor(out)); + CFGVisitor visitor(out); + + for (auto blk : blocks) { + visitor.beginBlock(blk); + for (const auto& bc : blk->code) { + bc.visit(visitor); + } + for (const auto& exit : blk->exits) { + visitor.exit(exit); + } } + visitor.end(); return out; } diff --git a/hphp/php7/unit.cpp b/hphp/php7/unit.cpp index 246e1e02d42..f739ba2427b 100644 --- a/hphp/php7/unit.cpp +++ b/hphp/php7/unit.cpp @@ -16,6 +16,11 @@ #include "hphp/php7/unit.h" +#include "hphp/util/match.h" + +#include +#include + namespace HPHP { namespace php7 { Function::Function(Unit* parent, const std::string& name) @@ -44,4 +49,83 @@ Block* Function::getBlock(uint64_t id) { return blocks[id].get(); } +namespace { + +std::vector exitTargets(const Block::ExitOp& exit) { + using namespace bc; + std::vector targets; + + match(exit, + [&](const Jmp& j) { targets.push_back(j.imm1); }, + [&](const JmpNS& j) { targets.push_back(j.imm1); }, + [&](const JmpZ& j) { targets.push_back(j.imm1); }, + [&](const JmpNZ& j) { targets.push_back(j.imm1); }, + [&](const Switch& s) { + targets.insert(targets.end(), s.imm3.begin(), s.imm3.end()); + }, + [&](const SSwitch& s) { + const StringOffsetVector& sov = s.imm1; + for (const auto& pair : sov.branches) { + targets.push_back(pair.second); + } + targets.push_back(sov.defaultBranch); + }, + [&](const RetC& r) {}, + [&](const RetV& r) {}, + [&](const Unwind& u) {}, + [&](const Throw& t) {}); + + return targets; +} + +} // namespace + +std::vector serializeControlFlowGraph(Block* entry) { + // traverse the block graph in a DFS and build a reverse postorder + std::vector ordering; + + std::unordered_set visited; + std::unordered_set added; + + std::stack breadcrumbs; + added.insert(entry); + breadcrumbs.push(entry); + + // every node is visited twice, once to add its children to the stack, then + // (after its children) to add it to the ordering + while (!breadcrumbs.empty()) { + // we *don't* pop here, since we want to visit a node again, after all its + // children are fully processed, to add it to the ordering + auto blk = breadcrumbs.top(); + + if (0 != visited.count(blk)) { + // we've seen this node before so it's either a leaf or all its + // dependncies are added already + breadcrumbs.pop(); + ordering.push_back(blk); + continue; + } + + const auto& exits = blk->exits; + // iterate *backwards* to preserve the order of the exits + for (auto iter = exits.rbegin(); iter != exits.rend(); iter++) { + auto exit = *iter; + for (Block* child : exitTargets(exit)) { + // if this edge is not a back edge (or a self edge) + if (0 == added.count(child)) { + breadcrumbs.push(child); + added.insert(child); + } + } + } + + visited.insert(blk); + } + + // reverse the ordering to get an actual topological order + std::reverse(ordering.begin(), ordering.end()); + + return ordering; +} + }} // HPHP::php7 diff --git a/hphp/php7/unit.h b/hphp/php7/unit.h index 7bad7069d38..d1438889fe3 100644 --- a/hphp/php7/unit.h +++ b/hphp/php7/unit.h @@ -20,6 +20,8 @@ #include "hphp/php7/bytecode.h" #include "hphp/runtime/base/attr.h" +#include + #include #include @@ -30,19 +32,55 @@ struct Function; struct Unit; struct Block { + // these are the last instructions in the block, they must be jumps or leave + // the current function i.e. only these instructions: + using ExitOp = boost::variant< + bc::Jmp, + bc::JmpNS, + bc::JmpZ, + bc::JmpNZ, + bc::Switch, + bc::SSwitch, + bc::RetC, + bc::RetV, + bc::Unwind, + bc::Throw + >; + + void emit(bc::Jmp&&) = delete; + void emit(bc::JmpNS&&) = delete; + void emit(bc::JmpZ&&) = delete; + void emit(bc::JmpNZ&&) = delete; + void emit(bc::Switch&&) = delete; + void emit(bc::SSwitch&&) = delete; + void emit(bc::RetC&&) = delete; + void emit(bc::RetV&&) = delete; + void emit(bc::Unwind&&) = delete; + void emit(bc::Throw&&) = delete; + void emit(ExitOp&& op) = delete; + void emit(Bytecode&& bc) { + assert(!exited); code.push_back(std::move(bc)); } + void exit(ExitOp&& op) { + exited = true; + exits.push_back(std::move(op)); + } + // identifies this block in its unit uint64_t id; // code associated with this block std::vector code; - - Block* fallthrough = nullptr; + std::vector exits; + bool exited{false}; }; +// get the series of block pointers in the control graph that starts at `entry` +std::vector serializeControlFlowGraph(Block* entry); + struct Function { explicit Function(Unit* parent, const std::string& name); -- 2.11.4.GIT