Reland D23318594 and D23318592 add recordbasenativesp instr
[hiphop-php.git] / hphp/runtime/vm/cti.cpp
blob fbf5e7415dfa23c75318672abf376675e0a08fa8
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/cti.h"
#include "hphp/util/asm-x64.h"
#include "hphp/runtime/base/rds-header.h"
#include "hphp/runtime/vm/verifier/cfg.h"

namespace HPHP {
TRACE_SET_MOD(cti);
using jit::X64Assembler;
using jit::TCA;
using Verifier::funcInstrs;
using namespace jit::reg;

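// Stubs for entering and leaving call-threaded execution, set up by
// compile_cti_stubs() below.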
EntryStub g_enterCti;
CodeAddress g_exitCti;

namespace {

std::mutex g_mutex;
size_t bc_total;
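// Per-thread direct-mapped cache of {cti ip, vm pc} pairs used by
// lookup_cti(); sized to a power of two so the hash can be masked.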
thread_local std::vector<PcPair> tl_cti_cache;
__thread size_t cc_lookups, cc_misses;

// Number of bytes of x86 code for each opcode, used for interpolating
// the x86 pc for an unknown vmpc, and function size estimation.
// Must be the same for all instances of that opcode.
uint8_t g_cti_sizes[Op_count];

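// Bookkeeping for one compile_cti() run: records the cti address of every
// bytecode, plus the jump sites whose 32-bit offsets must be patched once
// all targets have been emitted. finish() applies the patches and publishes
// the entry point and default-value funclet offsets on the Func.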
struct PatchTable {
  explicit PatchTable(Func* func, PC unitpc, TCA cti_base)
    : m_func(func), m_unitpc(unitpc), m_ctibase(cti_base)
  {}
  void addPC(PC pc, CodeAddress cti) {
    m_ctimap[pc - m_unitpc] = cti - m_ctibase;
  }
  void addPatch(PC pc, CodeAddress next_ip) {
    auto unit_pc = m_func->unit()->entry();
    auto targets = instrJumpTargets(unit_pc, pc - unit_pc);
    assert(targets.size() == 1);
    auto target_pc = unit_pc + targets[0];
    m_patches.push_back({next_ip, target_pc});
  }
  void finish(uint32_t size) {
    for (auto p : m_patches) {
      assert(checkPc(p.pc));
      auto cti = m_ctibase + m_ctimap[p.pc - m_unitpc];
      ((int32_t*)p.ip)[-1] = cti - p.ip;
    }
    auto& params = m_func->params();
    auto& bytecode = cti_code();
    for (int i = 0, n = params.size(); i < n; ++i) {
      if (params[i].hasDefaultValue()) {
        assert(checkPc(m_unitpc + params[i].funcletOff));
        auto cti = m_ctibase + m_ctimap[params[i].funcletOff];
        m_func->setCtiFunclet(i, cti - bytecode.base());
      }
    }
    m_func->setCtiEntry(m_ctibase - bytecode.base(), size);
  }
private:
  bool checkPc(PC pc) const {
    return m_ctimap.count(pc - m_unitpc) != 0;
  }
private:
  boost::container::flat_map<Offset,uint32_t> m_ctimap;
  Func* m_func;
  const PC m_unitpc;
  const TCA m_ctibase;
  std::vector<PcPair> m_patches;
};

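// Slow path for lookup_cti(): starting from the function's cti entry, walk
// its bytecode and accumulate per-opcode code sizes until misspc is reached,
// then cache and return the corresponding cti address.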
TCA lookup_miss(const Func* func, Offset cti_entry, size_t h, PC misspc) {
  cc_misses++;
  auto cti = cti_code().base() + cti_entry;
  for (auto instrs = funcInstrs(func); !instrs.empty();) {
    auto pc = instrs.popFront();
    if (pc == misspc) {
      tl_cti_cache[h] = {cti, misspc};
      return cti;
    }
    cti += g_cti_sizes[(int)peek_op(pc)];
  }
  not_reached();
}

// calculate cti code size for func, or return 0 if we can't predict it yet.
size_t compute_size(const Func* func) {
  size_t size = 0;
  for (auto instrs = funcInstrs(func); !instrs.empty();) {
    auto pc = instrs.popFront();
    auto op = peek_op(pc);
    if (!g_cti_sizes[(int)op]) return 0;
    size += g_cti_sizes[(int)op];
  }
  return size;
}

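// In repo-authoritative mode these opcodes have no runtime effect other than
// advancing vmpc, so compile_cti() emits just a pc increment for them.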
inline bool isNop(Op opcode) {
  if (!RuntimeOption::RepoAuthoritative) return false;
  return opcode == OpNop ||
         opcode == OpEntryNop ||
         opcode == OpCGetCUNop ||
         opcode == OpUGetCUNop ||
         (!debug && isTypeAssert(opcode)) ||
         opcode == OpBreakTraceHint;
}

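// Register conventions shared by the cti stubs and the per-opcode glue
// emitted by compile_cti().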
auto const pc_arg = rdx;     // passed & returned by all opcodes
auto const next_ip = rax;    // returned by opcodes with unpredictable targets
auto const next_reg = rbx;   // for conditional branches
auto const nextpc_arg = rdi; // for predicted calls
auto const tl_reg = r12;
auto const modes_reg = r13d;
auto const ra_reg = r14;

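// Compile func's bytecode into call-threaded code: a short x86 sequence per
// instruction (typically a call to that opcode's stub plus control-flow
// glue), emitted into the cti code area. Returns the offset of the generated
// code within that area.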
Offset compile_cti(Func* func, PC unitpc) {
  std::lock_guard<std::mutex> lock(g_mutex);
  auto cti_entry = func->ctiEntry();
  if (cti_entry) return cti_entry; // we lost the compile race
  auto cti_size = compute_size(func);
  auto mem = cti_size ? (TCA) cti_code().allocInner(cti_size) :
             nullptr;
  folly::Optional<CodeBlock> inner_block;
  if (mem) {
    inner_block.emplace();
    inner_block->init(mem, cti_size, "");
  }
  auto cti_table = RuntimeOption::RepoAuthoritative ? cti_ops : ctid_ops;
  X64Assembler a{mem ? *inner_block : cti_code()};
  auto cti_base = a.frontier();
  PatchTable patches(func, unitpc, cti_base);
  for (auto instrs = funcInstrs(func); !instrs.empty(); ) {
    auto pc = instrs.popFront();
    auto ip = a.frontier();
    patches.addPC(pc, ip);
    auto op = peek_op(pc);
    auto cti = cti_table[(int)op];
    if (isNop(op)) {
      // compile op as just one instruction: addq size(op), vmpc
      auto bc_size = instrLen(pc);
      if (bc_size == 1) {
        a.incq (pc_arg);
      } else {
        a.addq (bc_size, pc_arg);
      }
      if (debug) {
        a.storeq(pc_arg, r12[rds::kVmpcOff]);
      }
    } else if (isSimple(op) || isThrow(op)) {
      a. call (cti);
    } else if (isBranch(op)) {
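      // Stash the fall-through pc in next_reg, call the stub, then compare:
      // if the stub changed vmpc the branch was taken, so jump to the target
      // (emitted here as a jump to cti_base, patched by PatchTable::finish).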
      a. lea  (pc_arg[instrLen(pc)], next_reg);
      a. call (cti);
      a. cmpq (pc_arg, next_reg);
      a. jne  (cti_base);
      patches.addPatch(pc, a.frontier());
    } else if (isUnconditionalJmp(op)) {
      a. call (cti);
      a. jmp  (cti_base);
      patches.addPatch(pc, a.frontier());
    } else {
      // these ops jump to unpredictable targets by setting pc, and some
      // can halt by returning g_haltBytecode or g_pauseBytecode in rax.
      if (isFCall(op)) {
        // It's cheaper to pass next_pc as an arg than to calculate it in the
        // stubs that need it, since some of these opcodes have IVA params
        // which vary in size by callsite.
        // For FCall, instrLen(pc) can use either 1 or 4 byte encoding,
        // triggering the g_cti_sizes assert below. Force 4-byte encoding.
        a.lea  (pc_arg[1000], nextpc_arg);
        ((int32_t*)a.frontier())[-1] = instrLen(pc);
      }
      a. call (cti);
      DEBUG_ONLY auto after = a.frontier();
      a. jmp  (next_ip);
      assert(a.frontier() - after == kCtiIndirectJmpSize);
    }
    auto size = a.frontier() - ip;
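    // Record (or verify) this opcode's fixed code size; compute_size() and
    // lookup_miss() depend on every instance of an op having the same size.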
    if (!g_cti_sizes[(int)op]) {
      g_cti_sizes[(int)op] = size;
    } else {
      assert(size == g_cti_sizes[(int)op]);
    }
  }
  if (!cti_size) {
    cti_size = a.frontier() - cti_base;
  } else {
    assert(cti_size == a.frontier() - cti_base); // check compute_size()
  }
  // patch jumps, update func with code addresses.
  patches.finish(cti_size);
  bc_total += func->past() - func->base();
  TRACE(1, "cti %s entry %p size %d %lu total %lu %lu\n",
        func->fullName()->size() > 0 ? func->fullName()->data() : "\"\"",
        func->entry(),
        func->past() - func->base(), cti_size,
        bc_total, a.used());
  TRACE(2, "cti lookups %lu misses %lu\n", cc_lookups, cc_misses);
  return cti_base - cti_code().base();
}

// Return the cti ip for the given hhbc pc in func.
TCA lookup_cti(const Func* func, Offset cti_entry, PC unitpc, PC pc) {
  assert(pc && unitpc);
  if (tl_cti_cache.empty()) {
    tl_cti_cache.resize(jit::CodeCache::ABytecodeSize >> 10);
    always_assert(tl_cti_cache.size() > 0 &&
                  (tl_cti_cache.size() & (tl_cti_cache.size()-1)) == 0);
  }
  cc_lookups++;
  auto h = hash_int64(int64_t(pc)) & (tl_cti_cache.size() - 1);
  auto& pcp = tl_cti_cache[h];
  if (pcp.pc == pc) return pcp.ip;
  return lookup_miss(func, cti_entry, h, pc);
}

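// Release a function's cti code back to the cti code area.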
void free_cti(Offset cti_entry, uint32_t cti_size) {
  auto& bytecode = cti_code();
  auto ctibase = bytecode.base() + cti_entry;
  bytecode.free(ctibase, cti_size);
}

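// Emit the g_enterCti and g_exitCti stubs into the cti code area.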
void compile_cti_stubs() {
  auto& bc_section = cti_code();
  X64Assembler a{bc_section};

  // pc is passed/returned in rdx, but we don't access it here
  // g_enterCti(modes, {ip, pc}, rds::Header*)
  //            edi    rsi  rdx  rcx           r8, r9 unused
  g_enterCti = (EntryStub) a.frontier();
  a.push (rbp);
  a.movq (rsp, rbp);
  a.movq (rcx, tl_reg);
  a.movl (edi, modes_reg);
  a.lea  (rbp[-8], ra_reg);
  a.jmp  (rsi);
  a.ud2  (); // cpu hint that we didn't indirectly jump to here.

  // bytecode jumps back here to stop the interpreter, because either:
  // 1. a canHalt() instruction set pc=0 (exiting this level of vm nesting)
  // 2. a control-flow instruction was executed in dispatchBB() mode.
  g_exitCti = a.frontier();
  a.movq (rdx, rax); // move retAddr from halt path to rax
  a.pop  (rbp);
  a.ret  ();