Reland D23318594 and D23318592 add recordbasenativesp instr
[hiphop-php.git] / hphp/runtime/vm/cti.cpp
blob fbf5e7415dfa23c75318672abf376675e0a08fa8
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/cti.h"
#include "hphp/util/asm-x64.h"
#include "hphp/runtime/base/rds-header.h"
#include "hphp/runtime/vm/verifier/cfg.h"

namespace HPHP {
TRACE_SET_MOD(cti);
using jit::X64Assembler;
using jit::TCA;
using Verifier::funcInstrs;
using namespace jit::reg;

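// Stubs for entering and leaving call-threaded execution, set up by
// compile_cti_stubs() below.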
EntryStub g_enterCti;
CodeAddress g_exitCti;

namespace {

std::mutex g_mutex;
size_t bc_total;
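// Per-thread direct-mapped cache of {cti ip, vm pc} pairs used by
// lookup_cti(); sized to a power of two so the hash can be masked.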
thread_local std::vector<PcPair> tl_cti_cache;
__thread size_t cc_lookups, cc_misses;

// Number of bytes of x86 code for each opcode, used for interpolating
// the x86 pc for an unknown vmpc, and function size estimation.
// Must be the same for all instances of that opcode.
uint8_t g_cti_sizes[Op_count];

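// Bookkeeping for one compile_cti() run: records the cti address of every
// bytecode, plus the jump sites whose 32-bit offsets must be patched once
// all targets have been emitted. finish() applies the patches and publishes
// the entry point and default-value funclet offsets on the Func.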
struct PatchTable {
  explicit PatchTable(Func* func, PC unitpc, TCA cti_base)
    : m_func(func), m_unitpc(unitpc), m_ctibase(cti_base)
  {}
  void addPC(PC pc, CodeAddress cti) {
    m_ctimap[pc - m_unitpc] = cti - m_ctibase;
  }
  void addPatch(PC pc, CodeAddress next_ip) {
    auto unit_pc = m_func->unit()->entry();
    auto targets = instrJumpTargets(unit_pc, pc - unit_pc);
    assert(targets.size() == 1);
    auto target_pc = unit_pc + targets[0];
    m_patches.push_back({next_ip, target_pc});
  }
  void finish(uint32_t size) {
    for (auto p : m_patches) {
      assert(checkPc(p.pc));
      auto cti = m_ctibase + m_ctimap[p.pc - m_unitpc];
      ((int32_t*)p.ip)[-1] = cti - p.ip;
    }
    auto& params = m_func->params();
    auto& bytecode = cti_code();
    for (int i = 0, n = params.size(); i < n; ++i) {
      if (params[i].hasDefaultValue()) {
        assert(checkPc(m_unitpc + params[i].funcletOff));
        auto cti = m_ctibase + m_ctimap[params[i].funcletOff];
        m_func->setCtiFunclet(i, cti - bytecode.base());
      }
    }
    m_func->setCtiEntry(m_ctibase - bytecode.base(), size);
  }
private:
  bool checkPc(PC pc) const {
    return m_ctimap.count(pc - m_unitpc) != 0;
  }
private:
  boost::container::flat_map<Offset,uint32_t> m_ctimap;
  Func* m_func;
  const PC m_unitpc;
  const TCA m_ctibase;
  std::vector<PcPair> m_patches;
};

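// Slow path for lookup_cti(): starting from the function's cti entry, walk
// its bytecode and accumulate per-opcode code sizes until misspc is reached,
// then cache and return the corresponding cti address.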
TCA lookup_miss(const Func* func, Offset cti_entry, size_t h, PC misspc) {
  cc_misses++;
  auto cti = cti_code().base() + cti_entry;
  for (auto instrs = funcInstrs(func); !instrs.empty();) {
    auto pc = instrs.popFront();
    if (pc == misspc) {
      tl_cti_cache[h] = {cti, misspc};
      return cti;
    }
    cti += g_cti_sizes[(int)peek_op(pc)];
  }
  not_reached();
}

// calculate cti code size for func, or return 0 if we can't predict it yet.
size_t compute_size(const Func* func) {
  size_t size = 0;
  for (auto instrs = funcInstrs(func); !instrs.empty();) {
    auto pc = instrs.popFront();
    auto op = peek_op(pc);
    if (!g_cti_sizes[(int)op]) return 0;
    size += g_cti_sizes[(int)op];
  }
  return size;
}

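// In repo-authoritative mode these opcodes have no runtime effect other than
// advancing vmpc, so compile_cti() emits just a pc increment for them.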
inline bool isNop(Op opcode) {
  if (!RuntimeOption::RepoAuthoritative) return false;
  return opcode == OpNop ||
         opcode == OpEntryNop ||
         opcode == OpCGetCUNop ||
         opcode == OpUGetCUNop ||
         (!debug && isTypeAssert(opcode)) ||
         opcode == OpBreakTraceHint;
}

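// Register conventions shared by the cti stubs and the per-opcode glue
// emitted by compile_cti().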
auto const pc_arg = rdx;     // passed & returned by all opcodes
auto const next_ip = rax;    // returned by opcodes with unpredictable targets
auto const next_reg = rbx;   // for conditional branches
auto const nextpc_arg = rdi; // for predicted calls
auto const tl_reg = r12;
auto const modes_reg = r13d;
auto const ra_reg = r14;

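// Compile func's bytecode into call-threaded code: a short x86 sequence per
// instruction (typically a call to that opcode's stub plus control-flow
// glue), emitted into the cti code area. Returns the offset of the generated
// code within that area.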
Offset compile_cti(Func* func, PC unitpc) {
  std::lock_guard<std::mutex> lock(g_mutex);
  auto cti_entry = func->ctiEntry();
  if (cti_entry) return cti_entry; // we lost the compile race
  auto cti_size = compute_size(func);
  auto mem = cti_size ? (TCA) cti_code().allocInner(cti_size) :
             nullptr;
  folly::Optional<CodeBlock> inner_block;
  if (mem) {
    inner_block.emplace();
    inner_block->init(mem, cti_size, "");
  }
  auto cti_table = RuntimeOption::RepoAuthoritative ? cti_ops : ctid_ops;
  X64Assembler a{mem ? *inner_block : cti_code()};
  auto cti_base = a.frontier();
  PatchTable patches(func, unitpc, cti_base);
  for (auto instrs = funcInstrs(func); !instrs.empty(); ) {
    auto pc = instrs.popFront();
    auto ip = a.frontier();
    patches.addPC(pc, ip);
    auto op = peek_op(pc);
    auto cti = cti_table[(int)op];
    if (isNop(op)) {
      // compile op as just one instruction: addq size(op), vmpc
      auto bc_size = instrLen(pc);
      if (bc_size == 1) {
        a.incq (pc_arg);
      } else {
        a.addq (bc_size, pc_arg);
      }
      if (debug) {
        a.storeq(pc_arg, r12[rds::kVmpcOff]);
      }
    } else if (isSimple(op) || isThrow(op)) {
      a. call (cti);
    } else if (isBranch(op)) {
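      // Stash the fall-through pc in next_reg, call the stub, then compare:
      // if the stub changed vmpc the branch was taken, so jump to the target
      // (emitted here as a jump to cti_base, patched by PatchTable::finish).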
      a. lea  (pc_arg[instrLen(pc)], next_reg);
      a. call (cti);
      a. cmpq (pc_arg, next_reg);
      a. jne  (cti_base);
      patches.addPatch(pc, a.frontier());
    } else if (isUnconditionalJmp(op)) {
      a. call (cti);
      a. jmp  (cti_base);
      patches.addPatch(pc, a.frontier());
    } else {
      // these ops jump to unpredictable targets by setting pc, and some
      // can halt by returning g_haltBytecode or g_pauseBytecode in rax.
      if (isFCall(op)) {
        // It's cheaper to pass next_pc as an arg than to calculate it in the
        // stubs that need it, since some of these opcodes have IVA params
        // which vary in size by callsite.
        // For FCall, instrLen(pc) can use either 1 or 4 byte encoding,
        // triggering the g_cti_sizes assert below. Force 4-byte encoding.
        a.lea  (pc_arg[1000], nextpc_arg);
        ((int32_t*)a.frontier())[-1] = instrLen(pc);
      }
      a. call (cti);
      DEBUG_ONLY auto after = a.frontier();
      a. jmp  (next_ip);
      assert(a.frontier() - after == kCtiIndirectJmpSize);
    }
    auto size = a.frontier() - ip;
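    // Record (or verify) this opcode's fixed code size; compute_size() and
    // lookup_miss() depend on every instance of an op having the same size.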
    if (!g_cti_sizes[(int)op]) {
      g_cti_sizes[(int)op] = size;
    } else {
      assert(size == g_cti_sizes[(int)op]);
    }
  }
  if (!cti_size) {
    cti_size = a.frontier() - cti_base;
  } else {
    assert(cti_size == a.frontier() - cti_base); // check compute_size()
  }
  // patch jumps, update func with code addresses.
  patches.finish(cti_size);
  bc_total += func->past() - func->base();
  TRACE(1, "cti %s entry %p size %d %lu total %lu %lu\n",
        func->fullName()->size() > 0 ? func->fullName()->data() : "\"\"",
        func->entry(),
        func->past() - func->base(), cti_size,
        bc_total, a.used());
  TRACE(2, "cti lookups %lu misses %lu\n", cc_lookups, cc_misses);
  return cti_base - cti_code().base();
}

// Return the cti ip for the given hhbc pc in func.
TCA lookup_cti(const Func* func, Offset cti_entry, PC unitpc, PC pc) {
  assert(pc && unitpc);
  if (tl_cti_cache.empty()) {
    tl_cti_cache.resize(jit::CodeCache::ABytecodeSize >> 10);
    always_assert(tl_cti_cache.size() > 0 &&
                  (tl_cti_cache.size() & (tl_cti_cache.size()-1)) == 0);
  }
  cc_lookups++;
  auto h = hash_int64(int64_t(pc)) & (tl_cti_cache.size() - 1);
  auto& pcp = tl_cti_cache[h];
  if (pcp.pc == pc) return pcp.ip;
  return lookup_miss(func, cti_entry, h, pc);
}

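// Release a function's cti code back to the cti code area.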
void free_cti(Offset cti_entry, uint32_t cti_size) {
  auto& bytecode = cti_code();
  auto ctibase = bytecode.base() + cti_entry;
  bytecode.free(ctibase, cti_size);
}

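// Emit the g_enterCti and g_exitCti stubs into the cti code area.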
void compile_cti_stubs() {
  auto& bc_section = cti_code();
  X64Assembler a{bc_section};

  // pc is passed/returned in rdx, but we don't access it here
  // g_enterCti(modes, {ip, pc}, rds::Header*)
  //            edi    rsi  rdx  rcx           r8, r9 unused
  g_enterCti = (EntryStub) a.frontier();
  a.push (rbp);
  a.movq (rsp, rbp);
  a.movq (rcx, tl_reg);
  a.movl (edi, modes_reg);
  a.lea  (rbp[-8], ra_reg);
  a.jmp  (rsi);
  a.ud2  (); // cpu hint that we didn't indirectly jump to here.

  // bytecode jumps back here to stop the interpreter, because either:
  // 1. a canHalt() instruction set pc=0 (exiting this level of vm nesting)
  // 2. a control-flow instruction was executed in dispatchBB() mode.
  g_exitCti = a.frontier();
  a.movq (rdx, rax); // move retAddr from halt path to rax
  a.pop  (rbp);
  a.ret  ();