/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,     |
   | that is bundled with this package in the file LICENSE, and is       |
   | available through the world-wide-web at the following url:          |
   | http://www.php.net/license/3_01.txt                                 |
   | If you did not receive a copy of the PHP license and are unable to  |
   | obtain it through the world-wide-web, please send a note to         |
   | license@php.net so we can mail you a copy immediately.              |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/vasm-emit.h"

#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/tracing.h"

#include "hphp/runtime/vm/jit/abi-x64.h"
#include "hphp/runtime/vm/jit/block.h"
#include "hphp/runtime/vm/jit/code-gen-helpers.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/prof-data.h"
#include "hphp/runtime/vm/jit/service-requests.h"
#include "hphp/runtime/vm/jit/smashable-instr-x64.h"
#include "hphp/runtime/vm/jit/target-cache.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/vasm.h"
#include "hphp/runtime/vm/jit/vasm-block-counters.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-internal.h"
#include "hphp/runtime/vm/jit/vasm-lower.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-prof.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm-util.h"
#include "hphp/runtime/vm/jit/vasm-visit.h"

#include <algorithm>
#include <tuple>

TRACE_SET_MOD(vasm);

namespace HPHP { namespace jit {
///////////////////////////////////////////////////////////////////////////////

using namespace reg;
using namespace x64;

namespace x64 { struct ImmFolder; }

namespace {
///////////////////////////////////////////////////////////////////////////////

static_assert(folly::kIsLittleEndian,
              "Code contains little-endian specific optimizations.");

template<class X64Asm>
struct Vgen {
  explicit Vgen(Venv& env)
    : env(env)
    , a(*env.cb)
    , current(env.current)
    , next(env.next)
    , jmps(env.jmps)
    , jccs(env.jccs)
    , catches(env.catches)
  {}

  static void emitVeneers(Venv& env) {}
  static void handleLiterals(Venv& env) {}
  static void retargetBinds(Venv& env);
  static void patch(Venv& env);
  static void pad(CodeBlock& cb);

  /////////////////////////////////////////////////////////////////////////////

  template<class Inst> void emit(const Inst& i) {
    always_assert_flog(false, "unimplemented instruction: {} in B{}\n",
                       vinst_names[Vinstr(i).op], size_t(current));
  }

  // intrinsics
  void emit(const prefetch& i) { a.prefetch(i.m.mr()); }
  void emit(const copy& i);
  void emit(const copy2& i);
  void emit(const debugtrap& /*i*/) { a.int3(); }
  void emit(const fallthru&);
  void emit(const ldimmb& i);
  void emit(const ldimml& i);
  void emit(const ldimmq& i);
  void emit(const ldundefq& /*i*/) {}
  void emit(const load& i);
  void emit(const store& i);
  void emit(const mcprep& i);

  // native function abi
  void emit(const call& i);
  void emit(const callm& i) { a.prefix(i.target.mr()).call(i.target); }
  void emit(const callr& i) { a.call(i.target); }
  void emit(const calls& i);
  void emit(const ret& /*i*/) { a.ret(); }

  // stub function abi
  void emit(const stubret& i);
  void emit(const callstub& i);
  void emit(const callfaststub& i);
  void emit(const tailcallstub& i);
  void emit(const tailcallstubr& i);

  // php function abi
  void emit(const callphp& i) {
    emit(call{i.target, i.args});
    setCallFuncId(env, a.frontier());
  }
  void emit(const callphpr& i) {
    emit(callr{i.target, i.args});
    setCallFuncId(env, a.frontier());
  }
  void emit(const phpret& i);
  void emit(const contenter& i);

  // vm entry abi
  void emit(const inittc& /*i*/) {}
  void emit(const leavetc&) { a.ret(); }

  // exceptions
  void emit(const landingpad& /*i*/) {}
  void emit(const nothrow& i);
  void emit(const syncpoint& i);
  void emit(const unwind& i);

  // instructions
  void emit(absdbl i) { unary(i); a.psllq(1, i.d); a.psrlq(1, i.d); }
  void emit(andb i) { commuteSF(i); a.andb(i.s0, i.d); }
  void emit(andbi i) { binary(i); a.andb(i.s0, i.d); }
  void emit(const andbim& i) { a.prefix(i.m.mr()).andb(i.s, i.m); }
  void emit(andw i) { commuteSF(i); a.andw(i.s0, i.d); }
  void emit(andwi i) { binary(i); a.andw(i.s0, i.d); }
  void emit(andl i) { commuteSF(i); a.andl(i.s0, i.d); }
  void emit(andli i) { binary(i); a.andl(i.s0, i.d); }
  void emit(andq i) { commuteSF(i); a.andq(i.s0, i.d); }
  void emit(andqi i);
  void emit(const addwm& i) { a.prefix(i.m.mr()).addw(i.s0, i.m); }
  void emit(addl i) { commuteSF(i); a.addl(i.s0, i.d); }
  void emit(addli i) { binary(i); a.addl(i.s0, i.d); }
  void emit(const addlm& i) { a.prefix(i.m.mr()).addl(i.s0, i.m); }
  void emit(const addlim& i);
  void emit(addq i) { commuteSF(i); a.addq(i.s0, i.d); }
  void emit(addqi i) { binary(i); a.addq(i.s0, i.d); }
  void emit(const addqmr& i);
  void emit(const addqrm& i);
  void emit(const addqim& i);
  void emit(addsd i) { commute(i); a.addsd(i.s0, i.d); }
  void emit(const btrq& i) { binary(i); a.btrq(i.s0, i.d); }
  void emit(const cloadq& i);
  template<class cmov> void emit_cmov(const cmov& i);
  void emit(const cmovb& i) { emit_cmov(i); }
  void emit(const cmovw& i) { emit_cmov(i); }
  void emit(const cmovl& i) { emit_cmov(i); }
  void emit(const cmovq& i) { emit_cmov(i); }
  void emit(const cmpb& i) { a.cmpb(i.s0, i.s1); }
  void emit(const cmpbi& i) { a.cmpb(i.s0, i.s1); }
  void emit(const cmpbim& i) { a.prefix(i.s1.mr()).cmpb(i.s0, i.s1); }
  void emit(const cmpbm& i) { a.prefix(i.s1.mr()).cmpb(i.s0, i.s1); }
  void emit(const cmpw& i) { a.cmpw(i.s0, i.s1); }
  void emit(const cmpwi& i) { a.cmpw(i.s0, i.s1); }
  void emit(const cmpwim& i) { a.prefix(i.s1.mr()).cmpw(i.s0, i.s1); }
  void emit(const cmpwm& i) { a.prefix(i.s1.mr()).cmpw(i.s0, i.s1); }
  void emit(const cmpl& i) { a.cmpl(i.s0, i.s1); }
  void emit(const cmpli& i) { a.cmpl(i.s0, i.s1); }
  void emit(const cmplim& i) { a.prefix(i.s1.mr()).cmpl(i.s0, i.s1); }
  void emit(const cmplm& i) { a.prefix(i.s1.mr()).cmpl(i.s0, i.s1); }
  void emit(const cmpq& i) { a.cmpq(i.s0, i.s1); }
  void emit(const cmpqi& i) { a.cmpq(i.s0, i.s1); }
  void emit(const cmpqim& i) { a.prefix(i.s1.mr()).cmpq(i.s0, i.s1); }
  void emit(const cmpqm& i) { a.prefix(i.s1.mr()).cmpq(i.s0, i.s1); }
  void emit(cmpsd i) { noncommute(i); a.cmpsd(i.s0, i.d, i.pred); }
  void emit(const cqo& /*i*/) { a.cqo(); }
  void emit(const cvttsd2siq& i) { a.cvttsd2siq(i.s, i.d); }
  void emit(const cvtsi2sd& i);
  void emit(const cvtsi2sdm& i);
  void emit(decl i) { unary(i); a.decl(i.d); }
  void emit(const declm& i) { a.prefix(i.m.mr()).decl(i.m); }
  void emit(decq i) { unary(i); a.decq(i.d); }
  void emit(const decqm& i) { a.prefix(i.m.mr()).decq(i.m); }
  void emit(const decqmlock& i) { a.prefix(i.m.mr()).decqlock(i.m); }
  void emit(const decqmlocknosf&);
  void emit(divsd i) { noncommute(i); a.divsd(i.s0, i.d); }
  void emit(imul i) { commuteSF(i); a.imul(i.s0, i.d); }
  void emit(const idiv& i) { a.idiv(i.s); }
  void emit(incl i) { unary(i); a.incl(i.d); }
  void emit(const inclm& i) { a.prefix(i.m.mr()).incl(i.m); }
  void emit(incq i) { unary(i); a.incq(i.d); }
  void emit(const incqm& i) { a.prefix(i.m.mr()).incq(i.m); }
  void emit(const incwm& i) { a.prefix(i.m.mr()).incw(i.m); }
  void emit(const jcc& i);
  void emit(const jcci& i);
  void emit(const jmp& i);
  void emit(const jmpr& i) { a.jmp(i.target); }
  void emit(const jmpm& i) { a.prefix(i.target.mr()).jmp(i.target); }
  void emit(const jmpi& i);
  void emit(const ldbindretaddr& i);
  void emit(const lea& i);
  void emit(const leap& i) { a.lea(i.s, i.d); }
  void emit(const leav& i);
  void emit(const lead& i) { a.lea(rip[(intptr_t)i.s.get()], i.d); }
  void emit(const loadups& i) { a.prefix(i.s.mr()).movups(i.s, i.d); }
  void emit(const loadtqb& i) { a.prefix(i.s.mr()).loadb(i.s, i.d); }
  void emit(const loadb& i) { a.prefix(i.s.mr()).loadb(i.s, i.d); }
  void emit(const loadw& i) { a.prefix(i.s.mr()).loadw(i.s, i.d); }
  void emit(const loadtql& i) { a.prefix(i.s.mr()).loadl(i.s, i.d); }
  void emit(const loadl& i) { a.prefix(i.s.mr()).loadl(i.s, i.d); }
  void emit(const loadqp& i) { a.loadq(i.s, i.d); }
  void emit(const loadqd& i) { a.loadq(rip[(intptr_t)i.s.get()], i.d); }
  void emit(const loadsd& i) { a.prefix(i.s.mr()).movsd(i.s, i.d); }
  void emit(const loadzbl& i) { a.prefix(i.s.mr()).loadzbl(i.s, i.d); }
  void emit(const loadzbq& i) { a.prefix(i.s.mr()).loadzbl(i.s, Reg32(i.d)); }
  void emit(const loadsbq& i) { a.prefix(i.s.mr()).loadsbq(i.s, i.d); }
  void emit(const loadzwq& i) { a.prefix(i.s.mr()).loadzwl(i.s, Reg32(i.d)); }
  void emit(const loadzlq& i) { a.prefix(i.s.mr()).loadl(i.s, Reg32(i.d)); }
  void emit(const movb& i) { a.movb(i.s, i.d); }
  void emit(const movl& i) { a.movl(i.s, i.d); }
  void emit(const movzbw& i) { a.movzbl(i.s, Reg32(i.d)); }
  void emit(const movzbl& i) { a.movzbl(i.s, i.d); }
  void emit(const movzbq& i) { a.movzbl(i.s, Reg32(i.d)); }
  void emit(const movzwl& i) { a.movzwl(i.s, i.d); }
  void emit(const movzwq& i) { a.movzwl(i.s, Reg32(i.d)); }
  void emit(const movzlq& i) { a.movl(i.s, Reg32(i.d)); }
  void emit(const movsbq& i) { a.movsbq(i.s, i.d); }
  void emit(mulsd i) { commute(i); a.mulsd(i.s0, i.d); }
  void emit(neg i) { unary(i); a.neg(i.d); }
  void emit(const nop& /*i*/) { a.nop(); }
  void emit(not i) { unary(i); a.not(i.d); }
  void emit(notb i) { unary(i); a.notb(i.d); }
  void emit(orbi i) { binary(i); a.orb(i.s0, i.d); }
  void emit(const orbim& i) { a.prefix(i.m.mr()).orb(i.s0, i.m); }
  void emit(const orwim& i) { a.prefix(i.m.mr()).orw(i.s0, i.m); }
  void emit(const orlim& i) { a.prefix(i.m.mr()).orl(i.s0, i.m); }
  void emit(orq i) { commuteSF(i); a.orq(i.s0, i.d); }
  void emit(orwi i) { binary(i); a.orw(i.s0, i.d); }
  void emit(orli i) { binary(i); a.orl(i.s0, i.d); }
  void emit(orqi i) { binary(i); a.orq(i.s0, i.d); }
  void emit(const orqim& i) { a.prefix(i.m.mr()).orq(i.s0, i.m); }
  void emit(const pop& i) { a.pop(i.d); }
  void emit(const popm& i) { a.prefix(i.d.mr()).pop(i.d); }
  void emit(const popf& i) { assertx(i.d == RegSF{0}); a.popf(); }
  void emit(const push& i) { a.push(i.s); }
  void emit(const pushm& i) { a.prefix(i.s.mr()).push(i.s); }
  void emit(const pushf& i) { assertx(i.s == RegSF{0}); a.pushf(); }
  void emit(const roundsd& i) { a.roundsd(i.dir, i.s, i.d); }
  void emit(const sarq& i) { unary(i); a.sarq(i.d); }
  void emit(sarqi i) { binary(i); a.sarq(i.s0, i.d); }
  void emit(const setcc& i) { a.setcc(i.cc, i.d); }
  void emit(shlli i) { binary(i); a.shll(i.s0, i.d); }
  void emit(shlq i) { unary(i); a.shlq(i.d); }
  void emit(shrq i) { unary(i); a.shrq(i.d); }
  void emit(shlqi i) { binary(i); a.shlq(i.s0, i.d); }
  void emit(shrli i) { binary(i); a.shrl(i.s0, i.d); }
  void emit(shrqi i) { binary(i); a.shrq(i.s0, i.d); }
  void emit(const sqrtsd& i) { a.sqrtsd(i.s, i.d); }
  void emit(const storeups& i) { a.prefix(i.m.mr()).movups(i.s, i.m); }
  void emit(const storeb& i) { a.prefix(i.m.mr()).storeb(i.s, i.m); }
  void emit(const storebi& i);
  void emit(const storel& i) { a.prefix(i.m.mr()).storel(i.s, i.m); }
  void emit(const storeli& i) { a.prefix(i.m.mr()).storel(i.s, i.m); }
  void emit(const storeqi& i);
  void emit(const storesd& i) { a.prefix(i.m.mr()).movsd(i.s, i.m); }
  void emit(const storew& i) { a.prefix(i.m.mr()).storew(i.s, i.m); }
  void emit(const storewi& i) { a.prefix(i.m.mr()).storew(i.s, i.m); }
  void emit(subl i) { noncommute(i); a.subl(i.s0, i.d); }
  void emit(subli i) { binary(i); a.subl(i.s0, i.d); }
  void emit(subq i) { noncommute(i); a.subq(i.s0, i.d); }
  void emit(subqi i) { binary(i); a.subq(i.s0, i.d); }
  void emit(const subqim& i);
  void emit(subsd i) { noncommute(i); a.subsd(i.s0, i.d); }
  void emit(const testb& i) { a.testb(i.s0, i.s1); }
  void emit(const testbi& i) { a.testb(i.s0, i.s1); }
  void emit(const testbm& i) { a.prefix(i.s1.mr()).testb(i.s0, i.s1); }
  void emit(const testbim& i) { a.prefix(i.s1.mr()).testb(i.s0, i.s1); }
  void emit(const testw& i) { a.testw(i.s0, i.s1); }
  void emit(const testwi& i);
  void emit(const testwm& i) { a.prefix(i.s1.mr()).testw(i.s0, i.s1); }
  void emit(const testwim& i);
  void emit(const testl& i) { a.testl(i.s0, i.s1); }
  void emit(const testli& i);
  void emit(const testlm& i) { a.prefix(i.s1.mr()).testl(i.s0, i.s1); }
  void emit(const testlim& i);
  void emit(const testq& i) { a.testq(i.s0, i.s1); }
  void emit(const testqi& i);
  void emit(const testqm& i) { a.prefix(i.s1.mr()).testq(i.s0, i.s1); }
  void emit(const testqim& i);
  void emit(const trap& i);
  void emit(const ucomisd& i) { a.ucomisd(i.s0, i.s1); }
  void emit(unpcklpd i) { noncommute(i); a.unpcklpd(i.s0, i.d); }
  void emit(xorb i) { commuteSF(i); a.xorb(i.s0, i.d); }
  void emit(xorbi i) { binary(i); a.xorb(i.s0, i.d); }
  void emit(xorw i) { commuteSF(i); a.xorw(i.s0, i.d); }
  void emit(xorwi i) { binary(i); a.xorw(i.s0, i.d); }
  void emit(xorl i) { commuteSF(i); a.xorl(i.s0, i.d); }
  void emit(xorq i);
  void emit(xorqi i) { binary(i); a.xorq(i.s0, i.d); }
  void emit(const conjure& /*i*/) { always_assert(false); }
  void emit(const conjureuse& /*i*/) { always_assert(false); }
  void emit(const crc32q& i);

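  // emit_nop() produces a pair of lea instructions that cancel each other
  // out: a multi-byte no-op sequence that leaves registers and flags
  // unchanged.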
  void emit_nop() {
    emit(lea{rax[8], rax});
    emit(lea{rax[-8], rax});
  }

private:
  // helpers
  void prep(Reg8 s, Reg8 d) { if (s != d) a.movb(s, d); }
  void prep(Reg16 s, Reg16 d) { if (s != d) a.movw(s, d); }
  void prep(Reg32 s, Reg32 d) { if (s != d) a.movl(s, d); }
  void prep(Reg64 s, Reg64 d) { if (s != d) a.movq(s, d); }
  void prep(RegXMM s, RegXMM d) { if (s != d) a.movdqa(s, d); }
  void emit_simd_imm(int64_t, Vreg);

  template<class Inst> void unary(Inst& i) { prep(i.s, i.d); }
  template<class Inst> void binary(Inst& i) { prep(i.s1, i.d); }

  template<class Inst> void commuteSF(Inst&);
  template<class Inst> void commute(Inst&);
  template<class Inst> void noncommute(Inst&);

  CodeBlock& frozen() { return env.text.frozen().code; }

private:
  Venv& env;
  X64Asm a;

  const Vlabel current;
  const Vlabel next;
  jit::vector<Venv::LabelPatch>& jmps;
  jit::vector<Venv::LabelPatch>& jccs;
  jit::vector<Venv::LabelPatch>& catches;
};
///////////////////////////////////////////////////////////////////////////////

/*
 * Prepare a binary op that is not commutative.
 *
 * s0 must be a different register than d so we don't clobber it.
 */
template<class X64Asm>
template<class Inst> void Vgen<X64Asm>::noncommute(Inst& i) {
  assertx(i.s1 == i.d || i.s0 != i.d); // do not clobber s0
  binary(i);
}

/*
 * Prepare a binary op that is commutative.
 *
 * Swap operands if the dest is s0.
 */
template<class X64Asm>
template<class Inst> void Vgen<X64Asm>::commuteSF(Inst& i) {
  if (i.s1 != i.d && i.s0 == i.d) {
    i = Inst{i.s1, i.s0, i.d, i.sf};
  } else {
    binary(i);
  }
}

template<class X64Asm>
template<class Inst> void Vgen<X64Asm>::commute(Inst& i) {
  if (i.s1 != i.d && i.s0 == i.d) {
    i = Inst{i.s1, i.s0, i.d};
  } else {
    binary(i);
  }
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Returns true iff the status flags necessary to take a j<a> imply that a j<b>
 * will also be taken.
 */
bool ccImplies(ConditionCode a, ConditionCode b) {
  if (a == b) return true;

  switch (a) {
    case CC_None:
    case CC_O:  case CC_NO:
    case CC_AE: case CC_BE:
    case CC_NE:
    case CC_S:  case CC_NS:
    case CC_P:  case CC_NP:
    case CC_GE: case CC_LE:
      return false;

    case CC_B: return b == CC_BE;
    case CC_E: return b == CC_BE || b == CC_LE;
    case CC_A: return b == CC_AE || b == CC_NE;
    case CC_L: return b == CC_LE;
    case CC_G: return b == CC_NE || b == CC_GE;
  }
  always_assert(false);
}

/*
 * When two jccs go to the same destination, the cc of the first is compatible
 * with the cc of the second, and they're within a one-byte offset of each
 * other, retarget the first to jump to the second. This will allow the
 * relocator to shrink the first one, and the extra jmp shouldn't matter since
 * we try to only do this to rarely taken jumps.
 */
template<typename Key, typename Hash>
jit::hash_set<TCA> retargetJumps(
  Venv& env,
  const jit::hash_map<Key, jit::vector<TCA>, Hash>& jccs
) {
  jit::hash_set<TCA> retargeted;
  for (auto& pair : jccs) {
    auto const& jmps = pair.second;
    if (jmps.size() < 2) continue;

    for (size_t i = 0; i < jmps.size(); ++i) {
      DecodedInstruction di(env.text.toDestAddress(jmps[i]), jmps[i]);
      // Don't bother if the jump is already a short jump.
      if (di.size() != 6) continue;

      for (size_t j = jmps.size() - 1; j > i; --j) {
        auto const delta = jmps[j] - jmps[i] + 2;
        // Backwards jumps are probably not guards, and don't retarget to a
        // dest that's more than a one-byte offset away.
        if (delta < 0 || !deltaFits(delta, sz::byte)) continue;

        DecodedInstruction dj(env.text.toDestAddress(jmps[j]), jmps[j]);
        if (!ccImplies(di.jccCondCode(), dj.jccCondCode())) continue;

        di.setPicAddress(jmps[j]);
        retargeted.insert(jmps[i]);

        // We might've converted a smashable jump to a regular in-unit jump, so
        // remove any smashable alignments.
        auto range = env.meta.alignments.equal_range(jmps[i]);
        while (range.first != range.second) {
          auto iter = range.first;
          ++range.first;

          auto& align = iter->second;
          if (align.first == Alignment::SmashJcc &&
              align.second == AlignContext::Live) {
            env.meta.alignments.erase(iter);
          }
        }

        break;
      }
    }
  }
  return retargeted;
}

namespace {
struct SrcKeyBoolTupleHasher {
  size_t operator()(std::tuple<SrcKey, bool> v) const {
    return folly::hash::hash_combine(
      std::get<0>(v).toAtomicInt(),
      std::get<1>(v)
    );
  }
};
}

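/*
 * Retarget smashable bind jccs that share the same (SrcKey, fallback)
 * destination at one another, using retargetJumps above, so the relocator can
 * later shrink them. Jumps that were retargeted are then dropped from
 * inProgressTailJumps and smashableBinds since they are no longer smashable.
 */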
template<class X64Asm>
void Vgen<X64Asm>::retargetBinds(Venv& env) {
  if (RuntimeOption::EvalJitRetargetJumps < 1) return;

  // The target is unique per the SrcKey and the fallback flag.
  jit::hash_map<
    std::pair<SrcKey, bool>,
    jit::vector<TCA>,
    SrcKeyBoolTupleHasher
  > binds;

  for (auto const& b : env.meta.smashableBinds) {
    if (b.smashable.type() == IncomingBranch::Tag::JCC) {
      binds[std::make_pair(b.sk, b.fallback)]
        .emplace_back(b.smashable.toSmash());
    }
  }

  auto const retargeted = retargetJumps(env, std::move(binds));
  if (retargeted.empty()) return;

  // Finally, remove any retargeted jmps from inProgressTailJumps and
  // smashableBinds.
  GrowableVector<IncomingBranch> newTailJumps;
  for (auto& jmp : env.meta.inProgressTailJumps) {
    if (retargeted.count(jmp.toSmash()) == 0) {
      newTailJumps.push_back(jmp);
    }
  }
  env.meta.inProgressTailJumps.swap(newTailJumps);

  decltype(env.meta.smashableBinds) newBinds;
  for (auto& bind : env.meta.smashableBinds) {
    if (retargeted.count(bind.smashable.toSmash()) == 0) {
      newBinds.push_back(bind);
    } else {
      FTRACE(3, "retargetBinds: removed {} from smashableBinds\n",
             bind.smashable.toSmash());
    }
  }
  env.meta.smashableBinds.swap(newBinds);
}

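/*
 * Patch the placeholder jmp/jcc offsets recorded during emission now that
 * block addresses are known, and (when EvalJitRetargetJumps >= 2) hand
 * same-target jccs to retargetJumps for possible shrinking. Recorded leas are
 * pointed at their resolved Vaddrs.
 */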
template<class X64Asm>
void Vgen<X64Asm>::patch(Venv& env) {
  for (auto const& p : env.jmps) {
    assertx(env.addrs[p.target]);
    X64Asm::patchJmp(
      env.text.toDestAddress(p.instr), p.instr, env.addrs[p.target]);
  }

  auto const optLevel = RuntimeOption::EvalJitRetargetJumps;
  jit::hash_map<TCA, jit::vector<TCA>> jccs;
  for (auto const& p : env.jccs) {
    assertx(env.addrs[p.target]);
    X64Asm::patchJcc(
      env.text.toDestAddress(p.instr), p.instr, env.addrs[p.target]);
    if (optLevel >= 2) {
      jccs[env.addrs[p.target]].emplace_back(p.instr);
    }
  }

  if (!jccs.empty()) retargetJumps(env, jccs);

  for (auto const& p : env.leas) {
    assertx(env.vaddrs[p.target]);
    DecodedInstruction di(env.text.toDestAddress(p.instr), p.instr);
    assertx(di.hasPicOffset());
    di.setPicAddress(env.vaddrs[p.target]);
  }
}

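// Fill the remaining space in the code block with padding bytes.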
template<class X64Asm>
void Vgen<X64Asm>::pad(CodeBlock& cb) {
  X64Asm a { cb };
  a.pad();
}

///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const copy& i) {
  if (i.s == i.d) return;
  if (i.s.isGP()) {
    if (i.d.isGP()) {                 // GP => GP
      a.movq(i.s, i.d);
    } else {                          // GP => XMM
      assertx(i.d.isSIMD());
      // This generates a movq x86 instruction, which zero extends
      // the 64-bit value in srcReg into a 128-bit XMM register
      a.movq_rx(i.s, i.d);
    }
  } else {
    if (i.d.isGP()) {                 // XMM => GP
      a.movq_xr(i.s, i.d);
    } else {                          // XMM => XMM
      assertx(i.d.isSIMD());
      // This copies all 128 bits in XMM,
      // thus avoiding partial register stalls
      a.movdqa(i.s, i.d);
    }
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const copy2& i) {
  assertx(i.s0.isValid() && i.s1.isValid() && i.d0.isValid() && i.d1.isValid());
  auto s0 = i.s0, s1 = i.s1, d0 = i.d0, d1 = i.d1;
  assertx(d0 != d1);
  if (d0 == s1) {
    if (d1 == s0) {
      a.xchgq(d0, d1);
    } else {
      // could do this in a simplify pass
      if (s1 != d1) a.movq(s1, d1); // save s1 first; d1 != s0
      if (s0 != d0) a.movq(s0, d0);
    }
  } else {
    // could do this in a simplify pass
    if (s0 != d0) a.movq(s0, d0);
    if (s1 != d1) a.movq(s1, d1);
  }
}

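// Materialize an integer immediate into a SIMD register: zero via pxor (which
// doesn't touch the flags), anything else via a rip-relative load from a
// pooled literal.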
template<class X64Asm>
void Vgen<X64Asm>::emit_simd_imm(int64_t val, Vreg d) {
  if (val == 0) {
    a.pxor(d, d); // does not modify flags
  } else {
    auto addr = alloc_literal(env, val);
    a.movsd(rip[(intptr_t)addr], d);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const ldimmb& i) {
  // ldimmb is for Vconst::Byte, which is treated as unsigned uint8_t
  auto val = i.s.ub();
  if (i.d.isGP()) {
    Vreg8 d8 = i.d;
    a.movb(static_cast<int8_t>(val), d8);
  } else {
    emit_simd_imm(val, i.d);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const ldimml& i) {
  // ldimml is for Vconst::Long, which is treated as unsigned uint32_t
  auto val = i.s.l();
  if (i.d.isGP()) {
    Vreg32 d32 = i.d;
    a.movl(val, d32);
  } else {
    emit_simd_imm(uint32_t(val), i.d);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const ldimmq& i) {
  auto val = i.s.q();
  if (i.d.isGP()) {
    if (val == 0) {
      Vreg32 d32 = i.d;
      a.movl(0, d32); // because emitImmReg tries the xor optimization
    } else {
      a.emitImmReg(i.s, i.d);
    }
  } else {
    emit_simd_imm(val, i.d);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const load& i) {
  auto mref = i.s.mr();
  a.prefix(mref);
  if (i.d.isGP()) {
    a.loadq(mref, i.d);
  } else {
    assertx(i.d.isSIMD());
    a.movsd(mref, i.d);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const store& i) {
  auto const mref = i.d.mr();
  a.prefix(mref);
  if (i.s.isGP()) {
    a.storeq(i.s, i.d);
  } else {
    assertx(i.s.isSIMD());
    a.movsd(i.s, i.d);
  }
}

///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const mcprep& i) {
  /*
   * Initially, we set the cache to hold (addr << 1) | 1 (where `addr' is the
   * address of the movq) so that we can find the movq from the handler.
   *
   * We set the low bit for two reasons: the Class* will never be a valid
   * Class*, so we'll always miss the inline check before it's smashed, and
   * MethodCache::handleStaticCall can tell it's not been smashed yet
   */
  auto const mov_addr = emitSmashableMovq(a.code(), env.meta, 0, r64(i.d));
  auto const imm = reinterpret_cast<uint64_t>(mov_addr);
  smashMovq(a.toDestAddress(mov_addr), (imm << 1) | 1);

  env.meta.addressImmediates.insert(reinterpret_cast<TCA>(~imm));
}

///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const call& i) {
  if (a.jmpDeltaFits(i.target)) {
    a.call(i.target);
  } else {
    // can't do a near call; store address in data section.
    // call by loading the address using rip-relative addressing. This
    // assumes the data section is near the current code section. Since
    // this sequence is directly in-line, rip-relative like this is
    // more compact than loading a 64-bit immediate.
    auto addr = alloc_literal(env, (uint64_t)i.target);
    a.call(rip[(intptr_t)addr]);
  }
  if (i.watch) {
    *i.watch = a.frontier();
    env.meta.watchpoints.push_back(i.watch);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const calls& i) {
  emitSmashableCall(a.code(), env.meta, i.target);
}

///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const stubret& i) {
  if (i.saveframe) {
    a.pop(x64::rvmfp());
  } else {
    a.addq(8, reg::rsp);
  }
  a.ret();
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const callstub& i) {
  emit(call{i.target, i.args});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const callfaststub& i) {
  emit(call{i.target, i.args});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const tailcallstub& i) {
  a.addq(8, reg::rsp);
  emit(jmpi{i.target, i.args});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const tailcallstubr& i) {
  a.addq(8, reg::rsp);
  emit(jmpr{i.target, i.args});
}

///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const phpret& i) {
  a.push(i.fp[AROFF(m_savedRip)]);
  if (!i.noframe) {
    a.loadq(i.fp[AROFF(m_sfp)], x64::rvmfp());
  }
  a.ret();
}

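/*
 * contenter: call a small local stub so that the return address (which points
 * just past the call) ends up in fp[m_savedRip], then jump to `target'. The
 * unwind{} emitted after the call marks it as a catch boundary.
 */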
template<class X64Asm>
void Vgen<X64Asm>::emit(const contenter& i) {
  Label Stub, End;
  Reg64 fp = i.fp, target = i.target;
  a.jmp8(End);

  asm_label(a, Stub);
  a.pop(fp[AROFF(m_savedRip)]);
  a.jmp(target);

  asm_label(a, End);
  a.call(Stub);
  // m_savedRip will point here.
  emit(unwind{{i.targets[0], i.targets[1]}});
}

///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const nothrow& /*i*/) {
  env.meta.catches.emplace_back(a.frontier(), nullptr);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const syncpoint& i) {
  FTRACE(5, "IR recordSyncPoint: {} {}\n", a.frontier(), i.fix.show());
  env.meta.fixups.emplace_back(a.frontier(), i.fix);
  env.record_inline_stack(a.frontier());
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const unwind& i) {
  catches.push_back({a.frontier(), i.targets[1]});
  env.record_inline_stack(a.frontier());
  emit(jmp{i.targets[0]});
}

///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const fallthru&) {
  a.nop();
}

///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(andqi i) {
  if (magFits(i.s0.q(), sz::dword)) {
    emit(andli{int32_t(i.s0.q()), Reg32(i.s1), Reg32(i.d), i.sf});
    return;
  }

  binary(i);
  a.andq(i.s0, i.d);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const addlim& i) {
  auto mref = i.m.mr();
  a.prefix(mref).addl(i.s0, mref);
}

template<typename X64Asm>
void Vgen<X64Asm>::emit(const addqmr& i) {
  binary(i);
  auto const mref = i.m.mr();
  a.prefix(mref).addq(mref, i.d);
}

template<typename X64Asm>
void Vgen<X64Asm>::emit(const addqrm& i) {
  auto const mref = i.m.mr();
  a.prefix(mref).addq(i.s1, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const addqim& i) {
  auto mref = i.m.mr();
  a.prefix(mref).addq(i.s0, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const subqim& i) {
  auto mref = i.m.mr();
  a.prefix(mref).subq(i.s0, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const cloadq& i) {
  auto m = i.t;
  always_assert(!m.index.isValid()); // not supported, but could be later.
  if (i.f != i.d) {
    if (i.d == m.base) {
      // We can't move f over d or we'll clobber the Vptr we need to load from.
      // Since cload does the load unconditionally anyway, we can just load and
      // cmov.
      a.prefix(m.mr()).loadq(i.t, i.d);
      a.cmov_reg64_reg64(ccNegate(i.cc), i.f, i.d);
      return;
    }
    a.movq(i.f, i.d);
  }
  a.prefix(m.mr()).cload_reg64_disp_reg64(i.cc, m.base, m.disp, i.d);
}

// add s0 s1 d => mov s1->d; d += s0
// cmov cc s d => if cc { mov s->d }
template<class X64Asm>
template<class cmov>
void Vgen<X64Asm>::emit_cmov(const cmov& i) {
  if (i.f != i.d && i.t == i.d) {
    // negate the condition and swap t/f operands so we don't clobber i.t
    return emit(cmov{ccNegate(i.cc), i.sf, i.t, i.f, i.d});
  } else {
    prep(i.f, i.d);
  }
  a.cmov_reg64_reg64(i.cc, r64(i.t), r64(i.d));
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const cvtsi2sd& i) {
  a.pxor(i.d, i.d);
  a.cvtsi2sd(i.s, i.d);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const cvtsi2sdm& i) {
  a.pxor(i.d, i.d);
  a.cvtsi2sd(i.s, i.d);
}

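/*
 * Conditional jump: if the taken target is the next block, negate the
 * condition and swap the targets so we can fall through to it. Otherwise emit
 * a jcc against a placeholder and record it in `jccs' for patch() to fill in,
 * then jmp (or fall through) to targets[0].
 */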
template<class X64Asm>
void Vgen<X64Asm>::emit(const jcc& i) {
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      return emit(jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}});
    }
    auto taken = i.targets[1];
    jccs.push_back({a.frontier(), taken});
    a.jcc(i.cc, a.frontier());
  }
  emit(jmp{i.targets[0]});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const jcci& i) {
  a.jcc(i.cc, i.taken);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const jmp& i) {
  if (next == i.target) return;
  jmps.push_back({a.frontier(), i.target});
  a.jmp(a.frontier());
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const jmpi& i) {
  if (a.jmpDeltaFits(i.target)) {
    a.jmp(i.target);
  } else {
    // can't do a near jmp - use rip-relative addressing
    auto addr = alloc_literal(env, (uint64_t)i.target);
    a.jmp(rip[(intptr_t)addr]);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const ldbindretaddr& i) {
  auto const addr = a.frontier();
  emit(leap{reg::rip[(intptr_t)addr], i.d});
  env.ldbindretaddrs.push_back({addr, i.target, i.spOff});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const lea& i) {
  assertx(i.s.seg == Segment::DS);
  // could do this in a simplify pass
  if (i.s.disp == 0 && i.s.base.isValid() && !i.s.index.isValid()) {
    emit(copy{i.s.base, i.d});
  } else {
    a.lea(i.s, i.d);
  }
}

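// leav: emit a rip-relative lea against a placeholder (its own address) and
// record it in env.leas so patch() can point it at the Vaddr's final address.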
template<class X64Asm>
void Vgen<X64Asm>::emit(const leav& i) {
  auto const addr = a.frontier();
  emit(leap{reg::rip[(intptr_t)addr], i.d});
  env.leas.push_back({addr, i.s});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const storebi& i) {
  auto mref = i.m.mr();
  a.prefix(mref).storeb(i.s, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const storeqi& i) {
  auto mref = i.m.mr();
  a.prefix(mref).storeq(i.s, mref);
}

template<class VgenImpl, typename Inst>
bool testimHelper(VgenImpl& env, const Inst& i, uint64_t mask) {
  // If there's only 1 byte of meaningful bits in the mask, we can adjust the
  // pointer offset and use testbim instead.
  int off = 0;
  while (mask > 0xff && !(mask & 0xff)) {
    off++;
    mask >>= 8;
  }

  if (mask > 0xff) return false;

  env.emit(testbim{int8_t(mask), i.s1 + off, i.sf});
  return true;
}

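// Testing a register against an all-ones immediate gives the same flags as
// testing the register against itself, so the immediate forms below
// special-case -1.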
template<class X64Asm>
void Vgen<X64Asm>::emit(const testwi& i) {
  if (i.s0.w() == -1) {
    return emit(testw{i.s1, i.s1, i.sf});
  }
  a.testw(i.s0, i.s1);
}

template<class X64Asm>
void Vgen<X64Asm>::Vgen::emit(const testwim& i) {
  if (testimHelper(*this, i, i.s0.w())) return;
  a.prefix(i.s1.mr()).testw(i.s0, i.s1);
}

template<class X64Asm>
void Vgen<X64Asm>::Vgen::emit(const testlim& i) {
  if (testimHelper(*this, i, i.s0.l())) return;
  a.prefix(i.s1.mr()).testl(i.s0, i.s1);
}

template<class X64Asm>
void Vgen<X64Asm>::Vgen::emit(const testli& i) {
  if (i.s0.l() == -1) {
    return emit(testl{i.s1, i.s1, i.sf});
  }
  a.testl(i.s0, i.s1);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const testqi& i) {
  auto const imm = i.s0.q();
  if (magFits(imm, sz::byte)) {
    a.testb(int8_t(imm), rbyte(i.s1));
  } else if (magFits(imm, sz::dword)) {
    emit(testli{int32_t(imm), Reg32(i.s1), i.sf});
  } else if (imm == -1) {
    emit(testq{i.s1, i.s1, i.sf});
  } else {
    a.testq(i.s0, i.s1);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const testqim& i) {
  if (testimHelper(*this, i, i.s0.q())) return;
  if (magFits(i.s0.q(), sz::dword)) {
    // For an unsigned 32 bit immediate, we can get the same results
    // by emitting a testlim.
    emit(testlim{int32_t(i.s0.q()), i.s1, i.sf});
  } else {
    a.prefix(i.s1.mr()).testq(i.s0, i.s1);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const trap& i) {
  env.meta.trapReasons.emplace_back(a.frontier(), i.reason);
  a.ud2();
}

template<class X64Asm>
void Vgen<X64Asm>::emit(xorq i) {
  if (i.s0 == i.s1) {
    // 32-bit xor{s, s, d} zeroes the upper bits of `d'.
    return emit(xorl{r32(i.s0), r32(i.s1), r32(i.d), i.sf});
  }
  commuteSF(i);
  a.xorq(i.s0, i.d);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const crc32q& i) {
  noncommute(i);
  a.crc32q(i.s0, i.d);
}

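// Locked decrement that must not clobber the status flags: save and restore
// them around the decqlock with pushf/popf.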
template<typename X64Asm>
void Vgen<X64Asm>::emit(const decqmlocknosf& i) {
  a.pushf();
  a.prefix(i.m.mr()).decqlock(i.m);
  a.popf();
}

///////////////////////////////////////////////////////////////////////////////

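/*
 * Replace the instruction at position `i' of block `b' with whatever the
 * callback writes to the Vout; the lambda's return value (1 here) tells
 * vmodify how many of the original instructions to replace.
 */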
template<typename Lower>
void lower_impl(Vunit& unit, Vlabel b, size_t i, Lower lower) {
  vmodify(unit, b, i, [&] (Vout& v) { lower(v); return 1; });
}

template <typename Inst>
void lower(Vunit& /*unit*/, Inst& /*inst*/, Vlabel /*b*/, size_t /*i*/) {}

///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, popp& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << pop{inst.d0};
    v << pop{inst.d1};
  });
}

void lower(Vunit& unit, poppm& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << popm{inst.d0};
    v << popm{inst.d1};
  });
}

void lower(Vunit& unit, pushp& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << push{inst.s0};
    v << push{inst.s1};
  });
}

void lower(Vunit& unit, pushpm& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << pushm{inst.s0};
    v << pushm{inst.s1};
  });
}

///////////////////////////////////////////////////////////////////////////////

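// stublogue either spills rvmfp or just moves rsp down by one slot, so the
// stub frame ends up the same size whether or not the frame pointer was saved.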
void lower(Vunit& unit, stublogue& inst, Vlabel b, size_t i) {
  if (inst.saveframe) {
    unit.blocks[b].code[i] = push{x64::rvmfp()};
  } else {
    unit.blocks[b].code[i] = lea{reg::rsp[-8], reg::rsp};
  }
}

void lower(Vunit& unit, unstublogue& /*inst*/, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = lea{reg::rsp[8], reg::rsp};
}

void lower(Vunit& unit, stubunwind& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << lea{reg::rsp[8], reg::rsp};
    v << pop{inst.d};
  });
}

void lower(Vunit& unit, stubtophp& /*inst*/, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = lea{reg::rsp[16], reg::rsp};
}

void lower(Vunit& unit, loadstubret& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = load{reg::rsp[8], inst.d};
}

void lower(Vunit& unit, phplogue& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = popm{inst.fp[AROFF(m_savedRip)]};
}

void lower(Vunit& unit, resumetc& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << callr{inst.target, inst.args};
    v << jmpi{inst.exittc};
  });
}

///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, sar& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rcx};
    v << sarq{inst.s1, inst.d, inst.sf};
  });
}

void lower(Vunit& unit, shl& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rcx};
    v << shlq{inst.s1, inst.d, inst.sf};
  });
}

void lower(Vunit& unit, shr& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rcx};
    v << shrq{inst.s1, inst.d, inst.sf};
  });
}

void lower(Vunit& unit, srem& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rax};
    v << cqo{};                      // sign-extend rax => rdx:rax
    v << idiv{inst.s1, v.makeReg()}; // rdx:rax/divisor => quot:rax, rem:rdx
    v << copy{rdx, inst.d};
  });
}

void lower(Vunit& unit, divint& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rax};
    v << cqo{};                      // sign-extend rax => rdx:rax
    v << idiv{inst.s1, v.makeReg()}; // rdx:rax/divisor => quot:rax, rem:rdx
    v << copy{rax, inst.d};
  });
}

///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, movtqb& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtdb& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtdq& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtqw& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtql& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Lower a few abstractions to facilitate straightforward x64 codegen.
 */
void lowerForX64(Vunit& unit) {
  vasm_lower(unit, [&](const VLS& /*env*/, Vinstr& inst, Vlabel b, size_t i) {
    switch (inst.op) {
#define O(name, ...)                      \
      case Vinstr::name:                  \
        lower(unit, inst.name##_, b, i);  \
        break;

      VASM_OPCODES
#undef O
    }
  });
}

///////////////////////////////////////////////////////////////////////////////
}

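/*
 * Run the vasm optimization pipeline (DCE, branch fusion, lowering, immediate
 * folding, copy propagation, and optionally register allocation) prior to
 * emission.
 */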
void optimizeX64(Vunit& unit, const Abi& abi, bool regalloc) {
  Timer timer(Timer::vasm_optimize, unit.log_entry);

  tracing::Block _{
    "vasm-optimize",
    [&] { return traceProps(unit).add("reg_alloc", regalloc); }
  };

  auto const doPass = [&] (const char* name, auto fun) {
    rqtrace::EventGuard trace{name};
    fun(unit);
  };

  doPass("VOPT_DCE",    removeDeadCode);
  doPass("VOPT_PHI",    optimizePhis);
  doPass("VOPT_BRANCH", fuseBranches);
  doPass("VOPT_JMP",    [] (Vunit& u) { optimizeJmps(u, false); });

  assertx(checkWidths(unit));

  if (unit.context && unit.context->kind == TransKind::Optimize &&
      RuntimeOption::EvalProfBranchSampleFreq > 0) {
    // Even when branch profiling is on, we still only want to profile
    // non-profiling translations of PHP functions. We also require that we
    // can spill, so that we can generate arbitrary profiling code, and also to
    // ensure we don't profile unique stubs and such.
    doPass("VOPT_PROF_BRANCH", profile_branches);
  }

  doPass("VOPT_X64",      lowerForX64);
  doPass("VOPT_SIMPLIFY", simplify);
  doPass("VOPT_X64",      lowerForX64);

  if (!unit.constToReg.empty()) {
    doPass("VOPT_FOLD_IMM", foldImms<x64::ImmFolder>);
  }

  doPass("VOPT_COPY",   [&] (Vunit& u) { optimizeCopies(u, abi); });
  doPass("VOPT_DCE",    removeDeadCode);
  doPass("VOPT_BRANCH", fuseBranches);

  if (unit.needsRegAlloc()) {
    doPass("VOPT_JMP", [] (Vunit& u) { optimizeJmps(u, false); });
    doPass("VOPT_DCE", removeDeadCode);

    if (regalloc) {
      // vasm-block-counts and register allocation require edges to
      // be pre-split.
      splitCriticalEdges(unit);

      doPass("VOPT_BLOCK_WEIGHTS", VasmBlockCounters::profileGuidedUpdate);

      if (RuntimeOption::EvalUseGraphColor &&
          unit.context &&
          (unit.context->kind == TransKind::Optimize ||
           unit.context->kind == TransKind::OptPrologue)) {
        rqtrace::EventGuard trace{"VOPT_GRAPH_COLOR"};
        allocateRegistersWithGraphColor(unit, abi);
      } else {
        rqtrace::EventGuard trace{"VOPT_XLS"};
        allocateRegistersWithXLS(unit, abi);
      }

      doPass("VOPT_SF_PEEPHOLES", [&] (Vunit& u) { sfPeepholes(u, abi); });
      doPass("VOPT_POST_RA_SIMPLIFY", postRASimplify);
    }
  }

  // We can add side-exiting instructions now
  doPass("VOPT_EXIT", optimizeExits);
  doPass("VOPT_JMP",  [] (Vunit& u) { optimizeJmps(u, true); });
}

void emitX64(Vunit& unit, Vtext& text, CGMeta& fixups,
             AsmInfo* asmInfo) {
  tracing::Block _{"emit-X64", [&] { return traceProps(unit); }};

#ifdef HAVE_LIBXED
  if (RuntimeOption::EvalUseXedAssembler) {
    return vasm_emit<Vgen<XedAssembler>>(unit, text, fixups, asmInfo);
  }
#endif
  vasm_emit<Vgen<X64Assembler>>(unit, text, fixups, asmInfo);
}

///////////////////////////////////////////////////////////////////////////////
}}