Switch-related cleanup
hphp/runtime/vm/jit/vasm-x64.cpp
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/vasm-emit.h"

#include "hphp/runtime/base/arch.h"
#include "hphp/runtime/vm/jit/back-end-x64.h"
#include "hphp/runtime/vm/jit/block.h"
#include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"
#include "hphp/runtime/vm/jit/code-gen.h"
#include "hphp/runtime/vm/jit/func-prologues-x64.h"
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/prof-data.h"
#include "hphp/runtime/vm/jit/service-requests-inline.h"
#include "hphp/runtime/vm/jit/target-cache.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/vasm.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm-util.h"
#include "hphp/runtime/vm/jit/vasm-visit.h"

#include <algorithm>

TRACE_SET_MOD(vasm);

namespace HPHP { namespace jit {
///////////////////////////////////////////////////////////////////////////////

using namespace reg;
using namespace x64;

namespace x64 { struct ImmFolder; }

namespace {
///////////////////////////////////////////////////////////////////////////////
struct Vgen {
  Vgen(const Vunit& u, Vasm::AreaList& areas, AsmInfo* asmInfo)
    : unit(u)
    , backend(mcg->backEnd())
    , areas(areas)
    , m_asmInfo(asmInfo) {
    addrs.resize(u.blocks.size());
    points.resize(u.next_point);
  }
  void emit(jit::vector<Vlabel>&);

 private:
  template<class Inst> void emit(const Inst& i) {
    always_assert_flog(false, "unimplemented instruction: {} in B{}\n",
                       vinst_names[Vinstr(i).op], size_t(current));
  }
  // intrinsics
  void emit(const bindaddr& i);
  void emit(const bindcall& i);
  void emit(const bindjcc1st& i);
  void emit(const bindjcc& i);
  void emit(const bindjmp& i);
  void emit(const callstub& i);
  void emit(const contenter& i);
  void emit(const copy& i);
  void emit(const copy2& i);
  void emit(const debugtrap& i) { a->int3(); }
  void emit(const fallthru& i) {}
  void emit(const ldimmb& i);
  void emit(const ldimml& i);
  void emit(const ldimmq& i);
  void emit(const ldimmqs& i);
  void emit(const fallback& i);
  void emit(const fallbackcc& i);
  void emit(const load& i);
  void emit(const mccall& i);
  void emit(const mcprep& i);
  void emit(const nothrow& i);
  void emit(const store& i);
  void emit(const syncpoint& i);
  void emit(const unwind& i);
  void emit(const landingpad& i) {}
  void emit(const vretm& i);
  void emit(const vret& i);

  // instructions
  void emit(andb i) { commuteSF(i); a->andb(i.s0, i.d); }
  void emit(andbi i) { binary(i); a->andb(i.s0, i.d); }
  void emit(const andbim& i) { a->andb(i.s, i.m); }
  void emit(andl i) { commuteSF(i); a->andl(i.s0, i.d); }
  void emit(andli i) { binary(i); a->andl(i.s0, i.d); }
  void emit(andq i) { commuteSF(i); a->andq(i.s0, i.d); }
  void emit(andqi i) { binary(i); a->andq(i.s0, i.d); }
  void emit(addli i) { binary(i); a->addl(i.s0, i.d); }
  void emit(const addlm& i) { a->addl(i.s0, i.m); }
  void emit(addq i) { commuteSF(i); a->addq(i.s0, i.d); }
  void emit(addqi i) { binary(i); a->addq(i.s0, i.d); }
  void emit(const addqim& i);
  void emit(addsd i) { commute(i); a->addsd(i.s0, i.d); }
  void emit(const call& i);
  void emit(const callm& i) { a->call(i.target); }
  void emit(const callr& i) { a->call(i.target); }
  void emit(const cloadq& i);
  void emit(const cmovq& i);
  void emit(const cmpb& i) { a->cmpb(i.s0, i.s1); }
  void emit(const cmpbi& i) { a->cmpb(i.s0, i.s1); }
  void emit(const cmpbim& i) { a->cmpb(i.s0, i.s1); }
  void emit(const cmpl& i) { a->cmpl(i.s0, i.s1); }
  void emit(const cmpli& i) { a->cmpl(i.s0, i.s1); }
  void emit(const cmplim& i) { a->cmpl(i.s0, i.s1); }
  void emit(const cmplm& i) { a->cmpl(i.s0, i.s1); }
  void emit(const cmpq& i) { a->cmpq(i.s0, i.s1); }
  void emit(const cmpqi& i) { a->cmpq(i.s0, i.s1); }
  void emit(const cmpqim& i) { a->cmpq(i.s0, i.s1); }
  void emit(const cmpqims& i);
  void emit(const cmpqm& i) { a->cmpq(i.s0, i.s1); }
  void emit(cmpsd i) { noncommute(i); a->cmpsd(i.s0, i.d, i.pred); }
  void emit(const cqo& i) { a->cqo(); }
  void emit(const cvttsd2siq& i) { a->cvttsd2siq(i.s, i.d); }
  void emit(const cvtsi2sd& i);
  void emit(const cvtsi2sdm& i);
  void emit(decl i) { unary(i); a->decl(i.d); }
  void emit(const declm& i) { a->decl(i.m); }
  void emit(decq i) { unary(i); a->decq(i.d); }
  void emit(const decqm& i) { a->decq(i.m); }
  void emit(divsd i) { noncommute(i); a->divsd(i.s0, i.d); }
  void emit(imul i) { commuteSF(i); a->imul(i.s0, i.d); }
  void emit(const idiv& i) { a->idiv(i.s); }
  void emit(incl i) { unary(i); a->incl(i.d); }
  void emit(const inclm& i) { a->incl(i.m); }
  void emit(incq i) { unary(i); a->incq(i.d); }
  void emit(const incqm& i) { a->incq(i.m); }
  void emit(const incqmlock& i) { a->lock(); a->incq(i.m); }
  void emit(const incwm& i) { a->incw(i.m); }
  void emit(const jcc& i);
  void emit(const jcci& i);
  void emit(const jmp& i);
  void emit(const jmpr& i) { a->jmp(i.target); }
  void emit(const jmpm& i) { a->jmp(i.target); }
  void emit(const jmpi& i) { a->jmp(i.target); }
  void emit(const lea& i);
  void emit(const leap& i) { a->lea(i.s, i.d); }
  void emit(const loadups& i) { a->movups(i.s, i.d); }
  void emit(const loadtqb& i) { a->loadb(i.s, i.d); }
  void emit(const loadl& i) { a->loadl(i.s, i.d); }
  void emit(const loadqp& i) { a->loadq(i.s, i.d); }
  void emit(const loadsd& i) { a->movsd(i.s, i.d); }
  void emit(const loadzbl& i) { a->loadzbl(i.s, i.d); }
  void emit(const loadzbq& i) { a->loadzbl(i.s, Reg32(i.d)); }
  void emit(const loadzlq& i) { a->loadl(i.s, Reg32(i.d)); }
  void emit(const movb& i) { a->movb(i.s, i.d); }
  void emit(const movl& i) { a->movl(i.s, i.d); }
  void emit(const movzbl& i) { a->movzbl(i.s, i.d); }
  void emit(const movzbq& i) { a->movzbl(i.s, Reg32(i.d)); }
  void emit(mulsd i) { commute(i); a->mulsd(i.s0, i.d); }
  void emit(neg i) { unary(i); a->neg(i.d); }
  void emit(const nop& i) { a->nop(); }
  void emit(not i) { unary(i); a->not(i.d); }
  void emit(notb i) { unary(i); a->notb(i.d); }
  void emit(const orwim& i) { a->orw(i.s0, i.m); }
  void emit(orq i) { commuteSF(i); a->orq(i.s0, i.d); }
  void emit(orqi i) { binary(i); a->orq(i.s0, i.d); }
  void emit(const orqim& i) { a->orq(i.s0, i.m); }
  void emit(const pop& i) { a->pop(i.d); }
  void emit(const popm& i) { a->pop(i.d); }
  void emit(psllq i) { binary(i); a->psllq(i.s0, i.d); }
  void emit(psrlq i) { binary(i); a->psrlq(i.s0, i.d); }
  void emit(const push& i) { a->push(i.s); }
  void emit(const roundsd& i) { a->roundsd(i.dir, i.s, i.d); }
  void emit(const ret& i) { a->ret(); }
  void emit(const sarq& i) { unary(i); a->sarq(i.d); }
  void emit(sarqi i) { binary(i); a->sarq(i.s0, i.d); }
  void emit(const setcc& i) { a->setcc(i.cc, i.d); }
  void emit(shlli i) { binary(i); a->shll(i.s0, i.d); }
  void emit(shlq i) { unary(i); a->shlq(i.d); }
  void emit(shlqi i) { binary(i); a->shlq(i.s0, i.d); }
  void emit(shrli i) { binary(i); a->shrl(i.s0, i.d); }
  void emit(shrqi i) { binary(i); a->shrq(i.s0, i.d); }
  void emit(const sqrtsd& i) { a->sqrtsd(i.s, i.d); }
  void emit(const storeups& i) { a->movups(i.s, i.m); }
  void emit(const storeb& i) { a->storeb(i.s, i.m); }
  void emit(const storebi& i);
  void emit(const storel& i) { a->storel(i.s, i.m); }
  void emit(const storeli& i) { a->storel(i.s, i.m); }
  void emit(const storeqi& i) { a->storeq(i.s, i.m); }
  void emit(const storesd& i) { a->movsd(i.s, i.m); }
  void emit(const storew& i) { a->storew(i.s, i.m); }
  void emit(const storewi& i) { a->storew(i.s, i.m); }
  void emit(subbi i) { binary(i); a->subb(i.s0, i.d); }
  void emit(subl i) { noncommute(i); a->subl(i.s0, i.d); }
  void emit(subli i) { binary(i); a->subl(i.s0, i.d); }
  void emit(subq i) { noncommute(i); a->subq(i.s0, i.d); }
  void emit(subqi i) { binary(i); a->subq(i.s0, i.d); }
  void emit(subsd i) { noncommute(i); a->subsd(i.s0, i.d); }
  void emit(const testb& i) { a->testb(i.s0, i.s1); }
  void emit(const testbi& i) { a->testb(i.s0, i.s1); }
  void emit(const testbim& i) { a->testb(i.s0, i.s1); }
  void emit(const testwim& i);
  void emit(const testl& i) { a->testl(i.s0, i.s1); }
  void emit(const testli& i) { a->testl(i.s0, i.s1); }
  void emit(const testlim& i);
  void emit(const testq& i) { a->testq(i.s0, i.s1); }
  void emit(const testqm& i) { a->testq(i.s0, i.s1); }
  void emit(const testqim& i);
  void emit(const ucomisd& i) { a->ucomisd(i.s0, i.s1); }
  void emit(const ud2& i) { a->ud2(); }
  void emit(unpcklpd i) { noncommute(i); a->unpcklpd(i.s0, i.d); }
  void emit(xorb i) { commuteSF(i); a->xorb(i.s0, i.d); }
  void emit(xorbi i) { binary(i); a->xorb(i.s0, i.d); }
  void emit(xorq i) { commuteSF(i); a->xorq(i.s0, i.d); }
  void emit(xorqi i) { binary(i); a->xorq(i.s0, i.d); }

  // helpers
  void prep(Reg8 s, Reg8 d) { if (s != d) a->movb(s, d); }
  void prep(Reg32 s, Reg32 d) { if (s != d) a->movl(s, d); }
  void prep(Reg64 s, Reg64 d) { if (s != d) a->movq(s, d); }
  void prep(RegXMM s, RegXMM d) { if (s != d) a->movdqa(s, d); }
  CodeAddress start(Vlabel b) {
    auto area = unit.blocks[b].area;
    return areas[(int)area].start;
  }
  CodeBlock& main() { return area(AreaIndex::Main).code; }
  CodeBlock& cold() { return area(AreaIndex::Cold).code; }
  CodeBlock& frozen() { return area(AreaIndex::Frozen).code; }
  template<class Inst> void unary(Inst& i) { prep(i.s, i.d); }
  template<class Inst> void binary(Inst& i) { prep(i.s1, i.d); }
  template<class Inst> void commuteSF(Inst&);
  template<class Inst> void commute(Inst&);
  template<class Inst> void noncommute(Inst&);

 private:
  Vasm::Area& area(AreaIndex i) {
    assertx((unsigned)i < areas.size());
    return areas[(unsigned)i];
  }

 private:
  struct LabelPatch { CodeAddress instr; Vlabel target; };
  struct PointPatch { CodeAddress instr; Vpoint pos; };
  const Vunit& unit;
  BackEnd& backend;
  Vasm::AreaList& areas;
  AsmInfo* m_asmInfo;
  X64Assembler* a;
  Vlabel current{0}, next{0}; // in linear order
  jit::vector<CodeAddress> addrs, points;
  jit::vector<LabelPatch> jccs, jmps, calls, catches;
  jit::vector<PointPatch> ldpoints;
  jit::hash_map<uint64_t,uint64_t*> constants;
};
// prepare a binary op that is not commutative. s0 must be a different
// register than d (unless s1 == d) so we don't clobber s0.
template<class Inst> void Vgen::noncommute(Inst& i) {
  assertx(i.s1 == i.d || i.s0 != i.d); // do not clobber s0
  binary(i);
}

// prepare a binary op that is commutative. Swap operands if the dest is s0.
template<class Inst> void Vgen::commuteSF(Inst& i) {
  if (i.s1 != i.d && i.s0 == i.d) {
    i = Inst{i.s1, i.s0, i.d, i.sf};
  } else {
    binary(i);
  }
}

// prepare a binary op that is commutative. Swap operands if the dest is s0.
template<class Inst> void Vgen::commute(Inst& i) {
  if (i.s1 != i.d && i.s0 == i.d) {
    i = Inst{i.s1, i.s0, i.d};
  } else {
    binary(i);
  }
}
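
// For example, a two-address vasm op like "addq s0, s1 => d" is normally
// emitted as "mov s1 -> d; add s0, d". When d == s0 that copy would clobber
// s0, so commute()/commuteSF() swap the sources instead (legal only for
// commutative ops); noncommute() just asserts the situation cannot arise.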
///////////////////////////////////////////////////////////////////////////////

void emitSimdImm(X64Assembler* a, int64_t val, Vreg d) {
  if (val == 0) {
    a->pxor(d, d); // does not modify flags
  } else {
    auto addr = mcg->allocLiteral(val);
    a->movsd(rip[(intptr_t)addr], d);
  }
}
///////////////////////////////////////////////////////////////////////////////

void Vgen::emit(const addqim& i) {
  if (i.m.seg == Vptr::FS) a->fs();
  a->addq(i.s0, i.m.mr());
}

void Vgen::emit(const call& i) {
  // warning: this is a copy of emitCall(TCA) in code-gen-helpers-x64.cpp
  if (a->jmpDeltaFits(i.target)) {
    a->call(i.target);
  } else {
    // can't do a near call; store address in data section.
    // call by loading the address using rip-relative addressing. This
    // assumes the data section is near the current code section. Since
    // this sequence is directly in-line, rip-relative like this is
    // more compact than loading a 64-bit immediate.
    auto addr = mcg->allocLiteral((uint64_t)i.target);
    a->call(rip[(intptr_t)addr]);
  }
}
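
// Illustrative encodings for the two cases above (a sketch, not emitted text):
//   near:  E8 <rel32>        call target             (5 bytes)
//   far:   FF 15 <disp32>    call qword [rip+disp]   (6 bytes; the disp32
//                            points at the 8-byte literal in the data area)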
void Vgen::emit(const cloadq& i) {
  auto m = i.t;
  always_assert(!m.index.isValid()); // not supported, but could be later.
  if (i.f != i.d) {
    if (i.d == m.base) {
      // We can't move f over d or we'll clobber the Vptr we need to load from.
      // Since cload does the load unconditionally anyway, we can just load and
      // cmov.
      a->loadq(i.t, i.d);
      a->cmov_reg64_reg64(ccNegate(i.cc), i.f, i.d);
      return;
    }
    a->movq(i.f, i.d);
  }
  a->cload_reg64_disp_reg64(i.cc, m.base, m.disp, i.d);
}
// add s0 s1 d => mov s1->d; d += s0
// cmov cc s d => if cc { mov s->d }
void Vgen::emit(const cmovq& i) {
  if (i.f != i.d && i.t == i.d) {
    // negate the condition and swap t/f operands so we don't clobber i.t
    return emit(cmovq{ccNegate(i.cc), i.sf, i.t, i.f, i.d});
  } else {
    prep(i.f, i.d);
  }
  a->cmov_reg64_reg64(i.cc, i.t, i.d);
}
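
// For instance, cmovq{CC_E, sf, f=r1, t=r2, d=r2} becomes "cmovne r1 -> r2":
// if the condition holds, d keeps its current value (which is t); otherwise it
// receives f, and no preparatory copy is needed.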
void Vgen::emit(const contenter& i) {
  Label Stub, End;
  Reg64 fp = i.fp, target = i.target;
  a->jmp8(End);

  asm_label(*a, Stub);
  a->pop(fp[AROFF(m_savedRip)]);
  a->jmp(target);

  asm_label(*a, End);
  a->call(Stub);
  // m_savedRip will point here.
  emit(unwind{{i.targets[0], i.targets[1]}});
}
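
// Emitted layout (sketch):
//       jmp8  End
// Stub: pop   fp[AROFF(m_savedRip)]   ; return address pushed by the call
//       jmp   target
// End:  call  Stub                    ; so the saved RIP is the address
//                                     ; immediately after this call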
void Vgen::emit(const copy& i) {
  if (i.s == i.d) return;
  if (i.s.isGP()) {
    if (i.d.isGP()) {                 // GP => GP
      a->movq(i.s, i.d);
    } else {                          // GP => XMM
      assertx(i.d.isSIMD());
      // This generates a movq x86 instruction, which zero extends
      // the 64-bit value in srcReg into a 128-bit XMM register
      a->movq_rx(i.s, i.d);
    }
  } else {
    if (i.d.isGP()) {                 // XMM => GP
      a->movq_xr(i.s, i.d);
    } else {                          // XMM => XMM
      assertx(i.d.isSIMD());
      // This copies all 128 bits in XMM,
      // thus avoiding partial register stalls
      a->movdqa(i.s, i.d);
    }
  }
}

void Vgen::emit(const copy2& i) {
  assertx(i.s0.isValid() && i.s1.isValid() && i.d0.isValid() && i.d1.isValid());
  auto s0 = i.s0, s1 = i.s1, d0 = i.d0, d1 = i.d1;
  assertx(d0 != d1);
  if (d0 == s1) {
    if (d1 == s0) {
      a->xchgq(d0, d1);
    } else {
      // could do this in a simplify pass
      if (s1 != d1) a->movq(s1, d1); // save s1 first; d1 != s0
      if (s0 != d0) a->movq(s0, d0);
    }
  } else {
    // could do this in a simplify pass
    if (s0 != d0) a->movq(s0, d0);
    if (s1 != d1) a->movq(s1, d1);
  }
}
void Vgen::emit(const bindaddr& i) {
  *i.dest = emitBindAddr(a->code(), frozen(), i.dest, i.sk, i.spOff);
  mcg->setJmpTransID(TCA(i.dest));
}

void Vgen::emit(const bindcall& i) {
  mcg->backEnd().prepareForSmash(a->code(), kCallLen);
  a->call(i.stub);
  emit(unwind{{i.targets[0], i.targets[1]}});
}

void Vgen::emit(const bindjcc1st& i) {
  emitBindJmpccFirst(a->code(), frozen(), i.cc, i.targets[0], i.targets[1],
                     i.spOff);
}

void Vgen::emit(const bindjcc& i) {
  emitBindJ(
    a->code(),
    frozen(),
    i.cc,
    i.target,
    i.spOff,
    i.trflags
  );
}

void Vgen::emit(const bindjmp& i) {
  emitBindJ(
    a->code(),
    frozen(),
    CC_None,
    i.target,
    i.spOff,
    i.trflags
  );
}

void Vgen::emit(const callstub& i) {
  emit(call{i.target, i.args});
}

void Vgen::emit(const cmpqims& i) {
  backend.prepareForSmash(a->code(), kCmpLen);
  a->cmpq(i.s0, i.s1);
}

void Vgen::emit(const fallback& i) {
  emit(fallbackcc{CC_None, InvalidReg, i.dest, i.trflags, i.args});
}

void Vgen::emit(const fallbackcc& i) {
  auto const destSR = mcg->tx().getSrcRec(i.dest);
  if (!i.trflags.packed) {
    destSR->emitFallbackJump(a->code(), i.cc);
  } else {
    destSR->emitFallbackJumpCustom(a->code(), frozen(), i.dest, i.trflags);
  }
}
void Vgen::emit(const ldimmb& i) {
  // ldimmb is for Vconst::Byte, which is treated as unsigned uint8_t
  auto val = i.s.b();
  if (i.d.isGP()) {
    Vreg8 d = i.d;
    if (val == 0 && !i.saveflags) {
      a->xorb(d, d);
    } else {
      a->movb(val, d);
    }
  } else {
    emitSimdImm(a, uint8_t(val), i.d);
  }
}

void Vgen::emit(const ldimml& i) {
  // ldimml is for Vconst::Long, which is treated as unsigned uint32_t
  auto val = i.s.l();
  if (i.d.isGP()) {
    Vreg32 d = i.d;
    if (val == 0 && !i.saveflags) {
      a->xorl(d, d);
    } else {
      a->movl(val, d);
    }
  } else {
    emitSimdImm(a, uint32_t(val), i.d);
  }
}

void Vgen::emit(const ldimmq& i) {
  auto val = i.s.q();
  if (i.d.isGP()) {
    if (val == 0) {
      Reg64 d = i.d;
      if (i.saveflags) {
        a->movl(0, r32(d));
      } else {
        a->xorl(r32(d), r32(d));
      }
    } else {
      a->emitImmReg(i.s, i.d);
    }
  } else {
    emitSimdImm(a, val, i.d);
  }
}
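
// Note: on x86-64, writing a 32-bit register implicitly zeroes the upper 32
// bits of the full register, so both movl $0 and xorl suffice to zero a Reg64;
// xorl is shorter but clobbers the flags, hence the saveflags distinction.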
void Vgen::emit(const ldimmqs& i) {
  backend.prepareForSmash(a->code(), kMovLen);
  a->movq(0xdeadbeeffeedface, i.d);

  auto immp = reinterpret_cast<uintptr_t*>(a->frontier()) - 1;
  *immp = i.s.q();
}

void Vgen::emit(const load& i) {
  if (i.s.seg == Vptr::FS) a->fs();
  auto mref = i.s.mr();
  if (i.d.isGP()) {
    a->loadq(mref, i.d);
  } else {
    assertx(i.d.isSIMD());
    a->movsd(mref, i.d);
  }
}

void Vgen::emit(const mccall& i) {
  backend.prepareForSmash(a->code(), kCallLen);
  a->call(i.target);
}
// emit smashable mov as part of method cache callsite
void Vgen::emit(const mcprep& i) {
  /*
   * For the first time through, set the cache to hold the address
   * of the movq (*2 + 1), so we can find the movq from the handler.
   *
   * We set the low bit for two reasons: the Class* will never be a valid
   * Class*, so we'll always miss the inline check before it's smashed, and
   * handlePrimeCacheMiss can tell it's not been smashed yet
   */
  emit(ldimmqs{0x8000000000000000u, i.d});

  auto movAddr = reinterpret_cast<uintptr_t>(a->frontier()) - x64::kMovLen;
  auto immAddr = reinterpret_cast<uintptr_t*>(movAddr + x64::kMovImmOff);

  *immAddr = (movAddr << 1) | 1;
  mcg->cgFixups().m_addressImmediates.insert(reinterpret_cast<TCA>(~movAddr));
}
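
// Since the stored value is (movAddr << 1) | 1, the miss handler can recover
// the address of the smashable movq as (value >> 1) once it sees the low bit
// set; a real Class* is at least 2-byte aligned, so its low bit is always 0.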
void Vgen::emit(const storebi& i) {
  if (i.m.seg == Vptr::FS) a->fs();
  a->storeb(i.s, i.m.mr());
}

void Vgen::emit(const store& i) {
  if (i.s.isGP()) {
    a->storeq(i.s, i.d);
  } else {
    assertx(i.s.isSIMD());
    a->movsd(i.s, i.d);
  }
}

void Vgen::emit(const syncpoint& i) {
  FTRACE(5, "IR recordSyncPoint: {} {} {}\n", a->frontier(),
         i.fix.pcOffset, i.fix.spOffset);
  mcg->recordSyncPoint(a->frontier(), i.fix.pcOffset,
                       i.fix.spOffset);
}
void Vgen::emit(const testwim& i) {
  // If there's only 1 byte of meaningful bits in the mask, we can adjust the
  // pointer offset and use testbim instead.
  int off = 0;
  uint16_t newMask = i.s0.w();
  while (newMask > 0xff && !(newMask & 0xff)) {
    off++;
    newMask >>= 8;
  }

  if (newMask > 0xff) {
    a->testw(i.s0, i.s1);
  } else {
    emit(testbim{int8_t(newMask), i.s1 + off, i.sf});
  }
}
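
// Example: a mask of 0x0100 shifts down to 0x01 with off == 1, so the 16-bit
// test becomes "testb $1, 1(ptr)"; a mask like 0x0101 spans both bytes and
// keeps the testw form.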
void Vgen::emit(const testlim& i) {
  a->testl(i.s0, i.s1);
}

void Vgen::emit(const testqim& i) {
  // The immediate is 32 bits, sign-extended to 64. If the sign bit isn't set,
  // we can get the same results by emitting a testlim.
  if (i.s0.l() < 0) {
    a->testq(i.s0, i.s1);
  } else {
    emit(testlim{i.s0, i.s1, i.sf});
  }
}
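
// Example: 0x7fffffff only tests bits that a 32-bit testl can see, so the
// narrower encoding is equivalent; 0xffff0000 is negative as an int32, would
// sign-extend to 0xffffffffffff0000, and therefore must stay a testq.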
void Vgen::emit(const nothrow& i) {
  // register a null catch trace at this position, telling the unwinder that
  // the function call returning to here isn't allowed to throw.
  mcg->registerCatchBlock(a->frontier(), nullptr);
}

void Vgen::emit(const unwind& i) {
  // Unwind instructions terminate blocks with calls that can throw, and have
  // the edges to catch (unwinder) blocks and fall-through blocks.
  catches.push_back({a->frontier(), i.targets[1]});
  emit(jmp{i.targets[0]});
}

void Vgen::emit(const vretm& i) {
  a->push(i.retAddr);
  a->loadq(i.prevFp, i.d);
  a->ret();
}

void Vgen::emit(const vret& i) {
  a->push(i.retAddr);
  a->ret();
}
// overall emitter
void Vgen::emit(jit::vector<Vlabel>& labels) {
  // Some structures here track where we put things just for debug printing.
  struct Snippet {
    const IRInstruction* origin;
    TcaRange range;
  };
  struct BlockInfo {
    jit::vector<Snippet> snippets;
  };

  // This is under the printir tracemod because it mostly shows you IR and
  // machine code, not vasm and machine code (not implemented).
  bool shouldUpdateAsmInfo = !!m_asmInfo;

  std::vector<TransBCMapping>* bcmap = nullptr;
  if (mcg->tx().isTransDBEnabled() || RuntimeOption::EvalJitUseVtuneAPI) {
    bcmap = &mcg->cgFixups().m_bcMap;
  }

  jit::vector<jit::vector<BlockInfo>> areaToBlockInfos;
  if (shouldUpdateAsmInfo) {
    areaToBlockInfos.resize(areas.size());
    for (auto& r : areaToBlockInfos) {
      r.resize(unit.blocks.size());
    }
  }

  for (int i = 0, n = labels.size(); i < n; ++i) {
    assertx(checkBlockEnd(unit, labels[i]));

    auto b = labels[i];
    auto& block = unit.blocks[b];
    X64Assembler as { area(block.area).code };
    a = &as;
    auto blockStart = a->frontier();
    addrs[b] = blockStart;

    {
      // Compute the next block we will emit into the current area.
      auto cur_start = start(labels[i]);
      auto j = i + 1;
      while (j < labels.size() && cur_start != start(labels[j])) {
        j++;
      }
      next = j < labels.size() ? labels[j] : Vlabel(unit.blocks.size());
      current = b;
    }

    const IRInstruction* currentOrigin = nullptr;
    auto blockInfo = shouldUpdateAsmInfo
      ? &areaToBlockInfos[unsigned(block.area)][b]
      : nullptr;
    auto start_snippet = [&](const Vinstr& inst) {
      if (!shouldUpdateAsmInfo) return;

      blockInfo->snippets.push_back(
        Snippet { inst.origin, TcaRange { a->code().frontier(), nullptr } }
      );
    };
    auto finish_snippet = [&] {
      if (!shouldUpdateAsmInfo) return;

      if (!blockInfo->snippets.empty()) {
        auto& snip = blockInfo->snippets.back();
        snip.range = TcaRange { snip.range.start(), a->code().frontier() };
      }
    };

    for (auto& inst : block.code) {
      if (currentOrigin != inst.origin) {
        finish_snippet();
        start_snippet(inst);
        currentOrigin = inst.origin;
      }

      if (bcmap && inst.origin) {
        auto sk = inst.origin->marker().sk();
        if (bcmap->empty() ||
            bcmap->back().md5 != sk.unit()->md5() ||
            bcmap->back().bcStart != sk.offset()) {
          bcmap->push_back(TransBCMapping{sk.unit()->md5(), sk.offset(),
                                          main().frontier(), cold().frontier(),
                                          frozen().frontier()});
        }
      }

      switch (inst.op) {
#define O(name, imms, uses, defs) \
        case Vinstr::name: emit(inst.name##_); break;
        VASM_OPCODES
#undef O
      }
    }

    finish_snippet();
  }
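
  // All code has been emitted and every block's start address is now known,
  // so resolve the branch/call sites recorded above and register catch blocks
  // with the unwinder.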
  for (auto& p : jccs) {
    assertx(addrs[p.target]);
    X64Assembler::patchJcc(p.instr, addrs[p.target]);
  }
  for (auto& p : jmps) {
    assertx(addrs[p.target]);
    X64Assembler::patchJmp(p.instr, addrs[p.target]);
  }
  for (auto& p : calls) {
    assertx(addrs[p.target]);
    X64Assembler::patchCall(p.instr, addrs[p.target]);
  }
  for (auto& p : catches) {
    mcg->registerCatchBlock(p.instr, addrs[p.target]);
  }
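
  // Back-patch rip-relative lea displacements: each recorded lea is assumed to
  // be the 7-byte REX.W + 8D + modrm + disp32 form, so p.instr + 7 is the end
  // of the instruction and the disp32 occupies its last four bytes.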
  for (auto& p : ldpoints) {
    auto after_lea = p.instr + 7;
    auto d = points[p.pos] - after_lea;
    assertx(deltaFits(d, sz::dword));
    ((int32_t*)after_lea)[-1] = d;
  }

  if (!shouldUpdateAsmInfo) {
    return;
  }

  for (auto i = 0; i < areas.size(); ++i) {
    auto& blockInfos = areaToBlockInfos[i];
    for (auto const blockID : labels) {
      auto const& blockInfo = blockInfos[static_cast<size_t>(blockID)];
      if (blockInfo.snippets.empty()) continue;

      const IRInstruction* currentOrigin = nullptr;
      for (auto const& snip : blockInfo.snippets) {
        if (currentOrigin != snip.origin && snip.origin) {
          currentOrigin = snip.origin;
        }

        m_asmInfo->updateForInstruction(
          currentOrigin,
          static_cast<AreaIndex>(i),
          snip.range.start(),
          snip.range.end());
      }
    }
  }
}
void Vgen::emit(const cvtsi2sd& i) {
  a->pxor(i.d, i.d);
  a->cvtsi2sd(i.s, i.d);
}

void Vgen::emit(const cvtsi2sdm& i) {
  a->pxor(i.d, i.d);
  a->cvtsi2sd(i.s, i.d);
}

void Vgen::emit(const jcc& i) {
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      return emit(jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}});
    }
    auto taken = i.targets[1];
    jccs.push_back({a->frontier(), taken});
    a->jcc(i.cc, a->frontier());
  }
  emit(jmp{i.targets[0]});
}
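
// Fall-through optimization: if the taken target is the next block in layout,
// the condition is negated and the targets swapped, so at most one branch is
// emitted (the trailing jmp to targets[0] disappears in emit(jmp) when it,
// too, falls through).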
void Vgen::emit(const jcci& i) {
  a->jcc(i.cc, i.taken);
  emit(jmp{i.target});
}

void Vgen::emit(const jmp& i) {
  if (next == i.target) return;
  jmps.push_back({a->frontier(), i.target});
  a->jmp(a->frontier());
}

void Vgen::emit(const lea& i) {
  // could do this in a simplify pass
  if (i.s.disp == 0 && i.s.base.isValid() && !i.s.index.isValid()) {
    emit(copy{i.s.base, i.d});
  } else {
    a->lea(i.s, i.d);
  }
}

/*
 * Move all the elements of in into out, replacing count elements of out
 * starting at idx. in will be cleared at the end.
 *
 * Example: vector_splice([1, 2, 3, 4, 5], 2, 1, [10, 11, 12]) will change out
 * to [1, 2, 10, 11, 12, 4, 5].
 */
template<typename V>
void vector_splice(V& out, size_t idx, size_t count, V& in) {
  auto out_size = out.size();

  // Start by making room in out for the new elements.
  out.resize(out.size() + in.size() - count);

  // Move everything after the to-be-overwritten elements to the new end.
  std::move_backward(out.begin() + idx + count, out.begin() + out_size,
                     out.end());

  // Move the new elements in
  std::move(in.begin(), in.end(), out.begin() + idx);
  in.clear();
}
// Lower svcreq{} by making copies to abi registers explicit, saving
// vm regs, and returning to the VM. svcreq{} is guaranteed to be
// at the end of a block, so we can just keep appending to the same
// block.
void lower_svcreq(Vunit& unit, Vlabel b, const Vinstr& inst) {
  assertx(unit.tuples[inst.svcreq_.extraArgs].size() < kNumServiceReqArgRegs);
  auto svcreq = inst.svcreq_; // copy it
  auto origin = inst.origin;
  auto& argv = unit.tuples[svcreq.extraArgs];
  unit.blocks[b].code.pop_back(); // delete the svcreq instruction
  Vout v(unit, b, origin);

  RegSet arg_regs = svcreq.args;
  VregList arg_dests;
  for (int i = 0, n = argv.size(); i < n; ++i) {
    PhysReg d{serviceReqArgRegs[i]};
    arg_dests.push_back(d);
    arg_regs |= d;
  }
  v << copyargs{svcreq.extraArgs, v.makeTuple(arg_dests)};
  if (svcreq.stub_block) {
    v << leap{rip[(int64_t)svcreq.stub_block], rAsm};
  } else {
    v << ldimmq{0, rAsm}; // because persist flag
  }
  v << ldimmq{svcreq.req, rdi};
  arg_regs |= rAsm | rdi | rVmFp | rVmSp;

  v << jmpi{TCA(handleSRHelper), arg_regs};
}
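
// lowerSrem turns "srem s0, s1 => d" into the usual x86 idiom (before register
// allocation): copy s0 into rax, sign-extend into rdx:rax with cqo, idiv by
// s1, then copy the remainder out of rdx into d.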
void lowerSrem(Vunit& unit, Vlabel b, size_t iInst) {
  auto const& inst = unit.blocks[b].code[iInst];
  auto const& srem = inst.srem_;
  auto scratch = unit.makeScratchBlock();
  SCOPE_EXIT { unit.freeScratchBlock(scratch); };
  Vout v(unit, scratch, inst.origin);
  v << copy{srem.s0, rax};
  v << cqo{};                      // sign-extend rax => rdx:rax
  v << idiv{srem.s1, v.makeReg()}; // rdx:rax/divisor => quot:rax, rem:rdx
  v << copy{rdx, srem.d};

  vector_splice(unit.blocks[b].code, iInst, 1, unit.blocks[scratch].code);
}
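
// lowerShift pins the variable shift count into rcx first, because x86 shift
// instructions with a register count only accept it in %cl; e.g.
// "sar s0, s1 => d" becomes "copy s0 => rcx; sarq s1 => d".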
template<typename FromOp, typename ToOp>
void lowerShift(Vunit& unit, Vlabel b, size_t iInst) {
  auto const& inst = unit.blocks[b].code[iInst];
  auto const& shift = inst.get<FromOp>();
  auto scratch = unit.makeScratchBlock();
  SCOPE_EXIT { unit.freeScratchBlock(scratch); };
  Vout v(unit, scratch, inst.origin);
  v << copy{shift.s0, rcx};
  v << ToOp{shift.s1, shift.d, shift.sf};

  vector_splice(unit.blocks[b].code, iInst, 1, unit.blocks[scratch].code);
}
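
// For absdbl, shifting the 64-bit lane left by one and then logically right by
// one clears only the IEEE-754 sign bit, yielding |x| without having to load a
// sign-mask constant from memory.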
void lowerAbsdbl(Vunit& unit, Vlabel b, size_t iInst) {
  auto const& inst = unit.blocks[b].code[iInst];
  auto const& absdbl = inst.absdbl_;
  auto scratch = unit.makeScratchBlock();
  SCOPE_EXIT { unit.freeScratchBlock(scratch); };
  Vout v(unit, scratch, inst.origin);

  // clear the high bit
  auto tmp = v.makeReg();
  v << psllq{1, absdbl.s, tmp};
  v << psrlq{1, tmp, absdbl.d};

  vector_splice(unit.blocks[b].code, iInst, 1, unit.blocks[scratch].code);
}
void lowerVcall(Vunit& unit, Vlabel b, size_t iInst) {
  auto& blocks = unit.blocks;
  auto& inst = blocks[b].code[iInst];
  auto const is_vcall = inst.op == Vinstr::vcall;
  auto const vcall = inst.vcall_;
  auto const vinvoke = inst.vinvoke_;

  // Extract all the relevant information from the appropriate instruction.
  auto const is_smashable = !is_vcall && vinvoke.smashable;
  auto const call = is_vcall ? vcall.call : vinvoke.call;
  auto const& vargs = unit.vcallArgs[is_vcall ? vcall.args : vinvoke.args];
  auto const& stkArgs = vargs.stkArgs;
  auto const dests = unit.tuples[is_vcall ? vcall.d : vinvoke.d];
  auto const fixup = is_vcall ? vcall.fixup : vinvoke.fixup;
  auto const destType = is_vcall ? vcall.destType : vinvoke.destType;

  auto scratch = unit.makeScratchBlock();
  SCOPE_EXIT { unit.freeScratchBlock(scratch); };
  Vout v(unit, scratch, inst.origin);

  int32_t const adjust = (stkArgs.size() & 0x1) ? sizeof(uintptr_t) : 0;
  if (adjust) v << subqi{adjust, reg::rsp, reg::rsp, v.makeReg()};
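  // (The adjustment above keeps rsp 16-byte aligned at the call: each stack
  // arg is an 8-byte push, so an odd count would otherwise leave the stack
  // misaligned for the C++ callee.)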

  // Push stack arguments, in reverse order.
  for (int i = stkArgs.size() - 1; i >= 0; --i) v << push{stkArgs[i]};

  // Get the arguments in the proper registers.
  RegSet argRegs;
  auto doArgs = [&](const VregList& srcs, const PhysReg argNames[]) {
    VregList argDests;
    for (size_t i = 0; i < srcs.size(); ++i) {
      auto reg = argNames[i];
      argDests.push_back(reg);
      argRegs |= reg;
    }
    if (argDests.size()) {
      v << copyargs{v.makeTuple(srcs),
                    v.makeTuple(std::move(argDests))};
    }
  };
  doArgs(vargs.args, argNumToRegName);
  doArgs(vargs.simdArgs, argNumToSIMDRegName);

  // Emit the call.
  if (is_smashable) v << mccall{(TCA)call.address(), argRegs};
  else              emitCall(v, call, argRegs);

  // Handle fixup and unwind information.
  if (fixup.isValid()) v << syncpoint{fixup};

  if (!is_vcall) {
    auto& targets = vinvoke.targets;
    v << unwind{{targets[0], targets[1]}};

    // Insert an lea fixup for any stack args at the beginning of the catch
    // block.
    if (auto rspOffset = ((stkArgs.size() + 1) & ~1) * sizeof(uintptr_t)) {
      auto& taken = unit.blocks[targets[1]].code;
      assertx(taken.front().op == Vinstr::landingpad ||
              taken.front().op == Vinstr::jmp);
      Vinstr v{lea{rsp[rspOffset], rsp}};
      v.origin = taken.front().origin;
      if (taken.front().op == Vinstr::jmp) {
        taken.insert(taken.begin(), v);
      } else {
        taken.insert(taken.begin() + 1, v);
      }
    }

    // Write out the code so far to the end of b. Remaining code will be
    // emitted to the next block.
    vector_splice(blocks[b].code, iInst, 1, blocks[scratch].code);
  } else if (vcall.nothrow) {
    v << nothrow{};
  }

  // Copy the call result to the destination register(s)
  switch (destType) {
    case DestType::TV: {
      // rax contains m_type and m_aux but we're expecting just the type in
      // the lower bits, so shift the type result register.
      static_assert(offsetof(TypedValue, m_data) == 0, "");
      static_assert(offsetof(TypedValue, m_type) == 8, "");
      if (dests.size() == 2) {
        v << copy2{reg::rax, reg::rdx, dests[0], dests[1]};
      } else {
        // We have cases where we statically know the type but need the value
        // from native call. Even if the type does not really need a register
        // (e.g., InitNull), a Vreg is still allocated in assignRegs(), so the
        // following assertion holds.
        assertx(dests.size() == 1);
        v << copy{reg::rax, dests[0]};
      }
      break;
    }
    case DestType::SIMD: {
      // rax contains m_type and m_aux but we're expecting just the type in
      // the lower bits, so shift the type result register.
      static_assert(offsetof(TypedValue, m_data) == 0, "");
      static_assert(offsetof(TypedValue, m_type) == 8, "");
      assertx(dests.size() == 1);
      pack2(v, reg::rax, reg::rdx, dests[0]);
      break;
    }
    case DestType::SSA:
    case DestType::Byte:
      // copy the single-register result to dests[0]
      assertx(dests.size() == 1);
      assertx(dests[0].isValid());
      v << copy{reg::rax, dests[0]};
      break;
    case DestType::None:
      assertx(dests.empty());
      break;
    case DestType::Dbl:
      // copy the single-register result to dests[0]
      assertx(dests.size() == 1);
      assertx(dests[0].isValid());
      v << copy{reg::xmm0, dests[0]};
      break;
  }

  if (stkArgs.size() > 0) {
    v << addqi{safe_cast<int32_t>(stkArgs.size() * sizeof(uintptr_t)
                                  + adjust),
               reg::rsp,
               reg::rsp,
               v.makeReg()};
  }

  // Insert new instructions to the appropriate block
  if (is_vcall) {
    vector_splice(blocks[b].code, iInst, 1, blocks[scratch].code);
  } else {
    vector_splice(blocks[vinvoke.targets[0]].code, 0, 0,
                  blocks[scratch].code);
  }
}
void lower_vcallstub(Vunit& unit, Vlabel b) {
  auto& code = unit.blocks[b].code;
  // vcallstub can only appear at the end of a block.
  auto const inst = code.back().get<vcallstub>();
  auto const origin = code.back().origin;

  auto argRegs = inst.args;
  auto const& srcs = unit.tuples[inst.extraArgs];
  jit::vector<Vreg> dsts;
  for (int i = 0; i < srcs.size(); ++i) {
    dsts.emplace_back(argNumToRegName[i]);
    argRegs |= argNumToRegName[i];
  }

  code.back() = copyargs{unit.makeTuple(srcs), unit.makeTuple(std::move(dsts))};
  code.emplace_back(callstub{inst.target, argRegs});
  code.back().origin = origin;
  code.emplace_back(unwind{{inst.targets[0], inst.targets[1]}});
  code.back().origin = origin;
}
/*
 * Lower a few abstractions to facilitate straightforward x64 codegen.
 */
void lowerForX64(Vunit& unit, const Abi& abi) {
  Timer _t(Timer::vasm_lower);

  // This pass relies on having no critical edges in the unit.
  splitCriticalEdges(unit);

  // Scratch block can change blocks allocation, hence cannot use regular
  // iterators.
  auto& blocks = unit.blocks;

  PostorderWalker{unit}.dfs([&](Vlabel ib) {
    assertx(!blocks[ib].code.empty());
    auto& back = blocks[ib].code.back();
    if (back.op == Vinstr::svcreq) {
      lower_svcreq(unit, Vlabel{ib}, blocks[ib].code.back());
    } else if (back.op == Vinstr::vcallstub) {
      lower_vcallstub(unit, Vlabel{ib});
    }

    for (size_t ii = 0; ii < blocks[ib].code.size(); ++ii) {
      auto& inst = blocks[ib].code[ii];
      switch (inst.op) {
        case Vinstr::vcall:
        case Vinstr::vinvoke:
          lowerVcall(unit, Vlabel{ib}, ii);
          break;

        case Vinstr::srem:
          lowerSrem(unit, Vlabel{ib}, ii);
          break;

        case Vinstr::sar:
          lowerShift<sar, sarq>(unit, Vlabel{ib}, ii);
          break;

        case Vinstr::shl:
          lowerShift<shl, shlq>(unit, Vlabel{ib}, ii);
          break;

        case Vinstr::absdbl:
          lowerAbsdbl(unit, Vlabel{ib}, ii);
          break;

        case Vinstr::defvmsp:
          inst = copy{rVmSp, inst.defvmsp_.d};
          break;

        case Vinstr::syncvmsp:
          inst = copy{inst.syncvmsp_.s, rVmSp};
          break;

        case Vinstr::movtqb:
          inst = copy{inst.movtqb_.s, inst.movtqb_.d};
          break;

        case Vinstr::movtql:
          inst = copy{inst.movtql_.s, inst.movtql_.d};
          break;

        case Vinstr::countbytecode:
          inst = incqm{inst.countbytecode_.base[g_bytecodesVasm.handle()],
                       inst.countbytecode_.sf};
          break;

        default:
          break;
      }
    }
  });

  printUnit(kVasmLowerLevel, "after lower for X64", unit);
}
///////////////////////////////////////////////////////////////////////////////
}

void optimizeX64(Vunit& unit, const Abi& abi) {
  Timer timer(Timer::vasm_optimize);

  removeTrivialNops(unit);
  fuseBranches(unit);
  optimizeJmps(unit);
  optimizeExits(unit);

  lowerForX64(unit, abi);

  if (!unit.constants.empty()) {
    foldImms<x64::ImmFolder>(unit);
  }
  {
    Timer timer(Timer::vasm_copy);
    optimizeCopies(unit, abi);
  }
  if (unit.needsRegAlloc()) {
    Timer timer(Timer::vasm_xls);
    removeDeadCode(unit);
    allocateRegisters(unit, abi);
  }
  if (unit.blocks.size() > 1) {
    Timer timer(Timer::vasm_jumps);
    optimizeJmps(unit);
  }
}

void emitX64(const Vunit& unit, Vasm::AreaList& areas, AsmInfo* asmInfo) {
  static thread_local bool busy;
  always_assert(!busy);
  busy = true;
  SCOPE_EXIT { busy = false; };

  Timer timer(Timer::vasm_gen);
  auto blocks = layoutBlocks(unit);
  Vgen(unit, areas, asmInfo).emit(blocks);
}

///////////////////////////////////////////////////////////////////////////////
}}