1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 */

17 /*
18 * HHVM's ARM64 backend works with an early-truncation policy.
19 * That means:
21 * A Vreg8 is an extended W-register with a u8 value.
22 * A Vreg16 is an extended W-register with a u16 value.
23 * A Vreg32 is a W-register with a u32 value.
24 * A Vreg64 is an X-register with a u64 value.
26 * This allows the backend to omit truncation instructions for sub-32-bit
27 * operations. E.g. a testb{Vreg8 s0, Vreg8 s1} would otherwise have to
28 * truncate s0 and s1 before emitting a tst instruction. With the
29 * early-truncation policy, the testb{} emitter can rely on the
30 * fact that s0 and s1 are already truncated and can emit the
31 * tst instruction without preceding uxtb's.
33 * Conversely, any arithmetic instruction has to sign-extend a
34 * Vreg8 before operating on it. Vasm is light on these instructions;
35 * currently only csinc[bw]{} and cmp[bw][i]{} require it.
37 * Early-truncation also has consequences for the extension/truncation
38 * vasm instructions. The following list shows how to use them:
40 * movzbw: Vreg8 -> Vreg16: mov w0, w0 #nop if s==d
41 * movzbl: Vreg8 -> Vreg32: mov w0, w0 #nop if s==d
42 * movzbq: Vreg8 -> Vreg64: uxtb x0, x0
43 * movzwl: Vreg16 -> Vreg32: mov w0, w0 #nop if s==d
44 * movzwq: Vreg16 -> Vreg64: uxth x0, x0
45 * movzlq: Vreg32 -> Vreg64: uxtw x0, x0
46 * movtqb: Vreg64 -> Vreg8: uxtb w0, w0
47 * movtql: Vreg64 -> Vreg32: uxtw w0, w0
49 * Early-truncation also implies that instructions have to truncate
50 * after performing the actual operation if they cannot guarantee that
51 * the resulting VregN type matches. E.g. the emitter for the vasm
52 * instruction andbi{Immed imm, Vreg8 s, Vreg8 d} has to truncate the
53 * result to guarantee that register d indeed holds a u8 value.
55 * Note that the early-truncation policy allows aarch64-specific
56 * optimizations which are not relevant on other architectures.
57 * E.g. x86_64 does not need this policy, as the ISA allows
58 * direct register access for Vreg8, Vreg16, Vreg32 and Vreg64
59 * (e.g. AL, AX, EAX, RAX).
61 * The early-truncation policy relies on the following
62 * requirements of the Vreg type-system:
64 * * All VregNs are created for values of up to N bits
65 * * All conversions between VregNs are done via movz/movt vasm instructions
66 */
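/*
 * Illustrative sketch (not part of the original source): two emitters from
 * further down in this file show the policy in action.  testb{} can rely on
 * its sources already being truncated u8 values and emit a plain 32-bit tst,
 * while movzbq{} has to extend explicitly because the destination is an
 * X-register:
 *
 *   void Vgen::emit(const testb& i)  { a->Tst(W(i.s1), W(i.s0)); }
 *   void Vgen::emit(const movzbq& i) { a->Uxtb(X(i.d), W(i.s).X()); }
 */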
68 #include "hphp/runtime/vm/jit/vasm-emit.h"
70 #include "hphp/runtime/vm/jit/abi-arm.h"
71 #include "hphp/runtime/vm/jit/ir-instruction.h"
72 #include "hphp/runtime/vm/jit/print.h"
73 #include "hphp/runtime/vm/jit/service-requests.h"
74 #include "hphp/runtime/vm/jit/smashable-instr-arm.h"
75 #include "hphp/runtime/vm/jit/timer.h"
76 #include "hphp/runtime/vm/jit/vasm-gen.h"
77 #include "hphp/runtime/vm/jit/vasm.h"
78 #include "hphp/runtime/vm/jit/vasm-instr.h"
79 #include "hphp/runtime/vm/jit/vasm-internal.h"
80 #include "hphp/runtime/vm/jit/vasm-lower.h"
81 #include "hphp/runtime/vm/jit/vasm-print.h"
82 #include "hphp/runtime/vm/jit/vasm-reg.h"
83 #include "hphp/runtime/vm/jit/vasm-unit.h"
84 #include "hphp/runtime/vm/jit/vasm-util.h"
85 #include "hphp/runtime/vm/jit/vasm-visit.h"
87 #include "hphp/vixl/a64/macro-assembler-a64.h"
89 TRACE_SET_MOD(vasm);
91 namespace HPHP { namespace jit {
92 ///////////////////////////////////////////////////////////////////////////////
94 using namespace arm;
95 using namespace vixl;
97 namespace arm { struct ImmFolder; }
99 namespace {
100 ///////////////////////////////////////////////////////////////////////////////
102 static_assert(folly::kIsLittleEndian,
103 "Code contains little-endian specific optimizations.");
105 vixl::Register X(Vreg64 r) {
106 PhysReg pr(r.asReg());
107 return x2a(pr);
110 vixl::Register W(Vreg64 r) {
111 PhysReg pr(r.asReg());
112 return x2a(pr).W();
115 vixl::Register W(Vreg32 r) {
116 PhysReg pr(r.asReg());
117 return x2a(pr).W();
120 vixl::Register W(Vreg16 r) {
121 PhysReg pr(r.asReg());
122 return x2a(pr).W();
125 vixl::Register W(Vreg8 r) {
126 PhysReg pr(r.asReg());
127 return x2a(pr).W();
130 vixl::FPRegister D(Vreg r) {
131 return x2f(r);
134 vixl::VRegister V(Vreg r) {
135 return x2v(r);
138 uint8_t Log2(uint8_t value) {
139 switch (value) {
140 case 1:
141 return 0;
142 case 2:
143 return 1;
144 case 4:
145 return 2;
146 case 8:
147 return 3;
148 default:
149 always_assert(false);
153 vixl::MemOperand M(Vptr p) {
154 assertx(p.base.isValid());
155 if (p.index.isValid()) {
156 assertx(p.disp == 0);
157 return MemOperand(X(p.base), X(p.index), LSL, Log2(p.scale));
159 return MemOperand(X(p.base), p.disp);
162 vixl::Condition C(ConditionCode cc) {
163 return arm::convertCC(cc);
167 * Uses the flags from the Vinstr which defs SF to determine
168 * whether or not the Vixl assembler should emit code which
169 * sets the status flags.
171 vixl::FlagsUpdate UF(Vflags flags) {
172 return flags ? SetFlags : LeaveFlags;
176 * There are numerous ARM instructions that don't set status flags, and
177 * therefore those flags must be set synthetically in the emitters. This
178 * assertion is applied in emitters that cannot set every status flag; it
179 * checks that the flags the implementation does set cover all of the flags
180 * required by the Vinstr which defs SF. The flags field of the Vinstr
181 * determines which bits are required; those required bits are compared
182 * against the bits which are actually set by the implementation.
184 template<class Inst> void checkSF(const Inst& i, StatusFlags s) {
185 Vflags required = i.fl;
186 Vflags set = static_cast<Vflags>(s);
187 always_assert_flog((required & set) == required,
188 "should def SF but does not: {}\n",
189 vinst_names[Vinstr(i).op]);
192 template<class Inst> void checkSF(const Inst& i) {
193 checkSF(i, StatusFlags::None);
197 * Returns true if the queried flag(s) is in the set of required flags.
199 bool flagRequired(Vflags flags, StatusFlags flag) {
200 return (flags & static_cast<Vflags>(flag));
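// Illustrative sketch (not a helper in the original source): the usual shape
// of an emitter that can only produce N and Z synthetically, condensed from
// the shift emitters later in this file.  Bic against xzr is used purely for
// its flag-setting side effect.
//
//   if (i.fl) {
//     checkSF(i, StatusFlags::NotV);                  // V cannot be produced
//     a->Bic(vixl::xzr, X(i.d), vixl::xzr, SetFlags); // set N and Z from i.d
//   }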
203 ///////////////////////////////////////////////////////////////////////////////
205 struct Vgen {
206 explicit Vgen(Venv& env)
207 : env(env)
208 , assem(*env.cb)
209 , a(&assem)
210 , base(a->frontier())
211 , current(env.current)
212 , next(env.next)
213 , jmps(env.jmps)
214 , jccs(env.jccs)
215 , catches(env.catches)
217 ~Vgen() {
218 env.cb->sync(base);
221 static void emitVeneers(Venv& env);
222 static void handleLiterals(Venv& env);
223 static void patch(Venv& env);
225 static void pad(CodeBlock& cb) {
226 vixl::MacroAssembler a { cb };
227 auto const begin = cb.frontier();
228 while (cb.available() >= 4) a.Brk(1);
229 assertx(cb.available() == 0);
230 cb.sync(begin);
233 /////////////////////////////////////////////////////////////////////////////
235 template<class Inst> void emit(const Inst& i) {
236 always_assert_flog(false, "unimplemented instruction: {} in B{}\n",
237 vinst_names[Vinstr(i).op], size_t(current));
240 // intrinsics
241 void emit(const copy& i);
242 void emit(const copy2& i);
243 void emit(const debugtrap& /*i*/) { a->Brk(0); }
244 void emit(const fallthru& /*i*/);
245 void emit(const ldimmb& i);
246 void emit(const ldimml& i);
247 void emit(const ldimmq& i);
248 void emit(const ldimmw& i);
249 void emit(const ldundefq& /*i*/) {}
250 void emit(const load& i);
251 void emit(const store& i);
252 void emit(const mcprep& i);
254 // native function abi
255 void emit(const call& i);
256 void emit(const callr& i) { a->Blr(X(i.target)); }
257 void emit(const calls& i);
258 void emit(const ret& /*i*/) { a->Ret(); }
260 // stub function abi
261 void emit(const callstub& i);
262 void emit(const callfaststub& i);
264 // php function abi
265 void emit(const callphp& i) {
266 emit(call{i.target, i.args});
267 setCallFuncId(env, a->frontier());
269 void emit(const callphpr& i) {
270 emit(callr{i.target, i.args});
271 setCallFuncId(env, a->frontier());
273 void emit(const contenter& i);
274 void emit(const phpret& i);
276 // vm entry abi
277 void emit(const inittc& /*i*/) {}
278 void emit(const leavetc& i);
280 // exceptions
281 void emit(const landingpad& /*i*/) {}
282 void emit(const nothrow& i);
283 void emit(const syncpoint& i);
284 void emit(const unwind& i);
286 // instructions
287 void emit(const absdbl& i) { a->Fabs(D(i.d), D(i.s)); }
288 void emit(const addl& i) { a->Add(W(i.d), W(i.s1), W(i.s0), UF(i.fl)); }
289 void emit(const addli& i) { a->Add(W(i.d), W(i.s1), i.s0.l(), UF(i.fl)); }
290 void emit(const addq& i) { a->Add(X(i.d), X(i.s1), X(i.s0), UF(i.fl));}
291 void emit(const addqi& i) { a->Add(X(i.d), X(i.s1), i.s0.q(), UF(i.fl)); }
292 void emit(const addsd& i) { a->Fadd(D(i.d), D(i.s1), D(i.s0)); }
293 void emit(const andb& i) { a->And(W(i.d), W(i.s1), W(i.s0), UF(i.fl)); }
294 void emit(const andbi& i) { a->And(W(i.d), W(i.s1), i.s0.ub(), UF(i.fl)); }
295 void emit(const andw& i) { a->And(W(i.d), W(i.s1), W(i.s0), UF(i.fl)); }
296 void emit(const andwi& i) { a->And(W(i.d), W(i.s1), i.s0.uw(), UF(i.fl)); }
297 void emit(const andl& i) { a->And(W(i.d), W(i.s1), W(i.s0), UF(i.fl)); }
298 void emit(const andli& i) { a->And(W(i.d), W(i.s1), i.s0.l(), UF(i.fl)); }
299 void emit(const andq& i) { a->And(X(i.d), X(i.s1), X(i.s0), UF(i.fl)); }
300 void emit(const andqi& i) { a->And(X(i.d), X(i.s1), i.s0.q(), UF(i.fl)); }
301 void emit(const andqi64& i) { a->And(X(i.d), X(i.s1), i.s0.q(), UF(i.fl)); }
302 void emit(const cmovb& i) { a->Csel(W(i.d), W(i.t), W(i.f), C(i.cc)); }
303 void emit(const cmovw& i) { a->Csel(W(i.d), W(i.t), W(i.f), C(i.cc)); }
304 void emit(const cmovl& i) { a->Csel(W(i.d), W(i.t), W(i.f), C(i.cc)); }
305 void emit(const cmovq& i) { a->Csel(X(i.d), X(i.t), X(i.f), C(i.cc)); }
306 // note: cmp{bw}[i] are emitted only for narrow comparisons and _do not_ sign
307 // extend their arguments--these instructions are lowered to cmp{lq}[i] if
308 // the comparison is not narrow or not equality/inequality
309 void emit(const cmpb& i) { a->Cmp(W(i.s1), W(i.s0)); }
310 void emit(const cmpbi& i) { a->Cmp(W(i.s1), static_cast<uint8_t>(i.s0.b())); }
311 void emit(const cmpw& i) { a->Cmp(W(i.s1), W(i.s0)); }
312 void emit(const cmpwi& i) { a->Cmp(W(i.s1), static_cast<uint16_t>(i.s0.w())); }
313 void emit(const cmpl& i) { a->Cmp(W(i.s1), W(i.s0)); }
314 void emit(const cmpli& i) { a->Cmp(W(i.s1), i.s0.l()); }
315 void emit(const cmpq& i) { a->Cmp(X(i.s1), X(i.s0)); }
316 void emit(const cmpqi& i) { a->Cmp(X(i.s1), i.s0.q()); }
317 void emit(const cmpsd& i);
318 // TODO(CDE): csinc[bw]{} should a) sign extend and b) set SF for overflow
319 void emit(const csincb& i) { a->Csinc(W(i.d), W(i.t), W(i.f), C(i.cc)); }
320 void emit(const csincw& i) { a->Csinc(W(i.d), W(i.t), W(i.f), C(i.cc)); }
321 void emit(const csincl& i) { a->Csinc(W(i.d), W(i.t), W(i.f), C(i.cc)); }
322 void emit(const csincq& i) { a->Csinc(X(i.d), X(i.t), X(i.f), C(i.cc)); }
323 void emit(const cvtsi2sd& i) { a->Scvtf(D(i.d), X(i.s)); }
324 void emit(const decl& i) { a->Sub(W(i.d), W(i.s), 1, UF(i.fl)); }
325 void emit(const decq& i) { a->Sub(X(i.d), X(i.s), 1, UF(i.fl)); }
326 void emit(const decqmlock& i);
327 void emit(const divint& i) { a->Sdiv(X(i.d), X(i.s0), X(i.s1)); }
328 void emit(const divsd& i) { a->Fdiv(D(i.d), D(i.s1), D(i.s0)); }
329 void emit(const imul& i);
330 void emit(const incl& i) { a->Add(W(i.d), W(i.s), 1, UF(i.fl)); }
331 void emit(const incq& i) { a->Add(X(i.d), X(i.s), 1, UF(i.fl)); }
332 void emit(const incw& i) { a->Add(W(i.d), W(i.s), 1, UF(i.fl)); }
333 void emit(const jcc& i);
334 void emit(const jcci& i);
335 void emit(const jmp& i);
336 void emit(const jmpi& i);
337 void emit(const jmpr& i) { a->Br(X(i.target)); }
338 void emit(const lea& i);
339 void emit(const leap& i);
340 void emit(const leav& i);
341 void emit(const lead& i);
342 void emit(const loadb& i) { a->Ldrb(W(i.d), M(i.s)); }
343 void emit(const loadl& i) { a->Ldr(W(i.d), M(i.s)); }
344 void emit(const loadsd& i) { a->Ldr(D(i.d), M(i.s)); }
345 void emit(const loadtqb& i) { a->Ldrb(W(i.d), M(i.s)); }
346 void emit(const loadtql& i) { a->Ldr(W(i.d), M(i.s)); }
347 void emit(const loadups& i);
348 void emit(const loadw& i) { a->Ldrh(W(i.d), M(i.s)); }
349 void emit(const loadzbl& i) { a->Ldrb(W(i.d), M(i.s)); }
350 void emit(const loadzbq& i) { a->Ldrb(W(i.d), M(i.s)); }
351 void emit(const loadsbq& i) { a->Ldrsb(X(i.d), M(i.s)); }
352 void emit(const loadsbl& i) { a->Ldrsb(W(i.d), M(i.s)); }
353 void emit(const loadzwq& i) { a->Ldrh(W(i.d), M(i.s)); }
354 void emit(const loadzlq& i) { a->Ldr(W(i.d), M(i.s)); }
355 void emit(const movb& i) { if (i.d != i.s) a->Mov(W(i.d), W(i.s)); }
356 void emit(const movw& i) { if (i.d != i.s) a->Mov(W(i.d), W(i.s)); }
357 void emit(const movl& i) { if (i.d != i.s) a->Mov(W(i.d), W(i.s)); }
358 void emit(const movsbl& i) { a->Sxtb(W(i.d), W(i.s)); }
359 void emit(const movsbq& i) { a->Sxtb(X(i.d), W(i.s).X()); }
360 void emit(const movswl& i) { a->Sxth(W(i.d), W(i.s)); }
361 void emit(const movtqb& i) { a->Uxtb(W(i.d), W(i.s)); }
362 void emit(const movtqw& i) { a->Uxth(W(i.d), W(i.s)); }
363 void emit(const movtql& i) { a->Uxtw(W(i.d), W(i.s)); }
364 void emit(const movzbq& i) { a->Uxtb(X(i.d), W(i.s).X()); }
365 void emit(const movzwq& i) { a->Uxth(X(i.d), W(i.s).X()); }
366 void emit(const movzlq& i) { a->Uxtw(X(i.d), W(i.s).X()); }
367 void emit(const mulsd& i) { a->Fmul(D(i.d), D(i.s1), D(i.s0)); }
368 void emit(const neg& i) { a->Neg(X(i.d), X(i.s), UF(i.fl)); }
369 void emit(const nop& /*i*/) { a->Nop(); }
370 void emit(const notb& i) { a->Mvn(W(i.d), W(i.s)); }
371 void emit(const not& i) { a->Mvn(X(i.d), X(i.s)); }
372 void emit(const orbi& i);
373 void emit(const orq& i);
374 void emit(const orwi& i);
375 void emit(const orli& i);
376 void emit(const orqi& i);
377 void emit(const pop& i);
378 void emit(const popp& i);
379 void emit(const push& i);
380 void emit(const pushp& i);
381 void emit(const roundsd& i);
382 void emit(const sar& i);
383 void emit(const sarqi& i);
384 void emit(const setcc& i) { a->Cset(W(i.d), C(i.cc)); }
385 void emit(const shl& i);
386 void emit(const shlli& i);
387 void emit(const shlqi& i);
388 void emit(const shrli& i);
389 void emit(const shrqi& i);
390 void emit(const sqrtsd& i) { a->Fsqrt(D(i.d), D(i.s)); }
391 void emit(const srem& i);
392 void emit(const storeb& i) { a->Strb(W(i.s), M(i.m)); }
393 void emit(const storel& i) { a->Str(W(i.s), M(i.m)); }
394 void emit(const storesd& i) { emit(store{i.s, i.m}); }
395 void emit(const storeups& i);
396 void emit(const storew& i) { a->Strh(W(i.s), M(i.m)); }
397 void emit(const subl& i) { a->Sub(W(i.d), W(i.s1), W(i.s0), UF(i.fl)); }
398 void emit(const subli& i) { a->Sub(W(i.d), W(i.s1), i.s0.l(), UF(i.fl)); }
399 void emit(const subq& i) { a->Sub(X(i.d), X(i.s1), X(i.s0), UF(i.fl)); }
400 void emit(const subqi& i) { a->Sub(X(i.d), X(i.s1), i.s0.q(), UF(i.fl)); }
401 void emit(const subsd& i) { a->Fsub(D(i.d), D(i.s1), D(i.s0)); }
402 void emit(const testb& i){ a->Tst(W(i.s1), W(i.s0)); }
403 void emit(const testbi& i){ a->Tst(W(i.s1), i.s0.ub()); }
404 void emit(const testw& i){ a->Tst(W(i.s1), W(i.s0)); }
405 void emit(const testwi& i){ a->Tst(W(i.s1), i.s0.uw()); }
406 void emit(const testl& i) { a->Tst(W(i.s1), W(i.s0)); }
407 void emit(const testli& i) { a->Tst(W(i.s1), i.s0.l()); }
408 void emit(const testq& i) { a->Tst(X(i.s1), X(i.s0)); }
409 void emit(const testqi& i) { a->Tst(X(i.s1), i.s0.q()); }
410 void emit(const trap& /*i*/);
411 void emit(const ucomisd& i) { a->Fcmp(D(i.s0), D(i.s1)); }
412 void emit(const unpcklpd&);
413 void emit(const xorb& i);
414 void emit(const xorbi& i);
415 void emit(const xorw& i);
416 void emit(const xorwi& i);
417 void emit(const xorl& i);
418 void emit(const xorq& i);
419 void emit(const xorqi& i);
421 // arm intrinsics
422 void emit(const fcvtzs& i) { a->Fcvtzs(X(i.d), D(i.s)); }
423 void emit(const mrs& i) { a->Mrs(X(i.r), vixl::SystemRegister(i.s.l())); }
424 void emit(const msr& i) { a->Msr(vixl::SystemRegister(i.s.l()), X(i.r)); }
425 void emit(const ubfmli& i) { a->ubfm(W(i.d), W(i.s), i.mr.w(), i.ms.w()); }
427 void emit_nop() { a->Nop(); }
429 private:
430 CodeBlock& frozen() { return env.text.frozen().code; }
431 static void recordAddressImmediate(Venv& env, TCA addr) {
432 env.meta.addressImmediates.insert(addr);
434 void recordAddressImmediate() {
435 env.meta.addressImmediates.insert(env.cb->frontier());
438 private:
439 Venv& env;
440 vixl::MacroAssembler assem;
441 vixl::MacroAssembler* a;
442 Address base;
444 const Vlabel current;
445 const Vlabel next;
446 jit::vector<Venv::LabelPatch>& jmps;
447 jit::vector<Venv::LabelPatch>& jccs;
448 jit::vector<Venv::LabelPatch>& catches;
451 ///////////////////////////////////////////////////////////////////////////////
453 static CodeBlock* getBlock(Venv& env, CodeAddress a) {
454 for (auto const& area : env.text.areas()) {
455 if (area.code.contains(a)) {
456 return &area.code;
459 return nullptr;
462 static CodeAddress toReal(Venv& env, CodeAddress a) {
463 CodeBlock* b = getBlock(env, a);
464 return (b == nullptr) ? a : b->toDestAddress(a);
467 void Vgen::emitVeneers(Venv& env) {
468 auto& meta = env.meta;
469 decltype(env.meta.veneers) notEmitted;
471 for (auto const& veneer : meta.veneers) {
472 auto cb = getBlock(env, veneer.source);
473 if (!cb) {
474 // If we can't find the code block, it must have been emitted by a Vunit
475 // wrapping this one (bindjmp emits a Vunit within a Vunit).
476 notEmitted.push_back(veneer);
477 continue;
479 auto const vaddr = cb->frontier();
481 FTRACE(1, "emitVeneers: source = {}, target = {}, veneer at {}\n",
482 veneer.source, veneer.target, vaddr);
484 // Emit the veneer code: LDR + BR.
485 meta.veneerAddrs.insert(vaddr);
486 MacroAssembler av{*cb};
487 vixl::Label target_data;
488 meta.addressImmediates.insert(vaddr);
489 poolLiteral(*cb, meta, (uint64_t)makeTarget32(veneer.target), 32, true);
490 av.bind(&target_data);
491 av.Ldr(rAsm_w, &target_data);
492 av.Br(rAsm);
494 // Update the veneer source instruction to jump/call the veneer.
495 auto const realSource = toReal(env, veneer.source);
496 CodeBlock tmpBlock;
497 tmpBlock.init(realSource, kInstructionSize, "emitVeneers");
498 MacroAssembler at{tmpBlock};
499 int64_t offset = vaddr - veneer.source;
500 auto sourceInst = Instruction::Cast(realSource);
502 if (sourceInst->Mask(UnconditionalBranchMask) == B) {
503 always_assert(is_int28(offset));
504 at.b(offset >> kInstructionSizeLog2);
506 } else if (sourceInst->Mask(UnconditionalBranchMask) == BL) {
507 always_assert(is_int28(offset));
508 at.bl(offset >> kInstructionSizeLog2);
510 } else if (sourceInst->IsCondBranchImm()) {
511 auto const cond = static_cast<Condition>(sourceInst->ConditionBranch());
512 if (is_int21(offset)) {
513 at.b(offset >> kInstructionSizeLog2, cond);
514 } else {
515 // The offset doesn't fit in a conditional jump. Hopefully it still fits
516 // in an unconditional jump, in which case we add an appendix to the
517 // veneer.
518 offset += 2 * kInstructionSize;
519 always_assert(is_int28(offset));
520 // Add an appendix to the veneer, and jump to it instead. The full
521 // veneer in this case looks like:
522 // VENEER:
523 // LDR RX, LITERAL_ADDR
524 // BR RX
525 // APPENDIX:
526 // B.CC VENEER
527 // B NEXT
528 // And the conditional jump into the veneer is turned into a jump to the
529 // appendix:
530 // B APPENDIX
531 // NEXT:
533 // Turn the original conditional branch into an unconditional one.
534 at.b(offset >> kInstructionSizeLog2);
536 // Emit appendix.
537 auto const appendix = cb->frontier();
538 av.b(-2 /* veneer starts 2 instructions before the appendix */, cond);
539 const int64_t nextOffset = (veneer.source + kInstructionSize) - // NEXT
540 (vaddr + 3 * kInstructionSize); // addr of "B NEXT"
541 always_assert(is_int28(nextOffset));
542 av.b(nextOffset >> kInstructionSizeLog2);
544 // Replace veneer.source with appendix in the relevant metadata.
545 meta.smashableLocations.erase(veneer.source);
546 meta.smashableLocations.insert(appendix);
547 for (auto& tj : meta.inProgressTailJumps) {
548 if (tj.toSmash() == veneer.source) tj.adjust(appendix);
550 for (auto& stub : env.stubs) {
551 if (stub.jcc == veneer.source) stub.jcc = appendix;
554 } else {
555 always_assert_flog(0, "emitVeneers: invalid source instruction at source"
556 " {} (realSource = {})",
557 veneer.source, realSource);
561 env.meta.veneers.swap(notEmitted);
564 void Vgen::handleLiterals(Venv& env) {
565 decltype(env.meta.literalsToPool) notEmitted;
566 for (auto const& pl : env.meta.literalsToPool) {
567 auto const cb = getBlock(env, pl.patchAddress);
568 if (!cb) {
569 // If we can't find the code block it must have been emitted by a Vunit
570 // wrapping this one. (bindjmp emits a Vunit within a Vunit)
571 notEmitted.push_back(pl);
572 continue;
575 // Emit the literal.
576 auto literalAddress = cb->frontier();
577 if (pl.width == 32) {
578 cb->dword(static_cast<uint32_t>(pl.value));
579 } else if (pl.width == 64) {
580 if (pl.smashable) {
581 // Although the region is actually dead, we mark it as live, so that
582 // the relocator can remove the padding.
583 align(*cb, &env.meta, Alignment::QuadWordSmashable, AlignContext::Live);
584 literalAddress = cb->frontier();
586 cb->qword(pl.value);
587 } else {
588 not_reached();
591 // Patch the LDR.
592 auto const patchAddressActual =
593 Instruction::Cast(toReal(env, pl.patchAddress));
594 assertx(patchAddressActual->IsLoadLiteral());
595 patchAddressActual->SetImmPCOffsetTarget(
596 Instruction::Cast(literalAddress),
597 Instruction::Cast(pl.patchAddress));
600 if (env.meta.fallthru) {
601 auto const fallthru = *env.meta.fallthru;
602 auto const cb = getBlock(env, fallthru);
603 if (!cb) {
604 always_assert_flog(false,
605 "Fallthrus shouldn't be used in nested Vunits.");
607 auto const blockEndAddr = cb->frontier();
608 auto const startAddr = cb->toDestAddress(fallthru);
609 CodeBlock tmp;
610 tmp.init(startAddr, kInstructionSize, "Tmp");
611 // Write the jmp.
612 Assembler a { tmp };
613 recordAddressImmediate(env, fallthru);
614 a.b((blockEndAddr - fallthru) >> kInstructionSizeLog2);
617 env.meta.literalsToPool.swap(notEmitted);
620 void Vgen::patch(Venv& env) {
621 // Patch the 32 bit target of the LDR
622 auto patch = [&env](TCA instr, TCA target) {
623 // The LDR loading the address to branch to.
624 auto ldr = Instruction::Cast(instr);
625 auto const DEBUG_ONLY br = ldr->NextInstruction();
626 assertx(ldr->Mask(LoadLiteralMask) == LDR_w_lit &&
627 br->Mask(UnconditionalBranchToRegisterMask) == BR &&
628 ldr->Rd() == br->Rn());
629 // The address the LDR loads.
630 auto targetAddr = ldr->LiteralAddress();
631 // Patch the 32 bit target following the LDR and BR
632 patchTarget32(targetAddr, target);
635 for (auto const& p : env.jmps) {
636 auto addr = toReal(env, p.instr);
637 auto const target = env.addrs[p.target];
638 assertx(target);
639 if (env.meta.smashableLocations.count(p.instr)) {
640 assertx(possiblySmashableJmp(addr));
641 // Update `addr' to point to the veneer.
642 addr = TCA(vixl::Instruction::Cast(addr)->ImmPCOffsetTarget());
644 // Patch the address we are jumping to.
645 patch(addr, target);
647 for (auto const& p : env.jccs) {
648 auto addr = toReal(env, p.instr);
649 auto const target = env.addrs[p.target];
650 assertx(target);
651 if (env.meta.smashableLocations.count(p.instr)) {
652 assertx(possiblySmashableJcc(addr));
653 // Update `addr' to point to the veneer.
654 addr = TCA(vixl::Instruction::Cast(addr)->ImmPCOffsetTarget());
655 } else {
656 assertx(Instruction::Cast(addr)->IsCondBranchImm());
657 // If the jcc starts with a conditional jump, patch the next instruction
658 // (which should start with a LDR).
659 addr += kInstructionSize;
661 patch(addr, target);
663 for (auto const& p : env.leas) {
664 (void)p;
665 not_implemented();
669 ///////////////////////////////////////////////////////////////////////////////
671 void Vgen::emit(const copy& i) {
672 if (i.s == i.d) return;
673 if (i.s.isGP() && i.d.isGP()) {
674 a->Mov(X(i.d), X(i.s));
675 } else if (i.s.isSIMD() && i.d.isGP()) {
676 a->Fmov(X(i.d), D(i.s));
677 } else if (i.s.isGP() && i.d.isSIMD()) {
678 a->Fmov(D(i.d), X(i.s));
679 } else {
680 assertx(i.s.isSIMD() && i.d.isSIMD());
681 a->mov(V(i.d), V(i.s));
685 void Vgen::emit(const copy2& i) {
686 assertx(i.s0.isValid() && i.s1.isValid() && i.d0.isValid() && i.d1.isValid());
687 auto s0 = i.s0, s1 = i.s1, d0 = i.d0, d1 = i.d1;
688 assertx(d0 != d1);
689 if (d0 == s1) {
690 if (d1 == s0) {
691 a->Eor(X(d0), X(d0), X(s0));
692 a->Eor(X(s0), X(d0), X(s0));
693 a->Eor(X(d0), X(d0), X(s0));
694 } else {
695 // could do this in a simplify pass
696 if (s1 != d1) a->Mov(X(s1), X(d1)); // save s1 first; d1 != s0
697 if (s0 != d0) a->Mov(X(s0), X(d0));
699 } else {
700 // could do this in a simplify pass
701 if (s0 != d0) a->Mov(X(s0), X(d0));
702 if (s1 != d1) a->Mov(X(s1), X(d1));
706 void emitSimdImmInt(vixl::MacroAssembler* a, uint64_t val, Vreg d) {
707 // Assembler::fmov emits an ldr from a literal pool if IsImmFP64 is false.
708 // In that case, emit the raw bits into a GPR first and then move them
709 // unmodified into the destination SIMD register.
710 union { double dval; uint64_t ival; };
711 ival = val;
712 if (vixl::Assembler::IsImmFP64(dval)) {
713 a->Fmov(D(d), dval);
714 } else if (ival == 0) {
715 a->Fmov(D(d), vixl::xzr);
716 } else {
717 a->Mov(rAsm, ival);
718 a->Fmov(D(d), rAsm);
721 void Vgen::emit(const fallthru& /*i*/) {
722 always_assert(!env.meta.fallthru);
723 env.meta.fallthru = a->frontier();
724 a->nop();
727 #define Y(vasm_opc, simd_w, vr_w, gpr_w, imm) \
728 void Vgen::emit(const vasm_opc& i) { \
729 if (i.d.isSIMD()) { \
730 emitSimdImmInt(a, static_cast<uint##vr_w##_t>(i.s.simd_w()), i.d); \
731 } else { \
732 Vreg##vr_w d = i.d; \
733 a->Mov(gpr_w(d), imm); \
737 Y(ldimmb, ub, 8, W, i.s.ub())
738 Y(ldimmw, uw, 16, W, i.s.uw())
739 Y(ldimml, l, 32, W, i.s.l())
740 Y(ldimmq, q, 64, X, i.s.q())
742 #undef Y
744 void Vgen::emit(const load& i) {
745 if (i.d.isGP()) {
746 a->Ldr(X(i.d), M(i.s));
747 } else {
748 a->Ldr(D(i.d), M(i.s));
752 void Vgen::emit(const store& i) {
753 if (i.s.isGP()) {
754 if (i.s == rsp()) {
755 a->Mov(rAsm, X(i.s));
756 a->Str(rAsm, M(i.d));
757 } else {
758 a->Str(X(i.s), M(i.d));
760 } else {
761 a->Str(D(i.s), M(i.d));
765 ///////////////////////////////////////////////////////////////////////////////
767 void Vgen::emit(const mcprep& i) {
769 * Initially, we set the cache to hold (addr << 1) | 1 (where `addr' is the
770 * address of the movq) so that we can find the movq from the handler.
772 * We set the low bit for two reasons: the Class* will never be a valid
773 * Class*, so we'll always miss the inline check before it's smashed, and
774 * MethodCache::handleStaticCall can tell it hasn't been smashed yet.
777 align(*env.cb, &env.meta, Alignment::SmashMovq, AlignContext::Live);
778 auto const imm = reinterpret_cast<uint64_t>(a->frontier());
779 emitSmashableMovq(*env.cb, env.meta, (imm << 1) | 1, r64(i.d));
781 env.meta.addressImmediates.insert(reinterpret_cast<TCA>(~imm));
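// Illustrative sketch (not from this file) of how a handler can recover the
// movq from the cached value; the actual consumer is
// MethodCache::handleStaticCall:
//
//   if (value & 1) {                                       // low bit: not yet smashed
//     auto const movq = reinterpret_cast<TCA>(value >> 1); // address of the movq
//     // ... resolve the call and smash the movq ...
//   }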
784 ///////////////////////////////////////////////////////////////////////////////
786 void Vgen::emit(const call& i) {
787 recordAddressImmediate();
788 a->Mov(rAsm, i.target);
789 a->Blr(rAsm);
790 if (i.watch) {
791 *i.watch = a->frontier();
792 env.meta.watchpoints.push_back(i.watch);
796 void Vgen::emit(const calls& i) {
797 emitSmashableCall(*env.cb, env.meta, i.target);
800 ///////////////////////////////////////////////////////////////////////////////
802 void Vgen::emit(const callstub& i) {
803 emit(call{i.target, i.args});
806 void Vgen::emit(const callfaststub& i) {
807 emit(call{i.target, i.args});
808 emit(syncpoint{i.fix});
811 ///////////////////////////////////////////////////////////////////////////////
813 void Vgen::emit(const phpret& i) {
814 // prefer load-pair instruction
815 if (!i.noframe) {
816 a->ldp(X(rvmfp()), X(rlr()), X(i.fp)[AROFF(m_sfp)]);
817 } else {
818 a->Ldr(X(rlr()), X(i.fp)[AROFF(m_savedRip)]);
820 emit(ret{});
823 void Vgen::emit(const contenter& i) {
824 vixl::Label stub, end;
826 // Jump past the stub below.
827 recordAddressImmediate();
828 a->B(&end);
830 // We call into this stub from the end below. Take that LR and store it in
831 // m_savedRip. Then jump to the target.
832 a->bind(&stub);
833 a->Str(X(rlr()), M(i.fp[AROFF(m_savedRip)]));
834 a->Br(X(i.target));
836 // Call to stub above and then unwind.
837 a->bind(&end);
838 recordAddressImmediate();
839 a->Bl(&stub);
840 emit(unwind{{i.targets[0], i.targets[1]}});
843 ///////////////////////////////////////////////////////////////////////////////
845 void Vgen::emit(const leavetc& /*i*/) {
846 // The LR was preserved on the stack by resumetc. Pop it while preserving
847 // SP alignment and return.
848 a->Ldp(rAsm, X(rlr()), MemOperand(sp, 16, PostIndex));
849 a->Ret();
852 ///////////////////////////////////////////////////////////////////////////////
854 void Vgen::emit(const nothrow& /*i*/) {
855 env.meta.catches.emplace_back(a->frontier(), nullptr);
856 env.record_inline_stack(a->frontier());
859 void Vgen::emit(const syncpoint& i) {
860 FTRACE(5, "IR recordSyncPoint: {} {}\n", a->frontier(), i.fix.show());
861 env.meta.fixups.emplace_back(a->frontier(), i.fix);
862 env.record_inline_stack(a->frontier());
865 void Vgen::emit(const unwind& i) {
866 catches.push_back({a->frontier(), i.targets[1]});
867 env.record_inline_stack(a->frontier());
868 emit(jmp{i.targets[0]});
871 ///////////////////////////////////////////////////////////////////////////////
874 * Flags
875 * SF should be set to MSB of the result
876 * CF, OF should be set to (1, 1) if the result is truncated, (0, 0) otherwise
877 * ZF, AF, PF are undefined
879 * In the following implementation,
880 * N, Z, V are updated according to result
881 * C is cleared (FIXME)
883 void Vgen::emit(const imul& i) {
885 // Do the multiplication
886 a->Mul(X(i.d), X(i.s0), X(i.s1));
888 // If we have to set any flags, then always set N and Z since it's cheap.
889 // Only set V when absolutely necessary. C is not supported.
890 if (i.fl) {
891 vixl::Label after;
893 checkSF(i, StatusFlags::NotC);
895 if (flagRequired(i.fl, StatusFlags::V)) {
896 vixl::Label checkSign;
897 vixl::Label Overflow;
899 // Do the multiplication for the upper 64 bits of a 128 bit result.
900 // If the result is not all zeroes or all ones, then we have overflow.
901 // If the result is all zeroes or all ones, and the sign is the same
902 // for both hi and lo, then there is no overflow.
903 a->smulh(rAsm, X(i.s0), X(i.s1));
905 // If hi is all 0's or 1's, then check the sign, else overflow
906 // (fallthrough).
907 recordAddressImmediate();
908 a->Cbz(rAsm, &checkSign);
909 a->Cmp(rAsm, -1);
910 recordAddressImmediate();
911 a->B(&checkSign, vixl::eq);
913 // Overflow, so conditionally set the N and Z bits and then OR in the V bit.
914 a->Bind(&Overflow);
915 a->Bic(vixl::xzr, X(i.d), vixl::xzr, SetFlags);
916 a->Mrs(rAsm, NZCV);
917 a->Orr(rAsm, rAsm, 1<<28);
918 a->Msr(NZCV, rAsm);
919 recordAddressImmediate();
920 a->B(&after);
922 // Check the signs of hi and lo.
923 a->Bind(&checkSign);
924 a->Eor(rAsm, rAsm, X(i.d));
925 recordAddressImmediate();
926 a->Tbnz(rAsm, 63, &Overflow);
929 // No overflow, so conditionally set only the N and Z bits.
930 a->Bic(vixl::xzr, X(i.d), vixl::xzr, SetFlags);
932 a->bind(&after);
936 void Vgen::emit(const decqmlock& i) {
937 auto adr = M(i.m);
938 /* Use VIXL's macroassembler scratch regs. */
939 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg);
940 if (RuntimeOption::EvalJitArmLse) {
941 a->Mov(rVixlScratch0, -1);
942 a->ldaddal(rVixlScratch0, rVixlScratch0, adr);
943 a->Sub(rAsm, rVixlScratch0, 1, SetFlags);
944 } else {
945 vixl::Label again;
946 a->bind(&again);
947 a->ldxr(rAsm, adr);
948 a->Sub(rAsm, rAsm, 1, SetFlags);
949 a->stxr(rVixlScratch0, rAsm, adr);
950 recordAddressImmediate();
951 a->Cbnz(rVixlScratch0, &again);
953 /* Restore VIXL's scratch regs. */
954 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1);
957 void Vgen::emit(const jcc& i) {
958 if (i.targets[1] != i.targets[0]) {
959 if (next == i.targets[1]) {
960 return emit(jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}});
962 auto taken = i.targets[1];
963 jccs.push_back({a->frontier(), taken});
964 vixl::Label skip, data;
966 // Emit a "far JCC" sequence for easy patching later. Static relocation
967 // might be able to simplify this later (see optimizeFarJcc()).
968 recordAddressImmediate();
969 a->B(&skip, vixl::InvertCondition(C(i.cc)));
970 recordAddressImmediate();
971 poolLiteral(*env.cb, env.meta, (uint64_t)makeTarget32(a->frontier()),
972 32, false);
973 a->bind(&data); // This will be remapped during the handleLiterals phase.
974 a->Ldr(rAsm_w, &data);
975 a->Br(rAsm);
976 a->bind(&skip);
978 emit(jmp{i.targets[0]});
981 void Vgen::emit(const jcci& i) {
982 vixl::Label skip;
984 recordAddressImmediate();
985 a->B(&skip, vixl::InvertCondition(C(i.cc)));
986 emit(jmpi{i.taken});
987 a->bind(&skip);
988 emit(jmp{i.target});
991 void Vgen::emit(const jmp& i) {
992 if (next == i.target) return;
993 jmps.push_back({a->frontier(), i.target});
994 vixl::Label data;
996 // Emit a "far JMP" sequence for easy patching later. Static relocation
997 // might be able to simplify this (see optimizeFarJmp()).
998 recordAddressImmediate();
999 poolLiteral(*env.cb, env.meta, (uint64_t)a->frontier(), 32, false);
1000 a->bind(&data); // This will be remapped during the handleLiterals phase.
1001 a->Ldr(rAsm_w, &data);
1002 a->Br(rAsm);
1005 void Vgen::emit(const jmpi& i) {
1006 vixl::Label data;
1008 // If target can be addressed by pc relative offset (signed 26 bits), emit
1009 // PC relative jump. Else, emit target address into code and load from there.
1010 auto diff = (i.target - a->frontier()) >> vixl::kInstructionSizeLog2;
1011 if (vixl::is_int26(diff)) {
1012 recordAddressImmediate();
1013 a->b(diff);
1014 } else {
1015 // Cannot use simple a->Mov() since such a sequence cannot be
1016 // adjusted while live following a relocation.
1017 recordAddressImmediate();
1018 poolLiteral(*env.cb, env.meta, (uint64_t)i.target, 32, false);
1019 a->bind(&data); // This will be remapped during the handleLiterals phase.
1020 a->Ldr(rAsm_w, &data);
1021 a->Br(rAsm);
1025 void Vgen::emit(const lea& i) {
1026 auto p = i.s;
1027 assertx(p.base.isValid());
1028 if (p.index.isValid()) {
1029 assertx(p.disp == 0);
1030 a->Add(X(i.d), X(p.base), Operand(X(p.index), LSL, Log2(p.scale)));
1031 } else {
1032 a->Add(X(i.d), X(p.base), p.disp);
1036 void Vgen::emit(const leav& i) {
1037 auto const addr = a->frontier();
1038 emit(leap{reg::rip[0xdeadbeef], i.d});
1039 env.leas.push_back({addr, i.s});
1042 void Vgen::emit(const leap& i) {
1043 vixl::Label imm_data;
1044 vixl::Label after_data;
1046 // Cannot use simple a->Mov() since such a sequence cannot be
1047 // adjusted while live following a relocation.
1048 recordAddressImmediate();
1049 poolLiteral(*env.cb, env.meta, (uint64_t)makeTarget32(i.s.r.disp),
1050 32, false);
1051 a->bind(&imm_data); // This will be remapped during the handleLiterals phase.
1052 a->Ldr(W(i.d), &imm_data);
1055 void Vgen::emit(const lead& i) {
1056 recordAddressImmediate();
1057 a->Mov(X(i.d), i.s.get());
1060 #define Y(vasm_opc, arm_opc, src_dst, m) \
1061 void Vgen::emit(const vasm_opc& i) { \
1062 assertx(i.m.base.isValid()); \
1063 a->Mov(rAsm, X(i.m.base)); \
1064 if (i.m.index.isValid()) { \
1065 a->Add(rAsm, rAsm, Operand(X(i.m.index), LSL, Log2(i.m.scale))); \
1067 if (i.m.disp != 0) { \
1068 a->Add(rAsm, rAsm, i.m.disp); \
1070 a->arm_opc(V(i.src_dst), MemOperand(rAsm)); \
1073 Y(loadups, ld1, d, s)
1074 Y(storeups, st1, s, m)
1076 #undef Y
1079 * Flags
1080 * SF, ZF, PF should be updated according to result
1081 * CF, OF should be cleared
1082 * AF is undefined
1084 * In the following implementation,
1085 * N, Z are updated according to result
1086 * C, V are cleared
1088 #define Y(vasm_opc, arm_opc, gpr_w, s0, zr) \
1089 void Vgen::emit(const vasm_opc& i) { \
1090 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), s0); \
1091 if (i.fl) { \
1092 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1096 Y(orbi, Orr, W, i.s0.ub(), wzr);
1097 Y(orwi, Orr, W, i.s0.uw(), xzr);
1098 Y(orli, Orr, W, i.s0.l(), xzr);
1099 Y(orqi, Orr, X, i.s0.q(), xzr);
1100 Y(orq, Orr, X, X(i.s0), xzr);
1101 Y(xorb, Eor, W, W(i.s0), wzr);
1102 Y(xorbi, Eor, W, i.s0.ub(), wzr);
1103 Y(xorw, Eor, W, W(i.s0), wzr);
1104 Y(xorwi, Eor, W, i.s0.uw(), wzr);
1105 Y(xorl, Eor, W, W(i.s0), wzr);
1106 Y(xorq, Eor, X, X(i.s0), xzr);
1107 Y(xorqi, Eor, X, i.s0.q(), xzr);
1109 #undef Y
1111 void Vgen::emit(const pop& i) {
1112 // SP access must be 8 byte aligned. Use rAsm instead.
1113 a->Mov(rAsm, sp);
1114 a->Ldr(X(i.d), MemOperand(rAsm, 8, PostIndex));
1115 a->Mov(sp, rAsm);
1118 void Vgen::emit(const push& i) {
1119 // SP access must be 8 byte aligned. Use rAsm instead.
1120 a->Mov(rAsm, sp);
1121 a->Str(X(i.s), MemOperand(rAsm, -8, PreIndex));
1122 a->Mov(sp, rAsm);
1125 void Vgen::emit(const roundsd& i) {
1126 switch (i.dir) {
1127 case RoundDirection::nearest: {
1128 a->frintn(D(i.d), D(i.s));
1129 break;
1132 case RoundDirection::floor: {
1133 a->frintm(D(i.d), D(i.s));
1134 break;
1137 case RoundDirection:: ceil: {
1138 a->frintp(D(i.d), D(i.s));
1139 break;
1142 default: {
1143 assertx(i.dir == RoundDirection::truncate);
1144 a->frintz(D(i.d), D(i.s));
1149 void Vgen::emit(const srem& i) {
1150 a->Sdiv(rAsm, X(i.s0), X(i.s1));          // rAsm = s0 / s1
1151 a->Msub(X(i.d), rAsm, X(i.s1), X(i.s0));  // d = s0 - rAsm * s1
1154 void Vgen::emit(const trap& i) {
1155 env.meta.trapReasons.emplace_back(a->frontier(), i.reason);
1156 a->Brk(1);
1159 void Vgen::emit(const unpcklpd& i) {
1160 // i.d and i.s1 can be same, i.s0 is unique.
1161 if (i.d != i.s1) a->fmov(D(i.d), D(i.s1));
1162 a->fmov(rAsm, D(i.s0));
1163 a->fmov(D(i.d), 1, rAsm);
1166 ///////////////////////////////////////////////////////////////////////////////
1168 void Vgen::emit(const cmpsd& i) {
1170 * cmpsd doesn't update SF, so preserve the current flags in a temp.
1171 * Use one of the macroassembler scratch regs.
1173 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg);
1174 a->Mrs(rVixlScratch0, NZCV);
1176 a->Fcmp(D(i.s0), D(i.s1));
1177 switch (i.pred) {
1178 case ComparisonPred::eq_ord:
1179 a->Csetm(rAsm, C(jit::CC_E));
1180 break;
1181 case ComparisonPred::ne_unord:
1182 a->Csetm(rAsm, C(jit::CC_NE));
1183 break;
1184 default:
1185 always_assert(false);
1187 a->Fmov(D(i.d), rAsm);
1189 /* Copy the flags back to the system register. */
1190 a->Msr(NZCV, rVixlScratch0);
1191 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1);
1195 ///////////////////////////////////////////////////////////////////////////////
1198 * For the shifts:
1200 * C is set through inspection
1201 * N, Z are updated according to result
1202 * V is cleared (FIXME)
1203 * PF, AF are not available
1205 * Only set the flags if there are any required flags (i.fl).
1206 * Setting the C flag is particularly expensive, so when setting
1207 * flags check this flag specifically.
1209 #define Y(vasm_opc, arm_opc, gpr_w, zr) \
1210 void Vgen::emit(const vasm_opc& i) { \
1211 if (!i.fl) { \
1212 /* Just perform the shift. */ \
1213 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), gpr_w(i.s0)); \
1214 } else { \
1215 checkSF(i, StatusFlags::NotV); \
1216 if (!flagRequired(i.fl, StatusFlags::C)) { \
1217 /* Perform the shift and set N and Z. */ \
1218 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), gpr_w(i.s0)); \
1219 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1220 } else { \
1221 /* Use VIXL's macroassembler scratch regs. */ \
1222 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg); \
1223 /* Perform the shift using temp and set N and Z. */ \
1224 a->arm_opc(rVixlScratch0, gpr_w(i.s1), gpr_w(i.s0)); \
1225 a->Bic(vixl::zr, rVixlScratch0, vixl::zr, SetFlags); \
1226 /* Read the flags into a temp. */ \
1227 a->Mrs(rAsm, NZCV); \
1228 /* Reshift right leaving the last bit as bit 0. */ \
1229 a->Sub(rVixlScratch1, gpr_w(i.s0), 1); \
1230 a->Lsr(rVixlScratch1, gpr_w(i.s1), rVixlScratch1); \
1231 /* Negate the bits, including bit 0 to match X64. */ \
1232 a->Mvn(rVixlScratch1, rVixlScratch1); \
1233 /* Copy bit zero into bit 29 of the flags. */ \
1234 a->bfm(rAsm, rVixlScratch1, 35, 0); \
1235 /* Copy the flags back to the system register. */ \
1236 a->Msr(NZCV, rAsm); \
1237 /* Copy the result to the destination. */ \
1238 a->Mov(gpr_w(i.d), rVixlScratch0); \
1239 /* Restore VIXL's scratch regs. */ \
1240 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1); \
1245 Y(sar, Asr, X, xzr)
1247 #undef Y
1249 #define Y(vasm_opc, arm_opc, gpr_w, sz, zr) \
1250 void Vgen::emit(const vasm_opc& i) { \
1251 if (!i.fl) { \
1252 /* Just perform the shift. */ \
1253 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), gpr_w(i.s0)); \
1254 } else { \
1255 checkSF(i, StatusFlags::NotV); \
1256 if (!flagRequired(i.fl, StatusFlags::C)) { \
1257 /* Perform the shift and set N and Z. */ \
1258 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), gpr_w(i.s0)); \
1259 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1260 } else { \
1261 /* Use VIXL's macroassembler scratch regs. */ \
1262 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg); \
1263 /* Perform the shift using temp and set N and Z. */ \
1264 a->arm_opc(rVixlScratch0, gpr_w(i.s1), gpr_w(i.s0)); \
1265 a->Bic(vixl::zr, rVixlScratch0, vixl::zr, SetFlags); \
1266 /* Read the flags into a temp. */ \
1267 a->Mrs(rAsm, NZCV); \
1268 /* Reshift right leaving the last bit as bit 0. */ \
1269 a->Mov(rVixlScratch1, sz); \
1270 a->Sub(rVixlScratch1, rVixlScratch1, gpr_w(i.s0)); \
1271 a->Lsr(rVixlScratch1, gpr_w(i.s1), rVixlScratch1); \
1272 /* Negate the bits, including bit 0 to match X64. */ \
1273 a->Mvn(rVixlScratch1, rVixlScratch1); \
1274 /* Copy bit zero into bit 29 of the flags. */ \
1275 a->bfm(rAsm, rVixlScratch1, 35, 0); \
1276 /* Copy the flags back to the system register. */ \
1277 a->Msr(NZCV, rAsm); \
1278 /* Copy the result to the destination. */ \
1279 a->Mov(gpr_w(i.d), rVixlScratch0); \
1280 /* Restore VIXL's scratch regs. */ \
1281 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1); \
1286 Y(shl, Lsl, X, 64, xzr)
1288 #undef Y
1290 #define Y(vasm_opc, arm_opc, gpr_w, zr) \
1291 void Vgen::emit(const vasm_opc& i) { \
1292 if (!i.fl) { \
1293 /* Just perform the shift. */ \
1294 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), i.s0.l()); \
1295 } else { \
1296 checkSF(i, StatusFlags::NotV); \
1297 if (!flagRequired(i.fl, StatusFlags::C)) { \
1298 /* Perform the shift and set N and Z. */ \
1299 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), i.s0.l()); \
1300 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1301 } else { \
1302 /* Use VIXL's macroassembler scratch regs. */ \
1303 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg); \
1304 /* Perform the shift using temp and set N and Z. */ \
1305 a->arm_opc(rVixlScratch0, gpr_w(i.s1), i.s0.l()); \
1306 a->Bic(vixl::zr, rVixlScratch0, vixl::zr, SetFlags); \
1307 /* Read the flags into a temp. */ \
1308 a->Mrs(rAsm, NZCV); \
1309 /* Reshift right leaving the last bit as bit 0. */ \
1310 a->Lsr(rVixlScratch1, gpr_w(i.s1), i.s0.l() - 1); \
1311 /* Negate the bits, including bit 0 to match X64. */ \
1312 a->Mvn(rVixlScratch1, rVixlScratch1); \
1313 /* Copy bit zero into bit 29 of the flags. */ \
1314 a->bfm(rAsm, rVixlScratch1, 35, 0); \
1315 /* Copy the flags back to the system register. */ \
1316 a->Msr(NZCV, rAsm); \
1317 /* Copy the result to the destination. */ \
1318 a->Mov(gpr_w(i.d), rVixlScratch0); \
1319 /* Restore VIXL's scratch regs. */ \
1320 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1); \
1325 Y(sarqi, Asr, X, xzr)
1326 Y(shrli, Lsr, W, wzr)
1327 Y(shrqi, Lsr, X, xzr)
1329 #undef Y
1331 #define Y(vasm_opc, arm_opc, gpr_w, sz, zr) \
1332 void Vgen::emit(const vasm_opc& i) { \
1333 if (!i.fl) { \
1334 /* Just perform the shift. */ \
1335 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), i.s0.l()); \
1336 } else { \
1337 checkSF(i, StatusFlags::NotV); \
1338 if (!flagRequired(i.fl, StatusFlags::C)) { \
1339 /* Perform the shift and set N and Z. */ \
1340 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), i.s0.l()); \
1341 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1342 } else { \
1343 /* Use VIXL's macroassembler scratch regs. */ \
1344 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg); \
1345 /* Perform the shift using temp and set N and Z. */ \
1346 a->arm_opc(rVixlScratch0, gpr_w(i.s1), i.s0.l()); \
1347 a->Bic(vixl::zr, rVixlScratch0, vixl::zr, SetFlags); \
1348 /* Read the flags into a temp. */ \
1349 a->Mrs(rAsm, NZCV); \
1350 /* Reshift right leaving the last bit as bit 0. */ \
1351 a->Lsr(rVixlScratch1, gpr_w(i.s1), sz - i.s0.l()); \
1352 /* Negate the bits, including bit 0 to match X64. */ \
1353 a->Mvn(rVixlScratch1, rVixlScratch1); \
1354 /* Copy bit zero into bit 29 of the flags. */ \
1355 a->bfm(rAsm, rVixlScratch1, 35, 0); \
1356 /* Copy the flags back to the system register. */ \
1357 a->Msr(NZCV, rAsm); \
1358 /* Copy the result to the destination. */ \
1359 a->Mov(gpr_w(i.d), rVixlScratch0); \
1360 /* Restore VIXL's scratch regs. */ \
1361 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1); \
1366 Y(shlli, Lsl, W, 32, wzr)
1367 Y(shlqi, Lsl, X, 64, xzr)
1369 #undef Y
1371 ///////////////////////////////////////////////////////////////////////////////
1373 void Vgen::emit(const popp& i) {
1374 a->Ldp(X(i.d0), X(i.d1), MemOperand(sp, 16, PostIndex));
1377 void Vgen::emit(const pushp& i) {
1378 a->Stp(X(i.s1), X(i.s0), MemOperand(sp, -16, PreIndex));
1381 ///////////////////////////////////////////////////////////////////////////////
1383 template<typename Lower>
1384 void lower_impl(Vunit& unit, Vlabel b, size_t i, Lower lower) {
1385 vmodify(unit, b, i, [&] (Vout& v) { lower(v); return 1; });
1388 template <typename Inst>
1389 void lower(const VLS& /*env*/, Inst& /*inst*/, Vlabel /*b*/, size_t /*i*/) {}
1391 ///////////////////////////////////////////////////////////////////////////////
1394 * TODO: Using load size (ldr[bh]?), apply scaled address if 'disp' is unsigned
1396 void lowerVptr(Vptr& p, Vout& v) {
1397 enum {
1398 BASE = 1,
1399 INDEX = 2,
1400 DISP = 4
1403 uint8_t mode = (((p.base.isValid() & 0x1) << 0) |
1404 ((p.index.isValid() & 0x1) << 1) |
1405 (((p.disp != 0) & 0x1) << 2));
1406 switch (mode) {
1407 case BASE:
1408 case BASE | INDEX:
1409 // ldr/str allow [base] and [base, index], nothing to lower.
1410 break;
1412 case INDEX:
1413 // Not supported, convert to [base].
1414 if (p.scale > 1) {
1415 auto t = v.makeReg();
1416 v << shlqi{Log2(p.scale), p.index, t, v.makeReg()};
1417 p.base = t;
1418 } else {
1419 p.base = p.index;
1421 p.index = Vreg{};
1422 p.scale = 1;
1423 break;
1425 case BASE | DISP: {
1426 // ldr/str allow [base, #imm], where #imm is [-256 .. 255].
1427 if (p.disp >= -256 && p.disp <= 255)
1428 break;
1430 // #imm is out of range, convert to [base, index]
1431 auto index = v.makeReg();
1432 v << ldimmq{Immed64(p.disp), index};
1433 p.index = index;
1434 p.scale = 1;
1435 p.disp = 0;
1436 break;
1439 case DISP: {
1440 // Not supported, convert to [base].
1441 auto base = v.makeReg();
1442 v << ldimmq{Immed64(p.disp), base};
1443 p.base = base;
1444 p.index = Vreg{};
1445 p.scale = 1;
1446 p.disp = 0;
1447 break;
1450 case INDEX | DISP:
1451 // Not supported, convert to [base, #imm] or [base, index].
1452 if (p.scale > 1) {
1453 auto t = v.makeReg();
1454 v << shlqi{Log2(p.scale), p.index, t, v.makeReg()};
1455 p.base = t;
1456 } else {
1457 p.base = p.index;
1459 if (p.disp >= -256 && p.disp <= 255) {
1460 p.index = Vreg{};
1461 p.scale = 1;
1462 } else {
1463 auto index = v.makeReg();
1464 v << ldimmq{Immed64(p.disp), index};
1465 p.index = index;
1466 p.scale = 1;
1467 p.disp = 0;
1469 break;
1471 case BASE | INDEX | DISP: {
1472 // Not supported, convert to [base, index].
1473 auto index = v.makeReg();
1474 if (p.scale > 1) {
1475 auto t = v.makeReg();
1476 v << shlqi{Log2(p.scale), p.index, t, v.makeReg()};
1477 v << addqi{p.disp, t, index, v.makeReg()};
1478 } else {
1479 v << addqi{p.disp, p.index, index, v.makeReg()};
1481 p.index = index;
1482 p.scale = 1;
1483 p.disp = 0;
1484 break;
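// Example (illustrative, hypothetical Vregs): a fully general Vptr such as
// base[index * 8 + disp] is rewritten by the case above into
//
//   shlqi{3, index, t, v.makeReg()};      // t = index << 3
//   addqi{disp, t, index2, v.makeReg()};  // index2 = t + disp
//
// leaving the Vptr as [base, index2] with scale 1 and disp 0, a form that
// ldr/str can address directly.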
1489 #define Y(vasm_opc, m) \
1490 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1491 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1492 lowerVptr(i.m, v); \
1493 v << i; \
1494 }); \
1497 Y(decqmlock, m)
1498 Y(lea, s)
1499 Y(load, s)
1500 Y(loadb, s)
1501 Y(loadl, s)
1502 Y(loadsd, s)
1503 Y(loadtqb, s)
1504 Y(loadtql, s)
1505 Y(loadups, s)
1506 Y(loadw, s)
1507 Y(loadzbl, s)
1508 Y(loadzbq, s)
1509 Y(loadzlq, s)
1510 Y(store, d)
1511 Y(storeb, m)
1512 Y(storel, m)
1513 Y(storesd, m)
1514 Y(storeups, m)
1515 Y(storew, m)
1517 #undef Y
1519 #define Y(vasm_opc, lower_opc, load_opc, store_opc, arg, m) \
1520 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1521 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1522 lowerVptr(i.m, v); \
1523 auto r0 = v.makeReg(), r1 = v.makeReg(); \
1524 v << load_opc{i.m, r0}; \
1525 v << lower_opc{arg, r0, r1, i.sf, i.fl}; \
1526 v << store_opc{r1, i.m}; \
1527 }); \
1530 Y(addlim, addli, loadl, storel, i.s0, m)
1531 Y(addlm, addl, loadl, storel, i.s0, m)
1532 Y(addwm, addl, loadw, storew, Reg32(i.s0), m)
1533 Y(addqim, addqi, load, store, i.s0, m)
1534 Y(andbim, andbi, loadb, storeb, i.s, m)
1535 Y(orbim, orqi, loadb, storeb, i.s0, m)
1536 Y(orqim, orqi, load, store, i.s0, m)
1537 Y(orwim, orqi, loadw, storew, i.s0, m)
1538 Y(orlim, orqi, loadl, storel, i.s0, m)
1540 #undef Y
1542 #define Y(vasm_opc, lower_opc, movs_opc) \
1543 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1544 if (!i.fl || (i.fl & static_cast<Vflags>(StatusFlags::NV))) { \
1545 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1546 auto r0 = v.makeReg(), r1 = v.makeReg(); \
1547 v << movs_opc{i.s0, r0}; \
1548 v << movs_opc{i.s1, r1}; \
1549 v << lower_opc{r0, r1, i.sf, i.fl}; \
1550 }); \
1554 Y(cmpb, cmpl, movsbl)
1555 Y(cmpw, cmpl, movswl)
1557 #undef Y
1559 #define Y(vasm_opc, lower_opc, movs_opc) \
1560 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1561 if (!i.fl || (i.fl & static_cast<Vflags>(StatusFlags::NV))) { \
1562 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1563 auto r = v.makeReg(); \
1564 v << movs_opc{i.s1, r}; \
1565 v << lower_opc{i.s0, r, i.sf, i.fl}; \
1566 }); \
1570 Y(cmpbi, cmpli, movsbl)
1571 Y(cmpwi, cmpli, movswl)
1573 #undef Y
1575 #define Y(vasm_opc, lower_opc, load_opc) \
1576 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1577 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1578 lowerVptr(i.s1, v); \
1579 auto r = e.allow_vreg() ? v.makeReg() : Vreg(PhysReg(rAsm)); \
1580 v << load_opc{i.s1, r}; \
1581 v << lower_opc{i.s0, r, i.sf, i.fl}; \
1582 }); \
1585 Y(cmpbim, cmpbi, loadb)
1586 Y(cmplim, cmpli, loadl)
1587 Y(cmpbm, cmpb, loadb)
1588 Y(cmpwm, cmpw, loadb)
1589 Y(cmplm, cmpl, loadl)
1590 Y(cmpqim, cmpqi, load)
1591 Y(cmpqm, cmpq, load)
1592 Y(cmpwim, cmpwi, loadw)
1593 Y(testbim, testli, loadb)
1594 Y(testlim, testli, loadl)
1595 Y(testqim, testqi, load)
1596 Y(testbm, testb, loadb)
1597 Y(testwm, testw, loadw)
1598 Y(testlm, testl, loadl)
1599 Y(testqm, testq, load)
1600 Y(testwim, testli, loadw)
1602 #undef Y
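// For example (illustrative), cmplim{imm, m, sf} is expanded by the macro
// above into roughly the following, after lowerVptr() has rewritten m:
//
//   loadl{m, r};               // r = 32-bit load from m
//   cmpli{imm, r, sf, fl};     // compare r against the immediate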
1604 void lower(const VLS& e, cvtsi2sdm& i, Vlabel b, size_t z) {
1605 lower_impl(e.unit, b, z, [&] (Vout& v) {
1606 lowerVptr(i.s, v);
1607 auto r = v.makeReg();
1608 v << load{i.s, r};
1609 v << cvtsi2sd{r, i.d};
1613 #define Y(vasm_opc, lower_opc, load_opc, store_opc, m) \
1614 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1615 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1616 lowerVptr(i.m, v); \
1617 auto r0 = e.allow_vreg() ? v.makeReg() : Vreg(PhysReg(rAsm)); \
1618 auto r1 = e.allow_vreg() ? v.makeReg() : Vreg(PhysReg(rAsm)); \
1619 v << load_opc{i.m, r0}; \
1620 v << lower_opc{r0, r1, i.sf, i.fl}; \
1621 v << store_opc{r1, i.m}; \
1622 }); \
1625 Y(declm, decl, loadl, storel, m)
1626 Y(decqm, decq, load, store, m)
1627 Y(inclm, incl, loadl, storel, m)
1628 Y(incqm, incq, load, store, m)
1629 Y(incwm, incw, loadw, storew, m)
1631 #undef Y
1633 void lower(const VLS& e, cvttsd2siq& i, Vlabel b, size_t idx) {
1634 lower_impl(e.unit, b, idx, [&] (Vout& v) {
1635 // Clear FPSR IOC flag.
1636 auto const tmp1 = v.makeReg();
1637 auto const tmp2 = v.makeReg();
1638 v << mrs{FPSR, tmp1};
1639 v << andqi{~0x01, tmp1, tmp2, v.makeReg()};
1640 v << msr{tmp2, FPSR};
1642 // Load error value.
1643 auto const err = v.makeReg();
1644 v << ldimmq{0x8000000000000000, err};
1646 // Do ARM64's double to signed int64 conversion.
1647 auto const res = v.makeReg();
1648 v << fcvtzs{i.s, res};
1650 // Check if there was a conversion error.
1651 auto const fpsr = v.makeReg();
1652 auto const sf = v.makeReg();
1653 v << mrs{FPSR, fpsr};
1654 v << testqi{1, fpsr, sf};
1656 // Move converted value or error.
1657 v << cmovq{CC_NZ, sf, res, err, i.d};
1661 void lower(const VLS& e, callm& i, Vlabel b, size_t z) {
1662 lower_impl(e.unit, b, z, [&] (Vout& v) {
1663 lowerVptr(i.target, v);
1665 auto const scratch = v.makeReg();
1667 // Load the target from memory and then call it.
1668 v << load{i.target, scratch};
1669 v << callr{scratch, i.args};
1673 void lower(const VLS& e, jmpm& i, Vlabel b, size_t z) {
1674 lower_impl(e.unit, b, z, [&] (Vout& v) {
1675 lowerVptr(i.target, v);
1677 auto const scratch = v.makeReg();
1679 v << load{i.target, scratch};
1680 v << jmpr{scratch, i.args};
1684 ///////////////////////////////////////////////////////////////////////////////
1686 void lower(const VLS& e, stublogue& /*i*/, Vlabel b, size_t z) {
1687 lower_impl(e.unit, b, z, [&] (Vout& v) {
1688 // Push both the LR and FP regardless of i.saveframe to align SP.
1689 v << pushp{rlr(), rvmfp()};
1693 void lower(const VLS& e, unstublogue& /*i*/, Vlabel b, size_t z) {
1694 lower_impl(e.unit, b, z, [&] (Vout& v) {
1695 // Pop LR and remove FP from the stack.
1696 v << popp{PhysReg(rAsm), rlr()};
1700 void lower(const VLS& e, stubret& i, Vlabel b, size_t z) {
1701 lower_impl(e.unit, b, z, [&] (Vout& v) {
1702 // Pop LR and (optionally) FP.
1703 if (i.saveframe) {
1704 v << popp{rvmfp(), rlr()};
1705 } else {
1706 v << popp{PhysReg(rAsm), rlr()};
1709 v << ret{i.args};
1713 void lower(const VLS& e, tailcallstub& i, Vlabel b, size_t z) {
1714 lower_impl(e.unit, b, z, [&] (Vout& v) {
1715 // Restore LR from native stack and adjust SP.
1716 v << popp{PhysReg(rAsm), rlr()};
1718 // Then directly jump to the target.
1719 v << jmpi{i.target, i.args};
1723 void lower(const VLS& e, tailcallstubr& i, Vlabel b, size_t z) {
1724 lower_impl(e.unit, b, z, [&] (Vout& v) {
1725 // Restore LR from native stack and adjust SP.
1726 v << popp{PhysReg(rAsm), rlr()};
1728 v << jmpr{i.target, i.args};
1732 void lower(const VLS& e, stubunwind& i, Vlabel b, size_t z) {
1733 lower_impl(e.unit, b, z, [&] (Vout& v) {
1734 // Pop the call frame.
1735 v << popp{PhysReg(rAsm), i.d};
1739 void lower(const VLS& e, stubtophp& /*i*/, Vlabel b, size_t z) {
1740 lower_impl(e.unit, b, z, [&] (Vout& v) {
1741 // Pop the call frame
1742 v << lea{rsp()[16], rsp()};
1746 void lower(const VLS& e, loadstubret& i, Vlabel b, size_t z) {
1747 lower_impl(e.unit, b, z, [&] (Vout& v) {
1748 // Load the LR to the destination.
1749 v << load{rsp()[AROFF(m_savedRip)], i.d};
1753 ///////////////////////////////////////////////////////////////////////////////
1755 void lower(const VLS& e, phplogue& i, Vlabel b, size_t z) {
1756 lower_impl(e.unit, b, z, [&] (Vout& v) {
1757 v << store{rlr(), i.fp[AROFF(m_savedRip)]};
1761 ///////////////////////////////////////////////////////////////////////////////
1763 void lower(const VLS& e, resumetc& i, Vlabel b, size_t z) {
1764 lower_impl(e.unit, b, z, [&] (Vout& v) {
1765 // Call the translation target.
1766 v << callr{i.target, i.args};
1768 // After returning to the translation, jump directly to the exit.
1769 v << jmpi{i.exittc};
1773 ///////////////////////////////////////////////////////////////////////////////
1775 void lower(const VLS& e, popm& i, Vlabel b, size_t z) {
1776 lower_impl(e.unit, b, z, [&] (Vout& v) {
1777 auto r = v.makeReg();
1778 v << pop{r};
1779 lowerVptr(i.d, v);
1780 v << store{r, i.d};
1784 void lower(const VLS& e, poppm& i, Vlabel b, size_t z) {
1785 lower_impl(e.unit, b, z, [&] (Vout& v) {
1786 auto r0 = v.makeReg();
1787 auto r1 = v.makeReg();
1788 v << popp{r0, r1};
1789 lowerVptr(i.d0, v);
1790 lowerVptr(i.d1, v);
1791 v << store{r0, i.d0};
1792 v << store{r1, i.d1};
1796 void lower(const VLS& e, pushm& i, Vlabel b, size_t z) {
1797 lower_impl(e.unit, b, z, [&] (Vout& v) {
1798 auto r = v.makeReg();
1799 lowerVptr(i.s, v);
1800 v << load{i.s, r};
1801 v << push{r};
1805 void lower(const VLS& e, pushpm& i, Vlabel b, size_t z) {
1806 lower_impl(e.unit, b, z, [&] (Vout& v) {
1807 auto r0 = v.makeReg();
1808 auto r1 = v.makeReg();
1809 lowerVptr(i.s0, v);
1810 lowerVptr(i.s1, v);
1811 v << load{i.s0, r0};
1812 v << load{i.s1, r1};
1813 v << pushp{r0, r1};
1817 template<typename movz>
1818 void lower_movz(const VLS& e, movz& i, Vlabel b, size_t z) {
1819 lower_impl(e.unit, b, z, [&] (Vout& v) {
1820 v << copy{i.s, i.d};
1824 void lower(const VLS& e, movzbw& i, Vlabel b, size_t z) {
1825 lower_movz(e, i, b, z);
1828 void lower(const VLS& e, movzbl& i, Vlabel b, size_t z) {
1829 lower_movz(e, i, b, z);
1832 void lower(const VLS& e, movzwl& i, Vlabel b, size_t z) {
1833 lower_movz(e, i, b, z);
1836 void lower(const VLS& e, movtdb& i, Vlabel b, size_t z) {
1837 lower_impl(e.unit, b, z, [&] (Vout& v) {
1838 auto d = v.makeReg();
1839 v << copy{i.s, d};
1840 v << movtqb{d, i.d};
1844 void lower(const VLS& e, movtdq& i, Vlabel b, size_t z) {
1845 lower_impl(e.unit, b, z, [&] (Vout& v) {
1846 v << copy{i.s, i.d};
1850 #define Y(vasm_opc, lower_opc, load_opc, imm, zr, sz) \
1851 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1852 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1853 lowerVptr(i.m, v); \
1854 if (imm.sz() == 0u) { \
1855 v << lower_opc{PhysReg(vixl::zr), i.m}; \
1856 } else { \
1857 auto r = v.makeReg(); \
1858 v << load_opc{imm, r}; \
1859 v << lower_opc{r, i.m}; \
1861 }); \
1864 Y(storebi, storeb, ldimmb, i.s, wzr, b)
1865 Y(storewi, storew, ldimmw, i.s, wzr, w)
1866 Y(storeli, storel, ldimml, i.s, wzr, l)
1867 // storeqi only supports 32-bit immediates
1868 Y(storeqi, store, ldimmq, Immed64(i.s.l()), wzr, q)
1870 #undef Y
1872 void lower(const VLS& e, cloadq& i, Vlabel b, size_t z) {
1873 lower_impl(e.unit, b, z, [&] (Vout& v) {
1874 auto const scratch = v.makeReg();
1876 lowerVptr(i.t, v);
1878 v << load{i.t, scratch};
1879 v << cmovq{i.cc, i.sf, i.f, scratch, i.d};
1883 void lower(const VLS& e, loadqp& i, Vlabel b, size_t z) {
1884 lower_impl(e.unit, b, z, [&] (Vout& v) {
1885 auto const scratch = v.makeReg();
1887 v << leap{i.s, scratch};
1888 v << load{scratch[0], i.d};
1892 void lower(const VLS& e, loadqd& i, Vlabel b, size_t z) {
1893 lower_impl(e.unit, b, z, [&] (Vout& v) {
1894 auto const scratch = v.makeReg();
1896 v << lead{i.s.getRaw(), scratch};
1897 v << load{scratch[0], i.d};
1901 ///////////////////////////////////////////////////////////////////////////////
1903 void lowerForARM(Vunit& unit) {
1904 vasm_lower(unit, [&] (const VLS& env, Vinstr& inst, Vlabel b, size_t i) {
1905 switch (inst.op) {
1906 #define O(name, ...) \
1907 case Vinstr::name: \
1908 lower(env, inst.name##_, b, i); \
1909 break;
1911 VASM_OPCODES
1912 #undef O
1917 ///////////////////////////////////////////////////////////////////////////////
1920 void optimizeARM(Vunit& unit, const Abi& abi, bool regalloc) {
1921 Timer timer(Timer::vasm_optimize);
1923 removeTrivialNops(unit);
1924 optimizePhis(unit);
1925 fuseBranches(unit);
1926 optimizeJmps(unit);
1927 optimizeExits(unit);
1929 assertx(checkWidths(unit));
1931 simplify(unit);
1933 annotateSFUses(unit);
1934 lowerForARM(unit);
1936 simplify(unit);
1938 if (!unit.constToReg.empty()) {
1939 foldImms<arm::ImmFolder>(unit);
1941 reuseImmq(unit);
1943 optimizeCopies(unit, abi);
1945 annotateSFUses(unit);
1946 if (unit.needsRegAlloc()) {
1947 removeDeadCode(unit);
1948 if (regalloc) {
1949 if (RuntimeOption::EvalUseGraphColor &&
1950 unit.context &&
1951 (unit.context->kind == TransKind::Optimize ||
1952 unit.context->kind == TransKind::OptPrologue)) {
1953 allocateRegistersWithGraphColor(unit, abi);
1954 } else {
1955 allocateRegistersWithXLS(unit, abi);
1959 if (unit.blocks.size() > 1) {
1960 optimizeJmps(unit);
1964 void emitARM(Vunit& unit, Vtext& text, CGMeta& fixups,
1965 AsmInfo* asmInfo) {
1966 vasm_emit<Vgen>(unit, text, fixups, asmInfo);
1969 ///////////////////////////////////////////////////////////////////////////////