2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
18 * The HHVM's ARM64 backend works with an early-truncation policy.
21 * A Vreg8 is an extended W-register with a u8 value.
22 * A Vreg16 is an extended W-register with a u16 value.
23 * A Vreg32 is a W-register with a u32 value.
24 * A Vreg64 is a X-register with a u64 value.
26 * This allows omitting truncation instructions for sub-32-bit
27 * operations. E.g. a testb{Vreg8 s0, Vreg8 s1} has to truncate
28 * s0 and s1 before emitting a tst instruction. When using the
29 * early-truncation policy, the testb{} emitter can rely on the
30 * fact that s0 and s1 are already truncated and can emit a
31 * tst instruction without preceding uxtb's.
33 * Conversely any arithmetic instruction has to sign extend any
34 * Vreg8 before operating on it. Vasm is light on these instructions,
35 * with only the following, currently: csinc[bw]{} and cmp[bw][i]{}.
37 * Early-truncation has also consequences to extension/truncation
38 * vasm instructions. The following list shows how to use them:
40 * movzbw: Vreg8 -> Vreg16: mov w0, w0 #nop if s==d
41 * movzbl: Vreg8 -> Vreg32: mov w0, w0 #nop if s==d
42 * movzbq: Vreg8 -> Vreg64: uxtb x0, x0
43 * movzwl: Vreg16 -> Vreg32: mov w0, w0 #nop if s==d
44 * movzwq: Vreg16 -> Vreg64: uxth x0, x0
45 * movzlq: Vreg32 -> Vreg64: uxtw x0, x0
46 * movtqb: Vreg64 -> Vreg8: uxtb w0, w0
47 * movtql: Vreg64 -> Vreg32: uxtw w0, w0
49 * Early-truncation also implies, that instructions have to truncate
50 * after performing the actual operation if it cannot guarantee that
51 * the resulting VregN type matches. E.g. emitting code for the vasm
52 * instruction andbi{Immed imm, Vreg8 s, Vreg8 d} has to truncate the
53 * result to guarantee that register d indeed holds a u8 value.
55 * Note, that the early-truncation policy allows aarch64 specific
56 * optimizations, which are not relevant on other architectures.
57 * E.g. the x86_64 does not need this policy as the ISA allows
58 * direct register accesses for Vreg8, Vreg16, Vreg32 and Vreg64
59 * (e.g. AL, AX, EAX, RAX).
61 * The early-truncation policy relies on the following
62 * requirements of the Vreg type-system:
64 * * All VregNs are created for values of up to N bits
65 * * All conversions between VregNs are done via movz/movt vasm instructions
68 #include "hphp/runtime/vm/jit/vasm-emit.h"
70 #include "hphp/runtime/vm/jit/abi-arm.h"
71 #include "hphp/runtime/vm/jit/ir-instruction.h"
72 #include "hphp/runtime/vm/jit/print.h"
73 #include "hphp/runtime/vm/jit/service-requests.h"
74 #include "hphp/runtime/vm/jit/smashable-instr-arm.h"
75 #include "hphp/runtime/vm/jit/timer.h"
76 #include "hphp/runtime/vm/jit/vasm-gen.h"
77 #include "hphp/runtime/vm/jit/vasm.h"
78 #include "hphp/runtime/vm/jit/vasm-instr.h"
79 #include "hphp/runtime/vm/jit/vasm-internal.h"
80 #include "hphp/runtime/vm/jit/vasm-lower.h"
81 #include "hphp/runtime/vm/jit/vasm-print.h"
82 #include "hphp/runtime/vm/jit/vasm-reg.h"
83 #include "hphp/runtime/vm/jit/vasm-unit.h"
84 #include "hphp/runtime/vm/jit/vasm-util.h"
85 #include "hphp/runtime/vm/jit/vasm-visit.h"
87 #include "hphp/vixl/a64/macro-assembler-a64.h"
91 namespace HPHP
{ namespace jit
{
92 ///////////////////////////////////////////////////////////////////////////////
97 namespace arm
{ struct ImmFolder
; }
100 ///////////////////////////////////////////////////////////////////////////////
// The emitters and immediate-pooling logic below assume a little-endian
// target; refuse to build on big-endian hosts.
static_assert(folly::kIsLittleEndian,
              "Code contains little-endian specific optimizations.");
105 vixl::Register
X(Vreg64 r
) {
106 PhysReg
pr(r
.asReg());
110 vixl::Register
W(Vreg64 r
) {
111 PhysReg
pr(r
.asReg());
115 vixl::Register
W(Vreg32 r
) {
116 PhysReg
pr(r
.asReg());
120 vixl::Register
W(Vreg16 r
) {
121 PhysReg
pr(r
.asReg());
125 vixl::Register
W(Vreg8 r
) {
126 PhysReg
pr(r
.asReg());
130 vixl::FPRegister
D(Vreg r
) {
134 vixl::VRegister
V(Vreg r
) {
138 uint8_t Log2(uint8_t value
) {
149 always_assert(false);
153 vixl::MemOperand
M(Vptr p
) {
154 assertx(p
.base
.isValid());
155 if (p
.index
.isValid()) {
156 assertx(p
.disp
== 0);
157 return MemOperand(X(p
.base
), X(p
.index
), LSL
, Log2(p
.scale
));
159 return MemOperand(X(p
.base
), p
.disp
);
162 vixl::Condition
C(ConditionCode cc
) {
163 return arm::convertCC(cc
);
167 * Uses the flags from the Vinstr which defs SF to determine
168 * whether or not the Vixl assembler should emit code which
169 * sets the status flags.
171 vixl::FlagsUpdate
UF(Vflags flags
) {
172 return flags
? SetFlags
: LeaveFlags
;
176 * There are numerous ARM instructions that don't set status flags, and
177 * therefore those flags must be set synthetically in the emitters. This
178 * assertion is applied to the emitters which don't set all of the status
179 * flags required by the Vinstr which defs SF. The flags field of the
180 * Vinstr is used to determine which bits are required. Those required
181 * bits are compared against the bits which are actually set by the
184 template<class Inst
> void checkSF(const Inst
& i
, StatusFlags s
) {
185 Vflags required
= i
.fl
;
186 Vflags set
= static_cast<Vflags
>(s
);
187 always_assert_flog((required
& set
) == required
,
188 "should def SF but does not: {}\n",
189 vinst_names
[Vinstr(i
).op
]);
192 template<class Inst
> void checkSF(const Inst
& i
) {
193 checkSF(i
, StatusFlags::None
);
197 * Returns true if the queried flag(s) is in the set of required flags.
199 bool flagRequired(Vflags flags
, StatusFlags flag
) {
200 return (flags
& static_cast<Vflags
>(flag
));
203 ///////////////////////////////////////////////////////////////////////////////
206 explicit Vgen(Venv
& env
)
210 , base(a
->frontier())
211 , current(env
.current
)
215 , catches(env
.catches
)
221 static void emitVeneers(Venv
& env
);
222 static void handleLiterals(Venv
& env
);
223 static void patch(Venv
& env
);
225 static void pad(CodeBlock
& cb
) {
226 vixl::MacroAssembler a
{ cb
};
227 auto const begin
= cb
.frontier();
228 while (cb
.available() >= 4) a
.Brk(1);
229 assertx(cb
.available() == 0);
233 /////////////////////////////////////////////////////////////////////////////
235 template<class Inst
> void emit(const Inst
& i
) {
236 always_assert_flog(false, "unimplemented instruction: {} in B{}\n",
237 vinst_names
[Vinstr(i
).op
], size_t(current
));
241 void emit(const copy
& i
);
242 void emit(const copy2
& i
);
243 void emit(const debugtrap
& /*i*/) { a
->Brk(0); }
244 void emit(const fallthru
& /*i*/);
245 void emit(const ldimmb
& i
);
246 void emit(const ldimml
& i
);
247 void emit(const ldimmq
& i
);
248 void emit(const ldimmw
& i
);
249 void emit(const ldundefq
& /*i*/) {}
250 void emit(const load
& i
);
251 void emit(const store
& i
);
252 void emit(const mcprep
& i
);
254 // native function abi
255 void emit(const call
& i
);
256 void emit(const callr
& i
) { a
->Blr(X(i
.target
)); }
257 void emit(const calls
& i
);
258 void emit(const ret
& /*i*/) { a
->Ret(); }
261 void emit(const callstub
& i
);
262 void emit(const callfaststub
& i
);
265 void emit(const callphp
& i
) {
266 emit(call
{i
.target
, i
.args
});
267 setCallFuncId(env
, a
->frontier());
269 void emit(const callphpr
& i
) {
270 emit(callr
{i
.target
, i
.args
});
271 setCallFuncId(env
, a
->frontier());
273 void emit(const contenter
& i
);
274 void emit(const phpret
& i
);
277 void emit(const inittc
& /*i*/) {}
278 void emit(const leavetc
& i
);
281 void emit(const landingpad
& /*i*/) {}
282 void emit(const nothrow
& i
);
283 void emit(const syncpoint
& i
);
284 void emit(const unwind
& i
);
287 void emit(const absdbl
& i
) { a
->Fabs(D(i
.d
), D(i
.s
)); }
288 void emit(const addl
& i
) { a
->Add(W(i
.d
), W(i
.s1
), W(i
.s0
), UF(i
.fl
)); }
289 void emit(const addli
& i
) { a
->Add(W(i
.d
), W(i
.s1
), i
.s0
.l(), UF(i
.fl
)); }
290 void emit(const addq
& i
) { a
->Add(X(i
.d
), X(i
.s1
), X(i
.s0
), UF(i
.fl
));}
291 void emit(const addqi
& i
) { a
->Add(X(i
.d
), X(i
.s1
), i
.s0
.q(), UF(i
.fl
)); }
292 void emit(const addsd
& i
) { a
->Fadd(D(i
.d
), D(i
.s1
), D(i
.s0
)); }
293 void emit(const andb
& i
) { a
->And(W(i
.d
), W(i
.s1
), W(i
.s0
), UF(i
.fl
)); }
294 void emit(const andbi
& i
) { a
->And(W(i
.d
), W(i
.s1
), i
.s0
.ub(), UF(i
.fl
)); }
295 void emit(const andw
& i
) { a
->And(W(i
.d
), W(i
.s1
), W(i
.s0
), UF(i
.fl
)); }
296 void emit(const andwi
& i
) { a
->And(W(i
.d
), W(i
.s1
), i
.s0
.uw(), UF(i
.fl
)); }
297 void emit(const andl
& i
) { a
->And(W(i
.d
), W(i
.s1
), W(i
.s0
), UF(i
.fl
)); }
298 void emit(const andli
& i
) { a
->And(W(i
.d
), W(i
.s1
), i
.s0
.l(), UF(i
.fl
)); }
299 void emit(const andq
& i
) { a
->And(X(i
.d
), X(i
.s1
), X(i
.s0
), UF(i
.fl
)); }
300 void emit(const andqi
& i
) { a
->And(X(i
.d
), X(i
.s1
), i
.s0
.q(), UF(i
.fl
)); }
301 void emit(const andqi64
& i
) { a
->And(X(i
.d
), X(i
.s1
), i
.s0
.q(), UF(i
.fl
)); }
302 void emit(const cmovb
& i
) { a
->Csel(W(i
.d
), W(i
.t
), W(i
.f
), C(i
.cc
)); }
303 void emit(const cmovw
& i
) { a
->Csel(W(i
.d
), W(i
.t
), W(i
.f
), C(i
.cc
)); }
304 void emit(const cmovl
& i
) { a
->Csel(W(i
.d
), W(i
.t
), W(i
.f
), C(i
.cc
)); }
305 void emit(const cmovq
& i
) { a
->Csel(X(i
.d
), X(i
.t
), X(i
.f
), C(i
.cc
)); }
306 // note: cmp{bw}[i] are emitted only for narrow comparisons and _do not_ sign
307 // extend their arguments--these instructions are lowered to cmp{lq}[i] if
308 // the comparison is not narrow or not equality/inequality
309 void emit(const cmpb
& i
) { a
->Cmp(W(i
.s1
), W(i
.s0
)); }
310 void emit(const cmpbi
& i
) { a
->Cmp(W(i
.s1
), static_cast<uint8_t>(i
.s0
.b())); }
311 void emit(const cmpw
& i
) { a
->Cmp(W(i
.s1
), W(i
.s0
)); }
312 void emit(const cmpwi
& i
) { a
->Cmp(W(i
.s1
), static_cast<uint16_t>(i
.s0
.w())); }
313 void emit(const cmpl
& i
) { a
->Cmp(W(i
.s1
), W(i
.s0
)); }
314 void emit(const cmpli
& i
) { a
->Cmp(W(i
.s1
), i
.s0
.l()); }
315 void emit(const cmpq
& i
) { a
->Cmp(X(i
.s1
), X(i
.s0
)); }
316 void emit(const cmpqi
& i
) { a
->Cmp(X(i
.s1
), i
.s0
.q()); }
317 void emit(const cmpsd
& i
);
318 // TODO(CDE): csinc[bw]{} Should a) sign extend and b) set SF for overflow
319 void emit(const csincb
& i
) { a
->Csinc(W(i
.d
), W(i
.t
), W(i
.f
), C(i
.cc
)); }
320 void emit(const csincw
& i
) { a
->Csinc(W(i
.d
), W(i
.t
), W(i
.f
), C(i
.cc
)); }
321 void emit(const csincl
& i
) { a
->Csinc(W(i
.d
), W(i
.t
), W(i
.f
), C(i
.cc
)); }
322 void emit(const csincq
& i
) { a
->Csinc(X(i
.d
), X(i
.t
), X(i
.f
), C(i
.cc
)); }
323 void emit(const cvtsi2sd
& i
) { a
->Scvtf(D(i
.d
), X(i
.s
)); }
324 void emit(const decl
& i
) { a
->Sub(W(i
.d
), W(i
.s
), 1, UF(i
.fl
)); }
325 void emit(const decq
& i
) { a
->Sub(X(i
.d
), X(i
.s
), 1, UF(i
.fl
)); }
326 void emit(const decqmlock
& i
);
327 void emit(const divint
& i
) { a
->Sdiv(X(i
.d
), X(i
.s0
), X(i
.s1
)); }
328 void emit(const divsd
& i
) { a
->Fdiv(D(i
.d
), D(i
.s1
), D(i
.s0
)); }
329 void emit(const imul
& i
);
330 void emit(const incl
& i
) { a
->Add(W(i
.d
), W(i
.s
), 1, UF(i
.fl
)); }
331 void emit(const incq
& i
) { a
->Add(X(i
.d
), X(i
.s
), 1, UF(i
.fl
)); }
332 void emit(const incw
& i
) { a
->Add(W(i
.d
), W(i
.s
), 1, UF(i
.fl
)); }
333 void emit(const jcc
& i
);
334 void emit(const jcci
& i
);
335 void emit(const jmp
& i
);
336 void emit(const jmpi
& i
);
337 void emit(const jmpr
& i
) { a
->Br(X(i
.target
)); }
338 void emit(const lea
& i
);
339 void emit(const leap
& i
);
340 void emit(const leav
& i
);
341 void emit(const lead
& i
);
342 void emit(const loadb
& i
) { a
->Ldrb(W(i
.d
), M(i
.s
)); }
343 void emit(const loadl
& i
) { a
->Ldr(W(i
.d
), M(i
.s
)); }
344 void emit(const loadsd
& i
) { a
->Ldr(D(i
.d
), M(i
.s
)); }
345 void emit(const loadtqb
& i
) { a
->Ldrb(W(i
.d
), M(i
.s
)); }
346 void emit(const loadtql
& i
) { a
->Ldr(W(i
.d
), M(i
.s
)); }
347 void emit(const loadups
& i
);
348 void emit(const loadw
& i
) { a
->Ldrh(W(i
.d
), M(i
.s
)); }
349 void emit(const loadzbl
& i
) { a
->Ldrb(W(i
.d
), M(i
.s
)); }
350 void emit(const loadzbq
& i
) { a
->Ldrb(W(i
.d
), M(i
.s
)); }
351 void emit(const loadsbq
& i
) { a
->Ldrsb(X(i
.d
), M(i
.s
)); }
352 void emit(const loadsbl
& i
) { a
->Ldrsb(W(i
.d
), M(i
.s
)); }
353 void emit(const loadzwq
& i
) { a
->Ldrh(W(i
.d
), M(i
.s
)); }
354 void emit(const loadzlq
& i
) { a
->Ldr(W(i
.d
), M(i
.s
)); }
355 void emit(const movb
& i
) { if (i
.d
!= i
.s
) a
->Mov(W(i
.d
), W(i
.s
)); }
356 void emit(const movw
& i
) { if (i
.d
!= i
.s
) a
->Mov(W(i
.d
), W(i
.s
)); }
357 void emit(const movl
& i
) { if (i
.d
!= i
.s
) a
->Mov(W(i
.d
), W(i
.s
)); }
358 void emit(const movsbl
& i
) { a
->Sxtb(W(i
.d
), W(i
.s
)); }
359 void emit(const movsbq
& i
) { a
->Sxtb(X(i
.d
), W(i
.s
).X()); }
360 void emit(const movswl
& i
) { a
->Sxth(W(i
.d
), W(i
.s
)); }
361 void emit(const movtqb
& i
) { a
->Uxtb(W(i
.d
), W(i
.s
)); }
362 void emit(const movtqw
& i
) { a
->Uxth(W(i
.d
), W(i
.s
)); }
363 void emit(const movtql
& i
) { a
->Uxtw(W(i
.d
), W(i
.s
)); }
364 void emit(const movzbq
& i
) { a
->Uxtb(X(i
.d
), W(i
.s
).X()); }
365 void emit(const movzwq
& i
) { a
->Uxth(X(i
.d
), W(i
.s
).X()); }
366 void emit(const movzlq
& i
) { a
->Uxtw(X(i
.d
), W(i
.s
).X()); }
367 void emit(const mulsd
& i
) { a
->Fmul(D(i
.d
), D(i
.s1
), D(i
.s0
)); }
368 void emit(const neg
& i
) { a
->Neg(X(i
.d
), X(i
.s
), UF(i
.fl
)); }
369 void emit(const nop
& /*i*/) { a
->Nop(); }
370 void emit(const notb
& i
) { a
->Mvn(W(i
.d
), W(i
.s
)); }
371 void emit(const not& i
) { a
->Mvn(X(i
.d
), X(i
.s
)); }
372 void emit(const orbi
& i
);
373 void emit(const orq
& i
);
374 void emit(const orwi
& i
);
375 void emit(const orli
& i
);
376 void emit(const orqi
& i
);
377 void emit(const pop
& i
);
378 void emit(const popp
& i
);
379 void emit(const push
& i
);
380 void emit(const pushp
& i
);
381 void emit(const roundsd
& i
);
382 void emit(const sar
& i
);
383 void emit(const sarqi
& i
);
384 void emit(const setcc
& i
) { a
->Cset(W(i
.d
), C(i
.cc
)); }
385 void emit(const shl
& i
);
386 void emit(const shlli
& i
);
387 void emit(const shlqi
& i
);
388 void emit(const shrli
& i
);
389 void emit(const shrqi
& i
);
390 void emit(const sqrtsd
& i
) { a
->Fsqrt(D(i
.d
), D(i
.s
)); }
391 void emit(const srem
& i
);
392 void emit(const storeb
& i
) { a
->Strb(W(i
.s
), M(i
.m
)); }
393 void emit(const storel
& i
) { a
->Str(W(i
.s
), M(i
.m
)); }
394 void emit(const storesd
& i
) { emit(store
{i
.s
, i
.m
}); }
395 void emit(const storeups
& i
);
396 void emit(const storew
& i
) { a
->Strh(W(i
.s
), M(i
.m
)); }
397 void emit(const subl
& i
) { a
->Sub(W(i
.d
), W(i
.s1
), W(i
.s0
), UF(i
.fl
)); }
398 void emit(const subli
& i
) { a
->Sub(W(i
.d
), W(i
.s1
), i
.s0
.l(), UF(i
.fl
)); }
399 void emit(const subq
& i
) { a
->Sub(X(i
.d
), X(i
.s1
), X(i
.s0
), UF(i
.fl
)); }
400 void emit(const subqi
& i
) { a
->Sub(X(i
.d
), X(i
.s1
), i
.s0
.q(), UF(i
.fl
)); }
401 void emit(const subsd
& i
) { a
->Fsub(D(i
.d
), D(i
.s1
), D(i
.s0
)); }
402 void emit(const testb
& i
){ a
->Tst(W(i
.s1
), W(i
.s0
)); }
403 void emit(const testbi
& i
){ a
->Tst(W(i
.s1
), i
.s0
.ub()); }
404 void emit(const testw
& i
){ a
->Tst(W(i
.s1
), W(i
.s0
)); }
405 void emit(const testwi
& i
){ a
->Tst(W(i
.s1
), i
.s0
.uw()); }
406 void emit(const testl
& i
) { a
->Tst(W(i
.s1
), W(i
.s0
)); }
407 void emit(const testli
& i
) { a
->Tst(W(i
.s1
), i
.s0
.l()); }
408 void emit(const testq
& i
) { a
->Tst(X(i
.s1
), X(i
.s0
)); }
409 void emit(const testqi
& i
) { a
->Tst(X(i
.s1
), i
.s0
.q()); }
410 void emit(const trap
& /*i*/);
411 void emit(const ucomisd
& i
) { a
->Fcmp(D(i
.s0
), D(i
.s1
)); }
412 void emit(const unpcklpd
&);
413 void emit(const xorb
& i
);
414 void emit(const xorbi
& i
);
415 void emit(const xorw
& i
);
416 void emit(const xorwi
& i
);
417 void emit(const xorl
& i
);
418 void emit(const xorq
& i
);
419 void emit(const xorqi
& i
);
422 void emit(const fcvtzs
& i
) { a
->Fcvtzs(X(i
.d
), D(i
.s
)); }
423 void emit(const mrs
& i
) { a
->Mrs(X(i
.r
), vixl::SystemRegister(i
.s
.l())); }
424 void emit(const msr
& i
) { a
->Msr(vixl::SystemRegister(i
.s
.l()), X(i
.r
)); }
425 void emit(const ubfmli
& i
) { a
->ubfm(W(i
.d
), W(i
.s
), i
.mr
.w(), i
.ms
.w()); }
427 void emit_nop() { a
->Nop(); }
430 CodeBlock
& frozen() { return env
.text
.frozen().code
; }
431 static void recordAddressImmediate(Venv
& env
, TCA addr
) {
432 env
.meta
.addressImmediates
.insert(addr
);
434 void recordAddressImmediate() {
435 env
.meta
.addressImmediates
.insert(env
.cb
->frontier());
440 vixl::MacroAssembler assem
;
441 vixl::MacroAssembler
* a
;
444 const Vlabel current
;
446 jit::vector
<Venv::LabelPatch
>& jmps
;
447 jit::vector
<Venv::LabelPatch
>& jccs
;
448 jit::vector
<Venv::LabelPatch
>& catches
;
451 ///////////////////////////////////////////////////////////////////////////////
453 static CodeBlock
* getBlock(Venv
& env
, CodeAddress a
) {
454 for (auto const& area
: env
.text
.areas()) {
455 if (area
.code
.contains(a
)) {
462 static CodeAddress
toReal(Venv
& env
, CodeAddress a
) {
463 CodeBlock
* b
= getBlock(env
, a
);
464 return (b
== nullptr) ? a
: b
->toDestAddress(a
);
467 void Vgen::emitVeneers(Venv
& env
) {
468 auto& meta
= env
.meta
;
469 decltype(env
.meta
.veneers
) notEmitted
;
471 for (auto const& veneer
: meta
.veneers
) {
472 auto cb
= getBlock(env
, veneer
.source
);
474 // If we can't find the code block, it must have been emitted by a Vunit
475 // wrapping this one (bindjmp emits a Vunit within a Vunit).
476 notEmitted
.push_back(veneer
);
479 auto const vaddr
= cb
->frontier();
481 FTRACE(1, "emitVeneers: source = {}, target = {}, veneer at {}\n",
482 veneer
.source
, veneer
.target
, vaddr
);
484 // Emit the veneer code: LDR + BR.
485 meta
.veneerAddrs
.insert(vaddr
);
486 MacroAssembler av
{*cb
};
487 vixl::Label target_data
;
488 meta
.addressImmediates
.insert(vaddr
);
489 poolLiteral(*cb
, meta
, (uint64_t)makeTarget32(veneer
.target
), 32, true);
490 av
.bind(&target_data
);
491 av
.Ldr(rAsm_w
, &target_data
);
494 // Update the veneer source instruction to jump/call the veneer.
495 auto const realSource
= toReal(env
, veneer
.source
);
497 tmpBlock
.init(realSource
, kInstructionSize
, "emitVeneers");
498 MacroAssembler at
{tmpBlock
};
499 int64_t offset
= vaddr
- veneer
.source
;
500 auto sourceInst
= Instruction::Cast(realSource
);
502 if (sourceInst
->Mask(UnconditionalBranchMask
) == B
) {
503 always_assert(is_int28(offset
));
504 at
.b(offset
>> kInstructionSizeLog2
);
506 } else if (sourceInst
->Mask(UnconditionalBranchMask
) == BL
) {
507 always_assert(is_int28(offset
));
508 at
.bl(offset
>> kInstructionSizeLog2
);
510 } else if (sourceInst
->IsCondBranchImm()) {
511 auto const cond
= static_cast<Condition
>(sourceInst
->ConditionBranch());
512 if (is_int21(offset
)) {
513 at
.b(offset
>> kInstructionSizeLog2
, cond
);
515 // The offset doesn't fit in a conditional jump. Hopefully it still fits
516 // in an unconditional jump, in which case we add an appendix to the
518 offset
+= 2 * kInstructionSize
;
519 always_assert(is_int28(offset
));
520 // Add an appendix to the veneer, and jump to it instead. The full
521 // veneer in this case looks like:
523 // LDR RX, LITERAL_ADDR
528 // And the conditional jump into the veneer is turned into a jump to the
533 // Turn the original conditional branch into an unconditional one.
534 at
.b(offset
>> kInstructionSizeLog2
);
537 auto const appendix
= cb
->frontier();
538 av
.b(-2 /* veneer starts 2 instructions before the appendix */, cond
);
539 const int64_t nextOffset
= (veneer
.source
+ kInstructionSize
) - // NEXT
540 (vaddr
+ 3 * kInstructionSize
); // addr of "B NEXT"
541 always_assert(is_int28(nextOffset
));
542 av
.b(nextOffset
>> kInstructionSizeLog2
);
544 // Replace veneer.source with appendix in the relevant metadata.
545 meta
.smashableLocations
.erase(veneer
.source
);
546 meta
.smashableLocations
.insert(appendix
);
547 for (auto& tj
: meta
.inProgressTailJumps
) {
548 if (tj
.toSmash() == veneer
.source
) tj
.adjust(appendix
);
550 for (auto& stub
: env
.stubs
) {
551 if (stub
.jcc
== veneer
.source
) stub
.jcc
= appendix
;
555 always_assert_flog(0, "emitVeneers: invalid source instruction at source"
556 " {} (realSource = {})",
557 veneer
.source
, realSource
);
561 env
.meta
.veneers
.swap(notEmitted
);
564 void Vgen::handleLiterals(Venv
& env
) {
565 decltype(env
.meta
.literalsToPool
) notEmitted
;
566 for (auto const& pl
: env
.meta
.literalsToPool
) {
567 auto const cb
= getBlock(env
, pl
.patchAddress
);
569 // If we can't find the code block it must have been emitted by a Vunit
570 // wrapping this one. (bindjmp emits a Vunit within a Vunit)
571 notEmitted
.push_back(pl
);
576 auto literalAddress
= cb
->frontier();
577 if (pl
.width
== 32) {
578 cb
->dword(static_cast<uint32_t>(pl
.value
));
579 } else if (pl
.width
== 64) {
581 // Although the region is actually dead, we mark it as live, so that
582 // the relocator can remove the padding.
583 align(*cb
, &env
.meta
, Alignment::QuadWordSmashable
, AlignContext::Live
);
584 literalAddress
= cb
->frontier();
592 auto const patchAddressActual
=
593 Instruction::Cast(toReal(env
, pl
.patchAddress
));
594 assertx(patchAddressActual
->IsLoadLiteral());
595 patchAddressActual
->SetImmPCOffsetTarget(
596 Instruction::Cast(literalAddress
),
597 Instruction::Cast(pl
.patchAddress
));
600 if (env
.meta
.fallthru
) {
601 auto const fallthru
= *env
.meta
.fallthru
;
602 auto const cb
= getBlock(env
, fallthru
);
604 always_assert_flog(false,
605 "Fallthrus shouldn't be used in nested Vunits.");
607 auto const blockEndAddr
= cb
->frontier();
608 auto const startAddr
= cb
->toDestAddress(fallthru
);
610 tmp
.init(startAddr
, kInstructionSize
, "Tmp");
613 recordAddressImmediate(env
, fallthru
);
614 a
.b((blockEndAddr
- fallthru
) >> kInstructionSizeLog2
);
617 env
.meta
.literalsToPool
.swap(notEmitted
);
620 void Vgen::patch(Venv
& env
) {
621 // Patch the 32 bit target of the LDR
622 auto patch
= [&env
](TCA instr
, TCA target
) {
623 // The LDR loading the address to branch to.
624 auto ldr
= Instruction::Cast(instr
);
625 auto const DEBUG_ONLY br
= ldr
->NextInstruction();
626 assertx(ldr
->Mask(LoadLiteralMask
) == LDR_w_lit
&&
627 br
->Mask(UnconditionalBranchToRegisterMask
) == BR
&&
628 ldr
->Rd() == br
->Rn());
629 // The address the LDR loads.
630 auto targetAddr
= ldr
->LiteralAddress();
631 // Patch the 32 bit target following the LDR and BR
632 patchTarget32(targetAddr
, target
);
635 for (auto const& p
: env
.jmps
) {
636 auto addr
= toReal(env
, p
.instr
);
637 auto const target
= env
.addrs
[p
.target
];
639 if (env
.meta
.smashableLocations
.count(p
.instr
)) {
640 assertx(possiblySmashableJmp(addr
));
641 // Update `addr' to point to the veneer.
642 addr
= TCA(vixl::Instruction::Cast(addr
)->ImmPCOffsetTarget());
644 // Patch the address we are jumping to.
647 for (auto const& p
: env
.jccs
) {
648 auto addr
= toReal(env
, p
.instr
);
649 auto const target
= env
.addrs
[p
.target
];
651 if (env
.meta
.smashableLocations
.count(p
.instr
)) {
652 assertx(possiblySmashableJcc(addr
));
653 // Update `addr' to point to the veneer.
654 addr
= TCA(vixl::Instruction::Cast(addr
)->ImmPCOffsetTarget());
656 assertx(Instruction::Cast(addr
)->IsCondBranchImm());
657 // If the jcc starts with a conditional jump, patch the next instruction
658 // (which should start with a LDR).
659 addr
+= kInstructionSize
;
663 for (auto const& p
: env
.leas
) {
669 ///////////////////////////////////////////////////////////////////////////////
671 void Vgen::emit(const copy
& i
) {
672 if (i
.s
== i
.d
) return;
673 if (i
.s
.isGP() && i
.d
.isGP()) {
674 a
->Mov(X(i
.d
), X(i
.s
));
675 } else if (i
.s
.isSIMD() && i
.d
.isGP()) {
676 a
->Fmov(X(i
.d
), D(i
.s
));
677 } else if (i
.s
.isGP() && i
.d
.isSIMD()) {
678 a
->Fmov(D(i
.d
), X(i
.s
));
680 assertx(i
.s
.isSIMD() && i
.d
.isSIMD());
681 a
->mov(V(i
.d
), V(i
.s
));
685 void Vgen::emit(const copy2
& i
) {
686 assertx(i
.s0
.isValid() && i
.s1
.isValid() && i
.d0
.isValid() && i
.d1
.isValid());
687 auto s0
= i
.s0
, s1
= i
.s1
, d0
= i
.d0
, d1
= i
.d1
;
691 a
->Eor(X(d0
), X(d0
), X(s0
));
692 a
->Eor(X(s0
), X(d0
), X(s0
));
693 a
->Eor(X(d0
), X(d0
), X(s0
));
695 // could do this in a simplify pass
696 if (s1
!= d1
) a
->Mov(X(s1
), X(d1
)); // save s1 first; d1 != s0
697 if (s0
!= d0
) a
->Mov(X(s0
), X(d0
));
700 // could do this in a simplify pass
701 if (s0
!= d0
) a
->Mov(X(s0
), X(d0
));
702 if (s1
!= d1
) a
->Mov(X(s1
), X(d1
));
706 void emitSimdImmInt(vixl::MacroAssembler
* a
, uint64_t val
, Vreg d
) {
707 // Assembler::fmov emits a ldr from a literal pool if IsImmFP64 is false.
708 // In that case, emit the raw bits into a GPR first and then move them
709 // unmodified into destination SIMD
710 union { double dval
; uint64_t ival
; };
712 if (vixl::Assembler::IsImmFP64(dval
)) {
714 } else if (ival
== 0) {
715 a
->Fmov(D(d
), vixl::xzr
);
721 void Vgen::emit(const fallthru
& /*i*/) {
722 always_assert(!env
.meta
.fallthru
);
723 env
.meta
.fallthru
= a
->frontier();
727 #define Y(vasm_opc, simd_w, vr_w, gpr_w, imm) \
728 void Vgen::emit(const vasm_opc& i) { \
729 if (i.d.isSIMD()) { \
730 emitSimdImmInt(a, static_cast<uint##vr_w##_t>(i.s.simd_w()), i.d); \
732 Vreg##vr_w d = i.d; \
733 a->Mov(gpr_w(d), imm); \
737 Y(ldimmb
, ub
, 8, W
, i
.s
.ub())
738 Y(ldimmw
, uw
, 16, W
, i
.s
.uw())
739 Y(ldimml
, l
, 32, W
, i
.s
.l())
740 Y(ldimmq
, q
, 64, X
, i
.s
.q())
744 void Vgen::emit(const load
& i
) {
746 a
->Ldr(X(i
.d
), M(i
.s
));
748 a
->Ldr(D(i
.d
), M(i
.s
));
752 void Vgen::emit(const store
& i
) {
755 a
->Mov(rAsm
, X(i
.s
));
756 a
->Str(rAsm
, M(i
.d
));
758 a
->Str(X(i
.s
), M(i
.d
));
761 a
->Str(D(i
.s
), M(i
.d
));
765 ///////////////////////////////////////////////////////////////////////////////
767 void Vgen::emit(const mcprep
& i
) {
769 * Initially, we set the cache to hold (addr << 1) | 1 (where `addr' is the
770 * address of the movq) so that we can find the movq from the handler.
772 * We set the low bit for two reasons: the Class* will never be a valid
773 * Class*, so we'll always miss the inline check before it's smashed, and
774 * MethodCache::handleStaticCall can tell it's not been smashed yet
777 align(*env
.cb
, &env
.meta
, Alignment::SmashMovq
, AlignContext::Live
);
778 auto const imm
= reinterpret_cast<uint64_t>(a
->frontier());
779 emitSmashableMovq(*env
.cb
, env
.meta
, (imm
<< 1) | 1, r64(i
.d
));
781 env
.meta
.addressImmediates
.insert(reinterpret_cast<TCA
>(~imm
));
784 ///////////////////////////////////////////////////////////////////////////////
786 void Vgen::emit(const call
& i
) {
787 recordAddressImmediate();
788 a
->Mov(rAsm
, i
.target
);
791 *i
.watch
= a
->frontier();
792 env
.meta
.watchpoints
.push_back(i
.watch
);
796 void Vgen::emit(const calls
& i
) {
797 emitSmashableCall(*env
.cb
, env
.meta
, i
.target
);
800 ///////////////////////////////////////////////////////////////////////////////
802 void Vgen::emit(const callstub
& i
) {
803 emit(call
{i
.target
, i
.args
});
806 void Vgen::emit(const callfaststub
& i
) {
807 emit(call
{i
.target
, i
.args
});
808 emit(syncpoint
{i
.fix
});
811 ///////////////////////////////////////////////////////////////////////////////
813 void Vgen::emit(const phpret
& i
) {
814 // prefer load-pair instruction
816 a
->ldp(X(rvmfp()), X(rlr()), X(i
.fp
)[AROFF(m_sfp
)]);
818 a
->Ldr(X(rlr()), X(i
.fp
)[AROFF(m_savedRip
)]);
823 void Vgen::emit(const contenter
& i
) {
824 vixl::Label stub
, end
;
826 // Jump past the stub below.
827 recordAddressImmediate();
830 // We call into this stub from the end below. Take that LR and store it in
831 // m_savedRip. Then jump to the target.
833 a
->Str(X(rlr()), M(i
.fp
[AROFF(m_savedRip
)]));
836 // Call to stub above and then unwind.
838 recordAddressImmediate();
840 emit(unwind
{{i
.targets
[0], i
.targets
[1]}});
843 ///////////////////////////////////////////////////////////////////////////////
845 void Vgen::emit(const leavetc
& /*i*/) {
846 // The LR was preserved on the stack by resumetc. Pop it while preserving
847 // SP alignment and return.
848 a
->Ldp(rAsm
, X(rlr()), MemOperand(sp
, 16, PostIndex
));
852 ///////////////////////////////////////////////////////////////////////////////
854 void Vgen::emit(const nothrow
& /*i*/) {
855 env
.meta
.catches
.emplace_back(a
->frontier(), nullptr);
856 env
.record_inline_stack(a
->frontier());
859 void Vgen::emit(const syncpoint
& i
) {
860 FTRACE(5, "IR recordSyncPoint: {} {}\n", a
->frontier(), i
.fix
.show());
861 env
.meta
.fixups
.emplace_back(a
->frontier(), i
.fix
);
862 env
.record_inline_stack(a
->frontier());
865 void Vgen::emit(const unwind
& i
) {
866 catches
.push_back({a
->frontier(), i
.targets
[1]});
867 env
.record_inline_stack(a
->frontier());
868 emit(jmp
{i
.targets
[0]});
871 ///////////////////////////////////////////////////////////////////////////////
875 * SF should be set to MSB of the result
876 * CF, OF should be set to (1, 1) if the result is truncated, (0, 0) otherwise
877 * ZF, AF, PF are undefined
879 * In the following implementation,
880 * N, Z, V are updated according to result
881 * C is cleared (FIXME)
883 void Vgen::emit(const imul
& i
) {
885 // Do the multiplication
886 a
->Mul(X(i
.d
), X(i
.s0
), X(i
.s1
));
888 // If we have to set any flags, then always set N and Z since it's cheap.
889 // Only set V when absolutely necessary. C is not supported.
893 checkSF(i
, StatusFlags::NotC
);
895 if (flagRequired(i
.fl
, StatusFlags::V
)) {
896 vixl::Label checkSign
;
897 vixl::Label Overflow
;
899 // Do the multiplication for the upper 64 bits of a 128 bit result.
900 // If the result is not all zeroes or all ones, then we have overflow.
901 // If the result is all zeroes or all ones, and the sign is the same,
902 // for both hi and low, then there is no overflow.
903 a
->smulh(rAsm
, X(i
.s0
), X(i
.s1
));
905 // If hi is all 0's or 1's, then check the sign, else overflow
907 recordAddressImmediate();
908 a
->Cbz(rAsm
, &checkSign
);
910 recordAddressImmediate();
911 a
->B(&checkSign
, vixl::eq
);
913 // Overflow, so conditionally set N and Z bits and then or in V bit.
915 a
->Bic(vixl::xzr
, X(i
.d
), vixl::xzr
, SetFlags
);
917 a
->Orr(rAsm
, rAsm
, 1<<28);
919 recordAddressImmediate();
922 // Check the signs of hi and lo.
924 a
->Eor(rAsm
, rAsm
, X(i
.d
));
925 recordAddressImmediate();
926 a
->Tbnz(rAsm
, 63, &Overflow
);
929 // No Overflow, so conditionally set the N and Z only
930 a
->Bic(vixl::xzr
, X(i
.d
), vixl::xzr
, SetFlags
);
936 void Vgen::emit(const decqmlock
& i
) {
938 /* Use VIXL's macroassembler scratch regs. */
939 a
->SetScratchRegisters(vixl::NoReg
, vixl::NoReg
);
940 if (RuntimeOption::EvalJitArmLse
) {
941 a
->Mov(rVixlScratch0
, -1);
942 a
->ldaddal(rVixlScratch0
, rVixlScratch0
, adr
);
943 a
->Sub(rAsm
, rVixlScratch0
, 1, SetFlags
);
948 a
->Sub(rAsm
, rAsm
, 1, SetFlags
);
949 a
->stxr(rVixlScratch0
, rAsm
, adr
);
950 recordAddressImmediate();
951 a
->Cbnz(rVixlScratch0
, &again
);
953 /* Restore VIXL's scratch regs. */
954 a
->SetScratchRegisters(rVixlScratch0
, rVixlScratch1
);
957 void Vgen::emit(const jcc
& i
) {
958 if (i
.targets
[1] != i
.targets
[0]) {
959 if (next
== i
.targets
[1]) {
960 return emit(jcc
{ccNegate(i
.cc
), i
.sf
, {i
.targets
[1], i
.targets
[0]}});
962 auto taken
= i
.targets
[1];
963 jccs
.push_back({a
->frontier(), taken
});
964 vixl::Label skip
, data
;
966 // Emit a "far JCC" sequence for easy patching later. Static relocation
967 // might be able to simplify this later (see optimizeFarJcc()).
968 recordAddressImmediate();
969 a
->B(&skip
, vixl::InvertCondition(C(i
.cc
)));
970 recordAddressImmediate();
971 poolLiteral(*env
.cb
, env
.meta
, (uint64_t)makeTarget32(a
->frontier()),
973 a
->bind(&data
); // This will be remapped during the handleLiterals phase.
974 a
->Ldr(rAsm_w
, &data
);
978 emit(jmp
{i
.targets
[0]});
981 void Vgen::emit(const jcci
& i
) {
984 recordAddressImmediate();
985 a
->B(&skip
, vixl::InvertCondition(C(i
.cc
)));
991 void Vgen::emit(const jmp
& i
) {
992 if (next
== i
.target
) return;
993 jmps
.push_back({a
->frontier(), i
.target
});
996 // Emit a "far JMP" sequence for easy patching later. Static relocation
997 // might be able to simplify this (see optimizeFarJmp()).
998 recordAddressImmediate();
999 poolLiteral(*env
.cb
, env
.meta
, (uint64_t)a
->frontier(), 32, false);
1000 a
->bind(&data
); // This will be remapped during the handleLiterals phase.
1001 a
->Ldr(rAsm_w
, &data
);
1005 void Vgen::emit(const jmpi
& i
) {
1008 // If target can be addressed by pc relative offset (signed 26 bits), emit
1009 // PC relative jump. Else, emit target address into code and load from there.
1010 auto diff
= (i
.target
- a
->frontier()) >> vixl::kInstructionSizeLog2
;
1011 if (vixl::is_int26(diff
)) {
1012 recordAddressImmediate();
1015 // Cannot use simple a->Mov() since such a sequence cannot be
1016 // adjusted while live following a relocation.
1017 recordAddressImmediate();
1018 poolLiteral(*env
.cb
, env
.meta
, (uint64_t)i
.target
, 32, false);
1019 a
->bind(&data
); // This will be remapped during the handleLiterals phase.
1020 a
->Ldr(rAsm_w
, &data
);
1025 void Vgen::emit(const lea
& i
) {
1027 assertx(p
.base
.isValid());
1028 if (p
.index
.isValid()) {
1029 assertx(p
.disp
== 0);
1030 a
->Add(X(i
.d
), X(p
.base
), Operand(X(p
.index
), LSL
, Log2(p
.scale
)));
1032 a
->Add(X(i
.d
), X(p
.base
), p
.disp
);
1036 void Vgen::emit(const leav
& i
) {
1037 auto const addr
= a
->frontier();
1038 emit(leap
{reg::rip
[0xdeadbeef], i
.d
});
1039 env
.leas
.push_back({addr
, i
.s
});
1042 void Vgen::emit(const leap
& i
) {
1043 vixl::Label imm_data
;
1044 vixl::Label after_data
;
1046 // Cannot use simple a->Mov() since such a sequence cannot be
1047 // adjusted while live following a relocation.
1048 recordAddressImmediate();
1049 poolLiteral(*env
.cb
, env
.meta
, (uint64_t)makeTarget32(i
.s
.r
.disp
),
1051 a
->bind(&imm_data
); // This will be remapped during the handleLiterals phase.
1052 a
->Ldr(W(i
.d
), &imm_data
);
1055 void Vgen::emit(const lead
& i
) {
1056 recordAddressImmediate();
1057 a
->Mov(X(i
.d
), i
.s
.get());
1060 #define Y(vasm_opc, arm_opc, src_dst, m) \
1061 void Vgen::emit(const vasm_opc& i) { \
1062 assertx(i.m.base.isValid()); \
1063 a->Mov(rAsm, X(i.m.base)); \
1064 if (i.m.index.isValid()) { \
1065 a->Add(rAsm, rAsm, Operand(X(i.m.index), LSL, Log2(i.m.scale))); \
1067 if (i.m.disp != 0) { \
1068 a->Add(rAsm, rAsm, i.m.disp); \
1070 a->arm_opc(V(i.src_dst), MemOperand(rAsm)); \
1073 Y(loadups
, ld1
, d
, s
)
1074 Y(storeups
, st1
, s
, m
)
1080 * SF, ZF, PF should be updated according to result
1081 * CF, OF should be cleared
1084 * In the following implementation,
1085 * N, Z are updated according to result
1088 #define Y(vasm_opc, arm_opc, gpr_w, s0, zr) \
1089 void Vgen::emit(const vasm_opc& i) { \
1090 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), s0); \
1092 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1096 Y(orbi
, Orr
, W
, i
.s0
.ub(), wzr
);
1097 Y(orwi
, Orr
, W
, i
.s0
.uw(), xzr
);
1098 Y(orli
, Orr
, W
, i
.s0
.l(), xzr
);
1099 Y(orqi
, Orr
, X
, i
.s0
.q(), xzr
);
1100 Y(orq
, Orr
, X
, X(i
.s0
), xzr
);
1101 Y(xorb
, Eor
, W
, W(i
.s0
), wzr
);
1102 Y(xorbi
, Eor
, W
, i
.s0
.ub(), wzr
);
1103 Y(xorw
, Eor
, W
, W(i
.s0
), wzr
);
1104 Y(xorwi
, Eor
, W
, i
.s0
.uw(), wzr
);
1105 Y(xorl
, Eor
, W
, W(i
.s0
), wzr
);
1106 Y(xorq
, Eor
, X
, X(i
.s0
), xzr
);
1107 Y(xorqi
, Eor
, X
, i
.s0
.q(), xzr
);
1111 void Vgen::emit(const pop
& i
) {
1112 // SP access must be 8 byte aligned. Use rAsm instead.
1114 a
->Ldr(X(i
.d
), MemOperand(rAsm
, 8, PostIndex
));
1118 void Vgen::emit(const push
& i
) {
1119 // SP access must be 8 byte aligned. Use rAsm instead.
1121 a
->Str(X(i
.s
), MemOperand(rAsm
, -8, PreIndex
));
1125 void Vgen::emit(const roundsd
& i
) {
1127 case RoundDirection::nearest
: {
1128 a
->frintn(D(i
.d
), D(i
.s
));
1132 case RoundDirection::floor
: {
1133 a
->frintm(D(i
.d
), D(i
.s
));
1137 case RoundDirection:: ceil
: {
1138 a
->frintp(D(i
.d
), D(i
.s
));
1143 assertx(i
.dir
== RoundDirection::truncate
);
1144 a
->frintz(D(i
.d
), D(i
.s
));
1149 void Vgen::emit(const srem
& i
) {
1150 a
->Sdiv(rAsm
, X(i
.s0
), X(i
.s1
));
1151 a
->Msub(X(i
.d
), rAsm
, X(i
.s1
), X(i
.s0
));
1154 void Vgen::emit(const trap
& i
) {
1155 env
.meta
.trapReasons
.emplace_back(a
->frontier(), i
.reason
);
1159 void Vgen::emit(const unpcklpd
& i
) {
1160 // i.d and i.s1 can be same, i.s0 is unique.
1161 if (i
.d
!= i
.s1
) a
->fmov(D(i
.d
), D(i
.s1
));
1162 a
->fmov(rAsm
, D(i
.s0
));
1163 a
->fmov(D(i
.d
), 1, rAsm
);
1166 ///////////////////////////////////////////////////////////////////////////////
1168 void Vgen::emit(const cmpsd
& i
) {
1170 * cmpsd doesn't update SD, so read the flags into a temp.
1171 * Use one of the macroassembler scratch regs.
1173 a
->SetScratchRegisters(vixl::NoReg
, vixl::NoReg
);
1174 a
->Mrs(rVixlScratch0
, NZCV
);
1176 a
->Fcmp(D(i
.s0
), D(i
.s1
));
1178 case ComparisonPred::eq_ord
:
1179 a
->Csetm(rAsm
, C(jit::CC_E
));
1181 case ComparisonPred::ne_unord
:
1182 a
->Csetm(rAsm
, C(jit::CC_NE
));
1185 always_assert(false);
1187 a
->Fmov(D(i
.d
), rAsm
);
1189 /* Copy the flags back to the system register. */
1190 a
->Msr(NZCV
, rVixlScratch0
);
1191 a
->SetScratchRegisters(rVixlScratch0
, rVixlScratch1
);
1195 ///////////////////////////////////////////////////////////////////////////////
1200 * C is set through inspection
1201 * N, Z are updated according to result
1202 * V is cleared (FIXME)
1203 * PF, AF are not available
1205 * Only set the flags if there are any required flags (i.fl).
1206 * Setting the C flag is particularly expensive, so when setting
1207 * flags check this flag specifically.
1209 #define Y(vasm_opc, arm_opc, gpr_w, zr) \
1210 void Vgen::emit(const vasm_opc& i) { \
1212 /* Just perform the shift. */ \
1213 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), gpr_w(i.s0)); \
1215 checkSF(i, StatusFlags::NotV); \
1216 if (!flagRequired(i.fl, StatusFlags::C)) { \
1217 /* Perform the shift and set N and Z. */ \
1218 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), gpr_w(i.s0)); \
1219 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1221 /* Use VIXL's macroassembler scratch regs. */ \
1222 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg); \
1223 /* Perform the shift using temp and set N and Z. */ \
1224 a->arm_opc(rVixlScratch0, gpr_w(i.s1), gpr_w(i.s0)); \
1225 a->Bic(vixl::zr, rVixlScratch0, vixl::zr, SetFlags); \
1226 /* Read the flags into a temp. */ \
1227 a->Mrs(rAsm, NZCV); \
1228 /* Reshift right leaving the last bit as bit 0. */ \
1229 a->Sub(rVixlScratch1, gpr_w(i.s0), 1); \
1230 a->Lsr(rVixlScratch1, gpr_w(i.s1), rVixlScratch1); \
1231 /* Negate the bits, including bit 0 to match X64. */ \
1232 a->Mvn(rVixlScratch1, rVixlScratch1); \
1233 /* Copy bit zero into bit 29 of the flags. */ \
1234 a->bfm(rAsm, rVixlScratch1, 35, 0); \
1235 /* Copy the flags back to the system register. */ \
1236 a->Msr(NZCV, rAsm); \
1237 /* Copy the result to the destination. */ \
1238 a->Mov(gpr_w(i.d), rVixlScratch0); \
1239 /* Restore VIXL's scratch regs. */ \
1240 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1); \
1249 #define Y(vasm_opc, arm_opc, gpr_w, sz, zr) \
1250 void Vgen::emit(const vasm_opc& i) { \
1252 /* Just perform the shift. */ \
1253 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), gpr_w(i.s0)); \
1255 checkSF(i, StatusFlags::NotV); \
1256 if (!flagRequired(i.fl, StatusFlags::C)) { \
1257 /* Perform the shift and set N and Z. */ \
1258 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), gpr_w(i.s0)); \
1259 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1261 /* Use VIXL's macroassembler scratch regs. */ \
1262 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg); \
1263 /* Perform the shift using temp and set N and Z. */ \
1264 a->arm_opc(rVixlScratch0, gpr_w(i.s1), gpr_w(i.s0)); \
1265 a->Bic(vixl::zr, rVixlScratch0, vixl::zr, SetFlags); \
1266 /* Read the flags into a temp. */ \
1267 a->Mrs(rAsm, NZCV); \
1268 /* Reshift right leaving the last bit as bit 0. */ \
1269 a->Mov(rVixlScratch1, sz); \
1270 a->Sub(rVixlScratch1, rVixlScratch1, gpr_w(i.s0)); \
1271 a->Lsr(rVixlScratch1, gpr_w(i.s1), rVixlScratch1); \
1272 /* Negate the bits, including bit 0 to match X64. */ \
1273 a->Mvn(rVixlScratch1, rVixlScratch1); \
1274 /* Copy bit zero into bit 29 of the flags. */ \
1275 a->bfm(rAsm, rVixlScratch1, 35, 0); \
1276 /* Copy the flags back to the system register. */ \
1277 a->Msr(NZCV, rAsm); \
1278 /* Copy the result to the destination. */ \
1279 a->Mov(gpr_w(i.d), rVixlScratch0); \
1280 /* Restore VIXL's scratch regs. */ \
1281 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1); \
1286 Y(shl
, Lsl
, X
, 64, xzr
)
1290 #define Y(vasm_opc, arm_opc, gpr_w, zr) \
1291 void Vgen::emit(const vasm_opc& i) { \
1293 /* Just perform the shift. */ \
1294 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), i.s0.l()); \
1296 checkSF(i, StatusFlags::NotV); \
1297 if (!flagRequired(i.fl, StatusFlags::C)) { \
1298 /* Perform the shift and set N and Z. */ \
1299 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), i.s0.l()); \
1300 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1302 /* Use VIXL's macroassembler scratch regs. */ \
1303 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg); \
1304 /* Perform the shift using temp and set N and Z. */ \
1305 a->arm_opc(rVixlScratch0, gpr_w(i.s1), i.s0.l()); \
1306 a->Bic(vixl::zr, rVixlScratch0, vixl::zr, SetFlags); \
1307 /* Read the flags into a temp. */ \
1308 a->Mrs(rAsm, NZCV); \
1309 /* Reshift right leaving the last bit as bit 0. */ \
1310 a->Lsr(rVixlScratch1, gpr_w(i.s1), i.s0.l() - 1); \
1311 /* Negate the bits, including bit 0 to match X64. */ \
1312 a->Mvn(rVixlScratch1, rVixlScratch1); \
1313 /* Copy bit zero into bit 29 of the flags. */ \
1314 a->bfm(rAsm, rVixlScratch1, 35, 0); \
1315 /* Copy the flags back to the system register. */ \
1316 a->Msr(NZCV, rAsm); \
1317 /* Copy the result to the destination. */ \
1318 a->Mov(gpr_w(i.d), rVixlScratch0); \
1319 /* Restore VIXL's scratch regs. */ \
1320 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1); \
1325 Y(sarqi
, Asr
, X
, xzr
)
1326 Y(shrli
, Lsr
, W
, wzr
)
1327 Y(shrqi
, Lsr
, X
, xzr
)
1331 #define Y(vasm_opc, arm_opc, gpr_w, sz, zr) \
1332 void Vgen::emit(const vasm_opc& i) { \
1334 /* Just perform the shift. */ \
1335 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), i.s0.l()); \
1337 checkSF(i, StatusFlags::NotV); \
1338 if (!flagRequired(i.fl, StatusFlags::C)) { \
1339 /* Perform the shift and set N and Z. */ \
1340 a->arm_opc(gpr_w(i.d), gpr_w(i.s1), i.s0.l()); \
1341 a->Bic(vixl::zr, gpr_w(i.d), vixl::zr, SetFlags); \
1343 /* Use VIXL's macroassembler scratch regs. */ \
1344 a->SetScratchRegisters(vixl::NoReg, vixl::NoReg); \
1345 /* Perform the shift using temp and set N and Z. */ \
1346 a->arm_opc(rVixlScratch0, gpr_w(i.s1), i.s0.l()); \
1347 a->Bic(vixl::zr, rVixlScratch0, vixl::zr, SetFlags); \
1348 /* Read the flags into a temp. */ \
1349 a->Mrs(rAsm, NZCV); \
1350 /* Reshift right leaving the last bit as bit 0. */ \
1351 a->Lsr(rVixlScratch1, gpr_w(i.s1), sz - i.s0.l()); \
1352 /* Negate the bits, including bit 0 to match X64. */ \
1353 a->Mvn(rVixlScratch1, rVixlScratch1); \
1354 /* Copy bit zero into bit 29 of the flags. */ \
1355 a->bfm(rAsm, rVixlScratch1, 35, 0); \
1356 /* Copy the flags back to the system register. */ \
1357 a->Msr(NZCV, rAsm); \
1358 /* Copy the result to the destination. */ \
1359 a->Mov(gpr_w(i.d), rVixlScratch0); \
1360 /* Restore VIXL's scratch regs. */ \
1361 a->SetScratchRegisters(rVixlScratch0, rVixlScratch1); \
1366 Y(shlli
, Lsl
, W
, 32, wzr
)
1367 Y(shlqi
, Lsl
, X
, 64, xzr
)
1371 ///////////////////////////////////////////////////////////////////////////////
1373 void Vgen::emit(const popp
& i
) {
1374 a
->Ldp(X(i
.d0
), X(i
.d1
), MemOperand(sp
, 16, PostIndex
));
1377 void Vgen::emit(const pushp
& i
) {
1378 a
->Stp(X(i
.s1
), X(i
.s0
), MemOperand(sp
, -16, PreIndex
));
1381 ///////////////////////////////////////////////////////////////////////////////
1383 template<typename Lower
>
1384 void lower_impl(Vunit
& unit
, Vlabel b
, size_t i
, Lower lower
) {
1385 vmodify(unit
, b
, i
, [&] (Vout
& v
) { lower(v
); return 1; });
1388 template <typename Inst
>
1389 void lower(const VLS
& /*env*/, Inst
& /*inst*/, Vlabel
/*b*/, size_t /*i*/) {}
1391 ///////////////////////////////////////////////////////////////////////////////
1394 * TODO: Using load size (ldr[bh]?), apply scaled address if 'disp' is unsigned
1396 void lowerVptr(Vptr
& p
, Vout
& v
) {
1403 uint8_t mode
= (((p
.base
.isValid() & 0x1) << 0) |
1404 ((p
.index
.isValid() & 0x1) << 1) |
1405 (((p
.disp
!= 0) & 0x1) << 2));
1409 // ldr/str allow [base] and [base, index], nothing to lower.
1413 // Not supported, convert to [base].
1415 auto t
= v
.makeReg();
1416 v
<< shlqi
{Log2(p
.scale
), p
.index
, t
, v
.makeReg()};
1426 // ldr/str allow [base, #imm], where #imm is [-256 .. 255].
1427 if (p
.disp
>= -256 && p
.disp
<= 255)
1430 // #imm is out of range, convert to [base, index]
1431 auto index
= v
.makeReg();
1432 v
<< ldimmq
{Immed64(p
.disp
), index
};
1440 // Not supported, convert to [base].
1441 auto base
= v
.makeReg();
1442 v
<< ldimmq
{Immed64(p
.disp
), base
};
1451 // Not supported, convert to [base, #imm] or [base, index].
1453 auto t
= v
.makeReg();
1454 v
<< shlqi
{Log2(p
.scale
), p
.index
, t
, v
.makeReg()};
1459 if (p
.disp
>= -256 && p
.disp
<= 255) {
1463 auto index
= v
.makeReg();
1464 v
<< ldimmq
{Immed64(p
.disp
), index
};
1471 case BASE
| INDEX
| DISP
: {
1472 // Not supported, convert to [base, index].
1473 auto index
= v
.makeReg();
1475 auto t
= v
.makeReg();
1476 v
<< shlqi
{Log2(p
.scale
), p
.index
, t
, v
.makeReg()};
1477 v
<< addqi
{p
.disp
, t
, index
, v
.makeReg()};
1479 v
<< addqi
{p
.disp
, p
.index
, index
, v
.makeReg()};
1489 #define Y(vasm_opc, m) \
1490 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1491 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1492 lowerVptr(i.m, v); \
1519 #define Y(vasm_opc, lower_opc, load_opc, store_opc, arg, m) \
1520 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1521 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1522 lowerVptr(i.m, v); \
1523 auto r0 = v.makeReg(), r1 = v.makeReg(); \
1524 v << load_opc{i.m, r0}; \
1525 v << lower_opc{arg, r0, r1, i.sf, i.fl}; \
1526 v << store_opc{r1, i.m}; \
1530 Y(addlim
, addli
, loadl
, storel
, i
.s0
, m
)
1531 Y(addlm
, addl
, loadl
, storel
, i
.s0
, m
)
1532 Y(addwm
, addl
, loadw
, storew
, Reg32(i
.s0
), m
)
1533 Y(addqim
, addqi
, load
, store
, i
.s0
, m
)
1534 Y(andbim
, andbi
, loadb
, storeb
, i
.s
, m
)
1535 Y(orbim
, orqi
, loadb
, storeb
, i
.s0
, m
)
1536 Y(orqim
, orqi
, load
, store
, i
.s0
, m
)
1537 Y(orwim
, orqi
, loadw
, storew
, i
.s0
, m
)
1538 Y(orlim
, orqi
, loadl
, storel
, i
.s0
, m
)
1542 #define Y(vasm_opc, lower_opc, movs_opc) \
1543 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1544 if (!i.fl || (i.fl & static_cast<Vflags>(StatusFlags::NV))) { \
1545 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1546 auto r0 = v.makeReg(), r1 = v.makeReg(); \
1547 v << movs_opc{i.s0, r0}; \
1548 v << movs_opc{i.s1, r1}; \
1549 v << lower_opc{r0, r1, i.sf, i.fl}; \
1554 Y(cmpb
, cmpl
, movsbl
)
1555 Y(cmpw
, cmpl
, movswl
)
1559 #define Y(vasm_opc, lower_opc, movs_opc) \
1560 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1561 if (!i.fl || (i.fl & static_cast<Vflags>(StatusFlags::NV))) { \
1562 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1563 auto r = v.makeReg(); \
1564 v << movs_opc{i.s1, r}; \
1565 v << lower_opc{i.s0, r, i.sf, i.fl}; \
1570 Y(cmpbi
, cmpli
, movsbl
)
1571 Y(cmpwi
, cmpli
, movswl
)
1575 #define Y(vasm_opc, lower_opc, load_opc) \
1576 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1577 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1578 lowerVptr(i.s1, v); \
1579 auto r = e.allow_vreg() ? v.makeReg() : Vreg(PhysReg(rAsm)); \
1580 v << load_opc{i.s1, r}; \
1581 v << lower_opc{i.s0, r, i.sf, i.fl}; \
1585 Y(cmpbim
, cmpbi
, loadb
)
1586 Y(cmplim
, cmpli
, loadl
)
1587 Y(cmpbm
, cmpb
, loadb
)
1588 Y(cmpwm
, cmpw
, loadb
)
1589 Y(cmplm
, cmpl
, loadl
)
1590 Y(cmpqim
, cmpqi
, load
)
1591 Y(cmpqm
, cmpq
, load
)
1592 Y(cmpwim
, cmpwi
, loadw
)
1593 Y(testbim
, testli
, loadb
)
1594 Y(testlim
, testli
, loadl
)
1595 Y(testqim
, testqi
, load
)
1596 Y(testbm
, testb
, loadb
)
1597 Y(testwm
, testw
, loadw
)
1598 Y(testlm
, testl
, loadl
)
1599 Y(testqm
, testq
, load
)
1600 Y(testwim
, testli
, loadw
)
1604 void lower(const VLS
& e
, cvtsi2sdm
& i
, Vlabel b
, size_t z
) {
1605 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1607 auto r
= v
.makeReg();
1609 v
<< cvtsi2sd
{r
, i
.d
};
1613 #define Y(vasm_opc, lower_opc, load_opc, store_opc, m) \
1614 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1615 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1616 lowerVptr(i.m, v); \
1617 auto r0 = e.allow_vreg() ? v.makeReg() : Vreg(PhysReg(rAsm)); \
1618 auto r1 = e.allow_vreg() ? v.makeReg() : Vreg(PhysReg(rAsm)); \
1619 v << load_opc{i.m, r0}; \
1620 v << lower_opc{r0, r1, i.sf, i.fl}; \
1621 v << store_opc{r1, i.m}; \
1625 Y(declm
, decl
, loadl
, storel
, m
)
1626 Y(decqm
, decq
, load
, store
, m
)
1627 Y(inclm
, incl
, loadl
, storel
, m
)
1628 Y(incqm
, incq
, load
, store
, m
)
1629 Y(incwm
, incw
, loadw
, storew
, m
)
1633 void lower(const VLS
& e
, cvttsd2siq
& i
, Vlabel b
, size_t idx
) {
1634 lower_impl(e
.unit
, b
, idx
, [&] (Vout
& v
) {
1635 // Clear FPSR IOC flag.
1636 auto const tmp1
= v
.makeReg();
1637 auto const tmp2
= v
.makeReg();
1638 v
<< mrs
{FPSR
, tmp1
};
1639 v
<< andqi
{~0x01, tmp1
, tmp2
, v
.makeReg()};
1640 v
<< msr
{tmp2
, FPSR
};
1642 // Load error value.
1643 auto const err
= v
.makeReg();
1644 v
<< ldimmq
{0x8000000000000000, err
};
1646 // Do ARM64's double to signed int64 conversion.
1647 auto const res
= v
.makeReg();
1648 v
<< fcvtzs
{i
.s
, res
};
1650 // Check if there was a conversion error.
1651 auto const fpsr
= v
.makeReg();
1652 auto const sf
= v
.makeReg();
1653 v
<< mrs
{FPSR
, fpsr
};
1654 v
<< testqi
{1, fpsr
, sf
};
1656 // Move converted value or error.
1657 v
<< cmovq
{CC_NZ
, sf
, res
, err
, i
.d
};
1661 void lower(const VLS
& e
, callm
& i
, Vlabel b
, size_t z
) {
1662 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1663 lowerVptr(i
.target
, v
);
1665 auto const scratch
= v
.makeReg();
1667 // Load the target from memory and then call it.
1668 v
<< load
{i
.target
, scratch
};
1669 v
<< callr
{scratch
, i
.args
};
1673 void lower(const VLS
& e
, jmpm
& i
, Vlabel b
, size_t z
) {
1674 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1675 lowerVptr(i
.target
, v
);
1677 auto const scratch
= v
.makeReg();
1679 v
<< load
{i
.target
, scratch
};
1680 v
<< jmpr
{scratch
, i
.args
};
1684 ///////////////////////////////////////////////////////////////////////////////
1686 void lower(const VLS
& e
, stublogue
& /*i*/, Vlabel b
, size_t z
) {
1687 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1688 // Push both the LR and FP regardless of i.saveframe to align SP.
1689 v
<< pushp
{rlr(), rvmfp()};
1693 void lower(const VLS
& e
, unstublogue
& /*i*/, Vlabel b
, size_t z
) {
1694 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1695 // Pop LR and remove FP from the stack.
1696 v
<< popp
{PhysReg(rAsm
), rlr()};
1700 void lower(const VLS
& e
, stubret
& i
, Vlabel b
, size_t z
) {
1701 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1702 // Pop LR and (optionally) FP.
1704 v
<< popp
{rvmfp(), rlr()};
1706 v
<< popp
{PhysReg(rAsm
), rlr()};
1713 void lower(const VLS
& e
, tailcallstub
& i
, Vlabel b
, size_t z
) {
1714 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1715 // Restore LR from native stack and adjust SP.
1716 v
<< popp
{PhysReg(rAsm
), rlr()};
1718 // Then directly jump to the target.
1719 v
<< jmpi
{i
.target
, i
.args
};
1723 void lower(const VLS
& e
, tailcallstubr
& i
, Vlabel b
, size_t z
) {
1724 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1725 // Restore LR from native stack and adjust SP.
1726 v
<< popp
{PhysReg(rAsm
), rlr()};
1728 v
<< jmpr
{i
.target
, i
.args
};
1732 void lower(const VLS
& e
, stubunwind
& i
, Vlabel b
, size_t z
) {
1733 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1734 // Pop the call frame.
1735 v
<< popp
{PhysReg(rAsm
), i
.d
};
1739 void lower(const VLS
& e
, stubtophp
& /*i*/, Vlabel b
, size_t z
) {
1740 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1741 // Pop the call frame
1742 v
<< lea
{rsp()[16], rsp()};
1746 void lower(const VLS
& e
, loadstubret
& i
, Vlabel b
, size_t z
) {
1747 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1748 // Load the LR to the destination.
1749 v
<< load
{rsp()[AROFF(m_savedRip
)], i
.d
};
1753 ///////////////////////////////////////////////////////////////////////////////
1755 void lower(const VLS
& e
, phplogue
& i
, Vlabel b
, size_t z
) {
1756 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1757 v
<< store
{rlr(), i
.fp
[AROFF(m_savedRip
)]};
1761 ///////////////////////////////////////////////////////////////////////////////
1763 void lower(const VLS
& e
, resumetc
& i
, Vlabel b
, size_t z
) {
1764 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1765 // Call the translation target.
1766 v
<< callr
{i
.target
, i
.args
};
1768 // After returning to the translation, jump directly to the exit.
1769 v
<< jmpi
{i
.exittc
};
1773 ///////////////////////////////////////////////////////////////////////////////
1775 void lower(const VLS
& e
, popm
& i
, Vlabel b
, size_t z
) {
1776 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1777 auto r
= v
.makeReg();
1784 void lower(const VLS
& e
, poppm
& i
, Vlabel b
, size_t z
) {
1785 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1786 auto r0
= v
.makeReg();
1787 auto r1
= v
.makeReg();
1791 v
<< store
{r0
, i
.d0
};
1792 v
<< store
{r1
, i
.d1
};
1796 void lower(const VLS
& e
, pushm
& i
, Vlabel b
, size_t z
) {
1797 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1798 auto r
= v
.makeReg();
1805 void lower(const VLS
& e
, pushpm
& i
, Vlabel b
, size_t z
) {
1806 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1807 auto r0
= v
.makeReg();
1808 auto r1
= v
.makeReg();
1811 v
<< load
{i
.s0
, r0
};
1812 v
<< load
{i
.s1
, r1
};
1817 template<typename movz
>
1818 void lower_movz(const VLS
& e
, movz
& i
, Vlabel b
, size_t z
) {
1819 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1820 v
<< copy
{i
.s
, i
.d
};
1824 void lower(const VLS
& e
, movzbw
& i
, Vlabel b
, size_t z
) {
1825 lower_movz(e
, i
, b
, z
);
1828 void lower(const VLS
& e
, movzbl
& i
, Vlabel b
, size_t z
) {
1829 lower_movz(e
, i
, b
, z
);
1832 void lower(const VLS
& e
, movzwl
& i
, Vlabel b
, size_t z
) {
1833 lower_movz(e
, i
, b
, z
);
1836 void lower(const VLS
& e
, movtdb
& i
, Vlabel b
, size_t z
) {
1837 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1838 auto d
= v
.makeReg();
1840 v
<< movtqb
{d
, i
.d
};
1844 void lower(const VLS
& e
, movtdq
& i
, Vlabel b
, size_t z
) {
1845 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1846 v
<< copy
{i
.s
, i
.d
};
1850 #define Y(vasm_opc, lower_opc, load_opc, imm, zr, sz) \
1851 void lower(const VLS& e, vasm_opc& i, Vlabel b, size_t z) { \
1852 lower_impl(e.unit, b, z, [&] (Vout& v) { \
1853 lowerVptr(i.m, v); \
1854 if (imm.sz() == 0u) { \
1855 v << lower_opc{PhysReg(vixl::zr), i.m}; \
1857 auto r = v.makeReg(); \
1858 v << load_opc{imm, r}; \
1859 v << lower_opc{r, i.m}; \
1864 Y(storebi
, storeb
, ldimmb
, i
.s
, wzr
, b
)
1865 Y(storewi
, storew
, ldimmw
, i
.s
, wzr
, w
)
1866 Y(storeli
, storel
, ldimml
, i
.s
, wzr
, l
)
1867 //storeqi only supports 32-bit immediates
1868 Y(storeqi
, store
, ldimmq
, Immed64(i
.s
.l()), wzr
, q
)
1872 void lower(const VLS
& e
, cloadq
& i
, Vlabel b
, size_t z
) {
1873 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1874 auto const scratch
= v
.makeReg();
1878 v
<< load
{i
.t
, scratch
};
1879 v
<< cmovq
{i
.cc
, i
.sf
, i
.f
, scratch
, i
.d
};
1883 void lower(const VLS
& e
, loadqp
& i
, Vlabel b
, size_t z
) {
1884 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1885 auto const scratch
= v
.makeReg();
1887 v
<< leap
{i
.s
, scratch
};
1888 v
<< load
{scratch
[0], i
.d
};
1892 void lower(const VLS
& e
, loadqd
& i
, Vlabel b
, size_t z
) {
1893 lower_impl(e
.unit
, b
, z
, [&] (Vout
& v
) {
1894 auto const scratch
= v
.makeReg();
1896 v
<< lead
{i
.s
.getRaw(), scratch
};
1897 v
<< load
{scratch
[0], i
.d
};
1901 ///////////////////////////////////////////////////////////////////////////////
1903 void lowerForARM(Vunit
& unit
) {
1904 vasm_lower(unit
, [&] (const VLS
& env
, Vinstr
& inst
, Vlabel b
, size_t i
) {
1906 #define O(name, ...) \
1907 case Vinstr::name: \
1908 lower(env, inst.name##_, b, i); \
1917 ///////////////////////////////////////////////////////////////////////////////
1920 void optimizeARM(Vunit
& unit
, const Abi
& abi
, bool regalloc
) {
1921 Timer
timer(Timer::vasm_optimize
);
1923 removeTrivialNops(unit
);
1927 optimizeExits(unit
);
1929 assertx(checkWidths(unit
));
1933 annotateSFUses(unit
);
1938 if (!unit
.constToReg
.empty()) {
1939 foldImms
<arm::ImmFolder
>(unit
);
1943 optimizeCopies(unit
, abi
);
1945 annotateSFUses(unit
);
1946 if (unit
.needsRegAlloc()) {
1947 removeDeadCode(unit
);
1949 if (RuntimeOption::EvalUseGraphColor
&&
1951 (unit
.context
->kind
== TransKind::Optimize
||
1952 unit
.context
->kind
== TransKind::OptPrologue
)) {
1953 allocateRegistersWithGraphColor(unit
, abi
);
1955 allocateRegistersWithXLS(unit
, abi
);
1959 if (unit
.blocks
.size() > 1) {
1964 void emitARM(Vunit
& unit
, Vtext
& text
, CGMeta
& fixups
,
1966 vasm_emit
<Vgen
>(unit
, text
, fixups
, asmInfo
);
1969 ///////////////////////////////////////////////////////////////////////////////