/*---------------------------------------------------------------*/
/*--- begin                                 host_arm64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2017 OpenWorks

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"

#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
#include "host_arm64_defs.h"
/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp temporary.
     This is computed before insn selection starts, and does not
     change.  We expect this mapping to map precisely the same set of
     IRTemps as the type mapping does.

     |vregmap| holds the primary register for the IRTemp.
     |vregmapHI| is only used for 128-bit integer-typed
        IRTemps.  It holds the identity of a second
        64-bit virtual HReg, which holds the high half
        of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host hardware capabilities word.  This is set at the start
     and does not change.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   - An IRExpr*, which may be NULL, holding the IR expression (an
     IRRoundingMode-encoded value) to which the FPU's rounding mode
     was most recently set.  Setting to NULL is always safe.  Used to
     avoid redundant settings of the FPU's rounding mode, as
     described in set_FPCR_rounding_mode below.

   Note, this is all (well, mostly) host-independent.
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;

      IRExpr*      previous_rm;
   }
   ISelEnv;
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, ARM64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppARM64Instr(instr);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegD ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}
/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.

   Because some forms of ARM64 memory amodes are implicitly scaled by
   the access size, iselIntExpr_AMode takes an IRType which tells it
   the type of the access for which the amode is to be used.  This
   type needs to be correct, else you'll get incorrect code.
*/
static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
                                           IRExpr* e, IRType dty );
static ARM64AMode* iselIntExpr_AMode     ( ISelEnv* env,
                                           IRExpr* e, IRType dty );

static ARM64RIA*   iselIntExpr_RIA_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIA*   iselIntExpr_RIA       ( ISelEnv* env, IRExpr* e );

static ARM64RIL*   iselIntExpr_RIL_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIL*   iselIntExpr_RIL       ( ISelEnv* env, IRExpr* e );

static ARM64RI6*   iselIntExpr_RI6_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RI6*   iselIntExpr_RI6       ( ISelEnv* env, IRExpr* e );

static ARM64CondCode iselCondCode_C_wrk  ( ISelEnv* env, IRExpr* e );
static ARM64CondCode iselCondCode_C      ( ISelEnv* env, IRExpr* e );

static HReg        iselCondCode_R_wrk    ( ISelEnv* env, IRExpr* e );
static HReg        iselCondCode_R        ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );

static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselInt128Expr        ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                                           ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr           ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr           ( ISelEnv* env, IRExpr* e );

static HReg        iselF16Expr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselF16Expr           ( ISelEnv* env, IRExpr* e );

static HReg        iselV128Expr_wrk      ( ISelEnv* env, IRExpr* e );
static HReg        iselV128Expr          ( ISelEnv* env, IRExpr* e );

static void        iselV256Expr_wrk      ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselV256Expr          ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );

static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

/* Generate an amode suitable for a 64-bit sized access relative to
   the baseblock register (X21).  This generates an RI12 amode, which
   means it's scaled by the access size, which is why the access size
   -- 64 bit -- is stated explicitly here.  Consequently |off| needs
   to be divisible by 8. */
static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
{
   vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 7) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
}
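
/* As a quick illustration of the scaling: a 64-bit access at
   baseblock offset 24 becomes RI12(X21, 3, 8) -- an immediate of 3
   scaled by 8 -- which the load/store encoding expands back into a
   byte offset of 24. */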
/* Ditto, for 32 bit accesses. */
static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
{
   vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 3) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
}

/* Ditto, for 16 bit accesses. */
static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
{
   vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 1) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
}

/* Ditto, for 8 bit accesses. */
static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
{
   vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
}

static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
{
   vassert(off < (1<<12));
   HReg r = newVRegI(env);
   addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
                                  ARM64RIA_I12(off,0), True/*isAdd*/));
   return r;
}

static HReg get_baseblock_register ( void )
{
   return hregARM64_X21();
}
/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst  = newVRegI(env);
   ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
   addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
   return dst;
}

/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst = newVRegI(env);
   ARM64RI6* n48 = ARM64RI6_I6(48);
   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
   return dst;
}
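
/* The shift pair above sign extends by moving the 16-bit value to the
   top of the register and arithmetically shifting it back down: for
   example, 0x8000 becomes 0xFFFFFFFFFFFF8000, while 0x7FFF stays
   0x0000000000007FFF. */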
/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst  = newVRegI(env);
   ARM64RIL* mask = ARM64RIL_I13(1, 0, 15); /* encodes 0xFFFF */
   addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
   return dst;
}

/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst = newVRegI(env);
   ARM64RI6* n32 = ARM64RI6_I6(32);
   addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
   return dst;
}

/* Generate code to sign extend an 8 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst = newVRegI(env);
   ARM64RI6* n56 = ARM64RI6_I6(56);
   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
   return dst;
}

/* Generate code to zero extend an 8 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst  = newVRegI(env);
   ARM64RIL* mask = ARM64RIL_I13(1, 0, 7); /* encodes 0xFF */
   addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
   return dst;
}

/* Is this IRExpr_Const(IRConst_U64(0)) ? */
static Bool isZeroU64 ( IRExpr* e ) {
   if (e->tag != Iex_Const) return False;
   IRConst* con = e->Iex.Const.con;
   vassert(con->tag == Ico_U64);
   return con->Ico.U64 == 0;
}
/*---------------------------------------------------------*/
/*--- ISEL: FP rounding mode helpers                    ---*/
/*---------------------------------------------------------*/

/* Set the FP rounding mode: 'mode' is an I32-typed expression
   denoting a value in the range 0 .. 3, indicating a round mode
   encoded as per type IRRoundingMode -- the first four values only
   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO).  Set the ARM64
   FSCR to have the same rounding.

   For speed & simplicity, we're setting the *entire* FPCR here.

   Setting the rounding mode is expensive.  So this function tries to
   avoid repeatedly setting the rounding mode to the same thing by
   first comparing 'mode' to the 'mode' tree supplied in the previous
   call to this function, if any.  (The previous value is stored in
   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
   env->previous_rm is also just 't', then the setting is skipped.

   This is safe because of the SSA property of IR: an IR temporary can
   only be defined once and so will have the same value regardless of
   where it appears in the block.  Cool stuff, SSA.

   A safety condition: all attempts to set the RM must be aware of
   this mechanism - by being routed through the functions here.

   Of course this only helps if blocks where the RM is set more than
   once and it is set to the same value each time, *and* that value is
   held in the same IR temporary each time.  In order to assure the
   latter as much as possible, the IR optimiser takes care to do CSE
   on any block with any sign of floating point activity.
*/
static
void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything? */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before. */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is set
      to zero - so we can set the whole register at once (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:

      The IR encoding:
         00  to nearest (the default)
         01  to -infinity
         10  to +infinity
         11  to zero

      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero

      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;   if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;          ditto
      t3 = tL | tR;
      t3 <<= 22;
      fpcr = t3
   */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}
/*---------------------------------------------------------*/
/*--- ISEL: Function call helpers                       ---*/
/*---------------------------------------------------------*/

/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of register-parameter args.  This function figures out
   whether evaluation of an expression might require use of a fixed
   register.  If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
      // These are always "safe" -- either a copy of SP in some
      // arbitrary vreg, or a copy of x21, respectively.
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}

/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done.  Returns True iff it managed to handle this
   combination of arg/return types, else returns False. */

static
Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARM64CondCode cc;
   HReg          argregs[ARM64_N_ARGREGS];
   HReg          tmpregs[ARM64_N_ARGREGS];
   Bool          go_fast;
   Int           n_args, i, nextArgReg;
   Addr64        target;

   vassert(ARM64_N_ARGREGS == 8);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only
      ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
      fact the only supported arg type is I64.

      The return type can be I{64,32} or V128.  In the V128 case, it
      is expected that |args| will contain the special node
      IRExpr_VECRET(), in which case this routine generates code to
      allocate space on the stack for the vector return value.  Since
      we are not passing any scalars on the stack, it is enough to
      preallocate the return space before marshalling any arguments,
      in this case.

      |args| may also contain IRExpr_GSPTR(), in which case the
      value in x21 is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
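
   /* For example, a call whose args are all IRExpr_RdTmp or
      IRExpr_Const nodes can go via the fast scheme, with each arg
      computed straight into x0..x7.  A call with an arg such as
      Add64(t1,t2), or with a non-constant guard, is routed through
      the slow scheme instead. */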
   /* Note that the cee->regparms field is meaningless on ARM64 hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         nGSPTRs++;
      }
      n_args++;
   }

   /* If this fails, the IR is ill-formed */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      addInstr(env, ARM64Instr_AddToSP(-16));
      addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   argregs[0] = hregARM64_X0();
   argregs[1] = hregARM64_X1();
   argregs[2] = hregARM64_X2();
   argregs[3] = hregARM64_X3();
   argregs[4] = hregARM64_X4();
   argregs[5] = hregARM64_X5();
   argregs[6] = hregARM64_X6();
   argregs[7] = hregARM64_X7();

   tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
   tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      appear. */

   go_fast = True;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   if (go_fast) {
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           iselIntExpr_R(env, args[i]) ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           hregARM64_X21() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // because of the go_fast logic above, we can't get here,
            // since vector return values makes us use the slow path
            // instead.
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARM64cc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            tmpregs[nextArgReg] = hregARM64_X21();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            vassert(!hregIsInvalid(r_vecRetAddr));
            tmpregs[nextArgReg] = r_vecRetAddr;
            nextArgReg++;
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARM64cc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode_C( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         vassert(!(hregIsInvalid(tmpregs[i])));
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
      }
   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM64_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);
   vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order x0 .. x7, its numeric value at this point,
      which must be between 0 and 8 inclusive, is going to be equal to
      the number of arg regs in use for the call.  Hence bake that
      number into the call (we'll need to know it when doing register
      allocation, to know what regs the call reads.) */

   target = (Addr)cee->addr;
   addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}
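
/* Note on the V128 return case above: 16 bytes are reserved on the
   stack before the args are marshalled, the address of that space is
   passed through the IRExpr_VECRET() argument slot, the RetLoc is
   RLPri_V128SpRel at offset 0, and the caller of doHelperCall is
   expected to add the recorded 16 bytes back onto SP after the
   call. */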
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32 bit)             ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64- and 32-bit type.  All results
   are returned in a 64-bit register.  For 32-bit expressions, the
   upper 32 bits are arbitrary, so you should mask or sign extend
   partial values if necessary.
*/
/* ---------------- RRS matching helper ---------------- */

/* This helper matches 64-bit integer expressions of the form
      {Add,Sub,And,Or,Xor}(E1, {Shl,Shr,Sar}(E2, immediate))
   and
      {Add,And,Or,Xor}({Shl,Shr,Sar}(E1, immediate), E2)
   which is a useful thing to do because AArch64 can compute those in
   a single instruction.
*/
static Bool matchesRegRegShift(/*OUT*/ARM64RRSOp* mainOp,
                               /*OUT*/ARM64ShiftOp* shiftOp,
                               /*OUT*/UChar* amt,
                               /*OUT*/IRExpr** argUnshifted,
                               /*OUT*/IRExpr** argToBeShifted,
                               IRExpr* e)
{
   *mainOp         = (ARM64RRSOp)0;
   *shiftOp        = (ARM64ShiftOp)0;
   *amt            = 0;
   *argUnshifted   = NULL;
   *argToBeShifted = NULL;
   if (e->tag != Iex_Binop) {
      return False;
   }
   const IROp irMainOp = e->Iex.Binop.op;
   Bool       canSwap  = True;
   switch (irMainOp) {
      case Iop_And64: *mainOp = ARM64rrs_AND; break;
      case Iop_Or64:  *mainOp = ARM64rrs_OR;  break;
      case Iop_Xor64: *mainOp = ARM64rrs_XOR; break;
      case Iop_Add64: *mainOp = ARM64rrs_ADD; break;
      case Iop_Sub64: *mainOp = ARM64rrs_SUB; canSwap = False; break;
      default: return False;
   }
   /* The root node is OK.  Now check the right (2nd) arg. */
   IRExpr* argL = e->Iex.Binop.arg1;
   IRExpr* argR = e->Iex.Binop.arg2;

   // This loop runs either one or two iterations.  In the first iteration, we
   // check for a shiftable right (second) arg.  If that fails, at the end of
   // the first iteration, the args are swapped, if that is valid, and we go
   // round again, hence checking for a shiftable left (first) arg.
   UInt iterNo = 1;
   while (True) {
      vassert(iterNo == 1 || iterNo == 2);
      if (argR->tag == Iex_Binop) {
         const IROp irShiftOp = argR->Iex.Binop.op;
         if (irShiftOp == Iop_Shl64
             || irShiftOp == Iop_Shr64 || irShiftOp == Iop_Sar64) {
            IRExpr*       argRL = argR->Iex.Binop.arg1;
            const IRExpr* argRR = argR->Iex.Binop.arg2;
            if (argRR->tag == Iex_Const) {
               const IRConst* argRRconst = argRR->Iex.Const.con;
               vassert(argRRconst->tag == Ico_U8); // due to typecheck rules
               const UChar amount = argRRconst->Ico.U8;
               if (amount >= 1 && amount <= 63) {
                  // We got a match \o/
                  // *mainOp is already set
                  switch (irShiftOp) {
                     case Iop_Shl64: *shiftOp = ARM64sh_SHL; break;
                     case Iop_Shr64: *shiftOp = ARM64sh_SHR; break;
                     case Iop_Sar64: *shiftOp = ARM64sh_SAR; break;
                     default: vassert(0); // guarded above
                  }
                  *amt            = amount;
                  *argUnshifted   = argL;
                  *argToBeShifted = argRL;
                  return True;
               }
            }
         }
      }
      // We failed to get a match in the first iteration.  So, provided the
      // root node isn't SUB, swap the arguments and make one further
      // iteration.  If that doesn't succeed, we must give up.
      if (iterNo == 1 && canSwap) {
         IRExpr* tmp = argL;
         argL = argR;
         argR = tmp;
         iterNo++;
         continue;
      }
      // Give up.
      return False;
   }
   /*NOTREACHED*/
}
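
/* For instance, the IR tree Add64(t1, Shl64(t2, 3:I8)) is matched
   with *mainOp = ARM64rrs_ADD, *shiftOp = ARM64sh_SHL, *amt = 3,
   *argUnshifted = t1 and *argToBeShifted = t2, which the Iex_Binop
   handling in iselIntExpr_R_wrk below turns into a single
   register-register-shift instruction (roughly, add Xd, Xn, Xm,
   LSL #3). */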
/* --------------------- AMode --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

static Bool isValidScale ( UChar scale )
{
   switch (scale) {
      case 1: case 2: case 4: case 8: /* case 16: ?? */ return True;
      default: return False;
   }
}

static Bool sane_AMode ( ARM64AMode* am )
{
   switch (am->tag) {
      case ARM64am_RI9:
         return
            toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI9.reg)
                        /* || sameHReg(am->ARM64am.RI9.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI9.simm9 >= -256
                    && am->ARM64am.RI9.simm9 <= 255 );
      case ARM64am_RI12:
         return
            toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI12.reg)
                        /* || sameHReg(am->ARM64am.RI12.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI12.uimm12 < 4096
                    && isValidScale(am->ARM64am.RI12.szB) );
      case ARM64am_RR:
         return
            toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.base)
                    && hregClass(am->ARM64am.RR.index) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.index) );
      default:
         vpanic("sane_AMode: unknown ARM64 AMode1 tag");
   }
}

static
ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
{
   ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
   vassert(sane_AMode(am));
   return am;
}

static
ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   UInt szBbits = 0;
   switch (dty) {
      case Ity_I64: szBbits = 3; break;
      case Ity_I32: szBbits = 2; break;
      case Ity_I16: szBbits = 1; break;
      case Ity_I8:  szBbits = 0; break;
      default: vassert(0);
   }

   /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
      we're going to create an amode suitable for LDU* or STU*
      instructions, which use unscaled immediate offsets.  */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      if (simm >= -255 && simm <= 255) {
         /* Although the gating condition might seem to be
               simm >= -256 && simm <= 255
            we will need to negate simm in the case where the op is Sub64.
            Hence limit the lower value to -255 in order that its negation
            is representable. */
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
         return ARM64AMode_RI9(reg, (Int)simm);
      }
   }

   /* Add64(expr, uimm12 * transfer-size) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      ULong szB  = 1 << szBbits;
      if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
          && (uimm >> szBbits) < 4096) {
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
      }
   }

   /* Add64(expr1, expr2) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64) {
      HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
      return ARM64AMode_RR(reg1, reg2);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg reg = iselIntExpr_R(env, e);
      return ARM64AMode_RI9(reg, 0);
   }
}
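
/* Example: for a 32-bit access to Add64(t, 0x400:I64), dty is
   Ity_I32 so szBbits is 2; 0x400 is too big for the simm9 (RI9)
   case, but it is 4-aligned and 0x400 >> 2 = 256 < 4096, so the
   second case yields RI12(r_t, 256, 4).  An unaligned offset of the
   same size falls through to the RR case, with the constant computed
   into a register. */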
/* --------------------- RIA --------------------- */

/* Select instructions to generate 'e' into a RIA. */

static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
{
   ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64riA_I12:
         vassert(ri->ARM64riA.I12.imm12 < 4096);
         vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
         return ri;
      case ARM64riA_R:
         vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riA.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIA: unknown arm RIA tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      ULong u = 0xF000000ULL; /* invalid */
      switch (e->Iex.Const.con->tag) {
         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
      }
      if (0 == (u & ~(0xFFFULL << 0)))
         return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
      if (0 == (u & ~(0xFFFULL << 12)))
         return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RIA_R(r);
   }
}
/* --------------------- RIL --------------------- */

/* Select instructions to generate 'e' into a RIL.  At this point we
   have to deal with the strange bitfield-immediate encoding for logic
   instructions. */

// The following four functions
// CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
// are copied, with modifications, from
// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
// which has the following copyright notice:
/*
   Copyright 2013, ARM Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice,
     this list of conditions and the following disclaimer in the documentation
     and/or other materials provided with the distribution.
   * Neither the name of ARM Limited nor the names of its contributors may be
     used to endorse or promote products derived from this software without
     specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/
static Int CountLeadingZeros(ULong value, Int width)
{
   vassert(width == 32 || width == 64);
   Int count = 0;
   ULong bit_test = 1ULL << (width - 1);
   while ((count < width) && ((bit_test & value) == 0)) {
      count++;
      bit_test >>= 1;
   }
   return count;
}

static Int CountTrailingZeros(ULong value, Int width)
{
   vassert(width == 32 || width == 64);
   Int count = 0;
   while ((count < width) && (((value >> count) & 1) == 0)) {
      count++;
   }
   return count;
}

static Int CountSetBits(ULong value, Int width)
{
   // TODO: Other widths could be added here, as the implementation already
   // supports them.
   vassert(width == 32 || width == 64);

   // Mask out unused bits to ensure that they are not counted.
   value &= (0xffffffffffffffffULL >> (64-width));

   // Add up the set bits.
   // The algorithm works by adding pairs of bit fields together iteratively,
   // where the size of each bit field doubles each time.
   // An example for an 8-bit value:
   // Bits:  h  g  f  e  d  c  b  a
   // value = h+g   f+e   d+c   b+a
   // value =   h+g+f+e     d+c+b+a
   // value =     h+g+f+e+d+c+b+a
   value = ((value >>  1) & 0x5555555555555555ULL)
           + (value       & 0x5555555555555555ULL);
   value = ((value >>  2) & 0x3333333333333333ULL)
           + (value       & 0x3333333333333333ULL);
   value = ((value >>  4) & 0x0f0f0f0f0f0f0f0fULL)
           + (value       & 0x0f0f0f0f0f0f0f0fULL);
   value = ((value >>  8) & 0x00ff00ff00ff00ffULL)
           + (value       & 0x00ff00ff00ff00ffULL);
   value = ((value >> 16) & 0x0000ffff0000ffffULL)
           + (value       & 0x0000ffff0000ffffULL);
   value = ((value >> 32) & 0x00000000ffffffffULL)
           + (value       & 0x00000000ffffffffULL);

   return value;
}
static Bool isImmLogical ( /*OUT*/UInt* n,
                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
                           ULong value, UInt width )
{
   // Test if a given value can be encoded in the immediate field of a
   // logical instruction.

   // If it can be encoded, the function returns true, and values
   // pointed to by n, imm_s and imm_r are updated with immediates
   // encoded in the format required by the corresponding fields in the
   // logical instruction.  If it can not be encoded, the function
   // returns false, and the values pointed to by n, imm_s and imm_r
   // are undefined.
   vassert(n != NULL && imm_s != NULL && imm_r != NULL);
   vassert(width == 32 || width == 64);

   // Logical immediates are encoded using parameters n, imm_s and imm_r using
   // the following table:
   //
   //  N   imms    immr   size        S             R
   //  1  ssssss  rrrrrr   64   UInt(ssssss)  UInt(rrrrrr)
   //  0  0sssss  xrrrrr   32   UInt(sssss)   UInt(rrrrr)
   //  0  10ssss  xxrrrr   16   UInt(ssss)    UInt(rrrr)
   //  0  110sss  xxxrrr    8   UInt(sss)     UInt(rrr)
   //  0  1110ss  xxxxrr    4   UInt(ss)      UInt(rr)
   //  0  11110s  xxxxxr    2   UInt(s)       UInt(r)
   // (s bits must not be all set)
   //
   // A pattern is constructed of size bits, where the least significant S+1
   // bits are set.  The pattern is rotated right by R, and repeated across a
   // 32 or 64-bit value, depending on destination register width.
   //
   // To test if an arbitrary immediate can be encoded using this scheme, an
   // iterative algorithm is used.
   //
   // TODO: This code does not consider using X/W register overlap to support
   // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
   // are an encodable logical immediate.

   // 1. If the value has all set or all clear bits, it can't be encoded.
   if ((value == 0) || (value == 0xffffffffffffffffULL) ||
       ((width == 32) && (value == 0xffffffff))) {
      return False;
   }

   UInt lead_zero  = CountLeadingZeros(value, width);
   UInt lead_one   = CountLeadingZeros(~value, width);
   UInt trail_zero = CountTrailingZeros(value, width);
   UInt trail_one  = CountTrailingZeros(~value, width);
   UInt set_bits   = CountSetBits(value, width);

   // The fixed bits in the immediate s field.
   // If width == 64 (X reg), start at 0xFFFFFF80.
   // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
   // widths won't be executed.
   Int imm_s_fixed = (width == 64) ? -128 : -64;
   Int imm_s_mask  = 0x3F;

   while (True) {
      // 2. If the value is two bits wide, it can be encoded.
      if (width == 2) {
         *n     = 0;
         *imm_s = 0x3C;
         *imm_r = (value & 3) - 1;
         return True;
      }

      *n     = (width == 64) ? 1 : 0;
      *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
      if ((lead_zero + set_bits) == width) {
         *imm_r = 0;
      } else {
         *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
      }

      // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
      //    the bit width of the value, it can be encoded.
      if (lead_zero + trail_zero + set_bits == width) {
         return True;
      }

      // 4. If the sum of leading ones, trailing ones and unset bits in the
      //    value is equal to the bit width of the value, it can be encoded.
      if (lead_one + trail_one + (width - set_bits) == width) {
         return True;
      }

      // 5. If the most-significant half of the bitwise value is equal to the
      //    least-significant half, return to step 2 using the least-significant
      //    half of the value.
      ULong mask = (1ULL << (width >> 1)) - 1;
      if ((value & mask) == ((value >> (width >> 1)) & mask)) {
         width       >>= 1;
         set_bits    >>= 1;
         imm_s_fixed >>= 1;
         continue;
      }

      // 6. Otherwise, the value can't be encoded.
      return False;
   }
}
/* Create a RIL for the given immediate, if it is representable, or
   return NULL if not. */

static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
{
   UInt n = 0, imm_s = 0, imm_r = 0;
   Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
   if (!ok) return NULL;
   vassert(n < 2 && imm_s < 64 && imm_r < 64);
   return ARM64RIL_I13(n, imm_r, imm_s);
}
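
/* For example, mb_mkARM64RIL_I(0xFF) succeeds with N=1, immR=0,
   immS=7 (eight consecutive ones in a 64-bit element), matching the
   ARM64RIL_I13(1, 0, 7) used by widen_z_8_to_64 above.  By contrast
   mb_mkARM64RIL_I(0xFF00FF) returns NULL, since no rotated repeating
   run of ones produces that 64-bit value. */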
/* So, finally .. */

static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
{
   ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64riL_I13:
         vassert(ri->ARM64riL.I13.bitN < 2);
         vassert(ri->ARM64riL.I13.immR < 64);
         vassert(ri->ARM64riL.I13.immS < 64);
         return ri;
      case ARM64riL_R:
         vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riL.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIL: unknown arm RIL tag");
   }
}
/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      ARM64RIL* maybe = NULL;
      if (ty == Ity_I64) {
         vassert(e->Iex.Const.con->tag == Ico_U64);
         maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
      } else if (ty == Ity_I32) {
         vassert(ty == Ity_I32);
         vassert(e->Iex.Const.con->tag == Ico_U32);
         UInt  u32 = e->Iex.Const.con->Ico.U32;
         ULong u64 = (ULong)u32;
         /* First try with 32 leading zeroes. */
         maybe = mb_mkARM64RIL_I(u64);
         /* If that doesn't work, try with 2 copies, since it doesn't
            matter what winds up in the upper 32 bits. */
         if (!maybe) {
            maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
         }
      } else {
         vassert(ty == Ity_I16);
         vassert(e->Iex.Const.con->tag == Ico_U16);
         // `maybe` is still NULL.  Be lame and fall through to the default
         // case.  Obviously we could do better here.
      }
      if (maybe) return maybe;
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RIL_R(r);
   }
}
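
/* The duplication trick above matters for 32-bit logical ops: for
   instance 0x0F0F0F0F is not a valid 64-bit logical immediate on its
   own, but 0x0F0F0F0F0F0F0F0F is (a repeating 8-bit pattern), and
   since only the low 32 bits of the result are of interest, the wider
   immediate is just as good. */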
/* --------------------- RI6 --------------------- */

/* Select instructions to generate 'e' into a RI6. */

static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
{
   ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64ri6_I6:
         vassert(ri->ARM64ri6.I6.imm6 < 64);
         vassert(ri->ARM64ri6.I6.imm6 > 0);
         return ri;
      case ARM64ri6_R:
         vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U8: {
            UInt u = e->Iex.Const.con->Ico.U8;
            if (u > 0 && u < 64)
               return ARM64RI6_I6(u);
            break;
         }
         default:
            break;
      }
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RI6_R(r);
   }
}
/* ------------------- CondCode ------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1.

   Note that iselCondCode_C and iselCondCode_R are mutually recursive.  For
   future changes to either of them, take care not to introduce an infinite
   loop involving the two of them.
*/
static ARM64CondCode iselCondCode_C ( ISelEnv* env, IRExpr* e )
{
   ARM64CondCode cc = iselCondCode_C_wrk(env,e);
   vassert(cc != ARM64cc_NV);
   return cc;
}

static ARM64CondCode iselCondCode_C_wrk ( ISelEnv* env, IRExpr* e )
{
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Cmp doesn't modify rTmp; so this is OK. */
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one);
      addInstr(env, ARM64Instr_Test(rTmp, one));
      return ARM64cc_NE;
   }

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      /* This is a very stupid translation.  Hopefully it doesn't occur much,
         if ever. */
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      HReg rTmp = newVRegI(env);
      addInstr(env, ARM64Instr_Imm64(rTmp, 0));
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one);
      addInstr(env, ARM64Instr_Test(rTmp, one));
      return e->Iex.Const.con->Ico.U1 ? ARM64cc_EQ : ARM64cc_NE;
   }

   /* Not1(e) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
      if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
         return ARM64cc_AL;
      } else {
         return 1 ^ cc;
      }
   }

   /* --- patterns rooted at: 64to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_64to1) {
      HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one); /* '1' must be representable */
      addInstr(env, ARM64Instr_Test(rTmp, one));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg      r1  = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
      addInstr(env, ARM64Instr_Test(r1, xFF));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ16 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ16) {
      HReg      r1    = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
      addInstr(env, ARM64Instr_Test(r1, xFFFF));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
      return ARM64cc_NE;
   }
1489 if (e
->tag
== Iex_Binop
1490 && (e
->Iex
.Binop
.op
== Iop_CmpEQ64
1491 || e
->Iex
.Binop
.op
== Iop_CmpNE64
1492 || e
->Iex
.Binop
.op
== Iop_CmpLT64S
1493 || e
->Iex
.Binop
.op
== Iop_CmpLT64U
1494 || e
->Iex
.Binop
.op
== Iop_CmpLE64S
1495 || e
->Iex
.Binop
.op
== Iop_CmpLE64U
1496 || e
->Iex
.Binop
.op
== Iop_CasCmpEQ64
1497 || e
->Iex
.Binop
.op
== Iop_CasCmpNE64
)) {
1498 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1499 ARM64RIA
* argR
= iselIntExpr_RIA(env
, e
->Iex
.Binop
.arg2
);
1500 addInstr(env
, ARM64Instr_Cmp(argL
, argR
, True
/*is64*/));
1501 switch (e
->Iex
.Binop
.op
) {
1502 case Iop_CmpEQ64
: case Iop_CasCmpEQ64
: return ARM64cc_EQ
;
1503 case Iop_CmpNE64
: case Iop_CasCmpNE64
: return ARM64cc_NE
;
1504 case Iop_CmpLT64S
: return ARM64cc_LT
;
1505 case Iop_CmpLT64U
: return ARM64cc_CC
;
1506 case Iop_CmpLE64S
: return ARM64cc_LE
;
1507 case Iop_CmpLE64U
: return ARM64cc_LS
;
1508 default: vpanic("iselCondCode_C(arm64): CmpXX64");
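
   /* The condition-code choices above follow the usual AArch64
      conventions for integer compares: LT/LE are the signed
      relations, while unsigned less-than maps to CC (carry clear,
      i.e. "lower") and unsigned less-or-equal maps to LS ("lower or
      same"). */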
   /* --- Cmp*32*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpNE32)) {
      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
      addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32:  case Iop_CasCmpEQ32: return ARM64cc_EQ;
         case Iop_CmpNE32:  case Iop_CasCmpNE32: return ARM64cc_NE;
         case Iop_CmpLT32S: return ARM64cc_LT;
         case Iop_CmpLT32U: return ARM64cc_CC;
         case Iop_CmpLE32S: return ARM64cc_LE;
         case Iop_CmpLE32U: return ARM64cc_LS;
         default: vpanic("iselCondCode_C(arm64): CmpXX32");
      }
   }

   /* --- Cmp*16*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpNE16)) {
      HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
      HReg argL2 = widen_z_16_to_64(env, argL);
      HReg argR2 = widen_z_16_to_64(env, argR);
      addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CasCmpEQ16: return ARM64cc_EQ;
         case Iop_CasCmpNE16: return ARM64cc_NE;
         default: vpanic("iselCondCode_C(arm64): CmpXX16");
      }
   }

   /* --- Cmp*8*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CasCmpEQ8
           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
      HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
      HReg argL2 = widen_z_8_to_64(env, argL);
      HReg argR2 = widen_z_8_to_64(env, argR);
      addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CasCmpEQ8: return ARM64cc_EQ;
         case Iop_CasCmpNE8: return ARM64cc_NE;
         default: vpanic("iselCondCode_C(arm64): CmpXX8");
      }
   }
   /* --- And1(x,y), Or1(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
      HReg tmp = iselCondCode_R(env, e);
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one);
      addInstr(env, ARM64Instr_Test(tmp, one));
      return ARM64cc_NE;
   }

   ppIRExpr(e);
   vpanic("iselCondCode_C");
}
/* --------------------- CONDCODE as int reg --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the resulting
   value in bit 0 of an integer register.  WARNING: all of the other bits in
   the register can be arbitrary.  Callers must mask them off or otherwise
   ignore them.

   Note that iselCondCode_C and iselCondCode_R are mutually recursive.  For
   future changes to either of them, take care not to introduce an infinite
   loop involving the two of them.
*/
static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_R_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e )
{
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* And1(x,y), Or1(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
      HReg res     = newVRegI(env);
      HReg x_as_64 = iselCondCode_R(env, e->Iex.Binop.arg1);
      HReg y_as_64 = iselCondCode_R(env, e->Iex.Binop.arg2);
      ARM64LogicOp lop
         = e->Iex.Binop.op == Iop_And1 ? ARM64lo_AND : ARM64lo_OR;
      addInstr(env, ARM64Instr_Logic(res, x_as_64, ARM64RIL_R(y_as_64), lop));
      return res;
   }

   /* Anything else, we hand off to iselCondCode_C and force the value into a
      register. */
   HReg res = newVRegI(env);
   ARM64CondCode cc = iselCondCode_C(env, e);
   addInstr(env, ARM64Instr_Set64(res, cc));
   return res;

   /* PJF the following two lines are dead code
   ppIRExpr(e);
   vpanic("iselCondCode_R(arm64)");
   */
}
/* --------------------- Reg --------------------- */

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {
   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);

      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
         addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
         return dst;
      }
      if (ty == Ity_I32) {
         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
         addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
         return dst;
      }
      if (ty == Ity_I16) {
         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
         addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
         return dst;
      }
      if (ty == Ity_I8) {
         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
         addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
         return dst;
      }
      break;
   }
   /* --------- BINARY OP --------- */
   case Iex_Binop: {

      ARM64LogicOp lop = 0; /* invalid */
      ARM64ShiftOp sop = 0; /* invalid */

      /* Special-case 0-x into a Neg instruction.  Not because it's
         particularly useful but more so as to give value flow using
         this instruction, so as to check its assembly correctness for
         implementation of Left32/Left64. */
      switch (e->Iex.Binop.op) {
         case Iop_Sub64:
            if (isZeroU64(e->Iex.Binop.arg1)) {
               HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
               HReg dst  = newVRegI(env);
               addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
               return dst;
            }
            break;
         default:
            break;
      }

      /* AND64/OR64/XOR64/ADD64/SUB64(e1, e2 shifted by imm)
         AND64/OR64/XOR64/ADD64(e1 shifted by imm, e2)
      */
      switch (e->Iex.Binop.op) {
         case Iop_And64: case Iop_Or64: case Iop_Xor64:
         case Iop_Add64: case Iop_Sub64: {
            ARM64RRSOp   mainOp         = ARM64rrs_INVALID;
            ARM64ShiftOp shiftOp        = (ARM64ShiftOp)0; // Invalid
            IRExpr*      argUnshifted   = NULL;
            IRExpr*      argToBeShifted = NULL;
            UChar        amt            = 0;
            if (matchesRegRegShift(&mainOp, &shiftOp, &amt, &argUnshifted,
                                   &argToBeShifted, e)) {
               HReg rDst         = newVRegI(env);
               HReg rUnshifted   = iselIntExpr_R(env, argUnshifted);
               HReg rToBeShifted = iselIntExpr_R(env, argToBeShifted);
               addInstr(env, ARM64Instr_RRS(rDst, rUnshifted, rToBeShifted,
                                            shiftOp, amt, mainOp));
               return rDst;
            }
            break;
         }
         default:
            break;
      }
      /* ADD/SUB(e1, e2) (for any e1, e2) */
      switch (e->Iex.Binop.op) {
         case Iop_Add64: case Iop_Add32:
         case Iop_Sub64: case Iop_Sub32: {
            Bool      isAdd = e->Iex.Binop.op == Iop_Add64
                              || e->Iex.Binop.op == Iop_Add32;
            HReg      dst   = newVRegI(env);
            HReg      argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
            ARM64RIA* argR  = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
            addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
            return dst;
         }
         default:
            break;
      }

      /* AND/OR/XOR(e1, e2) (for any e1, e2) */
      switch (e->Iex.Binop.op) {
         case Iop_And64: case Iop_And32:
            lop = ARM64lo_AND; goto log_binop;
         case Iop_Or64:  case Iop_Or32:  case Iop_Or16:
            lop = ARM64lo_OR;  goto log_binop;
         case Iop_Xor64: case Iop_Xor32:
            lop = ARM64lo_XOR; goto log_binop;
         log_binop: {
            HReg      dst  = newVRegI(env);
            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
            addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
            return dst;
         }
         default:
            break;
      }
      switch (e->Iex.Binop.op) {
         case Iop_Shr64:                 sop = ARM64sh_SHR; goto sh_binop;
         case Iop_Sar64:                 sop = ARM64sh_SAR; goto sh_binop;
         case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
         sh_binop: {
            HReg      dst  = newVRegI(env);
            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
            addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
            return dst;
         }
         case Iop_Shr32:
         case Iop_Sar32: {
            Bool      zx   = e->Iex.Binop.op == Iop_Shr32;
            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
            HReg      dst  = zx ? widen_z_32_to_64(env, argL)
                                : widen_s_32_to_64(env, argL);
            addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
            return dst;
         }
         default:
            break;
      }
      if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg dst  = newVRegI(env);
         addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
         Bool isS  = e->Iex.Binop.op == Iop_MullS32;
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
         HReg dst  = newVRegI(env);
         addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
         return dst;
      }
1825 /* Handle misc other ops. */
1827 if (e
->Iex
.Binop
.op
== Iop_Max32U
) {
1828 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1829 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1830 HReg dst
= newVRegI(env
);
1831 addInstr(env
, ARM64Instr_Cmp(argL
, ARM64RIA_R(argR
), False
/*!is64*/));
1832 addInstr(env
, ARM64Instr_CSel(dst
, argL
, argR
, ARM64cc_CS
));
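         /* ARM64cc_CS is "carry set", i.e. unsigned higher-or-same.  After
            the 32-bit compare above it holds exactly when argL >= argR
            (unsigned), so the CSel keeps argL in that case and argR
            otherwise -- an unsigned 32-bit max. */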
         return dst;
      }

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg lo32  = widen_z_32_to_64(env, lo32s);
         HReg hi32  = newVRegI(env);
         addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
                                        ARM64sh_SHL));
         addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
                                        ARM64lo_OR));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32 ||
          e->Iex.Binop.op == Iop_CmpF16) {
         HReg (*iselExpr)(ISelEnv*, IRExpr*) = NULL;
         ARM64Instr* (*VCmp)(HReg, HReg)     = NULL;
         if (e->Iex.Binop.op == Iop_CmpF64) {
            iselExpr = &iselDblExpr;
            VCmp     = &ARM64Instr_VCmpD;
         }
         else if (e->Iex.Binop.op == Iop_CmpF32) {
            iselExpr = &iselFltExpr;
            VCmp     = &ARM64Instr_VCmpS;
         }
         else {
            iselExpr = &iselF16Expr;
            VCmp     = &ARM64Instr_VCmpH;
         }
         HReg dL  = (iselExpr)(env, e->Iex.Binop.arg1);
         HReg dR  = (iselExpr)(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         HReg imm = newVRegI(env);
         /* Do the compare (FCMP), which sets NZCV in PSTATE.  Then
            create in dst, the IRCmpF64Result encoded result. */
         addInstr(env, (VCmp)(dL, dR));
         addInstr(env, ARM64Instr_Imm64(dst, 0));
         addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
         addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
         addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
         addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
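         /* After FCMP, the conditions used above map onto the four possible
            FP compare outcomes: EQ <-> equal, MI <-> less than,
            GT <-> greater than, VS <-> unordered.  Exactly one of them is
            true for any outcome, so exactly one CSel leaves its constant in
            dst, yielding the Ircr_* encoding noted on each Imm64 above. */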
         return dst;
      }

      ARM64CvtOp cvt_op = ARM64cvt_INVALID;
      Bool       srcIsD = False;
      switch (e->Iex.Binop.op) {
         case Iop_F64toI64S:
            cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
         case Iop_F64toI64U:
            cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
         case Iop_F64toI32S:
            cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
         case Iop_F64toI32U:
            cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
         case Iop_F32toI32S:
            cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
         case Iop_F32toI32U:
            cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
         case Iop_F32toI64S:
            cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
         case Iop_F32toI64U:
            cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
         default:
            break;
      }
      if (cvt_op != ARM64cvt_INVALID) {
         /* This is all a bit dodgy, because we can't handle a
            non-constant (not-known-at-JIT-time) rounding mode
            indication.  That's because there's no instruction
            AFAICS that does this conversion but rounds according to
            FPCR.RM, so we have to bake the rounding mode into the
            instruction right now.  But that should be OK because
            (1) the front end attaches a literal Irrm_ value to the
            conversion binop, and (2) iropt will never float that
            off via CSE, into a literal.  Hence we should always
            have an Irrm_ value as the first arg. */
         IRExpr* arg1 = e->Iex.Binop.arg1;
         if (arg1->tag != Iex_Const) goto irreducible;
         IRConst* arg1con = arg1->Iex.Const.con;
         vassert(arg1con->tag == Ico_U32); // else ill-typed IR
         UInt irrm = arg1con->Ico.U32;
         /* Find the ARM-encoded equivalent for |irrm|. */
         UInt armrm = 4; /* impossible */
         Bool tiesToAway = False;
         switch (irrm) {
            case Irrm_NEAREST: armrm = 0; break;
            case Irrm_NegINF:  armrm = 2; break;
            case Irrm_PosINF:  armrm = 1; break;
            case Irrm_ZERO:    armrm = 3; break;
            case Irrm_NEAREST_TIE_AWAY_0: armrm = 0; tiesToAway = True; break;
            default: goto irreducible;
         }
         HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
                       (env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm, tiesToAway));
         return dst;
      }

      /* All cases involving host-side helper calls. */
      void* fn = NULL;
      switch (e->Iex.Binop.op) {
         case Iop_DivU32:
            fn = &h_calc_udiv32_w_arm_semantics; break;
         case Iop_DivS32:
            fn = &h_calc_sdiv32_w_arm_semantics; break;
         case Iop_DivU64:
            fn = &h_calc_udiv64_w_arm_semantics; break;
         case Iop_DivS64:
            fn = &h_calc_sdiv64_w_arm_semantics; break;
         default:
            break;
      }

      if (fn) {
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg res  = newVRegI(env);
         addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
         addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
         addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
                                        2, mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
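         /* Per the AAPCS64 calling convention the two integer arguments are
            passed in X0 and X1 and the 64-bit result comes back in X0,
            which is why X0 is copied into |res| after the call. */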
         return res;
      }
      break;
   }

   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      switch (e->Iex.Unop.op) {
         case Iop_16Uto64: {
            /* This probably doesn't occur often enough to be worth
               rolling the extension into the load. */
            IRExpr* arg = e->Iex.Unop.arg;
            HReg    src = iselIntExpr_R(env, arg);
            HReg    dst = widen_z_16_to_64(env, src);
            return dst;
         }
         case Iop_32Uto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            if (arg->tag == Iex_Load) {
               /* This correctly zero extends because _LdSt32 is
                  defined to do a zero extending load. */
               HReg dst = newVRegI(env);
               ARM64AMode* am
                  = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
               addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
               return dst;
            }
            /* else be lame and mask it */
            HReg src = iselIntExpr_R(env, arg);
            HReg dst = widen_z_32_to_64(env, src);
            return dst;
         }
         case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
         case Iop_8Uto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            if (arg->tag == Iex_Load) {
               /* This correctly zero extends because _LdSt8 is
                  defined to do a zero extending load. */
               HReg dst = newVRegI(env);
               ARM64AMode* am
                  = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
               addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
               return dst;
            }
            /* else be lame and mask it */
            HReg src = iselIntExpr_R(env, arg);
            HReg dst = widen_z_8_to_64(env, src);
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_128to64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* and abandon rHi */
         }
         case Iop_8Sto32: case Iop_8Sto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            HReg    src = iselIntExpr_R(env, arg);
            HReg    dst = widen_s_8_to_64(env, src);
            return dst;
         }
         case Iop_16Sto32: case Iop_16Sto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            HReg    src = iselIntExpr_R(env, arg);
            HReg    dst = widen_s_16_to_64(env, src);
            return dst;
         }
         case Iop_32Sto64: {
            IRExpr* arg = e->Iex.Unop.arg;
            HReg    src = iselIntExpr_R(env, arg);
            HReg    dst = widen_s_32_to_64(env, src);
            return dst;
         }
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
            return dst;
         }
         case Iop_Clz64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
            return dst;
         }
         case Iop_Left32: case Iop_Left64: {
            /* Left64(src) = src | -src.  Left32 can use the same
               implementation since in that case we don't care what
               the upper 32 bits become. */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
                                           ARM64lo_OR));
            return dst;
         }
         case Iop_CmpwNEZ64: {
            /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
                              = Left64(src) >>s 63 */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
                                           ARM64lo_OR));
            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                           ARM64sh_SAR));
            return dst;
         }
         case Iop_CmpwNEZ32: {
            /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
                              = Left64(src & 0xFFFFFFFF) >>s 63 */
            HReg dst = newVRegI(env);
            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg src = widen_z_32_to_64(env, pre);
            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
                                           ARM64lo_OR));
            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                           ARM64sh_SAR));
            return dst;
         }
         case Iop_V128to64: case Iop_V128HIto64: {
            HReg dst    = newVRegI(env);
            HReg src    = iselV128Expr(env, e->Iex.Unop.arg);
            UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
            addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
            return dst;
         }
         case Iop_ReinterpF64asI64: {
            HReg dst = newVRegI(env);
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
            return dst;
         }
         case Iop_ReinterpF32asI32: {
            HReg dst = newVRegI(env);
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
            return dst;
         }
         case Iop_1Sto32: case Iop_1Sto64: {
            /* As with the iselStmt case for 'tmp:I1 = expr', we could
               do a lot better here if it ever became necessary. (CSDEC?) */
            HReg zero = hregARM64_XZR_XSP(); // XZR in this context
            HReg one  = newVRegI(env);
            HReg dst  = newVRegI(env);
            addInstr(env, ARM64Instr_Imm64(one, 1));
            ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                           ARM64sh_SHL));
            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                           ARM64sh_SAR));
            return dst;
         }
         case Iop_NarrowUn16to8x8:
         case Iop_NarrowUn32to16x4:
         case Iop_NarrowUn64to32x2:
         case Iop_QNarrowUn16Sto8Sx8:
         case Iop_QNarrowUn32Sto16Sx4:
         case Iop_QNarrowUn64Sto32Sx2:
         case Iop_QNarrowUn16Uto8Ux8:
         case Iop_QNarrowUn32Uto16Ux4:
         case Iop_QNarrowUn64Uto32Ux2:
         case Iop_QNarrowUn16Sto8Ux8:
         case Iop_QNarrowUn32Sto16Ux4:
         case Iop_QNarrowUn64Sto32Ux2:
         {
            HReg src = iselV128Expr(env, e->Iex.Unop.arg);
            HReg tmp = newVRegV(env);
            HReg dst = newVRegI(env);
            UInt dszBlg2 = 3; /* illegal */
            ARM64VecNarrowOp op = ARM64vecna_INVALID;
            switch (e->Iex.Unop.op) {
               case Iop_NarrowUn16to8x8:
                  dszBlg2 = 0; op = ARM64vecna_XTN; break;
               case Iop_NarrowUn32to16x4:
                  dszBlg2 = 1; op = ARM64vecna_XTN; break;
               case Iop_NarrowUn64to32x2:
                  dszBlg2 = 2; op = ARM64vecna_XTN; break;
               case Iop_QNarrowUn16Sto8Sx8:
                  dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
               case Iop_QNarrowUn32Sto16Sx4:
                  dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
               case Iop_QNarrowUn64Sto32Sx2:
                  dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
               case Iop_QNarrowUn16Uto8Ux8:
                  dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
               case Iop_QNarrowUn32Uto16Ux4:
                  dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
               case Iop_QNarrowUn64Uto32Ux2:
                  dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
               case Iop_QNarrowUn16Sto8Ux8:
                  dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
               case Iop_QNarrowUn32Sto16Ux4:
                  dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
               case Iop_QNarrowUn64Sto32Ux2:
                  dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
               default:
                  vassert(0);
            }
            addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
            addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
            return dst;
         }
         case Iop_1Uto64: {
            HReg dst = newVRegI(env);
            if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
               ARM64RIL* one = mb_mkARM64RIL_I(1);
               HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
               addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
            } else {
               HReg zero = hregARM64_XZR_XSP(); // XZR in this context
               HReg one  = newVRegI(env);
               addInstr(env, ARM64Instr_Imm64(one, 1));
               ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
               addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
            }
            return dst;
         }
         case Iop_64HIto32: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_Shift(dst, src, ARM64RI6_I6(32),
                                           ARM64sh_SHR));
            return dst;
         }
         case Iop_64to32: case Iop_64to16: case Iop_64to8:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);
         default:
            break;
      } /* switch on the unop */
      break;
   }

   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I64
          && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
         HReg        dst = newVRegI(env);
         ARM64AMode* am
            = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
         addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
         return dst;
      }
      if (ty == Ity_I32
          && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
         HReg        dst = newVRegI(env);
         ARM64AMode* am
            = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
         addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
         return dst;
      }
      if (ty == Ity_I16
          && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
         HReg        dst = newVRegI(env);
         ARM64AMode* am
            = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
         addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
         return dst;
      }
      if (ty == Ity_I8
          /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
         HReg        dst = newVRegI(env);
         ARM64AMode* am
            = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
         addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
         return dst;
      }
      break;
   }

   /* --------- CCALL --------- */
   case Iex_CCall: {
      HReg dst = newVRegI(env);
      vassert(ty == e->Iex.CCall.retty);

      /* be very restrictive for now.  Only 64-bit ints allowed for
         args, and 64 bits for return type.  Don't forget to change
         the RetLoc if more types are allowed in future. */
      if (e->Iex.CCall.retty != Ity_I64)
         goto irreducible;

      /* Marshal args, do the call, clear stack. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                                     e->Iex.CCall.cee, e->Iex.CCall.retty,
                                     e->Iex.CCall.args );
      if (ok) {
         vassert(is_sane_RetLoc(rloc));
         vassert(rloc.pri == RLPri_Int);
         vassert(addToSp == 0);
         addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
         return dst;
      }
      break;
   }

   /* --------- LITERAL --------- */
   /* 64-bit literals */
   case Iex_Const: {
      ULong u   = 0;
      HReg  dst = newVRegI(env);
      switch (e->Iex.Const.con->tag) {
         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
         case Ico_U8:  u = e->Iex.Const.con->Ico.U8;  break;
         default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
      }
      addInstr(env, ARM64Instr_Imm64(dst, u));
      return dst;
   }

   /* --------- MULTIPLEX --------- */
   case Iex_ITE: {
      /* ITE(ccexpr, iftrue, iffalse) */
      if (ty == Ity_I64 || ty == Ity_I32) {
         ARM64CondCode cc;
         HReg r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
         HReg r0  = iselIntExpr_R(env, e->Iex.ITE.iffalse);
         HReg dst = newVRegI(env);
         cc = iselCondCode_C(env, e->Iex.ITE.cond);
         addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
         return dst;
      }
      break;
   }

   default:
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R: cannot reduce tree");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (128 bit)                ---*/
/*---------------------------------------------------------*/

/* Compute a 128-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller. */

static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}
/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                 ISelEnv* env, IRExpr* e )
{
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* --------- TEMP --------- */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   /* --------- CONST --------- */
   if (e->tag == Iex_Const) {
      IRConst* c = e->Iex.Const.con;
      vassert(c->tag == Ico_U128);
      if (c->Ico.U128 == 0) {
         // The only case we need to handle (so far)
         HReg zero = newVRegI(env);
         addInstr(env, ARM64Instr_Imm64(zero, 0));
         *rHi = *rLo = zero;
         return;
      }
   }

   /* --------- UNARY ops --------- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_ReinterpV128asI128: {
            HReg dstHi = newVRegI(env);
            HReg dstLo = newVRegI(env);
            HReg src   = iselV128Expr(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VXfromQ(dstHi, src, 1));
            addInstr(env, ARM64Instr_VXfromQ(dstLo, src, 0));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }
         default:
            break;
      }
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 64 x 64 -> 128 multiply */
         case Iop_MullU64: case Iop_MullS64: {
            Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
            HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg dstLo = newVRegI(env);
            HReg dstHi = newVRegI(env);
            addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
                                         ARM64mul_PLAIN));
            addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
                                         syned ? ARM64mul_SX : ARM64mul_ZX));
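            /* ARM64mul_PLAIN yields the low 64 bits of the 64x64 product,
               while ARM64mul_ZX / ARM64mul_SX yield the high 64 bits of the
               unsigned / signed product respectively (i.e. what UMULH and
               SMULH compute). */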
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         /* 64HLto128(e1,e2) */
         case Iop_64HLto128:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;

         default:
            break;
      }
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselInt128Expr(arm64)");
}
/*---------------------------------------------------------*/
/*--- ISEL: Vector expressions (128 bit)                 ---*/
/*---------------------------------------------------------*/

static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselV128Expr_wrk( env, e );
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env, e);
   vassert(ty == Ity_V128);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }
   if (e->tag == Iex_Const) {
      /* Only a very limited range of constants is handled. */
      vassert(e->Iex.Const.con->tag == Ico_V128);
      UShort con = e->Iex.Const.con->Ico.V128;
      HReg   res = newVRegV(env);
      switch (con) {
         case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
            addInstr(env, ARM64Instr_VImmQ(res, con));
            return res;
         case 0x00F0:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
            return res;
         case 0x0F00:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
            return res;
         case 0x0FF0:
            addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
            return res;
         case 0x0FFF:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
            addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
            return res;
         case 0xF000:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
            return res;
         case 0xFF00:
            addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
            return res;
         default:
            break;
      }
   }

   if (e->tag == Iex_Load) {
      HReg res = newVRegV(env);
      HReg rN  = iselIntExpr_R(env, e->Iex.Load.addr);
      vassert(ty == Ity_V128);
      addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
      return res;
   }

   if (e->tag == Iex_Get) {
      UInt offs = (UInt)e->Iex.Get.offset;
      if (offs < (1<<12)) {
         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
         HReg res  = newVRegV(env);
         vassert(ty == Ity_V128);
         addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
         return res;
      }
   }

   if (e->tag == Iex_Unop) {

      /* Iop_ZeroHIXXofV128 cases */
      UShort imm16 = 0;
      switch (e->Iex.Unop.op) {
         case Iop_ZeroHI64ofV128:  imm16 = 0x00FF; break;
         case Iop_ZeroHI96ofV128:  imm16 = 0x000F; break;
         case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
         case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
         default:
            break;
      }
      if (imm16 != 0) {
         HReg src = iselV128Expr(env, e->Iex.Unop.arg);
         HReg imm = newVRegV(env);
         HReg res = newVRegV(env);
         addInstr(env, ARM64Instr_VImmQ(imm, imm16));
         addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
         return res;
      }

      switch (e->Iex.Unop.op) {
         case Iop_NotV128:
         case Iop_Abs64Fx2: case Iop_Abs32Fx4: case Iop_Abs16Fx8:
         case Iop_Neg64Fx2: case Iop_Neg32Fx4: case Iop_Neg16Fx8:
         case Iop_Abs64x2:  case Iop_Abs32x4:
         case Iop_Abs16x8:  case Iop_Abs8x16:
         case Iop_Cls32x4:  case Iop_Cls16x8:  case Iop_Cls8x16:
         case Iop_Clz32x4:  case Iop_Clz16x8:  case Iop_Clz8x16:
         case Iop_Cnt8x16:
         case Iop_Reverse1sIn8_x16:
         case Iop_Reverse8sIn16_x8:
         case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
         case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
         case Iop_Reverse32sIn64_x2:
         case Iop_RecipEst32Ux4:
         case Iop_RSqrtEst32Ux4:
         case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
         case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
         {
            HReg res = newVRegV(env);
            HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
            Bool setRM = False;
            ARM64VecUnaryOp op = ARM64vecu_INVALID;
            switch (e->Iex.Unop.op) {
               case Iop_NotV128:           op = ARM64vecu_NOT;         break;
               case Iop_Abs64Fx2:          op = ARM64vecu_FABS64x2;    break;
               case Iop_Abs32Fx4:          op = ARM64vecu_FABS32x4;    break;
               case Iop_Abs16Fx8:          op = ARM64vecu_FABS16x8;    break;
               case Iop_Neg64Fx2:          op = ARM64vecu_FNEG64x2;    break;
               case Iop_Neg32Fx4:          op = ARM64vecu_FNEG32x4;    break;
               case Iop_Neg16Fx8:          op = ARM64vecu_FNEG16x8;    break;
               case Iop_Abs64x2:           op = ARM64vecu_ABS64x2;     break;
               case Iop_Abs32x4:           op = ARM64vecu_ABS32x4;     break;
               case Iop_Abs16x8:           op = ARM64vecu_ABS16x8;     break;
               case Iop_Abs8x16:           op = ARM64vecu_ABS8x16;     break;
               case Iop_Cls32x4:           op = ARM64vecu_CLS32x4;     break;
               case Iop_Cls16x8:           op = ARM64vecu_CLS16x8;     break;
               case Iop_Cls8x16:           op = ARM64vecu_CLS8x16;     break;
               case Iop_Clz32x4:           op = ARM64vecu_CLZ32x4;     break;
               case Iop_Clz16x8:           op = ARM64vecu_CLZ16x8;     break;
               case Iop_Clz8x16:           op = ARM64vecu_CLZ8x16;     break;
               case Iop_Cnt8x16:           op = ARM64vecu_CNT8x16;     break;
               case Iop_Reverse1sIn8_x16:  op = ARM64vecu_RBIT;        break;
               case Iop_Reverse8sIn16_x8:  op = ARM64vecu_REV1616B;    break;
               case Iop_Reverse8sIn32_x4:  op = ARM64vecu_REV3216B;    break;
               case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H;     break;
               case Iop_Reverse8sIn64_x2:  op = ARM64vecu_REV6416B;    break;
               case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H;     break;
               case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;     break;
               case Iop_RecipEst32Ux4:     op = ARM64vecu_URECPE32x4;  break;
               case Iop_RSqrtEst32Ux4:     op = ARM64vecu_URSQRTE32x4; break;
               case Iop_RecipEst64Fx2:     setRM = True;
                                           op = ARM64vecu_FRECPE64x2;  break;
               case Iop_RecipEst32Fx4:     setRM = True;
                                           op = ARM64vecu_FRECPE32x4;  break;
               case Iop_RSqrtEst64Fx2:     setRM = True;
                                           op = ARM64vecu_FRSQRTE64x2; break;
               case Iop_RSqrtEst32Fx4:     setRM = True;
                                           op = ARM64vecu_FRSQRTE32x4; break;
               default: vassert(0);
            }
            if (setRM) {
               // This is a bit of a kludge.  We should do rm properly for
               // these recip-est insns, but that would require changing the
               // primop's type to take an rmode.
               set_FPCR_rounding_mode(env, IRExpr_Const(
                                              IRConst_U32(Irrm_NEAREST)));
            }
            addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
            return res;
         }
         case Iop_CmpNEZ8x16:
         case Iop_CmpNEZ16x8:
         case Iop_CmpNEZ32x4:
         case Iop_CmpNEZ64x2: {
            HReg arg  = iselV128Expr(env, e->Iex.Unop.arg);
            HReg zero = newVRegV(env);
            HReg res  = newVRegV(env);
            ARM64VecBinOp cmp = ARM64vecb_INVALID;
            switch (e->Iex.Unop.op) {
               case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
               case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
               case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
               case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
               default: vassert(0);
            }
            // This is pretty feeble.  Better: use CMP against zero
            // and avoid the extra instruction and extra register.
            addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
            addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
            addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
            return res;
         }
         case Iop_V256toV128_0:
         case Iop_V256toV128_1: {
            HReg vHi, vLo;
            iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
            return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
         }
         case Iop_64UtoV128: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            return res;
         }
         case Iop_Widen8Sto16x8: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
                                                res, res, 8));
            return res;
         }
         case Iop_Widen16Sto32x4: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
                                                res, res, 16));
            return res;
         }
         case Iop_Widen32Sto64x2: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
                                                res, res, 32));
            return res;
         }
         default:
            break;
      } /* switch on the unop */
   } /* if (e->tag == Iex_Unop) */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         case Iop_Sqrt16Fx8:
         case Iop_Sqrt32Fx4:
         case Iop_Sqrt64Fx2: {
            HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
            HReg res = newVRegV(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            ARM64VecUnaryOp op = ARM64vecu_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_Sqrt16Fx8: op = ARM64vecu_FSQRT16x8; break;
               case Iop_Sqrt32Fx4: op = ARM64vecu_FSQRT32x4; break;
               case Iop_Sqrt64Fx2: op = ARM64vecu_FSQRT64x2; break;
               default: vassert(0);
            }
            addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
            return res;
         }

         case Iop_64HLtoV128: {
            HReg res  = newVRegV(env);
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
            return res;
         }

         /* -- Cases where we can generate a simple three-reg instruction. -- */
         case Iop_AndV128: case Iop_OrV128: case Iop_XorV128:
         case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
         case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
         case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
         case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
         case Iop_Add64x2: case Iop_Add32x4:
         case Iop_Add16x8: case Iop_Add8x16:
         case Iop_Sub64x2: case Iop_Sub32x4:
         case Iop_Sub16x8: case Iop_Sub8x16:
         case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
         case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
         case Iop_CmpEQ16x8: case Iop_CmpEQ8x16:
         case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
         case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
         case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
         case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
         case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
         case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
         case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
         case Iop_CmpLT16Fx8: case Iop_CmpLE16Fx8: case Iop_CmpEQ16Fx8:
         case Iop_Perm8x16:
         case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
         case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
         case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
         case Iop_CatOddLanes16x8:  case Iop_CatOddLanes8x16:
         case Iop_InterleaveHI32x4:
         case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
         case Iop_InterleaveLO32x4:
         case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
         case Iop_PolynomialMul8x16:
         case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
         case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
         case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
         case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
         case Iop_QSub64Sx2: case Iop_QSub32Sx4:
         case Iop_QSub16Sx8: case Iop_QSub8Sx16:
         case Iop_QSub64Ux2: case Iop_QSub32Ux4:
         case Iop_QSub16Ux8: case Iop_QSub8Ux16:
         case Iop_QDMulHi32Sx4:  case Iop_QDMulHi16Sx8:
         case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
         case Iop_Sh8Sx16: case Iop_Sh16Sx8:
         case Iop_Sh32Sx4: case Iop_Sh64Sx2:
         case Iop_Sh8Ux16: case Iop_Sh16Ux8:
         case Iop_Sh32Ux4: case Iop_Sh64Ux2:
         case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
         case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
         case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
         case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
         case Iop_Max64Fx2: case Iop_Max32Fx4:
         case Iop_Min64Fx2: case Iop_Min32Fx4:
         case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
         case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
         {
            HReg res  = newVRegV(env);
            HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
            HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
            Bool sw    = False;
            Bool setRM = False;
            ARM64VecBinOp op = ARM64vecb_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_AndV128:   op = ARM64vecb_AND;      break;
               case Iop_OrV128:    op = ARM64vecb_ORR;      break;
               case Iop_XorV128:   op = ARM64vecb_XOR;      break;
               case Iop_Max32Ux4:  op = ARM64vecb_UMAX32x4; break;
               case Iop_Max16Ux8:  op = ARM64vecb_UMAX16x8; break;
               case Iop_Max8Ux16:  op = ARM64vecb_UMAX8x16; break;
               case Iop_Min32Ux4:  op = ARM64vecb_UMIN32x4; break;
               case Iop_Min16Ux8:  op = ARM64vecb_UMIN16x8; break;
               case Iop_Min8Ux16:  op = ARM64vecb_UMIN8x16; break;
               case Iop_Max32Sx4:  op = ARM64vecb_SMAX32x4; break;
               case Iop_Max16Sx8:  op = ARM64vecb_SMAX16x8; break;
               case Iop_Max8Sx16:  op = ARM64vecb_SMAX8x16; break;
               case Iop_Min32Sx4:  op = ARM64vecb_SMIN32x4; break;
               case Iop_Min16Sx8:  op = ARM64vecb_SMIN16x8; break;
               case Iop_Min8Sx16:  op = ARM64vecb_SMIN8x16; break;
               case Iop_Add64x2:   op = ARM64vecb_ADD64x2;  break;
               case Iop_Add32x4:   op = ARM64vecb_ADD32x4;  break;
               case Iop_Add16x8:   op = ARM64vecb_ADD16x8;  break;
               case Iop_Add8x16:   op = ARM64vecb_ADD8x16;  break;
               case Iop_Sub64x2:   op = ARM64vecb_SUB64x2;  break;
               case Iop_Sub32x4:   op = ARM64vecb_SUB32x4;  break;
               case Iop_Sub16x8:   op = ARM64vecb_SUB16x8;  break;
               case Iop_Sub8x16:   op = ARM64vecb_SUB8x16;  break;
               case Iop_Mul32x4:   op = ARM64vecb_MUL32x4;  break;
               case Iop_Mul16x8:   op = ARM64vecb_MUL16x8;  break;
               case Iop_Mul8x16:   op = ARM64vecb_MUL8x16;  break;
               case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
               case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
               case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
               case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
               case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
               case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
               case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
               case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
               case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
               case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
               case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
               case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
               case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
               case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
               case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
               case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
               case Iop_CmpLE16Fx8: op = ARM64vecb_FCMGE16x8; sw = True; break;
               case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
               case Iop_CmpLT16Fx8: op = ARM64vecb_FCMGT16x8; sw = True; break;
               case Iop_CmpEQ16Fx8: op = ARM64vecb_FCMEQ16x8; sw = True; break;
               case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
               case Iop_Perm8x16:   op = ARM64vecb_TBL1;      break;
               case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
                                          break;
               case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
                                          break;
               case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
                                          break;
               case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
                                          break;
               case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
                                          break;
               case Iop_CatOddLanes32x4:  op = ARM64vecb_UZP232x4; sw = True;
                                          break;
               case Iop_CatOddLanes16x8:  op = ARM64vecb_UZP216x8; sw = True;
                                          break;
               case Iop_CatOddLanes8x16:  op = ARM64vecb_UZP28x16; sw = True;
                                          break;
               case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
                                          break;
               case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
                                          break;
               case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
                                          break;
               case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
                                          break;
               case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
                                          break;
               case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
                                          break;
               case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
               case Iop_QAdd64Sx2:      op = ARM64vecb_SQADD64x2; break;
               case Iop_QAdd32Sx4:      op = ARM64vecb_SQADD32x4; break;
               case Iop_QAdd16Sx8:      op = ARM64vecb_SQADD16x8; break;
               case Iop_QAdd8Sx16:      op = ARM64vecb_SQADD8x16; break;
               case Iop_QAdd64Ux2:      op = ARM64vecb_UQADD64x2; break;
               case Iop_QAdd32Ux4:      op = ARM64vecb_UQADD32x4; break;
               case Iop_QAdd16Ux8:      op = ARM64vecb_UQADD16x8; break;
               case Iop_QAdd8Ux16:      op = ARM64vecb_UQADD8x16; break;
               case Iop_QSub64Sx2:      op = ARM64vecb_SQSUB64x2; break;
               case Iop_QSub32Sx4:      op = ARM64vecb_SQSUB32x4; break;
               case Iop_QSub16Sx8:      op = ARM64vecb_SQSUB16x8; break;
               case Iop_QSub8Sx16:      op = ARM64vecb_SQSUB8x16; break;
               case Iop_QSub64Ux2:      op = ARM64vecb_UQSUB64x2; break;
               case Iop_QSub32Ux4:      op = ARM64vecb_UQSUB32x4; break;
               case Iop_QSub16Ux8:      op = ARM64vecb_UQSUB16x8; break;
               case Iop_QSub8Ux16:      op = ARM64vecb_UQSUB8x16; break;
               case Iop_QDMulHi32Sx4:   op = ARM64vecb_SQDMULH32x4;  break;
               case Iop_QDMulHi16Sx8:   op = ARM64vecb_SQDMULH16x8;  break;
               case Iop_QRDMulHi32Sx4:  op = ARM64vecb_SQRDMULH32x4; break;
               case Iop_QRDMulHi16Sx8:  op = ARM64vecb_SQRDMULH16x8; break;
               case Iop_Sh8Sx16:        op = ARM64vecb_SSHL8x16;  break;
               case Iop_Sh16Sx8:        op = ARM64vecb_SSHL16x8;  break;
               case Iop_Sh32Sx4:        op = ARM64vecb_SSHL32x4;  break;
               case Iop_Sh64Sx2:        op = ARM64vecb_SSHL64x2;  break;
               case Iop_Sh8Ux16:        op = ARM64vecb_USHL8x16;  break;
               case Iop_Sh16Ux8:        op = ARM64vecb_USHL16x8;  break;
               case Iop_Sh32Ux4:        op = ARM64vecb_USHL32x4;  break;
               case Iop_Sh64Ux2:        op = ARM64vecb_USHL64x2;  break;
               case Iop_Rsh8Sx16:       op = ARM64vecb_SRSHL8x16; break;
               case Iop_Rsh16Sx8:       op = ARM64vecb_SRSHL16x8; break;
               case Iop_Rsh32Sx4:       op = ARM64vecb_SRSHL32x4; break;
               case Iop_Rsh64Sx2:       op = ARM64vecb_SRSHL64x2; break;
               case Iop_Rsh8Ux16:       op = ARM64vecb_URSHL8x16; break;
               case Iop_Rsh16Ux8:       op = ARM64vecb_URSHL16x8; break;
               case Iop_Rsh32Ux4:       op = ARM64vecb_URSHL32x4; break;
               case Iop_Rsh64Ux2:       op = ARM64vecb_URSHL64x2; break;
               case Iop_Max64Fx2:       op = ARM64vecb_FMAX64x2;  break;
               case Iop_Max32Fx4:       op = ARM64vecb_FMAX32x4;  break;
               case Iop_Min64Fx2:       op = ARM64vecb_FMIN64x2;  break;
               case Iop_Min32Fx4:       op = ARM64vecb_FMIN32x4;  break;
               case Iop_RecipStep64Fx2: setRM = True;
                                        op = ARM64vecb_FRECPS64x2;  break;
               case Iop_RecipStep32Fx4: setRM = True;
                                        op = ARM64vecb_FRECPS32x4;  break;
               case Iop_RSqrtStep64Fx2: setRM = True;
                                        op = ARM64vecb_FRSQRTS64x2; break;
               case Iop_RSqrtStep32Fx4: setRM = True;
                                        op = ARM64vecb_FRSQRTS32x4; break;
               default: vassert(0);
            }
            if (setRM) {
               // This is a bit of a kludge.  We should do rm properly for
               // these recip-step insns, but that would require changing the
               // primop's type to take an rmode.
               set_FPCR_rounding_mode(env, IRExpr_Const(
                                              IRConst_U32(Irrm_NEAREST)));
            }
            if (sw) {
               addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
            } else {
               addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
            }
            return res;
         }

         /* -- These only have 2 operand instructions, so we have to first move
            the first argument into a new register, for modification. -- */
         case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
         case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
         case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
         case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
         {
            HReg res  = newVRegV(env);
            HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
            HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
            ARM64VecModifyOp op = ARM64vecmo_INVALID;
            switch (e->Iex.Binop.op) {
               /* In the following 8 cases, the US - SU switching is intended.
                  See comments on the libvex_ir.h for details.  Also in the
                  ARM64 front end, where used these primops are generated. */
               case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
               case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
               case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
               case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
               case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
               case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
               case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
               case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
               default: vassert(0);
            }
            /* The order of the operands is important.  Although this is
               basically addition, the two operands are extended differently,
               making it important to get them into the correct registers in
               the instruction. */
            addInstr(env, ARM64Instr_VMov(16, res, argR));
            addInstr(env, ARM64Instr_VModifyV(op, res, argL));
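            /* SUQADD/USQADD are accumulating instructions: they add the
               (differently-extended) source operand into the destination
               register.  That is why argR is first copied into |res| and
               argL is then folded into it. */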
            return res;
         }

         /* -- Shifts by an immediate. -- */
         case Iop_ShrN64x2: case Iop_ShrN32x4:
         case Iop_ShrN16x8: case Iop_ShrN8x16:
         case Iop_SarN64x2: case Iop_SarN32x4:
         case Iop_SarN16x8: case Iop_SarN8x16:
         case Iop_ShlN64x2: case Iop_ShlN32x4:
         case Iop_ShlN16x8: case Iop_ShlN8x16:
         case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
         case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
         case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
         case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
         case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
         case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
         {
            IRExpr* argL = e->Iex.Binop.arg1;
            IRExpr* argR = e->Iex.Binop.arg2;
            if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
               UInt amt   = argR->Iex.Const.con->Ico.U8;
               UInt limLo = 0;
               UInt limHi = 0;
               ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
               /* Establish the instruction to use. */
               switch (e->Iex.Binop.op) {
                  case Iop_ShrN64x2:       op = ARM64vecshi_USHR64x2;  break;
                  case Iop_ShrN32x4:       op = ARM64vecshi_USHR32x4;  break;
                  case Iop_ShrN16x8:       op = ARM64vecshi_USHR16x8;  break;
                  case Iop_ShrN8x16:       op = ARM64vecshi_USHR8x16;  break;
                  case Iop_SarN64x2:       op = ARM64vecshi_SSHR64x2;  break;
                  case Iop_SarN32x4:       op = ARM64vecshi_SSHR32x4;  break;
                  case Iop_SarN16x8:       op = ARM64vecshi_SSHR16x8;  break;
                  case Iop_SarN8x16:       op = ARM64vecshi_SSHR8x16;  break;
                  case Iop_ShlN64x2:       op = ARM64vecshi_SHL64x2;   break;
                  case Iop_ShlN32x4:       op = ARM64vecshi_SHL32x4;   break;
                  case Iop_ShlN16x8:       op = ARM64vecshi_SHL16x8;   break;
                  case Iop_ShlN8x16:       op = ARM64vecshi_SHL8x16;   break;
                  case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break;
                  case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break;
                  case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break;
                  case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break;
                  case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break;
                  case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break;
                  case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break;
                  case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break;
                  case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
                  case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
                  case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
                  case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
                  default: vassert(0);
               }
               /* Establish the shift limits, for sanity check purposes only. */
               switch (e->Iex.Binop.op) {
                  case Iop_ShrN64x2:       limLo = 1; limHi = 64; break;
                  case Iop_ShrN32x4:       limLo = 1; limHi = 32; break;
                  case Iop_ShrN16x8:       limLo = 1; limHi = 16; break;
                  case Iop_ShrN8x16:       limLo = 1; limHi = 8;  break;
                  case Iop_SarN64x2:       limLo = 1; limHi = 64; break;
                  case Iop_SarN32x4:       limLo = 1; limHi = 32; break;
                  case Iop_SarN16x8:       limLo = 1; limHi = 16; break;
                  case Iop_SarN8x16:       limLo = 1; limHi = 8;  break;
                  case Iop_ShlN64x2:       limLo = 0; limHi = 63; break;
                  case Iop_ShlN32x4:       limLo = 0; limHi = 31; break;
                  case Iop_ShlN16x8:       limLo = 0; limHi = 15; break;
                  case Iop_ShlN8x16:       limLo = 0; limHi = 7;  break;
                  case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
                  case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
                  case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
                  case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7;  break;
                  case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
                  case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
                  case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
                  case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7;  break;
                  case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
                  case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
                  case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
                  case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7;  break;
                  default: vassert(0);
               }
               /* For left shifts, the allowable amt values are
                  0 .. lane_bits-1.  For right shifts the allowable
                  values are 1 .. lane_bits. */
               if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
                  HReg src = iselV128Expr(env, argL);
                  HReg dst = newVRegV(env);
                  addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
                  return dst;
               }
               /* Special case some no-op shifts that the arm64 front end
                  throws at us.  We can't generate any instructions for these,
                  but we don't need to either. */
               switch (e->Iex.Binop.op) {
                  case Iop_ShrN64x2: case Iop_ShrN32x4:
                  case Iop_ShrN16x8: case Iop_ShrN8x16:
                     if (amt == 0) {
                        return iselV128Expr(env, argL);
                     }
                     break;
                  default:
                     break;
               }
               /* otherwise unhandled */
            }
            /* else fall out; this is unhandled */
            break;
         }

         /* -- Saturating narrowing by an immediate -- */
         case Iop_QandQShrNnarrow16Uto8Ux8:
         case Iop_QandQShrNnarrow32Uto16Ux4:
         case Iop_QandQShrNnarrow64Uto32Ux2:
         case Iop_QandQSarNnarrow16Sto8Sx8:
         case Iop_QandQSarNnarrow32Sto16Sx4:
         case Iop_QandQSarNnarrow64Sto32Sx2:
         case Iop_QandQSarNnarrow16Sto8Ux8:
         case Iop_QandQSarNnarrow32Sto16Ux4:
         case Iop_QandQSarNnarrow64Sto32Ux2:
         case Iop_QandQRShrNnarrow16Uto8Ux8:
         case Iop_QandQRShrNnarrow32Uto16Ux4:
         case Iop_QandQRShrNnarrow64Uto32Ux2:
         case Iop_QandQRSarNnarrow16Sto8Sx8:
         case Iop_QandQRSarNnarrow32Sto16Sx4:
         case Iop_QandQRSarNnarrow64Sto32Sx2:
         case Iop_QandQRSarNnarrow16Sto8Ux8:
         case Iop_QandQRSarNnarrow32Sto16Ux4:
         case Iop_QandQRSarNnarrow64Sto32Ux2:
         {
            IRExpr* argL = e->Iex.Binop.arg1;
            IRExpr* argR = e->Iex.Binop.arg2;
            if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
               UInt amt   = argR->Iex.Const.con->Ico.U8;
               UInt limit = 0;
               ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
               switch (e->Iex.Binop.op) {
                  case Iop_QandQShrNnarrow64Uto32Ux2:
                     op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
                  case Iop_QandQShrNnarrow32Uto16Ux4:
                     op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
                  case Iop_QandQShrNnarrow16Uto8Ux8:
                     op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
                  case Iop_QandQSarNnarrow64Sto32Sx2:
                     op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
                  case Iop_QandQSarNnarrow32Sto16Sx4:
                     op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
                  case Iop_QandQSarNnarrow16Sto8Sx8:
                     op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
                  case Iop_QandQSarNnarrow64Sto32Ux2:
                     op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
                  case Iop_QandQSarNnarrow32Sto16Ux4:
                     op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
                  case Iop_QandQSarNnarrow16Sto8Ux8:
                     op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
                  case Iop_QandQRShrNnarrow64Uto32Ux2:
                     op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
                  case Iop_QandQRShrNnarrow32Uto16Ux4:
                     op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
                  case Iop_QandQRShrNnarrow16Uto8Ux8:
                     op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
                  case Iop_QandQRSarNnarrow64Sto32Sx2:
                     op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
                  case Iop_QandQRSarNnarrow32Sto16Sx4:
                     op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
                  case Iop_QandQRSarNnarrow16Sto8Sx8:
                     op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
                  case Iop_QandQRSarNnarrow64Sto32Ux2:
                     op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
                  case Iop_QandQRSarNnarrow32Sto16Ux4:
                     op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
                  case Iop_QandQRSarNnarrow16Sto8Ux8:
                     op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
                  default:
                     vassert(0);
               }
               if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
                  HReg src  = iselV128Expr(env, argL);
                  HReg dst  = newVRegV(env);
                  HReg fpsr = newVRegI(env);
                  /* Clear FPSR.Q, do the operation, and return both its
                     result and the new value of FPSR.Q.  We can simply
                     zero out FPSR since all the other bits have no relevance
                     in VEX generated code. */
                  addInstr(env, ARM64Instr_Imm64(fpsr, 0));
                  addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
                  addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
                  addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
                  addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
                                                 ARM64sh_SHR));
                  ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
                  addInstr(env, ARM64Instr_Logic(fpsr,
                                                 fpsr, ril_one, ARM64lo_AND));
                  /* Now we have: the main (shift) result in the bottom half
                     of |dst|, and the Q bit at the bottom of |fpsr|.
                     Combining them with a "InterleaveLO64x2" style operation
                     produces a 128 bit value, dst[63:0]:fpsr[63:0],
                     which is what we want. */
                  HReg scratch = newVRegV(env);
                  addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
                  addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
                                                 dst, dst, scratch));
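                  /* FPSR.QC (cumulative saturation) is bit 27 of FPSR, which
                     is what the shift-right by 27 and the AND-with-1 above
                     isolate into the low bit of |fpsr|. */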
                  return dst;
               }
            }
            /* else fall out; this is unhandled */
            break;
         }

         // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
         // as it is in some ways more general and often leads to better
         // code overall.
         case Iop_ShlV128: case Iop_ShrV128: {
            Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
            /* This is tricky.  Generate an EXT instruction with zeroes in
               the high operand (shift right) or low operand (shift left).
               Note that we can only slice in the EXT instruction at a byte
               level of granularity, so the shift amount needs careful
               checking. */
            IRExpr* argL = e->Iex.Binop.arg1;
            IRExpr* argR = e->Iex.Binop.arg2;
            if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
               UInt amt   = argR->Iex.Const.con->Ico.U8;
               Bool amtOK = False;
               switch (amt) {
                  case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
                  case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
                  case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
                     amtOK = True; break;
               }
               /* We could also deal with amt==0 by copying the source to
                  the destination, but there's no need for that so far. */
               if (amtOK) {
                  HReg src  = iselV128Expr(env, argL);
                  HReg srcZ = newVRegV(env);
                  addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
                  UInt immB = amt / 8;
                  vassert(immB >= 1 && immB <= 15);
                  HReg dst = newVRegV(env);
                  if (isSHR) {
                     addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
                                                    immB));
                  } else {
                     addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
                                                    16 - immB));
                  }
                  return dst;
               }
            }
            /* else fall out; this is unhandled */
            break;
         }
         case Iop_PolynomialMull8x8:
         case Iop_Mull32Ux2: case Iop_Mull16Ux4: case Iop_Mull8Ux8:
         case Iop_Mull32Sx2: case Iop_Mull16Sx4: case Iop_Mull8Sx8:
         case Iop_QDMull32Sx2:
         case Iop_QDMull16Sx4:
         {
            HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg vSrcL = newVRegV(env);
            HReg vSrcR = newVRegV(env);
            HReg dst   = newVRegV(env);
            ARM64VecBinOp op = ARM64vecb_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8;    break;
               case Iop_Mull32Ux2:         op = ARM64vecb_UMULL2DSS;   break;
               case Iop_Mull16Ux4:         op = ARM64vecb_UMULL4SHH;   break;
               case Iop_Mull8Ux8:          op = ARM64vecb_UMULL8HBB;   break;
               case Iop_Mull32Sx2:         op = ARM64vecb_SMULL2DSS;   break;
               case Iop_Mull16Sx4:         op = ARM64vecb_SMULL4SHH;   break;
               case Iop_Mull8Sx8:          op = ARM64vecb_SMULL8HBB;   break;
               case Iop_QDMull32Sx2:       op = ARM64vecb_SQDMULL2DSS; break;
               case Iop_QDMull16Sx4:       op = ARM64vecb_SQDMULL4SHH; break;
               default: vassert(0);
            }
            addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
            addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
            addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
            return dst;
         }

         default:
            break;
      } /* switch on the binop */
   } /* if (e->tag == Iex_Binop) */
   if (e->tag == Iex_Triop) {
      IRTriop*      triop  = e->Iex.Triop.details;
      ARM64VecBinOp vecbop = ARM64vecb_INVALID;
      switch (triop->op) {
         case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
         case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
         case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
         case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
         case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
         case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
         case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
         case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
         case Iop_Add16Fx8: vecbop = ARM64vecb_FADD16x8; break;
         case Iop_Sub16Fx8: vecbop = ARM64vecb_FSUB16x8; break;
         default:
            break;
      }

      if (vecbop != ARM64vecb_INVALID) {
         HReg argL = iselV128Expr(env, triop->arg2);
         HReg argR = iselV128Expr(env, triop->arg3);
         HReg dst  = newVRegV(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
         return dst;
      }

      if (triop->op == Iop_SliceV128) {
         /* Note that, compared to ShlV128/ShrV128 just above, the shift
            amount here is in bytes, not bits. */
         IRExpr* argHi  = triop->arg1;
         IRExpr* argLo  = triop->arg2;
         IRExpr* argAmt = triop->arg3;
         if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
            UInt amt   = argAmt->Iex.Const.con->Ico.U8;
            Bool amtOK = amt >= 1 && amt <= 15;
            /* We could also deal with amt==0 by copying argLO to
               the destination, but there's no need for that so far. */
            if (amtOK) {
               HReg srcHi = iselV128Expr(env, argHi);
               HReg srcLo = iselV128Expr(env, argLo);
               HReg dst   = newVRegV(env);
               addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
               return dst;
            }
         }
         /* else fall out; this is unhandled */
      }
   } /* if (e->tag == Iex_Triop) */
   if (e->tag == Iex_ITE) {
      // This code sequence is pretty feeble.  We'd do better to generate BSL
      // here.
      HReg rX = newVRegI(env);

      ARM64CondCode cc = iselCondCode_C(env, e->Iex.ITE.cond);
      addInstr(env, ARM64Instr_Set64(rX, cc));
      // cond: rX = 1   !cond: rX = 0

      // Mask the Set64 result.  This is paranoia (should be unnecessary).
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      addInstr(env, ARM64Instr_Logic(rX, rX, one, ARM64lo_AND));
      // cond: rX = 1   !cond: rX = 0

      // Propagate to all bits in the 64 bit word by subtracting 1 from it.
      // This also inverts the sense of the value.
      addInstr(env, ARM64Instr_Arith(rX, rX, ARM64RIA_I12(1,0),
                                     False/*!isAdd*/));
      // cond: rX = 0-(62)-0   !cond: rX = 1-(62)-1

      // Duplicate rX into a vector register
      HReg vMask = newVRegV(env);
      addInstr(env, ARM64Instr_VQfromXX(vMask, rX, rX));
      // cond: vMask = 0-(126)-0   !cond: vMask = 1-(126)-1

      HReg vIfTrue  = iselV128Expr(env, e->Iex.ITE.iftrue);
      HReg vIfFalse = iselV128Expr(env, e->Iex.ITE.iffalse);

      // Mask out iffalse value as needed
      addInstr(env,
               ARM64Instr_VBinV(ARM64vecb_AND, vIfFalse, vIfFalse, vMask));

      // Invert the mask so we can use it for the iftrue value
      addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, vMask, vMask));
      // cond: vMask = 1-(126)-1   !cond: vMask = 0-(126)-0

      // Mask out iftrue value as needed
      addInstr(env,
               ARM64Instr_VBinV(ARM64vecb_AND, vIfTrue, vIfTrue, vMask));

      // Merge the masked iftrue and iffalse results.
      HReg res = newVRegV(env);
      addInstr(env, ARM64Instr_VBinV(ARM64vecb_ORR, res, vIfTrue, vIfFalse));
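      /* The AND / NOT / AND / ORR sequence above is a plain bitwise select:
         res = (vIfTrue & mask) | (vIfFalse & ~mask), where the mask is all
         ones when the condition holds.  A single BSL instruction could do
         the same job, as the comment at the top of this block notes. */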
      return res;
   }

   vpanic("iselV128Expr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)          ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F64);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
      if (con->tag == Ico_F64) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         union { Double d64; ULong u64; } u;
         vassert(sizeof(u) == 8);
         u.d64 = con->Ico.F64;
         addInstr(env, ARM64Instr_Imm64(src, u.u64));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F64);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res  = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
      return res;
   }

   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_F16toF64: {
            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Rounding mode is not involved here, since the
               conversion can always be done without loss of
               precision. */
            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg dst   = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
            return dst;
         }
         case Iop_RoundF64toIntA0: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINTA0, dst, src));
            return dst;
         }
         case Iop_RoundF64toIntE: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINTE, dst, src));
3460 if (e
->tag
== Iex_Binop
) {
3461 switch (e
->Iex
.Binop
.op
) {
3462 case Iop_RoundF64toInt
:
3464 case Iop_RecpExpF64
: {
3465 HReg src
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
3466 HReg dst
= newVRegD(env
);
3467 set_FPCR_rounding_mode(env
, e
->Iex
.Binop
.arg1
);
3468 ARM64FpUnaryOp op
= ARM64fpu_INVALID
;
3469 switch (e
->Iex
.Binop
.op
) {
3470 case Iop_RoundF64toInt
: op
= ARM64fpu_RINT
; break;
3471 case Iop_SqrtF64
: op
= ARM64fpu_SQRT
; break;
3472 case Iop_RecpExpF64
: op
= ARM64fpu_RECPX
; break;
3473 default: vassert(0);
3475 addInstr(env
, ARM64Instr_VUnaryD(op
, dst
, src
));
3479 case Iop_I64UtoF64
: {
3480 ARM64CvtOp cvt_op
= e
->Iex
.Binop
.op
== Iop_I64StoF64
3481 ? ARM64cvt_F64_I64S
: ARM64cvt_F64_I64U
;
3482 HReg srcI
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
3483 set_FPCR_rounding_mode(env
, e
->Iex
.Binop
.arg1
);
3484 HReg dstS
= newVRegD(env
);
3485 addInstr(env
, ARM64Instr_VCvtI2F(cvt_op
, dstS
, srcI
));
3493 if (e
->tag
== Iex_Triop
) {
3494 IRTriop
* triop
= e
->Iex
.Triop
.details
;
3495 ARM64FpBinOp dblop
= ARM64fpb_INVALID
;
3496 switch (triop
->op
) {
3497 case Iop_DivF64
: dblop
= ARM64fpb_DIV
; break;
3498 case Iop_MulF64
: dblop
= ARM64fpb_MUL
; break;
3499 case Iop_SubF64
: dblop
= ARM64fpb_SUB
; break;
3500 case Iop_AddF64
: dblop
= ARM64fpb_ADD
; break;
3503 if (dblop
!= ARM64fpb_INVALID
) {
3504 HReg argL
= iselDblExpr(env
, triop
->arg2
);
3505 HReg argR
= iselDblExpr(env
, triop
->arg3
);
3506 HReg dst
= newVRegD(env
);
3507 set_FPCR_rounding_mode(env
, triop
->arg1
);
3508 addInstr(env
, ARM64Instr_VBinD(dblop
, dst
, argL
, argR
));
3513 if (e
->tag
== Iex_Qop
) {
3514 IRQop
* qop
= e
->Iex
.Qop
.details
;
3515 ARM64FpTriOp triop
= ARM64fpt_INVALID
;
3517 case Iop_MAddF64
: triop
= ARM64fpt_FMADD
; break;
3518 case Iop_MSubF64
: triop
= ARM64fpt_FMSUB
; break;
3521 if (triop
!= ARM64fpt_INVALID
) {
3522 HReg N
= iselDblExpr(env
, qop
->arg2
);
3523 HReg M
= iselDblExpr(env
, qop
->arg3
);
3524 HReg A
= iselDblExpr(env
, qop
->arg4
);
3525 HReg dst
= newVRegD(env
);
3526 set_FPCR_rounding_mode(env
, qop
->arg1
);
3527 addInstr(env
, ARM64Instr_VTriD(triop
, dst
, N
, M
, A
));
3532 if (e
->tag
== Iex_ITE
) {
3533 /* ITE(ccexpr, iftrue, iffalse) */
3535 HReg r1
= iselDblExpr(env
, e
->Iex
.ITE
.iftrue
);
3536 HReg r0
= iselDblExpr(env
, e
->Iex
.ITE
.iffalse
);
3537 HReg dst
= newVRegD(env
);
3538 cc
= iselCondCode_C(env
, e
->Iex
.ITE
.cond
);
3539 addInstr(env
, ARM64Instr_VFCSel(dst
, r1
, r0
, cc
, True
/*64-bit*/));
3544 vpanic("iselDblExpr_wrk");

/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  Values are generated into HRcFlt64
   registers despite the values themselves being Ity_F32s. */

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* This is something of a kludge.  Since a 32 bit floating point
         zero is just .. all zeroes, just create a 64 bit zero word
         and transfer it.  This avoids having to create a SfromW
         instruction for this specific case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, 0));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
      if (con->tag == Ico_F32) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         union { Float f32; UInt u32; } u;
         vassert(sizeof(u) == 4);
         u.f32 = con->Ico.F32;
         addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F32);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res  = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
      return res;
   }

   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F16toF32: {
            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
            return dst;
         }
         case Iop_RoundF32toIntA0: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINTA0, dst, src));
            return dst;
         }
         case Iop_RoundF32toIntE: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINTE, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF32toInt:
         case Iop_SqrtF32:
         case Iop_RecpExpF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            ARM64FpUnaryOp op = ARM64fpu_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_RoundF32toInt: op = ARM64fpu_RINT;  break;
               case Iop_SqrtF32:       op = ARM64fpu_SQRT;  break;
               case Iop_RecpExpF32:    op = ARM64fpu_RECPX; break;
               default: vassert(0);
            }
            addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
            return dstS;
         }
         case Iop_I32UtoF32:
         case Iop_I32StoF32:
         case Iop_I64UtoF32:
         case Iop_I64StoF32: {
            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
               default: vassert(0);
            }
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop*     triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         HReg argL = iselFltExpr(env, triop->arg2);
         HReg argR = iselFltExpr(env, triop->arg3);
         HReg dst  = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
         return dst;
      }
   }

   if (e->tag == Iex_ITE) {
      /* ITE(ccexpr, iftrue, iffalse) */
      ARM64CondCode cc;
      HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegD(env);
      cc = iselCondCode_C(env, e->Iex.ITE.cond);
      addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
      return dst;
   }

   if (e->tag == Iex_Qop) {
      IRQop*       qop   = e->Iex.Qop.details;
      ARM64FpTriOp triop = ARM64fpt_INVALID;
      switch (qop->op) {
         case Iop_MAddF32: triop = ARM64fpt_FMADD; break;
         case Iop_MSubF32: triop = ARM64fpt_FMSUB; break;
         default: break;
      }
      if (triop != ARM64fpt_INVALID) {
         HReg N   = iselFltExpr(env, qop->arg2);
         HReg M   = iselFltExpr(env, qop->arg3);
         HReg A   = iselFltExpr(env, qop->arg4);
         HReg dst = newVRegD(env);
         set_FPCR_rounding_mode(env, qop->arg1);
         addInstr(env, ARM64Instr_VTriS(triop, dst, N, M, A));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}

/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (16 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 16-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  Values are generated into HRcFlt64
   registers despite the values themselves being Ity_F16s. */

static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselF16Expr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY */
static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F16);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_NegF16: {
            HReg srcH = iselF16Expr(env, e->Iex.Unop.arg);
            HReg dstH = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryH(ARM64fpu_NEG, dstH, srcH));
            return dstH;
         }
         case Iop_AbsF16: {
            HReg srcH = iselF16Expr(env, e->Iex.Unop.arg);
            HReg dstH = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryH(ARM64fpu_ABS, dstH, srcH));
            return dstH;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_F32toF16: {
            HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstH = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
            return dstH;
         }
         case Iop_F64toF16: {
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstH = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
            return dstH;
         }
         case Iop_SqrtF16: {
            HReg src = iselF16Expr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryH(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop*     triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_AddF16: sglop = ARM64fpb_ADD; break;
         case Iop_SubF16: sglop = ARM64fpb_SUB; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         HReg argL = iselF16Expr(env, triop->arg2);
         HReg argR = iselF16Expr(env, triop->arg3);
         HReg dst  = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinH(sglop, dst, argL, argR));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselF16Expr_wrk");
}

/*---------------------------------------------------------*/
/*--- ISEL: Vector expressions (256 bit)                ---*/
/*---------------------------------------------------------*/

static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
                           ISelEnv* env, IRExpr* e )
{
   iselV256Expr_wrk( rHi, rLo, env, e );
   vassert(hregClass(*rHi) == HRcVec128);
   vassert(hregClass(*rLo) == HRcVec128);
   vassert(hregIsVirtual(*rHi));
   vassert(hregIsVirtual(*rLo));
}
/* DO NOT CALL THIS DIRECTLY */
static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                               ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_V256);

   /* read 256-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_V128HLtoV256: {
            *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
            *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
            return;
         }
         case Iop_QandSQsh64x2:
         case Iop_QandSQsh32x4:
         case Iop_QandSQsh16x8:
         case Iop_QandSQsh8x16:
         case Iop_QandUQsh64x2:
         case Iop_QandUQsh32x4:
         case Iop_QandUQsh16x8:
         case Iop_QandUQsh8x16:
         case Iop_QandSQRsh64x2:
         case Iop_QandSQRsh32x4:
         case Iop_QandSQRsh16x8:
         case Iop_QandSQRsh8x16:
         case Iop_QandUQRsh64x2:
         case Iop_QandUQRsh32x4:
         case Iop_QandUQRsh16x8:
         case Iop_QandUQRsh8x16:
         {
            HReg argL  = iselV128Expr(env, e->Iex.Binop.arg1);
            HReg argR  = iselV128Expr(env, e->Iex.Binop.arg2);
            HReg fpsr  = newVRegI(env);
            HReg resHi = newVRegV(env);
            HReg resLo = newVRegV(env);
            ARM64VecBinOp op = ARM64vecb_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_QandSQsh64x2:  op = ARM64vecb_SQSHL64x2;  break;
               case Iop_QandSQsh32x4:  op = ARM64vecb_SQSHL32x4;  break;
               case Iop_QandSQsh16x8:  op = ARM64vecb_SQSHL16x8;  break;
               case Iop_QandSQsh8x16:  op = ARM64vecb_SQSHL8x16;  break;
               case Iop_QandUQsh64x2:  op = ARM64vecb_UQSHL64x2;  break;
               case Iop_QandUQsh32x4:  op = ARM64vecb_UQSHL32x4;  break;
               case Iop_QandUQsh16x8:  op = ARM64vecb_UQSHL16x8;  break;
               case Iop_QandUQsh8x16:  op = ARM64vecb_UQSHL8x16;  break;
               case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
               case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
               case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
               case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
               case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
               case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
               case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
               case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
               default: vassert(0);
            }
            /* Clear FPSR.Q, do the operation, and return both its result
               and the new value of FPSR.Q.  We can simply zero out FPSR
               since all the other bits have no relevance in VEX generated
               code. */
            addInstr(env, ARM64Instr_Imm64(fpsr, 0));
            addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
            addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
            addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
            addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
                                           ARM64sh_SHR));
            ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
            vassert(ril_one);
            addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
            /* Now we have: the main (shift) result in |resLo|, and the
               Q bit at the bottom of |fpsr|. */
            addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
            *rHi = resHi;
            *rLo = resLo;
            return;
         }
         default:
            break;
      } /* switch on the binop */
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselV256Expr_wrk");
}
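
/* Aside: in the Qand* cases above, the saturation flag is recovered by
   reading FPSR after the vector op and isolating bit 27 (QC), which on
   AArch64 is the cumulative saturation bit.  The scalar computation is
   just a shift-and-mask.  Illustrative sketch only (not part of the
   selector):

      static ULong fpsrQBit ( ULong fpsr )
      {
         return (fpsr >> 27) & 1;   // QC is FPSR bit 27
      }
*/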

/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }
   switch (stmt->tag) {

      /* --------- STORE --------- */
      /* little-endian write to memory */
      case Ist_Store: {
         IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
         IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
         IREndness end = stmt->Ist.Store.end;

         if (tya != Ity_I64 || end != Iend_LE)
            goto stmt_fail;

         if (tyd == Ity_I64) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I32) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I16) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I8) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_V128) {
            HReg qD   = iselV128Expr(env, stmt->Ist.Store.data);
            HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
            return;
         }
         if (tyd == Ity_F64) {
            HReg dD   = iselDblExpr(env, stmt->Ist.Store.data);
            HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
            return;
         }
         if (tyd == Ity_F32) {
            HReg sD   = iselFltExpr(env, stmt->Ist.Store.data);
            HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
            return;
         }
         break;
      }

      /* --------- PUT --------- */
      /* write guest state, fixed offset */
      case Ist_Put: {
         IRType tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
         UInt   offs = (UInt)stmt->Ist.Put.offset;
         if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
            HReg rD = INVALID_HREG;
            if (isZeroU64(stmt->Ist.Put.data)) {
               // In this context, XZR_XSP denotes the zero register.
               rD = hregARM64_XZR_XSP();
            } else {
               rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            }
            ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I8 && offs < (1<<12)) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_V128 && offs < (1<<12)) {
            HReg qD   = iselV128Expr(env, stmt->Ist.Put.data);
            HReg addr = mk_baseblock_128bit_access_addr(env, offs);
            addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
            return;
         }
         if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
            HReg dD  = iselDblExpr(env, stmt->Ist.Put.data);
            HReg bbp = get_baseblock_register();
            addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
            return;
         }
         if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
            HReg sD  = iselFltExpr(env, stmt->Ist.Put.data);
            HReg bbp = get_baseblock_register();
            addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
            return;
         }
         if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
            HReg hD  = iselF16Expr(env, stmt->Ist.Put.data);
            HReg bbp = get_baseblock_register();
            addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
            return;
         }
         break;
      }
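
      /* Aside: the guards above restrict Ist_Put to offsets that fit
         the scaled unsigned 12-bit immediate of an AArch64 load/store:
         the offset must be a multiple of the access size and less than
         size << 12.  Illustrative sketch only (not part of the
         selector):

            static Bool offsetFitsScaled12 ( UInt offs, UInt szB )
            {
               return 0 == (offs % szB) && offs < (szB << 12);
            }
      */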

      /* --------- TMP --------- */
      /* assign value to temporary */
      case Ist_WrTmp: {
         IRTemp tmp = stmt->Ist.WrTmp.tmp;
         IRType ty  = typeOfIRTemp(env->type_env, tmp);

         if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
            /* We could do a lot better here.  But for the time being: */
            HReg dst = lookupIRTemp(env, tmp);
            HReg rD  = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
            addInstr(env, ARM64Instr_MovI(dst, rD));
            return;
         }
         if (ty == Ity_I1) {
            /* Here, we are generating a I1 value into a 64 bit register.
               Make sure the value in the register is only zero or one,
               but no other.  This allows optimisation of the
               1Uto64(tmp:I1) case, by making it simply a copy of the
               register holding 'tmp'.  The point being that the value in
               the register holding 'tmp' can only have been created
               here.  LATER: that seems dangerous; safer to do 'tmp & 1'
               in that case.  Also, could do this just with a single CINC
               insn. */
            HReg zero = hregARM64_XZR_XSP(); // XZR in this context
            HReg one  = newVRegI(env);
            HReg dst  = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_Imm64(one, 1));
            ARM64CondCode cc = iselCondCode_C(env, stmt->Ist.WrTmp.data);
            addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
            return;
         }
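
         /* Aside: the Ity_I1 case above materialises the condition as
            exactly 0 or 1 by conditionally selecting between XZR and a
            register preloaded with 1, i.e. the moral equivalent of

               dst = cond ? 1 : 0;   // CSEL dst, one, xzr, cond

            Illustrative only; the selector emits ARM64Instr_CSel for
            this rather than any scalar C expression. */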
         if (ty == Ity_F64) {
            HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(8, dst, src));
            return;
         }
         if (ty == Ity_F32) {
            HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
            return;
         }
         if (ty == Ity_F16) {
            HReg src = iselF16Expr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
            return;
         }
         if (ty == Ity_I128) {
            HReg rHi, rLo, dstHi, dstLo;
            iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
            lookupIRTempPair( &dstHi, &dstLo, env, tmp);
            addInstr(env, ARM64Instr_MovI(dstHi, rHi));
            addInstr(env, ARM64Instr_MovI(dstLo, rLo));
            return;
         }
         if (ty == Ity_V128) {
            HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(16, dst, src));
            return;
         }
         if (ty == Ity_V256) {
            HReg srcHi, srcLo, dstHi, dstLo;
            iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
            lookupIRTempPair( &dstHi, &dstLo, env, tmp);
            addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
            addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
            return;
         }
         break;
      }

      /* --------- Call to DIRTY helper --------- */
      /* call complex ("dirty") helper function */
      case Ist_Dirty: {
         IRDirty* d = stmt->Ist.Dirty.details;

         /* Figure out the return type, if any. */
         IRType retty = Ity_INVALID;
         if (d->tmp != IRTemp_INVALID)
            retty = typeOfIRTemp(env->type_env, d->tmp);

         Bool retty_ok = False;
         switch (retty) {
            case Ity_INVALID: /* function doesn't return anything */
            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
            case Ity_V128:
               retty_ok = True; break;
            default:
               break; /* will go to stmt_fail: */
         }
         if (!retty_ok)
            break; /* will go to stmt_fail: */

         /* Marshal args, do the call, and set the return value to 0x555..555
            if this is a conditional call that returns a value and the
            call is skipped. */
         UInt   addToSp = 0;
         RetLoc rloc    = mk_RetLoc_INVALID();
         doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
         vassert(is_sane_RetLoc(rloc));

         /* Now figure out what to do with the returned value, if any. */
         switch (retty) {
            case Ity_INVALID: {
               /* No return value.  Nothing to do. */
               vassert(d->tmp == IRTemp_INVALID);
               vassert(rloc.pri == RLPri_None);
               vassert(addToSp == 0);
               return;
            }
            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
               vassert(rloc.pri == RLPri_Int);
               vassert(addToSp == 0);
               /* The returned value is in x0.  Park it in the register
                  associated with tmp. */
               HReg dst = lookupIRTemp(env, d->tmp);
               addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
               return;
            }
            case Ity_V128: {
               /* The returned value is on the stack, and *retloc tells
                  us where.  Fish it off the stack and then move the
                  stack pointer upwards to clear it, as directed by
                  doRegisterAllocation. */
               vassert(rloc.pri == RLPri_V128SpRel);
               vassert(rloc.spOff < 256); // stay sane
               vassert(addToSp >= 16); // ditto
               vassert(addToSp < 256); // ditto
               HReg dst = lookupIRTemp(env, d->tmp);
               HReg tmp = newVRegI(env); // the address of the returned value
               addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
               addInstr(env, ARM64Instr_Arith(tmp, tmp,
                                              ARM64RIA_I12((UShort)rloc.spOff, 0),
                                              True/*isAdd*/ ));
               addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
               addInstr(env, ARM64Instr_AddToSP(addToSp));
               return;
            }
            default:
               /*NOTREACHED*/
               vassert(0);
         }
         break;
      }
4267 if (stmt
->Ist
.LLSC
.storedata
== NULL
) {
4269 IRTemp res
= stmt
->Ist
.LLSC
.result
;
4270 IRType ty
= typeOfIRTemp(env
->type_env
, res
);
4271 if (ty
== Ity_I128
|| ty
== Ity_I64
|| ty
== Ity_I32
4272 || ty
== Ity_I16
|| ty
== Ity_I8
) {
4274 HReg raddr
= iselIntExpr_R(env
, stmt
->Ist
.LLSC
.addr
);
4276 case Ity_I8
: szB
= 1; break;
4277 case Ity_I16
: szB
= 2; break;
4278 case Ity_I32
: szB
= 4; break;
4279 case Ity_I64
: szB
= 8; break;
4280 case Ity_I128
: szB
= 16; break;
4281 default: vassert(0);
4284 HReg r_dstMSword
= INVALID_HREG
;
4285 HReg r_dstLSword
= INVALID_HREG
;
4286 lookupIRTempPair(&r_dstMSword
, &r_dstLSword
, env
, res
);
4287 addInstr(env
, ARM64Instr_MovI(hregARM64_X4(), raddr
));
4288 addInstr(env
, ARM64Instr_LdrEXP());
4289 addInstr(env
, ARM64Instr_MovI(r_dstLSword
, hregARM64_X2()));
4290 addInstr(env
, ARM64Instr_MovI(r_dstMSword
, hregARM64_X3()));
4293 HReg r_dst
= lookupIRTemp(env
, res
);
4294 addInstr(env
, ARM64Instr_MovI(hregARM64_X4(), raddr
));
4295 addInstr(env
, ARM64Instr_LdrEX(szB
));
4296 addInstr(env
, ARM64Instr_MovI(r_dst
, hregARM64_X2()));
4303 IRType tyd
= typeOfIRExpr(env
->type_env
, stmt
->Ist
.LLSC
.storedata
);
4304 if (tyd
== Ity_I128
|| tyd
== Ity_I64
|| tyd
== Ity_I32
4305 || tyd
== Ity_I16
|| tyd
== Ity_I8
) {
4307 HReg rA
= iselIntExpr_R(env
, stmt
->Ist
.LLSC
.addr
);
4309 case Ity_I8
: szB
= 1; break;
4310 case Ity_I16
: szB
= 2; break;
4311 case Ity_I32
: szB
= 4; break;
4312 case Ity_I64
: szB
= 8; break;
4313 case Ity_I128
: szB
= 16; break;
4314 default: vassert(0);
4317 HReg rD_MSword
= INVALID_HREG
;
4318 HReg rD_LSword
= INVALID_HREG
;
4319 iselInt128Expr(&rD_MSword
,
4320 &rD_LSword
, env
, stmt
->Ist
.LLSC
.storedata
);
4321 addInstr(env
, ARM64Instr_MovI(hregARM64_X2(), rD_LSword
));
4322 addInstr(env
, ARM64Instr_MovI(hregARM64_X3(), rD_MSword
));
4323 addInstr(env
, ARM64Instr_MovI(hregARM64_X4(), rA
));
4324 addInstr(env
, ARM64Instr_StrEXP());
4327 HReg rD
= iselIntExpr_R(env
, stmt
->Ist
.LLSC
.storedata
);
4328 addInstr(env
, ARM64Instr_MovI(hregARM64_X2(), rD
));
4329 addInstr(env
, ARM64Instr_MovI(hregARM64_X4(), rA
));
4330 addInstr(env
, ARM64Instr_StrEX(szB
));
4335 /* now r0 is 1 if failed, 0 if success. Change to IR
4336 conventions (0 is fail, 1 is success). Also transfer
4338 IRTemp res
= stmt
->Ist
.LLSC
.result
;
4339 IRType ty
= typeOfIRTemp(env
->type_env
, res
);
4340 HReg r_res
= lookupIRTemp(env
, res
);
4341 ARM64RIL
* one
= mb_mkARM64RIL_I(1);
4342 vassert(ty
== Ity_I1
);
4344 addInstr(env
, ARM64Instr_Logic(r_res
, hregARM64_X0(), one
,
4346 /* And be conservative -- mask off all but the lowest bit. */
4347 addInstr(env
, ARM64Instr_Logic(r_res
, r_res
, one
,
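
      /* Aside: STXR-style store-conditionals write 0 on success and 1
         on failure, whereas the IR convention is 1 for success and 0
         for failure, so the status above is flipped with XOR 1 and
         then masked back down to a single bit.  Illustrative sketch
         only (not part of the selector):

            static ULong scStatusToIR ( ULong x0 )
            {
               return (x0 ^ 1) & 1;   // 0 (hw success) -> 1, 1 (hw fail) -> 0
            }
      */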

      /* --------- ACAS --------- */
      case Ist_CAS: {
         IRCAS* cas = stmt->Ist.CAS.details;
         if (cas->oldHi == IRTemp_INVALID && cas->end == Iend_LE) {
            /* "normal" singleton CAS */
            UChar  sz;
            IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
            switch (ty) {
               case Ity_I64: sz = 8; break;
               case Ity_I32: sz = 4; break;
               case Ity_I16: sz = 2; break;
               case Ity_I8:  sz = 1; break;
               default: goto unhandled_cas;
            }
            HReg rAddr = iselIntExpr_R(env, cas->addr);
            HReg rExpd = iselIntExpr_R(env, cas->expdLo);
            HReg rData = iselIntExpr_R(env, cas->dataLo);
            vassert(cas->expdHi == NULL);
            vassert(cas->dataHi == NULL);
            addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr));
            addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd));
            addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData));
            addInstr(env, ARM64Instr_CAS(sz));
            /* Now we have the lowest szB bytes of x1 are either equal to
               the lowest szB bytes of x5, indicating success, or they
               aren't, indicating failure. */
            HReg rResult = hregARM64_X1();
            switch (sz) {
               case 8:  break;
               case 4:  rResult = widen_z_32_to_64(env, rResult); break;
               case 2:  rResult = widen_z_16_to_64(env, rResult); break;
               case 1:  rResult = widen_z_8_to_64(env, rResult);  break;
               default: vassert(0);
            }
            // "old" in this case is interpreted somewhat liberally, per
            // the previous comment.
            HReg rOld = lookupIRTemp(env, cas->oldLo);
            addInstr(env, ARM64Instr_MovI(rOld, rResult));
            return;
         }
         if (cas->oldHi != IRTemp_INVALID && cas->end == Iend_LE) {
            /* Paired register CAS, i.e. CASP */
            UChar  sz;
            IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
            switch (ty) {
               case Ity_I64: sz = 8; break;
               case Ity_I32: sz = 4; break;
               default: goto unhandled_cas;
            }
            HReg rAddr = iselIntExpr_R(env, cas->addr);

            HReg rExpd0 = iselIntExpr_R(env, cas->expdLo);
            vassert(cas->expdHi != NULL);
            HReg rExpd1 = iselIntExpr_R(env, cas->expdHi);

            HReg rData0 = iselIntExpr_R(env, cas->dataLo);
            vassert(cas->dataHi != NULL);
            HReg rData1 = iselIntExpr_R(env, cas->dataHi);

            addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rAddr));

            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rExpd0));
            addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd1));

            addInstr(env, ARM64Instr_MovI(hregARM64_X6(), rData0));
            addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData1));

            addInstr(env, ARM64Instr_CASP(sz));

            HReg rResult0 = hregARM64_X0();
            HReg rResult1 = hregARM64_X1();
            switch (sz) {
               case 8:  break;
               case 4:  rResult0 = widen_z_32_to_64(env, rResult0);
                        rResult1 = widen_z_32_to_64(env, rResult1);
                        break;
               default: vassert(0);
            }
            HReg rOldLo = lookupIRTemp(env, cas->oldLo);
            HReg rOldHi = lookupIRTemp(env, cas->oldHi);
            addInstr(env, ARM64Instr_MovI(rOldLo, rResult0));
            addInstr(env, ARM64Instr_MovI(rOldHi, rResult1));
            return;
         }
         unhandled_cas:
         goto stmt_fail;
      }
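
      /* Aside: ARM64Instr_CAS leaves memory's old value in x1 (x0/x1
         for CASP); the IR simply receives that old value back, and the
         guest-side IR decides success by comparing it against the
         expected value itself.  In outline, and as pseudocode only:

            old = *addr;
            if (old == expd) *addr = data;   // performed atomically
            return old;                      // success iff old == expd

         The real atomic sequence is emitted by ARM64Instr_CAS /
         ARM64Instr_CASP, not by any C like the above. */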

      /* --------- MEM FENCE --------- */
      case Ist_MBE:
         switch (stmt->Ist.MBE.event) {
            case Imbe_Fence:
               addInstr(env, ARM64Instr_MFence());
               return;
            case Imbe_CancelReservation:
               addInstr(env, ARM64Instr_ClrEX());
               return;
            default:
               break;
         }
         break;

      /* --------- INSTR MARK --------- */
      /* Doesn't generate any executable code ... */
      case Ist_IMark:
         return;

      /* --------- ABI HINT --------- */
      /* These have no meaning (denotation in the IR) and so we ignore
         them ... if any actually made it this far. */
      case Ist_AbiHint:
         return;

      /* --------- NO-OP --------- */
      case Ist_NoOp:
         return;

      /* --------- EXIT --------- */
      case Ist_Exit: {
         if (stmt->Ist.Exit.dst->tag != Ico_U64)
            vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");

         ARM64CondCode cc
            = iselCondCode_C(env, stmt->Ist.Exit.guard);
         ARM64AMode* amPC
            = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);

         /* Case: boring transfer to known address */
         if (stmt->Ist.Exit.jk == Ijk_Boring) {
            if (env->chainingAllowed) {
               /* .. almost always true .. */
               /* Skip the event check at the dst if this is a forwards
                  edge. */
               Bool toFastEP
                  = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
               if (0) vex_printf("%s", toFastEP ? "Y" : ",");
               addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                                amPC, cc, toFastEP));
            } else {
               /* .. very occasionally .. */
               /* We can't use chaining, so ask for an assisted transfer,
                  as that's the only alternative that is allowable. */
               HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
               addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
            }
            return;
         }

         /* Case: assisted transfer to arbitrary address */
         switch (stmt->Ist.Exit.jk) {
            /* Keep this list in sync with that for iselNext below */
            case Ijk_ClientReq:
            case Ijk_NoDecode:
            case Ijk_NoRedir:
            case Ijk_Sys_syscall:
            case Ijk_InvalICache:
            case Ijk_FlushDCache:
            case Ijk_SigTRAP:
            case Ijk_Yield: {
               HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
               addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
                                                  stmt->Ist.Exit.jk));
               return;
            }
            default:
               break;
         }

         /* Do we ever expect to see any other kind? */
         goto stmt_fail;
      }

      default: break;
   }

  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}

/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
                                             amPC, ARM64cc_AL,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg        r    = iselIntExpr_R(env, next);
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         if (env->chainingAllowed) {
            addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
         } else {
            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_FlushDCache:
      case Ijk_SigTRAP:
      case Ijk_Yield: {
         HReg        r    = iselIntExpr_R(env, next);
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}

/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to arm64 code. */

HInstrArray* iselSB_ARM64 ( const IRSB* bb,
                            VexArch            arch_host,
                            const VexArchInfo* archinfo_host,
                            const VexAbiInfo*  vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr max_ga )
{
   Int        i, j;
   HReg       hreg, hregHI;
   ISelEnv*   env;
   UInt       hwcaps_host = archinfo_host->hwcaps;
   ARM64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchARM64);

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* guard against unexpected space regressions */
   vassert(sizeof(ARM64Instr) <= 32);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->previous_rm     = NULL;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_I128:
            hreg   = mkHReg(True, HRcInt64, 0, j++);
            hregHI = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_F16: // we'll use HRcFlt64 regs for F16 too
         case Ity_F32: // we'll use HRcFlt64 regs for F32 too
         case Ity_F64:
            hreg = mkHReg(True, HRcFlt64, 0, j++);
            break;
         case Ity_V128:
            hreg = mkHReg(True, HRcVec128, 0, j++);
            break;
         case Ity_V256:
            hreg   = mkHReg(True, HRcVec128, 0, j++);
            hregHI = mkHReg(True, HRcVec128, 0, j++);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(arm64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
   amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
   addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, ARM64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_arm64_isel.c ---*/
/*---------------------------------------------------------------*/