arm64 isel: in a couple places, use `xzr` as a source rather than loading zero into...
[valgrind.git] / VEX / priv / host_arm64_isel.c
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "libvex_basictypes.h"
30 #include "libvex_ir.h"
31 #include "libvex.h"
32 #include "ir_match.h"
34 #include "main_util.h"
35 #include "main_globals.h"
36 #include "host_generic_regs.h"
37 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
38 #include "host_arm64_defs.h"
41 /*---------------------------------------------------------*/
42 /*--- ISelEnv ---*/
43 /*---------------------------------------------------------*/
45 /* This carries around:
47 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
48 might encounter. This is computed before insn selection starts,
49 and does not change.
51 - A mapping from IRTemp to HReg. This tells the insn selector
52 which virtual register is associated with each IRTemp temporary.
53 This is computed before insn selection starts, and does not
54 change. We expect this mapping to map precisely the same set of
55 IRTemps as the type mapping does.
57 |vregmap| holds the primary register for the IRTemp.
58 |vregmapHI| is only used for 128-bit integer-typed
59 IRTemps. It holds the identity of a second
60 64-bit virtual HReg, which holds the high half
61 of the value.
63 - The code array, that is, the insns selected so far.
65 - A counter, for generating new virtual registers.
67 - The host hardware capabilities word. This is set at the start
68 and does not change.
70 - A Bool for indicating whether we may generate chain-me
71 instructions for control flow transfers, or whether we must use
72 XAssisted.
74 - The maximum guest address of any guest insn in this block.
75 Actually, the address of the highest-addressed byte from any insn
76 in this block. Is set at the start and does not change. This is
77 used for detecting jumps which are definitely forward-edges from
78 this block, and therefore can be made (chained) to the fast entry
79 point of the destination, thereby avoiding the destination's
80 event check.
82 - An IRExpr*, which may be NULL, holding the IR expression (an
83 IRRoundingMode-encoded value) to which the FPU's rounding mode
84 was most recently set. Setting to NULL is always safe. Used to
85 avoid redundant settings of the FPU's rounding mode, as
86 described in set_FPCR_rounding_mode below.
88 Note, this is all (well, mostly) host-independent.
91 typedef
92 struct {
93 /* Constants -- these are set at the start and do not change. */
94 IRTypeEnv* type_env;
96 HReg* vregmap;
97 HReg* vregmapHI;
98 Int n_vregmap;
100 UInt hwcaps;
102 Bool chainingAllowed;
103 Addr64 max_ga;
105 /* These are modified as we go along. */
106 HInstrArray* code;
107 Int vreg_ctr;
109 IRExpr* previous_rm;
111 ISelEnv;
113 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
115 vassert(tmp >= 0);
116 vassert(tmp < env->n_vregmap);
117 return env->vregmap[tmp];
120 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
121 ISelEnv* env, IRTemp tmp )
123 vassert(tmp >= 0);
124 vassert(tmp < env->n_vregmap);
125 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
126 *vrLO = env->vregmap[tmp];
127 *vrHI = env->vregmapHI[tmp];
130 static void addInstr ( ISelEnv* env, ARM64Instr* instr )
132 addHInstr(env->code, instr);
133 if (vex_traceflags & VEX_TRACE_VCODE) {
134 ppARM64Instr(instr);
135 vex_printf("\n");
139 static HReg newVRegI ( ISelEnv* env )
141 HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
142 env->vreg_ctr++;
143 return reg;
146 static HReg newVRegD ( ISelEnv* env )
148 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
149 env->vreg_ctr++;
150 return reg;
153 static HReg newVRegV ( ISelEnv* env )
155 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
156 env->vreg_ctr++;
157 return reg;
161 /*---------------------------------------------------------*/
162 /*--- ISEL: Forward declarations ---*/
163 /*---------------------------------------------------------*/
165 /* These are organised as iselXXX and iselXXX_wrk pairs. The
166 iselXXX_wrk do the real work, but are not to be called directly.
167 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
168 checks that all returned registers are virtual. You should not
169 call the _wrk version directly.
171 Because some forms of ARM64 memory amodes are implicitly scaled by
172 the access size, iselIntExpr_AMode takes an IRType which tells it
173 the type of the access for which the amode is to be used. This
174 type needs to be correct, else you'll get incorrect code.
176 static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
177 IRExpr* e, IRType dty );
178 static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
179 IRExpr* e, IRType dty );
181 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
182 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
184 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
185 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
187 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
188 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
190 static ARM64CondCode iselCondCode_C_wrk ( ISelEnv* env, IRExpr* e );
191 static ARM64CondCode iselCondCode_C ( ISelEnv* env, IRExpr* e );
193 static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e );
194 static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e );
196 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
197 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
199 static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
200 ISelEnv* env, IRExpr* e );
201 static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
202 ISelEnv* env, IRExpr* e );
204 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
205 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
207 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
208 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
210 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
211 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e );
213 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
214 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
216 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
217 ISelEnv* env, IRExpr* e );
218 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
219 ISelEnv* env, IRExpr* e );
221 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
224 /*---------------------------------------------------------*/
225 /*--- ISEL: Misc helpers ---*/
226 /*---------------------------------------------------------*/
228 /* Generate an amode suitable for a 64-bit sized access relative to
229 the baseblock register (X21). This generates an RI12 amode, which
230 means it's scaled by the access size, which is why the access size
231 -- 64 bit -- is stated explicitly here. Consequently |off| needs
232 to be divisible by 8. */
233 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
235 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
236 vassert((off & 7) == 0); /* ditto */
237 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
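/* Illustrative example: a 64-bit access at baseblock offset 24 yields
   ARM64AMode_RI12(X21, 24 >> 3, 8), i.e. uimm12 == 3 with an implied
   scale of 8, encoding an offset of 3 * 8 == 24 bytes from X21. */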
240 /* Ditto, for 32 bit accesses. */
241 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
243 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
244 vassert((off & 3) == 0); /* ditto */
245 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
248 /* Ditto, for 16 bit accesses. */
249 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
251 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
252 vassert((off & 1) == 0); /* ditto */
253 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
256 /* Ditto, for 8 bit accesses. */
257 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
259 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
260 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
263 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
265 vassert(off < (1<<12));
266 HReg r = newVRegI(env);
267 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
268 ARM64RIA_I12(off,0), True/*isAdd*/));
269 return r;
272 static HReg get_baseblock_register ( void )
274 return hregARM64_X21();
277 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
278 a new register, and return the new register. */
279 static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
281 HReg dst = newVRegI(env);
282 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
283 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
284 return dst;
287 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
288 a new register, and return the new register. */
289 static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
291 HReg dst = newVRegI(env);
292 ARM64RI6* n48 = ARM64RI6_I6(48);
293 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
294 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
295 return dst;
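/* Worked example of the shift-based sign extension above (illustrative):
   if the low 16 bits of src are 0x8123, then after SHL by 48 the value
   is 0x8123_0000_0000_0000, and the arithmetic SAR by 48 then yields
   0xFFFF_FFFF_FFFF_8123, i.e. the 16-bit value sign extended to 64. */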
298 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
299 a new register, and return the new register. */
300 static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
302 HReg dst = newVRegI(env);
303 ARM64RIL* mask = ARM64RIL_I13(1, 0, 15); /* encodes 0xFFFF */
304 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
305 return dst;
308 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
309 a new register, and return the new register. */
310 static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
312 HReg dst = newVRegI(env);
313 ARM64RI6* n32 = ARM64RI6_I6(32);
314 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
315 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
316 return dst;
319 /* Generate code to sign extend an 8 bit value in 'src' to 64 bits, in
320 a new register, and return the new register. */
321 static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
323 HReg dst = newVRegI(env);
324 ARM64RI6* n56 = ARM64RI6_I6(56);
325 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
326 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
327 return dst;
328 /* Generate code to zero extend an 8 bit value in 'src' to 64 bits, in
329 a new register, and return the new register. */
330 static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
332 HReg dst = newVRegI(env);
333 ARM64RIL* mask = ARM64RIL_I13(1, 0, 7); /* encodes 0xFF */
334 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
335 return dst;
338 /* Is this IRExpr_Const(IRConst_U64(0)) ? */
339 static Bool isZeroU64 ( IRExpr* e ) {
340 if (e->tag != Iex_Const) return False;
341 IRConst* con = e->Iex.Const.con;
342 vassert(con->tag == Ico_U64);
343 return con->Ico.U64 == 0;
347 /*---------------------------------------------------------*/
348 /*--- ISEL: FP rounding mode helpers ---*/
349 /*---------------------------------------------------------*/
351 /* Set the FP rounding mode: 'mode' is an I32-typed expression
352 denoting a value in the range 0 .. 3, indicating a round mode
353 encoded as per type IRRoundingMode -- the first four values only
354 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
355 FPCR to have the same rounding.
357 For speed & simplicity, we're setting the *entire* FPCR here.
359 Setting the rounding mode is expensive. So this function tries to
360 avoid repeatedly setting the rounding mode to the same thing by
361 first comparing 'mode' to the 'mode' tree supplied in the previous
362 call to this function, if any. (The previous value is stored in
363 env->previous_rm.) If 'mode' is a single IR temporary 't' and
364 env->previous_rm is also just 't', then the setting is skipped.
366 This is safe because of the SSA property of IR: an IR temporary can
367 only be defined once and so will have the same value regardless of
368 where it appears in the block. Cool stuff, SSA.
370 A safety condition: all attempts to set the RM must be aware of
371 this mechanism - by being routed through the functions here.
373 Of course this only helps in blocks where the RM is set more than
374 once and it is set to the same value each time, *and* that value is
375 held in the same IR temporary each time. In order to assure the
376 latter as much as possible, the IR optimiser takes care to do CSE
377 on any block with any sign of floating point activity.
379 static
380 void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
382 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
384 /* Do we need to do anything? */
385 if (env->previous_rm
386 && env->previous_rm->tag == Iex_RdTmp
387 && mode->tag == Iex_RdTmp
388 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
389 /* no - setting it to what it was before. */
390 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
391 return;
394 /* No luck - we better set it, and remember what we set it to. */
395 env->previous_rm = mode;
397 /* Only supporting the rounding-mode bits - the rest of FPCR is set
398 to zero - so we can set the whole register at once (faster). */
400 /* This isn't simple, because 'mode' carries an IR rounding
401 encoding, and we need to translate that to an ARM64 FP one:
402 The IR encoding:
403 00 to nearest (the default)
404 10 to +infinity
405 01 to -infinity
406 11 to zero
407 The ARM64 FP encoding:
408 00 to nearest
409 01 to +infinity
410 10 to -infinity
411 11 to zero
412 Easy enough to do; just swap the two bits.
414 HReg irrm = iselIntExpr_R(env, mode);
415 HReg tL = newVRegI(env);
416 HReg tR = newVRegI(env);
417 HReg t3 = newVRegI(env);
418 /* tL = irrm << 1;
419 tR = irrm >> 1; if we're lucky, these will issue together
420 tL &= 2;
421 tR &= 1; ditto
422 t3 = tL | tR;
423 t3 <<= 22;
424 msr fpcr, t3
426 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
427 ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
428 vassert(ril_one && ril_two);
429 addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
430 addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
431 addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
432 addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
433 addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
434 addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
435 addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
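   /* Worked example of the bit swap above (illustrative): for
      mode == Irrm_PosINF, irrm holds 2 (binary 10).  Then
         tL = (2 << 1) & 2 == 0,    tR = (2 >> 1) & 1 == 1,
         t3 = 0 | 1 == 1 (binary 01),
      which is the ARM64 encoding of round-to-plus-infinity; the final
      shift by 22 places it in the FPCR.RMode field (bits 23:22). */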
439 /*---------------------------------------------------------*/
440 /*--- ISEL: Function call helpers ---*/
441 /*---------------------------------------------------------*/
443 /* Used only in doHelperCall. See big comment in doHelperCall re
444 handling of register-parameter args. This function figures out
445 whether evaluation of an expression might require use of a fixed
446 register. If in doubt return True (safe but suboptimal).
448 static
449 Bool mightRequireFixedRegs ( IRExpr* e )
451 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
452 // These are always "safe" -- either a copy of SP in some
453 // arbitrary vreg, or a copy of x21, respectively.
454 return False;
456 /* Else it's a "normal" expression. */
457 switch (e->tag) {
458 case Iex_RdTmp: case Iex_Const: case Iex_Get:
459 return False;
460 default:
461 return True;
466 /* Do a complete function call. |guard| is a Ity_Bit expression
467 indicating whether or not the call happens. If guard==NULL, the
468 call is unconditional. |retloc| is set to indicate where the
469 return value is after the call. The caller (of this fn) must
470 generate code to add |stackAdjustAfterCall| to the stack pointer
471 after the call is done. Returns True iff it managed to handle this
472 combination of arg/return types, else returns False. */
474 static
475 Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
476 /*OUT*/RetLoc* retloc,
477 ISelEnv* env,
478 IRExpr* guard,
479 IRCallee* cee, IRType retTy, IRExpr** args )
481 ARM64CondCode cc;
482 HReg argregs[ARM64_N_ARGREGS];
483 HReg tmpregs[ARM64_N_ARGREGS];
484 Bool go_fast;
485 Int n_args, i, nextArgReg;
486 Addr64 target;
488 vassert(ARM64_N_ARGREGS == 8);
490 /* Set default returns. We'll update them later if needed. */
491 *stackAdjustAfterCall = 0;
492 *retloc = mk_RetLoc_INVALID();
494 /* These are used for cross-checking that IR-level constraints on
495 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
496 UInt nVECRETs = 0;
497 UInt nGSPTRs = 0;
499 /* Marshal args for a call and do the call.
501 This function only deals with a tiny set of possibilities, which
502 cover all helpers in practice. The restrictions are that only
503 arguments in registers are supported, hence only
504 ARM64_N_REGPARMS x 64 integer bits in total can be passed. In
505 fact the only supported arg type is I64.
507 The return type can be I{64,32} or V128. In the V128 case, it
508 is expected that |args| will contain the special node
509 IRExpr_VECRET(), in which case this routine generates code to
510 allocate space on the stack for the vector return value. Since
511 we are not passing any scalars on the stack, it is enough to
512 preallocate the return space before marshalling any arguments,
513 in this case.
515 |args| may also contain IRExpr_GSPTR(), in which case the
516 value in x21 is passed as the corresponding argument.
518 Generating code which is both efficient and correct when
519 parameters are to be passed in registers is difficult, for the
520 reasons elaborated in detail in comments attached to
521 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
522 of the method described in those comments.
524 The problem is split into two cases: the fast scheme and the
525 slow scheme. In the fast scheme, arguments are computed
526 directly into the target (real) registers. This is only safe
527 when we can be sure that computation of each argument will not
528 trash any real registers set by computation of any other
529 argument.
531 In the slow scheme, all args are first computed into vregs, and
532 once they are all done, they are moved to the relevant real
533 regs. This always gives correct code, but it also gives a bunch
534 of vreg-to-rreg moves which are usually redundant but are hard
535 for the register allocator to get rid of.
537 To decide which scheme to use, all argument expressions are
538 first examined. If they are all so simple that it is clear they
539 will be evaluated without use of any fixed registers, use the
540 fast scheme, else use the slow scheme. Note also that only
541 unconditional calls may use the fast scheme, since having to
542 compute a condition expression could itself trash real
543 registers.
545 Note this requires being able to examine an expression and
546 determine whether or not evaluation of it might use a fixed
547 register. That requires knowledge of how the rest of this insn
548 selector works. Currently just the following 3 are regarded as
549 safe -- hopefully they cover the majority of arguments in
550 practice: IRExpr_RdTmp, IRExpr_Const and IRExpr_Get.
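/* A hypothetical example of the scheme choice (illustrative only): for
   an unconditional call to some helper foo(t5, t6), where both args
   are Iex_RdTmp and the return type is I64, every arg passes the
   mightRequireFixedRegs test, so the fast scheme is used and the arg
   values are moved straight into x0 and x1 as they are computed.  If
   the guard is not a constant 1:I1, or any argument is more complex
   than RdTmp/Const/Get, or the return type is V128, the slow scheme is
   used instead. */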
553 /* Note that the cee->regparms field is meaningless on ARM64 hosts
554 (since there is only one calling convention) and so we always
555 ignore it. */
557 n_args = 0;
558 for (i = 0; args[i]; i++) {
559 IRExpr* arg = args[i];
560 if (UNLIKELY(arg->tag == Iex_VECRET)) {
561 nVECRETs++;
562 } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
563 nGSPTRs++;
565 n_args++;
568 /* If this fails, the IR is ill-formed */
569 vassert(nGSPTRs == 0 || nGSPTRs == 1);
571 /* If we have a VECRET, allocate space on the stack for the return
572 value, and record the stack pointer after that. */
573 HReg r_vecRetAddr = INVALID_HREG;
574 if (nVECRETs == 1) {
575 vassert(retTy == Ity_V128 || retTy == Ity_V256);
576 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
577 r_vecRetAddr = newVRegI(env);
578 addInstr(env, ARM64Instr_AddToSP(-16));
579 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
580 } else {
581 // If either of these fail, the IR is ill-formed
582 vassert(retTy != Ity_V128 && retTy != Ity_V256);
583 vassert(nVECRETs == 0);
586 argregs[0] = hregARM64_X0();
587 argregs[1] = hregARM64_X1();
588 argregs[2] = hregARM64_X2();
589 argregs[3] = hregARM64_X3();
590 argregs[4] = hregARM64_X4();
591 argregs[5] = hregARM64_X5();
592 argregs[6] = hregARM64_X6();
593 argregs[7] = hregARM64_X7();
595 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
596 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
598 /* First decide which scheme (slow or fast) is to be used. First
599 assume the fast scheme, and select slow if any contraindications
600 (wow) appear. */
602 go_fast = True;
604 if (guard) {
605 if (guard->tag == Iex_Const
606 && guard->Iex.Const.con->tag == Ico_U1
607 && guard->Iex.Const.con->Ico.U1 == True) {
608 /* unconditional */
609 } else {
610 /* Not manifestly unconditional -- be conservative. */
611 go_fast = False;
615 if (go_fast) {
616 for (i = 0; i < n_args; i++) {
617 if (mightRequireFixedRegs(args[i])) {
618 go_fast = False;
619 break;
624 if (go_fast) {
625 if (retTy == Ity_V128 || retTy == Ity_V256)
626 go_fast = False;
629 /* At this point the scheme to use has been established. Generate
630 code to get the arg values into the argument rregs. If we run
631 out of arg regs, give up. */
633 if (go_fast) {
635 /* FAST SCHEME */
636 nextArgReg = 0;
638 for (i = 0; i < n_args; i++) {
639 IRExpr* arg = args[i];
641 IRType aTy = Ity_INVALID;
642 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
643 aTy = typeOfIRExpr(env->type_env, args[i]);
645 if (nextArgReg >= ARM64_N_ARGREGS)
646 return False; /* out of argregs */
648 if (aTy == Ity_I64) {
649 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
650 iselIntExpr_R(env, args[i]) ));
651 nextArgReg++;
653 else if (arg->tag == Iex_GSPTR) {
654 vassert(0); //ATC
655 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
656 hregARM64_X21() ));
657 nextArgReg++;
659 else if (arg->tag == Iex_VECRET) {
660 // because of the go_fast logic above, we can't get here,
661 // since vector return values make us use the slow path
662 // instead.
663 vassert(0);
665 else
666 return False; /* unhandled arg type */
669 /* Fast scheme only applies for unconditional calls. Hence: */
670 cc = ARM64cc_AL;
672 } else {
674 /* SLOW SCHEME; move via temporaries */
675 nextArgReg = 0;
677 for (i = 0; i < n_args; i++) {
678 IRExpr* arg = args[i];
680 IRType aTy = Ity_INVALID;
681 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
682 aTy = typeOfIRExpr(env->type_env, args[i]);
684 if (nextArgReg >= ARM64_N_ARGREGS)
685 return False; /* out of argregs */
687 if (aTy == Ity_I64) {
688 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
689 nextArgReg++;
691 else if (arg->tag == Iex_GSPTR) {
692 vassert(0); //ATC
693 tmpregs[nextArgReg] = hregARM64_X21();
694 nextArgReg++;
696 else if (arg->tag == Iex_VECRET) {
697 vassert(!hregIsInvalid(r_vecRetAddr));
698 tmpregs[nextArgReg] = r_vecRetAddr;
699 nextArgReg++;
701 else
702 return False; /* unhandled arg type */
705 /* Now we can compute the condition. We can't do it earlier
706 because the argument computations could trash the condition
707 codes. Be a bit clever to handle the common case where the
708 guard is 1:Bit. */
709 cc = ARM64cc_AL;
710 if (guard) {
711 if (guard->tag == Iex_Const
712 && guard->Iex.Const.con->tag == Ico_U1
713 && guard->Iex.Const.con->Ico.U1 == True) {
714 /* unconditional -- do nothing */
715 } else {
716 cc = iselCondCode_C( env, guard );
720 /* Move the args to their final destinations. */
721 for (i = 0; i < nextArgReg; i++) {
722 vassert(!(hregIsInvalid(tmpregs[i])));
723 /* None of these insns, including any spill code that might
724 be generated, may alter the condition codes. */
725 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
730 /* Should be assured by checks above */
731 vassert(nextArgReg <= ARM64_N_ARGREGS);
733 /* Do final checks, set the return values, and generate the call
734 instruction proper. */
735 vassert(nGSPTRs == 0 || nGSPTRs == 1);
736 vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
737 vassert(*stackAdjustAfterCall == 0);
738 vassert(is_RetLoc_INVALID(*retloc));
739 switch (retTy) {
740 case Ity_INVALID:
741 /* Function doesn't return a value. */
742 *retloc = mk_RetLoc_simple(RLPri_None);
743 break;
744 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
745 *retloc = mk_RetLoc_simple(RLPri_Int);
746 break;
747 case Ity_V128:
748 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
749 *stackAdjustAfterCall = 16;
750 break;
751 case Ity_V256:
752 vassert(0); // ATC
753 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
754 *stackAdjustAfterCall = 32;
755 break;
756 default:
757 /* IR can denote other possible return types, but we don't
758 handle those here. */
759 vassert(0);
762 /* Finally, generate the call itself. This needs the *retloc value
763 set in the switch above, which is why it's at the end. */
765 /* nextArgReg doles out argument registers. Since these are
766 assigned in the order x0 .. x7, its numeric value at this point,
767 which must be between 0 and 8 inclusive, is going to be equal to
768 the number of arg regs in use for the call. Hence bake that
769 number into the call (we'll need to know it when doing register
770 allocation, to know what regs the call reads.) */
772 target = (Addr)cee->addr;
773 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
775 return True; /* success */
779 /*---------------------------------------------------------*/
780 /*--- ISEL: Integer expressions (64/32 bit) ---*/
781 /*---------------------------------------------------------*/
783 /* Select insns for an integer-typed expression, and add them to the
784 code list. Return a reg holding the result. This reg will be a
785 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
786 want to modify it, ask for a new vreg, copy it in there, and modify
787 the copy. The register allocator will do its best to map both
788 vregs to the same real register, so the copies will often disappear
789 later in the game.
791 This should handle expressions of 64- and 32-bit type. All results
792 are returned in a 64-bit register. For 32-bit expressions, the
793 upper 32 bits are arbitrary, so you should mask or sign extend
794 partial values if necessary.
797 /* ---------------- RRS matching helper ---------------- */
799 /* This helper matches 64-bit integer expressions of the form
800 {Add,Sub,And,Or,Xor}(E1, {Shl,Shr,Sar}(E2, immediate))
802 {Add,And,Or,Xor}({Shl,Shr,Sar}(E1, immediate), E2)
803 which is a useful thing to do because AArch64 can compute those in
804 a single instruction.
806 static Bool matchesRegRegShift(/*OUT*/ARM64RRSOp* mainOp,
807 /*OUT*/ARM64ShiftOp* shiftOp,
808 /*OUT*/UChar* amt,
809 /*OUT*/IRExpr** argUnshifted,
810 /*OUT*/IRExpr** argToBeShifted,
811 IRExpr* e)
813 *mainOp = (ARM64RRSOp)0;
814 *shiftOp = (ARM64ShiftOp)0;
815 *amt = 0;
816 *argUnshifted = NULL;
817 *argToBeShifted = NULL;
818 if (e->tag != Iex_Binop) {
819 return False;
821 const IROp irMainOp = e->Iex.Binop.op;
822 Bool canSwap = True;
823 switch (irMainOp) {
824 case Iop_And64: *mainOp = ARM64rrs_AND; break;
825 case Iop_Or64: *mainOp = ARM64rrs_OR; break;
826 case Iop_Xor64: *mainOp = ARM64rrs_XOR; break;
827 case Iop_Add64: *mainOp = ARM64rrs_ADD; break;
828 case Iop_Sub64: *mainOp = ARM64rrs_SUB; canSwap = False; break;
829 default: return False;
831 /* The root node is OK. Now check the right (2nd) arg. */
832 IRExpr* argL = e->Iex.Binop.arg1;
833 IRExpr* argR = e->Iex.Binop.arg2;
835 // This loop runs either one or two iterations. In the first iteration we
836 // check for a shiftable right (second) arg. If that fails then, at the end
837 // of the first iteration, the args are swapped (when that is valid) and we
838 // go round again, this time checking for a shiftable left (first) arg.
839 UInt iterNo = 1;
840 while (True) {
841 vassert(iterNo == 1 || iterNo == 2);
842 if (argR->tag == Iex_Binop) {
843 const IROp irShiftOp = argR->Iex.Binop.op;
844 if (irShiftOp == Iop_Shl64
845 || irShiftOp == Iop_Shr64 || irShiftOp == Iop_Sar64) {
846 IRExpr* argRL = argR->Iex.Binop.arg1;
847 const IRExpr* argRR = argR->Iex.Binop.arg2;
848 if (argRR->tag == Iex_Const) {
849 const IRConst* argRRconst = argRR->Iex.Const.con;
850 vassert(argRRconst->tag == Ico_U8); // due to typecheck rules
851 const UChar amount = argRRconst->Ico.U8;
852 if (amount >= 1 && amount <= 63) {
853 // We got a match \o/
854 // *mainOp is already set
855 switch (irShiftOp) {
856 case Iop_Shl64: *shiftOp = ARM64sh_SHL; break;
857 case Iop_Shr64: *shiftOp = ARM64sh_SHR; break;
858 case Iop_Sar64: *shiftOp = ARM64sh_SAR; break;
859 default: vassert(0); // guarded above
861 *amt = amount;
862 *argUnshifted = argL;
863 *argToBeShifted = argRL;
864 return True;
869 // We failed to get a match in the first iteration. So, provided the
870 // root node isn't SUB, swap the arguments and make one further
871 // iteration. If that doesn't succeed, we must give up.
872 if (iterNo == 1 && canSwap) {
873 IRExpr* tmp = argL;
874 argL = argR;
875 argR = tmp;
876 iterNo = 2;
877 continue;
879 // Give up.
880 return False;
882 /*NOTREACHED*/
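/* Example of what this matcher accepts (illustrative): for 64-bit
   temporaries t1 and t2, the expression Add64(t1, Shl64(t2, 3:I8))
   matches with *mainOp = ARM64rrs_ADD, *shiftOp = ARM64sh_SHL,
   *amt = 3, *argUnshifted = t1 and *argToBeShifted = t2, which the
   caller can then emit as a single reg-reg-shift instruction. */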
885 /* --------------------- AMode --------------------- */
887 /* Return an AMode which computes the value of the specified
888 expression, possibly also adding insns to the code list as a
889 result. The expression may only be a 64-bit one.
892 static Bool isValidScale ( UChar scale )
894 switch (scale) {
895 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
896 default: return False;
900 static Bool sane_AMode ( ARM64AMode* am )
902 switch (am->tag) {
903 case ARM64am_RI9:
904 return
905 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
906 && (hregIsVirtual(am->ARM64am.RI9.reg)
907 /* || sameHReg(am->ARM64am.RI9.reg,
908 hregARM64_X21()) */ )
909 && am->ARM64am.RI9.simm9 >= -256
910 && am->ARM64am.RI9.simm9 <= 255 );
911 case ARM64am_RI12:
912 return
913 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
914 && (hregIsVirtual(am->ARM64am.RI12.reg)
915 /* || sameHReg(am->ARM64am.RI12.reg,
916 hregARM64_X21()) */ )
917 && am->ARM64am.RI12.uimm12 < 4096
918 && isValidScale(am->ARM64am.RI12.szB) );
919 case ARM64am_RR:
920 return
921 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
922 && hregIsVirtual(am->ARM64am.RR.base)
923 && hregClass(am->ARM64am.RR.index) == HRcInt64
924 && hregIsVirtual(am->ARM64am.RR.index) );
925 default:
926 vpanic("sane_AMode: unknown ARM64 AMode tag");
930 static
931 ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
933 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
934 vassert(sane_AMode(am));
935 return am;
938 static
939 ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
941 IRType ty = typeOfIRExpr(env->type_env,e);
942 vassert(ty == Ity_I64);
944 ULong szBbits = 0;
945 switch (dty) {
946 case Ity_I64: szBbits = 3; break;
947 case Ity_I32: szBbits = 2; break;
948 case Ity_I16: szBbits = 1; break;
949 case Ity_I8: szBbits = 0; break;
950 default: vassert(0);
953 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
954 we're going to create an amode suitable for LDU* or STU*
955 instructions, which use unscaled immediate offsets. */
956 if (e->tag == Iex_Binop
957 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
958 && e->Iex.Binop.arg2->tag == Iex_Const
959 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
960 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
961 if (simm >= -255 && simm <= 255) {
962 /* Although the gating condition might seem to be
963 simm >= -256 && simm <= 255
964 we will need to negate simm in the case where the op is Sub64.
965 Hence limit the lower value to -255 in order that its negation
966 is representable. */
967 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
968 if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
969 return ARM64AMode_RI9(reg, (Int)simm);
973 /* Add64(expr, uimm12 * transfer-size) */
974 if (e->tag == Iex_Binop
975 && e->Iex.Binop.op == Iop_Add64
976 && e->Iex.Binop.arg2->tag == Iex_Const
977 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
978 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
979 ULong szB = 1 << szBbits;
980 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
981 && (uimm >> szBbits) < 4096) {
982 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
983 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
987 /* Add64(expr1, expr2) */
988 if (e->tag == Iex_Binop
989 && e->Iex.Binop.op == Iop_Add64) {
990 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
991 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
992 return ARM64AMode_RR(reg1, reg2);
995 /* Doesn't match anything in particular. Generate it into
996 a register and use that. */
997 HReg reg = iselIntExpr_R(env, e);
998 return ARM64AMode_RI9(reg, 0);
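/* Illustrative examples of the cases above (r_t7 denotes whichever
   register holds the temporary t7): for a 32-bit access with address
   Add64(t7, 0x40:I64), the constant is 4-aligned and 0x40 >> 2 == 16
   < 4096, so we get ARM64AMode_RI12(r_t7, 16, 4).  For
   Sub64(t7, 0x30:I64) the simm9 case applies, giving
   ARM64AMode_RI9(r_t7, -48).  Anything not matching the patterns above
   falls through to RI9(reg, 0) on a freshly computed register. */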
1002 /* --------------------- RIA --------------------- */
1004 /* Select instructions to generate 'e' into a RIA. */
1006 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
1008 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
1009 /* sanity checks ... */
1010 switch (ri->tag) {
1011 case ARM64riA_I12:
1012 vassert(ri->ARM64riA.I12.imm12 < 4096);
1013 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
1014 return ri;
1015 case ARM64riA_R:
1016 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
1017 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
1018 return ri;
1019 default:
1020 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
1024 /* DO NOT CALL THIS DIRECTLY ! */
1025 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
1027 IRType ty = typeOfIRExpr(env->type_env,e);
1028 vassert(ty == Ity_I64 || ty == Ity_I32);
1030 /* special case: immediate */
1031 if (e->tag == Iex_Const) {
1032 ULong u = 0xF000000ULL; /* invalid */
1033 switch (e->Iex.Const.con->tag) {
1034 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
1035 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1036 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
1038 if (0 == (u & ~(0xFFFULL << 0)))
1039 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
1040 if (0 == (u & ~(0xFFFULL << 12)))
1041 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
1042 /* else fail, fall through to default case */
1045 /* default case: calculate into a register and return that */
1047 HReg r = iselIntExpr_R ( env, e );
1048 return ARM64RIA_R(r);
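/* Illustrative examples of the immediate cases above: the constant
   0x123 becomes ARM64RIA_I12(0x123, 0), and 0x123000 becomes
   ARM64RIA_I12(0x123, 12); a constant such as 0x123456 fits neither
   pattern and so is computed into a register instead. */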
1053 /* --------------------- RIL --------------------- */
1055 /* Select instructions to generate 'e' into a RIL. At this point we
1056 have to deal with the strange bitfield-immediate encoding for logic
1057 instructions. */
1060 // The following four functions
1061 // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
1062 // are copied, with modifications, from
1063 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
1064 // which has the following copyright notice:
1066 Copyright 2013, ARM Limited
1067 All rights reserved.
1069 Redistribution and use in source and binary forms, with or without
1070 modification, are permitted provided that the following conditions are met:
1072 * Redistributions of source code must retain the above copyright notice,
1073 this list of conditions and the following disclaimer.
1074 * Redistributions in binary form must reproduce the above copyright notice,
1075 this list of conditions and the following disclaimer in the documentation
1076 and/or other materials provided with the distribution.
1077 * Neither the name of ARM Limited nor the names of its contributors may be
1078 used to endorse or promote products derived from this software without
1079 specific prior written permission.
1081 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1082 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1083 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1084 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
1085 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1086 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1087 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1088 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1089 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1090 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1093 static Int CountLeadingZeros(ULong value, Int width)
1095 vassert(width == 32 || width == 64);
1096 Int count = 0;
1097 ULong bit_test = 1ULL << (width - 1);
1098 while ((count < width) && ((bit_test & value) == 0)) {
1099 count++;
1100 bit_test >>= 1;
1102 return count;
1105 static Int CountTrailingZeros(ULong value, Int width)
1107 vassert(width == 32 || width == 64);
1108 Int count = 0;
1109 while ((count < width) && (((value >> count) & 1) == 0)) {
1110 count++;
1112 return count;
1115 static Int CountSetBits(ULong value, Int width)
1117 // TODO: Other widths could be added here, as the implementation already
1118 // supports them.
1119 vassert(width == 32 || width == 64);
1121 // Mask out unused bits to ensure that they are not counted.
1122 value &= (0xffffffffffffffffULL >> (64-width));
1124 // Add up the set bits.
1125 // The algorithm works by adding pairs of bit fields together iteratively,
1126 // where the size of each bit field doubles each time.
1127 // An example for an 8-bit value:
1128 //  Bits:  h  g  f  e  d  c  b  a
1129 //          \ |   \ |   \ |   \ |
1130 //  value = h+g   f+e   d+c   b+a
1131 //             \    |       \    |
1132 //  value =  h+g+f+e       d+c+b+a
1133 //                  \             |
1134 //  value =  h+g+f+e+d+c+b+a
1135 value = ((value >> 1) & 0x5555555555555555ULL)
1136 + (value & 0x5555555555555555ULL);
1137 value = ((value >> 2) & 0x3333333333333333ULL)
1138 + (value & 0x3333333333333333ULL);
1139 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1140 + (value & 0x0f0f0f0f0f0f0f0fULL);
1141 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1142 + (value & 0x00ff00ff00ff00ffULL);
1143 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1144 + (value & 0x0000ffff0000ffffULL);
1145 value = ((value >> 32) & 0x00000000ffffffffULL)
1146 + (value & 0x00000000ffffffffULL);
1148 return value;
1151 static Bool isImmLogical ( /*OUT*/UInt* n,
1152 /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1153 ULong value, UInt width )
1155 // Test if a given value can be encoded in the immediate field of a
1156 // logical instruction.
1158 // If it can be encoded, the function returns true, and values
1159 // pointed to by n, imm_s and imm_r are updated with immediates
1160 // encoded in the format required by the corresponding fields in the
1161 // logical instruction. If it can not be encoded, the function
1162 // returns false, and the values pointed to by n, imm_s and imm_r
1163 // are undefined.
1164 vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1165 vassert(width == 32 || width == 64);
1167 // Logical immediates are encoded using parameters n, imm_s and imm_r using
1168 // the following table:
1170 //    N   imms    immr    size        S             R
1171 //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
1172 //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
1173 //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
1174 //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
1175 //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
1176 //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
1177 // (s bits must not be all set)
1179 // A pattern is constructed of size bits, where the least significant S+1
1180 // bits are set. The pattern is rotated right by R, and repeated across a
1181 // 32 or 64-bit value, depending on destination register width.
1183 // To test if an arbitrary immediate can be encoded using this scheme, an
1184 // iterative algorithm is used.
1186 // TODO: This code does not consider using X/W register overlap to support
1187 // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1188 // are an encodable logical immediate.
1190 // 1. If the value has all set or all clear bits, it can't be encoded.
1191 if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1192 ((width == 32) && (value == 0xffffffff))) {
1193 return False;
1196 UInt lead_zero = CountLeadingZeros(value, width);
1197 UInt lead_one = CountLeadingZeros(~value, width);
1198 UInt trail_zero = CountTrailingZeros(value, width);
1199 UInt trail_one = CountTrailingZeros(~value, width);
1200 UInt set_bits = CountSetBits(value, width);
1202 // The fixed bits in the immediate s field.
1203 // If width == 64 (X reg), start at 0xFFFFFF80.
1204 // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1205 // widths won't be executed.
1206 Int imm_s_fixed = (width == 64) ? -128 : -64;
1207 Int imm_s_mask = 0x3F;
1209 for (;;) {
1210 // 2. If the value is two bits wide, it can be encoded.
1211 if (width == 2) {
1212 *n = 0;
1213 *imm_s = 0x3C;
1214 *imm_r = (value & 3) - 1;
1215 return True;
1218 *n = (width == 64) ? 1 : 0;
1219 *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1220 if ((lead_zero + set_bits) == width) {
1221 *imm_r = 0;
1222 } else {
1223 *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1226 // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1227 // the bit width of the value, it can be encoded.
1228 if (lead_zero + trail_zero + set_bits == width) {
1229 return True;
1232 // 4. If the sum of leading ones, trailing ones and unset bits in the
1233 // value is equal to the bit width of the value, it can be encoded.
1234 if (lead_one + trail_one + (width - set_bits) == width) {
1235 return True;
1238 // 5. If the most-significant half of the bitwise value is equal to the
1239 // least-significant half, return to step 2 using the least-significant
1240 // half of the value.
1241 ULong mask = (1ULL << (width >> 1)) - 1;
1242 if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1243 width >>= 1;
1244 set_bits >>= 1;
1245 imm_s_fixed >>= 1;
1246 continue;
1249 // 6. Otherwise, the value can't be encoded.
1250 return False;
1255 /* Create a RIL for the given immediate, if it is representable, or
1256 return NULL if not. */
1258 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1260 UInt n = 0, imm_s = 0, imm_r = 0;
1261 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1262 if (!ok) return NULL;
1263 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1264 return ARM64RIL_I13(n, imm_r, imm_s);
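/* Worked examples (illustrative): mb_mkARM64RIL_I(0xFFFFFFFFULL)
   yields ARM64RIL_I13(1, 0, 31) -- a run of 32 ones with no rotation
   -- which matches the mask used by widen_z_32_to_64 above; 0xFFFF and
   0xFF similarly yield I13(1, 0, 15) and I13(1, 0, 7).  A value of
   zero or all-ones is never a valid logical immediate, so NULL is
   returned for those. */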
1267 /* So, finally .. */
1269 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1271 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1272 /* sanity checks ... */
1273 switch (ri->tag) {
1274 case ARM64riL_I13:
1275 vassert(ri->ARM64riL.I13.bitN < 2);
1276 vassert(ri->ARM64riL.I13.immR < 64);
1277 vassert(ri->ARM64riL.I13.immS < 64);
1278 return ri;
1279 case ARM64riL_R:
1280 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1281 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1282 return ri;
1283 default:
1284 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1288 /* DO NOT CALL THIS DIRECTLY ! */
1289 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1291 IRType ty = typeOfIRExpr(env->type_env,e);
1292 vassert(ty == Ity_I64 || ty == Ity_I32);
1294 /* special case: immediate */
1295 if (e->tag == Iex_Const) {
1296 ARM64RIL* maybe = NULL;
1297 if (ty == Ity_I64) {
1298 vassert(e->Iex.Const.con->tag == Ico_U64);
1299 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1300 } else {
1301 vassert(ty == Ity_I32);
1302 vassert(e->Iex.Const.con->tag == Ico_U32);
1303 UInt u32 = e->Iex.Const.con->Ico.U32;
1304 ULong u64 = (ULong)u32;
1305 /* First try with 32 leading zeroes. */
1306 maybe = mb_mkARM64RIL_I(u64);
1307 /* If that doesn't work, try with 2 copies, since it doesn't
1308 matter what winds up in the upper 32 bits. */
1309 if (!maybe) {
1310 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1313 if (maybe) return maybe;
1314 /* else fail, fall through to default case */
1317 /* default case: calculate into a register and return that */
1319 HReg r = iselIntExpr_R ( env, e );
1320 return ARM64RIL_R(r);
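/* An illustrative case for the 32-bit duplication trick above: the
   constant 0x80000001:I32 is not encodable when zero extended to
   0x0000000080000001 (the two set bits do not form a contiguous run,
   even cyclically, in 64 bits), but duplicated into both halves it
   becomes 0x8000000180000001, a repeating 32-bit pattern that is a
   rotated run of two ones and hence a valid logical immediate. */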
1325 /* --------------------- RI6 --------------------- */
1327 /* Select instructions to generate 'e' into a RI6. */
1329 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1331 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1332 /* sanity checks ... */
1333 switch (ri->tag) {
1334 case ARM64ri6_I6:
1335 vassert(ri->ARM64ri6.I6.imm6 < 64);
1336 vassert(ri->ARM64ri6.I6.imm6 > 0);
1337 return ri;
1338 case ARM64ri6_R:
1339 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1340 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1341 return ri;
1342 default:
1343 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1347 /* DO NOT CALL THIS DIRECTLY ! */
1348 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1350 IRType ty = typeOfIRExpr(env->type_env,e);
1351 vassert(ty == Ity_I64 || ty == Ity_I8);
1353 /* special case: immediate */
1354 if (e->tag == Iex_Const) {
1355 switch (e->Iex.Const.con->tag) {
1356 case Ico_U8: {
1357 UInt u = e->Iex.Const.con->Ico.U8;
1358 if (u > 0 && u < 64)
1359 return ARM64RI6_I6(u);
1360 break;
1361 default:
1362 break;
1365 /* else fail, fall through to default case */
1368 /* default case: calculate into a register and return that */
1370 HReg r = iselIntExpr_R ( env, e );
1371 return ARM64RI6_R(r);
1376 /* ------------------- CondCode ------------------- */
1378 /* Generate code to evaluate a bit-typed expression, returning the
1379 condition code which would be set if the expression had notionally
1380 returned 1.
1382 Note that iselCondCode_C and iselCondCode_R are mutually recursive. For
1383 future changes to either of them, take care not to introduce an infinite
1384 loop involving the two of them.
1386 static ARM64CondCode iselCondCode_C ( ISelEnv* env, IRExpr* e )
1388 ARM64CondCode cc = iselCondCode_C_wrk(env,e);
1389 vassert(cc != ARM64cc_NV);
1390 return cc;
1393 static ARM64CondCode iselCondCode_C_wrk ( ISelEnv* env, IRExpr* e )
1395 vassert(e);
1396 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1398 /* var */
1399 if (e->tag == Iex_RdTmp) {
1400 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1401 /* Cmp doesn't modify rTmp; so this is OK. */
1402 ARM64RIL* one = mb_mkARM64RIL_I(1);
1403 vassert(one);
1404 addInstr(env, ARM64Instr_Test(rTmp, one));
1405 return ARM64cc_NE;
1408 /* Constant 1:Bit */
1409 if (e->tag == Iex_Const) {
1410 /* This is a very stupid translation. Hopefully it doesn't occur much,
1411 if ever. */
1412 vassert(e->Iex.Const.con->tag == Ico_U1);
1413 vassert(e->Iex.Const.con->Ico.U1 == True
1414 || e->Iex.Const.con->Ico.U1 == False);
1415 HReg rTmp = newVRegI(env);
1416 addInstr(env, ARM64Instr_Imm64(rTmp, 0));
1417 ARM64RIL* one = mb_mkARM64RIL_I(1);
1418 vassert(one);
1419 addInstr(env, ARM64Instr_Test(rTmp, one));
1420 return e->Iex.Const.con->Ico.U1 ? ARM64cc_EQ : ARM64cc_NE;
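   /* Since rTmp holds zero, the Test against 1 always sets Z; hence EQ
      is an always-true condition and NE an always-false one, which is
      exactly what a constant True or False guard requires. */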
1423 /* Not1(e) */
1424 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1425 /* Generate code for the arg, and negate the test condition */
1426 ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
1427 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1428 return ARM64cc_AL;
1429 } else {
1430 return 1 ^ cc;
1434 /* --- patterns rooted at: 64to1 --- */
1436 if (e->tag == Iex_Unop
1437 && e->Iex.Unop.op == Iop_64to1) {
1438 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1439 ARM64RIL* one = mb_mkARM64RIL_I(1);
1440 vassert(one); /* '1' must be representable */
1441 addInstr(env, ARM64Instr_Test(rTmp, one));
1442 return ARM64cc_NE;
1445 /* --- patterns rooted at: CmpNEZ8 --- */
1447 if (e->tag == Iex_Unop
1448 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1449 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1450 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1451 addInstr(env, ARM64Instr_Test(r1, xFF));
1452 return ARM64cc_NE;
1455 /* --- patterns rooted at: CmpNEZ16 --- */
1457 if (e->tag == Iex_Unop
1458 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1459 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1460 ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
1461 addInstr(env, ARM64Instr_Test(r1, xFFFF));
1462 return ARM64cc_NE;
1465 /* --- patterns rooted at: CmpNEZ64 --- */
1467 if (e->tag == Iex_Unop
1468 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1469 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1470 ARM64RIA* zero = ARM64RIA_I12(0,0);
1471 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1472 return ARM64cc_NE;
1475 /* --- patterns rooted at: CmpNEZ32 --- */
1477 if (e->tag == Iex_Unop
1478 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1479 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1480 ARM64RIA* zero = ARM64RIA_I12(0,0);
1481 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1482 return ARM64cc_NE;
1485 /* --- Cmp*64*(x,y) --- */
1486 if (e->tag == Iex_Binop
1487 && (e->Iex.Binop.op == Iop_CmpEQ64
1488 || e->Iex.Binop.op == Iop_CmpNE64
1489 || e->Iex.Binop.op == Iop_CmpLT64S
1490 || e->Iex.Binop.op == Iop_CmpLT64U
1491 || e->Iex.Binop.op == Iop_CmpLE64S
1492 || e->Iex.Binop.op == Iop_CmpLE64U
1493 || e->Iex.Binop.op == Iop_CasCmpEQ64
1494 || e->Iex.Binop.op == Iop_CasCmpNE64)) {
1495 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1496 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1497 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1498 switch (e->Iex.Binop.op) {
1499 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return ARM64cc_EQ;
1500 case Iop_CmpNE64: case Iop_CasCmpNE64: return ARM64cc_NE;
1501 case Iop_CmpLT64S: return ARM64cc_LT;
1502 case Iop_CmpLT64U: return ARM64cc_CC;
1503 case Iop_CmpLE64S: return ARM64cc_LE;
1504 case Iop_CmpLE64U: return ARM64cc_LS;
1505 default: vpanic("iselCondCode_C(arm64): CmpXX64");
1509 /* --- Cmp*32*(x,y) --- */
1510 if (e->tag == Iex_Binop
1511 && (e->Iex.Binop.op == Iop_CmpEQ32
1512 || e->Iex.Binop.op == Iop_CmpNE32
1513 || e->Iex.Binop.op == Iop_CmpLT32S
1514 || e->Iex.Binop.op == Iop_CmpLT32U
1515 || e->Iex.Binop.op == Iop_CmpLE32S
1516 || e->Iex.Binop.op == Iop_CmpLE32U
1517 || e->Iex.Binop.op == Iop_CasCmpEQ32
1518 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1519 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1520 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1521 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1522 switch (e->Iex.Binop.op) {
1523 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return ARM64cc_EQ;
1524 case Iop_CmpNE32: case Iop_CasCmpNE32: return ARM64cc_NE;
1525 case Iop_CmpLT32S: return ARM64cc_LT;
1526 case Iop_CmpLT32U: return ARM64cc_CC;
1527 case Iop_CmpLE32S: return ARM64cc_LE;
1528 case Iop_CmpLE32U: return ARM64cc_LS;
1529 default: vpanic("iselCondCode_C(arm64): CmpXX32");
1533 /* --- Cmp*16*(x,y) --- */
1534 if (e->tag == Iex_Binop
1535 && (e->Iex.Binop.op == Iop_CasCmpEQ16
1536 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1537 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1538 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1539 HReg argL2 = widen_z_16_to_64(env, argL);
1540 HReg argR2 = widen_z_16_to_64(env, argR);
1541 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1542 switch (e->Iex.Binop.op) {
1543 case Iop_CasCmpEQ16: return ARM64cc_EQ;
1544 case Iop_CasCmpNE16: return ARM64cc_NE;
1545 default: vpanic("iselCondCode_C(arm64): CmpXX16");
1549 /* --- Cmp*8*(x,y) --- */
1550 if (e->tag == Iex_Binop
1551 && (e->Iex.Binop.op == Iop_CasCmpEQ8
1552 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1553 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1554 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1555 HReg argL2 = widen_z_8_to_64(env, argL);
1556 HReg argR2 = widen_z_8_to_64(env, argR);
1557 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1558 switch (e->Iex.Binop.op) {
1559 case Iop_CasCmpEQ8: return ARM64cc_EQ;
1560 case Iop_CasCmpNE8: return ARM64cc_NE;
1561 default: vpanic("iselCondCode_C(arm64): CmpXX8");
1565 /* --- And1(x,y), Or1(x,y) --- */
1566 if (e->tag == Iex_Binop
1567 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1568 HReg tmp = iselCondCode_R(env, e);
1569 ARM64RIL* one = mb_mkARM64RIL_I(1);
1570 vassert(one);
1571 addInstr(env, ARM64Instr_Test(tmp, one));
1572 return ARM64cc_NE;
1575 ppIRExpr(e);
1576 vpanic("iselCondCode_C");
1580 /* --------------------- CONDCODE as int reg --------------------- */
1582 /* Generate code to evaluate a bit-typed expression, returning the resulting
1583 value in bit 0 of an integer register. WARNING: all of the other bits in the
1584 register can be arbitrary. Callers must mask them off or otherwise ignore
1585 them, as necessary.
1587 Note that iselCondCode_C and iselCondCode_R are mutually recursive. For
1588 future changes to either of them, take care not to introduce an infinite
1589 loop involving the two of them.
1591 static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e )
1593 /* Uh, there's nothing we can sanity check here, unfortunately. */
1594 return iselCondCode_R_wrk(env,e);
1597 /* DO NOT CALL THIS DIRECTLY ! */
1598 static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e )
1600 vassert(e);
1601 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1603 /* var */
1604 if (e->tag == Iex_RdTmp) {
1605 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1608 /* And1(x,y), Or1(x,y) */
1609 if (e->tag == Iex_Binop
1610 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1611 HReg res = newVRegI(env);
1612 HReg x_as_64 = iselCondCode_R(env, e->Iex.Binop.arg1);
1613 HReg y_as_64 = iselCondCode_R(env, e->Iex.Binop.arg2);
1614 ARM64LogicOp lop
1615 = e->Iex.Binop.op == Iop_And1 ? ARM64lo_AND : ARM64lo_OR;
1616 addInstr(env, ARM64Instr_Logic(res, x_as_64, ARM64RIL_R(y_as_64), lop));
1617 return res;
1620 /* Anything else, we hand off to iselCondCode_C and force the value into a
1621 register. */
1622 HReg res = newVRegI(env);
1623 ARM64CondCode cc = iselCondCode_C(env, e);
1624 addInstr(env, ARM64Instr_Set64(res, cc));
1625 return res;
1627 ppIRExpr(e);
1628 vpanic("iselCondCode_R(arm64)");
1632 /* --------------------- Reg --------------------- */
1634 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1636 HReg r = iselIntExpr_R_wrk(env, e);
1637 /* sanity checks ... */
1638 # if 0
1639 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1640 # endif
1641 vassert(hregClass(r) == HRcInt64);
1642 vassert(hregIsVirtual(r));
1643 return r;
1646 /* DO NOT CALL THIS DIRECTLY ! */
1647 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1649 IRType ty = typeOfIRExpr(env->type_env,e);
1650 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1652 switch (e->tag) {
1654 /* --------- TEMP --------- */
1655 case Iex_RdTmp: {
1656 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1659 /* --------- LOAD --------- */
1660 case Iex_Load: {
1661 HReg dst = newVRegI(env);
1663 if (e->Iex.Load.end != Iend_LE)
1664 goto irreducible;
1666 if (ty == Ity_I64) {
1667 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1668 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1669 return dst;
1671 if (ty == Ity_I32) {
1672 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1673 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1674 return dst;
1676 if (ty == Ity_I16) {
1677 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1678 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1679 return dst;
1681 if (ty == Ity_I8) {
1682 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1683 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1684 return dst;
1686 break;
1689 /* --------- BINARY OP --------- */
1690 case Iex_Binop: {
1692 ARM64LogicOp lop = 0; /* invalid */
1693 ARM64ShiftOp sop = 0; /* invalid */
1695 /* Special-case 0-x into a Neg instruction. Not because it's
1696 particularly useful but more so as to give value flow using
1697 this instruction, so as to check its assembly correctness for
1698 implementation of Left32/Left64. */
1699 switch (e->Iex.Binop.op) {
1700 case Iop_Sub64:
1701 if (isZeroU64(e->Iex.Binop.arg1)) {
1702 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1703 HReg dst = newVRegI(env);
1704 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1705 return dst;
1707 break;
1708 default:
1709 break;
1712 /* AND64/OR64/XOR64/ADD64/SUB64(e1, e2 shifted by imm)
1713 AND64/OR64/XOR64/ADD64(e1 shifted by imm, e2)
1716 switch (e->Iex.Binop.op) {
1717 case Iop_And64: case Iop_Or64: case Iop_Xor64:
1718 case Iop_Add64: case Iop_Sub64:{
1719 ARM64RRSOp mainOp = ARM64rrs_INVALID;
1720 ARM64ShiftOp shiftOp = (ARM64ShiftOp)0; // Invalid
1721 IRExpr* argUnshifted = NULL;
1722 IRExpr* argToBeShifted = NULL;
1723 UChar amt = 0;
1724 if (matchesRegRegShift(&mainOp, &shiftOp, &amt, &argUnshifted,
1725 &argToBeShifted, e)) {
1726 HReg rDst = newVRegI(env);
1727 HReg rUnshifted = iselIntExpr_R(env, argUnshifted);
1728 HReg rToBeShifted = iselIntExpr_R(env, argToBeShifted);
1729 addInstr(env, ARM64Instr_RRS(rDst, rUnshifted, rToBeShifted,
1730 shiftOp, amt, mainOp));
1731 return rDst;
1734 default:
1735 break;
1739 /* ADD/SUB(e1, e2) (for any e1, e2) */
1740 switch (e->Iex.Binop.op) {
1741 case Iop_Add64: case Iop_Add32:
1742 case Iop_Sub64: case Iop_Sub32: {
1743 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1744 || e->Iex.Binop.op == Iop_Add32;
1745 HReg dst = newVRegI(env);
1746 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1747 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1748 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1749 return dst;
1751 default:
1752 break;
1755 /* AND/OR/XOR(e1, e2) (for any e1, e2) */
1756 switch (e->Iex.Binop.op) {
1757 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1758 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
1759 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1760 log_binop: {
1761 HReg dst = newVRegI(env);
1762 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1763 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1764 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1765 return dst;
1767 default:
1768 break;
1771 /* SHL/SHR/SAR */
1772 switch (e->Iex.Binop.op) {
1773 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1774 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1775 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1776 sh_binop: {
1777 HReg dst = newVRegI(env);
1778 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1779 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1780 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1781 return dst;
1783 case Iop_Shr32:
1784 case Iop_Sar32: {
1785 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1786 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1787 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1788 HReg dst = zx ? widen_z_32_to_64(env, argL)
1789 : widen_s_32_to_64(env, argL);
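            /* A logical shift right of the widened value is correct in both
               cases: for Sar32 the value has just been sign-extended to 64
               bits, so for shift amounts of 0..31 the low 32 bits of a
               64-bit logical shift match the 32-bit arithmetic shift, and
               only the low 32 bits matter here. */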
1790 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1791 return dst;
1793 default: break;
1796 /* MUL */
1797 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1798 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1799 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1800 HReg dst = newVRegI(env);
1801 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1802 return dst;
1805 /* MULL */
1806 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1807 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1808 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1809 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1810 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1811 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1812 HReg dst = newVRegI(env);
1813 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1814 return dst;
1817 /* Handle misc other ops. */
1819 if (e->Iex.Binop.op == Iop_Max32U) {
1820 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1821 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1822 HReg dst = newVRegI(env);
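         /* Unsigned 32-bit compare; ARM64cc_CS ("carry set", aka HS) means
            unsigned higher-or-same, so the CSel keeps argL when
            argL >= argR and argR otherwise. */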
1823 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1824 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1825 return dst;
1828 if (e->Iex.Binop.op == Iop_32HLto64) {
1829 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1830 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1831 HReg lo32 = widen_z_32_to_64(env, lo32s);
1832 HReg hi32 = newVRegI(env);
1833 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1834 ARM64sh_SHL));
1835 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1836 ARM64lo_OR));
1837 return hi32;
1840 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1841 Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1842 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1843 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1844 HReg dst = newVRegI(env);
1845 HReg imm = newVRegI(env);
1846 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1847 create in dst, the IRCmpF64Result encoded result. */
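         /* After FCMP the four conditions tested below (EQ, MI for "less
            than", GT, VS for "unordered") are mutually exclusive, so at
            most one CSel fires.  The initial zero in |dst| already encodes
            Ircr_GT, so the 0x00 selection below is redundant but harmless. */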
1848 addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1849 addInstr(env, ARM64Instr_Imm64(dst, 0));
1850 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1851 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1852 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1853 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1854 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1855 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1856 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1857 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1858 return dst;
1861 { /* local scope */
1862 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1863 Bool srcIsD = False;
1864 switch (e->Iex.Binop.op) {
1865 case Iop_F64toI64S:
1866 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1867 case Iop_F64toI64U:
1868 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1869 case Iop_F64toI32S:
1870 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1871 case Iop_F64toI32U:
1872 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1873 case Iop_F32toI32S:
1874 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1875 case Iop_F32toI32U:
1876 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1877 case Iop_F32toI64S:
1878 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1879 case Iop_F32toI64U:
1880 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1881 default:
1882 break;
1884 if (cvt_op != ARM64cvt_INVALID) {
1885 /* This is all a bit dodgy, because we can't handle a
1886 non-constant (not-known-at-JIT-time) rounding mode
1887 indication. That's because there's no instruction
1888 AFAICS that does this conversion but rounds according to
1889 FPCR.RM, so we have to bake the rounding mode into the
1890 instruction right now. But that should be OK because
1891 (1) the front end attaches a literal Irrm_ value to the
1892               conversion binop, and (2) iropt will never CSE that constant
1893               away into a temporary.  Hence we should always have a literal
1894               Irrm_ value as the first arg. */
1895 IRExpr* arg1 = e->Iex.Binop.arg1;
1896 if (arg1->tag != Iex_Const) goto irreducible;
1897 IRConst* arg1con = arg1->Iex.Const.con;
1898 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1899 UInt irrm = arg1con->Ico.U32;
1900 /* Find the ARM-encoded equivalent for |irrm|. */
1901 UInt armrm = 4; /* impossible */
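            /* This value is baked into the FCVT instruction by
               ARM64Instr_VCvtF2I: 0 = to nearest, 1 = towards +infinity,
               2 = towards -infinity, 3 = towards zero. */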
1902 switch (irrm) {
1903 case Irrm_NEAREST: armrm = 0; break;
1904 case Irrm_NegINF: armrm = 2; break;
1905 case Irrm_PosINF: armrm = 1; break;
1906 case Irrm_ZERO: armrm = 3; break;
1907 default: goto irreducible;
1909 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1910 (env, e->Iex.Binop.arg2);
1911 HReg dst = newVRegI(env);
1912 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1913 return dst;
1915 } /* local scope */
1917 /* All cases involving host-side helper calls. */
1918 void* fn = NULL;
1919 switch (e->Iex.Binop.op) {
1920 case Iop_DivU32:
1921 fn = &h_calc_udiv32_w_arm_semantics; break;
1922 case Iop_DivS32:
1923 fn = &h_calc_sdiv32_w_arm_semantics; break;
1924 case Iop_DivU64:
1925 fn = &h_calc_udiv64_w_arm_semantics; break;
1926 case Iop_DivS64:
1927 fn = &h_calc_sdiv64_w_arm_semantics; break;
1928 default:
1929 break;
1932 if (fn) {
1933 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1934 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1935 HReg res = newVRegI(env);
1936 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1937 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1938 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
1939 2, mk_RetLoc_simple(RLPri_Int) ));
1940 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1941 return res;
1944 break;
1947 /* --------- UNARY OP --------- */
1948 case Iex_Unop: {
1950 switch (e->Iex.Unop.op) {
1951 case Iop_16Uto64: {
1952 /* This probably doesn't occur often enough to be worth
1953 rolling the extension into the load. */
1954 IRExpr* arg = e->Iex.Unop.arg;
1955 HReg src = iselIntExpr_R(env, arg);
1956 HReg dst = widen_z_16_to_64(env, src);
1957 return dst;
1959 case Iop_32Uto64: {
1960 IRExpr* arg = e->Iex.Unop.arg;
1961 if (arg->tag == Iex_Load) {
1962 /* This correctly zero extends because _LdSt32 is
1963 defined to do a zero extending load. */
1964 HReg dst = newVRegI(env);
1965 ARM64AMode* am
1966 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1967 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1968 return dst;
1970 /* else be lame and mask it */
1971 HReg src = iselIntExpr_R(env, arg);
1972 HReg dst = widen_z_32_to_64(env, src);
1973 return dst;
1975 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
1976 case Iop_8Uto64: {
1977 IRExpr* arg = e->Iex.Unop.arg;
1978 if (arg->tag == Iex_Load) {
1979 /* This correctly zero extends because _LdSt8 is
1980 defined to do a zero extending load. */
1981 HReg dst = newVRegI(env);
1982 ARM64AMode* am
1983 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
1984 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
1985 return dst;
1987 /* else be lame and mask it */
1988 HReg src = iselIntExpr_R(env, arg);
1989 HReg dst = widen_z_8_to_64(env, src);
1990 return dst;
1992 case Iop_128HIto64: {
1993 HReg rHi, rLo;
1994 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1995 return rHi; /* and abandon rLo */
1997 case Iop_8Sto32: case Iop_8Sto64: {
1998 IRExpr* arg = e->Iex.Unop.arg;
1999 HReg src = iselIntExpr_R(env, arg);
2000 HReg dst = widen_s_8_to_64(env, src);
2001 return dst;
2003 case Iop_16Sto32: case Iop_16Sto64: {
2004 IRExpr* arg = e->Iex.Unop.arg;
2005 HReg src = iselIntExpr_R(env, arg);
2006 HReg dst = widen_s_16_to_64(env, src);
2007 return dst;
2009 case Iop_32Sto64: {
2010 IRExpr* arg = e->Iex.Unop.arg;
2011 HReg src = iselIntExpr_R(env, arg);
2012 HReg dst = widen_s_32_to_64(env, src);
2013 return dst;
2015 case Iop_Not32:
2016 case Iop_Not64: {
2017 HReg dst = newVRegI(env);
2018 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2019 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
2020 return dst;
2022 case Iop_Clz64: {
2023 HReg dst = newVRegI(env);
2024 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2025 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
2026 return dst;
2028 case Iop_Left32:
2029 case Iop_Left64: {
2030 /* Left64(src) = src | -src. Left32 can use the same
2031 implementation since in that case we don't care what
2032 the upper 32 bits become. */
2033 HReg dst = newVRegI(env);
2034 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2035 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2036 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2037 ARM64lo_OR));
2038 return dst;
2040 case Iop_CmpwNEZ64: {
2041 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
2042 = Left64(src) >>s 63 */
2043 HReg dst = newVRegI(env);
2044 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2045 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2046 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2047 ARM64lo_OR));
2048 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2049 ARM64sh_SAR));
2050 return dst;
2052 case Iop_CmpwNEZ32: {
2053 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
2054 = Left64(src & 0xFFFFFFFF) >>s 63 */
2055 HReg dst = newVRegI(env);
2056 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
2057 HReg src = widen_z_32_to_64(env, pre);
2058 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2059 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2060 ARM64lo_OR));
2061 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2062 ARM64sh_SAR));
2063 return dst;
2065 case Iop_V128to64: case Iop_V128HIto64: {
2066 HReg dst = newVRegI(env);
2067 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2068 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
2069 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
2070 return dst;
2072 case Iop_ReinterpF64asI64: {
2073 HReg dst = newVRegI(env);
2074 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2075 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
2076 return dst;
2078 case Iop_ReinterpF32asI32: {
2079 HReg dst = newVRegI(env);
2080 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2081 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
2082 return dst;
2084 case Iop_1Sto16:
2085 case Iop_1Sto32:
2086 case Iop_1Sto64: {
2087 /* As with the iselStmt case for 'tmp:I1 = expr', we could
2088               do a lot better here if it ever became necessary. (CSETM?) */
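            /* Use XZR directly as the zero source rather than loading zero
               into a fresh vreg.  The CSel leaves 0 or 1 in |dst|, and the
               shift-left/arithmetic-shift-right pair by 63 then smears bit 0
               across the whole register, giving 0..0 or 1..1. */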
2089 HReg zero = hregARM64_XZR_XSP(); // XZR in this context
2090 HReg one = newVRegI(env);
2091 HReg dst = newVRegI(env);
2092 addInstr(env, ARM64Instr_Imm64(one, 1));
2093 ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
2094 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2095 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2096 ARM64sh_SHL));
2097 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2098 ARM64sh_SAR));
2099 return dst;
2101 case Iop_NarrowUn16to8x8:
2102 case Iop_NarrowUn32to16x4:
2103 case Iop_NarrowUn64to32x2:
2104 case Iop_QNarrowUn16Sto8Sx8:
2105 case Iop_QNarrowUn32Sto16Sx4:
2106 case Iop_QNarrowUn64Sto32Sx2:
2107 case Iop_QNarrowUn16Uto8Ux8:
2108 case Iop_QNarrowUn32Uto16Ux4:
2109 case Iop_QNarrowUn64Uto32Ux2:
2110 case Iop_QNarrowUn16Sto8Ux8:
2111 case Iop_QNarrowUn32Sto16Ux4:
2112 case Iop_QNarrowUn64Sto32Ux2:
2114 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2115 HReg tmp = newVRegV(env);
2116 HReg dst = newVRegI(env);
2117 UInt dszBlg2 = 3; /* illegal */
2118 ARM64VecNarrowOp op = ARM64vecna_INVALID;
2119 switch (e->Iex.Unop.op) {
2120 case Iop_NarrowUn16to8x8:
2121 dszBlg2 = 0; op = ARM64vecna_XTN; break;
2122 case Iop_NarrowUn32to16x4:
2123 dszBlg2 = 1; op = ARM64vecna_XTN; break;
2124 case Iop_NarrowUn64to32x2:
2125 dszBlg2 = 2; op = ARM64vecna_XTN; break;
2126 case Iop_QNarrowUn16Sto8Sx8:
2127 dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
2128 case Iop_QNarrowUn32Sto16Sx4:
2129 dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
2130 case Iop_QNarrowUn64Sto32Sx2:
2131 dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
2132 case Iop_QNarrowUn16Uto8Ux8:
2133 dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
2134 case Iop_QNarrowUn32Uto16Ux4:
2135 dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
2136 case Iop_QNarrowUn64Uto32Ux2:
2137 dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
2138 case Iop_QNarrowUn16Sto8Ux8:
2139 dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
2140 case Iop_QNarrowUn32Sto16Ux4:
2141 dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
2142 case Iop_QNarrowUn64Sto32Ux2:
2143 dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
2144 default:
2145 vassert(0);
2147 addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
2148 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
2149 return dst;
2151 case Iop_1Uto64: {
2152 /* 1Uto64(tmp). */
2153 HReg dst = newVRegI(env);
2154 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
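               /* The I1 temp already holds the value in bit 0, with
                  arbitrary bits above, so just mask with 1 rather than
                  re-evaluating the condition. */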
2155 ARM64RIL* one = mb_mkARM64RIL_I(1);
2156 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
2157 vassert(one);
2158 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
2159 } else {
2160 /* CLONE-01 */
2161 HReg zero = hregARM64_XZR_XSP(); // XZR in this context
2162 HReg one = newVRegI(env);
2163 addInstr(env, ARM64Instr_Imm64(one, 1));
2164 ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
2165 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2167 return dst;
2169 case Iop_64to32:
2170 case Iop_64to16:
2171 case Iop_64to8:
2172 /* These are no-ops. */
2173 return iselIntExpr_R(env, e->Iex.Unop.arg);
2175 default:
2176 break;
2179 break;
2182 /* --------- GET --------- */
2183 case Iex_Get: {
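         /* The offset limits below come from the scaled 12-bit unsigned
            immediate forms of LDR/LDRH/LDRB: the offset must be a multiple
            of the access size and, once scaled, fit in 12 bits, hence
            e.g. (8<<12)-8 for the 64-bit case. */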
2184 if (ty == Ity_I64
2185 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
2186 HReg dst = newVRegI(env);
2187 ARM64AMode* am
2188 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2189 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2190 return dst;
2192 if (ty == Ity_I32
2193 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
2194 HReg dst = newVRegI(env);
2195 ARM64AMode* am
2196 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2197 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2198 return dst;
2200 if (ty == Ity_I16
2201 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2202 HReg dst = newVRegI(env);
2203 ARM64AMode* am
2204 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2205 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2206 return dst;
2208 if (ty == Ity_I8
2209 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2210 HReg dst = newVRegI(env);
2211 ARM64AMode* am
2212 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2213 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2214 return dst;
2216 break;
2219 /* --------- CCALL --------- */
2220 case Iex_CCall: {
2221 HReg dst = newVRegI(env);
2222 vassert(ty == e->Iex.CCall.retty);
2224 /* be very restrictive for now. Only 64-bit ints allowed for
2225 args, and 64 bits for return type. Don't forget to change
2226 the RetLoc if more types are allowed in future. */
2227 if (e->Iex.CCall.retty != Ity_I64)
2228 goto irreducible;
2230 /* Marshal args, do the call, clear stack. */
2231 UInt addToSp = 0;
2232 RetLoc rloc = mk_RetLoc_INVALID();
2233 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2234 e->Iex.CCall.cee, e->Iex.CCall.retty,
2235 e->Iex.CCall.args );
2236 /* */
2237 if (ok) {
2238 vassert(is_sane_RetLoc(rloc));
2239 vassert(rloc.pri == RLPri_Int);
2240 vassert(addToSp == 0);
2241 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2242 return dst;
2244 goto irreducible;
2247 /* --------- LITERAL --------- */
2248 /* 64-bit literals */
2249 case Iex_Const: {
2250 ULong u = 0;
2251 HReg dst = newVRegI(env);
2252 switch (e->Iex.Const.con->tag) {
2253 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2254 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2255 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2256 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2257 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2259 addInstr(env, ARM64Instr_Imm64(dst, u));
2260 return dst;
2263 /* --------- MULTIPLEX --------- */
2264 case Iex_ITE: {
2265 /* ITE(ccexpr, iftrue, iffalse) */
2266 if (ty == Ity_I64 || ty == Ity_I32) {
2267 ARM64CondCode cc;
2268 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2269 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2270 HReg dst = newVRegI(env);
2271 cc = iselCondCode_C(env, e->Iex.ITE.cond);
2272 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2273 return dst;
2275 break;
2278 default:
2279 break;
2280 } /* switch (e->tag) */
2282 /* We get here if no pattern matched. */
2283 irreducible:
2284 ppIRExpr(e);
2285 vpanic("iselIntExpr_R: cannot reduce tree");
2289 /*---------------------------------------------------------*/
2290 /*--- ISEL: Integer expressions (128 bit) ---*/
2291 /*---------------------------------------------------------*/
2293 /* Compute a 128-bit value into a register pair, which is returned as
2294 the first two parameters. As with iselIntExpr_R, these may be
2295 either real or virtual regs; in any case they must not be changed
2296 by subsequent code emitted by the caller. */
2298 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2299 ISelEnv* env, IRExpr* e )
2301 iselInt128Expr_wrk(rHi, rLo, env, e);
2302 # if 0
2303 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2304 # endif
2305 vassert(hregClass(*rHi) == HRcInt64);
2306 vassert(hregIsVirtual(*rHi));
2307 vassert(hregClass(*rLo) == HRcInt64);
2308 vassert(hregIsVirtual(*rLo));
2311 /* DO NOT CALL THIS DIRECTLY ! */
2312 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2313 ISelEnv* env, IRExpr* e )
2315 vassert(e);
2316 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2318 /* --------- BINARY ops --------- */
2319 if (e->tag == Iex_Binop) {
2320 switch (e->Iex.Binop.op) {
2321 /* 64 x 64 -> 128 multiply */
2322 case Iop_MullU64:
2323 case Iop_MullS64: {
2324 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
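            /* The low 64 bits of the product come from a plain multiply; the
               high 64 bits from the corresponding high-half multiply
               (ARM64mul_SX for signed, ARM64mul_ZX for unsigned). */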
2325 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2326 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2327 HReg dstLo = newVRegI(env);
2328 HReg dstHi = newVRegI(env);
2329 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2330 ARM64mul_PLAIN));
2331 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2332 syned ? ARM64mul_SX : ARM64mul_ZX));
2333 *rHi = dstHi;
2334 *rLo = dstLo;
2335 return;
2337 /* 64HLto128(e1,e2) */
2338 case Iop_64HLto128:
2339 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2340 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2341 return;
2342 default:
2343 break;
2345 } /* if (e->tag == Iex_Binop) */
2347 ppIRExpr(e);
2348 vpanic("iselInt128Expr(arm64)");
2352 /*---------------------------------------------------------*/
2353 /*--- ISEL: Vector expressions (128 bit) ---*/
2354 /*---------------------------------------------------------*/
2356 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
2358 HReg r = iselV128Expr_wrk( env, e );
2359 vassert(hregClass(r) == HRcVec128);
2360 vassert(hregIsVirtual(r));
2361 return r;
2364 /* DO NOT CALL THIS DIRECTLY */
2365 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
2367 IRType ty = typeOfIRExpr(env->type_env, e);
2368 vassert(e);
2369 vassert(ty == Ity_V128);
2371 if (e->tag == Iex_RdTmp) {
2372 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2375 if (e->tag == Iex_Const) {
2376 /* Only a very limited range of constants is handled. */
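      /* The Ico_V128 value is a 16-bit byte mask: bit i set means byte i of
         the 128-bit constant is 0xFF, clear means 0x00.  VImmQ can only
         materialise a handful of such masks directly; the cases below build
         the others by rotating one of those with EXT (VExtV), plus a NOT in
         one case. */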
2377 vassert(e->Iex.Const.con->tag == Ico_V128);
2378 UShort con = e->Iex.Const.con->Ico.V128;
2379 HReg res = newVRegV(env);
2380 switch (con) {
2381 case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
2382 addInstr(env, ARM64Instr_VImmQ(res, con));
2383 return res;
2384 case 0x00F0:
2385 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2386 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2387 return res;
2388 case 0x0F00:
2389 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2390 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2391 return res;
2392 case 0x0FF0:
2393 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2394 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2395 return res;
2396 case 0x0FFF:
2397 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2398 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2399 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2400 return res;
2401 case 0xF000:
2402 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2403 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2404 return res;
2405 case 0xFF00:
2406 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2407 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2408 return res;
2409 default:
2410 break;
2412 /* Unhandled */
2413 goto v128_expr_bad;
2416 if (e->tag == Iex_Load) {
2417 HReg res = newVRegV(env);
2418 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
2419 vassert(ty == Ity_V128);
2420 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
2421 return res;
2424 if (e->tag == Iex_Get) {
2425 UInt offs = (UInt)e->Iex.Get.offset;
2426 if (offs < (1<<12)) {
2427 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
2428 HReg res = newVRegV(env);
2429 vassert(ty == Ity_V128);
2430 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
2431 return res;
2433 goto v128_expr_bad;
2436 if (e->tag == Iex_Unop) {
2438 /* Iop_ZeroHIXXofV128 cases */
2439 UShort imm16 = 0;
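      /* imm16 uses the same byte-mask encoding as Ico_V128 constants: it
         selects the low bytes to keep, so for instance 0x00FF keeps the low
         64 bits and zeroes the rest. */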
2440 switch (e->Iex.Unop.op) {
2441 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
2442 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
2443 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
2444 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
2445 default: break;
2447 if (imm16 != 0) {
2448 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2449 HReg imm = newVRegV(env);
2450 HReg res = newVRegV(env);
2451 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
2452 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
2453 return res;
2456 /* Other cases */
2457 switch (e->Iex.Unop.op) {
2458 case Iop_NotV128:
2459 case Iop_Abs64Fx2: case Iop_Abs32Fx4:
2460 case Iop_Neg64Fx2: case Iop_Neg32Fx4:
2461 case Iop_Abs64x2: case Iop_Abs32x4:
2462 case Iop_Abs16x8: case Iop_Abs8x16:
2463 case Iop_Cls32x4: case Iop_Cls16x8: case Iop_Cls8x16:
2464 case Iop_Clz32x4: case Iop_Clz16x8: case Iop_Clz8x16:
2465 case Iop_Cnt8x16:
2466 case Iop_Reverse1sIn8_x16:
2467 case Iop_Reverse8sIn16_x8:
2468 case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
2469 case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
2470 case Iop_Reverse32sIn64_x2:
2471 case Iop_RecipEst32Ux4:
2472 case Iop_RSqrtEst32Ux4:
2473 case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
2474 case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
2476 HReg res = newVRegV(env);
2477 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2478 Bool setRM = False;
2479 ARM64VecUnaryOp op = ARM64vecu_INVALID;
2480 switch (e->Iex.Unop.op) {
2481 case Iop_NotV128: op = ARM64vecu_NOT; break;
2482 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
2483 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
2484 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
2485 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
2486 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
2487 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
2488 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
2489 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
2490 case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break;
2491 case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break;
2492 case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break;
2493 case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break;
2494 case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break;
2495 case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break;
2496 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
2497 case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break;
2498 case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break;
2499 case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break;
2500 case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break;
2501 case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break;
2502 case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break;
2503 case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
2504 case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break;
2505 case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break;
2506 case Iop_RecipEst64Fx2: setRM = True;
2507 op = ARM64vecu_FRECPE64x2; break;
2508 case Iop_RecipEst32Fx4: setRM = True;
2509 op = ARM64vecu_FRECPE32x4; break;
2510 case Iop_RSqrtEst64Fx2: setRM = True;
2511 op = ARM64vecu_FRSQRTE64x2; break;
2512 case Iop_RSqrtEst32Fx4: setRM = True;
2513 op = ARM64vecu_FRSQRTE32x4; break;
2514 default: vassert(0);
2516 if (setRM) {
2517 // This is a bit of a kludge. We should do rm properly for
2518 // these recip-est insns, but that would require changing the
2519 // primop's type to take an rmode.
2520 set_FPCR_rounding_mode(env, IRExpr_Const(
2521 IRConst_U32(Irrm_NEAREST)));
2523 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2524 return res;
2526 case Iop_CmpNEZ8x16:
2527 case Iop_CmpNEZ16x8:
2528 case Iop_CmpNEZ32x4:
2529 case Iop_CmpNEZ64x2: {
2530 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2531 HReg zero = newVRegV(env);
2532 HReg res = newVRegV(env);
2533 ARM64VecBinOp cmp = ARM64vecb_INVALID;
2534 switch (e->Iex.Unop.op) {
2535 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
2536 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
2537 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
2538 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
2539 default: vassert(0);
2541 // This is pretty feeble. Better: use CMP against zero
2542 // and avoid the extra instruction and extra register.
2543 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
2544 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
2545 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2546 return res;
2548 case Iop_V256toV128_0:
2549 case Iop_V256toV128_1: {
2550 HReg vHi, vLo;
2551 iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
2552 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
2554 case Iop_64UtoV128: {
2555 HReg res = newVRegV(env);
2556 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2557 addInstr(env, ARM64Instr_VQfromX(res, arg));
2558 return res;
2560 case Iop_Widen8Sto16x8: {
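         /* The trick used here and in the two cases below: ZIP1 of the low
            half of the vector with itself leaves two copies of each source
            lane side by side, so each widened lane holds
            (src << srcLaneBits) | src; the arithmetic shift right by the
            source lane width then yields the sign-extended value. */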
2561 HReg res = newVRegV(env);
2562 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2563 addInstr(env, ARM64Instr_VQfromX(res, arg));
2564 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
2565 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
2566 res, res, 8));
2567 return res;
2569 case Iop_Widen16Sto32x4: {
2570 HReg res = newVRegV(env);
2571 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2572 addInstr(env, ARM64Instr_VQfromX(res, arg));
2573 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
2574 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
2575 res, res, 16));
2576 return res;
2578 case Iop_Widen32Sto64x2: {
2579 HReg res = newVRegV(env);
2580 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2581 addInstr(env, ARM64Instr_VQfromX(res, arg));
2582 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
2583 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
2584 res, res, 32));
2585 return res;
2587 /* ... */
2588 default:
2589 break;
2590 } /* switch on the unop */
2591 } /* if (e->tag == Iex_Unop) */
2593 if (e->tag == Iex_Binop) {
2594 switch (e->Iex.Binop.op) {
2595 case Iop_Sqrt32Fx4:
2596 case Iop_Sqrt64Fx2: {
2597 HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
2598 HReg res = newVRegV(env);
2599 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
2600 ARM64VecUnaryOp op
2601 = e->Iex.Binop.op == Iop_Sqrt32Fx4
2602 ? ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2;
2603 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2604 return res;
2606 case Iop_64HLtoV128: {
2607 HReg res = newVRegV(env);
2608 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2609 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2610 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
2611 return res;
2613 /* -- Cases where we can generate a simple three-reg instruction. -- */
2614 case Iop_AndV128:
2615 case Iop_OrV128:
2616 case Iop_XorV128:
2617 case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
2618 case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
2619 case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
2620 case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
2621 case Iop_Add64x2: case Iop_Add32x4:
2622 case Iop_Add16x8: case Iop_Add8x16:
2623 case Iop_Sub64x2: case Iop_Sub32x4:
2624 case Iop_Sub16x8: case Iop_Sub8x16:
2625 case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
2626 case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
2627 case Iop_CmpEQ16x8: case Iop_CmpEQ8x16:
2628 case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
2629 case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
2630 case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
2631 case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
2632 case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
2633 case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
2634 case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
2635 case Iop_Perm8x16:
2636 case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
2637 case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
2638 case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
2639 case Iop_CatOddLanes16x8: case Iop_CatOddLanes8x16:
2640 case Iop_InterleaveHI32x4:
2641 case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
2642 case Iop_InterleaveLO32x4:
2643 case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
2644 case Iop_PolynomialMul8x16:
2645 case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
2646 case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
2647 case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
2648 case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
2649 case Iop_QSub64Sx2: case Iop_QSub32Sx4:
2650 case Iop_QSub16Sx8: case Iop_QSub8Sx16:
2651 case Iop_QSub64Ux2: case Iop_QSub32Ux4:
2652 case Iop_QSub16Ux8: case Iop_QSub8Ux16:
2653 case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8:
2654 case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
2655 case Iop_Sh8Sx16: case Iop_Sh16Sx8:
2656 case Iop_Sh32Sx4: case Iop_Sh64Sx2:
2657 case Iop_Sh8Ux16: case Iop_Sh16Ux8:
2658 case Iop_Sh32Ux4: case Iop_Sh64Ux2:
2659 case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
2660 case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
2661 case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
2662 case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
2663 case Iop_Max64Fx2: case Iop_Max32Fx4:
2664 case Iop_Min64Fx2: case Iop_Min32Fx4:
2665 case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
2666 case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
2668 HReg res = newVRegV(env);
2669 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2670 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2671 Bool sw = False;
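               /* sw == True means the operands are swapped when the
                  instruction is emitted (see the VBinV below): used to get
                  LE/LT from GE/GT, and to get the required lane order for
                  the interleave/cat-lanes cases. */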
2672 Bool setRM = False;
2673 ARM64VecBinOp op = ARM64vecb_INVALID;
2674 switch (e->Iex.Binop.op) {
2675 case Iop_AndV128: op = ARM64vecb_AND; break;
2676 case Iop_OrV128: op = ARM64vecb_ORR; break;
2677 case Iop_XorV128: op = ARM64vecb_XOR; break;
2678 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
2679 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
2680 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break;
2681 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
2682 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
2683 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
2684 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
2685 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
2686 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
2687 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
2688 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
2689 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
2690 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
2691 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
2692 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
2693 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
2694 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
2695 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
2696 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
2697 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
2698 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
2699 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
2700 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break;
2701 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
2702 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
2703 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
2704 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
2705 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
2706 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
2707 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
2708 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
2709 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
2710 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
2711 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
2712 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
2713 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
2714 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
2715 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
2716 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
2717 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
2718 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
2719 case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
2720 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
2721 break;
2722 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
2723 break;
2724 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
2725 break;
2726 case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
2727 break;
2728 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
2729 break;
2730 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True;
2731 break;
2732 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True;
2733 break;
2734 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True;
2735 break;
2736 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
2737 break;
2738 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
2739 break;
2740 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
2741 break;
2742 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
2743 break;
2744 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
2745 break;
2746 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
2747 break;
2748 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
2749 case Iop_QAdd64Sx2: op = ARM64vecb_SQADD64x2; break;
2750 case Iop_QAdd32Sx4: op = ARM64vecb_SQADD32x4; break;
2751 case Iop_QAdd16Sx8: op = ARM64vecb_SQADD16x8; break;
2752 case Iop_QAdd8Sx16: op = ARM64vecb_SQADD8x16; break;
2753 case Iop_QAdd64Ux2: op = ARM64vecb_UQADD64x2; break;
2754 case Iop_QAdd32Ux4: op = ARM64vecb_UQADD32x4; break;
2755 case Iop_QAdd16Ux8: op = ARM64vecb_UQADD16x8; break;
2756 case Iop_QAdd8Ux16: op = ARM64vecb_UQADD8x16; break;
2757 case Iop_QSub64Sx2: op = ARM64vecb_SQSUB64x2; break;
2758 case Iop_QSub32Sx4: op = ARM64vecb_SQSUB32x4; break;
2759 case Iop_QSub16Sx8: op = ARM64vecb_SQSUB16x8; break;
2760 case Iop_QSub8Sx16: op = ARM64vecb_SQSUB8x16; break;
2761 case Iop_QSub64Ux2: op = ARM64vecb_UQSUB64x2; break;
2762 case Iop_QSub32Ux4: op = ARM64vecb_UQSUB32x4; break;
2763 case Iop_QSub16Ux8: op = ARM64vecb_UQSUB16x8; break;
2764 case Iop_QSub8Ux16: op = ARM64vecb_UQSUB8x16; break;
2765 case Iop_QDMulHi32Sx4: op = ARM64vecb_SQDMULH32x4; break;
2766 case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break;
2767 case Iop_QRDMulHi32Sx4: op = ARM64vecb_SQRDMULH32x4; break;
2768 case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break;
2769 case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break;
2770 case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break;
2771 case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break;
2772 case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break;
2773 case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break;
2774 case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break;
2775 case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break;
2776 case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break;
2777 case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break;
2778 case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break;
2779 case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break;
2780 case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break;
2781 case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break;
2782 case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break;
2783 case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break;
2784 case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break;
2785 case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2; break;
2786 case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break;
2787 case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break;
2788 case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break;
2789 case Iop_RecipStep64Fx2: setRM = True;
2790 op = ARM64vecb_FRECPS64x2; break;
2791 case Iop_RecipStep32Fx4: setRM = True;
2792 op = ARM64vecb_FRECPS32x4; break;
2793 case Iop_RSqrtStep64Fx2: setRM = True;
2794 op = ARM64vecb_FRSQRTS64x2; break;
2795 case Iop_RSqrtStep32Fx4: setRM = True;
2796 op = ARM64vecb_FRSQRTS32x4; break;
2797 default: vassert(0);
2799 if (setRM) {
2800 // This is a bit of a kludge. We should do rm properly for
2801 // these recip-step insns, but that would require changing the
2802 // primop's type to take an rmode.
2803 set_FPCR_rounding_mode(env, IRExpr_Const(
2804 IRConst_U32(Irrm_NEAREST)));
2806 if (sw) {
2807 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
2808 } else {
2809 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
2811 return res;
2813 /* -- These only have 2 operand instructions, so we have to first move
2814 the first argument into a new register, for modification. -- */
2815 case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2816 case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2817 case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2818 case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2820 HReg res = newVRegV(env);
2821 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2822 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2823 ARM64VecModifyOp op = ARM64vecmo_INVALID;
2824 switch (e->Iex.Binop.op) {
2825                /* In the following 8 cases, the US - SU switching is intended.
2826                   See the comments in libvex_ir.h for details, and also the
2827                   ARM64 front end, where these primops are generated. */
2828 case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2829 case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2830 case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2831 case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2832 case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2833 case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2834 case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2835 case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2836 default: vassert(0);
2838 /* The order of the operands is important. Although this is
2839 basically addition, the two operands are extended differently,
2840 making it important to get them into the correct registers in
2841 the instruction. */
2842 addInstr(env, ARM64Instr_VMov(16, res, argR));
2843 addInstr(env, ARM64Instr_VModifyV(op, res, argL));
2844 return res;
2846 /* -- Shifts by an immediate. -- */
2847 case Iop_ShrN64x2: case Iop_ShrN32x4:
2848 case Iop_ShrN16x8: case Iop_ShrN8x16:
2849 case Iop_SarN64x2: case Iop_SarN32x4:
2850 case Iop_SarN16x8: case Iop_SarN8x16:
2851 case Iop_ShlN64x2: case Iop_ShlN32x4:
2852 case Iop_ShlN16x8: case Iop_ShlN8x16:
2853 case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2854 case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2855 case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2856 case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2857 case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2858 case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2860 IRExpr* argL = e->Iex.Binop.arg1;
2861 IRExpr* argR = e->Iex.Binop.arg2;
2862 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2863 UInt amt = argR->Iex.Const.con->Ico.U8;
2864 UInt limLo = 0;
2865 UInt limHi = 0;
2866 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2867 /* Establish the instruction to use. */
2868 switch (e->Iex.Binop.op) {
2869 case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break;
2870 case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break;
2871 case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break;
2872 case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break;
2873 case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break;
2874 case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break;
2875 case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break;
2876 case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break;
2877 case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break;
2878 case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break;
2879 case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break;
2880 case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break;
2881 case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break;
2882 case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break;
2883 case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break;
2884 case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break;
2885 case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break;
2886 case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break;
2887 case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break;
2888 case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break;
2889 case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2890 case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2891 case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2892 case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2893 default: vassert(0);
2895 /* Establish the shift limits, for sanity check purposes only. */
2896 switch (e->Iex.Binop.op) {
2897 case Iop_ShrN64x2: limLo = 1; limHi = 64; break;
2898 case Iop_ShrN32x4: limLo = 1; limHi = 32; break;
2899 case Iop_ShrN16x8: limLo = 1; limHi = 16; break;
2900 case Iop_ShrN8x16: limLo = 1; limHi = 8; break;
2901 case Iop_SarN64x2: limLo = 1; limHi = 64; break;
2902 case Iop_SarN32x4: limLo = 1; limHi = 32; break;
2903 case Iop_SarN16x8: limLo = 1; limHi = 16; break;
2904 case Iop_SarN8x16: limLo = 1; limHi = 8; break;
2905 case Iop_ShlN64x2: limLo = 0; limHi = 63; break;
2906 case Iop_ShlN32x4: limLo = 0; limHi = 31; break;
2907 case Iop_ShlN16x8: limLo = 0; limHi = 15; break;
2908 case Iop_ShlN8x16: limLo = 0; limHi = 7; break;
2909 case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2910 case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2911 case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2912 case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7; break;
2913 case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2914 case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2915 case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
2916 case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7; break;
2917 case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
2918 case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
2919 case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
2920 case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7; break;
2921 default: vassert(0);
2923 /* For left shifts, the allowable amt values are
2924 0 .. lane_bits-1. For right shifts the allowable
2925 values are 1 .. lane_bits. */
2926 if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
2927 HReg src = iselV128Expr(env, argL);
2928 HReg dst = newVRegV(env);
2929 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2930 return dst;
2932 /* Special case some no-op shifts that the arm64 front end
2933 throws at us. We can't generate any instructions for these,
2934 but we don't need to either. */
2935 switch (e->Iex.Binop.op) {
2936 case Iop_ShrN64x2: case Iop_ShrN32x4:
2937 case Iop_ShrN16x8: case Iop_ShrN8x16:
2938 if (amt == 0) {
2939 return iselV128Expr(env, argL);
2941 break;
2942 default:
2943 break;
2945 /* otherwise unhandled */
2947 /* else fall out; this is unhandled */
2948 break;
2950 /* -- Saturating narrowing by an immediate -- */
2951 /* uu */
2952 case Iop_QandQShrNnarrow16Uto8Ux8:
2953 case Iop_QandQShrNnarrow32Uto16Ux4:
2954 case Iop_QandQShrNnarrow64Uto32Ux2:
2955 /* ss */
2956 case Iop_QandQSarNnarrow16Sto8Sx8:
2957 case Iop_QandQSarNnarrow32Sto16Sx4:
2958 case Iop_QandQSarNnarrow64Sto32Sx2:
2959 /* su */
2960 case Iop_QandQSarNnarrow16Sto8Ux8:
2961 case Iop_QandQSarNnarrow32Sto16Ux4:
2962 case Iop_QandQSarNnarrow64Sto32Ux2:
2963 /* ruu */
2964 case Iop_QandQRShrNnarrow16Uto8Ux8:
2965 case Iop_QandQRShrNnarrow32Uto16Ux4:
2966 case Iop_QandQRShrNnarrow64Uto32Ux2:
2967 /* rss */
2968 case Iop_QandQRSarNnarrow16Sto8Sx8:
2969 case Iop_QandQRSarNnarrow32Sto16Sx4:
2970 case Iop_QandQRSarNnarrow64Sto32Sx2:
2971 /* rsu */
2972 case Iop_QandQRSarNnarrow16Sto8Ux8:
2973 case Iop_QandQRSarNnarrow32Sto16Ux4:
2974 case Iop_QandQRSarNnarrow64Sto32Ux2:
2976 IRExpr* argL = e->Iex.Binop.arg1;
2977 IRExpr* argR = e->Iex.Binop.arg2;
2978 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2979 UInt amt = argR->Iex.Const.con->Ico.U8;
2980 UInt limit = 0;
2981 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2982 switch (e->Iex.Binop.op) {
2983 /* uu */
2984 case Iop_QandQShrNnarrow64Uto32Ux2:
2985 op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
2986 case Iop_QandQShrNnarrow32Uto16Ux4:
2987 op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
2988 case Iop_QandQShrNnarrow16Uto8Ux8:
2989 op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
2990 /* ss */
2991 case Iop_QandQSarNnarrow64Sto32Sx2:
2992 op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
2993 case Iop_QandQSarNnarrow32Sto16Sx4:
2994 op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
2995 case Iop_QandQSarNnarrow16Sto8Sx8:
2996 op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
2997 /* su */
2998 case Iop_QandQSarNnarrow64Sto32Ux2:
2999 op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
3000 case Iop_QandQSarNnarrow32Sto16Ux4:
3001 op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
3002 case Iop_QandQSarNnarrow16Sto8Ux8:
3003 op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
3004 /* ruu */
3005 case Iop_QandQRShrNnarrow64Uto32Ux2:
3006 op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
3007 case Iop_QandQRShrNnarrow32Uto16Ux4:
3008 op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
3009 case Iop_QandQRShrNnarrow16Uto8Ux8:
3010 op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
3011 /* rss */
3012 case Iop_QandQRSarNnarrow64Sto32Sx2:
3013 op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
3014 case Iop_QandQRSarNnarrow32Sto16Sx4:
3015 op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
3016 case Iop_QandQRSarNnarrow16Sto8Sx8:
3017 op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
3018 /* rsu */
3019 case Iop_QandQRSarNnarrow64Sto32Ux2:
3020 op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
3021 case Iop_QandQRSarNnarrow32Sto16Ux4:
3022 op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
3023 case Iop_QandQRSarNnarrow16Sto8Ux8:
3024 op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
3025 /**/
3026 default:
3027 vassert(0);
3029 if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
3030 HReg src = iselV128Expr(env, argL);
3031 HReg dst = newVRegV(env);
3032 HReg fpsr = newVRegI(env);
3033 /* Clear FPSR.Q, do the operation, and return both its
3034 result and the new value of FPSR.Q. We can simply
3035 zero out FPSR since all the other bits have no relevance
3036 in VEX generated code. */
3037 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3038 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3039 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
3040 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
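            /* FPSR.QC (cumulative saturation) is bit 27: shift it down to
               bit 0 and mask, so |fpsr| ends up holding just the Q bit. */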
3041 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3042 ARM64sh_SHR));
3043 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3044 vassert(ril_one);
3045 addInstr(env, ARM64Instr_Logic(fpsr,
3046 fpsr, ril_one, ARM64lo_AND));
3047 /* Now we have: the main (shift) result in the bottom half
3048 of |dst|, and the Q bit at the bottom of |fpsr|.
3049 Combining them with a "InterleaveLO64x2" style operation
3050 produces a 128 bit value, dst[63:0]:fpsr[63:0],
3051 which is what we want. */
3052 HReg scratch = newVRegV(env);
3053 addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
3054 addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
3055 dst, dst, scratch));
3056 return dst;
3059 /* else fall out; this is unhandled */
3060 break;
3063 // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
3064 // as it is in some ways more general and often leads to better
3065 // code overall.
3066 case Iop_ShlV128:
3067 case Iop_ShrV128: {
3068 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
3069 /* This is tricky. Generate an EXT instruction with zeroes in
3070 the high operand (shift right) or low operand (shift left).
3071 Note that we can only slice in the EXT instruction at a byte
3072 level of granularity, so the shift amount needs careful
3073 checking. */
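         /* For example, ShrV128 by 64 bits gives amt == 0x40 and immB == 8;
            the EXT below, with the zero vector as the high operand and
            immediate 8, selects bytes 8..15 of the source followed by eight
            zero bytes, i.e. the source shifted right by 64 bits. */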
3074 IRExpr* argL = e->Iex.Binop.arg1;
3075 IRExpr* argR = e->Iex.Binop.arg2;
3076 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
3077 UInt amt = argR->Iex.Const.con->Ico.U8;
3078 Bool amtOK = False;
3079 switch (amt) {
3080 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
3081 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
3082 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
3083 amtOK = True; break;
3085 /* We could also deal with amt==0 by copying the source to
3086 the destination, but there's no need for that so far. */
3087 if (amtOK) {
3088 HReg src = iselV128Expr(env, argL);
3089 HReg srcZ = newVRegV(env);
3090 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
3091 UInt immB = amt / 8;
3092 vassert(immB >= 1 && immB <= 15);
3093 HReg dst = newVRegV(env);
3094 if (isSHR) {
3095 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
3096 immB));
3097 } else {
3098 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
3099 16 - immB));
3101 return dst;
3104 /* else fall out; this is unhandled */
3105 break;
3108 case Iop_PolynomialMull8x8:
3109 case Iop_Mull32Ux2:
3110 case Iop_Mull16Ux4:
3111 case Iop_Mull8Ux8:
3112 case Iop_Mull32Sx2:
3113 case Iop_Mull16Sx4:
3114 case Iop_Mull8Sx8:
3115 case Iop_QDMull32Sx2:
3116 case Iop_QDMull16Sx4:
3118 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
3119 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3120 HReg vSrcL = newVRegV(env);
3121 HReg vSrcR = newVRegV(env);
3122 HReg dst = newVRegV(env);
3123 ARM64VecBinOp op = ARM64vecb_INVALID;
3124 switch (e->Iex.Binop.op) {
3125 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break;
3126 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break;
3127 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break;
3128 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break;
3129 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break;
3130 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; break;
3131 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break;
3132 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break;
3133 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break;
3134 default: vassert(0);
3136 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
3137 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
3138 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
3139 return dst;
3142 /* ... */
3143 default:
3144 break;
3145 } /* switch on the binop */
3146 } /* if (e->tag == Iex_Binop) */
3148 if (e->tag == Iex_Triop) {
3149 IRTriop* triop = e->Iex.Triop.details;
3150 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
3151 switch (triop->op) {
3152 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
3153 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
3154 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
3155 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
3156 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
3157 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
3158 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
3159 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
3160 default: break;
3162 if (vecbop != ARM64vecb_INVALID) {
3163 HReg argL = iselV128Expr(env, triop->arg2);
3164 HReg argR = iselV128Expr(env, triop->arg3);
3165 HReg dst = newVRegV(env);
3166 set_FPCR_rounding_mode(env, triop->arg1);
3167 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
3168 return dst;
3171 if (triop->op == Iop_SliceV128) {
3172 /* Note that, compared to ShlV128/ShrV128 just above, the shift
3173 amount here is in bytes, not bits. */
3174 IRExpr* argHi = triop->arg1;
3175 IRExpr* argLo = triop->arg2;
3176 IRExpr* argAmt = triop->arg3;
3177 if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
3178 UInt amt = argAmt->Iex.Const.con->Ico.U8;
3179 Bool amtOK = amt >= 1 && amt <= 15;
3180             /* We could also deal with amt==0 by copying argLo to
3181 the destination, but there's no need for that so far. */
3182 if (amtOK) {
3183 HReg srcHi = iselV128Expr(env, argHi);
3184 HReg srcLo = iselV128Expr(env, argLo);
3185 HReg dst = newVRegV(env);
3186 addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
3187 return dst;
3190 /* else fall out; this is unhandled */
3193 } /* if (e->tag == Iex_Triop) */
3195 if (e->tag == Iex_ITE) {
3196 // This code sequence is pretty feeble. We'd do better to generate BSL
3197 // here.
3198 HReg rX = newVRegI(env);
3200 ARM64CondCode cc = iselCondCode_C(env, e->Iex.ITE.cond);
3201 addInstr(env, ARM64Instr_Set64(rX, cc));
3202 // cond: rX = 1 !cond: rX = 0
3204 // Mask the Set64 result. This is paranoia (should be unnecessary).
3205 ARM64RIL* one = mb_mkARM64RIL_I(1);
3206 vassert(one);
3207 addInstr(env, ARM64Instr_Logic(rX, rX, one, ARM64lo_AND));
3208 // cond: rX = 1 !cond: rX = 0
3210 // Propagate to all bits in the 64 bit word by subtracting 1 from it.
3211 // This also inverts the sense of the value.
3212 addInstr(env, ARM64Instr_Arith(rX, rX, ARM64RIA_I12(1,0),
3213 /*isAdd=*/False));
3214 // cond: rX = 0-(62)-0 !cond: rX = 1-(62)-1
3216 // Duplicate rX into a vector register
3217 HReg vMask = newVRegV(env);
3218 addInstr(env, ARM64Instr_VQfromXX(vMask, rX, rX));
3219 // cond: vMask = 0-(126)-0 !cond: vMask = 1-(126)-1
3221 HReg vIfTrue = iselV128Expr(env, e->Iex.ITE.iftrue);
3222 HReg vIfFalse = iselV128Expr(env, e->Iex.ITE.iffalse);
3224 // Mask out iffalse value as needed
3225 addInstr(env,
3226 ARM64Instr_VBinV(ARM64vecb_AND, vIfFalse, vIfFalse, vMask));
3228 // Invert the mask so we can use it for the iftrue value
3229 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, vMask, vMask));
3230 // cond: vMask = 1-(126)-1 !cond: vMask = 0-(126)-0
3232 // Mask out iftrue value as needed
3233 addInstr(env,
3234 ARM64Instr_VBinV(ARM64vecb_AND, vIfTrue, vIfTrue, vMask));
3236 // Merge the masked iftrue and iffalse results.
3237 HReg res = newVRegV(env);
3238 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ORR, res, vIfTrue, vIfFalse));
3240 return res;
3243 v128_expr_bad:
3244 ppIRExpr(e);
3245 vpanic("iselV128Expr_wrk");
3249 /*---------------------------------------------------------*/
3250 /*--- ISEL: Floating point expressions (64 bit) ---*/
3251 /*---------------------------------------------------------*/
3253 /* Compute a 64-bit floating point value into a register, the identity
3254 of which is returned. As with iselIntExpr_R, the reg may be either
3255 real or virtual; in any case it must not be changed by subsequent
3256 code emitted by the caller. */
3258 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
3260 HReg r = iselDblExpr_wrk( env, e );
3261 # if 0
3262 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3263 # endif
3264 vassert(hregClass(r) == HRcFlt64);
3265 vassert(hregIsVirtual(r));
3266 return r;
3269 /* DO NOT CALL THIS DIRECTLY */
3270 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
3272 IRType ty = typeOfIRExpr(env->type_env,e);
3273 vassert(e);
3274 vassert(ty == Ity_F64);
3276 if (e->tag == Iex_RdTmp) {
3277 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3280 if (e->tag == Iex_Const) {
3281 IRConst* con = e->Iex.Const.con;
3282 if (con->tag == Ico_F64i) {
3283 HReg src = newVRegI(env);
3284 HReg dst = newVRegD(env);
3285 addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
3286 addInstr(env, ARM64Instr_VDfromX(dst, src));
3287 return dst;
3289 if (con->tag == Ico_F64) {
3290 HReg src = newVRegI(env);
3291 HReg dst = newVRegD(env);
3292 union { Double d64; ULong u64; } u;
3293 vassert(sizeof(u) == 8);
3294 u.d64 = con->Ico.F64;
3295 addInstr(env, ARM64Instr_Imm64(src, u.u64));
3296 addInstr(env, ARM64Instr_VDfromX(dst, src));
3297 return dst;
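      /* In both constant cases the IEEE754 bit pattern is simply
         materialised in an integer register and moved across to a D
         register; for instance a Double of 1.0 corresponds to the
         64-bit pattern 0x3FF0000000000000. */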
3301 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3302 vassert(e->Iex.Load.ty == Ity_F64);
3303 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3304 HReg res = newVRegD(env);
3305 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
3306 return res;
3309 if (e->tag == Iex_Get) {
3310 Int offs = e->Iex.Get.offset;
3311 if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
3312 HReg rD = newVRegD(env);
3313 HReg rN = get_baseblock_register();
3314 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
3315 return rD;
3319 if (e->tag == Iex_Unop) {
3320 switch (e->Iex.Unop.op) {
3321 case Iop_NegF64: {
3322 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3323 HReg dst = newVRegD(env);
3324 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
3325 return dst;
3327 case Iop_AbsF64: {
3328 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3329 HReg dst = newVRegD(env);
3330 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
3331 return dst;
3333 case Iop_F32toF64: {
3334 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3335 HReg dst = newVRegD(env);
3336 addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
3337 return dst;
3339 case Iop_F16toF64: {
3340 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3341 HReg dst = newVRegD(env);
3342 addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
3343 return dst;
3345 case Iop_I32UtoF64:
3346 case Iop_I32StoF64: {
3347 /* Rounding mode is not involved here: any 32-bit integer fits
3348 exactly in F64's 53-bit significand, so the conversion is
3349 always exact. */
3350 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3351 HReg dst = newVRegD(env);
3352 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
3353 ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
3354 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
3355 return dst;
3357 default:
3358 break;
3362 if (e->tag == Iex_Binop) {
3363 switch (e->Iex.Binop.op) {
3364 case Iop_RoundF64toInt:
3365 case Iop_SqrtF64:
3366 case Iop_RecpExpF64: {
3367 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3368 HReg dst = newVRegD(env);
3369 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3370 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3371 switch (e->Iex.Binop.op) {
3372 case Iop_RoundF64toInt: op = ARM64fpu_RINT; break;
3373 case Iop_SqrtF64: op = ARM64fpu_SQRT; break;
3374 case Iop_RecpExpF64: op = ARM64fpu_RECPX; break;
3375 default: vassert(0);
3377 addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
3378 return dst;
3380 case Iop_I64StoF64:
3381 case Iop_I64UtoF64: {
3382 ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
3383 ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
3384 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3385 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3386 HReg dstS = newVRegD(env);
3387 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3388 return dstS;
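      /* Unlike the 32-bit cases handled in the Unop section above, a
         64-bit integer need not be exactly representable as an F64
         (the significand is only 53 bits), so here the rounding mode
         does matter and is set before the conversion. */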
3390 default:
3391 break;
3395 if (e->tag == Iex_Triop) {
3396 IRTriop* triop = e->Iex.Triop.details;
3397 ARM64FpBinOp dblop = ARM64fpb_INVALID;
3398 switch (triop->op) {
3399 case Iop_DivF64: dblop = ARM64fpb_DIV; break;
3400 case Iop_MulF64: dblop = ARM64fpb_MUL; break;
3401 case Iop_SubF64: dblop = ARM64fpb_SUB; break;
3402 case Iop_AddF64: dblop = ARM64fpb_ADD; break;
3403 default: break;
3405 if (dblop != ARM64fpb_INVALID) {
3406 HReg argL = iselDblExpr(env, triop->arg2);
3407 HReg argR = iselDblExpr(env, triop->arg3);
3408 HReg dst = newVRegD(env);
3409 set_FPCR_rounding_mode(env, triop->arg1);
3410 addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
3411 return dst;
3415 if (e->tag == Iex_Qop) {
3416 IRQop* qop = e->Iex.Qop.details;
3417 ARM64FpTriOp triop = ARM64fpt_INVALID;
3418 switch (qop->op) {
3419 case Iop_MAddF64: triop = ARM64fpt_FMADD; break;
3420 case Iop_MSubF64: triop = ARM64fpt_FMSUB; break;
3421 default: break;
3423 if (triop != ARM64fpt_INVALID) {
3424 HReg N = iselDblExpr(env, qop->arg2);
3425 HReg M = iselDblExpr(env, qop->arg3);
3426 HReg A = iselDblExpr(env, qop->arg4);
3427 HReg dst = newVRegD(env);
3428 set_FPCR_rounding_mode(env, qop->arg1);
3429 addInstr(env, ARM64Instr_VTriD(triop, dst, N, M, A));
3430 return dst;
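      /* The FMADD/FMSUB forms are fused: the multiply and the
         accumulate are performed with a single rounding, matching the
         semantics of the IR's MAddF64/MSubF64 operations. */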
3434 if (e->tag == Iex_ITE) {
3435 /* ITE(ccexpr, iftrue, iffalse) */
3436 ARM64CondCode cc;
3437 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3438 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3439 HReg dst = newVRegD(env);
3440 cc = iselCondCode_C(env, e->Iex.ITE.cond);
3441 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
3442 return dst;
3445 ppIRExpr(e);
3446 vpanic("iselDblExpr_wrk");
3450 /*---------------------------------------------------------*/
3451 /*--- ISEL: Floating point expressions (32 bit) ---*/
3452 /*---------------------------------------------------------*/
3454 /* Compute a 32-bit floating point value into a register, the identity
3455 of which is returned. As with iselIntExpr_R, the reg may be either
3456 real or virtual; in any case it must not be changed by subsequent
3457 code emitted by the caller. Values are generated into HRcFlt64
3458 registers despite the values themselves being Ity_F32s. */
3460 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3462 HReg r = iselFltExpr_wrk( env, e );
3463 # if 0
3464 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3465 # endif
3466 vassert(hregClass(r) == HRcFlt64);
3467 vassert(hregIsVirtual(r));
3468 return r;
3471 /* DO NOT CALL THIS DIRECTLY */
3472 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3474 IRType ty = typeOfIRExpr(env->type_env,e);
3475 vassert(e);
3476 vassert(ty == Ity_F32);
3478 if (e->tag == Iex_RdTmp) {
3479 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3482 if (e->tag == Iex_Const) {
3483 /* This is something of a kludge. Since a 32 bit floating point
3484 zero is just .. all zeroes, just create a 64 bit zero word
3485 and transfer it. This avoids having to create a SfromW
3486 instruction for this specific case. */
3487 IRConst* con = e->Iex.Const.con;
3488 if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
3489 HReg src = newVRegI(env);
3490 HReg dst = newVRegD(env);
3491 addInstr(env, ARM64Instr_Imm64(src, 0));
3492 addInstr(env, ARM64Instr_VDfromX(dst, src));
3493 return dst;
3495 if (con->tag == Ico_F32) {
3496 HReg src = newVRegI(env);
3497 HReg dst = newVRegD(env);
3498 union { Float f32; UInt u32; } u;
3499 vassert(sizeof(u) == 4);
3500 u.f32 = con->Ico.F32;
3501 addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
3502 addInstr(env, ARM64Instr_VDfromX(dst, src));
3503 return dst;
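      /* As with the F64 constant cases, the 32-bit IEEE754 bit pattern
         is materialised in an integer register and transferred; only
         the low 32 bits of the D register are significant for an F32.
         For instance a Float of 1.0f has the pattern 0x3F800000. */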
3507 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3508 vassert(e->Iex.Load.ty == Ity_F32);
3509 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3510 HReg res = newVRegD(env);
3511 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
3512 return res;
3515 if (e->tag == Iex_Get) {
3516 Int offs = e->Iex.Get.offset;
3517 if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
3518 HReg rD = newVRegD(env);
3519 HReg rN = get_baseblock_register();
3520 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
3521 return rD;
3525 if (e->tag == Iex_Unop) {
3526 switch (e->Iex.Unop.op) {
3527 case Iop_NegF32: {
3528 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3529 HReg dst = newVRegD(env);
3530 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
3531 return dst;
3533 case Iop_AbsF32: {
3534 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3535 HReg dst = newVRegD(env);
3536 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
3537 return dst;
3539 case Iop_F16toF32: {
3540 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3541 HReg dst = newVRegD(env);
3542 addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
3543 return dst;
3545 default:
3546 break;
3550 if (e->tag == Iex_Binop) {
3551 switch (e->Iex.Binop.op) {
3552 case Iop_RoundF32toInt:
3553 case Iop_SqrtF32:
3554 case Iop_RecpExpF32: {
3555 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
3556 HReg dst = newVRegD(env);
3557 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3558 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3559 switch (e->Iex.Binop.op) {
3560 case Iop_RoundF32toInt: op = ARM64fpu_RINT; break;
3561 case Iop_SqrtF32: op = ARM64fpu_SQRT; break;
3562 case Iop_RecpExpF32: op = ARM64fpu_RECPX; break;
3563 default: vassert(0);
3565 addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
3566 return dst;
3568 case Iop_F64toF32: {
3569 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3570 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3571 HReg dstS = newVRegD(env);
3572 addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
3573 return dstS;
3575 case Iop_I32UtoF32:
3576 case Iop_I32StoF32:
3577 case Iop_I64UtoF32:
3578 case Iop_I64StoF32: {
3579 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
3580 switch (e->Iex.Binop.op) {
3581 case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
3582 case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
3583 case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
3584 case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
3585 default: vassert(0);
3587 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3588 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3589 HReg dstS = newVRegD(env);
3590 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3591 return dstS;
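      /* All of these conversions can be inexact -- F32 has only a
         24-bit significand -- so the rounding mode is set in every
         case before converting. */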
3593 default:
3594 break;
3598 if (e->tag == Iex_Triop) {
3599 IRTriop* triop = e->Iex.Triop.details;
3600 ARM64FpBinOp sglop = ARM64fpb_INVALID;
3601 switch (triop->op) {
3602 case Iop_DivF32: sglop = ARM64fpb_DIV; break;
3603 case Iop_MulF32: sglop = ARM64fpb_MUL; break;
3604 case Iop_SubF32: sglop = ARM64fpb_SUB; break;
3605 case Iop_AddF32: sglop = ARM64fpb_ADD; break;
3606 default: break;
3608 if (sglop != ARM64fpb_INVALID) {
3609 HReg argL = iselFltExpr(env, triop->arg2);
3610 HReg argR = iselFltExpr(env, triop->arg3);
3611 HReg dst = newVRegD(env);
3612 set_FPCR_rounding_mode(env, triop->arg1);
3613 addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
3614 return dst;
3618 if (e->tag == Iex_ITE) {
3619 /* ITE(ccexpr, iftrue, iffalse) */
3620 ARM64CondCode cc;
3621 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
3622 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
3623 HReg dst = newVRegD(env);
3624 cc = iselCondCode_C(env, e->Iex.ITE.cond);
3625 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
3626 return dst;
3629 if (e->tag == Iex_Qop) {
3630 IRQop* qop = e->Iex.Qop.details;
3631 ARM64FpTriOp triop = ARM64fpt_INVALID;
3632 switch (qop->op) {
3633 case Iop_MAddF32: triop = ARM64fpt_FMADD; break;
3634 case Iop_MSubF32: triop = ARM64fpt_FMSUB; break;
3635 default: break;
3638 if (triop != ARM64fpt_INVALID) {
3639 HReg N = iselFltExpr(env, qop->arg2);
3640 HReg M = iselFltExpr(env, qop->arg3);
3641 HReg A = iselFltExpr(env, qop->arg4);
3642 HReg dst = newVRegD(env);
3643 set_FPCR_rounding_mode(env, qop->arg1);
3644 addInstr(env, ARM64Instr_VTriS(triop, dst, N, M, A));
3645 return dst;
3649 ppIRExpr(e);
3650 vpanic("iselFltExpr_wrk");
3654 /*---------------------------------------------------------*/
3655 /*--- ISEL: Floating point expressions (16 bit) ---*/
3656 /*---------------------------------------------------------*/
3658 /* Compute a 16-bit floating point value into a register, the identity
3659 of which is returned. As with iselIntExpr_R, the reg may be either
3660 real or virtual; in any case it must not be changed by subsequent
3661 code emitted by the caller. Values are generated into HRcFlt64
3662 registers despite the values themselves being Ity_F16s. */
3664 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3666 HReg r = iselF16Expr_wrk( env, e );
3667 # if 0
3668 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3669 # endif
3670 vassert(hregClass(r) == HRcFlt64);
3671 vassert(hregIsVirtual(r));
3672 return r;
3675 /* DO NOT CALL THIS DIRECTLY */
3676 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3678 IRType ty = typeOfIRExpr(env->type_env,e);
3679 vassert(e);
3680 vassert(ty == Ity_F16);
3682 if (e->tag == Iex_RdTmp) {
3683 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3686 if (e->tag == Iex_Get) {
3687 Int offs = e->Iex.Get.offset;
3688 if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3689 HReg rD = newVRegD(env);
3690 HReg rN = get_baseblock_register();
3691 addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3692 return rD;
3696 if (e->tag == Iex_Binop) {
3697 switch (e->Iex.Binop.op) {
3698 case Iop_F32toF16: {
3699 HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3700 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3701 HReg dstH = newVRegD(env);
3702 addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3703 return dstH;
3705 case Iop_F64toF16: {
3706 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3707 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3708 HReg dstH = newVRegD(env);
3709 addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3710 return dstH;
3712 default:
3713 break;
3717 ppIRExpr(e);
3718 vpanic("iselF16Expr_wrk");
3722 /*---------------------------------------------------------*/
3723 /*--- ISEL: Vector expressions (256 bit) ---*/
3724 /*---------------------------------------------------------*/
3726 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3727 ISelEnv* env, IRExpr* e )
3729 iselV256Expr_wrk( rHi, rLo, env, e );
3730 vassert(hregClass(*rHi) == HRcVec128);
3731 vassert(hregClass(*rLo) == HRcVec128);
3732 vassert(hregIsVirtual(*rHi));
3733 vassert(hregIsVirtual(*rLo));
3736 /* DO NOT CALL THIS DIRECTLY */
3737 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
3738 ISelEnv* env, IRExpr* e )
3740 vassert(e);
3741 IRType ty = typeOfIRExpr(env->type_env,e);
3742 vassert(ty == Ity_V256);
3744 /* read 256-bit IRTemp */
3745 if (e->tag == Iex_RdTmp) {
3746 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3747 return;
3750 if (e->tag == Iex_Binop) {
3751 switch (e->Iex.Binop.op) {
3752 case Iop_V128HLtoV256: {
3753 *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
3754 *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
3755 return;
3757 case Iop_QandSQsh64x2:
3758 case Iop_QandSQsh32x4:
3759 case Iop_QandSQsh16x8:
3760 case Iop_QandSQsh8x16:
3761 case Iop_QandUQsh64x2:
3762 case Iop_QandUQsh32x4:
3763 case Iop_QandUQsh16x8:
3764 case Iop_QandUQsh8x16:
3765 case Iop_QandSQRsh64x2:
3766 case Iop_QandSQRsh32x4:
3767 case Iop_QandSQRsh16x8:
3768 case Iop_QandSQRsh8x16:
3769 case Iop_QandUQRsh64x2:
3770 case Iop_QandUQRsh32x4:
3771 case Iop_QandUQRsh16x8:
3772 case Iop_QandUQRsh8x16:
3774 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
3775 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
3776 HReg fpsr = newVRegI(env);
3777 HReg resHi = newVRegV(env);
3778 HReg resLo = newVRegV(env);
3779 ARM64VecBinOp op = ARM64vecb_INVALID;
3780 switch (e->Iex.Binop.op) {
3781 case Iop_QandSQsh64x2: op = ARM64vecb_SQSHL64x2; break;
3782 case Iop_QandSQsh32x4: op = ARM64vecb_SQSHL32x4; break;
3783 case Iop_QandSQsh16x8: op = ARM64vecb_SQSHL16x8; break;
3784 case Iop_QandSQsh8x16: op = ARM64vecb_SQSHL8x16; break;
3785 case Iop_QandUQsh64x2: op = ARM64vecb_UQSHL64x2; break;
3786 case Iop_QandUQsh32x4: op = ARM64vecb_UQSHL32x4; break;
3787 case Iop_QandUQsh16x8: op = ARM64vecb_UQSHL16x8; break;
3788 case Iop_QandUQsh8x16: op = ARM64vecb_UQSHL8x16; break;
3789 case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
3790 case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
3791 case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
3792 case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
3793 case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
3794 case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
3795 case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
3796 case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
3797 default: vassert(0);
3799 /* Clear FPSR.Q, do the operation, and return both its result
3800 and the new value of FPSR.Q. We can simply zero out FPSR
3801 since all the other bits have no relevance in VEX generated
3802 code. */
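         /* FPSR.QC, the cumulative saturation flag, is bit 27 of FPSR,
            hence the shift right by 27 and the AND with 1 below, which
            leave just that bit at the bottom of |fpsr|. */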
3803 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3804 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3805 addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
3806 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3807 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3808 ARM64sh_SHR));
3809 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3810 vassert(ril_one);
3811 addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
3812 /* Now we have: the main (shift) result in |resLo|, and the
3813 Q bit at the bottom of |fpsr|. */
3814 addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
3815 *rHi = resHi;
3816 *rLo = resLo;
3817 return;
3820 /* ... */
3821 default:
3822 break;
3823 } /* switch on the binop */
3824 } /* if (e->tag == Iex_Binop) */
3826 ppIRExpr(e);
3827 vpanic("iselV256Expr_wrk");
3831 /*---------------------------------------------------------*/
3832 /*--- ISEL: Statements ---*/
3833 /*---------------------------------------------------------*/
3835 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3837 if (vex_traceflags & VEX_TRACE_VCODE) {
3838 vex_printf("\n-- ");
3839 ppIRStmt(stmt);
3840 vex_printf("\n");
3842 switch (stmt->tag) {
3844 /* --------- STORE --------- */
3845 /* little-endian write to memory */
3846 case Ist_Store: {
3847 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3848 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3849 IREndness end = stmt->Ist.Store.end;
3851 if (tya != Ity_I64 || end != Iend_LE)
3852 goto stmt_fail;
3854 if (tyd == Ity_I64) {
3855 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3856 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3857 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3858 return;
3860 if (tyd == Ity_I32) {
3861 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3862 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3863 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3864 return;
3866 if (tyd == Ity_I16) {
3867 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3868 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3869 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3870 return;
3872 if (tyd == Ity_I8) {
3873 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3874 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3875 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3876 return;
3878 if (tyd == Ity_V128) {
3879 HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
3880 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3881 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3882 return;
3884 if (tyd == Ity_F64) {
3885 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
3886 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3887 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
3888 return;
3890 if (tyd == Ity_F32) {
3891 HReg sD = iselFltExpr(env, stmt->Ist.Store.data);
3892 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3893 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
3894 return;
3896 break;
3899 /* --------- PUT --------- */
3900 /* write guest state, fixed offset */
3901 case Ist_Put: {
3902 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3903 UInt offs = (UInt)stmt->Ist.Put.offset;
3904 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
3905 HReg rD = INVALID_HREG;
3906 if (isZeroU64(stmt->Ist.Put.data)) {
3907 // In this context, XZR_XSP denotes the zero register.
3908 rD = hregARM64_XZR_XSP();
3909 } else {
3910 rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3912 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
3913 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3914 return;
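         /* When the data being PUT is a known zero, XZR_XSP can be used
            directly as the store's source, so no instruction is needed
            to materialise the zero value.  (isZeroU64 is presumably a
            helper defined earlier that recognises a literal 0:I64
            constant expression.) */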
3916 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
3917 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3918 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
3919 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3920 return;
3922 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
3923 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3924 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
3925 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3926 return;
3928 if (tyd == Ity_I8 && offs < (1<<12)) {
3929 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3930 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
3931 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3932 return;
3934 if (tyd == Ity_V128 && offs < (1<<12)) {
3935 HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
3936 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
3937 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3938 return;
3940 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
3941 HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
3942 HReg bbp = get_baseblock_register();
3943 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
3944 return;
3946 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
3947 HReg sD = iselFltExpr(env, stmt->Ist.Put.data);
3948 HReg bbp = get_baseblock_register();
3949 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
3950 return;
3952 if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
3953 HReg hD = iselF16Expr(env, stmt->Ist.Put.data);
3954 HReg bbp = get_baseblock_register();
3955 addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
3956 return;
3959 break;
3962 /* --------- TMP --------- */
3963 /* assign value to temporary */
3964 case Ist_WrTmp: {
3965 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3966 IRType ty = typeOfIRTemp(env->type_env, tmp);
3968 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3969 /* We could do a lot better here. But for the time being: */
3970 HReg dst = lookupIRTemp(env, tmp);
3971 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
3972 addInstr(env, ARM64Instr_MovI(dst, rD));
3973 return;
3975 if (ty == Ity_I1) {
3976 /* Here, we are generating an I1 value into a 64 bit register.
3977 Make sure the value in the register is only zero or one,
3978 and nothing else. This allows optimisation of the
3979 1Uto64(tmp:I1) case, by making it simply a copy of the
3980 register holding 'tmp'. The point being that the value in
3981 the register holding 'tmp' can only have been created
3982 here. LATER: that seems dangerous; safer to do 'tmp & 1'
3983 in that case. Also, could do this just with a single CINC
3984 insn. */
3985 /* CLONE-01 */
3986 HReg zero = hregARM64_XZR_XSP(); // XZR in this context
3987 HReg one = newVRegI(env);
3988 HReg dst = lookupIRTemp(env, tmp);
3989 addInstr(env, ARM64Instr_Imm64(one, 1));
3990 ARM64CondCode cc = iselCondCode_C(env, stmt->Ist.WrTmp.data);
3991 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
3992 return;
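         /* The zero operand of the CSel is XZR, so only the constant 1
            has to be materialised in a register; the selected result is
            therefore exactly 0 or 1, as the comment above requires. */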
3994 if (ty == Ity_F64) {
3995 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3996 HReg dst = lookupIRTemp(env, tmp);
3997 addInstr(env, ARM64Instr_VMov(8, dst, src));
3998 return;
4000 if (ty == Ity_F32) {
4001 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4002 HReg dst = lookupIRTemp(env, tmp);
4003 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
4004 return;
4006 if (ty == Ity_F16) {
4007 HReg src = iselF16Expr(env, stmt->Ist.WrTmp.data);
4008 HReg dst = lookupIRTemp(env, tmp);
4009 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
4010 return;
4012 if (ty == Ity_V128) {
4013 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
4014 HReg dst = lookupIRTemp(env, tmp);
4015 addInstr(env, ARM64Instr_VMov(16, dst, src));
4016 return;
4018 if (ty == Ity_V256) {
4019 HReg srcHi, srcLo, dstHi, dstLo;
4020 iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
4021 lookupIRTempPair( &dstHi, &dstLo, env, tmp);
4022 addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
4023 addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
4024 return;
4026 break;
4029 /* --------- Call to DIRTY helper --------- */
4030 /* call complex ("dirty") helper function */
4031 case Ist_Dirty: {
4032 IRDirty* d = stmt->Ist.Dirty.details;
4034 /* Figure out the return type, if any. */
4035 IRType retty = Ity_INVALID;
4036 if (d->tmp != IRTemp_INVALID)
4037 retty = typeOfIRTemp(env->type_env, d->tmp);
4039 Bool retty_ok = False;
4040 switch (retty) {
4041 case Ity_INVALID: /* function doesn't return anything */
4042 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4043 case Ity_V128:
4044 retty_ok = True; break;
4045 default:
4046 break;
4048 if (!retty_ok)
4049 break; /* will go to stmt_fail: */
4051 /* Marshal args, do the call, and set the return value to 0x555..555
4052 if this is a conditional call that returns a value and the
4053 call is skipped. */
4054 UInt addToSp = 0;
4055 RetLoc rloc = mk_RetLoc_INVALID();
4056 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4057 vassert(is_sane_RetLoc(rloc));
4059 /* Now figure out what to do with the returned value, if any. */
4060 switch (retty) {
4061 case Ity_INVALID: {
4062 /* No return value. Nothing to do. */
4063 vassert(d->tmp == IRTemp_INVALID);
4064 vassert(rloc.pri == RLPri_None);
4065 vassert(addToSp == 0);
4066 return;
4068 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
4069 vassert(rloc.pri == RLPri_Int);
4070 vassert(addToSp == 0);
4071 /* The returned value is in x0. Park it in the register
4072 associated with tmp. */
4073 HReg dst = lookupIRTemp(env, d->tmp);
4074 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
4075 return;
4077 case Ity_V128: {
4078 /* The returned value is on the stack, and |rloc| tells
4079 us where. Fish it off the stack and then move the
4080 stack pointer upwards to clear it, as directed by
4081 doHelperCall. */
4082 vassert(rloc.pri == RLPri_V128SpRel);
4083 vassert(rloc.spOff < 256); // stay sane
4084 vassert(addToSp >= 16); // ditto
4085 vassert(addToSp < 256); // ditto
4086 HReg dst = lookupIRTemp(env, d->tmp);
4087 HReg tmp = newVRegI(env); // the address of the returned value
4088 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
4089 addInstr(env, ARM64Instr_Arith(tmp, tmp,
4090 ARM64RIA_I12((UShort)rloc.spOff, 0),
4091 True/*isAdd*/ ));
4092 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
4093 addInstr(env, ARM64Instr_AddToSP(addToSp));
4094 return;
4096 default:
4097 /*NOTREACHED*/
4098 vassert(0);
4100 break;
4103 /* --------- Load Linked and Store Conditional --------- */
4104 case Ist_LLSC: {
4105 if (stmt->Ist.LLSC.storedata == NULL) {
4106 /* LL */
4107 IRTemp res = stmt->Ist.LLSC.result;
4108 IRType ty = typeOfIRTemp(env->type_env, res);
4109 if (ty == Ity_I64 || ty == Ity_I32
4110 || ty == Ity_I16 || ty == Ity_I8) {
4111 Int szB = 0;
4112 HReg r_dst = lookupIRTemp(env, res);
4113 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
4114 switch (ty) {
4115 case Ity_I8: szB = 1; break;
4116 case Ity_I16: szB = 2; break;
4117 case Ity_I32: szB = 4; break;
4118 case Ity_I64: szB = 8; break;
4119 default: vassert(0);
4121 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
4122 addInstr(env, ARM64Instr_LdrEX(szB));
4123 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
4124 return;
4126 goto stmt_fail;
4127 } else {
4128 /* SC */
4129 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
4130 if (tyd == Ity_I64 || tyd == Ity_I32
4131 || tyd == Ity_I16 || tyd == Ity_I8) {
4132 Int szB = 0;
4133 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
4134 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
4135 switch (tyd) {
4136 case Ity_I8: szB = 1; break;
4137 case Ity_I16: szB = 2; break;
4138 case Ity_I32: szB = 4; break;
4139 case Ity_I64: szB = 8; break;
4140 default: vassert(0);
4142 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
4143 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
4144 addInstr(env, ARM64Instr_StrEX(szB));
4145 } else {
4146 goto stmt_fail;
4148 /* Now x0 is 1 if the store failed, 0 if it succeeded. Change
4149 to IR conventions (0 is fail, 1 is success). Also transfer
4150 the result to r_res. */
4151 IRTemp res = stmt->Ist.LLSC.result;
4152 IRType ty = typeOfIRTemp(env->type_env, res);
4153 HReg r_res = lookupIRTemp(env, res);
4154 ARM64RIL* one = mb_mkARM64RIL_I(1);
4155 vassert(ty == Ity_I1);
4156 vassert(one);
4157 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
4158 ARM64lo_XOR));
4159 /* And be conservative -- mask off all but the lowest bit. */
4160 addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
4161 ARM64lo_AND));
4162 return;
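            /* Worked example: a successful store-conditional leaves 0 in
               x0, giving r_res = (0 ^ 1) & 1 = 1; a failed one leaves 1,
               giving r_res = (1 ^ 1) & 1 = 0, which is the IR convention
               of 1 = success, 0 = failure. */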
4164 break;
4167 /* --------- ACAS --------- */
4168 case Ist_CAS: {
4169 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
4170 /* "normal" singleton CAS */
4171 UChar sz;
4172 IRCAS* cas = stmt->Ist.CAS.details;
4173 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4174 switch (ty) {
4175 case Ity_I64: sz = 8; break;
4176 case Ity_I32: sz = 4; break;
4177 case Ity_I16: sz = 2; break;
4178 case Ity_I8: sz = 1; break;
4179 default: goto unhandled_cas;
4181 HReg rAddr = iselIntExpr_R(env, cas->addr);
4182 HReg rExpd = iselIntExpr_R(env, cas->expdLo);
4183 HReg rData = iselIntExpr_R(env, cas->dataLo);
4184 vassert(cas->expdHi == NULL);
4185 vassert(cas->dataHi == NULL);
4186 addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr));
4187 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd));
4188 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData));
4189 addInstr(env, ARM64Instr_CAS(sz));
4190 /* Now the lowest szB bytes of x1 are either equal to the
4191 lowest szB bytes of x5, indicating success, or they aren't,
4192 indicating failure. */
4193 HReg rResult = hregARM64_X1();
4194 switch (sz) {
4195 case 8: break;
4196 case 4: rResult = widen_z_32_to_64(env, rResult); break;
4197 case 2: rResult = widen_z_16_to_64(env, rResult); break;
4198 case 1: rResult = widen_z_8_to_64(env, rResult); break;
4199 default: vassert(0);
4201 // "old" in this case is interpreted somewhat liberally, per
4202 // the previous comment.
4203 HReg rOld = lookupIRTemp(env, cas->oldLo);
4204 addInstr(env, ARM64Instr_MovI(rOld, rResult));
4205 return;
4207 else {
4208 /* Paired register CAS, i.e. CASP */
4209 UChar sz;
4210 IRCAS* cas = stmt->Ist.CAS.details;
4211 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4212 switch (ty) {
4213 case Ity_I64: sz = 8; break;
4214 case Ity_I32: sz = 4; break;
4215 default: goto unhandled_cas;
4217 HReg rAddr = iselIntExpr_R(env, cas->addr);
4219 HReg rExpd0 = iselIntExpr_R(env, cas->expdLo);
4220 vassert(cas->expdHi != NULL);
4221 HReg rExpd1 = iselIntExpr_R(env, cas->expdHi);
4223 HReg rData0 = iselIntExpr_R(env, cas->dataLo);
4224 vassert(cas->dataHi != NULL);
4225 HReg rData1 = iselIntExpr_R(env, cas->dataHi);
4227 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rAddr));
4229 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rExpd0));
4230 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd1));
4232 addInstr(env, ARM64Instr_MovI(hregARM64_X6(), rData0));
4233 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData1));
4235 addInstr(env, ARM64Instr_CASP(sz));
4237 HReg rResult0 = hregARM64_X0();
4238 HReg rResult1 = hregARM64_X1();
4239 switch (sz) {
4240 case 8: break;
4241 case 4: rResult0 = widen_z_32_to_64(env, rResult0);
4242 rResult1 = widen_z_32_to_64(env, rResult1);
4243 break;
4244 default: vassert(0);
4246 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4247 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4248 addInstr(env, ARM64Instr_MovI(rOldLo, rResult0));
4249 addInstr(env, ARM64Instr_MovI(rOldHi, rResult1));
4250 return;
4252 unhandled_cas:
4253 break;
4256 /* --------- MEM FENCE --------- */
4257 case Ist_MBE:
4258 switch (stmt->Ist.MBE.event) {
4259 case Imbe_Fence:
4260 addInstr(env, ARM64Instr_MFence());
4261 return;
4262 case Imbe_CancelReservation:
4263 addInstr(env, ARM64Instr_ClrEX());
4264 return;
4265 default:
4266 break;
4268 break;
4270 /* --------- INSTR MARK --------- */
4271 /* Doesn't generate any executable code ... */
4272 case Ist_IMark:
4273 return;
4275 /* --------- ABI HINT --------- */
4276 /* These have no meaning (denotation in the IR) and so we ignore
4277 them ... if any actually made it this far. */
4278 case Ist_AbiHint:
4279 return;
4281 /* --------- NO-OP --------- */
4282 case Ist_NoOp:
4283 return;
4285 /* --------- EXIT --------- */
4286 case Ist_Exit: {
4287 if (stmt->Ist.Exit.dst->tag != Ico_U64)
4288 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
4290 ARM64CondCode cc
4291 = iselCondCode_C(env, stmt->Ist.Exit.guard);
4292 ARM64AMode* amPC
4293 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
4295 /* Case: boring transfer to known address */
4296 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4297 if (env->chainingAllowed) {
4298 /* .. almost always true .. */
4299 /* Skip the event check at the dst if this is a forwards
4300 edge. */
4301 Bool toFastEP
4302 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
4303 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4304 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
4305 amPC, cc, toFastEP));
4306 } else {
4307 /* .. very occasionally .. */
4308 /* We can't use chaining, so ask for an assisted transfer,
4309 as that's the only alternative that is allowable. */
4310 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4311 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
4313 return;
4316 /* Case: assisted transfer to arbitrary address */
4317 switch (stmt->Ist.Exit.jk) {
4318 /* Keep this list in sync with that for iselNext below */
4319 case Ijk_ClientReq:
4320 case Ijk_NoDecode:
4321 case Ijk_NoRedir:
4322 case Ijk_Sys_syscall:
4323 case Ijk_InvalICache:
4324 case Ijk_FlushDCache:
4325 case Ijk_SigTRAP:
4326 case Ijk_Yield: {
4327 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4328 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
4329 stmt->Ist.Exit.jk));
4330 return;
4332 default:
4333 break;
4336 /* Do we ever expect to see any other kind? */
4337 goto stmt_fail;
4340 default: break;
4342 stmt_fail:
4343 ppIRStmt(stmt);
4344 vpanic("iselStmt");
4348 /*---------------------------------------------------------*/
4349 /*--- ISEL: Basic block terminators (Nexts) ---*/
4350 /*---------------------------------------------------------*/
4352 static void iselNext ( ISelEnv* env,
4353 IRExpr* next, IRJumpKind jk, Int offsIP )
4355 if (vex_traceflags & VEX_TRACE_VCODE) {
4356 vex_printf( "\n-- PUT(%d) = ", offsIP);
4357 ppIRExpr( next );
4358 vex_printf( "; exit-");
4359 ppIRJumpKind(jk);
4360 vex_printf( "\n");
4363 /* Case: boring transfer to known address */
4364 if (next->tag == Iex_Const) {
4365 IRConst* cdst = next->Iex.Const.con;
4366 vassert(cdst->tag == Ico_U64);
4367 if (jk == Ijk_Boring || jk == Ijk_Call) {
4368 /* Boring transfer to known address */
4369 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4370 if (env->chainingAllowed) {
4371 /* .. almost always true .. */
4372 /* Skip the event check at the dst if this is a forwards
4373 edge. */
4374 Bool toFastEP
4375 = ((Addr64)cdst->Ico.U64) > env->max_ga;
4376 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4377 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
4378 amPC, ARM64cc_AL,
4379 toFastEP));
4380 } else {
4381 /* .. very occasionally .. */
4382 /* We can't use chaining, so ask for an assisted transfer,
4383 as that's the only alternative that is allowable. */
4384 HReg r = iselIntExpr_R(env, next);
4385 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4386 Ijk_Boring));
4388 return;
4392 /* Case: call/return (==boring) transfer to any address */
4393 switch (jk) {
4394 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4395 HReg r = iselIntExpr_R(env, next);
4396 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4397 if (env->chainingAllowed) {
4398 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
4399 } else {
4400 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4401 Ijk_Boring));
4403 return;
4405 default:
4406 break;
4409 /* Case: assisted transfer to arbitrary address */
4410 switch (jk) {
4411 /* Keep this list in sync with that for Ist_Exit above */
4412 case Ijk_ClientReq:
4413 case Ijk_NoDecode:
4414 case Ijk_NoRedir:
4415 case Ijk_Sys_syscall:
4416 case Ijk_InvalICache:
4417 case Ijk_FlushDCache:
4418 case Ijk_SigTRAP:
4419 case Ijk_Yield:
4421 HReg r = iselIntExpr_R(env, next);
4422 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4423 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
4424 return;
4426 default:
4427 break;
4430 vex_printf( "\n-- PUT(%d) = ", offsIP);
4431 ppIRExpr( next );
4432 vex_printf( "; exit-");
4433 ppIRJumpKind(jk);
4434 vex_printf( "\n");
4435 vassert(0); // are we expecting any other kind?
4439 /*---------------------------------------------------------*/
4440 /*--- Insn selector top-level ---*/
4441 /*---------------------------------------------------------*/
4443 /* Translate an entire SB to arm64 code. */
4445 HInstrArray* iselSB_ARM64 ( const IRSB* bb,
4446 VexArch arch_host,
4447 const VexArchInfo* archinfo_host,
4448 const VexAbiInfo* vbi/*UNUSED*/,
4449 Int offs_Host_EvC_Counter,
4450 Int offs_Host_EvC_FailAddr,
4451 Bool chainingAllowed,
4452 Bool addProfInc,
4453 Addr max_ga )
4455 Int i, j;
4456 HReg hreg, hregHI;
4457 ISelEnv* env;
4458 UInt hwcaps_host = archinfo_host->hwcaps;
4459 ARM64AMode *amCounter, *amFailAddr;
4461 /* sanity ... */
4462 vassert(arch_host == VexArchARM64);
4464 /* Check that the host's endianness is as expected. */
4465 vassert(archinfo_host->endness == VexEndnessLE);
4467 /* guard against unexpected space regressions */
4468 vassert(sizeof(ARM64Instr) <= 32);
4470 /* Make up an initial environment to use. */
4471 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4472 env->vreg_ctr = 0;
4474 /* Set up output code array. */
4475 env->code = newHInstrArray();
4477 /* Copy BB's type env. */
4478 env->type_env = bb->tyenv;
4480 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4481 change as we go along. */
4482 env->n_vregmap = bb->tyenv->types_used;
4483 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4484 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4486 /* and finally ... */
4487 env->chainingAllowed = chainingAllowed;
4488 env->hwcaps = hwcaps_host;
4489 env->previous_rm = NULL;
4490 env->max_ga = max_ga;
4492 /* For each IR temporary, allocate a suitably-kinded virtual
4493 register. */
4494 j = 0;
4495 for (i = 0; i < env->n_vregmap; i++) {
4496 hregHI = hreg = INVALID_HREG;
4497 switch (bb->tyenv->types[i]) {
4498 case Ity_I1:
4499 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
4500 hreg = mkHReg(True, HRcInt64, 0, j++);
4501 break;
4502 case Ity_I128:
4503 hreg = mkHReg(True, HRcInt64, 0, j++);
4504 hregHI = mkHReg(True, HRcInt64, 0, j++);
4505 break;
4506 case Ity_F16: // we'll use HRcFlt64 regs for F16 too
4507 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
4508 case Ity_F64:
4509 hreg = mkHReg(True, HRcFlt64, 0, j++);
4510 break;
4511 case Ity_V128:
4512 hreg = mkHReg(True, HRcVec128, 0, j++);
4513 break;
4514 case Ity_V256:
4515 hreg = mkHReg(True, HRcVec128, 0, j++);
4516 hregHI = mkHReg(True, HRcVec128, 0, j++);
4517 break;
4518 default:
4519 ppIRType(bb->tyenv->types[i]);
4520 vpanic("iselBB(arm64): IRTemp type");
4522 env->vregmap[i] = hreg;
4523 env->vregmapHI[i] = hregHI;
4525 env->vreg_ctr = j;
4527 /* The very first instruction must be an event check. */
4528 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
4529 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
4530 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
4532 /* Possibly a block counter increment (for profiling). At this
4533 point we don't know the address of the counter, so just pretend
4534 it is zero. It will have to be patched later, but before this
4535 translation is used, by a call to LibVEX_patchProfCtr. */
4536 if (addProfInc) {
4537 addInstr(env, ARM64Instr_ProfInc());
4540 /* Ok, finally we can iterate over the statements. */
4541 for (i = 0; i < bb->stmts_used; i++)
4542 iselStmt(env, bb->stmts[i]);
4544 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4546 /* record the number of vregs we used. */
4547 env->code->n_vregs = env->vreg_ctr;
4548 return env->code;
4552 /*---------------------------------------------------------------*/
4553 /*--- end host_arm64_isel.c ---*/
4554 /*---------------------------------------------------------------*/