2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "libvex_basictypes.h"
30 #include "libvex_ir.h"
31 #include "libvex.h"
32 #include "ir_match.h"
34 #include "main_util.h"
35 #include "main_globals.h"
36 #include "host_generic_regs.h"
37 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
38 #include "host_arm64_defs.h"
41 /*---------------------------------------------------------*/
42 /*--- ISelEnv ---*/
43 /*---------------------------------------------------------*/
45 /* This carries around:
47 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
48 might encounter. This is computed before insn selection starts,
49 and does not change.
51 - A mapping from IRTemp to HReg. This tells the insn selector
52 which virtual register is associated with each IRTemp temporary.
53 This is computed before insn selection starts, and does not
54 change. We expect this mapping to map precisely the same set of
55 IRTemps as the type mapping does.
57 |vregmap| holds the primary register for the IRTemp.
58 |vregmapHI| is only used for 128-bit integer-typed
59 IRTemps. It holds the identity of a second
60 64-bit virtual HReg, which holds the high half
61 of the value.
63 - The code array, that is, the insns selected so far.
65 - A counter, for generating new virtual registers.
67 - The host hardware capabilities word. This is set at the start
68 and does not change.
70 - A Bool for indicating whether we may generate chain-me
71 instructions for control flow transfers, or whether we must use
72 XAssisted.
74 - The maximum guest address of any guest insn in this block.
75 Actually, the address of the highest-addressed byte from any insn
76 in this block. Is set at the start and does not change. This is
77 used for detecting jumps which are definitely forward-edges from
78 this block, and therefore can be made (chained) to the fast entry
79 point of the destination, thereby avoiding the destination's
80 event check.
82 - An IRExpr*, which may be NULL, holding the IR expression (an
83 IRRoundingMode-encoded value) to which the FPU's rounding mode
84 was most recently set. Setting to NULL is always safe. Used to
85 avoid redundant settings of the FPU's rounding mode, as
86 described in set_FPCR_rounding_mode below.
88 Note, this is all (well, mostly) host-independent.
91 typedef
92 struct {
93 /* Constants -- set at the start and do not change. */
94 IRTypeEnv* type_env;
96 HReg* vregmap;
97 HReg* vregmapHI;
98 Int n_vregmap;
100 UInt hwcaps;
102 Bool chainingAllowed;
103 Addr64 max_ga;
105 /* These are modified as we go along. */
106 HInstrArray* code;
107 Int vreg_ctr;
109 IRExpr* previous_rm;
111 ISelEnv;
113 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
115 vassert(tmp >= 0);
116 vassert(tmp < env->n_vregmap);
117 return env->vregmap[tmp];
120 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
121 ISelEnv* env, IRTemp tmp )
123 vassert(tmp >= 0);
124 vassert(tmp < env->n_vregmap);
125 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
126 *vrLO = env->vregmap[tmp];
127 *vrHI = env->vregmapHI[tmp];
130 static void addInstr ( ISelEnv* env, ARM64Instr* instr )
132 addHInstr(env->code, instr);
133 if (vex_traceflags & VEX_TRACE_VCODE) {
134 ppARM64Instr(instr);
135 vex_printf("\n");
139 static HReg newVRegI ( ISelEnv* env )
141 HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
142 env->vreg_ctr++;
143 return reg;
146 static HReg newVRegD ( ISelEnv* env )
148 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
149 env->vreg_ctr++;
150 return reg;
153 static HReg newVRegV ( ISelEnv* env )
155 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
156 env->vreg_ctr++;
157 return reg;
161 /*---------------------------------------------------------*/
162 /*--- ISEL: Forward declarations ---*/
163 /*---------------------------------------------------------*/
165 /* These are organised as iselXXX and iselXXX_wrk pairs. The
166 iselXXX_wrk do the real work, but are not to be called directly.
167 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
168 checks that all returned registers are virtual. You should not
169 call the _wrk version directly.
171 Because some forms of ARM64 memory amodes are implicitly scaled by
172 the access size, iselIntExpr_AMode takes an IRType which tells it
173 the type of the access for which the amode is to be used. This
174 type needs to be correct, else you'll get incorrect code.
176 static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
177 IRExpr* e, IRType dty );
178 static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
179 IRExpr* e, IRType dty );
181 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
182 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
184 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
185 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
187 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
188 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
190 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
191 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
193 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
194 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
196 static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
197 ISelEnv* env, IRExpr* e );
198 static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
199 ISelEnv* env, IRExpr* e );
201 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
202 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
204 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
205 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
207 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
208 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e );
210 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
211 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
213 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
214 ISelEnv* env, IRExpr* e );
215 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
216 ISelEnv* env, IRExpr* e );
218 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
221 /*---------------------------------------------------------*/
222 /*--- ISEL: Misc helpers ---*/
223 /*---------------------------------------------------------*/
225 /* Generate an amode suitable for a 64-bit sized access relative to
226 the baseblock register (X21). This generates an RI12 amode, which
227 means it's scaled by the access size, which is why the access size
228 -- 64 bit -- is stated explicitly here. Consequently |off| needs
229 to be divisible by 8. */
230 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
232 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
233 vassert((off & 7) == 0); /* ditto */
234 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
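/* Illustrative example: a 64-bit access at guest state offset 24 yields
   ARM64AMode_RI12(X21, 3, 8), because the RI12 immediate counts 8-byte
   units; an offset such as 28 would trip the alignment assertion above. */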
237 /* Ditto, for 32 bit accesses. */
238 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
240 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
241 vassert((off & 3) == 0); /* ditto */
242 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
245 /* Ditto, for 16 bit accesses. */
246 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
248 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
249 vassert((off & 1) == 0); /* ditto */
250 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
253 /* Ditto, for 8 bit accesses. */
254 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
256 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
257 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
260 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
262 vassert(off < (1<<12));
263 HReg r = newVRegI(env);
264 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
265 ARM64RIA_I12(off,0), True/*isAdd*/));
266 return r;
269 static HReg get_baseblock_register ( void )
271 return hregARM64_X21();
274 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
275 a new register, and return the new register. */
276 static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
278 HReg dst = newVRegI(env);
279 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
280 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
281 return dst;
284 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
285 a new register, and return the new register. */
286 static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
288 HReg dst = newVRegI(env);
289 ARM64RI6* n48 = ARM64RI6_I6(48);
290 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
291 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
292 return dst;
295 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
296 a new register, and return the new register. */
297 static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
299 HReg dst = newVRegI(env);
300 ARM64RIL* mask = ARM64RIL_I13(1, 0, 15); /* encodes 0xFFFF */
301 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
302 return dst;
305 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
306 a new register, and return the new register. */
307 static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
309 HReg dst = newVRegI(env);
310 ARM64RI6* n32 = ARM64RI6_I6(32);
311 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
312 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
313 return dst;
316 /* Generate code to sign extend an 8 bit value in 'src' to 64 bits, in
317 a new register, and return the new register. */
318 static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
320 HReg dst = newVRegI(env);
321 ARM64RI6* n56 = ARM64RI6_I6(56);
322 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
323 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
324 return dst;
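/* Generate code to zero extend an 8 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */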
327 static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
329 HReg dst = newVRegI(env);
330 ARM64RIL* mask = ARM64RIL_I13(1, 0, 7); /* encodes 0xFF */
331 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
332 return dst;
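/* Illustration of the two widening styles above: for src holding 0x80,
   widen_s_8_to_64 gives 0xFFFFFFFFFFFFFF80 via the SHL-56/SAR-56 pair,
   while widen_z_8_to_64 gives 0x0000000000000080 by ANDing with the
   logical-immediate mask 0xFF. */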
335 /* Is this IRExpr_Const(IRConst_U64(0)) ? */
336 static Bool isZeroU64 ( IRExpr* e ) {
337 if (e->tag != Iex_Const) return False;
338 IRConst* con = e->Iex.Const.con;
339 vassert(con->tag == Ico_U64);
340 return con->Ico.U64 == 0;
344 /*---------------------------------------------------------*/
345 /*--- ISEL: FP rounding mode helpers ---*/
346 /*---------------------------------------------------------*/
348 /* Set the FP rounding mode: 'mode' is an I32-typed expression
349 denoting a value in the range 0 .. 3, indicating a round mode
350 encoded as per type IRRoundingMode -- the first four values only
351 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
352 FPCR to have the same rounding.
354 For speed & simplicity, we're setting the *entire* FPCR here.
356 Setting the rounding mode is expensive. So this function tries to
357 avoid repeatedly setting the rounding mode to the same thing by
358 first comparing 'mode' to the 'mode' tree supplied in the previous
359 call to this function, if any. (The previous value is stored in
360 env->previous_rm.) If 'mode' is a single IR temporary 't' and
361 env->previous_rm is also just 't', then the setting is skipped.
363 This is safe because of the SSA property of IR: an IR temporary can
364 only be defined once and so will have the same value regardless of
365 where it appears in the block. Cool stuff, SSA.
367 A safety condition: all attempts to set the RM must be aware of
368 this mechanism - by being routed through the functions here.
370 Of course this only helps in blocks where the RM is set more than
371 once and it is set to the same value each time, *and* that value is
372 held in the same IR temporary each time. In order to assure the
373 latter as much as possible, the IR optimiser takes care to do CSE
374 on any block with any sign of floating point activity.
376 static
377 void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
379 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
381 /* Do we need to do anything? */
382 if (env->previous_rm
383 && env->previous_rm->tag == Iex_RdTmp
384 && mode->tag == Iex_RdTmp
385 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
386 /* no - setting it to what it was before. */
387 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
388 return;
391 /* No luck - we better set it, and remember what we set it to. */
392 env->previous_rm = mode;
394 /* Only supporting the rounding-mode bits - the rest of FPCR is set
395 to zero - so we can set the whole register at once (faster). */
397 /* This isn't simple, because 'mode' carries an IR rounding
398 encoding, and we need to translate that to an ARM64 FP one:
399 The IR encoding:
400 00 to nearest (the default)
401 10 to +infinity
402 01 to -infinity
403 11 to zero
404 The ARM64 FP encoding:
405 00 to nearest
406 01 to +infinity
407 10 to -infinity
408 11 to zero
409 Easy enough to do; just swap the two bits.
411 HReg irrm = iselIntExpr_R(env, mode);
412 HReg tL = newVRegI(env);
413 HReg tR = newVRegI(env);
414 HReg t3 = newVRegI(env);
415 /* tL = irrm << 1;
416 tR = irrm >> 1; if we're lucky, these will issue together
417 tL &= 2;
418 tR &= 1; ditto
419 t3 = tL | tR;
420 t3 <<= 22;
421 msr fpcr, t3
423 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
424 ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
425 vassert(ril_one && ril_two);
426 addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
427 addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
428 addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
429 addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
430 addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
431 addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
432 addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
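/* Worked example (illustrative): for mode == Irrm_PosINF, irrm holds 2
   (binary 10).  Then tL = (2 << 1) & 2 = 0 and tR = (2 >> 1) & 1 = 1,
   so t3 = 1 (binary 01), the ARM64 encoding of round-towards-+infinity.
   Shifting left by 22 places it in FPCR.RMode, bits [23:22]. */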
436 /*---------------------------------------------------------*/
437 /*--- ISEL: Function call helpers ---*/
438 /*---------------------------------------------------------*/
440 /* Used only in doHelperCall. See big comment in doHelperCall re
441 handling of register-parameter args. This function figures out
442 whether evaluation of an expression might require use of a fixed
443 register. If in doubt return True (safe but suboptimal).
445 static
446 Bool mightRequireFixedRegs ( IRExpr* e )
448 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
449 // These are always "safe" -- either a copy of SP in some
450 // arbitrary vreg, or a copy of x21, respectively.
451 return False;
453 /* Else it's a "normal" expression. */
454 switch (e->tag) {
455 case Iex_RdTmp: case Iex_Const: case Iex_Get:
456 return False;
457 default:
458 return True;
463 /* Do a complete function call. |guard| is a Ity_Bit expression
464 indicating whether or not the call happens. If guard==NULL, the
465 call is unconditional. |retloc| is set to indicate where the
466 return value is after the call. The caller (of this fn) must
467 generate code to add |stackAdjustAfterCall| to the stack pointer
468 after the call is done. Returns True iff it managed to handle this
469 combination of arg/return types, else returns False. */
471 static
472 Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
473 /*OUT*/RetLoc* retloc,
474 ISelEnv* env,
475 IRExpr* guard,
476 IRCallee* cee, IRType retTy, IRExpr** args )
478 ARM64CondCode cc;
479 HReg argregs[ARM64_N_ARGREGS];
480 HReg tmpregs[ARM64_N_ARGREGS];
481 Bool go_fast;
482 Int n_args, i, nextArgReg;
483 Addr64 target;
485 vassert(ARM64_N_ARGREGS == 8);
487 /* Set default returns. We'll update them later if needed. */
488 *stackAdjustAfterCall = 0;
489 *retloc = mk_RetLoc_INVALID();
491 /* These are used for cross-checking that IR-level constraints on
492 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
493 UInt nVECRETs = 0;
494 UInt nGSPTRs = 0;
496 /* Marshal args for a call and do the call.
498 This function only deals with a tiny set of possibilities, which
499 cover all helpers in practice. The restrictions are that only
500 arguments in registers are supported, hence only
501 ARM64_N_ARGREGS x 64 integer bits in total can be passed. In
502 fact the only supported arg type is I64.
504 The return type can be I{64,32} or V128. In the V128 case, it
505 is expected that |args| will contain the special node
506 IRExpr_VECRET(), in which case this routine generates code to
507 allocate space on the stack for the vector return value. Since
508 we are not passing any scalars on the stack, it is enough to
509 preallocate the return space before marshalling any arguments,
510 in this case.
512 |args| may also contain IRExpr_GSPTR(), in which case the
513 value in x21 is passed as the corresponding argument.
515 Generating code which is both efficient and correct when
516 parameters are to be passed in registers is difficult, for the
517 reasons elaborated in detail in comments attached to
518 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
519 of the method described in those comments.
521 The problem is split into two cases: the fast scheme and the
522 slow scheme. In the fast scheme, arguments are computed
523 directly into the target (real) registers. This is only safe
524 when we can be sure that computation of each argument will not
525 trash any real registers set by computation of any other
526 argument.
528 In the slow scheme, all args are first computed into vregs, and
529 once they are all done, they are moved to the relevant real
530 regs. This always gives correct code, but it also gives a bunch
531 of vreg-to-rreg moves which are usually redundant but are hard
532 for the register allocator to get rid of.
534 To decide which scheme to use, all argument expressions are
535 first examined. If they are all so simple that it is clear they
536 will be evaluated without use of any fixed registers, use the
537 fast scheme, else use the slow scheme. Note also that only
538 unconditional calls may use the fast scheme, since having to
539 compute a condition expression could itself trash real
540 registers.
542 Note this requires being able to examine an expression and
543 determine whether or not evaluation of it might use a fixed
544 register. That requires knowledge of how the rest of this insn
545 selector works. Currently just the following 3 are regarded as
546 safe -- hopefully they cover the majority of arguments in
547 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
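      As an illustrative sketch: a call whose arguments are, say, an
      IRTemp, a Get and a Const can take the fast scheme, since all three
      are in the safe set above, whereas a call with an Add64(..) argument
      is forced onto the slow scheme, because mightRequireFixedRegs()
      conservatively returns True for it.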
550 /* Note that the cee->regparms field is meaningless on ARM64 hosts
551 (since there is only one calling convention) and so we always
552 ignore it. */
554 n_args = 0;
555 for (i = 0; args[i]; i++) {
556 IRExpr* arg = args[i];
557 if (UNLIKELY(arg->tag == Iex_VECRET)) {
558 nVECRETs++;
559 } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
560 nGSPTRs++;
562 n_args++;
565 /* If this fails, the IR is ill-formed */
566 vassert(nGSPTRs == 0 || nGSPTRs == 1);
568 /* If we have a VECRET, allocate space on the stack for the return
569 value, and record the stack pointer after that. */
570 HReg r_vecRetAddr = INVALID_HREG;
571 if (nVECRETs == 1) {
572 vassert(retTy == Ity_V128 || retTy == Ity_V256);
573 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
574 r_vecRetAddr = newVRegI(env);
575 addInstr(env, ARM64Instr_AddToSP(-16));
576 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
577 } else {
578 // If either of these fail, the IR is ill-formed
579 vassert(retTy != Ity_V128 && retTy != Ity_V256);
580 vassert(nVECRETs == 0);
583 argregs[0] = hregARM64_X0();
584 argregs[1] = hregARM64_X1();
585 argregs[2] = hregARM64_X2();
586 argregs[3] = hregARM64_X3();
587 argregs[4] = hregARM64_X4();
588 argregs[5] = hregARM64_X5();
589 argregs[6] = hregARM64_X6();
590 argregs[7] = hregARM64_X7();
592 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
593 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
595 /* First decide which scheme (slow or fast) is to be used. First
596 assume the fast scheme, and select slow if any contraindications
597 (wow) appear. */
599 go_fast = True;
601 if (guard) {
602 if (guard->tag == Iex_Const
603 && guard->Iex.Const.con->tag == Ico_U1
604 && guard->Iex.Const.con->Ico.U1 == True) {
605 /* unconditional */
606 } else {
607 /* Not manifestly unconditional -- be conservative. */
608 go_fast = False;
612 if (go_fast) {
613 for (i = 0; i < n_args; i++) {
614 if (mightRequireFixedRegs(args[i])) {
615 go_fast = False;
616 break;
621 if (go_fast) {
622 if (retTy == Ity_V128 || retTy == Ity_V256)
623 go_fast = False;
626 /* At this point the scheme to use has been established. Generate
627 code to get the arg values into the argument rregs. If we run
628 out of arg regs, give up. */
630 if (go_fast) {
632 /* FAST SCHEME */
633 nextArgReg = 0;
635 for (i = 0; i < n_args; i++) {
636 IRExpr* arg = args[i];
638 IRType aTy = Ity_INVALID;
639 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
640 aTy = typeOfIRExpr(env->type_env, args[i]);
642 if (nextArgReg >= ARM64_N_ARGREGS)
643 return False; /* out of argregs */
645 if (aTy == Ity_I64) {
646 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
647 iselIntExpr_R(env, args[i]) ));
648 nextArgReg++;
650 else if (arg->tag == Iex_GSPTR) {
651 vassert(0); //ATC
652 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
653 hregARM64_X21() ));
654 nextArgReg++;
656 else if (arg->tag == Iex_VECRET) {
657 // because of the go_fast logic above, we can't get here,
658 // since vector return values make us use the slow path
659 // instead.
660 vassert(0);
662 else
663 return False; /* unhandled arg type */
666 /* Fast scheme only applies for unconditional calls. Hence: */
667 cc = ARM64cc_AL;
669 } else {
671 /* SLOW SCHEME; move via temporaries */
672 nextArgReg = 0;
674 for (i = 0; i < n_args; i++) {
675 IRExpr* arg = args[i];
677 IRType aTy = Ity_INVALID;
678 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
679 aTy = typeOfIRExpr(env->type_env, args[i]);
681 if (nextArgReg >= ARM64_N_ARGREGS)
682 return False; /* out of argregs */
684 if (aTy == Ity_I64) {
685 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
686 nextArgReg++;
688 else if (arg->tag == Iex_GSPTR) {
689 vassert(0); //ATC
690 tmpregs[nextArgReg] = hregARM64_X21();
691 nextArgReg++;
693 else if (arg->tag == Iex_VECRET) {
694 vassert(!hregIsInvalid(r_vecRetAddr));
695 tmpregs[nextArgReg] = r_vecRetAddr;
696 nextArgReg++;
698 else
699 return False; /* unhandled arg type */
702 /* Now we can compute the condition. We can't do it earlier
703 because the argument computations could trash the condition
704 codes. Be a bit clever to handle the common case where the
705 guard is 1:Bit. */
706 cc = ARM64cc_AL;
707 if (guard) {
708 if (guard->tag == Iex_Const
709 && guard->Iex.Const.con->tag == Ico_U1
710 && guard->Iex.Const.con->Ico.U1 == True) {
711 /* unconditional -- do nothing */
712 } else {
713 cc = iselCondCode( env, guard );
717 /* Move the args to their final destinations. */
718 for (i = 0; i < nextArgReg; i++) {
719 vassert(!(hregIsInvalid(tmpregs[i])));
720 /* None of these insns, including any spill code that might
721 be generated, may alter the condition codes. */
722 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
727 /* Should be assured by checks above */
728 vassert(nextArgReg <= ARM64_N_ARGREGS);
730 /* Do final checks, set the return values, and generate the call
731 instruction proper. */
732 vassert(nGSPTRs == 0 || nGSPTRs == 1);
733 vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
734 vassert(*stackAdjustAfterCall == 0);
735 vassert(is_RetLoc_INVALID(*retloc));
736 switch (retTy) {
737 case Ity_INVALID:
738 /* Function doesn't return a value. */
739 *retloc = mk_RetLoc_simple(RLPri_None);
740 break;
741 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
742 *retloc = mk_RetLoc_simple(RLPri_Int);
743 break;
744 case Ity_V128:
745 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
746 *stackAdjustAfterCall = 16;
747 break;
748 case Ity_V256:
749 vassert(0); // ATC
750 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
751 *stackAdjustAfterCall = 32;
752 break;
753 default:
754 /* IR can denote other possible return types, but we don't
755 handle those here. */
756 vassert(0);
759 /* Finally, generate the call itself. This needs the *retloc value
760 set in the switch above, which is why it's at the end. */
762 /* nextArgReg doles out argument registers. Since these are
763 assigned in the order x0 .. x7, its numeric value at this point,
764 which must be between 0 and 8 inclusive, is going to be equal to
765 the number of arg regs in use for the call. Hence bake that
766 number into the call (we'll need to know it when doing register
767 allocation, to know what regs the call reads.) */
769 target = (Addr)cee->addr;
770 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
772 return True; /* success */
776 /*---------------------------------------------------------*/
777 /*--- ISEL: Integer expressions (64/32 bit) ---*/
778 /*---------------------------------------------------------*/
780 /* Select insns for an integer-typed expression, and add them to the
781 code list. Return a reg holding the result. This reg will be a
782 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
783 want to modify it, ask for a new vreg, copy it in there, and modify
784 the copy. The register allocator will do its best to map both
785 vregs to the same real register, so the copies will often disappear
786 later in the game.
788 This should handle expressions of 64- and 32-bit type. All results
789 are returned in a 64-bit register. For 32-bit expressions, the
790 upper 32 bits are arbitrary, so you should mask or sign extend
791 partial values if necessary.
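      For instance, the Iop_Shr32 case below first zero-widens its argument
      (widen_z_32_to_64) precisely because those upper 32 bits may hold
      junk which a plain 64-bit shift right would otherwise drag into the
      result.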
794 /* --------------------- AMode --------------------- */
796 /* Return an AMode which computes the value of the specified
797 expression, possibly also adding insns to the code list as a
798 result. The expression may only be a 64-bit one.
801 static Bool isValidScale ( UChar scale )
803 switch (scale) {
804 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
805 default: return False;
809 static Bool sane_AMode ( ARM64AMode* am )
811 switch (am->tag) {
812 case ARM64am_RI9:
813 return
814 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
815 && (hregIsVirtual(am->ARM64am.RI9.reg)
816 /* || sameHReg(am->ARM64am.RI9.reg,
817 hregARM64_X21()) */ )
818 && am->ARM64am.RI9.simm9 >= -256
819 && am->ARM64am.RI9.simm9 <= 255 );
820 case ARM64am_RI12:
821 return
822 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
823 && (hregIsVirtual(am->ARM64am.RI12.reg)
824 /* || sameHReg(am->ARM64am.RI12.reg,
825 hregARM64_X21()) */ )
826 && am->ARM64am.RI12.uimm12 < 4096
827 && isValidScale(am->ARM64am.RI12.szB) );
828 case ARM64am_RR:
829 return
830 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
831 && hregIsVirtual(am->ARM64am.RR.base)
832 && hregClass(am->ARM64am.RR.index) == HRcInt64
833 && hregIsVirtual(am->ARM64am.RR.index) );
834 default:
835 vpanic("sane_AMode: unknown ARM64 AMode1 tag");
839 static
840 ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
842 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
843 vassert(sane_AMode(am));
844 return am;
847 static
848 ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
850 IRType ty = typeOfIRExpr(env->type_env,e);
851 vassert(ty == Ity_I64);
853 ULong szBbits = 0;
854 switch (dty) {
855 case Ity_I64: szBbits = 3; break;
856 case Ity_I32: szBbits = 2; break;
857 case Ity_I16: szBbits = 1; break;
858 case Ity_I8: szBbits = 0; break;
859 default: vassert(0);
862 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
863 we're going to create an amode suitable for LDU* or STU*
864 instructions, which use unscaled immediate offsets. */
865 if (e->tag == Iex_Binop
866 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
867 && e->Iex.Binop.arg2->tag == Iex_Const
868 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
869 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
870 if (simm >= -255 && simm <= 255) {
871 /* Although the gating condition might seem to be
872 simm >= -256 && simm <= 255
873 we will need to negate simm in the case where the op is Sub64.
874 Hence limit the lower value to -255 in order that its negation
875 is representable. */
876 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
877 if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
878 return ARM64AMode_RI9(reg, (Int)simm);
882 /* Add64(expr, uimm12 * transfer-size) */
883 if (e->tag == Iex_Binop
884 && e->Iex.Binop.op == Iop_Add64
885 && e->Iex.Binop.arg2->tag == Iex_Const
886 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
887 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
888 ULong szB = 1 << szBbits;
889 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
890 && (uimm >> szBbits) < 4096) {
891 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
892 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
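/* Example (illustrative): for a 64-bit access (dty == Ity_I64),
   Add64(expr, 0x50) matches here and produces ARM64AMode_RI12(reg, 10, 8),
   since 0x50 is 8-aligned and 0x50 >> 3 == 10 fits in the uimm12 field. */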
896 /* Add64(expr1, expr2) */
897 if (e->tag == Iex_Binop
898 && e->Iex.Binop.op == Iop_Add64) {
899 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
900 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
901 return ARM64AMode_RR(reg1, reg2);
904 /* Doesn't match anything in particular. Generate it into
905 a register and use that. */
906 HReg reg = iselIntExpr_R(env, e);
907 return ARM64AMode_RI9(reg, 0);
911 /* --------------------- RIA --------------------- */
913 /* Select instructions to generate 'e' into a RIA. */
915 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
917 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
918 /* sanity checks ... */
919 switch (ri->tag) {
920 case ARM64riA_I12:
921 vassert(ri->ARM64riA.I12.imm12 < 4096);
922 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
923 return ri;
924 case ARM64riA_R:
925 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
926 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
927 return ri;
928 default:
929 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
933 /* DO NOT CALL THIS DIRECTLY ! */
934 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
936 IRType ty = typeOfIRExpr(env->type_env,e);
937 vassert(ty == Ity_I64 || ty == Ity_I32);
939 /* special case: immediate */
940 if (e->tag == Iex_Const) {
941 ULong u = 0xF000000ULL; /* invalid */
942 switch (e->Iex.Const.con->tag) {
943 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
944 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
945 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
947 if (0 == (u & ~(0xFFFULL << 0)))
948 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
949 if (0 == (u & ~(0xFFFULL << 12)))
950 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
951 /* else fail, fall through to default case */
954 /* default case: calculate into a register and return that */
956 HReg r = iselIntExpr_R ( env, e );
957 return ARM64RIA_R(r);
962 /* --------------------- RIL --------------------- */
964 /* Select instructions to generate 'e' into a RIL. At this point we
965 have to deal with the strange bitfield-immediate encoding for logic
966 instructions. */
969 // The following four functions
970 // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
971 // are copied, with modifications, from
972 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
973 // which has the following copyright notice:
975 Copyright 2013, ARM Limited
976 All rights reserved.
978 Redistribution and use in source and binary forms, with or without
979 modification, are permitted provided that the following conditions are met:
981 * Redistributions of source code must retain the above copyright notice,
982 this list of conditions and the following disclaimer.
983 * Redistributions in binary form must reproduce the above copyright notice,
984 this list of conditions and the following disclaimer in the documentation
985 and/or other materials provided with the distribution.
986 * Neither the name of ARM Limited nor the names of its contributors may be
987 used to endorse or promote products derived from this software without
988 specific prior written permission.
990 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
991 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
992 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
993 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
994 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
995 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
996 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
997 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
998 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
999 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1002 static Int CountLeadingZeros(ULong value, Int width)
1004 vassert(width == 32 || width == 64);
1005 Int count = 0;
1006 ULong bit_test = 1ULL << (width - 1);
1007 while ((count < width) && ((bit_test & value) == 0)) {
1008 count++;
1009 bit_test >>= 1;
1011 return count;
1014 static Int CountTrailingZeros(ULong value, Int width)
1016 vassert(width == 32 || width == 64);
1017 Int count = 0;
1018 while ((count < width) && (((value >> count) & 1) == 0)) {
1019 count++;
1021 return count;
1024 static Int CountSetBits(ULong value, Int width)
1026 // TODO: Other widths could be added here, as the implementation already
1027 // supports them.
1028 vassert(width == 32 || width == 64);
1030 // Mask out unused bits to ensure that they are not counted.
1031 value &= (0xffffffffffffffffULL >> (64-width));
1033 // Add up the set bits.
1034 // The algorithm works by adding pairs of bit fields together iteratively,
1035 // where the size of each bit field doubles each time.
1036 // An example for an 8-bit value:
1037 // Bits: h g f e d c b a
1038 // \ | \ | \ | \ |
1039 // value = h+g f+e d+c b+a
1040 // \ | \ |
1041 // value = h+g+f+e d+c+b+a
1042 // \ |
1043 // value = h+g+f+e+d+c+b+a
1044 value = ((value >> 1) & 0x5555555555555555ULL)
1045 + (value & 0x5555555555555555ULL);
1046 value = ((value >> 2) & 0x3333333333333333ULL)
1047 + (value & 0x3333333333333333ULL);
1048 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1049 + (value & 0x0f0f0f0f0f0f0f0fULL);
1050 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1051 + (value & 0x00ff00ff00ff00ffULL);
1052 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1053 + (value & 0x0000ffff0000ffffULL);
1054 value = ((value >> 32) & 0x00000000ffffffffULL)
1055 + (value & 0x00000000ffffffffULL);
1057 return value;
1060 static Bool isImmLogical ( /*OUT*/UInt* n,
1061 /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1062 ULong value, UInt width )
1064 // Test if a given value can be encoded in the immediate field of a
1065 // logical instruction.
1067 // If it can be encoded, the function returns true, and values
1068 // pointed to by n, imm_s and imm_r are updated with immediates
1069 // encoded in the format required by the corresponding fields in the
1070 // logical instruction. If it can not be encoded, the function
1071 // returns false, and the values pointed to by n, imm_s and imm_r
1072 // are undefined.
1073 vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1074 vassert(width == 32 || width == 64);
1076 // Logical immediates are encoded using parameters n, imm_s and imm_r using
1077 // the following table:
1079 // N imms immr size S R
1080 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
1081 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
1082 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
1083 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
1084 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
1085 // 0 11110s xxxxxr 2 UInt(s) UInt(r)
1086 // (s bits must not be all set)
1088 // A pattern is constructed of size bits, where the least significant S+1
1089 // bits are set. The pattern is rotated right by R, and repeated across a
1090 // 32 or 64-bit value, depending on destination register width.
1092 // To test if an arbitrary immediate can be encoded using this scheme, an
1093 // iterative algorithm is used.
1095 // TODO: This code does not consider using X/W register overlap to support
1096 // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1097 // are an encodable logical immediate.
1099 // 1. If the value has all set or all clear bits, it can't be encoded.
1100 if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1101 ((width == 32) && (value == 0xffffffff))) {
1102 return False;
1105 UInt lead_zero = CountLeadingZeros(value, width);
1106 UInt lead_one = CountLeadingZeros(~value, width);
1107 UInt trail_zero = CountTrailingZeros(value, width);
1108 UInt trail_one = CountTrailingZeros(~value, width);
1109 UInt set_bits = CountSetBits(value, width);
1111 // The fixed bits in the immediate s field.
1112 // If width == 64 (X reg), start at 0xFFFFFF80.
1113 // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1114 // widths won't be executed.
1115 Int imm_s_fixed = (width == 64) ? -128 : -64;
1116 Int imm_s_mask = 0x3F;
1118 for (;;) {
1119 // 2. If the value is two bits wide, it can be encoded.
1120 if (width == 2) {
1121 *n = 0;
1122 *imm_s = 0x3C;
1123 *imm_r = (value & 3) - 1;
1124 return True;
1127 *n = (width == 64) ? 1 : 0;
1128 *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1129 if ((lead_zero + set_bits) == width) {
1130 *imm_r = 0;
1131 } else {
1132 *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1135 // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1136 // the bit width of the value, it can be encoded.
1137 if (lead_zero + trail_zero + set_bits == width) {
1138 return True;
1141 // 4. If the sum of leading ones, trailing ones and unset bits in the
1142 // value is equal to the bit width of the value, it can be encoded.
1143 if (lead_one + trail_one + (width - set_bits) == width) {
1144 return True;
1147 // 5. If the most-significant half of the bitwise value is equal to the
1148 // least-significant half, return to step 2 using the least-significant
1149 // half of the value.
1150 ULong mask = (1ULL << (width >> 1)) - 1;
1151 if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1152 width >>= 1;
1153 set_bits >>= 1;
1154 imm_s_fixed >>= 1;
1155 continue;
1158 // 6. Otherwise, the value can't be encoded.
1159 return False;
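// Worked example (illustrative): value == 0x00FF00FF00FF00FF, width == 64.
// The value halves down to a repeating 16-bit element 0x00FF -- a run of
// 8 ones with no rotation -- so the routine returns True with n == 0,
// imm_s == 0x27 (binary 100111, meaning "8 bits set" in a 16-bit element)
// and imm_r == 0.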
1164 /* Create a RIL for the given immediate, if it is representable, or
1165 return NULL if not. */
1167 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1169 UInt n = 0, imm_s = 0, imm_r = 0;
1170 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1171 if (!ok) return NULL;
1172 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1173 return ARM64RIL_I13(n, imm_r, imm_s);
1176 /* So, finally .. */
1178 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1180 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1181 /* sanity checks ... */
1182 switch (ri->tag) {
1183 case ARM64riL_I13:
1184 vassert(ri->ARM64riL.I13.bitN < 2);
1185 vassert(ri->ARM64riL.I13.immR < 64);
1186 vassert(ri->ARM64riL.I13.immS < 64);
1187 return ri;
1188 case ARM64riL_R:
1189 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1190 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1191 return ri;
1192 default:
1193 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1197 /* DO NOT CALL THIS DIRECTLY ! */
1198 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1200 IRType ty = typeOfIRExpr(env->type_env,e);
1201 vassert(ty == Ity_I64 || ty == Ity_I32);
1203 /* special case: immediate */
1204 if (e->tag == Iex_Const) {
1205 ARM64RIL* maybe = NULL;
1206 if (ty == Ity_I64) {
1207 vassert(e->Iex.Const.con->tag == Ico_U64);
1208 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1209 } else {
1210 vassert(ty == Ity_I32);
1211 vassert(e->Iex.Const.con->tag == Ico_U32);
1212 UInt u32 = e->Iex.Const.con->Ico.U32;
1213 ULong u64 = (ULong)u32;
1214 /* First try with 32 leading zeroes. */
1215 maybe = mb_mkARM64RIL_I(u64);
1216 /* If that doesn't work, try with 2 copies, since it doesn't
1217 matter what winds up in the upper 32 bits. */
1218 if (!maybe) {
1219 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1222 if (maybe) return maybe;
1223 /* else fail, fall through to default case */
1226 /* default case: calculate into a register and return that */
1228 HReg r = iselIntExpr_R ( env, e );
1229 return ARM64RIL_R(r);
1234 /* --------------------- RI6 --------------------- */
1236 /* Select instructions to generate 'e' into a RI6. */
1238 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1240 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1241 /* sanity checks ... */
1242 switch (ri->tag) {
1243 case ARM64ri6_I6:
1244 vassert(ri->ARM64ri6.I6.imm6 < 64);
1245 vassert(ri->ARM64ri6.I6.imm6 > 0);
1246 return ri;
1247 case ARM64ri6_R:
1248 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1249 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1250 return ri;
1251 default:
1252 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1256 /* DO NOT CALL THIS DIRECTLY ! */
1257 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1259 IRType ty = typeOfIRExpr(env->type_env,e);
1260 vassert(ty == Ity_I64 || ty == Ity_I8);
1262 /* special case: immediate */
1263 if (e->tag == Iex_Const) {
1264 switch (e->Iex.Const.con->tag) {
1265 case Ico_U8: {
1266 UInt u = e->Iex.Const.con->Ico.U8;
1267 if (u > 0 && u < 64)
1268 return ARM64RI6_I6(u);
1269 break;
1270 default:
1271 break;
1274 /* else fail, fall through to default case */
1277 /* default case: calculate into a register and return that */
1279 HReg r = iselIntExpr_R ( env, e );
1280 return ARM64RI6_R(r);
1285 /* ------------------- CondCode ------------------- */
1287 /* Generate code to evaluate a bit-typed expression, returning the
1288 condition code which corresponds to the expression notionally
1289 having evaluated to 1. */
1291 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1293 ARM64CondCode cc = iselCondCode_wrk(env,e);
1294 vassert(cc != ARM64cc_NV);
1295 return cc;
1298 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1300 vassert(e);
1301 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1303 /* var */
1304 if (e->tag == Iex_RdTmp) {
1305 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1306 /* Test doesn't modify rTmp, so this is OK. */
1307 ARM64RIL* one = mb_mkARM64RIL_I(1);
1308 vassert(one);
1309 addInstr(env, ARM64Instr_Test(rTmp, one));
1310 return ARM64cc_NE;
1313 /* Constant 1:Bit */
1314 if (e->tag == Iex_Const) {
1315 /* This is a very stupid translation. Hopefully it doesn't occur much,
1316 if ever. */
1317 vassert(e->Iex.Const.con->tag == Ico_U1);
1318 vassert(e->Iex.Const.con->Ico.U1 == True
1319 || e->Iex.Const.con->Ico.U1 == False);
1320 HReg rTmp = newVRegI(env);
1321 addInstr(env, ARM64Instr_Imm64(rTmp, 0));
1322 ARM64RIL* one = mb_mkARM64RIL_I(1);
1323 vassert(one);
1324 addInstr(env, ARM64Instr_Test(rTmp, one));
1325 return e->Iex.Const.con->Ico.U1 ? ARM64cc_EQ : ARM64cc_NE;
1328 /* Not1(e) */
1329 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1330 /* Generate code for the arg, and negate the test condition */
1331 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1332 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1333 return ARM64cc_AL;
1334 } else {
1335 return 1 ^ cc;
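/* (ARM64 condition codes are laid out in complementary pairs, e.g.
   EQ (0000) / NE (0001), so flipping the bottom bit negates the
   condition.) */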
1339 /* --- patterns rooted at: 64to1 --- */
1341 if (e->tag == Iex_Unop
1342 && e->Iex.Unop.op == Iop_64to1) {
1343 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1344 ARM64RIL* one = mb_mkARM64RIL_I(1);
1345 vassert(one); /* '1' must be representable */
1346 addInstr(env, ARM64Instr_Test(rTmp, one));
1347 return ARM64cc_NE;
1350 /* --- patterns rooted at: CmpNEZ8 --- */
1352 if (e->tag == Iex_Unop
1353 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1354 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1355 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1356 addInstr(env, ARM64Instr_Test(r1, xFF));
1357 return ARM64cc_NE;
1360 /* --- patterns rooted at: CmpNEZ16 --- */
1362 if (e->tag == Iex_Unop
1363 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1364 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1365 ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
1366 addInstr(env, ARM64Instr_Test(r1, xFFFF));
1367 return ARM64cc_NE;
1370 /* --- patterns rooted at: CmpNEZ64 --- */
1372 if (e->tag == Iex_Unop
1373 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1374 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1375 ARM64RIA* zero = ARM64RIA_I12(0,0);
1376 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1377 return ARM64cc_NE;
1380 /* --- patterns rooted at: CmpNEZ32 --- */
1382 if (e->tag == Iex_Unop
1383 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1384 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1385 ARM64RIA* zero = ARM64RIA_I12(0,0);
1386 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1387 return ARM64cc_NE;
1390 /* --- Cmp*64*(x,y) --- */
1391 if (e->tag == Iex_Binop
1392 && (e->Iex.Binop.op == Iop_CmpEQ64
1393 || e->Iex.Binop.op == Iop_CmpNE64
1394 || e->Iex.Binop.op == Iop_CmpLT64S
1395 || e->Iex.Binop.op == Iop_CmpLT64U
1396 || e->Iex.Binop.op == Iop_CmpLE64S
1397 || e->Iex.Binop.op == Iop_CmpLE64U
1398 || e->Iex.Binop.op == Iop_CasCmpEQ64
1399 || e->Iex.Binop.op == Iop_CasCmpNE64)) {
1400 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1401 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1402 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1403 switch (e->Iex.Binop.op) {
1404 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return ARM64cc_EQ;
1405 case Iop_CmpNE64: case Iop_CasCmpNE64: return ARM64cc_NE;
1406 case Iop_CmpLT64S: return ARM64cc_LT;
1407 case Iop_CmpLT64U: return ARM64cc_CC;
1408 case Iop_CmpLE64S: return ARM64cc_LE;
1409 case Iop_CmpLE64U: return ARM64cc_LS;
1410 default: vpanic("iselCondCode(arm64): CmpXX64");
1414 /* --- Cmp*32*(x,y) --- */
1415 if (e->tag == Iex_Binop
1416 && (e->Iex.Binop.op == Iop_CmpEQ32
1417 || e->Iex.Binop.op == Iop_CmpNE32
1418 || e->Iex.Binop.op == Iop_CmpLT32S
1419 || e->Iex.Binop.op == Iop_CmpLT32U
1420 || e->Iex.Binop.op == Iop_CmpLE32S
1421 || e->Iex.Binop.op == Iop_CmpLE32U
1422 || e->Iex.Binop.op == Iop_CasCmpEQ32
1423 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1424 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1425 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1426 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1427 switch (e->Iex.Binop.op) {
1428 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return ARM64cc_EQ;
1429 case Iop_CmpNE32: case Iop_CasCmpNE32: return ARM64cc_NE;
1430 case Iop_CmpLT32S: return ARM64cc_LT;
1431 case Iop_CmpLT32U: return ARM64cc_CC;
1432 case Iop_CmpLE32S: return ARM64cc_LE;
1433 case Iop_CmpLE32U: return ARM64cc_LS;
1434 default: vpanic("iselCondCode(arm64): CmpXX32");
1438 /* --- Cmp*16*(x,y) --- */
1439 if (e->tag == Iex_Binop
1440 && (e->Iex.Binop.op == Iop_CasCmpEQ16
1441 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1442 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1443 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1444 HReg argL2 = widen_z_16_to_64(env, argL);
1445 HReg argR2 = widen_z_16_to_64(env, argR);
1446 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1447 switch (e->Iex.Binop.op) {
1448 case Iop_CasCmpEQ16: return ARM64cc_EQ;
1449 case Iop_CasCmpNE16: return ARM64cc_NE;
1450 default: vpanic("iselCondCode(arm64): CmpXX16");
1454 /* --- Cmp*8*(x,y) --- */
1455 if (e->tag == Iex_Binop
1456 && (e->Iex.Binop.op == Iop_CasCmpEQ8
1457 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1458 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1459 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1460 HReg argL2 = widen_z_8_to_64(env, argL);
1461 HReg argR2 = widen_z_8_to_64(env, argR);
1462 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1463 switch (e->Iex.Binop.op) {
1464 case Iop_CasCmpEQ8: return ARM64cc_EQ;
1465 case Iop_CasCmpNE8: return ARM64cc_NE;
1466 default: vpanic("iselCondCode(arm64): CmpXX8");
1470 /* --- And1(x,y), Or1(x,y) --- */
1471 /* FIXME: We could (and probably should) do a lot better here, by using the
1472 iselCondCode_C/_R scheme used in the amd64 insn selector. */
1473 if (e->tag == Iex_Binop
1474 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1475 HReg x_as_64 = newVRegI(env);
1476 ARM64CondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1);
1477 addInstr(env, ARM64Instr_Set64(x_as_64, cc_x));
1479 HReg y_as_64 = newVRegI(env);
1480 ARM64CondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2);
1481 addInstr(env, ARM64Instr_Set64(y_as_64, cc_y));
1483 HReg tmp = newVRegI(env);
1484 ARM64LogicOp lop
1485 = e->Iex.Binop.op == Iop_And1 ? ARM64lo_AND : ARM64lo_OR;
1486 addInstr(env, ARM64Instr_Logic(tmp, x_as_64, ARM64RIL_R(y_as_64), lop));
1488 ARM64RIL* one = mb_mkARM64RIL_I(1);
1489 vassert(one);
1490 addInstr(env, ARM64Instr_Test(tmp, one));
1492 return ARM64cc_NE;
1495 ppIRExpr(e);
1496 vpanic("iselCondCode");
1500 /* --------------------- Reg --------------------- */
1502 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1504 HReg r = iselIntExpr_R_wrk(env, e);
1505 /* sanity checks ... */
1506 # if 0
1507 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1508 # endif
1509 vassert(hregClass(r) == HRcInt64);
1510 vassert(hregIsVirtual(r));
1511 return r;
1514 /* DO NOT CALL THIS DIRECTLY ! */
1515 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1517 IRType ty = typeOfIRExpr(env->type_env,e);
1518 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1520 switch (e->tag) {
1522 /* --------- TEMP --------- */
1523 case Iex_RdTmp: {
1524 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1527 /* --------- LOAD --------- */
1528 case Iex_Load: {
1529 HReg dst = newVRegI(env);
1531 if (e->Iex.Load.end != Iend_LE)
1532 goto irreducible;
1534 if (ty == Ity_I64) {
1535 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1536 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1537 return dst;
1539 if (ty == Ity_I32) {
1540 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1541 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1542 return dst;
1544 if (ty == Ity_I16) {
1545 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1546 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1547 return dst;
1549 if (ty == Ity_I8) {
1550 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1551 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1552 return dst;
1554 break;
1557 /* --------- BINARY OP --------- */
1558 case Iex_Binop: {
1560 ARM64LogicOp lop = 0; /* invalid */
1561 ARM64ShiftOp sop = 0; /* invalid */
1563 /* Special-case 0-x into a Neg instruction. Not because it's
1564 particularly useful but more so as to give value flow using
1565 this instruction, so as to check its assembly correctness for
1566 implementation of Left32/Left64. */
1567 switch (e->Iex.Binop.op) {
1568 case Iop_Sub64:
1569 if (isZeroU64(e->Iex.Binop.arg1)) {
1570 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1571 HReg dst = newVRegI(env);
1572 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1573 return dst;
1575 break;
1576 default:
1577 break;
1580 /* ADD/SUB */
1581 switch (e->Iex.Binop.op) {
1582 case Iop_Add64: case Iop_Add32:
1583 case Iop_Sub64: case Iop_Sub32: {
1584 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1585 || e->Iex.Binop.op == Iop_Add32;
1586 HReg dst = newVRegI(env);
1587 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1588 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1589 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1590 return dst;
1592 default:
1593 break;
1596 /* AND/OR/XOR */
1597 switch (e->Iex.Binop.op) {
1598 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1599 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
1600 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1601 log_binop: {
1602 HReg dst = newVRegI(env);
1603 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1604 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1605 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1606 return dst;
1608 default:
1609 break;
1612 /* SHL/SHR/SAR */
1613 switch (e->Iex.Binop.op) {
1614 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1615 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1616 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1617 sh_binop: {
1618 HReg dst = newVRegI(env);
1619 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1620 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1621 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1622 return dst;
1624 case Iop_Shr32:
1625 case Iop_Sar32: {
1626 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1627 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1628 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1629 HReg dst = zx ? widen_z_32_to_64(env, argL)
1630 : widen_s_32_to_64(env, argL);
1631 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1632 return dst;
1634 default: break;
1637 /* MUL */
1638 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1639 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1640 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1641 HReg dst = newVRegI(env);
1642 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1643 return dst;
1646 /* MULL */
1647 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1648 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1649 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1650 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1651 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1652 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1653 HReg dst = newVRegI(env);
1654 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1655 return dst;
1658 /* Handle misc other ops. */
1660 if (e->Iex.Binop.op == Iop_Max32U) {
1661 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1662 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1663 HReg dst = newVRegI(env);
1664 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1665 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1666 return dst;
1669 if (e->Iex.Binop.op == Iop_32HLto64) {
1670 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1671 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1672 HReg lo32 = widen_z_32_to_64(env, lo32s);
1673 HReg hi32 = newVRegI(env);
1674 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1675 ARM64sh_SHL));
1676 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1677 ARM64lo_OR));
1678 return hi32;
1681 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1682 Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1683 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1684 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1685 HReg dst = newVRegI(env);
1686 HReg imm = newVRegI(env);
1687 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1688 create in dst, the IRCmpF64Result encoded result. */
1689 addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1690 addInstr(env, ARM64Instr_Imm64(dst, 0));
1691 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1692 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1693 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1694 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1695 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1696 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1697 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1698 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1699 return dst;
1702 { /* local scope */
1703 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1704 Bool srcIsD = False;
1705 switch (e->Iex.Binop.op) {
1706 case Iop_F64toI64S:
1707 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1708 case Iop_F64toI64U:
1709 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1710 case Iop_F64toI32S:
1711 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1712 case Iop_F64toI32U:
1713 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1714 case Iop_F32toI32S:
1715 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1716 case Iop_F32toI32U:
1717 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1718 case Iop_F32toI64S:
1719 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1720 case Iop_F32toI64U:
1721 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1722 default:
1723 break;
1725 if (cvt_op != ARM64cvt_INVALID) {
1726 /* This is all a bit dodgy, because we can't handle a
1727 non-constant (not-known-at-JIT-time) rounding mode
1728 indication. That's because there's no instruction
1729 AFAICS that does this conversion but rounds according to
1730 FPCR.RM, so we have to bake the rounding mode into the
1731 instruction right now. But that should be OK because
1732            (1) the front end attaches a literal Irrm_ value to the
1733            conversion binop, and (2) iropt will never float that
1734            constant off, via CSE, into a temporary.  Hence we should
1735            always see a literal Irrm_ value as the first arg. */
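               /* For example, the front end typically presents
                  F64toI32S(Irrm_ZERO, x); the literal Irrm_ZERO maps to
                  armrm == 3 below, which presumably selects the
                  round-towards-zero (FCVTZS-style) form of the conversion
                  instruction. */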
1736 IRExpr* arg1 = e->Iex.Binop.arg1;
1737 if (arg1->tag != Iex_Const) goto irreducible;
1738 IRConst* arg1con = arg1->Iex.Const.con;
1739 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1740 UInt irrm = arg1con->Ico.U32;
1741 /* Find the ARM-encoded equivalent for |irrm|. */
1742 UInt armrm = 4; /* impossible */
1743 switch (irrm) {
1744 case Irrm_NEAREST: armrm = 0; break;
1745 case Irrm_NegINF: armrm = 2; break;
1746 case Irrm_PosINF: armrm = 1; break;
1747 case Irrm_ZERO: armrm = 3; break;
1748 default: goto irreducible;
1750 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1751 (env, e->Iex.Binop.arg2);
1752 HReg dst = newVRegI(env);
1753 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1754 return dst;
1756 } /* local scope */
1758 /* All cases involving host-side helper calls. */
1759 void* fn = NULL;
1760 switch (e->Iex.Binop.op) {
1761 case Iop_DivU32:
1762 fn = &h_calc_udiv32_w_arm_semantics; break;
1763 case Iop_DivS32:
1764 fn = &h_calc_sdiv32_w_arm_semantics; break;
1765 case Iop_DivU64:
1766 fn = &h_calc_udiv64_w_arm_semantics; break;
1767 case Iop_DivS64:
1768 fn = &h_calc_sdiv64_w_arm_semantics; break;
1769 default:
1770 break;
1773 if (fn) {
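         /* The operands go in X0/X1 and the result comes back in X0, per
            the AAPCS64 integer argument/return convention.  The helper
            names suggest they reproduce ARM division semantics, notably
            that division by zero yields 0 rather than trapping. */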
1774 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1775 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1776 HReg res = newVRegI(env);
1777 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1778 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1779 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
1780 2, mk_RetLoc_simple(RLPri_Int) ));
1781 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1782 return res;
1785 break;
1788 /* --------- UNARY OP --------- */
1789 case Iex_Unop: {
1791 switch (e->Iex.Unop.op) {
1792 case Iop_16Uto64: {
1793 /* This probably doesn't occur often enough to be worth
1794 rolling the extension into the load. */
1795 IRExpr* arg = e->Iex.Unop.arg;
1796 HReg src = iselIntExpr_R(env, arg);
1797 HReg dst = widen_z_16_to_64(env, src);
1798 return dst;
1800 case Iop_32Uto64: {
1801 IRExpr* arg = e->Iex.Unop.arg;
1802 if (arg->tag == Iex_Load) {
1803 /* This correctly zero extends because _LdSt32 is
1804 defined to do a zero extending load. */
1805 HReg dst = newVRegI(env);
1806 ARM64AMode* am
1807 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1808 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1809 return dst;
1811 /* else be lame and mask it */
1812 HReg src = iselIntExpr_R(env, arg);
1813 HReg dst = widen_z_32_to_64(env, src);
1814 return dst;
1816 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
1817 case Iop_8Uto64: {
1818 IRExpr* arg = e->Iex.Unop.arg;
1819 if (arg->tag == Iex_Load) {
1820 /* This correctly zero extends because _LdSt8 is
1821 defined to do a zero extending load. */
1822 HReg dst = newVRegI(env);
1823 ARM64AMode* am
1824 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
1825 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
1826 return dst;
1828 /* else be lame and mask it */
1829 HReg src = iselIntExpr_R(env, arg);
1830 HReg dst = widen_z_8_to_64(env, src);
1831 return dst;
1833 case Iop_128HIto64: {
1834 HReg rHi, rLo;
1835 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1836 return rHi; /* and abandon rLo */
1838 case Iop_8Sto32: case Iop_8Sto64: {
1839 IRExpr* arg = e->Iex.Unop.arg;
1840 HReg src = iselIntExpr_R(env, arg);
1841 HReg dst = widen_s_8_to_64(env, src);
1842 return dst;
1844 case Iop_16Sto32: case Iop_16Sto64: {
1845 IRExpr* arg = e->Iex.Unop.arg;
1846 HReg src = iselIntExpr_R(env, arg);
1847 HReg dst = widen_s_16_to_64(env, src);
1848 return dst;
1850 case Iop_32Sto64: {
1851 IRExpr* arg = e->Iex.Unop.arg;
1852 HReg src = iselIntExpr_R(env, arg);
1853 HReg dst = widen_s_32_to_64(env, src);
1854 return dst;
1856 case Iop_Not32:
1857 case Iop_Not64: {
1858 HReg dst = newVRegI(env);
1859 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1860 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
1861 return dst;
1863 case Iop_Clz64: {
1864 HReg dst = newVRegI(env);
1865 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1866 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
1867 return dst;
1869 case Iop_Left32:
1870 case Iop_Left64: {
1871 /* Left64(src) = src | -src. Left32 can use the same
1872 implementation since in that case we don't care what
1873 the upper 32 bits become. */
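            /* E.g. src = 0x8: NEG gives 0xFFFFFFFFFFFFFFF8 and the OR
               leaves 0xFFFFFFFFFFFFFFF8.  In general bit 63 of src | -src
               is set exactly when src is nonzero, which is what the
               CmpwNEZ64 case below relies on. */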
1874 HReg dst = newVRegI(env);
1875 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1876 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1877 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1878 ARM64lo_OR));
1879 return dst;
1881 case Iop_CmpwNEZ64: {
1882 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
1883 = Left64(src) >>s 63 */
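            /* E.g. src = 5: src | -src = 0xFFFFFFFFFFFFFFFF and the
               arithmetic shift by 63 keeps it all ones; src = 0 stays
               zero. */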
1884 HReg dst = newVRegI(env);
1885 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1886 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1887 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1888 ARM64lo_OR));
1889 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1890 ARM64sh_SAR));
1891 return dst;
1893 case Iop_CmpwNEZ32: {
1894 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
1895 = Left64(src & 0xFFFFFFFF) >>s 63 */
1896 HReg dst = newVRegI(env);
1897 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1898 HReg src = widen_z_32_to_64(env, pre);
1899 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1900 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1901 ARM64lo_OR));
1902 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1903 ARM64sh_SAR));
1904 return dst;
1906 case Iop_V128to64: case Iop_V128HIto64: {
1907 HReg dst = newVRegI(env);
1908 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
1909 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
1910 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
1911 return dst;
1913 case Iop_ReinterpF64asI64: {
1914 HReg dst = newVRegI(env);
1915 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1916 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
1917 return dst;
1919 case Iop_ReinterpF32asI32: {
1920 HReg dst = newVRegI(env);
1921 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1922 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
1923 return dst;
1925 case Iop_1Sto16:
1926 case Iop_1Sto32:
1927 case Iop_1Sto64: {
1928 /* As with the iselStmt case for 'tmp:I1 = expr', we could
1929 do a lot better here if it ever became necessary. */
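            /* The SHL/SAR-by-63 pair below replicates bit 0 of the CSel
               result across all 64 bits, so dst becomes 0 or all ones.
               The same value serves for the 16- and 32-bit variants,
               since by this selector's convention the upper bits of
               narrow values are not relied upon. */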
1930 HReg zero = newVRegI(env);
1931 HReg one = newVRegI(env);
1932 HReg dst = newVRegI(env);
1933 addInstr(env, ARM64Instr_Imm64(zero, 0));
1934 addInstr(env, ARM64Instr_Imm64(one, 1));
1935 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1936 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
1937 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1938 ARM64sh_SHL));
1939 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1940 ARM64sh_SAR));
1941 return dst;
1943 case Iop_NarrowUn16to8x8:
1944 case Iop_NarrowUn32to16x4:
1945 case Iop_NarrowUn64to32x2:
1946 case Iop_QNarrowUn16Sto8Sx8:
1947 case Iop_QNarrowUn32Sto16Sx4:
1948 case Iop_QNarrowUn64Sto32Sx2:
1949 case Iop_QNarrowUn16Uto8Ux8:
1950 case Iop_QNarrowUn32Uto16Ux4:
1951 case Iop_QNarrowUn64Uto32Ux2:
1952 case Iop_QNarrowUn16Sto8Ux8:
1953 case Iop_QNarrowUn32Sto16Ux4:
1954 case Iop_QNarrowUn64Sto32Ux2:
1956 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
1957 HReg tmp = newVRegV(env);
1958 HReg dst = newVRegI(env);
1959 UInt dszBlg2 = 3; /* illegal */
1960 ARM64VecNarrowOp op = ARM64vecna_INVALID;
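            /* Plan: narrow with XTN/SQXTN/UQXTN/SQXTUN, which write the
               narrowed lanes into the low 64 bits of the destination
               vector, then extract lane 0 with VXfromQ to obtain the
               packed 64-bit result these primops produce. */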
1961 switch (e->Iex.Unop.op) {
1962 case Iop_NarrowUn16to8x8:
1963 dszBlg2 = 0; op = ARM64vecna_XTN; break;
1964 case Iop_NarrowUn32to16x4:
1965 dszBlg2 = 1; op = ARM64vecna_XTN; break;
1966 case Iop_NarrowUn64to32x2:
1967 dszBlg2 = 2; op = ARM64vecna_XTN; break;
1968 case Iop_QNarrowUn16Sto8Sx8:
1969 dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
1970 case Iop_QNarrowUn32Sto16Sx4:
1971 dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
1972 case Iop_QNarrowUn64Sto32Sx2:
1973 dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
1974 case Iop_QNarrowUn16Uto8Ux8:
1975 dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
1976 case Iop_QNarrowUn32Uto16Ux4:
1977 dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
1978 case Iop_QNarrowUn64Uto32Ux2:
1979 dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
1980 case Iop_QNarrowUn16Sto8Ux8:
1981 dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
1982 case Iop_QNarrowUn32Sto16Ux4:
1983 dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
1984 case Iop_QNarrowUn64Sto32Ux2:
1985 dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
1986 default:
1987 vassert(0);
1989 addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
1990 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
1991 return dst;
1993 case Iop_1Uto64: {
1994 /* 1Uto64(tmp). */
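            /* For a temporary, the I1 value is assumed to live in bit 0,
               so a single AND with 1 suffices; otherwise the condition is
               recomputed and materialised as 0 or 1 via CSel. */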
1995 HReg dst = newVRegI(env);
1996 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1997 ARM64RIL* one = mb_mkARM64RIL_I(1);
1998 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1999 vassert(one);
2000 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
2001 } else {
2002 /* CLONE-01 */
2003 HReg zero = newVRegI(env);
2004 HReg one = newVRegI(env);
2005 addInstr(env, ARM64Instr_Imm64(zero, 0));
2006 addInstr(env, ARM64Instr_Imm64(one, 1));
2007 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2008 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2010 return dst;
2012 case Iop_64to32:
2013 case Iop_64to16:
2014 case Iop_64to8:
2015 /* These are no-ops. */
2016 return iselIntExpr_R(env, e->Iex.Unop.arg);
2018 default:
2019 break;
2022 break;
2025 /* --------- GET --------- */
2026 case Iex_Get: {
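         /* The offset bounds below correspond to LDR/STR's scaled 12-bit
            unsigned immediate: e.g. (8<<12)-8 == 32760 == 4095 * 8 for
            64-bit accesses, with the 32-, 16- and 8-bit cases scaling the
            same 12-bit field by their access size. */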
2027 if (ty == Ity_I64
2028 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
2029 HReg dst = newVRegI(env);
2030 ARM64AMode* am
2031 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2032 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2033 return dst;
2035 if (ty == Ity_I32
2036 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
2037 HReg dst = newVRegI(env);
2038 ARM64AMode* am
2039 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2040 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2041 return dst;
2043 if (ty == Ity_I16
2044 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2045 HReg dst = newVRegI(env);
2046 ARM64AMode* am
2047 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2048 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2049 return dst;
2051 if (ty == Ity_I8
2052 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2053 HReg dst = newVRegI(env);
2054 ARM64AMode* am
2055 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2056 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2057 return dst;
2059 break;
2062 /* --------- CCALL --------- */
2063 case Iex_CCall: {
2064 HReg dst = newVRegI(env);
2065 vassert(ty == e->Iex.CCall.retty);
2067 /* be very restrictive for now. Only 64-bit ints allowed for
2068 args, and 64 bits for return type. Don't forget to change
2069 the RetLoc if more types are allowed in future. */
2070 if (e->Iex.CCall.retty != Ity_I64)
2071 goto irreducible;
2073 /* Marshal args, do the call, clear stack. */
2074 UInt addToSp = 0;
2075 RetLoc rloc = mk_RetLoc_INVALID();
2076 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2077 e->Iex.CCall.cee, e->Iex.CCall.retty,
2078 e->Iex.CCall.args );
2079 /* */
2080 if (ok) {
2081 vassert(is_sane_RetLoc(rloc));
2082 vassert(rloc.pri == RLPri_Int);
2083 vassert(addToSp == 0);
2084 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2085 return dst;
2087 goto irreducible;
2090 /* --------- LITERAL --------- */
2091 /* 64-bit literals */
2092 case Iex_Const: {
2093 ULong u = 0;
2094 HReg dst = newVRegI(env);
2095 switch (e->Iex.Const.con->tag) {
2096 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2097 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2098 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2099 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2100 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2102 addInstr(env, ARM64Instr_Imm64(dst, u));
2103 return dst;
2106 /* --------- MULTIPLEX --------- */
2107 case Iex_ITE: {
2108 /* ITE(ccexpr, iftrue, iffalse) */
2109 if (ty == Ity_I64 || ty == Ity_I32) {
2110 ARM64CondCode cc;
2111 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2112 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2113 HReg dst = newVRegI(env);
2114 cc = iselCondCode(env, e->Iex.ITE.cond);
2115 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2116 return dst;
2118 break;
2121 default:
2122 break;
2123 } /* switch (e->tag) */
2125 /* We get here if no pattern matched. */
2126 irreducible:
2127 ppIRExpr(e);
2128 vpanic("iselIntExpr_R: cannot reduce tree");
2132 /*---------------------------------------------------------*/
2133 /*--- ISEL: Integer expressions (128 bit) ---*/
2134 /*---------------------------------------------------------*/
2136 /* Compute a 128-bit value into a register pair, which is returned as
2137 the first two parameters. As with iselIntExpr_R, these may be
2138 either real or virtual regs; in any case they must not be changed
2139 by subsequent code emitted by the caller. */
2141 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2142 ISelEnv* env, IRExpr* e )
2144 iselInt128Expr_wrk(rHi, rLo, env, e);
2145 # if 0
2146 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2147 # endif
2148 vassert(hregClass(*rHi) == HRcInt64);
2149 vassert(hregIsVirtual(*rHi));
2150 vassert(hregClass(*rLo) == HRcInt64);
2151 vassert(hregIsVirtual(*rLo));
2154 /* DO NOT CALL THIS DIRECTLY ! */
2155 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2156 ISelEnv* env, IRExpr* e )
2158 vassert(e);
2159 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2161 /* --------- BINARY ops --------- */
2162 if (e->tag == Iex_Binop) {
2163 switch (e->Iex.Binop.op) {
2164 /* 64 x 64 -> 128 multiply */
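         /* The low 64 bits of the product come from a plain MUL; the high
            64 bits presumably come from UMULH/SMULH (the _ZX/_SX forms).
            Together the two halves form the full 128-bit product. */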
2165 case Iop_MullU64:
2166 case Iop_MullS64: {
2167 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2168 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2169 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2170 HReg dstLo = newVRegI(env);
2171 HReg dstHi = newVRegI(env);
2172 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2173 ARM64mul_PLAIN));
2174 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2175 syned ? ARM64mul_SX : ARM64mul_ZX));
2176 *rHi = dstHi;
2177 *rLo = dstLo;
2178 return;
2180 /* 64HLto128(e1,e2) */
2181 case Iop_64HLto128:
2182 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2183 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2184 return;
2185 default:
2186 break;
2188 } /* if (e->tag == Iex_Binop) */
2190 ppIRExpr(e);
2191 vpanic("iselInt128Expr(arm64)");
2195 /*---------------------------------------------------------*/
2196 /*--- ISEL: Vector expressions (128 bit) ---*/
2197 /*---------------------------------------------------------*/
2199 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
2201 HReg r = iselV128Expr_wrk( env, e );
2202 vassert(hregClass(r) == HRcVec128);
2203 vassert(hregIsVirtual(r));
2204 return r;
2207 /* DO NOT CALL THIS DIRECTLY */
2208 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
2210 IRType ty = typeOfIRExpr(env->type_env, e);
2211 vassert(e);
2212 vassert(ty == Ity_V128);
2214 if (e->tag == Iex_RdTmp) {
2215 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2218 if (e->tag == Iex_Const) {
2219 /* Only a very limited range of constants is handled. */
2220 vassert(e->Iex.Const.con->tag == Ico_V128);
2221 UShort con = e->Iex.Const.con->Ico.V128;
2222 HReg res = newVRegV(env);
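      /* VImmQ's imm16 appears to be a per-byte mask: bit i set means byte
         i of the result is 0xFF.  Masks it cannot produce directly are
         synthesised by rotating a producible one with EXT, e.g. for 0x00F0
         start from 0x000F (bytes 0..3 set) and VExtV by 12 bytes, which
         moves those bytes to positions 4..7. */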
2223 switch (con) {
2224 case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
2225 addInstr(env, ARM64Instr_VImmQ(res, con));
2226 return res;
2227 case 0x00F0:
2228 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2229 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2230 return res;
2231 case 0x0F00:
2232 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2233 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2234 return res;
2235 case 0x0FF0:
2236 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2237 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2238 return res;
2239 case 0x0FFF:
2240 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2241 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2242 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2243 return res;
2244 case 0xF000:
2245 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2246 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2247 return res;
2248 case 0xFF00:
2249 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2250 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2251 return res;
2252 default:
2253 break;
2255 /* Unhandled */
2256 goto v128_expr_bad;
2259 if (e->tag == Iex_Load) {
2260 HReg res = newVRegV(env);
2261 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
2262 vassert(ty == Ity_V128);
2263 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
2264 return res;
2267 if (e->tag == Iex_Get) {
2268 UInt offs = (UInt)e->Iex.Get.offset;
2269 if (offs < (1<<12)) {
2270 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
2271 HReg res = newVRegV(env);
2272 vassert(ty == Ity_V128);
2273 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
2274 return res;
2276 goto v128_expr_bad;
2279 if (e->tag == Iex_Unop) {
2281 /* Iop_ZeroHIXXofV128 cases */
2282 UShort imm16 = 0;
2283 switch (e->Iex.Unop.op) {
2284 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
2285 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
2286 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
2287 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
2288 default: break;
2290 if (imm16 != 0) {
2291 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2292 HReg imm = newVRegV(env);
2293 HReg res = newVRegV(env);
2294 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
2295 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
2296 return res;
2299 /* Other cases */
2300 switch (e->Iex.Unop.op) {
2301 case Iop_NotV128:
2302 case Iop_Abs64Fx2: case Iop_Abs32Fx4:
2303 case Iop_Neg64Fx2: case Iop_Neg32Fx4:
2304 case Iop_Abs64x2: case Iop_Abs32x4:
2305 case Iop_Abs16x8: case Iop_Abs8x16:
2306 case Iop_Cls32x4: case Iop_Cls16x8: case Iop_Cls8x16:
2307 case Iop_Clz32x4: case Iop_Clz16x8: case Iop_Clz8x16:
2308 case Iop_Cnt8x16:
2309 case Iop_Reverse1sIn8_x16:
2310 case Iop_Reverse8sIn16_x8:
2311 case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
2312 case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
2313 case Iop_Reverse32sIn64_x2:
2314 case Iop_RecipEst32Ux4:
2315 case Iop_RSqrtEst32Ux4:
2316 case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
2317 case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
2319 HReg res = newVRegV(env);
2320 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2321 Bool setRM = False;
2322 ARM64VecUnaryOp op = ARM64vecu_INVALID;
2323 switch (e->Iex.Unop.op) {
2324 case Iop_NotV128: op = ARM64vecu_NOT; break;
2325 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
2326 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
2327 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
2328 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
2329 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
2330 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
2331 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
2332 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
2333 case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break;
2334 case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break;
2335 case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break;
2336 case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break;
2337 case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break;
2338 case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break;
2339 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
2340 case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break;
2341 case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break;
2342 case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break;
2343 case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break;
2344 case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break;
2345 case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break;
2346 case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
2347 case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break;
2348 case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break;
2349 case Iop_RecipEst64Fx2: setRM = True;
2350 op = ARM64vecu_FRECPE64x2; break;
2351 case Iop_RecipEst32Fx4: setRM = True;
2352 op = ARM64vecu_FRECPE32x4; break;
2353 case Iop_RSqrtEst64Fx2: setRM = True;
2354 op = ARM64vecu_FRSQRTE64x2; break;
2355 case Iop_RSqrtEst32Fx4: setRM = True;
2356 op = ARM64vecu_FRSQRTE32x4; break;
2357 default: vassert(0);
2359 if (setRM) {
2360 // This is a bit of a kludge. We should do rm properly for
2361 // these recip-est insns, but that would require changing the
2362 // primop's type to take an rmode.
2363 set_FPCR_rounding_mode(env, IRExpr_Const(
2364 IRConst_U32(Irrm_NEAREST)));
2366 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2367 return res;
2369 case Iop_CmpNEZ8x16:
2370 case Iop_CmpNEZ16x8:
2371 case Iop_CmpNEZ32x4:
2372 case Iop_CmpNEZ64x2: {
2373 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2374 HReg zero = newVRegV(env);
2375 HReg res = newVRegV(env);
2376 ARM64VecBinOp cmp = ARM64vecb_INVALID;
2377 switch (e->Iex.Unop.op) {
2378 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
2379 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
2380 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
2381 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
2382 default: vassert(0);
2384 // This is pretty feeble. Better: use CMP against zero
2385 // and avoid the extra instruction and extra register.
2386 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
2387 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
2388 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2389 return res;
2391 case Iop_V256toV128_0:
2392 case Iop_V256toV128_1: {
2393 HReg vHi, vLo;
2394 iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
2395 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
2397 case Iop_64UtoV128: {
2398 HReg res = newVRegV(env);
2399 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2400 addInstr(env, ARM64Instr_VQfromX(res, arg));
2401 return res;
2403 case Iop_Widen8Sto16x8: {
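         /* ZIP1 of the value with itself pairs each source byte with a
            copy of itself (a0,a0,a1,a1,...); the arithmetic shift right
            by 8 within each 16-bit lane then leaves the sign-extension of
            the original byte.  The 16->32 and 32->64 cases below use the
            same trick with wider lanes. */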
2404 HReg res = newVRegV(env);
2405 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2406 addInstr(env, ARM64Instr_VQfromX(res, arg));
2407 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
2408 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
2409 res, res, 8));
2410 return res;
2412 case Iop_Widen16Sto32x4: {
2413 HReg res = newVRegV(env);
2414 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2415 addInstr(env, ARM64Instr_VQfromX(res, arg));
2416 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
2417 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
2418 res, res, 16));
2419 return res;
2421 case Iop_Widen32Sto64x2: {
2422 HReg res = newVRegV(env);
2423 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2424 addInstr(env, ARM64Instr_VQfromX(res, arg));
2425 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
2426 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
2427 res, res, 32));
2428 return res;
2430 /* ... */
2431 default:
2432 break;
2433 } /* switch on the unop */
2434 } /* if (e->tag == Iex_Unop) */
2436 if (e->tag == Iex_Binop) {
2437 switch (e->Iex.Binop.op) {
2438 case Iop_Sqrt32Fx4:
2439 case Iop_Sqrt64Fx2: {
2440 HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
2441 HReg res = newVRegV(env);
2442 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
2443 ARM64VecUnaryOp op
2444 = e->Iex.Binop.op == Iop_Sqrt32Fx4
2445 ? ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2;
2446 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2447 return res;
2449 case Iop_64HLtoV128: {
2450 HReg res = newVRegV(env);
2451 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2452 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2453 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
2454 return res;
2456 /* -- Cases where we can generate a simple three-reg instruction. -- */
2457 case Iop_AndV128:
2458 case Iop_OrV128:
2459 case Iop_XorV128:
2460 case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
2461 case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
2462 case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
2463 case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
2464 case Iop_Add64x2: case Iop_Add32x4:
2465 case Iop_Add16x8: case Iop_Add8x16:
2466 case Iop_Sub64x2: case Iop_Sub32x4:
2467 case Iop_Sub16x8: case Iop_Sub8x16:
2468 case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
2469 case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
2470 case Iop_CmpEQ16x8: case Iop_CmpEQ8x16:
2471 case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
2472 case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
2473 case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
2474 case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
2475 case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
2476 case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
2477 case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
2478 case Iop_Perm8x16:
2479 case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
2480 case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
2481 case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
2482 case Iop_CatOddLanes16x8: case Iop_CatOddLanes8x16:
2483 case Iop_InterleaveHI32x4:
2484 case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
2485 case Iop_InterleaveLO32x4:
2486 case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
2487 case Iop_PolynomialMul8x16:
2488 case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
2489 case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
2490 case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
2491 case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
2492 case Iop_QSub64Sx2: case Iop_QSub32Sx4:
2493 case Iop_QSub16Sx8: case Iop_QSub8Sx16:
2494 case Iop_QSub64Ux2: case Iop_QSub32Ux4:
2495 case Iop_QSub16Ux8: case Iop_QSub8Ux16:
2496 case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8:
2497 case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
2498 case Iop_Sh8Sx16: case Iop_Sh16Sx8:
2499 case Iop_Sh32Sx4: case Iop_Sh64Sx2:
2500 case Iop_Sh8Ux16: case Iop_Sh16Ux8:
2501 case Iop_Sh32Ux4: case Iop_Sh64Ux2:
2502 case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
2503 case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
2504 case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
2505 case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
2506 case Iop_Max64Fx2: case Iop_Max32Fx4:
2507 case Iop_Min64Fx2: case Iop_Min32Fx4:
2508 case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
2509 case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
2511 HReg res = newVRegV(env);
2512 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2513 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2514 Bool sw = False;
2515 Bool setRM = False;
2516 ARM64VecBinOp op = ARM64vecb_INVALID;
2517 switch (e->Iex.Binop.op) {
2518 case Iop_AndV128: op = ARM64vecb_AND; break;
2519 case Iop_OrV128: op = ARM64vecb_ORR; break;
2520 case Iop_XorV128: op = ARM64vecb_XOR; break;
2521 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
2522 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
2523 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break;
2524 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
2525 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
2526 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
2527 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
2528 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
2529 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
2530 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
2531 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
2532 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
2533 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
2534 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
2535 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
2536 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
2537 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
2538 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
2539 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
2540 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
2541 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
2542 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
2543 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break;
2544 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
2545 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
2546 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
2547 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
2548 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
2549 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
2550 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
2551 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
2552 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
2553 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
2554 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
2555 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
2556 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
2557 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
2558 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
2559 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
2560 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
2561 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
2562 case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
2563 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
2564 break;
2565 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
2566 break;
2567 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
2568 break;
2569 case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
2570 break;
2571 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
2572 break;
2573 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True;
2574 break;
2575 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True;
2576 break;
2577 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True;
2578 break;
2579 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
2580 break;
2581 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
2582 break;
2583 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
2584 break;
2585 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
2586 break;
2587 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
2588 break;
2589 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
2590 break;
2591 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
2592 case Iop_QAdd64Sx2: op = ARM64vecb_SQADD64x2; break;
2593 case Iop_QAdd32Sx4: op = ARM64vecb_SQADD32x4; break;
2594 case Iop_QAdd16Sx8: op = ARM64vecb_SQADD16x8; break;
2595 case Iop_QAdd8Sx16: op = ARM64vecb_SQADD8x16; break;
2596 case Iop_QAdd64Ux2: op = ARM64vecb_UQADD64x2; break;
2597 case Iop_QAdd32Ux4: op = ARM64vecb_UQADD32x4; break;
2598 case Iop_QAdd16Ux8: op = ARM64vecb_UQADD16x8; break;
2599 case Iop_QAdd8Ux16: op = ARM64vecb_UQADD8x16; break;
2600 case Iop_QSub64Sx2: op = ARM64vecb_SQSUB64x2; break;
2601 case Iop_QSub32Sx4: op = ARM64vecb_SQSUB32x4; break;
2602 case Iop_QSub16Sx8: op = ARM64vecb_SQSUB16x8; break;
2603 case Iop_QSub8Sx16: op = ARM64vecb_SQSUB8x16; break;
2604 case Iop_QSub64Ux2: op = ARM64vecb_UQSUB64x2; break;
2605 case Iop_QSub32Ux4: op = ARM64vecb_UQSUB32x4; break;
2606 case Iop_QSub16Ux8: op = ARM64vecb_UQSUB16x8; break;
2607 case Iop_QSub8Ux16: op = ARM64vecb_UQSUB8x16; break;
2608 case Iop_QDMulHi32Sx4: op = ARM64vecb_SQDMULH32x4; break;
2609 case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break;
2610 case Iop_QRDMulHi32Sx4: op = ARM64vecb_SQRDMULH32x4; break;
2611 case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break;
2612 case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break;
2613 case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break;
2614 case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break;
2615 case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break;
2616 case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break;
2617 case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break;
2618 case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break;
2619 case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break;
2620 case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break;
2621 case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break;
2622 case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break;
2623 case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break;
2624 case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break;
2625 case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break;
2626 case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break;
2627 case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break;
2628 case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2; break;
2629 case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break;
2630 case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break;
2631 case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break;
2632 case Iop_RecipStep64Fx2: setRM = True;
2633 op = ARM64vecb_FRECPS64x2; break;
2634 case Iop_RecipStep32Fx4: setRM = True;
2635 op = ARM64vecb_FRECPS32x4; break;
2636 case Iop_RSqrtStep64Fx2: setRM = True;
2637 op = ARM64vecb_FRSQRTS64x2; break;
2638 case Iop_RSqrtStep32Fx4: setRM = True;
2639 op = ARM64vecb_FRSQRTS32x4; break;
2640 default: vassert(0);
2642 if (setRM) {
2643 // This is a bit of a kludge. We should do rm properly for
2644 // these recip-step insns, but that would require changing the
2645 // primop's type to take an rmode.
2646 set_FPCR_rounding_mode(env, IRExpr_Const(
2647 IRConst_U32(Irrm_NEAREST)));
2649 if (sw) {
2650 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
2651 } else {
2652 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
2654 return res;
2656         /* -- These only have 2-operand, accumulate-into-destination forms, so
2657            we first copy one argument into a fresh register to be modified. -- */
2658 case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2659 case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2660 case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2661 case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2663 HReg res = newVRegV(env);
2664 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2665 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2666 ARM64VecModifyOp op = ARM64vecmo_INVALID;
2667 switch (e->Iex.Binop.op) {
2668 /* In the following 8 cases, the US - SU switching is intended.
2669               See the comments in libvex_ir.h for details, and in the
2670               ARM64 front end, where these primops are generated. */
2671 case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2672 case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2673 case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2674 case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2675 case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2676 case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2677 case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2678 case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2679 default: vassert(0);
2681 /* The order of the operands is important. Although this is
2682 basically addition, the two operands are extended differently,
2683            so they must be placed in the correct registers in
2684            the instruction. */
2685 addInstr(env, ARM64Instr_VMov(16, res, argR));
2686 addInstr(env, ARM64Instr_VModifyV(op, res, argL));
2687 return res;
2689 /* -- Shifts by an immediate. -- */
2690 case Iop_ShrN64x2: case Iop_ShrN32x4:
2691 case Iop_ShrN16x8: case Iop_ShrN8x16:
2692 case Iop_SarN64x2: case Iop_SarN32x4:
2693 case Iop_SarN16x8: case Iop_SarN8x16:
2694 case Iop_ShlN64x2: case Iop_ShlN32x4:
2695 case Iop_ShlN16x8: case Iop_ShlN8x16:
2696 case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2697 case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2698 case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2699 case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2700 case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2701 case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2703 IRExpr* argL = e->Iex.Binop.arg1;
2704 IRExpr* argR = e->Iex.Binop.arg2;
2705 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2706 UInt amt = argR->Iex.Const.con->Ico.U8;
2707 UInt limLo = 0;
2708 UInt limHi = 0;
2709 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2710 /* Establish the instruction to use. */
2711 switch (e->Iex.Binop.op) {
2712 case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break;
2713 case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break;
2714 case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break;
2715 case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break;
2716 case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break;
2717 case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break;
2718 case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break;
2719 case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break;
2720 case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break;
2721 case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break;
2722 case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break;
2723 case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break;
2724 case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break;
2725 case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break;
2726 case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break;
2727 case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break;
2728 case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break;
2729 case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break;
2730 case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break;
2731 case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break;
2732 case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2733 case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2734 case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2735 case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2736 default: vassert(0);
2738 /* Establish the shift limits, for sanity check purposes only. */
2739 switch (e->Iex.Binop.op) {
2740 case Iop_ShrN64x2: limLo = 1; limHi = 64; break;
2741 case Iop_ShrN32x4: limLo = 1; limHi = 32; break;
2742 case Iop_ShrN16x8: limLo = 1; limHi = 16; break;
2743 case Iop_ShrN8x16: limLo = 1; limHi = 8; break;
2744 case Iop_SarN64x2: limLo = 1; limHi = 64; break;
2745 case Iop_SarN32x4: limLo = 1; limHi = 32; break;
2746 case Iop_SarN16x8: limLo = 1; limHi = 16; break;
2747 case Iop_SarN8x16: limLo = 1; limHi = 8; break;
2748 case Iop_ShlN64x2: limLo = 0; limHi = 63; break;
2749 case Iop_ShlN32x4: limLo = 0; limHi = 31; break;
2750 case Iop_ShlN16x8: limLo = 0; limHi = 15; break;
2751 case Iop_ShlN8x16: limLo = 0; limHi = 7; break;
2752 case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2753 case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2754 case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2755 case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7; break;
2756 case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2757 case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2758 case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
2759 case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7; break;
2760 case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
2761 case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
2762 case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
2763 case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7; break;
2764 default: vassert(0);
2766 /* For left shifts, the allowable amt values are
2767 0 .. lane_bits-1. For right shifts the allowable
2768 values are 1 .. lane_bits. */
2769 if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
2770 HReg src = iselV128Expr(env, argL);
2771 HReg dst = newVRegV(env);
2772 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2773 return dst;
2775 /* Special case some no-op shifts that the arm64 front end
2776 throws at us. We can't generate any instructions for these,
2777 but we don't need to either. */
2778 switch (e->Iex.Binop.op) {
2779 case Iop_ShrN64x2: case Iop_ShrN32x4:
2780 case Iop_ShrN16x8: case Iop_ShrN8x16:
2781 if (amt == 0) {
2782 return iselV128Expr(env, argL);
2784 break;
2785 default:
2786 break;
2788 /* otherwise unhandled */
2790 /* else fall out; this is unhandled */
2791 break;
2793 /* -- Saturating narrowing by an immediate -- */
2794 /* uu */
2795 case Iop_QandQShrNnarrow16Uto8Ux8:
2796 case Iop_QandQShrNnarrow32Uto16Ux4:
2797 case Iop_QandQShrNnarrow64Uto32Ux2:
2798 /* ss */
2799 case Iop_QandQSarNnarrow16Sto8Sx8:
2800 case Iop_QandQSarNnarrow32Sto16Sx4:
2801 case Iop_QandQSarNnarrow64Sto32Sx2:
2802 /* su */
2803 case Iop_QandQSarNnarrow16Sto8Ux8:
2804 case Iop_QandQSarNnarrow32Sto16Ux4:
2805 case Iop_QandQSarNnarrow64Sto32Ux2:
2806 /* ruu */
2807 case Iop_QandQRShrNnarrow16Uto8Ux8:
2808 case Iop_QandQRShrNnarrow32Uto16Ux4:
2809 case Iop_QandQRShrNnarrow64Uto32Ux2:
2810 /* rss */
2811 case Iop_QandQRSarNnarrow16Sto8Sx8:
2812 case Iop_QandQRSarNnarrow32Sto16Sx4:
2813 case Iop_QandQRSarNnarrow64Sto32Sx2:
2814 /* rsu */
2815 case Iop_QandQRSarNnarrow16Sto8Ux8:
2816 case Iop_QandQRSarNnarrow32Sto16Ux4:
2817 case Iop_QandQRSarNnarrow64Sto32Ux2:
2819 IRExpr* argL = e->Iex.Binop.arg1;
2820 IRExpr* argR = e->Iex.Binop.arg2;
2821 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2822 UInt amt = argR->Iex.Const.con->Ico.U8;
2823 UInt limit = 0;
2824 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2825 switch (e->Iex.Binop.op) {
2826 /* uu */
2827 case Iop_QandQShrNnarrow64Uto32Ux2:
2828 op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
2829 case Iop_QandQShrNnarrow32Uto16Ux4:
2830 op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
2831 case Iop_QandQShrNnarrow16Uto8Ux8:
2832 op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
2833 /* ss */
2834 case Iop_QandQSarNnarrow64Sto32Sx2:
2835 op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
2836 case Iop_QandQSarNnarrow32Sto16Sx4:
2837 op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
2838 case Iop_QandQSarNnarrow16Sto8Sx8:
2839 op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
2840 /* su */
2841 case Iop_QandQSarNnarrow64Sto32Ux2:
2842 op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
2843 case Iop_QandQSarNnarrow32Sto16Ux4:
2844 op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
2845 case Iop_QandQSarNnarrow16Sto8Ux8:
2846 op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
2847 /* ruu */
2848 case Iop_QandQRShrNnarrow64Uto32Ux2:
2849 op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
2850 case Iop_QandQRShrNnarrow32Uto16Ux4:
2851 op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
2852 case Iop_QandQRShrNnarrow16Uto8Ux8:
2853 op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
2854 /* rss */
2855 case Iop_QandQRSarNnarrow64Sto32Sx2:
2856 op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
2857 case Iop_QandQRSarNnarrow32Sto16Sx4:
2858 op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
2859 case Iop_QandQRSarNnarrow16Sto8Sx8:
2860 op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
2861 /* rsu */
2862 case Iop_QandQRSarNnarrow64Sto32Ux2:
2863 op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
2864 case Iop_QandQRSarNnarrow32Sto16Ux4:
2865 op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
2866 case Iop_QandQRSarNnarrow16Sto8Ux8:
2867 op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
2868 /**/
2869 default:
2870 vassert(0);
2872 if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
2873 HReg src = iselV128Expr(env, argL);
2874 HReg dst = newVRegV(env);
2875 HReg fpsr = newVRegI(env);
2876 /* Clear FPSR.Q, do the operation, and return both its
2877 result and the new value of FPSR.Q. We can simply
2878 zero out FPSR since all the other bits have no relevance
2879 in VEX generated code. */
2880 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
2881 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
2882 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2883 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
2884 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
2885 ARM64sh_SHR));
2886 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
2887 vassert(ril_one);
2888 addInstr(env, ARM64Instr_Logic(fpsr,
2889 fpsr, ril_one, ARM64lo_AND));
2890 /* Now we have: the main (shift) result in the bottom half
2891 of |dst|, and the Q bit at the bottom of |fpsr|.
2892               Combining them with an "InterleaveLO64x2"-style operation
2893 produces a 128 bit value, dst[63:0]:fpsr[63:0],
2894 which is what we want. */
2895 HReg scratch = newVRegV(env);
2896 addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
2897 addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
2898 dst, dst, scratch));
2899 return dst;
2902 /* else fall out; this is unhandled */
2903 break;
2906 // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
2907 // as it is in some ways more general and often leads to better
2908 // code overall.
2909 case Iop_ShlV128:
2910 case Iop_ShrV128: {
2911 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
2912 /* This is tricky. Generate an EXT instruction with zeroes in
2913 the high operand (shift right) or low operand (shift left).
2914 Note that we can only slice in the EXT instruction at a byte
2915 level of granularity, so the shift amount needs careful
2916 checking. */
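         /* For example, ShrV128(x, 0x20) gives immB == 4; assuming
            VExtV(dst, lo, hi, n) computes the 16-byte slice
            (hi:lo) >> (8*n), the EXT with a zero high operand yields
            bytes x[4..15] followed by four zero bytes, i.e. x shifted
            right by 32 bits. */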
2917 IRExpr* argL = e->Iex.Binop.arg1;
2918 IRExpr* argR = e->Iex.Binop.arg2;
2919 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2920 UInt amt = argR->Iex.Const.con->Ico.U8;
2921 Bool amtOK = False;
2922 switch (amt) {
2923 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
2924 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
2925 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
2926 amtOK = True; break;
2928 /* We could also deal with amt==0 by copying the source to
2929 the destination, but there's no need for that so far. */
2930 if (amtOK) {
2931 HReg src = iselV128Expr(env, argL);
2932 HReg srcZ = newVRegV(env);
2933 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
2934 UInt immB = amt / 8;
2935 vassert(immB >= 1 && immB <= 15);
2936 HReg dst = newVRegV(env);
2937 if (isSHR) {
2938 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
2939 immB));
2940 } else {
2941 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
2942 16 - immB));
2944 return dst;
2947 /* else fall out; this is unhandled */
2948 break;
2951 case Iop_PolynomialMull8x8:
2952 case Iop_Mull32Ux2:
2953 case Iop_Mull16Ux4:
2954 case Iop_Mull8Ux8:
2955 case Iop_Mull32Sx2:
2956 case Iop_Mull16Sx4:
2957 case Iop_Mull8Sx8:
2958 case Iop_QDMull32Sx2:
2959 case Iop_QDMull16Sx4:
2961 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2962 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2963 HReg vSrcL = newVRegV(env);
2964 HReg vSrcR = newVRegV(env);
2965 HReg dst = newVRegV(env);
2966 ARM64VecBinOp op = ARM64vecb_INVALID;
2967 switch (e->Iex.Binop.op) {
2968 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break;
2969 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break;
2970 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break;
2971 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break;
2972 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break;
2973 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; break;
2974 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break;
2975 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break;
2976 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break;
2977 default: vassert(0);
2979 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
2980 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
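         /* Both scalars are now duplicated into both halves of a Q
            register; the widening multiply presumably reads only the low
            64 bits (two 32-bit, four 16-bit or eight 8-bit lanes) of each
            source, producing a 128-bit vector of double-width products. */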
2981 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
2982 return dst;
2985 /* ... */
2986 default:
2987 break;
2988 } /* switch on the binop */
2989 } /* if (e->tag == Iex_Binop) */
2991 if (e->tag == Iex_Triop) {
2992 IRTriop* triop = e->Iex.Triop.details;
2993 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
2994 switch (triop->op) {
2995 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
2996 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
2997 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
2998 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
2999 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
3000 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
3001 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
3002 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
3003 default: break;
3005 if (vecbop != ARM64vecb_INVALID) {
3006 HReg argL = iselV128Expr(env, triop->arg2);
3007 HReg argR = iselV128Expr(env, triop->arg3);
3008 HReg dst = newVRegV(env);
3009 set_FPCR_rounding_mode(env, triop->arg1);
3010 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
3011 return dst;
3014 if (triop->op == Iop_SliceV128) {
3015 /* Note that, compared to ShlV128/ShrV128 just above, the shift
3016 amount here is in bytes, not bits. */
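         /* E.g. SliceV128(hi, lo, 3) selects bytes 3..18 of the 32-byte
            concatenation hi:lo, i.e. (hi:lo) >> 24 bits, which a single
            EXT with byte immediate 3 provides directly. */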
3017 IRExpr* argHi = triop->arg1;
3018 IRExpr* argLo = triop->arg2;
3019 IRExpr* argAmt = triop->arg3;
3020 if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
3021 UInt amt = argAmt->Iex.Const.con->Ico.U8;
3022 Bool amtOK = amt >= 1 && amt <= 15;
3023 /* We could also deal with amt==0 by copying argLO to
3024 the destination, but there's no need for that so far. */
3025 if (amtOK) {
3026 HReg srcHi = iselV128Expr(env, argHi);
3027 HReg srcLo = iselV128Expr(env, argLo);
3028 HReg dst = newVRegV(env);
3029 addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
3030 return dst;
3033 /* else fall out; this is unhandled */
3036 } /* if (e->tag == Iex_Triop) */
3038 if (e->tag == Iex_ITE) {
3039 // This code sequence is pretty feeble. We'd do better to generate BSL
3040 // here.
3041 HReg rX = newVRegI(env);
3043 ARM64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3044 addInstr(env, ARM64Instr_Set64(rX, cc));
3045 // cond: rX = 1 !cond: rX = 0
3047 // Mask the Set64 result. This is paranoia (should be unnecessary).
3048 ARM64RIL* one = mb_mkARM64RIL_I(1);
3049 vassert(one);
3050 addInstr(env, ARM64Instr_Logic(rX, rX, one, ARM64lo_AND));
3051 // cond: rX = 1 !cond: rX = 0
3053 // Propagate to all bits in the 64 bit word by subtracting 1 from it.
3054 // This also inverts the sense of the value.
3055 addInstr(env, ARM64Instr_Arith(rX, rX, ARM64RIA_I12(1,0),
3056 /*isAdd=*/False));
3057 // cond: rX = 0-(62)-0 !cond: rX = 1-(62)-1
3059 // Duplicate rX into a vector register
3060 HReg vMask = newVRegV(env);
3061 addInstr(env, ARM64Instr_VQfromXX(vMask, rX, rX));
3062 // cond: vMask = 0-(126)-0 !cond: vMask = 1-(126)-1
3064 HReg vIfTrue = iselV128Expr(env, e->Iex.ITE.iftrue);
3065 HReg vIfFalse = iselV128Expr(env, e->Iex.ITE.iffalse);
3067 // Mask out iffalse value as needed
3068 addInstr(env,
3069 ARM64Instr_VBinV(ARM64vecb_AND, vIfFalse, vIfFalse, vMask));
3071 // Invert the mask so we can use it for the iftrue value
3072 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, vMask, vMask));
3073 // cond: vMask = 1-(126)-1 !cond: vMask = 0-(126)-0
3075 // Mask out iftrue value as needed
3076 addInstr(env,
3077 ARM64Instr_VBinV(ARM64vecb_AND, vIfTrue, vIfTrue, vMask));
3079 // Merge the masked iftrue and iffalse results.
3080 HReg res = newVRegV(env);
3081 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ORR, res, vIfTrue, vIfFalse));
3083 return res;
3086 v128_expr_bad:
3087 ppIRExpr(e);
3088 vpanic("iselV128Expr_wrk");
3092 /*---------------------------------------------------------*/
3093 /*--- ISEL: Floating point expressions (64 bit) ---*/
3094 /*---------------------------------------------------------*/
3096 /* Compute a 64-bit floating point value into a register, the identity
3097 of which is returned. As with iselIntExpr_R, the reg may be either
3098 real or virtual; in any case it must not be changed by subsequent
3099 code emitted by the caller. */
3101 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
3103 HReg r = iselDblExpr_wrk( env, e );
3104 # if 0
3105 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3106 # endif
3107 vassert(hregClass(r) == HRcFlt64);
3108 vassert(hregIsVirtual(r));
3109 return r;
3112 /* DO NOT CALL THIS DIRECTLY */
3113 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
3115 IRType ty = typeOfIRExpr(env->type_env,e);
3116 vassert(e);
3117 vassert(ty == Ity_F64);
3119 if (e->tag == Iex_RdTmp) {
3120 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3123 if (e->tag == Iex_Const) {
3124 IRConst* con = e->Iex.Const.con;
3125 if (con->tag == Ico_F64i) {
3126 HReg src = newVRegI(env);
3127 HReg dst = newVRegD(env);
3128 addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
3129 addInstr(env, ARM64Instr_VDfromX(dst, src));
3130 return dst;
3132 if (con->tag == Ico_F64) {
3133 HReg src = newVRegI(env);
3134 HReg dst = newVRegD(env);
3135 union { Double d64; ULong u64; } u;
3136 vassert(sizeof(u) == 8);
3137 u.d64 = con->Ico.F64;
3138 addInstr(env, ARM64Instr_Imm64(src, u.u64));
3139 addInstr(env, ARM64Instr_VDfromX(dst, src));
3140 return dst;
3144 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3145 vassert(e->Iex.Load.ty == Ity_F64);
3146 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3147 HReg res = newVRegD(env);
3148 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
3149 return res;
3152 if (e->tag == Iex_Get) {
3153 Int offs = e->Iex.Get.offset;
3154 if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
3155 HReg rD = newVRegD(env);
3156 HReg rN = get_baseblock_register();
3157 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
3158 return rD;
3162 if (e->tag == Iex_Unop) {
3163 switch (e->Iex.Unop.op) {
3164 case Iop_NegF64: {
3165 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3166 HReg dst = newVRegD(env);
3167 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
3168 return dst;
3170 case Iop_AbsF64: {
3171 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3172 HReg dst = newVRegD(env);
3173 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
3174 return dst;
3176 case Iop_F32toF64: {
3177 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3178 HReg dst = newVRegD(env);
3179 addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
3180 return dst;
3182 case Iop_F16toF64: {
3183 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3184 HReg dst = newVRegD(env);
3185 addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
3186 return dst;
3188 case Iop_I32UtoF64:
3189 case Iop_I32StoF64: {
3190 /* Rounding mode is not involved here, since the
3191 conversion can always be done without loss of
3192 precision. */
3193 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3194 HReg dst = newVRegD(env);
3195 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
3196 ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
3197 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
3198 return dst;
3200 default:
3201 break;
3205 if (e->tag == Iex_Binop) {
3206 switch (e->Iex.Binop.op) {
3207 case Iop_RoundF64toInt:
3208 case Iop_SqrtF64:
3209 case Iop_RecpExpF64: {
3210 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3211 HReg dst = newVRegD(env);
3212 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3213 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3214 switch (e->Iex.Binop.op) {
3215 case Iop_RoundF64toInt: op = ARM64fpu_RINT; break;
3216 case Iop_SqrtF64: op = ARM64fpu_SQRT; break;
3217 case Iop_RecpExpF64: op = ARM64fpu_RECPX; break;
3218 default: vassert(0);
3220 addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
3221 return dst;
3223 case Iop_I64StoF64:
3224 case Iop_I64UtoF64: {
3225 ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
3226 ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
3227 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3228 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3229 HReg dstS = newVRegD(env);
3230 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3231 return dstS;
3233 default:
3234 break;
3238 if (e->tag == Iex_Triop) {
3239 IRTriop* triop = e->Iex.Triop.details;
3240 ARM64FpBinOp dblop = ARM64fpb_INVALID;
3241 switch (triop->op) {
3242 case Iop_DivF64: dblop = ARM64fpb_DIV; break;
3243 case Iop_MulF64: dblop = ARM64fpb_MUL; break;
3244 case Iop_SubF64: dblop = ARM64fpb_SUB; break;
3245 case Iop_AddF64: dblop = ARM64fpb_ADD; break;
3246 default: break;
3248 if (dblop != ARM64fpb_INVALID) {
3249 HReg argL = iselDblExpr(env, triop->arg2);
3250 HReg argR = iselDblExpr(env, triop->arg3);
3251 HReg dst = newVRegD(env);
3252 set_FPCR_rounding_mode(env, triop->arg1);
3253 addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
3254 return dst;
3258 if (e->tag == Iex_ITE) {
3259 /* ITE(ccexpr, iftrue, iffalse) */
3260 ARM64CondCode cc;
3261 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3262 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3263 HReg dst = newVRegD(env);
3264 cc = iselCondCode(env, e->Iex.ITE.cond);
3265 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
3266 return dst;
3269 ppIRExpr(e);
3270 vpanic("iselDblExpr_wrk");
3274 /*---------------------------------------------------------*/
3275 /*--- ISEL: Floating point expressions (32 bit) ---*/
3276 /*---------------------------------------------------------*/
3278 /* Compute a 32-bit floating point value into a register, the identity
3279 of which is returned. As with iselIntExpr_R, the reg may be either
3280 real or virtual; in any case it must not be changed by subsequent
3281 code emitted by the caller. Values are generated into HRcFlt64
3282 registers despite the values themselves being Ity_F32s. */
3284 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3286 HReg r = iselFltExpr_wrk( env, e );
3287 # if 0
3288 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3289 # endif
3290 vassert(hregClass(r) == HRcFlt64);
3291 vassert(hregIsVirtual(r));
3292 return r;
3295 /* DO NOT CALL THIS DIRECTLY */
3296 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3298 IRType ty = typeOfIRExpr(env->type_env,e);
3299 vassert(e);
3300 vassert(ty == Ity_F32);
3302 if (e->tag == Iex_RdTmp) {
3303 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3306 if (e->tag == Iex_Const) {
3307       /* This is something of a kludge.  Since a 32 bit floating point
3308          zero is just all zeroes, simply create a 64 bit zero word
3309          and transfer it.  This avoids having to create an SfromW
3310          instruction for this specific case. */
3311 IRConst* con = e->Iex.Const.con;
3312 if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
3313 HReg src = newVRegI(env);
3314 HReg dst = newVRegD(env);
3315 addInstr(env, ARM64Instr_Imm64(src, 0));
3316 addInstr(env, ARM64Instr_VDfromX(dst, src));
3317 return dst;
3319 if (con->tag == Ico_F32) {
3320 HReg src = newVRegI(env);
3321 HReg dst = newVRegD(env);
3322 union { Float f32; UInt u32; } u;
3323 vassert(sizeof(u) == 4);
3324 u.f32 = con->Ico.F32;
3325 addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
3326 addInstr(env, ARM64Instr_VDfromX(dst, src));
3327 return dst;
3331 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3332 vassert(e->Iex.Load.ty == Ity_F32);
3333 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3334 HReg res = newVRegD(env);
3335 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
3336 return res;
3339 if (e->tag == Iex_Get) {
3340 Int offs = e->Iex.Get.offset;
3341 if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
3342 HReg rD = newVRegD(env);
3343 HReg rN = get_baseblock_register();
3344 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
3345 return rD;
3349 if (e->tag == Iex_Unop) {
3350 switch (e->Iex.Unop.op) {
3351 case Iop_NegF32: {
3352 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3353 HReg dst = newVRegD(env);
3354 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
3355 return dst;
3357 case Iop_AbsF32: {
3358 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3359 HReg dst = newVRegD(env);
3360 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
3361 return dst;
3363 case Iop_F16toF32: {
3364 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3365 HReg dst = newVRegD(env);
3366 addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
3367 return dst;
3369 default:
3370 break;
3374 if (e->tag == Iex_Binop) {
3375 switch (e->Iex.Binop.op) {
3376 case Iop_RoundF32toInt:
3377 case Iop_SqrtF32:
3378 case Iop_RecpExpF32: {
3379 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
3380 HReg dst = newVRegD(env);
3381 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3382 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3383 switch (e->Iex.Binop.op) {
3384 case Iop_RoundF32toInt: op = ARM64fpu_RINT; break;
3385 case Iop_SqrtF32: op = ARM64fpu_SQRT; break;
3386 case Iop_RecpExpF32: op = ARM64fpu_RECPX; break;
3387 default: vassert(0);
3389 addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
3390 return dst;
3392 case Iop_F64toF32: {
3393 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3394 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3395 HReg dstS = newVRegD(env);
3396 addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
3397 return dstS;
3399 case Iop_I32UtoF32:
3400 case Iop_I32StoF32:
3401 case Iop_I64UtoF32:
3402 case Iop_I64StoF32: {
3403 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
3404 switch (e->Iex.Binop.op) {
3405 case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
3406 case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
3407 case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
3408 case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
3409 default: vassert(0);
3411 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3412 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3413 HReg dstS = newVRegD(env);
3414 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3415 return dstS;
3417 default:
3418 break;
3422 if (e->tag == Iex_Triop) {
3423 IRTriop* triop = e->Iex.Triop.details;
3424 ARM64FpBinOp sglop = ARM64fpb_INVALID;
3425 switch (triop->op) {
3426 case Iop_DivF32: sglop = ARM64fpb_DIV; break;
3427 case Iop_MulF32: sglop = ARM64fpb_MUL; break;
3428 case Iop_SubF32: sglop = ARM64fpb_SUB; break;
3429 case Iop_AddF32: sglop = ARM64fpb_ADD; break;
3430 default: break;
3432 if (sglop != ARM64fpb_INVALID) {
3433 HReg argL = iselFltExpr(env, triop->arg2);
3434 HReg argR = iselFltExpr(env, triop->arg3);
3435 HReg dst = newVRegD(env);
3436 set_FPCR_rounding_mode(env, triop->arg1);
3437 addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
3438 return dst;
3442 if (e->tag == Iex_ITE) {
3443 /* ITE(ccexpr, iftrue, iffalse) */
3444 ARM64CondCode cc;
3445 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
3446 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
3447 HReg dst = newVRegD(env);
3448 cc = iselCondCode(env, e->Iex.ITE.cond);
3449 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
3450 return dst;
3453 ppIRExpr(e);
3454 vpanic("iselFltExpr_wrk");
3458 /*---------------------------------------------------------*/
3459 /*--- ISEL: Floating point expressions (16 bit) ---*/
3460 /*---------------------------------------------------------*/
3462 /* Compute a 16-bit floating point value into a register, the identity
3463 of which is returned. As with iselIntExpr_R, the reg may be either
3464 real or virtual; in any case it must not be changed by subsequent
3465 code emitted by the caller. Values are generated into HRcFlt64
3466 registers despite the values themselves being Ity_F16s. */
3468 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3470 HReg r = iselF16Expr_wrk( env, e );
3471 # if 0
3472 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3473 # endif
3474 vassert(hregClass(r) == HRcFlt64);
3475 vassert(hregIsVirtual(r));
3476 return r;
3479 /* DO NOT CALL THIS DIRECTLY */
3480 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3482 IRType ty = typeOfIRExpr(env->type_env,e);
3483 vassert(e);
3484 vassert(ty == Ity_F16);
3486 if (e->tag == Iex_RdTmp) {
3487 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3490 if (e->tag == Iex_Get) {
3491 Int offs = e->Iex.Get.offset;
3492 if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3493 HReg rD = newVRegD(env);
3494 HReg rN = get_baseblock_register();
3495 addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3496 return rD;
3500 if (e->tag == Iex_Binop) {
3501 switch (e->Iex.Binop.op) {
3502 case Iop_F32toF16: {
3503 HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3504 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3505 HReg dstH = newVRegD(env);
3506 addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3507 return dstH;
3509 case Iop_F64toF16: {
3510 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3511 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3512 HReg dstH = newVRegD(env);
3513 addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3514 return dstH;
3516 default:
3517 break;
3521 ppIRExpr(e);
3522 vpanic("iselF16Expr_wrk");
3526 /*---------------------------------------------------------*/
3527 /*--- ISEL: Vector expressions (256 bit) ---*/
3528 /*---------------------------------------------------------*/
3530 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3531 ISelEnv* env, IRExpr* e )
3533 iselV256Expr_wrk( rHi, rLo, env, e );
3534 vassert(hregClass(*rHi) == HRcVec128);
3535 vassert(hregClass(*rLo) == HRcVec128);
3536 vassert(hregIsVirtual(*rHi));
3537 vassert(hregIsVirtual(*rLo));
3540 /* DO NOT CALL THIS DIRECTLY */
3541 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
3542 ISelEnv* env, IRExpr* e )
3544 vassert(e);
3545 IRType ty = typeOfIRExpr(env->type_env,e);
3546 vassert(ty == Ity_V256);
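   /* V256 values are carried in a pair of V128 vregs: |*rHi| receives
      the upper 128 bits and |*rLo| the lower 128 bits. */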
3548 /* read 256-bit IRTemp */
3549 if (e->tag == Iex_RdTmp) {
3550 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3551 return;
3554 if (e->tag == Iex_Binop) {
3555 switch (e->Iex.Binop.op) {
3556 case Iop_V128HLtoV256: {
3557 *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
3558 *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
3559 return;
3561 case Iop_QandSQsh64x2:
3562 case Iop_QandSQsh32x4:
3563 case Iop_QandSQsh16x8:
3564 case Iop_QandSQsh8x16:
3565 case Iop_QandUQsh64x2:
3566 case Iop_QandUQsh32x4:
3567 case Iop_QandUQsh16x8:
3568 case Iop_QandUQsh8x16:
3569 case Iop_QandSQRsh64x2:
3570 case Iop_QandSQRsh32x4:
3571 case Iop_QandSQRsh16x8:
3572 case Iop_QandSQRsh8x16:
3573 case Iop_QandUQRsh64x2:
3574 case Iop_QandUQRsh32x4:
3575 case Iop_QandUQRsh16x8:
3576 case Iop_QandUQRsh8x16:
3578 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
3579 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
3580 HReg fpsr = newVRegI(env);
3581 HReg resHi = newVRegV(env);
3582 HReg resLo = newVRegV(env);
3583 ARM64VecBinOp op = ARM64vecb_INVALID;
3584 switch (e->Iex.Binop.op) {
3585 case Iop_QandSQsh64x2: op = ARM64vecb_SQSHL64x2; break;
3586 case Iop_QandSQsh32x4: op = ARM64vecb_SQSHL32x4; break;
3587 case Iop_QandSQsh16x8: op = ARM64vecb_SQSHL16x8; break;
3588 case Iop_QandSQsh8x16: op = ARM64vecb_SQSHL8x16; break;
3589 case Iop_QandUQsh64x2: op = ARM64vecb_UQSHL64x2; break;
3590 case Iop_QandUQsh32x4: op = ARM64vecb_UQSHL32x4; break;
3591 case Iop_QandUQsh16x8: op = ARM64vecb_UQSHL16x8; break;
3592 case Iop_QandUQsh8x16: op = ARM64vecb_UQSHL8x16; break;
3593 case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
3594 case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
3595 case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
3596 case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
3597 case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
3598 case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
3599 case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
3600 case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
3601 default: vassert(0);
3603 /* Clear FPSR.Q, do the operation, and return both its result
3604 and the new value of FPSR.Q. We can simply zero out FPSR
3605 since all the other bits have no relevance in VEX generated
3606 code. */
3607 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3608 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3609 addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
3610 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
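         /* FPSR.QC (cumulative saturation) is bit 27, hence the shift
            right by 27 and the AND with 1 below. */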
3611 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3612 ARM64sh_SHR));
3613 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3614 vassert(ril_one);
3615 addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
3616 /* Now we have: the main (shift) result in |resLo|, and the
3617 Q bit at the bottom of |fpsr|. */
3618 addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
3619 *rHi = resHi;
3620 *rLo = resLo;
3621 return;
3624 /* ... */
3625 default:
3626 break;
3627 } /* switch on the binop */
3628 } /* if (e->tag == Iex_Binop) */
3630 ppIRExpr(e);
3631 vpanic("iselV256Expr_wrk");
3635 /*---------------------------------------------------------*/
3636 /*--- ISEL: Statements ---*/
3637 /*---------------------------------------------------------*/
3639 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3641 if (vex_traceflags & VEX_TRACE_VCODE) {
3642 vex_printf("\n-- ");
3643 ppIRStmt(stmt);
3644 vex_printf("\n");
3646 switch (stmt->tag) {
3648 /* --------- STORE --------- */
3649 /* little-endian write to memory */
3650 case Ist_Store: {
3651 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3652 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3653 IREndness end = stmt->Ist.Store.end;
3655 if (tya != Ity_I64 || end != Iend_LE)
3656 goto stmt_fail;
3658 if (tyd == Ity_I64) {
3659 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3660 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3661 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3662 return;
3664 if (tyd == Ity_I32) {
3665 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3666 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3667 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3668 return;
3670 if (tyd == Ity_I16) {
3671 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3672 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3673 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3674 return;
3676 if (tyd == Ity_I8) {
3677 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3678 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3679 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3680 return;
3682 if (tyd == Ity_V128) {
3683 HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
3684 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3685 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3686 return;
3688 if (tyd == Ity_F64) {
3689 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
3690 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3691 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
3692 return;
3694 if (tyd == Ity_F32) {
3695 HReg sD = iselFltExpr(env, stmt->Ist.Store.data);
3696 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3697 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
3698 return;
3700 break;
3703 /* --------- PUT --------- */
3704 /* write guest state, fixed offset */
3705 case Ist_Put: {
3706 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3707 UInt offs = (UInt)stmt->Ist.Put.offset;
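      /* The offset/alignment checks below keep |offs| within the range
         of the scaled unsigned 12-bit immediate used by the
         corresponding load/store forms. */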
3708 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
3709 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3710 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
3711 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3712 return;
3714 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
3715 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3716 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
3717 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3718 return;
3720 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
3721 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3722 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
3723 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3724 return;
3726 if (tyd == Ity_I8 && offs < (1<<12)) {
3727 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3728 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
3729 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3730 return;
3732 if (tyd == Ity_V128 && offs < (1<<12)) {
3733 HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
3734 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
3735 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3736 return;
3738 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
3739 HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
3740 HReg bbp = get_baseblock_register();
3741 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
3742 return;
3744 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
3745 HReg sD = iselFltExpr(env, stmt->Ist.Put.data);
3746 HReg bbp = get_baseblock_register();
3747 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
3748 return;
3750 if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
3751 HReg hD = iselF16Expr(env, stmt->Ist.Put.data);
3752 HReg bbp = get_baseblock_register();
3753 addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
3754 return;
3757 break;
3760 /* --------- TMP --------- */
3761 /* assign value to temporary */
3762 case Ist_WrTmp: {
3763 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3764 IRType ty = typeOfIRTemp(env->type_env, tmp);
3766 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3767 /* We could do a lot better here. But for the time being: */
3768 HReg dst = lookupIRTemp(env, tmp);
3769 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
3770 addInstr(env, ARM64Instr_MovI(dst, rD));
3771 return;
3773 if (ty == Ity_I1) {
3774          /* Here, we are generating an I1 value into a 64 bit register.
3775             Make sure the value in the register is only zero or one,
3776             and nothing else.  This allows optimisation of the
3777             1Uto64(tmp:I1) case, by making it simply a copy of the
3778             register holding 'tmp'.  The point is that the value in
3779             the register holding 'tmp' can only have been created
3780             here.  LATER: that seems dangerous; safer to do 'tmp & 1'
3781             in that case.  Also, this could be done with a single CINC
3782             insn. */
3783 /* CLONE-01 */
3784 HReg zero = newVRegI(env);
3785 HReg one = newVRegI(env);
3786 HReg dst = lookupIRTemp(env, tmp);
3787 addInstr(env, ARM64Instr_Imm64(zero, 0));
3788 addInstr(env, ARM64Instr_Imm64(one, 1));
3789 ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
3790 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
3791 return;
3793 if (ty == Ity_F64) {
3794 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3795 HReg dst = lookupIRTemp(env, tmp);
3796 addInstr(env, ARM64Instr_VMov(8, dst, src));
3797 return;
3799 if (ty == Ity_F32) {
3800 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3801 HReg dst = lookupIRTemp(env, tmp);
3802 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
3803 return;
3805 if (ty == Ity_F16) {
3806 HReg src = iselF16Expr(env, stmt->Ist.WrTmp.data);
3807 HReg dst = lookupIRTemp(env, tmp);
3808 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
3809 return;
3811 if (ty == Ity_V128) {
3812 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
3813 HReg dst = lookupIRTemp(env, tmp);
3814 addInstr(env, ARM64Instr_VMov(16, dst, src));
3815 return;
3817 if (ty == Ity_V256) {
3818 HReg srcHi, srcLo, dstHi, dstLo;
3819 iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
3820 lookupIRTempPair( &dstHi, &dstLo, env, tmp);
3821 addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
3822 addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
3823 return;
3825 break;
3828 /* --------- Call to DIRTY helper --------- */
3829 /* call complex ("dirty") helper function */
3830 case Ist_Dirty: {
3831 IRDirty* d = stmt->Ist.Dirty.details;
3833 /* Figure out the return type, if any. */
3834 IRType retty = Ity_INVALID;
3835 if (d->tmp != IRTemp_INVALID)
3836 retty = typeOfIRTemp(env->type_env, d->tmp);
3838 Bool retty_ok = False;
3839 switch (retty) {
3840 case Ity_INVALID: /* function doesn't return anything */
3841 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
3842 case Ity_V128:
3843 retty_ok = True; break;
3844 default:
3845 break;
3847 if (!retty_ok)
3848 break; /* will go to stmt_fail: */
3850 /* Marshal args, do the call, and set the return value to 0x555..555
3851 if this is a conditional call that returns a value and the
3852 call is skipped. */
3853 UInt addToSp = 0;
3854 RetLoc rloc = mk_RetLoc_INVALID();
3855 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
3856 vassert(is_sane_RetLoc(rloc));
3858 /* Now figure out what to do with the returned value, if any. */
3859 switch (retty) {
3860 case Ity_INVALID: {
3861 /* No return value. Nothing to do. */
3862 vassert(d->tmp == IRTemp_INVALID);
3863 vassert(rloc.pri == RLPri_None);
3864 vassert(addToSp == 0);
3865 return;
3867 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
3868 vassert(rloc.pri == RLPri_Int);
3869 vassert(addToSp == 0);
3870 /* The returned value is in x0. Park it in the register
3871 associated with tmp. */
3872 HReg dst = lookupIRTemp(env, d->tmp);
3873 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
3874 return;
3876 case Ity_V128: {
3877             /* The returned value is on the stack, and |rloc| tells
3878                us where.  Fish it off the stack and then move the
3879                stack pointer upwards to clear it, as directed by
3880                doHelperCall. */
3881 vassert(rloc.pri == RLPri_V128SpRel);
3882 vassert(rloc.spOff < 256); // stay sane
3883 vassert(addToSp >= 16); // ditto
3884 vassert(addToSp < 256); // ditto
3885 HReg dst = lookupIRTemp(env, d->tmp);
3886 HReg tmp = newVRegI(env); // the address of the returned value
3887 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
3888 addInstr(env, ARM64Instr_Arith(tmp, tmp,
3889 ARM64RIA_I12((UShort)rloc.spOff, 0),
3890 True/*isAdd*/ ));
3891 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
3892 addInstr(env, ARM64Instr_AddToSP(addToSp));
3893 return;
3895 default:
3896 /*NOTREACHED*/
3897 vassert(0);
3899 break;
3902 /* --------- Load Linked and Store Conditional --------- */
3903 case Ist_LLSC: {
3904 if (stmt->Ist.LLSC.storedata == NULL) {
3905 /* LL */
3906 IRTemp res = stmt->Ist.LLSC.result;
3907 IRType ty = typeOfIRTemp(env->type_env, res);
3908 if (ty == Ity_I64 || ty == Ity_I32
3909 || ty == Ity_I16 || ty == Ity_I8) {
3910 Int szB = 0;
3911 HReg r_dst = lookupIRTemp(env, res);
3912 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3913 switch (ty) {
3914 case Ity_I8: szB = 1; break;
3915 case Ity_I16: szB = 2; break;
3916 case Ity_I32: szB = 4; break;
3917 case Ity_I64: szB = 8; break;
3918 default: vassert(0);
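            /* ARM64Instr_LdrEX uses fixed registers: the address is
               passed in x4 and the loaded value comes back in x2, hence
               the register moves below. */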
3920 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
3921 addInstr(env, ARM64Instr_LdrEX(szB));
3922 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
3923 return;
3925 goto stmt_fail;
3926 } else {
3927 /* SC */
3928 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
3929 if (tyd == Ity_I64 || tyd == Ity_I32
3930 || tyd == Ity_I16 || tyd == Ity_I8) {
3931 Int szB = 0;
3932 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
3933 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3934 switch (tyd) {
3935 case Ity_I8: szB = 1; break;
3936 case Ity_I16: szB = 2; break;
3937 case Ity_I32: szB = 4; break;
3938 case Ity_I64: szB = 8; break;
3939 default: vassert(0);
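            /* ARM64Instr_StrEX likewise uses fixed registers: data in
               x2, address in x4, with the success/failure status left
               in x0. */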
3941 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
3942 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
3943 addInstr(env, ARM64Instr_StrEX(szB));
3944 } else {
3945 goto stmt_fail;
3947          /* Now x0 is 1 if the store failed and 0 if it succeeded.
3948             Change to IR conventions (0 is fail, 1 is success), and
3949             transfer the result to r_res. */
3950 IRTemp res = stmt->Ist.LLSC.result;
3951 IRType ty = typeOfIRTemp(env->type_env, res);
3952 HReg r_res = lookupIRTemp(env, res);
3953 ARM64RIL* one = mb_mkARM64RIL_I(1);
3954 vassert(ty == Ity_I1);
3955 vassert(one);
3956 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
3957 ARM64lo_XOR));
3958 /* And be conservative -- mask off all but the lowest bit. */
3959 addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
3960 ARM64lo_AND));
3961 return;
3963 break;
3966 /* --------- ACAS --------- */
3967 case Ist_CAS: {
3968 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3969 /* "normal" singleton CAS */
3970 UChar sz;
3971 IRCAS* cas = stmt->Ist.CAS.details;
3972 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3973 switch (ty) {
3974 case Ity_I64: sz = 8; break;
3975 case Ity_I32: sz = 4; break;
3976 case Ity_I16: sz = 2; break;
3977 case Ity_I8: sz = 1; break;
3978 default: goto unhandled_cas;
3980 HReg rAddr = iselIntExpr_R(env, cas->addr);
3981 HReg rExpd = iselIntExpr_R(env, cas->expdLo);
3982 HReg rData = iselIntExpr_R(env, cas->dataLo);
3983 vassert(cas->expdHi == NULL);
3984 vassert(cas->dataHi == NULL);
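         /* ARM64Instr_CAS uses a fixed register convention: address in
            x3, expected value in x5, new value in x7; the value actually
            observed at the address ends up in x1. */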
3985 addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr));
3986 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd));
3987 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData));
3988 addInstr(env, ARM64Instr_CAS(sz));
3989          /* Now the lowest |sz| bytes of x1 are either equal to the
3990             lowest |sz| bytes of x5, indicating success, or they
3991             aren't, indicating failure. */
3992 HReg rResult = hregARM64_X1();
3993 switch (sz) {
3994 case 8: break;
3995 case 4: rResult = widen_z_32_to_64(env, rResult); break;
3996 case 2: rResult = widen_z_16_to_64(env, rResult); break;
3997 case 1: rResult = widen_z_8_to_64(env, rResult); break;
3998 default: vassert(0);
4000 // "old" in this case is interpreted somewhat liberally, per
4001 // the previous comment.
4002 HReg rOld = lookupIRTemp(env, cas->oldLo);
4003 addInstr(env, ARM64Instr_MovI(rOld, rResult));
4004 return;
4006 else {
4007 /* Paired register CAS, i.e. CASP */
4008 UChar sz;
4009 IRCAS* cas = stmt->Ist.CAS.details;
4010 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4011 switch (ty) {
4012 case Ity_I64: sz = 8; break;
4013 case Ity_I32: sz = 4; break;
4014 default: goto unhandled_cas;
4016 HReg rAddr = iselIntExpr_R(env, cas->addr);
4018 HReg rExpd0 = iselIntExpr_R(env, cas->expdLo);
4019 vassert(cas->expdHi != NULL);
4020 HReg rExpd1 = iselIntExpr_R(env, cas->expdHi);
4022 HReg rData0 = iselIntExpr_R(env, cas->dataLo);
4023 vassert(cas->dataHi != NULL);
4024 HReg rData1 = iselIntExpr_R(env, cas->dataHi);
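         /* ARM64Instr_CASP also uses fixed registers: address in x2,
            expected pair in x4/x5, new pair in x6/x7; the observed old
            pair is returned in x0/x1. */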
4026 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rAddr));
4028 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rExpd0));
4029 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd1));
4031 addInstr(env, ARM64Instr_MovI(hregARM64_X6(), rData0));
4032 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData1));
4034 addInstr(env, ARM64Instr_CASP(sz));
4036 HReg rResult0 = hregARM64_X0();
4037 HReg rResult1 = hregARM64_X1();
4038 switch (sz) {
4039 case 8: break;
4040 case 4: rResult0 = widen_z_32_to_64(env, rResult0);
4041 rResult1 = widen_z_32_to_64(env, rResult1);
4042 break;
4043 default: vassert(0);
4045 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4046 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4047 addInstr(env, ARM64Instr_MovI(rOldLo, rResult0));
4048 addInstr(env, ARM64Instr_MovI(rOldHi, rResult1));
4049 return;
4051 unhandled_cas:
4052 break;
4055 /* --------- MEM FENCE --------- */
4056 case Ist_MBE:
4057 switch (stmt->Ist.MBE.event) {
4058 case Imbe_Fence:
4059 addInstr(env, ARM64Instr_MFence());
4060 return;
4061 case Imbe_CancelReservation:
4062 addInstr(env, ARM64Instr_ClrEX());
4063 return;
4064 default:
4065 break;
4067 break;
4069 /* --------- INSTR MARK --------- */
4070 /* Doesn't generate any executable code ... */
4071 case Ist_IMark:
4072 return;
4074 /* --------- ABI HINT --------- */
4075 /* These have no meaning (denotation in the IR) and so we ignore
4076 them ... if any actually made it this far. */
4077 case Ist_AbiHint:
4078 return;
4080 /* --------- NO-OP --------- */
4081 case Ist_NoOp:
4082 return;
4084 /* --------- EXIT --------- */
4085 case Ist_Exit: {
4086 if (stmt->Ist.Exit.dst->tag != Ico_U64)
4087 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
4089 ARM64CondCode cc
4090 = iselCondCode(env, stmt->Ist.Exit.guard);
4091 ARM64AMode* amPC
4092 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
4094 /* Case: boring transfer to known address */
4095 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4096 if (env->chainingAllowed) {
4097 /* .. almost always true .. */
4098 /* Skip the event check at the dst if this is a forwards
4099 edge. */
4100 Bool toFastEP
4101 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
4102 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4103 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
4104 amPC, cc, toFastEP));
4105 } else {
4106 /* .. very occasionally .. */
4107 /* We can't use chaining, so ask for an assisted transfer,
4108 as that's the only alternative that is allowable. */
4109 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4110 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
4112 return;
4115 /* Case: assisted transfer to arbitrary address */
4116 switch (stmt->Ist.Exit.jk) {
4117 /* Keep this list in sync with that for iselNext below */
4118 case Ijk_ClientReq:
4119 case Ijk_NoDecode:
4120 case Ijk_NoRedir:
4121 case Ijk_Sys_syscall:
4122 case Ijk_InvalICache:
4123 case Ijk_FlushDCache:
4124 case Ijk_SigTRAP:
4125 case Ijk_Yield: {
4126 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4127 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
4128 stmt->Ist.Exit.jk));
4129 return;
4131 default:
4132 break;
4135 /* Do we ever expect to see any other kind? */
4136 goto stmt_fail;
4139 default: break;
4141 stmt_fail:
4142 ppIRStmt(stmt);
4143 vpanic("iselStmt");
4147 /*---------------------------------------------------------*/
4148 /*--- ISEL: Basic block terminators (Nexts) ---*/
4149 /*---------------------------------------------------------*/
4151 static void iselNext ( ISelEnv* env,
4152 IRExpr* next, IRJumpKind jk, Int offsIP )
4154 if (vex_traceflags & VEX_TRACE_VCODE) {
4155 vex_printf( "\n-- PUT(%d) = ", offsIP);
4156 ppIRExpr( next );
4157 vex_printf( "; exit-");
4158 ppIRJumpKind(jk);
4159 vex_printf( "\n");
4162 /* Case: boring transfer to known address */
4163 if (next->tag == Iex_Const) {
4164 IRConst* cdst = next->Iex.Const.con;
4165 vassert(cdst->tag == Ico_U64);
4166 if (jk == Ijk_Boring || jk == Ijk_Call) {
4167 /* Boring transfer to known address */
4168 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4169 if (env->chainingAllowed) {
4170 /* .. almost always true .. */
4171 /* Skip the event check at the dst if this is a forwards
4172 edge. */
4173 Bool toFastEP
4174 = ((Addr64)cdst->Ico.U64) > env->max_ga;
4175 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4176 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
4177 amPC, ARM64cc_AL,
4178 toFastEP));
4179 } else {
4180 /* .. very occasionally .. */
4181 /* We can't use chaining, so ask for an assisted transfer,
4182 as that's the only alternative that is allowable. */
4183 HReg r = iselIntExpr_R(env, next);
4184 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4185 Ijk_Boring));
4187 return;
4191 /* Case: call/return (==boring) transfer to any address */
4192 switch (jk) {
4193 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4194 HReg r = iselIntExpr_R(env, next);
4195 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4196 if (env->chainingAllowed) {
4197 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
4198 } else {
4199 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4200 Ijk_Boring));
4202 return;
4204 default:
4205 break;
4208 /* Case: assisted transfer to arbitrary address */
4209 switch (jk) {
4210 /* Keep this list in sync with that for Ist_Exit above */
4211 case Ijk_ClientReq:
4212 case Ijk_NoDecode:
4213 case Ijk_NoRedir:
4214 case Ijk_Sys_syscall:
4215 case Ijk_InvalICache:
4216 case Ijk_FlushDCache:
4217 case Ijk_SigTRAP:
4218 case Ijk_Yield:
4220 HReg r = iselIntExpr_R(env, next);
4221 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4222 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
4223 return;
4225 default:
4226 break;
4229 vex_printf( "\n-- PUT(%d) = ", offsIP);
4230 ppIRExpr( next );
4231 vex_printf( "; exit-");
4232 ppIRJumpKind(jk);
4233 vex_printf( "\n");
4234 vassert(0); // are we expecting any other kind?
4238 /*---------------------------------------------------------*/
4239 /*--- Insn selector top-level ---*/
4240 /*---------------------------------------------------------*/
4242 /* Translate an entire SB to arm64 code. */
4244 HInstrArray* iselSB_ARM64 ( const IRSB* bb,
4245 VexArch arch_host,
4246 const VexArchInfo* archinfo_host,
4247 const VexAbiInfo* vbi/*UNUSED*/,
4248 Int offs_Host_EvC_Counter,
4249 Int offs_Host_EvC_FailAddr,
4250 Bool chainingAllowed,
4251 Bool addProfInc,
4252 Addr max_ga )
4254 Int i, j;
4255 HReg hreg, hregHI;
4256 ISelEnv* env;
4257 UInt hwcaps_host = archinfo_host->hwcaps;
4258 ARM64AMode *amCounter, *amFailAddr;
4260 /* sanity ... */
4261 vassert(arch_host == VexArchARM64);
4263 /* Check that the host's endianness is as expected. */
4264 vassert(archinfo_host->endness == VexEndnessLE);
4266 /* guard against unexpected space regressions */
4267 vassert(sizeof(ARM64Instr) <= 32);
4269 /* Make up an initial environment to use. */
4270 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4271 env->vreg_ctr = 0;
4273 /* Set up output code array. */
4274 env->code = newHInstrArray();
4276 /* Copy BB's type env. */
4277 env->type_env = bb->tyenv;
4279 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4280 change as we go along. */
4281 env->n_vregmap = bb->tyenv->types_used;
4282 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4283 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4285 /* and finally ... */
4286 env->chainingAllowed = chainingAllowed;
4287 env->hwcaps = hwcaps_host;
4288 env->previous_rm = NULL;
4289 env->max_ga = max_ga;
4291 /* For each IR temporary, allocate a suitably-kinded virtual
4292 register. */
4293 j = 0;
4294 for (i = 0; i < env->n_vregmap; i++) {
4295 hregHI = hreg = INVALID_HREG;
4296 switch (bb->tyenv->types[i]) {
4297 case Ity_I1:
4298 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
4299 hreg = mkHReg(True, HRcInt64, 0, j++);
4300 break;
4301 case Ity_I128:
4302 hreg = mkHReg(True, HRcInt64, 0, j++);
4303 hregHI = mkHReg(True, HRcInt64, 0, j++);
4304 break;
4305 case Ity_F16: // we'll use HRcFlt64 regs for F16 too
4306 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
4307 case Ity_F64:
4308 hreg = mkHReg(True, HRcFlt64, 0, j++);
4309 break;
4310 case Ity_V128:
4311 hreg = mkHReg(True, HRcVec128, 0, j++);
4312 break;
4313 case Ity_V256:
4314 hreg = mkHReg(True, HRcVec128, 0, j++);
4315 hregHI = mkHReg(True, HRcVec128, 0, j++);
4316 break;
4317 default:
4318 ppIRType(bb->tyenv->types[i]);
4319 vpanic("iselBB(arm64): IRTemp type");
4321 env->vregmap[i] = hreg;
4322 env->vregmapHI[i] = hregHI;
4324 env->vreg_ctr = j;
4326 /* The very first instruction must be an event check. */
4327 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
4328 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
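   /* Both amodes are relative to x21, the baseblock (guest state)
      pointer. */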
4329 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
4331    /* Possibly a block counter increment (for profiling).  At this
4332       point we don't know the address of the counter, so just pretend
4333       it is zero.  It will have to be patched later, but before this
4334       translation is used, by a call to LibVEX_PatchProfInc. */
4335 if (addProfInc) {
4336 addInstr(env, ARM64Instr_ProfInc());
4339 /* Ok, finally we can iterate over the statements. */
4340 for (i = 0; i < bb->stmts_used; i++)
4341 iselStmt(env, bb->stmts[i]);
4343 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4345 /* record the number of vregs we used. */
4346 env->code->n_vregs = env->vreg_ctr;
4347 return env->code;
4351 /*---------------------------------------------------------------*/
4352 /*--- end host_arm64_isel.c ---*/
4353 /*---------------------------------------------------------------*/