2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "libvex_basictypes.h"
30 #include "libvex_ir.h"
31 #include "libvex.h"
32 #include "ir_match.h"
34 #include "main_util.h"
35 #include "main_globals.h"
36 #include "host_generic_regs.h"
37 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
38 #include "host_arm64_defs.h"
41 /*---------------------------------------------------------*/
42 /*--- ISelEnv ---*/
43 /*---------------------------------------------------------*/
45 /* This carries around:
47 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
48 might encounter. This is computed before insn selection starts,
49 and does not change.
51 - A mapping from IRTemp to HReg. This tells the insn selector
52 which virtual register is associated with each IRTemp temporary.
53 This is computed before insn selection starts, and does not
54 change. We expect this mapping to map precisely the same set of
55 IRTemps as the type mapping does.
57 |vregmap| holds the primary register for the IRTemp.
58 |vregmapHI| is only used for 128-bit integer-typed
59 IRTemps. It holds the identity of a second
60 64-bit virtual HReg, which holds the high half
61 of the value.
63 - The code array, that is, the insns selected so far.
65 - A counter, for generating new virtual registers.
67 - The host hardware capabilities word. This is set at the start
68 and does not change.
70 - A Bool for indicating whether we may generate chain-me
71 instructions for control flow transfers, or whether we must use
72 XAssisted.
74 - The maximum guest address of any guest insn in this block.
75 Actually, the address of the highest-addressed byte from any insn
76 in this block. Is set at the start and does not change. This is
77 used for detecting jumps which are definitely forward-edges from
78 this block, and therefore can be made (chained) to the fast entry
79 point of the destination, thereby avoiding the destination's
80 event check.
82 - An IRExpr*, which may be NULL, holding the IR expression (an
83 IRRoundingMode-encoded value) to which the FPU's rounding mode
84 was most recently set. Setting to NULL is always safe. Used to
85 avoid redundant settings of the FPU's rounding mode, as
86 described in set_FPCR_rounding_mode below.
88 Note, this is all (well, mostly) host-independent.
91 typedef
92 struct {
93 /* Constant -- are set at the start and do not change. */
94 IRTypeEnv* type_env;
96 HReg* vregmap;
97 HReg* vregmapHI;
98 Int n_vregmap;
100 UInt hwcaps;
102 Bool chainingAllowed;
103 Addr64 max_ga;
105 /* These are modified as we go along. */
106 HInstrArray* code;
107 Int vreg_ctr;
109 IRExpr* previous_rm;
111 ISelEnv;
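/* A small usage sketch (illustrative only, not part of the selector
   proper): a typical consumer of this environment looks up the vreg
   bound to an IRTemp and emits instructions against it, e.g.

      HReg src = lookupIRTemp(env, tmp);        // vreg carrying |tmp|
      addInstr(env, ARM64Instr_MovI(dst, src)); // lands in env->code

   where |dst| stands for some destination vreg obtained from
   newVRegI(env).  lookupIRTemp, addInstr and newVRegI are defined
   just below. */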
113 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
115 vassert(tmp < env->n_vregmap);
116 return env->vregmap[tmp];
119 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
120 ISelEnv* env, IRTemp tmp )
122 vassert(tmp < env->n_vregmap);
123 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
124 *vrLO = env->vregmap[tmp];
125 *vrHI = env->vregmapHI[tmp];
128 static void addInstr ( ISelEnv* env, ARM64Instr* instr )
130 addHInstr(env->code, instr);
131 if (vex_traceflags & VEX_TRACE_VCODE) {
132 ppARM64Instr(instr);
133 vex_printf("\n");
137 static HReg newVRegI ( ISelEnv* env )
139 HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
140 env->vreg_ctr++;
141 return reg;
144 static HReg newVRegD ( ISelEnv* env )
146 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
147 env->vreg_ctr++;
148 return reg;
151 static HReg newVRegV ( ISelEnv* env )
153 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
154 env->vreg_ctr++;
155 return reg;
159 /*---------------------------------------------------------*/
160 /*--- ISEL: Forward declarations ---*/
161 /*---------------------------------------------------------*/
163 /* These are organised as iselXXX and iselXXX_wrk pairs. The
164 iselXXX_wrk do the real work, but are not to be called directly.
165 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
166 checks that all returned registers are virtual. You should not
167 call the _wrk version directly.
169 Because some forms of ARM64 memory amodes are implicitly scaled by
170 the access size, iselIntExpr_AMode takes an IRType which tells it
171 the type of the access for which the amode is to be used. This
172 type needs to be correct, else you'll get incorrect code.
174 static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
175 IRExpr* e, IRType dty );
176 static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
177 IRExpr* e, IRType dty );
179 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
180 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
182 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
183 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
185 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
186 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
188 static ARM64CondCode iselCondCode_C_wrk ( ISelEnv* env, IRExpr* e );
189 static ARM64CondCode iselCondCode_C ( ISelEnv* env, IRExpr* e );
191 static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e );
192 static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e );
194 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
195 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
197 static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
198 ISelEnv* env, IRExpr* e );
199 static void iselInt128Expr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
200 ISelEnv* env, IRExpr* e );
202 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
203 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
205 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
206 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
208 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
209 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e );
211 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
212 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
214 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
215 ISelEnv* env, IRExpr* e );
216 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
217 ISelEnv* env, IRExpr* e );
219 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
222 /*---------------------------------------------------------*/
223 /*--- ISEL: Misc helpers ---*/
224 /*---------------------------------------------------------*/
226 /* Generate an amode suitable for a 64-bit sized access relative to
227 the baseblock register (X21). This generates an RI12 amode, which
228 means it is scaled by the access size, which is why the access size
229 -- 64 bit -- is stated explicitly here. Consequently |off| needs
230 to be divisible by 8. */
231 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
233 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
234 vassert((off & 7) == 0); /* ditto */
235 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
238 /* Ditto, for 32 bit accesses. */
239 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
241 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
242 vassert((off & 3) == 0); /* ditto */
243 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
246 /* Ditto, for 16 bit accesses. */
247 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
249 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
250 vassert((off & 1) == 0); /* ditto */
251 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
254 /* Ditto, for 8 bit accesses. */
255 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
257 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
258 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
261 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
263 vassert(off < (1<<12));
264 HReg r = newVRegI(env);
265 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
266 ARM64RIA_I12(off,0), True/*isAdd*/));
267 return r;
270 static HReg get_baseblock_register ( void )
272 return hregARM64_X21();
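/* Worked example (a sketch): for a 64-bit guest state access at offset
   24 from the baseblock pointer (X21),

      mk_baseblock_64bit_access_amode(24)
         --> ARM64AMode_RI12(X21, 3, 8)     since 24 >> 3 == 3

   while the same offset used for a 32-bit access would give uimm12 = 6
   with scale 4.  The uimm12 field always holds the offset divided by
   the access size. */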
275 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
276 a new register, and return the new register. */
277 static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
279 HReg dst = newVRegI(env);
280 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
281 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
282 return dst;
285 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
286 a new register, and return the new register. */
287 static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
289 HReg dst = newVRegI(env);
290 ARM64RI6* n48 = ARM64RI6_I6(48);
291 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
292 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
293 return dst;
296 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
297 a new register, and return the new register. */
298 static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
300 HReg dst = newVRegI(env);
301 ARM64RIL* mask = ARM64RIL_I13(1, 0, 15); /* encodes 0xFFFF */
302 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
303 return dst;
306 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
307 a new register, and return the new register. */
308 static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
310 HReg dst = newVRegI(env);
311 ARM64RI6* n32 = ARM64RI6_I6(32);
312 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
313 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
314 return dst;
317 /* Generate code to sign extend an 8 bit value in 'src' to 64 bits, in
318 a new register, and return the new register. */
319 static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
321 HReg dst = newVRegI(env);
322 ARM64RI6* n56 = ARM64RI6_I6(56);
323 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
324 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
325 return dst;
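/* Ditto, but zero extending an 8 bit value. */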
328 static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
330 HReg dst = newVRegI(env);
331 ARM64RIL* mask = ARM64RIL_I13(1, 0, 7); /* encodes 0xFF */
332 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
333 return dst;
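/* Worked example (a sketch): widen_s_16_to_64 applied to a register
   holding 0x8000 first shifts left by 48, giving 0x8000000000000000,
   then arithmetic-shifts right by 48, giving 0xFFFFFFFFFFFF8000 --
   the 16-bit value sign extended to 64 bits.  The widen_z_* variants
   instead just AND with the appropriate bitfield immediate (0xFF,
   0xFFFF or 0xFFFFFFFF). */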
336 /* Is this IRExpr_Const(IRConst_U64(0)) ? */
337 static Bool isZeroU64 ( IRExpr* e ) {
338 if (e->tag != Iex_Const) return False;
339 IRConst* con = e->Iex.Const.con;
340 vassert(con->tag == Ico_U64);
341 return con->Ico.U64 == 0;
345 /*---------------------------------------------------------*/
346 /*--- ISEL: FP rounding mode helpers ---*/
347 /*---------------------------------------------------------*/
349 /* Set the FP rounding mode: 'mode' is an I32-typed expression
350 denoting a value in the range 0 .. 3, indicating a round mode
351 encoded as per type IRRoundingMode -- the first four values only
352 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
353 FPCR to have the same rounding.
355 For speed & simplicity, we're setting the *entire* FPCR here.
357 Setting the rounding mode is expensive. So this function tries to
358 avoid repeatedly setting the rounding mode to the same thing by
359 first comparing 'mode' to the 'mode' tree supplied in the previous
360 call to this function, if any. (The previous value is stored in
361 env->previous_rm.) If 'mode' is a single IR temporary 't' and
362 env->previous_rm is also just 't', then the setting is skipped.
364 This is safe because of the SSA property of IR: an IR temporary can
365 only be defined once and so will have the same value regardless of
366 where it appears in the block. Cool stuff, SSA.
368 A safety condition: all attempts to set the RM must be aware of
369 this mechanism - by being routed through the functions here.
371 Of course this only helps in blocks where the RM is set more than
372 once and it is set to the same value each time, *and* that value is
373 held in the same IR temporary each time. In order to assure the
374 latter as much as possible, the IR optimiser takes care to do CSE
375 on any block with any sign of floating point activity.
377 static
378 void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
380 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
382 /* Do we need to do anything? */
383 if (env->previous_rm
384 && env->previous_rm->tag == Iex_RdTmp
385 && mode->tag == Iex_RdTmp
386 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
387 /* no - setting it to what it was before. */
388 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
389 return;
392 /* No luck - we better set it, and remember what we set it to. */
393 env->previous_rm = mode;
395 /* Only supporting the rounding-mode bits - the rest of FPCR is set
396 to zero - so we can set the whole register at once (faster). */
398 /* This isn't simple, because 'mode' carries an IR rounding
399 encoding, and we need to translate that to an ARM64 FP one:
400 The IR encoding:
401 00 to nearest (the default)
402 10 to +infinity
403 01 to -infinity
404 11 to zero
405 The ARM64 FP encoding:
406 00 to nearest
407 01 to +infinity
408 10 to -infinity
409 11 to zero
410 Easy enough to do; just swap the two bits.
412 HReg irrm = iselIntExpr_R(env, mode);
413 HReg tL = newVRegI(env);
414 HReg tR = newVRegI(env);
415 HReg t3 = newVRegI(env);
416 /* tL = irrm << 1;
417 tR = irrm >> 1; if we're lucky, these will issue together
418 tL &= 2;
419 tR &= 1; ditto
420 t3 = tL | tR;
421 t3 <<= 22;
422 msr fpcr, t3
424 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
425 ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
426 vassert(ril_one && ril_two);
427 addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
428 addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
429 addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
430 addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
431 addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
432 addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
433 addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
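/* Worked example (a sketch): for mode == Irrm_PosINF the IR encoding
   is 2 (binary 10).  Then tL = (2 << 1) & 2 = 0 and tR = (2 >> 1) & 1
   = 1, so t3 = 1 (binary 01), which is the ARM64 encoding of
   round-to-+infinity.  Shifting t3 left by 22 places it in FPCR.RMode
   (bits 23:22) before the write to the FPCR. */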
437 /*---------------------------------------------------------*/
438 /*--- ISEL: Function call helpers ---*/
439 /*---------------------------------------------------------*/
441 /* Used only in doHelperCall. See big comment in doHelperCall re
442 handling of register-parameter args. This function figures out
443 whether evaluation of an expression might require use of a fixed
444 register. If in doubt return True (safe but suboptimal).
446 static
447 Bool mightRequireFixedRegs ( IRExpr* e )
449 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
450 // These are always "safe" -- either a copy of SP in some
451 // arbitrary vreg, or a copy of x21, respectively.
452 return False;
454 /* Else it's a "normal" expression. */
455 switch (e->tag) {
456 case Iex_RdTmp: case Iex_Const: case Iex_Get:
457 return False;
458 default:
459 return True;
464 /* Do a complete function call. |guard| is a Ity_Bit expression
465 indicating whether or not the call happens. If guard==NULL, the
466 call is unconditional. |retloc| is set to indicate where the
467 return value is after the call. The caller (of this fn) must
468 generate code to add |stackAdjustAfterCall| to the stack pointer
469 after the call is done. Returns True iff it managed to handle this
470 combination of arg/return types, else returns False. */
472 static
473 Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
474 /*OUT*/RetLoc* retloc,
475 ISelEnv* env,
476 IRExpr* guard,
477 IRCallee* cee, IRType retTy, IRExpr** args )
479 ARM64CondCode cc;
480 HReg argregs[ARM64_N_ARGREGS];
481 HReg tmpregs[ARM64_N_ARGREGS];
482 Bool go_fast;
483 Int n_args, i, nextArgReg;
484 Addr64 target;
486 vassert(ARM64_N_ARGREGS == 8);
488 /* Set default returns. We'll update them later if needed. */
489 *stackAdjustAfterCall = 0;
490 *retloc = mk_RetLoc_INVALID();
492 /* These are used for cross-checking that IR-level constraints on
493 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
494 UInt nVECRETs = 0;
495 UInt nGSPTRs = 0;
497 /* Marshal args for a call and do the call.
499 This function only deals with a tiny set of possibilities, which
500 cover all helpers in practice. The restrictions are that only
501 arguments in registers are supported, hence only
502 ARM64_N_REGPARMS x 64 integer bits in total can be passed. In
503 fact the only supported arg type is I64.
505 The return type can be I{64,32} or V128. In the V128 case, it
506 is expected that |args| will contain the special node
507 IRExpr_VECRET(), in which case this routine generates code to
508 allocate space on the stack for the vector return value. Since
509 we are not passing any scalars on the stack, it is enough to
510 preallocate the return space before marshalling any arguments,
511 in this case.
513 |args| may also contain IRExpr_GSPTR(), in which case the
514 value in x21 is passed as the corresponding argument.
516 Generating code which is both efficient and correct when
517 parameters are to be passed in registers is difficult, for the
518 reasons elaborated in detail in comments attached to
519 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
520 of the method described in those comments.
522 The problem is split into two cases: the fast scheme and the
523 slow scheme. In the fast scheme, arguments are computed
524 directly into the target (real) registers. This is only safe
525 when we can be sure that computation of each argument will not
526 trash any real registers set by computation of any other
527 argument.
529 In the slow scheme, all args are first computed into vregs, and
530 once they are all done, they are moved to the relevant real
531 regs. This always gives correct code, but it also gives a bunch
532 of vreg-to-rreg moves which are usually redundant but are hard
533 for the register allocator to get rid of.
535 To decide which scheme to use, all argument expressions are
536 first examined. If they are all so simple that it is clear they
537 will be evaluated without use of any fixed registers, use the
538 fast scheme, else use the slow scheme. Note also that only
539 unconditional calls may use the fast scheme, since having to
540 compute a condition expression could itself trash real
541 registers.
543 Note this requires being able to examine an expression and
544 determine whether or not evaluation of it might use a fixed
545 register. That requires knowledge of how the rest of this insn
546 selector works. Currently just the following 3 are regarded as
547 safe -- hopefully they cover the majority of arguments in
548 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
551 /* Note that the cee->regparms field is meaningless on ARM64 hosts
552 (since there is only one calling convention) and so we always
553 ignore it. */
555 n_args = 0;
556 for (i = 0; args[i]; i++) {
557 IRExpr* arg = args[i];
558 if (UNLIKELY(arg->tag == Iex_VECRET)) {
559 nVECRETs++;
560 } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
561 nGSPTRs++;
563 n_args++;
566 /* If this fails, the IR is ill-formed */
567 vassert(nGSPTRs == 0 || nGSPTRs == 1);
569 /* If we have a VECRET, allocate space on the stack for the return
570 value, and record the stack pointer after that. */
571 HReg r_vecRetAddr = INVALID_HREG;
572 if (nVECRETs == 1) {
573 vassert(retTy == Ity_V128 || retTy == Ity_V256);
574 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
575 r_vecRetAddr = newVRegI(env);
576 addInstr(env, ARM64Instr_AddToSP(-16));
577 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
578 } else {
579 // If either of these fail, the IR is ill-formed
580 vassert(retTy != Ity_V128 && retTy != Ity_V256);
581 vassert(nVECRETs == 0);
584 argregs[0] = hregARM64_X0();
585 argregs[1] = hregARM64_X1();
586 argregs[2] = hregARM64_X2();
587 argregs[3] = hregARM64_X3();
588 argregs[4] = hregARM64_X4();
589 argregs[5] = hregARM64_X5();
590 argregs[6] = hregARM64_X6();
591 argregs[7] = hregARM64_X7();
593 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
594 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
596 /* First decide which scheme (slow or fast) is to be used. First
597 assume the fast scheme, and select slow if any contraindications
598 (wow) appear. */
600 go_fast = True;
602 if (guard) {
603 if (guard->tag == Iex_Const
604 && guard->Iex.Const.con->tag == Ico_U1
605 && guard->Iex.Const.con->Ico.U1 == True) {
606 /* unconditional */
607 } else {
608 /* Not manifestly unconditional -- be conservative. */
609 go_fast = False;
613 if (go_fast) {
614 for (i = 0; i < n_args; i++) {
615 if (mightRequireFixedRegs(args[i])) {
616 go_fast = False;
617 break;
622 if (go_fast) {
623 if (retTy == Ity_V128 || retTy == Ity_V256)
624 go_fast = False;
627 /* At this point the scheme to use has been established. Generate
628 code to get the arg values into the argument rregs. If we run
629 out of arg regs, give up. */
631 if (go_fast) {
633 /* FAST SCHEME */
634 nextArgReg = 0;
636 for (i = 0; i < n_args; i++) {
637 IRExpr* arg = args[i];
639 IRType aTy = Ity_INVALID;
640 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
641 aTy = typeOfIRExpr(env->type_env, args[i]);
643 if (nextArgReg >= ARM64_N_ARGREGS)
644 return False; /* out of argregs */
646 if (aTy == Ity_I64) {
647 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
648 iselIntExpr_R(env, args[i]) ));
649 nextArgReg++;
651 else if (arg->tag == Iex_GSPTR) {
652 vassert(0); //ATC
653 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
654 hregARM64_X21() ));
655 nextArgReg++;
657 else if (arg->tag == Iex_VECRET) {
658 // because of the go_fast logic above, we can't get here,
659 // since vector return values makes us use the slow path
660 // instead.
661 vassert(0);
663 else
664 return False; /* unhandled arg type */
667 /* Fast scheme only applies for unconditional calls. Hence: */
668 cc = ARM64cc_AL;
670 } else {
672 /* SLOW SCHEME; move via temporaries */
673 nextArgReg = 0;
675 for (i = 0; i < n_args; i++) {
676 IRExpr* arg = args[i];
678 IRType aTy = Ity_INVALID;
679 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
680 aTy = typeOfIRExpr(env->type_env, args[i]);
682 if (nextArgReg >= ARM64_N_ARGREGS)
683 return False; /* out of argregs */
685 if (aTy == Ity_I64) {
686 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
687 nextArgReg++;
689 else if (arg->tag == Iex_GSPTR) {
690 vassert(0); //ATC
691 tmpregs[nextArgReg] = hregARM64_X21();
692 nextArgReg++;
694 else if (arg->tag == Iex_VECRET) {
695 vassert(!hregIsInvalid(r_vecRetAddr));
696 tmpregs[nextArgReg] = r_vecRetAddr;
697 nextArgReg++;
699 else
700 return False; /* unhandled arg type */
703 /* Now we can compute the condition. We can't do it earlier
704 because the argument computations could trash the condition
705 codes. Be a bit clever to handle the common case where the
706 guard is 1:Bit. */
707 cc = ARM64cc_AL;
708 if (guard) {
709 if (guard->tag == Iex_Const
710 && guard->Iex.Const.con->tag == Ico_U1
711 && guard->Iex.Const.con->Ico.U1 == True) {
712 /* unconditional -- do nothing */
713 } else {
714 cc = iselCondCode_C( env, guard );
718 /* Move the args to their final destinations. */
719 for (i = 0; i < nextArgReg; i++) {
720 vassert(!(hregIsInvalid(tmpregs[i])));
721 /* None of these insns, including any spill code that might
722 be generated, may alter the condition codes. */
723 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
728 /* Should be assured by checks above */
729 vassert(nextArgReg <= ARM64_N_ARGREGS);
731 /* Do final checks, set the return values, and generate the call
732 instruction proper. */
733 vassert(nGSPTRs == 0 || nGSPTRs == 1);
734 vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
735 vassert(*stackAdjustAfterCall == 0);
736 vassert(is_RetLoc_INVALID(*retloc));
737 switch (retTy) {
738 case Ity_INVALID:
739 /* Function doesn't return a value. */
740 *retloc = mk_RetLoc_simple(RLPri_None);
741 break;
742 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
743 *retloc = mk_RetLoc_simple(RLPri_Int);
744 break;
745 case Ity_V128:
746 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
747 *stackAdjustAfterCall = 16;
748 break;
749 case Ity_V256:
750 vassert(0); // ATC
751 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
752 *stackAdjustAfterCall = 32;
753 break;
754 default:
755 /* IR can denote other possible return types, but we don't
756 handle those here. */
757 vassert(0);
760 /* Finally, generate the call itself. This needs the *retloc value
761 set in the switch above, which is why it's at the end. */
763 /* nextArgReg doles out argument registers. Since these are
764 assigned in the order x0 .. x7, its numeric value at this point,
765 which must be between 0 and 8 inclusive, is going to be equal to
766 the number of arg regs in use for the call. Hence bake that
767 number into the call (we'll need to know it when doing register
768 allocation, to know what regs the call reads.) */
770 target = (Addr)cee->addr;
771 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
773 return True; /* success */
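/* Example (a sketch): an unconditional call to a helper taking two I64
   arguments, each of which is just an IRExpr_RdTmp, IRExpr_Const or
   IRExpr_Get, uses the fast scheme: the arguments are evaluated and
   moved straight into x0 and x1, and the call is emitted with
   cc == ARM64cc_AL and nextArgReg == 2.  A guarded call, or one whose
   arguments might need fixed registers, instead computes the arguments
   into fresh vregs and only copies them to x0..x7 once all of them
   (and the guard) have been evaluated. */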
777 /*---------------------------------------------------------*/
778 /*--- ISEL: Integer expressions (64/32 bit) ---*/
779 /*---------------------------------------------------------*/
781 /* Select insns for an integer-typed expression, and add them to the
782 code list. Return a reg holding the result. This reg will be a
783 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
784 want to modify it, ask for a new vreg, copy it in there, and modify
785 the copy. The register allocator will do its best to map both
786 vregs to the same real register, so the copies will often disappear
787 later in the game.
789 This should handle expressions of 64- and 32-bit type. All results
790 are returned in a 64-bit register. For 32-bit expressions, the
791 upper 32 bits are arbitrary, so you should mask or sign extend
792 partial values if necessary.
795 /* ---------------- RRS matching helper ---------------- */
797 /* This helper matches 64-bit integer expressions of the form
798 {Add,Sub,And,Or,Xor}(E1, {Shl,Shr,Sar}(E2, immediate))
800 {Add,And,Or,Xor}({Shl,Shr,Sar}(E1, immediate), E2)
801 which is a useful thing to do because AArch64 can compute those in
802 a single instruction.
804 static Bool matchesRegRegShift(/*OUT*/ARM64RRSOp* mainOp,
805 /*OUT*/ARM64ShiftOp* shiftOp,
806 /*OUT*/UChar* amt,
807 /*OUT*/IRExpr** argUnshifted,
808 /*OUT*/IRExpr** argToBeShifted,
809 IRExpr* e)
811 *mainOp = (ARM64RRSOp)0;
812 *shiftOp = (ARM64ShiftOp)0;
813 *amt = 0;
814 *argUnshifted = NULL;
815 *argToBeShifted = NULL;
816 if (e->tag != Iex_Binop) {
817 return False;
819 const IROp irMainOp = e->Iex.Binop.op;
820 Bool canSwap = True;
821 switch (irMainOp) {
822 case Iop_And64: *mainOp = ARM64rrs_AND; break;
823 case Iop_Or64: *mainOp = ARM64rrs_OR; break;
824 case Iop_Xor64: *mainOp = ARM64rrs_XOR; break;
825 case Iop_Add64: *mainOp = ARM64rrs_ADD; break;
826 case Iop_Sub64: *mainOp = ARM64rrs_SUB; canSwap = False; break;
827 default: return False;
829 /* The root node is OK. Now check the right (2nd) arg. */
830 IRExpr* argL = e->Iex.Binop.arg1;
831 IRExpr* argR = e->Iex.Binop.arg2;
833 // This loop runs either one or two iterations. In the first iteration, we
834 // check for a shiftable right (second) arg. If that fails, at the end of
835 // the first iteration, the args are swapped, if that is valid, and we go
836 // round again, hence checking for a shiftable left (first) arg.
837 UInt iterNo = 1;
838 while (True) {
839 vassert(iterNo == 1 || iterNo == 2);
840 if (argR->tag == Iex_Binop) {
841 const IROp irShiftOp = argR->Iex.Binop.op;
842 if (irShiftOp == Iop_Shl64
843 || irShiftOp == Iop_Shr64 || irShiftOp == Iop_Sar64) {
844 IRExpr* argRL = argR->Iex.Binop.arg1;
845 const IRExpr* argRR = argR->Iex.Binop.arg2;
846 if (argRR->tag == Iex_Const) {
847 const IRConst* argRRconst = argRR->Iex.Const.con;
848 vassert(argRRconst->tag == Ico_U8); // due to typecheck rules
849 const UChar amount = argRRconst->Ico.U8;
850 if (amount >= 1 && amount <= 63) {
851 // We got a match \o/
852 // *mainOp is already set
853 switch (irShiftOp) {
854 case Iop_Shl64: *shiftOp = ARM64sh_SHL; break;
855 case Iop_Shr64: *shiftOp = ARM64sh_SHR; break;
856 case Iop_Sar64: *shiftOp = ARM64sh_SAR; break;
857 default: vassert(0); // guarded above
859 *amt = amount;
860 *argUnshifted = argL;
861 *argToBeShifted = argRL;
862 return True;
867 // We failed to get a match in the first iteration. So, provided the
868 // root node isn't SUB, swap the arguments and make one further
869 // iteration. If that doesn't succeed, we must give up.
870 if (iterNo == 1 && canSwap) {
871 IRExpr* tmp = argL;
872 argL = argR;
873 argR = tmp;
874 iterNo = 2;
875 continue;
877 // Give up.
878 return False;
880 /*NOTREACHED*/
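/* Example (a sketch, with t1 and t2 denoting IR temporaries): for
   e = Add64(t1, Shl64(t2, 3)) this returns True with
   *mainOp = ARM64rrs_ADD, *shiftOp = ARM64sh_SHL, *amt = 3,
   *argUnshifted = t1 and *argToBeShifted = t2, allowing the caller to
   emit a single "add Xd, Xn, Xm, lsl #3".  Sub64(Shl64(t1, 3), t2)
   does not match, since the shifted operand of a subtraction must be
   the second argument. */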
883 /* --------------------- AMode --------------------- */
885 /* Return an AMode which computes the value of the specified
886 expression, possibly also adding insns to the code list as a
887 result. The expression may only be a 64-bit one.
890 static Bool isValidScale ( UChar scale )
892 switch (scale) {
893 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
894 default: return False;
898 static Bool sane_AMode ( ARM64AMode* am )
900 switch (am->tag) {
901 case ARM64am_RI9:
902 return
903 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
904 && (hregIsVirtual(am->ARM64am.RI9.reg)
905 /* || sameHReg(am->ARM64am.RI9.reg,
906 hregARM64_X21()) */ )
907 && am->ARM64am.RI9.simm9 >= -256
908 && am->ARM64am.RI9.simm9 <= 255 );
909 case ARM64am_RI12:
910 return
911 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
912 && (hregIsVirtual(am->ARM64am.RI12.reg)
913 /* || sameHReg(am->ARM64am.RI12.reg,
914 hregARM64_X21()) */ )
915 && am->ARM64am.RI12.uimm12 < 4096
916 && isValidScale(am->ARM64am.RI12.szB) );
917 case ARM64am_RR:
918 return
919 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
920 && hregIsVirtual(am->ARM64am.RR.base)
921 && hregClass(am->ARM64am.RR.index) == HRcInt64
922 && hregIsVirtual(am->ARM64am.RR.index) );
923 default:
924 vpanic("sane_AMode: unknown ARM64 AMode1 tag");
928 static
929 ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
931 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
932 vassert(sane_AMode(am));
933 return am;
936 static
937 ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
939 IRType ty = typeOfIRExpr(env->type_env,e);
940 vassert(ty == Ity_I64);
942 ULong szBbits = 0;
943 switch (dty) {
944 case Ity_I64: szBbits = 3; break;
945 case Ity_I32: szBbits = 2; break;
946 case Ity_I16: szBbits = 1; break;
947 case Ity_I8: szBbits = 0; break;
948 default: vassert(0);
951 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
952 we're going to create an amode suitable for LDU* or STU*
953 instructions, which use unscaled immediate offsets. */
954 if (e->tag == Iex_Binop
955 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
956 && e->Iex.Binop.arg2->tag == Iex_Const
957 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
958 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
959 if (simm >= -255 && simm <= 255) {
960 /* Although the gating condition might seem to be
961 simm >= -256 && simm <= 255
962 we will need to negate simm in the case where the op is Sub64.
963 Hence limit the lower value to -255 in order that its negation
964 is representable. */
965 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
966 if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
967 return ARM64AMode_RI9(reg, (Int)simm);
971 /* Add64(expr, uimm12 * transfer-size) */
972 if (e->tag == Iex_Binop
973 && e->Iex.Binop.op == Iop_Add64
974 && e->Iex.Binop.arg2->tag == Iex_Const
975 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
976 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
977 ULong szB = 1 << szBbits;
978 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
979 && (uimm >> szBbits) < 4096) {
980 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
981 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
985 /* Add64(expr1, expr2) */
986 if (e->tag == Iex_Binop
987 && e->Iex.Binop.op == Iop_Add64) {
988 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
989 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
990 return ARM64AMode_RR(reg1, reg2);
993 /* Doesn't match anything in particular. Generate it into
994 a register and use that. */
995 HReg reg = iselIntExpr_R(env, e);
996 return ARM64AMode_RI9(reg, 0);
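/* Example (a sketch): for a 32-bit load from Add64(t1, 0x400), the
   offset is outside the signed 9-bit range but is 4-aligned and
   0x400 >> 2 == 0x100 < 4096, so the scaled form
   ARM64AMode_RI12(t1, 0x100, 4) is produced.  A small offset such as
   Add64(t1, 0x40) is caught by the first case and yields the unscaled
   form ARM64AMode_RI9(t1, 0x40). */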
1000 /* --------------------- RIA --------------------- */
1002 /* Select instructions to generate 'e' into a RIA. */
1004 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
1006 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
1007 /* sanity checks ... */
1008 switch (ri->tag) {
1009 case ARM64riA_I12:
1010 vassert(ri->ARM64riA.I12.imm12 < 4096);
1011 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
1012 return ri;
1013 case ARM64riA_R:
1014 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
1015 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
1016 return ri;
1017 default:
1018 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
1022 /* DO NOT CALL THIS DIRECTLY ! */
1023 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
1025 IRType ty = typeOfIRExpr(env->type_env,e);
1026 vassert(ty == Ity_I64 || ty == Ity_I32);
1028 /* special case: immediate */
1029 if (e->tag == Iex_Const) {
1030 ULong u = 0xF000000ULL; /* invalid */
1031 switch (e->Iex.Const.con->tag) {
1032 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
1033 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1034 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
1036 if (0 == (u & ~(0xFFFULL << 0)))
1037 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
1038 if (0 == (u & ~(0xFFFULL << 12)))
1039 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
1040 /* else fail, fall through to default case */
1043 /* default case: calculate into a register and return that */
1045 HReg r = iselIntExpr_R ( env, e );
1046 return ARM64RIA_R(r);
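/* Example (a sketch): the constant 0x123 becomes ARM64RIA_I12(0x123, 0),
   and 0x5000 (only bits 23:12 set) becomes ARM64RIA_I12(5, 12);
   something like 0x1001, which fits neither form, is computed into a
   register and returned as ARM64RIA_R. */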
1051 /* --------------------- RIL --------------------- */
1053 /* Select instructions to generate 'e' into a RIL. At this point we
1054 have to deal with the strange bitfield-immediate encoding for logic
1055 instructions. */
1058 // The following four functions
1059 // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
1060 // are copied, with modifications, from
1061 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
1062 // which has the following copyright notice:
1064 Copyright 2013, ARM Limited
1065 All rights reserved.
1067 Redistribution and use in source and binary forms, with or without
1068 modification, are permitted provided that the following conditions are met:
1070 * Redistributions of source code must retain the above copyright notice,
1071 this list of conditions and the following disclaimer.
1072 * Redistributions in binary form must reproduce the above copyright notice,
1073 this list of conditions and the following disclaimer in the documentation
1074 and/or other materials provided with the distribution.
1075 * Neither the name of ARM Limited nor the names of its contributors may be
1076 used to endorse or promote products derived from this software without
1077 specific prior written permission.
1079 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1080 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1081 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1082 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
1083 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1084 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1085 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1086 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1087 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1088 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1091 static Int CountLeadingZeros(ULong value, Int width)
1093 vassert(width == 32 || width == 64);
1094 Int count = 0;
1095 ULong bit_test = 1ULL << (width - 1);
1096 while ((count < width) && ((bit_test & value) == 0)) {
1097 count++;
1098 bit_test >>= 1;
1100 return count;
1103 static Int CountTrailingZeros(ULong value, Int width)
1105 vassert(width == 32 || width == 64);
1106 Int count = 0;
1107 while ((count < width) && (((value >> count) & 1) == 0)) {
1108 count++;
1110 return count;
1113 static Int CountSetBits(ULong value, Int width)
1115 // TODO: Other widths could be added here, as the implementation already
1116 // supports them.
1117 vassert(width == 32 || width == 64);
1119 // Mask out unused bits to ensure that they are not counted.
1120 value &= (0xffffffffffffffffULL >> (64-width));
1122 // Add up the set bits.
1123 // The algorithm works by adding pairs of bit fields together iteratively,
1124 // where the size of each bit field doubles each time.
1125 // An example for an 8-bit value:
1126 // Bits: h g f e d c b a
1127 // \ | \ | \ | \ |
1128 // value = h+g f+e d+c b+a
1129 // \ | \ |
1130 // value = h+g+f+e d+c+b+a
1131 // \ |
1132 // value = h+g+f+e+d+c+b+a
1133 value = ((value >> 1) & 0x5555555555555555ULL)
1134 + (value & 0x5555555555555555ULL);
1135 value = ((value >> 2) & 0x3333333333333333ULL)
1136 + (value & 0x3333333333333333ULL);
1137 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1138 + (value & 0x0f0f0f0f0f0f0f0fULL);
1139 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1140 + (value & 0x00ff00ff00ff00ffULL);
1141 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1142 + (value & 0x0000ffff0000ffffULL);
1143 value = ((value >> 32) & 0x00000000ffffffffULL)
1144 + (value & 0x00000000ffffffffULL);
1146 return value;
1149 static Bool isImmLogical ( /*OUT*/UInt* n,
1150 /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1151 ULong value, UInt width )
1153 // Test if a given value can be encoded in the immediate field of a
1154 // logical instruction.
1156 // If it can be encoded, the function returns true, and values
1157 // pointed to by n, imm_s and imm_r are updated with immediates
1158 // encoded in the format required by the corresponding fields in the
1159 // logical instruction. If it can not be encoded, the function
1160 // returns false, and the values pointed to by n, imm_s and imm_r
1161 // are undefined.
1162 vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1163 vassert(width == 32 || width == 64);
1165 // Logical immediates are encoded using parameters n, imm_s and imm_r using
1166 // the following table:
1168 // N imms immr size S R
1169 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
1170 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
1171 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
1172 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
1173 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
1174 // 0 11110s xxxxxr 2 UInt(s) UInt(r)
1175 // (s bits must not be all set)
1177 // A pattern is constructed of size bits, where the least significant S+1
1178 // bits are set. The pattern is rotated right by R, and repeated across a
1179 // 32 or 64-bit value, depending on destination register width.
1181 // To test if an arbitrary immediate can be encoded using this scheme, an
1182 // iterative algorithm is used.
1184 // TODO: This code does not consider using X/W register overlap to support
1185 // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1186 // are an encodable logical immediate.
1188 // 1. If the value has all set or all clear bits, it can't be encoded.
1189 if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1190 ((width == 32) && (value == 0xffffffff))) {
1191 return False;
1194 UInt lead_zero = CountLeadingZeros(value, width);
1195 UInt lead_one = CountLeadingZeros(~value, width);
1196 UInt trail_zero = CountTrailingZeros(value, width);
1197 UInt trail_one = CountTrailingZeros(~value, width);
1198 UInt set_bits = CountSetBits(value, width);
1200 // The fixed bits in the immediate s field.
1201 // If width == 64 (X reg), start at 0xFFFFFF80.
1202 // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1203 // widths won't be executed.
1204 Int imm_s_fixed = (width == 64) ? -128 : -64;
1205 Int imm_s_mask = 0x3F;
1207 for (;;) {
1208 // 2. If the value is two bits wide, it can be encoded.
1209 if (width == 2) {
1210 *n = 0;
1211 *imm_s = 0x3C;
1212 *imm_r = (value & 3) - 1;
1213 return True;
1216 *n = (width == 64) ? 1 : 0;
1217 *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1218 if ((lead_zero + set_bits) == width) {
1219 *imm_r = 0;
1220 } else {
1221 *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1224 // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1225 // the bit width of the value, it can be encoded.
1226 if (lead_zero + trail_zero + set_bits == width) {
1227 return True;
1230 // 4. If the sum of leading ones, trailing ones and unset bits in the
1231 // value is equal to the bit width of the value, it can be encoded.
1232 if (lead_one + trail_one + (width - set_bits) == width) {
1233 return True;
1236 // 5. If the most-significant half of the bitwise value is equal to the
1237 // least-significant half, return to step 2 using the least-significant
1238 // half of the value.
1239 ULong mask = (1ULL << (width >> 1)) - 1;
1240 if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1241 width >>= 1;
1242 set_bits >>= 1;
1243 imm_s_fixed >>= 1;
1244 continue;
1247 // 6. Otherwise, the value can't be encoded.
1248 return False;
1253 /* Create a RIL for the given immediate, if it is representable, or
1254 return NULL if not. */
1256 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1258 UInt n = 0, imm_s = 0, imm_r = 0;
1259 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1260 if (!ok) return NULL;
1261 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1262 return ARM64RIL_I13(n, imm_r, imm_s);
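/* Example (a sketch): mb_mkARM64RIL_I(0xFF) succeeds with n == 1,
   imm_r == 0 and imm_s == 7 -- the same I13 encoding that the widening
   helpers above construct by hand -- whereas mb_mkARM64RIL_I(0) and
   mb_mkARM64RIL_I(~0ULL) return NULL, since all-zeroes and all-ones
   are not representable as logical immediates. */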
1265 /* So, finally .. */
1267 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1269 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1270 /* sanity checks ... */
1271 switch (ri->tag) {
1272 case ARM64riL_I13:
1273 vassert(ri->ARM64riL.I13.bitN < 2);
1274 vassert(ri->ARM64riL.I13.immR < 64);
1275 vassert(ri->ARM64riL.I13.immS < 64);
1276 return ri;
1277 case ARM64riL_R:
1278 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1279 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1280 return ri;
1281 default:
1282 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1286 /* DO NOT CALL THIS DIRECTLY ! */
1287 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1289 IRType ty = typeOfIRExpr(env->type_env,e);
1290 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
1292 /* special case: immediate */
1293 if (e->tag == Iex_Const) {
1294 ARM64RIL* maybe = NULL;
1295 if (ty == Ity_I64) {
1296 vassert(e->Iex.Const.con->tag == Ico_U64);
1297 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1298 } else if (ty == Ity_I32) {
1299 vassert(ty == Ity_I32);
1300 vassert(e->Iex.Const.con->tag == Ico_U32);
1301 UInt u32 = e->Iex.Const.con->Ico.U32;
1302 ULong u64 = (ULong)u32;
1303 /* First try with 32 leading zeroes. */
1304 maybe = mb_mkARM64RIL_I(u64);
1305 /* If that doesn't work, try with 2 copies, since it doesn't
1306 matter what winds up in the upper 32 bits. */
1307 if (!maybe) {
1308 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1310 } else {
1311 vassert(ty == Ity_I16);
1312 vassert(e->Iex.Const.con->tag == Ico_U16);
1313 // `maybe` is still NULL. Be lame and fall through to the default
1314 // case. Obviously we could do better here.
1316 if (maybe) return maybe;
1317 /* else fail, fall through to default case */
1320 /* default case: calculate into a register and return that */
1322 HReg r = iselIntExpr_R ( env, e );
1323 return ARM64RIL_R(r);
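/* Example (a sketch): the I32 constant 0xF0F0F0F0 is not encodable as
   the 64-bit immediate 0x00000000F0F0F0F0, but since the upper 32 bits
   of the result do not matter here, the second attempt instead encodes
   0xF0F0F0F0F0F0F0F0 -- a repeating 8-bit pattern -- which does have a
   valid 13-bit logical-immediate encoding. */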
1328 /* --------------------- RI6 --------------------- */
1330 /* Select instructions to generate 'e' into a RI6. */
1332 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1334 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1335 /* sanity checks ... */
1336 switch (ri->tag) {
1337 case ARM64ri6_I6:
1338 vassert(ri->ARM64ri6.I6.imm6 < 64);
1339 vassert(ri->ARM64ri6.I6.imm6 > 0);
1340 return ri;
1341 case ARM64ri6_R:
1342 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1343 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1344 return ri;
1345 default:
1346 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1350 /* DO NOT CALL THIS DIRECTLY ! */
1351 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1353 IRType ty = typeOfIRExpr(env->type_env,e);
1354 vassert(ty == Ity_I64 || ty == Ity_I8);
1356 /* special case: immediate */
1357 if (e->tag == Iex_Const) {
1358 switch (e->Iex.Const.con->tag) {
1359 case Ico_U8: {
1360 UInt u = e->Iex.Const.con->Ico.U8;
1361 if (u > 0 && u < 64)
1362 return ARM64RI6_I6(u);
1363 break;
1364 default:
1365 break;
1368 /* else fail, fall through to default case */
1371 /* default case: calculate into a register and return that */
1373 HReg r = iselIntExpr_R ( env, e );
1374 return ARM64RI6_R(r);
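/* Example (a sketch): a shift amount supplied as Const:U8(3) becomes the
   immediate form ARM64RI6_I6(3); an amount of 0, or one held in an IR
   temporary, is computed into a register and returned as ARM64RI6_R. */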
1379 /* ------------------- CondCode ------------------- */
1381 /* Generate code to evaluate a bit-typed expression, returning the
1382 condition code which would correspond when the expression would
1383 notionally have returned 1.
1385 Note that iselCondCode_C and iselCondCode_R are mutually recursive. For
1386 future changes to either of them, take care not to introduce an infinite
1387 loop involving the two of them.
1389 static ARM64CondCode iselCondCode_C ( ISelEnv* env, IRExpr* e )
1391 ARM64CondCode cc = iselCondCode_C_wrk(env,e);
1392 vassert(cc != ARM64cc_NV);
1393 return cc;
1396 static ARM64CondCode iselCondCode_C_wrk ( ISelEnv* env, IRExpr* e )
1398 vassert(e);
1399 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1401 /* var */
1402 if (e->tag == Iex_RdTmp) {
1403 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1404 /* Cmp doesn't modify rTmp; so this is OK. */
1405 ARM64RIL* one = mb_mkARM64RIL_I(1);
1406 vassert(one);
1407 addInstr(env, ARM64Instr_Test(rTmp, one));
1408 return ARM64cc_NE;
1411 /* Constant 1:Bit */
1412 if (e->tag == Iex_Const) {
1413 /* This is a very stupid translation. Hopefully it doesn't occur much,
1414 if ever. */
1415 vassert(e->Iex.Const.con->tag == Ico_U1);
1416 vassert(e->Iex.Const.con->Ico.U1 == True
1417 || e->Iex.Const.con->Ico.U1 == False);
1418 HReg rTmp = newVRegI(env);
1419 addInstr(env, ARM64Instr_Imm64(rTmp, 0));
1420 ARM64RIL* one = mb_mkARM64RIL_I(1);
1421 vassert(one);
1422 addInstr(env, ARM64Instr_Test(rTmp, one));
1423 return e->Iex.Const.con->Ico.U1 ? ARM64cc_EQ : ARM64cc_NE;
1426 /* Not1(e) */
1427 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1428 /* Generate code for the arg, and negate the test condition */
1429 ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
1430 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1431 return ARM64cc_AL;
1432 } else {
1433 return 1 ^ cc;
1437 /* --- patterns rooted at: 64to1 --- */
1439 if (e->tag == Iex_Unop
1440 && e->Iex.Unop.op == Iop_64to1) {
1441 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1442 ARM64RIL* one = mb_mkARM64RIL_I(1);
1443 vassert(one); /* '1' must be representable */
1444 addInstr(env, ARM64Instr_Test(rTmp, one));
1445 return ARM64cc_NE;
1448 /* --- patterns rooted at: CmpNEZ8 --- */
1450 if (e->tag == Iex_Unop
1451 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1452 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1453 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1454 addInstr(env, ARM64Instr_Test(r1, xFF));
1455 return ARM64cc_NE;
1458 /* --- patterns rooted at: CmpNEZ16 --- */
1460 if (e->tag == Iex_Unop
1461 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1462 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1463 ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
1464 addInstr(env, ARM64Instr_Test(r1, xFFFF));
1465 return ARM64cc_NE;
1468 /* --- patterns rooted at: CmpNEZ64 --- */
1470 if (e->tag == Iex_Unop
1471 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1472 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1473 ARM64RIA* zero = ARM64RIA_I12(0,0);
1474 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1475 return ARM64cc_NE;
1478 /* --- patterns rooted at: CmpNEZ32 --- */
1480 if (e->tag == Iex_Unop
1481 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1482 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1483 ARM64RIA* zero = ARM64RIA_I12(0,0);
1484 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1485 return ARM64cc_NE;
1488 /* --- Cmp*64*(x,y) --- */
1489 if (e->tag == Iex_Binop
1490 && (e->Iex.Binop.op == Iop_CmpEQ64
1491 || e->Iex.Binop.op == Iop_CmpNE64
1492 || e->Iex.Binop.op == Iop_CmpLT64S
1493 || e->Iex.Binop.op == Iop_CmpLT64U
1494 || e->Iex.Binop.op == Iop_CmpLE64S
1495 || e->Iex.Binop.op == Iop_CmpLE64U
1496 || e->Iex.Binop.op == Iop_CasCmpEQ64
1497 || e->Iex.Binop.op == Iop_CasCmpNE64)) {
1498 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1499 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1500 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1501 switch (e->Iex.Binop.op) {
1502 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return ARM64cc_EQ;
1503 case Iop_CmpNE64: case Iop_CasCmpNE64: return ARM64cc_NE;
1504 case Iop_CmpLT64S: return ARM64cc_LT;
1505 case Iop_CmpLT64U: return ARM64cc_CC;
1506 case Iop_CmpLE64S: return ARM64cc_LE;
1507 case Iop_CmpLE64U: return ARM64cc_LS;
1508 default: vpanic("iselCondCode_C(arm64): CmpXX64");
1512 /* --- Cmp*32*(x,y) --- */
1513 if (e->tag == Iex_Binop
1514 && (e->Iex.Binop.op == Iop_CmpEQ32
1515 || e->Iex.Binop.op == Iop_CmpNE32
1516 || e->Iex.Binop.op == Iop_CmpLT32S
1517 || e->Iex.Binop.op == Iop_CmpLT32U
1518 || e->Iex.Binop.op == Iop_CmpLE32S
1519 || e->Iex.Binop.op == Iop_CmpLE32U
1520 || e->Iex.Binop.op == Iop_CasCmpEQ32
1521 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1522 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1523 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1524 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1525 switch (e->Iex.Binop.op) {
1526 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return ARM64cc_EQ;
1527 case Iop_CmpNE32: case Iop_CasCmpNE32: return ARM64cc_NE;
1528 case Iop_CmpLT32S: return ARM64cc_LT;
1529 case Iop_CmpLT32U: return ARM64cc_CC;
1530 case Iop_CmpLE32S: return ARM64cc_LE;
1531 case Iop_CmpLE32U: return ARM64cc_LS;
1532 default: vpanic("iselCondCode_C(arm64): CmpXX32");
1536 /* --- Cmp*16*(x,y) --- */
1537 if (e->tag == Iex_Binop
1538 && (e->Iex.Binop.op == Iop_CasCmpEQ16
1539 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1540 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1541 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1542 HReg argL2 = widen_z_16_to_64(env, argL);
1543 HReg argR2 = widen_z_16_to_64(env, argR);
1544 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1545 switch (e->Iex.Binop.op) {
1546 case Iop_CasCmpEQ16: return ARM64cc_EQ;
1547 case Iop_CasCmpNE16: return ARM64cc_NE;
1548 default: vpanic("iselCondCode_C(arm64): CmpXX16");
1552 /* --- Cmp*8*(x,y) --- */
1553 if (e->tag == Iex_Binop
1554 && (e->Iex.Binop.op == Iop_CasCmpEQ8
1555 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1556 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1557 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1558 HReg argL2 = widen_z_8_to_64(env, argL);
1559 HReg argR2 = widen_z_8_to_64(env, argR);
1560 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1561 switch (e->Iex.Binop.op) {
1562 case Iop_CasCmpEQ8: return ARM64cc_EQ;
1563 case Iop_CasCmpNE8: return ARM64cc_NE;
1564 default: vpanic("iselCondCode_C(arm64): CmpXX8");
1568 /* --- And1(x,y), Or1(x,y) --- */
1569 if (e->tag == Iex_Binop
1570 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1571 HReg tmp = iselCondCode_R(env, e);
1572 ARM64RIL* one = mb_mkARM64RIL_I(1);
1573 vassert(one);
1574 addInstr(env, ARM64Instr_Test(tmp, one));
1575 return ARM64cc_NE;
1578 ppIRExpr(e);
1579 vpanic("iselCondCode_C");
1583 /* --------------------- CONDCODE as int reg --------------------- */
1585 /* Generate code to evaluate a bit-typed expression, returning the resulting
1586 value in bit 0 of an integer register. WARNING: all of the other bits in the
1587 register can be arbitrary. Callers must mask them off or otherwise ignore
1588 them, as necessary.
1590 Note that iselCondCode_C and iselCondCode_R are mutually recursive. For
1591 future changes to either of them, take care not to introduce an infinite
1592 loop involving the two of them.
1594 static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e )
1596 /* Uh, there's nothing we can sanity check here, unfortunately. */
1597 return iselCondCode_R_wrk(env,e);
1600 /* DO NOT CALL THIS DIRECTLY ! */
1601 static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e )
1603 vassert(e);
1604 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1606 /* var */
1607 if (e->tag == Iex_RdTmp) {
1608 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1611 /* And1(x,y), Or1(x,y) */
1612 if (e->tag == Iex_Binop
1613 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1614 HReg res = newVRegI(env);
1615 HReg x_as_64 = iselCondCode_R(env, e->Iex.Binop.arg1);
1616 HReg y_as_64 = iselCondCode_R(env, e->Iex.Binop.arg2);
1617 ARM64LogicOp lop
1618 = e->Iex.Binop.op == Iop_And1 ? ARM64lo_AND : ARM64lo_OR;
1619 addInstr(env, ARM64Instr_Logic(res, x_as_64, ARM64RIL_R(y_as_64), lop));
1620 return res;
1623 /* Anything else, we hand off to iselCondCode_C and force the value into a
1624 register. */
1625 HReg res = newVRegI(env);
1626 ARM64CondCode cc = iselCondCode_C(env, e);
1627 addInstr(env, ARM64Instr_Set64(res, cc));
1628 return res;
1630 /* PJF the following two lines are dead code
1631 ppIRExpr(e);
1632 vpanic("iselCondCode_R(arm64)");
1637 /* --------------------- Reg --------------------- */
1639 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1641 HReg r = iselIntExpr_R_wrk(env, e);
1642 /* sanity checks ... */
1643 # if 0
1644 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1645 # endif
1646 vassert(hregClass(r) == HRcInt64);
1647 vassert(hregIsVirtual(r));
1648 return r;
1651 /* DO NOT CALL THIS DIRECTLY ! */
1652 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1654 IRType ty = typeOfIRExpr(env->type_env,e);
1655 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1657 switch (e->tag) {
1659 /* --------- TEMP --------- */
1660 case Iex_RdTmp: {
1661 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1664 /* --------- LOAD --------- */
1665 case Iex_Load: {
1666 HReg dst = newVRegI(env);
1668 if (e->Iex.Load.end != Iend_LE)
1669 goto irreducible;
1671 if (ty == Ity_I64) {
1672 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1673 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1674 return dst;
1676 if (ty == Ity_I32) {
1677 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1678 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1679 return dst;
1681 if (ty == Ity_I16) {
1682 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1683 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1684 return dst;
1686 if (ty == Ity_I8) {
1687 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1688 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1689 return dst;
1691 break;
1694 /* --------- BINARY OP --------- */
1695 case Iex_Binop: {
1697 ARM64LogicOp lop = 0; /* invalid */
1698 ARM64ShiftOp sop = 0; /* invalid */
1700 /* Special-case 0-x into a Neg instruction. Not because it's
1701 particularly useful in itself, but to exercise this instruction
1702 and so check the assembly correctness of the Left32/Left64
1703 implementation. */
1704 switch (e->Iex.Binop.op) {
1705 case Iop_Sub64:
1706 if (isZeroU64(e->Iex.Binop.arg1)) {
1707 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1708 HReg dst = newVRegI(env);
1709 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1710 return dst;
1712 break;
1713 default:
1714 break;
1717 /* AND64/OR64/XOR64/ADD64/SUB64(e1, e2 shifted by imm)
1718 AND64/OR64/XOR64/ADD64(e1 shifted by imm, e2)
1721 switch (e->Iex.Binop.op) {
1722 case Iop_And64: case Iop_Or64: case Iop_Xor64:
1723 case Iop_Add64: case Iop_Sub64: {
1724 ARM64RRSOp mainOp = ARM64rrs_INVALID;
1725 ARM64ShiftOp shiftOp = (ARM64ShiftOp)0; // Invalid
1726 IRExpr* argUnshifted = NULL;
1727 IRExpr* argToBeShifted = NULL;
1728 UChar amt = 0;
1729 if (matchesRegRegShift(&mainOp, &shiftOp, &amt, &argUnshifted,
1730 &argToBeShifted, e)) {
1731 HReg rDst = newVRegI(env);
1732 HReg rUnshifted = iselIntExpr_R(env, argUnshifted);
1733 HReg rToBeShifted = iselIntExpr_R(env, argToBeShifted);
1734 addInstr(env, ARM64Instr_RRS(rDst, rUnshifted, rToBeShifted,
1735 shiftOp, amt, mainOp));
1736 return rDst;
1739 default:
1740 break;
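/* Illustrative note (added, not from the original source): matchesRegRegShift
   is expected to spot trees such as Add64(t1, Shl64(t2, Const:U8(3))), in
   which case the ARM64Instr_RRS above folds the shift into the second
   operand of the arithmetic/logical instruction, giving roughly

      add   x<dst>, x<t1>, x<t2>, lsl #3

   instead of a separate shift followed by an add. */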
1744 /* ADD/SUB(e1, e2) (for any e1, e2) */
1745 switch (e->Iex.Binop.op) {
1746 case Iop_Add64: case Iop_Add32:
1747 case Iop_Sub64: case Iop_Sub32: {
1748 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1749 || e->Iex.Binop.op == Iop_Add32;
1750 HReg dst = newVRegI(env);
1751 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1752 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1753 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1754 return dst;
1756 default:
1757 break;
1760 /* AND/OR/XOR(e1, e2) (for any e1, e2) */
1761 switch (e->Iex.Binop.op) {
1762 case Iop_And64: case Iop_And32:
1763 lop = ARM64lo_AND; goto log_binop;
1764 case Iop_Or64: case Iop_Or32: case Iop_Or16:
1765 lop = ARM64lo_OR; goto log_binop;
1766 case Iop_Xor64: case Iop_Xor32:
1767 lop = ARM64lo_XOR; goto log_binop;
1768 log_binop: {
1769 HReg dst = newVRegI(env);
1770 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1771 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1772 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1773 return dst;
1775 default:
1776 break;
1779 /* SHL/SHR/SAR */
1780 switch (e->Iex.Binop.op) {
1781 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1782 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1783 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1784 sh_binop: {
1785 HReg dst = newVRegI(env);
1786 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1787 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1788 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1789 return dst;
1791 case Iop_Shr32:
1792 case Iop_Sar32: {
1793 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1794 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1795 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1796 HReg dst = zx ? widen_z_32_to_64(env, argL)
1797 : widen_s_32_to_64(env, argL);
1798 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1799 return dst;
1801 default: break;
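/* Note (added, not from the original source): for Iop_Shr32 and Iop_Sar32
   the value is first widened to 64 bits -- zero-extended for Shr32,
   sign-extended for Sar32 -- and then a 64-bit logical shift right is used
   in both cases.  For Sar32 this still yields the correct low 32 bits,
   because bits 32..63 of the widened value are copies of the sign bit; the
   upper half of the result is irrelevant for an Ity_I32 value. */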
1804 /* MUL */
1805 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1806 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1807 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1808 HReg dst = newVRegI(env);
1809 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1810 return dst;
1813 /* MULL */
1814 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1815 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1816 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1817 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1818 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1819 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1820 HReg dst = newVRegI(env);
1821 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1822 return dst;
1825 /* Handle misc other ops. */
1827 if (e->Iex.Binop.op == Iop_Max32U) {
1828 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1829 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1830 HReg dst = newVRegI(env);
1831 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1832 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1833 return dst;
1836 if (e->Iex.Binop.op == Iop_32HLto64) {
1837 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1838 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1839 HReg lo32 = widen_z_32_to_64(env, lo32s);
1840 HReg hi32 = newVRegI(env);
1841 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1842 ARM64sh_SHL));
1843 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1844 ARM64lo_OR));
1845 return hi32;
1848 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32 ||
1849 e->Iex.Binop.op == Iop_CmpF16) {
1850 HReg (*iselExpr)(ISelEnv*, IRExpr*) = NULL;
1851 ARM64Instr* (*VCmp)(HReg, HReg) = NULL;
1852 if (e->Iex.Binop.op == Iop_CmpF64) {
1853 iselExpr = &iselDblExpr;
1854 VCmp = &ARM64Instr_VCmpD;
1856 else if (e->Iex.Binop.op == Iop_CmpF32) {
1857 iselExpr = &iselFltExpr;
1858 VCmp = &ARM64Instr_VCmpS;
1860 else {
1861 iselExpr = &iselF16Expr;
1862 VCmp = &ARM64Instr_VCmpH;
1864 HReg dL = (iselExpr)(env, e->Iex.Binop.arg1);
1865 HReg dR = (iselExpr)(env, e->Iex.Binop.arg2);
1866 HReg dst = newVRegI(env);
1867 HReg imm = newVRegI(env);
1868 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1869 create in dst the IRCmpF64Result-encoded result. */
1870 addInstr(env, (VCmp)(dL, dR));
1871 addInstr(env, ARM64Instr_Imm64(dst, 0));
1872 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1873 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1874 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1875 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1876 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1877 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1878 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1879 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1880 return dst;
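/* Note (added, not from the original source): the CSel cascade above maps
   the FCMP-produced NZCV flags onto the IRCmpF64Result encoding: EQ -> 0x40
   (Ircr_EQ), MI -> 0x01 (Ircr_LT), GT -> 0x00 (Ircr_GT), VS -> 0x45
   (Ircr_UN).  After an FCMP exactly one of those conditions holds, so the
   matching CSel determines the final value of dst. */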
1883 { /* local scope */
1884 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1885 Bool srcIsD = False;
1886 switch (e->Iex.Binop.op) {
1887 case Iop_F64toI64S:
1888 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1889 case Iop_F64toI64U:
1890 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1891 case Iop_F64toI32S:
1892 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1893 case Iop_F64toI32U:
1894 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1895 case Iop_F32toI32S:
1896 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1897 case Iop_F32toI32U:
1898 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1899 case Iop_F32toI64S:
1900 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1901 case Iop_F32toI64U:
1902 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1903 default:
1904 break;
1906 if (cvt_op != ARM64cvt_INVALID) {
1907 /* This is all a bit dodgy, because we can't handle a
1908 non-constant (not-known-at-JIT-time) rounding mode
1909 indication. That's because there's no instruction
1910 AFAICS that does this conversion but rounds according to
1911 FPCR.RM, so we have to bake the rounding mode into the
1912 instruction right now. But that should be OK because
1913 (1) the front end attaches a literal Irrm_ value to the
1914 conversion binop, and (2) iropt will never use CSE to replace
1915 that literal with a temporary. Hence we should always
1916 have a literal Irrm_ value as the first arg. */
1917 IRExpr* arg1 = e->Iex.Binop.arg1;
1918 if (arg1->tag != Iex_Const) goto irreducible;
1919 IRConst* arg1con = arg1->Iex.Const.con;
1920 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1921 UInt irrm = arg1con->Ico.U32;
1922 /* Find the ARM-encoded equivalent for |irrm|. */
1923 UInt armrm = 4; /* impossible */
1924 Bool tiesToAway = False;
1925 switch (irrm) {
1926 case Irrm_NEAREST: armrm = 0; break;
1927 case Irrm_NegINF: armrm = 2; break;
1928 case Irrm_PosINF: armrm = 1; break;
1929 case Irrm_ZERO: armrm = 3; break;
1930 case Irrm_NEAREST_TIE_AWAY_0: armrm = 0; tiesToAway = True; break;
1931 default: goto irreducible;
1933 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1934 (env, e->Iex.Binop.arg2);
1935 HReg dst = newVRegI(env);
1936 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm, tiesToAway));
1937 return dst;
1939 } /* local scope */
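/* Note (added, not from the original source): the armrm values used above
   follow the ARMv8 FPCR.RMode encoding -- 0 = round to nearest (even),
   1 = towards +infinity, 2 = towards -infinity, 3 = towards zero.  Round to
   nearest with ties away from zero has no FPCR encoding, which is
   presumably why it is passed separately as the tiesToAway flag and handled
   by a different instruction form inside ARM64Instr_VCvtF2I. */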
1941 /* All cases involving host-side helper calls. */
1942 void* fn = NULL;
1943 switch (e->Iex.Binop.op) {
1944 case Iop_DivU32:
1945 fn = &h_calc_udiv32_w_arm_semantics; break;
1946 case Iop_DivS32:
1947 fn = &h_calc_sdiv32_w_arm_semantics; break;
1948 case Iop_DivU64:
1949 fn = &h_calc_udiv64_w_arm_semantics; break;
1950 case Iop_DivS64:
1951 fn = &h_calc_sdiv64_w_arm_semantics; break;
1952 default:
1953 break;
1956 if (fn) {
1957 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1958 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1959 HReg res = newVRegI(env);
1960 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1961 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1962 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
1963 2, mk_RetLoc_simple(RLPri_Int) ));
1964 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1965 return res;
1968 break;
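/* Note (added, not from the original source): the integer division cases
   just above marshal the two operands into X0 and X1 (the AAPCS64 argument
   registers), call the helper, and copy the result back out of X0.  The
   helpers are the *_w_arm_semantics routines, presumably so that corner
   cases such as division by zero give the ARM-defined result (zero) rather
   than trapping. */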
1971 /* --------- UNARY OP --------- */
1972 case Iex_Unop: {
1974 switch (e->Iex.Unop.op) {
1975 case Iop_16Uto64: {
1976 /* This probably doesn't occur often enough to be worth
1977 rolling the extension into the load. */
1978 IRExpr* arg = e->Iex.Unop.arg;
1979 HReg src = iselIntExpr_R(env, arg);
1980 HReg dst = widen_z_16_to_64(env, src);
1981 return dst;
1983 case Iop_32Uto64: {
1984 IRExpr* arg = e->Iex.Unop.arg;
1985 if (arg->tag == Iex_Load) {
1986 /* This correctly zero extends because _LdSt32 is
1987 defined to do a zero extending load. */
1988 HReg dst = newVRegI(env);
1989 ARM64AMode* am
1990 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1991 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1992 return dst;
1994 /* else be lame and mask it */
1995 HReg src = iselIntExpr_R(env, arg);
1996 HReg dst = widen_z_32_to_64(env, src);
1997 return dst;
1999 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
2000 case Iop_8Uto64: {
2001 IRExpr* arg = e->Iex.Unop.arg;
2002 if (arg->tag == Iex_Load) {
2003 /* This correctly zero extends because _LdSt8 is
2004 defined to do a zero extending load. */
2005 HReg dst = newVRegI(env);
2006 ARM64AMode* am
2007 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
2008 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2009 return dst;
2011 /* else be lame and mask it */
2012 HReg src = iselIntExpr_R(env, arg);
2013 HReg dst = widen_z_8_to_64(env, src);
2014 return dst;
2016 case Iop_128HIto64: {
2017 HReg rHi, rLo;
2018 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2019 return rHi; /* and abandon rLo */
2021 case Iop_128to64: {
2022 HReg rHi, rLo;
2023 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2024 return rLo; /* and abandon rHi */
2026 case Iop_8Sto32: case Iop_8Sto64: {
2027 IRExpr* arg = e->Iex.Unop.arg;
2028 HReg src = iselIntExpr_R(env, arg);
2029 HReg dst = widen_s_8_to_64(env, src);
2030 return dst;
2032 case Iop_16Sto32: case Iop_16Sto64: {
2033 IRExpr* arg = e->Iex.Unop.arg;
2034 HReg src = iselIntExpr_R(env, arg);
2035 HReg dst = widen_s_16_to_64(env, src);
2036 return dst;
2038 case Iop_32Sto64: {
2039 IRExpr* arg = e->Iex.Unop.arg;
2040 HReg src = iselIntExpr_R(env, arg);
2041 HReg dst = widen_s_32_to_64(env, src);
2042 return dst;
2044 case Iop_Not32:
2045 case Iop_Not64: {
2046 HReg dst = newVRegI(env);
2047 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2048 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
2049 return dst;
2051 case Iop_Clz64: {
2052 HReg dst = newVRegI(env);
2053 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2054 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
2055 return dst;
2057 case Iop_Left32:
2058 case Iop_Left64: {
2059 /* Left64(src) = src | -src. Left32 can use the same
2060 implementation since in that case we don't care what
2061 the upper 32 bits become. */
2062 HReg dst = newVRegI(env);
2063 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2064 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2065 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2066 ARM64lo_OR));
2067 return dst;
2069 case Iop_CmpwNEZ64: {
2070 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
2071 = Left64(src) >>s 63 */
2072 HReg dst = newVRegI(env);
2073 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2074 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2075 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2076 ARM64lo_OR));
2077 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2078 ARM64sh_SAR));
2079 return dst;
2081 case Iop_CmpwNEZ32: {
2082 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
2083 = Left64(src & 0xFFFFFFFF) >>s 63 */
2084 HReg dst = newVRegI(env);
2085 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
2086 HReg src = widen_z_32_to_64(env, pre);
2087 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2088 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2089 ARM64lo_OR));
2090 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2091 ARM64sh_SAR));
2092 return dst;
2094 case Iop_V128to64: case Iop_V128HIto64: {
2095 HReg dst = newVRegI(env);
2096 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2097 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
2098 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
2099 return dst;
2101 case Iop_ReinterpF64asI64: {
2102 HReg dst = newVRegI(env);
2103 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2104 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
2105 return dst;
2107 case Iop_ReinterpF32asI32: {
2108 HReg dst = newVRegI(env);
2109 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2110 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
2111 return dst;
2113 case Iop_1Sto16:
2114 case Iop_1Sto32:
2115 case Iop_1Sto64: {
2116 /* As with the iselStmt case for 'tmp:I1 = expr', we could
2117 do a lot better here if it ever became necessary. (CSDEC?) */
2118 HReg zero = hregARM64_XZR_XSP(); // XZR in this context
2119 HReg one = newVRegI(env);
2120 HReg dst = newVRegI(env);
2121 addInstr(env, ARM64Instr_Imm64(one, 1));
2122 ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
2123 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2124 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2125 ARM64sh_SHL));
2126 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2127 ARM64sh_SAR));
2128 return dst;
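/* Note (added, not from the original source): the sequence above first uses
   CSel to produce 0 or 1, then a left shift by 63 followed by an arithmetic
   right shift by 63 to copy bit 0 into every bit of the register, giving
   all-zeroes or all-ones.  For 1Sto16/1Sto32 only the low 16/32 bits of
   that result are significant. */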
2130 case Iop_NarrowUn16to8x8:
2131 case Iop_NarrowUn32to16x4:
2132 case Iop_NarrowUn64to32x2:
2133 case Iop_QNarrowUn16Sto8Sx8:
2134 case Iop_QNarrowUn32Sto16Sx4:
2135 case Iop_QNarrowUn64Sto32Sx2:
2136 case Iop_QNarrowUn16Uto8Ux8:
2137 case Iop_QNarrowUn32Uto16Ux4:
2138 case Iop_QNarrowUn64Uto32Ux2:
2139 case Iop_QNarrowUn16Sto8Ux8:
2140 case Iop_QNarrowUn32Sto16Ux4:
2141 case Iop_QNarrowUn64Sto32Ux2:
2143 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2144 HReg tmp = newVRegV(env);
2145 HReg dst = newVRegI(env);
2146 UInt dszBlg2 = 3; /* illegal */
2147 ARM64VecNarrowOp op = ARM64vecna_INVALID;
2148 switch (e->Iex.Unop.op) {
2149 case Iop_NarrowUn16to8x8:
2150 dszBlg2 = 0; op = ARM64vecna_XTN; break;
2151 case Iop_NarrowUn32to16x4:
2152 dszBlg2 = 1; op = ARM64vecna_XTN; break;
2153 case Iop_NarrowUn64to32x2:
2154 dszBlg2 = 2; op = ARM64vecna_XTN; break;
2155 case Iop_QNarrowUn16Sto8Sx8:
2156 dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
2157 case Iop_QNarrowUn32Sto16Sx4:
2158 dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
2159 case Iop_QNarrowUn64Sto32Sx2:
2160 dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
2161 case Iop_QNarrowUn16Uto8Ux8:
2162 dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
2163 case Iop_QNarrowUn32Uto16Ux4:
2164 dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
2165 case Iop_QNarrowUn64Uto32Ux2:
2166 dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
2167 case Iop_QNarrowUn16Sto8Ux8:
2168 dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
2169 case Iop_QNarrowUn32Sto16Ux4:
2170 dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
2171 case Iop_QNarrowUn64Sto32Ux2:
2172 dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
2173 default:
2174 vassert(0);
2176 addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
2177 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
2178 return dst;
2180 case Iop_1Uto64: {
2181 /* 1Uto64(tmp). */
2182 HReg dst = newVRegI(env);
2183 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
2184 ARM64RIL* one = mb_mkARM64RIL_I(1);
2185 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
2186 vassert(one);
2187 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
2188 } else {
2189 /* CLONE-01 */
2190 HReg zero = hregARM64_XZR_XSP(); // XZR in this context
2191 HReg one = newVRegI(env);
2192 addInstr(env, ARM64Instr_Imm64(one, 1));
2193 ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
2194 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2196 return dst;
2198 case Iop_64HIto32: {
2199 HReg dst = newVRegI(env);
2200 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2201 addInstr(env, ARM64Instr_Shift(dst, src, ARM64RI6_I6(32),
2202 ARM64sh_SHR));
2203 return dst;
2205 case Iop_64to32:
2206 case Iop_64to16:
2207 case Iop_64to8:
2208 case Iop_32to16:
2209 /* These are no-ops. */
2210 return iselIntExpr_R(env, e->Iex.Unop.arg);
2211 default:
2212 break;
2215 break;
2218 /* --------- GET --------- */
2219 case Iex_Get: {
2220 if (ty == Ity_I64
2221 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
2222 HReg dst = newVRegI(env);
2223 ARM64AMode* am
2224 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2225 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2226 return dst;
2228 if (ty == Ity_I32
2229 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
2230 HReg dst = newVRegI(env);
2231 ARM64AMode* am
2232 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2233 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2234 return dst;
2236 if (ty == Ity_I16
2237 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2238 HReg dst = newVRegI(env);
2239 ARM64AMode* am
2240 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2241 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2242 return dst;
2244 if (ty == Ity_I8
2245 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2246 HReg dst = newVRegI(env);
2247 ARM64AMode* am
2248 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2249 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2250 return dst;
2252 break;
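/* Note (added, not from the original source): the offset limits above --
   e.g. (8<<12)-8 = 32760 for the 64-bit case -- keep the baseblock offset
   within the range of the unsigned, size-scaled 12-bit immediate form of
   LDR/STR, so each Get can be done with a single load relative to the
   baseblock register.  Offsets outside that range, or misaligned ones, are
   not handled here and end up at the "cannot reduce tree" panic at the end
   of this function. */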
2255 /* --------- CCALL --------- */
2256 case Iex_CCall: {
2257 HReg dst = newVRegI(env);
2258 vassert(ty == e->Iex.CCall.retty);
2260 /* be very restrictive for now. Only 64-bit ints allowed for
2261 args, and 64 bits for return type. Don't forget to change
2262 the RetLoc if more types are allowed in future. */
2263 if (e->Iex.CCall.retty != Ity_I64)
2264 goto irreducible;
2266 /* Marshal args, do the call, clear stack. */
2267 UInt addToSp = 0;
2268 RetLoc rloc = mk_RetLoc_INVALID();
2269 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2270 e->Iex.CCall.cee, e->Iex.CCall.retty,
2271 e->Iex.CCall.args );
2272 /* */
2273 if (ok) {
2274 vassert(is_sane_RetLoc(rloc));
2275 vassert(rloc.pri == RLPri_Int);
2276 vassert(addToSp == 0);
2277 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2278 return dst;
2280 goto irreducible;
2283 /* --------- LITERAL --------- */
2284 /* 64-bit literals */
2285 case Iex_Const: {
2286 ULong u = 0;
2287 HReg dst = newVRegI(env);
2288 switch (e->Iex.Const.con->tag) {
2289 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2290 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2291 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2292 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2293 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2295 addInstr(env, ARM64Instr_Imm64(dst, u));
2296 return dst;
2299 /* --------- MULTIPLEX --------- */
2300 case Iex_ITE: {
2301 /* ITE(ccexpr, iftrue, iffalse) */
2302 if (ty == Ity_I64 || ty == Ity_I32) {
2303 ARM64CondCode cc;
2304 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2305 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2306 HReg dst = newVRegI(env);
2307 cc = iselCondCode_C(env, e->Iex.ITE.cond);
2308 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2309 return dst;
2311 break;
2314 default:
2315 break;
2316 } /* switch (e->tag) */
2318 /* We get here if no pattern matched. */
2319 irreducible:
2320 ppIRExpr(e);
2321 vpanic("iselIntExpr_R: cannot reduce tree");
2325 /*---------------------------------------------------------*/
2326 /*--- ISEL: Integer expressions (128 bit) ---*/
2327 /*---------------------------------------------------------*/
2329 /* Compute a 128-bit value into a register pair, which is returned as
2330 the first two parameters. As with iselIntExpr_R, these may be
2331 either real or virtual regs; in any case they must not be changed
2332 by subsequent code emitted by the caller. */
2334 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2335 ISelEnv* env, IRExpr* e )
2337 iselInt128Expr_wrk(rHi, rLo, env, e);
2338 # if 0
2339 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2340 # endif
2341 vassert(hregClass(*rHi) == HRcInt64);
2342 vassert(hregIsVirtual(*rHi));
2343 vassert(hregClass(*rLo) == HRcInt64);
2344 vassert(hregIsVirtual(*rLo));
2347 /* DO NOT CALL THIS DIRECTLY ! */
2348 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2349 ISelEnv* env, IRExpr* e )
2351 vassert(e);
2352 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2354 /* --------- TEMP --------- */
2355 if (e->tag == Iex_RdTmp) {
2356 lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp);
2357 return;
2360 /* --------- CONST --------- */
2361 if (e->tag == Iex_Const) {
2362 IRConst* c = e->Iex.Const.con;
2363 vassert(c->tag == Ico_U128);
2364 if (c->Ico.U128 == 0) {
2365 // The only case we need to handle (so far)
2366 HReg zero = newVRegI(env);
2367 addInstr(env, ARM64Instr_Imm64(zero, 0));
2368 *rHi = *rLo = zero;
2369 return;
2373 /* --------- UNARY ops --------- */
2374 if (e->tag == Iex_Unop) {
2375 switch (e->Iex.Unop.op) {
2376 case Iop_ReinterpV128asI128: {
2377 HReg dstHi = newVRegI(env);
2378 HReg dstLo = newVRegI(env);
2379 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2380 addInstr(env, ARM64Instr_VXfromQ(dstHi, src, 1));
2381 addInstr(env, ARM64Instr_VXfromQ(dstLo, src, 0));
2382 *rHi = dstHi;
2383 *rLo = dstLo;
2384 return;
2386 default:
2387 break;
2391 /* --------- BINARY ops --------- */
2392 if (e->tag == Iex_Binop) {
2393 switch (e->Iex.Binop.op) {
2394 /* 64 x 64 -> 128 multiply */
2395 case Iop_MullU64:
2396 case Iop_MullS64: {
2397 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2398 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2399 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2400 HReg dstLo = newVRegI(env);
2401 HReg dstHi = newVRegI(env);
2402 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2403 ARM64mul_PLAIN));
2404 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2405 syned ? ARM64mul_SX : ARM64mul_ZX));
2406 *rHi = dstHi;
2407 *rLo = dstLo;
2408 return;
2410 /* 64HLto128(e1,e2) */
2411 case Iop_64HLto128:
2412 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2413 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2414 return;
2415 default:
2416 break;
2418 } /* if (e->tag == Iex_Binop) */
2420 ppIRExpr(e);
2421 vpanic("iselInt128Expr(arm64)");
2425 /*---------------------------------------------------------*/
2426 /*--- ISEL: Vector expressions (128 bit) ---*/
2427 /*---------------------------------------------------------*/
2429 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
2431 HReg r = iselV128Expr_wrk( env, e );
2432 vassert(hregClass(r) == HRcVec128);
2433 vassert(hregIsVirtual(r));
2434 return r;
2437 /* DO NOT CALL THIS DIRECTLY */
2438 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
2440 IRType ty = typeOfIRExpr(env->type_env, e);
2441 vassert(e);
2442 vassert(ty == Ity_V128);
2444 if (e->tag == Iex_RdTmp) {
2445 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2448 if (e->tag == Iex_Const) {
2449 /* Only a very limited range of constants is handled. */
2450 vassert(e->Iex.Const.con->tag == Ico_V128);
2451 UShort con = e->Iex.Const.con->Ico.V128;
2452 HReg res = newVRegV(env);
2453 switch (con) {
2454 case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
2455 addInstr(env, ARM64Instr_VImmQ(res, con));
2456 return res;
2457 case 0x00F0:
2458 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2459 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2460 return res;
2461 case 0x0F00:
2462 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2463 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2464 return res;
2465 case 0x0FF0:
2466 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2467 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2468 return res;
2469 case 0x0FFF:
2470 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2471 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2472 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2473 return res;
2474 case 0xF000:
2475 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2476 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2477 return res;
2478 case 0xFF00:
2479 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2480 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2481 return res;
2482 default:
2483 break;
2485 /* Unhandled */
2486 goto v128_expr_bad;
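/* Note (added, not from the original source): the VImmQ immediate is a
   16-bit mask with one bit per byte lane (bit i set means byte i = 0xFF),
   so only a few constants are directly representable.  The remaining cases
   are built by rotating a representable mask with VExtV: for example 0x00F0
   is obtained by materialising 0x000F (bytes 0..3 set) and then EXT-rotating
   the register against itself by 12 bytes, which moves those set bytes up
   into lanes 4..7. */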
2489 if (e->tag == Iex_Load) {
2490 HReg res = newVRegV(env);
2491 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
2492 vassert(ty == Ity_V128);
2493 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
2494 return res;
2497 if (e->tag == Iex_Get) {
2498 UInt offs = (UInt)e->Iex.Get.offset;
2499 if (offs < (1<<12)) {
2500 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
2501 HReg res = newVRegV(env);
2502 vassert(ty == Ity_V128);
2503 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
2504 return res;
2506 goto v128_expr_bad;
2509 if (e->tag == Iex_Unop) {
2511 /* Iop_ZeroHIXXofV128 cases */
2512 UShort imm16 = 0;
2513 switch (e->Iex.Unop.op) {
2514 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
2515 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
2516 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
2517 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
2518 default: break;
2520 if (imm16 != 0) {
2521 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2522 HReg imm = newVRegV(env);
2523 HReg res = newVRegV(env);
2524 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
2525 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
2526 return res;
2529 /* Other cases */
2530 switch (e->Iex.Unop.op) {
2531 case Iop_NotV128:
2532 case Iop_Abs64Fx2: case Iop_Abs32Fx4: case Iop_Abs16Fx8:
2533 case Iop_Neg64Fx2: case Iop_Neg32Fx4: case Iop_Neg16Fx8:
2534 case Iop_Abs64x2: case Iop_Abs32x4:
2535 case Iop_Abs16x8: case Iop_Abs8x16:
2536 case Iop_Cls32x4: case Iop_Cls16x8: case Iop_Cls8x16:
2537 case Iop_Clz32x4: case Iop_Clz16x8: case Iop_Clz8x16:
2538 case Iop_Cnt8x16:
2539 case Iop_Reverse1sIn8_x16:
2540 case Iop_Reverse8sIn16_x8:
2541 case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
2542 case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
2543 case Iop_Reverse32sIn64_x2:
2544 case Iop_RecipEst32Ux4:
2545 case Iop_RSqrtEst32Ux4:
2546 case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
2547 case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
2549 HReg res = newVRegV(env);
2550 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2551 Bool setRM = False;
2552 ARM64VecUnaryOp op = ARM64vecu_INVALID;
2553 switch (e->Iex.Unop.op) {
2554 case Iop_NotV128: op = ARM64vecu_NOT; break;
2555 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
2556 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
2557 case Iop_Abs16Fx8: op = ARM64vecu_FABS16x8; break;
2558 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
2559 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
2560 case Iop_Neg16Fx8: op = ARM64vecu_FNEG16x8; break;
2561 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
2562 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
2563 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
2564 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
2565 case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break;
2566 case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break;
2567 case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break;
2568 case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break;
2569 case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break;
2570 case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break;
2571 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
2572 case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break;
2573 case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break;
2574 case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break;
2575 case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break;
2576 case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break;
2577 case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break;
2578 case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
2579 case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break;
2580 case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break;
2581 case Iop_RecipEst64Fx2: setRM = True;
2582 op = ARM64vecu_FRECPE64x2; break;
2583 case Iop_RecipEst32Fx4: setRM = True;
2584 op = ARM64vecu_FRECPE32x4; break;
2585 case Iop_RSqrtEst64Fx2: setRM = True;
2586 op = ARM64vecu_FRSQRTE64x2; break;
2587 case Iop_RSqrtEst32Fx4: setRM = True;
2588 op = ARM64vecu_FRSQRTE32x4; break;
2589 default: vassert(0);
2591 if (setRM) {
2592 // This is a bit of a kludge. We should do rm properly for
2593 // these recip-est insns, but that would require changing the
2594 // primop's type to take an rmode.
2595 set_FPCR_rounding_mode(env, IRExpr_Const(
2596 IRConst_U32(Irrm_NEAREST)));
2598 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2599 return res;
2601 case Iop_CmpNEZ8x16:
2602 case Iop_CmpNEZ16x8:
2603 case Iop_CmpNEZ32x4:
2604 case Iop_CmpNEZ64x2: {
2605 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2606 HReg zero = newVRegV(env);
2607 HReg res = newVRegV(env);
2608 ARM64VecBinOp cmp = ARM64vecb_INVALID;
2609 switch (e->Iex.Unop.op) {
2610 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
2611 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
2612 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
2613 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
2614 default: vassert(0);
2616 // This is pretty feeble. Better: use CMP against zero
2617 // and avoid the extra instruction and extra register.
2618 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
2619 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
2620 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2621 return res;
2623 case Iop_V256toV128_0:
2624 case Iop_V256toV128_1: {
2625 HReg vHi, vLo;
2626 iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
2627 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
2629 case Iop_64UtoV128: {
2630 HReg res = newVRegV(env);
2631 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2632 addInstr(env, ARM64Instr_VQfromX(res, arg));
2633 return res;
2635 case Iop_Widen8Sto16x8: {
2636 HReg res = newVRegV(env);
2637 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2638 addInstr(env, ARM64Instr_VQfromX(res, arg));
2639 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
2640 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
2641 res, res, 8));
2642 return res;
2644 case Iop_Widen16Sto32x4: {
2645 HReg res = newVRegV(env);
2646 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2647 addInstr(env, ARM64Instr_VQfromX(res, arg));
2648 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
2649 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
2650 res, res, 16));
2651 return res;
2653 case Iop_Widen32Sto64x2: {
2654 HReg res = newVRegV(env);
2655 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2656 addInstr(env, ARM64Instr_VQfromX(res, arg));
2657 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
2658 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
2659 res, res, 32));
2660 return res;
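/* Note (added, not from the original source): the three Widen*Sto* cases
   above all use the same trick -- VQfromX moves the 64-bit source into a
   vector register, ZIP1 against itself duplicates each source element into
   both halves of a double-width lane, and the signed shift right by the
   source element's width then sign-extends each element into its widened
   lane. */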
2662 /* ... */
2663 default:
2664 break;
2665 } /* switch on the unop */
2666 } /* if (e->tag == Iex_Unop) */
2668 if (e->tag == Iex_Binop) {
2669 switch (e->Iex.Binop.op) {
2670 case Iop_Sqrt16Fx8:
2671 case Iop_Sqrt32Fx4:
2672 case Iop_Sqrt64Fx2: {
2673 HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
2674 HReg res = newVRegV(env);
2675 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
2676 ARM64VecUnaryOp op;
2677 switch (e->Iex.Binop.op) {
2678 case Iop_Sqrt16Fx8: op = ARM64vecu_FSQRT16x8; break;
2679 case Iop_Sqrt32Fx4: op = ARM64vecu_FSQRT32x4; break;
2680 case Iop_Sqrt64Fx2: op = ARM64vecu_FSQRT64x2; break;
2681 default: vassert(0);
2683 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2684 return res;
2686 case Iop_64HLtoV128: {
2687 HReg res = newVRegV(env);
2688 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2689 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2690 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
2691 return res;
2693 /* -- Cases where we can generate a simple three-reg instruction. -- */
2694 case Iop_AndV128:
2695 case Iop_OrV128:
2696 case Iop_XorV128:
2697 case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
2698 case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
2699 case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
2700 case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
2701 case Iop_Add64x2: case Iop_Add32x4:
2702 case Iop_Add16x8: case Iop_Add8x16:
2703 case Iop_Sub64x2: case Iop_Sub32x4:
2704 case Iop_Sub16x8: case Iop_Sub8x16:
2705 case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
2706 case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
2707 case Iop_CmpEQ16x8: case Iop_CmpEQ8x16:
2708 case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
2709 case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
2710 case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
2711 case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
2712 case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
2713 case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
2714 case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
2715 case Iop_CmpLT16Fx8: case Iop_CmpLE16Fx8: case Iop_CmpEQ16Fx8:
2716 case Iop_Perm8x16:
2717 case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
2718 case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
2719 case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
2720 case Iop_CatOddLanes16x8: case Iop_CatOddLanes8x16:
2721 case Iop_InterleaveHI32x4:
2722 case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
2723 case Iop_InterleaveLO32x4:
2724 case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
2725 case Iop_PolynomialMul8x16:
2726 case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
2727 case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
2728 case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
2729 case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
2730 case Iop_QSub64Sx2: case Iop_QSub32Sx4:
2731 case Iop_QSub16Sx8: case Iop_QSub8Sx16:
2732 case Iop_QSub64Ux2: case Iop_QSub32Ux4:
2733 case Iop_QSub16Ux8: case Iop_QSub8Ux16:
2734 case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8:
2735 case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
2736 case Iop_Sh8Sx16: case Iop_Sh16Sx8:
2737 case Iop_Sh32Sx4: case Iop_Sh64Sx2:
2738 case Iop_Sh8Ux16: case Iop_Sh16Ux8:
2739 case Iop_Sh32Ux4: case Iop_Sh64Ux2:
2740 case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
2741 case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
2742 case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
2743 case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
2744 case Iop_Max64Fx2: case Iop_Max32Fx4:
2745 case Iop_Min64Fx2: case Iop_Min32Fx4:
2746 case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
2747 case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
2749 HReg res = newVRegV(env);
2750 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2751 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2752 Bool sw = False;
2753 Bool setRM = False;
2754 ARM64VecBinOp op = ARM64vecb_INVALID;
2755 switch (e->Iex.Binop.op) {
2756 case Iop_AndV128: op = ARM64vecb_AND; break;
2757 case Iop_OrV128: op = ARM64vecb_ORR; break;
2758 case Iop_XorV128: op = ARM64vecb_XOR; break;
2759 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
2760 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
2761 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break;
2762 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
2763 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
2764 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
2765 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
2766 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
2767 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
2768 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
2769 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
2770 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
2771 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
2772 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
2773 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
2774 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
2775 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
2776 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
2777 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
2778 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
2779 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
2780 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
2781 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break;
2782 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
2783 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
2784 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
2785 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
2786 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
2787 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
2788 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
2789 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
2790 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
2791 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
2792 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
2793 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
2794 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
2795 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
2796 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
2797 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
2798 case Iop_CmpLE16Fx8: op = ARM64vecb_FCMGE16x8; sw = True; break;
2799 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
2800 case Iop_CmpLT16Fx8: op = ARM64vecb_FCMGT16x8; sw = True; break;
2801 case Iop_CmpEQ16Fx8: op = ARM64vecb_FCMEQ16x8; sw = True; break;
2802 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
2803 case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
2804 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
2805 break;
2806 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
2807 break;
2808 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
2809 break;
2810 case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
2811 break;
2812 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
2813 break;
2814 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True;
2815 break;
2816 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True;
2817 break;
2818 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True;
2819 break;
2820 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
2821 break;
2822 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
2823 break;
2824 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
2825 break;
2826 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
2827 break;
2828 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
2829 break;
2830 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
2831 break;
2832 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
2833 case Iop_QAdd64Sx2: op = ARM64vecb_SQADD64x2; break;
2834 case Iop_QAdd32Sx4: op = ARM64vecb_SQADD32x4; break;
2835 case Iop_QAdd16Sx8: op = ARM64vecb_SQADD16x8; break;
2836 case Iop_QAdd8Sx16: op = ARM64vecb_SQADD8x16; break;
2837 case Iop_QAdd64Ux2: op = ARM64vecb_UQADD64x2; break;
2838 case Iop_QAdd32Ux4: op = ARM64vecb_UQADD32x4; break;
2839 case Iop_QAdd16Ux8: op = ARM64vecb_UQADD16x8; break;
2840 case Iop_QAdd8Ux16: op = ARM64vecb_UQADD8x16; break;
2841 case Iop_QSub64Sx2: op = ARM64vecb_SQSUB64x2; break;
2842 case Iop_QSub32Sx4: op = ARM64vecb_SQSUB32x4; break;
2843 case Iop_QSub16Sx8: op = ARM64vecb_SQSUB16x8; break;
2844 case Iop_QSub8Sx16: op = ARM64vecb_SQSUB8x16; break;
2845 case Iop_QSub64Ux2: op = ARM64vecb_UQSUB64x2; break;
2846 case Iop_QSub32Ux4: op = ARM64vecb_UQSUB32x4; break;
2847 case Iop_QSub16Ux8: op = ARM64vecb_UQSUB16x8; break;
2848 case Iop_QSub8Ux16: op = ARM64vecb_UQSUB8x16; break;
2849 case Iop_QDMulHi32Sx4: op = ARM64vecb_SQDMULH32x4; break;
2850 case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break;
2851 case Iop_QRDMulHi32Sx4: op = ARM64vecb_SQRDMULH32x4; break;
2852 case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break;
2853 case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break;
2854 case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break;
2855 case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break;
2856 case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break;
2857 case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break;
2858 case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break;
2859 case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break;
2860 case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break;
2861 case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break;
2862 case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break;
2863 case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break;
2864 case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break;
2865 case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break;
2866 case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break;
2867 case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break;
2868 case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break;
2869 case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2; break;
2870 case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break;
2871 case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break;
2872 case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break;
2873 case Iop_RecipStep64Fx2: setRM = True;
2874 op = ARM64vecb_FRECPS64x2; break;
2875 case Iop_RecipStep32Fx4: setRM = True;
2876 op = ARM64vecb_FRECPS32x4; break;
2877 case Iop_RSqrtStep64Fx2: setRM = True;
2878 op = ARM64vecb_FRSQRTS64x2; break;
2879 case Iop_RSqrtStep32Fx4: setRM = True;
2880 op = ARM64vecb_FRSQRTS32x4; break;
2881 default: vassert(0);
2883 if (setRM) {
2884 // This is a bit of a kludge. We should do rm properly for
2885 // these recip-step insns, but that would require changing the
2886 // primop's type to take an rmode.
2887 set_FPCR_rounding_mode(env, IRExpr_Const(
2888 IRConst_U32(Irrm_NEAREST)));
2890 if (sw) {
2891 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
2892 } else {
2893 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
2895 return res;
2897 /* -- These only have 2 operand instructions, so we have to first move
2898 the first argument into a new register, for modification. -- */
2899 case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2900 case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2901 case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2902 case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2904 HReg res = newVRegV(env);
2905 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2906 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2907 ARM64VecModifyOp op = ARM64vecmo_INVALID;
2908 switch (e->Iex.Binop.op) {
2909 /* In the following 8 cases, the US - SU switching is intended.
2910 See the comments in libvex_ir.h for details, and also the
2911 ARM64 front end, where these primops are generated. */
2912 case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2913 case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2914 case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2915 case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2916 case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2917 case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2918 case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2919 case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2920 default: vassert(0);
2922 /* The order of the operands is important. Although this is
2923 basically addition, the two operands are extended differently,
2924 making it important to get them into the correct registers in
2925 the instruction. */
2926 addInstr(env, ARM64Instr_VMov(16, res, argR));
2927 addInstr(env, ARM64Instr_VModifyV(op, res, argL));
2928 return res;
2930 /* -- Shifts by an immediate. -- */
2931 case Iop_ShrN64x2: case Iop_ShrN32x4:
2932 case Iop_ShrN16x8: case Iop_ShrN8x16:
2933 case Iop_SarN64x2: case Iop_SarN32x4:
2934 case Iop_SarN16x8: case Iop_SarN8x16:
2935 case Iop_ShlN64x2: case Iop_ShlN32x4:
2936 case Iop_ShlN16x8: case Iop_ShlN8x16:
2937 case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2938 case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2939 case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2940 case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2941 case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2942 case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2944 IRExpr* argL = e->Iex.Binop.arg1;
2945 IRExpr* argR = e->Iex.Binop.arg2;
2946 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2947 UInt amt = argR->Iex.Const.con->Ico.U8;
2948 UInt limLo = 0;
2949 UInt limHi = 0;
2950 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2951 /* Establish the instruction to use. */
2952 switch (e->Iex.Binop.op) {
2953 case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break;
2954 case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break;
2955 case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break;
2956 case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break;
2957 case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break;
2958 case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break;
2959 case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break;
2960 case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break;
2961 case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break;
2962 case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break;
2963 case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break;
2964 case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break;
2965 case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break;
2966 case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break;
2967 case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break;
2968 case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break;
2969 case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break;
2970 case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break;
2971 case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break;
2972 case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break;
2973 case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2974 case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2975 case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2976 case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2977 default: vassert(0);
2979 /* Establish the shift limits, for sanity check purposes only. */
2980 switch (e->Iex.Binop.op) {
2981 case Iop_ShrN64x2: limLo = 1; limHi = 64; break;
2982 case Iop_ShrN32x4: limLo = 1; limHi = 32; break;
2983 case Iop_ShrN16x8: limLo = 1; limHi = 16; break;
2984 case Iop_ShrN8x16: limLo = 1; limHi = 8; break;
2985 case Iop_SarN64x2: limLo = 1; limHi = 64; break;
2986 case Iop_SarN32x4: limLo = 1; limHi = 32; break;
2987 case Iop_SarN16x8: limLo = 1; limHi = 16; break;
2988 case Iop_SarN8x16: limLo = 1; limHi = 8; break;
2989 case Iop_ShlN64x2: limLo = 0; limHi = 63; break;
2990 case Iop_ShlN32x4: limLo = 0; limHi = 31; break;
2991 case Iop_ShlN16x8: limLo = 0; limHi = 15; break;
2992 case Iop_ShlN8x16: limLo = 0; limHi = 7; break;
2993 case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2994 case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2995 case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2996 case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7; break;
2997 case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2998 case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2999 case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
3000 case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7; break;
3001 case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
3002 case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
3003 case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
3004 case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7; break;
3005 default: vassert(0);
3007 /* For left shifts, the allowable amt values are
3008 0 .. lane_bits-1. For right shifts the allowable
3009 values are 1 .. lane_bits. */
3010 if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
3011 HReg src = iselV128Expr(env, argL);
3012 HReg dst = newVRegV(env);
3013 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
3014 return dst;
3016 /* Special case some no-op shifts that the arm64 front end
3017 throws at us. We can't generate any instructions for these,
3018 but we don't need to either. */
3019 switch (e->Iex.Binop.op) {
3020 case Iop_ShrN64x2: case Iop_ShrN32x4:
3021 case Iop_ShrN16x8: case Iop_ShrN8x16:
3022 if (amt == 0) {
3023 return iselV128Expr(env, argL);
3025 break;
3026 default:
3027 break;
3029 /* otherwise unhandled */
3031 /* else fall out; this is unhandled */
3032 break;
3034 /* -- Saturating narrowing by an immediate -- */
3035 /* uu */
3036 case Iop_QandQShrNnarrow16Uto8Ux8:
3037 case Iop_QandQShrNnarrow32Uto16Ux4:
3038 case Iop_QandQShrNnarrow64Uto32Ux2:
3039 /* ss */
3040 case Iop_QandQSarNnarrow16Sto8Sx8:
3041 case Iop_QandQSarNnarrow32Sto16Sx4:
3042 case Iop_QandQSarNnarrow64Sto32Sx2:
3043 /* su */
3044 case Iop_QandQSarNnarrow16Sto8Ux8:
3045 case Iop_QandQSarNnarrow32Sto16Ux4:
3046 case Iop_QandQSarNnarrow64Sto32Ux2:
3047 /* ruu */
3048 case Iop_QandQRShrNnarrow16Uto8Ux8:
3049 case Iop_QandQRShrNnarrow32Uto16Ux4:
3050 case Iop_QandQRShrNnarrow64Uto32Ux2:
3051 /* rss */
3052 case Iop_QandQRSarNnarrow16Sto8Sx8:
3053 case Iop_QandQRSarNnarrow32Sto16Sx4:
3054 case Iop_QandQRSarNnarrow64Sto32Sx2:
3055 /* rsu */
3056 case Iop_QandQRSarNnarrow16Sto8Ux8:
3057 case Iop_QandQRSarNnarrow32Sto16Ux4:
3058 case Iop_QandQRSarNnarrow64Sto32Ux2:
3060 IRExpr* argL = e->Iex.Binop.arg1;
3061 IRExpr* argR = e->Iex.Binop.arg2;
3062 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
3063 UInt amt = argR->Iex.Const.con->Ico.U8;
3064 UInt limit = 0;
3065 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
3066 switch (e->Iex.Binop.op) {
3067 /* uu */
3068 case Iop_QandQShrNnarrow64Uto32Ux2:
3069 op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
3070 case Iop_QandQShrNnarrow32Uto16Ux4:
3071 op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
3072 case Iop_QandQShrNnarrow16Uto8Ux8:
3073 op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
3074 /* ss */
3075 case Iop_QandQSarNnarrow64Sto32Sx2:
3076 op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
3077 case Iop_QandQSarNnarrow32Sto16Sx4:
3078 op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
3079 case Iop_QandQSarNnarrow16Sto8Sx8:
3080 op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
3081 /* su */
3082 case Iop_QandQSarNnarrow64Sto32Ux2:
3083 op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
3084 case Iop_QandQSarNnarrow32Sto16Ux4:
3085 op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
3086 case Iop_QandQSarNnarrow16Sto8Ux8:
3087 op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
3088 /* ruu */
3089 case Iop_QandQRShrNnarrow64Uto32Ux2:
3090 op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
3091 case Iop_QandQRShrNnarrow32Uto16Ux4:
3092 op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
3093 case Iop_QandQRShrNnarrow16Uto8Ux8:
3094 op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
3095 /* rss */
3096 case Iop_QandQRSarNnarrow64Sto32Sx2:
3097 op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
3098 case Iop_QandQRSarNnarrow32Sto16Sx4:
3099 op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
3100 case Iop_QandQRSarNnarrow16Sto8Sx8:
3101 op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
3102 /* rsu */
3103 case Iop_QandQRSarNnarrow64Sto32Ux2:
3104 op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
3105 case Iop_QandQRSarNnarrow32Sto16Ux4:
3106 op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
3107 case Iop_QandQRSarNnarrow16Sto8Ux8:
3108 op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
3109 /**/
3110 default:
3111 vassert(0);
3113 if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
3114 HReg src = iselV128Expr(env, argL);
3115 HReg dst = newVRegV(env);
3116 HReg fpsr = newVRegI(env);
3117 /* Clear FPSR.Q, do the operation, and return both its
3118 result and the new value of FPSR.Q. We can simply
3119 zero out FPSR since all the other bits have no relevance
3120 in VEX generated code. */
3121 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3122 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3123 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
3124 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3125 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3126 ARM64sh_SHR));
3127 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3128 vassert(ril_one);
3129 addInstr(env, ARM64Instr_Logic(fpsr,
3130 fpsr, ril_one, ARM64lo_AND));
3131 /* Now we have: the main (shift) result in the bottom half
3132 of |dst|, and the Q bit at the bottom of |fpsr|.
3133 Combining them with a "InterleaveLO64x2" style operation
3134 produces a 128 bit value, dst[63:0]:fpsr[63:0],
3135 which is what we want. */
3136 HReg scratch = newVRegV(env);
3137 addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
3138 addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
3139 dst, dst, scratch));
3140 return dst;
3143 /* else fall out; this is unhandled */
3144 break;
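/* Note (added, not from the original source): in the saturating-narrowing
   cases above, bit 27 of the FPSR is the cumulative saturation flag QC,
   which is why the code shifts the saved FPSR right by 27 and masks with 1
   before packing the Q bit alongside the narrowed result. */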
3147 // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
3148 // as it is in some ways more general and often leads to better
3149 // code overall.
3150 case Iop_ShlV128:
3151 case Iop_ShrV128: {
3152 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
3153 /* This is tricky. Generate an EXT instruction with zeroes in
3154 the high operand (shift right) or low operand (shift left).
3155 Note that we can only slice in the EXT instruction at a byte
3156 level of granularity, so the shift amount needs careful
3157 checking. */
3158 IRExpr* argL = e->Iex.Binop.arg1;
3159 IRExpr* argR = e->Iex.Binop.arg2;
3160 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
3161 UInt amt = argR->Iex.Const.con->Ico.U8;
3162 Bool amtOK = False;
3163 switch (amt) {
3164 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
3165 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
3166 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
3167 amtOK = True; break;
3169 /* We could also deal with amt==0 by copying the source to
3170 the destination, but there's no need for that so far. */
3171 if (amtOK) {
3172 HReg src = iselV128Expr(env, argL);
3173 HReg srcZ = newVRegV(env);
3174 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
3175 UInt immB = amt / 8;
3176 vassert(immB >= 1 && immB <= 15);
3177 HReg dst = newVRegV(env);
3178 if (isSHR) {
3179 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
3180 immB));
3181 } else {
3182 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
3183 16 - immB));
3185 return dst;
3188 /* else fall out; this is unhandled */
3189 break;
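/* Note (added, not from the original source): as a concrete instance of the
   ShlV128/ShrV128 handling above, ShrV128(v, 0x40) -- a right shift by 64
   bits -- gives immB = 8, so the EXT picks up the high 8 bytes of the
   source followed by 8 bytes of the zero vector: the old high half lands in
   the low half of the result and the high half becomes zero. */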
3192 case Iop_PolynomialMull8x8:
3193 case Iop_Mull32Ux2:
3194 case Iop_Mull16Ux4:
3195 case Iop_Mull8Ux8:
3196 case Iop_Mull32Sx2:
3197 case Iop_Mull16Sx4:
3198 case Iop_Mull8Sx8:
3199 case Iop_QDMull32Sx2:
3200 case Iop_QDMull16Sx4:
3202 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
3203 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3204 HReg vSrcL = newVRegV(env);
3205 HReg vSrcR = newVRegV(env);
3206 HReg dst = newVRegV(env);
3207 ARM64VecBinOp op = ARM64vecb_INVALID;
3208 switch (e->Iex.Binop.op) {
3209 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break;
3210 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break;
3211 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break;
3212 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break;
3213 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break;
3214 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; break;
3215 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break;
3216 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break;
3217 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break;
3218 default: vassert(0);
3220 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
3221 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
3222 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
3223 return dst;
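            /* Sketch of what this produces for, e.g., Iop_Mull32Ux2: the two
               64-bit integer args hold lanes {a1,a0} and {b1,b0}; after being
               copied into vector registers, a UMULL .2D,.2S,.2S style multiply
               widens lane-wise, giving the 128-bit result {a1*b1, a0*b0} as
               two 64-bit lanes.  The signed, polynomial and saturating-doubling
               variants differ only in the vector opcode selected above. */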
3226 /* ... */
3227 default:
3228 break;
3229 } /* switch on the binop */
3230 } /* if (e->tag == Iex_Binop) */
3232 if (e->tag == Iex_Triop) {
3233 IRTriop* triop = e->Iex.Triop.details;
3234 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
3235 switch (triop->op) {
3236 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
3237 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
3238 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
3239 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
3240 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
3241 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
3242 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
3243 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
3244 case Iop_Add16Fx8: vecbop = ARM64vecb_FADD16x8; break;
3245 case Iop_Sub16Fx8: vecbop = ARM64vecb_FSUB16x8; break;
3246 default: break;
3248 if (vecbop != ARM64vecb_INVALID) {
3249 HReg argL = iselV128Expr(env, triop->arg2);
3250 HReg argR = iselV128Expr(env, triop->arg3);
3251 HReg dst = newVRegV(env);
3252 set_FPCR_rounding_mode(env, triop->arg1);
3253 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
3254 return dst;
3257 if (triop->op == Iop_SliceV128) {
3258 /* Note that, compared to ShlV128/ShrV128 just above, the shift
3259 amount here is in bytes, not bits. */
3260 IRExpr* argHi = triop->arg1;
3261 IRExpr* argLo = triop->arg2;
3262 IRExpr* argAmt = triop->arg3;
3263 if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
3264 UInt amt = argAmt->Iex.Const.con->Ico.U8;
3265 Bool amtOK = amt >= 1 && amt <= 15;
3266            /* We could also deal with amt==0 by copying argLo to
3267 the destination, but there's no need for that so far. */
3268 if (amtOK) {
3269 HReg srcHi = iselV128Expr(env, argHi);
3270 HReg srcLo = iselV128Expr(env, argLo);
3271 HReg dst = newVRegV(env);
3272 addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
3273 return dst;
3276 /* else fall out; this is unhandled */
3279 } /* if (e->tag == Iex_Triop) */
3281 if (e->tag == Iex_ITE) {
3282 // This code sequence is pretty feeble. We'd do better to generate BSL
3283 // here.
3284 HReg rX = newVRegI(env);
3286 ARM64CondCode cc = iselCondCode_C(env, e->Iex.ITE.cond);
3287 addInstr(env, ARM64Instr_Set64(rX, cc));
3288 // cond: rX = 1 !cond: rX = 0
3290 // Mask the Set64 result. This is paranoia (should be unnecessary).
3291 ARM64RIL* one = mb_mkARM64RIL_I(1);
3292 vassert(one);
3293 addInstr(env, ARM64Instr_Logic(rX, rX, one, ARM64lo_AND));
3294 // cond: rX = 1 !cond: rX = 0
3296 // Propagate to all bits in the 64 bit word by subtracting 1 from it.
3297 // This also inverts the sense of the value.
3298 addInstr(env, ARM64Instr_Arith(rX, rX, ARM64RIA_I12(1,0),
3299 /*isAdd=*/False));
3300 // cond: rX = 0-(62)-0 !cond: rX = 1-(62)-1
3302 // Duplicate rX into a vector register
3303 HReg vMask = newVRegV(env);
3304 addInstr(env, ARM64Instr_VQfromXX(vMask, rX, rX));
3305 // cond: vMask = 0-(126)-0 !cond: vMask = 1-(126)-1
3307 HReg vIfTrue = iselV128Expr(env, e->Iex.ITE.iftrue);
3308 HReg vIfFalse = iselV128Expr(env, e->Iex.ITE.iffalse);
3310 // Mask out iffalse value as needed
3311 addInstr(env,
3312 ARM64Instr_VBinV(ARM64vecb_AND, vIfFalse, vIfFalse, vMask));
3314 // Invert the mask so we can use it for the iftrue value
3315 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, vMask, vMask));
3316 // cond: vMask = 1-(126)-1 !cond: vMask = 0-(126)-0
3318 // Mask out iftrue value as needed
3319 addInstr(env,
3320 ARM64Instr_VBinV(ARM64vecb_AND, vIfTrue, vIfTrue, vMask));
3322 // Merge the masked iftrue and iffalse results.
3323 HReg res = newVRegV(env);
3324 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ORR, res, vIfTrue, vIfFalse));
3326 return res;
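      /* The BSL improvement mentioned above, as a sketch only -- it assumes
         a BSL vector op were added to this backend, which is not shown here.
         AArch64's BSL computes dst = (dst & opnd1) | (~dst & opnd2), so once
         |vMask| is all-ones or all-zeroes,
            mov  vTmp, vMask
            bsl  vTmp, vIfTrue, vIfFalse
         would perform the selection in two vector instructions instead of
         the AND/NOT/AND/ORR sequence used here. */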
3329 v128_expr_bad:
3330 ppIRExpr(e);
3331 vpanic("iselV128Expr_wrk");
3335 /*---------------------------------------------------------*/
3336 /*--- ISEL: Floating point expressions (64 bit) ---*/
3337 /*---------------------------------------------------------*/
3339 /* Compute a 64-bit floating point value into a register, the identity
3340 of which is returned. As with iselIntExpr_R, the reg may be either
3341 real or virtual; in any case it must not be changed by subsequent
3342 code emitted by the caller. */
3344 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
3346 HReg r = iselDblExpr_wrk( env, e );
3347 # if 0
3348 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3349 # endif
3350 vassert(hregClass(r) == HRcFlt64);
3351 vassert(hregIsVirtual(r));
3352 return r;
3355 /* DO NOT CALL THIS DIRECTLY */
3356 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
3358 IRType ty = typeOfIRExpr(env->type_env,e);
3359 vassert(e);
3360 vassert(ty == Ity_F64);
3362 if (e->tag == Iex_RdTmp) {
3363 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3366 if (e->tag == Iex_Const) {
3367 IRConst* con = e->Iex.Const.con;
3368 if (con->tag == Ico_F64i) {
3369 HReg src = newVRegI(env);
3370 HReg dst = newVRegD(env);
3371 addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
3372 addInstr(env, ARM64Instr_VDfromX(dst, src));
3373 return dst;
3375 if (con->tag == Ico_F64) {
3376 HReg src = newVRegI(env);
3377 HReg dst = newVRegD(env);
3378 union { Double d64; ULong u64; } u;
3379 vassert(sizeof(u) == 8);
3380 u.d64 = con->Ico.F64;
3381 addInstr(env, ARM64Instr_Imm64(src, u.u64));
3382 addInstr(env, ARM64Instr_VDfromX(dst, src));
3383 return dst;
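         /* Illustration of the bit-level transfer above: for the F64 constant
            1.0 the union gives u.u64 == 0x3FF0000000000000, which Imm64
            materialises in an integer register and VDfromX then copies, bit
            for bit, into the destination D register. */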
3387 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3388 vassert(e->Iex.Load.ty == Ity_F64);
3389 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3390 HReg res = newVRegD(env);
3391 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
3392 return res;
3395 if (e->tag == Iex_Get) {
3396 Int offs = e->Iex.Get.offset;
3397 if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
3398 HReg rD = newVRegD(env);
3399 HReg rN = get_baseblock_register();
3400 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
3401 return rD;
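         /* The "offs < 32768 && 0 == (offs & 7)" guard appears to match the
            reachable range of a scaled unsigned 12-bit offset for 8-byte
            accesses: 4095 * 8 = 32760 is the largest encodable offset
            (an assumption about VLdStD's encoding, not stated here). */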
3405 if (e->tag == Iex_Unop) {
3406 switch (e->Iex.Unop.op) {
3407 case Iop_NegF64: {
3408 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3409 HReg dst = newVRegD(env);
3410 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
3411 return dst;
3413 case Iop_AbsF64: {
3414 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3415 HReg dst = newVRegD(env);
3416 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
3417 return dst;
3419 case Iop_F32toF64: {
3420 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3421 HReg dst = newVRegD(env);
3422 addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
3423 return dst;
3425 case Iop_F16toF64: {
3426 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3427 HReg dst = newVRegD(env);
3428 addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
3429 return dst;
3431 case Iop_I32UtoF64:
3432 case Iop_I32StoF64: {
3433            /* Rounding mode is not involved here: a 32-bit integer always
3434               fits exactly in F64's 53-bit significand, so the conversion
3435               can always be done without loss of precision. */
3436 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3437 HReg dst = newVRegD(env);
3438 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
3439 ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
3440 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
3441 return dst;
3443 case Iop_RoundF64toIntA0: {
3444 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3445 HReg dst = newVRegD(env);
3446 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINTA0, dst, src));
3447 return dst;
3449 case Iop_RoundF64toIntE: {
3450 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3451 HReg dst = newVRegD(env);
3452 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINTE, dst, src));
3453 return dst;
3455 default:
3456 break;
3460 if (e->tag == Iex_Binop) {
3461 switch (e->Iex.Binop.op) {
3462 case Iop_RoundF64toInt:
3463 case Iop_SqrtF64:
3464 case Iop_RecpExpF64: {
3465 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3466 HReg dst = newVRegD(env);
3467 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3468 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3469 switch (e->Iex.Binop.op) {
3470 case Iop_RoundF64toInt: op = ARM64fpu_RINT; break;
3471 case Iop_SqrtF64: op = ARM64fpu_SQRT; break;
3472 case Iop_RecpExpF64: op = ARM64fpu_RECPX; break;
3473 default: vassert(0);
3475 addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
3476 return dst;
3478 case Iop_I64StoF64:
3479 case Iop_I64UtoF64: {
3480 ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
3481 ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
3482 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3483 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3484 HReg dstS = newVRegD(env);
3485 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3486 return dstS;
3488 default:
3489 break;
3493 if (e->tag == Iex_Triop) {
3494 IRTriop* triop = e->Iex.Triop.details;
3495 ARM64FpBinOp dblop = ARM64fpb_INVALID;
3496 switch (triop->op) {
3497 case Iop_DivF64: dblop = ARM64fpb_DIV; break;
3498 case Iop_MulF64: dblop = ARM64fpb_MUL; break;
3499 case Iop_SubF64: dblop = ARM64fpb_SUB; break;
3500 case Iop_AddF64: dblop = ARM64fpb_ADD; break;
3501 default: break;
3503 if (dblop != ARM64fpb_INVALID) {
3504 HReg argL = iselDblExpr(env, triop->arg2);
3505 HReg argR = iselDblExpr(env, triop->arg3);
3506 HReg dst = newVRegD(env);
3507 set_FPCR_rounding_mode(env, triop->arg1);
3508 addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
3509 return dst;
3513 if (e->tag == Iex_Qop) {
3514 IRQop* qop = e->Iex.Qop.details;
3515 ARM64FpTriOp triop = ARM64fpt_INVALID;
3516 switch (qop->op) {
3517 case Iop_MAddF64: triop = ARM64fpt_FMADD; break;
3518 case Iop_MSubF64: triop = ARM64fpt_FMSUB; break;
3519 default: break;
3521 if (triop != ARM64fpt_INVALID) {
3522 HReg N = iselDblExpr(env, qop->arg2);
3523 HReg M = iselDblExpr(env, qop->arg3);
3524 HReg A = iselDblExpr(env, qop->arg4);
3525 HReg dst = newVRegD(env);
3526 set_FPCR_rounding_mode(env, qop->arg1);
3527 addInstr(env, ARM64Instr_VTriD(triop, dst, N, M, A));
3528 return dst;
3532 if (e->tag == Iex_ITE) {
3533 /* ITE(ccexpr, iftrue, iffalse) */
3534 ARM64CondCode cc;
3535 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3536 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3537 HReg dst = newVRegD(env);
3538 cc = iselCondCode_C(env, e->Iex.ITE.cond);
3539 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
3540 return dst;
3543 ppIRExpr(e);
3544 vpanic("iselDblExpr_wrk");
3548 /*---------------------------------------------------------*/
3549 /*--- ISEL: Floating point expressions (32 bit) ---*/
3550 /*---------------------------------------------------------*/
3552 /* Compute a 32-bit floating point value into a register, the identity
3553 of which is returned. As with iselIntExpr_R, the reg may be either
3554 real or virtual; in any case it must not be changed by subsequent
3555 code emitted by the caller. Values are generated into HRcFlt64
3556 registers despite the values themselves being Ity_F32s. */
3558 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3560 HReg r = iselFltExpr_wrk( env, e );
3561 # if 0
3562 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3563 # endif
3564 vassert(hregClass(r) == HRcFlt64);
3565 vassert(hregIsVirtual(r));
3566 return r;
3569 /* DO NOT CALL THIS DIRECTLY */
3570 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3572 IRType ty = typeOfIRExpr(env->type_env,e);
3573 vassert(e);
3574 vassert(ty == Ity_F32);
3576 if (e->tag == Iex_RdTmp) {
3577 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3580 if (e->tag == Iex_Const) {
3581      /* This is something of a kludge. Since a 32-bit floating point
3582         zero is just all zeroes, create a 64-bit zero word
3583         and transfer it. This avoids having to create an SfromW
3584         instruction for this specific case. */
3585 IRConst* con = e->Iex.Const.con;
3586 if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
3587 HReg src = newVRegI(env);
3588 HReg dst = newVRegD(env);
3589 addInstr(env, ARM64Instr_Imm64(src, 0));
3590 addInstr(env, ARM64Instr_VDfromX(dst, src));
3591 return dst;
3593 if (con->tag == Ico_F32) {
3594 HReg src = newVRegI(env);
3595 HReg dst = newVRegD(env);
3596 union { Float f32; UInt u32; } u;
3597 vassert(sizeof(u) == 4);
3598 u.f32 = con->Ico.F32;
3599 addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
3600 addInstr(env, ARM64Instr_VDfromX(dst, src));
3601 return dst;
3605 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3606 vassert(e->Iex.Load.ty == Ity_F32);
3607 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3608 HReg res = newVRegD(env);
3609 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
3610 return res;
3613 if (e->tag == Iex_Get) {
3614 Int offs = e->Iex.Get.offset;
3615 if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
3616 HReg rD = newVRegD(env);
3617 HReg rN = get_baseblock_register();
3618 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
3619 return rD;
3623 if (e->tag == Iex_Unop) {
3624 switch (e->Iex.Unop.op) {
3625 case Iop_NegF32: {
3626 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3627 HReg dst = newVRegD(env);
3628 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
3629 return dst;
3631 case Iop_AbsF32: {
3632 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3633 HReg dst = newVRegD(env);
3634 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
3635 return dst;
3637 case Iop_F16toF32: {
3638 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3639 HReg dst = newVRegD(env);
3640 addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
3641 return dst;
3643 case Iop_RoundF32toIntA0: {
3644 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3645 HReg dst = newVRegD(env);
3646 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINTA0, dst, src));
3647 return dst;
3649 case Iop_RoundF32toIntE: {
3650 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3651 HReg dst = newVRegD(env);
3652 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINTE, dst, src));
3653 return dst;
3655 default:
3656 break;
3660 if (e->tag == Iex_Binop) {
3661 switch (e->Iex.Binop.op) {
3662 case Iop_RoundF32toInt:
3663 case Iop_SqrtF32:
3664 case Iop_RecpExpF32: {
3665 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
3666 HReg dst = newVRegD(env);
3667 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3668 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3669 switch (e->Iex.Binop.op) {
3670 case Iop_RoundF32toInt: op = ARM64fpu_RINT; break;
3671 case Iop_SqrtF32: op = ARM64fpu_SQRT; break;
3672 case Iop_RecpExpF32: op = ARM64fpu_RECPX; break;
3673 default: vassert(0);
3675 addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
3676 return dst;
3678 case Iop_F64toF32: {
3679 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3680 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3681 HReg dstS = newVRegD(env);
3682 addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
3683 return dstS;
3685 case Iop_I32UtoF32:
3686 case Iop_I32StoF32:
3687 case Iop_I64UtoF32:
3688 case Iop_I64StoF32: {
3689 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
3690 switch (e->Iex.Binop.op) {
3691 case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
3692 case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
3693 case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
3694 case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
3695 default: vassert(0);
3697 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3698 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3699 HReg dstS = newVRegD(env);
3700 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3701 return dstS;
3703 default:
3704 break;
3708 if (e->tag == Iex_Triop) {
3709 IRTriop* triop = e->Iex.Triop.details;
3710 ARM64FpBinOp sglop = ARM64fpb_INVALID;
3711 switch (triop->op) {
3712 case Iop_DivF32: sglop = ARM64fpb_DIV; break;
3713 case Iop_MulF32: sglop = ARM64fpb_MUL; break;
3714 case Iop_SubF32: sglop = ARM64fpb_SUB; break;
3715 case Iop_AddF32: sglop = ARM64fpb_ADD; break;
3716 default: break;
3718 if (sglop != ARM64fpb_INVALID) {
3719 HReg argL = iselFltExpr(env, triop->arg2);
3720 HReg argR = iselFltExpr(env, triop->arg3);
3721 HReg dst = newVRegD(env);
3722 set_FPCR_rounding_mode(env, triop->arg1);
3723 addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
3724 return dst;
3728 if (e->tag == Iex_ITE) {
3729 /* ITE(ccexpr, iftrue, iffalse) */
3730 ARM64CondCode cc;
3731 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
3732 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
3733 HReg dst = newVRegD(env);
3734 cc = iselCondCode_C(env, e->Iex.ITE.cond);
3735 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
3736 return dst;
3739 if (e->tag == Iex_Qop) {
3740 IRQop* qop = e->Iex.Qop.details;
3741 ARM64FpTriOp triop = ARM64fpt_INVALID;
3742 switch (qop->op) {
3743 case Iop_MAddF32: triop = ARM64fpt_FMADD; break;
3744 case Iop_MSubF32: triop = ARM64fpt_FMSUB; break;
3745 default: break;
3748 if (triop != ARM64fpt_INVALID) {
3749 HReg N = iselFltExpr(env, qop->arg2);
3750 HReg M = iselFltExpr(env, qop->arg3);
3751 HReg A = iselFltExpr(env, qop->arg4);
3752 HReg dst = newVRegD(env);
3753 set_FPCR_rounding_mode(env, qop->arg1);
3754 addInstr(env, ARM64Instr_VTriS(triop, dst, N, M, A));
3755 return dst;
3759 ppIRExpr(e);
3760 vpanic("iselFltExpr_wrk");
3764 /*---------------------------------------------------------*/
3765 /*--- ISEL: Floating point expressions (16 bit) ---*/
3766 /*---------------------------------------------------------*/
3768 /* Compute a 16-bit floating point value into a register, the identity
3769 of which is returned. As with iselIntExpr_R, the reg may be either
3770 real or virtual; in any case it must not be changed by subsequent
3771 code emitted by the caller. Values are generated into HRcFlt64
3772 registers despite the values themselves being Ity_F16s. */
3774 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3776 HReg r = iselF16Expr_wrk( env, e );
3777 # if 0
3778 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3779 # endif
3780 vassert(hregClass(r) == HRcFlt64);
3781 vassert(hregIsVirtual(r));
3782 return r;
3785 /* DO NOT CALL THIS DIRECTLY */
3786 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3788 IRType ty = typeOfIRExpr(env->type_env,e);
3789 vassert(e);
3790 vassert(ty == Ity_F16);
3792 if (e->tag == Iex_RdTmp) {
3793 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3796 if (e->tag == Iex_Get) {
3797 Int offs = e->Iex.Get.offset;
3798 if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3799 HReg rD = newVRegD(env);
3800 HReg rN = get_baseblock_register();
3801 addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3802 return rD;
3806 if (e->tag == Iex_Unop) {
3807 switch (e->Iex.Unop.op) {
3808 case Iop_NegF16: {
3809 HReg srcH = iselF16Expr(env, e->Iex.Unop.arg);
3810 HReg dstH = newVRegD(env);
3811 addInstr(env, ARM64Instr_VUnaryH(ARM64fpu_NEG, dstH, srcH));
3812 return dstH;
3814 case Iop_AbsF16: {
3815 HReg srcH = iselF16Expr(env, e->Iex.Unop.arg);
3816 HReg dstH = newVRegD(env);
3817 addInstr(env, ARM64Instr_VUnaryH(ARM64fpu_ABS, dstH, srcH));
3818 return dstH;
3820 default:
3821 break;
3825 if (e->tag == Iex_Binop) {
3826 switch (e->Iex.Binop.op) {
3827 case Iop_F32toF16: {
3828 HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3829 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3830 HReg dstH = newVRegD(env);
3831 addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3832 return dstH;
3834 case Iop_F64toF16: {
3835 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3836 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3837 HReg dstH = newVRegD(env);
3838 addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3839 return dstH;
3841 case Iop_SqrtF16: {
3842 HReg src = iselF16Expr(env, e->Iex.Binop.arg2);
3843 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3844 HReg dst = newVRegD(env);
3845 addInstr(env, ARM64Instr_VUnaryH(ARM64fpu_SQRT, dst, src));
3846 return dst;
3848 default:
3849 break;
3853 if (e->tag == Iex_Triop) {
3854 IRTriop* triop = e->Iex.Triop.details;
3855 ARM64FpBinOp sglop = ARM64fpb_INVALID;
3856 switch (triop->op) {
3857 case Iop_AddF16: sglop = ARM64fpb_ADD; break;
3858 case Iop_SubF16: sglop = ARM64fpb_SUB; break;
3859 default: break;
3861 if (sglop != ARM64fpb_INVALID) {
3862 HReg argL = iselF16Expr(env, triop->arg2);
3863 HReg argR = iselF16Expr(env, triop->arg3);
3864 HReg dst = newVRegD(env);
3865 set_FPCR_rounding_mode(env, triop->arg1);
3866 addInstr(env, ARM64Instr_VBinH(sglop, dst, argL, argR));
3867 return dst;
3871 ppIRExpr(e);
3872 vpanic("iselF16Expr_wrk");
3876 /*---------------------------------------------------------*/
3877 /*--- ISEL: Vector expressions (256 bit) ---*/
3878 /*---------------------------------------------------------*/
3880 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3881 ISelEnv* env, IRExpr* e )
3883 iselV256Expr_wrk( rHi, rLo, env, e );
3884 vassert(hregClass(*rHi) == HRcVec128);
3885 vassert(hregClass(*rLo) == HRcVec128);
3886 vassert(hregIsVirtual(*rHi));
3887 vassert(hregIsVirtual(*rLo));
3890 /* DO NOT CALL THIS DIRECTLY */
3891 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
3892 ISelEnv* env, IRExpr* e )
3894 vassert(e);
3895 IRType ty = typeOfIRExpr(env->type_env,e);
3896 vassert(ty == Ity_V256);
3898 /* read 256-bit IRTemp */
3899 if (e->tag == Iex_RdTmp) {
3900 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3901 return;
3904 if (e->tag == Iex_Binop) {
3905 switch (e->Iex.Binop.op) {
3906 case Iop_V128HLtoV256: {
3907 *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
3908 *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
3909 return;
3911 case Iop_QandSQsh64x2:
3912 case Iop_QandSQsh32x4:
3913 case Iop_QandSQsh16x8:
3914 case Iop_QandSQsh8x16:
3915 case Iop_QandUQsh64x2:
3916 case Iop_QandUQsh32x4:
3917 case Iop_QandUQsh16x8:
3918 case Iop_QandUQsh8x16:
3919 case Iop_QandSQRsh64x2:
3920 case Iop_QandSQRsh32x4:
3921 case Iop_QandSQRsh16x8:
3922 case Iop_QandSQRsh8x16:
3923 case Iop_QandUQRsh64x2:
3924 case Iop_QandUQRsh32x4:
3925 case Iop_QandUQRsh16x8:
3926 case Iop_QandUQRsh8x16:
3928 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
3929 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
3930 HReg fpsr = newVRegI(env);
3931 HReg resHi = newVRegV(env);
3932 HReg resLo = newVRegV(env);
3933 ARM64VecBinOp op = ARM64vecb_INVALID;
3934 switch (e->Iex.Binop.op) {
3935 case Iop_QandSQsh64x2: op = ARM64vecb_SQSHL64x2; break;
3936 case Iop_QandSQsh32x4: op = ARM64vecb_SQSHL32x4; break;
3937 case Iop_QandSQsh16x8: op = ARM64vecb_SQSHL16x8; break;
3938 case Iop_QandSQsh8x16: op = ARM64vecb_SQSHL8x16; break;
3939 case Iop_QandUQsh64x2: op = ARM64vecb_UQSHL64x2; break;
3940 case Iop_QandUQsh32x4: op = ARM64vecb_UQSHL32x4; break;
3941 case Iop_QandUQsh16x8: op = ARM64vecb_UQSHL16x8; break;
3942 case Iop_QandUQsh8x16: op = ARM64vecb_UQSHL8x16; break;
3943 case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
3944 case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
3945 case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
3946 case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
3947 case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
3948 case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
3949 case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
3950 case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
3951 default: vassert(0);
3953 /* Clear FPSR.Q, do the operation, and return both its result
3954 and the new value of FPSR.Q. We can simply zero out FPSR
3955 since all the other bits have no relevance in VEX generated
3956 code. */
3957 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3958 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3959 addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
3960 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3961 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3962 ARM64sh_SHR));
3963 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3964 vassert(ril_one);
3965 addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
3966 /* Now we have: the main (shift) result in |resLo|, and the
3967 Q bit at the bottom of |fpsr|. */
3968 addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
3969 *rHi = resHi;
3970 *rLo = resLo;
3971 return;
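               /* So, as a sketch of the V256 result convention for these
                  "Qand" ops: *rLo is the 128-bit shift result, and *rHi is a
                  128-bit value whose low 64 bits hold the 0-or-1 saturation
                  (Q) flag read back from FPSR. */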
3974 /* ... */
3975 default:
3976 break;
3977 } /* switch on the binop */
3978 } /* if (e->tag == Iex_Binop) */
3980 ppIRExpr(e);
3981 vpanic("iselV256Expr_wrk");
3985 /*---------------------------------------------------------*/
3986 /*--- ISEL: Statements ---*/
3987 /*---------------------------------------------------------*/
3989 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3991 if (vex_traceflags & VEX_TRACE_VCODE) {
3992 vex_printf("\n-- ");
3993 ppIRStmt(stmt);
3994 vex_printf("\n");
3996 switch (stmt->tag) {
3998 /* --------- STORE --------- */
3999 /* little-endian write to memory */
4000 case Ist_Store: {
4001 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
4002 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
4003 IREndness end = stmt->Ist.Store.end;
4005 if (tya != Ity_I64 || end != Iend_LE)
4006 goto stmt_fail;
4008 if (tyd == Ity_I64) {
4009 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
4010 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
4011 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
4012 return;
4014 if (tyd == Ity_I32) {
4015 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
4016 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
4017 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
4018 return;
4020 if (tyd == Ity_I16) {
4021 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
4022 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
4023 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
4024 return;
4026 if (tyd == Ity_I8) {
4027 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
4028 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
4029 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
4030 return;
4032 if (tyd == Ity_V128) {
4033 HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
4034 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
4035 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
4036 return;
4038 if (tyd == Ity_F64) {
4039 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
4040 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
4041 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
4042 return;
4044 if (tyd == Ity_F32) {
4045 HReg sD = iselFltExpr(env, stmt->Ist.Store.data);
4046 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
4047 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
4048 return;
4050 break;
4053 /* --------- PUT --------- */
4054 /* write guest state, fixed offset */
4055 case Ist_Put: {
4056 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
4057 UInt offs = (UInt)stmt->Ist.Put.offset;
4058 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
4059 HReg rD = INVALID_HREG;
4060 if (isZeroU64(stmt->Ist.Put.data)) {
4061 // In this context, XZR_XSP denotes the zero register.
4062 rD = hregARM64_XZR_XSP();
4063 } else {
4064 rD = iselIntExpr_R(env, stmt->Ist.Put.data);
4066 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
4067 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
4068 return;
4070 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
4071 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
4072 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
4073 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
4074 return;
4076 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
4077 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
4078 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
4079 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
4080 return;
4082 if (tyd == Ity_I8 && offs < (1<<12)) {
4083 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
4084 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
4085 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
4086 return;
4088 if (tyd == Ity_V128 && offs < (1<<12)) {
4089 HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
4090 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
4091 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
4092 return;
4094 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
4095 HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
4096 HReg bbp = get_baseblock_register();
4097 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
4098 return;
4100 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
4101 HReg sD = iselFltExpr(env, stmt->Ist.Put.data);
4102 HReg bbp = get_baseblock_register();
4103 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
4104 return;
4106 if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
4107 HReg hD = iselF16Expr(env, stmt->Ist.Put.data);
4108 HReg bbp = get_baseblock_register();
4109 addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
4110 return;
4113 break;
4116 /* --------- TMP --------- */
4117 /* assign value to temporary */
4118 case Ist_WrTmp: {
4119 IRTemp tmp = stmt->Ist.WrTmp.tmp;
4120 IRType ty = typeOfIRTemp(env->type_env, tmp);
4122 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
4123 /* We could do a lot better here. But for the time being: */
4124 HReg dst = lookupIRTemp(env, tmp);
4125 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
4126 addInstr(env, ARM64Instr_MovI(dst, rD));
4127 return;
4129 if (ty == Ity_I1) {
4130         /* Here, we are generating an I1 value into a 64-bit register.
4131            Make sure the value in the register is only zero or one,
4132            and nothing else. This allows optimisation of the
4133 1Uto64(tmp:I1) case, by making it simply a copy of the
4134 register holding 'tmp'. The point being that the value in
4135 the register holding 'tmp' can only have been created
4136 here. LATER: that seems dangerous; safer to do 'tmp & 1'
4137 in that case. Also, could do this just with a single CINC
4138 insn. */
4139 /* CLONE-01 */
4140 HReg zero = hregARM64_XZR_XSP(); // XZR in this context
4141 HReg one = newVRegI(env);
4142 HReg dst = lookupIRTemp(env, tmp);
4143 addInstr(env, ARM64Instr_Imm64(one, 1));
4144 ARM64CondCode cc = iselCondCode_C(env, stmt->Ist.WrTmp.data);
4145 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
4146 return;
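            /* The single-instruction alternative mentioned above, as a sketch
               only (it assumes an encoder for CSINC/CSET were added, which
               this file does not provide):
                  csinc dst, xzr, xzr, <inverted cc>   // i.e. cset dst, cc
               yields dst = cc ? 1 : 0 directly, with no Imm64 needed. */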
4148 if (ty == Ity_F64) {
4149 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
4150 HReg dst = lookupIRTemp(env, tmp);
4151 addInstr(env, ARM64Instr_VMov(8, dst, src));
4152 return;
4154 if (ty == Ity_F32) {
4155 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4156 HReg dst = lookupIRTemp(env, tmp);
4157 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
4158 return;
4160 if (ty == Ity_F16) {
4161 HReg src = iselF16Expr(env, stmt->Ist.WrTmp.data);
4162 HReg dst = lookupIRTemp(env, tmp);
4163 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
4164 return;
4166 if (ty == Ity_I128) {
4167 HReg rHi, rLo, dstHi, dstLo;
4168 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
4169 lookupIRTempPair( &dstHi, &dstLo, env, tmp);
4170 addInstr(env, ARM64Instr_MovI(dstHi, rHi));
4171 addInstr(env, ARM64Instr_MovI(dstLo, rLo));
4172 return;
4174 if (ty == Ity_V128) {
4175 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
4176 HReg dst = lookupIRTemp(env, tmp);
4177 addInstr(env, ARM64Instr_VMov(16, dst, src));
4178 return;
4180 if (ty == Ity_V256) {
4181 HReg srcHi, srcLo, dstHi, dstLo;
4182 iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
4183 lookupIRTempPair( &dstHi, &dstLo, env, tmp);
4184 addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
4185 addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
4186 return;
4188 break;
4191 /* --------- Call to DIRTY helper --------- */
4192 /* call complex ("dirty") helper function */
4193 case Ist_Dirty: {
4194 IRDirty* d = stmt->Ist.Dirty.details;
4196 /* Figure out the return type, if any. */
4197 IRType retty = Ity_INVALID;
4198 if (d->tmp != IRTemp_INVALID)
4199 retty = typeOfIRTemp(env->type_env, d->tmp);
4201 Bool retty_ok = False;
4202 switch (retty) {
4203 case Ity_INVALID: /* function doesn't return anything */
4204 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4205 case Ity_V128:
4206 retty_ok = True; break;
4207 default:
4208 break;
4210 if (!retty_ok)
4211 break; /* will go to stmt_fail: */
4213 /* Marshal args, do the call, and set the return value to 0x555..555
4214 if this is a conditional call that returns a value and the
4215 call is skipped. */
4216 UInt addToSp = 0;
4217 RetLoc rloc = mk_RetLoc_INVALID();
4218 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4219 vassert(is_sane_RetLoc(rloc));
4221 /* Now figure out what to do with the returned value, if any. */
4222 switch (retty) {
4223 case Ity_INVALID: {
4224 /* No return value. Nothing to do. */
4225 vassert(d->tmp == IRTemp_INVALID);
4226 vassert(rloc.pri == RLPri_None);
4227 vassert(addToSp == 0);
4228 return;
4230 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
4231 vassert(rloc.pri == RLPri_Int);
4232 vassert(addToSp == 0);
4233 /* The returned value is in x0. Park it in the register
4234 associated with tmp. */
4235 HReg dst = lookupIRTemp(env, d->tmp);
4236 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
4237 return;
4239 case Ity_V128: {
4240 /* The returned value is on the stack, and *retloc tells
4241 us where. Fish it off the stack and then move the
4242 stack pointer upwards to clear it, as directed by
4243 doHelperCall. */
4244 vassert(rloc.pri == RLPri_V128SpRel);
4245 vassert(rloc.spOff < 256); // stay sane
4246 vassert(addToSp >= 16); // ditto
4247 vassert(addToSp < 256); // ditto
4248 HReg dst = lookupIRTemp(env, d->tmp);
4249 HReg tmp = newVRegI(env); // the address of the returned value
4250 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
4251 addInstr(env, ARM64Instr_Arith(tmp, tmp,
4252 ARM64RIA_I12((UShort)rloc.spOff, 0),
4253 True/*isAdd*/ ));
4254 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
4255 addInstr(env, ARM64Instr_AddToSP(addToSp));
4256 return;
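               /* Concretely (an illustrative instance only): with
                  rloc.spOff == 16 and addToSp == 32, the sequence computes
                     tmp = SP; tmp = tmp + 16;   // address of the V128 result
                     dst = 128-bit load at [tmp];
                     SP  = SP + 32;              // discard the stack slot(s)
                  leaving the helper's V128 result in |dst|. */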
4258 default:
4259 /*NOTREACHED*/
4260 vassert(0);
4262 break;
4265 /* --------- Load Linked and Store Conditional --------- */
4266 case Ist_LLSC: {
4267 if (stmt->Ist.LLSC.storedata == NULL) {
4268 /* LL */
4269 IRTemp res = stmt->Ist.LLSC.result;
4270 IRType ty = typeOfIRTemp(env->type_env, res);
4271 if (ty == Ity_I128 || ty == Ity_I64 || ty == Ity_I32
4272 || ty == Ity_I16 || ty == Ity_I8) {
4273 Int szB = 0;
4274 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
4275 switch (ty) {
4276 case Ity_I8: szB = 1; break;
4277 case Ity_I16: szB = 2; break;
4278 case Ity_I32: szB = 4; break;
4279 case Ity_I64: szB = 8; break;
4280 case Ity_I128: szB = 16; break;
4281 default: vassert(0);
4283 if (szB == 16) {
4284 HReg r_dstMSword = INVALID_HREG;
4285 HReg r_dstLSword = INVALID_HREG;
4286 lookupIRTempPair(&r_dstMSword, &r_dstLSword, env, res);
4287 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
4288 addInstr(env, ARM64Instr_LdrEXP());
4289 addInstr(env, ARM64Instr_MovI(r_dstLSword, hregARM64_X2()));
4290 addInstr(env, ARM64Instr_MovI(r_dstMSword, hregARM64_X3()));
4291 } else {
4292 vassert(szB != 0);
4293 HReg r_dst = lookupIRTemp(env, res);
4294 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
4295 addInstr(env, ARM64Instr_LdrEX(szB));
4296 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
4298 return;
4300 goto stmt_fail;
4301 } else {
4302 /* SC */
4303 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
4304 if (tyd == Ity_I128 || tyd == Ity_I64 || tyd == Ity_I32
4305 || tyd == Ity_I16 || tyd == Ity_I8) {
4306 Int szB = 0;
4307 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
4308 switch (tyd) {
4309 case Ity_I8: szB = 1; break;
4310 case Ity_I16: szB = 2; break;
4311 case Ity_I32: szB = 4; break;
4312 case Ity_I64: szB = 8; break;
4313 case Ity_I128: szB = 16; break;
4314 default: vassert(0);
4316 if (szB == 16) {
4317 HReg rD_MSword = INVALID_HREG;
4318 HReg rD_LSword = INVALID_HREG;
4319 iselInt128Expr(&rD_MSword,
4320 &rD_LSword, env, stmt->Ist.LLSC.storedata);
4321 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD_LSword));
4322 addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rD_MSword));
4323 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
4324 addInstr(env, ARM64Instr_StrEXP());
4325 } else {
4326 vassert(szB != 0);
4327 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
4328 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
4329 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
4330 addInstr(env, ARM64Instr_StrEX(szB));
4332 } else {
4333 goto stmt_fail;
4335         /* Now x0 is 1 if the store failed, 0 if it succeeded. Change to IR
4336            conventions (0 is fail, 1 is success). Also transfer the
4337            result to r_res. */
4338 IRTemp res = stmt->Ist.LLSC.result;
4339 IRType ty = typeOfIRTemp(env->type_env, res);
4340 HReg r_res = lookupIRTemp(env, res);
4341 ARM64RIL* one = mb_mkARM64RIL_I(1);
4342 vassert(ty == Ity_I1);
4343 vassert(one);
4344 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
4345 ARM64lo_XOR));
4346 /* And be conservative -- mask off all but the lowest bit. */
4347 addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
4348 ARM64lo_AND));
4349 return;
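            /* Truth table for the XOR/AND pair above:
                 x0 = 0 (store succeeded): 0 ^ 1 = 1, 1 & 1 = 1  (IR: success)
                 x0 = 1 (store failed):    1 ^ 1 = 0, 0 & 1 = 0  (IR: fail) */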
4351 break;
4354 /* --------- ACAS --------- */
4355 case Ist_CAS: {
4356 IRCAS* cas = stmt->Ist.CAS.details;
4357 if (cas->oldHi == IRTemp_INVALID && cas->end == Iend_LE) {
4358 /* "normal" singleton CAS */
4359 UChar sz;
4360 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4361 switch (ty) {
4362 case Ity_I64: sz = 8; break;
4363 case Ity_I32: sz = 4; break;
4364 case Ity_I16: sz = 2; break;
4365 case Ity_I8: sz = 1; break;
4366 default: goto unhandled_cas;
4368 HReg rAddr = iselIntExpr_R(env, cas->addr);
4369 HReg rExpd = iselIntExpr_R(env, cas->expdLo);
4370 HReg rData = iselIntExpr_R(env, cas->dataLo);
4371 vassert(cas->expdHi == NULL);
4372 vassert(cas->dataHi == NULL);
4373 addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr));
4374 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd));
4375 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData));
4376 addInstr(env, ARM64Instr_CAS(sz));
4377            /* Now the lowest szB bytes of x1 either equal the lowest
4378               szB bytes of x5, indicating success, or they don't,
4379               indicating failure. */
4380 HReg rResult = hregARM64_X1();
4381 switch (sz) {
4382 case 8: break;
4383 case 4: rResult = widen_z_32_to_64(env, rResult); break;
4384 case 2: rResult = widen_z_16_to_64(env, rResult); break;
4385 case 1: rResult = widen_z_8_to_64(env, rResult); break;
4386 default: vassert(0);
4388 // "old" in this case is interpreted somewhat liberally, per
4389 // the previous comment.
4390 HReg rOld = lookupIRTemp(env, cas->oldLo);
4391 addInstr(env, ARM64Instr_MovI(rOld, rResult));
4392 return;
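            /* Example of the widening above: for a 1-byte CAS whose old
               in-memory value was 0xFF, the low byte of x1 is 0xFF;
               widen_z_8_to_64 zero-extends it, so |rOld| receives
               0x00000000000000FF -- a clean 64-bit value for the IR-level
               oldLo temporary. */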
4394 if (cas->oldHi != IRTemp_INVALID && cas->end == Iend_LE) {
4395 /* Paired register CAS, i.e. CASP */
4396 UChar sz;
4397 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4398 switch (ty) {
4399 case Ity_I64: sz = 8; break;
4400 case Ity_I32: sz = 4; break;
4401 default: goto unhandled_cas;
4403 HReg rAddr = iselIntExpr_R(env, cas->addr);
4405 HReg rExpd0 = iselIntExpr_R(env, cas->expdLo);
4406 vassert(cas->expdHi != NULL);
4407 HReg rExpd1 = iselIntExpr_R(env, cas->expdHi);
4409 HReg rData0 = iselIntExpr_R(env, cas->dataLo);
4410 vassert(cas->dataHi != NULL);
4411 HReg rData1 = iselIntExpr_R(env, cas->dataHi);
4413 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rAddr));
4415 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rExpd0));
4416 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd1));
4418 addInstr(env, ARM64Instr_MovI(hregARM64_X6(), rData0));
4419 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData1));
4421 addInstr(env, ARM64Instr_CASP(sz));
4423 HReg rResult0 = hregARM64_X0();
4424 HReg rResult1 = hregARM64_X1();
4425 switch (sz) {
4426 case 8: break;
4427 case 4: rResult0 = widen_z_32_to_64(env, rResult0);
4428 rResult1 = widen_z_32_to_64(env, rResult1);
4429 break;
4430 default: vassert(0);
4432 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4433 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4434 addInstr(env, ARM64Instr_MovI(rOldLo, rResult0));
4435 addInstr(env, ARM64Instr_MovI(rOldHi, rResult1));
4436 return;
4438 unhandled_cas:
4439 break;
4442 /* --------- MEM FENCE --------- */
4443 case Ist_MBE:
4444 switch (stmt->Ist.MBE.event) {
4445 case Imbe_Fence:
4446 addInstr(env, ARM64Instr_MFence());
4447 return;
4448 case Imbe_CancelReservation:
4449 addInstr(env, ARM64Instr_ClrEX());
4450 return;
4451 default:
4452 break;
4454 break;
4456 /* --------- INSTR MARK --------- */
4457 /* Doesn't generate any executable code ... */
4458 case Ist_IMark:
4459 return;
4461 /* --------- ABI HINT --------- */
4462 /* These have no meaning (denotation in the IR) and so we ignore
4463 them ... if any actually made it this far. */
4464 case Ist_AbiHint:
4465 return;
4467 /* --------- NO-OP --------- */
4468 case Ist_NoOp:
4469 return;
4471 /* --------- EXIT --------- */
4472 case Ist_Exit: {
4473 if (stmt->Ist.Exit.dst->tag != Ico_U64)
4474 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
4476 ARM64CondCode cc
4477 = iselCondCode_C(env, stmt->Ist.Exit.guard);
4478 ARM64AMode* amPC
4479 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
4481 /* Case: boring transfer to known address */
4482 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4483 if (env->chainingAllowed) {
4484 /* .. almost always true .. */
4485 /* Skip the event check at the dst if this is a forwards
4486 edge. */
4487 Bool toFastEP
4488 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
4489 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4490 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
4491 amPC, cc, toFastEP));
4492 } else {
4493 /* .. very occasionally .. */
4494 /* We can't use chaining, so ask for an assisted transfer,
4495 as that's the only alternative that is allowable. */
4496 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4497 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
4499 return;
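            /* Worked example of the toFastEP decision above: if env->max_ga
               is 0x40101B and the exit target is 0x401020, the target lies
               beyond this block, the jump is certainly a forward edge, and
               XDirect may chain to the fast entry point, skipping the
               destination's event check.  A target of 0x401000 (a potential
               back edge) gets toFastEP == False and uses the slow entry
               point. */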
4502 /* Case: assisted transfer to arbitrary address */
4503 switch (stmt->Ist.Exit.jk) {
4504 /* Keep this list in sync with that for iselNext below */
4505 case Ijk_ClientReq:
4506 case Ijk_NoDecode:
4507 case Ijk_NoRedir:
4508 case Ijk_Sys_syscall:
4509 case Ijk_InvalICache:
4510 case Ijk_FlushDCache:
4511 case Ijk_SigTRAP:
4512 case Ijk_SigBUS:
4513 case Ijk_Yield: {
4514 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4515 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
4516 stmt->Ist.Exit.jk));
4517 return;
4519 default:
4520 break;
4523 /* Do we ever expect to see any other kind? */
4524 goto stmt_fail;
4527 default: break;
4529 stmt_fail:
4530 ppIRStmt(stmt);
4531 vpanic("iselStmt");
4535 /*---------------------------------------------------------*/
4536 /*--- ISEL: Basic block terminators (Nexts) ---*/
4537 /*---------------------------------------------------------*/
4539 static void iselNext ( ISelEnv* env,
4540 IRExpr* next, IRJumpKind jk, Int offsIP )
4542 if (vex_traceflags & VEX_TRACE_VCODE) {
4543 vex_printf( "\n-- PUT(%d) = ", offsIP);
4544 ppIRExpr( next );
4545 vex_printf( "; exit-");
4546 ppIRJumpKind(jk);
4547 vex_printf( "\n");
4550 /* Case: boring transfer to known address */
4551 if (next->tag == Iex_Const) {
4552 IRConst* cdst = next->Iex.Const.con;
4553 vassert(cdst->tag == Ico_U64);
4554 if (jk == Ijk_Boring || jk == Ijk_Call) {
4555 /* Boring transfer to known address */
4556 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4557 if (env->chainingAllowed) {
4558 /* .. almost always true .. */
4559 /* Skip the event check at the dst if this is a forwards
4560 edge. */
4561 Bool toFastEP
4562 = ((Addr64)cdst->Ico.U64) > env->max_ga;
4563 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4564 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
4565 amPC, ARM64cc_AL,
4566 toFastEP));
4567 } else {
4568 /* .. very occasionally .. */
4569 /* We can't use chaining, so ask for an assisted transfer,
4570 as that's the only alternative that is allowable. */
4571 HReg r = iselIntExpr_R(env, next);
4572 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4573 Ijk_Boring));
4575 return;
4579 /* Case: call/return (==boring) transfer to any address */
4580 switch (jk) {
4581 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4582 HReg r = iselIntExpr_R(env, next);
4583 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4584 if (env->chainingAllowed) {
4585 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
4586 } else {
4587 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4588 Ijk_Boring));
4590 return;
4592 default:
4593 break;
4596 /* Case: assisted transfer to arbitrary address */
4597 switch (jk) {
4598 /* Keep this list in sync with that for Ist_Exit above */
4599 case Ijk_ClientReq:
4600 case Ijk_NoDecode:
4601 case Ijk_NoRedir:
4602 case Ijk_Sys_syscall:
4603 case Ijk_InvalICache:
4604 case Ijk_FlushDCache:
4605 case Ijk_SigTRAP:
4606 case Ijk_SigBUS:
4607 case Ijk_Yield: {
4608 HReg r = iselIntExpr_R(env, next);
4609 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4610 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
4611 return;
4613 default:
4614 break;
4617 vex_printf( "\n-- PUT(%d) = ", offsIP);
4618 ppIRExpr( next );
4619 vex_printf( "; exit-");
4620 ppIRJumpKind(jk);
4621 vex_printf( "\n");
4622 vassert(0); // are we expecting any other kind?
4626 /*---------------------------------------------------------*/
4627 /*--- Insn selector top-level ---*/
4628 /*---------------------------------------------------------*/
4630 /* Translate an entire SB to arm64 code. */
4632 HInstrArray* iselSB_ARM64 ( const IRSB* bb,
4633 VexArch arch_host,
4634 const VexArchInfo* archinfo_host,
4635 const VexAbiInfo* vbi/*UNUSED*/,
4636 Int offs_Host_EvC_Counter,
4637 Int offs_Host_EvC_FailAddr,
4638 Bool chainingAllowed,
4639 Bool addProfInc,
4640 Addr max_ga )
4642 Int i, j;
4643 HReg hreg, hregHI;
4644 ISelEnv* env;
4645 UInt hwcaps_host = archinfo_host->hwcaps;
4646 ARM64AMode *amCounter, *amFailAddr;
4648 /* sanity ... */
4649 vassert(arch_host == VexArchARM64);
4651 /* Check that the host's endianness is as expected. */
4652 vassert(archinfo_host->endness == VexEndnessLE);
4654 /* guard against unexpected space regressions */
4655 vassert(sizeof(ARM64Instr) <= 32);
4657 /* Make up an initial environment to use. */
4658 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4659 env->vreg_ctr = 0;
4661 /* Set up output code array. */
4662 env->code = newHInstrArray();
4664 /* Copy BB's type env. */
4665 env->type_env = bb->tyenv;
4667 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4668 change as we go along. */
4669 env->n_vregmap = bb->tyenv->types_used;
4670 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4671 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4673 /* and finally ... */
4674 env->chainingAllowed = chainingAllowed;
4675 env->hwcaps = hwcaps_host;
4676 env->previous_rm = NULL;
4677 env->max_ga = max_ga;
4679 /* For each IR temporary, allocate a suitably-kinded virtual
4680 register. */
4681 j = 0;
4682 for (i = 0; i < env->n_vregmap; i++) {
4683 hregHI = hreg = INVALID_HREG;
4684 switch (bb->tyenv->types[i]) {
4685 case Ity_I1:
4686 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
4687 hreg = mkHReg(True, HRcInt64, 0, j++);
4688 break;
4689 case Ity_I128:
4690 hreg = mkHReg(True, HRcInt64, 0, j++);
4691 hregHI = mkHReg(True, HRcInt64, 0, j++);
4692 break;
4693 case Ity_F16: // we'll use HRcFlt64 regs for F16 too
4694 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
4695 case Ity_F64:
4696 hreg = mkHReg(True, HRcFlt64, 0, j++);
4697 break;
4698 case Ity_V128:
4699 hreg = mkHReg(True, HRcVec128, 0, j++);
4700 break;
4701 case Ity_V256:
4702 hreg = mkHReg(True, HRcVec128, 0, j++);
4703 hregHI = mkHReg(True, HRcVec128, 0, j++);
4704 break;
4705 default:
4706 ppIRType(bb->tyenv->types[i]);
4707 vpanic("iselBB(arm64): IRTemp type");
4709 env->vregmap[i] = hreg;
4710 env->vregmapHI[i] = hregHI;
4712 env->vreg_ctr = j;
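   /* Example of the mapping just built: for a tyenv of
         t0:I64  t1:I128  t2:V128  t3:F32
      the loop assigns
         t0 -> vregmap[0] = int vreg 0
         t1 -> vregmap[1] = int vreg 1 (low half),
               vregmapHI[1] = int vreg 2 (high half)
         t2 -> vregmap[2] = vec vreg 3
         t3 -> vregmap[3] = flt vreg 4
      and vreg_ctr ends up as 5. */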
4714 /* The very first instruction must be an event check. */
4715 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
4716 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
4717 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
4719 /* Possibly a block counter increment (for profiling). At this
4720 point we don't know the address of the counter, so just pretend
4721 it is zero. It will have to be patched later, but before this
4722      translation is used, by a call to LibVEX_PatchProfInc. */
4723 if (addProfInc) {
4724 addInstr(env, ARM64Instr_ProfInc());
4727 /* Ok, finally we can iterate over the statements. */
4728 for (i = 0; i < bb->stmts_used; i++)
4729 iselStmt(env, bb->stmts[i]);
4731 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4733 /* record the number of vregs we used. */
4734 env->code->n_vregs = env->vreg_ctr;
4735 return env->code;
4739 /*---------------------------------------------------------------*/
4740 /*--- end host_arm64_isel.c ---*/
4741 /*---------------------------------------------------------------*/