coverity: most of the remaining unsigned >= 0 comparison warnings
[valgrind.git] / VEX / priv / host_arm_isel.c
blob8b3264843891f8fcd35a014dccffa8db688a67de
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 NEON support is
14 Copyright (C) 2010-2017 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, see <http://www.gnu.org/licenses/>.
31 The GNU General Public License is contained in the file COPYING.
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
36 #include "libvex.h"
37 #include "ir_match.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "host_generic_regs.h"
42 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
43 #include "host_arm_defs.h"
46 /*---------------------------------------------------------*/
47 /*--- ARMvfp control word stuff ---*/
48 /*---------------------------------------------------------*/
50 /* Vex-generated code expects to run with the FPU set as follows: all
51 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
52 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
53 this corresponds to a FPSCR value of zero.
55 fpscr should therefore be zero on entry to Vex-generated code, and
56 should be unchanged at exit. (Or at least the bottom 28 bits
57 should be zero).
60 #define DEFAULT_FPSCR 0
63 /*---------------------------------------------------------*/
64 /*--- ISelEnv ---*/
65 /*---------------------------------------------------------*/
67 /* This carries around:
69 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
70 might encounter. This is computed before insn selection starts,
71 and does not change.
73 - A mapping from IRTemp to HReg. This tells the insn selector
74 which virtual register(s) are associated with each IRTemp
75 temporary. This is computed before insn selection starts, and
76 does not change. We expect this mapping to map precisely the
77 same set of IRTemps as the type mapping does.
79 - vregmap holds the primary register for the IRTemp.
80 - vregmapHI is only used for 64-bit integer-typed
81 IRTemps. It holds the identity of a second
82 32-bit virtual HReg, which holds the high half
83 of the value.
85 - The code array, that is, the insns selected so far.
87 - A counter, for generating new virtual registers.
89 - The host hardware capabilities word. This is set at the start
90 and does not change.
92 - A Bool for indicating whether we may generate chain-me
93 instructions for control flow transfers, or whether we must use
94 XAssisted.
96 - The maximum guest address of any guest insn in this block.
97 Actually, the address of the highest-addressed byte from any insn
98 in this block. Is set at the start and does not change. This is
99 used for detecting jumps which are definitely forward-edges from
100 this block, and therefore can be made (chained) to the fast entry
101 point of the destination, thereby avoiding the destination's
102 event check.
104 Note, this is all (well, mostly) host-independent.
/* Instruction-selection environment, threaded through every iselXXX
   function.  See the long comment above for the full rationale. */
typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;        /* IRTemp -> IRType map for this block */

      HReg*        vregmap;         /* IRTemp -> primary virtual register */
      HReg*        vregmapHI;       /* IRTemp -> vreg holding the high 32 bits
                                       (valid only for 64-bit-typed temps) */
      Int          n_vregmap;       /* number of entries in both maps */

      UInt         hwcaps;          /* host hardware capability bits */

      Bool         chainingAllowed; /* may we generate chain-me transfers? */
      Addr32       max_ga;          /* highest guest address in this block */

      /* These are modified as we go along. */
      HInstrArray* code;            /* the instructions selected so far */
      Int          vreg_ctr;        /* counter for allocating new vregs */
   }
   ISelEnv;
127 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
129 vassert(tmp < env->n_vregmap);
130 return env->vregmap[tmp];
133 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
135 vassert(tmp < env->n_vregmap);
136 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
137 *vrLO = env->vregmap[tmp];
138 *vrHI = env->vregmapHI[tmp];
141 static void addInstr ( ISelEnv* env, ARMInstr* instr )
143 addHInstr(env->code, instr);
144 if (vex_traceflags & VEX_TRACE_VCODE) {
145 ppARMInstr(instr);
146 vex_printf("\n");
150 static HReg newVRegI ( ISelEnv* env )
152 HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
153 env->vreg_ctr++;
154 return reg;
157 static HReg newVRegD ( ISelEnv* env )
159 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
160 env->vreg_ctr++;
161 return reg;
164 static HReg newVRegF ( ISelEnv* env )
166 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt32, 0/*enc*/, env->vreg_ctr);
167 env->vreg_ctr++;
168 return reg;
171 static HReg newVRegV ( ISelEnv* env )
173 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
174 env->vreg_ctr++;
175 return reg;
/* These are duplicated in guest_arm_toIR.c */

/* Build a unary-op IR expression node. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}
/* Build a binary-op IR expression node. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}
/* Build a pattern-matching binder node, for use with the ir_match
   machinery. */
static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}
195 /*---------------------------------------------------------*/
196 /*--- ISEL: Forward declarations ---*/
197 /*---------------------------------------------------------*/
199 /* These are organised as iselXXX and iselXXX_wrk pairs. The
200 iselXXX_wrk do the real work, but are not to be called directly.
201 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
202 checks that all returned registers are virtual. You should not
203 call the _wrk version directly.
205 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
206 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
208 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
209 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
211 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
212 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
214 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
215 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
217 static ARMRI84* iselIntExpr_RI84_wrk
218 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
219 static ARMRI84* iselIntExpr_RI84
220 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
222 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
223 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
225 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
226 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
228 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
229 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
231 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
232 ISelEnv* env, const IRExpr* e );
233 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
234 ISelEnv* env, const IRExpr* e );
236 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
237 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
239 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
240 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
242 static HReg iselNeon64Expr_wrk ( ISelEnv* env, const IRExpr* e );
243 static HReg iselNeon64Expr ( ISelEnv* env, const IRExpr* e );
245 static HReg iselNeonExpr_wrk ( ISelEnv* env, const IRExpr* e );
246 static HReg iselNeonExpr ( ISelEnv* env, const IRExpr* e );
248 /*---------------------------------------------------------*/
249 /*--- ISEL: Misc helpers ---*/
250 /*---------------------------------------------------------*/
252 static UInt ROR32 ( UInt x, UInt sh ) {
253 vassert(sh < 32);
254 if (sh == 0)
255 return x;
256 else
257 return (x << (32-sh)) | (x >> sh);
260 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
261 form, and if so return the components. */
262 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
264 UInt i;
265 for (i = 0; i < 16; i++) {
266 if (0 == (u & 0xFFFFFF00)) {
267 *u8 = u;
268 *u4 = i;
269 return True;
271 u = ROR32(u, 30);
273 vassert(i == 16);
274 return False;
/* Make a int reg-reg move.  Both registers must be 32-bit integer
   class; the move is encoded as MOV dst, src via the RI84 form. */
static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
{
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   return ARMInstr_Mov(dst, ARMRI84_R(src));
}
/* Set the VFP unit's rounding mode to default (round to nearest).
   Emits two instructions: load DEFAULT_FPSCR (== 0) into a fresh vreg,
   then write it to FPSCR. */
static void set_VFP_rounding_default ( ISelEnv* env )
{
   /* mov rTmp, #DEFAULT_FPSCR
      fmxr fpscr, rTmp
   */
   HReg rTmp = newVRegI(env);
   addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
}
/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set FPSCR to have the
   same rounding.
*/
static
void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARMvfp one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARMvfp encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;
      t3 <<= 22;       places the swapped 2-bit field at FPSCR[23:22]
      fmxr fpscr, t3
   */
   addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
   addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
   addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
   addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
   addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
   addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
}
340 /*---------------------------------------------------------*/
341 /*--- ISEL: Function call helpers ---*/
342 /*---------------------------------------------------------*/
344 /* Used only in doHelperCall. See big comment in doHelperCall re
345 handling of register-parameter args. This function figures out
346 whether evaluation of an expression might require use of a fixed
347 register. If in doubt return True (safe but suboptimal).
349 static
350 Bool mightRequireFixedRegs ( IRExpr* e )
352 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
353 // These are always "safe" -- either a copy of r13(sp) in some
354 // arbitrary vreg, or a copy of r8, respectively.
355 return False;
357 /* Else it's a "normal" expression. */
358 switch (e->tag) {
359 case Iex_RdTmp: case Iex_Const: case Iex_Get:
360 return False;
361 default:
362 return True;
/* Helper-call variant used when more than ARM_N_ARGREGS args are
   present.  On success, emits the call and sets *stackAdjustAfterCall
   and *retloc; returns False (no code emitted) if the arg pattern is
   not the single supported shape. */
static
Bool doHelperCallWithArgsOnStack ( /*OUT*/UInt* stackAdjustAfterCall,
                                   /*OUT*/RetLoc* retloc,
                                   ISelEnv* env,
                                   IRExpr* guard,
                                   IRCallee* cee, IRType retTy, IRExpr** args )
{
   /* This function deals just with the case where the arg sequence is:
      VECRET followed by between 4 and 12 Ity_I32 values.  So far no other
      cases are necessary or supported. */

   /* Check this matches the required format. */
   if (args[0] == NULL || args[0]->tag != Iex_VECRET)
      goto no_match;

   /* Count the args and check they are all Ity_I32. */
   UInt i;
   UInt n_real_args = 0;
   for (i = 1; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)))
         goto no_match;
      IRType argTy = typeOfIRExpr(env->type_env, arg);
      if (UNLIKELY(argTy != Ity_I32))
         goto no_match;
      n_real_args++;
   }

   /* We expect to pass at least some args on the stack. */
   if (n_real_args <= 3)
      goto no_match;

   /* But not too many. */
   if (n_real_args > 12)
      goto no_match;

   /* General rules for a call:

      Args 1 .. 4 go in R0 .. R3.  The rest are pushed R to L on the
      stack; that is, arg 5 is at the lowest address, arg 6 at the
      next lowest, etc.

      The stack is to be kept 8 aligned.

      It appears (for unclear reasons) that the highest 3 words made
      available when moving SP downwards are not to be used.  For
      example, if 5 args are to go on the stack, then SP must be moved
      down 32 bytes, and the area at SP+20 .. SP+31 is not to be used
      by the caller.
   */

   /* For this particular case, we use the following layout:

        ------ original SP
        112 bytes
        ------
        return value
        ------ original SP - 128
        space
        args words, between 1 and 11
        ------ new SP = original_SP - 256

      Using 256 bytes is overkill, but it is simple and good enough.
   */

   /* This should really be
        HReg argVRegs[n_real_args];
      but that makes it impossible to do 'goto's forward past.
      Hence the following kludge. */
   vassert(n_real_args <= 12);
   HReg argVRegs[12];
   for (i = 0; i < 12; i++)
      argVRegs[i] = INVALID_HREG;

   /* Compute args into vregs. */
   for (i = 0; i < n_real_args; i++) {
      argVRegs[i] = iselIntExpr_R(env, args[i+1]);
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   ARMCondCode cc = ARMcc_AL;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         goto no_match; //ATC -- conditional case never exercised yet
         cc = iselCondCode( env, guard );
      }
   }

   HReg r0 = hregARM_R0();
   HReg sp = hregARM_R13();

   /* 256 as an 8x4 immediate: 64 `ror` (15 * 2) */
   ARMRI84* c256 = ARMRI84_I84(64, 15);

   /* r0 points at the return-value slot, 128 below original SP. */
   addInstr(env, ARMInstr_Alu(ARMalu_SUB, r0, sp, ARMRI84_I84(128, 0)));

   /* First three real args go in r1..r3 (r0 carries the VECRET ptr). */
   addInstr(env, mk_iMOVds_RR(hregARM_R1(), argVRegs[0]));
   addInstr(env, mk_iMOVds_RR(hregARM_R2(), argVRegs[1]));
   addInstr(env, mk_iMOVds_RR(hregARM_R3(), argVRegs[2]));

   /* Move SP down by 256 and store the remaining args there. */
   addInstr(env, ARMInstr_Alu(ARMalu_SUB, sp, sp, c256));

   for (i = 3; i < n_real_args; i++) {
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, argVRegs[i],
                                    ARMAMode1_RI(sp, (i-3) * 4)));
   }

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));

   *stackAdjustAfterCall = 256;
   *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 128);

   Addr32 target = (Addr)cee->addr;
   addInstr(env, ARMInstr_Call( cc, target, 4, *retloc ));

   return True; /* success */

  no_match:
   return False;
}
495 /* Do a complete function call. |guard| is a Ity_Bit expression
496 indicating whether or not the call happens. If guard==NULL, the
497 call is unconditional. |retloc| is set to indicate where the
498 return value is after the call. The caller (of this fn) must
499 generate code to add |stackAdjustAfterCall| to the stack pointer
500 after the call is done. Returns True iff it managed to handle this
501 combination of arg/return types, else returns False. */
static
Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARMCondCode cc;
   HReg        argregs[ARM_N_ARGREGS];
   HReg        tmpregs[ARM_N_ARGREGS];
   Bool        go_fast;
   Int         n_args, i, nextArgReg;
   Addr32      target;

   vassert(ARM_N_ARGREGS == 4);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with arguments passed in registers:
      only I32 and I64 args are supported, up to ARM_N_ARGREGS x 32
      bits in total.  A V128 return is handled by |args| containing
      IRExpr_VECRET(); IRExpr_GSPTR() passes the value in r8.

      Two schemes are used (see also doHelperCall in the x86 isel for
      the full rationale).  Fast scheme: compute args directly into the
      real argument registers -- only safe when no arg computation can
      trash a fixed register, and only for unconditional calls.  Slow
      scheme: compute all args into vregs first, then move them to the
      real regs; always correct, but produces moves the register
      allocator must clean up.  mightRequireFixedRegs() decides which
      args are "simple" enough for the fast scheme (Tmp/Const/Get).
   */

   /* Note that the cee->regparms field is meaningless on ARM hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         nGSPTRs++;
      }
      n_args++;
   }

   /* If there are more than 4 args, we are going to have to pass
      some via memory.  Use a different function to (possibly) deal with
      that; dealing with it here is too complex. */
   if (n_args > ARM_N_ARGREGS) {
      return doHelperCallWithArgsOnStack(stackAdjustAfterCall, retloc,
                                         env, guard, cee, retTy, args );
   }

   /* After this point we make no attempt to pass args on the stack,
      and just give up if that case (which is OK because it never
      happens).  Even if there are for example only 3 args, it might
      still be necessary to pass some of them on the stack if for example
      two or more of them are 64-bit integers. */

   argregs[0] = hregARM_R0();
   argregs[1] = hregARM_R1();
   argregs[2] = hregARM_R2();
   argregs[3] = hregARM_R3();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   if (go_fast) {
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, arg);

         if (nextArgReg >= ARM_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I32) {
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
                                        iselIntExpr_R(env, arg) ));
            nextArgReg++;
         }
         else if (aTy == Ity_I64) {
            /* 64-bit args must be passed in an a reg-pair of the form
               n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
               On a little-endian host, the less significant word is
               passed in the lower-numbered register. */
            if (nextArgReg & 1) {
               if (nextArgReg >= ARM_N_ARGREGS)
                  return False; /* out of argregs */
               /* Skip the odd register; fill it with a recognisable
                  junk value (0xAA). */
               addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
               nextArgReg++;
            }
            if (nextArgReg >= ARM_N_ARGREGS)
               return False; /* out of argregs */
            HReg raHi, raLo;
            iselInt64Expr(&raHi, &raLo, env, arg);
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
            nextArgReg++;
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC -- this path has never been exercised
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
                                        hregARM_R8() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // If this happens, it denotes ill-formed IR
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARMcc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, arg);

         if (nextArgReg >= ARM_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I32) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (aTy == Ity_I64) {
            /* Same comment applies as in the Fast-scheme case. */
            if (nextArgReg & 1)
               nextArgReg++;
            if (nextArgReg + 1 >= ARM_N_ARGREGS)
               return False; /* out of argregs */
            HReg raHi, raLo;
            iselInt64Expr(&raHi, &raLo, env, args[i]);
            tmpregs[nextArgReg] = raLo;
            nextArgReg++;
            tmpregs[nextArgReg] = raHi;
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC -- this path has never been exercised
            tmpregs[nextArgReg] = hregARM_R8();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // If this happens, it denotes ill-formed IR
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARMcc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs
            addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
            continue;
         }
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);
   vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64:
         *retloc = mk_RetLoc_simple(RLPri_2Int);
         break;
      case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order r0, r1, r2, r3, its numeric value at this
      point, which must be between 0 and 4 inclusive, is going to be
      equal to the number of arg regs in use for the call.  Hence bake
      that number into the call (we'll need to know it when doing
      register allocation, to know what regs the call reads.)

      There is a bit of a twist -- harmless but worth recording.
      Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
      the first arg in r0 and the second in r3:r2, but r1 isn't used.
      We nevertheless have nextArgReg==4 and bake that into the call
      instruction.  This will mean the register allocator wil believe
      this insn reads r1 when in fact it doesn't.  But that's
      harmless; it just artificially extends the live range of r1
      unnecessarily.  The best fix would be to put into the
      instruction, a bitmask indicating which of r0/1/2/3 carry live
      values.  But that's too much hassle. */

   target = (Addr)cee->addr;
   addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}
849 /*---------------------------------------------------------*/
850 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
851 /*---------------------------------------------------------*/
853 /* Select insns for an integer-typed expression, and add them to the
854 code list. Return a reg holding the result. This reg will be a
855 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
856 want to modify it, ask for a new vreg, copy it in there, and modify
857 the copy. The register allocator will do its best to map both
858 vregs to the same real register, so the copies will often disappear
859 later in the game.
861 This should handle expressions of 32, 16 and 8-bit type. All
862 results are returned in a 32-bit register. For 16- and 8-bit
863 expressions, the upper 16/24 bits are arbitrary, so you should mask
864 or sign extend partial values if necessary.
867 /* --------------------- AMode1 --------------------- */
869 /* Return an AMode1 which computes the value of the specified
870 expression, possibly also adding insns to the code list as a
871 result. The expression may only be a 32-bit one.
874 static Bool sane_AMode1 ( ARMAMode1* am )
876 switch (am->tag) {
877 case ARMam1_RI:
878 return
879 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
880 && (hregIsVirtual(am->ARMam1.RI.reg)
881 || sameHReg(am->ARMam1.RI.reg, hregARM_R8()))
882 && am->ARMam1.RI.simm13 >= -4095
883 && am->ARMam1.RI.simm13 <= 4095 );
884 case ARMam1_RRS:
885 return
886 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
887 && hregIsVirtual(am->ARMam1.RRS.base)
888 && hregClass(am->ARMam1.RRS.index) == HRcInt32
889 && hregIsVirtual(am->ARMam1.RRS.index)
890 && am->ARMam1.RRS.shift >= 0
891 && am->ARMam1.RRS.shift <= 3 );
892 default:
893 vpanic("sane_AMode: unknown ARM AMode1 tag");
897 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
899 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
900 vassert(sane_AMode1(am));
901 return am;
904 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
906 IRType ty = typeOfIRExpr(env->type_env,e);
907 vassert(ty == Ity_I32);
909 /* FIXME: add RRS matching */
911 /* {Add32,Sub32}(expr,simm13) */
912 if (e->tag == Iex_Binop
913 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
914 && e->Iex.Binop.arg2->tag == Iex_Const
915 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
916 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
917 if (simm >= -4095 && simm <= 4095) {
918 HReg reg;
919 if (e->Iex.Binop.op == Iop_Sub32)
920 simm = -simm;
921 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
922 return ARMAMode1_RI(reg, simm);
926 /* Doesn't match anything in particular. Generate it into
927 a register and use that. */
929 HReg reg = iselIntExpr_R(env, e);
930 return ARMAMode1_RI(reg, 0);
936 /* --------------------- AMode2 --------------------- */
938 /* Return an AMode2 which computes the value of the specified
939 expression, possibly also adding insns to the code list as a
940 result. The expression may only be a 32-bit one.
943 static Bool sane_AMode2 ( ARMAMode2* am )
945 switch (am->tag) {
946 case ARMam2_RI:
947 return
948 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
949 && hregIsVirtual(am->ARMam2.RI.reg)
950 && am->ARMam2.RI.simm9 >= -255
951 && am->ARMam2.RI.simm9 <= 255 );
952 case ARMam2_RR:
953 return
954 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
955 && hregIsVirtual(am->ARMam2.RR.base)
956 && hregClass(am->ARMam2.RR.index) == HRcInt32
957 && hregIsVirtual(am->ARMam2.RR.index) );
958 default:
959 vpanic("sane_AMode: unknown ARM AMode2 tag");
963 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
965 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
966 vassert(sane_AMode2(am));
967 return am;
970 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
972 IRType ty = typeOfIRExpr(env->type_env,e);
973 vassert(ty == Ity_I32);
975 /* FIXME: add RR matching */
977 /* {Add32,Sub32}(expr,simm8) */
978 if (e->tag == Iex_Binop
979 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
980 && e->Iex.Binop.arg2->tag == Iex_Const
981 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
982 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
983 if (simm >= -255 && simm <= 255) {
984 HReg reg;
985 if (e->Iex.Binop.op == Iop_Sub32)
986 simm = -simm;
987 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
988 return ARMAMode2_RI(reg, simm);
992 /* Doesn't match anything in particular. Generate it into
993 a register and use that. */
995 HReg reg = iselIntExpr_R(env, e);
996 return ARMAMode2_RI(reg, 0);
1002 /* --------------------- AModeV --------------------- */
1004 /* Return an AModeV which computes the value of the specified
1005 expression, possibly also adding insns to the code list as a
1006 result. The expression may only be a 32-bit one.
1009 static Bool sane_AModeV ( ARMAModeV* am )
1011 return toBool( hregClass(am->reg) == HRcInt32
1012 && hregIsVirtual(am->reg)
1013 && am->simm11 >= -1020 && am->simm11 <= 1020
1014 && 0 == (am->simm11 & 3) );
1017 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
1019 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
1020 vassert(sane_AModeV(am));
1021 return am;
1024 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
1026 IRType ty = typeOfIRExpr(env->type_env,e);
1027 vassert(ty == Ity_I32);
1029 /* {Add32,Sub32}(expr, simm8 << 2) */
1030 if (e->tag == Iex_Binop
1031 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
1032 && e->Iex.Binop.arg2->tag == Iex_Const
1033 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1034 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1035 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
1036 HReg reg;
1037 if (e->Iex.Binop.op == Iop_Sub32)
1038 simm = -simm;
1039 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
1040 return mkARMAModeV(reg, simm);
1044 /* Doesn't match anything in particular. Generate it into
1045 a register and use that. */
1047 HReg reg = iselIntExpr_R(env, e);
1048 return mkARMAModeV(reg, 0);
1053 /* -------------------- AModeN -------------------- */
1055 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
1057 return iselIntExpr_AModeN_wrk(env, e);
1060 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
1062 HReg reg = iselIntExpr_R(env, e);
1063 return mkARMAModeN_R(reg);
1067 /* --------------------- RI84 --------------------- */
/* Select instructions to generate 'e' into a RI84.  If mayInv is
   True, the caller will also accept an I84 form that denotes
   'not e'; in that case didInv must not be NULL, and *didInv is set
   to True iff the inverted form was actually used (and to False
   otherwise).  This complication allows generation of an RI84
   which is suitable for use in either an AND or BIC instruction,
   without knowing (before this call) which one.
1076 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
1077 ISelEnv* env, IRExpr* e )
1079 ARMRI84* ri;
1080 if (mayInv)
1081 vassert(didInv != NULL);
1082 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
1083 /* sanity checks ... */
1084 switch (ri->tag) {
1085 case ARMri84_I84:
1086 return ri;
1087 case ARMri84_R:
1088 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
1089 vassert(hregIsVirtual(ri->ARMri84.R.reg));
1090 return ri;
1091 default:
1092 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1096 /* DO NOT CALL THIS DIRECTLY ! */
1097 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
1098 ISelEnv* env, IRExpr* e )
1100 IRType ty = typeOfIRExpr(env->type_env,e);
1101 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1103 if (didInv) *didInv = False;
1105 /* special case: immediate */
1106 if (e->tag == Iex_Const) {
1107 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
1108 switch (e->Iex.Const.con->tag) {
1109 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1110 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1111 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1112 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1114 if (fitsIn8x4(&u8, &u4, u)) {
1115 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1117 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
1118 vassert(didInv);
1119 *didInv = True;
1120 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1122 /* else fail, fall through to default case */
1125 /* default case: calculate into a register and return that */
1127 HReg r = iselIntExpr_R ( env, e );
1128 return ARMRI84_R(r);
1133 /* --------------------- RI5 --------------------- */
1135 /* Select instructions to generate 'e' into a RI5. */
1137 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
1139 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
1140 /* sanity checks ... */
1141 switch (ri->tag) {
1142 case ARMri5_I5:
1143 return ri;
1144 case ARMri5_R:
1145 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
1146 vassert(hregIsVirtual(ri->ARMri5.R.reg));
1147 return ri;
1148 default:
1149 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
1153 /* DO NOT CALL THIS DIRECTLY ! */
1154 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
1156 IRType ty = typeOfIRExpr(env->type_env,e);
1157 vassert(ty == Ity_I32 || ty == Ity_I8);
1159 /* special case: immediate */
1160 if (e->tag == Iex_Const) {
1161 UInt u; /* both invalid */
1162 switch (e->Iex.Const.con->tag) {
1163 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1164 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1165 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1166 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
1168 if (u >= 1 && u <= 31) {
1169 return ARMRI5_I5(u);
1171 /* else fail, fall through to default case */
1174 /* default case: calculate into a register and return that */
1176 HReg r = iselIntExpr_R ( env, e );
1177 return ARMRI5_R(r);
1182 /* ------------------- CondCode ------------------- */
1184 /* Generate code to evaluated a bit-typed expression, returning the
1185 condition code which would correspond when the expression would
1186 notionally have returned 1. */
1188 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1190 ARMCondCode cc = iselCondCode_wrk(env,e);
1191 vassert(cc != ARMcc_NV);
1192 return cc;
static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Emit code that sets the ARM flags according to the Ity_I1
      expression 'e', and return the condition code that is true
      exactly when 'e' notionally evaluates to 1.  The patterns are
      tried in order; the first match wins. */
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* CmpOrTst doesn't modify rTmp; so this is OK. */
      ARMRI84* one  = ARMRI84_I84(1,0);
      /* TST rTmp, #1 -- NE iff bit 0 is set. */
      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
      return ARMcc_NE;
   }

   /* Not1(e) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      /* ARM condition codes come in complementary pairs differing in
         the bottom bit, so XOR-ing with 1 inverts the condition. */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 32to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32to1) {
      /* Truncation to 1 bit: test bit 0 of the 32-bit value. */
      HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARMRI84* one  = ARMRI84_I84(1,0);
      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
      return ARMcc_NE;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      /* Only the low 8 bits are significant: TST r1, #0xFF. */
      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
      return ARMcc_NE;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      /* CMP r1, #0 -- NE iff nonzero. */
      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARMRI84* zero = ARMRI84_I84(0,0);
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
      return ARMcc_NE;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      /* OR the two halves together; the 64-bit value is nonzero iff
         the OR of the halves is nonzero. */
      HReg     tHi, tLo;
      HReg     tmp  = newVRegI(env);
      ARMRI84* zero = ARMRI84_I84(0,0);
      iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
      addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
      return ARMcc_NE;
   }

   /* --- Cmp*32*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U)) {
      /* One CMP, then pick the condition that matches the IR op. */
      HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARMRI84* argR = iselIntExpr_RI84(NULL,False,
                                       env, e->Iex.Binop.arg2);
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32:  return ARMcc_EQ;
         case Iop_CmpNE32:  return ARMcc_NE;
         case Iop_CmpLT32S: return ARMcc_LT;
         case Iop_CmpLT32U: return ARMcc_LO;
         case Iop_CmpLE32S: return ARMcc_LE;
         case Iop_CmpLE32U: return ARMcc_LS;
         default: vpanic("iselCondCode(arm): CmpXX32");
      }
   }

   /* const */
   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      /* Compare a register against itself: that always sets EQ, so
         return EQ for a True constant and NE (never taken) for False. */
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, ARMInstr_Imm32(r, 0));
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
      return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
   }

   /* --- And1(x,y), Or1(x,y) --- */
   /* FIXME: We could (and probably should) do a lot better here, by using the
      iselCondCode_C/_R scheme used in the amd64 insn selector. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
      /* Materialise both operands as 0/1 in registers ... */
      HReg x_as_32 = newVRegI(env);
      ARMCondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1);
      addInstr(env, ARMInstr_Mov(x_as_32, ARMRI84_I84(0,0)));
      addInstr(env, ARMInstr_CMov(cc_x, x_as_32, ARMRI84_I84(1,0)));

      HReg y_as_32 = newVRegI(env);
      ARMCondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2);
      addInstr(env, ARMInstr_Mov(y_as_32, ARMRI84_I84(0,0)));
      addInstr(env, ARMInstr_CMov(cc_y, y_as_32, ARMRI84_I84(1,0)));

      /* ... combine with AND/OR, then test bit 0 of the result. */
      HReg tmp = newVRegI(env);
      ARMAluOp aop = e->Iex.Binop.op == Iop_And1 ? ARMalu_AND : ARMalu_OR;
      addInstr(env, ARMInstr_Alu(aop, tmp, x_as_32, ARMRI84_R(y_as_32)));

      ARMRI84* one = ARMRI84_I84(1,0);
      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, tmp, one));
      return ARMcc_NE;
   }

   // JRS 2013-Jan-03: this seems completely nonsensical
   /* --- CasCmpEQ* --- */
   /* Ist_Cas has a dummy argument to compare with, so comparison is
      always true. */
   //if (e->tag == Iex_Binop
   //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
   //        || e->Iex.Binop.op == Iop_CasCmpEQ16
   //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
   //   return ARMcc_AL;
   //}

   ppIRExpr(e);
   vpanic("iselCondCode");
}
1334 /* --------------------- Reg --------------------- */
1336 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1338 HReg r = iselIntExpr_R_wrk(env, e);
1339 /* sanity checks ... */
1340 # if 0
1341 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1342 # endif
1343 vassert(hregClass(r) == HRcInt32);
1344 vassert(hregIsVirtual(r));
1345 return r;
1348 /* DO NOT CALL THIS DIRECTLY ! */
1349 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1351 IRType ty = typeOfIRExpr(env->type_env,e);
1352 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1354 switch (e->tag) {
1356 /* --------- TEMP --------- */
1357 case Iex_RdTmp: {
1358 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1361 /* --------- LOAD --------- */
1362 case Iex_Load: {
1363 HReg dst = newVRegI(env);
1365 if (e->Iex.Load.end != Iend_LE)
1366 goto irreducible;
1368 if (ty == Ity_I32) {
1369 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1370 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
1371 return dst;
1373 if (ty == Ity_I16) {
1374 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1375 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
1376 True/*isLoad*/, False/*!signedLoad*/,
1377 dst, amode));
1378 return dst;
1380 if (ty == Ity_I8) {
1381 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1382 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
1383 return dst;
1385 break;
1388 //zz /* --------- TERNARY OP --------- */
1389 //zz case Iex_Triop: {
1390 //zz IRTriop *triop = e->Iex.Triop.details;
1391 //zz /* C3210 flags following FPU partial remainder (fprem), both
1392 //zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1393 //zz if (triop->op == Iop_PRemC3210F64
1394 //zz || triop->op == Iop_PRem1C3210F64) {
1395 //zz HReg junk = newVRegF(env);
1396 //zz HReg dst = newVRegI(env);
1397 //zz HReg srcL = iselDblExpr(env, triop->arg2);
1398 //zz HReg srcR = iselDblExpr(env, triop->arg3);
1399 //zz /* XXXROUNDINGFIXME */
1400 //zz /* set roundingmode here */
1401 //zz addInstr(env, X86Instr_FpBinary(
1402 //zz e->Iex.Binop.op==Iop_PRemC3210F64
1403 //zz ? Xfp_PREM : Xfp_PREM1,
1404 //zz srcL,srcR,junk
1405 //zz ));
1406 //zz /* The previous pseudo-insn will have left the FPU's C3210
1407 //zz flags set correctly. So bag them. */
1408 //zz addInstr(env, X86Instr_FpStSW_AX());
1409 //zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1410 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1411 //zz return dst;
1412 //zz }
1413 //zz
1414 //zz break;
1415 //zz }
1417 /* --------- BINARY OP --------- */
1418 case Iex_Binop: {
1420 ARMAluOp aop = 0; /* invalid */
1421 ARMShiftOp sop = 0; /* invalid */
1423 /* ADD/SUB/AND/OR/XOR */
1424 switch (e->Iex.Binop.op) {
1425 case Iop_And32: {
1426 Bool didInv = False;
1427 HReg dst = newVRegI(env);
1428 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1429 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1430 env, e->Iex.Binop.arg2);
1431 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1432 dst, argL, argR));
1433 return dst;
1435 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1436 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1437 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1438 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1439 std_binop: {
1440 HReg dst = newVRegI(env);
1441 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1442 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1443 env, e->Iex.Binop.arg2);
1444 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1445 return dst;
1447 default: break;
1450 /* SHL/SHR/SAR */
1451 switch (e->Iex.Binop.op) {
1452 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1453 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1454 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1455 sh_binop: {
1456 HReg dst = newVRegI(env);
1457 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1458 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1459 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1460 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1461 return dst;
1463 default: break;
1466 /* MUL */
1467 if (e->Iex.Binop.op == Iop_Mul32) {
1468 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1469 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1470 HReg dst = newVRegI(env);
1471 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1472 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1473 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1474 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1475 return dst;
1478 /* Handle misc other ops. */
1480 if (e->Iex.Binop.op == Iop_Max32U) {
1481 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1482 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1483 HReg dst = newVRegI(env);
1484 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1485 ARMRI84_R(argR)));
1486 addInstr(env, mk_iMOVds_RR(dst, argL));
1487 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1488 return dst;
1491 if (e->Iex.Binop.op == Iop_CmpF64) {
1492 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1493 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1494 HReg dst = newVRegI(env);
1495 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1496 FMSTAT, so we can examine the results directly. */
1497 addInstr(env, ARMInstr_VCmpD(dL, dR));
1498 /* Create in dst, the IRCmpF64Result encoded result. */
1499 addInstr(env, ARMInstr_Imm32(dst, 0));
1500 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1501 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1502 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1503 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1504 return dst;
1507 if (e->Iex.Binop.op == Iop_F64toI32S
1508 || e->Iex.Binop.op == Iop_F64toI32U) {
1509 /* Wretched uglyness all round, due to having to deal
1510 with rounding modes. Oh well. */
1511 /* FIXME: if arg1 is a constant indicating round-to-zero,
1512 then we could skip all this arsing around with FPSCR and
1513 simply emit FTO{S,U}IZD. */
1514 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1515 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1516 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1517 /* FTO{S,U}ID valF, valD */
1518 HReg valF = newVRegF(env);
1519 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1520 valF, valD));
1521 set_VFP_rounding_default(env);
1522 /* VMOV dst, valF */
1523 HReg dst = newVRegI(env);
1524 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1525 return dst;
1528 if (e->Iex.Binop.op == Iop_GetElem8x8
1529 || e->Iex.Binop.op == Iop_GetElem16x4
1530 || e->Iex.Binop.op == Iop_GetElem32x2) {
1531 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1532 HReg res = newVRegI(env);
1533 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1534 UInt index, size;
1535 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1536 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1537 vpanic("ARM target supports GetElem with constant "
1538 "second argument only (neon)\n");
1540 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1541 switch (e->Iex.Binop.op) {
1542 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1543 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1544 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1545 default: vassert(0);
1547 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1548 mkARMNRS(ARMNRS_Reg, res, 0),
1549 mkARMNRS(ARMNRS_Scalar, arg, index),
1550 size, False));
1551 return res;
1555 if (e->Iex.Binop.op == Iop_GetElem32x2
1556 && e->Iex.Binop.arg2->tag == Iex_Const
1557 && !(env->hwcaps & VEX_HWCAPS_ARM_NEON)) {
1558 /* We may have to do GetElem32x2 on a non-NEON capable
1559 target. */
1560 IRConst* con = e->Iex.Binop.arg2->Iex.Const.con;
1561 vassert(con->tag == Ico_U8); /* else IR is ill-typed */
1562 UInt index = con->Ico.U8;
1563 if (index >= 0 && index <= 1) {
1564 HReg rHi, rLo;
1565 iselInt64Expr(&rHi, &rLo, env, e->Iex.Binop.arg1);
1566 return index == 0 ? rLo : rHi;
1570 if (e->Iex.Binop.op == Iop_GetElem8x16
1571 || e->Iex.Binop.op == Iop_GetElem16x8
1572 || e->Iex.Binop.op == Iop_GetElem32x4) {
1573 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1574 HReg res = newVRegI(env);
1575 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1576 UInt index, size;
1577 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1578 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1579 vpanic("ARM target supports GetElem with constant "
1580 "second argument only (neon)\n");
1582 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1583 switch (e->Iex.Binop.op) {
1584 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1585 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1586 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1587 default: vassert(0);
1589 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1590 mkARMNRS(ARMNRS_Reg, res, 0),
1591 mkARMNRS(ARMNRS_Scalar, arg, index),
1592 size, True));
1593 return res;
1597 /* All cases involving host-side helper calls. */
1598 void* fn = NULL;
1599 switch (e->Iex.Binop.op) {
1600 case Iop_Add16x2:
1601 fn = &h_generic_calc_Add16x2; break;
1602 case Iop_Sub16x2:
1603 fn = &h_generic_calc_Sub16x2; break;
1604 case Iop_HAdd16Ux2:
1605 fn = &h_generic_calc_HAdd16Ux2; break;
1606 case Iop_HAdd16Sx2:
1607 fn = &h_generic_calc_HAdd16Sx2; break;
1608 case Iop_HSub16Ux2:
1609 fn = &h_generic_calc_HSub16Ux2; break;
1610 case Iop_HSub16Sx2:
1611 fn = &h_generic_calc_HSub16Sx2; break;
1612 case Iop_QAdd16Sx2:
1613 fn = &h_generic_calc_QAdd16Sx2; break;
1614 case Iop_QAdd16Ux2:
1615 fn = &h_generic_calc_QAdd16Ux2; break;
1616 case Iop_QSub16Sx2:
1617 fn = &h_generic_calc_QSub16Sx2; break;
1618 case Iop_Add8x4:
1619 fn = &h_generic_calc_Add8x4; break;
1620 case Iop_Sub8x4:
1621 fn = &h_generic_calc_Sub8x4; break;
1622 case Iop_HAdd8Ux4:
1623 fn = &h_generic_calc_HAdd8Ux4; break;
1624 case Iop_HAdd8Sx4:
1625 fn = &h_generic_calc_HAdd8Sx4; break;
1626 case Iop_HSub8Ux4:
1627 fn = &h_generic_calc_HSub8Ux4; break;
1628 case Iop_HSub8Sx4:
1629 fn = &h_generic_calc_HSub8Sx4; break;
1630 case Iop_QAdd8Sx4:
1631 fn = &h_generic_calc_QAdd8Sx4; break;
1632 case Iop_QAdd8Ux4:
1633 fn = &h_generic_calc_QAdd8Ux4; break;
1634 case Iop_QSub8Sx4:
1635 fn = &h_generic_calc_QSub8Sx4; break;
1636 case Iop_QSub8Ux4:
1637 fn = &h_generic_calc_QSub8Ux4; break;
1638 case Iop_Sad8Ux4:
1639 fn = &h_generic_calc_Sad8Ux4; break;
1640 case Iop_QAdd32S:
1641 fn = &h_generic_calc_QAdd32S; break;
1642 case Iop_QSub32S:
1643 fn = &h_generic_calc_QSub32S; break;
1644 case Iop_QSub16Ux2:
1645 fn = &h_generic_calc_QSub16Ux2; break;
1646 case Iop_DivU32:
1647 fn = &h_calc_udiv32_w_arm_semantics; break;
1648 case Iop_DivS32:
1649 fn = &h_calc_sdiv32_w_arm_semantics; break;
1650 default:
1651 break;
1654 if (fn) {
1655 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1656 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1657 HReg res = newVRegI(env);
1658 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1659 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1660 addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1661 2, mk_RetLoc_simple(RLPri_Int) ));
1662 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1663 return res;
1666 break;
1669 /* --------- UNARY OP --------- */
1670 case Iex_Unop: {
1672 //zz /* 1Uto8(32to1(expr32)) */
1673 //zz if (e->Iex.Unop.op == Iop_1Uto8) {
1674 //zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1675 //zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1676 //zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1677 //zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1678 //zz const IRExpr* expr32 = mi.bindee[0];
1679 //zz HReg dst = newVRegI(env);
1680 //zz HReg src = iselIntExpr_R(env, expr32);
1681 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1682 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1683 //zz X86RMI_Imm(1), dst));
1684 //zz return dst;
1685 //zz }
1686 //zz }
1687 //zz
1688 //zz /* 8Uto32(LDle(expr32)) */
1689 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1690 //zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1691 //zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1692 //zz unop(Iop_8Uto32,
1693 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1694 //zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1695 //zz HReg dst = newVRegI(env);
1696 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1697 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1698 //zz return dst;
1699 //zz }
1700 //zz }
1701 //zz
1702 //zz /* 8Sto32(LDle(expr32)) */
1703 //zz if (e->Iex.Unop.op == Iop_8Sto32) {
1704 //zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1705 //zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1706 //zz unop(Iop_8Sto32,
1707 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1708 //zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1709 //zz HReg dst = newVRegI(env);
1710 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1711 //zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1712 //zz return dst;
1713 //zz }
1714 //zz }
1715 //zz
1716 //zz /* 16Uto32(LDle(expr32)) */
1717 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1718 //zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1719 //zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1720 //zz unop(Iop_16Uto32,
1721 //zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1722 //zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1723 //zz HReg dst = newVRegI(env);
1724 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1725 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1726 //zz return dst;
1727 //zz }
1728 //zz }
1729 //zz
1730 //zz /* 8Uto32(GET:I8) */
1731 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1732 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1733 //zz HReg dst;
1734 //zz X86AMode* amode;
1735 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1736 //zz dst = newVRegI(env);
1737 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1738 //zz hregX86_EBP());
1739 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1740 //zz return dst;
1741 //zz }
1742 //zz }
1743 //zz
1744 //zz /* 16to32(GET:I16) */
1745 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1746 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1747 //zz HReg dst;
1748 //zz X86AMode* amode;
1749 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1750 //zz dst = newVRegI(env);
1751 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1752 //zz hregX86_EBP());
1753 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1754 //zz return dst;
1755 //zz }
1756 //zz }
1758 switch (e->Iex.Unop.op) {
1759 case Iop_8Uto32: {
1760 HReg dst = newVRegI(env);
1761 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1762 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1763 dst, src, ARMRI84_I84(0xFF,0)));
1764 return dst;
1766 //zz case Iop_8Uto16:
1767 //zz case Iop_8Uto32:
1768 //zz case Iop_16Uto32: {
1769 //zz HReg dst = newVRegI(env);
1770 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1771 //zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1772 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1773 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1774 //zz X86RMI_Imm(mask), dst));
1775 //zz return dst;
1776 //zz }
1777 //zz case Iop_8Sto16:
1778 //zz case Iop_8Sto32:
1779 case Iop_16Uto32: {
1780 HReg dst = newVRegI(env);
1781 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1782 ARMRI5* amt = ARMRI5_I5(16);
1783 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1784 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1785 return dst;
1787 case Iop_8Sto32:
1788 case Iop_16Sto32: {
1789 HReg dst = newVRegI(env);
1790 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1791 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1792 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1793 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1794 return dst;
1796 //zz case Iop_Not8:
1797 //zz case Iop_Not16:
1798 case Iop_Not32: {
1799 HReg dst = newVRegI(env);
1800 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1801 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1802 return dst;
1804 case Iop_64HIto32: {
1805 HReg rHi, rLo;
1806 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1807 return rHi; /* and abandon rLo .. poor wee thing :-) */
1809 case Iop_64to32: {
1810 HReg rHi, rLo;
1811 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1812 return rLo; /* similar stupid comment to the above ... */
1814 case Iop_64to8: {
1815 HReg rHi, rLo;
1816 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1817 HReg tHi = newVRegI(env);
1818 HReg tLo = newVRegI(env);
1819 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1820 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1821 rHi = tHi;
1822 rLo = tLo;
1823 } else {
1824 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1826 return rLo;
1829 case Iop_1Uto32:
1830 /* 1Uto32(tmp). Since I1 values generated into registers
1831 are guaranteed to have value either only zero or one,
1832 we can simply return the value of the register in this
1833 case. */
1834 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1835 HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1836 return dst;
1838 /* else fall through */
1839 case Iop_1Uto8: {
1840 HReg dst = newVRegI(env);
1841 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1842 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1843 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1844 return dst;
1847 case Iop_1Sto32: {
1848 HReg dst = newVRegI(env);
1849 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1850 ARMRI5* amt = ARMRI5_I5(31);
1851 /* This is really rough. We could do much better here;
1852 perhaps mvn{cond} dst, #0 as the second insn?
1853 (same applies to 1Sto64) */
1854 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1855 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1856 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1857 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1858 return dst;
1862 //zz case Iop_1Sto8:
1863 //zz case Iop_1Sto16:
1864 //zz case Iop_1Sto32: {
1865 //zz /* could do better than this, but for now ... */
1866 //zz HReg dst = newVRegI(env);
1867 //zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1868 //zz addInstr(env, X86Instr_Set32(cond,dst));
1869 //zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1870 //zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1871 //zz return dst;
1872 //zz }
1873 //zz case Iop_Ctz32: {
1874 //zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1875 //zz HReg dst = newVRegI(env);
1876 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1877 //zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1878 //zz return dst;
1879 //zz }
1880 case Iop_Clz32: {
1881 /* Count leading zeroes; easy on ARM. */
1882 HReg dst = newVRegI(env);
1883 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1884 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1885 return dst;
1888 case Iop_CmpwNEZ32: {
1889 HReg dst = newVRegI(env);
1890 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1891 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1892 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1893 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1894 return dst;
1897 case Iop_Left32: {
1898 HReg dst = newVRegI(env);
1899 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1900 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1901 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1902 return dst;
1905 //zz case Iop_V128to32: {
1906 //zz HReg dst = newVRegI(env);
1907 //zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1908 //zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1909 //zz sub_from_esp(env, 16);
1910 //zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1911 //zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1912 //zz add_to_esp(env, 16);
1913 //zz return dst;
1914 //zz }
1915 //zz
1916 case Iop_ReinterpF32asI32: {
1917 HReg dst = newVRegI(env);
1918 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1919 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1920 return dst;
1923 //zz
1924 //zz case Iop_16to8:
1925 case Iop_32to8:
1926 case Iop_32to16:
1927 /* These are no-ops. */
1928 return iselIntExpr_R(env, e->Iex.Unop.arg);
1930 default:
1931 break;
1934 /* All Unop cases involving host-side helper calls. */
1935 void* fn = NULL;
1936 switch (e->Iex.Unop.op) {
1937 case Iop_CmpNEZ16x2:
1938 fn = &h_generic_calc_CmpNEZ16x2; break;
1939 case Iop_CmpNEZ8x4:
1940 fn = &h_generic_calc_CmpNEZ8x4; break;
1941 default:
1942 break;
1945 if (fn) {
1946 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1947 HReg res = newVRegI(env);
1948 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1949 addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1950 1, mk_RetLoc_simple(RLPri_Int) ));
1951 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1952 return res;
1955 break;
1958 /* --------- GET --------- */
1959 case Iex_Get: {
1960 if (ty == Ity_I32
1961 && 0 == (e->Iex.Get.offset & 3)
1962 && e->Iex.Get.offset < 4096-4) {
1963 HReg dst = newVRegI(env);
1964 addInstr(env, ARMInstr_LdSt32(
1965 ARMcc_AL, True/*isLoad*/,
1966 dst,
1967 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1968 return dst;
1970 //zz if (ty == Ity_I8 || ty == Ity_I16) {
1971 //zz HReg dst = newVRegI(env);
1972 //zz addInstr(env, X86Instr_LoadEX(
1973 //zz toUChar(ty==Ity_I8 ? 1 : 2),
1974 //zz False,
1975 //zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1976 //zz dst));
1977 //zz return dst;
1978 //zz }
1979 break;
1982 //zz case Iex_GetI: {
1983 //zz X86AMode* am
1984 //zz = genGuestArrayOffset(
1985 //zz env, e->Iex.GetI.descr,
1986 //zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1987 //zz HReg dst = newVRegI(env);
1988 //zz if (ty == Ity_I8) {
1989 //zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1990 //zz return dst;
1991 //zz }
1992 //zz if (ty == Ity_I32) {
1993 //zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1994 //zz return dst;
1995 //zz }
1996 //zz break;
1997 //zz }
1999 /* --------- CCALL --------- */
2000 case Iex_CCall: {
2001 HReg dst = newVRegI(env);
2002 vassert(ty == e->Iex.CCall.retty);
2004 /* be very restrictive for now. Only 32/64-bit ints allowed for
2005 args, and 32 bits for return type. Don't forget to change
2006 the RetLoc if more types are allowed in future. */
2007 if (e->Iex.CCall.retty != Ity_I32)
2008 goto irreducible;
2010 /* Marshal args, do the call, clear stack. */
2011 UInt addToSp = 0;
2012 RetLoc rloc = mk_RetLoc_INVALID();
2013 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2014 e->Iex.CCall.cee, e->Iex.CCall.retty,
2015 e->Iex.CCall.args );
2016 /* */
2017 if (ok) {
2018 vassert(is_sane_RetLoc(rloc));
2019 vassert(rloc.pri == RLPri_Int);
2020 vassert(addToSp == 0);
2021 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
2022 return dst;
2024 goto irreducible;
2027 /* --------- LITERAL --------- */
2028 /* 32 literals */
2029 case Iex_Const: {
2030 UInt u = 0;
2031 HReg dst = newVRegI(env);
2032 switch (e->Iex.Const.con->tag) {
2033 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2034 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
2035 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
2036 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
2038 addInstr(env, ARMInstr_Imm32(dst, u));
2039 return dst;
2042 /* --------- MULTIPLEX --------- */
2043 case Iex_ITE: { // VFD
2044 /* ITE(ccexpr, iftrue, iffalse) */
2045 if (ty == Ity_I32) {
2046 ARMCondCode cc;
2047 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2048 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse);
2049 HReg dst = newVRegI(env);
2050 addInstr(env, mk_iMOVds_RR(dst, r1));
2051 cc = iselCondCode(env, e->Iex.ITE.cond);
2052 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
2053 return dst;
2055 break;
2058 default:
2059 break;
2060 } /* switch (e->tag) */
2062 /* We get here if no pattern matched. */
2063 irreducible:
2064 ppIRExpr(e);
2065 vpanic("iselIntExpr_R: cannot reduce tree");
2069 /* -------------------- 64-bit -------------------- */
2071 /* Compute a 64-bit value into a register pair, which is returned as
2072 the first two parameters. As with iselIntExpr_R, these may be
2073 either real or virtual regs; in any case they must not be changed
2074 by subsequent code emitted by the caller. */
2076 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2077 const IRExpr* e )
2079 iselInt64Expr_wrk(rHi, rLo, env, e);
2080 # if 0
2081 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2082 # endif
2083 vassert(hregClass(*rHi) == HRcInt32);
2084 vassert(hregIsVirtual(*rHi));
2085 vassert(hregClass(*rLo) == HRcInt32);
2086 vassert(hregIsVirtual(*rLo));
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselInt64Expr.  Selects ARM instructions computing the
   64-bit expression 'e' into two freshly allocated 32-bit vregs,
   returned via *rHi (bits 63:32) and *rLo (bits 31:0).  Panics if no
   case matches. */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
                                const IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);

   /* 64-bit literal: materialise each 32-bit half with an Imm32. */
   if (e->tag == Iex_Const) {
      ULong w64 = e->Iex.Const.con->Ico.U64;
      UInt wHi = toUInt(w64 >> 32);
      UInt wLo = toUInt(w64);
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      vassert(e->Iex.Const.con->tag == Ico_U64);
      addInstr(env, ARMInstr_Imm32(tHi, wHi));
      addInstr(env, ARMInstr_Imm32(tLo, wLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* read 64-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
         /* With NEON, 64-bit temps live in D registers; transfer the
            value out into a pair of int registers. */
         HReg tHi = newVRegI(env);
         HReg tLo = newVRegI(env);
         HReg tmp = iselNeon64Expr(env, e);
         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
         *rHi = tHi;
         *rLo = tLo;
      } else {
         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
      }
      return;
   }

   /* 64-bit load */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg tLo, tHi, rA;
      vassert(e->Iex.Load.ty == Ity_I64);
      rA = iselIntExpr_R(env, e->Iex.Load.addr);
      tHi = newVRegI(env);
      tLo = newVRegI(env);
      /* Little-endian: high word at addr+4, low word at addr+0. */
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                    tHi, ARMAMode1_RI(rA, 4)));
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                    tLo, ARMAMode1_RI(rA, 0)));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit GET: two 32-bit loads from the guest state, which is
      addressed off r8. */
   if (e->tag == Iex_Get) {
      ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
      ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         /* 32 x 32 -> 64 multiply */
         case Iop_MullS32:
         case Iop_MullU32: {
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
                              ? ARMmul_SX : ARMmul_ZX;
            /* NOTE(review): the moves suggest ARMInstr_Mul takes its
               operands in r2/r3 and produces the 64-bit result in
               r1:r0 -- confirm against host_arm_defs.c. */
            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
            addInstr(env, ARMInstr_Mul(mop));
            addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
            addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         case Iop_Or64: {
            HReg xLo, xHi, yLo, yHi;
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            /* OR each half independently. */
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         case Iop_Add64: {
            HReg xLo, xHi, yLo, yHi;
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            /* Low halves: add and set carry; high halves: add with
               carry.  ADDS must precede ADC. */
            addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
            addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 32HLto64(e1,e2) */
         case Iop_32HLto64: {
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;
         }

         default:
            break;
      }
   }

   /* --------- UNARY ops --------- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         /* ReinterpF64asI64 */
         case Iop_ReinterpF64asI64: {
            HReg dstHi = newVRegI(env);
            HReg dstLo = newVRegI(env);
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            /* Move the D register's bit pattern out unchanged. */
            addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         /* Left64(e) */
         case Iop_Left64: {
            HReg yLo, yHi;
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            HReg zero = newVRegI(env);
            /* yHi:yLo = arg */
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
            /* zero = 0 */
            addInstr(env, ARMInstr_Imm32(zero, 0));
            /* tLo = 0 - yLo, and set carry */
            addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
                                       tLo, zero, ARMRI84_R(yLo)));
            /* tHi = 0 - yHi - carry */
            addInstr(env, ARMInstr_Alu(ARMalu_SBC,
                                       tHi, zero, ARMRI84_R(yHi)));
            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
               back in, so as to give the final result
               tHi:tLo = arg | -arg. */
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* CmpwNEZ64(e) */
         case Iop_CmpwNEZ64: {
            HReg srcLo, srcHi;
            HReg tmp1 = newVRegI(env);
            HReg tmp2 = newVRegI(env);
            /* srcHi:srcLo = arg */
            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
            /* tmp1 = srcHi | srcLo */
            addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                       tmp1, srcHi, ARMRI84_R(srcLo)));
            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
            addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
            addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                       tmp2, tmp2, ARMRI84_R(tmp1)));
            addInstr(env, ARMInstr_Shift(ARMsh_SAR,
                                         tmp2, tmp2, ARMRI5_I5(31)));
            /* tmp2 is all-zeroes or all-ones; both halves share it. */
            *rHi = tmp2;
            *rLo = tmp2;
            return;
         }

         case Iop_1Sto64: {
            HReg dst = newVRegI(env);
            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            ARMRI5* amt = ARMRI5_I5(31);
            /* This is really rough.  We could do much better here;
               perhaps mvn{cond} dst, #0 as the second insn?
               (same applies to 1Sto32) */
            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
            /* Replicate bit 0 across the word: shl 31 then sar 31. */
            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
            *rHi = dst;
            *rLo = dst;
            return;
         }

         default:
            break;
      }
   } /* if (e->tag == Iex_Unop) */

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_ITE) { // VFD
      IRType tyC;
      HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
      ARMCondCode cc;
      tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
      vassert(tyC == Ity_I1);
      iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
      iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
      dstHi = newVRegI(env);
      dstLo = newVRegI(env);
      /* Start with the iftrue value, then conditionally overwrite
         both halves with iffalse using the inverted condition
         (cc ^ 1). */
      addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
      addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
      cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
      addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   /* It is convenient sometimes to call iselInt64Expr even when we
      have NEON support (e.g. in do_helper_call we need 64-bit
      arguments as 2 x 32 regs). */
   if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      HReg tmp = iselNeon64Expr(env, e);
      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
      *rHi = tHi;
      *rLo = tLo;
      return ;
   }

   /* No pattern matched: give up. */
   ppIRExpr(e);
   vpanic("iselInt64Expr");
}
2338 /*---------------------------------------------------------*/
2339 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2340 /*---------------------------------------------------------*/
2342 static HReg iselNeon64Expr ( ISelEnv* env, const IRExpr* e )
2344 HReg r;
2345 vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
2346 r = iselNeon64Expr_wrk( env, e );
2347 vassert(hregClass(r) == HRcFlt64);
2348 vassert(hregIsVirtual(r));
2349 return r;
2352 /* DO NOT CALL THIS DIRECTLY */
2353 static HReg iselNeon64Expr_wrk ( ISelEnv* env, const IRExpr* e )
2355 IRType ty = typeOfIRExpr(env->type_env, e);
2356 MatchInfo mi;
2357 vassert(e);
2358 vassert(ty == Ity_I64);
2360 if (e->tag == Iex_RdTmp) {
2361 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2364 if (e->tag == Iex_Const) {
2365 HReg rLo, rHi;
2366 HReg res = newVRegD(env);
2367 iselInt64Expr(&rHi, &rLo, env, e);
2368 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2369 return res;
2372 /* 64-bit load */
2373 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2374 HReg res = newVRegD(env);
2375 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2376 vassert(ty == Ity_I64);
2377 addInstr(env, ARMInstr_NLdStD(True, res, am));
2378 return res;
2381 /* 64-bit GET */
2382 if (e->tag == Iex_Get) {
2383 HReg addr = newVRegI(env);
2384 HReg res = newVRegD(env);
2385 vassert(ty == Ity_I64);
2386 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2387 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2388 return res;
2391 /* --------- BINARY ops --------- */
2392 if (e->tag == Iex_Binop) {
2393 switch (e->Iex.Binop.op) {
2395 /* 32 x 32 -> 64 multiply */
2396 case Iop_MullS32:
2397 case Iop_MullU32: {
2398 HReg rLo, rHi;
2399 HReg res = newVRegD(env);
2400 iselInt64Expr(&rHi, &rLo, env, e);
2401 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2402 return res;
2405 case Iop_And64: {
2406 HReg res = newVRegD(env);
2407 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2408 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2409 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2410 res, argL, argR, 4, False));
2411 return res;
2413 case Iop_Or64: {
2414 HReg res = newVRegD(env);
2415 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2416 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2417 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2418 res, argL, argR, 4, False));
2419 return res;
2421 case Iop_Xor64: {
2422 HReg res = newVRegD(env);
2423 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2424 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2425 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2426 res, argL, argR, 4, False));
2427 return res;
2430 /* 32HLto64(e1,e2) */
2431 case Iop_32HLto64: {
2432 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2433 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2434 HReg res = newVRegD(env);
2435 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2436 return res;
2439 case Iop_Add8x8:
2440 case Iop_Add16x4:
2441 case Iop_Add32x2:
2442 case Iop_Add64: {
2443 HReg res = newVRegD(env);
2444 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2445 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2446 UInt size;
2447 switch (e->Iex.Binop.op) {
2448 case Iop_Add8x8: size = 0; break;
2449 case Iop_Add16x4: size = 1; break;
2450 case Iop_Add32x2: size = 2; break;
2451 case Iop_Add64: size = 3; break;
2452 default: vassert(0);
2454 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2455 res, argL, argR, size, False));
2456 return res;
2458 case Iop_Add32Fx2: {
2459 HReg res = newVRegD(env);
2460 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2461 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2462 UInt size = 0;
2463 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2464 res, argL, argR, size, False));
2465 return res;
2467 case Iop_RecipStep32Fx2: {
2468 HReg res = newVRegD(env);
2469 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2470 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2471 UInt size = 0;
2472 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2473 res, argL, argR, size, False));
2474 return res;
2476 case Iop_RSqrtStep32Fx2: {
2477 HReg res = newVRegD(env);
2478 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2479 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2480 UInt size = 0;
2481 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2482 res, argL, argR, size, False));
2483 return res;
2486 // These 6 verified 18 Apr 2013
2487 case Iop_InterleaveHI32x2:
2488 case Iop_InterleaveLO32x2:
2489 case Iop_InterleaveOddLanes8x8:
2490 case Iop_InterleaveEvenLanes8x8:
2491 case Iop_InterleaveOddLanes16x4:
2492 case Iop_InterleaveEvenLanes16x4: {
2493 HReg rD = newVRegD(env);
2494 HReg rM = newVRegD(env);
2495 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2496 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2497 UInt size;
2498 Bool resRd; // is the result in rD or rM ?
2499 switch (e->Iex.Binop.op) {
2500 case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break;
2501 case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break;
2502 case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break;
2503 case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break;
2504 case Iop_InterleaveHI32x2: resRd = False; size = 2; break;
2505 case Iop_InterleaveLO32x2: resRd = True; size = 2; break;
2506 default: vassert(0);
2508 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2509 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2510 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2511 return resRd ? rD : rM;
2514 // These 4 verified 18 Apr 2013
2515 case Iop_InterleaveHI8x8:
2516 case Iop_InterleaveLO8x8:
2517 case Iop_InterleaveHI16x4:
2518 case Iop_InterleaveLO16x4: {
2519 HReg rD = newVRegD(env);
2520 HReg rM = newVRegD(env);
2521 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2522 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2523 UInt size;
2524 Bool resRd; // is the result in rD or rM ?
2525 switch (e->Iex.Binop.op) {
2526 case Iop_InterleaveHI8x8: resRd = False; size = 0; break;
2527 case Iop_InterleaveLO8x8: resRd = True; size = 0; break;
2528 case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2529 case Iop_InterleaveLO16x4: resRd = True; size = 1; break;
2530 default: vassert(0);
2532 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2533 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2534 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2535 return resRd ? rD : rM;
2538 // These 4 verified 18 Apr 2013
2539 case Iop_CatOddLanes8x8:
2540 case Iop_CatEvenLanes8x8:
2541 case Iop_CatOddLanes16x4:
2542 case Iop_CatEvenLanes16x4: {
2543 HReg rD = newVRegD(env);
2544 HReg rM = newVRegD(env);
2545 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2546 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2547 UInt size;
2548 Bool resRd; // is the result in rD or rM ?
2549 switch (e->Iex.Binop.op) {
2550 case Iop_CatOddLanes8x8: resRd = False; size = 0; break;
2551 case Iop_CatEvenLanes8x8: resRd = True; size = 0; break;
2552 case Iop_CatOddLanes16x4: resRd = False; size = 1; break;
2553 case Iop_CatEvenLanes16x4: resRd = True; size = 1; break;
2554 default: vassert(0);
2556 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2557 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2558 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2559 return resRd ? rD : rM;
2562 case Iop_QAdd8Ux8:
2563 case Iop_QAdd16Ux4:
2564 case Iop_QAdd32Ux2:
2565 case Iop_QAdd64Ux1: {
2566 HReg res = newVRegD(env);
2567 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2568 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2569 UInt size;
2570 switch (e->Iex.Binop.op) {
2571 case Iop_QAdd8Ux8: size = 0; break;
2572 case Iop_QAdd16Ux4: size = 1; break;
2573 case Iop_QAdd32Ux2: size = 2; break;
2574 case Iop_QAdd64Ux1: size = 3; break;
2575 default: vassert(0);
2577 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2578 res, argL, argR, size, False));
2579 return res;
2581 case Iop_QAdd8Sx8:
2582 case Iop_QAdd16Sx4:
2583 case Iop_QAdd32Sx2:
2584 case Iop_QAdd64Sx1: {
2585 HReg res = newVRegD(env);
2586 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2587 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2588 UInt size;
2589 switch (e->Iex.Binop.op) {
2590 case Iop_QAdd8Sx8: size = 0; break;
2591 case Iop_QAdd16Sx4: size = 1; break;
2592 case Iop_QAdd32Sx2: size = 2; break;
2593 case Iop_QAdd64Sx1: size = 3; break;
2594 default: vassert(0);
2596 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2597 res, argL, argR, size, False));
2598 return res;
2600 case Iop_Sub8x8:
2601 case Iop_Sub16x4:
2602 case Iop_Sub32x2:
2603 case Iop_Sub64: {
2604 HReg res = newVRegD(env);
2605 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2606 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2607 UInt size;
2608 switch (e->Iex.Binop.op) {
2609 case Iop_Sub8x8: size = 0; break;
2610 case Iop_Sub16x4: size = 1; break;
2611 case Iop_Sub32x2: size = 2; break;
2612 case Iop_Sub64: size = 3; break;
2613 default: vassert(0);
2615 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2616 res, argL, argR, size, False));
2617 return res;
2619 case Iop_Sub32Fx2: {
2620 HReg res = newVRegD(env);
2621 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2622 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2623 UInt size = 0;
2624 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2625 res, argL, argR, size, False));
2626 return res;
2628 case Iop_QSub8Ux8:
2629 case Iop_QSub16Ux4:
2630 case Iop_QSub32Ux2:
2631 case Iop_QSub64Ux1: {
2632 HReg res = newVRegD(env);
2633 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2634 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2635 UInt size;
2636 switch (e->Iex.Binop.op) {
2637 case Iop_QSub8Ux8: size = 0; break;
2638 case Iop_QSub16Ux4: size = 1; break;
2639 case Iop_QSub32Ux2: size = 2; break;
2640 case Iop_QSub64Ux1: size = 3; break;
2641 default: vassert(0);
2643 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2644 res, argL, argR, size, False));
2645 return res;
2647 case Iop_QSub8Sx8:
2648 case Iop_QSub16Sx4:
2649 case Iop_QSub32Sx2:
2650 case Iop_QSub64Sx1: {
2651 HReg res = newVRegD(env);
2652 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2653 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2654 UInt size;
2655 switch (e->Iex.Binop.op) {
2656 case Iop_QSub8Sx8: size = 0; break;
2657 case Iop_QSub16Sx4: size = 1; break;
2658 case Iop_QSub32Sx2: size = 2; break;
2659 case Iop_QSub64Sx1: size = 3; break;
2660 default: vassert(0);
2662 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2663 res, argL, argR, size, False));
2664 return res;
2666 case Iop_Max8Ux8:
2667 case Iop_Max16Ux4:
2668 case Iop_Max32Ux2: {
2669 HReg res = newVRegD(env);
2670 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2671 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2672 UInt size;
2673 switch (e->Iex.Binop.op) {
2674 case Iop_Max8Ux8: size = 0; break;
2675 case Iop_Max16Ux4: size = 1; break;
2676 case Iop_Max32Ux2: size = 2; break;
2677 default: vassert(0);
2679 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2680 res, argL, argR, size, False));
2681 return res;
2683 case Iop_Max8Sx8:
2684 case Iop_Max16Sx4:
2685 case Iop_Max32Sx2: {
2686 HReg res = newVRegD(env);
2687 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2688 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2689 UInt size;
2690 switch (e->Iex.Binop.op) {
2691 case Iop_Max8Sx8: size = 0; break;
2692 case Iop_Max16Sx4: size = 1; break;
2693 case Iop_Max32Sx2: size = 2; break;
2694 default: vassert(0);
2696 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2697 res, argL, argR, size, False));
2698 return res;
2700 case Iop_Min8Ux8:
2701 case Iop_Min16Ux4:
2702 case Iop_Min32Ux2: {
2703 HReg res = newVRegD(env);
2704 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2705 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2706 UInt size;
2707 switch (e->Iex.Binop.op) {
2708 case Iop_Min8Ux8: size = 0; break;
2709 case Iop_Min16Ux4: size = 1; break;
2710 case Iop_Min32Ux2: size = 2; break;
2711 default: vassert(0);
2713 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2714 res, argL, argR, size, False));
2715 return res;
2717 case Iop_Min8Sx8:
2718 case Iop_Min16Sx4:
2719 case Iop_Min32Sx2: {
2720 HReg res = newVRegD(env);
2721 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2722 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2723 UInt size;
2724 switch (e->Iex.Binop.op) {
2725 case Iop_Min8Sx8: size = 0; break;
2726 case Iop_Min16Sx4: size = 1; break;
2727 case Iop_Min32Sx2: size = 2; break;
2728 default: vassert(0);
2730 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2731 res, argL, argR, size, False));
2732 return res;
2734 case Iop_Sar8x8:
2735 case Iop_Sar16x4:
2736 case Iop_Sar32x2: {
2737 HReg res = newVRegD(env);
2738 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2739 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2740 HReg argR2 = newVRegD(env);
2741 HReg zero = newVRegD(env);
2742 UInt size;
2743 switch (e->Iex.Binop.op) {
2744 case Iop_Sar8x8: size = 0; break;
2745 case Iop_Sar16x4: size = 1; break;
2746 case Iop_Sar32x2: size = 2; break;
2747 case Iop_Sar64: size = 3; break;
2748 default: vassert(0);
2750 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2751 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2752 argR2, zero, argR, size, False));
2753 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2754 res, argL, argR2, size, False));
2755 return res;
2757 case Iop_Sal8x8:
2758 case Iop_Sal16x4:
2759 case Iop_Sal32x2:
2760 case Iop_Sal64x1: {
2761 HReg res = newVRegD(env);
2762 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2763 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2764 UInt size;
2765 switch (e->Iex.Binop.op) {
2766 case Iop_Sal8x8: size = 0; break;
2767 case Iop_Sal16x4: size = 1; break;
2768 case Iop_Sal32x2: size = 2; break;
2769 case Iop_Sal64x1: size = 3; break;
2770 default: vassert(0);
2772 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2773 res, argL, argR, size, False));
2774 return res;
2776 case Iop_Shr8x8:
2777 case Iop_Shr16x4:
2778 case Iop_Shr32x2: {
2779 HReg res = newVRegD(env);
2780 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2781 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2782 HReg argR2 = newVRegD(env);
2783 HReg zero = newVRegD(env);
2784 UInt size;
2785 switch (e->Iex.Binop.op) {
2786 case Iop_Shr8x8: size = 0; break;
2787 case Iop_Shr16x4: size = 1; break;
2788 case Iop_Shr32x2: size = 2; break;
2789 default: vassert(0);
2791 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2792 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2793 argR2, zero, argR, size, False));
2794 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2795 res, argL, argR2, size, False));
2796 return res;
2798 case Iop_Shl8x8:
2799 case Iop_Shl16x4:
2800 case Iop_Shl32x2: {
2801 HReg res = newVRegD(env);
2802 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2803 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2804 UInt size;
2805 switch (e->Iex.Binop.op) {
2806 case Iop_Shl8x8: size = 0; break;
2807 case Iop_Shl16x4: size = 1; break;
2808 case Iop_Shl32x2: size = 2; break;
2809 default: vassert(0);
2811 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2812 res, argL, argR, size, False));
2813 return res;
2815 case Iop_QShl8x8:
2816 case Iop_QShl16x4:
2817 case Iop_QShl32x2:
2818 case Iop_QShl64x1: {
2819 HReg res = newVRegD(env);
2820 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2821 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2822 UInt size;
2823 switch (e->Iex.Binop.op) {
2824 case Iop_QShl8x8: size = 0; break;
2825 case Iop_QShl16x4: size = 1; break;
2826 case Iop_QShl32x2: size = 2; break;
2827 case Iop_QShl64x1: size = 3; break;
2828 default: vassert(0);
2830 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2831 res, argL, argR, size, False));
2832 return res;
2834 case Iop_QSal8x8:
2835 case Iop_QSal16x4:
2836 case Iop_QSal32x2:
2837 case Iop_QSal64x1: {
2838 HReg res = newVRegD(env);
2839 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2840 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2841 UInt size;
2842 switch (e->Iex.Binop.op) {
2843 case Iop_QSal8x8: size = 0; break;
2844 case Iop_QSal16x4: size = 1; break;
2845 case Iop_QSal32x2: size = 2; break;
2846 case Iop_QSal64x1: size = 3; break;
2847 default: vassert(0);
2849 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2850 res, argL, argR, size, False));
2851 return res;
2853 case Iop_QShlNsatUU8x8:
2854 case Iop_QShlNsatUU16x4:
2855 case Iop_QShlNsatUU32x2:
2856 case Iop_QShlNsatUU64x1: {
2857 HReg res = newVRegD(env);
2858 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2859 UInt size, imm;
2860 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2861 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2862 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
2863 "second argument only\n");
2865 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2866 switch (e->Iex.Binop.op) {
2867 case Iop_QShlNsatUU8x8: size = 8 | imm; break;
2868 case Iop_QShlNsatUU16x4: size = 16 | imm; break;
2869 case Iop_QShlNsatUU32x2: size = 32 | imm; break;
2870 case Iop_QShlNsatUU64x1: size = 64 | imm; break;
2871 default: vassert(0);
2873 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2874 res, argL, size, False));
2875 return res;
2877 case Iop_QShlNsatSU8x8:
2878 case Iop_QShlNsatSU16x4:
2879 case Iop_QShlNsatSU32x2:
2880 case Iop_QShlNsatSU64x1: {
2881 HReg res = newVRegD(env);
2882 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2883 UInt size, imm;
2884 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2885 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2886 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
2887 "second argument only\n");
2889 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2890 switch (e->Iex.Binop.op) {
2891 case Iop_QShlNsatSU8x8: size = 8 | imm; break;
2892 case Iop_QShlNsatSU16x4: size = 16 | imm; break;
2893 case Iop_QShlNsatSU32x2: size = 32 | imm; break;
2894 case Iop_QShlNsatSU64x1: size = 64 | imm; break;
2895 default: vassert(0);
2897 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2898 res, argL, size, False));
2899 return res;
2901 case Iop_QShlNsatSS8x8:
2902 case Iop_QShlNsatSS16x4:
2903 case Iop_QShlNsatSS32x2:
2904 case Iop_QShlNsatSS64x1: {
2905 HReg res = newVRegD(env);
2906 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2907 UInt size, imm;
2908 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2909 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2910 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
2911 "second argument only\n");
2913 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2914 switch (e->Iex.Binop.op) {
2915 case Iop_QShlNsatSS8x8: size = 8 | imm; break;
2916 case Iop_QShlNsatSS16x4: size = 16 | imm; break;
2917 case Iop_QShlNsatSS32x2: size = 32 | imm; break;
2918 case Iop_QShlNsatSS64x1: size = 64 | imm; break;
2919 default: vassert(0);
2921 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2922 res, argL, size, False));
2923 return res;
2925 case Iop_ShrN8x8:
2926 case Iop_ShrN16x4:
2927 case Iop_ShrN32x2:
2928 case Iop_Shr64: {
2929 HReg res = newVRegD(env);
2930 HReg tmp = newVRegD(env);
2931 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2932 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2933 HReg argR2 = newVRegI(env);
2934 UInt size;
2935 switch (e->Iex.Binop.op) {
2936 case Iop_ShrN8x8: size = 0; break;
2937 case Iop_ShrN16x4: size = 1; break;
2938 case Iop_ShrN32x2: size = 2; break;
2939 case Iop_Shr64: size = 3; break;
2940 default: vassert(0);
2942 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2943 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2944 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2945 res, argL, tmp, size, False));
2946 return res;
2948 case Iop_ShlN8x8:
2949 case Iop_ShlN16x4:
2950 case Iop_ShlN32x2:
2951 case Iop_Shl64: {
2952 HReg res = newVRegD(env);
2953 HReg tmp = newVRegD(env);
2954 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2955 /* special-case Shl64(x, imm8) since the Neon front
2956 end produces a lot of those for V{LD,ST}{1,2,3,4}. */
2957 if (e->Iex.Binop.op == Iop_Shl64
2958 && e->Iex.Binop.arg2->tag == Iex_Const) {
2959 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
2960 Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2961 if (nshift >= 1 && nshift <= 63) {
2962 addInstr(env, ARMInstr_NShl64(res, argL, nshift));
2963 return res;
2965 /* else fall through to general case */
2967 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2968 UInt size;
2969 switch (e->Iex.Binop.op) {
2970 case Iop_ShlN8x8: size = 0; break;
2971 case Iop_ShlN16x4: size = 1; break;
2972 case Iop_ShlN32x2: size = 2; break;
2973 case Iop_Shl64: size = 3; break;
2974 default: vassert(0);
2976 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
2977 tmp, argR, 0, False));
2978 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2979 res, argL, tmp, size, False));
2980 return res;
2982 case Iop_SarN8x8:
2983 case Iop_SarN16x4:
2984 case Iop_SarN32x2:
2985 case Iop_Sar64: {
2986 HReg res = newVRegD(env);
2987 HReg tmp = newVRegD(env);
2988 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2989 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2990 HReg argR2 = newVRegI(env);
2991 UInt size;
2992 switch (e->Iex.Binop.op) {
2993 case Iop_SarN8x8: size = 0; break;
2994 case Iop_SarN16x4: size = 1; break;
2995 case Iop_SarN32x2: size = 2; break;
2996 case Iop_Sar64: size = 3; break;
2997 default: vassert(0);
2999 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3000 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3001 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3002 res, argL, tmp, size, False));
3003 return res;
3005 case Iop_CmpGT8Ux8:
3006 case Iop_CmpGT16Ux4:
3007 case Iop_CmpGT32Ux2: {
3008 HReg res = newVRegD(env);
3009 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3010 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3011 UInt size;
3012 switch (e->Iex.Binop.op) {
3013 case Iop_CmpGT8Ux8: size = 0; break;
3014 case Iop_CmpGT16Ux4: size = 1; break;
3015 case Iop_CmpGT32Ux2: size = 2; break;
3016 default: vassert(0);
3018 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
3019 res, argL, argR, size, False));
3020 return res;
3022 case Iop_CmpGT8Sx8:
3023 case Iop_CmpGT16Sx4:
3024 case Iop_CmpGT32Sx2: {
3025 HReg res = newVRegD(env);
3026 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3027 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3028 UInt size;
3029 switch (e->Iex.Binop.op) {
3030 case Iop_CmpGT8Sx8: size = 0; break;
3031 case Iop_CmpGT16Sx4: size = 1; break;
3032 case Iop_CmpGT32Sx2: size = 2; break;
3033 default: vassert(0);
3035 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
3036 res, argL, argR, size, False));
3037 return res;
3039 case Iop_CmpEQ8x8:
3040 case Iop_CmpEQ16x4:
3041 case Iop_CmpEQ32x2: {
3042 HReg res = newVRegD(env);
3043 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3044 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3045 UInt size;
3046 switch (e->Iex.Binop.op) {
3047 case Iop_CmpEQ8x8: size = 0; break;
3048 case Iop_CmpEQ16x4: size = 1; break;
3049 case Iop_CmpEQ32x2: size = 2; break;
3050 default: vassert(0);
3052 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
3053 res, argL, argR, size, False));
3054 return res;
3056 case Iop_Mul8x8:
3057 case Iop_Mul16x4:
3058 case Iop_Mul32x2: {
3059 HReg res = newVRegD(env);
3060 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3061 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3062 UInt size = 0;
3063 switch(e->Iex.Binop.op) {
3064 case Iop_Mul8x8: size = 0; break;
3065 case Iop_Mul16x4: size = 1; break;
3066 case Iop_Mul32x2: size = 2; break;
3067 default: vassert(0);
3069 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
3070 res, argL, argR, size, False));
3071 return res;
3073 case Iop_Mul32Fx2: {
3074 HReg res = newVRegD(env);
3075 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3076 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3077 UInt size = 0;
3078 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
3079 res, argL, argR, size, False));
3080 return res;
3082 case Iop_QDMulHi16Sx4:
3083 case Iop_QDMulHi32Sx2: {
3084 HReg res = newVRegD(env);
3085 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3086 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3087 UInt size = 0;
3088 switch(e->Iex.Binop.op) {
3089 case Iop_QDMulHi16Sx4: size = 1; break;
3090 case Iop_QDMulHi32Sx2: size = 2; break;
3091 default: vassert(0);
3093 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
3094 res, argL, argR, size, False));
3095 return res;
3098 case Iop_QRDMulHi16Sx4:
3099 case Iop_QRDMulHi32Sx2: {
3100 HReg res = newVRegD(env);
3101 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3102 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3103 UInt size = 0;
3104 switch(e->Iex.Binop.op) {
3105 case Iop_QRDMulHi16Sx4: size = 1; break;
3106 case Iop_QRDMulHi32Sx2: size = 2; break;
3107 default: vassert(0);
3109 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
3110 res, argL, argR, size, False));
3111 return res;
3114 case Iop_PwAdd8x8:
3115 case Iop_PwAdd16x4:
3116 case Iop_PwAdd32x2: {
3117 HReg res = newVRegD(env);
3118 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3119 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3120 UInt size = 0;
3121 switch(e->Iex.Binop.op) {
3122 case Iop_PwAdd8x8: size = 0; break;
3123 case Iop_PwAdd16x4: size = 1; break;
3124 case Iop_PwAdd32x2: size = 2; break;
3125 default: vassert(0);
3127 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
3128 res, argL, argR, size, False));
3129 return res;
3131 case Iop_PwAdd32Fx2: {
3132 HReg res = newVRegD(env);
3133 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3134 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3135 UInt size = 0;
3136 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
3137 res, argL, argR, size, False));
3138 return res;
3140 case Iop_PwMin8Ux8:
3141 case Iop_PwMin16Ux4:
3142 case Iop_PwMin32Ux2: {
3143 HReg res = newVRegD(env);
3144 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3145 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3146 UInt size = 0;
3147 switch(e->Iex.Binop.op) {
3148 case Iop_PwMin8Ux8: size = 0; break;
3149 case Iop_PwMin16Ux4: size = 1; break;
3150 case Iop_PwMin32Ux2: size = 2; break;
3151 default: vassert(0);
3153 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
3154 res, argL, argR, size, False));
3155 return res;
3157 case Iop_PwMin8Sx8:
3158 case Iop_PwMin16Sx4:
3159 case Iop_PwMin32Sx2: {
3160 HReg res = newVRegD(env);
3161 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3162 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3163 UInt size = 0;
3164 switch(e->Iex.Binop.op) {
3165 case Iop_PwMin8Sx8: size = 0; break;
3166 case Iop_PwMin16Sx4: size = 1; break;
3167 case Iop_PwMin32Sx2: size = 2; break;
3168 default: vassert(0);
3170 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3171 res, argL, argR, size, False));
3172 return res;
3174 case Iop_PwMax8Ux8:
3175 case Iop_PwMax16Ux4:
3176 case Iop_PwMax32Ux2: {
3177 HReg res = newVRegD(env);
3178 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3179 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3180 UInt size = 0;
3181 switch(e->Iex.Binop.op) {
3182 case Iop_PwMax8Ux8: size = 0; break;
3183 case Iop_PwMax16Ux4: size = 1; break;
3184 case Iop_PwMax32Ux2: size = 2; break;
3185 default: vassert(0);
3187 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3188 res, argL, argR, size, False));
3189 return res;
3191 case Iop_PwMax8Sx8:
3192 case Iop_PwMax16Sx4:
3193 case Iop_PwMax32Sx2: {
3194 HReg res = newVRegD(env);
3195 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3196 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3197 UInt size = 0;
3198 switch(e->Iex.Binop.op) {
3199 case Iop_PwMax8Sx8: size = 0; break;
3200 case Iop_PwMax16Sx4: size = 1; break;
3201 case Iop_PwMax32Sx2: size = 2; break;
3202 default: vassert(0);
3204 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3205 res, argL, argR, size, False));
3206 return res;
3208 case Iop_Perm8x8: {
3209 HReg res = newVRegD(env);
3210 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3211 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3212 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3213 res, argL, argR, 0, False));
3214 return res;
3216 case Iop_PolynomialMul8x8: {
3217 HReg res = newVRegD(env);
3218 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3219 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3220 UInt size = 0;
3221 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3222 res, argL, argR, size, False));
3223 return res;
3225 case Iop_Max32Fx2: {
3226 HReg res = newVRegD(env);
3227 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3228 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3229 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3230 res, argL, argR, 2, False));
3231 return res;
3233 case Iop_Min32Fx2: {
3234 HReg res = newVRegD(env);
3235 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3236 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3237 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3238 res, argL, argR, 2, False));
3239 return res;
3241 case Iop_PwMax32Fx2: {
3242 HReg res = newVRegD(env);
3243 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3244 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3245 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3246 res, argL, argR, 2, False));
3247 return res;
3249 case Iop_PwMin32Fx2: {
3250 HReg res = newVRegD(env);
3251 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3252 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3253 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3254 res, argL, argR, 2, False));
3255 return res;
3257 case Iop_CmpGT32Fx2: {
3258 HReg res = newVRegD(env);
3259 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3260 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3261 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3262 res, argL, argR, 2, False));
3263 return res;
3265 case Iop_CmpGE32Fx2: {
3266 HReg res = newVRegD(env);
3267 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3268 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3269 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3270 res, argL, argR, 2, False));
3271 return res;
3273 case Iop_CmpEQ32Fx2: {
3274 HReg res = newVRegD(env);
3275 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3276 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3277 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3278 res, argL, argR, 2, False));
3279 return res;
3281 case Iop_F32ToFixed32Ux2_RZ:
3282 case Iop_F32ToFixed32Sx2_RZ:
3283 case Iop_Fixed32UToF32x2_RN:
3284 case Iop_Fixed32SToF32x2_RN: {
3285 HReg res = newVRegD(env);
3286 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3287 ARMNeonUnOp op;
3288 UInt imm6;
3289 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3290 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3291 vpanic("ARM supports FP <-> Fixed conversion with constant "
3292 "second argument less than 33 only\n");
3294 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3295 vassert(imm6 <= 32 && imm6 > 0);
3296 imm6 = 64 - imm6;
3297 switch(e->Iex.Binop.op) {
3298 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3299 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3300 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3301 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3302 default: vassert(0);
3304 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3305 return res;
3308 FIXME: is this here or not?
3309 case Iop_VDup8x8:
3310 case Iop_VDup16x4:
3311 case Iop_VDup32x2: {
3312 HReg res = newVRegD(env);
3313 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3314 UInt index;
3315 UInt imm4;
3316 UInt size = 0;
3317 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3318 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3319 vpanic("ARM supports Iop_VDup with constant "
3320 "second argument less than 16 only\n");
3322 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3323 switch(e->Iex.Binop.op) {
3324 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3325 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3326 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3327 default: vassert(0);
3329 if (imm4 >= 16) {
3330 vpanic("ARM supports Iop_VDup with constant "
3331 "second argument less than 16 only\n");
3333 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3334 res, argL, imm4, False));
3335 return res;
3338 default:
3339 break;
3343 /* --------- UNARY ops --------- */
3344 if (e->tag == Iex_Unop) {
3345 switch (e->Iex.Unop.op) {
3347 /* 32Uto64 */
3348 case Iop_32Uto64: {
3349 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3350 HReg rHi = newVRegI(env);
3351 HReg res = newVRegD(env);
3352 addInstr(env, ARMInstr_Imm32(rHi, 0));
3353 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3354 return res;
3357 /* 32Sto64 */
3358 case Iop_32Sto64: {
3359 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3360 HReg rHi = newVRegI(env);
3361 addInstr(env, mk_iMOVds_RR(rHi, rLo));
3362 addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3363 HReg res = newVRegD(env);
3364 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3365 return res;
3368 /* The next 3 are pass-throughs */
3369 /* ReinterpF64asI64 */
3370 case Iop_ReinterpF64asI64:
3371 /* Left64(e) */
3372 case Iop_Left64:
3373 /* CmpwNEZ64(e) */
3374 case Iop_1Sto64: {
3375 HReg rLo, rHi;
3376 HReg res = newVRegD(env);
3377 iselInt64Expr(&rHi, &rLo, env, e);
3378 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3379 return res;
3382 case Iop_Not64: {
3383 DECLARE_PATTERN(p_veqz_8x8);
3384 DECLARE_PATTERN(p_veqz_16x4);
3385 DECLARE_PATTERN(p_veqz_32x2);
3386 DECLARE_PATTERN(p_vcge_8sx8);
3387 DECLARE_PATTERN(p_vcge_16sx4);
3388 DECLARE_PATTERN(p_vcge_32sx2);
3389 DECLARE_PATTERN(p_vcge_8ux8);
3390 DECLARE_PATTERN(p_vcge_16ux4);
3391 DECLARE_PATTERN(p_vcge_32ux2);
3392 DEFINE_PATTERN(p_veqz_8x8,
3393 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3394 DEFINE_PATTERN(p_veqz_16x4,
3395 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3396 DEFINE_PATTERN(p_veqz_32x2,
3397 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3398 DEFINE_PATTERN(p_vcge_8sx8,
3399 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3400 DEFINE_PATTERN(p_vcge_16sx4,
3401 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3402 DEFINE_PATTERN(p_vcge_32sx2,
3403 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3404 DEFINE_PATTERN(p_vcge_8ux8,
3405 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3406 DEFINE_PATTERN(p_vcge_16ux4,
3407 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3408 DEFINE_PATTERN(p_vcge_32ux2,
3409 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3410 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3411 HReg res = newVRegD(env);
3412 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3413 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3414 return res;
3415 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3416 HReg res = newVRegD(env);
3417 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3418 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3419 return res;
3420 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3421 HReg res = newVRegD(env);
3422 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3423 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3424 return res;
3425 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3426 HReg res = newVRegD(env);
3427 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3428 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3429 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3430 res, argL, argR, 0, False));
3431 return res;
3432 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3433 HReg res = newVRegD(env);
3434 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3435 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3436 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3437 res, argL, argR, 1, False));
3438 return res;
3439 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3440 HReg res = newVRegD(env);
3441 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3442 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3443 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3444 res, argL, argR, 2, False));
3445 return res;
3446 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3447 HReg res = newVRegD(env);
3448 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3449 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3450 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3451 res, argL, argR, 0, False));
3452 return res;
3453 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3454 HReg res = newVRegD(env);
3455 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3456 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3457 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3458 res, argL, argR, 1, False));
3459 return res;
3460 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3461 HReg res = newVRegD(env);
3462 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3463 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3464 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3465 res, argL, argR, 2, False));
3466 return res;
3467 } else {
3468 HReg res = newVRegD(env);
3469 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3470 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3471 return res;
3474 case Iop_Dup8x8:
3475 case Iop_Dup16x4:
3476 case Iop_Dup32x2: {
3477 HReg res, arg;
3478 UInt size;
3479 DECLARE_PATTERN(p_vdup_8x8);
3480 DECLARE_PATTERN(p_vdup_16x4);
3481 DECLARE_PATTERN(p_vdup_32x2);
3482 DEFINE_PATTERN(p_vdup_8x8,
3483 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3484 DEFINE_PATTERN(p_vdup_16x4,
3485 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3486 DEFINE_PATTERN(p_vdup_32x2,
3487 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3488 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3489 UInt index;
3490 UInt imm4;
3491 if (mi.bindee[1]->tag == Iex_Const &&
3492 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3493 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3494 imm4 = (index << 1) + 1;
3495 if (index < 8) {
3496 res = newVRegD(env);
3497 arg = iselNeon64Expr(env, mi.bindee[0]);
3498 addInstr(env, ARMInstr_NUnaryS(
3499 ARMneon_VDUP,
3500 mkARMNRS(ARMNRS_Reg, res, 0),
3501 mkARMNRS(ARMNRS_Scalar, arg, index),
3502 imm4, False
3504 return res;
3507 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3508 UInt index;
3509 UInt imm4;
3510 if (mi.bindee[1]->tag == Iex_Const &&
3511 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3512 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3513 imm4 = (index << 2) + 2;
3514 if (index < 4) {
3515 res = newVRegD(env);
3516 arg = iselNeon64Expr(env, mi.bindee[0]);
3517 addInstr(env, ARMInstr_NUnaryS(
3518 ARMneon_VDUP,
3519 mkARMNRS(ARMNRS_Reg, res, 0),
3520 mkARMNRS(ARMNRS_Scalar, arg, index),
3521 imm4, False
3523 return res;
3526 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3527 UInt index;
3528 UInt imm4;
3529 if (mi.bindee[1]->tag == Iex_Const &&
3530 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3531 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3532 imm4 = (index << 3) + 4;
3533 if (index < 2) {
3534 res = newVRegD(env);
3535 arg = iselNeon64Expr(env, mi.bindee[0]);
3536 addInstr(env, ARMInstr_NUnaryS(
3537 ARMneon_VDUP,
3538 mkARMNRS(ARMNRS_Reg, res, 0),
3539 mkARMNRS(ARMNRS_Scalar, arg, index),
3540 imm4, False
3542 return res;
3546 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3547 res = newVRegD(env);
3548 switch (e->Iex.Unop.op) {
3549 case Iop_Dup8x8: size = 0; break;
3550 case Iop_Dup16x4: size = 1; break;
3551 case Iop_Dup32x2: size = 2; break;
3552 default: vassert(0);
3554 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3555 return res;
3557 case Iop_Abs8x8:
3558 case Iop_Abs16x4:
3559 case Iop_Abs32x2: {
3560 HReg res = newVRegD(env);
3561 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3562 UInt size = 0;
3563 switch(e->Iex.Binop.op) {
3564 case Iop_Abs8x8: size = 0; break;
3565 case Iop_Abs16x4: size = 1; break;
3566 case Iop_Abs32x2: size = 2; break;
3567 default: vassert(0);
3569 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3570 return res;
3572 case Iop_Reverse8sIn64_x1:
3573 case Iop_Reverse16sIn64_x1:
3574 case Iop_Reverse32sIn64_x1: {
3575 HReg res = newVRegD(env);
3576 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3577 UInt size = 0;
3578 switch(e->Iex.Binop.op) {
3579 case Iop_Reverse8sIn64_x1: size = 0; break;
3580 case Iop_Reverse16sIn64_x1: size = 1; break;
3581 case Iop_Reverse32sIn64_x1: size = 2; break;
3582 default: vassert(0);
3584 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3585 res, arg, size, False));
3586 return res;
3588 case Iop_Reverse8sIn32_x2:
3589 case Iop_Reverse16sIn32_x2: {
3590 HReg res = newVRegD(env);
3591 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3592 UInt size = 0;
3593 switch(e->Iex.Binop.op) {
3594 case Iop_Reverse8sIn32_x2: size = 0; break;
3595 case Iop_Reverse16sIn32_x2: size = 1; break;
3596 default: vassert(0);
3598 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3599 res, arg, size, False));
3600 return res;
3602 case Iop_Reverse8sIn16_x4: {
3603 HReg res = newVRegD(env);
3604 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3605 UInt size = 0;
3606 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3607 res, arg, size, False));
3608 return res;
3610 case Iop_CmpwNEZ64: {
3611 HReg x_lsh = newVRegD(env);
3612 HReg x_rsh = newVRegD(env);
3613 HReg lsh_amt = newVRegD(env);
3614 HReg rsh_amt = newVRegD(env);
3615 HReg zero = newVRegD(env);
3616 HReg tmp = newVRegD(env);
3617 HReg tmp2 = newVRegD(env);
3618 HReg res = newVRegD(env);
3619 HReg x = newVRegD(env);
3620 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3621 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3622 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3623 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3624 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3625 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3626 rsh_amt, zero, lsh_amt, 2, False));
3627 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3628 x_lsh, x, lsh_amt, 3, False));
3629 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3630 x_rsh, x, rsh_amt, 3, False));
3631 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3632 tmp, x_lsh, x_rsh, 0, False));
3633 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3634 res, tmp, x, 0, False));
3635 return res;
3637 case Iop_CmpNEZ8x8:
3638 case Iop_CmpNEZ16x4:
3639 case Iop_CmpNEZ32x2: {
3640 HReg res = newVRegD(env);
3641 HReg tmp = newVRegD(env);
3642 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3643 UInt size;
3644 switch (e->Iex.Unop.op) {
3645 case Iop_CmpNEZ8x8: size = 0; break;
3646 case Iop_CmpNEZ16x4: size = 1; break;
3647 case Iop_CmpNEZ32x2: size = 2; break;
3648 default: vassert(0);
3650 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3651 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3652 return res;
3654 case Iop_NarrowUn16to8x8:
3655 case Iop_NarrowUn32to16x4:
3656 case Iop_NarrowUn64to32x2: {
3657 HReg res = newVRegD(env);
3658 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3659 UInt size = 0;
3660 switch(e->Iex.Binop.op) {
3661 case Iop_NarrowUn16to8x8: size = 0; break;
3662 case Iop_NarrowUn32to16x4: size = 1; break;
3663 case Iop_NarrowUn64to32x2: size = 2; break;
3664 default: vassert(0);
3666 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3667 res, arg, size, False));
3668 return res;
3670 case Iop_QNarrowUn16Sto8Sx8:
3671 case Iop_QNarrowUn32Sto16Sx4:
3672 case Iop_QNarrowUn64Sto32Sx2: {
3673 HReg res = newVRegD(env);
3674 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3675 UInt size = 0;
3676 switch(e->Iex.Binop.op) {
3677 case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3678 case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3679 case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3680 default: vassert(0);
3682 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3683 res, arg, size, False));
3684 return res;
3686 case Iop_QNarrowUn16Sto8Ux8:
3687 case Iop_QNarrowUn32Sto16Ux4:
3688 case Iop_QNarrowUn64Sto32Ux2: {
3689 HReg res = newVRegD(env);
3690 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3691 UInt size = 0;
3692 switch(e->Iex.Binop.op) {
3693 case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
3694 case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3695 case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3696 default: vassert(0);
3698 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3699 res, arg, size, False));
3700 return res;
3702 case Iop_QNarrowUn16Uto8Ux8:
3703 case Iop_QNarrowUn32Uto16Ux4:
3704 case Iop_QNarrowUn64Uto32Ux2: {
3705 HReg res = newVRegD(env);
3706 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3707 UInt size = 0;
3708 switch(e->Iex.Binop.op) {
3709 case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
3710 case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3711 case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3712 default: vassert(0);
3714 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3715 res, arg, size, False));
3716 return res;
3718 case Iop_PwAddL8Sx8:
3719 case Iop_PwAddL16Sx4:
3720 case Iop_PwAddL32Sx2: {
3721 HReg res = newVRegD(env);
3722 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3723 UInt size = 0;
3724 switch(e->Iex.Binop.op) {
3725 case Iop_PwAddL8Sx8: size = 0; break;
3726 case Iop_PwAddL16Sx4: size = 1; break;
3727 case Iop_PwAddL32Sx2: size = 2; break;
3728 default: vassert(0);
3730 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3731 res, arg, size, False));
3732 return res;
3734 case Iop_PwAddL8Ux8:
3735 case Iop_PwAddL16Ux4:
3736 case Iop_PwAddL32Ux2: {
3737 HReg res = newVRegD(env);
3738 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3739 UInt size = 0;
3740 switch(e->Iex.Binop.op) {
3741 case Iop_PwAddL8Ux8: size = 0; break;
3742 case Iop_PwAddL16Ux4: size = 1; break;
3743 case Iop_PwAddL32Ux2: size = 2; break;
3744 default: vassert(0);
3746 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3747 res, arg, size, False));
3748 return res;
3750 case Iop_Cnt8x8: {
3751 HReg res = newVRegD(env);
3752 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3753 UInt size = 0;
3754 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3755 res, arg, size, False));
3756 return res;
3758 case Iop_Clz8x8:
3759 case Iop_Clz16x4:
3760 case Iop_Clz32x2: {
3761 HReg res = newVRegD(env);
3762 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3763 UInt size = 0;
3764 switch(e->Iex.Binop.op) {
3765 case Iop_Clz8x8: size = 0; break;
3766 case Iop_Clz16x4: size = 1; break;
3767 case Iop_Clz32x2: size = 2; break;
3768 default: vassert(0);
3770 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3771 res, arg, size, False));
3772 return res;
3774 case Iop_Cls8x8:
3775 case Iop_Cls16x4:
3776 case Iop_Cls32x2: {
3777 HReg res = newVRegD(env);
3778 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3779 UInt size = 0;
3780 switch(e->Iex.Binop.op) {
3781 case Iop_Cls8x8: size = 0; break;
3782 case Iop_Cls16x4: size = 1; break;
3783 case Iop_Cls32x2: size = 2; break;
3784 default: vassert(0);
3786 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3787 res, arg, size, False));
3788 return res;
3790 case Iop_F32toI32Sx2_RZ: {
3791 HReg res = newVRegD(env);
3792 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3793 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3794 res, arg, 2, False));
3795 return res;
3797 case Iop_F32toI32Ux2_RZ: {
3798 HReg res = newVRegD(env);
3799 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3800 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3801 res, arg, 2, False));
3802 return res;
3804 case Iop_I32StoF32x2_DEP: {
3805 HReg res = newVRegD(env);
3806 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3807 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3808 res, arg, 2, False));
3809 return res;
3811 case Iop_I32UtoF32x2_DEP: {
3812 HReg res = newVRegD(env);
3813 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3814 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3815 res, arg, 2, False));
3816 return res;
3818 case Iop_F32toF16x4_DEP: {
3819 HReg res = newVRegD(env);
3820 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3821 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3822 res, arg, 2, False));
3823 return res;
3825 case Iop_RecipEst32Fx2: {
3826 HReg res = newVRegD(env);
3827 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3828 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3829 res, argL, 0, False));
3830 return res;
3832 case Iop_RecipEst32Ux2: {
3833 HReg res = newVRegD(env);
3834 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3835 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3836 res, argL, 0, False));
3837 return res;
3839 case Iop_Abs32Fx2: {
3840 DECLARE_PATTERN(p_vabd_32fx2);
3841 DEFINE_PATTERN(p_vabd_32fx2,
3842 unop(Iop_Abs32Fx2,
3843 binop(Iop_Sub32Fx2,
3844 bind(0),
3845 bind(1))));
3846 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3847 HReg res = newVRegD(env);
3848 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3849 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3850 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3851 res, argL, argR, 0, False));
3852 return res;
3853 } else {
3854 HReg res = newVRegD(env);
3855 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3856 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3857 res, arg, 0, False));
3858 return res;
3861 case Iop_RSqrtEst32Fx2: {
3862 HReg res = newVRegD(env);
3863 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3864 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3865 res, arg, 0, False));
3866 return res;
3868 case Iop_RSqrtEst32Ux2: {
3869 HReg res = newVRegD(env);
3870 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3871 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3872 res, arg, 0, False));
3873 return res;
3875 case Iop_Neg32Fx2: {
3876 HReg res = newVRegD(env);
3877 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3878 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3879 res, arg, 0, False));
3880 return res;
3882 case Iop_V128to64:
3883 case Iop_V128HIto64: {
3884 HReg src = iselNeonExpr(env, e->Iex.Unop.arg);
3885 HReg resLo = newVRegD(env);
3886 HReg resHi = newVRegD(env);
3887 addInstr(env, ARMInstr_VXferQ(False/*!toQ*/, src, resHi, resLo));
3888 return e->Iex.Unop.op == Iop_V128HIto64 ? resHi : resLo;
3890 default:
3891 break;
3893 } /* if (e->tag == Iex_Unop) */
3895 if (e->tag == Iex_Triop) {
3896 IRTriop *triop = e->Iex.Triop.details;
3898 switch (triop->op) {
3899 case Iop_Slice64: {
3900 HReg res = newVRegD(env);
3901 HReg argL = iselNeon64Expr(env, triop->arg2);
3902 HReg argR = iselNeon64Expr(env, triop->arg1);
3903 UInt imm4;
3904 if (triop->arg3->tag != Iex_Const ||
3905 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
3906 vpanic("ARM target supports Iop_Extract64 with constant "
3907 "third argument less than 16 only\n");
3909 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
3910 if (imm4 >= 8) {
3911 vpanic("ARM target supports Iop_Extract64 with constant "
3912 "third argument less than 16 only\n");
3914 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3915 res, argL, argR, imm4, False));
3916 return res;
3918 case Iop_SetElem8x8:
3919 case Iop_SetElem16x4:
3920 case Iop_SetElem32x2: {
3921 HReg res = newVRegD(env);
3922 HReg dreg = iselNeon64Expr(env, triop->arg1);
3923 HReg arg = iselIntExpr_R(env, triop->arg3);
3924 UInt index, size;
3925 if (triop->arg2->tag != Iex_Const ||
3926 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
3927 vpanic("ARM target supports SetElem with constant "
3928 "second argument only\n");
3930 index = triop->arg2->Iex.Const.con->Ico.U8;
3931 switch (triop->op) {
3932 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3933 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3934 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3935 default: vassert(0);
3937 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3938 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3939 mkARMNRS(ARMNRS_Scalar, res, index),
3940 mkARMNRS(ARMNRS_Reg, arg, 0),
3941 size, False));
3942 return res;
3944 default:
3945 break;
3949 /* --------- MULTIPLEX --------- */
3950 if (e->tag == Iex_ITE) { // VFD
3951 HReg rLo, rHi;
3952 HReg res = newVRegD(env);
3953 iselInt64Expr(&rHi, &rLo, env, e);
3954 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3955 return res;
3958 ppIRExpr(e);
3959 vpanic("iselNeon64Expr");
3963 static HReg iselNeonExpr ( ISelEnv* env, const IRExpr* e )
3965 HReg r;
3966 vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
3967 r = iselNeonExpr_wrk( env, e );
3968 vassert(hregClass(r) == HRcVec128);
3969 vassert(hregIsVirtual(r));
3970 return r;
3973 /* DO NOT CALL THIS DIRECTLY */
3974 static HReg iselNeonExpr_wrk ( ISelEnv* env, const IRExpr* e )
3976 IRType ty = typeOfIRExpr(env->type_env, e);
3977 MatchInfo mi;
3978 vassert(e);
3979 vassert(ty == Ity_V128);
3981 if (e->tag == Iex_RdTmp) {
3982 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3985 if (e->tag == Iex_Const) {
3986 /* At the moment there should be no 128-bit constants in IR for ARM
3987 generated during disassemble. They are represented as Iop_64HLtoV128
3988 binary operation and are handled among binary ops. */
3989 /* But zero can be created by valgrind internal optimizer */
3990 if (e->Iex.Const.con->Ico.V128 == 0x0000) {
3991 HReg res = newVRegV(env);
3992 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0)));
3993 return res;
3995 if (e->Iex.Const.con->Ico.V128 == 0xFFFF) {
3996 HReg res = newVRegV(env);
3997 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255)));
3998 return res;
4000 ppIRExpr(e);
4001 vpanic("128-bit constant is not implemented");
4004 if (e->tag == Iex_Load) {
4005 HReg res = newVRegV(env);
4006 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
4007 vassert(ty == Ity_V128);
4008 addInstr(env, ARMInstr_NLdStQ(True, res, am));
4009 return res;
4012 if (e->tag == Iex_Get) {
4013 HReg addr = newVRegI(env);
4014 HReg res = newVRegV(env);
4015 vassert(ty == Ity_V128);
4016 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
4017 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
4018 return res;
4021 if (e->tag == Iex_Unop) {
4022 switch (e->Iex.Unop.op) {
4023 case Iop_NotV128: {
4024 DECLARE_PATTERN(p_veqz_8x16);
4025 DECLARE_PATTERN(p_veqz_16x8);
4026 DECLARE_PATTERN(p_veqz_32x4);
4027 DECLARE_PATTERN(p_vcge_8sx16);
4028 DECLARE_PATTERN(p_vcge_16sx8);
4029 DECLARE_PATTERN(p_vcge_32sx4);
4030 DECLARE_PATTERN(p_vcge_8ux16);
4031 DECLARE_PATTERN(p_vcge_16ux8);
4032 DECLARE_PATTERN(p_vcge_32ux4);
4033 DEFINE_PATTERN(p_veqz_8x16,
4034 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
4035 DEFINE_PATTERN(p_veqz_16x8,
4036 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
4037 DEFINE_PATTERN(p_veqz_32x4,
4038 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
4039 DEFINE_PATTERN(p_vcge_8sx16,
4040 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
4041 DEFINE_PATTERN(p_vcge_16sx8,
4042 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
4043 DEFINE_PATTERN(p_vcge_32sx4,
4044 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
4045 DEFINE_PATTERN(p_vcge_8ux16,
4046 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
4047 DEFINE_PATTERN(p_vcge_16ux8,
4048 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
4049 DEFINE_PATTERN(p_vcge_32ux4,
4050 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
4051 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
4052 HReg res = newVRegV(env);
4053 HReg arg = iselNeonExpr(env, mi.bindee[0]);
4054 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
4055 return res;
4056 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
4057 HReg res = newVRegV(env);
4058 HReg arg = iselNeonExpr(env, mi.bindee[0]);
4059 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
4060 return res;
4061 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
4062 HReg res = newVRegV(env);
4063 HReg arg = iselNeonExpr(env, mi.bindee[0]);
4064 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
4065 return res;
4066 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
4067 HReg res = newVRegV(env);
4068 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4069 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4070 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4071 res, argL, argR, 0, True));
4072 return res;
4073 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
4074 HReg res = newVRegV(env);
4075 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4076 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4077 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4078 res, argL, argR, 1, True));
4079 return res;
4080 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
4081 HReg res = newVRegV(env);
4082 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4083 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4084 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4085 res, argL, argR, 2, True));
4086 return res;
4087 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
4088 HReg res = newVRegV(env);
4089 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4090 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4091 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4092 res, argL, argR, 0, True));
4093 return res;
4094 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
4095 HReg res = newVRegV(env);
4096 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4097 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4098 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4099 res, argL, argR, 1, True));
4100 return res;
4101 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
4102 HReg res = newVRegV(env);
4103 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4104 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4105 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4106 res, argL, argR, 2, True));
4107 return res;
4108 } else {
4109 HReg res = newVRegV(env);
4110 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4111 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
4112 return res;
4115 case Iop_Dup8x16:
4116 case Iop_Dup16x8:
4117 case Iop_Dup32x4: {
4118 HReg res, arg;
4119 UInt size;
4120 DECLARE_PATTERN(p_vdup_8x16);
4121 DECLARE_PATTERN(p_vdup_16x8);
4122 DECLARE_PATTERN(p_vdup_32x4);
4123 DEFINE_PATTERN(p_vdup_8x16,
4124 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
4125 DEFINE_PATTERN(p_vdup_16x8,
4126 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
4127 DEFINE_PATTERN(p_vdup_32x4,
4128 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
4129 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
4130 UInt index;
4131 UInt imm4;
4132 if (mi.bindee[1]->tag == Iex_Const &&
4133 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4134 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4135 imm4 = (index << 1) + 1;
4136 if (index < 8) {
4137 res = newVRegV(env);
4138 arg = iselNeon64Expr(env, mi.bindee[0]);
4139 addInstr(env, ARMInstr_NUnaryS(
4140 ARMneon_VDUP,
4141 mkARMNRS(ARMNRS_Reg, res, 0),
4142 mkARMNRS(ARMNRS_Scalar, arg, index),
4143 imm4, True
4145 return res;
4148 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
4149 UInt index;
4150 UInt imm4;
4151 if (mi.bindee[1]->tag == Iex_Const &&
4152 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4153 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4154 imm4 = (index << 2) + 2;
4155 if (index < 4) {
4156 res = newVRegV(env);
4157 arg = iselNeon64Expr(env, mi.bindee[0]);
4158 addInstr(env, ARMInstr_NUnaryS(
4159 ARMneon_VDUP,
4160 mkARMNRS(ARMNRS_Reg, res, 0),
4161 mkARMNRS(ARMNRS_Scalar, arg, index),
4162 imm4, True
4164 return res;
4167 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
4168 UInt index;
4169 UInt imm4;
4170 if (mi.bindee[1]->tag == Iex_Const &&
4171 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4172 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4173 imm4 = (index << 3) + 4;
4174 if (index < 2) {
4175 res = newVRegV(env);
4176 arg = iselNeon64Expr(env, mi.bindee[0]);
4177 addInstr(env, ARMInstr_NUnaryS(
4178 ARMneon_VDUP,
4179 mkARMNRS(ARMNRS_Reg, res, 0),
4180 mkARMNRS(ARMNRS_Scalar, arg, index),
4181 imm4, True
4183 return res;
4187 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4188 res = newVRegV(env);
4189 switch (e->Iex.Unop.op) {
4190 case Iop_Dup8x16: size = 0; break;
4191 case Iop_Dup16x8: size = 1; break;
4192 case Iop_Dup32x4: size = 2; break;
4193 default: vassert(0);
4195 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4196 return res;
4198 case Iop_Abs8x16:
4199 case Iop_Abs16x8:
4200 case Iop_Abs32x4: {
4201 HReg res = newVRegV(env);
4202 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4203 UInt size = 0;
4204 switch(e->Iex.Binop.op) {
4205 case Iop_Abs8x16: size = 0; break;
4206 case Iop_Abs16x8: size = 1; break;
4207 case Iop_Abs32x4: size = 2; break;
4208 default: vassert(0);
4210 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4211 return res;
4213 case Iop_Reverse8sIn64_x2:
4214 case Iop_Reverse16sIn64_x2:
4215 case Iop_Reverse32sIn64_x2: {
4216 HReg res = newVRegV(env);
4217 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4218 UInt size = 0;
4219 switch(e->Iex.Binop.op) {
4220 case Iop_Reverse8sIn64_x2: size = 0; break;
4221 case Iop_Reverse16sIn64_x2: size = 1; break;
4222 case Iop_Reverse32sIn64_x2: size = 2; break;
4223 default: vassert(0);
4225 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4226 res, arg, size, True));
4227 return res;
4229 case Iop_Reverse8sIn32_x4:
4230 case Iop_Reverse16sIn32_x4: {
4231 HReg res = newVRegV(env);
4232 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4233 UInt size = 0;
4234 switch(e->Iex.Binop.op) {
4235 case Iop_Reverse8sIn32_x4: size = 0; break;
4236 case Iop_Reverse16sIn32_x4: size = 1; break;
4237 default: vassert(0);
4239 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4240 res, arg, size, True));
4241 return res;
4243 case Iop_Reverse8sIn16_x8: {
4244 HReg res = newVRegV(env);
4245 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4246 UInt size = 0;
4247 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4248 res, arg, size, True));
4249 return res;
4251 case Iop_CmpNEZ64x2: {
4252 HReg x_lsh = newVRegV(env);
4253 HReg x_rsh = newVRegV(env);
4254 HReg lsh_amt = newVRegV(env);
4255 HReg rsh_amt = newVRegV(env);
4256 HReg zero = newVRegV(env);
4257 HReg tmp = newVRegV(env);
4258 HReg tmp2 = newVRegV(env);
4259 HReg res = newVRegV(env);
4260 HReg x = newVRegV(env);
4261 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4262 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4263 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4264 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4265 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4266 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4267 rsh_amt, zero, lsh_amt, 2, True));
4268 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4269 x_lsh, x, lsh_amt, 3, True));
4270 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4271 x_rsh, x, rsh_amt, 3, True));
4272 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4273 tmp, x_lsh, x_rsh, 0, True));
4274 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4275 res, tmp, x, 0, True));
4276 return res;
4278 case Iop_CmpNEZ8x16:
4279 case Iop_CmpNEZ16x8:
4280 case Iop_CmpNEZ32x4: {
4281 HReg res = newVRegV(env);
4282 HReg tmp = newVRegV(env);
4283 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4284 UInt size;
4285 switch (e->Iex.Unop.op) {
4286 case Iop_CmpNEZ8x16: size = 0; break;
4287 case Iop_CmpNEZ16x8: size = 1; break;
4288 case Iop_CmpNEZ32x4: size = 2; break;
4289 default: vassert(0);
4291 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4292 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4293 return res;
4295 case Iop_Widen8Uto16x8:
4296 case Iop_Widen16Uto32x4:
4297 case Iop_Widen32Uto64x2: {
4298 HReg res = newVRegV(env);
4299 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4300 UInt size;
4301 switch (e->Iex.Unop.op) {
4302 case Iop_Widen8Uto16x8: size = 0; break;
4303 case Iop_Widen16Uto32x4: size = 1; break;
4304 case Iop_Widen32Uto64x2: size = 2; break;
4305 default: vassert(0);
4307 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4308 res, arg, size, True));
4309 return res;
4311 case Iop_Widen8Sto16x8:
4312 case Iop_Widen16Sto32x4:
4313 case Iop_Widen32Sto64x2: {
4314 HReg res = newVRegV(env);
4315 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4316 UInt size;
4317 switch (e->Iex.Unop.op) {
4318 case Iop_Widen8Sto16x8: size = 0; break;
4319 case Iop_Widen16Sto32x4: size = 1; break;
4320 case Iop_Widen32Sto64x2: size = 2; break;
4321 default: vassert(0);
4323 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4324 res, arg, size, True));
4325 return res;
4327 case Iop_PwAddL8Sx16:
4328 case Iop_PwAddL16Sx8:
4329 case Iop_PwAddL32Sx4: {
4330 HReg res = newVRegV(env);
4331 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4332 UInt size = 0;
4333 switch(e->Iex.Binop.op) {
4334 case Iop_PwAddL8Sx16: size = 0; break;
4335 case Iop_PwAddL16Sx8: size = 1; break;
4336 case Iop_PwAddL32Sx4: size = 2; break;
4337 default: vassert(0);
4339 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4340 res, arg, size, True));
4341 return res;
4343 case Iop_PwAddL8Ux16:
4344 case Iop_PwAddL16Ux8:
4345 case Iop_PwAddL32Ux4: {
4346 HReg res = newVRegV(env);
4347 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4348 UInt size = 0;
4349 switch(e->Iex.Binop.op) {
4350 case Iop_PwAddL8Ux16: size = 0; break;
4351 case Iop_PwAddL16Ux8: size = 1; break;
4352 case Iop_PwAddL32Ux4: size = 2; break;
4353 default: vassert(0);
4355 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4356 res, arg, size, True));
4357 return res;
4359 case Iop_Cnt8x16: {
4360 HReg res = newVRegV(env);
4361 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4362 UInt size = 0;
4363 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4364 return res;
4366 case Iop_Clz8x16:
4367 case Iop_Clz16x8:
4368 case Iop_Clz32x4: {
4369 HReg res = newVRegV(env);
4370 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4371 UInt size = 0;
4372 switch(e->Iex.Binop.op) {
4373 case Iop_Clz8x16: size = 0; break;
4374 case Iop_Clz16x8: size = 1; break;
4375 case Iop_Clz32x4: size = 2; break;
4376 default: vassert(0);
4378 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4379 return res;
4381 case Iop_Cls8x16:
4382 case Iop_Cls16x8:
4383 case Iop_Cls32x4: {
4384 HReg res = newVRegV(env);
4385 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4386 UInt size = 0;
4387 switch(e->Iex.Binop.op) {
4388 case Iop_Cls8x16: size = 0; break;
4389 case Iop_Cls16x8: size = 1; break;
4390 case Iop_Cls32x4: size = 2; break;
4391 default: vassert(0);
4393 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4394 return res;
4396 case Iop_F32toI32Sx4_RZ: {
4397 HReg res = newVRegV(env);
4398 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4399 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4400 res, arg, 2, True));
4401 return res;
4403 case Iop_F32toI32Ux4_RZ: {
4404 HReg res = newVRegV(env);
4405 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4406 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4407 res, arg, 2, True));
4408 return res;
4410 case Iop_I32StoF32x4_DEP: {
4411 HReg res = newVRegV(env);
4412 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4413 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4414 res, arg, 2, True));
4415 return res;
4417 case Iop_I32UtoF32x4_DEP: {
4418 HReg res = newVRegV(env);
4419 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4420 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4421 res, arg, 2, True));
4422 return res;
4424 case Iop_F16toF32x4: {
4425 HReg res = newVRegV(env);
4426 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4427 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4428 res, arg, 2, True));
4429 return res;
4431 case Iop_RecipEst32Fx4: {
4432 HReg res = newVRegV(env);
4433 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4434 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4435 res, argL, 0, True));
4436 return res;
4438 case Iop_RecipEst32Ux4: {
4439 HReg res = newVRegV(env);
4440 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4441 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4442 res, argL, 0, True));
4443 return res;
4445 case Iop_Abs32Fx4: {
4446 HReg res = newVRegV(env);
4447 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4448 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4449 res, argL, 0, True));
4450 return res;
4452 case Iop_RSqrtEst32Fx4: {
4453 HReg res = newVRegV(env);
4454 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4455 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4456 res, argL, 0, True));
4457 return res;
4459 case Iop_RSqrtEst32Ux4: {
4460 HReg res = newVRegV(env);
4461 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4462 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4463 res, argL, 0, True));
4464 return res;
4466 case Iop_Neg32Fx4: {
4467 HReg res = newVRegV(env);
4468 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4469 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4470 res, arg, 0, True));
4471 return res;
4473 /* ... */
4474 default:
4475 break;
4479 if (e->tag == Iex_Binop) {
4480 switch (e->Iex.Binop.op) {
4481 case Iop_64HLtoV128: {
4482 /* Try to match into single "VMOV reg, imm" instruction */
4483 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4484 e->Iex.Binop.arg2->tag == Iex_Const &&
4485 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4486 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4487 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4488 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4489 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4490 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4491 if (imm) {
4492 HReg res = newVRegV(env);
4493 addInstr(env, ARMInstr_NeonImm(res, imm));
4494 return res;
4496 if ((imm64 >> 32) == 0LL &&
4497 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4498 HReg tmp1 = newVRegV(env);
4499 HReg tmp2 = newVRegV(env);
4500 HReg res = newVRegV(env);
4501 if (imm->type < 10) {
4502 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4503 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4504 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4505 res, tmp1, tmp2, 4, True));
4506 return res;
4509 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4510 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4511 HReg tmp1 = newVRegV(env);
4512 HReg tmp2 = newVRegV(env);
4513 HReg res = newVRegV(env);
4514 if (imm->type < 10) {
4515 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4516 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4517 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4518 res, tmp1, tmp2, 4, True));
4519 return res;
4523 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4524 it the slow way. */
4525 HReg dHi = iselNeon64Expr(env, e->Iex.Binop.arg1);
4526 HReg dLo = iselNeon64Expr(env, e->Iex.Binop.arg2);
4527 HReg res = newVRegV(env);
4528 addInstr(env, ARMInstr_VXferQ(True/*toQ*/, res, dHi, dLo));
4529 return res;
4531 case Iop_AndV128: {
4532 HReg res = newVRegV(env);
4533 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4534 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4535 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4536 res, argL, argR, 4, True));
4537 return res;
4539 case Iop_OrV128: {
4540 HReg res = newVRegV(env);
4541 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4542 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4543 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4544 res, argL, argR, 4, True));
4545 return res;
4547 case Iop_XorV128: {
4548 HReg res = newVRegV(env);
4549 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4550 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4551 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4552 res, argL, argR, 4, True));
4553 return res;
4555 case Iop_Add8x16:
4556 case Iop_Add16x8:
4557 case Iop_Add32x4:
4558 case Iop_Add64x2: {
4560 FIXME: remove this if not used
4561 DECLARE_PATTERN(p_vrhadd_32sx4);
4562 ULong one = (1LL << 32) | 1LL;
4563 DEFINE_PATTERN(p_vrhadd_32sx4,
4564 binop(Iop_Add32x4,
4565 binop(Iop_Add32x4,
4566 binop(Iop_SarN32x4,
4567 bind(0),
4568 mkU8(1)),
4569 binop(Iop_SarN32x4,
4570 bind(1),
4571 mkU8(1))),
4572 binop(Iop_SarN32x4,
4573 binop(Iop_Add32x4,
4574 binop(Iop_Add32x4,
4575 binop(Iop_AndV128,
4576 bind(0),
4577 mkU128(one)),
4578 binop(Iop_AndV128,
4579 bind(1),
4580 mkU128(one))),
4581 mkU128(one)),
4582 mkU8(1))));
4584 HReg res = newVRegV(env);
4585 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4586 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4587 UInt size;
4588 switch (e->Iex.Binop.op) {
4589 case Iop_Add8x16: size = 0; break;
4590 case Iop_Add16x8: size = 1; break;
4591 case Iop_Add32x4: size = 2; break;
4592 case Iop_Add64x2: size = 3; break;
4593 default:
4594 ppIROp(e->Iex.Binop.op);
4595 vpanic("Illegal element size in VADD");
4597 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4598 res, argL, argR, size, True));
4599 return res;
4601 case Iop_RecipStep32Fx4: {
4602 HReg res = newVRegV(env);
4603 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4604 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4605 UInt size = 0;
4606 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4607 res, argL, argR, size, True));
4608 return res;
4610 case Iop_RSqrtStep32Fx4: {
4611 HReg res = newVRegV(env);
4612 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4613 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4614 UInt size = 0;
4615 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4616 res, argL, argR, size, True));
4617 return res;
4620 // These 6 verified 18 Apr 2013
4621 case Iop_InterleaveEvenLanes8x16:
4622 case Iop_InterleaveOddLanes8x16:
4623 case Iop_InterleaveEvenLanes16x8:
4624 case Iop_InterleaveOddLanes16x8:
4625 case Iop_InterleaveEvenLanes32x4:
4626 case Iop_InterleaveOddLanes32x4: {
4627 HReg rD = newVRegV(env);
4628 HReg rM = newVRegV(env);
4629 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4630 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4631 UInt size;
4632 Bool resRd; // is the result in rD or rM ?
4633 switch (e->Iex.Binop.op) {
4634 case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break;
4635 case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break;
4636 case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break;
4637 case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break;
4638 case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break;
4639 case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break;
4640 default: vassert(0);
4642 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4643 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4644 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
4645 return resRd ? rD : rM;
4648 // These 6 verified 18 Apr 2013
4649 case Iop_InterleaveHI8x16:
4650 case Iop_InterleaveLO8x16:
4651 case Iop_InterleaveHI16x8:
4652 case Iop_InterleaveLO16x8:
4653 case Iop_InterleaveHI32x4:
4654 case Iop_InterleaveLO32x4: {
4655 HReg rD = newVRegV(env);
4656 HReg rM = newVRegV(env);
4657 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4658 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4659 UInt size;
4660 Bool resRd; // is the result in rD or rM ?
4661 switch (e->Iex.Binop.op) {
4662 case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
4663 case Iop_InterleaveLO8x16: resRd = True; size = 0; break;
4664 case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
4665 case Iop_InterleaveLO16x8: resRd = True; size = 1; break;
4666 case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
4667 case Iop_InterleaveLO32x4: resRd = True; size = 2; break;
4668 default: vassert(0);
4670 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4671 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4672 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
4673 return resRd ? rD : rM;
4676 // These 6 verified 18 Apr 2013
4677 case Iop_CatOddLanes8x16:
4678 case Iop_CatEvenLanes8x16:
4679 case Iop_CatOddLanes16x8:
4680 case Iop_CatEvenLanes16x8:
4681 case Iop_CatOddLanes32x4:
4682 case Iop_CatEvenLanes32x4: {
4683 HReg rD = newVRegV(env);
4684 HReg rM = newVRegV(env);
4685 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4686 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4687 UInt size;
4688 Bool resRd; // is the result in rD or rM ?
4689 switch (e->Iex.Binop.op) {
4690 case Iop_CatOddLanes8x16: resRd = False; size = 0; break;
4691 case Iop_CatEvenLanes8x16: resRd = True; size = 0; break;
4692 case Iop_CatOddLanes16x8: resRd = False; size = 1; break;
4693 case Iop_CatEvenLanes16x8: resRd = True; size = 1; break;
4694 case Iop_CatOddLanes32x4: resRd = False; size = 2; break;
4695 case Iop_CatEvenLanes32x4: resRd = True; size = 2; break;
4696 default: vassert(0);
4698 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4699 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4700 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
4701 return resRd ? rD : rM;
4704 case Iop_QAdd8Ux16:
4705 case Iop_QAdd16Ux8:
4706 case Iop_QAdd32Ux4:
4707 case Iop_QAdd64Ux2: {
4708 HReg res = newVRegV(env);
4709 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4710 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4711 UInt size;
4712 switch (e->Iex.Binop.op) {
4713 case Iop_QAdd8Ux16: size = 0; break;
4714 case Iop_QAdd16Ux8: size = 1; break;
4715 case Iop_QAdd32Ux4: size = 2; break;
4716 case Iop_QAdd64Ux2: size = 3; break;
4717 default:
4718 ppIROp(e->Iex.Binop.op);
4719 vpanic("Illegal element size in VQADDU");
4721 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4722 res, argL, argR, size, True));
4723 return res;
4725 case Iop_QAdd8Sx16:
4726 case Iop_QAdd16Sx8:
4727 case Iop_QAdd32Sx4:
4728 case Iop_QAdd64Sx2: {
4729 HReg res = newVRegV(env);
4730 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4731 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4732 UInt size;
4733 switch (e->Iex.Binop.op) {
4734 case Iop_QAdd8Sx16: size = 0; break;
4735 case Iop_QAdd16Sx8: size = 1; break;
4736 case Iop_QAdd32Sx4: size = 2; break;
4737 case Iop_QAdd64Sx2: size = 3; break;
4738 default:
4739 ppIROp(e->Iex.Binop.op);
4740 vpanic("Illegal element size in VQADDS");
4742 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4743 res, argL, argR, size, True));
4744 return res;
4746 case Iop_Sub8x16:
4747 case Iop_Sub16x8:
4748 case Iop_Sub32x4:
4749 case Iop_Sub64x2: {
4750 HReg res = newVRegV(env);
4751 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4752 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4753 UInt size;
4754 switch (e->Iex.Binop.op) {
4755 case Iop_Sub8x16: size = 0; break;
4756 case Iop_Sub16x8: size = 1; break;
4757 case Iop_Sub32x4: size = 2; break;
4758 case Iop_Sub64x2: size = 3; break;
4759 default:
4760 ppIROp(e->Iex.Binop.op);
4761 vpanic("Illegal element size in VSUB");
4763 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4764 res, argL, argR, size, True));
4765 return res;
4767 case Iop_QSub8Ux16:
4768 case Iop_QSub16Ux8:
4769 case Iop_QSub32Ux4:
4770 case Iop_QSub64Ux2: {
4771 HReg res = newVRegV(env);
4772 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4773 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4774 UInt size;
4775 switch (e->Iex.Binop.op) {
4776 case Iop_QSub8Ux16: size = 0; break;
4777 case Iop_QSub16Ux8: size = 1; break;
4778 case Iop_QSub32Ux4: size = 2; break;
4779 case Iop_QSub64Ux2: size = 3; break;
4780 default:
4781 ppIROp(e->Iex.Binop.op);
4782 vpanic("Illegal element size in VQSUBU");
4784 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4785 res, argL, argR, size, True));
4786 return res;
4788 case Iop_QSub8Sx16:
4789 case Iop_QSub16Sx8:
4790 case Iop_QSub32Sx4:
4791 case Iop_QSub64Sx2: {
4792 HReg res = newVRegV(env);
4793 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4794 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4795 UInt size;
4796 switch (e->Iex.Binop.op) {
4797 case Iop_QSub8Sx16: size = 0; break;
4798 case Iop_QSub16Sx8: size = 1; break;
4799 case Iop_QSub32Sx4: size = 2; break;
4800 case Iop_QSub64Sx2: size = 3; break;
4801 default:
4802 ppIROp(e->Iex.Binop.op);
4803 vpanic("Illegal element size in VQSUBS");
4805 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4806 res, argL, argR, size, True));
4807 return res;
4809 case Iop_Max8Ux16:
4810 case Iop_Max16Ux8:
4811 case Iop_Max32Ux4: {
4812 HReg res = newVRegV(env);
4813 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4814 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4815 UInt size;
4816 switch (e->Iex.Binop.op) {
4817 case Iop_Max8Ux16: size = 0; break;
4818 case Iop_Max16Ux8: size = 1; break;
4819 case Iop_Max32Ux4: size = 2; break;
4820 default: vpanic("Illegal element size in VMAXU");
4822 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4823 res, argL, argR, size, True));
4824 return res;
4826 case Iop_Max8Sx16:
4827 case Iop_Max16Sx8:
4828 case Iop_Max32Sx4: {
4829 HReg res = newVRegV(env);
4830 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4831 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4832 UInt size;
4833 switch (e->Iex.Binop.op) {
4834 case Iop_Max8Sx16: size = 0; break;
4835 case Iop_Max16Sx8: size = 1; break;
4836 case Iop_Max32Sx4: size = 2; break;
4837 default: vpanic("Illegal element size in VMAXU");
4839 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4840 res, argL, argR, size, True));
4841 return res;
4843 case Iop_Min8Ux16:
4844 case Iop_Min16Ux8:
4845 case Iop_Min32Ux4: {
4846 HReg res = newVRegV(env);
4847 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4848 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4849 UInt size;
4850 switch (e->Iex.Binop.op) {
4851 case Iop_Min8Ux16: size = 0; break;
4852 case Iop_Min16Ux8: size = 1; break;
4853 case Iop_Min32Ux4: size = 2; break;
4854 default: vpanic("Illegal element size in VMAXU");
4856 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4857 res, argL, argR, size, True));
4858 return res;
4860 case Iop_Min8Sx16:
4861 case Iop_Min16Sx8:
4862 case Iop_Min32Sx4: {
4863 HReg res = newVRegV(env);
4864 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4865 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4866 UInt size;
4867 switch (e->Iex.Binop.op) {
4868 case Iop_Min8Sx16: size = 0; break;
4869 case Iop_Min16Sx8: size = 1; break;
4870 case Iop_Min32Sx4: size = 2; break;
4871 default: vpanic("Illegal element size in VMAXU");
4873 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4874 res, argL, argR, size, True));
4875 return res;
4877 case Iop_Sar8x16:
4878 case Iop_Sar16x8:
4879 case Iop_Sar32x4:
4880 case Iop_Sar64x2: {
4881 HReg res = newVRegV(env);
4882 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4883 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4884 HReg argR2 = newVRegV(env);
4885 HReg zero = newVRegV(env);
4886 UInt size;
4887 switch (e->Iex.Binop.op) {
4888 case Iop_Sar8x16: size = 0; break;
4889 case Iop_Sar16x8: size = 1; break;
4890 case Iop_Sar32x4: size = 2; break;
4891 case Iop_Sar64x2: size = 3; break;
4892 default: vassert(0);
4894 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4895 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4896 argR2, zero, argR, size, True));
4897 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4898 res, argL, argR2, size, True));
4899 return res;
4901 case Iop_Sal8x16:
4902 case Iop_Sal16x8:
4903 case Iop_Sal32x4:
4904 case Iop_Sal64x2: {
4905 HReg res = newVRegV(env);
4906 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4907 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4908 UInt size;
4909 switch (e->Iex.Binop.op) {
4910 case Iop_Sal8x16: size = 0; break;
4911 case Iop_Sal16x8: size = 1; break;
4912 case Iop_Sal32x4: size = 2; break;
4913 case Iop_Sal64x2: size = 3; break;
4914 default: vassert(0);
4916 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4917 res, argL, argR, size, True));
4918 return res;
4920 case Iop_Shr8x16:
4921 case Iop_Shr16x8:
4922 case Iop_Shr32x4:
4923 case Iop_Shr64x2: {
4924 HReg res = newVRegV(env);
4925 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4926 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4927 HReg argR2 = newVRegV(env);
4928 HReg zero = newVRegV(env);
4929 UInt size;
4930 switch (e->Iex.Binop.op) {
4931 case Iop_Shr8x16: size = 0; break;
4932 case Iop_Shr16x8: size = 1; break;
4933 case Iop_Shr32x4: size = 2; break;
4934 case Iop_Shr64x2: size = 3; break;
4935 default: vassert(0);
4937 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4938 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4939 argR2, zero, argR, size, True));
4940 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4941 res, argL, argR2, size, True));
4942 return res;
4944 case Iop_Shl8x16:
4945 case Iop_Shl16x8:
4946 case Iop_Shl32x4:
4947 case Iop_Shl64x2: {
4948 HReg res = newVRegV(env);
4949 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4950 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4951 UInt size;
4952 switch (e->Iex.Binop.op) {
4953 case Iop_Shl8x16: size = 0; break;
4954 case Iop_Shl16x8: size = 1; break;
4955 case Iop_Shl32x4: size = 2; break;
4956 case Iop_Shl64x2: size = 3; break;
4957 default: vassert(0);
4959 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4960 res, argL, argR, size, True));
4961 return res;
4963 case Iop_QShl8x16:
4964 case Iop_QShl16x8:
4965 case Iop_QShl32x4:
4966 case Iop_QShl64x2: {
4967 HReg res = newVRegV(env);
4968 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4969 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4970 UInt size;
4971 switch (e->Iex.Binop.op) {
4972 case Iop_QShl8x16: size = 0; break;
4973 case Iop_QShl16x8: size = 1; break;
4974 case Iop_QShl32x4: size = 2; break;
4975 case Iop_QShl64x2: size = 3; break;
4976 default: vassert(0);
4978 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4979 res, argL, argR, size, True));
4980 return res;
4982 case Iop_QSal8x16:
4983 case Iop_QSal16x8:
4984 case Iop_QSal32x4:
4985 case Iop_QSal64x2: {
4986 HReg res = newVRegV(env);
4987 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4988 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4989 UInt size;
4990 switch (e->Iex.Binop.op) {
4991 case Iop_QSal8x16: size = 0; break;
4992 case Iop_QSal16x8: size = 1; break;
4993 case Iop_QSal32x4: size = 2; break;
4994 case Iop_QSal64x2: size = 3; break;
4995 default: vassert(0);
4997 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4998 res, argL, argR, size, True));
4999 return res;
5001 case Iop_QShlNsatUU8x16:
5002 case Iop_QShlNsatUU16x8:
5003 case Iop_QShlNsatUU32x4:
5004 case Iop_QShlNsatUU64x2: {
5005 HReg res = newVRegV(env);
5006 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5007 UInt size, imm;
5008 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5009 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5010 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
5011 "second argument only\n");
5013 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5014 switch (e->Iex.Binop.op) {
5015 case Iop_QShlNsatUU8x16: size = 8 | imm; break;
5016 case Iop_QShlNsatUU16x8: size = 16 | imm; break;
5017 case Iop_QShlNsatUU32x4: size = 32 | imm; break;
5018 case Iop_QShlNsatUU64x2: size = 64 | imm; break;
5019 default: vassert(0);
5021 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
5022 res, argL, size, True));
5023 return res;
5025 case Iop_QShlNsatSU8x16:
5026 case Iop_QShlNsatSU16x8:
5027 case Iop_QShlNsatSU32x4:
5028 case Iop_QShlNsatSU64x2: {
5029 HReg res = newVRegV(env);
5030 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5031 UInt size, imm;
5032 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5033 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5034 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
5035 "second argument only\n");
5037 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5038 switch (e->Iex.Binop.op) {
5039 case Iop_QShlNsatSU8x16: size = 8 | imm; break;
5040 case Iop_QShlNsatSU16x8: size = 16 | imm; break;
5041 case Iop_QShlNsatSU32x4: size = 32 | imm; break;
5042 case Iop_QShlNsatSU64x2: size = 64 | imm; break;
5043 default: vassert(0);
5045 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
5046 res, argL, size, True));
5047 return res;
5049 case Iop_QShlNsatSS8x16:
5050 case Iop_QShlNsatSS16x8:
5051 case Iop_QShlNsatSS32x4:
5052 case Iop_QShlNsatSS64x2: {
5053 HReg res = newVRegV(env);
5054 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5055 UInt size, imm;
5056 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5057 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5058 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
5059 "second argument only\n");
5061 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5062 switch (e->Iex.Binop.op) {
5063 case Iop_QShlNsatSS8x16: size = 8 | imm; break;
5064 case Iop_QShlNsatSS16x8: size = 16 | imm; break;
5065 case Iop_QShlNsatSS32x4: size = 32 | imm; break;
5066 case Iop_QShlNsatSS64x2: size = 64 | imm; break;
5067 default: vassert(0);
5069 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
5070 res, argL, size, True));
5071 return res;
5073 case Iop_ShrN8x16:
5074 case Iop_ShrN16x8:
5075 case Iop_ShrN32x4:
5076 case Iop_ShrN64x2: {
5077 HReg res = newVRegV(env);
5078 HReg tmp = newVRegV(env);
5079 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5080 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5081 HReg argR2 = newVRegI(env);
5082 UInt size;
5083 switch (e->Iex.Binop.op) {
5084 case Iop_ShrN8x16: size = 0; break;
5085 case Iop_ShrN16x8: size = 1; break;
5086 case Iop_ShrN32x4: size = 2; break;
5087 case Iop_ShrN64x2: size = 3; break;
5088 default: vassert(0);
5090 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5091 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
5092 tmp, argR2, 0, True));
5093 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5094 res, argL, tmp, size, True));
5095 return res;
5097 case Iop_ShlN8x16:
5098 case Iop_ShlN16x8:
5099 case Iop_ShlN32x4:
5100 case Iop_ShlN64x2: {
5101 HReg res = newVRegV(env);
5102 HReg tmp = newVRegV(env);
5103 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5104 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5105 UInt size;
5106 switch (e->Iex.Binop.op) {
5107 case Iop_ShlN8x16: size = 0; break;
5108 case Iop_ShlN16x8: size = 1; break;
5109 case Iop_ShlN32x4: size = 2; break;
5110 case Iop_ShlN64x2: size = 3; break;
5111 default: vassert(0);
5113 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
5114 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5115 res, argL, tmp, size, True));
5116 return res;
5118 case Iop_SarN8x16:
5119 case Iop_SarN16x8:
5120 case Iop_SarN32x4:
5121 case Iop_SarN64x2: {
5122 HReg res = newVRegV(env);
5123 HReg tmp = newVRegV(env);
5124 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5125 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5126 HReg argR2 = newVRegI(env);
5127 UInt size;
5128 switch (e->Iex.Binop.op) {
5129 case Iop_SarN8x16: size = 0; break;
5130 case Iop_SarN16x8: size = 1; break;
5131 case Iop_SarN32x4: size = 2; break;
5132 case Iop_SarN64x2: size = 3; break;
5133 default: vassert(0);
5135 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5136 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
5137 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5138 res, argL, tmp, size, True));
5139 return res;
5141 case Iop_CmpGT8Ux16:
5142 case Iop_CmpGT16Ux8:
5143 case Iop_CmpGT32Ux4: {
5144 HReg res = newVRegV(env);
5145 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5146 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5147 UInt size;
5148 switch (e->Iex.Binop.op) {
5149 case Iop_CmpGT8Ux16: size = 0; break;
5150 case Iop_CmpGT16Ux8: size = 1; break;
5151 case Iop_CmpGT32Ux4: size = 2; break;
5152 default: vassert(0);
5154 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5155 res, argL, argR, size, True));
5156 return res;
5158 case Iop_CmpGT8Sx16:
5159 case Iop_CmpGT16Sx8:
5160 case Iop_CmpGT32Sx4: {
5161 HReg res = newVRegV(env);
5162 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5163 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5164 UInt size;
5165 switch (e->Iex.Binop.op) {
5166 case Iop_CmpGT8Sx16: size = 0; break;
5167 case Iop_CmpGT16Sx8: size = 1; break;
5168 case Iop_CmpGT32Sx4: size = 2; break;
5169 default: vassert(0);
5171 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5172 res, argL, argR, size, True));
5173 return res;
5175 case Iop_CmpEQ8x16:
5176 case Iop_CmpEQ16x8:
5177 case Iop_CmpEQ32x4: {
5178 HReg res = newVRegV(env);
5179 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5180 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5181 UInt size;
5182 switch (e->Iex.Binop.op) {
5183 case Iop_CmpEQ8x16: size = 0; break;
5184 case Iop_CmpEQ16x8: size = 1; break;
5185 case Iop_CmpEQ32x4: size = 2; break;
5186 default: vassert(0);
5188 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5189 res, argL, argR, size, True));
5190 return res;
5192 case Iop_Mul8x16:
5193 case Iop_Mul16x8:
5194 case Iop_Mul32x4: {
5195 HReg res = newVRegV(env);
5196 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5197 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5198 UInt size = 0;
5199 switch(e->Iex.Binop.op) {
5200 case Iop_Mul8x16: size = 0; break;
5201 case Iop_Mul16x8: size = 1; break;
5202 case Iop_Mul32x4: size = 2; break;
5203 default: vassert(0);
5205 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5206 res, argL, argR, size, True));
5207 return res;
5209 case Iop_Mull8Ux8:
5210 case Iop_Mull16Ux4:
5211 case Iop_Mull32Ux2: {
5212 HReg res = newVRegV(env);
5213 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5214 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5215 UInt size = 0;
5216 switch(e->Iex.Binop.op) {
5217 case Iop_Mull8Ux8: size = 0; break;
5218 case Iop_Mull16Ux4: size = 1; break;
5219 case Iop_Mull32Ux2: size = 2; break;
5220 default: vassert(0);
5222 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5223 res, argL, argR, size, True));
5224 return res;
5227 case Iop_Mull8Sx8:
5228 case Iop_Mull16Sx4:
5229 case Iop_Mull32Sx2: {
5230 HReg res = newVRegV(env);
5231 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5232 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5233 UInt size = 0;
5234 switch(e->Iex.Binop.op) {
5235 case Iop_Mull8Sx8: size = 0; break;
5236 case Iop_Mull16Sx4: size = 1; break;
5237 case Iop_Mull32Sx2: size = 2; break;
5238 default: vassert(0);
5240 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5241 res, argL, argR, size, True));
5242 return res;
5245 case Iop_QDMulHi16Sx8:
5246 case Iop_QDMulHi32Sx4: {
5247 HReg res = newVRegV(env);
5248 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5249 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5250 UInt size = 0;
5251 switch(e->Iex.Binop.op) {
5252 case Iop_QDMulHi16Sx8: size = 1; break;
5253 case Iop_QDMulHi32Sx4: size = 2; break;
5254 default: vassert(0);
5256 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5257 res, argL, argR, size, True));
5258 return res;
5261 case Iop_QRDMulHi16Sx8:
5262 case Iop_QRDMulHi32Sx4: {
5263 HReg res = newVRegV(env);
5264 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5265 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5266 UInt size = 0;
5267 switch(e->Iex.Binop.op) {
5268 case Iop_QRDMulHi16Sx8: size = 1; break;
5269 case Iop_QRDMulHi32Sx4: size = 2; break;
5270 default: vassert(0);
5272 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5273 res, argL, argR, size, True));
5274 return res;
5277 case Iop_QDMull16Sx4:
5278 case Iop_QDMull32Sx2: {
5279 HReg res = newVRegV(env);
5280 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5281 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5282 UInt size = 0;
5283 switch(e->Iex.Binop.op) {
5284 case Iop_QDMull16Sx4: size = 1; break;
5285 case Iop_QDMull32Sx2: size = 2; break;
5286 default: vassert(0);
5288 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5289 res, argL, argR, size, True));
5290 return res;
5292 case Iop_PolynomialMul8x16: {
5293 HReg res = newVRegV(env);
5294 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5295 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5296 UInt size = 0;
5297 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5298 res, argL, argR, size, True));
5299 return res;
5301 case Iop_Max32Fx4: {
5302 HReg res = newVRegV(env);
5303 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5304 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5305 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5306 res, argL, argR, 2, True));
5307 return res;
5309 case Iop_Min32Fx4: {
5310 HReg res = newVRegV(env);
5311 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5312 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5313 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5314 res, argL, argR, 2, True));
5315 return res;
5317 case Iop_PwMax32Fx4: {
5318 HReg res = newVRegV(env);
5319 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5320 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5321 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5322 res, argL, argR, 2, True));
5323 return res;
5325 case Iop_PwMin32Fx4: {
5326 HReg res = newVRegV(env);
5327 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5328 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5329 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5330 res, argL, argR, 2, True));
5331 return res;
5333 case Iop_CmpGT32Fx4: {
5334 HReg res = newVRegV(env);
5335 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5336 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5337 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5338 res, argL, argR, 2, True));
5339 return res;
5341 case Iop_CmpGE32Fx4: {
5342 HReg res = newVRegV(env);
5343 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5344 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5345 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5346 res, argL, argR, 2, True));
5347 return res;
5349 case Iop_CmpEQ32Fx4: {
5350 HReg res = newVRegV(env);
5351 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5352 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5353 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5354 res, argL, argR, 2, True));
5355 return res;
5358 case Iop_PolynomialMull8x8: {
5359 HReg res = newVRegV(env);
5360 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5361 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5362 UInt size = 0;
5363 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5364 res, argL, argR, size, True));
5365 return res;
5367 case Iop_F32ToFixed32Ux4_RZ:
5368 case Iop_F32ToFixed32Sx4_RZ:
5369 case Iop_Fixed32UToF32x4_RN:
5370 case Iop_Fixed32SToF32x4_RN: {
5371 HReg res = newVRegV(env);
5372 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5373 ARMNeonUnOp op;
5374 UInt imm6;
5375 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5376 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5377 vpanic("ARM supports FP <-> Fixed conversion with constant "
5378 "second argument less than 33 only\n");
5380 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5381 vassert(imm6 <= 32 && imm6 > 0);
5382 imm6 = 64 - imm6;
5383 switch(e->Iex.Binop.op) {
5384 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5385 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5386 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5387 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5388 default: vassert(0);
5390 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5391 return res;
5394 FIXME remove if not used
5395 case Iop_VDup8x16:
5396 case Iop_VDup16x8:
5397 case Iop_VDup32x4: {
5398 HReg res = newVRegV(env);
5399 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5400 UInt imm4;
5401 UInt index;
5402 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5403 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5404 vpanic("ARM supports Iop_VDup with constant "
5405 "second argument less than 16 only\n");
5407 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5408 switch(e->Iex.Binop.op) {
5409 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5410 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5411 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5412 default: vassert(0);
5414 if (imm4 >= 16) {
5415 vpanic("ARM supports Iop_VDup with constant "
5416 "second argument less than 16 only\n");
5418 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5419 res, argL, imm4, True));
5420 return res;
5423 case Iop_PwAdd8x16:
5424 case Iop_PwAdd16x8:
5425 case Iop_PwAdd32x4: {
5426 HReg res = newVRegV(env);
5427 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5428 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5429 UInt size = 0;
5430 switch(e->Iex.Binop.op) {
5431 case Iop_PwAdd8x16: size = 0; break;
5432 case Iop_PwAdd16x8: size = 1; break;
5433 case Iop_PwAdd32x4: size = 2; break;
5434 default: vassert(0);
5436 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5437 res, argL, argR, size, True));
5438 return res;
5440 /* ... */
5441 default:
5442 break;
5446 if (e->tag == Iex_Triop) {
5447 IRTriop *triop = e->Iex.Triop.details;
5449 switch (triop->op) {
5450 case Iop_SliceV128: {
5451 HReg res = newVRegV(env);
5452 HReg argL = iselNeonExpr(env, triop->arg2);
5453 HReg argR = iselNeonExpr(env, triop->arg1);
5454 UInt imm4;
5455 if (triop->arg3->tag != Iex_Const ||
5456 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5457 vpanic("ARM target supports Iop_ExtractV128 with constant "
5458 "third argument less than 16 only\n");
5460 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5461 if (imm4 >= 16) {
5462 vpanic("ARM target supports Iop_ExtractV128 with constant "
5463 "third argument less than 16 only\n");
5465 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5466 res, argL, argR, imm4, True));
5467 return res;
5469 case Iop_Mul32Fx4:
5470 case Iop_Sub32Fx4:
5471 case Iop_Add32Fx4: {
5472 HReg res = newVRegV(env);
5473 HReg argL = iselNeonExpr(env, triop->arg2);
5474 HReg argR = iselNeonExpr(env, triop->arg3);
5475 UInt size = 0;
5476 ARMNeonBinOp op = ARMneon_INVALID;
5477 switch (triop->op) {
5478 case Iop_Mul32Fx4: op = ARMneon_VMULFP; break;
5479 case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break;
5480 case Iop_Add32Fx4: op = ARMneon_VADDFP; break;
5481 default: vassert(0);
5483 addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True));
5484 return res;
5486 default:
5487 break;
5491 if (e->tag == Iex_ITE) { // VFD
5492 ARMCondCode cc;
5493 HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue);
5494 HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse);
5495 HReg dst = newVRegV(env);
5496 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5497 cc = iselCondCode(env, e->Iex.ITE.cond);
5498 addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5499 return dst;
5502 /* neon_expr_bad: */
5503 ppIRExpr(e);
5504 vpanic("iselNeonExpr_wrk");
5507 /*---------------------------------------------------------*/
5508 /*--- ISEL: Floating point expressions (64 bit) ---*/
5509 /*---------------------------------------------------------*/
5511 /* Compute a 64-bit floating point value into a register, the identity
5512 of which is returned. As with iselIntExpr_R, the reg may be either
5513 real or virtual; in any case it must not be changed by subsequent
5514 code emitted by the caller. */
5516 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5518 HReg r = iselDblExpr_wrk( env, e );
5519 # if 0
5520 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5521 # endif
5522 vassert(hregClass(r) == HRcFlt64);
5523 vassert(hregIsVirtual(r));
5524 return r;
5527 /* DO NOT CALL THIS DIRECTLY */
5528 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5530 IRType ty = typeOfIRExpr(env->type_env,e);
5531 vassert(e);
5532 vassert(ty == Ity_F64);
5534 if (e->tag == Iex_RdTmp) {
5535 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5538 if (e->tag == Iex_Const) {
5539 /* Just handle the zero case. */
5540 IRConst* con = e->Iex.Const.con;
5541 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5542 HReg z32 = newVRegI(env);
5543 HReg dst = newVRegD(env);
5544 addInstr(env, ARMInstr_Imm32(z32, 0));
5545 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5546 return dst;
5550 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5551 ARMAModeV* am;
5552 HReg res = newVRegD(env);
5553 vassert(e->Iex.Load.ty == Ity_F64);
5554 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5555 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5556 return res;
5559 if (e->tag == Iex_Get) {
5560 // XXX This won't work if offset > 1020 or is not 0 % 4.
5561 // In which case we'll have to generate more longwinded code.
5562 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5563 HReg res = newVRegD(env);
5564 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5565 return res;
5568 if (e->tag == Iex_Unop) {
5569 switch (e->Iex.Unop.op) {
5570 case Iop_ReinterpI64asF64: {
5571 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5572 return iselNeon64Expr(env, e->Iex.Unop.arg);
5573 } else {
5574 HReg srcHi, srcLo;
5575 HReg dst = newVRegD(env);
5576 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5577 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5578 return dst;
5581 case Iop_NegF64: {
5582 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5583 HReg dst = newVRegD(env);
5584 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5585 return dst;
5587 case Iop_AbsF64: {
5588 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5589 HReg dst = newVRegD(env);
5590 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5591 return dst;
5593 case Iop_F32toF64: {
5594 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5595 HReg dst = newVRegD(env);
5596 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5597 return dst;
5599 case Iop_I32UtoF64:
5600 case Iop_I32StoF64: {
5601 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5602 HReg f32 = newVRegF(env);
5603 HReg dst = newVRegD(env);
5604 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5605 /* VMOV f32, src */
5606 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5607 /* FSITOD dst, f32 */
5608 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5609 dst, f32));
5610 return dst;
5612 default:
5613 break;
5617 if (e->tag == Iex_Binop) {
5618 switch (e->Iex.Binop.op) {
5619 case Iop_SqrtF64: {
5620 /* first arg is rounding mode; we ignore it. */
5621 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5622 HReg dst = newVRegD(env);
5623 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5624 return dst;
5626 case Iop_RoundF64toInt: {
5627 /* We can only generate this on a >= V8 capable target. But
5628 that's OK since we should only be asked to generate for V8
5629 capable guests, and we assume here that host == guest. */
5630 if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5631 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5632 HReg dst = newVRegD(env);
5633 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5634 addInstr(env, ARMInstr_VRIntR(True/*isF64*/, dst, src));
5635 set_VFP_rounding_default(env);
5636 return dst;
5638 /* not a V8 target, so we can't select insns for this. */
5639 break;
5641 case Iop_MaxNumF64:
5642 case Iop_MinNumF64: {
5643 /* Same comments regarding V8 support as for Iop_RoundF64toInt. */
5644 if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5645 HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
5646 HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
5647 HReg dst = newVRegD(env);
5648 Bool isMax = e->Iex.Binop.op == Iop_MaxNumF64;
5649 addInstr(env, ARMInstr_VMinMaxNum(
5650 True/*isF64*/, isMax, dst, srcL, srcR));
5651 return dst;
5653 /* not a V8 target, so we can't select insns for this. */
5654 break;
5656 default:
5657 break;
5661 if (e->tag == Iex_Triop) {
5662 IRTriop *triop = e->Iex.Triop.details;
5664 switch (triop->op) {
5665 case Iop_DivF64:
5666 case Iop_MulF64:
5667 case Iop_AddF64:
5668 case Iop_SubF64: {
5669 ARMVfpOp op = 0; /*INVALID*/
5670 HReg argL = iselDblExpr(env, triop->arg2);
5671 HReg argR = iselDblExpr(env, triop->arg3);
5672 HReg dst = newVRegD(env);
5673 switch (triop->op) {
5674 case Iop_DivF64: op = ARMvfp_DIV; break;
5675 case Iop_MulF64: op = ARMvfp_MUL; break;
5676 case Iop_AddF64: op = ARMvfp_ADD; break;
5677 case Iop_SubF64: op = ARMvfp_SUB; break;
5678 default: vassert(0);
5680 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5681 return dst;
5683 default:
5684 break;
5688 if (e->tag == Iex_ITE) { // VFD
5689 if (ty == Ity_F64
5690 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5691 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
5692 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
5693 HReg dst = newVRegD(env);
5694 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
5695 ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
5696 addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
5697 return dst;
5701 ppIRExpr(e);
5702 vpanic("iselDblExpr_wrk");
5706 /*---------------------------------------------------------*/
5707 /*--- ISEL: Floating point expressions (32 bit) ---*/
5708 /*---------------------------------------------------------*/
5710 /* Compute a 32-bit floating point value into a register, the identity
5711 of which is returned. As with iselIntExpr_R, the reg may be either
5712 real or virtual; in any case it must not be changed by subsequent
5713 code emitted by the caller. */
5715 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5717 HReg r = iselFltExpr_wrk( env, e );
5718 # if 0
5719 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5720 # endif
5721 vassert(hregClass(r) == HRcFlt32);
5722 vassert(hregIsVirtual(r));
5723 return r;
5726 /* DO NOT CALL THIS DIRECTLY */
5727 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5729 IRType ty = typeOfIRExpr(env->type_env,e);
5730 vassert(e);
5731 vassert(ty == Ity_F32);
5733 if (e->tag == Iex_RdTmp) {
5734 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5737 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5738 ARMAModeV* am;
5739 HReg res = newVRegF(env);
5740 vassert(e->Iex.Load.ty == Ity_F32);
5741 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5742 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5743 return res;
5746 if (e->tag == Iex_Get) {
5747 // XXX This won't work if offset > 1020 or is not 0 % 4.
5748 // In which case we'll have to generate more longwinded code.
5749 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5750 HReg res = newVRegF(env);
5751 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5752 return res;
5755 if (e->tag == Iex_Unop) {
5756 switch (e->Iex.Unop.op) {
5757 case Iop_ReinterpI32asF32: {
5758 HReg dst = newVRegF(env);
5759 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5760 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5761 return dst;
5763 case Iop_NegF32: {
5764 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5765 HReg dst = newVRegF(env);
5766 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5767 return dst;
5769 case Iop_AbsF32: {
5770 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5771 HReg dst = newVRegF(env);
5772 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5773 return dst;
5775 default:
5776 break;
5780 if (e->tag == Iex_Binop) {
5781 switch (e->Iex.Binop.op) {
5782 case Iop_SqrtF32: {
5783 /* first arg is rounding mode; we ignore it. */
5784 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5785 HReg dst = newVRegF(env);
5786 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5787 return dst;
5789 case Iop_F64toF32: {
5790 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5791 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5792 HReg valS = newVRegF(env);
5793 /* FCVTSD valS, valD */
5794 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5795 set_VFP_rounding_default(env);
5796 return valS;
5798 case Iop_RoundF32toInt: {
5799 /* We can only generate this on a >= V8 capable target. But
5800 that's OK since we should only be asked to generate for V8
5801 capable guests, and we assume here that host == guest. */
5802 if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5803 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5804 HReg dst = newVRegF(env);
5805 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5806 addInstr(env, ARMInstr_VRIntR(False/*!isF64*/, dst, src));
5807 set_VFP_rounding_default(env);
5808 return dst;
5810 /* not a V8 target, so we can't select insns for this. */
5811 break;
5813 case Iop_MaxNumF32:
5814 case Iop_MinNumF32: {
5815 /* Same comments regarding V8 support as for Iop_RoundF32toInt. */
5816 if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5817 HReg srcL = iselFltExpr(env, e->Iex.Binop.arg1);
5818 HReg srcR = iselFltExpr(env, e->Iex.Binop.arg2);
5819 HReg dst = newVRegF(env);
5820 Bool isMax = e->Iex.Binop.op == Iop_MaxNumF32;
5821 addInstr(env, ARMInstr_VMinMaxNum(
5822 False/*!isF64*/, isMax, dst, srcL, srcR));
5823 return dst;
5825 /* not a V8 target, so we can't select insns for this. */
5826 break;
5828 default:
5829 break;
5833 if (e->tag == Iex_Triop) {
5834 IRTriop *triop = e->Iex.Triop.details;
5836 switch (triop->op) {
5837 case Iop_DivF32:
5838 case Iop_MulF32:
5839 case Iop_AddF32:
5840 case Iop_SubF32: {
5841 ARMVfpOp op = 0; /*INVALID*/
5842 HReg argL = iselFltExpr(env, triop->arg2);
5843 HReg argR = iselFltExpr(env, triop->arg3);
5844 HReg dst = newVRegF(env);
5845 switch (triop->op) {
5846 case Iop_DivF32: op = ARMvfp_DIV; break;
5847 case Iop_MulF32: op = ARMvfp_MUL; break;
5848 case Iop_AddF32: op = ARMvfp_ADD; break;
5849 case Iop_SubF32: op = ARMvfp_SUB; break;
5850 default: vassert(0);
5852 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5853 return dst;
5855 default:
5856 break;
5860 if (e->tag == Iex_ITE) { // VFD
5861 if (ty == Ity_F32
5862 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5863 ARMCondCode cc;
5864 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
5865 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
5866 HReg dst = newVRegF(env);
5867 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
5868 cc = iselCondCode(env, e->Iex.ITE.cond);
5869 addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
5870 return dst;
5874 ppIRExpr(e);
5875 vpanic("iselFltExpr_wrk");
5879 /*---------------------------------------------------------*/
5880 /*--- ISEL: Statements ---*/
5881 /*---------------------------------------------------------*/
5883 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5885 if (vex_traceflags & VEX_TRACE_VCODE) {
5886 vex_printf("\n-- ");
5887 ppIRStmt(stmt);
5888 vex_printf("\n");
5890 switch (stmt->tag) {
5892 /* --------- STORE --------- */
5893 /* little-endian write to memory */
5894 case Ist_Store: {
5895 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5896 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5897 IREndness end = stmt->Ist.Store.end;
5899 if (tya != Ity_I32 || end != Iend_LE)
5900 goto stmt_fail;
5902 if (tyd == Ity_I32) {
5903 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5904 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5905 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5906 return;
5908 if (tyd == Ity_I16) {
5909 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5910 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5911 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
5912 False/*!isLoad*/,
5913 False/*!isSignedLoad*/, rD, am));
5914 return;
5916 if (tyd == Ity_I8) {
5917 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5918 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5919 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
5920 return;
5922 if (tyd == Ity_I64) {
5923 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5924 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5925 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5926 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5927 } else {
5928 HReg rDhi, rDlo, rA;
5929 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5930 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5931 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
5932 ARMAMode1_RI(rA,4)));
5933 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
5934 ARMAMode1_RI(rA,0)));
5936 return;
5938 if (tyd == Ity_F64) {
5939 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5940 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5941 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5942 return;
5944 if (tyd == Ity_F32) {
5945 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5946 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5947 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5948 return;
5950 if (tyd == Ity_V128) {
5951 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5952 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5953 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5954 return;
5957 break;
5960 /* --------- CONDITIONAL STORE --------- */
5961 /* conditional little-endian write to memory */
5962 case Ist_StoreG: {
5963 IRStoreG* sg = stmt->Ist.StoreG.details;
5964 IRType tya = typeOfIRExpr(env->type_env, sg->addr);
5965 IRType tyd = typeOfIRExpr(env->type_env, sg->data);
5966 IREndness end = sg->end;
5968 if (tya != Ity_I32 || end != Iend_LE)
5969 goto stmt_fail;
5971 switch (tyd) {
5972 case Ity_I8:
5973 case Ity_I32: {
5974 HReg rD = iselIntExpr_R(env, sg->data);
5975 ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr);
5976 ARMCondCode cc = iselCondCode(env, sg->guard);
5977 addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
5978 (cc, False/*!isLoad*/, rD, am));
5979 return;
5981 case Ity_I16: {
5982 HReg rD = iselIntExpr_R(env, sg->data);
5983 ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr);
5984 ARMCondCode cc = iselCondCode(env, sg->guard);
5985 addInstr(env, ARMInstr_LdSt16(cc,
5986 False/*!isLoad*/,
5987 False/*!isSignedLoad*/, rD, am));
5988 return;
5990 default:
5991 break;
5993 break;
5996 /* --------- CONDITIONAL LOAD --------- */
5997 /* conditional little-endian load from memory */
5998 case Ist_LoadG: {
5999 IRLoadG* lg = stmt->Ist.LoadG.details;
6000 IRType tya = typeOfIRExpr(env->type_env, lg->addr);
6001 IREndness end = lg->end;
6003 if (tya != Ity_I32 || end != Iend_LE)
6004 goto stmt_fail;
6006 switch (lg->cvt) {
6007 case ILGop_8Uto32:
6008 case ILGop_Ident32: {
6009 HReg rAlt = iselIntExpr_R(env, lg->alt);
6010 ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr);
6011 HReg rD = lookupIRTemp(env, lg->dst);
6012 addInstr(env, mk_iMOVds_RR(rD, rAlt));
6013 ARMCondCode cc = iselCondCode(env, lg->guard);
6014 addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
6015 : ARMInstr_LdSt8U)
6016 (cc, True/*isLoad*/, rD, am));
6017 return;
6019 case ILGop_16Sto32:
6020 case ILGop_16Uto32:
6021 case ILGop_8Sto32: {
6022 HReg rAlt = iselIntExpr_R(env, lg->alt);
6023 ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr);
6024 HReg rD = lookupIRTemp(env, lg->dst);
6025 addInstr(env, mk_iMOVds_RR(rD, rAlt));
6026 ARMCondCode cc = iselCondCode(env, lg->guard);
6027 if (lg->cvt == ILGop_8Sto32) {
6028 addInstr(env, ARMInstr_Ld8S(cc, rD, am));
6029 } else {
6030 vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
6031 Bool sx = lg->cvt == ILGop_16Sto32;
6032 addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
6034 return;
6036 default:
6037 break;
6039 break;
6042 /* --------- PUT --------- */
6043 /* write guest state, fixed offset */
6044 case Ist_Put: {
6045 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6047 if (tyd == Ity_I32) {
6048 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6049 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
6050 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
6051 return;
6053 if (tyd == Ity_I64) {
6054 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6055 HReg addr = newVRegI(env);
6056 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
6057 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6058 stmt->Ist.Put.offset));
6059 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
6060 } else {
6061 HReg rDhi, rDlo;
6062 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
6063 stmt->Ist.Put.offset + 0);
6064 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
6065 stmt->Ist.Put.offset + 4);
6066 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
6067 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6068 rDhi, am4));
6069 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6070 rDlo, am0));
6072 return;
6074 if (tyd == Ity_F64) {
6075 // XXX This won't work if offset > 1020 or is not 0 % 4.
6076 // In which case we'll have to generate more longwinded code.
6077 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6078 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
6079 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
6080 return;
6082 if (tyd == Ity_F32) {
6083 // XXX This won't work if offset > 1020 or is not 0 % 4.
6084 // In which case we'll have to generate more longwinded code.
6085 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6086 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
6087 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
6088 return;
6090 if (tyd == Ity_V128) {
6091 HReg addr = newVRegI(env);
6092 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
6093 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6094 stmt->Ist.Put.offset));
6095 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
6096 return;
6098 break;
6101 /* --------- TMP --------- */
6102 /* assign value to temporary */
6103 case Ist_WrTmp: {
6104 IRTemp tmp = stmt->Ist.WrTmp.tmp;
6105 IRType ty = typeOfIRTemp(env->type_env, tmp);
6107 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6108 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
6109 env, stmt->Ist.WrTmp.data);
6110 HReg dst = lookupIRTemp(env, tmp);
6111 addInstr(env, ARMInstr_Mov(dst,ri84));
6112 return;
6114 if (ty == Ity_I1) {
6115 /* Here, we are generating a I1 value into a 32 bit register.
6116 Make sure the value in the register is only zero or one,
6117 but no other. This allows optimisation of the
6118 1Uto32(tmp:I1) case, by making it simply a copy of the
6119 register holding 'tmp'. The point being that the value in
6120 the register holding 'tmp' can only have been created
6121 here. */
6122 HReg dst = lookupIRTemp(env, tmp);
6123 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
6124 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
6125 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
6126 return;
6128 if (ty == Ity_I64) {
6129 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6130 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
6131 HReg dst = lookupIRTemp(env, tmp);
6132 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
6133 } else {
6134 HReg rHi, rLo, dstHi, dstLo;
6135 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
6136 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
6137 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
6138 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
6140 return;
6142 if (ty == Ity_F64) {
6143 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
6144 HReg dst = lookupIRTemp(env, tmp);
6145 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
6146 return;
6148 if (ty == Ity_F32) {
6149 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
6150 HReg dst = lookupIRTemp(env, tmp);
6151 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
6152 return;
6154 if (ty == Ity_V128) {
6155 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
6156 HReg dst = lookupIRTemp(env, tmp);
6157 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
6158 return;
6160 break;
6163 /* --------- Call to DIRTY helper --------- */
6164 /* call complex ("dirty") helper function */
6165 case Ist_Dirty: {
6166 IRDirty* d = stmt->Ist.Dirty.details;
6168 /* Figure out the return type, if any. */
6169 IRType retty = Ity_INVALID;
6170 if (d->tmp != IRTemp_INVALID)
6171 retty = typeOfIRTemp(env->type_env, d->tmp);
6173 Bool retty_ok = False;
6174 switch (retty) {
6175 case Ity_INVALID: /* function doesn't return anything */
6176 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6177 case Ity_V128:
6178 retty_ok = True; break;
6179 default:
6180 break;
6182 if (!retty_ok)
6183 break; /* will go to stmt_fail: */
6185 /* Marshal args, do the call, and set the return value to 0x555..555
6186 if this is a conditional call that returns a value and the
6187 call is skipped. */
6188 UInt addToSp = 0;
6189 RetLoc rloc = mk_RetLoc_INVALID();
6190 Bool ok = doHelperCall( &addToSp, &rloc, env,
6191 d->guard, d->cee, retty, d->args );
6192 if (!ok) goto stmt_fail;
6193 vassert(is_sane_RetLoc(rloc));
6195 /* Now figure out what to do with the returned value, if any. */
6196 switch (retty) {
6197 case Ity_INVALID: {
6198 /* No return value. Nothing to do. */
6199 vassert(d->tmp == IRTemp_INVALID);
6200 vassert(rloc.pri == RLPri_None);
6201 vassert(addToSp == 0);
6202 return;
6204 case Ity_I64: {
6205 vassert(rloc.pri == RLPri_2Int);
6206 vassert(addToSp == 0);
6207 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6208 HReg tmp = lookupIRTemp(env, d->tmp);
6209 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
6210 hregARM_R0()));
6211 } else {
6212 HReg dstHi, dstLo;
6213 /* The returned value is in r1:r0. Park it in the
6214 register-pair associated with tmp. */
6215 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
6216 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
6217 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
6219 return;
6221 case Ity_I32: case Ity_I16: case Ity_I8: {
6222 vassert(rloc.pri == RLPri_Int);
6223 vassert(addToSp == 0);
6224 /* The returned value is in r0. Park it in the register
6225 associated with tmp. */
6226 HReg dst = lookupIRTemp(env, d->tmp);
6227 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
6228 return;
6230 case Ity_V128: {
6231 /* The returned value is on the stack, and *retloc tells
6232 us where. Fish it off the stack and then move the
6233 stack pointer upwards to clear it, as directed by
6234 doHelperCall. */
6235 vassert(rloc.pri == RLPri_V128SpRel);
6236 vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
6237 vassert(addToSp >= 16);
6238 vassert(addToSp <= 256);
6239 /* Both the stack delta and the offset must be at least 8-aligned.
6240 If that isn't so, doHelperCall() has generated bad code. */
6241 vassert(0 == (rloc.spOff % 8));
6242 vassert(0 == (addToSp % 8));
6243 HReg dst = lookupIRTemp(env, d->tmp);
6244 HReg tmp = newVRegI(env);
6245 HReg sp = hregARM_R13();
6246 addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6247 tmp, sp, ARMRI84_I84(rloc.spOff,0)));
6248 ARMAModeN* am = mkARMAModeN_R(tmp);
6249 /* This load could be done with its effective address 0 % 8,
6250 because that's the best stack alignment that we can be
6251 assured of. */
6252 addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
6254 ARMRI84* spAdj
6255 = addToSp == 256 ? ARMRI84_I84(64, 15) // 64 `ror` (15 * 2)
6256 : ARMRI84_I84(addToSp, 0);
6257 addInstr(env, ARMInstr_Alu(ARMalu_ADD, sp, sp, spAdj));
6258 return;
6260 default:
6261 /*NOTREACHED*/
6262 vassert(0);
6264 break;
6267 /* --------- Load Linked and Store Conditional --------- */
6268 case Ist_LLSC: {
6269 if (stmt->Ist.LLSC.storedata == NULL) {
6270 /* LL */
6271 IRTemp res = stmt->Ist.LLSC.result;
6272 IRType ty = typeOfIRTemp(env->type_env, res);
6273 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6274 Int szB = 0;
6275 HReg r_dst = lookupIRTemp(env, res);
6276 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6277 switch (ty) {
6278 case Ity_I8: szB = 1; break;
6279 case Ity_I16: szB = 2; break;
6280 case Ity_I32: szB = 4; break;
6281 default: vassert(0);
6283 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6284 addInstr(env, ARMInstr_LdrEX(szB));
6285 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6286 return;
6288 if (ty == Ity_I64) {
6289 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6290 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6291 addInstr(env, ARMInstr_LdrEX(8));
6292 /* Result is in r3:r2. On a non-NEON capable CPU, we must
6293 move it into a result register pair. On a NEON capable
6294 CPU, the result register will be a 64 bit NEON
6295 register, so we must move it there instead. */
6296 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6297 HReg dst = lookupIRTemp(env, res);
6298 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6299 hregARM_R2()));
6300 } else {
6301 HReg r_dst_hi, r_dst_lo;
6302 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6303 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6304 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6306 return;
6308 /*NOTREACHED*/
6309 vassert(0);
6310 } else {
6311 /* SC */
6312 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6313 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6314 Int szB = 0;
6315 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6316 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6317 switch (tyd) {
6318 case Ity_I8: szB = 1; break;
6319 case Ity_I16: szB = 2; break;
6320 case Ity_I32: szB = 4; break;
6321 default: vassert(0);
6323 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6324 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6325 addInstr(env, ARMInstr_StrEX(szB));
6326 } else {
6327 vassert(tyd == Ity_I64);
6328 /* This is really ugly. There is no is/is-not NEON
6329 decision akin to the case for LL, because iselInt64Expr
6330 fudges this for us, and always gets the result into two
6331 GPRs even if this means moving it from a NEON
6332 register. */
6333 HReg rDhi, rDlo;
6334 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6335 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6336 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6337 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6338 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6339 addInstr(env, ARMInstr_StrEX(8));
6341 /* now r0 is 1 if failed, 0 if success. Change to IR
6342 conventions (0 is fail, 1 is success). Also transfer
6343 result to r_res. */
6344 IRTemp res = stmt->Ist.LLSC.result;
6345 IRType ty = typeOfIRTemp(env->type_env, res);
6346 HReg r_res = lookupIRTemp(env, res);
6347 ARMRI84* one = ARMRI84_I84(1,0);
6348 vassert(ty == Ity_I1);
6349 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6350 /* And be conservative -- mask off all but the lowest bit */
6351 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6352 return;
6354 break;
6357 /* --------- MEM FENCE --------- */
6358 case Ist_MBE:
6359 switch (stmt->Ist.MBE.event) {
6360 case Imbe_Fence:
6361 addInstr(env, ARMInstr_MFence());
6362 return;
6363 case Imbe_CancelReservation:
6364 addInstr(env, ARMInstr_CLREX());
6365 return;
6366 default:
6367 break;
6369 break;
6371 /* --------- INSTR MARK --------- */
6372 /* Doesn't generate any executable code ... */
6373 case Ist_IMark:
6374 return;
6376 /* --------- NO-OP --------- */
6377 case Ist_NoOp:
6378 return;
6380 /* --------- EXIT --------- */
6381 case Ist_Exit: {
6382 if (stmt->Ist.Exit.dst->tag != Ico_U32)
6383 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6385 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
6386 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
6387 stmt->Ist.Exit.offsIP);
6389 /* Case: boring transfer to known address */
6390 if (stmt->Ist.Exit.jk == Ijk_Boring
6391 || stmt->Ist.Exit.jk == Ijk_Call
6392 || stmt->Ist.Exit.jk == Ijk_Ret) {
6393 if (env->chainingAllowed) {
6394 /* .. almost always true .. */
6395 /* Skip the event check at the dst if this is a forwards
6396 edge. */
6397 Bool toFastEP
6398 = stmt->Ist.Exit.dst->Ico.U32 > env->max_ga;
6399 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6400 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6401 amR15T, cc, toFastEP));
6402 } else {
6403 /* .. very occasionally .. */
6404 /* We can't use chaining, so ask for an assisted transfer,
6405 as that's the only alternative that is allowable. */
6406 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6407 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6409 return;
6412 /* Case: assisted transfer to arbitrary address */
6413 switch (stmt->Ist.Exit.jk) {
6414 /* Keep this list in sync with that in iselNext below */
6415 case Ijk_ClientReq:
6416 case Ijk_NoDecode:
6417 case Ijk_NoRedir:
6418 case Ijk_Sys_syscall:
6419 case Ijk_InvalICache:
6420 case Ijk_Yield:
6422 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6423 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6424 stmt->Ist.Exit.jk));
6425 return;
6427 default:
6428 break;
6431 /* Do we ever expect to see any other kind? */
6432 goto stmt_fail;
6435 default: break;
6437 stmt_fail:
6438 ppIRStmt(stmt);
6439 vpanic("iselStmt");
6443 /*---------------------------------------------------------*/
6444 /*--- ISEL: Basic block terminators (Nexts) ---*/
6445 /*---------------------------------------------------------*/
6447 static void iselNext ( ISelEnv* env,
6448 IRExpr* next, IRJumpKind jk, Int offsIP )
6450 if (vex_traceflags & VEX_TRACE_VCODE) {
6451 vex_printf( "\n-- PUT(%d) = ", offsIP);
6452 ppIRExpr( next );
6453 vex_printf( "; exit-");
6454 ppIRJumpKind(jk);
6455 vex_printf( "\n");
6458 /* Case: boring transfer to known address */
6459 if (next->tag == Iex_Const) {
6460 IRConst* cdst = next->Iex.Const.con;
6461 vassert(cdst->tag == Ico_U32);
6462 if (jk == Ijk_Boring || jk == Ijk_Call) {
6463 /* Boring transfer to known address */
6464 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6465 if (env->chainingAllowed) {
6466 /* .. almost always true .. */
6467 /* Skip the event check at the dst if this is a forwards
6468 edge. */
6469 Bool toFastEP
6470 = cdst->Ico.U32 > env->max_ga;
6471 if (0) vex_printf("%s", toFastEP ? "X" : ".");
6472 addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
6473 amR15T, ARMcc_AL,
6474 toFastEP));
6475 } else {
6476 /* .. very occasionally .. */
6477 /* We can't use chaining, so ask for an assisted transfer,
6478 as that's the only alternative that is allowable. */
6479 HReg r = iselIntExpr_R(env, next);
6480 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6481 Ijk_Boring));
6483 return;
6487 /* Case: call/return (==boring) transfer to any address */
6488 switch (jk) {
6489 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6490 HReg r = iselIntExpr_R(env, next);
6491 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6492 if (env->chainingAllowed) {
6493 addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
6494 } else {
6495 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6496 Ijk_Boring));
6498 return;
6500 default:
6501 break;
6504 /* Case: assisted transfer to arbitrary address */
6505 switch (jk) {
6506 /* Keep this list in sync with that for Ist_Exit above */
6507 case Ijk_ClientReq:
6508 case Ijk_NoDecode:
6509 case Ijk_NoRedir:
6510 case Ijk_Sys_syscall:
6511 case Ijk_InvalICache:
6512 case Ijk_Yield:
6514 HReg r = iselIntExpr_R(env, next);
6515 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6516 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
6517 return;
6519 default:
6520 break;
6523 vex_printf( "\n-- PUT(%d) = ", offsIP);
6524 ppIRExpr( next );
6525 vex_printf( "; exit-");
6526 ppIRJumpKind(jk);
6527 vex_printf( "\n");
6528 vassert(0); // are we expecting any other kind?
6532 /*---------------------------------------------------------*/
6533 /*--- Insn selector top-level ---*/
6534 /*---------------------------------------------------------*/
6536 /* Translate an entire SB to arm code. */
6538 HInstrArray* iselSB_ARM ( const IRSB* bb,
6539 VexArch arch_host,
6540 const VexArchInfo* archinfo_host,
6541 const VexAbiInfo* vbi/*UNUSED*/,
6542 Int offs_Host_EvC_Counter,
6543 Int offs_Host_EvC_FailAddr,
6544 Bool chainingAllowed,
6545 Bool addProfInc,
6546 Addr max_ga )
6548 Int i, j;
6549 HReg hreg, hregHI;
6550 ISelEnv* env;
6551 UInt hwcaps_host = archinfo_host->hwcaps;
6552 ARMAMode1 *amCounter, *amFailAddr;
6554 /* sanity ... */
6555 vassert(arch_host == VexArchARM);
6557 /* Check that the host's endianness is as expected. */
6558 vassert(archinfo_host->endness == VexEndnessLE);
6560 /* guard against unexpected space regressions */
6561 vassert(sizeof(ARMInstr) <= 28);
6563 /* hwcaps should not change from one ISEL call to another. */
6564 arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
6566 /* Make up an initial environment to use. */
6567 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
6568 env->vreg_ctr = 0;
6570 /* Set up output code array. */
6571 env->code = newHInstrArray();
6573 /* Copy BB's type env. */
6574 env->type_env = bb->tyenv;
6576 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6577 change as we go along. */
6578 env->n_vregmap = bb->tyenv->types_used;
6579 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6580 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6582 /* and finally ... */
6583 env->chainingAllowed = chainingAllowed;
6584 env->hwcaps = hwcaps_host;
6585 env->max_ga = max_ga;
6587 /* For each IR temporary, allocate a suitably-kinded virtual
6588 register. */
6589 j = 0;
6590 for (i = 0; i < env->n_vregmap; i++) {
6591 hregHI = hreg = INVALID_HREG;
6592 switch (bb->tyenv->types[i]) {
6593 case Ity_I1:
6594 case Ity_I8:
6595 case Ity_I16:
6596 case Ity_I32: hreg = mkHReg(True, HRcInt32, 0, j++); break;
6597 case Ity_I64:
6598 if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
6599 hreg = mkHReg(True, HRcFlt64, 0, j++);
6600 } else {
6601 hregHI = mkHReg(True, HRcInt32, 0, j++);
6602 hreg = mkHReg(True, HRcInt32, 0, j++);
6604 break;
6605 case Ity_F32: hreg = mkHReg(True, HRcFlt32, 0, j++); break;
6606 case Ity_F64: hreg = mkHReg(True, HRcFlt64, 0, j++); break;
6607 case Ity_V128: hreg = mkHReg(True, HRcVec128, 0, j++); break;
6608 default: ppIRType(bb->tyenv->types[i]);
6609 vpanic("iselBB: IRTemp type");
6611 env->vregmap[i] = hreg;
6612 env->vregmapHI[i] = hregHI;
6614 env->vreg_ctr = j;
6616 /* The very first instruction must be an event check. */
6617 amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6618 amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6619 addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6621 /* Possibly a block counter increment (for profiling). At this
6622 point we don't know the address of the counter, so just pretend
6623 it is zero. It will have to be patched later, but before this
6624 translation is used, by a call to LibVEX_patchProfCtr. */
6625 if (addProfInc) {
6626 addInstr(env, ARMInstr_ProfInc());
6629 /* Ok, finally we can iterate over the statements. */
6630 for (i = 0; i < bb->stmts_used; i++)
6631 iselStmt(env, bb->stmts[i]);
6633 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6635 /* record the number of vregs we used. */
6636 env->code->n_vregs = env->vreg_ctr;
6637 return env->code;
6641 /*---------------------------------------------------------------*/
6642 /*--- end host_arm_isel.c ---*/
6643 /*---------------------------------------------------------------*/