Implement VRINT{A,N,P,M}.F64 d_d, VRINT{A,N,P,M}.F32 s_s.
[valgrind.git] / VEX / priv / host_arm_isel.c
blob8dd1b4d6127aa09bbaff2a36d06e96a22d39f577
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2015 OpenWorks LLP
11 info@open-works.net
13 NEON support is
14 Copyright (C) 2010-2015 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 02110-1301, USA.
33 The GNU General Public License is contained in the file COPYING.
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 #include "ir_match.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
45 #include "host_arm_defs.h"
48 /*---------------------------------------------------------*/
49 /*--- ARMvfp control word stuff ---*/
50 /*---------------------------------------------------------*/
52 /* Vex-generated code expects to run with the FPU set as follows: all
53 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
55 this corresponds to a FPSCR value of zero.
57 fpscr should therefore be zero on entry to Vex-generated code, and
58 should be unchanged at exit. (Or at least the bottom 28 bits
59 should be zero).
/* Vex-generated code expects FPSCR == 0: all exceptions masked,
   round-to-nearest, non-vector mode, NZCV clear, FZ (flush-to-zero)
   disabled.  See the comment block above. */
#define DEFAULT_FPSCR 0
65 /*---------------------------------------------------------*/
66 /*--- ISelEnv ---*/
67 /*---------------------------------------------------------*/
69 /* This carries around:
71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72 might encounter. This is computed before insn selection starts,
73 and does not change.
75 - A mapping from IRTemp to HReg. This tells the insn selector
76 which virtual register(s) are associated with each IRTemp
77 temporary. This is computed before insn selection starts, and
78 does not change. We expect this mapping to map precisely the
79 same set of IRTemps as the type mapping does.
81 - vregmap holds the primary register for the IRTemp.
82 - vregmapHI is only used for 64-bit integer-typed
83 IRTemps. It holds the identity of a second
84 32-bit virtual HReg, which holds the high half
85 of the value.
87 - The code array, that is, the insns selected so far.
89 - A counter, for generating new virtual registers.
91 - The host hardware capabilities word. This is set at the start
92 and does not change.
94 - A Bool for indicating whether we may generate chain-me
95 instructions for control flow transfers, or whether we must use
96 XAssisted.
98 - The maximum guest address of any guest insn in this block.
99 Actually, the address of the highest-addressed byte from any insn
100 in this block. Is set at the start and does not change. This is
101 used for detecting jumps which are definitely forward-edges from
102 this block, and therefore can be made (chained) to the fast entry
103 point of the destination, thereby avoiding the destination's
104 event check.
106 Note, this is all (well, mostly) host-independent.
109 typedef
110 struct {
111 /* Constant -- are set at the start and do not change. */
112 IRTypeEnv* type_env;
114 HReg* vregmap;
115 HReg* vregmapHI;
116 Int n_vregmap;
118 UInt hwcaps;
120 Bool chainingAllowed;
121 Addr32 max_ga;
123 /* These are modified as we go along. */
124 HInstrArray* code;
125 Int vreg_ctr;
127 ISelEnv;
129 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
131 vassert(tmp >= 0);
132 vassert(tmp < env->n_vregmap);
133 return env->vregmap[tmp];
136 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
138 vassert(tmp >= 0);
139 vassert(tmp < env->n_vregmap);
140 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
141 *vrLO = env->vregmap[tmp];
142 *vrHI = env->vregmapHI[tmp];
145 static void addInstr ( ISelEnv* env, ARMInstr* instr )
147 addHInstr(env->code, instr);
148 if (vex_traceflags & VEX_TRACE_VCODE) {
149 ppARMInstr(instr);
150 vex_printf("\n");
154 static HReg newVRegI ( ISelEnv* env )
156 HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
157 env->vreg_ctr++;
158 return reg;
161 static HReg newVRegD ( ISelEnv* env )
163 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
164 env->vreg_ctr++;
165 return reg;
168 static HReg newVRegF ( ISelEnv* env )
170 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt32, 0/*enc*/, env->vreg_ctr);
171 env->vreg_ctr++;
172 return reg;
175 static HReg newVRegV ( ISelEnv* env )
177 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
178 env->vreg_ctr++;
179 return reg;
182 /* These are duplicated in guest_arm_toIR.c */
183 static IRExpr* unop ( IROp op, IRExpr* a )
185 return IRExpr_Unop(op, a);
188 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
190 return IRExpr_Binop(op, a1, a2);
193 static IRExpr* bind ( Int binder )
195 return IRExpr_Binder(binder);
199 /*---------------------------------------------------------*/
200 /*--- ISEL: Forward declarations ---*/
201 /*---------------------------------------------------------*/
203 /* These are organised as iselXXX and iselXXX_wrk pairs. The
204 iselXXX_wrk do the real work, but are not to be called directly.
205 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
206 checks that all returned registers are virtual. You should not
207 call the _wrk version directly.
/* Addressing-mode selectors (AMode1/AMode2 for int loads/stores,
   AModeV for VFP, AModeN for NEON). */
static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );

static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );

static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );

static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );

/* Reg-or-immediate operand selectors. */
static ARMRI84* iselIntExpr_RI84_wrk
        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
static ARMRI84* iselIntExpr_RI84
        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );

static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );

/* Condition-code and value selectors. */
static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );

static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );

static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
                                ISelEnv* env, const IRExpr* e );
static void iselInt64Expr ( HReg* rHi, HReg* rLo,
                            ISelEnv* env, const IRExpr* e );

static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );

static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );

static HReg iselNeon64Expr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg iselNeon64Expr ( ISelEnv* env, const IRExpr* e );

static HReg iselNeonExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg iselNeonExpr ( ISelEnv* env, const IRExpr* e );
252 /*---------------------------------------------------------*/
253 /*--- ISEL: Misc helpers ---*/
254 /*---------------------------------------------------------*/
256 static UInt ROR32 ( UInt x, UInt sh ) {
257 vassert(sh >= 0 && sh < 32);
258 if (sh == 0)
259 return x;
260 else
261 return (x << (32-sh)) | (x >> sh);
264 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
265 form, and if so return the components. */
266 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
268 UInt i;
269 for (i = 0; i < 16; i++) {
270 if (0 == (u & 0xFFFFFF00)) {
271 *u8 = u;
272 *u4 = i;
273 return True;
275 u = ROR32(u, 30);
277 vassert(i == 16);
278 return False;
281 /* Make a int reg-reg move. */
282 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
284 vassert(hregClass(src) == HRcInt32);
285 vassert(hregClass(dst) == HRcInt32);
286 return ARMInstr_Mov(dst, ARMRI84_R(src));
289 /* Set the VFP unit's rounding mode to default (round to nearest). */
290 static void set_VFP_rounding_default ( ISelEnv* env )
292 /* mov rTmp, #DEFAULT_FPSCR
293 fmxr fpscr, rTmp
295 HReg rTmp = newVRegI(env);
296 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
297 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
300 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
301 expression denoting a value in the range 0 .. 3, indicating a round
302 mode encoded as per type IRRoundingMode. Set FPSCR to have the
303 same rounding.
305 static
306 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
308 /* This isn't simple, because 'mode' carries an IR rounding
309 encoding, and we need to translate that to an ARMvfp one:
310 The IR encoding:
311 00 to nearest (the default)
312 10 to +infinity
313 01 to -infinity
314 11 to zero
315 The ARMvfp encoding:
316 00 to nearest
317 01 to +infinity
318 10 to -infinity
319 11 to zero
320 Easy enough to do; just swap the two bits.
322 HReg irrm = iselIntExpr_R(env, mode);
323 HReg tL = newVRegI(env);
324 HReg tR = newVRegI(env);
325 HReg t3 = newVRegI(env);
326 /* tL = irrm << 1;
327 tR = irrm >> 1; if we're lucky, these will issue together
328 tL &= 2;
329 tR &= 1; ditto
330 t3 = tL | tR;
331 t3 <<= 22;
332 fmxr fpscr, t3
334 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
335 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
336 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
337 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
338 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
339 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
340 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
344 /*---------------------------------------------------------*/
345 /*--- ISEL: Function call helpers ---*/
346 /*---------------------------------------------------------*/
348 /* Used only in doHelperCall. See big comment in doHelperCall re
349 handling of register-parameter args. This function figures out
350 whether evaluation of an expression might require use of a fixed
351 register. If in doubt return True (safe but suboptimal).
353 static
354 Bool mightRequireFixedRegs ( IRExpr* e )
356 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
357 // These are always "safe" -- either a copy of r13(sp) in some
358 // arbitrary vreg, or a copy of r8, respectively.
359 return False;
361 /* Else it's a "normal" expression. */
362 switch (e->tag) {
363 case Iex_RdTmp: case Iex_Const: case Iex_Get:
364 return False;
365 default:
366 return True;
/* Generate a helper call for the one arg-on-stack shape that occurs in
   practice: args[0] is VECRET, followed by between 4 and 12 Ity_I32
   values.  Returns False ("no match") for any other shape, leaving
   *stackAdjustAfterCall / *retloc untouched; returns True on success
   with both outputs set. */
static
Bool doHelperCallWithArgsOnStack ( /*OUT*/UInt* stackAdjustAfterCall,
                                   /*OUT*/RetLoc* retloc,
                                   ISelEnv* env,
                                   IRExpr* guard,
                                   IRCallee* cee, IRType retTy, IRExpr** args )
{
   /* Check this matches the required format: VECRET first, then only
      I32 args. */
   if (args[0] == NULL || args[0]->tag != Iex_VECRET)
      goto no_match;

   UInt i;
   UInt n_real_args = 0;
   for (i = 1; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)))
         goto no_match;
      IRType argTy = typeOfIRExpr(env->type_env, arg);
      if (UNLIKELY(argTy != Ity_I32))
         goto no_match;
      n_real_args++;
   }

   /* We expect to pass at least some args on the stack (<=3 would all
      fit in registers) ... */
   if (n_real_args <= 3)
      goto no_match;

   /* ... but not too many. */
   if (n_real_args > 12)
      goto no_match;

   /* AAPCS-style rules: args 1..4 go in R0..R3, the rest are stored
      right-to-left below SP, which must stay 8-aligned.

      Layout used here:
         ------ original SP
         112 bytes
         ------
         return value
         ------ original SP - 128
         space
         args words, between 1 and 11
         ------ new SP = original_SP - 256
      256 bytes is overkill, but simple and good enough. */

   /* This should really be `HReg argVRegs[n_real_args];` but a VLA
      would make forward 'goto's illegal, hence the fixed-size kludge. */
   vassert(n_real_args <= 12);
   HReg argVRegs[12];
   for (i = 0; i < 12; i++)
      argVRegs[i] = INVALID_HREG;

   /* Compute args into vregs first (slow scheme only). */
   for (i = 0; i < n_real_args; i++) {
      argVRegs[i] = iselIntExpr_R(env, args[i+1]);
   }

   /* Now we can compute the condition.  We can't do it earlier because
      the argument computations could trash the condition codes. */
   ARMCondCode cc = ARMcc_AL;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         goto no_match; //ATC
         /* NOTE(review): line below is intentionally unreachable until
            the conditional case has been tested. */
         cc = iselCondCode( env, guard );
      }
   }

   HReg r0 = hregARM_R0();
   HReg sp = hregARM_R13();

   ARMRI84* c256 = ARMRI84_I84(64, 15); // 64 `ror` (15 * 2) == 256

   /* r0 = &return-value area (original SP - 128); VECRET is passed as
      a pointer in r0. */
   addInstr(env, ARMInstr_Alu(ARMalu_SUB, r0, sp, ARMRI84_I84(128, 0)));

   /* First three real args go in r1..r3 (r0 is taken by VECRET). */
   addInstr(env, mk_iMOVds_RR(hregARM_R1(), argVRegs[0]));
   addInstr(env, mk_iMOVds_RR(hregARM_R2(), argVRegs[1]));
   addInstr(env, mk_iMOVds_RR(hregARM_R3(), argVRegs[2]));

   /* sp -= 256 */
   addInstr(env, ARMInstr_Alu(ARMalu_SUB, sp, sp, c256));

   /* Remaining args are stored at [sp], [sp+4], ... */
   for (i = 3; i < n_real_args; i++) {
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, argVRegs[i],
                                    ARMAMode1_RI(sp, (i-3) * 4)));
   }

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));

   *stackAdjustAfterCall = 256;
   *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 128);

   Addr32 target = (Addr)cee->addr;
   addInstr(env, ARMInstr_Call( cc, target, 4, *retloc ));

   return True; /* success */

  no_match:
   return False;
}
/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done.  Returns True iff it managed to handle this
   combination of arg/return types, else returns False. */
static
Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARMCondCode cc;
   HReg        argregs[ARM_N_ARGREGS];
   HReg        tmpregs[ARM_N_ARGREGS];
   Bool        go_fast;
   Int         n_args, i, nextArgReg;
   Addr32      target;

   vassert(ARM_N_ARGREGS == 4);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call and do the call.

      Only register args are handled here (ARM_N_ARGREGS x 32 bits;
      I32 and I64 arg types only).  Return type may be I{64,32} or
      V128; for V128, |args| carries IRExpr_VECRET() and stack space
      is preallocated for the result.  IRExpr_GSPTR() passes r8.

      Two marshalling schemes, as in the x86 selector (see comments on
      doHelperCall in priv/host-x86/isel.c): the FAST scheme computes
      args directly into the real argument registers -- safe only when
      no argument's evaluation can trash a real register already set
      by another, and only for unconditional calls; the SLOW scheme
      computes all args into vregs first, then moves them, which is
      always correct but leaves redundant vreg-to-rreg moves.
      mightRequireFixedRegs() decides: only IRExpr_Tmp, IRExpr_Const
      and IRExpr_Get are regarded as safe for the fast scheme. */

   /* Note that the cee->regparms field is meaningless on ARM hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         nGSPTRs++;
      }
      n_args++;
   }

   /* If there are more than 4 args, we are going to have to pass
      some via memory.  Use a different function to (possibly) deal with
      that; dealing with it here is too complex. */
   if (n_args > ARM_N_ARGREGS) {
      return doHelperCallWithArgsOnStack(stackAdjustAfterCall, retloc,
                                         env, guard, cee, retTy, args );
   }

   /* After this point we make no attempt to pass args on the stack.
      Even with only 3 args it might still be necessary to give up, if
      for example two or more of them are 64-bit integers. */

   argregs[0] = hregARM_R0();
   argregs[1] = hregARM_R1();
   argregs[2] = hregARM_R2();
   argregs[3] = hregARM_R3();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   /* Conditional call => slow scheme (computing the condition could
      itself trash real registers). */
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   /* Any arg that might need a fixed register => slow scheme. */
   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   /* Vector return => slow scheme. */
   if (go_fast) {
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME: compute each arg directly into its real reg. */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, arg);

         if (nextArgReg >= ARM_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I32) {
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
                                        iselIntExpr_R(env, arg) ));
            nextArgReg++;
         }
         else if (aTy == Ity_I64) {
            /* 64-bit args must be passed in an a reg-pair of the form
               n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
               On a little-endian host, the less significant word is
               passed in the lower-numbered register. */
            if (nextArgReg & 1) {
               /* Pad the odd register with a marker value. */
               if (nextArgReg >= ARM_N_ARGREGS)
                  return False; /* out of argregs */
               addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
               nextArgReg++;
            }
            if (nextArgReg >= ARM_N_ARGREGS)
               return False; /* out of argregs */
            HReg raHi, raLo;
            iselInt64Expr(&raHi, &raLo, env, arg);
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
            nextArgReg++;
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC -- untested path, deliberately trapped
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
                                        hregARM_R8() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // If this happens, it denotes ill-formed IR
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARMcc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, arg);

         if (nextArgReg >= ARM_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I32) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (aTy == Ity_I64) {
            /* Same comment applies as in the Fast-scheme case. */
            if (nextArgReg & 1)
               nextArgReg++;
            if (nextArgReg + 1 >= ARM_N_ARGREGS)
               return False; /* out of argregs */
            HReg raHi, raLo;
            iselInt64Expr(&raHi, &raLo, env, args[i]);
            tmpregs[nextArgReg] = raLo;
            nextArgReg++;
            tmpregs[nextArgReg] = raHi;
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC -- untested path, deliberately trapped
            tmpregs[nextArgReg] = hregARM_R8();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // If this happens, it denotes ill-formed IR
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARMcc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs
            /* Padding slot from 64-bit alignment: fill with marker. */
            addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
            continue;
         }
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);
   /* NOTE(review): `==` binds tighter than `?:` here, so this parses
      as vassert((nVECRETs == cond) ? 1 : 0) -- the trailing ?1:0 is
      redundant but the intended condition is still checked. */
   vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64:
         *retloc = mk_RetLoc_simple(RLPri_2Int);
         break;
      case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end.

      nextArgReg doles out argument registers.  Since these are
      assigned in the order r0, r1, r2, r3, its numeric value at this
      point, which must be between 0 and 4 inclusive, is going to be
      equal to the number of arg regs in use for the call.  Hence bake
      that number into the call (we'll need to know it when doing
      register allocation, to know what regs the call reads.)

      There is a bit of a twist -- harmless but worth recording.
      Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
      the first arg in r0 and the second in r3:r2, but r1 isn't used.
      We nevertheless have nextArgReg==4 and bake that into the call
      instruction.  This will mean the register allocator wil believe
      this insn reads r1 when in fact it doesn't.  But that's
      harmless; it just artificially extends the live range of r1
      unnecessarily.  The best fix would be to put into the
      instruction, a bitmask indicating which of r0/1/2/3 carry live
      values.  But that's too much hassle. */

   target = (Addr)cee->addr;
   addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}
853 /*---------------------------------------------------------*/
854 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
855 /*---------------------------------------------------------*/
857 /* Select insns for an integer-typed expression, and add them to the
858 code list. Return a reg holding the result. This reg will be a
859 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
860 want to modify it, ask for a new vreg, copy it in there, and modify
861 the copy. The register allocator will do its best to map both
862 vregs to the same real register, so the copies will often disappear
863 later in the game.
865 This should handle expressions of 32, 16 and 8-bit type. All
866 results are returned in a 32-bit register. For 16- and 8-bit
867 expressions, the upper 16/24 bits are arbitrary, so you should mask
868 or sign extend partial values if necessary.
871 /* --------------------- AMode1 --------------------- */
873 /* Return an AMode1 which computes the value of the specified
874 expression, possibly also adding insns to the code list as a
875 result. The expression may only be a 32-bit one.
878 static Bool sane_AMode1 ( ARMAMode1* am )
880 switch (am->tag) {
881 case ARMam1_RI:
882 return
883 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
884 && (hregIsVirtual(am->ARMam1.RI.reg)
885 || sameHReg(am->ARMam1.RI.reg, hregARM_R8()))
886 && am->ARMam1.RI.simm13 >= -4095
887 && am->ARMam1.RI.simm13 <= 4095 );
888 case ARMam1_RRS:
889 return
890 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
891 && hregIsVirtual(am->ARMam1.RRS.base)
892 && hregClass(am->ARMam1.RRS.index) == HRcInt32
893 && hregIsVirtual(am->ARMam1.RRS.index)
894 && am->ARMam1.RRS.shift >= 0
895 && am->ARMam1.RRS.shift <= 3 );
896 default:
897 vpanic("sane_AMode: unknown ARM AMode1 tag");
901 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
903 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
904 vassert(sane_AMode1(am));
905 return am;
908 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
910 IRType ty = typeOfIRExpr(env->type_env,e);
911 vassert(ty == Ity_I32);
913 /* FIXME: add RRS matching */
915 /* {Add32,Sub32}(expr,simm13) */
916 if (e->tag == Iex_Binop
917 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
918 && e->Iex.Binop.arg2->tag == Iex_Const
919 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
920 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
921 if (simm >= -4095 && simm <= 4095) {
922 HReg reg;
923 if (e->Iex.Binop.op == Iop_Sub32)
924 simm = -simm;
925 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
926 return ARMAMode1_RI(reg, simm);
930 /* Doesn't match anything in particular. Generate it into
931 a register and use that. */
933 HReg reg = iselIntExpr_R(env, e);
934 return ARMAMode1_RI(reg, 0);
940 /* --------------------- AMode2 --------------------- */
942 /* Return an AMode2 which computes the value of the specified
943 expression, possibly also adding insns to the code list as a
944 result. The expression may only be a 32-bit one.
947 static Bool sane_AMode2 ( ARMAMode2* am )
949 switch (am->tag) {
950 case ARMam2_RI:
951 return
952 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
953 && hregIsVirtual(am->ARMam2.RI.reg)
954 && am->ARMam2.RI.simm9 >= -255
955 && am->ARMam2.RI.simm9 <= 255 );
956 case ARMam2_RR:
957 return
958 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
959 && hregIsVirtual(am->ARMam2.RR.base)
960 && hregClass(am->ARMam2.RR.index) == HRcInt32
961 && hregIsVirtual(am->ARMam2.RR.index) );
962 default:
963 vpanic("sane_AMode: unknown ARM AMode2 tag");
967 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
969 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
970 vassert(sane_AMode2(am));
971 return am;
974 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
976 IRType ty = typeOfIRExpr(env->type_env,e);
977 vassert(ty == Ity_I32);
979 /* FIXME: add RR matching */
981 /* {Add32,Sub32}(expr,simm8) */
982 if (e->tag == Iex_Binop
983 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
984 && e->Iex.Binop.arg2->tag == Iex_Const
985 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
986 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
987 if (simm >= -255 && simm <= 255) {
988 HReg reg;
989 if (e->Iex.Binop.op == Iop_Sub32)
990 simm = -simm;
991 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
992 return ARMAMode2_RI(reg, simm);
996 /* Doesn't match anything in particular. Generate it into
997 a register and use that. */
999 HReg reg = iselIntExpr_R(env, e);
1000 return ARMAMode2_RI(reg, 0);
1006 /* --------------------- AModeV --------------------- */
1008 /* Return an AModeV which computes the value of the specified
1009 expression, possibly also adding insns to the code list as a
1010 result. The expression may only be a 32-bit one.
1013 static Bool sane_AModeV ( ARMAModeV* am )
1015 return toBool( hregClass(am->reg) == HRcInt32
1016 && hregIsVirtual(am->reg)
1017 && am->simm11 >= -1020 && am->simm11 <= 1020
1018 && 0 == (am->simm11 & 3) );
1021 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
1023 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
1024 vassert(sane_AModeV(am));
1025 return am;
1028 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
1030 IRType ty = typeOfIRExpr(env->type_env,e);
1031 vassert(ty == Ity_I32);
1033 /* {Add32,Sub32}(expr, simm8 << 2) */
1034 if (e->tag == Iex_Binop
1035 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
1036 && e->Iex.Binop.arg2->tag == Iex_Const
1037 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1038 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1039 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
1040 HReg reg;
1041 if (e->Iex.Binop.op == Iop_Sub32)
1042 simm = -simm;
1043 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
1044 return mkARMAModeV(reg, simm);
1048 /* Doesn't match anything in particular. Generate it into
1049 a register and use that. */
1051 HReg reg = iselIntExpr_R(env, e);
1052 return mkARMAModeV(reg, 0);
1057 /* -------------------- AModeN -------------------- */
1059 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
1061 return iselIntExpr_AModeN_wrk(env, e);
1064 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
1066 HReg reg = iselIntExpr_R(env, e);
1067 return mkARMAModeN_R(reg);
1071 /* --------------------- RI84 --------------------- */
1073 /* Select instructions to generate 'e' into a RI84. If mayInv is
1074 true, then the caller will also accept an I84 form that denotes
1075 'not e'. In this case didInv may not be NULL, and *didInv is set
1076 to True. This complication is so as to allow generation of an RI84
1077 which is suitable for use in either an AND or BIC instruction,
1078 without knowing (before this call) which one.
1080 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
1081 ISelEnv* env, IRExpr* e )
1083 ARMRI84* ri;
1084 if (mayInv)
1085 vassert(didInv != NULL);
1086 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
1087 /* sanity checks ... */
1088 switch (ri->tag) {
1089 case ARMri84_I84:
1090 return ri;
1091 case ARMri84_R:
1092 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
1093 vassert(hregIsVirtual(ri->ARMri84.R.reg));
1094 return ri;
1095 default:
1096 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1100 /* DO NOT CALL THIS DIRECTLY ! */
1101 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
1102 ISelEnv* env, IRExpr* e )
1104 IRType ty = typeOfIRExpr(env->type_env,e);
1105 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1107 if (didInv) *didInv = False;
1109 /* special case: immediate */
1110 if (e->tag == Iex_Const) {
1111 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
1112 switch (e->Iex.Const.con->tag) {
1113 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1114 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1115 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1116 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1118 if (fitsIn8x4(&u8, &u4, u)) {
1119 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1121 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
1122 vassert(didInv);
1123 *didInv = True;
1124 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1126 /* else fail, fall through to default case */
1129 /* default case: calculate into a register and return that */
1131 HReg r = iselIntExpr_R ( env, e );
1132 return ARMRI84_R(r);
1137 /* --------------------- RI5 --------------------- */
1139 /* Select instructions to generate 'e' into a RI5. */
1141 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
1143 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
1144 /* sanity checks ... */
1145 switch (ri->tag) {
1146 case ARMri5_I5:
1147 return ri;
1148 case ARMri5_R:
1149 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
1150 vassert(hregIsVirtual(ri->ARMri5.R.reg));
1151 return ri;
1152 default:
1153 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
1157 /* DO NOT CALL THIS DIRECTLY ! */
1158 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
1160 IRType ty = typeOfIRExpr(env->type_env,e);
1161 vassert(ty == Ity_I32 || ty == Ity_I8);
1163 /* special case: immediate */
1164 if (e->tag == Iex_Const) {
1165 UInt u; /* both invalid */
1166 switch (e->Iex.Const.con->tag) {
1167 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1168 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1169 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1170 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
1172 if (u >= 1 && u <= 31) {
1173 return ARMRI5_I5(u);
1175 /* else fail, fall through to default case */
1178 /* default case: calculate into a register and return that */
1180 HReg r = iselIntExpr_R ( env, e );
1181 return ARMRI5_R(r);
1186 /* ------------------- CondCode ------------------- */
1188 /* Generate code to evaluated a bit-typed expression, returning the
1189 condition code which would correspond when the expression would
1190 notionally have returned 1. */
1192 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1194 ARMCondCode cc = iselCondCode_wrk(env,e);
1195 vassert(cc != ARMcc_NV);
1196 return cc;
1199 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1201 vassert(e);
1202 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1204 /* var */
1205 if (e->tag == Iex_RdTmp) {
1206 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1207 /* CmpOrTst doesn't modify rTmp; so this is OK. */
1208 ARMRI84* one = ARMRI84_I84(1,0);
1209 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1210 return ARMcc_NE;
1213 /* Not1(e) */
1214 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1215 /* Generate code for the arg, and negate the test condition */
1216 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1219 /* --- patterns rooted at: 32to1 --- */
1221 if (e->tag == Iex_Unop
1222 && e->Iex.Unop.op == Iop_32to1) {
1223 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1224 ARMRI84* one = ARMRI84_I84(1,0);
1225 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1226 return ARMcc_NE;
1229 /* --- patterns rooted at: CmpNEZ8 --- */
1231 if (e->tag == Iex_Unop
1232 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1233 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1234 ARMRI84* xFF = ARMRI84_I84(0xFF,0);
1235 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1236 return ARMcc_NE;
1239 /* --- patterns rooted at: CmpNEZ32 --- */
1241 if (e->tag == Iex_Unop
1242 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1243 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1244 ARMRI84* zero = ARMRI84_I84(0,0);
1245 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1246 return ARMcc_NE;
1249 /* --- patterns rooted at: CmpNEZ64 --- */
1251 if (e->tag == Iex_Unop
1252 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1253 HReg tHi, tLo;
1254 HReg tmp = newVRegI(env);
1255 ARMRI84* zero = ARMRI84_I84(0,0);
1256 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1257 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1258 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1259 return ARMcc_NE;
1262 /* --- Cmp*32*(x,y) --- */
1263 if (e->tag == Iex_Binop
1264 && (e->Iex.Binop.op == Iop_CmpEQ32
1265 || e->Iex.Binop.op == Iop_CmpNE32
1266 || e->Iex.Binop.op == Iop_CmpLT32S
1267 || e->Iex.Binop.op == Iop_CmpLT32U
1268 || e->Iex.Binop.op == Iop_CmpLE32S
1269 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1270 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1271 ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1272 env, e->Iex.Binop.arg2);
1273 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1274 switch (e->Iex.Binop.op) {
1275 case Iop_CmpEQ32: return ARMcc_EQ;
1276 case Iop_CmpNE32: return ARMcc_NE;
1277 case Iop_CmpLT32S: return ARMcc_LT;
1278 case Iop_CmpLT32U: return ARMcc_LO;
1279 case Iop_CmpLE32S: return ARMcc_LE;
1280 case Iop_CmpLE32U: return ARMcc_LS;
1281 default: vpanic("iselCondCode(arm): CmpXX32");
1285 /* const */
1286 /* Constant 1:Bit */
1287 if (e->tag == Iex_Const) {
1288 HReg r;
1289 vassert(e->Iex.Const.con->tag == Ico_U1);
1290 vassert(e->Iex.Const.con->Ico.U1 == True
1291 || e->Iex.Const.con->Ico.U1 == False);
1292 r = newVRegI(env);
1293 addInstr(env, ARMInstr_Imm32(r, 0));
1294 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1295 return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1298 // JRS 2013-Jan-03: this seems completely nonsensical
1299 /* --- CasCmpEQ* --- */
1300 /* Ist_Cas has a dummy argument to compare with, so comparison is
1301 always true. */
1302 //if (e->tag == Iex_Binop
1303 // && (e->Iex.Binop.op == Iop_CasCmpEQ32
1304 // || e->Iex.Binop.op == Iop_CasCmpEQ16
1305 // || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1306 // return ARMcc_AL;
1309 ppIRExpr(e);
1310 vpanic("iselCondCode");
1314 /* --------------------- Reg --------------------- */
1316 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1318 HReg r = iselIntExpr_R_wrk(env, e);
1319 /* sanity checks ... */
1320 # if 0
1321 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1322 # endif
1323 vassert(hregClass(r) == HRcInt32);
1324 vassert(hregIsVirtual(r));
1325 return r;
1328 /* DO NOT CALL THIS DIRECTLY ! */
1329 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1331 IRType ty = typeOfIRExpr(env->type_env,e);
1332 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1334 switch (e->tag) {
1336 /* --------- TEMP --------- */
1337 case Iex_RdTmp: {
1338 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1341 /* --------- LOAD --------- */
1342 case Iex_Load: {
1343 HReg dst = newVRegI(env);
1345 if (e->Iex.Load.end != Iend_LE)
1346 goto irreducible;
1348 if (ty == Ity_I32) {
1349 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1350 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
1351 return dst;
1353 if (ty == Ity_I16) {
1354 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1355 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
1356 True/*isLoad*/, False/*!signedLoad*/,
1357 dst, amode));
1358 return dst;
1360 if (ty == Ity_I8) {
1361 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1362 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
1363 return dst;
1365 break;
1368 //zz /* --------- TERNARY OP --------- */
1369 //zz case Iex_Triop: {
1370 //zz IRTriop *triop = e->Iex.Triop.details;
1371 //zz /* C3210 flags following FPU partial remainder (fprem), both
1372 //zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1373 //zz if (triop->op == Iop_PRemC3210F64
1374 //zz || triop->op == Iop_PRem1C3210F64) {
1375 //zz HReg junk = newVRegF(env);
1376 //zz HReg dst = newVRegI(env);
1377 //zz HReg srcL = iselDblExpr(env, triop->arg2);
1378 //zz HReg srcR = iselDblExpr(env, triop->arg3);
1379 //zz /* XXXROUNDINGFIXME */
1380 //zz /* set roundingmode here */
1381 //zz addInstr(env, X86Instr_FpBinary(
1382 //zz e->Iex.Binop.op==Iop_PRemC3210F64
1383 //zz ? Xfp_PREM : Xfp_PREM1,
1384 //zz srcL,srcR,junk
1385 //zz ));
1386 //zz /* The previous pseudo-insn will have left the FPU's C3210
1387 //zz flags set correctly. So bag them. */
1388 //zz addInstr(env, X86Instr_FpStSW_AX());
1389 //zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1390 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1391 //zz return dst;
1392 //zz }
1393 //zz
1394 //zz break;
1395 //zz }
1397 /* --------- BINARY OP --------- */
1398 case Iex_Binop: {
1400 ARMAluOp aop = 0; /* invalid */
1401 ARMShiftOp sop = 0; /* invalid */
1403 /* ADD/SUB/AND/OR/XOR */
1404 switch (e->Iex.Binop.op) {
1405 case Iop_And32: {
1406 Bool didInv = False;
1407 HReg dst = newVRegI(env);
1408 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1409 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1410 env, e->Iex.Binop.arg2);
1411 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1412 dst, argL, argR));
1413 return dst;
1415 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1416 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1417 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1418 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1419 std_binop: {
1420 HReg dst = newVRegI(env);
1421 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1422 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1423 env, e->Iex.Binop.arg2);
1424 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1425 return dst;
1427 default: break;
1430 /* SHL/SHR/SAR */
1431 switch (e->Iex.Binop.op) {
1432 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1433 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1434 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1435 sh_binop: {
1436 HReg dst = newVRegI(env);
1437 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1438 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1439 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1440 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1441 return dst;
1443 default: break;
1446 /* MUL */
1447 if (e->Iex.Binop.op == Iop_Mul32) {
1448 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1449 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1450 HReg dst = newVRegI(env);
1451 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1452 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1453 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1454 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1455 return dst;
1458 /* Handle misc other ops. */
1460 if (e->Iex.Binop.op == Iop_Max32U) {
1461 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1462 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1463 HReg dst = newVRegI(env);
1464 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1465 ARMRI84_R(argR)));
1466 addInstr(env, mk_iMOVds_RR(dst, argL));
1467 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1468 return dst;
1471 if (e->Iex.Binop.op == Iop_CmpF64) {
1472 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1473 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1474 HReg dst = newVRegI(env);
1475 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1476 FMSTAT, so we can examine the results directly. */
1477 addInstr(env, ARMInstr_VCmpD(dL, dR));
1478 /* Create in dst, the IRCmpF64Result encoded result. */
1479 addInstr(env, ARMInstr_Imm32(dst, 0));
1480 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1481 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1482 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1483 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1484 return dst;
1487 if (e->Iex.Binop.op == Iop_F64toI32S
1488 || e->Iex.Binop.op == Iop_F64toI32U) {
1489 /* Wretched uglyness all round, due to having to deal
1490 with rounding modes. Oh well. */
1491 /* FIXME: if arg1 is a constant indicating round-to-zero,
1492 then we could skip all this arsing around with FPSCR and
1493 simply emit FTO{S,U}IZD. */
1494 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1495 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1496 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1497 /* FTO{S,U}ID valF, valD */
1498 HReg valF = newVRegF(env);
1499 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1500 valF, valD));
1501 set_VFP_rounding_default(env);
1502 /* VMOV dst, valF */
1503 HReg dst = newVRegI(env);
1504 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1505 return dst;
1508 if (e->Iex.Binop.op == Iop_GetElem8x8
1509 || e->Iex.Binop.op == Iop_GetElem16x4
1510 || e->Iex.Binop.op == Iop_GetElem32x2) {
1511 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1512 HReg res = newVRegI(env);
1513 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1514 UInt index, size;
1515 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1516 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1517 vpanic("ARM target supports GetElem with constant "
1518 "second argument only (neon)\n");
1520 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1521 switch (e->Iex.Binop.op) {
1522 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1523 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1524 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1525 default: vassert(0);
1527 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1528 mkARMNRS(ARMNRS_Reg, res, 0),
1529 mkARMNRS(ARMNRS_Scalar, arg, index),
1530 size, False));
1531 return res;
1535 if (e->Iex.Binop.op == Iop_GetElem32x2
1536 && e->Iex.Binop.arg2->tag == Iex_Const
1537 && !(env->hwcaps & VEX_HWCAPS_ARM_NEON)) {
1538 /* We may have to do GetElem32x2 on a non-NEON capable
1539 target. */
1540 IRConst* con = e->Iex.Binop.arg2->Iex.Const.con;
1541 vassert(con->tag == Ico_U8); /* else IR is ill-typed */
1542 UInt index = con->Ico.U8;
1543 if (index >= 0 && index <= 1) {
1544 HReg rHi, rLo;
1545 iselInt64Expr(&rHi, &rLo, env, e->Iex.Binop.arg1);
1546 return index == 0 ? rLo : rHi;
1550 if (e->Iex.Binop.op == Iop_GetElem8x16
1551 || e->Iex.Binop.op == Iop_GetElem16x8
1552 || e->Iex.Binop.op == Iop_GetElem32x4) {
1553 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1554 HReg res = newVRegI(env);
1555 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1556 UInt index, size;
1557 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1558 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1559 vpanic("ARM target supports GetElem with constant "
1560 "second argument only (neon)\n");
1562 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1563 switch (e->Iex.Binop.op) {
1564 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1565 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1566 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1567 default: vassert(0);
1569 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1570 mkARMNRS(ARMNRS_Reg, res, 0),
1571 mkARMNRS(ARMNRS_Scalar, arg, index),
1572 size, True));
1573 return res;
1577 /* All cases involving host-side helper calls. */
1578 void* fn = NULL;
1579 switch (e->Iex.Binop.op) {
1580 case Iop_Add16x2:
1581 fn = &h_generic_calc_Add16x2; break;
1582 case Iop_Sub16x2:
1583 fn = &h_generic_calc_Sub16x2; break;
1584 case Iop_HAdd16Ux2:
1585 fn = &h_generic_calc_HAdd16Ux2; break;
1586 case Iop_HAdd16Sx2:
1587 fn = &h_generic_calc_HAdd16Sx2; break;
1588 case Iop_HSub16Ux2:
1589 fn = &h_generic_calc_HSub16Ux2; break;
1590 case Iop_HSub16Sx2:
1591 fn = &h_generic_calc_HSub16Sx2; break;
1592 case Iop_QAdd16Sx2:
1593 fn = &h_generic_calc_QAdd16Sx2; break;
1594 case Iop_QAdd16Ux2:
1595 fn = &h_generic_calc_QAdd16Ux2; break;
1596 case Iop_QSub16Sx2:
1597 fn = &h_generic_calc_QSub16Sx2; break;
1598 case Iop_Add8x4:
1599 fn = &h_generic_calc_Add8x4; break;
1600 case Iop_Sub8x4:
1601 fn = &h_generic_calc_Sub8x4; break;
1602 case Iop_HAdd8Ux4:
1603 fn = &h_generic_calc_HAdd8Ux4; break;
1604 case Iop_HAdd8Sx4:
1605 fn = &h_generic_calc_HAdd8Sx4; break;
1606 case Iop_HSub8Ux4:
1607 fn = &h_generic_calc_HSub8Ux4; break;
1608 case Iop_HSub8Sx4:
1609 fn = &h_generic_calc_HSub8Sx4; break;
1610 case Iop_QAdd8Sx4:
1611 fn = &h_generic_calc_QAdd8Sx4; break;
1612 case Iop_QAdd8Ux4:
1613 fn = &h_generic_calc_QAdd8Ux4; break;
1614 case Iop_QSub8Sx4:
1615 fn = &h_generic_calc_QSub8Sx4; break;
1616 case Iop_QSub8Ux4:
1617 fn = &h_generic_calc_QSub8Ux4; break;
1618 case Iop_Sad8Ux4:
1619 fn = &h_generic_calc_Sad8Ux4; break;
1620 case Iop_QAdd32S:
1621 fn = &h_generic_calc_QAdd32S; break;
1622 case Iop_QSub32S:
1623 fn = &h_generic_calc_QSub32S; break;
1624 case Iop_QSub16Ux2:
1625 fn = &h_generic_calc_QSub16Ux2; break;
1626 case Iop_DivU32:
1627 fn = &h_calc_udiv32_w_arm_semantics; break;
1628 case Iop_DivS32:
1629 fn = &h_calc_sdiv32_w_arm_semantics; break;
1630 default:
1631 break;
1634 if (fn) {
1635 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1636 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1637 HReg res = newVRegI(env);
1638 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1639 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1640 addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1641 2, mk_RetLoc_simple(RLPri_Int) ));
1642 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1643 return res;
1646 break;
1649 /* --------- UNARY OP --------- */
1650 case Iex_Unop: {
1652 //zz /* 1Uto8(32to1(expr32)) */
1653 //zz if (e->Iex.Unop.op == Iop_1Uto8) {
1654 //zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1655 //zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1656 //zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1657 //zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1658 //zz const IRExpr* expr32 = mi.bindee[0];
1659 //zz HReg dst = newVRegI(env);
1660 //zz HReg src = iselIntExpr_R(env, expr32);
1661 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1662 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1663 //zz X86RMI_Imm(1), dst));
1664 //zz return dst;
1665 //zz }
1666 //zz }
1667 //zz
1668 //zz /* 8Uto32(LDle(expr32)) */
1669 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1670 //zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1671 //zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1672 //zz unop(Iop_8Uto32,
1673 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1674 //zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1675 //zz HReg dst = newVRegI(env);
1676 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1677 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1678 //zz return dst;
1679 //zz }
1680 //zz }
1681 //zz
1682 //zz /* 8Sto32(LDle(expr32)) */
1683 //zz if (e->Iex.Unop.op == Iop_8Sto32) {
1684 //zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1685 //zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1686 //zz unop(Iop_8Sto32,
1687 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1688 //zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1689 //zz HReg dst = newVRegI(env);
1690 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1691 //zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1692 //zz return dst;
1693 //zz }
1694 //zz }
1695 //zz
1696 //zz /* 16Uto32(LDle(expr32)) */
1697 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1698 //zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1699 //zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1700 //zz unop(Iop_16Uto32,
1701 //zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1702 //zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1703 //zz HReg dst = newVRegI(env);
1704 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1705 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1706 //zz return dst;
1707 //zz }
1708 //zz }
1709 //zz
1710 //zz /* 8Uto32(GET:I8) */
1711 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1712 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1713 //zz HReg dst;
1714 //zz X86AMode* amode;
1715 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1716 //zz dst = newVRegI(env);
1717 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1718 //zz hregX86_EBP());
1719 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1720 //zz return dst;
1721 //zz }
1722 //zz }
1723 //zz
1724 //zz /* 16to32(GET:I16) */
1725 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1726 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1727 //zz HReg dst;
1728 //zz X86AMode* amode;
1729 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1730 //zz dst = newVRegI(env);
1731 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1732 //zz hregX86_EBP());
1733 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1734 //zz return dst;
1735 //zz }
1736 //zz }
1738 switch (e->Iex.Unop.op) {
1739 case Iop_8Uto32: {
1740 HReg dst = newVRegI(env);
1741 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1742 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1743 dst, src, ARMRI84_I84(0xFF,0)));
1744 return dst;
1746 //zz case Iop_8Uto16:
1747 //zz case Iop_8Uto32:
1748 //zz case Iop_16Uto32: {
1749 //zz HReg dst = newVRegI(env);
1750 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1751 //zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1752 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1753 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1754 //zz X86RMI_Imm(mask), dst));
1755 //zz return dst;
1756 //zz }
1757 //zz case Iop_8Sto16:
1758 //zz case Iop_8Sto32:
1759 case Iop_16Uto32: {
1760 HReg dst = newVRegI(env);
1761 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1762 ARMRI5* amt = ARMRI5_I5(16);
1763 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1764 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1765 return dst;
1767 case Iop_8Sto32:
1768 case Iop_16Sto32: {
1769 HReg dst = newVRegI(env);
1770 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1771 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1772 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1773 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1774 return dst;
1776 //zz case Iop_Not8:
1777 //zz case Iop_Not16:
1778 case Iop_Not32: {
1779 HReg dst = newVRegI(env);
1780 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1781 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1782 return dst;
1784 case Iop_64HIto32: {
1785 HReg rHi, rLo;
1786 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1787 return rHi; /* and abandon rLo .. poor wee thing :-) */
1789 case Iop_64to32: {
1790 HReg rHi, rLo;
1791 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1792 return rLo; /* similar stupid comment to the above ... */
1794 case Iop_64to8: {
1795 HReg rHi, rLo;
1796 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1797 HReg tHi = newVRegI(env);
1798 HReg tLo = newVRegI(env);
1799 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1800 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1801 rHi = tHi;
1802 rLo = tLo;
1803 } else {
1804 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1806 return rLo;
1809 case Iop_1Uto32:
1810 /* 1Uto32(tmp). Since I1 values generated into registers
1811 are guaranteed to have value either only zero or one,
1812 we can simply return the value of the register in this
1813 case. */
1814 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1815 HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1816 return dst;
1818 /* else fall through */
1819 case Iop_1Uto8: {
1820 HReg dst = newVRegI(env);
1821 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1822 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1823 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1824 return dst;
1827 case Iop_1Sto32: {
1828 HReg dst = newVRegI(env);
1829 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1830 ARMRI5* amt = ARMRI5_I5(31);
1831 /* This is really rough. We could do much better here;
1832 perhaps mvn{cond} dst, #0 as the second insn?
1833 (same applies to 1Sto64) */
1834 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1835 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1836 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1837 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1838 return dst;
1842 //zz case Iop_1Sto8:
1843 //zz case Iop_1Sto16:
1844 //zz case Iop_1Sto32: {
1845 //zz /* could do better than this, but for now ... */
1846 //zz HReg dst = newVRegI(env);
1847 //zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1848 //zz addInstr(env, X86Instr_Set32(cond,dst));
1849 //zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1850 //zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1851 //zz return dst;
1852 //zz }
1853 //zz case Iop_Ctz32: {
1854 //zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1855 //zz HReg dst = newVRegI(env);
1856 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1857 //zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1858 //zz return dst;
1859 //zz }
1860 case Iop_Clz32: {
1861 /* Count leading zeroes; easy on ARM. */
1862 HReg dst = newVRegI(env);
1863 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1864 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1865 return dst;
1868 case Iop_CmpwNEZ32: {
1869 HReg dst = newVRegI(env);
1870 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1871 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1872 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1873 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1874 return dst;
1877 case Iop_Left32: {
1878 HReg dst = newVRegI(env);
1879 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1880 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1881 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1882 return dst;
1885 //zz case Iop_V128to32: {
1886 //zz HReg dst = newVRegI(env);
1887 //zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1888 //zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1889 //zz sub_from_esp(env, 16);
1890 //zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1891 //zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1892 //zz add_to_esp(env, 16);
1893 //zz return dst;
1894 //zz }
1895 //zz
1896 case Iop_ReinterpF32asI32: {
1897 HReg dst = newVRegI(env);
1898 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1899 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1900 return dst;
1903 //zz
1904 //zz case Iop_16to8:
1905 case Iop_32to8:
1906 case Iop_32to16:
1907 /* These are no-ops. */
1908 return iselIntExpr_R(env, e->Iex.Unop.arg);
1910 default:
1911 break;
1914 /* All Unop cases involving host-side helper calls. */
1915 void* fn = NULL;
1916 switch (e->Iex.Unop.op) {
1917 case Iop_CmpNEZ16x2:
1918 fn = &h_generic_calc_CmpNEZ16x2; break;
1919 case Iop_CmpNEZ8x4:
1920 fn = &h_generic_calc_CmpNEZ8x4; break;
1921 default:
1922 break;
1925 if (fn) {
1926 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1927 HReg res = newVRegI(env);
1928 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1929 addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1930 1, mk_RetLoc_simple(RLPri_Int) ));
1931 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1932 return res;
1935 break;
1938 /* --------- GET --------- */
1939 case Iex_Get: {
1940 if (ty == Ity_I32
1941 && 0 == (e->Iex.Get.offset & 3)
1942 && e->Iex.Get.offset < 4096-4) {
1943 HReg dst = newVRegI(env);
1944 addInstr(env, ARMInstr_LdSt32(
1945 ARMcc_AL, True/*isLoad*/,
1946 dst,
1947 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1948 return dst;
1950 //zz if (ty == Ity_I8 || ty == Ity_I16) {
1951 //zz HReg dst = newVRegI(env);
1952 //zz addInstr(env, X86Instr_LoadEX(
1953 //zz toUChar(ty==Ity_I8 ? 1 : 2),
1954 //zz False,
1955 //zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1956 //zz dst));
1957 //zz return dst;
1958 //zz }
1959 break;
1962 //zz case Iex_GetI: {
1963 //zz X86AMode* am
1964 //zz = genGuestArrayOffset(
1965 //zz env, e->Iex.GetI.descr,
1966 //zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1967 //zz HReg dst = newVRegI(env);
1968 //zz if (ty == Ity_I8) {
1969 //zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1970 //zz return dst;
1971 //zz }
1972 //zz if (ty == Ity_I32) {
1973 //zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1974 //zz return dst;
1975 //zz }
1976 //zz break;
1977 //zz }
1979 /* --------- CCALL --------- */
1980 case Iex_CCall: {
1981 HReg dst = newVRegI(env);
1982 vassert(ty == e->Iex.CCall.retty);
1984 /* be very restrictive for now. Only 32/64-bit ints allowed for
1985 args, and 32 bits for return type. Don't forget to change
1986 the RetLoc if more types are allowed in future. */
1987 if (e->Iex.CCall.retty != Ity_I32)
1988 goto irreducible;
1990 /* Marshal args, do the call, clear stack. */
1991 UInt addToSp = 0;
1992 RetLoc rloc = mk_RetLoc_INVALID();
1993 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1994 e->Iex.CCall.cee, e->Iex.CCall.retty,
1995 e->Iex.CCall.args );
1996 /* */
1997 if (ok) {
1998 vassert(is_sane_RetLoc(rloc));
1999 vassert(rloc.pri == RLPri_Int);
2000 vassert(addToSp == 0);
2001 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
2002 return dst;
2004 /* else fall through; will hit the irreducible: label */
2007 /* --------- LITERAL --------- */
2008 /* 32 literals */
2009 case Iex_Const: {
2010 UInt u = 0;
2011 HReg dst = newVRegI(env);
2012 switch (e->Iex.Const.con->tag) {
2013 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2014 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
2015 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
2016 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
2018 addInstr(env, ARMInstr_Imm32(dst, u));
2019 return dst;
2022 /* --------- MULTIPLEX --------- */
2023 case Iex_ITE: { // VFD
2024 /* ITE(ccexpr, iftrue, iffalse) */
2025 if (ty == Ity_I32) {
2026 ARMCondCode cc;
2027 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2028 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse);
2029 HReg dst = newVRegI(env);
2030 addInstr(env, mk_iMOVds_RR(dst, r1));
2031 cc = iselCondCode(env, e->Iex.ITE.cond);
2032 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
2033 return dst;
2035 break;
2038 default:
2039 break;
2040 } /* switch (e->tag) */
2042 /* We get here if no pattern matched. */
2043 irreducible:
2044 ppIRExpr(e);
2045 vpanic("iselIntExpr_R: cannot reduce tree");
2049 /* -------------------- 64-bit -------------------- */
2051 /* Compute a 64-bit value into a register pair, which is returned as
2052 the first two parameters. As with iselIntExpr_R, these may be
2053 either real or virtual regs; in any case they must not be changed
2054 by subsequent code emitted by the caller. */
2056 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2057 const IRExpr* e )
2059 iselInt64Expr_wrk(rHi, rLo, env, e);
2060 # if 0
2061 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2062 # endif
2063 vassert(hregClass(*rHi) == HRcInt32);
2064 vassert(hregIsVirtual(*rHi));
2065 vassert(hregClass(*rLo) == HRcInt32);
2066 vassert(hregIsVirtual(*rLo));
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselInt64Expr: emit instructions that compute the I64
   expression 'e' into a pair of 32-bit registers, returned via
   *rHi (bits 63:32) and *rLo (bits 31:0). */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
                                const IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);

   /* 64-bit literal */
   if (e->tag == Iex_Const) {
      ULong w64 = e->Iex.Const.con->Ico.U64;
      UInt  wHi = toUInt(w64 >> 32);
      UInt  wLo = toUInt(w64);
      HReg  tHi = newVRegI(env);
      HReg  tLo = newVRegI(env);
      vassert(e->Iex.Const.con->tag == Ico_U64);
      addInstr(env, ARMInstr_Imm32(tHi, wHi));
      addInstr(env, ARMInstr_Imm32(tLo, wLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* read 64-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
         /* With NEON, I64 temps live in D registers; split the D reg
            back into two core registers. */
         HReg tHi = newVRegI(env);
         HReg tLo = newVRegI(env);
         HReg tmp = iselNeon64Expr(env, e);
         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
         *rHi = tHi;
         *rLo = tLo;
      } else {
         /* Without NEON, I64 temps are already a core-register pair. */
         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
      }
      return;
   }

   /* 64-bit load */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg tLo, tHi, rA;
      vassert(e->Iex.Load.ty == Ity_I64);
      rA  = iselIntExpr_R(env, e->Iex.Load.addr);
      tHi = newVRegI(env);
      tLo = newVRegI(env);
      /* Little-endian: low word at +0, high word at +4. */
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                    tHi, ARMAMode1_RI(rA, 4)));
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                    tLo, ARMAMode1_RI(rA, 0)));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit GET */
   if (e->tag == Iex_Get) {
      /* Guest state is addressed off R8. */
      ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
      ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         /* 32 x 32 -> 64 multiply */
         case Iop_MullS32:
         case Iop_MullU32: {
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg tHi  = newVRegI(env);
            HReg tLo  = newVRegI(env);
            ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
                              ? ARMmul_SX : ARMmul_ZX;
            /* ARMInstr_Mul uses fixed registers: operands in R2/R3,
               result in R1:R0 -- hence the moves around it. */
            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
            addInstr(env, ARMInstr_Mul(mop));
            addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
            addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         case Iop_Or64: {
            /* OR each 32-bit half independently. */
            HReg xLo, xHi, yLo, yHi;
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         case Iop_Add64: {
            /* ADDS on the low halves sets the carry flag, which the
               ADC on the high halves then consumes.  The two emits
               must stay adjacent and in this order. */
            HReg xLo, xHi, yLo, yHi;
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
            addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 32HLto64(e1,e2) */
         case Iop_32HLto64: {
            /* Just pair up the two 32-bit results; no code emitted. */
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;
         }

         default:
            break;
      }
   }

   /* --------- UNARY ops --------- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         /* ReinterpF64asI64 */
         case Iop_ReinterpF64asI64: {
            /* Bit-for-bit move of a D register into two core regs. */
            HReg dstHi = newVRegI(env);
            HReg dstLo = newVRegI(env);
            HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
            addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         /* Left64(e) */
         case Iop_Left64: {
            HReg yLo, yHi;
            HReg tHi  = newVRegI(env);
            HReg tLo  = newVRegI(env);
            HReg zero = newVRegI(env);
            /* yHi:yLo = arg */
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
            /* zero = 0 */
            addInstr(env, ARMInstr_Imm32(zero, 0));
            /* tLo = 0 - yLo, and set carry */
            addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
                                       tLo, zero, ARMRI84_R(yLo)));
            /* tHi = 0 - yHi - carry */
            addInstr(env, ARMInstr_Alu(ARMalu_SBC,
                                       tHi, zero, ARMRI84_R(yHi)));
            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
               back in, so as to give the final result
               tHi:tLo = arg | -arg. */
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* CmpwNEZ64(e) */
         case Iop_CmpwNEZ64: {
            /* Produce all-zeroes if arg == 0, all-ones otherwise, in
               both halves of the result. */
            HReg srcLo, srcHi;
            HReg tmp1 = newVRegI(env);
            HReg tmp2 = newVRegI(env);
            /* srcHi:srcLo = arg */
            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
            /* tmp1 = srcHi | srcLo */
            addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                       tmp1, srcHi, ARMRI84_R(srcLo)));
            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
            addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
            addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                       tmp2, tmp2, ARMRI84_R(tmp1)));
            addInstr(env, ARMInstr_Shift(ARMsh_SAR,
                                         tmp2, tmp2, ARMRI5_I5(31)));
            *rHi = tmp2;
            *rLo = tmp2;
            return;
         }

         case Iop_1Sto64: {
            /* Materialise 0/1 from the condition, then shift-extend
               to all-zeroes / all-ones; both halves share one reg. */
            HReg        dst  = newVRegI(env);
            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            ARMRI5*     amt  = ARMRI5_I5(31);
            /* This is really rough.  We could do much better here;
               perhaps mvn{cond} dst, #0 as the second insn?
               (same applies to 1Sto32) */
            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
            *rHi = dst;
            *rLo = dst;
            return;
         }

         default:
            break;
      }
   } /* if (e->tag == Iex_Unop) */

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_ITE) { // VFD
      IRType tyC;
      HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
      ARMCondCode cc;
      tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
      vassert(tyC == Ity_I1);
      iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
      iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
      dstHi = newVRegI(env);
      dstLo = newVRegI(env);
      /* Copy the 'iftrue' value, then conditionally overwrite with
         'iffalse' under the inverted condition (cc ^ 1). */
      addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
      addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
      cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
      addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   /* It is convenient sometimes to call iselInt64Expr even when we
      have NEON support (e.g. in do_helper_call we need 64-bit
      arguments as 2 x 32 regs). */
   if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      HReg tmp = iselNeon64Expr(env, e);
      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
      *rHi = tHi;
      *rLo = tLo;
      return ;
   }

   /* No pattern matched: give up. */
   ppIRExpr(e);
   vpanic("iselInt64Expr");
}
2318 /*---------------------------------------------------------*/
2319 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2320 /*---------------------------------------------------------*/
2322 static HReg iselNeon64Expr ( ISelEnv* env, const IRExpr* e )
2324 HReg r;
2325 vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
2326 r = iselNeon64Expr_wrk( env, e );
2327 vassert(hregClass(r) == HRcFlt64);
2328 vassert(hregIsVirtual(r));
2329 return r;
2332 /* DO NOT CALL THIS DIRECTLY */
2333 static HReg iselNeon64Expr_wrk ( ISelEnv* env, const IRExpr* e )
2335 IRType ty = typeOfIRExpr(env->type_env, e);
2336 MatchInfo mi;
2337 vassert(e);
2338 vassert(ty == Ity_I64);
2340 if (e->tag == Iex_RdTmp) {
2341 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2344 if (e->tag == Iex_Const) {
2345 HReg rLo, rHi;
2346 HReg res = newVRegD(env);
2347 iselInt64Expr(&rHi, &rLo, env, e);
2348 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2349 return res;
2352 /* 64-bit load */
2353 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2354 HReg res = newVRegD(env);
2355 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2356 vassert(ty == Ity_I64);
2357 addInstr(env, ARMInstr_NLdStD(True, res, am));
2358 return res;
2361 /* 64-bit GET */
2362 if (e->tag == Iex_Get) {
2363 HReg addr = newVRegI(env);
2364 HReg res = newVRegD(env);
2365 vassert(ty == Ity_I64);
2366 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2367 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2368 return res;
2371 /* --------- BINARY ops --------- */
2372 if (e->tag == Iex_Binop) {
2373 switch (e->Iex.Binop.op) {
2375 /* 32 x 32 -> 64 multiply */
2376 case Iop_MullS32:
2377 case Iop_MullU32: {
2378 HReg rLo, rHi;
2379 HReg res = newVRegD(env);
2380 iselInt64Expr(&rHi, &rLo, env, e);
2381 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2382 return res;
2385 case Iop_And64: {
2386 HReg res = newVRegD(env);
2387 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2388 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2389 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2390 res, argL, argR, 4, False));
2391 return res;
2393 case Iop_Or64: {
2394 HReg res = newVRegD(env);
2395 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2396 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2397 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2398 res, argL, argR, 4, False));
2399 return res;
2401 case Iop_Xor64: {
2402 HReg res = newVRegD(env);
2403 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2404 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2405 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2406 res, argL, argR, 4, False));
2407 return res;
2410 /* 32HLto64(e1,e2) */
2411 case Iop_32HLto64: {
2412 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2413 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2414 HReg res = newVRegD(env);
2415 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2416 return res;
2419 case Iop_Add8x8:
2420 case Iop_Add16x4:
2421 case Iop_Add32x2:
2422 case Iop_Add64: {
2423 HReg res = newVRegD(env);
2424 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2425 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2426 UInt size;
2427 switch (e->Iex.Binop.op) {
2428 case Iop_Add8x8: size = 0; break;
2429 case Iop_Add16x4: size = 1; break;
2430 case Iop_Add32x2: size = 2; break;
2431 case Iop_Add64: size = 3; break;
2432 default: vassert(0);
2434 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2435 res, argL, argR, size, False));
2436 return res;
2438 case Iop_Add32Fx2: {
2439 HReg res = newVRegD(env);
2440 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2441 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2442 UInt size = 0;
2443 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2444 res, argL, argR, size, False));
2445 return res;
2447 case Iop_RecipStep32Fx2: {
2448 HReg res = newVRegD(env);
2449 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2450 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2451 UInt size = 0;
2452 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2453 res, argL, argR, size, False));
2454 return res;
2456 case Iop_RSqrtStep32Fx2: {
2457 HReg res = newVRegD(env);
2458 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2459 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2460 UInt size = 0;
2461 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2462 res, argL, argR, size, False));
2463 return res;
2466 // These 6 verified 18 Apr 2013
2467 case Iop_InterleaveHI32x2:
2468 case Iop_InterleaveLO32x2:
2469 case Iop_InterleaveOddLanes8x8:
2470 case Iop_InterleaveEvenLanes8x8:
2471 case Iop_InterleaveOddLanes16x4:
2472 case Iop_InterleaveEvenLanes16x4: {
2473 HReg rD = newVRegD(env);
2474 HReg rM = newVRegD(env);
2475 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2476 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2477 UInt size;
2478 Bool resRd; // is the result in rD or rM ?
2479 switch (e->Iex.Binop.op) {
2480 case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break;
2481 case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break;
2482 case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break;
2483 case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break;
2484 case Iop_InterleaveHI32x2: resRd = False; size = 2; break;
2485 case Iop_InterleaveLO32x2: resRd = True; size = 2; break;
2486 default: vassert(0);
2488 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2489 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2490 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2491 return resRd ? rD : rM;
2494 // These 4 verified 18 Apr 2013
2495 case Iop_InterleaveHI8x8:
2496 case Iop_InterleaveLO8x8:
2497 case Iop_InterleaveHI16x4:
2498 case Iop_InterleaveLO16x4: {
2499 HReg rD = newVRegD(env);
2500 HReg rM = newVRegD(env);
2501 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2502 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2503 UInt size;
2504 Bool resRd; // is the result in rD or rM ?
2505 switch (e->Iex.Binop.op) {
2506 case Iop_InterleaveHI8x8: resRd = False; size = 0; break;
2507 case Iop_InterleaveLO8x8: resRd = True; size = 0; break;
2508 case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2509 case Iop_InterleaveLO16x4: resRd = True; size = 1; break;
2510 default: vassert(0);
2512 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2513 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2514 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2515 return resRd ? rD : rM;
2518 // These 4 verified 18 Apr 2013
2519 case Iop_CatOddLanes8x8:
2520 case Iop_CatEvenLanes8x8:
2521 case Iop_CatOddLanes16x4:
2522 case Iop_CatEvenLanes16x4: {
2523 HReg rD = newVRegD(env);
2524 HReg rM = newVRegD(env);
2525 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2526 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2527 UInt size;
2528 Bool resRd; // is the result in rD or rM ?
2529 switch (e->Iex.Binop.op) {
2530 case Iop_CatOddLanes8x8: resRd = False; size = 0; break;
2531 case Iop_CatEvenLanes8x8: resRd = True; size = 0; break;
2532 case Iop_CatOddLanes16x4: resRd = False; size = 1; break;
2533 case Iop_CatEvenLanes16x4: resRd = True; size = 1; break;
2534 default: vassert(0);
2536 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2537 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2538 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2539 return resRd ? rD : rM;
2542 case Iop_QAdd8Ux8:
2543 case Iop_QAdd16Ux4:
2544 case Iop_QAdd32Ux2:
2545 case Iop_QAdd64Ux1: {
2546 HReg res = newVRegD(env);
2547 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2548 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2549 UInt size;
2550 switch (e->Iex.Binop.op) {
2551 case Iop_QAdd8Ux8: size = 0; break;
2552 case Iop_QAdd16Ux4: size = 1; break;
2553 case Iop_QAdd32Ux2: size = 2; break;
2554 case Iop_QAdd64Ux1: size = 3; break;
2555 default: vassert(0);
2557 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2558 res, argL, argR, size, False));
2559 return res;
2561 case Iop_QAdd8Sx8:
2562 case Iop_QAdd16Sx4:
2563 case Iop_QAdd32Sx2:
2564 case Iop_QAdd64Sx1: {
2565 HReg res = newVRegD(env);
2566 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2567 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2568 UInt size;
2569 switch (e->Iex.Binop.op) {
2570 case Iop_QAdd8Sx8: size = 0; break;
2571 case Iop_QAdd16Sx4: size = 1; break;
2572 case Iop_QAdd32Sx2: size = 2; break;
2573 case Iop_QAdd64Sx1: size = 3; break;
2574 default: vassert(0);
2576 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2577 res, argL, argR, size, False));
2578 return res;
2580 case Iop_Sub8x8:
2581 case Iop_Sub16x4:
2582 case Iop_Sub32x2:
2583 case Iop_Sub64: {
2584 HReg res = newVRegD(env);
2585 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2586 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2587 UInt size;
2588 switch (e->Iex.Binop.op) {
2589 case Iop_Sub8x8: size = 0; break;
2590 case Iop_Sub16x4: size = 1; break;
2591 case Iop_Sub32x2: size = 2; break;
2592 case Iop_Sub64: size = 3; break;
2593 default: vassert(0);
2595 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2596 res, argL, argR, size, False));
2597 return res;
2599 case Iop_Sub32Fx2: {
2600 HReg res = newVRegD(env);
2601 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2602 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2603 UInt size = 0;
2604 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2605 res, argL, argR, size, False));
2606 return res;
2608 case Iop_QSub8Ux8:
2609 case Iop_QSub16Ux4:
2610 case Iop_QSub32Ux2:
2611 case Iop_QSub64Ux1: {
2612 HReg res = newVRegD(env);
2613 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2614 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2615 UInt size;
2616 switch (e->Iex.Binop.op) {
2617 case Iop_QSub8Ux8: size = 0; break;
2618 case Iop_QSub16Ux4: size = 1; break;
2619 case Iop_QSub32Ux2: size = 2; break;
2620 case Iop_QSub64Ux1: size = 3; break;
2621 default: vassert(0);
2623 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2624 res, argL, argR, size, False));
2625 return res;
2627 case Iop_QSub8Sx8:
2628 case Iop_QSub16Sx4:
2629 case Iop_QSub32Sx2:
2630 case Iop_QSub64Sx1: {
2631 HReg res = newVRegD(env);
2632 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2633 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2634 UInt size;
2635 switch (e->Iex.Binop.op) {
2636 case Iop_QSub8Sx8: size = 0; break;
2637 case Iop_QSub16Sx4: size = 1; break;
2638 case Iop_QSub32Sx2: size = 2; break;
2639 case Iop_QSub64Sx1: size = 3; break;
2640 default: vassert(0);
2642 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2643 res, argL, argR, size, False));
2644 return res;
2646 case Iop_Max8Ux8:
2647 case Iop_Max16Ux4:
2648 case Iop_Max32Ux2: {
2649 HReg res = newVRegD(env);
2650 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2651 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2652 UInt size;
2653 switch (e->Iex.Binop.op) {
2654 case Iop_Max8Ux8: size = 0; break;
2655 case Iop_Max16Ux4: size = 1; break;
2656 case Iop_Max32Ux2: size = 2; break;
2657 default: vassert(0);
2659 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2660 res, argL, argR, size, False));
2661 return res;
2663 case Iop_Max8Sx8:
2664 case Iop_Max16Sx4:
2665 case Iop_Max32Sx2: {
2666 HReg res = newVRegD(env);
2667 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2668 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2669 UInt size;
2670 switch (e->Iex.Binop.op) {
2671 case Iop_Max8Sx8: size = 0; break;
2672 case Iop_Max16Sx4: size = 1; break;
2673 case Iop_Max32Sx2: size = 2; break;
2674 default: vassert(0);
2676 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2677 res, argL, argR, size, False));
2678 return res;
2680 case Iop_Min8Ux8:
2681 case Iop_Min16Ux4:
2682 case Iop_Min32Ux2: {
2683 HReg res = newVRegD(env);
2684 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2685 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2686 UInt size;
2687 switch (e->Iex.Binop.op) {
2688 case Iop_Min8Ux8: size = 0; break;
2689 case Iop_Min16Ux4: size = 1; break;
2690 case Iop_Min32Ux2: size = 2; break;
2691 default: vassert(0);
2693 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2694 res, argL, argR, size, False));
2695 return res;
2697 case Iop_Min8Sx8:
2698 case Iop_Min16Sx4:
2699 case Iop_Min32Sx2: {
2700 HReg res = newVRegD(env);
2701 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2702 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2703 UInt size;
2704 switch (e->Iex.Binop.op) {
2705 case Iop_Min8Sx8: size = 0; break;
2706 case Iop_Min16Sx4: size = 1; break;
2707 case Iop_Min32Sx2: size = 2; break;
2708 default: vassert(0);
2710 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2711 res, argL, argR, size, False));
2712 return res;
2714 case Iop_Sar8x8:
2715 case Iop_Sar16x4:
2716 case Iop_Sar32x2: {
2717 HReg res = newVRegD(env);
2718 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2719 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2720 HReg argR2 = newVRegD(env);
2721 HReg zero = newVRegD(env);
2722 UInt size;
2723 switch (e->Iex.Binop.op) {
2724 case Iop_Sar8x8: size = 0; break;
2725 case Iop_Sar16x4: size = 1; break;
2726 case Iop_Sar32x2: size = 2; break;
2727 case Iop_Sar64: size = 3; break;
2728 default: vassert(0);
2730 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2731 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2732 argR2, zero, argR, size, False));
2733 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2734 res, argL, argR2, size, False));
2735 return res;
2737 case Iop_Sal8x8:
2738 case Iop_Sal16x4:
2739 case Iop_Sal32x2:
2740 case Iop_Sal64x1: {
2741 HReg res = newVRegD(env);
2742 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2743 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2744 UInt size;
2745 switch (e->Iex.Binop.op) {
2746 case Iop_Sal8x8: size = 0; break;
2747 case Iop_Sal16x4: size = 1; break;
2748 case Iop_Sal32x2: size = 2; break;
2749 case Iop_Sal64x1: size = 3; break;
2750 default: vassert(0);
2752 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2753 res, argL, argR, size, False));
2754 return res;
2756 case Iop_Shr8x8:
2757 case Iop_Shr16x4:
2758 case Iop_Shr32x2: {
2759 HReg res = newVRegD(env);
2760 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2761 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2762 HReg argR2 = newVRegD(env);
2763 HReg zero = newVRegD(env);
2764 UInt size;
2765 switch (e->Iex.Binop.op) {
2766 case Iop_Shr8x8: size = 0; break;
2767 case Iop_Shr16x4: size = 1; break;
2768 case Iop_Shr32x2: size = 2; break;
2769 default: vassert(0);
2771 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2772 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2773 argR2, zero, argR, size, False));
2774 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2775 res, argL, argR2, size, False));
2776 return res;
2778 case Iop_Shl8x8:
2779 case Iop_Shl16x4:
2780 case Iop_Shl32x2: {
2781 HReg res = newVRegD(env);
2782 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784 UInt size;
2785 switch (e->Iex.Binop.op) {
2786 case Iop_Shl8x8: size = 0; break;
2787 case Iop_Shl16x4: size = 1; break;
2788 case Iop_Shl32x2: size = 2; break;
2789 default: vassert(0);
2791 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2792 res, argL, argR, size, False));
2793 return res;
2795 case Iop_QShl8x8:
2796 case Iop_QShl16x4:
2797 case Iop_QShl32x2:
2798 case Iop_QShl64x1: {
2799 HReg res = newVRegD(env);
2800 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2801 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2802 UInt size;
2803 switch (e->Iex.Binop.op) {
2804 case Iop_QShl8x8: size = 0; break;
2805 case Iop_QShl16x4: size = 1; break;
2806 case Iop_QShl32x2: size = 2; break;
2807 case Iop_QShl64x1: size = 3; break;
2808 default: vassert(0);
2810 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2811 res, argL, argR, size, False));
2812 return res;
2814 case Iop_QSal8x8:
2815 case Iop_QSal16x4:
2816 case Iop_QSal32x2:
2817 case Iop_QSal64x1: {
2818 HReg res = newVRegD(env);
2819 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2820 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2821 UInt size;
2822 switch (e->Iex.Binop.op) {
2823 case Iop_QSal8x8: size = 0; break;
2824 case Iop_QSal16x4: size = 1; break;
2825 case Iop_QSal32x2: size = 2; break;
2826 case Iop_QSal64x1: size = 3; break;
2827 default: vassert(0);
2829 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2830 res, argL, argR, size, False));
2831 return res;
2833 case Iop_QShlNsatUU8x8:
2834 case Iop_QShlNsatUU16x4:
2835 case Iop_QShlNsatUU32x2:
2836 case Iop_QShlNsatUU64x1: {
2837 HReg res = newVRegD(env);
2838 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2839 UInt size, imm;
2840 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2841 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2842 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
2843 "second argument only\n");
2845 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2846 switch (e->Iex.Binop.op) {
2847 case Iop_QShlNsatUU8x8: size = 8 | imm; break;
2848 case Iop_QShlNsatUU16x4: size = 16 | imm; break;
2849 case Iop_QShlNsatUU32x2: size = 32 | imm; break;
2850 case Iop_QShlNsatUU64x1: size = 64 | imm; break;
2851 default: vassert(0);
2853 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2854 res, argL, size, False));
2855 return res;
2857 case Iop_QShlNsatSU8x8:
2858 case Iop_QShlNsatSU16x4:
2859 case Iop_QShlNsatSU32x2:
2860 case Iop_QShlNsatSU64x1: {
2861 HReg res = newVRegD(env);
2862 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2863 UInt size, imm;
2864 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2865 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2866 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
2867 "second argument only\n");
2869 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2870 switch (e->Iex.Binop.op) {
2871 case Iop_QShlNsatSU8x8: size = 8 | imm; break;
2872 case Iop_QShlNsatSU16x4: size = 16 | imm; break;
2873 case Iop_QShlNsatSU32x2: size = 32 | imm; break;
2874 case Iop_QShlNsatSU64x1: size = 64 | imm; break;
2875 default: vassert(0);
2877 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2878 res, argL, size, False));
2879 return res;
2881 case Iop_QShlNsatSS8x8:
2882 case Iop_QShlNsatSS16x4:
2883 case Iop_QShlNsatSS32x2:
2884 case Iop_QShlNsatSS64x1: {
2885 HReg res = newVRegD(env);
2886 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2887 UInt size, imm;
2888 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2889 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2890 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
2891 "second argument only\n");
2893 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2894 switch (e->Iex.Binop.op) {
2895 case Iop_QShlNsatSS8x8: size = 8 | imm; break;
2896 case Iop_QShlNsatSS16x4: size = 16 | imm; break;
2897 case Iop_QShlNsatSS32x2: size = 32 | imm; break;
2898 case Iop_QShlNsatSS64x1: size = 64 | imm; break;
2899 default: vassert(0);
2901 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2902 res, argL, size, False));
2903 return res;
2905 case Iop_ShrN8x8:
2906 case Iop_ShrN16x4:
2907 case Iop_ShrN32x2:
2908 case Iop_Shr64: {
2909 HReg res = newVRegD(env);
2910 HReg tmp = newVRegD(env);
2911 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2912 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2913 HReg argR2 = newVRegI(env);
2914 UInt size;
2915 switch (e->Iex.Binop.op) {
2916 case Iop_ShrN8x8: size = 0; break;
2917 case Iop_ShrN16x4: size = 1; break;
2918 case Iop_ShrN32x2: size = 2; break;
2919 case Iop_Shr64: size = 3; break;
2920 default: vassert(0);
2922 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2923 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2924 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2925 res, argL, tmp, size, False));
2926 return res;
2928 case Iop_ShlN8x8:
2929 case Iop_ShlN16x4:
2930 case Iop_ShlN32x2:
2931 case Iop_Shl64: {
2932 HReg res = newVRegD(env);
2933 HReg tmp = newVRegD(env);
2934 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2935 /* special-case Shl64(x, imm8) since the Neon front
2936 end produces a lot of those for V{LD,ST}{1,2,3,4}. */
2937 if (e->Iex.Binop.op == Iop_Shl64
2938 && e->Iex.Binop.arg2->tag == Iex_Const) {
2939 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
2940 Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2941 if (nshift >= 1 && nshift <= 63) {
2942 addInstr(env, ARMInstr_NShl64(res, argL, nshift));
2943 return res;
2945 /* else fall through to general case */
2947 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2948 UInt size;
2949 switch (e->Iex.Binop.op) {
2950 case Iop_ShlN8x8: size = 0; break;
2951 case Iop_ShlN16x4: size = 1; break;
2952 case Iop_ShlN32x2: size = 2; break;
2953 case Iop_Shl64: size = 3; break;
2954 default: vassert(0);
2956 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
2957 tmp, argR, 0, False));
2958 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2959 res, argL, tmp, size, False));
2960 return res;
2962 case Iop_SarN8x8:
2963 case Iop_SarN16x4:
2964 case Iop_SarN32x2:
2965 case Iop_Sar64: {
2966 HReg res = newVRegD(env);
2967 HReg tmp = newVRegD(env);
2968 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2969 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2970 HReg argR2 = newVRegI(env);
2971 UInt size;
2972 switch (e->Iex.Binop.op) {
2973 case Iop_SarN8x8: size = 0; break;
2974 case Iop_SarN16x4: size = 1; break;
2975 case Iop_SarN32x2: size = 2; break;
2976 case Iop_Sar64: size = 3; break;
2977 default: vassert(0);
2979 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2980 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2981 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2982 res, argL, tmp, size, False));
2983 return res;
2985 case Iop_CmpGT8Ux8:
2986 case Iop_CmpGT16Ux4:
2987 case Iop_CmpGT32Ux2: {
2988 HReg res = newVRegD(env);
2989 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2990 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2991 UInt size;
2992 switch (e->Iex.Binop.op) {
2993 case Iop_CmpGT8Ux8: size = 0; break;
2994 case Iop_CmpGT16Ux4: size = 1; break;
2995 case Iop_CmpGT32Ux2: size = 2; break;
2996 default: vassert(0);
2998 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2999 res, argL, argR, size, False));
3000 return res;
3002 case Iop_CmpGT8Sx8:
3003 case Iop_CmpGT16Sx4:
3004 case Iop_CmpGT32Sx2: {
3005 HReg res = newVRegD(env);
3006 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3007 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3008 UInt size;
3009 switch (e->Iex.Binop.op) {
3010 case Iop_CmpGT8Sx8: size = 0; break;
3011 case Iop_CmpGT16Sx4: size = 1; break;
3012 case Iop_CmpGT32Sx2: size = 2; break;
3013 default: vassert(0);
3015 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
3016 res, argL, argR, size, False));
3017 return res;
3019 case Iop_CmpEQ8x8:
3020 case Iop_CmpEQ16x4:
3021 case Iop_CmpEQ32x2: {
3022 HReg res = newVRegD(env);
3023 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3024 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3025 UInt size;
3026 switch (e->Iex.Binop.op) {
3027 case Iop_CmpEQ8x8: size = 0; break;
3028 case Iop_CmpEQ16x4: size = 1; break;
3029 case Iop_CmpEQ32x2: size = 2; break;
3030 default: vassert(0);
3032 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
3033 res, argL, argR, size, False));
3034 return res;
3036 case Iop_Mul8x8:
3037 case Iop_Mul16x4:
3038 case Iop_Mul32x2: {
3039 HReg res = newVRegD(env);
3040 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3041 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3042 UInt size = 0;
3043 switch(e->Iex.Binop.op) {
3044 case Iop_Mul8x8: size = 0; break;
3045 case Iop_Mul16x4: size = 1; break;
3046 case Iop_Mul32x2: size = 2; break;
3047 default: vassert(0);
3049 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
3050 res, argL, argR, size, False));
3051 return res;
3053 case Iop_Mul32Fx2: {
3054 HReg res = newVRegD(env);
3055 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3056 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3057 UInt size = 0;
3058 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
3059 res, argL, argR, size, False));
3060 return res;
3062 case Iop_QDMulHi16Sx4:
3063 case Iop_QDMulHi32Sx2: {
3064 HReg res = newVRegD(env);
3065 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3066 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3067 UInt size = 0;
3068 switch(e->Iex.Binop.op) {
3069 case Iop_QDMulHi16Sx4: size = 1; break;
3070 case Iop_QDMulHi32Sx2: size = 2; break;
3071 default: vassert(0);
3073 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
3074 res, argL, argR, size, False));
3075 return res;
3078 case Iop_QRDMulHi16Sx4:
3079 case Iop_QRDMulHi32Sx2: {
3080 HReg res = newVRegD(env);
3081 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3082 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3083 UInt size = 0;
3084 switch(e->Iex.Binop.op) {
3085 case Iop_QRDMulHi16Sx4: size = 1; break;
3086 case Iop_QRDMulHi32Sx2: size = 2; break;
3087 default: vassert(0);
3089 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
3090 res, argL, argR, size, False));
3091 return res;
3094 case Iop_PwAdd8x8:
3095 case Iop_PwAdd16x4:
3096 case Iop_PwAdd32x2: {
3097 HReg res = newVRegD(env);
3098 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3099 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3100 UInt size = 0;
3101 switch(e->Iex.Binop.op) {
3102 case Iop_PwAdd8x8: size = 0; break;
3103 case Iop_PwAdd16x4: size = 1; break;
3104 case Iop_PwAdd32x2: size = 2; break;
3105 default: vassert(0);
3107 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
3108 res, argL, argR, size, False));
3109 return res;
3111 case Iop_PwAdd32Fx2: {
3112 HReg res = newVRegD(env);
3113 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3114 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3115 UInt size = 0;
3116 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
3117 res, argL, argR, size, False));
3118 return res;
3120 case Iop_PwMin8Ux8:
3121 case Iop_PwMin16Ux4:
3122 case Iop_PwMin32Ux2: {
3123 HReg res = newVRegD(env);
3124 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3125 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3126 UInt size = 0;
3127 switch(e->Iex.Binop.op) {
3128 case Iop_PwMin8Ux8: size = 0; break;
3129 case Iop_PwMin16Ux4: size = 1; break;
3130 case Iop_PwMin32Ux2: size = 2; break;
3131 default: vassert(0);
3133 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
3134 res, argL, argR, size, False));
3135 return res;
3137 case Iop_PwMin8Sx8:
3138 case Iop_PwMin16Sx4:
3139 case Iop_PwMin32Sx2: {
3140 HReg res = newVRegD(env);
3141 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3142 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3143 UInt size = 0;
3144 switch(e->Iex.Binop.op) {
3145 case Iop_PwMin8Sx8: size = 0; break;
3146 case Iop_PwMin16Sx4: size = 1; break;
3147 case Iop_PwMin32Sx2: size = 2; break;
3148 default: vassert(0);
3150 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3151 res, argL, argR, size, False));
3152 return res;
3154 case Iop_PwMax8Ux8:
3155 case Iop_PwMax16Ux4:
3156 case Iop_PwMax32Ux2: {
3157 HReg res = newVRegD(env);
3158 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3159 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3160 UInt size = 0;
3161 switch(e->Iex.Binop.op) {
3162 case Iop_PwMax8Ux8: size = 0; break;
3163 case Iop_PwMax16Ux4: size = 1; break;
3164 case Iop_PwMax32Ux2: size = 2; break;
3165 default: vassert(0);
3167 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3168 res, argL, argR, size, False));
3169 return res;
3171 case Iop_PwMax8Sx8:
3172 case Iop_PwMax16Sx4:
3173 case Iop_PwMax32Sx2: {
3174 HReg res = newVRegD(env);
3175 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3176 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3177 UInt size = 0;
3178 switch(e->Iex.Binop.op) {
3179 case Iop_PwMax8Sx8: size = 0; break;
3180 case Iop_PwMax16Sx4: size = 1; break;
3181 case Iop_PwMax32Sx2: size = 2; break;
3182 default: vassert(0);
3184 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3185 res, argL, argR, size, False));
3186 return res;
3188 case Iop_Perm8x8: {
3189 HReg res = newVRegD(env);
3190 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3191 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3192 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3193 res, argL, argR, 0, False));
3194 return res;
3196 case Iop_PolynomialMul8x8: {
3197 HReg res = newVRegD(env);
3198 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3199 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3200 UInt size = 0;
3201 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3202 res, argL, argR, size, False));
3203 return res;
3205 case Iop_Max32Fx2: {
3206 HReg res = newVRegD(env);
3207 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3208 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3209 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3210 res, argL, argR, 2, False));
3211 return res;
3213 case Iop_Min32Fx2: {
3214 HReg res = newVRegD(env);
3215 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3216 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3217 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3218 res, argL, argR, 2, False));
3219 return res;
3221 case Iop_PwMax32Fx2: {
3222 HReg res = newVRegD(env);
3223 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3224 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3225 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3226 res, argL, argR, 2, False));
3227 return res;
3229 case Iop_PwMin32Fx2: {
3230 HReg res = newVRegD(env);
3231 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3232 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3233 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3234 res, argL, argR, 2, False));
3235 return res;
3237 case Iop_CmpGT32Fx2: {
3238 HReg res = newVRegD(env);
3239 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3240 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3241 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3242 res, argL, argR, 2, False));
3243 return res;
3245 case Iop_CmpGE32Fx2: {
3246 HReg res = newVRegD(env);
3247 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3248 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3249 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3250 res, argL, argR, 2, False));
3251 return res;
3253 case Iop_CmpEQ32Fx2: {
3254 HReg res = newVRegD(env);
3255 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3256 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3257 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3258 res, argL, argR, 2, False));
3259 return res;
3261 case Iop_F32ToFixed32Ux2_RZ:
3262 case Iop_F32ToFixed32Sx2_RZ:
3263 case Iop_Fixed32UToF32x2_RN:
3264 case Iop_Fixed32SToF32x2_RN: {
3265 HReg res = newVRegD(env);
3266 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3267 ARMNeonUnOp op;
3268 UInt imm6;
3269 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3270 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3271 vpanic("ARM supports FP <-> Fixed conversion with constant "
3272 "second argument less than 33 only\n");
3274 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3275 vassert(imm6 <= 32 && imm6 > 0);
3276 imm6 = 64 - imm6;
3277 switch(e->Iex.Binop.op) {
3278 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3279 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3280 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3281 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3282 default: vassert(0);
3284 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3285 return res;
3288 FIXME: is this here or not?
3289 case Iop_VDup8x8:
3290 case Iop_VDup16x4:
3291 case Iop_VDup32x2: {
3292 HReg res = newVRegD(env);
3293 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3294 UInt index;
3295 UInt imm4;
3296 UInt size = 0;
3297 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3298 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3299 vpanic("ARM supports Iop_VDup with constant "
3300 "second argument less than 16 only\n");
3302 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3303 switch(e->Iex.Binop.op) {
3304 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3305 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3306 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3307 default: vassert(0);
3309 if (imm4 >= 16) {
3310 vpanic("ARM supports Iop_VDup with constant "
3311 "second argument less than 16 only\n");
3313 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3314 res, argL, imm4, False));
3315 return res;
3318 default:
3319 break;
3323 /* --------- UNARY ops --------- */
3324 if (e->tag == Iex_Unop) {
3325 switch (e->Iex.Unop.op) {
3327 /* 32Uto64 */
3328 case Iop_32Uto64: {
3329 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3330 HReg rHi = newVRegI(env);
3331 HReg res = newVRegD(env);
3332 addInstr(env, ARMInstr_Imm32(rHi, 0));
3333 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3334 return res;
3337 /* 32Sto64 */
3338 case Iop_32Sto64: {
3339 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3340 HReg rHi = newVRegI(env);
3341 addInstr(env, mk_iMOVds_RR(rHi, rLo));
3342 addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3343 HReg res = newVRegD(env);
3344 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3345 return res;
3348 /* The next 3 are pass-throughs */
3349 /* ReinterpF64asI64 */
3350 case Iop_ReinterpF64asI64:
3351 /* Left64(e) */
3352 case Iop_Left64:
3353 /* CmpwNEZ64(e) */
3354 case Iop_1Sto64: {
3355 HReg rLo, rHi;
3356 HReg res = newVRegD(env);
3357 iselInt64Expr(&rHi, &rLo, env, e);
3358 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3359 return res;
3362 case Iop_Not64: {
3363 DECLARE_PATTERN(p_veqz_8x8);
3364 DECLARE_PATTERN(p_veqz_16x4);
3365 DECLARE_PATTERN(p_veqz_32x2);
3366 DECLARE_PATTERN(p_vcge_8sx8);
3367 DECLARE_PATTERN(p_vcge_16sx4);
3368 DECLARE_PATTERN(p_vcge_32sx2);
3369 DECLARE_PATTERN(p_vcge_8ux8);
3370 DECLARE_PATTERN(p_vcge_16ux4);
3371 DECLARE_PATTERN(p_vcge_32ux2);
3372 DEFINE_PATTERN(p_veqz_8x8,
3373 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3374 DEFINE_PATTERN(p_veqz_16x4,
3375 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3376 DEFINE_PATTERN(p_veqz_32x2,
3377 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3378 DEFINE_PATTERN(p_vcge_8sx8,
3379 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3380 DEFINE_PATTERN(p_vcge_16sx4,
3381 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3382 DEFINE_PATTERN(p_vcge_32sx2,
3383 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3384 DEFINE_PATTERN(p_vcge_8ux8,
3385 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3386 DEFINE_PATTERN(p_vcge_16ux4,
3387 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3388 DEFINE_PATTERN(p_vcge_32ux2,
3389 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3390 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3391 HReg res = newVRegD(env);
3392 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3393 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3394 return res;
3395 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3396 HReg res = newVRegD(env);
3397 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3398 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3399 return res;
3400 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3401 HReg res = newVRegD(env);
3402 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3403 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3404 return res;
3405 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3406 HReg res = newVRegD(env);
3407 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3408 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3409 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3410 res, argL, argR, 0, False));
3411 return res;
3412 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3413 HReg res = newVRegD(env);
3414 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3415 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3416 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3417 res, argL, argR, 1, False));
3418 return res;
3419 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3420 HReg res = newVRegD(env);
3421 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3422 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3423 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3424 res, argL, argR, 2, False));
3425 return res;
3426 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3427 HReg res = newVRegD(env);
3428 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3429 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3430 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3431 res, argL, argR, 0, False));
3432 return res;
3433 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3434 HReg res = newVRegD(env);
3435 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3436 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3437 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3438 res, argL, argR, 1, False));
3439 return res;
3440 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3441 HReg res = newVRegD(env);
3442 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3443 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3444 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3445 res, argL, argR, 2, False));
3446 return res;
3447 } else {
3448 HReg res = newVRegD(env);
3449 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3450 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3451 return res;
3454 case Iop_Dup8x8:
3455 case Iop_Dup16x4:
3456 case Iop_Dup32x2: {
3457 HReg res, arg;
3458 UInt size;
3459 DECLARE_PATTERN(p_vdup_8x8);
3460 DECLARE_PATTERN(p_vdup_16x4);
3461 DECLARE_PATTERN(p_vdup_32x2);
3462 DEFINE_PATTERN(p_vdup_8x8,
3463 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3464 DEFINE_PATTERN(p_vdup_16x4,
3465 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3466 DEFINE_PATTERN(p_vdup_32x2,
3467 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3468 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3469 UInt index;
3470 UInt imm4;
3471 if (mi.bindee[1]->tag == Iex_Const &&
3472 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3473 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3474 imm4 = (index << 1) + 1;
3475 if (index < 8) {
3476 res = newVRegD(env);
3477 arg = iselNeon64Expr(env, mi.bindee[0]);
3478 addInstr(env, ARMInstr_NUnaryS(
3479 ARMneon_VDUP,
3480 mkARMNRS(ARMNRS_Reg, res, 0),
3481 mkARMNRS(ARMNRS_Scalar, arg, index),
3482 imm4, False
3484 return res;
3487 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3488 UInt index;
3489 UInt imm4;
3490 if (mi.bindee[1]->tag == Iex_Const &&
3491 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3492 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3493 imm4 = (index << 2) + 2;
3494 if (index < 4) {
3495 res = newVRegD(env);
3496 arg = iselNeon64Expr(env, mi.bindee[0]);
3497 addInstr(env, ARMInstr_NUnaryS(
3498 ARMneon_VDUP,
3499 mkARMNRS(ARMNRS_Reg, res, 0),
3500 mkARMNRS(ARMNRS_Scalar, arg, index),
3501 imm4, False
3503 return res;
3506 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3507 UInt index;
3508 UInt imm4;
3509 if (mi.bindee[1]->tag == Iex_Const &&
3510 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3511 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3512 imm4 = (index << 3) + 4;
3513 if (index < 2) {
3514 res = newVRegD(env);
3515 arg = iselNeon64Expr(env, mi.bindee[0]);
3516 addInstr(env, ARMInstr_NUnaryS(
3517 ARMneon_VDUP,
3518 mkARMNRS(ARMNRS_Reg, res, 0),
3519 mkARMNRS(ARMNRS_Scalar, arg, index),
3520 imm4, False
3522 return res;
3526 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3527 res = newVRegD(env);
3528 switch (e->Iex.Unop.op) {
3529 case Iop_Dup8x8: size = 0; break;
3530 case Iop_Dup16x4: size = 1; break;
3531 case Iop_Dup32x2: size = 2; break;
3532 default: vassert(0);
3534 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3535 return res;
3537 case Iop_Abs8x8:
3538 case Iop_Abs16x4:
3539 case Iop_Abs32x2: {
3540 HReg res = newVRegD(env);
3541 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3542 UInt size = 0;
3543 switch(e->Iex.Binop.op) {
3544 case Iop_Abs8x8: size = 0; break;
3545 case Iop_Abs16x4: size = 1; break;
3546 case Iop_Abs32x2: size = 2; break;
3547 default: vassert(0);
3549 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3550 return res;
3552 case Iop_Reverse8sIn64_x1:
3553 case Iop_Reverse16sIn64_x1:
3554 case Iop_Reverse32sIn64_x1: {
3555 HReg res = newVRegD(env);
3556 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3557 UInt size = 0;
3558 switch(e->Iex.Binop.op) {
3559 case Iop_Reverse8sIn64_x1: size = 0; break;
3560 case Iop_Reverse16sIn64_x1: size = 1; break;
3561 case Iop_Reverse32sIn64_x1: size = 2; break;
3562 default: vassert(0);
3564 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3565 res, arg, size, False));
3566 return res;
3568 case Iop_Reverse8sIn32_x2:
3569 case Iop_Reverse16sIn32_x2: {
3570 HReg res = newVRegD(env);
3571 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3572 UInt size = 0;
3573 switch(e->Iex.Binop.op) {
3574 case Iop_Reverse8sIn32_x2: size = 0; break;
3575 case Iop_Reverse16sIn32_x2: size = 1; break;
3576 default: vassert(0);
3578 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3579 res, arg, size, False));
3580 return res;
3582 case Iop_Reverse8sIn16_x4: {
3583 HReg res = newVRegD(env);
3584 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3585 UInt size = 0;
3586 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3587 res, arg, size, False));
3588 return res;
3590 case Iop_CmpwNEZ64: {
3591 HReg x_lsh = newVRegD(env);
3592 HReg x_rsh = newVRegD(env);
3593 HReg lsh_amt = newVRegD(env);
3594 HReg rsh_amt = newVRegD(env);
3595 HReg zero = newVRegD(env);
3596 HReg tmp = newVRegD(env);
3597 HReg tmp2 = newVRegD(env);
3598 HReg res = newVRegD(env);
3599 HReg x = newVRegD(env);
3600 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3601 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3602 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3603 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3604 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3605 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3606 rsh_amt, zero, lsh_amt, 2, False));
3607 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3608 x_lsh, x, lsh_amt, 3, False));
3609 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3610 x_rsh, x, rsh_amt, 3, False));
3611 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3612 tmp, x_lsh, x_rsh, 0, False));
3613 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3614 res, tmp, x, 0, False));
3615 return res;
3617 case Iop_CmpNEZ8x8:
3618 case Iop_CmpNEZ16x4:
3619 case Iop_CmpNEZ32x2: {
3620 HReg res = newVRegD(env);
3621 HReg tmp = newVRegD(env);
3622 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3623 UInt size;
3624 switch (e->Iex.Unop.op) {
3625 case Iop_CmpNEZ8x8: size = 0; break;
3626 case Iop_CmpNEZ16x4: size = 1; break;
3627 case Iop_CmpNEZ32x2: size = 2; break;
3628 default: vassert(0);
3630 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3631 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3632 return res;
3634 case Iop_NarrowUn16to8x8:
3635 case Iop_NarrowUn32to16x4:
3636 case Iop_NarrowUn64to32x2: {
3637 HReg res = newVRegD(env);
3638 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3639 UInt size = 0;
3640 switch(e->Iex.Binop.op) {
3641 case Iop_NarrowUn16to8x8: size = 0; break;
3642 case Iop_NarrowUn32to16x4: size = 1; break;
3643 case Iop_NarrowUn64to32x2: size = 2; break;
3644 default: vassert(0);
3646 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3647 res, arg, size, False));
3648 return res;
3650 case Iop_QNarrowUn16Sto8Sx8:
3651 case Iop_QNarrowUn32Sto16Sx4:
3652 case Iop_QNarrowUn64Sto32Sx2: {
3653 HReg res = newVRegD(env);
3654 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3655 UInt size = 0;
3656 switch(e->Iex.Binop.op) {
3657 case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3658 case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3659 case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3660 default: vassert(0);
3662 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3663 res, arg, size, False));
3664 return res;
3666 case Iop_QNarrowUn16Sto8Ux8:
3667 case Iop_QNarrowUn32Sto16Ux4:
3668 case Iop_QNarrowUn64Sto32Ux2: {
3669 HReg res = newVRegD(env);
3670 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3671 UInt size = 0;
3672 switch(e->Iex.Binop.op) {
3673 case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
3674 case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3675 case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3676 default: vassert(0);
3678 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3679 res, arg, size, False));
3680 return res;
3682 case Iop_QNarrowUn16Uto8Ux8:
3683 case Iop_QNarrowUn32Uto16Ux4:
3684 case Iop_QNarrowUn64Uto32Ux2: {
3685 HReg res = newVRegD(env);
3686 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3687 UInt size = 0;
3688 switch(e->Iex.Binop.op) {
3689 case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
3690 case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3691 case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3692 default: vassert(0);
3694 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3695 res, arg, size, False));
3696 return res;
3698 case Iop_PwAddL8Sx8:
3699 case Iop_PwAddL16Sx4:
3700 case Iop_PwAddL32Sx2: {
3701 HReg res = newVRegD(env);
3702 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3703 UInt size = 0;
3704 switch(e->Iex.Binop.op) {
3705 case Iop_PwAddL8Sx8: size = 0; break;
3706 case Iop_PwAddL16Sx4: size = 1; break;
3707 case Iop_PwAddL32Sx2: size = 2; break;
3708 default: vassert(0);
3710 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3711 res, arg, size, False));
3712 return res;
3714 case Iop_PwAddL8Ux8:
3715 case Iop_PwAddL16Ux4:
3716 case Iop_PwAddL32Ux2: {
3717 HReg res = newVRegD(env);
3718 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3719 UInt size = 0;
3720 switch(e->Iex.Binop.op) {
3721 case Iop_PwAddL8Ux8: size = 0; break;
3722 case Iop_PwAddL16Ux4: size = 1; break;
3723 case Iop_PwAddL32Ux2: size = 2; break;
3724 default: vassert(0);
3726 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3727 res, arg, size, False));
3728 return res;
3730 case Iop_Cnt8x8: {
3731 HReg res = newVRegD(env);
3732 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3733 UInt size = 0;
3734 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3735 res, arg, size, False));
3736 return res;
3738 case Iop_Clz8x8:
3739 case Iop_Clz16x4:
3740 case Iop_Clz32x2: {
3741 HReg res = newVRegD(env);
3742 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3743 UInt size = 0;
3744 switch(e->Iex.Binop.op) {
3745 case Iop_Clz8x8: size = 0; break;
3746 case Iop_Clz16x4: size = 1; break;
3747 case Iop_Clz32x2: size = 2; break;
3748 default: vassert(0);
3750 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3751 res, arg, size, False));
3752 return res;
3754 case Iop_Cls8x8:
3755 case Iop_Cls16x4:
3756 case Iop_Cls32x2: {
3757 HReg res = newVRegD(env);
3758 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3759 UInt size = 0;
3760 switch(e->Iex.Binop.op) {
3761 case Iop_Cls8x8: size = 0; break;
3762 case Iop_Cls16x4: size = 1; break;
3763 case Iop_Cls32x2: size = 2; break;
3764 default: vassert(0);
3766 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3767 res, arg, size, False));
3768 return res;
3770 case Iop_FtoI32Sx2_RZ: {
3771 HReg res = newVRegD(env);
3772 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3773 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3774 res, arg, 2, False));
3775 return res;
3777 case Iop_FtoI32Ux2_RZ: {
3778 HReg res = newVRegD(env);
3779 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3780 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3781 res, arg, 2, False));
3782 return res;
3784 case Iop_I32StoFx2: {
3785 HReg res = newVRegD(env);
3786 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3787 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3788 res, arg, 2, False));
3789 return res;
3791 case Iop_I32UtoFx2: {
3792 HReg res = newVRegD(env);
3793 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3794 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3795 res, arg, 2, False));
3796 return res;
3798 case Iop_F32toF16x4: {
3799 HReg res = newVRegD(env);
3800 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3801 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3802 res, arg, 2, False));
3803 return res;
3805 case Iop_RecipEst32Fx2: {
3806 HReg res = newVRegD(env);
3807 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3808 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3809 res, argL, 0, False));
3810 return res;
3812 case Iop_RecipEst32Ux2: {
3813 HReg res = newVRegD(env);
3814 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3815 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3816 res, argL, 0, False));
3817 return res;
3819 case Iop_Abs32Fx2: {
3820 DECLARE_PATTERN(p_vabd_32fx2);
3821 DEFINE_PATTERN(p_vabd_32fx2,
3822 unop(Iop_Abs32Fx2,
3823 binop(Iop_Sub32Fx2,
3824 bind(0),
3825 bind(1))));
3826 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3827 HReg res = newVRegD(env);
3828 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3829 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3830 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3831 res, argL, argR, 0, False));
3832 return res;
3833 } else {
3834 HReg res = newVRegD(env);
3835 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3836 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3837 res, arg, 0, False));
3838 return res;
3841 case Iop_RSqrtEst32Fx2: {
3842 HReg res = newVRegD(env);
3843 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3844 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3845 res, arg, 0, False));
3846 return res;
3848 case Iop_RSqrtEst32Ux2: {
3849 HReg res = newVRegD(env);
3850 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3851 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3852 res, arg, 0, False));
3853 return res;
3855 case Iop_Neg32Fx2: {
3856 HReg res = newVRegD(env);
3857 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3858 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3859 res, arg, 0, False));
3860 return res;
3862 case Iop_V128to64:
3863 case Iop_V128HIto64: {
3864 HReg src = iselNeonExpr(env, e->Iex.Unop.arg);
3865 HReg resLo = newVRegD(env);
3866 HReg resHi = newVRegD(env);
3867 addInstr(env, ARMInstr_VXferQ(False/*!toQ*/, src, resHi, resLo));
3868 return e->Iex.Unop.op == Iop_V128HIto64 ? resHi : resLo;
3870 default:
3871 break;
3873 } /* if (e->tag == Iex_Unop) */
3875 if (e->tag == Iex_Triop) {
3876 IRTriop *triop = e->Iex.Triop.details;
3878 switch (triop->op) {
3879 case Iop_Slice64: {
3880 HReg res = newVRegD(env);
3881 HReg argL = iselNeon64Expr(env, triop->arg2);
3882 HReg argR = iselNeon64Expr(env, triop->arg1);
3883 UInt imm4;
3884 if (triop->arg3->tag != Iex_Const ||
3885 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
3886 vpanic("ARM target supports Iop_Extract64 with constant "
3887 "third argument less than 16 only\n");
3889 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
3890 if (imm4 >= 8) {
3891 vpanic("ARM target supports Iop_Extract64 with constant "
3892 "third argument less than 16 only\n");
3894 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3895 res, argL, argR, imm4, False));
3896 return res;
3898 case Iop_SetElem8x8:
3899 case Iop_SetElem16x4:
3900 case Iop_SetElem32x2: {
3901 HReg res = newVRegD(env);
3902 HReg dreg = iselNeon64Expr(env, triop->arg1);
3903 HReg arg = iselIntExpr_R(env, triop->arg3);
3904 UInt index, size;
3905 if (triop->arg2->tag != Iex_Const ||
3906 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
3907 vpanic("ARM target supports SetElem with constant "
3908 "second argument only\n");
3910 index = triop->arg2->Iex.Const.con->Ico.U8;
3911 switch (triop->op) {
3912 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3913 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3914 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3915 default: vassert(0);
3917 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3918 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3919 mkARMNRS(ARMNRS_Scalar, res, index),
3920 mkARMNRS(ARMNRS_Reg, arg, 0),
3921 size, False));
3922 return res;
3924 default:
3925 break;
3929 /* --------- MULTIPLEX --------- */
3930 if (e->tag == Iex_ITE) { // VFD
3931 HReg rLo, rHi;
3932 HReg res = newVRegD(env);
3933 iselInt64Expr(&rHi, &rLo, env, e);
3934 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3935 return res;
3938 ppIRExpr(e);
3939 vpanic("iselNeon64Expr");
/* Compute a vector value into a NEON (128-bit, Q) register. As with
   the other isel*Expr wrappers in this file, this is the checked
   entry point: it delegates all the work to iselNeonExpr_wrk and
   then sanity-checks the result before handing it back.

   Invariants enforced here:
     - the host must actually have NEON (env->hwcaps check); callers
       must not request V128 selection on a non-NEON target;
     - the returned register is of class HRcVec128 (a Q register);
     - the returned register is virtual, since register allocation
       has not yet happened at this stage.  */
3943 static HReg iselNeonExpr ( ISelEnv* env, const IRExpr* e )
3945    HReg r;
3946    vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
     /* All real work is done by the _wrk function. */
3947    r = iselNeonExpr_wrk( env, e );
3948    vassert(hregClass(r) == HRcVec128);
3949    vassert(hregIsVirtual(r));
3950    return r;
3953 /* DO NOT CALL THIS DIRECTLY */
3954 static HReg iselNeonExpr_wrk ( ISelEnv* env, const IRExpr* e )
3956 IRType ty = typeOfIRExpr(env->type_env, e);
3957 MatchInfo mi;
3958 vassert(e);
3959 vassert(ty == Ity_V128);
3961 if (e->tag == Iex_RdTmp) {
3962 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3965 if (e->tag == Iex_Const) {
3966 /* At the moment there should be no 128-bit constants in IR for ARM
3967 generated during disassemble. They are represented as Iop_64HLtoV128
3968 binary operation and are handled among binary ops. */
3969 /* But zero can be created by valgrind internal optimizer */
3970 if (e->Iex.Const.con->Ico.V128 == 0x0000) {
3971 HReg res = newVRegV(env);
3972 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0)));
3973 return res;
3975 if (e->Iex.Const.con->Ico.V128 == 0xFFFF) {
3976 HReg res = newVRegV(env);
3977 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255)));
3978 return res;
3980 ppIRExpr(e);
3981 vpanic("128-bit constant is not implemented");
3984 if (e->tag == Iex_Load) {
3985 HReg res = newVRegV(env);
3986 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3987 vassert(ty == Ity_V128);
3988 addInstr(env, ARMInstr_NLdStQ(True, res, am));
3989 return res;
3992 if (e->tag == Iex_Get) {
3993 HReg addr = newVRegI(env);
3994 HReg res = newVRegV(env);
3995 vassert(ty == Ity_V128);
3996 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3997 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3998 return res;
4001 if (e->tag == Iex_Unop) {
4002 switch (e->Iex.Unop.op) {
4003 case Iop_NotV128: {
4004 DECLARE_PATTERN(p_veqz_8x16);
4005 DECLARE_PATTERN(p_veqz_16x8);
4006 DECLARE_PATTERN(p_veqz_32x4);
4007 DECLARE_PATTERN(p_vcge_8sx16);
4008 DECLARE_PATTERN(p_vcge_16sx8);
4009 DECLARE_PATTERN(p_vcge_32sx4);
4010 DECLARE_PATTERN(p_vcge_8ux16);
4011 DECLARE_PATTERN(p_vcge_16ux8);
4012 DECLARE_PATTERN(p_vcge_32ux4);
4013 DEFINE_PATTERN(p_veqz_8x16,
4014 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
4015 DEFINE_PATTERN(p_veqz_16x8,
4016 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
4017 DEFINE_PATTERN(p_veqz_32x4,
4018 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
4019 DEFINE_PATTERN(p_vcge_8sx16,
4020 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
4021 DEFINE_PATTERN(p_vcge_16sx8,
4022 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
4023 DEFINE_PATTERN(p_vcge_32sx4,
4024 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
4025 DEFINE_PATTERN(p_vcge_8ux16,
4026 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
4027 DEFINE_PATTERN(p_vcge_16ux8,
4028 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
4029 DEFINE_PATTERN(p_vcge_32ux4,
4030 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
4031 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
4032 HReg res = newVRegV(env);
4033 HReg arg = iselNeonExpr(env, mi.bindee[0]);
4034 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
4035 return res;
4036 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
4037 HReg res = newVRegV(env);
4038 HReg arg = iselNeonExpr(env, mi.bindee[0]);
4039 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
4040 return res;
4041 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
4042 HReg res = newVRegV(env);
4043 HReg arg = iselNeonExpr(env, mi.bindee[0]);
4044 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
4045 return res;
4046 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
4047 HReg res = newVRegV(env);
4048 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4049 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4050 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4051 res, argL, argR, 0, True));
4052 return res;
4053 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
4054 HReg res = newVRegV(env);
4055 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4056 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4057 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4058 res, argL, argR, 1, True));
4059 return res;
4060 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
4061 HReg res = newVRegV(env);
4062 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4063 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4064 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4065 res, argL, argR, 2, True));
4066 return res;
4067 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
4068 HReg res = newVRegV(env);
4069 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4070 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4071 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4072 res, argL, argR, 0, True));
4073 return res;
4074 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
4075 HReg res = newVRegV(env);
4076 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4077 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4078 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4079 res, argL, argR, 1, True));
4080 return res;
4081 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
4082 HReg res = newVRegV(env);
4083 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4084 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4085 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4086 res, argL, argR, 2, True));
4087 return res;
4088 } else {
4089 HReg res = newVRegV(env);
4090 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4091 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
4092 return res;
4095 case Iop_Dup8x16:
4096 case Iop_Dup16x8:
4097 case Iop_Dup32x4: {
4098 HReg res, arg;
4099 UInt size;
4100 DECLARE_PATTERN(p_vdup_8x16);
4101 DECLARE_PATTERN(p_vdup_16x8);
4102 DECLARE_PATTERN(p_vdup_32x4);
4103 DEFINE_PATTERN(p_vdup_8x16,
4104 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
4105 DEFINE_PATTERN(p_vdup_16x8,
4106 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
4107 DEFINE_PATTERN(p_vdup_32x4,
4108 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
4109 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
4110 UInt index;
4111 UInt imm4;
4112 if (mi.bindee[1]->tag == Iex_Const &&
4113 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4114 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4115 imm4 = (index << 1) + 1;
4116 if (index < 8) {
4117 res = newVRegV(env);
4118 arg = iselNeon64Expr(env, mi.bindee[0]);
4119 addInstr(env, ARMInstr_NUnaryS(
4120 ARMneon_VDUP,
4121 mkARMNRS(ARMNRS_Reg, res, 0),
4122 mkARMNRS(ARMNRS_Scalar, arg, index),
4123 imm4, True
4125 return res;
4128 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
4129 UInt index;
4130 UInt imm4;
4131 if (mi.bindee[1]->tag == Iex_Const &&
4132 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4133 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4134 imm4 = (index << 2) + 2;
4135 if (index < 4) {
4136 res = newVRegV(env);
4137 arg = iselNeon64Expr(env, mi.bindee[0]);
4138 addInstr(env, ARMInstr_NUnaryS(
4139 ARMneon_VDUP,
4140 mkARMNRS(ARMNRS_Reg, res, 0),
4141 mkARMNRS(ARMNRS_Scalar, arg, index),
4142 imm4, True
4144 return res;
4147 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
4148 UInt index;
4149 UInt imm4;
4150 if (mi.bindee[1]->tag == Iex_Const &&
4151 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4152 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4153 imm4 = (index << 3) + 4;
4154 if (index < 2) {
4155 res = newVRegV(env);
4156 arg = iselNeon64Expr(env, mi.bindee[0]);
4157 addInstr(env, ARMInstr_NUnaryS(
4158 ARMneon_VDUP,
4159 mkARMNRS(ARMNRS_Reg, res, 0),
4160 mkARMNRS(ARMNRS_Scalar, arg, index),
4161 imm4, True
4163 return res;
4167 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4168 res = newVRegV(env);
4169 switch (e->Iex.Unop.op) {
4170 case Iop_Dup8x16: size = 0; break;
4171 case Iop_Dup16x8: size = 1; break;
4172 case Iop_Dup32x4: size = 2; break;
4173 default: vassert(0);
4175 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4176 return res;
4178 case Iop_Abs8x16:
4179 case Iop_Abs16x8:
4180 case Iop_Abs32x4: {
4181 HReg res = newVRegV(env);
4182 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4183 UInt size = 0;
4184 switch(e->Iex.Binop.op) {
4185 case Iop_Abs8x16: size = 0; break;
4186 case Iop_Abs16x8: size = 1; break;
4187 case Iop_Abs32x4: size = 2; break;
4188 default: vassert(0);
4190 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4191 return res;
4193 case Iop_Reverse8sIn64_x2:
4194 case Iop_Reverse16sIn64_x2:
4195 case Iop_Reverse32sIn64_x2: {
4196 HReg res = newVRegV(env);
4197 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4198 UInt size = 0;
4199 switch(e->Iex.Binop.op) {
4200 case Iop_Reverse8sIn64_x2: size = 0; break;
4201 case Iop_Reverse16sIn64_x2: size = 1; break;
4202 case Iop_Reverse32sIn64_x2: size = 2; break;
4203 default: vassert(0);
4205 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4206 res, arg, size, True));
4207 return res;
4209 case Iop_Reverse8sIn32_x4:
4210 case Iop_Reverse16sIn32_x4: {
4211 HReg res = newVRegV(env);
4212 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4213 UInt size = 0;
4214 switch(e->Iex.Binop.op) {
4215 case Iop_Reverse8sIn32_x4: size = 0; break;
4216 case Iop_Reverse16sIn32_x4: size = 1; break;
4217 default: vassert(0);
4219 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4220 res, arg, size, True));
4221 return res;
4223 case Iop_Reverse8sIn16_x8: {
4224 HReg res = newVRegV(env);
4225 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4226 UInt size = 0;
4227 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4228 res, arg, size, True));
4229 return res;
4231 case Iop_CmpNEZ64x2: {
4232 HReg x_lsh = newVRegV(env);
4233 HReg x_rsh = newVRegV(env);
4234 HReg lsh_amt = newVRegV(env);
4235 HReg rsh_amt = newVRegV(env);
4236 HReg zero = newVRegV(env);
4237 HReg tmp = newVRegV(env);
4238 HReg tmp2 = newVRegV(env);
4239 HReg res = newVRegV(env);
4240 HReg x = newVRegV(env);
4241 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4242 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4243 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4244 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4245 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4246 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4247 rsh_amt, zero, lsh_amt, 2, True));
4248 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4249 x_lsh, x, lsh_amt, 3, True));
4250 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4251 x_rsh, x, rsh_amt, 3, True));
4252 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4253 tmp, x_lsh, x_rsh, 0, True));
4254 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4255 res, tmp, x, 0, True));
4256 return res;
4258 case Iop_CmpNEZ8x16:
4259 case Iop_CmpNEZ16x8:
4260 case Iop_CmpNEZ32x4: {
4261 HReg res = newVRegV(env);
4262 HReg tmp = newVRegV(env);
4263 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4264 UInt size;
4265 switch (e->Iex.Unop.op) {
4266 case Iop_CmpNEZ8x16: size = 0; break;
4267 case Iop_CmpNEZ16x8: size = 1; break;
4268 case Iop_CmpNEZ32x4: size = 2; break;
4269 default: vassert(0);
4271 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4272 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4273 return res;
4275 case Iop_Widen8Uto16x8:
4276 case Iop_Widen16Uto32x4:
4277 case Iop_Widen32Uto64x2: {
4278 HReg res = newVRegV(env);
4279 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4280 UInt size;
4281 switch (e->Iex.Unop.op) {
4282 case Iop_Widen8Uto16x8: size = 0; break;
4283 case Iop_Widen16Uto32x4: size = 1; break;
4284 case Iop_Widen32Uto64x2: size = 2; break;
4285 default: vassert(0);
4287 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4288 res, arg, size, True));
4289 return res;
4291 case Iop_Widen8Sto16x8:
4292 case Iop_Widen16Sto32x4:
4293 case Iop_Widen32Sto64x2: {
4294 HReg res = newVRegV(env);
4295 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4296 UInt size;
4297 switch (e->Iex.Unop.op) {
4298 case Iop_Widen8Sto16x8: size = 0; break;
4299 case Iop_Widen16Sto32x4: size = 1; break;
4300 case Iop_Widen32Sto64x2: size = 2; break;
4301 default: vassert(0);
4303 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4304 res, arg, size, True));
4305 return res;
4307 case Iop_PwAddL8Sx16:
4308 case Iop_PwAddL16Sx8:
4309 case Iop_PwAddL32Sx4: {
4310 HReg res = newVRegV(env);
4311 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4312 UInt size = 0;
4313 switch(e->Iex.Binop.op) {
4314 case Iop_PwAddL8Sx16: size = 0; break;
4315 case Iop_PwAddL16Sx8: size = 1; break;
4316 case Iop_PwAddL32Sx4: size = 2; break;
4317 default: vassert(0);
4319 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4320 res, arg, size, True));
4321 return res;
4323 case Iop_PwAddL8Ux16:
4324 case Iop_PwAddL16Ux8:
4325 case Iop_PwAddL32Ux4: {
4326 HReg res = newVRegV(env);
4327 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4328 UInt size = 0;
4329 switch(e->Iex.Binop.op) {
4330 case Iop_PwAddL8Ux16: size = 0; break;
4331 case Iop_PwAddL16Ux8: size = 1; break;
4332 case Iop_PwAddL32Ux4: size = 2; break;
4333 default: vassert(0);
4335 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4336 res, arg, size, True));
4337 return res;
4339 case Iop_Cnt8x16: {
4340 HReg res = newVRegV(env);
4341 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4342 UInt size = 0;
4343 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4344 return res;
4346 case Iop_Clz8x16:
4347 case Iop_Clz16x8:
4348 case Iop_Clz32x4: {
4349 HReg res = newVRegV(env);
4350 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4351 UInt size = 0;
4352 switch(e->Iex.Binop.op) {
4353 case Iop_Clz8x16: size = 0; break;
4354 case Iop_Clz16x8: size = 1; break;
4355 case Iop_Clz32x4: size = 2; break;
4356 default: vassert(0);
4358 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4359 return res;
4361 case Iop_Cls8x16:
4362 case Iop_Cls16x8:
4363 case Iop_Cls32x4: {
4364 HReg res = newVRegV(env);
4365 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4366 UInt size = 0;
4367 switch(e->Iex.Binop.op) {
4368 case Iop_Cls8x16: size = 0; break;
4369 case Iop_Cls16x8: size = 1; break;
4370 case Iop_Cls32x4: size = 2; break;
4371 default: vassert(0);
4373 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4374 return res;
4376 case Iop_FtoI32Sx4_RZ: {
4377 HReg res = newVRegV(env);
4378 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4379 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4380 res, arg, 2, True));
4381 return res;
4383 case Iop_FtoI32Ux4_RZ: {
4384 HReg res = newVRegV(env);
4385 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4386 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4387 res, arg, 2, True));
4388 return res;
4390 case Iop_I32StoFx4: {
4391 HReg res = newVRegV(env);
4392 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4393 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4394 res, arg, 2, True));
4395 return res;
4397 case Iop_I32UtoFx4: {
4398 HReg res = newVRegV(env);
4399 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4400 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4401 res, arg, 2, True));
4402 return res;
4404 case Iop_F16toF32x4: {
4405 HReg res = newVRegV(env);
4406 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4407 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4408 res, arg, 2, True));
4409 return res;
4411 case Iop_RecipEst32Fx4: {
4412 HReg res = newVRegV(env);
4413 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4414 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4415 res, argL, 0, True));
4416 return res;
4418 case Iop_RecipEst32Ux4: {
4419 HReg res = newVRegV(env);
4420 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4421 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4422 res, argL, 0, True));
4423 return res;
4425 case Iop_Abs32Fx4: {
4426 HReg res = newVRegV(env);
4427 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4428 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4429 res, argL, 0, True));
4430 return res;
4432 case Iop_RSqrtEst32Fx4: {
4433 HReg res = newVRegV(env);
4434 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4435 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4436 res, argL, 0, True));
4437 return res;
4439 case Iop_RSqrtEst32Ux4: {
4440 HReg res = newVRegV(env);
4441 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4442 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4443 res, argL, 0, True));
4444 return res;
4446 case Iop_Neg32Fx4: {
4447 HReg res = newVRegV(env);
4448 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4449 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4450 res, arg, 0, True));
4451 return res;
4453 /* ... */
4454 default:
4455 break;
4459 if (e->tag == Iex_Binop) {
4460 switch (e->Iex.Binop.op) {
4461 case Iop_64HLtoV128: {
4462 /* Try to match into single "VMOV reg, imm" instruction */
4463 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4464 e->Iex.Binop.arg2->tag == Iex_Const &&
4465 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4466 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4467 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4468 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4469 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4470 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4471 if (imm) {
4472 HReg res = newVRegV(env);
4473 addInstr(env, ARMInstr_NeonImm(res, imm));
4474 return res;
4476 if ((imm64 >> 32) == 0LL &&
4477 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4478 HReg tmp1 = newVRegV(env);
4479 HReg tmp2 = newVRegV(env);
4480 HReg res = newVRegV(env);
4481 if (imm->type < 10) {
4482 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4483 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4484 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4485 res, tmp1, tmp2, 4, True));
4486 return res;
4489 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4490 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4491 HReg tmp1 = newVRegV(env);
4492 HReg tmp2 = newVRegV(env);
4493 HReg res = newVRegV(env);
4494 if (imm->type < 10) {
4495 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4496 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4497 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4498 res, tmp1, tmp2, 4, True));
4499 return res;
4503 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4504 it the slow way. */
4505 HReg dHi = iselNeon64Expr(env, e->Iex.Binop.arg1);
4506 HReg dLo = iselNeon64Expr(env, e->Iex.Binop.arg2);
4507 HReg res = newVRegV(env);
4508 addInstr(env, ARMInstr_VXferQ(True/*toQ*/, res, dHi, dLo));
4509 return res;
4511 case Iop_AndV128: {
4512 HReg res = newVRegV(env);
4513 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4514 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4515 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4516 res, argL, argR, 4, True));
4517 return res;
4519 case Iop_OrV128: {
4520 HReg res = newVRegV(env);
4521 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4522 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4523 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4524 res, argL, argR, 4, True));
4525 return res;
4527 case Iop_XorV128: {
4528 HReg res = newVRegV(env);
4529 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4530 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4531 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4532 res, argL, argR, 4, True));
4533 return res;
4535 case Iop_Add8x16:
4536 case Iop_Add16x8:
4537 case Iop_Add32x4:
4538 case Iop_Add64x2: {
4540 FIXME: remove this if not used
4541 DECLARE_PATTERN(p_vrhadd_32sx4);
4542 ULong one = (1LL << 32) | 1LL;
4543 DEFINE_PATTERN(p_vrhadd_32sx4,
4544 binop(Iop_Add32x4,
4545 binop(Iop_Add32x4,
4546 binop(Iop_SarN32x4,
4547 bind(0),
4548 mkU8(1)),
4549 binop(Iop_SarN32x4,
4550 bind(1),
4551 mkU8(1))),
4552 binop(Iop_SarN32x4,
4553 binop(Iop_Add32x4,
4554 binop(Iop_Add32x4,
4555 binop(Iop_AndV128,
4556 bind(0),
4557 mkU128(one)),
4558 binop(Iop_AndV128,
4559 bind(1),
4560 mkU128(one))),
4561 mkU128(one)),
4562 mkU8(1))));
4564 HReg res = newVRegV(env);
4565 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4566 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4567 UInt size;
4568 switch (e->Iex.Binop.op) {
4569 case Iop_Add8x16: size = 0; break;
4570 case Iop_Add16x8: size = 1; break;
4571 case Iop_Add32x4: size = 2; break;
4572 case Iop_Add64x2: size = 3; break;
4573 default:
4574 ppIROp(e->Iex.Binop.op);
4575 vpanic("Illegal element size in VADD");
4577 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4578 res, argL, argR, size, True));
4579 return res;
4581 case Iop_RecipStep32Fx4: {
4582 HReg res = newVRegV(env);
4583 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4584 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4585 UInt size = 0;
4586 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4587 res, argL, argR, size, True));
4588 return res;
4590 case Iop_RSqrtStep32Fx4: {
4591 HReg res = newVRegV(env);
4592 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4593 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4594 UInt size = 0;
4595 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4596 res, argL, argR, size, True));
4597 return res;
4600 // These 6 verified 18 Apr 2013
4601 case Iop_InterleaveEvenLanes8x16:
4602 case Iop_InterleaveOddLanes8x16:
4603 case Iop_InterleaveEvenLanes16x8:
4604 case Iop_InterleaveOddLanes16x8:
4605 case Iop_InterleaveEvenLanes32x4:
4606 case Iop_InterleaveOddLanes32x4: {
4607 HReg rD = newVRegV(env);
4608 HReg rM = newVRegV(env);
4609 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4610 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4611 UInt size;
4612 Bool resRd; // is the result in rD or rM ?
4613 switch (e->Iex.Binop.op) {
4614 case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break;
4615 case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break;
4616 case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break;
4617 case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break;
4618 case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break;
4619 case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break;
4620 default: vassert(0);
4622 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4623 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4624 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
4625 return resRd ? rD : rM;
4628 // These 6 verified 18 Apr 2013
4629 case Iop_InterleaveHI8x16:
4630 case Iop_InterleaveLO8x16:
4631 case Iop_InterleaveHI16x8:
4632 case Iop_InterleaveLO16x8:
4633 case Iop_InterleaveHI32x4:
4634 case Iop_InterleaveLO32x4: {
4635 HReg rD = newVRegV(env);
4636 HReg rM = newVRegV(env);
4637 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4638 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4639 UInt size;
4640 Bool resRd; // is the result in rD or rM ?
4641 switch (e->Iex.Binop.op) {
4642 case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
4643 case Iop_InterleaveLO8x16: resRd = True; size = 0; break;
4644 case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
4645 case Iop_InterleaveLO16x8: resRd = True; size = 1; break;
4646 case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
4647 case Iop_InterleaveLO32x4: resRd = True; size = 2; break;
4648 default: vassert(0);
4650 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4651 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4652 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
4653 return resRd ? rD : rM;
4656 // These 6 verified 18 Apr 2013
4657 case Iop_CatOddLanes8x16:
4658 case Iop_CatEvenLanes8x16:
4659 case Iop_CatOddLanes16x8:
4660 case Iop_CatEvenLanes16x8:
4661 case Iop_CatOddLanes32x4:
4662 case Iop_CatEvenLanes32x4: {
4663 HReg rD = newVRegV(env);
4664 HReg rM = newVRegV(env);
4665 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4666 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4667 UInt size;
4668 Bool resRd; // is the result in rD or rM ?
4669 switch (e->Iex.Binop.op) {
4670 case Iop_CatOddLanes8x16: resRd = False; size = 0; break;
4671 case Iop_CatEvenLanes8x16: resRd = True; size = 0; break;
4672 case Iop_CatOddLanes16x8: resRd = False; size = 1; break;
4673 case Iop_CatEvenLanes16x8: resRd = True; size = 1; break;
4674 case Iop_CatOddLanes32x4: resRd = False; size = 2; break;
4675 case Iop_CatEvenLanes32x4: resRd = True; size = 2; break;
4676 default: vassert(0);
4678 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4679 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4680 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
4681 return resRd ? rD : rM;
4684 case Iop_QAdd8Ux16:
4685 case Iop_QAdd16Ux8:
4686 case Iop_QAdd32Ux4:
4687 case Iop_QAdd64Ux2: {
4688 HReg res = newVRegV(env);
4689 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4690 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4691 UInt size;
4692 switch (e->Iex.Binop.op) {
4693 case Iop_QAdd8Ux16: size = 0; break;
4694 case Iop_QAdd16Ux8: size = 1; break;
4695 case Iop_QAdd32Ux4: size = 2; break;
4696 case Iop_QAdd64Ux2: size = 3; break;
4697 default:
4698 ppIROp(e->Iex.Binop.op);
4699 vpanic("Illegal element size in VQADDU");
4701 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4702 res, argL, argR, size, True));
4703 return res;
4705 case Iop_QAdd8Sx16:
4706 case Iop_QAdd16Sx8:
4707 case Iop_QAdd32Sx4:
4708 case Iop_QAdd64Sx2: {
4709 HReg res = newVRegV(env);
4710 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4711 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4712 UInt size;
4713 switch (e->Iex.Binop.op) {
4714 case Iop_QAdd8Sx16: size = 0; break;
4715 case Iop_QAdd16Sx8: size = 1; break;
4716 case Iop_QAdd32Sx4: size = 2; break;
4717 case Iop_QAdd64Sx2: size = 3; break;
4718 default:
4719 ppIROp(e->Iex.Binop.op);
4720 vpanic("Illegal element size in VQADDS");
4722 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4723 res, argL, argR, size, True));
4724 return res;
4726 case Iop_Sub8x16:
4727 case Iop_Sub16x8:
4728 case Iop_Sub32x4:
4729 case Iop_Sub64x2: {
4730 HReg res = newVRegV(env);
4731 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4732 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4733 UInt size;
4734 switch (e->Iex.Binop.op) {
4735 case Iop_Sub8x16: size = 0; break;
4736 case Iop_Sub16x8: size = 1; break;
4737 case Iop_Sub32x4: size = 2; break;
4738 case Iop_Sub64x2: size = 3; break;
4739 default:
4740 ppIROp(e->Iex.Binop.op);
4741 vpanic("Illegal element size in VSUB");
4743 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4744 res, argL, argR, size, True));
4745 return res;
4747 case Iop_QSub8Ux16:
4748 case Iop_QSub16Ux8:
4749 case Iop_QSub32Ux4:
4750 case Iop_QSub64Ux2: {
4751 HReg res = newVRegV(env);
4752 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4753 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4754 UInt size;
4755 switch (e->Iex.Binop.op) {
4756 case Iop_QSub8Ux16: size = 0; break;
4757 case Iop_QSub16Ux8: size = 1; break;
4758 case Iop_QSub32Ux4: size = 2; break;
4759 case Iop_QSub64Ux2: size = 3; break;
4760 default:
4761 ppIROp(e->Iex.Binop.op);
4762 vpanic("Illegal element size in VQSUBU");
4764 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4765 res, argL, argR, size, True));
4766 return res;
4768 case Iop_QSub8Sx16:
4769 case Iop_QSub16Sx8:
4770 case Iop_QSub32Sx4:
4771 case Iop_QSub64Sx2: {
4772 HReg res = newVRegV(env);
4773 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4774 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4775 UInt size;
4776 switch (e->Iex.Binop.op) {
4777 case Iop_QSub8Sx16: size = 0; break;
4778 case Iop_QSub16Sx8: size = 1; break;
4779 case Iop_QSub32Sx4: size = 2; break;
4780 case Iop_QSub64Sx2: size = 3; break;
4781 default:
4782 ppIROp(e->Iex.Binop.op);
4783 vpanic("Illegal element size in VQSUBS");
4785 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4786 res, argL, argR, size, True));
4787 return res;
4789 case Iop_Max8Ux16:
4790 case Iop_Max16Ux8:
4791 case Iop_Max32Ux4: {
4792 HReg res = newVRegV(env);
4793 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4794 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4795 UInt size;
4796 switch (e->Iex.Binop.op) {
4797 case Iop_Max8Ux16: size = 0; break;
4798 case Iop_Max16Ux8: size = 1; break;
4799 case Iop_Max32Ux4: size = 2; break;
4800 default: vpanic("Illegal element size in VMAXU");
4802 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4803 res, argL, argR, size, True));
4804 return res;
4806 case Iop_Max8Sx16:
4807 case Iop_Max16Sx8:
4808 case Iop_Max32Sx4: {
4809 HReg res = newVRegV(env);
4810 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4811 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4812 UInt size;
4813 switch (e->Iex.Binop.op) {
4814 case Iop_Max8Sx16: size = 0; break;
4815 case Iop_Max16Sx8: size = 1; break;
4816 case Iop_Max32Sx4: size = 2; break;
4817 default: vpanic("Illegal element size in VMAXU");
4819 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4820 res, argL, argR, size, True));
4821 return res;
4823 case Iop_Min8Ux16:
4824 case Iop_Min16Ux8:
4825 case Iop_Min32Ux4: {
4826 HReg res = newVRegV(env);
4827 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4828 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4829 UInt size;
4830 switch (e->Iex.Binop.op) {
4831 case Iop_Min8Ux16: size = 0; break;
4832 case Iop_Min16Ux8: size = 1; break;
4833 case Iop_Min32Ux4: size = 2; break;
4834 default: vpanic("Illegal element size in VMAXU");
4836 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4837 res, argL, argR, size, True));
4838 return res;
4840 case Iop_Min8Sx16:
4841 case Iop_Min16Sx8:
4842 case Iop_Min32Sx4: {
4843 HReg res = newVRegV(env);
4844 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4845 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4846 UInt size;
4847 switch (e->Iex.Binop.op) {
4848 case Iop_Min8Sx16: size = 0; break;
4849 case Iop_Min16Sx8: size = 1; break;
4850 case Iop_Min32Sx4: size = 2; break;
4851 default: vpanic("Illegal element size in VMAXU");
4853 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4854 res, argL, argR, size, True));
4855 return res;
4857 case Iop_Sar8x16:
4858 case Iop_Sar16x8:
4859 case Iop_Sar32x4:
4860 case Iop_Sar64x2: {
4861 HReg res = newVRegV(env);
4862 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4863 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4864 HReg argR2 = newVRegV(env);
4865 HReg zero = newVRegV(env);
4866 UInt size;
4867 switch (e->Iex.Binop.op) {
4868 case Iop_Sar8x16: size = 0; break;
4869 case Iop_Sar16x8: size = 1; break;
4870 case Iop_Sar32x4: size = 2; break;
4871 case Iop_Sar64x2: size = 3; break;
4872 default: vassert(0);
4874 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4875 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4876 argR2, zero, argR, size, True));
4877 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4878 res, argL, argR2, size, True));
4879 return res;
4881 case Iop_Sal8x16:
4882 case Iop_Sal16x8:
4883 case Iop_Sal32x4:
4884 case Iop_Sal64x2: {
4885 HReg res = newVRegV(env);
4886 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4887 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4888 UInt size;
4889 switch (e->Iex.Binop.op) {
4890 case Iop_Sal8x16: size = 0; break;
4891 case Iop_Sal16x8: size = 1; break;
4892 case Iop_Sal32x4: size = 2; break;
4893 case Iop_Sal64x2: size = 3; break;
4894 default: vassert(0);
4896 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4897 res, argL, argR, size, True));
4898 return res;
4900 case Iop_Shr8x16:
4901 case Iop_Shr16x8:
4902 case Iop_Shr32x4:
4903 case Iop_Shr64x2: {
4904 HReg res = newVRegV(env);
4905 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4906 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4907 HReg argR2 = newVRegV(env);
4908 HReg zero = newVRegV(env);
4909 UInt size;
4910 switch (e->Iex.Binop.op) {
4911 case Iop_Shr8x16: size = 0; break;
4912 case Iop_Shr16x8: size = 1; break;
4913 case Iop_Shr32x4: size = 2; break;
4914 case Iop_Shr64x2: size = 3; break;
4915 default: vassert(0);
4917 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4918 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4919 argR2, zero, argR, size, True));
4920 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4921 res, argL, argR2, size, True));
4922 return res;
4924 case Iop_Shl8x16:
4925 case Iop_Shl16x8:
4926 case Iop_Shl32x4:
4927 case Iop_Shl64x2: {
4928 HReg res = newVRegV(env);
4929 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4930 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4931 UInt size;
4932 switch (e->Iex.Binop.op) {
4933 case Iop_Shl8x16: size = 0; break;
4934 case Iop_Shl16x8: size = 1; break;
4935 case Iop_Shl32x4: size = 2; break;
4936 case Iop_Shl64x2: size = 3; break;
4937 default: vassert(0);
4939 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4940 res, argL, argR, size, True));
4941 return res;
4943 case Iop_QShl8x16:
4944 case Iop_QShl16x8:
4945 case Iop_QShl32x4:
4946 case Iop_QShl64x2: {
4947 HReg res = newVRegV(env);
4948 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4949 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4950 UInt size;
4951 switch (e->Iex.Binop.op) {
4952 case Iop_QShl8x16: size = 0; break;
4953 case Iop_QShl16x8: size = 1; break;
4954 case Iop_QShl32x4: size = 2; break;
4955 case Iop_QShl64x2: size = 3; break;
4956 default: vassert(0);
4958 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4959 res, argL, argR, size, True));
4960 return res;
4962 case Iop_QSal8x16:
4963 case Iop_QSal16x8:
4964 case Iop_QSal32x4:
4965 case Iop_QSal64x2: {
4966 HReg res = newVRegV(env);
4967 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4968 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4969 UInt size;
4970 switch (e->Iex.Binop.op) {
4971 case Iop_QSal8x16: size = 0; break;
4972 case Iop_QSal16x8: size = 1; break;
4973 case Iop_QSal32x4: size = 2; break;
4974 case Iop_QSal64x2: size = 3; break;
4975 default: vassert(0);
4977 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4978 res, argL, argR, size, True));
4979 return res;
4981 case Iop_QShlNsatUU8x16:
4982 case Iop_QShlNsatUU16x8:
4983 case Iop_QShlNsatUU32x4:
4984 case Iop_QShlNsatUU64x2: {
4985 HReg res = newVRegV(env);
4986 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4987 UInt size, imm;
4988 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4989 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4990 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
4991 "second argument only\n");
4993 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4994 switch (e->Iex.Binop.op) {
4995 case Iop_QShlNsatUU8x16: size = 8 | imm; break;
4996 case Iop_QShlNsatUU16x8: size = 16 | imm; break;
4997 case Iop_QShlNsatUU32x4: size = 32 | imm; break;
4998 case Iop_QShlNsatUU64x2: size = 64 | imm; break;
4999 default: vassert(0);
5001 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
5002 res, argL, size, True));
5003 return res;
5005 case Iop_QShlNsatSU8x16:
5006 case Iop_QShlNsatSU16x8:
5007 case Iop_QShlNsatSU32x4:
5008 case Iop_QShlNsatSU64x2: {
5009 HReg res = newVRegV(env);
5010 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5011 UInt size, imm;
5012 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5013 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5014 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
5015 "second argument only\n");
5017 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5018 switch (e->Iex.Binop.op) {
5019 case Iop_QShlNsatSU8x16: size = 8 | imm; break;
5020 case Iop_QShlNsatSU16x8: size = 16 | imm; break;
5021 case Iop_QShlNsatSU32x4: size = 32 | imm; break;
5022 case Iop_QShlNsatSU64x2: size = 64 | imm; break;
5023 default: vassert(0);
5025 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
5026 res, argL, size, True));
5027 return res;
5029 case Iop_QShlNsatSS8x16:
5030 case Iop_QShlNsatSS16x8:
5031 case Iop_QShlNsatSS32x4:
5032 case Iop_QShlNsatSS64x2: {
5033 HReg res = newVRegV(env);
5034 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5035 UInt size, imm;
5036 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5037 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5038 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
5039 "second argument only\n");
5041 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5042 switch (e->Iex.Binop.op) {
5043 case Iop_QShlNsatSS8x16: size = 8 | imm; break;
5044 case Iop_QShlNsatSS16x8: size = 16 | imm; break;
5045 case Iop_QShlNsatSS32x4: size = 32 | imm; break;
5046 case Iop_QShlNsatSS64x2: size = 64 | imm; break;
5047 default: vassert(0);
5049 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
5050 res, argL, size, True));
5051 return res;
5053 case Iop_ShrN8x16:
5054 case Iop_ShrN16x8:
5055 case Iop_ShrN32x4:
5056 case Iop_ShrN64x2: {
5057 HReg res = newVRegV(env);
5058 HReg tmp = newVRegV(env);
5059 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5060 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5061 HReg argR2 = newVRegI(env);
5062 UInt size;
5063 switch (e->Iex.Binop.op) {
5064 case Iop_ShrN8x16: size = 0; break;
5065 case Iop_ShrN16x8: size = 1; break;
5066 case Iop_ShrN32x4: size = 2; break;
5067 case Iop_ShrN64x2: size = 3; break;
5068 default: vassert(0);
5070 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5071 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
5072 tmp, argR2, 0, True));
5073 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5074 res, argL, tmp, size, True));
5075 return res;
5077 case Iop_ShlN8x16:
5078 case Iop_ShlN16x8:
5079 case Iop_ShlN32x4:
5080 case Iop_ShlN64x2: {
5081 HReg res = newVRegV(env);
5082 HReg tmp = newVRegV(env);
5083 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5084 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5085 UInt size;
5086 switch (e->Iex.Binop.op) {
5087 case Iop_ShlN8x16: size = 0; break;
5088 case Iop_ShlN16x8: size = 1; break;
5089 case Iop_ShlN32x4: size = 2; break;
5090 case Iop_ShlN64x2: size = 3; break;
5091 default: vassert(0);
5093 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
5094 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5095 res, argL, tmp, size, True));
5096 return res;
5098 case Iop_SarN8x16:
5099 case Iop_SarN16x8:
5100 case Iop_SarN32x4:
5101 case Iop_SarN64x2: {
5102 HReg res = newVRegV(env);
5103 HReg tmp = newVRegV(env);
5104 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5105 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5106 HReg argR2 = newVRegI(env);
5107 UInt size;
5108 switch (e->Iex.Binop.op) {
5109 case Iop_SarN8x16: size = 0; break;
5110 case Iop_SarN16x8: size = 1; break;
5111 case Iop_SarN32x4: size = 2; break;
5112 case Iop_SarN64x2: size = 3; break;
5113 default: vassert(0);
5115 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5116 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
5117 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5118 res, argL, tmp, size, True));
5119 return res;
5121 case Iop_CmpGT8Ux16:
5122 case Iop_CmpGT16Ux8:
5123 case Iop_CmpGT32Ux4: {
5124 HReg res = newVRegV(env);
5125 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5126 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5127 UInt size;
5128 switch (e->Iex.Binop.op) {
5129 case Iop_CmpGT8Ux16: size = 0; break;
5130 case Iop_CmpGT16Ux8: size = 1; break;
5131 case Iop_CmpGT32Ux4: size = 2; break;
5132 default: vassert(0);
5134 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5135 res, argL, argR, size, True));
5136 return res;
5138 case Iop_CmpGT8Sx16:
5139 case Iop_CmpGT16Sx8:
5140 case Iop_CmpGT32Sx4: {
5141 HReg res = newVRegV(env);
5142 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5143 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5144 UInt size;
5145 switch (e->Iex.Binop.op) {
5146 case Iop_CmpGT8Sx16: size = 0; break;
5147 case Iop_CmpGT16Sx8: size = 1; break;
5148 case Iop_CmpGT32Sx4: size = 2; break;
5149 default: vassert(0);
5151 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5152 res, argL, argR, size, True));
5153 return res;
5155 case Iop_CmpEQ8x16:
5156 case Iop_CmpEQ16x8:
5157 case Iop_CmpEQ32x4: {
5158 HReg res = newVRegV(env);
5159 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5160 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5161 UInt size;
5162 switch (e->Iex.Binop.op) {
5163 case Iop_CmpEQ8x16: size = 0; break;
5164 case Iop_CmpEQ16x8: size = 1; break;
5165 case Iop_CmpEQ32x4: size = 2; break;
5166 default: vassert(0);
5168 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5169 res, argL, argR, size, True));
5170 return res;
5172 case Iop_Mul8x16:
5173 case Iop_Mul16x8:
5174 case Iop_Mul32x4: {
5175 HReg res = newVRegV(env);
5176 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5177 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5178 UInt size = 0;
5179 switch(e->Iex.Binop.op) {
5180 case Iop_Mul8x16: size = 0; break;
5181 case Iop_Mul16x8: size = 1; break;
5182 case Iop_Mul32x4: size = 2; break;
5183 default: vassert(0);
5185 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5186 res, argL, argR, size, True));
5187 return res;
5189 case Iop_Mull8Ux8:
5190 case Iop_Mull16Ux4:
5191 case Iop_Mull32Ux2: {
5192 HReg res = newVRegV(env);
5193 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5194 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5195 UInt size = 0;
5196 switch(e->Iex.Binop.op) {
5197 case Iop_Mull8Ux8: size = 0; break;
5198 case Iop_Mull16Ux4: size = 1; break;
5199 case Iop_Mull32Ux2: size = 2; break;
5200 default: vassert(0);
5202 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5203 res, argL, argR, size, True));
5204 return res;
5207 case Iop_Mull8Sx8:
5208 case Iop_Mull16Sx4:
5209 case Iop_Mull32Sx2: {
5210 HReg res = newVRegV(env);
5211 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5212 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5213 UInt size = 0;
5214 switch(e->Iex.Binop.op) {
5215 case Iop_Mull8Sx8: size = 0; break;
5216 case Iop_Mull16Sx4: size = 1; break;
5217 case Iop_Mull32Sx2: size = 2; break;
5218 default: vassert(0);
5220 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5221 res, argL, argR, size, True));
5222 return res;
5225 case Iop_QDMulHi16Sx8:
5226 case Iop_QDMulHi32Sx4: {
5227 HReg res = newVRegV(env);
5228 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5229 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5230 UInt size = 0;
5231 switch(e->Iex.Binop.op) {
5232 case Iop_QDMulHi16Sx8: size = 1; break;
5233 case Iop_QDMulHi32Sx4: size = 2; break;
5234 default: vassert(0);
5236 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5237 res, argL, argR, size, True));
5238 return res;
5241 case Iop_QRDMulHi16Sx8:
5242 case Iop_QRDMulHi32Sx4: {
5243 HReg res = newVRegV(env);
5244 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5245 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5246 UInt size = 0;
5247 switch(e->Iex.Binop.op) {
5248 case Iop_QRDMulHi16Sx8: size = 1; break;
5249 case Iop_QRDMulHi32Sx4: size = 2; break;
5250 default: vassert(0);
5252 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5253 res, argL, argR, size, True));
5254 return res;
5257 case Iop_QDMull16Sx4:
5258 case Iop_QDMull32Sx2: {
5259 HReg res = newVRegV(env);
5260 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5261 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5262 UInt size = 0;
5263 switch(e->Iex.Binop.op) {
5264 case Iop_QDMull16Sx4: size = 1; break;
5265 case Iop_QDMull32Sx2: size = 2; break;
5266 default: vassert(0);
5268 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5269 res, argL, argR, size, True));
5270 return res;
5272 case Iop_PolynomialMul8x16: {
5273 HReg res = newVRegV(env);
5274 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5275 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5276 UInt size = 0;
5277 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5278 res, argL, argR, size, True));
5279 return res;
5281 case Iop_Max32Fx4: {
5282 HReg res = newVRegV(env);
5283 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5284 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5285 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5286 res, argL, argR, 2, True));
5287 return res;
5289 case Iop_Min32Fx4: {
5290 HReg res = newVRegV(env);
5291 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5292 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5293 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5294 res, argL, argR, 2, True));
5295 return res;
5297 case Iop_PwMax32Fx4: {
5298 HReg res = newVRegV(env);
5299 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5300 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5301 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5302 res, argL, argR, 2, True));
5303 return res;
5305 case Iop_PwMin32Fx4: {
5306 HReg res = newVRegV(env);
5307 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5308 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5309 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5310 res, argL, argR, 2, True));
5311 return res;
5313 case Iop_CmpGT32Fx4: {
5314 HReg res = newVRegV(env);
5315 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5316 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5317 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5318 res, argL, argR, 2, True));
5319 return res;
5321 case Iop_CmpGE32Fx4: {
5322 HReg res = newVRegV(env);
5323 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5324 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5325 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5326 res, argL, argR, 2, True));
5327 return res;
5329 case Iop_CmpEQ32Fx4: {
5330 HReg res = newVRegV(env);
5331 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5332 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5333 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5334 res, argL, argR, 2, True));
5335 return res;
5338 case Iop_PolynomialMull8x8: {
5339 HReg res = newVRegV(env);
5340 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5341 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5342 UInt size = 0;
5343 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5344 res, argL, argR, size, True));
5345 return res;
5347 case Iop_F32ToFixed32Ux4_RZ:
5348 case Iop_F32ToFixed32Sx4_RZ:
5349 case Iop_Fixed32UToF32x4_RN:
5350 case Iop_Fixed32SToF32x4_RN: {
5351 HReg res = newVRegV(env);
5352 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5353 ARMNeonUnOp op;
5354 UInt imm6;
5355 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5356 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5357 vpanic("ARM supports FP <-> Fixed conversion with constant "
5358 "second argument less than 33 only\n");
5360 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5361 vassert(imm6 <= 32 && imm6 > 0);
5362 imm6 = 64 - imm6;
5363 switch(e->Iex.Binop.op) {
5364 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5365 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5366 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5367 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5368 default: vassert(0);
5370 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5371 return res;
5374 FIXME remove if not used
5375 case Iop_VDup8x16:
5376 case Iop_VDup16x8:
5377 case Iop_VDup32x4: {
5378 HReg res = newVRegV(env);
5379 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5380 UInt imm4;
5381 UInt index;
5382 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5383 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5384 vpanic("ARM supports Iop_VDup with constant "
5385 "second argument less than 16 only\n");
5387 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5388 switch(e->Iex.Binop.op) {
5389 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5390 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5391 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5392 default: vassert(0);
5394 if (imm4 >= 16) {
5395 vpanic("ARM supports Iop_VDup with constant "
5396 "second argument less than 16 only\n");
5398 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5399 res, argL, imm4, True));
5400 return res;
5403 case Iop_PwAdd8x16:
5404 case Iop_PwAdd16x8:
5405 case Iop_PwAdd32x4: {
5406 HReg res = newVRegV(env);
5407 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5408 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5409 UInt size = 0;
5410 switch(e->Iex.Binop.op) {
5411 case Iop_PwAdd8x16: size = 0; break;
5412 case Iop_PwAdd16x8: size = 1; break;
5413 case Iop_PwAdd32x4: size = 2; break;
5414 default: vassert(0);
5416 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5417 res, argL, argR, size, True));
5418 return res;
5420 /* ... */
5421 default:
5422 break;
5426 if (e->tag == Iex_Triop) {
5427 IRTriop *triop = e->Iex.Triop.details;
5429 switch (triop->op) {
5430 case Iop_SliceV128: {
5431 HReg res = newVRegV(env);
5432 HReg argL = iselNeonExpr(env, triop->arg2);
5433 HReg argR = iselNeonExpr(env, triop->arg1);
5434 UInt imm4;
5435 if (triop->arg3->tag != Iex_Const ||
5436 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5437 vpanic("ARM target supports Iop_ExtractV128 with constant "
5438 "third argument less than 16 only\n");
5440 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5441 if (imm4 >= 16) {
5442 vpanic("ARM target supports Iop_ExtractV128 with constant "
5443 "third argument less than 16 only\n");
5445 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5446 res, argL, argR, imm4, True));
5447 return res;
5449 case Iop_Mul32Fx4:
5450 case Iop_Sub32Fx4:
5451 case Iop_Add32Fx4: {
5452 HReg res = newVRegV(env);
5453 HReg argL = iselNeonExpr(env, triop->arg2);
5454 HReg argR = iselNeonExpr(env, triop->arg3);
5455 UInt size = 0;
5456 ARMNeonBinOp op = ARMneon_INVALID;
5457 switch (triop->op) {
5458 case Iop_Mul32Fx4: op = ARMneon_VMULFP; break;
5459 case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break;
5460 case Iop_Add32Fx4: op = ARMneon_VADDFP; break;
5461 default: vassert(0);
5463 addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True));
5464 return res;
5466 default:
5467 break;
5471 if (e->tag == Iex_ITE) { // VFD
5472 ARMCondCode cc;
5473 HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue);
5474 HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse);
5475 HReg dst = newVRegV(env);
5476 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5477 cc = iselCondCode(env, e->Iex.ITE.cond);
5478 addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5479 return dst;
5482 /* neon_expr_bad: */
5483 ppIRExpr(e);
5484 vpanic("iselNeonExpr_wrk");
5487 /*---------------------------------------------------------*/
5488 /*--- ISEL: Floating point expressions (64 bit) ---*/
5489 /*---------------------------------------------------------*/
5491 /* Compute a 64-bit floating point value into a register, the identity
5492 of which is returned. As with iselIntExpr_R, the reg may be either
5493 real or virtual; in any case it must not be changed by subsequent
5494 code emitted by the caller. */
5496 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5498 HReg r = iselDblExpr_wrk( env, e );
5499 # if 0
5500 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5501 # endif
5502 vassert(hregClass(r) == HRcFlt64);
5503 vassert(hregIsVirtual(r));
5504 return r;
5507 /* DO NOT CALL THIS DIRECTLY */
5508 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5510 IRType ty = typeOfIRExpr(env->type_env,e);
5511 vassert(e);
5512 vassert(ty == Ity_F64);
5514 if (e->tag == Iex_RdTmp) {
5515 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5518 if (e->tag == Iex_Const) {
5519 /* Just handle the zero case. */
5520 IRConst* con = e->Iex.Const.con;
5521 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5522 HReg z32 = newVRegI(env);
5523 HReg dst = newVRegD(env);
5524 addInstr(env, ARMInstr_Imm32(z32, 0));
5525 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5526 return dst;
5530 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5531 ARMAModeV* am;
5532 HReg res = newVRegD(env);
5533 vassert(e->Iex.Load.ty == Ity_F64);
5534 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5535 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5536 return res;
5539 if (e->tag == Iex_Get) {
5540 // XXX This won't work if offset > 1020 or is not 0 % 4.
5541 // In which case we'll have to generate more longwinded code.
5542 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5543 HReg res = newVRegD(env);
5544 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5545 return res;
5548 if (e->tag == Iex_Unop) {
5549 switch (e->Iex.Unop.op) {
5550 case Iop_ReinterpI64asF64: {
5551 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5552 return iselNeon64Expr(env, e->Iex.Unop.arg);
5553 } else {
5554 HReg srcHi, srcLo;
5555 HReg dst = newVRegD(env);
5556 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5557 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5558 return dst;
5561 case Iop_NegF64: {
5562 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5563 HReg dst = newVRegD(env);
5564 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5565 return dst;
5567 case Iop_AbsF64: {
5568 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5569 HReg dst = newVRegD(env);
5570 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5571 return dst;
5573 case Iop_F32toF64: {
5574 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5575 HReg dst = newVRegD(env);
5576 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5577 return dst;
5579 case Iop_I32UtoF64:
5580 case Iop_I32StoF64: {
5581 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5582 HReg f32 = newVRegF(env);
5583 HReg dst = newVRegD(env);
5584 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5585 /* VMOV f32, src */
5586 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5587 /* FSITOD dst, f32 */
5588 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5589 dst, f32));
5590 return dst;
5592 default:
5593 break;
5597 if (e->tag == Iex_Binop) {
5598 switch (e->Iex.Binop.op) {
5599 case Iop_SqrtF64: {
5600 /* first arg is rounding mode; we ignore it. */
5601 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5602 HReg dst = newVRegD(env);
5603 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5604 return dst;
5606 case Iop_RoundF64toInt: {
5607 /* We can only generate this on a >= V8 capable target. But
5608 that's OK since we should only be asked to generate for V8
5609 capable guests, and we assume here that host == guest. */
5610 if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5611 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5612 HReg dst = newVRegD(env);
5613 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5614 addInstr(env, ARMInstr_VRIntR(True/*isF64*/, dst, src));
5615 set_VFP_rounding_default(env);
5616 return dst;
5618 /* not a V8 target, so we can't select insns for this. */
5619 break;
5621 default:
5622 break;
5626 if (e->tag == Iex_Triop) {
5627 IRTriop *triop = e->Iex.Triop.details;
5629 switch (triop->op) {
5630 case Iop_DivF64:
5631 case Iop_MulF64:
5632 case Iop_AddF64:
5633 case Iop_SubF64: {
5634 ARMVfpOp op = 0; /*INVALID*/
5635 HReg argL = iselDblExpr(env, triop->arg2);
5636 HReg argR = iselDblExpr(env, triop->arg3);
5637 HReg dst = newVRegD(env);
5638 switch (triop->op) {
5639 case Iop_DivF64: op = ARMvfp_DIV; break;
5640 case Iop_MulF64: op = ARMvfp_MUL; break;
5641 case Iop_AddF64: op = ARMvfp_ADD; break;
5642 case Iop_SubF64: op = ARMvfp_SUB; break;
5643 default: vassert(0);
5645 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5646 return dst;
5648 default:
5649 break;
5653 if (e->tag == Iex_ITE) { // VFD
5654 if (ty == Ity_F64
5655 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5656 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
5657 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
5658 HReg dst = newVRegD(env);
5659 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
5660 ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
5661 addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
5662 return dst;
5666 ppIRExpr(e);
5667 vpanic("iselDblExpr_wrk");
5671 /*---------------------------------------------------------*/
5672 /*--- ISEL: Floating point expressions (32 bit) ---*/
5673 /*---------------------------------------------------------*/
5675 /* Compute a 32-bit floating point value into a register, the identity
5676 of which is returned. As with iselIntExpr_R, the reg may be either
5677 real or virtual; in any case it must not be changed by subsequent
5678 code emitted by the caller. */
5680 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5682 HReg r = iselFltExpr_wrk( env, e );
5683 # if 0
5684 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5685 # endif
5686 vassert(hregClass(r) == HRcFlt32);
5687 vassert(hregIsVirtual(r));
5688 return r;
5691 /* DO NOT CALL THIS DIRECTLY */
5692 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5694 IRType ty = typeOfIRExpr(env->type_env,e);
5695 vassert(e);
5696 vassert(ty == Ity_F32);
5698 if (e->tag == Iex_RdTmp) {
5699 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5702 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5703 ARMAModeV* am;
5704 HReg res = newVRegF(env);
5705 vassert(e->Iex.Load.ty == Ity_F32);
5706 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5707 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5708 return res;
5711 if (e->tag == Iex_Get) {
5712 // XXX This won't work if offset > 1020 or is not 0 % 4.
5713 // In which case we'll have to generate more longwinded code.
5714 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5715 HReg res = newVRegF(env);
5716 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5717 return res;
5720 if (e->tag == Iex_Unop) {
5721 switch (e->Iex.Unop.op) {
5722 case Iop_ReinterpI32asF32: {
5723 HReg dst = newVRegF(env);
5724 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5725 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5726 return dst;
5728 case Iop_NegF32: {
5729 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5730 HReg dst = newVRegF(env);
5731 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5732 return dst;
5734 case Iop_AbsF32: {
5735 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5736 HReg dst = newVRegF(env);
5737 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5738 return dst;
5740 default:
5741 break;
5745 if (e->tag == Iex_Binop) {
5746 switch (e->Iex.Binop.op) {
5747 case Iop_SqrtF32: {
5748 /* first arg is rounding mode; we ignore it. */
5749 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5750 HReg dst = newVRegF(env);
5751 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5752 return dst;
5754 case Iop_F64toF32: {
5755 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5756 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5757 HReg valS = newVRegF(env);
5758 /* FCVTSD valS, valD */
5759 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5760 set_VFP_rounding_default(env);
5761 return valS;
5763 case Iop_RoundF32toInt: {
5764 /* We can only generate this on a >= V8 capable target. But
5765 that's OK since we should only be asked to generate for V8
5766 capable guests, and we assume here that host == guest. */
5767 if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5768 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5769 HReg dst = newVRegF(env);
5770 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5771 addInstr(env, ARMInstr_VRIntR(False/*!isF64*/, dst, src));
5772 set_VFP_rounding_default(env);
5773 return dst;
5775 /* not a V8 target, so we can't select insns for this. */
5776 break;
5778 default:
5779 break;
5783 if (e->tag == Iex_Triop) {
5784 IRTriop *triop = e->Iex.Triop.details;
5786 switch (triop->op) {
5787 case Iop_DivF32:
5788 case Iop_MulF32:
5789 case Iop_AddF32:
5790 case Iop_SubF32: {
5791 ARMVfpOp op = 0; /*INVALID*/
5792 HReg argL = iselFltExpr(env, triop->arg2);
5793 HReg argR = iselFltExpr(env, triop->arg3);
5794 HReg dst = newVRegF(env);
5795 switch (triop->op) {
5796 case Iop_DivF32: op = ARMvfp_DIV; break;
5797 case Iop_MulF32: op = ARMvfp_MUL; break;
5798 case Iop_AddF32: op = ARMvfp_ADD; break;
5799 case Iop_SubF32: op = ARMvfp_SUB; break;
5800 default: vassert(0);
5802 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5803 return dst;
5805 default:
5806 break;
5810 if (e->tag == Iex_ITE) { // VFD
5811 if (ty == Ity_F32
5812 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5813 ARMCondCode cc;
5814 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
5815 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
5816 HReg dst = newVRegF(env);
5817 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
5818 cc = iselCondCode(env, e->Iex.ITE.cond);
5819 addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
5820 return dst;
5824 ppIRExpr(e);
5825 vpanic("iselFltExpr_wrk");
5829 /*---------------------------------------------------------*/
5830 /*--- ISEL: Statements ---*/
5831 /*---------------------------------------------------------*/
5833 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5835 if (vex_traceflags & VEX_TRACE_VCODE) {
5836 vex_printf("\n-- ");
5837 ppIRStmt(stmt);
5838 vex_printf("\n");
5840 switch (stmt->tag) {
5842 /* --------- STORE --------- */
5843 /* little-endian write to memory */
5844 case Ist_Store: {
5845 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5846 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5847 IREndness end = stmt->Ist.Store.end;
5849 if (tya != Ity_I32 || end != Iend_LE)
5850 goto stmt_fail;
5852 if (tyd == Ity_I32) {
5853 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5854 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5855 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5856 return;
5858 if (tyd == Ity_I16) {
5859 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5860 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5861 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
5862 False/*!isLoad*/,
5863 False/*!isSignedLoad*/, rD, am));
5864 return;
5866 if (tyd == Ity_I8) {
5867 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5868 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5869 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
5870 return;
5872 if (tyd == Ity_I64) {
5873 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5874 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5875 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5876 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5877 } else {
5878 HReg rDhi, rDlo, rA;
5879 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5880 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5881 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
5882 ARMAMode1_RI(rA,4)));
5883 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
5884 ARMAMode1_RI(rA,0)));
5886 return;
5888 if (tyd == Ity_F64) {
5889 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5890 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5891 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5892 return;
5894 if (tyd == Ity_F32) {
5895 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5896 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5897 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5898 return;
5900 if (tyd == Ity_V128) {
5901 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5902 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5903 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5904 return;
5907 break;
5910 /* --------- CONDITIONAL STORE --------- */
5911 /* conditional little-endian write to memory */
5912 case Ist_StoreG: {
5913 IRStoreG* sg = stmt->Ist.StoreG.details;
5914 IRType tya = typeOfIRExpr(env->type_env, sg->addr);
5915 IRType tyd = typeOfIRExpr(env->type_env, sg->data);
5916 IREndness end = sg->end;
5918 if (tya != Ity_I32 || end != Iend_LE)
5919 goto stmt_fail;
5921 switch (tyd) {
5922 case Ity_I8:
5923 case Ity_I32: {
5924 HReg rD = iselIntExpr_R(env, sg->data);
5925 ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr);
5926 ARMCondCode cc = iselCondCode(env, sg->guard);
5927 addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
5928 (cc, False/*!isLoad*/, rD, am));
5929 return;
5931 case Ity_I16: {
5932 HReg rD = iselIntExpr_R(env, sg->data);
5933 ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr);
5934 ARMCondCode cc = iselCondCode(env, sg->guard);
5935 addInstr(env, ARMInstr_LdSt16(cc,
5936 False/*!isLoad*/,
5937 False/*!isSignedLoad*/, rD, am));
5938 return;
5940 default:
5941 break;
5943 break;
5946 /* --------- CONDITIONAL LOAD --------- */
5947 /* conditional little-endian load from memory */
5948 case Ist_LoadG: {
5949 IRLoadG* lg = stmt->Ist.LoadG.details;
5950 IRType tya = typeOfIRExpr(env->type_env, lg->addr);
5951 IREndness end = lg->end;
5953 if (tya != Ity_I32 || end != Iend_LE)
5954 goto stmt_fail;
5956 switch (lg->cvt) {
5957 case ILGop_8Uto32:
5958 case ILGop_Ident32: {
5959 HReg rAlt = iselIntExpr_R(env, lg->alt);
5960 ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr);
5961 HReg rD = lookupIRTemp(env, lg->dst);
5962 addInstr(env, mk_iMOVds_RR(rD, rAlt));
5963 ARMCondCode cc = iselCondCode(env, lg->guard);
5964 addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
5965 : ARMInstr_LdSt8U)
5966 (cc, True/*isLoad*/, rD, am));
5967 return;
5969 case ILGop_16Sto32:
5970 case ILGop_16Uto32:
5971 case ILGop_8Sto32: {
5972 HReg rAlt = iselIntExpr_R(env, lg->alt);
5973 ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr);
5974 HReg rD = lookupIRTemp(env, lg->dst);
5975 addInstr(env, mk_iMOVds_RR(rD, rAlt));
5976 ARMCondCode cc = iselCondCode(env, lg->guard);
5977 if (lg->cvt == ILGop_8Sto32) {
5978 addInstr(env, ARMInstr_Ld8S(cc, rD, am));
5979 } else {
5980 vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
5981 Bool sx = lg->cvt == ILGop_16Sto32;
5982 addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
5984 return;
5986 default:
5987 break;
5989 break;
5992 /* --------- PUT --------- */
5993 /* write guest state, fixed offset */
5994 case Ist_Put: {
5995 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5997 if (tyd == Ity_I32) {
5998 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5999 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
6000 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
6001 return;
6003 if (tyd == Ity_I64) {
6004 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6005 HReg addr = newVRegI(env);
6006 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
6007 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6008 stmt->Ist.Put.offset));
6009 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
6010 } else {
6011 HReg rDhi, rDlo;
6012 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
6013 stmt->Ist.Put.offset + 0);
6014 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
6015 stmt->Ist.Put.offset + 4);
6016 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
6017 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6018 rDhi, am4));
6019 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6020 rDlo, am0));
6022 return;
6024 if (tyd == Ity_F64) {
6025 // XXX This won't work if offset > 1020 or is not 0 % 4.
6026 // In which case we'll have to generate more longwinded code.
6027 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6028 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
6029 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
6030 return;
6032 if (tyd == Ity_F32) {
6033 // XXX This won't work if offset > 1020 or is not 0 % 4.
6034 // In which case we'll have to generate more longwinded code.
6035 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6036 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
6037 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
6038 return;
6040 if (tyd == Ity_V128) {
6041 HReg addr = newVRegI(env);
6042 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
6043 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6044 stmt->Ist.Put.offset));
6045 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
6046 return;
6048 break;
6051 /* --------- TMP --------- */
6052 /* assign value to temporary */
6053 case Ist_WrTmp: {
6054 IRTemp tmp = stmt->Ist.WrTmp.tmp;
6055 IRType ty = typeOfIRTemp(env->type_env, tmp);
6057 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6058 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
6059 env, stmt->Ist.WrTmp.data);
6060 HReg dst = lookupIRTemp(env, tmp);
6061 addInstr(env, ARMInstr_Mov(dst,ri84));
6062 return;
6064 if (ty == Ity_I1) {
6065 /* Here, we are generating a I1 value into a 32 bit register.
6066 Make sure the value in the register is only zero or one,
6067 but no other. This allows optimisation of the
6068 1Uto32(tmp:I1) case, by making it simply a copy of the
6069 register holding 'tmp'. The point being that the value in
6070 the register holding 'tmp' can only have been created
6071 here. */
6072 HReg dst = lookupIRTemp(env, tmp);
6073 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
6074 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
6075 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
6076 return;
6078 if (ty == Ity_I64) {
6079 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6080 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
6081 HReg dst = lookupIRTemp(env, tmp);
6082 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
6083 } else {
6084 HReg rHi, rLo, dstHi, dstLo;
6085 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
6086 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
6087 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
6088 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
6090 return;
6092 if (ty == Ity_F64) {
6093 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
6094 HReg dst = lookupIRTemp(env, tmp);
6095 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
6096 return;
6098 if (ty == Ity_F32) {
6099 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
6100 HReg dst = lookupIRTemp(env, tmp);
6101 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
6102 return;
6104 if (ty == Ity_V128) {
6105 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
6106 HReg dst = lookupIRTemp(env, tmp);
6107 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
6108 return;
6110 break;
6113 /* --------- Call to DIRTY helper --------- */
6114 /* call complex ("dirty") helper function */
6115 case Ist_Dirty: {
6116 IRDirty* d = stmt->Ist.Dirty.details;
6118 /* Figure out the return type, if any. */
6119 IRType retty = Ity_INVALID;
6120 if (d->tmp != IRTemp_INVALID)
6121 retty = typeOfIRTemp(env->type_env, d->tmp);
6123 Bool retty_ok = False;
6124 switch (retty) {
6125 case Ity_INVALID: /* function doesn't return anything */
6126 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6127 case Ity_V128:
6128 retty_ok = True; break;
6129 default:
6130 break;
6132 if (!retty_ok)
6133 break; /* will go to stmt_fail: */
6135 /* Marshal args, do the call, and set the return value to 0x555..555
6136 if this is a conditional call that returns a value and the
6137 call is skipped. */
6138 UInt addToSp = 0;
6139 RetLoc rloc = mk_RetLoc_INVALID();
6140 Bool ok = doHelperCall( &addToSp, &rloc, env,
6141 d->guard, d->cee, retty, d->args );
6142 if (!ok) goto stmt_fail;
6143 vassert(is_sane_RetLoc(rloc));
6145 /* Now figure out what to do with the returned value, if any. */
6146 switch (retty) {
6147 case Ity_INVALID: {
6148 /* No return value. Nothing to do. */
6149 vassert(d->tmp == IRTemp_INVALID);
6150 vassert(rloc.pri == RLPri_None);
6151 vassert(addToSp == 0);
6152 return;
6154 case Ity_I64: {
6155 vassert(rloc.pri == RLPri_2Int);
6156 vassert(addToSp == 0);
6157 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6158 HReg tmp = lookupIRTemp(env, d->tmp);
6159 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
6160 hregARM_R0()));
6161 } else {
6162 HReg dstHi, dstLo;
6163 /* The returned value is in r1:r0. Park it in the
6164 register-pair associated with tmp. */
6165 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
6166 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
6167 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
6169 return;
6171 case Ity_I32: case Ity_I16: case Ity_I8: {
6172 vassert(rloc.pri == RLPri_Int);
6173 vassert(addToSp == 0);
6174 /* The returned value is in r0. Park it in the register
6175 associated with tmp. */
6176 HReg dst = lookupIRTemp(env, d->tmp);
6177 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
6178 return;
6180 case Ity_V128: {
6181 /* The returned value is on the stack, and *retloc tells
6182 us where. Fish it off the stack and then move the
6183 stack pointer upwards to clear it, as directed by
6184 doHelperCall. */
6185 vassert(rloc.pri == RLPri_V128SpRel);
6186 vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
6187 vassert(addToSp >= 16);
6188 vassert(addToSp <= 256);
6189 /* Both the stack delta and the offset must be at least 8-aligned.
6190 If that isn't so, doHelperCall() has generated bad code. */
6191 vassert(0 == (rloc.spOff % 8));
6192 vassert(0 == (addToSp % 8));
6193 HReg dst = lookupIRTemp(env, d->tmp);
6194 HReg tmp = newVRegI(env);
6195 HReg sp = hregARM_R13();
6196 addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6197 tmp, sp, ARMRI84_I84(rloc.spOff,0)));
6198 ARMAModeN* am = mkARMAModeN_R(tmp);
6199 /* This load could be done with its effective address 0 % 8,
6200 because that's the best stack alignment that we can be
6201 assured of. */
6202 addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
6204 ARMRI84* spAdj
6205 = addToSp == 256 ? ARMRI84_I84(64, 15) // 64 `ror` (15 * 2)
6206 : ARMRI84_I84(addToSp, 0);
6207 addInstr(env, ARMInstr_Alu(ARMalu_ADD, sp, sp, spAdj));
6208 return;
6210 default:
6211 /*NOTREACHED*/
6212 vassert(0);
6214 break;
6217 /* --------- Load Linked and Store Conditional --------- */
6218 case Ist_LLSC: {
6219 if (stmt->Ist.LLSC.storedata == NULL) {
6220 /* LL */
6221 IRTemp res = stmt->Ist.LLSC.result;
6222 IRType ty = typeOfIRTemp(env->type_env, res);
6223 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6224 Int szB = 0;
6225 HReg r_dst = lookupIRTemp(env, res);
6226 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6227 switch (ty) {
6228 case Ity_I8: szB = 1; break;
6229 case Ity_I16: szB = 2; break;
6230 case Ity_I32: szB = 4; break;
6231 default: vassert(0);
6233 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6234 addInstr(env, ARMInstr_LdrEX(szB));
6235 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6236 return;
6238 if (ty == Ity_I64) {
6239 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6240 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6241 addInstr(env, ARMInstr_LdrEX(8));
6242 /* Result is in r3:r2. On a non-NEON capable CPU, we must
6243 move it into a result register pair. On a NEON capable
6244 CPU, the result register will be a 64 bit NEON
6245 register, so we must move it there instead. */
6246 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6247 HReg dst = lookupIRTemp(env, res);
6248 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6249 hregARM_R2()));
6250 } else {
6251 HReg r_dst_hi, r_dst_lo;
6252 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6253 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6254 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6256 return;
6258 /*NOTREACHED*/
6259 vassert(0);
6260 } else {
6261 /* SC */
6262 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6263 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6264 Int szB = 0;
6265 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6266 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6267 switch (tyd) {
6268 case Ity_I8: szB = 1; break;
6269 case Ity_I16: szB = 2; break;
6270 case Ity_I32: szB = 4; break;
6271 default: vassert(0);
6273 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6274 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6275 addInstr(env, ARMInstr_StrEX(szB));
6276 } else {
6277 vassert(tyd == Ity_I64);
6278 /* This is really ugly. There is no is/is-not NEON
6279 decision akin to the case for LL, because iselInt64Expr
6280 fudges this for us, and always gets the result into two
6281 GPRs even if this means moving it from a NEON
6282 register. */
6283 HReg rDhi, rDlo;
6284 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6285 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6286 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6287 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6288 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6289 addInstr(env, ARMInstr_StrEX(8));
6291 /* now r0 is 1 if failed, 0 if success. Change to IR
6292 conventions (0 is fail, 1 is success). Also transfer
6293 result to r_res. */
6294 IRTemp res = stmt->Ist.LLSC.result;
6295 IRType ty = typeOfIRTemp(env->type_env, res);
6296 HReg r_res = lookupIRTemp(env, res);
6297 ARMRI84* one = ARMRI84_I84(1,0);
6298 vassert(ty == Ity_I1);
6299 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6300 /* And be conservative -- mask off all but the lowest bit */
6301 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6302 return;
6304 break;
6307 /* --------- MEM FENCE --------- */
6308 case Ist_MBE:
6309 switch (stmt->Ist.MBE.event) {
6310 case Imbe_Fence:
6311 addInstr(env, ARMInstr_MFence());
6312 return;
6313 case Imbe_CancelReservation:
6314 addInstr(env, ARMInstr_CLREX());
6315 return;
6316 default:
6317 break;
6319 break;
6321 /* --------- INSTR MARK --------- */
6322 /* Doesn't generate any executable code ... */
6323 case Ist_IMark:
6324 return;
6326 /* --------- NO-OP --------- */
6327 case Ist_NoOp:
6328 return;
6330 /* --------- EXIT --------- */
6331 case Ist_Exit: {
6332 if (stmt->Ist.Exit.dst->tag != Ico_U32)
6333 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6335 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
6336 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
6337 stmt->Ist.Exit.offsIP);
6339 /* Case: boring transfer to known address */
6340 if (stmt->Ist.Exit.jk == Ijk_Boring
6341 || stmt->Ist.Exit.jk == Ijk_Call
6342 || stmt->Ist.Exit.jk == Ijk_Ret) {
6343 if (env->chainingAllowed) {
6344 /* .. almost always true .. */
6345 /* Skip the event check at the dst if this is a forwards
6346 edge. */
6347 Bool toFastEP
6348 = stmt->Ist.Exit.dst->Ico.U32 > env->max_ga;
6349 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6350 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6351 amR15T, cc, toFastEP));
6352 } else {
6353 /* .. very occasionally .. */
6354 /* We can't use chaining, so ask for an assisted transfer,
6355 as that's the only alternative that is allowable. */
6356 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6357 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6359 return;
6362 /* Case: assisted transfer to arbitrary address */
6363 switch (stmt->Ist.Exit.jk) {
6364 /* Keep this list in sync with that in iselNext below */
6365 case Ijk_ClientReq:
6366 case Ijk_NoDecode:
6367 case Ijk_NoRedir:
6368 case Ijk_Sys_syscall:
6369 case Ijk_InvalICache:
6370 case Ijk_Yield:
6372 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6373 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6374 stmt->Ist.Exit.jk));
6375 return;
6377 default:
6378 break;
6381 /* Do we ever expect to see any other kind? */
6382 goto stmt_fail;
6385 default: break;
6387 stmt_fail:
6388 ppIRStmt(stmt);
6389 vpanic("iselStmt");
6393 /*---------------------------------------------------------*/
6394 /*--- ISEL: Basic block terminators (Nexts) ---*/
6395 /*---------------------------------------------------------*/
6397 static void iselNext ( ISelEnv* env,
6398 IRExpr* next, IRJumpKind jk, Int offsIP )
6400 if (vex_traceflags & VEX_TRACE_VCODE) {
6401 vex_printf( "\n-- PUT(%d) = ", offsIP);
6402 ppIRExpr( next );
6403 vex_printf( "; exit-");
6404 ppIRJumpKind(jk);
6405 vex_printf( "\n");
6408 /* Case: boring transfer to known address */
6409 if (next->tag == Iex_Const) {
6410 IRConst* cdst = next->Iex.Const.con;
6411 vassert(cdst->tag == Ico_U32);
6412 if (jk == Ijk_Boring || jk == Ijk_Call) {
6413 /* Boring transfer to known address */
6414 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6415 if (env->chainingAllowed) {
6416 /* .. almost always true .. */
6417 /* Skip the event check at the dst if this is a forwards
6418 edge. */
6419 Bool toFastEP
6420 = cdst->Ico.U32 > env->max_ga;
6421 if (0) vex_printf("%s", toFastEP ? "X" : ".");
6422 addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
6423 amR15T, ARMcc_AL,
6424 toFastEP));
6425 } else {
6426 /* .. very occasionally .. */
6427 /* We can't use chaining, so ask for an assisted transfer,
6428 as that's the only alternative that is allowable. */
6429 HReg r = iselIntExpr_R(env, next);
6430 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6431 Ijk_Boring));
6433 return;
6437 /* Case: call/return (==boring) transfer to any address */
6438 switch (jk) {
6439 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6440 HReg r = iselIntExpr_R(env, next);
6441 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6442 if (env->chainingAllowed) {
6443 addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
6444 } else {
6445 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6446 Ijk_Boring));
6448 return;
6450 default:
6451 break;
6454 /* Case: assisted transfer to arbitrary address */
6455 switch (jk) {
6456 /* Keep this list in sync with that for Ist_Exit above */
6457 case Ijk_ClientReq:
6458 case Ijk_NoDecode:
6459 case Ijk_NoRedir:
6460 case Ijk_Sys_syscall:
6461 case Ijk_InvalICache:
6462 case Ijk_Yield:
6464 HReg r = iselIntExpr_R(env, next);
6465 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6466 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
6467 return;
6469 default:
6470 break;
6473 vex_printf( "\n-- PUT(%d) = ", offsIP);
6474 ppIRExpr( next );
6475 vex_printf( "; exit-");
6476 ppIRJumpKind(jk);
6477 vex_printf( "\n");
6478 vassert(0); // are we expecting any other kind?
6482 /*---------------------------------------------------------*/
6483 /*--- Insn selector top-level ---*/
6484 /*---------------------------------------------------------*/
6486 /* Translate an entire SB to arm code. */
6488 HInstrArray* iselSB_ARM ( const IRSB* bb,
6489 VexArch arch_host,
6490 const VexArchInfo* archinfo_host,
6491 const VexAbiInfo* vbi/*UNUSED*/,
6492 Int offs_Host_EvC_Counter,
6493 Int offs_Host_EvC_FailAddr,
6494 Bool chainingAllowed,
6495 Bool addProfInc,
6496 Addr max_ga )
6498 Int i, j;
6499 HReg hreg, hregHI;
6500 ISelEnv* env;
6501 UInt hwcaps_host = archinfo_host->hwcaps;
6502 ARMAMode1 *amCounter, *amFailAddr;
6504 /* sanity ... */
6505 vassert(arch_host == VexArchARM);
6507 /* Check that the host's endianness is as expected. */
6508 vassert(archinfo_host->endness == VexEndnessLE);
6510 /* guard against unexpected space regressions */
6511 vassert(sizeof(ARMInstr) <= 28);
6513 /* hwcaps should not change from one ISEL call to another. */
6514 arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
6516 /* Make up an initial environment to use. */
6517 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
6518 env->vreg_ctr = 0;
6520 /* Set up output code array. */
6521 env->code = newHInstrArray();
6523 /* Copy BB's type env. */
6524 env->type_env = bb->tyenv;
6526 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6527 change as we go along. */
6528 env->n_vregmap = bb->tyenv->types_used;
6529 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6530 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6532 /* and finally ... */
6533 env->chainingAllowed = chainingAllowed;
6534 env->hwcaps = hwcaps_host;
6535 env->max_ga = max_ga;
6537 /* For each IR temporary, allocate a suitably-kinded virtual
6538 register. */
6539 j = 0;
6540 for (i = 0; i < env->n_vregmap; i++) {
6541 hregHI = hreg = INVALID_HREG;
6542 switch (bb->tyenv->types[i]) {
6543 case Ity_I1:
6544 case Ity_I8:
6545 case Ity_I16:
6546 case Ity_I32: hreg = mkHReg(True, HRcInt32, 0, j++); break;
6547 case Ity_I64:
6548 if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
6549 hreg = mkHReg(True, HRcFlt64, 0, j++);
6550 } else {
6551 hregHI = mkHReg(True, HRcInt32, 0, j++);
6552 hreg = mkHReg(True, HRcInt32, 0, j++);
6554 break;
6555 case Ity_F32: hreg = mkHReg(True, HRcFlt32, 0, j++); break;
6556 case Ity_F64: hreg = mkHReg(True, HRcFlt64, 0, j++); break;
6557 case Ity_V128: hreg = mkHReg(True, HRcVec128, 0, j++); break;
6558 default: ppIRType(bb->tyenv->types[i]);
6559 vpanic("iselBB: IRTemp type");
6561 env->vregmap[i] = hreg;
6562 env->vregmapHI[i] = hregHI;
6564 env->vreg_ctr = j;
6566 /* The very first instruction must be an event check. */
6567 amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6568 amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6569 addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6571 /* Possibly a block counter increment (for profiling). At this
6572 point we don't know the address of the counter, so just pretend
6573 it is zero. It will have to be patched later, but before this
6574 translation is used, by a call to LibVEX_patchProfCtr. */
6575 if (addProfInc) {
6576 addInstr(env, ARMInstr_ProfInc());
6579 /* Ok, finally we can iterate over the statements. */
6580 for (i = 0; i < bb->stmts_used; i++)
6581 iselStmt(env, bb->stmts[i]);
6583 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6585 /* record the number of vregs we used. */
6586 env->code->n_vregs = env->vreg_ctr;
6587 return env->code;
6591 /*---------------------------------------------------------------*/
6592 /*--- end host_arm_isel.c ---*/
6593 /*---------------------------------------------------------------*/