Remove warning caused by D demangle testcase
[valgrind.git] / VEX / priv / host_arm_isel.c
blobacbd39ad4dddbf9b12eb54d13183c6cd3e08b829
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 NEON support is
14 Copyright (C) 2010-2017 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, see <http://www.gnu.org/licenses/>.
   The GNU General Public License is contained in the file COPYING. */
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
36 #include "libvex.h"
37 #include "ir_match.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "host_generic_regs.h"
42 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
43 #include "host_arm_defs.h"
46 /*---------------------------------------------------------*/
47 /*--- ARMvfp control word stuff ---*/
48 /*---------------------------------------------------------*/
50 /* Vex-generated code expects to run with the FPU set as follows: all
51 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
52 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
53 this corresponds to a FPSCR value of zero.
55 fpscr should therefore be zero on entry to Vex-generated code, and
56 should be unchanged at exit. (Or at least the bottom 28 bits
   should be zero). */
60 #define DEFAULT_FPSCR 0
63 /*---------------------------------------------------------*/
64 /*--- ISelEnv ---*/
65 /*---------------------------------------------------------*/
67 /* This carries around:
69 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
70 might encounter. This is computed before insn selection starts,
71 and does not change.
73 - A mapping from IRTemp to HReg. This tells the insn selector
74 which virtual register(s) are associated with each IRTemp
75 temporary. This is computed before insn selection starts, and
76 does not change. We expect this mapping to map precisely the
77 same set of IRTemps as the type mapping does.
79 - vregmap holds the primary register for the IRTemp.
80 - vregmapHI is only used for 64-bit integer-typed
81 IRTemps. It holds the identity of a second
82 32-bit virtual HReg, which holds the high half
83 of the value.
85 - The code array, that is, the insns selected so far.
87 - A counter, for generating new virtual registers.
89 - The host hardware capabilities word. This is set at the start
90 and does not change.
92 - A Bool for indicating whether we may generate chain-me
93 instructions for control flow transfers, or whether we must use
94 XAssisted.
96 - The maximum guest address of any guest insn in this block.
97 Actually, the address of the highest-addressed byte from any insn
98 in this block. Is set at the start and does not change. This is
99 used for detecting jumps which are definitely forward-edges from
100 this block, and therefore can be made (chained) to the fast entry
101 point of the destination, thereby avoiding the destination's
102 event check.
   Note, this is all (well, mostly) host-independent. */
typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv* type_env;

      /* vregmap holds the primary vreg for each IRTemp; vregmapHI is
         used only for 64-bit IRTemps, holding the vreg for the high
         32 bits.  Both map exactly the IRTemps that type_env does. */
      HReg* vregmap;
      HReg* vregmapHI;
      Int n_vregmap;

      /* Host hardware capabilities word (VEX_HWCAPS_ARM_*). */
      UInt hwcaps;

      /* May we generate chain-me's for control-flow transfers, or
         must we use XAssisted?  max_ga is the highest guest address
         of any insn in this block, for forward-edge detection. */
      Bool chainingAllowed;
      Addr32 max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int vreg_ctr;
   }
   ISelEnv;
/* Look up the (32-bit) vreg associated with IRTemp 'tmp'. */
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);  /* NOTE(review): IRTemp appears to be unsigned,
                          making this a tautology -- confirm. */
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}
/* Look up the vreg pair (high half in *vrHI, low half in *vrLO)
   associated with a 64-bit integer IRTemp 'tmp'. */
static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);  /* NOTE(review): tautological if IRTemp is
                          unsigned -- confirm. */
   vassert(tmp < env->n_vregmap);
   /* A valid high-half mapping must exist for 64-bit temps. */
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}
/* Append 'instr' to the code being generated, printing it first if
   instruction-selection tracing is enabled. */
static void addInstr ( ISelEnv* env, ARMInstr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppARMInstr(instr);
      vex_printf("\n");
   }
}
152 static HReg newVRegI ( ISelEnv* env )
154 HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
155 env->vreg_ctr++;
156 return reg;
159 static HReg newVRegD ( ISelEnv* env )
161 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
162 env->vreg_ctr++;
163 return reg;
166 static HReg newVRegF ( ISelEnv* env )
168 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt32, 0/*enc*/, env->vreg_ctr);
169 env->vreg_ctr++;
170 return reg;
173 static HReg newVRegV ( ISelEnv* env )
175 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
176 env->vreg_ctr++;
177 return reg;
/* These are duplicated in guest_arm_toIR.c */

/* Convenience constructor: build a unary-op IR expression node. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}
/* Convenience constructor: build a binary-op IR expression node. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}
/* Convenience constructor: build a binder node, used as a wildcard in
   ir_match pattern matching. */
static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}
197 /*---------------------------------------------------------*/
198 /*--- ISEL: Forward declarations ---*/
199 /*---------------------------------------------------------*/
201 /* These are organised as iselXXX and iselXXX_wrk pairs. The
202 iselXXX_wrk do the real work, but are not to be called directly.
203 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
204 checks that all returned registers are virtual. You should not
   call the _wrk version directly. */
207 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
208 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
210 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
211 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
213 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
214 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
216 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
217 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
219 static ARMRI84* iselIntExpr_RI84_wrk
220 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
221 static ARMRI84* iselIntExpr_RI84
222 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
224 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
225 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
227 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
228 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
230 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
231 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
233 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
234 ISelEnv* env, const IRExpr* e );
235 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
236 ISelEnv* env, const IRExpr* e );
238 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
239 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
241 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
242 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
244 static HReg iselNeon64Expr_wrk ( ISelEnv* env, const IRExpr* e );
245 static HReg iselNeon64Expr ( ISelEnv* env, const IRExpr* e );
247 static HReg iselNeonExpr_wrk ( ISelEnv* env, const IRExpr* e );
248 static HReg iselNeonExpr ( ISelEnv* env, const IRExpr* e );
250 /*---------------------------------------------------------*/
251 /*--- ISEL: Misc helpers ---*/
252 /*---------------------------------------------------------*/
254 static UInt ROR32 ( UInt x, UInt sh ) {
255 vassert(sh >= 0 && sh < 32);
256 if (sh == 0)
257 return x;
258 else
259 return (x << (32-sh)) | (x >> sh);
/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
   form, and if so return the components.  The form is an 8-bit value
   (*u8) rotated right by twice a 4-bit amount (*u4), as in ARM
   data-processing immediates. */
static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
{
   UInt i;
   for (i = 0; i < 16; i++) {
      /* Does the current rotation leave only the low 8 bits set? */
      if (0 == (u & 0xFFFFFF00)) {
         *u8 = u;
         *u4 = i;
         return True;
      }
      /* Rotate left by 2 (== rotate right by 30) and try again. */
      u = ROR32(u, 30);
   }
   vassert(i == 16);
   return False;
}
/* Make a int reg-reg move. */
static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
{
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   return ARMInstr_Mov(dst, ARMRI84_R(src));
}
/* Set the VFP unit's rounding mode to default (round to nearest), by
   writing DEFAULT_FPSCR (zero) to the whole FPSCR. */
static void set_VFP_rounding_default ( ISelEnv* env )
{
   /* mov rTmp, #DEFAULT_FPSCR
      fmxr fpscr, rTmp
   */
   HReg rTmp = newVRegI(env);
   addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
}
/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set FPSCR to have the
   same rounding.
*/
static
void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARMvfp one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARMvfp encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL = newVRegI(env);
   HReg tR = newVRegI(env);
   HReg t3 = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;
      t3 <<= 22;
      fmxr fpscr, t3
   */
   addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
   addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
   addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
   addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
   addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
   /* Shift into the FPSCR RMode field (bits 23:22). */
   addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
}
342 /*---------------------------------------------------------*/
343 /*--- ISEL: Function call helpers ---*/
344 /*---------------------------------------------------------*/
346 /* Used only in doHelperCall. See big comment in doHelperCall re
347 handling of register-parameter args. This function figures out
348 whether evaluation of an expression might require use of a fixed
349 register. If in doubt return True (safe but suboptimal).
351 static
352 Bool mightRequireFixedRegs ( IRExpr* e )
354 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
355 // These are always "safe" -- either a copy of r13(sp) in some
356 // arbitrary vreg, or a copy of r8, respectively.
357 return False;
359 /* Else it's a "normal" expression. */
360 switch (e->tag) {
361 case Iex_RdTmp: case Iex_Const: case Iex_Get:
362 return False;
363 default:
364 return True;
static
Bool doHelperCallWithArgsOnStack ( /*OUT*/UInt*   stackAdjustAfterCall,
                                   /*OUT*/RetLoc* retloc,
                                   ISelEnv* env,
                                   IRExpr* guard,
                                   IRCallee* cee, IRType retTy, IRExpr** args )
{
   /* This function deals just with the case where the arg sequence is:
      VECRET followed by between 4 and 12 Ity_I32 values.  So far no other
      cases are necessary or supported. */

   /* Check this matches the required format. */
   if (args[0] == NULL || args[0]->tag != Iex_VECRET)
      goto no_match;

   /* Count the real (non-VECRET) args, insisting they are all I32. */
   UInt i;
   UInt n_real_args = 0;
   for (i = 1; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)))
         goto no_match;
      IRType argTy = typeOfIRExpr(env->type_env, arg);
      if (UNLIKELY(argTy != Ity_I32))
         goto no_match;
      n_real_args++;
   }

   /* We expect to pass at least some args on the stack. */
   if (n_real_args <= 3)
      goto no_match;

   /* But not too many. */
   if (n_real_args > 12)
      goto no_match;

   /* General rules for a call:

      Args 1 .. 4 go in R0 .. R3.  The rest are pushed R to L on the
      stack; that is, arg 5 is at the lowest address, arg 6 at the
      next lowest, etc.

      The stack is to be kept 8 aligned.

      It appears (for unclear reasons) that the highest 3 words made
      available when moving SP downwards are not to be used.  For
      example, if 5 args are to go on the stack, then SP must be moved
      down 32 bytes, and the area at SP+20 .. SP+31 is not to be used
      by the caller.
   */

   /* For this particular case, we use the following layout:

        ------ original SP
        112 bytes
        ------
        return value
        ------ original SP - 128
        space
        args words, between 1 and 11
        ------ new SP = original_SP - 256

      Using 256 bytes is overkill, but it is simple and good enough.
   */

   /* This should really be
         HReg argVRegs[n_real_args];
      but that makes it impossible to do 'goto's forward past.
      Hence the following kludge. */
   vassert(n_real_args <= 12);
   HReg argVRegs[12];
   for (i = 0; i < 12; i++)
      argVRegs[i] = INVALID_HREG;

   /* Compute args into vregs. */
   for (i = 0; i < n_real_args; i++) {
      argVRegs[i] = iselIntExpr_R(env, args[i+1]);
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   ARMCondCode cc = ARMcc_AL;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         goto no_match; //ATC
         /* NOTE(review): unreachable while the goto above stands;
            presumably retained for a future conditional-call path. */
         cc = iselCondCode( env, guard );
      }
   }

   HReg r0 = hregARM_R0();
   HReg sp = hregARM_R13();

   ARMRI84* c256 = ARMRI84_I84(64, 15); // 64 `ror` (15 * 2)

   /* r0 (the hidden first arg) points at the V128 return area. */
   addInstr(env, ARMInstr_Alu(ARMalu_SUB, r0, sp, ARMRI84_I84(128, 0)));

   addInstr(env, mk_iMOVds_RR(hregARM_R1(), argVRegs[0]));
   addInstr(env, mk_iMOVds_RR(hregARM_R2(), argVRegs[1]));
   addInstr(env, mk_iMOVds_RR(hregARM_R3(), argVRegs[2]));

   addInstr(env, ARMInstr_Alu(ARMalu_SUB, sp, sp, c256));

   /* Remaining args go on the stack, lowest-addressed first. */
   for (i = 3; i < n_real_args; i++) {
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, argVRegs[i],
                                    ARMAMode1_RI(sp, (i-3) * 4)));
   }

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));

   *stackAdjustAfterCall = 256;
   *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 128);

   Addr32 target = (Addr)cee->addr;
   addInstr(env, ARMInstr_Call( cc, target, 4, *retloc ));

   return True; /* success */

  no_match:
   return False;
}
/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done.  Returns True iff it managed to handle this
   combination of arg/return types, else returns False. */

static
Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARMCondCode cc;
   HReg        argregs[ARM_N_ARGREGS];
   HReg        tmpregs[ARM_N_ARGREGS];
   Bool        go_fast;
   Int         n_args, i, nextArgReg;
   Addr32      target;

   vassert(ARM_N_ARGREGS == 4);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only ARM_N_REGPARMS
      x 32 integer bits in total can be passed.  In fact the only
      supported arg types are I32 and I64.

      The return type can be I{64,32} or V128.  In the V128 case, it
      is expected that |args| will contain the special node
      IRExpr_VECRET(), in which case this routine generates code to
      allocate space on the stack for the vector return value.  Since
      we are not passing any scalars on the stack, it is enough to
      preallocate the return space before marshalling any arguments,
      in this case.

      |args| may also contain IRExpr_GSPTR(), in which case the
      value in r8 is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on ARM hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         nGSPTRs++;
      }
      n_args++;
   }

   /* If there are more than 4 args, we are going to have to pass
      some via memory.  Use a different function to (possibly) deal with
      that; dealing with it here is too complex. */
   if (n_args > ARM_N_ARGREGS) {
      return doHelperCallWithArgsOnStack(stackAdjustAfterCall, retloc,
                                         env, guard, cee, retTy, args );
   }

   /* After this point we make no attempt to pass args on the stack,
      and just give up if that case (which is OK because it never
      happens).  Even if there are for example only 3 args, it might
      still be necessary to pass some of them on the stack if for example
      two or more of them are 64-bit integers. */

   argregs[0] = hregARM_R0();
   argregs[1] = hregARM_R1();
   argregs[2] = hregARM_R2();
   argregs[3] = hregARM_R3();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   if (go_fast) {
      /* V128/V256 returns need stack space set up first -- only the
         slow scheme handles that. */
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, arg);

         if (nextArgReg >= ARM_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I32) {
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
                                        iselIntExpr_R(env, arg) ));
            nextArgReg++;
         }
         else if (aTy == Ity_I64) {
            /* 64-bit args must be passed in an a reg-pair of the form
               n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
               On a little-endian host, the less significant word is
               passed in the lower-numbered register. */
            if (nextArgReg & 1) {
               if (nextArgReg >= ARM_N_ARGREGS)
                  return False; /* out of argregs */
               /* Pad the odd register with a recognisable junk value. */
               addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
               nextArgReg++;
            }
            if (nextArgReg >= ARM_N_ARGREGS)
               return False; /* out of argregs */
            HReg raHi, raLo;
            iselInt64Expr(&raHi, &raLo, env, arg);
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
            nextArgReg++;
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
                                        hregARM_R8() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // If this happens, it denotes ill-formed IR
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARMcc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, arg);

         if (nextArgReg >= ARM_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I32) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (aTy == Ity_I64) {
            /* Same comment applies as in the Fast-scheme case. */
            if (nextArgReg & 1)
               nextArgReg++;
            if (nextArgReg + 1 >= ARM_N_ARGREGS)
               return False; /* out of argregs */
            HReg raHi, raLo;
            iselInt64Expr(&raHi, &raLo, env, args[i]);
            tmpregs[nextArgReg] = raLo;
            nextArgReg++;
            tmpregs[nextArgReg] = raHi;
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC
            tmpregs[nextArgReg] = hregARM_R8();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // If this happens, it denotes ill-formed IR
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARMcc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs
            addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
            continue;
         }
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);
   vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64:
         *retloc = mk_RetLoc_simple(RLPri_2Int);
         break;
      case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order r0, r1, r2, r3, its numeric value at this
      point, which must be between 0 and 4 inclusive, is going to be
      equal to the number of arg regs in use for the call.  Hence bake
      that number into the call (we'll need to know it when doing
      register allocation, to know what regs the call reads.)

      There is a bit of a twist -- harmless but worth recording.
      Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
      the first arg in r0 and the second in r3:r2, but r1 isn't used.
      We nevertheless have nextArgReg==4 and bake that into the call
      instruction.  This will mean the register allocator wil believe
      this insn reads r1 when in fact it doesn't.  But that's
      harmless; it just artificially extends the live range of r1
      unnecessarily.  The best fix would be to put into the
      instruction, a bitmask indicating which of r0/1/2/3 carry live
      values.  But that's too much hassle. */

   target = (Addr)cee->addr;
   addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}
851 /*---------------------------------------------------------*/
852 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
853 /*---------------------------------------------------------*/
855 /* Select insns for an integer-typed expression, and add them to the
856 code list. Return a reg holding the result. This reg will be a
857 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
858 want to modify it, ask for a new vreg, copy it in there, and modify
859 the copy. The register allocator will do its best to map both
860 vregs to the same real register, so the copies will often disappear
861 later in the game.
863 This should handle expressions of 32, 16 and 8-bit type. All
864 results are returned in a 32-bit register. For 16- and 8-bit
865 expressions, the upper 16/24 bits are arbitrary, so you should mask
   or sign extend partial values if necessary. */
869 /* --------------------- AMode1 --------------------- */
/* Return an AMode1 which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 32-bit one.
*/

/* Sanity-check an AMode1 produced by the _wrk function: right register
   classes, virtual registers only (r8 excepted for the RI form), and
   in-range immediates/shifts. */
static Bool sane_AMode1 ( ARMAMode1* am )
{
   switch (am->tag) {
      case ARMam1_RI:
         return
            toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
                    && (hregIsVirtual(am->ARMam1.RI.reg)
                        || sameHReg(am->ARMam1.RI.reg, hregARM_R8()))
                    && am->ARMam1.RI.simm13 >= -4095
                    && am->ARMam1.RI.simm13 <= 4095 );
      case ARMam1_RRS:
         return
            toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
                    && hregIsVirtual(am->ARMam1.RRS.base)
                    && hregClass(am->ARMam1.RRS.index) == HRcInt32
                    && hregIsVirtual(am->ARMam1.RRS.index)
                    && am->ARMam1.RRS.shift >= 0
                    && am->ARMam1.RRS.shift <= 3 );
      default:
         vpanic("sane_AMode: unknown ARM AMode1 tag");
   }
}
/* Checked wrapper for iselIntExpr_AMode1_wrk -- do not call _wrk
   directly. */
static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
{
   ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
   vassert(sane_AMode1(am));
   return am;
}
/* Real work for iselIntExpr_AMode1: match reg+simm13 address forms,
   else fall back to computing the address into a register. */
static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32);

   /* FIXME: add RRS matching */

   /* {Add32,Sub32}(expr,simm13) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
      if (simm >= -4095 && simm <= 4095) {
         HReg reg;
         if (e->Iex.Binop.op == Iop_Sub32)
            simm = -simm;
         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         return ARMAMode1_RI(reg, simm);
      }
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg reg = iselIntExpr_R(env, e);
      return ARMAMode1_RI(reg, 0);
   }
}
938 /* --------------------- AMode2 --------------------- */
/* Return an AMode2 which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 32-bit one.
*/

/* Sanity-check an AMode2: virtual int registers and simm9 in range. */
static Bool sane_AMode2 ( ARMAMode2* am )
{
   switch (am->tag) {
      case ARMam2_RI:
         return
            toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
                    && hregIsVirtual(am->ARMam2.RI.reg)
                    && am->ARMam2.RI.simm9 >= -255
                    && am->ARMam2.RI.simm9 <= 255 );
      case ARMam2_RR:
         return
            toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
                    && hregIsVirtual(am->ARMam2.RR.base)
                    && hregClass(am->ARMam2.RR.index) == HRcInt32
                    && hregIsVirtual(am->ARMam2.RR.index) );
      default:
         vpanic("sane_AMode: unknown ARM AMode2 tag");
   }
}
/* Checked wrapper for iselIntExpr_AMode2_wrk -- do not call _wrk
   directly. */
static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
{
   ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
   vassert(sane_AMode2(am));
   return am;
}
/* Real work for iselIntExpr_AMode2: match reg+simm9 address forms,
   else fall back to computing the address into a register. */
static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32);

   /* FIXME: add RR matching */

   /* {Add32,Sub32}(expr,simm8) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
      if (simm >= -255 && simm <= 255) {
         HReg reg;
         if (e->Iex.Binop.op == Iop_Sub32)
            simm = -simm;
         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         return ARMAMode2_RI(reg, simm);
      }
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg reg = iselIntExpr_R(env, e);
      return ARMAMode2_RI(reg, 0);
   }
}
1004 /* --------------------- AModeV --------------------- */
/* Return an AModeV which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 32-bit one.
*/

/* Sanity-check an AModeV: virtual int register, word-aligned simm11
   in the VFP load/store range. */
static Bool sane_AModeV ( ARMAModeV* am )
{
  return toBool( hregClass(am->reg) == HRcInt32
                 && hregIsVirtual(am->reg)
                 && am->simm11 >= -1020 && am->simm11 <= 1020
                 && 0 == (am->simm11 & 3) );
}
/* Checked wrapper for iselIntExpr_AModeV_wrk -- do not call _wrk
   directly. */
static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
{
   ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
   vassert(sane_AModeV(am));
   return am;
}
1026 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
1028 IRType ty = typeOfIRExpr(env->type_env,e);
1029 vassert(ty == Ity_I32);
1031 /* {Add32,Sub32}(expr, simm8 << 2) */
1032 if (e->tag == Iex_Binop
1033 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
1034 && e->Iex.Binop.arg2->tag == Iex_Const
1035 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1036 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1037 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
1038 HReg reg;
1039 if (e->Iex.Binop.op == Iop_Sub32)
1040 simm = -simm;
1041 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
1042 return mkARMAModeV(reg, simm);
1046 /* Doesn't match anything in particular. Generate it into
1047 a register and use that. */
1049 HReg reg = iselIntExpr_R(env, e);
1050 return mkARMAModeV(reg, 0);
1055 /* -------------------- AModeN -------------------- */
1057 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
1059 return iselIntExpr_AModeN_wrk(env, e);
1062 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
1064 HReg reg = iselIntExpr_R(env, e);
1065 return mkARMAModeN_R(reg);
1069 /* --------------------- RI84 --------------------- */
1071 /* Select instructions to generate 'e' into a RI84. If mayInv is
1072 true, then the caller will also accept an I84 form that denotes
1073 'not e'. In this case didInv may not be NULL, and *didInv is set
1074 to True. This complication is so as to allow generation of an RI84
1075 which is suitable for use in either an AND or BIC instruction,
1076 without knowing (before this call) which one.
1078 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
1079 ISelEnv* env, IRExpr* e )
1081 ARMRI84* ri;
1082 if (mayInv)
1083 vassert(didInv != NULL);
1084 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
1085 /* sanity checks ... */
1086 switch (ri->tag) {
1087 case ARMri84_I84:
1088 return ri;
1089 case ARMri84_R:
1090 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
1091 vassert(hregIsVirtual(ri->ARMri84.R.reg));
1092 return ri;
1093 default:
1094 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1098 /* DO NOT CALL THIS DIRECTLY ! */
1099 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
1100 ISelEnv* env, IRExpr* e )
1102 IRType ty = typeOfIRExpr(env->type_env,e);
1103 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1105 if (didInv) *didInv = False;
1107 /* special case: immediate */
1108 if (e->tag == Iex_Const) {
1109 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
1110 switch (e->Iex.Const.con->tag) {
1111 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1112 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1113 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1114 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1116 if (fitsIn8x4(&u8, &u4, u)) {
1117 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1119 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
1120 vassert(didInv);
1121 *didInv = True;
1122 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1124 /* else fail, fall through to default case */
1127 /* default case: calculate into a register and return that */
1129 HReg r = iselIntExpr_R ( env, e );
1130 return ARMRI84_R(r);
1135 /* --------------------- RI5 --------------------- */
1137 /* Select instructions to generate 'e' into a RI5. */
1139 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
1141 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
1142 /* sanity checks ... */
1143 switch (ri->tag) {
1144 case ARMri5_I5:
1145 return ri;
1146 case ARMri5_R:
1147 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
1148 vassert(hregIsVirtual(ri->ARMri5.R.reg));
1149 return ri;
1150 default:
1151 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
1155 /* DO NOT CALL THIS DIRECTLY ! */
1156 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
1158 IRType ty = typeOfIRExpr(env->type_env,e);
1159 vassert(ty == Ity_I32 || ty == Ity_I8);
1161 /* special case: immediate */
1162 if (e->tag == Iex_Const) {
1163 UInt u; /* both invalid */
1164 switch (e->Iex.Const.con->tag) {
1165 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1166 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1167 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1168 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
1170 if (u >= 1 && u <= 31) {
1171 return ARMRI5_I5(u);
1173 /* else fail, fall through to default case */
1176 /* default case: calculate into a register and return that */
1178 HReg r = iselIntExpr_R ( env, e );
1179 return ARMRI5_R(r);
1184 /* ------------------- CondCode ------------------- */
1186 /* Generate code to evaluate a bit-typed expression, returning the
1187 condition code which would correspond when the expression would
1188 notionally have returned 1. */
1190 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1192 ARMCondCode cc = iselCondCode_wrk(env,e);
1193 vassert(cc != ARMcc_NV);
1194 return cc;
1197 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1199 vassert(e);
1200 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1202 /* var */
1203 if (e->tag == Iex_RdTmp) {
1204 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1205 /* CmpOrTst doesn't modify rTmp; so this is OK. */
1206 ARMRI84* one = ARMRI84_I84(1,0);
1207 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1208 return ARMcc_NE;
1211 /* Not1(e) */
1212 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1213 /* Generate code for the arg, and negate the test condition */
1214 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1217 /* --- patterns rooted at: 32to1 --- */
1219 if (e->tag == Iex_Unop
1220 && e->Iex.Unop.op == Iop_32to1) {
1221 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1222 ARMRI84* one = ARMRI84_I84(1,0);
1223 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1224 return ARMcc_NE;
1227 /* --- patterns rooted at: CmpNEZ8 --- */
1229 if (e->tag == Iex_Unop
1230 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1231 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1232 ARMRI84* xFF = ARMRI84_I84(0xFF,0);
1233 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1234 return ARMcc_NE;
1237 /* --- patterns rooted at: CmpNEZ32 --- */
1239 if (e->tag == Iex_Unop
1240 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1241 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1242 ARMRI84* zero = ARMRI84_I84(0,0);
1243 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1244 return ARMcc_NE;
1247 /* --- patterns rooted at: CmpNEZ64 --- */
1249 if (e->tag == Iex_Unop
1250 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1251 HReg tHi, tLo;
1252 HReg tmp = newVRegI(env);
1253 ARMRI84* zero = ARMRI84_I84(0,0);
1254 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1255 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1256 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1257 return ARMcc_NE;
1260 /* --- Cmp*32*(x,y) --- */
1261 if (e->tag == Iex_Binop
1262 && (e->Iex.Binop.op == Iop_CmpEQ32
1263 || e->Iex.Binop.op == Iop_CmpNE32
1264 || e->Iex.Binop.op == Iop_CmpLT32S
1265 || e->Iex.Binop.op == Iop_CmpLT32U
1266 || e->Iex.Binop.op == Iop_CmpLE32S
1267 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1268 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1269 ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1270 env, e->Iex.Binop.arg2);
1271 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1272 switch (e->Iex.Binop.op) {
1273 case Iop_CmpEQ32: return ARMcc_EQ;
1274 case Iop_CmpNE32: return ARMcc_NE;
1275 case Iop_CmpLT32S: return ARMcc_LT;
1276 case Iop_CmpLT32U: return ARMcc_LO;
1277 case Iop_CmpLE32S: return ARMcc_LE;
1278 case Iop_CmpLE32U: return ARMcc_LS;
1279 default: vpanic("iselCondCode(arm): CmpXX32");
1283 /* const */
1284 /* Constant 1:Bit */
1285 if (e->tag == Iex_Const) {
1286 HReg r;
1287 vassert(e->Iex.Const.con->tag == Ico_U1);
1288 vassert(e->Iex.Const.con->Ico.U1 == True
1289 || e->Iex.Const.con->Ico.U1 == False);
1290 r = newVRegI(env);
1291 addInstr(env, ARMInstr_Imm32(r, 0));
1292 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1293 return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1296 /* --- And1(x,y), Or1(x,y) --- */
1297 /* FIXME: We could (and probably should) do a lot better here, by using the
1298 iselCondCode_C/_R scheme used in the amd64 insn selector. */
1299 if (e->tag == Iex_Binop
1300 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1301 HReg x_as_32 = newVRegI(env);
1302 ARMCondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1);
1303 addInstr(env, ARMInstr_Mov(x_as_32, ARMRI84_I84(0,0)));
1304 addInstr(env, ARMInstr_CMov(cc_x, x_as_32, ARMRI84_I84(1,0)));
1306 HReg y_as_32 = newVRegI(env);
1307 ARMCondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2);
1308 addInstr(env, ARMInstr_Mov(y_as_32, ARMRI84_I84(0,0)));
1309 addInstr(env, ARMInstr_CMov(cc_y, y_as_32, ARMRI84_I84(1,0)));
1311 HReg tmp = newVRegI(env);
1312 ARMAluOp aop = e->Iex.Binop.op == Iop_And1 ? ARMalu_AND : ARMalu_OR;
1313 addInstr(env, ARMInstr_Alu(aop, tmp, x_as_32, ARMRI84_R(y_as_32)));
1315 ARMRI84* one = ARMRI84_I84(1,0);
1316 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, tmp, one));
1317 return ARMcc_NE;
1320 // JRS 2013-Jan-03: this seems completely nonsensical
1321 /* --- CasCmpEQ* --- */
1322 /* Ist_Cas has a dummy argument to compare with, so comparison is
1323 always true. */
1324 //if (e->tag == Iex_Binop
1325 // && (e->Iex.Binop.op == Iop_CasCmpEQ32
1326 // || e->Iex.Binop.op == Iop_CasCmpEQ16
1327 // || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1328 // return ARMcc_AL;
1331 ppIRExpr(e);
1332 vpanic("iselCondCode");
1336 /* --------------------- Reg --------------------- */
1338 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1340 HReg r = iselIntExpr_R_wrk(env, e);
1341 /* sanity checks ... */
1342 # if 0
1343 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1344 # endif
1345 vassert(hregClass(r) == HRcInt32);
1346 vassert(hregIsVirtual(r));
1347 return r;
1350 /* DO NOT CALL THIS DIRECTLY ! */
1351 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1353 IRType ty = typeOfIRExpr(env->type_env,e);
1354 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1356 switch (e->tag) {
1358 /* --------- TEMP --------- */
1359 case Iex_RdTmp: {
1360 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1363 /* --------- LOAD --------- */
1364 case Iex_Load: {
1365 HReg dst = newVRegI(env);
1367 if (e->Iex.Load.end != Iend_LE)
1368 goto irreducible;
1370 if (ty == Ity_I32) {
1371 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1372 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
1373 return dst;
1375 if (ty == Ity_I16) {
1376 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1377 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
1378 True/*isLoad*/, False/*!signedLoad*/,
1379 dst, amode));
1380 return dst;
1382 if (ty == Ity_I8) {
1383 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1384 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
1385 return dst;
1387 break;
1390 //zz /* --------- TERNARY OP --------- */
1391 //zz case Iex_Triop: {
1392 //zz IRTriop *triop = e->Iex.Triop.details;
1393 //zz /* C3210 flags following FPU partial remainder (fprem), both
1394 //zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1395 //zz if (triop->op == Iop_PRemC3210F64
1396 //zz || triop->op == Iop_PRem1C3210F64) {
1397 //zz HReg junk = newVRegF(env);
1398 //zz HReg dst = newVRegI(env);
1399 //zz HReg srcL = iselDblExpr(env, triop->arg2);
1400 //zz HReg srcR = iselDblExpr(env, triop->arg3);
1401 //zz /* XXXROUNDINGFIXME */
1402 //zz /* set roundingmode here */
1403 //zz addInstr(env, X86Instr_FpBinary(
1404 //zz e->Iex.Binop.op==Iop_PRemC3210F64
1405 //zz ? Xfp_PREM : Xfp_PREM1,
1406 //zz srcL,srcR,junk
1407 //zz ));
1408 //zz /* The previous pseudo-insn will have left the FPU's C3210
1409 //zz flags set correctly. So bag them. */
1410 //zz addInstr(env, X86Instr_FpStSW_AX());
1411 //zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1412 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1413 //zz return dst;
1414 //zz }
1415 //zz
1416 //zz break;
1417 //zz }
1419 /* --------- BINARY OP --------- */
1420 case Iex_Binop: {
1422 ARMAluOp aop = 0; /* invalid */
1423 ARMShiftOp sop = 0; /* invalid */
1425 /* ADD/SUB/AND/OR/XOR */
1426 switch (e->Iex.Binop.op) {
1427 case Iop_And32: {
1428 Bool didInv = False;
1429 HReg dst = newVRegI(env);
1430 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1431 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1432 env, e->Iex.Binop.arg2);
1433 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1434 dst, argL, argR));
1435 return dst;
1437 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1438 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1439 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1440 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1441 std_binop: {
1442 HReg dst = newVRegI(env);
1443 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1444 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1445 env, e->Iex.Binop.arg2);
1446 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1447 return dst;
1449 default: break;
1452 /* SHL/SHR/SAR */
1453 switch (e->Iex.Binop.op) {
1454 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1455 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1456 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1457 sh_binop: {
1458 HReg dst = newVRegI(env);
1459 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1460 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1461 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1462 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1463 return dst;
1465 default: break;
1468 /* MUL */
1469 if (e->Iex.Binop.op == Iop_Mul32) {
1470 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1471 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1472 HReg dst = newVRegI(env);
1473 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1474 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1475 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1476 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1477 return dst;
1480 /* Handle misc other ops. */
1482 if (e->Iex.Binop.op == Iop_Max32U) {
1483 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1484 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1485 HReg dst = newVRegI(env);
1486 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1487 ARMRI84_R(argR)));
1488 addInstr(env, mk_iMOVds_RR(dst, argL));
1489 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1490 return dst;
1493 if (e->Iex.Binop.op == Iop_CmpF64) {
1494 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1495 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1496 HReg dst = newVRegI(env);
1497 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1498 FMSTAT, so we can examine the results directly. */
1499 addInstr(env, ARMInstr_VCmpD(dL, dR));
1500 /* Create in dst, the IRCmpF64Result encoded result. */
1501 addInstr(env, ARMInstr_Imm32(dst, 0));
1502 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1503 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1504 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1505 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1506 return dst;
1509 if (e->Iex.Binop.op == Iop_F64toI32S
1510 || e->Iex.Binop.op == Iop_F64toI32U) {
1511 /* Wretched uglyness all round, due to having to deal
1512 with rounding modes. Oh well. */
1513 /* FIXME: if arg1 is a constant indicating round-to-zero,
1514 then we could skip all this arsing around with FPSCR and
1515 simply emit FTO{S,U}IZD. */
1516 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1517 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1518 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1519 /* FTO{S,U}ID valF, valD */
1520 HReg valF = newVRegF(env);
1521 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1522 valF, valD));
1523 set_VFP_rounding_default(env);
1524 /* VMOV dst, valF */
1525 HReg dst = newVRegI(env);
1526 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1527 return dst;
1530 if (e->Iex.Binop.op == Iop_GetElem8x8
1531 || e->Iex.Binop.op == Iop_GetElem16x4
1532 || e->Iex.Binop.op == Iop_GetElem32x2) {
1533 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1534 HReg res = newVRegI(env);
1535 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1536 UInt index, size;
1537 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1538 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1539 vpanic("ARM target supports GetElem with constant "
1540 "second argument only (neon)\n");
1542 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1543 switch (e->Iex.Binop.op) {
1544 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1545 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1546 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1547 default: vassert(0);
1549 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1550 mkARMNRS(ARMNRS_Reg, res, 0),
1551 mkARMNRS(ARMNRS_Scalar, arg, index),
1552 size, False));
1553 return res;
1557 if (e->Iex.Binop.op == Iop_GetElem32x2
1558 && e->Iex.Binop.arg2->tag == Iex_Const
1559 && !(env->hwcaps & VEX_HWCAPS_ARM_NEON)) {
1560 /* We may have to do GetElem32x2 on a non-NEON capable
1561 target. */
1562 IRConst* con = e->Iex.Binop.arg2->Iex.Const.con;
1563 vassert(con->tag == Ico_U8); /* else IR is ill-typed */
1564 UInt index = con->Ico.U8;
1565 if (index >= 0 && index <= 1) {
1566 HReg rHi, rLo;
1567 iselInt64Expr(&rHi, &rLo, env, e->Iex.Binop.arg1);
1568 return index == 0 ? rLo : rHi;
1572 if (e->Iex.Binop.op == Iop_GetElem8x16
1573 || e->Iex.Binop.op == Iop_GetElem16x8
1574 || e->Iex.Binop.op == Iop_GetElem32x4) {
1575 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1576 HReg res = newVRegI(env);
1577 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1578 UInt index, size;
1579 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1580 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1581 vpanic("ARM target supports GetElem with constant "
1582 "second argument only (neon)\n");
1584 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1585 switch (e->Iex.Binop.op) {
1586 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1587 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1588 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1589 default: vassert(0);
1591 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1592 mkARMNRS(ARMNRS_Reg, res, 0),
1593 mkARMNRS(ARMNRS_Scalar, arg, index),
1594 size, True));
1595 return res;
1599 /* All cases involving host-side helper calls. */
1600 void* fn = NULL;
1601 switch (e->Iex.Binop.op) {
1602 case Iop_Add16x2:
1603 fn = &h_generic_calc_Add16x2; break;
1604 case Iop_Sub16x2:
1605 fn = &h_generic_calc_Sub16x2; break;
1606 case Iop_HAdd16Ux2:
1607 fn = &h_generic_calc_HAdd16Ux2; break;
1608 case Iop_HAdd16Sx2:
1609 fn = &h_generic_calc_HAdd16Sx2; break;
1610 case Iop_HSub16Ux2:
1611 fn = &h_generic_calc_HSub16Ux2; break;
1612 case Iop_HSub16Sx2:
1613 fn = &h_generic_calc_HSub16Sx2; break;
1614 case Iop_QAdd16Sx2:
1615 fn = &h_generic_calc_QAdd16Sx2; break;
1616 case Iop_QAdd16Ux2:
1617 fn = &h_generic_calc_QAdd16Ux2; break;
1618 case Iop_QSub16Sx2:
1619 fn = &h_generic_calc_QSub16Sx2; break;
1620 case Iop_Add8x4:
1621 fn = &h_generic_calc_Add8x4; break;
1622 case Iop_Sub8x4:
1623 fn = &h_generic_calc_Sub8x4; break;
1624 case Iop_HAdd8Ux4:
1625 fn = &h_generic_calc_HAdd8Ux4; break;
1626 case Iop_HAdd8Sx4:
1627 fn = &h_generic_calc_HAdd8Sx4; break;
1628 case Iop_HSub8Ux4:
1629 fn = &h_generic_calc_HSub8Ux4; break;
1630 case Iop_HSub8Sx4:
1631 fn = &h_generic_calc_HSub8Sx4; break;
1632 case Iop_QAdd8Sx4:
1633 fn = &h_generic_calc_QAdd8Sx4; break;
1634 case Iop_QAdd8Ux4:
1635 fn = &h_generic_calc_QAdd8Ux4; break;
1636 case Iop_QSub8Sx4:
1637 fn = &h_generic_calc_QSub8Sx4; break;
1638 case Iop_QSub8Ux4:
1639 fn = &h_generic_calc_QSub8Ux4; break;
1640 case Iop_Sad8Ux4:
1641 fn = &h_generic_calc_Sad8Ux4; break;
1642 case Iop_QAdd32S:
1643 fn = &h_generic_calc_QAdd32S; break;
1644 case Iop_QSub32S:
1645 fn = &h_generic_calc_QSub32S; break;
1646 case Iop_QSub16Ux2:
1647 fn = &h_generic_calc_QSub16Ux2; break;
1648 case Iop_DivU32:
1649 fn = &h_calc_udiv32_w_arm_semantics; break;
1650 case Iop_DivS32:
1651 fn = &h_calc_sdiv32_w_arm_semantics; break;
1652 default:
1653 break;
1656 if (fn) {
1657 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1658 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1659 HReg res = newVRegI(env);
1660 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1661 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1662 addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1663 2, mk_RetLoc_simple(RLPri_Int) ));
1664 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1665 return res;
1668 break;
1671 /* --------- UNARY OP --------- */
1672 case Iex_Unop: {
1674 //zz /* 1Uto8(32to1(expr32)) */
1675 //zz if (e->Iex.Unop.op == Iop_1Uto8) {
1676 //zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1677 //zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1678 //zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1679 //zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1680 //zz const IRExpr* expr32 = mi.bindee[0];
1681 //zz HReg dst = newVRegI(env);
1682 //zz HReg src = iselIntExpr_R(env, expr32);
1683 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1684 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1685 //zz X86RMI_Imm(1), dst));
1686 //zz return dst;
1687 //zz }
1688 //zz }
1689 //zz
1690 //zz /* 8Uto32(LDle(expr32)) */
1691 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1692 //zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1693 //zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1694 //zz unop(Iop_8Uto32,
1695 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1696 //zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1697 //zz HReg dst = newVRegI(env);
1698 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1699 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1700 //zz return dst;
1701 //zz }
1702 //zz }
1703 //zz
1704 //zz /* 8Sto32(LDle(expr32)) */
1705 //zz if (e->Iex.Unop.op == Iop_8Sto32) {
1706 //zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1707 //zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1708 //zz unop(Iop_8Sto32,
1709 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1710 //zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1711 //zz HReg dst = newVRegI(env);
1712 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1713 //zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1714 //zz return dst;
1715 //zz }
1716 //zz }
1717 //zz
1718 //zz /* 16Uto32(LDle(expr32)) */
1719 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1720 //zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1721 //zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1722 //zz unop(Iop_16Uto32,
1723 //zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1724 //zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1725 //zz HReg dst = newVRegI(env);
1726 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1727 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1728 //zz return dst;
1729 //zz }
1730 //zz }
1731 //zz
1732 //zz /* 8Uto32(GET:I8) */
1733 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1734 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1735 //zz HReg dst;
1736 //zz X86AMode* amode;
1737 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1738 //zz dst = newVRegI(env);
1739 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1740 //zz hregX86_EBP());
1741 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1742 //zz return dst;
1743 //zz }
1744 //zz }
1745 //zz
1746 //zz /* 16to32(GET:I16) */
1747 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1748 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1749 //zz HReg dst;
1750 //zz X86AMode* amode;
1751 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1752 //zz dst = newVRegI(env);
1753 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1754 //zz hregX86_EBP());
1755 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1756 //zz return dst;
1757 //zz }
1758 //zz }
1760 switch (e->Iex.Unop.op) {
1761 case Iop_8Uto32: {
1762 HReg dst = newVRegI(env);
1763 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1764 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1765 dst, src, ARMRI84_I84(0xFF,0)));
1766 return dst;
1768 //zz case Iop_8Uto16:
1769 //zz case Iop_8Uto32:
1770 //zz case Iop_16Uto32: {
1771 //zz HReg dst = newVRegI(env);
1772 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1773 //zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1774 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1775 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1776 //zz X86RMI_Imm(mask), dst));
1777 //zz return dst;
1778 //zz }
1779 //zz case Iop_8Sto16:
1780 //zz case Iop_8Sto32:
1781 case Iop_16Uto32: {
1782 HReg dst = newVRegI(env);
1783 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1784 ARMRI5* amt = ARMRI5_I5(16);
1785 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1786 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1787 return dst;
1789 case Iop_8Sto32:
1790 case Iop_16Sto32: {
1791 HReg dst = newVRegI(env);
1792 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1793 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1794 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1795 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1796 return dst;
1798 //zz case Iop_Not8:
1799 //zz case Iop_Not16:
1800 case Iop_Not32: {
1801 HReg dst = newVRegI(env);
1802 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1803 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1804 return dst;
1806 case Iop_64HIto32: {
1807 HReg rHi, rLo;
1808 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1809 return rHi; /* and abandon rLo .. poor wee thing :-) */
1811 case Iop_64to32: {
1812 HReg rHi, rLo;
1813 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1814 return rLo; /* similar stupid comment to the above ... */
1816 case Iop_64to8: {
1817 HReg rHi, rLo;
1818 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1819 HReg tHi = newVRegI(env);
1820 HReg tLo = newVRegI(env);
1821 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1822 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1823 rHi = tHi;
1824 rLo = tLo;
1825 } else {
1826 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1828 return rLo;
1831 case Iop_1Uto32:
1832 /* 1Uto32(tmp). Since I1 values generated into registers
1833 are guaranteed to have value either only zero or one,
1834 we can simply return the value of the register in this
1835 case. */
1836 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1837 HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1838 return dst;
1840 /* else fall through */
1841 case Iop_1Uto8: {
1842 HReg dst = newVRegI(env);
1843 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1844 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1845 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1846 return dst;
1849 case Iop_1Sto32: {
1850 HReg dst = newVRegI(env);
1851 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1852 ARMRI5* amt = ARMRI5_I5(31);
1853 /* This is really rough. We could do much better here;
1854 perhaps mvn{cond} dst, #0 as the second insn?
1855 (same applies to 1Sto64) */
1856 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1857 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1858 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1859 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1860 return dst;
1864 //zz case Iop_1Sto8:
1865 //zz case Iop_1Sto16:
1866 //zz case Iop_1Sto32: {
1867 //zz /* could do better than this, but for now ... */
1868 //zz HReg dst = newVRegI(env);
1869 //zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1870 //zz addInstr(env, X86Instr_Set32(cond,dst));
1871 //zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1872 //zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1873 //zz return dst;
1874 //zz }
1875 //zz case Iop_Ctz32: {
1876 //zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1877 //zz HReg dst = newVRegI(env);
1878 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1879 //zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1880 //zz return dst;
1881 //zz }
1882 case Iop_Clz32: {
1883 /* Count leading zeroes; easy on ARM. */
1884 HReg dst = newVRegI(env);
1885 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1886 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1887 return dst;
1890 case Iop_CmpwNEZ32: {
1891 HReg dst = newVRegI(env);
1892 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1893 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1894 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1895 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1896 return dst;
1899 case Iop_Left32: {
1900 HReg dst = newVRegI(env);
1901 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1902 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1903 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1904 return dst;
1907 //zz case Iop_V128to32: {
1908 //zz HReg dst = newVRegI(env);
1909 //zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1910 //zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1911 //zz sub_from_esp(env, 16);
1912 //zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1913 //zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1914 //zz add_to_esp(env, 16);
1915 //zz return dst;
1916 //zz }
1917 //zz
1918 case Iop_ReinterpF32asI32: {
1919 HReg dst = newVRegI(env);
1920 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1921 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1922 return dst;
1925 //zz
1926 //zz case Iop_16to8:
1927 case Iop_32to8:
1928 case Iop_32to16:
1929 /* These are no-ops. */
1930 return iselIntExpr_R(env, e->Iex.Unop.arg);
1932 default:
1933 break;
1936 /* All Unop cases involving host-side helper calls. */
1937 void* fn = NULL;
1938 switch (e->Iex.Unop.op) {
1939 case Iop_CmpNEZ16x2:
1940 fn = &h_generic_calc_CmpNEZ16x2; break;
1941 case Iop_CmpNEZ8x4:
1942 fn = &h_generic_calc_CmpNEZ8x4; break;
1943 default:
1944 break;
1947 if (fn) {
1948 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1949 HReg res = newVRegI(env);
1950 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1951 addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1952 1, mk_RetLoc_simple(RLPri_Int) ));
1953 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1954 return res;
1957 break;
1960 /* --------- GET --------- */
1961 case Iex_Get: {
1962 if (ty == Ity_I32
1963 && 0 == (e->Iex.Get.offset & 3)
1964 && e->Iex.Get.offset < 4096-4) {
1965 HReg dst = newVRegI(env);
1966 addInstr(env, ARMInstr_LdSt32(
1967 ARMcc_AL, True/*isLoad*/,
1968 dst,
1969 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1970 return dst;
1972 //zz if (ty == Ity_I8 || ty == Ity_I16) {
1973 //zz HReg dst = newVRegI(env);
1974 //zz addInstr(env, X86Instr_LoadEX(
1975 //zz toUChar(ty==Ity_I8 ? 1 : 2),
1976 //zz False,
1977 //zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1978 //zz dst));
1979 //zz return dst;
1980 //zz }
1981 break;
1984 //zz case Iex_GetI: {
1985 //zz X86AMode* am
1986 //zz = genGuestArrayOffset(
1987 //zz env, e->Iex.GetI.descr,
1988 //zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1989 //zz HReg dst = newVRegI(env);
1990 //zz if (ty == Ity_I8) {
1991 //zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1992 //zz return dst;
1993 //zz }
1994 //zz if (ty == Ity_I32) {
1995 //zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1996 //zz return dst;
1997 //zz }
1998 //zz break;
1999 //zz }
2001 /* --------- CCALL --------- */
2002 case Iex_CCall: {
2003 HReg dst = newVRegI(env);
2004 vassert(ty == e->Iex.CCall.retty);
2006 /* be very restrictive for now. Only 32/64-bit ints allowed for
2007 args, and 32 bits for return type. Don't forget to change
2008 the RetLoc if more types are allowed in future. */
2009 if (e->Iex.CCall.retty != Ity_I32)
2010 goto irreducible;
2012 /* Marshal args, do the call, clear stack. */
2013 UInt addToSp = 0;
2014 RetLoc rloc = mk_RetLoc_INVALID();
2015 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2016 e->Iex.CCall.cee, e->Iex.CCall.retty,
2017 e->Iex.CCall.args );
2018 /* */
2019 if (ok) {
2020 vassert(is_sane_RetLoc(rloc));
2021 vassert(rloc.pri == RLPri_Int);
2022 vassert(addToSp == 0);
2023 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
2024 return dst;
2026 goto irreducible;
2029 /* --------- LITERAL --------- */
2030 /* 32 literals */
2031 case Iex_Const: {
2032 UInt u = 0;
2033 HReg dst = newVRegI(env);
2034 switch (e->Iex.Const.con->tag) {
2035 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2036 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
2037 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
2038 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
2040 addInstr(env, ARMInstr_Imm32(dst, u));
2041 return dst;
2044 /* --------- MULTIPLEX --------- */
2045 case Iex_ITE: { // VFD
2046 /* ITE(ccexpr, iftrue, iffalse) */
2047 if (ty == Ity_I32) {
2048 ARMCondCode cc;
2049 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2050 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse);
2051 HReg dst = newVRegI(env);
2052 addInstr(env, mk_iMOVds_RR(dst, r1));
2053 cc = iselCondCode(env, e->Iex.ITE.cond);
2054 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
2055 return dst;
2057 break;
2060 default:
2061 break;
2062 } /* switch (e->tag) */
2064 /* We get here if no pattern matched. */
2065 irreducible:
2066 ppIRExpr(e);
2067 vpanic("iselIntExpr_R: cannot reduce tree");
2071 /* -------------------- 64-bit -------------------- */
2073 /* Compute a 64-bit value into a register pair, which is returned as
2074 the first two parameters. As with iselIntExpr_R, these may be
2075 either real or virtual regs; in any case they must not be changed
2076 by subsequent code emitted by the caller. */
2078 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2079 const IRExpr* e )
2081 iselInt64Expr_wrk(rHi, rLo, env, e);
2082 # if 0
2083 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2084 # endif
2085 vassert(hregClass(*rHi) == HRcInt32);
2086 vassert(hregIsVirtual(*rHi));
2087 vassert(hregClass(*rLo) == HRcInt32);
2088 vassert(hregIsVirtual(*rLo));
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselInt64Expr.  Computes the 64-bit value of 'e' into a
   pair of fresh 32-bit virtual registers: *rHi gets bits 63:32 and
   *rLo gets bits 31:0.  Handles constants, temporaries, little-endian
   loads, guest-state GETs, a handful of 64-bit binary/unary ops and
   ITE, then falls back on the NEON selector (when available) before
   panicking on anything it cannot reduce. */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
                                const IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);

   /* 64-bit literal */
   if (e->tag == Iex_Const) {
      /* Split the 64-bit immediate into two 32-bit immediate loads. */
      ULong w64 = e->Iex.Const.con->Ico.U64;
      UInt wHi = toUInt(w64 >> 32);
      UInt wLo = toUInt(w64);
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      vassert(e->Iex.Const.con->tag == Ico_U64);
      addInstr(env, ARMInstr_Imm32(tHi, wHi));
      addInstr(env, ARMInstr_Imm32(tLo, wLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* read 64-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
         /* With NEON, 64-bit temps live in D registers; transfer the
            D reg into an integer register pair. */
         HReg tHi = newVRegI(env);
         HReg tLo = newVRegI(env);
         HReg tmp = iselNeon64Expr(env, e);
         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
         *rHi = tHi;
         *rLo = tLo;
      } else {
         /* Without NEON, the temp is already tracked as a reg pair. */
         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
      }
      return;
   }

   /* 64-bit load */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      /* Two 32-bit little-endian loads: high word at addr+4, low word
         at addr+0. */
      HReg tLo, tHi, rA;
      vassert(e->Iex.Load.ty == Ity_I64);
      rA  = iselIntExpr_R(env, e->Iex.Load.addr);
      tHi = newVRegI(env);
      tLo = newVRegI(env);
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                    tHi, ARMAMode1_RI(rA, 4)));
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
                                    tLo, ARMAMode1_RI(rA, 0)));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit GET */
   if (e->tag == Iex_Get) {
      /* Guest state is addressed off R8; read the two 32-bit halves. */
      ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
      ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         /* 32 x 32 -> 64 multiply */
         case Iop_MullS32:
         case Iop_MullU32: {
            /* ARMInstr_Mul uses fixed registers: operands in R2/R3,
               result in R1:R0 — hence the explicit moves. */
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
                              ? ARMmul_SX : ARMmul_ZX;
            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
            addInstr(env, ARMInstr_Mul(mop));
            addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
            addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         case Iop_Or64: {
            /* OR each 32-bit half independently. */
            HReg xLo, xHi, yLo, yHi;
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         case Iop_Add64: {
            /* ADDS on the low halves sets the carry flag, which ADC
               then folds into the high-half addition. */
            HReg xLo, xHi, yLo, yHi;
            HReg tHi = newVRegI(env);
            HReg tLo = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
            addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 32HLto64(e1,e2) */
         case Iop_32HLto64: {
            /* No code needed; just pair up the two 32-bit results. */
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;
         }

         default:
            break;
      }
   }

   /* --------- UNARY ops --------- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         /* ReinterpF64asI64 */
         case Iop_ReinterpF64asI64: {
            /* Bit-for-bit transfer of a D register into two core regs. */
            HReg dstHi = newVRegI(env);
            HReg dstLo = newVRegI(env);
            HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
            addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         /* Left64(e) */
         case Iop_Left64: {
            HReg yLo, yHi;
            HReg tHi  = newVRegI(env);
            HReg tLo  = newVRegI(env);
            HReg zero = newVRegI(env);
            /* yHi:yLo = arg */
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
            /* zero = 0 */
            addInstr(env, ARMInstr_Imm32(zero, 0));
            /* tLo = 0 - yLo, and set carry */
            addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
                                       tLo, zero, ARMRI84_R(yLo)));
            /* tHi = 0 - yHi - carry */
            addInstr(env, ARMInstr_Alu(ARMalu_SBC,
                                       tHi, zero, ARMRI84_R(yHi)));
            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
               back in, so as to give the final result
               tHi:tLo = arg | -arg. */
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* CmpwNEZ64(e) */
         case Iop_CmpwNEZ64: {
            /* Produce all-zeroes if arg == 0, else all-ones, in both
               result halves. */
            HReg srcLo, srcHi;
            HReg tmp1 = newVRegI(env);
            HReg tmp2 = newVRegI(env);
            /* srcHi:srcLo = arg */
            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
            /* tmp1 = srcHi | srcLo */
            addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                       tmp1, srcHi, ARMRI84_R(srcLo)));
            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
            addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
            addInstr(env, ARMInstr_Alu(ARMalu_OR,
                                       tmp2, tmp2, ARMRI84_R(tmp1)));
            addInstr(env, ARMInstr_Shift(ARMsh_SAR,
                                         tmp2, tmp2, ARMRI5_I5(31)));
            *rHi = tmp2;
            *rLo = tmp2;
            return;
         }

         case Iop_1Sto64: {
            /* Widen a 1-bit condition to 0 or -1 in both halves:
               materialise 0/1, then shift left 31 and arithmetic
               shift right 31 to smear the bit. */
            HReg        dst  = newVRegI(env);
            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            ARMRI5*     amt  = ARMRI5_I5(31);
            /* This is really rough.  We could do much better here;
               perhaps mvn{cond} dst, #0 as the second insn?
               (same applies to 1Sto32) */
            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
            *rHi = dst;
            *rLo = dst;
            return;
         }

         default:
            break;
      }
   } /* if (e->tag == Iex_Unop) */

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_ITE) { // VFD
      /* Copy the iftrue pair into the destination, then conditionally
         overwrite with the iffalse pair using the inverted condition. */
      IRType tyC;
      HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
      ARMCondCode cc;
      tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
      vassert(tyC == Ity_I1);
      iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
      iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
      dstHi = newVRegI(env);
      dstLo = newVRegI(env);
      addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
      addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
      cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
      addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   /* It is convenient sometimes to call iselInt64Expr even when we
      have NEON support (e.g. in do_helper_call we need 64-bit
      arguments as 2 x 32 regs). */
   if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      HReg tmp = iselNeon64Expr(env, e);
      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
      *rHi = tHi;
      *rLo = tLo;
      return ;
   }

   ppIRExpr(e);
   vpanic("iselInt64Expr");
}
2340 /*---------------------------------------------------------*/
2341 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2342 /*---------------------------------------------------------*/
2344 static HReg iselNeon64Expr ( ISelEnv* env, const IRExpr* e )
2346 HReg r;
2347 vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
2348 r = iselNeon64Expr_wrk( env, e );
2349 vassert(hregClass(r) == HRcFlt64);
2350 vassert(hregIsVirtual(r));
2351 return r;
2354 /* DO NOT CALL THIS DIRECTLY */
2355 static HReg iselNeon64Expr_wrk ( ISelEnv* env, const IRExpr* e )
2357 IRType ty = typeOfIRExpr(env->type_env, e);
2358 MatchInfo mi;
2359 vassert(e);
2360 vassert(ty == Ity_I64);
2362 if (e->tag == Iex_RdTmp) {
2363 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2366 if (e->tag == Iex_Const) {
2367 HReg rLo, rHi;
2368 HReg res = newVRegD(env);
2369 iselInt64Expr(&rHi, &rLo, env, e);
2370 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2371 return res;
2374 /* 64-bit load */
2375 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2376 HReg res = newVRegD(env);
2377 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2378 vassert(ty == Ity_I64);
2379 addInstr(env, ARMInstr_NLdStD(True, res, am));
2380 return res;
2383 /* 64-bit GET */
2384 if (e->tag == Iex_Get) {
2385 HReg addr = newVRegI(env);
2386 HReg res = newVRegD(env);
2387 vassert(ty == Ity_I64);
2388 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2389 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2390 return res;
2393 /* --------- BINARY ops --------- */
2394 if (e->tag == Iex_Binop) {
2395 switch (e->Iex.Binop.op) {
2397 /* 32 x 32 -> 64 multiply */
2398 case Iop_MullS32:
2399 case Iop_MullU32: {
2400 HReg rLo, rHi;
2401 HReg res = newVRegD(env);
2402 iselInt64Expr(&rHi, &rLo, env, e);
2403 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2404 return res;
2407 case Iop_And64: {
2408 HReg res = newVRegD(env);
2409 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2410 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2411 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2412 res, argL, argR, 4, False));
2413 return res;
2415 case Iop_Or64: {
2416 HReg res = newVRegD(env);
2417 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2418 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2419 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2420 res, argL, argR, 4, False));
2421 return res;
2423 case Iop_Xor64: {
2424 HReg res = newVRegD(env);
2425 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2426 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2427 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2428 res, argL, argR, 4, False));
2429 return res;
2432 /* 32HLto64(e1,e2) */
2433 case Iop_32HLto64: {
2434 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2435 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2436 HReg res = newVRegD(env);
2437 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2438 return res;
2441 case Iop_Add8x8:
2442 case Iop_Add16x4:
2443 case Iop_Add32x2:
2444 case Iop_Add64: {
2445 HReg res = newVRegD(env);
2446 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2447 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2448 UInt size;
2449 switch (e->Iex.Binop.op) {
2450 case Iop_Add8x8: size = 0; break;
2451 case Iop_Add16x4: size = 1; break;
2452 case Iop_Add32x2: size = 2; break;
2453 case Iop_Add64: size = 3; break;
2454 default: vassert(0);
2456 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2457 res, argL, argR, size, False));
2458 return res;
2460 case Iop_Add32Fx2: {
2461 HReg res = newVRegD(env);
2462 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2463 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2464 UInt size = 0;
2465 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2466 res, argL, argR, size, False));
2467 return res;
2469 case Iop_RecipStep32Fx2: {
2470 HReg res = newVRegD(env);
2471 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2472 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2473 UInt size = 0;
2474 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2475 res, argL, argR, size, False));
2476 return res;
2478 case Iop_RSqrtStep32Fx2: {
2479 HReg res = newVRegD(env);
2480 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2481 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2482 UInt size = 0;
2483 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2484 res, argL, argR, size, False));
2485 return res;
2488 // These 6 verified 18 Apr 2013
2489 case Iop_InterleaveHI32x2:
2490 case Iop_InterleaveLO32x2:
2491 case Iop_InterleaveOddLanes8x8:
2492 case Iop_InterleaveEvenLanes8x8:
2493 case Iop_InterleaveOddLanes16x4:
2494 case Iop_InterleaveEvenLanes16x4: {
2495 HReg rD = newVRegD(env);
2496 HReg rM = newVRegD(env);
2497 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2498 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2499 UInt size;
2500 Bool resRd; // is the result in rD or rM ?
2501 switch (e->Iex.Binop.op) {
2502 case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break;
2503 case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break;
2504 case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break;
2505 case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break;
2506 case Iop_InterleaveHI32x2: resRd = False; size = 2; break;
2507 case Iop_InterleaveLO32x2: resRd = True; size = 2; break;
2508 default: vassert(0);
2510 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2511 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2512 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2513 return resRd ? rD : rM;
2516 // These 4 verified 18 Apr 2013
2517 case Iop_InterleaveHI8x8:
2518 case Iop_InterleaveLO8x8:
2519 case Iop_InterleaveHI16x4:
2520 case Iop_InterleaveLO16x4: {
2521 HReg rD = newVRegD(env);
2522 HReg rM = newVRegD(env);
2523 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2524 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2525 UInt size;
2526 Bool resRd; // is the result in rD or rM ?
2527 switch (e->Iex.Binop.op) {
2528 case Iop_InterleaveHI8x8: resRd = False; size = 0; break;
2529 case Iop_InterleaveLO8x8: resRd = True; size = 0; break;
2530 case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2531 case Iop_InterleaveLO16x4: resRd = True; size = 1; break;
2532 default: vassert(0);
2534 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2535 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2536 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2537 return resRd ? rD : rM;
2540 // These 4 verified 18 Apr 2013
2541 case Iop_CatOddLanes8x8:
2542 case Iop_CatEvenLanes8x8:
2543 case Iop_CatOddLanes16x4:
2544 case Iop_CatEvenLanes16x4: {
2545 HReg rD = newVRegD(env);
2546 HReg rM = newVRegD(env);
2547 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2548 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2549 UInt size;
2550 Bool resRd; // is the result in rD or rM ?
2551 switch (e->Iex.Binop.op) {
2552 case Iop_CatOddLanes8x8: resRd = False; size = 0; break;
2553 case Iop_CatEvenLanes8x8: resRd = True; size = 0; break;
2554 case Iop_CatOddLanes16x4: resRd = False; size = 1; break;
2555 case Iop_CatEvenLanes16x4: resRd = True; size = 1; break;
2556 default: vassert(0);
2558 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2559 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2560 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2561 return resRd ? rD : rM;
2564 case Iop_QAdd8Ux8:
2565 case Iop_QAdd16Ux4:
2566 case Iop_QAdd32Ux2:
2567 case Iop_QAdd64Ux1: {
2568 HReg res = newVRegD(env);
2569 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2570 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2571 UInt size;
2572 switch (e->Iex.Binop.op) {
2573 case Iop_QAdd8Ux8: size = 0; break;
2574 case Iop_QAdd16Ux4: size = 1; break;
2575 case Iop_QAdd32Ux2: size = 2; break;
2576 case Iop_QAdd64Ux1: size = 3; break;
2577 default: vassert(0);
2579 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2580 res, argL, argR, size, False));
2581 return res;
2583 case Iop_QAdd8Sx8:
2584 case Iop_QAdd16Sx4:
2585 case Iop_QAdd32Sx2:
2586 case Iop_QAdd64Sx1: {
2587 HReg res = newVRegD(env);
2588 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2589 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2590 UInt size;
2591 switch (e->Iex.Binop.op) {
2592 case Iop_QAdd8Sx8: size = 0; break;
2593 case Iop_QAdd16Sx4: size = 1; break;
2594 case Iop_QAdd32Sx2: size = 2; break;
2595 case Iop_QAdd64Sx1: size = 3; break;
2596 default: vassert(0);
2598 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2599 res, argL, argR, size, False));
2600 return res;
2602 case Iop_Sub8x8:
2603 case Iop_Sub16x4:
2604 case Iop_Sub32x2:
2605 case Iop_Sub64: {
2606 HReg res = newVRegD(env);
2607 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2608 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2609 UInt size;
2610 switch (e->Iex.Binop.op) {
2611 case Iop_Sub8x8: size = 0; break;
2612 case Iop_Sub16x4: size = 1; break;
2613 case Iop_Sub32x2: size = 2; break;
2614 case Iop_Sub64: size = 3; break;
2615 default: vassert(0);
2617 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2618 res, argL, argR, size, False));
2619 return res;
2621 case Iop_Sub32Fx2: {
2622 HReg res = newVRegD(env);
2623 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2624 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2625 UInt size = 0;
2626 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2627 res, argL, argR, size, False));
2628 return res;
2630 case Iop_QSub8Ux8:
2631 case Iop_QSub16Ux4:
2632 case Iop_QSub32Ux2:
2633 case Iop_QSub64Ux1: {
2634 HReg res = newVRegD(env);
2635 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2636 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2637 UInt size;
2638 switch (e->Iex.Binop.op) {
2639 case Iop_QSub8Ux8: size = 0; break;
2640 case Iop_QSub16Ux4: size = 1; break;
2641 case Iop_QSub32Ux2: size = 2; break;
2642 case Iop_QSub64Ux1: size = 3; break;
2643 default: vassert(0);
2645 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2646 res, argL, argR, size, False));
2647 return res;
2649 case Iop_QSub8Sx8:
2650 case Iop_QSub16Sx4:
2651 case Iop_QSub32Sx2:
2652 case Iop_QSub64Sx1: {
2653 HReg res = newVRegD(env);
2654 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2655 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2656 UInt size;
2657 switch (e->Iex.Binop.op) {
2658 case Iop_QSub8Sx8: size = 0; break;
2659 case Iop_QSub16Sx4: size = 1; break;
2660 case Iop_QSub32Sx2: size = 2; break;
2661 case Iop_QSub64Sx1: size = 3; break;
2662 default: vassert(0);
2664 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2665 res, argL, argR, size, False));
2666 return res;
2668 case Iop_Max8Ux8:
2669 case Iop_Max16Ux4:
2670 case Iop_Max32Ux2: {
2671 HReg res = newVRegD(env);
2672 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2673 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2674 UInt size;
2675 switch (e->Iex.Binop.op) {
2676 case Iop_Max8Ux8: size = 0; break;
2677 case Iop_Max16Ux4: size = 1; break;
2678 case Iop_Max32Ux2: size = 2; break;
2679 default: vassert(0);
2681 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2682 res, argL, argR, size, False));
2683 return res;
2685 case Iop_Max8Sx8:
2686 case Iop_Max16Sx4:
2687 case Iop_Max32Sx2: {
2688 HReg res = newVRegD(env);
2689 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2690 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2691 UInt size;
2692 switch (e->Iex.Binop.op) {
2693 case Iop_Max8Sx8: size = 0; break;
2694 case Iop_Max16Sx4: size = 1; break;
2695 case Iop_Max32Sx2: size = 2; break;
2696 default: vassert(0);
2698 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2699 res, argL, argR, size, False));
2700 return res;
2702 case Iop_Min8Ux8:
2703 case Iop_Min16Ux4:
2704 case Iop_Min32Ux2: {
2705 HReg res = newVRegD(env);
2706 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2707 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2708 UInt size;
2709 switch (e->Iex.Binop.op) {
2710 case Iop_Min8Ux8: size = 0; break;
2711 case Iop_Min16Ux4: size = 1; break;
2712 case Iop_Min32Ux2: size = 2; break;
2713 default: vassert(0);
2715 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2716 res, argL, argR, size, False));
2717 return res;
2719 case Iop_Min8Sx8:
2720 case Iop_Min16Sx4:
2721 case Iop_Min32Sx2: {
2722 HReg res = newVRegD(env);
2723 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2724 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2725 UInt size;
2726 switch (e->Iex.Binop.op) {
2727 case Iop_Min8Sx8: size = 0; break;
2728 case Iop_Min16Sx4: size = 1; break;
2729 case Iop_Min32Sx2: size = 2; break;
2730 default: vassert(0);
2732 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2733 res, argL, argR, size, False));
2734 return res;
2736 case Iop_Sar8x8:
2737 case Iop_Sar16x4:
2738 case Iop_Sar32x2: {
2739 HReg res = newVRegD(env);
2740 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2741 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2742 HReg argR2 = newVRegD(env);
2743 HReg zero = newVRegD(env);
2744 UInt size;
2745 switch (e->Iex.Binop.op) {
2746 case Iop_Sar8x8: size = 0; break;
2747 case Iop_Sar16x4: size = 1; break;
2748 case Iop_Sar32x2: size = 2; break;
2749 case Iop_Sar64: size = 3; break;
2750 default: vassert(0);
2752 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2753 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2754 argR2, zero, argR, size, False));
2755 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2756 res, argL, argR2, size, False));
2757 return res;
2759 case Iop_Sal8x8:
2760 case Iop_Sal16x4:
2761 case Iop_Sal32x2:
2762 case Iop_Sal64x1: {
2763 HReg res = newVRegD(env);
2764 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2765 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2766 UInt size;
2767 switch (e->Iex.Binop.op) {
2768 case Iop_Sal8x8: size = 0; break;
2769 case Iop_Sal16x4: size = 1; break;
2770 case Iop_Sal32x2: size = 2; break;
2771 case Iop_Sal64x1: size = 3; break;
2772 default: vassert(0);
2774 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2775 res, argL, argR, size, False));
2776 return res;
2778 case Iop_Shr8x8:
2779 case Iop_Shr16x4:
2780 case Iop_Shr32x2: {
2781 HReg res = newVRegD(env);
2782 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784 HReg argR2 = newVRegD(env);
2785 HReg zero = newVRegD(env);
2786 UInt size;
2787 switch (e->Iex.Binop.op) {
2788 case Iop_Shr8x8: size = 0; break;
2789 case Iop_Shr16x4: size = 1; break;
2790 case Iop_Shr32x2: size = 2; break;
2791 default: vassert(0);
2793 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2794 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2795 argR2, zero, argR, size, False));
2796 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2797 res, argL, argR2, size, False));
2798 return res;
2800 case Iop_Shl8x8:
2801 case Iop_Shl16x4:
2802 case Iop_Shl32x2: {
2803 HReg res = newVRegD(env);
2804 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2805 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2806 UInt size;
2807 switch (e->Iex.Binop.op) {
2808 case Iop_Shl8x8: size = 0; break;
2809 case Iop_Shl16x4: size = 1; break;
2810 case Iop_Shl32x2: size = 2; break;
2811 default: vassert(0);
2813 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2814 res, argL, argR, size, False));
2815 return res;
2817 case Iop_QShl8x8:
2818 case Iop_QShl16x4:
2819 case Iop_QShl32x2:
2820 case Iop_QShl64x1: {
2821 HReg res = newVRegD(env);
2822 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2823 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2824 UInt size;
2825 switch (e->Iex.Binop.op) {
2826 case Iop_QShl8x8: size = 0; break;
2827 case Iop_QShl16x4: size = 1; break;
2828 case Iop_QShl32x2: size = 2; break;
2829 case Iop_QShl64x1: size = 3; break;
2830 default: vassert(0);
2832 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2833 res, argL, argR, size, False));
2834 return res;
2836 case Iop_QSal8x8:
2837 case Iop_QSal16x4:
2838 case Iop_QSal32x2:
2839 case Iop_QSal64x1: {
2840 HReg res = newVRegD(env);
2841 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2842 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2843 UInt size;
2844 switch (e->Iex.Binop.op) {
2845 case Iop_QSal8x8: size = 0; break;
2846 case Iop_QSal16x4: size = 1; break;
2847 case Iop_QSal32x2: size = 2; break;
2848 case Iop_QSal64x1: size = 3; break;
2849 default: vassert(0);
2851 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2852 res, argL, argR, size, False));
2853 return res;
2855 case Iop_QShlNsatUU8x8:
2856 case Iop_QShlNsatUU16x4:
2857 case Iop_QShlNsatUU32x2:
2858 case Iop_QShlNsatUU64x1: {
2859 HReg res = newVRegD(env);
2860 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2861 UInt size, imm;
2862 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2863 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2864 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
2865 "second argument only\n");
2867 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2868 switch (e->Iex.Binop.op) {
2869 case Iop_QShlNsatUU8x8: size = 8 | imm; break;
2870 case Iop_QShlNsatUU16x4: size = 16 | imm; break;
2871 case Iop_QShlNsatUU32x2: size = 32 | imm; break;
2872 case Iop_QShlNsatUU64x1: size = 64 | imm; break;
2873 default: vassert(0);
2875 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2876 res, argL, size, False));
2877 return res;
2879 case Iop_QShlNsatSU8x8:
2880 case Iop_QShlNsatSU16x4:
2881 case Iop_QShlNsatSU32x2:
2882 case Iop_QShlNsatSU64x1: {
2883 HReg res = newVRegD(env);
2884 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2885 UInt size, imm;
2886 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2887 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2888 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
2889 "second argument only\n");
2891 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2892 switch (e->Iex.Binop.op) {
2893 case Iop_QShlNsatSU8x8: size = 8 | imm; break;
2894 case Iop_QShlNsatSU16x4: size = 16 | imm; break;
2895 case Iop_QShlNsatSU32x2: size = 32 | imm; break;
2896 case Iop_QShlNsatSU64x1: size = 64 | imm; break;
2897 default: vassert(0);
2899 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2900 res, argL, size, False));
2901 return res;
2903 case Iop_QShlNsatSS8x8:
2904 case Iop_QShlNsatSS16x4:
2905 case Iop_QShlNsatSS32x2:
2906 case Iop_QShlNsatSS64x1: {
2907 HReg res = newVRegD(env);
2908 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2909 UInt size, imm;
2910 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2911 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2912 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
2913 "second argument only\n");
2915 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2916 switch (e->Iex.Binop.op) {
2917 case Iop_QShlNsatSS8x8: size = 8 | imm; break;
2918 case Iop_QShlNsatSS16x4: size = 16 | imm; break;
2919 case Iop_QShlNsatSS32x2: size = 32 | imm; break;
2920 case Iop_QShlNsatSS64x1: size = 64 | imm; break;
2921 default: vassert(0);
2923 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2924 res, argL, size, False));
2925 return res;
2927 case Iop_ShrN8x8:
2928 case Iop_ShrN16x4:
2929 case Iop_ShrN32x2:
2930 case Iop_Shr64: {
2931 HReg res = newVRegD(env);
2932 HReg tmp = newVRegD(env);
2933 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2934 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2935 HReg argR2 = newVRegI(env);
2936 UInt size;
2937 switch (e->Iex.Binop.op) {
2938 case Iop_ShrN8x8: size = 0; break;
2939 case Iop_ShrN16x4: size = 1; break;
2940 case Iop_ShrN32x2: size = 2; break;
2941 case Iop_Shr64: size = 3; break;
2942 default: vassert(0);
2944 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2945 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2946 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2947 res, argL, tmp, size, False));
2948 return res;
2950 case Iop_ShlN8x8:
2951 case Iop_ShlN16x4:
2952 case Iop_ShlN32x2:
2953 case Iop_Shl64: {
2954 HReg res = newVRegD(env);
2955 HReg tmp = newVRegD(env);
2956 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2957 /* special-case Shl64(x, imm8) since the Neon front
2958 end produces a lot of those for V{LD,ST}{1,2,3,4}. */
2959 if (e->Iex.Binop.op == Iop_Shl64
2960 && e->Iex.Binop.arg2->tag == Iex_Const) {
2961 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
2962 Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2963 if (nshift >= 1 && nshift <= 63) {
2964 addInstr(env, ARMInstr_NShl64(res, argL, nshift));
2965 return res;
2967 /* else fall through to general case */
2969 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2970 UInt size;
2971 switch (e->Iex.Binop.op) {
2972 case Iop_ShlN8x8: size = 0; break;
2973 case Iop_ShlN16x4: size = 1; break;
2974 case Iop_ShlN32x2: size = 2; break;
2975 case Iop_Shl64: size = 3; break;
2976 default: vassert(0);
2978 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
2979 tmp, argR, 0, False));
2980 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2981 res, argL, tmp, size, False));
2982 return res;
2984 case Iop_SarN8x8:
2985 case Iop_SarN16x4:
2986 case Iop_SarN32x2:
2987 case Iop_Sar64: {
2988 HReg res = newVRegD(env);
2989 HReg tmp = newVRegD(env);
2990 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2991 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2992 HReg argR2 = newVRegI(env);
2993 UInt size;
2994 switch (e->Iex.Binop.op) {
2995 case Iop_SarN8x8: size = 0; break;
2996 case Iop_SarN16x4: size = 1; break;
2997 case Iop_SarN32x2: size = 2; break;
2998 case Iop_Sar64: size = 3; break;
2999 default: vassert(0);
3001 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3002 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3003 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3004 res, argL, tmp, size, False));
3005 return res;
3007 case Iop_CmpGT8Ux8:
3008 case Iop_CmpGT16Ux4:
3009 case Iop_CmpGT32Ux2: {
3010 HReg res = newVRegD(env);
3011 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3012 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3013 UInt size;
3014 switch (e->Iex.Binop.op) {
3015 case Iop_CmpGT8Ux8: size = 0; break;
3016 case Iop_CmpGT16Ux4: size = 1; break;
3017 case Iop_CmpGT32Ux2: size = 2; break;
3018 default: vassert(0);
3020 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
3021 res, argL, argR, size, False));
3022 return res;
3024 case Iop_CmpGT8Sx8:
3025 case Iop_CmpGT16Sx4:
3026 case Iop_CmpGT32Sx2: {
3027 HReg res = newVRegD(env);
3028 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3029 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3030 UInt size;
3031 switch (e->Iex.Binop.op) {
3032 case Iop_CmpGT8Sx8: size = 0; break;
3033 case Iop_CmpGT16Sx4: size = 1; break;
3034 case Iop_CmpGT32Sx2: size = 2; break;
3035 default: vassert(0);
3037 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
3038 res, argL, argR, size, False));
3039 return res;
3041 case Iop_CmpEQ8x8:
3042 case Iop_CmpEQ16x4:
3043 case Iop_CmpEQ32x2: {
3044 HReg res = newVRegD(env);
3045 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3046 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3047 UInt size;
3048 switch (e->Iex.Binop.op) {
3049 case Iop_CmpEQ8x8: size = 0; break;
3050 case Iop_CmpEQ16x4: size = 1; break;
3051 case Iop_CmpEQ32x2: size = 2; break;
3052 default: vassert(0);
3054 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
3055 res, argL, argR, size, False));
3056 return res;
3058 case Iop_Mul8x8:
3059 case Iop_Mul16x4:
3060 case Iop_Mul32x2: {
3061 HReg res = newVRegD(env);
3062 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3063 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3064 UInt size = 0;
3065 switch(e->Iex.Binop.op) {
3066 case Iop_Mul8x8: size = 0; break;
3067 case Iop_Mul16x4: size = 1; break;
3068 case Iop_Mul32x2: size = 2; break;
3069 default: vassert(0);
3071 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
3072 res, argL, argR, size, False));
3073 return res;
3075 case Iop_Mul32Fx2: {
3076 HReg res = newVRegD(env);
3077 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3078 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3079 UInt size = 0;
3080 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
3081 res, argL, argR, size, False));
3082 return res;
3084 case Iop_QDMulHi16Sx4:
3085 case Iop_QDMulHi32Sx2: {
3086 HReg res = newVRegD(env);
3087 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3088 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3089 UInt size = 0;
3090 switch(e->Iex.Binop.op) {
3091 case Iop_QDMulHi16Sx4: size = 1; break;
3092 case Iop_QDMulHi32Sx2: size = 2; break;
3093 default: vassert(0);
3095 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
3096 res, argL, argR, size, False));
3097 return res;
3100 case Iop_QRDMulHi16Sx4:
3101 case Iop_QRDMulHi32Sx2: {
3102 HReg res = newVRegD(env);
3103 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3104 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3105 UInt size = 0;
3106 switch(e->Iex.Binop.op) {
3107 case Iop_QRDMulHi16Sx4: size = 1; break;
3108 case Iop_QRDMulHi32Sx2: size = 2; break;
3109 default: vassert(0);
3111 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
3112 res, argL, argR, size, False));
3113 return res;
3116 case Iop_PwAdd8x8:
3117 case Iop_PwAdd16x4:
3118 case Iop_PwAdd32x2: {
3119 HReg res = newVRegD(env);
3120 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3121 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3122 UInt size = 0;
3123 switch(e->Iex.Binop.op) {
3124 case Iop_PwAdd8x8: size = 0; break;
3125 case Iop_PwAdd16x4: size = 1; break;
3126 case Iop_PwAdd32x2: size = 2; break;
3127 default: vassert(0);
3129 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
3130 res, argL, argR, size, False));
3131 return res;
3133 case Iop_PwAdd32Fx2: {
3134 HReg res = newVRegD(env);
3135 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3136 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3137 UInt size = 0;
3138 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
3139 res, argL, argR, size, False));
3140 return res;
3142 case Iop_PwMin8Ux8:
3143 case Iop_PwMin16Ux4:
3144 case Iop_PwMin32Ux2: {
3145 HReg res = newVRegD(env);
3146 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3147 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3148 UInt size = 0;
3149 switch(e->Iex.Binop.op) {
3150 case Iop_PwMin8Ux8: size = 0; break;
3151 case Iop_PwMin16Ux4: size = 1; break;
3152 case Iop_PwMin32Ux2: size = 2; break;
3153 default: vassert(0);
3155 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
3156 res, argL, argR, size, False));
3157 return res;
3159 case Iop_PwMin8Sx8:
3160 case Iop_PwMin16Sx4:
3161 case Iop_PwMin32Sx2: {
3162 HReg res = newVRegD(env);
3163 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3164 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3165 UInt size = 0;
3166 switch(e->Iex.Binop.op) {
3167 case Iop_PwMin8Sx8: size = 0; break;
3168 case Iop_PwMin16Sx4: size = 1; break;
3169 case Iop_PwMin32Sx2: size = 2; break;
3170 default: vassert(0);
3172 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3173 res, argL, argR, size, False));
3174 return res;
3176 case Iop_PwMax8Ux8:
3177 case Iop_PwMax16Ux4:
3178 case Iop_PwMax32Ux2: {
3179 HReg res = newVRegD(env);
3180 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3181 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3182 UInt size = 0;
3183 switch(e->Iex.Binop.op) {
3184 case Iop_PwMax8Ux8: size = 0; break;
3185 case Iop_PwMax16Ux4: size = 1; break;
3186 case Iop_PwMax32Ux2: size = 2; break;
3187 default: vassert(0);
3189 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3190 res, argL, argR, size, False));
3191 return res;
3193 case Iop_PwMax8Sx8:
3194 case Iop_PwMax16Sx4:
3195 case Iop_PwMax32Sx2: {
3196 HReg res = newVRegD(env);
3197 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3198 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3199 UInt size = 0;
3200 switch(e->Iex.Binop.op) {
3201 case Iop_PwMax8Sx8: size = 0; break;
3202 case Iop_PwMax16Sx4: size = 1; break;
3203 case Iop_PwMax32Sx2: size = 2; break;
3204 default: vassert(0);
3206 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3207 res, argL, argR, size, False));
3208 return res;
3210 case Iop_Perm8x8: {
3211 HReg res = newVRegD(env);
3212 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3213 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3214 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3215 res, argL, argR, 0, False));
3216 return res;
3218 case Iop_PolynomialMul8x8: {
3219 HReg res = newVRegD(env);
3220 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3221 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3222 UInt size = 0;
3223 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3224 res, argL, argR, size, False));
3225 return res;
3227 case Iop_Max32Fx2: {
3228 HReg res = newVRegD(env);
3229 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3230 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3231 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3232 res, argL, argR, 2, False));
3233 return res;
3235 case Iop_Min32Fx2: {
3236 HReg res = newVRegD(env);
3237 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3238 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3239 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3240 res, argL, argR, 2, False));
3241 return res;
3243 case Iop_PwMax32Fx2: {
3244 HReg res = newVRegD(env);
3245 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3246 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3247 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3248 res, argL, argR, 2, False));
3249 return res;
3251 case Iop_PwMin32Fx2: {
3252 HReg res = newVRegD(env);
3253 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3254 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3255 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3256 res, argL, argR, 2, False));
3257 return res;
3259 case Iop_CmpGT32Fx2: {
3260 HReg res = newVRegD(env);
3261 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3262 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3263 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3264 res, argL, argR, 2, False));
3265 return res;
3267 case Iop_CmpGE32Fx2: {
3268 HReg res = newVRegD(env);
3269 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3270 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3271 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3272 res, argL, argR, 2, False));
3273 return res;
3275 case Iop_CmpEQ32Fx2: {
3276 HReg res = newVRegD(env);
3277 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3278 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3279 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3280 res, argL, argR, 2, False));
3281 return res;
3283 case Iop_F32ToFixed32Ux2_RZ:
3284 case Iop_F32ToFixed32Sx2_RZ:
3285 case Iop_Fixed32UToF32x2_RN:
3286 case Iop_Fixed32SToF32x2_RN: {
3287 HReg res = newVRegD(env);
3288 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3289 ARMNeonUnOp op;
3290 UInt imm6;
3291 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3292 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3293 vpanic("ARM supports FP <-> Fixed conversion with constant "
3294 "second argument less than 33 only\n");
3296 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3297 vassert(imm6 <= 32 && imm6 > 0);
3298 imm6 = 64 - imm6;
3299 switch(e->Iex.Binop.op) {
3300 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3301 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3302 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3303 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3304 default: vassert(0);
3306 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3307 return res;
3310 FIXME: is this here or not?
3311 case Iop_VDup8x8:
3312 case Iop_VDup16x4:
3313 case Iop_VDup32x2: {
3314 HReg res = newVRegD(env);
3315 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3316 UInt index;
3317 UInt imm4;
3318 UInt size = 0;
3319 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3320 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3321 vpanic("ARM supports Iop_VDup with constant "
3322 "second argument less than 16 only\n");
3324 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3325 switch(e->Iex.Binop.op) {
3326 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3327 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3328 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3329 default: vassert(0);
3331 if (imm4 >= 16) {
3332 vpanic("ARM supports Iop_VDup with constant "
3333 "second argument less than 16 only\n");
3335 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3336 res, argL, imm4, False));
3337 return res;
3340 default:
3341 break;
3345 /* --------- UNARY ops --------- */
3346 if (e->tag == Iex_Unop) {
3347 switch (e->Iex.Unop.op) {
3349 /* 32Uto64 */
3350 case Iop_32Uto64: {
3351 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3352 HReg rHi = newVRegI(env);
3353 HReg res = newVRegD(env);
3354 addInstr(env, ARMInstr_Imm32(rHi, 0));
3355 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3356 return res;
3359 /* 32Sto64 */
3360 case Iop_32Sto64: {
3361 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3362 HReg rHi = newVRegI(env);
3363 addInstr(env, mk_iMOVds_RR(rHi, rLo));
3364 addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3365 HReg res = newVRegD(env);
3366 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3367 return res;
3370 /* The next 3 are pass-throughs */
3371 /* ReinterpF64asI64 */
3372 case Iop_ReinterpF64asI64:
3373 /* Left64(e) */
3374 case Iop_Left64:
3375 /* CmpwNEZ64(e) */
3376 case Iop_1Sto64: {
3377 HReg rLo, rHi;
3378 HReg res = newVRegD(env);
3379 iselInt64Expr(&rHi, &rLo, env, e);
3380 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3381 return res;
3384 case Iop_Not64: {
3385 DECLARE_PATTERN(p_veqz_8x8);
3386 DECLARE_PATTERN(p_veqz_16x4);
3387 DECLARE_PATTERN(p_veqz_32x2);
3388 DECLARE_PATTERN(p_vcge_8sx8);
3389 DECLARE_PATTERN(p_vcge_16sx4);
3390 DECLARE_PATTERN(p_vcge_32sx2);
3391 DECLARE_PATTERN(p_vcge_8ux8);
3392 DECLARE_PATTERN(p_vcge_16ux4);
3393 DECLARE_PATTERN(p_vcge_32ux2);
3394 DEFINE_PATTERN(p_veqz_8x8,
3395 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3396 DEFINE_PATTERN(p_veqz_16x4,
3397 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3398 DEFINE_PATTERN(p_veqz_32x2,
3399 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3400 DEFINE_PATTERN(p_vcge_8sx8,
3401 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3402 DEFINE_PATTERN(p_vcge_16sx4,
3403 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3404 DEFINE_PATTERN(p_vcge_32sx2,
3405 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3406 DEFINE_PATTERN(p_vcge_8ux8,
3407 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3408 DEFINE_PATTERN(p_vcge_16ux4,
3409 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3410 DEFINE_PATTERN(p_vcge_32ux2,
3411 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3412 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3413 HReg res = newVRegD(env);
3414 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3415 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3416 return res;
3417 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3418 HReg res = newVRegD(env);
3419 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3420 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3421 return res;
3422 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3423 HReg res = newVRegD(env);
3424 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3425 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3426 return res;
3427 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3428 HReg res = newVRegD(env);
3429 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3430 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3431 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3432 res, argL, argR, 0, False));
3433 return res;
3434 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3435 HReg res = newVRegD(env);
3436 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3437 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3438 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3439 res, argL, argR, 1, False));
3440 return res;
3441 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3442 HReg res = newVRegD(env);
3443 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3444 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3445 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3446 res, argL, argR, 2, False));
3447 return res;
3448 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3449 HReg res = newVRegD(env);
3450 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3451 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3452 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3453 res, argL, argR, 0, False));
3454 return res;
3455 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3456 HReg res = newVRegD(env);
3457 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3458 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3459 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3460 res, argL, argR, 1, False));
3461 return res;
3462 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3463 HReg res = newVRegD(env);
3464 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3465 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3466 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3467 res, argL, argR, 2, False));
3468 return res;
3469 } else {
3470 HReg res = newVRegD(env);
3471 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3472 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3473 return res;
3476 case Iop_Dup8x8:
3477 case Iop_Dup16x4:
3478 case Iop_Dup32x2: {
3479 HReg res, arg;
3480 UInt size;
3481 DECLARE_PATTERN(p_vdup_8x8);
3482 DECLARE_PATTERN(p_vdup_16x4);
3483 DECLARE_PATTERN(p_vdup_32x2);
3484 DEFINE_PATTERN(p_vdup_8x8,
3485 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3486 DEFINE_PATTERN(p_vdup_16x4,
3487 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3488 DEFINE_PATTERN(p_vdup_32x2,
3489 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3490 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3491 UInt index;
3492 UInt imm4;
3493 if (mi.bindee[1]->tag == Iex_Const &&
3494 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3495 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3496 imm4 = (index << 1) + 1;
3497 if (index < 8) {
3498 res = newVRegD(env);
3499 arg = iselNeon64Expr(env, mi.bindee[0]);
3500 addInstr(env, ARMInstr_NUnaryS(
3501 ARMneon_VDUP,
3502 mkARMNRS(ARMNRS_Reg, res, 0),
3503 mkARMNRS(ARMNRS_Scalar, arg, index),
3504 imm4, False
3506 return res;
3509 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3510 UInt index;
3511 UInt imm4;
3512 if (mi.bindee[1]->tag == Iex_Const &&
3513 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3514 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3515 imm4 = (index << 2) + 2;
3516 if (index < 4) {
3517 res = newVRegD(env);
3518 arg = iselNeon64Expr(env, mi.bindee[0]);
3519 addInstr(env, ARMInstr_NUnaryS(
3520 ARMneon_VDUP,
3521 mkARMNRS(ARMNRS_Reg, res, 0),
3522 mkARMNRS(ARMNRS_Scalar, arg, index),
3523 imm4, False
3525 return res;
3528 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3529 UInt index;
3530 UInt imm4;
3531 if (mi.bindee[1]->tag == Iex_Const &&
3532 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3533 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3534 imm4 = (index << 3) + 4;
3535 if (index < 2) {
3536 res = newVRegD(env);
3537 arg = iselNeon64Expr(env, mi.bindee[0]);
3538 addInstr(env, ARMInstr_NUnaryS(
3539 ARMneon_VDUP,
3540 mkARMNRS(ARMNRS_Reg, res, 0),
3541 mkARMNRS(ARMNRS_Scalar, arg, index),
3542 imm4, False
3544 return res;
3548 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3549 res = newVRegD(env);
3550 switch (e->Iex.Unop.op) {
3551 case Iop_Dup8x8: size = 0; break;
3552 case Iop_Dup16x4: size = 1; break;
3553 case Iop_Dup32x2: size = 2; break;
3554 default: vassert(0);
3556 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3557 return res;
3559 case Iop_Abs8x8:
3560 case Iop_Abs16x4:
3561 case Iop_Abs32x2: {
3562 HReg res = newVRegD(env);
3563 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3564 UInt size = 0;
3565 switch(e->Iex.Binop.op) {
3566 case Iop_Abs8x8: size = 0; break;
3567 case Iop_Abs16x4: size = 1; break;
3568 case Iop_Abs32x2: size = 2; break;
3569 default: vassert(0);
3571 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3572 return res;
3574 case Iop_Reverse8sIn64_x1:
3575 case Iop_Reverse16sIn64_x1:
3576 case Iop_Reverse32sIn64_x1: {
3577 HReg res = newVRegD(env);
3578 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3579 UInt size = 0;
3580 switch(e->Iex.Binop.op) {
3581 case Iop_Reverse8sIn64_x1: size = 0; break;
3582 case Iop_Reverse16sIn64_x1: size = 1; break;
3583 case Iop_Reverse32sIn64_x1: size = 2; break;
3584 default: vassert(0);
3586 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3587 res, arg, size, False));
3588 return res;
3590 case Iop_Reverse8sIn32_x2:
3591 case Iop_Reverse16sIn32_x2: {
3592 HReg res = newVRegD(env);
3593 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3594 UInt size = 0;
3595 switch(e->Iex.Binop.op) {
3596 case Iop_Reverse8sIn32_x2: size = 0; break;
3597 case Iop_Reverse16sIn32_x2: size = 1; break;
3598 default: vassert(0);
3600 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3601 res, arg, size, False));
3602 return res;
3604 case Iop_Reverse8sIn16_x4: {
3605 HReg res = newVRegD(env);
3606 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3607 UInt size = 0;
3608 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3609 res, arg, size, False));
3610 return res;
3612 case Iop_CmpwNEZ64: {
3613 HReg x_lsh = newVRegD(env);
3614 HReg x_rsh = newVRegD(env);
3615 HReg lsh_amt = newVRegD(env);
3616 HReg rsh_amt = newVRegD(env);
3617 HReg zero = newVRegD(env);
3618 HReg tmp = newVRegD(env);
3619 HReg tmp2 = newVRegD(env);
3620 HReg res = newVRegD(env);
3621 HReg x = newVRegD(env);
3622 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3623 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3624 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3625 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3626 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3627 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3628 rsh_amt, zero, lsh_amt, 2, False));
3629 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3630 x_lsh, x, lsh_amt, 3, False));
3631 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3632 x_rsh, x, rsh_amt, 3, False));
3633 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3634 tmp, x_lsh, x_rsh, 0, False));
3635 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3636 res, tmp, x, 0, False));
3637 return res;
3639 case Iop_CmpNEZ8x8:
3640 case Iop_CmpNEZ16x4:
3641 case Iop_CmpNEZ32x2: {
3642 HReg res = newVRegD(env);
3643 HReg tmp = newVRegD(env);
3644 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3645 UInt size;
3646 switch (e->Iex.Unop.op) {
3647 case Iop_CmpNEZ8x8: size = 0; break;
3648 case Iop_CmpNEZ16x4: size = 1; break;
3649 case Iop_CmpNEZ32x2: size = 2; break;
3650 default: vassert(0);
3652 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3653 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3654 return res;
3656 case Iop_NarrowUn16to8x8:
3657 case Iop_NarrowUn32to16x4:
3658 case Iop_NarrowUn64to32x2: {
3659 HReg res = newVRegD(env);
3660 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3661 UInt size = 0;
3662 switch(e->Iex.Binop.op) {
3663 case Iop_NarrowUn16to8x8: size = 0; break;
3664 case Iop_NarrowUn32to16x4: size = 1; break;
3665 case Iop_NarrowUn64to32x2: size = 2; break;
3666 default: vassert(0);
3668 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3669 res, arg, size, False));
3670 return res;
3672 case Iop_QNarrowUn16Sto8Sx8:
3673 case Iop_QNarrowUn32Sto16Sx4:
3674 case Iop_QNarrowUn64Sto32Sx2: {
3675 HReg res = newVRegD(env);
3676 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3677 UInt size = 0;
3678 switch(e->Iex.Binop.op) {
3679 case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3680 case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3681 case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3682 default: vassert(0);
3684 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3685 res, arg, size, False));
3686 return res;
3688 case Iop_QNarrowUn16Sto8Ux8:
3689 case Iop_QNarrowUn32Sto16Ux4:
3690 case Iop_QNarrowUn64Sto32Ux2: {
3691 HReg res = newVRegD(env);
3692 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3693 UInt size = 0;
3694 switch(e->Iex.Binop.op) {
3695 case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
3696 case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3697 case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3698 default: vassert(0);
3700 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3701 res, arg, size, False));
3702 return res;
3704 case Iop_QNarrowUn16Uto8Ux8:
3705 case Iop_QNarrowUn32Uto16Ux4:
3706 case Iop_QNarrowUn64Uto32Ux2: {
3707 HReg res = newVRegD(env);
3708 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3709 UInt size = 0;
3710 switch(e->Iex.Binop.op) {
3711 case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
3712 case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3713 case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3714 default: vassert(0);
3716 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3717 res, arg, size, False));
3718 return res;
3720 case Iop_PwAddL8Sx8:
3721 case Iop_PwAddL16Sx4:
3722 case Iop_PwAddL32Sx2: {
3723 HReg res = newVRegD(env);
3724 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3725 UInt size = 0;
3726 switch(e->Iex.Binop.op) {
3727 case Iop_PwAddL8Sx8: size = 0; break;
3728 case Iop_PwAddL16Sx4: size = 1; break;
3729 case Iop_PwAddL32Sx2: size = 2; break;
3730 default: vassert(0);
3732 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3733 res, arg, size, False));
3734 return res;
3736 case Iop_PwAddL8Ux8:
3737 case Iop_PwAddL16Ux4:
3738 case Iop_PwAddL32Ux2: {
3739 HReg res = newVRegD(env);
3740 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3741 UInt size = 0;
3742 switch(e->Iex.Binop.op) {
3743 case Iop_PwAddL8Ux8: size = 0; break;
3744 case Iop_PwAddL16Ux4: size = 1; break;
3745 case Iop_PwAddL32Ux2: size = 2; break;
3746 default: vassert(0);
3748 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3749 res, arg, size, False));
3750 return res;
3752 case Iop_Cnt8x8: {
3753 HReg res = newVRegD(env);
3754 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3755 UInt size = 0;
3756 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3757 res, arg, size, False));
3758 return res;
3760 case Iop_Clz8x8:
3761 case Iop_Clz16x4:
3762 case Iop_Clz32x2: {
3763 HReg res = newVRegD(env);
3764 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3765 UInt size = 0;
3766 switch(e->Iex.Binop.op) {
3767 case Iop_Clz8x8: size = 0; break;
3768 case Iop_Clz16x4: size = 1; break;
3769 case Iop_Clz32x2: size = 2; break;
3770 default: vassert(0);
3772 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3773 res, arg, size, False));
3774 return res;
3776 case Iop_Cls8x8:
3777 case Iop_Cls16x4:
3778 case Iop_Cls32x2: {
3779 HReg res = newVRegD(env);
3780 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3781 UInt size = 0;
3782 switch(e->Iex.Binop.op) {
3783 case Iop_Cls8x8: size = 0; break;
3784 case Iop_Cls16x4: size = 1; break;
3785 case Iop_Cls32x2: size = 2; break;
3786 default: vassert(0);
3788 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3789 res, arg, size, False));
3790 return res;
3792 case Iop_F32toI32Sx2_RZ: {
3793 HReg res = newVRegD(env);
3794 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3795 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3796 res, arg, 2, False));
3797 return res;
3799 case Iop_F32toI32Ux2_RZ: {
3800 HReg res = newVRegD(env);
3801 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3802 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3803 res, arg, 2, False));
3804 return res;
3806 case Iop_I32StoF32x2_DEP: {
3807 HReg res = newVRegD(env);
3808 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3809 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3810 res, arg, 2, False));
3811 return res;
3813 case Iop_I32UtoF32x2_DEP: {
3814 HReg res = newVRegD(env);
3815 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3816 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3817 res, arg, 2, False));
3818 return res;
3820 case Iop_F32toF16x4_DEP: {
3821 HReg res = newVRegD(env);
3822 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3823 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3824 res, arg, 2, False));
3825 return res;
3827 case Iop_RecipEst32Fx2: {
3828 HReg res = newVRegD(env);
3829 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3830 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3831 res, argL, 0, False));
3832 return res;
3834 case Iop_RecipEst32Ux2: {
3835 HReg res = newVRegD(env);
3836 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3837 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3838 res, argL, 0, False));
3839 return res;
3841 case Iop_Abs32Fx2: {
3842 DECLARE_PATTERN(p_vabd_32fx2);
3843 DEFINE_PATTERN(p_vabd_32fx2,
3844 unop(Iop_Abs32Fx2,
3845 binop(Iop_Sub32Fx2,
3846 bind(0),
3847 bind(1))));
3848 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3849 HReg res = newVRegD(env);
3850 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3851 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3852 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3853 res, argL, argR, 0, False));
3854 return res;
3855 } else {
3856 HReg res = newVRegD(env);
3857 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3858 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3859 res, arg, 0, False));
3860 return res;
3863 case Iop_RSqrtEst32Fx2: {
3864 HReg res = newVRegD(env);
3865 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3866 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3867 res, arg, 0, False));
3868 return res;
3870 case Iop_RSqrtEst32Ux2: {
3871 HReg res = newVRegD(env);
3872 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3873 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3874 res, arg, 0, False));
3875 return res;
3877 case Iop_Neg32Fx2: {
3878 HReg res = newVRegD(env);
3879 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3880 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3881 res, arg, 0, False));
3882 return res;
3884 case Iop_V128to64:
3885 case Iop_V128HIto64: {
3886 HReg src = iselNeonExpr(env, e->Iex.Unop.arg);
3887 HReg resLo = newVRegD(env);
3888 HReg resHi = newVRegD(env);
3889 addInstr(env, ARMInstr_VXferQ(False/*!toQ*/, src, resHi, resLo));
3890 return e->Iex.Unop.op == Iop_V128HIto64 ? resHi : resLo;
3892 default:
3893 break;
3895 } /* if (e->tag == Iex_Unop) */
3897 if (e->tag == Iex_Triop) {
3898 IRTriop *triop = e->Iex.Triop.details;
3900 switch (triop->op) {
3901 case Iop_Slice64: {
3902 HReg res = newVRegD(env);
3903 HReg argL = iselNeon64Expr(env, triop->arg2);
3904 HReg argR = iselNeon64Expr(env, triop->arg1);
3905 UInt imm4;
3906 if (triop->arg3->tag != Iex_Const ||
3907 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
3908 vpanic("ARM target supports Iop_Extract64 with constant "
3909 "third argument less than 16 only\n");
3911 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
3912 if (imm4 >= 8) {
3913 vpanic("ARM target supports Iop_Extract64 with constant "
3914 "third argument less than 16 only\n");
3916 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3917 res, argL, argR, imm4, False));
3918 return res;
3920 case Iop_SetElem8x8:
3921 case Iop_SetElem16x4:
3922 case Iop_SetElem32x2: {
3923 HReg res = newVRegD(env);
3924 HReg dreg = iselNeon64Expr(env, triop->arg1);
3925 HReg arg = iselIntExpr_R(env, triop->arg3);
3926 UInt index, size;
3927 if (triop->arg2->tag != Iex_Const ||
3928 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
3929 vpanic("ARM target supports SetElem with constant "
3930 "second argument only\n");
3932 index = triop->arg2->Iex.Const.con->Ico.U8;
3933 switch (triop->op) {
3934 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3935 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3936 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3937 default: vassert(0);
3939 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3940 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3941 mkARMNRS(ARMNRS_Scalar, res, index),
3942 mkARMNRS(ARMNRS_Reg, arg, 0),
3943 size, False));
3944 return res;
3946 default:
3947 break;
3951 /* --------- MULTIPLEX --------- */
3952 if (e->tag == Iex_ITE) { // VFD
3953 HReg rLo, rHi;
3954 HReg res = newVRegD(env);
3955 iselInt64Expr(&rHi, &rLo, env, e);
3956 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3957 return res;
3960 ppIRExpr(e);
3961 vpanic("iselNeon64Expr");
/* Compute into a new 128-bit (Q, vector) register the value of the
   given V128-typed expression |e|, and return that register.  This is
   the checked entry point; the actual selection work is done by
   iselNeonExpr_wrk below.  NOTE(review): opening/closing braces of this
   function were dropped by the source extraction; code bytes here are
   left untouched. */
3965 static HReg iselNeonExpr ( ISelEnv* env, const IRExpr* e )
3967 HReg r;
/* NEON selection is only valid when the host actually has NEON;
   callers must not reach this path otherwise. */
3968 vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
3969 r = iselNeonExpr_wrk( env, e );
/* Sanity: the worker must return a virtual 128-bit vector register. */
3970 vassert(hregClass(r) == HRcVec128);
3971 vassert(hregIsVirtual(r));
3972 return r;
3975 /* DO NOT CALL THIS DIRECTLY */
3976 static HReg iselNeonExpr_wrk ( ISelEnv* env, const IRExpr* e )
3978 IRType ty = typeOfIRExpr(env->type_env, e);
3979 MatchInfo mi;
3980 vassert(e);
3981 vassert(ty == Ity_V128);
3983 if (e->tag == Iex_RdTmp) {
3984 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3987 if (e->tag == Iex_Const) {
3988 /* At the moment there should be no 128-bit constants in IR for ARM
3989 generated during disassemble. They are represented as Iop_64HLtoV128
3990 binary operation and are handled among binary ops. */
3991 /* But zero can be created by valgrind internal optimizer */
3992 if (e->Iex.Const.con->Ico.V128 == 0x0000) {
3993 HReg res = newVRegV(env);
3994 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0)));
3995 return res;
3997 if (e->Iex.Const.con->Ico.V128 == 0xFFFF) {
3998 HReg res = newVRegV(env);
3999 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255)));
4000 return res;
4002 ppIRExpr(e);
4003 vpanic("128-bit constant is not implemented");
4006 if (e->tag == Iex_Load) {
4007 HReg res = newVRegV(env);
4008 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
4009 vassert(ty == Ity_V128);
4010 addInstr(env, ARMInstr_NLdStQ(True, res, am));
4011 return res;
4014 if (e->tag == Iex_Get) {
4015 HReg addr = newVRegI(env);
4016 HReg res = newVRegV(env);
4017 vassert(ty == Ity_V128);
4018 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
4019 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
4020 return res;
4023 if (e->tag == Iex_Unop) {
4024 switch (e->Iex.Unop.op) {
4025 case Iop_NotV128: {
4026 DECLARE_PATTERN(p_veqz_8x16);
4027 DECLARE_PATTERN(p_veqz_16x8);
4028 DECLARE_PATTERN(p_veqz_32x4);
4029 DECLARE_PATTERN(p_vcge_8sx16);
4030 DECLARE_PATTERN(p_vcge_16sx8);
4031 DECLARE_PATTERN(p_vcge_32sx4);
4032 DECLARE_PATTERN(p_vcge_8ux16);
4033 DECLARE_PATTERN(p_vcge_16ux8);
4034 DECLARE_PATTERN(p_vcge_32ux4);
4035 DEFINE_PATTERN(p_veqz_8x16,
4036 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
4037 DEFINE_PATTERN(p_veqz_16x8,
4038 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
4039 DEFINE_PATTERN(p_veqz_32x4,
4040 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
4041 DEFINE_PATTERN(p_vcge_8sx16,
4042 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
4043 DEFINE_PATTERN(p_vcge_16sx8,
4044 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
4045 DEFINE_PATTERN(p_vcge_32sx4,
4046 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
4047 DEFINE_PATTERN(p_vcge_8ux16,
4048 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
4049 DEFINE_PATTERN(p_vcge_16ux8,
4050 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
4051 DEFINE_PATTERN(p_vcge_32ux4,
4052 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
4053 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
4054 HReg res = newVRegV(env);
4055 HReg arg = iselNeonExpr(env, mi.bindee[0]);
4056 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
4057 return res;
4058 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
4059 HReg res = newVRegV(env);
4060 HReg arg = iselNeonExpr(env, mi.bindee[0]);
4061 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
4062 return res;
4063 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
4064 HReg res = newVRegV(env);
4065 HReg arg = iselNeonExpr(env, mi.bindee[0]);
4066 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
4067 return res;
4068 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
4069 HReg res = newVRegV(env);
4070 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4071 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4072 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4073 res, argL, argR, 0, True));
4074 return res;
4075 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
4076 HReg res = newVRegV(env);
4077 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4078 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4079 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4080 res, argL, argR, 1, True));
4081 return res;
4082 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
4083 HReg res = newVRegV(env);
4084 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4085 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4086 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4087 res, argL, argR, 2, True));
4088 return res;
4089 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
4090 HReg res = newVRegV(env);
4091 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4092 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4093 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4094 res, argL, argR, 0, True));
4095 return res;
4096 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
4097 HReg res = newVRegV(env);
4098 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4099 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4100 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4101 res, argL, argR, 1, True));
4102 return res;
4103 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
4104 HReg res = newVRegV(env);
4105 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4106 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4107 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4108 res, argL, argR, 2, True));
4109 return res;
4110 } else {
4111 HReg res = newVRegV(env);
4112 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4113 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
4114 return res;
4117 case Iop_Dup8x16:
4118 case Iop_Dup16x8:
4119 case Iop_Dup32x4: {
4120 HReg res, arg;
4121 UInt size;
4122 DECLARE_PATTERN(p_vdup_8x16);
4123 DECLARE_PATTERN(p_vdup_16x8);
4124 DECLARE_PATTERN(p_vdup_32x4);
4125 DEFINE_PATTERN(p_vdup_8x16,
4126 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
4127 DEFINE_PATTERN(p_vdup_16x8,
4128 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
4129 DEFINE_PATTERN(p_vdup_32x4,
4130 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
4131 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
4132 UInt index;
4133 UInt imm4;
4134 if (mi.bindee[1]->tag == Iex_Const &&
4135 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4136 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4137 imm4 = (index << 1) + 1;
4138 if (index < 8) {
4139 res = newVRegV(env);
4140 arg = iselNeon64Expr(env, mi.bindee[0]);
4141 addInstr(env, ARMInstr_NUnaryS(
4142 ARMneon_VDUP,
4143 mkARMNRS(ARMNRS_Reg, res, 0),
4144 mkARMNRS(ARMNRS_Scalar, arg, index),
4145 imm4, True
4147 return res;
4150 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
4151 UInt index;
4152 UInt imm4;
4153 if (mi.bindee[1]->tag == Iex_Const &&
4154 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4155 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4156 imm4 = (index << 2) + 2;
4157 if (index < 4) {
4158 res = newVRegV(env);
4159 arg = iselNeon64Expr(env, mi.bindee[0]);
4160 addInstr(env, ARMInstr_NUnaryS(
4161 ARMneon_VDUP,
4162 mkARMNRS(ARMNRS_Reg, res, 0),
4163 mkARMNRS(ARMNRS_Scalar, arg, index),
4164 imm4, True
4166 return res;
4169 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
4170 UInt index;
4171 UInt imm4;
4172 if (mi.bindee[1]->tag == Iex_Const &&
4173 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4174 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4175 imm4 = (index << 3) + 4;
4176 if (index < 2) {
4177 res = newVRegV(env);
4178 arg = iselNeon64Expr(env, mi.bindee[0]);
4179 addInstr(env, ARMInstr_NUnaryS(
4180 ARMneon_VDUP,
4181 mkARMNRS(ARMNRS_Reg, res, 0),
4182 mkARMNRS(ARMNRS_Scalar, arg, index),
4183 imm4, True
4185 return res;
4189 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4190 res = newVRegV(env);
4191 switch (e->Iex.Unop.op) {
4192 case Iop_Dup8x16: size = 0; break;
4193 case Iop_Dup16x8: size = 1; break;
4194 case Iop_Dup32x4: size = 2; break;
4195 default: vassert(0);
4197 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4198 return res;
4200 case Iop_Abs8x16:
4201 case Iop_Abs16x8:
4202 case Iop_Abs32x4: {
4203 HReg res = newVRegV(env);
4204 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4205 UInt size = 0;
4206 switch(e->Iex.Binop.op) {
4207 case Iop_Abs8x16: size = 0; break;
4208 case Iop_Abs16x8: size = 1; break;
4209 case Iop_Abs32x4: size = 2; break;
4210 default: vassert(0);
4212 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4213 return res;
4215 case Iop_Reverse8sIn64_x2:
4216 case Iop_Reverse16sIn64_x2:
4217 case Iop_Reverse32sIn64_x2: {
4218 HReg res = newVRegV(env);
4219 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4220 UInt size = 0;
4221 switch(e->Iex.Binop.op) {
4222 case Iop_Reverse8sIn64_x2: size = 0; break;
4223 case Iop_Reverse16sIn64_x2: size = 1; break;
4224 case Iop_Reverse32sIn64_x2: size = 2; break;
4225 default: vassert(0);
4227 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4228 res, arg, size, True));
4229 return res;
4231 case Iop_Reverse8sIn32_x4:
4232 case Iop_Reverse16sIn32_x4: {
4233 HReg res = newVRegV(env);
4234 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4235 UInt size = 0;
4236 switch(e->Iex.Binop.op) {
4237 case Iop_Reverse8sIn32_x4: size = 0; break;
4238 case Iop_Reverse16sIn32_x4: size = 1; break;
4239 default: vassert(0);
4241 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4242 res, arg, size, True));
4243 return res;
4245 case Iop_Reverse8sIn16_x8: {
4246 HReg res = newVRegV(env);
4247 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4248 UInt size = 0;
4249 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4250 res, arg, size, True));
4251 return res;
4253 case Iop_CmpNEZ64x2: {
4254 HReg x_lsh = newVRegV(env);
4255 HReg x_rsh = newVRegV(env);
4256 HReg lsh_amt = newVRegV(env);
4257 HReg rsh_amt = newVRegV(env);
4258 HReg zero = newVRegV(env);
4259 HReg tmp = newVRegV(env);
4260 HReg tmp2 = newVRegV(env);
4261 HReg res = newVRegV(env);
4262 HReg x = newVRegV(env);
4263 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4264 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4265 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4266 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4267 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4268 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4269 rsh_amt, zero, lsh_amt, 2, True));
4270 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4271 x_lsh, x, lsh_amt, 3, True));
4272 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4273 x_rsh, x, rsh_amt, 3, True));
4274 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4275 tmp, x_lsh, x_rsh, 0, True));
4276 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4277 res, tmp, x, 0, True));
4278 return res;
4280 case Iop_CmpNEZ8x16:
4281 case Iop_CmpNEZ16x8:
4282 case Iop_CmpNEZ32x4: {
4283 HReg res = newVRegV(env);
4284 HReg tmp = newVRegV(env);
4285 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4286 UInt size;
4287 switch (e->Iex.Unop.op) {
4288 case Iop_CmpNEZ8x16: size = 0; break;
4289 case Iop_CmpNEZ16x8: size = 1; break;
4290 case Iop_CmpNEZ32x4: size = 2; break;
4291 default: vassert(0);
4293 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4294 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4295 return res;
4297 case Iop_Widen8Uto16x8:
4298 case Iop_Widen16Uto32x4:
4299 case Iop_Widen32Uto64x2: {
4300 HReg res = newVRegV(env);
4301 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4302 UInt size;
4303 switch (e->Iex.Unop.op) {
4304 case Iop_Widen8Uto16x8: size = 0; break;
4305 case Iop_Widen16Uto32x4: size = 1; break;
4306 case Iop_Widen32Uto64x2: size = 2; break;
4307 default: vassert(0);
4309 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4310 res, arg, size, True));
4311 return res;
4313 case Iop_Widen8Sto16x8:
4314 case Iop_Widen16Sto32x4:
4315 case Iop_Widen32Sto64x2: {
4316 HReg res = newVRegV(env);
4317 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4318 UInt size;
4319 switch (e->Iex.Unop.op) {
4320 case Iop_Widen8Sto16x8: size = 0; break;
4321 case Iop_Widen16Sto32x4: size = 1; break;
4322 case Iop_Widen32Sto64x2: size = 2; break;
4323 default: vassert(0);
4325 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4326 res, arg, size, True));
4327 return res;
4329 case Iop_PwAddL8Sx16:
4330 case Iop_PwAddL16Sx8:
4331 case Iop_PwAddL32Sx4: {
4332 HReg res = newVRegV(env);
4333 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4334 UInt size = 0;
4335 switch(e->Iex.Binop.op) {
4336 case Iop_PwAddL8Sx16: size = 0; break;
4337 case Iop_PwAddL16Sx8: size = 1; break;
4338 case Iop_PwAddL32Sx4: size = 2; break;
4339 default: vassert(0);
4341 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4342 res, arg, size, True));
4343 return res;
4345 case Iop_PwAddL8Ux16:
4346 case Iop_PwAddL16Ux8:
4347 case Iop_PwAddL32Ux4: {
4348 HReg res = newVRegV(env);
4349 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4350 UInt size = 0;
4351 switch(e->Iex.Binop.op) {
4352 case Iop_PwAddL8Ux16: size = 0; break;
4353 case Iop_PwAddL16Ux8: size = 1; break;
4354 case Iop_PwAddL32Ux4: size = 2; break;
4355 default: vassert(0);
4357 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4358 res, arg, size, True));
4359 return res;
4361 case Iop_Cnt8x16: {
4362 HReg res = newVRegV(env);
4363 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4364 UInt size = 0;
4365 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4366 return res;
4368 case Iop_Clz8x16:
4369 case Iop_Clz16x8:
4370 case Iop_Clz32x4: {
4371 HReg res = newVRegV(env);
4372 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4373 UInt size = 0;
4374 switch(e->Iex.Binop.op) {
4375 case Iop_Clz8x16: size = 0; break;
4376 case Iop_Clz16x8: size = 1; break;
4377 case Iop_Clz32x4: size = 2; break;
4378 default: vassert(0);
4380 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4381 return res;
4383 case Iop_Cls8x16:
4384 case Iop_Cls16x8:
4385 case Iop_Cls32x4: {
4386 HReg res = newVRegV(env);
4387 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4388 UInt size = 0;
4389 switch(e->Iex.Binop.op) {
4390 case Iop_Cls8x16: size = 0; break;
4391 case Iop_Cls16x8: size = 1; break;
4392 case Iop_Cls32x4: size = 2; break;
4393 default: vassert(0);
4395 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4396 return res;
4398 case Iop_F32toI32Sx4_RZ: {
4399 HReg res = newVRegV(env);
4400 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4401 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4402 res, arg, 2, True));
4403 return res;
4405 case Iop_F32toI32Ux4_RZ: {
4406 HReg res = newVRegV(env);
4407 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4408 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4409 res, arg, 2, True));
4410 return res;
4412 case Iop_I32StoF32x4_DEP: {
4413 HReg res = newVRegV(env);
4414 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4415 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4416 res, arg, 2, True));
4417 return res;
4419 case Iop_I32UtoF32x4_DEP: {
4420 HReg res = newVRegV(env);
4421 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4422 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4423 res, arg, 2, True));
4424 return res;
4426 case Iop_F16toF32x4: {
4427 HReg res = newVRegV(env);
4428 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4429 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4430 res, arg, 2, True));
4431 return res;
4433 case Iop_RecipEst32Fx4: {
4434 HReg res = newVRegV(env);
4435 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4436 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4437 res, argL, 0, True));
4438 return res;
4440 case Iop_RecipEst32Ux4: {
4441 HReg res = newVRegV(env);
4442 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4443 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4444 res, argL, 0, True));
4445 return res;
4447 case Iop_Abs32Fx4: {
4448 HReg res = newVRegV(env);
4449 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4450 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4451 res, argL, 0, True));
4452 return res;
4454 case Iop_RSqrtEst32Fx4: {
4455 HReg res = newVRegV(env);
4456 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4457 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4458 res, argL, 0, True));
4459 return res;
4461 case Iop_RSqrtEst32Ux4: {
4462 HReg res = newVRegV(env);
4463 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4464 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4465 res, argL, 0, True));
4466 return res;
4468 case Iop_Neg32Fx4: {
4469 HReg res = newVRegV(env);
4470 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4471 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4472 res, arg, 0, True));
4473 return res;
4475 /* ... */
4476 default:
4477 break;
4481 if (e->tag == Iex_Binop) {
4482 switch (e->Iex.Binop.op) {
4483 case Iop_64HLtoV128: {
4484 /* Try to match into single "VMOV reg, imm" instruction */
4485 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4486 e->Iex.Binop.arg2->tag == Iex_Const &&
4487 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4488 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4489 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4490 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4491 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4492 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4493 if (imm) {
4494 HReg res = newVRegV(env);
4495 addInstr(env, ARMInstr_NeonImm(res, imm));
4496 return res;
4498 if ((imm64 >> 32) == 0LL &&
4499 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4500 HReg tmp1 = newVRegV(env);
4501 HReg tmp2 = newVRegV(env);
4502 HReg res = newVRegV(env);
4503 if (imm->type < 10) {
4504 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4505 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4506 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4507 res, tmp1, tmp2, 4, True));
4508 return res;
4511 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4512 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4513 HReg tmp1 = newVRegV(env);
4514 HReg tmp2 = newVRegV(env);
4515 HReg res = newVRegV(env);
4516 if (imm->type < 10) {
4517 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4518 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4519 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4520 res, tmp1, tmp2, 4, True));
4521 return res;
4525 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4526 it the slow way. */
4527 HReg dHi = iselNeon64Expr(env, e->Iex.Binop.arg1);
4528 HReg dLo = iselNeon64Expr(env, e->Iex.Binop.arg2);
4529 HReg res = newVRegV(env);
4530 addInstr(env, ARMInstr_VXferQ(True/*toQ*/, res, dHi, dLo));
4531 return res;
4533 case Iop_AndV128: {
4534 HReg res = newVRegV(env);
4535 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4536 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4537 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4538 res, argL, argR, 4, True));
4539 return res;
4541 case Iop_OrV128: {
4542 HReg res = newVRegV(env);
4543 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4544 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4545 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4546 res, argL, argR, 4, True));
4547 return res;
4549 case Iop_XorV128: {
4550 HReg res = newVRegV(env);
4551 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4552 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4553 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4554 res, argL, argR, 4, True));
4555 return res;
4557 case Iop_Add8x16:
4558 case Iop_Add16x8:
4559 case Iop_Add32x4:
4560 case Iop_Add64x2: {
4562 FIXME: remove this if not used
4563 DECLARE_PATTERN(p_vrhadd_32sx4);
4564 ULong one = (1LL << 32) | 1LL;
4565 DEFINE_PATTERN(p_vrhadd_32sx4,
4566 binop(Iop_Add32x4,
4567 binop(Iop_Add32x4,
4568 binop(Iop_SarN32x4,
4569 bind(0),
4570 mkU8(1)),
4571 binop(Iop_SarN32x4,
4572 bind(1),
4573 mkU8(1))),
4574 binop(Iop_SarN32x4,
4575 binop(Iop_Add32x4,
4576 binop(Iop_Add32x4,
4577 binop(Iop_AndV128,
4578 bind(0),
4579 mkU128(one)),
4580 binop(Iop_AndV128,
4581 bind(1),
4582 mkU128(one))),
4583 mkU128(one)),
4584 mkU8(1))));
4586 HReg res = newVRegV(env);
4587 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4588 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4589 UInt size;
4590 switch (e->Iex.Binop.op) {
4591 case Iop_Add8x16: size = 0; break;
4592 case Iop_Add16x8: size = 1; break;
4593 case Iop_Add32x4: size = 2; break;
4594 case Iop_Add64x2: size = 3; break;
4595 default:
4596 ppIROp(e->Iex.Binop.op);
4597 vpanic("Illegal element size in VADD");
4599 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4600 res, argL, argR, size, True));
4601 return res;
4603 case Iop_RecipStep32Fx4: {
4604 HReg res = newVRegV(env);
4605 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4606 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4607 UInt size = 0;
4608 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4609 res, argL, argR, size, True));
4610 return res;
4612 case Iop_RSqrtStep32Fx4: {
4613 HReg res = newVRegV(env);
4614 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4615 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4616 UInt size = 0;
4617 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4618 res, argL, argR, size, True));
4619 return res;
4622 // These 6 verified 18 Apr 2013
4623 case Iop_InterleaveEvenLanes8x16:
4624 case Iop_InterleaveOddLanes8x16:
4625 case Iop_InterleaveEvenLanes16x8:
4626 case Iop_InterleaveOddLanes16x8:
4627 case Iop_InterleaveEvenLanes32x4:
4628 case Iop_InterleaveOddLanes32x4: {
4629 HReg rD = newVRegV(env);
4630 HReg rM = newVRegV(env);
4631 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4632 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4633 UInt size;
4634 Bool resRd; // is the result in rD or rM ?
4635 switch (e->Iex.Binop.op) {
4636 case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break;
4637 case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break;
4638 case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break;
4639 case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break;
4640 case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break;
4641 case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break;
4642 default: vassert(0);
4644 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4645 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4646 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
4647 return resRd ? rD : rM;
4650 // These 6 verified 18 Apr 2013
4651 case Iop_InterleaveHI8x16:
4652 case Iop_InterleaveLO8x16:
4653 case Iop_InterleaveHI16x8:
4654 case Iop_InterleaveLO16x8:
4655 case Iop_InterleaveHI32x4:
4656 case Iop_InterleaveLO32x4: {
4657 HReg rD = newVRegV(env);
4658 HReg rM = newVRegV(env);
4659 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4660 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4661 UInt size;
4662 Bool resRd; // is the result in rD or rM ?
4663 switch (e->Iex.Binop.op) {
4664 case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
4665 case Iop_InterleaveLO8x16: resRd = True; size = 0; break;
4666 case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
4667 case Iop_InterleaveLO16x8: resRd = True; size = 1; break;
4668 case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
4669 case Iop_InterleaveLO32x4: resRd = True; size = 2; break;
4670 default: vassert(0);
4672 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4673 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4674 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
4675 return resRd ? rD : rM;
4678 // These 6 verified 18 Apr 2013
4679 case Iop_CatOddLanes8x16:
4680 case Iop_CatEvenLanes8x16:
4681 case Iop_CatOddLanes16x8:
4682 case Iop_CatEvenLanes16x8:
4683 case Iop_CatOddLanes32x4:
4684 case Iop_CatEvenLanes32x4: {
4685 HReg rD = newVRegV(env);
4686 HReg rM = newVRegV(env);
4687 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4688 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4689 UInt size;
4690 Bool resRd; // is the result in rD or rM ?
4691 switch (e->Iex.Binop.op) {
4692 case Iop_CatOddLanes8x16: resRd = False; size = 0; break;
4693 case Iop_CatEvenLanes8x16: resRd = True; size = 0; break;
4694 case Iop_CatOddLanes16x8: resRd = False; size = 1; break;
4695 case Iop_CatEvenLanes16x8: resRd = True; size = 1; break;
4696 case Iop_CatOddLanes32x4: resRd = False; size = 2; break;
4697 case Iop_CatEvenLanes32x4: resRd = True; size = 2; break;
4698 default: vassert(0);
4700 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4701 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4702 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
4703 return resRd ? rD : rM;
4706 case Iop_QAdd8Ux16:
4707 case Iop_QAdd16Ux8:
4708 case Iop_QAdd32Ux4:
4709 case Iop_QAdd64Ux2: {
4710 HReg res = newVRegV(env);
4711 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4712 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4713 UInt size;
4714 switch (e->Iex.Binop.op) {
4715 case Iop_QAdd8Ux16: size = 0; break;
4716 case Iop_QAdd16Ux8: size = 1; break;
4717 case Iop_QAdd32Ux4: size = 2; break;
4718 case Iop_QAdd64Ux2: size = 3; break;
4719 default:
4720 ppIROp(e->Iex.Binop.op);
4721 vpanic("Illegal element size in VQADDU");
4723 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4724 res, argL, argR, size, True));
4725 return res;
4727 case Iop_QAdd8Sx16:
4728 case Iop_QAdd16Sx8:
4729 case Iop_QAdd32Sx4:
4730 case Iop_QAdd64Sx2: {
4731 HReg res = newVRegV(env);
4732 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4733 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4734 UInt size;
4735 switch (e->Iex.Binop.op) {
4736 case Iop_QAdd8Sx16: size = 0; break;
4737 case Iop_QAdd16Sx8: size = 1; break;
4738 case Iop_QAdd32Sx4: size = 2; break;
4739 case Iop_QAdd64Sx2: size = 3; break;
4740 default:
4741 ppIROp(e->Iex.Binop.op);
4742 vpanic("Illegal element size in VQADDS");
4744 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4745 res, argL, argR, size, True));
4746 return res;
4748 case Iop_Sub8x16:
4749 case Iop_Sub16x8:
4750 case Iop_Sub32x4:
4751 case Iop_Sub64x2: {
4752 HReg res = newVRegV(env);
4753 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4754 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4755 UInt size;
4756 switch (e->Iex.Binop.op) {
4757 case Iop_Sub8x16: size = 0; break;
4758 case Iop_Sub16x8: size = 1; break;
4759 case Iop_Sub32x4: size = 2; break;
4760 case Iop_Sub64x2: size = 3; break;
4761 default:
4762 ppIROp(e->Iex.Binop.op);
4763 vpanic("Illegal element size in VSUB");
4765 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4766 res, argL, argR, size, True));
4767 return res;
4769 case Iop_QSub8Ux16:
4770 case Iop_QSub16Ux8:
4771 case Iop_QSub32Ux4:
4772 case Iop_QSub64Ux2: {
4773 HReg res = newVRegV(env);
4774 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4775 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4776 UInt size;
4777 switch (e->Iex.Binop.op) {
4778 case Iop_QSub8Ux16: size = 0; break;
4779 case Iop_QSub16Ux8: size = 1; break;
4780 case Iop_QSub32Ux4: size = 2; break;
4781 case Iop_QSub64Ux2: size = 3; break;
4782 default:
4783 ppIROp(e->Iex.Binop.op);
4784 vpanic("Illegal element size in VQSUBU");
4786 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4787 res, argL, argR, size, True));
4788 return res;
4790 case Iop_QSub8Sx16:
4791 case Iop_QSub16Sx8:
4792 case Iop_QSub32Sx4:
4793 case Iop_QSub64Sx2: {
4794 HReg res = newVRegV(env);
4795 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4796 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4797 UInt size;
4798 switch (e->Iex.Binop.op) {
4799 case Iop_QSub8Sx16: size = 0; break;
4800 case Iop_QSub16Sx8: size = 1; break;
4801 case Iop_QSub32Sx4: size = 2; break;
4802 case Iop_QSub64Sx2: size = 3; break;
4803 default:
4804 ppIROp(e->Iex.Binop.op);
4805 vpanic("Illegal element size in VQSUBS");
4807 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4808 res, argL, argR, size, True));
4809 return res;
4811 case Iop_Max8Ux16:
4812 case Iop_Max16Ux8:
4813 case Iop_Max32Ux4: {
4814 HReg res = newVRegV(env);
4815 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4816 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4817 UInt size;
4818 switch (e->Iex.Binop.op) {
4819 case Iop_Max8Ux16: size = 0; break;
4820 case Iop_Max16Ux8: size = 1; break;
4821 case Iop_Max32Ux4: size = 2; break;
4822 default: vpanic("Illegal element size in VMAXU");
4824 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4825 res, argL, argR, size, True));
4826 return res;
4828 case Iop_Max8Sx16:
4829 case Iop_Max16Sx8:
4830 case Iop_Max32Sx4: {
4831 HReg res = newVRegV(env);
4832 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4833 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4834 UInt size;
4835 switch (e->Iex.Binop.op) {
4836 case Iop_Max8Sx16: size = 0; break;
4837 case Iop_Max16Sx8: size = 1; break;
4838 case Iop_Max32Sx4: size = 2; break;
4839 default: vpanic("Illegal element size in VMAXU");
4841 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4842 res, argL, argR, size, True));
4843 return res;
4845 case Iop_Min8Ux16:
4846 case Iop_Min16Ux8:
4847 case Iop_Min32Ux4: {
4848 HReg res = newVRegV(env);
4849 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4850 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4851 UInt size;
4852 switch (e->Iex.Binop.op) {
4853 case Iop_Min8Ux16: size = 0; break;
4854 case Iop_Min16Ux8: size = 1; break;
4855 case Iop_Min32Ux4: size = 2; break;
4856 default: vpanic("Illegal element size in VMAXU");
4858 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4859 res, argL, argR, size, True));
4860 return res;
4862 case Iop_Min8Sx16:
4863 case Iop_Min16Sx8:
4864 case Iop_Min32Sx4: {
4865 HReg res = newVRegV(env);
4866 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4867 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4868 UInt size;
4869 switch (e->Iex.Binop.op) {
4870 case Iop_Min8Sx16: size = 0; break;
4871 case Iop_Min16Sx8: size = 1; break;
4872 case Iop_Min32Sx4: size = 2; break;
4873 default: vpanic("Illegal element size in VMAXU");
4875 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4876 res, argL, argR, size, True));
4877 return res;
4879 case Iop_Sar8x16:
4880 case Iop_Sar16x8:
4881 case Iop_Sar32x4:
4882 case Iop_Sar64x2: {
4883 HReg res = newVRegV(env);
4884 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4885 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4886 HReg argR2 = newVRegV(env);
4887 HReg zero = newVRegV(env);
4888 UInt size;
4889 switch (e->Iex.Binop.op) {
4890 case Iop_Sar8x16: size = 0; break;
4891 case Iop_Sar16x8: size = 1; break;
4892 case Iop_Sar32x4: size = 2; break;
4893 case Iop_Sar64x2: size = 3; break;
4894 default: vassert(0);
4896 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4897 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4898 argR2, zero, argR, size, True));
4899 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4900 res, argL, argR2, size, True));
4901 return res;
4903 case Iop_Sal8x16:
4904 case Iop_Sal16x8:
4905 case Iop_Sal32x4:
4906 case Iop_Sal64x2: {
4907 HReg res = newVRegV(env);
4908 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4909 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4910 UInt size;
4911 switch (e->Iex.Binop.op) {
4912 case Iop_Sal8x16: size = 0; break;
4913 case Iop_Sal16x8: size = 1; break;
4914 case Iop_Sal32x4: size = 2; break;
4915 case Iop_Sal64x2: size = 3; break;
4916 default: vassert(0);
4918 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4919 res, argL, argR, size, True));
4920 return res;
4922 case Iop_Shr8x16:
4923 case Iop_Shr16x8:
4924 case Iop_Shr32x4:
4925 case Iop_Shr64x2: {
4926 HReg res = newVRegV(env);
4927 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4928 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4929 HReg argR2 = newVRegV(env);
4930 HReg zero = newVRegV(env);
4931 UInt size;
4932 switch (e->Iex.Binop.op) {
4933 case Iop_Shr8x16: size = 0; break;
4934 case Iop_Shr16x8: size = 1; break;
4935 case Iop_Shr32x4: size = 2; break;
4936 case Iop_Shr64x2: size = 3; break;
4937 default: vassert(0);
4939 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4940 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4941 argR2, zero, argR, size, True));
4942 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4943 res, argL, argR2, size, True));
4944 return res;
4946 case Iop_Shl8x16:
4947 case Iop_Shl16x8:
4948 case Iop_Shl32x4:
4949 case Iop_Shl64x2: {
4950 HReg res = newVRegV(env);
4951 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4952 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4953 UInt size;
4954 switch (e->Iex.Binop.op) {
4955 case Iop_Shl8x16: size = 0; break;
4956 case Iop_Shl16x8: size = 1; break;
4957 case Iop_Shl32x4: size = 2; break;
4958 case Iop_Shl64x2: size = 3; break;
4959 default: vassert(0);
4961 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4962 res, argL, argR, size, True));
4963 return res;
4965 case Iop_QShl8x16:
4966 case Iop_QShl16x8:
4967 case Iop_QShl32x4:
4968 case Iop_QShl64x2: {
4969 HReg res = newVRegV(env);
4970 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4971 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4972 UInt size;
4973 switch (e->Iex.Binop.op) {
4974 case Iop_QShl8x16: size = 0; break;
4975 case Iop_QShl16x8: size = 1; break;
4976 case Iop_QShl32x4: size = 2; break;
4977 case Iop_QShl64x2: size = 3; break;
4978 default: vassert(0);
4980 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4981 res, argL, argR, size, True));
4982 return res;
4984 case Iop_QSal8x16:
4985 case Iop_QSal16x8:
4986 case Iop_QSal32x4:
4987 case Iop_QSal64x2: {
4988 HReg res = newVRegV(env);
4989 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4990 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4991 UInt size;
4992 switch (e->Iex.Binop.op) {
4993 case Iop_QSal8x16: size = 0; break;
4994 case Iop_QSal16x8: size = 1; break;
4995 case Iop_QSal32x4: size = 2; break;
4996 case Iop_QSal64x2: size = 3; break;
4997 default: vassert(0);
4999 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
5000 res, argL, argR, size, True));
5001 return res;
5003 case Iop_QShlNsatUU8x16:
5004 case Iop_QShlNsatUU16x8:
5005 case Iop_QShlNsatUU32x4:
5006 case Iop_QShlNsatUU64x2: {
5007 HReg res = newVRegV(env);
5008 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5009 UInt size, imm;
5010 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5011 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5012 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
5013 "second argument only\n");
5015 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5016 switch (e->Iex.Binop.op) {
5017 case Iop_QShlNsatUU8x16: size = 8 | imm; break;
5018 case Iop_QShlNsatUU16x8: size = 16 | imm; break;
5019 case Iop_QShlNsatUU32x4: size = 32 | imm; break;
5020 case Iop_QShlNsatUU64x2: size = 64 | imm; break;
5021 default: vassert(0);
5023 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
5024 res, argL, size, True));
5025 return res;
5027 case Iop_QShlNsatSU8x16:
5028 case Iop_QShlNsatSU16x8:
5029 case Iop_QShlNsatSU32x4:
5030 case Iop_QShlNsatSU64x2: {
5031 HReg res = newVRegV(env);
5032 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5033 UInt size, imm;
5034 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5035 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5036 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
5037 "second argument only\n");
5039 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5040 switch (e->Iex.Binop.op) {
5041 case Iop_QShlNsatSU8x16: size = 8 | imm; break;
5042 case Iop_QShlNsatSU16x8: size = 16 | imm; break;
5043 case Iop_QShlNsatSU32x4: size = 32 | imm; break;
5044 case Iop_QShlNsatSU64x2: size = 64 | imm; break;
5045 default: vassert(0);
5047 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
5048 res, argL, size, True));
5049 return res;
5051 case Iop_QShlNsatSS8x16:
5052 case Iop_QShlNsatSS16x8:
5053 case Iop_QShlNsatSS32x4:
5054 case Iop_QShlNsatSS64x2: {
5055 HReg res = newVRegV(env);
5056 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5057 UInt size, imm;
5058 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5059 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5060 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
5061 "second argument only\n");
5063 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5064 switch (e->Iex.Binop.op) {
5065 case Iop_QShlNsatSS8x16: size = 8 | imm; break;
5066 case Iop_QShlNsatSS16x8: size = 16 | imm; break;
5067 case Iop_QShlNsatSS32x4: size = 32 | imm; break;
5068 case Iop_QShlNsatSS64x2: size = 64 | imm; break;
5069 default: vassert(0);
5071 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
5072 res, argL, size, True));
5073 return res;
5075 case Iop_ShrN8x16:
5076 case Iop_ShrN16x8:
5077 case Iop_ShrN32x4:
5078 case Iop_ShrN64x2: {
5079 HReg res = newVRegV(env);
5080 HReg tmp = newVRegV(env);
5081 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5082 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5083 HReg argR2 = newVRegI(env);
5084 UInt size;
5085 switch (e->Iex.Binop.op) {
5086 case Iop_ShrN8x16: size = 0; break;
5087 case Iop_ShrN16x8: size = 1; break;
5088 case Iop_ShrN32x4: size = 2; break;
5089 case Iop_ShrN64x2: size = 3; break;
5090 default: vassert(0);
5092 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5093 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
5094 tmp, argR2, 0, True));
5095 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5096 res, argL, tmp, size, True));
5097 return res;
5099 case Iop_ShlN8x16:
5100 case Iop_ShlN16x8:
5101 case Iop_ShlN32x4:
5102 case Iop_ShlN64x2: {
5103 HReg res = newVRegV(env);
5104 HReg tmp = newVRegV(env);
5105 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5106 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5107 UInt size;
5108 switch (e->Iex.Binop.op) {
5109 case Iop_ShlN8x16: size = 0; break;
5110 case Iop_ShlN16x8: size = 1; break;
5111 case Iop_ShlN32x4: size = 2; break;
5112 case Iop_ShlN64x2: size = 3; break;
5113 default: vassert(0);
5115 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
5116 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5117 res, argL, tmp, size, True));
5118 return res;
5120 case Iop_SarN8x16:
5121 case Iop_SarN16x8:
5122 case Iop_SarN32x4:
5123 case Iop_SarN64x2: {
5124 HReg res = newVRegV(env);
5125 HReg tmp = newVRegV(env);
5126 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5127 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5128 HReg argR2 = newVRegI(env);
5129 UInt size;
5130 switch (e->Iex.Binop.op) {
5131 case Iop_SarN8x16: size = 0; break;
5132 case Iop_SarN16x8: size = 1; break;
5133 case Iop_SarN32x4: size = 2; break;
5134 case Iop_SarN64x2: size = 3; break;
5135 default: vassert(0);
5137 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5138 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
5139 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5140 res, argL, tmp, size, True));
5141 return res;
5143 case Iop_CmpGT8Ux16:
5144 case Iop_CmpGT16Ux8:
5145 case Iop_CmpGT32Ux4: {
5146 HReg res = newVRegV(env);
5147 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5148 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5149 UInt size;
5150 switch (e->Iex.Binop.op) {
5151 case Iop_CmpGT8Ux16: size = 0; break;
5152 case Iop_CmpGT16Ux8: size = 1; break;
5153 case Iop_CmpGT32Ux4: size = 2; break;
5154 default: vassert(0);
5156 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5157 res, argL, argR, size, True));
5158 return res;
5160 case Iop_CmpGT8Sx16:
5161 case Iop_CmpGT16Sx8:
5162 case Iop_CmpGT32Sx4: {
5163 HReg res = newVRegV(env);
5164 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5165 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5166 UInt size;
5167 switch (e->Iex.Binop.op) {
5168 case Iop_CmpGT8Sx16: size = 0; break;
5169 case Iop_CmpGT16Sx8: size = 1; break;
5170 case Iop_CmpGT32Sx4: size = 2; break;
5171 default: vassert(0);
5173 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5174 res, argL, argR, size, True));
5175 return res;
5177 case Iop_CmpEQ8x16:
5178 case Iop_CmpEQ16x8:
5179 case Iop_CmpEQ32x4: {
5180 HReg res = newVRegV(env);
5181 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5182 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5183 UInt size;
5184 switch (e->Iex.Binop.op) {
5185 case Iop_CmpEQ8x16: size = 0; break;
5186 case Iop_CmpEQ16x8: size = 1; break;
5187 case Iop_CmpEQ32x4: size = 2; break;
5188 default: vassert(0);
5190 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5191 res, argL, argR, size, True));
5192 return res;
5194 case Iop_Mul8x16:
5195 case Iop_Mul16x8:
5196 case Iop_Mul32x4: {
5197 HReg res = newVRegV(env);
5198 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5199 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5200 UInt size = 0;
5201 switch(e->Iex.Binop.op) {
5202 case Iop_Mul8x16: size = 0; break;
5203 case Iop_Mul16x8: size = 1; break;
5204 case Iop_Mul32x4: size = 2; break;
5205 default: vassert(0);
5207 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5208 res, argL, argR, size, True));
5209 return res;
5211 case Iop_Mull8Ux8:
5212 case Iop_Mull16Ux4:
5213 case Iop_Mull32Ux2: {
5214 HReg res = newVRegV(env);
5215 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5216 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5217 UInt size = 0;
5218 switch(e->Iex.Binop.op) {
5219 case Iop_Mull8Ux8: size = 0; break;
5220 case Iop_Mull16Ux4: size = 1; break;
5221 case Iop_Mull32Ux2: size = 2; break;
5222 default: vassert(0);
5224 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5225 res, argL, argR, size, True));
5226 return res;
5229 case Iop_Mull8Sx8:
5230 case Iop_Mull16Sx4:
5231 case Iop_Mull32Sx2: {
5232 HReg res = newVRegV(env);
5233 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5234 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5235 UInt size = 0;
5236 switch(e->Iex.Binop.op) {
5237 case Iop_Mull8Sx8: size = 0; break;
5238 case Iop_Mull16Sx4: size = 1; break;
5239 case Iop_Mull32Sx2: size = 2; break;
5240 default: vassert(0);
5242 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5243 res, argL, argR, size, True));
5244 return res;
5247 case Iop_QDMulHi16Sx8:
5248 case Iop_QDMulHi32Sx4: {
5249 HReg res = newVRegV(env);
5250 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5251 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5252 UInt size = 0;
5253 switch(e->Iex.Binop.op) {
5254 case Iop_QDMulHi16Sx8: size = 1; break;
5255 case Iop_QDMulHi32Sx4: size = 2; break;
5256 default: vassert(0);
5258 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5259 res, argL, argR, size, True));
5260 return res;
5263 case Iop_QRDMulHi16Sx8:
5264 case Iop_QRDMulHi32Sx4: {
5265 HReg res = newVRegV(env);
5266 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5267 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5268 UInt size = 0;
5269 switch(e->Iex.Binop.op) {
5270 case Iop_QRDMulHi16Sx8: size = 1; break;
5271 case Iop_QRDMulHi32Sx4: size = 2; break;
5272 default: vassert(0);
5274 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5275 res, argL, argR, size, True));
5276 return res;
5279 case Iop_QDMull16Sx4:
5280 case Iop_QDMull32Sx2: {
5281 HReg res = newVRegV(env);
5282 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5283 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5284 UInt size = 0;
5285 switch(e->Iex.Binop.op) {
5286 case Iop_QDMull16Sx4: size = 1; break;
5287 case Iop_QDMull32Sx2: size = 2; break;
5288 default: vassert(0);
5290 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5291 res, argL, argR, size, True));
5292 return res;
5294 case Iop_PolynomialMul8x16: {
5295 HReg res = newVRegV(env);
5296 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5297 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5298 UInt size = 0;
5299 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5300 res, argL, argR, size, True));
5301 return res;
5303 case Iop_Max32Fx4: {
5304 HReg res = newVRegV(env);
5305 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5306 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5307 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5308 res, argL, argR, 2, True));
5309 return res;
5311 case Iop_Min32Fx4: {
5312 HReg res = newVRegV(env);
5313 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5314 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5315 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5316 res, argL, argR, 2, True));
5317 return res;
5319 case Iop_PwMax32Fx4: {
5320 HReg res = newVRegV(env);
5321 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5322 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5323 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5324 res, argL, argR, 2, True));
5325 return res;
5327 case Iop_PwMin32Fx4: {
5328 HReg res = newVRegV(env);
5329 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5330 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5331 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5332 res, argL, argR, 2, True));
5333 return res;
5335 case Iop_CmpGT32Fx4: {
5336 HReg res = newVRegV(env);
5337 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5338 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5339 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5340 res, argL, argR, 2, True));
5341 return res;
5343 case Iop_CmpGE32Fx4: {
5344 HReg res = newVRegV(env);
5345 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5346 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5347 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5348 res, argL, argR, 2, True));
5349 return res;
5351 case Iop_CmpEQ32Fx4: {
5352 HReg res = newVRegV(env);
5353 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5354 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5355 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5356 res, argL, argR, 2, True));
5357 return res;
5360 case Iop_PolynomialMull8x8: {
5361 HReg res = newVRegV(env);
5362 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5363 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5364 UInt size = 0;
5365 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5366 res, argL, argR, size, True));
5367 return res;
5369 case Iop_F32ToFixed32Ux4_RZ:
5370 case Iop_F32ToFixed32Sx4_RZ:
5371 case Iop_Fixed32UToF32x4_RN:
5372 case Iop_Fixed32SToF32x4_RN: {
5373 HReg res = newVRegV(env);
5374 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5375 ARMNeonUnOp op;
5376 UInt imm6;
5377 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5378 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5379 vpanic("ARM supports FP <-> Fixed conversion with constant "
5380 "second argument less than 33 only\n");
5382 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5383 vassert(imm6 <= 32 && imm6 > 0);
5384 imm6 = 64 - imm6;
5385 switch(e->Iex.Binop.op) {
5386 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5387 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5388 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5389 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5390 default: vassert(0);
5392 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5393 return res;
5396 FIXME remove if not used
5397 case Iop_VDup8x16:
5398 case Iop_VDup16x8:
5399 case Iop_VDup32x4: {
5400 HReg res = newVRegV(env);
5401 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5402 UInt imm4;
5403 UInt index;
5404 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5405 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5406 vpanic("ARM supports Iop_VDup with constant "
5407 "second argument less than 16 only\n");
5409 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5410 switch(e->Iex.Binop.op) {
5411 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5412 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5413 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5414 default: vassert(0);
5416 if (imm4 >= 16) {
5417 vpanic("ARM supports Iop_VDup with constant "
5418 "second argument less than 16 only\n");
5420 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5421 res, argL, imm4, True));
5422 return res;
5425 case Iop_PwAdd8x16:
5426 case Iop_PwAdd16x8:
5427 case Iop_PwAdd32x4: {
5428 HReg res = newVRegV(env);
5429 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5430 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5431 UInt size = 0;
5432 switch(e->Iex.Binop.op) {
5433 case Iop_PwAdd8x16: size = 0; break;
5434 case Iop_PwAdd16x8: size = 1; break;
5435 case Iop_PwAdd32x4: size = 2; break;
5436 default: vassert(0);
5438 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5439 res, argL, argR, size, True));
5440 return res;
5442 /* ... */
5443 default:
5444 break;
5448 if (e->tag == Iex_Triop) {
5449 IRTriop *triop = e->Iex.Triop.details;
5451 switch (triop->op) {
5452 case Iop_SliceV128: {
5453 HReg res = newVRegV(env);
5454 HReg argL = iselNeonExpr(env, triop->arg2);
5455 HReg argR = iselNeonExpr(env, triop->arg1);
5456 UInt imm4;
5457 if (triop->arg3->tag != Iex_Const ||
5458 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5459 vpanic("ARM target supports Iop_ExtractV128 with constant "
5460 "third argument less than 16 only\n");
5462 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5463 if (imm4 >= 16) {
5464 vpanic("ARM target supports Iop_ExtractV128 with constant "
5465 "third argument less than 16 only\n");
5467 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5468 res, argL, argR, imm4, True));
5469 return res;
5471 case Iop_Mul32Fx4:
5472 case Iop_Sub32Fx4:
5473 case Iop_Add32Fx4: {
5474 HReg res = newVRegV(env);
5475 HReg argL = iselNeonExpr(env, triop->arg2);
5476 HReg argR = iselNeonExpr(env, triop->arg3);
5477 UInt size = 0;
5478 ARMNeonBinOp op = ARMneon_INVALID;
5479 switch (triop->op) {
5480 case Iop_Mul32Fx4: op = ARMneon_VMULFP; break;
5481 case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break;
5482 case Iop_Add32Fx4: op = ARMneon_VADDFP; break;
5483 default: vassert(0);
5485 addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True));
5486 return res;
5488 default:
5489 break;
5493 if (e->tag == Iex_ITE) { // VFD
5494 ARMCondCode cc;
5495 HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue);
5496 HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse);
5497 HReg dst = newVRegV(env);
5498 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5499 cc = iselCondCode(env, e->Iex.ITE.cond);
5500 addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5501 return dst;
5504 /* neon_expr_bad: */
5505 ppIRExpr(e);
5506 vpanic("iselNeonExpr_wrk");
5509 /*---------------------------------------------------------*/
5510 /*--- ISEL: Floating point expressions (64 bit) ---*/
5511 /*---------------------------------------------------------*/
5513 /* Compute a 64-bit floating point value into a register, the identity
5514 of which is returned. As with iselIntExpr_R, the reg may be either
5515 real or virtual; in any case it must not be changed by subsequent
5516 code emitted by the caller. */
5518 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5520 HReg r = iselDblExpr_wrk( env, e );
5521 # if 0
5522 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5523 # endif
5524 vassert(hregClass(r) == HRcFlt64);
5525 vassert(hregIsVirtual(r));
5526 return r;
/* DO NOT CALL THIS DIRECTLY */
/* Worker for iselDblExpr: selects ARM VFP instructions computing the
   F64-typed IRExpr 'e' into a fresh (or already-bound) D register,
   which is returned.  Panics on any expression form it cannot handle. */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   /* Temporary: just return the vreg already associated with it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* Just handle the zero case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
         /* Materialise 0 in an int reg and copy it into both halves
            of the destination D register. */
         HReg z32 = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARMInstr_Imm32(z32, 0));
         addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
         return dst;
      }
   }

   /* 64-bit little-endian FP load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      ARMAModeV* am;
      HReg res = newVRegD(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
      return res;
   }

   /* Read from the guest state (addressed off R8, the baseblock ptr). */
   if (e->tag == Iex_Get) {
      // XXX This won't work if offset > 1020 or is not 0 % 4.
      // In which case we'll have to generate more longwinded code.
      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
      HReg       res = newVRegD(env);
      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
      return res;
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_ReinterpI64asF64: {
            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
               /* With NEON, an I64 already lives in a D register. */
               return iselNeon64Expr(env, e->Iex.Unop.arg);
            } else {
               /* Otherwise the I64 is in a GPR pair; transfer it over. */
               HReg srcHi, srcLo;
               HReg dst = newVRegD(env);
               iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
               return dst;
            }
         }
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            /* Widening conversion is exact, so no rounding mode arg. */
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Int -> F64 is exact for 32-bit ints, hence also no
               rounding mode.  The int must go via an S register since
               F[US]ITOD reads its integer input from the VFP bank. */
            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg f32   = newVRegF(env);
            HReg dst   = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            /* VMOV f32, src */
            addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
            /* FSITOD dst, f32 */
            addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
                                          dst, f32));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF64: {
            /* first arg is rounding mode; we ignore it. */
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
            return dst;
         }
         case Iop_RoundF64toInt: {
            /* We can only generate this on a >= V8 capable target.  But
               that's OK since we should only be asked to generate for V8
               capable guests, and we assume here that host == guest. */
            if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
               HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
               HReg dst = newVRegD(env);
               /* Honour the IR-specified rounding mode around the insn,
                  then restore the Vex default. */
               set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
               addInstr(env, ARMInstr_VRIntR(True/*isF64*/, dst, src));
               set_VFP_rounding_default(env);
               return dst;
            }
            /* not a V8 target, so we can't select insns for this. */
            break;
         }
         case Iop_MaxNumF64:
         case Iop_MinNumF64: {
            /* Same comments regarding V8 support as for Iop_RoundF64toInt. */
            if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
               HReg srcL  = iselDblExpr(env, e->Iex.Binop.arg1);
               HReg srcR  = iselDblExpr(env, e->Iex.Binop.arg2);
               HReg dst   = newVRegD(env);
               Bool isMax = e->Iex.Binop.op == Iop_MaxNumF64;
               addInstr(env, ARMInstr_VMinMaxNum(
                                True/*isF64*/, isMax, dst, srcL, srcR));
               return dst;
            }
            /* not a V8 target, so we can't select insns for this. */
            break;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;

      switch (triop->op) {
         case Iop_DivF64:
         case Iop_MulF64:
         case Iop_AddF64:
         case Iop_SubF64: {
            /* arg1 is the rounding mode, which is ignored here (VFP is
               left in its default rounding state); arg2/arg3 are the
               operands. */
            ARMVfpOp op = 0; /*INVALID*/
            HReg argL = iselDblExpr(env, triop->arg2);
            HReg argR = iselDblExpr(env, triop->arg3);
            HReg dst  = newVRegD(env);
            switch (triop->op) {
               case Iop_DivF64: op = ARMvfp_DIV; break;
               case Iop_MulF64: op = ARMvfp_MUL; break;
               case Iop_AddF64: op = ARMvfp_ADD; break;
               case Iop_SubF64: op = ARMvfp_SUB; break;
               default: vassert(0);
            }
            addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_ITE) { // VFD
      if (ty == Ity_F64
          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
         HReg dst = newVRegD(env);
         /* dst := iftrue; then conditionally overwrite with iffalse
            using the inverted condition (cc ^ 1 flips the ARM cond's
            low bit, giving its logical negation). */
         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
         ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
         addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}
5708 /*---------------------------------------------------------*/
5709 /*--- ISEL: Floating point expressions (32 bit) ---*/
5710 /*---------------------------------------------------------*/
5712 /* Compute a 32-bit floating point value into a register, the identity
5713 of which is returned. As with iselIntExpr_R, the reg may be either
5714 real or virtual; in any case it must not be changed by subsequent
5715 code emitted by the caller. */
5717 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5719 HReg r = iselFltExpr_wrk( env, e );
5720 # if 0
5721 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5722 # endif
5723 vassert(hregClass(r) == HRcFlt32);
5724 vassert(hregIsVirtual(r));
5725 return r;
/* DO NOT CALL THIS DIRECTLY */
/* Worker for iselFltExpr: selects ARM VFP instructions computing the
   F32-typed IRExpr 'e' into a fresh (or already-bound) S register,
   which is returned.  Panics on any expression form it cannot handle. */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   /* Temporary: just return the vreg already associated with it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* 32-bit little-endian FP load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      ARMAModeV* am;
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
      return res;
   }

   /* Read from the guest state (addressed off R8, the baseblock ptr). */
   if (e->tag == Iex_Get) {
      // XXX This won't work if offset > 1020 or is not 0 % 4.
      // In which case we'll have to generate more longwinded code.
      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
      HReg       res = newVRegF(env);
      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
      return res;
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_ReinterpI32asF32: {
            /* Bit-level move of a GPR into an S register. */
            HReg dst = newVRegF(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
            return dst;
         }
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegF(env);
            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegF(env);
            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF32: {
            /* first arg is rounding mode; we ignore it. */
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegF(env);
            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            /* Narrowing conversion rounds, so honour the IR-specified
               rounding mode around the insn, then restore the default. */
            HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
            HReg valS = newVRegF(env);
            /* FCVTSD valS, valD */
            addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
            set_VFP_rounding_default(env);
            return valS;
         }
         case Iop_RoundF32toInt: {
            /* We can only generate this on a >= V8 capable target.  But
               that's OK since we should only be asked to generate for V8
               capable guests, and we assume here that host == guest. */
            if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
               HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
               HReg dst = newVRegF(env);
               set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
               addInstr(env, ARMInstr_VRIntR(False/*!isF64*/, dst, src));
               set_VFP_rounding_default(env);
               return dst;
            }
            /* not a V8 target, so we can't select insns for this. */
            break;
         }
         case Iop_MaxNumF32:
         case Iop_MinNumF32: {
            /* Same comments regarding V8 support as for Iop_RoundF32toInt. */
            if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
               HReg srcL  = iselFltExpr(env, e->Iex.Binop.arg1);
               HReg srcR  = iselFltExpr(env, e->Iex.Binop.arg2);
               HReg dst   = newVRegF(env);
               Bool isMax = e->Iex.Binop.op == Iop_MaxNumF32;
               addInstr(env, ARMInstr_VMinMaxNum(
                                False/*!isF64*/, isMax, dst, srcL, srcR));
               return dst;
            }
            /* not a V8 target, so we can't select insns for this. */
            break;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;

      switch (triop->op) {
         case Iop_DivF32:
         case Iop_MulF32:
         case Iop_AddF32:
         case Iop_SubF32: {
            /* arg1 is the rounding mode, which is ignored here (VFP is
               left in its default rounding state); arg2/arg3 are the
               operands. */
            ARMVfpOp op = 0; /*INVALID*/
            HReg argL = iselFltExpr(env, triop->arg2);
            HReg argR = iselFltExpr(env, triop->arg3);
            HReg dst  = newVRegF(env);
            switch (triop->op) {
               case Iop_DivF32: op = ARMvfp_DIV; break;
               case Iop_MulF32: op = ARMvfp_MUL; break;
               case Iop_AddF32: op = ARMvfp_ADD; break;
               case Iop_SubF32: op = ARMvfp_SUB; break;
               default: vassert(0);
            }
            addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_ITE) { // VFD
      if (ty == Ity_F32
          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
         ARMCondCode cc;
         HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
         HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
         HReg dst = newVRegF(env);
         /* dst := iftrue; then conditionally overwrite with iffalse
            using the inverted condition (cc ^ 1 flips the ARM cond's
            low bit, giving its logical negation). */
         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
         cc = iselCondCode(env, e->Iex.ITE.cond);
         addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}
5881 /*---------------------------------------------------------*/
5882 /*--- ISEL: Statements ---*/
5883 /*---------------------------------------------------------*/
5885 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5887 if (vex_traceflags & VEX_TRACE_VCODE) {
5888 vex_printf("\n-- ");
5889 ppIRStmt(stmt);
5890 vex_printf("\n");
5892 switch (stmt->tag) {
5894 /* --------- STORE --------- */
5895 /* little-endian write to memory */
5896 case Ist_Store: {
5897 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5898 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5899 IREndness end = stmt->Ist.Store.end;
5901 if (tya != Ity_I32 || end != Iend_LE)
5902 goto stmt_fail;
5904 if (tyd == Ity_I32) {
5905 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5906 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5907 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5908 return;
5910 if (tyd == Ity_I16) {
5911 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5912 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5913 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
5914 False/*!isLoad*/,
5915 False/*!isSignedLoad*/, rD, am));
5916 return;
5918 if (tyd == Ity_I8) {
5919 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5920 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5921 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
5922 return;
5924 if (tyd == Ity_I64) {
5925 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5926 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5927 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5928 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5929 } else {
5930 HReg rDhi, rDlo, rA;
5931 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5932 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5933 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
5934 ARMAMode1_RI(rA,4)));
5935 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
5936 ARMAMode1_RI(rA,0)));
5938 return;
5940 if (tyd == Ity_F64) {
5941 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5942 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5943 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5944 return;
5946 if (tyd == Ity_F32) {
5947 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5948 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5949 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5950 return;
5952 if (tyd == Ity_V128) {
5953 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5954 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5955 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5956 return;
5959 break;
5962 /* --------- CONDITIONAL STORE --------- */
5963 /* conditional little-endian write to memory */
5964 case Ist_StoreG: {
5965 IRStoreG* sg = stmt->Ist.StoreG.details;
5966 IRType tya = typeOfIRExpr(env->type_env, sg->addr);
5967 IRType tyd = typeOfIRExpr(env->type_env, sg->data);
5968 IREndness end = sg->end;
5970 if (tya != Ity_I32 || end != Iend_LE)
5971 goto stmt_fail;
5973 switch (tyd) {
5974 case Ity_I8:
5975 case Ity_I32: {
5976 HReg rD = iselIntExpr_R(env, sg->data);
5977 ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr);
5978 ARMCondCode cc = iselCondCode(env, sg->guard);
5979 addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
5980 (cc, False/*!isLoad*/, rD, am));
5981 return;
5983 case Ity_I16: {
5984 HReg rD = iselIntExpr_R(env, sg->data);
5985 ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr);
5986 ARMCondCode cc = iselCondCode(env, sg->guard);
5987 addInstr(env, ARMInstr_LdSt16(cc,
5988 False/*!isLoad*/,
5989 False/*!isSignedLoad*/, rD, am));
5990 return;
5992 default:
5993 break;
5995 break;
5998 /* --------- CONDITIONAL LOAD --------- */
5999 /* conditional little-endian load from memory */
6000 case Ist_LoadG: {
6001 IRLoadG* lg = stmt->Ist.LoadG.details;
6002 IRType tya = typeOfIRExpr(env->type_env, lg->addr);
6003 IREndness end = lg->end;
6005 if (tya != Ity_I32 || end != Iend_LE)
6006 goto stmt_fail;
6008 switch (lg->cvt) {
6009 case ILGop_8Uto32:
6010 case ILGop_Ident32: {
6011 HReg rAlt = iselIntExpr_R(env, lg->alt);
6012 ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr);
6013 HReg rD = lookupIRTemp(env, lg->dst);
6014 addInstr(env, mk_iMOVds_RR(rD, rAlt));
6015 ARMCondCode cc = iselCondCode(env, lg->guard);
6016 addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
6017 : ARMInstr_LdSt8U)
6018 (cc, True/*isLoad*/, rD, am));
6019 return;
6021 case ILGop_16Sto32:
6022 case ILGop_16Uto32:
6023 case ILGop_8Sto32: {
6024 HReg rAlt = iselIntExpr_R(env, lg->alt);
6025 ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr);
6026 HReg rD = lookupIRTemp(env, lg->dst);
6027 addInstr(env, mk_iMOVds_RR(rD, rAlt));
6028 ARMCondCode cc = iselCondCode(env, lg->guard);
6029 if (lg->cvt == ILGop_8Sto32) {
6030 addInstr(env, ARMInstr_Ld8S(cc, rD, am));
6031 } else {
6032 vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
6033 Bool sx = lg->cvt == ILGop_16Sto32;
6034 addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
6036 return;
6038 default:
6039 break;
6041 break;
6044 /* --------- PUT --------- */
6045 /* write guest state, fixed offset */
6046 case Ist_Put: {
6047 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6049 if (tyd == Ity_I32) {
6050 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6051 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
6052 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
6053 return;
6055 if (tyd == Ity_I64) {
6056 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6057 HReg addr = newVRegI(env);
6058 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
6059 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6060 stmt->Ist.Put.offset));
6061 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
6062 } else {
6063 HReg rDhi, rDlo;
6064 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
6065 stmt->Ist.Put.offset + 0);
6066 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
6067 stmt->Ist.Put.offset + 4);
6068 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
6069 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6070 rDhi, am4));
6071 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6072 rDlo, am0));
6074 return;
6076 if (tyd == Ity_F64) {
6077 // XXX This won't work if offset > 1020 or is not 0 % 4.
6078 // In which case we'll have to generate more longwinded code.
6079 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6080 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
6081 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
6082 return;
6084 if (tyd == Ity_F32) {
6085 // XXX This won't work if offset > 1020 or is not 0 % 4.
6086 // In which case we'll have to generate more longwinded code.
6087 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6088 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
6089 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
6090 return;
6092 if (tyd == Ity_V128) {
6093 HReg addr = newVRegI(env);
6094 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
6095 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6096 stmt->Ist.Put.offset));
6097 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
6098 return;
6100 break;
6103 /* --------- TMP --------- */
6104 /* assign value to temporary */
6105 case Ist_WrTmp: {
6106 IRTemp tmp = stmt->Ist.WrTmp.tmp;
6107 IRType ty = typeOfIRTemp(env->type_env, tmp);
6109 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6110 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
6111 env, stmt->Ist.WrTmp.data);
6112 HReg dst = lookupIRTemp(env, tmp);
6113 addInstr(env, ARMInstr_Mov(dst,ri84));
6114 return;
6116 if (ty == Ity_I1) {
6117 /* Here, we are generating a I1 value into a 32 bit register.
6118 Make sure the value in the register is only zero or one,
6119 but no other. This allows optimisation of the
6120 1Uto32(tmp:I1) case, by making it simply a copy of the
6121 register holding 'tmp'. The point being that the value in
6122 the register holding 'tmp' can only have been created
6123 here. */
6124 HReg dst = lookupIRTemp(env, tmp);
6125 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
6126 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
6127 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
6128 return;
6130 if (ty == Ity_I64) {
6131 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6132 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
6133 HReg dst = lookupIRTemp(env, tmp);
6134 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
6135 } else {
6136 HReg rHi, rLo, dstHi, dstLo;
6137 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
6138 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
6139 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
6140 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
6142 return;
6144 if (ty == Ity_F64) {
6145 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
6146 HReg dst = lookupIRTemp(env, tmp);
6147 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
6148 return;
6150 if (ty == Ity_F32) {
6151 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
6152 HReg dst = lookupIRTemp(env, tmp);
6153 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
6154 return;
6156 if (ty == Ity_V128) {
6157 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
6158 HReg dst = lookupIRTemp(env, tmp);
6159 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
6160 return;
6162 break;
6165 /* --------- Call to DIRTY helper --------- */
6166 /* call complex ("dirty") helper function */
6167 case Ist_Dirty: {
6168 IRDirty* d = stmt->Ist.Dirty.details;
6170 /* Figure out the return type, if any. */
6171 IRType retty = Ity_INVALID;
6172 if (d->tmp != IRTemp_INVALID)
6173 retty = typeOfIRTemp(env->type_env, d->tmp);
6175 Bool retty_ok = False;
6176 switch (retty) {
6177 case Ity_INVALID: /* function doesn't return anything */
6178 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6179 case Ity_V128:
6180 retty_ok = True; break;
6181 default:
6182 break;
6184 if (!retty_ok)
6185 break; /* will go to stmt_fail: */
6187 /* Marshal args, do the call, and set the return value to 0x555..555
6188 if this is a conditional call that returns a value and the
6189 call is skipped. */
6190 UInt addToSp = 0;
6191 RetLoc rloc = mk_RetLoc_INVALID();
6192 Bool ok = doHelperCall( &addToSp, &rloc, env,
6193 d->guard, d->cee, retty, d->args );
6194 if (!ok) goto stmt_fail;
6195 vassert(is_sane_RetLoc(rloc));
6197 /* Now figure out what to do with the returned value, if any. */
6198 switch (retty) {
6199 case Ity_INVALID: {
6200 /* No return value. Nothing to do. */
6201 vassert(d->tmp == IRTemp_INVALID);
6202 vassert(rloc.pri == RLPri_None);
6203 vassert(addToSp == 0);
6204 return;
6206 case Ity_I64: {
6207 vassert(rloc.pri == RLPri_2Int);
6208 vassert(addToSp == 0);
6209 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6210 HReg tmp = lookupIRTemp(env, d->tmp);
6211 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
6212 hregARM_R0()));
6213 } else {
6214 HReg dstHi, dstLo;
6215 /* The returned value is in r1:r0. Park it in the
6216 register-pair associated with tmp. */
6217 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
6218 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
6219 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
6221 return;
6223 case Ity_I32: case Ity_I16: case Ity_I8: {
6224 vassert(rloc.pri == RLPri_Int);
6225 vassert(addToSp == 0);
6226 /* The returned value is in r0. Park it in the register
6227 associated with tmp. */
6228 HReg dst = lookupIRTemp(env, d->tmp);
6229 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
6230 return;
6232 case Ity_V128: {
6233 /* The returned value is on the stack, and *retloc tells
6234 us where. Fish it off the stack and then move the
6235 stack pointer upwards to clear it, as directed by
6236 doHelperCall. */
6237 vassert(rloc.pri == RLPri_V128SpRel);
6238 vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
6239 vassert(addToSp >= 16);
6240 vassert(addToSp <= 256);
6241 /* Both the stack delta and the offset must be at least 8-aligned.
6242 If that isn't so, doHelperCall() has generated bad code. */
6243 vassert(0 == (rloc.spOff % 8));
6244 vassert(0 == (addToSp % 8));
6245 HReg dst = lookupIRTemp(env, d->tmp);
6246 HReg tmp = newVRegI(env);
6247 HReg sp = hregARM_R13();
6248 addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6249 tmp, sp, ARMRI84_I84(rloc.spOff,0)));
6250 ARMAModeN* am = mkARMAModeN_R(tmp);
6251 /* This load could be done with its effective address 0 % 8,
6252 because that's the best stack alignment that we can be
6253 assured of. */
6254 addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
6256 ARMRI84* spAdj
6257 = addToSp == 256 ? ARMRI84_I84(64, 15) // 64 `ror` (15 * 2)
6258 : ARMRI84_I84(addToSp, 0);
6259 addInstr(env, ARMInstr_Alu(ARMalu_ADD, sp, sp, spAdj));
6260 return;
6262 default:
6263 /*NOTREACHED*/
6264 vassert(0);
6266 break;
6269 /* --------- Load Linked and Store Conditional --------- */
6270 case Ist_LLSC: {
6271 if (stmt->Ist.LLSC.storedata == NULL) {
6272 /* LL */
6273 IRTemp res = stmt->Ist.LLSC.result;
6274 IRType ty = typeOfIRTemp(env->type_env, res);
6275 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6276 Int szB = 0;
6277 HReg r_dst = lookupIRTemp(env, res);
6278 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6279 switch (ty) {
6280 case Ity_I8: szB = 1; break;
6281 case Ity_I16: szB = 2; break;
6282 case Ity_I32: szB = 4; break;
6283 default: vassert(0);
6285 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6286 addInstr(env, ARMInstr_LdrEX(szB));
6287 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6288 return;
6290 if (ty == Ity_I64) {
6291 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6292 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6293 addInstr(env, ARMInstr_LdrEX(8));
6294 /* Result is in r3:r2. On a non-NEON capable CPU, we must
6295 move it into a result register pair. On a NEON capable
6296 CPU, the result register will be a 64 bit NEON
6297 register, so we must move it there instead. */
6298 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6299 HReg dst = lookupIRTemp(env, res);
6300 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6301 hregARM_R2()));
6302 } else {
6303 HReg r_dst_hi, r_dst_lo;
6304 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6305 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6306 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6308 return;
6310 /*NOTREACHED*/
6311 vassert(0);
6312 } else {
6313 /* SC */
6314 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6315 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6316 Int szB = 0;
6317 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6318 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6319 switch (tyd) {
6320 case Ity_I8: szB = 1; break;
6321 case Ity_I16: szB = 2; break;
6322 case Ity_I32: szB = 4; break;
6323 default: vassert(0);
6325 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6326 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6327 addInstr(env, ARMInstr_StrEX(szB));
6328 } else {
6329 vassert(tyd == Ity_I64);
6330 /* This is really ugly. There is no is/is-not NEON
6331 decision akin to the case for LL, because iselInt64Expr
6332 fudges this for us, and always gets the result into two
6333 GPRs even if this means moving it from a NEON
6334 register. */
6335 HReg rDhi, rDlo;
6336 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6337 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6338 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6339 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6340 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6341 addInstr(env, ARMInstr_StrEX(8));
6343 /* now r0 is 1 if failed, 0 if success. Change to IR
6344 conventions (0 is fail, 1 is success). Also transfer
6345 result to r_res. */
6346 IRTemp res = stmt->Ist.LLSC.result;
6347 IRType ty = typeOfIRTemp(env->type_env, res);
6348 HReg r_res = lookupIRTemp(env, res);
6349 ARMRI84* one = ARMRI84_I84(1,0);
6350 vassert(ty == Ity_I1);
6351 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6352 /* And be conservative -- mask off all but the lowest bit */
6353 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6354 return;
6356 break;
6359 /* --------- MEM FENCE --------- */
6360 case Ist_MBE:
6361 switch (stmt->Ist.MBE.event) {
6362 case Imbe_Fence:
6363 addInstr(env, ARMInstr_MFence());
6364 return;
6365 case Imbe_CancelReservation:
6366 addInstr(env, ARMInstr_CLREX());
6367 return;
6368 default:
6369 break;
6371 break;
6373 /* --------- INSTR MARK --------- */
6374 /* Doesn't generate any executable code ... */
6375 case Ist_IMark:
6376 return;
6378 /* --------- NO-OP --------- */
6379 case Ist_NoOp:
6380 return;
6382 /* --------- EXIT --------- */
6383 case Ist_Exit: {
6384 if (stmt->Ist.Exit.dst->tag != Ico_U32)
6385 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6387 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
6388 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
6389 stmt->Ist.Exit.offsIP);
6391 /* Case: boring transfer to known address */
6392 if (stmt->Ist.Exit.jk == Ijk_Boring
6393 || stmt->Ist.Exit.jk == Ijk_Call
6394 || stmt->Ist.Exit.jk == Ijk_Ret) {
6395 if (env->chainingAllowed) {
6396 /* .. almost always true .. */
6397 /* Skip the event check at the dst if this is a forwards
6398 edge. */
6399 Bool toFastEP
6400 = stmt->Ist.Exit.dst->Ico.U32 > env->max_ga;
6401 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6402 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6403 amR15T, cc, toFastEP));
6404 } else {
6405 /* .. very occasionally .. */
6406 /* We can't use chaining, so ask for an assisted transfer,
6407 as that's the only alternative that is allowable. */
6408 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6409 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6411 return;
6414 /* Case: assisted transfer to arbitrary address */
6415 switch (stmt->Ist.Exit.jk) {
6416 /* Keep this list in sync with that in iselNext below */
6417 case Ijk_ClientReq:
6418 case Ijk_NoDecode:
6419 case Ijk_NoRedir:
6420 case Ijk_Sys_syscall:
6421 case Ijk_InvalICache:
6422 case Ijk_Yield:
6424 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6425 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6426 stmt->Ist.Exit.jk));
6427 return;
6429 default:
6430 break;
6433 /* Do we ever expect to see any other kind? */
6434 goto stmt_fail;
6437 default: break;
6439 stmt_fail:
6440 ppIRStmt(stmt);
6441 vpanic("iselStmt");
6445 /*---------------------------------------------------------*/
6446 /*--- ISEL: Basic block terminators (Nexts) ---*/
6447 /*---------------------------------------------------------*/
/* Generate code for the end-of-block transfer of control.  'next' is
   the destination guest address expression, 'jk' the jump kind, and
   'offsIP' the guest-state offset of the slot that receives the next
   guest IP (addressed off the guest-state pointer in r8, as amR15T).
   Boring/call/return transfers use the chaining mechanism when the
   environment allows it; the other recognised jump kinds always get
   an assisted transfer.  Any other jump kind asserts. */
static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      /* Guest addresses are 32 bits on this target. */
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = cdst->Ico.U32 > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
                                           amR15T, ARMcc_AL,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
                                             Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg       r      = iselIntExpr_R(env, next);
         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
         if (env->chainingAllowed) {
            addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
         } else {
            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
                                             Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_Yield:
      {
         HReg       r      = iselIntExpr_R(env, next);
         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
         addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
         return;
      }
      default:
         break;
   }

   /* Unhandled jump kind: dump what we were given and assert. */
   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}
6534 /*---------------------------------------------------------*/
6535 /*--- Insn selector top-level ---*/
6536 /*---------------------------------------------------------*/
/* Translate an entire SB to arm code.  Builds an ISelEnv, allocates
   one suitably-kinded virtual register per IR temporary (a pair of
   32-bit vregs for Ity_I64 on non-NEON hosts, tracked in vregmapHI),
   emits the mandatory leading event check, optionally a profiling
   counter increment, then selects instructions for each statement and
   the block-end transfer.  Returns the resulting HInstrArray. */

HInstrArray* iselSB_ARM ( const IRSB* bb,
                          VexArch arch_host,
                          const VexArchInfo* archinfo_host,
                          const VexAbiInfo* vbi/*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr max_ga )
{
   Int i, j;
   HReg hreg, hregHI;
   ISelEnv* env;
   UInt hwcaps_host = archinfo_host->hwcaps;
   ARMAMode1 *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchARM);

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* guard against unexpected space regressions */
   vassert(sizeof(ARMInstr) <= 28);

   /* hwcaps should not change from one ISEL call to another. */
   arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         /* I1/I8/I16 all live in a 32-bit integer vreg. */
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_I64:
            /* With NEON, a 64-bit int fits in one D register;
               otherwise use a hi:lo pair of 32-bit integer vregs. */
            if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
               hreg = mkHReg(True, HRcFlt64, 0, j++);
            } else {
               hregHI = mkHReg(True, HRcInt32, 0, j++);
               hreg   = mkHReg(True, HRcInt32, 0, j++);
            }
            break;
         case Ity_F32:  hreg   = mkHReg(True, HRcFlt32,  0, j++); break;
         case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
         case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
   amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
   addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, ARMInstr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}
6643 /*---------------------------------------------------------------*/
6644 /*--- end host_arm_isel.c ---*/
6645 /*---------------------------------------------------------------*/