2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "libvex_basictypes.h"
30 #include "libvex_ir.h"
31 #include "libvex.h"
32 #include "ir_match.h"
34 #include "main_util.h"
35 #include "main_globals.h"
36 #include "host_generic_regs.h"
37 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
38 #include "host_arm64_defs.h"
41 /*---------------------------------------------------------*/
42 /*--- ISelEnv ---*/
43 /*---------------------------------------------------------*/
45 /* This carries around:
47 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
48 might encounter. This is computed before insn selection starts,
49 and does not change.
51 - A mapping from IRTemp to HReg. This tells the insn selector
52 which virtual register is associated with each IRTemp temporary.
53 This is computed before insn selection starts, and does not
54 change. We expect this mapping to map precisely the same set of
55 IRTemps as the type mapping does.
57 |vregmap| holds the primary register for the IRTemp.
58 |vregmapHI| is only used for 128-bit integer-typed
59 IRTemps. It holds the identity of a second
60 64-bit virtual HReg, which holds the high half
61 of the value.
63 - The code array, that is, the insns selected so far.
65 - A counter, for generating new virtual registers.
67 - The host hardware capabilities word. This is set at the start
68 and does not change.
70 - A Bool for indicating whether we may generate chain-me
71 instructions for control flow transfers, or whether we must use
72 XAssisted.
74 - The maximum guest address of any guest insn in this block.
75 Actually, the address of the highest-addressed byte from any insn
76 in this block. Is set at the start and does not change. This is
77 used for detecting jumps which are definitely forward-edges from
78 this block, and therefore can be made (chained) to the fast entry
79 point of the destination, thereby avoiding the destination's
80 event check.
82 - An IRExpr*, which may be NULL, holding the IR expression (an
83 IRRoundingMode-encoded value) to which the FPU's rounding mode
84 was most recently set. Setting to NULL is always safe. Used to
85 avoid redundant settings of the FPU's rounding mode, as
86 described in set_FPCR_rounding_mode below.
88 Note, this is all (well, mostly) host-independent.
91 typedef
92 struct {
93 /* Constant -- are set at the start and do not change. */
94 IRTypeEnv* type_env;
96 HReg* vregmap;
97 HReg* vregmapHI;
98 Int n_vregmap;
100 UInt hwcaps;
102 Bool chainingAllowed;
103 Addr64 max_ga;
105 /* These are modified as we go along. */
106 HInstrArray* code;
107 Int vreg_ctr;
109 IRExpr* previous_rm;
111 ISelEnv;
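/* A small usage sketch (illustrative only, not part of the selector
   proper): a typical consumer of this environment looks up the vreg
   bound to an IRTemp and emits instructions against it, e.g.

      HReg src = lookupIRTemp(env, tmp);        // vreg carrying |tmp|
      addInstr(env, ARM64Instr_MovI(dst, src)); // lands in env->code

   where |dst| stands for some destination vreg obtained from
   newVRegI(env).  lookupIRTemp, addInstr and newVRegI are defined
   just below. */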
113 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
115 vassert(tmp < env->n_vregmap);
116 return env->vregmap[tmp];
119 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
120 ISelEnv* env, IRTemp tmp )
122 vassert(tmp < env->n_vregmap);
123 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
124 *vrLO = env->vregmap[tmp];
125 *vrHI = env->vregmapHI[tmp];
128 static void addInstr ( ISelEnv* env, ARM64Instr* instr )
130 addHInstr(env->code, instr);
131 if (vex_traceflags & VEX_TRACE_VCODE) {
132 ppARM64Instr(instr);
133 vex_printf("\n");
137 static HReg newVRegI ( ISelEnv* env )
139 HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
140 env->vreg_ctr++;
141 return reg;
144 static HReg newVRegD ( ISelEnv* env )
146 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
147 env->vreg_ctr++;
148 return reg;
151 static HReg newVRegV ( ISelEnv* env )
153 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
154 env->vreg_ctr++;
155 return reg;
159 /*---------------------------------------------------------*/
160 /*--- ISEL: Forward declarations ---*/
161 /*---------------------------------------------------------*/
163 /* These are organised as iselXXX and iselXXX_wrk pairs. The
164 iselXXX_wrk do the real work, but are not to be called directly.
165 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
166 checks that all returned registers are virtual. You should not
167 call the _wrk version directly.
169 Because some forms of ARM64 memory amodes are implicitly scaled by
170 the access size, iselIntExpr_AMode takes an IRType which tells it
171 the type of the access for which the amode is to be used. This
172 type needs to be correct, else you'll get incorrect code.
174 static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
175 IRExpr* e, IRType dty );
176 static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
177 IRExpr* e, IRType dty );
179 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
180 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
182 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
183 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
185 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
186 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
188 static ARM64CondCode iselCondCode_C_wrk ( ISelEnv* env, IRExpr* e );
189 static ARM64CondCode iselCondCode_C ( ISelEnv* env, IRExpr* e );
191 static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e );
192 static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e );
194 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
195 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
197 static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
198 ISelEnv* env, IRExpr* e );
199 static void iselInt128Expr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
200 ISelEnv* env, IRExpr* e );
202 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
203 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
205 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
206 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
208 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
209 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e );
211 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
212 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
214 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
215 ISelEnv* env, IRExpr* e );
216 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
217 ISelEnv* env, IRExpr* e );
219 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
222 /*---------------------------------------------------------*/
223 /*--- ISEL: Misc helpers ---*/
224 /*---------------------------------------------------------*/
226 /* Generate an amode suitable for a 64-bit sized access relative to
227 the baseblock register (X21). This generates an RI12 amode, which
228 means it is scaled by the access size, which is why the access size
229 -- 64 bit -- is stated explicitly here. Consequently |off| needs
230 to be divisible by 8. */
231 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
233 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
234 vassert((off & 7) == 0); /* ditto */
235 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
238 /* Ditto, for 32 bit accesses. */
239 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
241 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
242 vassert((off & 3) == 0); /* ditto */
243 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
246 /* Ditto, for 16 bit accesses. */
247 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
249 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
250 vassert((off & 1) == 0); /* ditto */
251 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
254 /* Ditto, for 8 bit accesses. */
255 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
257 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
258 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
261 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
263 vassert(off < (1<<12));
264 HReg r = newVRegI(env);
265 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
266 ARM64RIA_I12(off,0), True/*isAdd*/));
267 return r;
270 static HReg get_baseblock_register ( void )
272 return hregARM64_X21();
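/* Worked example (a sketch): for a 64-bit guest state access at offset
   24 from the baseblock pointer (X21),

      mk_baseblock_64bit_access_amode(24)
         --> ARM64AMode_RI12(X21, 3, 8)     since 24 >> 3 == 3

   while the same offset used for a 32-bit access would give uimm12 = 6
   with scale 4.  The uimm12 field always holds the offset divided by
   the access size. */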
275 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
276 a new register, and return the new register. */
277 static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
279 HReg dst = newVRegI(env);
280 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
281 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
282 return dst;
285 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
286 a new register, and return the new register. */
287 static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
289 HReg dst = newVRegI(env);
290 ARM64RI6* n48 = ARM64RI6_I6(48);
291 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
292 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
293 return dst;
296 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
297 a new register, and return the new register. */
298 static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
300 HReg dst = newVRegI(env);
301 ARM64RIL* mask = ARM64RIL_I13(1, 0, 15); /* encodes 0xFFFF */
302 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
303 return dst;
306 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
307 a new register, and return the new register. */
308 static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
310 HReg dst = newVRegI(env);
311 ARM64RI6* n32 = ARM64RI6_I6(32);
312 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
313 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
314 return dst;
317 /* Generate code to sign extend an 8 bit value in 'src' to 64 bits, in
318 a new register, and return the new register. */
319 static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
321 HReg dst = newVRegI(env);
322 ARM64RI6* n56 = ARM64RI6_I6(56);
323 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
324 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
325 return dst;
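/* Ditto, but zero extending an 8 bit value. */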
328 static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
330 HReg dst = newVRegI(env);
331 ARM64RIL* mask = ARM64RIL_I13(1, 0, 7); /* encodes 0xFF */
332 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
333 return dst;
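/* Worked example (a sketch): widen_s_16_to_64 applied to a register
   holding 0x8000 first shifts left by 48, giving 0x8000000000000000,
   then arithmetic-shifts right by 48, giving 0xFFFFFFFFFFFF8000 --
   the 16-bit value sign extended to 64 bits.  The widen_z_* variants
   instead just AND with the appropriate bitfield immediate (0xFF,
   0xFFFF or 0xFFFFFFFF). */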
336 /* Is this IRExpr_Const(IRConst_U64(0)) ? */
337 static Bool isZeroU64 ( IRExpr* e ) {
338 if (e->tag != Iex_Const) return False;
339 IRConst* con = e->Iex.Const.con;
340 vassert(con->tag == Ico_U64);
341 return con->Ico.U64 == 0;
345 /*---------------------------------------------------------*/
346 /*--- ISEL: FP rounding mode helpers ---*/
347 /*---------------------------------------------------------*/
349 /* Set the FP rounding mode: 'mode' is an I32-typed expression
350 denoting a value in the range 0 .. 3, indicating a round mode
351 encoded as per type IRRoundingMode -- the first four values only
352 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
353 FPCR to have the same rounding.
355 For speed & simplicity, we're setting the *entire* FPCR here.
357 Setting the rounding mode is expensive. So this function tries to
358 avoid repeatedly setting the rounding mode to the same thing by
359 first comparing 'mode' to the 'mode' tree supplied in the previous
360 call to this function, if any. (The previous value is stored in
361 env->previous_rm.) If 'mode' is a single IR temporary 't' and
362 env->previous_rm is also just 't', then the setting is skipped.
364 This is safe because of the SSA property of IR: an IR temporary can
365 only be defined once and so will have the same value regardless of
366 where it appears in the block. Cool stuff, SSA.
368 A safety condition: all attempts to set the RM must be aware of
369 this mechanism - by being routed through the functions here.
371 Of course this only helps in blocks where the RM is set more than
372 once and it is set to the same value each time, *and* that value is
373 held in the same IR temporary each time. In order to assure the
374 latter as much as possible, the IR optimiser takes care to do CSE
375 on any block with any sign of floating point activity.
377 static
378 void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
380 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
382 /* Do we need to do anything? */
383 if (env->previous_rm
384 && env->previous_rm->tag == Iex_RdTmp
385 && mode->tag == Iex_RdTmp
386 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
387 /* no - setting it to what it was before. */
388 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
389 return;
392 /* No luck - we better set it, and remember what we set it to. */
393 env->previous_rm = mode;
395 /* Only supporting the rounding-mode bits - the rest of FPCR is set
396 to zero - so we can set the whole register at once (faster). */
398 /* This isn't simple, because 'mode' carries an IR rounding
399 encoding, and we need to translate that to an ARM64 FP one:
400 The IR encoding:
401 00 to nearest (the default)
402 10 to +infinity
403 01 to -infinity
404 11 to zero
405 The ARM64 FP encoding:
406 00 to nearest
407 01 to +infinity
408 10 to -infinity
409 11 to zero
410 Easy enough to do; just swap the two bits.
412 HReg irrm = iselIntExpr_R(env, mode);
413 HReg tL = newVRegI(env);
414 HReg tR = newVRegI(env);
415 HReg t3 = newVRegI(env);
416 /* tL = irrm << 1;
417 tR = irrm >> 1; if we're lucky, these will issue together
418 tL &= 2;
419 tR &= 1; ditto
420 t3 = tL | tR;
421 t3 <<= 22;
422 msr fpcr, t3
424 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
425 ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
426 vassert(ril_one && ril_two);
427 addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
428 addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
429 addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
430 addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
431 addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
432 addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
433 addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
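/* Worked example (a sketch): for mode == Irrm_PosINF the IR encoding
   is 2 (binary 10).  Then tL = (2 << 1) & 2 = 0 and tR = (2 >> 1) & 1
   = 1, so t3 = 1 (binary 01), which is the ARM64 encoding of
   round-to-+infinity.  Shifting t3 left by 22 places it in FPCR.RMode
   (bits 23:22) before the write to the FPCR. */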
437 /*---------------------------------------------------------*/
438 /*--- ISEL: Function call helpers ---*/
439 /*---------------------------------------------------------*/
441 /* Used only in doHelperCall. See big comment in doHelperCall re
442 handling of register-parameter args. This function figures out
443 whether evaluation of an expression might require use of a fixed
444 register. If in doubt return True (safe but suboptimal).
446 static
447 Bool mightRequireFixedRegs ( IRExpr* e )
449 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
450 // These are always "safe" -- either a copy of SP in some
451 // arbitrary vreg, or a copy of x21, respectively.
452 return False;
454 /* Else it's a "normal" expression. */
455 switch (e->tag) {
456 case Iex_RdTmp: case Iex_Const: case Iex_Get:
457 return False;
458 default:
459 return True;
464 /* Do a complete function call. |guard| is a Ity_Bit expression
465 indicating whether or not the call happens. If guard==NULL, the
466 call is unconditional. |retloc| is set to indicate where the
467 return value is after the call. The caller (of this fn) must
468 generate code to add |stackAdjustAfterCall| to the stack pointer
469 after the call is done. Returns True iff it managed to handle this
470 combination of arg/return types, else returns False. */
472 static
473 Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
474 /*OUT*/RetLoc* retloc,
475 ISelEnv* env,
476 IRExpr* guard,
477 IRCallee* cee, IRType retTy, IRExpr** args )
479 ARM64CondCode cc;
480 HReg argregs[ARM64_N_ARGREGS];
481 HReg tmpregs[ARM64_N_ARGREGS];
482 Bool go_fast;
483 Int n_args, i, nextArgReg;
484 Addr64 target;
486 vassert(ARM64_N_ARGREGS == 8);
488 /* Set default returns. We'll update them later if needed. */
489 *stackAdjustAfterCall = 0;
490 *retloc = mk_RetLoc_INVALID();
492 /* These are used for cross-checking that IR-level constraints on
493 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
494 UInt nVECRETs = 0;
495 UInt nGSPTRs = 0;
497 /* Marshal args for a call and do the call.
499 This function only deals with a tiny set of possibilities, which
500 cover all helpers in practice. The restrictions are that only
501 arguments in registers are supported, hence only
502 ARM64_N_REGPARMS x 64 integer bits in total can be passed. In
503 fact the only supported arg type is I64.
505 The return type can be I{64,32} or V128. In the V128 case, it
506 is expected that |args| will contain the special node
507 IRExpr_VECRET(), in which case this routine generates code to
508 allocate space on the stack for the vector return value. Since
509 we are not passing any scalars on the stack, it is enough to
510 preallocate the return space before marshalling any arguments,
511 in this case.
513 |args| may also contain IRExpr_GSPTR(), in which case the
514 value in x21 is passed as the corresponding argument.
516 Generating code which is both efficient and correct when
517 parameters are to be passed in registers is difficult, for the
518 reasons elaborated in detail in comments attached to
519 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
520 of the method described in those comments.
522 The problem is split into two cases: the fast scheme and the
523 slow scheme. In the fast scheme, arguments are computed
524 directly into the target (real) registers. This is only safe
525 when we can be sure that computation of each argument will not
526 trash any real registers set by computation of any other
527 argument.
529 In the slow scheme, all args are first computed into vregs, and
530 once they are all done, they are moved to the relevant real
531 regs. This always gives correct code, but it also gives a bunch
532 of vreg-to-rreg moves which are usually redundant but are hard
533 for the register allocator to get rid of.
535 To decide which scheme to use, all argument expressions are
536 first examined. If they are all so simple that it is clear they
537 will be evaluated without use of any fixed registers, use the
538 fast scheme, else use the slow scheme. Note also that only
539 unconditional calls may use the fast scheme, since having to
540 compute a condition expression could itself trash real
541 registers.
543 Note this requires being able to examine an expression and
544 determine whether or not evaluation of it might use a fixed
545 register. That requires knowledge of how the rest of this insn
546 selector works. Currently just the following 3 are regarded as
547 safe -- hopefully they cover the majority of arguments in
548 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
551 /* Note that the cee->regparms field is meaningless on ARM64 hosts
552 (since there is only one calling convention) and so we always
553 ignore it. */
555 n_args = 0;
556 for (i = 0; args[i]; i++) {
557 IRExpr* arg = args[i];
558 if (UNLIKELY(arg->tag == Iex_VECRET)) {
559 nVECRETs++;
560 } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
561 nGSPTRs++;
563 n_args++;
566 /* If this fails, the IR is ill-formed */
567 vassert(nGSPTRs == 0 || nGSPTRs == 1);
569 /* If we have a VECRET, allocate space on the stack for the return
570 value, and record the stack pointer after that. */
571 HReg r_vecRetAddr = INVALID_HREG;
572 if (nVECRETs == 1) {
573 vassert(retTy == Ity_V128 || retTy == Ity_V256);
574 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
575 r_vecRetAddr = newVRegI(env);
576 addInstr(env, ARM64Instr_AddToSP(-16));
577 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
578 } else {
579 // If either of these fail, the IR is ill-formed
580 vassert(retTy != Ity_V128 && retTy != Ity_V256);
581 vassert(nVECRETs == 0);
584 argregs[0] = hregARM64_X0();
585 argregs[1] = hregARM64_X1();
586 argregs[2] = hregARM64_X2();
587 argregs[3] = hregARM64_X3();
588 argregs[4] = hregARM64_X4();
589 argregs[5] = hregARM64_X5();
590 argregs[6] = hregARM64_X6();
591 argregs[7] = hregARM64_X7();
593 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
594 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
596 /* First decide which scheme (slow or fast) is to be used. First
597 assume the fast scheme, and select slow if any contraindications
598 (wow) appear. */
600 go_fast = True;
602 if (guard) {
603 if (guard->tag == Iex_Const
604 && guard->Iex.Const.con->tag == Ico_U1
605 && guard->Iex.Const.con->Ico.U1 == True) {
606 /* unconditional */
607 } else {
608 /* Not manifestly unconditional -- be conservative. */
609 go_fast = False;
613 if (go_fast) {
614 for (i = 0; i < n_args; i++) {
615 if (mightRequireFixedRegs(args[i])) {
616 go_fast = False;
617 break;
622 if (go_fast) {
623 if (retTy == Ity_V128 || retTy == Ity_V256)
624 go_fast = False;
627 /* At this point the scheme to use has been established. Generate
628 code to get the arg values into the argument rregs. If we run
629 out of arg regs, give up. */
631 if (go_fast) {
633 /* FAST SCHEME */
634 nextArgReg = 0;
636 for (i = 0; i < n_args; i++) {
637 IRExpr* arg = args[i];
639 IRType aTy = Ity_INVALID;
640 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
641 aTy = typeOfIRExpr(env->type_env, args[i]);
643 if (nextArgReg >= ARM64_N_ARGREGS)
644 return False; /* out of argregs */
646 if (aTy == Ity_I64) {
647 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
648 iselIntExpr_R(env, args[i]) ));
649 nextArgReg++;
651 else if (arg->tag == Iex_GSPTR) {
652 vassert(0); //ATC
653 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
654 hregARM64_X21() ));
655 nextArgReg++;
657 else if (arg->tag == Iex_VECRET) {
658 // because of the go_fast logic above, we can't get here,
659 // since vector return values makes us use the slow path
660 // instead.
661 vassert(0);
663 else
664 return False; /* unhandled arg type */
667 /* Fast scheme only applies for unconditional calls. Hence: */
668 cc = ARM64cc_AL;
670 } else {
672 /* SLOW SCHEME; move via temporaries */
673 nextArgReg = 0;
675 for (i = 0; i < n_args; i++) {
676 IRExpr* arg = args[i];
678 IRType aTy = Ity_INVALID;
679 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
680 aTy = typeOfIRExpr(env->type_env, args[i]);
682 if (nextArgReg >= ARM64_N_ARGREGS)
683 return False; /* out of argregs */
685 if (aTy == Ity_I64) {
686 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
687 nextArgReg++;
689 else if (arg->tag == Iex_GSPTR) {
690 vassert(0); //ATC
691 tmpregs[nextArgReg] = hregARM64_X21();
692 nextArgReg++;
694 else if (arg->tag == Iex_VECRET) {
695 vassert(!hregIsInvalid(r_vecRetAddr));
696 tmpregs[nextArgReg] = r_vecRetAddr;
697 nextArgReg++;
699 else
700 return False; /* unhandled arg type */
703 /* Now we can compute the condition. We can't do it earlier
704 because the argument computations could trash the condition
705 codes. Be a bit clever to handle the common case where the
706 guard is 1:Bit. */
707 cc = ARM64cc_AL;
708 if (guard) {
709 if (guard->tag == Iex_Const
710 && guard->Iex.Const.con->tag == Ico_U1
711 && guard->Iex.Const.con->Ico.U1 == True) {
712 /* unconditional -- do nothing */
713 } else {
714 cc = iselCondCode_C( env, guard );
718 /* Move the args to their final destinations. */
719 for (i = 0; i < nextArgReg; i++) {
720 vassert(!(hregIsInvalid(tmpregs[i])));
721 /* None of these insns, including any spill code that might
722 be generated, may alter the condition codes. */
723 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
728 /* Should be assured by checks above */
729 vassert(nextArgReg <= ARM64_N_ARGREGS);
731 /* Do final checks, set the return values, and generate the call
732 instruction proper. */
733 vassert(nGSPTRs == 0 || nGSPTRs == 1);
734 vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
735 vassert(*stackAdjustAfterCall == 0);
736 vassert(is_RetLoc_INVALID(*retloc));
737 switch (retTy) {
738 case Ity_INVALID:
739 /* Function doesn't return a value. */
740 *retloc = mk_RetLoc_simple(RLPri_None);
741 break;
742 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
743 *retloc = mk_RetLoc_simple(RLPri_Int);
744 break;
745 case Ity_V128:
746 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
747 *stackAdjustAfterCall = 16;
748 break;
749 case Ity_V256:
750 vassert(0); // ATC
751 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
752 *stackAdjustAfterCall = 32;
753 break;
754 default:
755 /* IR can denote other possible return types, but we don't
756 handle those here. */
757 vassert(0);
760 /* Finally, generate the call itself. This needs the *retloc value
761 set in the switch above, which is why it's at the end. */
763 /* nextArgReg doles out argument registers. Since these are
764 assigned in the order x0 .. x7, its numeric value at this point,
765 which must be between 0 and 8 inclusive, is going to be equal to
766 the number of arg regs in use for the call. Hence bake that
767 number into the call (we'll need to know it when doing register
768 allocation, to know what regs the call reads.) */
770 target = (Addr)cee->addr;
771 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
773 return True; /* success */
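/* Example (a sketch): an unconditional call to a helper taking two I64
   arguments, each of which is just an IRExpr_RdTmp, IRExpr_Const or
   IRExpr_Get, uses the fast scheme: the arguments are evaluated and
   moved straight into x0 and x1, and the call is emitted with
   cc == ARM64cc_AL and nextArgReg == 2.  A guarded call, or one whose
   arguments might need fixed registers, instead computes the arguments
   into fresh vregs and only copies them to x0..x7 once all of them
   (and the guard) have been evaluated. */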
777 /*---------------------------------------------------------*/
778 /*--- ISEL: Integer expressions (64/32 bit) ---*/
779 /*---------------------------------------------------------*/
781 /* Select insns for an integer-typed expression, and add them to the
782 code list. Return a reg holding the result. This reg will be a
783 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
784 want to modify it, ask for a new vreg, copy it in there, and modify
785 the copy. The register allocator will do its best to map both
786 vregs to the same real register, so the copies will often disappear
787 later in the game.
789 This should handle expressions of 64- and 32-bit type. All results
790 are returned in a 64-bit register. For 32-bit expressions, the
791 upper 32 bits are arbitrary, so you should mask or sign extend
792 partial values if necessary.
795 /* ---------------- RRS matching helper ---------------- */
797 /* This helper matches 64-bit integer expressions of the form
798 {Add,Sub,And,Or,Xor}(E1, {Shl,Shr,Sar}(E2, immediate))
800 {Add,And,Or,Xor}({Shl,Shr,Sar}(E1, immediate), E2)
801 which is a useful thing to do because AArch64 can compute those in
802 a single instruction.
804 static Bool matchesRegRegShift(/*OUT*/ARM64RRSOp* mainOp,
805 /*OUT*/ARM64ShiftOp* shiftOp,
806 /*OUT*/UChar* amt,
807 /*OUT*/IRExpr** argUnshifted,
808 /*OUT*/IRExpr** argToBeShifted,
809 IRExpr* e)
811 *mainOp = (ARM64RRSOp)0;
812 *shiftOp = (ARM64ShiftOp)0;
813 *amt = 0;
814 *argUnshifted = NULL;
815 *argToBeShifted = NULL;
816 if (e->tag != Iex_Binop) {
817 return False;
819 const IROp irMainOp = e->Iex.Binop.op;
820 Bool canSwap = True;
821 switch (irMainOp) {
822 case Iop_And64: *mainOp = ARM64rrs_AND; break;
823 case Iop_Or64: *mainOp = ARM64rrs_OR; break;
824 case Iop_Xor64: *mainOp = ARM64rrs_XOR; break;
825 case Iop_Add64: *mainOp = ARM64rrs_ADD; break;
826 case Iop_Sub64: *mainOp = ARM64rrs_SUB; canSwap = False; break;
827 default: return False;
829 /* The root node is OK. Now check the right (2nd) arg. */
830 IRExpr* argL = e->Iex.Binop.arg1;
831 IRExpr* argR = e->Iex.Binop.arg2;
833 // This loop runs either one or two iterations. In the first iteration, we
834 // check for a shiftable right (second) arg. If that fails, at the end of
835 // the first iteration, the args are swapped, if that is valid, and we go
836 // round again, hence checking for a shiftable left (first) arg.
837 UInt iterNo = 1;
838 while (True) {
839 vassert(iterNo == 1 || iterNo == 2);
840 if (argR->tag == Iex_Binop) {
841 const IROp irShiftOp = argR->Iex.Binop.op;
842 if (irShiftOp == Iop_Shl64
843 || irShiftOp == Iop_Shr64 || irShiftOp == Iop_Sar64) {
844 IRExpr* argRL = argR->Iex.Binop.arg1;
845 const IRExpr* argRR = argR->Iex.Binop.arg2;
846 if (argRR->tag == Iex_Const) {
847 const IRConst* argRRconst = argRR->Iex.Const.con;
848 vassert(argRRconst->tag == Ico_U8); // due to typecheck rules
849 const UChar amount = argRRconst->Ico.U8;
850 if (amount >= 1 && amount <= 63) {
851 // We got a match \o/
852 // *mainOp is already set
853 switch (irShiftOp) {
854 case Iop_Shl64: *shiftOp = ARM64sh_SHL; break;
855 case Iop_Shr64: *shiftOp = ARM64sh_SHR; break;
856 case Iop_Sar64: *shiftOp = ARM64sh_SAR; break;
857 default: vassert(0); // guarded above
859 *amt = amount;
860 *argUnshifted = argL;
861 *argToBeShifted = argRL;
862 return True;
867 // We failed to get a match in the first iteration. So, provided the
868 // root node isn't SUB, swap the arguments and make one further
869 // iteration. If that doesn't succeed, we must give up.
870 if (iterNo == 1 && canSwap) {
871 IRExpr* tmp = argL;
872 argL = argR;
873 argR = tmp;
874 iterNo = 2;
875 continue;
877 // Give up.
878 return False;
880 /*NOTREACHED*/
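/* Example (a sketch, with t1 and t2 denoting IR temporaries): for
   e = Add64(t1, Shl64(t2, 3)) this returns True with
   *mainOp = ARM64rrs_ADD, *shiftOp = ARM64sh_SHL, *amt = 3,
   *argUnshifted = t1 and *argToBeShifted = t2, allowing the caller to
   emit a single "add Xd, Xn, Xm, lsl #3".  Sub64(Shl64(t1, 3), t2)
   does not match, since the shifted operand of a subtraction must be
   the second argument. */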
883 /* --------------------- AMode --------------------- */
885 /* Return an AMode which computes the value of the specified
886 expression, possibly also adding insns to the code list as a
887 result. The expression may only be a 64-bit one.
890 static Bool isValidScale ( UChar scale )
892 switch (scale) {
893 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
894 default: return False;
898 static Bool sane_AMode ( ARM64AMode* am )
900 switch (am->tag) {
901 case ARM64am_RI9:
902 return
903 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
904 && (hregIsVirtual(am->ARM64am.RI9.reg)
905 /* || sameHReg(am->ARM64am.RI9.reg,
906 hregARM64_X21()) */ )
907 && am->ARM64am.RI9.simm9 >= -256
908 && am->ARM64am.RI9.simm9 <= 255 );
909 case ARM64am_RI12:
910 return
911 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
912 && (hregIsVirtual(am->ARM64am.RI12.reg)
913 /* || sameHReg(am->ARM64am.RI12.reg,
914 hregARM64_X21()) */ )
915 && am->ARM64am.RI12.uimm12 < 4096
916 && isValidScale(am->ARM64am.RI12.szB) );
917 case ARM64am_RR:
918 return
919 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
920 && hregIsVirtual(am->ARM64am.RR.base)
921 && hregClass(am->ARM64am.RR.index) == HRcInt64
922 && hregIsVirtual(am->ARM64am.RR.index) );
923 default:
924 vpanic("sane_AMode: unknown ARM64 AMode1 tag");
928 static
929 ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
931 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
932 vassert(sane_AMode(am));
933 return am;
936 static
937 ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
939 IRType ty = typeOfIRExpr(env->type_env,e);
940 vassert(ty == Ity_I64);
942 ULong szBbits = 0;
943 switch (dty) {
944 case Ity_I64: szBbits = 3; break;
945 case Ity_I32: szBbits = 2; break;
946 case Ity_I16: szBbits = 1; break;
947 case Ity_I8: szBbits = 0; break;
948 default: vassert(0);
951 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
952 we're going to create an amode suitable for LDU* or STU*
953 instructions, which use unscaled immediate offsets. */
954 if (e->tag == Iex_Binop
955 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
956 && e->Iex.Binop.arg2->tag == Iex_Const
957 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
958 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
959 if (simm >= -255 && simm <= 255) {
960 /* Although the gating condition might seem to be
961 simm >= -256 && simm <= 255
962 we will need to negate simm in the case where the op is Sub64.
963 Hence limit the lower value to -255 in order that its negation
964 is representable. */
965 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
966 if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
967 return ARM64AMode_RI9(reg, (Int)simm);
971 /* Add64(expr, uimm12 * transfer-size) */
972 if (e->tag == Iex_Binop
973 && e->Iex.Binop.op == Iop_Add64
974 && e->Iex.Binop.arg2->tag == Iex_Const
975 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
976 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
977 ULong szB = 1 << szBbits;
978 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
979 && (uimm >> szBbits) < 4096) {
980 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
981 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
985 /* Add64(expr1, expr2) */
986 if (e->tag == Iex_Binop
987 && e->Iex.Binop.op == Iop_Add64) {
988 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
989 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
990 return ARM64AMode_RR(reg1, reg2);
993 /* Doesn't match anything in particular. Generate it into
994 a register and use that. */
995 HReg reg = iselIntExpr_R(env, e);
996 return ARM64AMode_RI9(reg, 0);
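/* Example (a sketch): for a 32-bit load from Add64(t1, 0x400), the
   offset is outside the signed 9-bit range but is 4-aligned and
   0x400 >> 2 == 0x100 < 4096, so the scaled form
   ARM64AMode_RI12(t1, 0x100, 4) is produced.  A small offset such as
   Add64(t1, 0x40) is caught by the first case and yields the unscaled
   form ARM64AMode_RI9(t1, 0x40). */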
1000 /* --------------------- RIA --------------------- */
1002 /* Select instructions to generate 'e' into a RIA. */
1004 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
1006 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
1007 /* sanity checks ... */
1008 switch (ri->tag) {
1009 case ARM64riA_I12:
1010 vassert(ri->ARM64riA.I12.imm12 < 4096);
1011 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
1012 return ri;
1013 case ARM64riA_R:
1014 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
1015 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
1016 return ri;
1017 default:
1018 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
1022 /* DO NOT CALL THIS DIRECTLY ! */
1023 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
1025 IRType ty = typeOfIRExpr(env->type_env,e);
1026 vassert(ty == Ity_I64 || ty == Ity_I32);
1028 /* special case: immediate */
1029 if (e->tag == Iex_Const) {
1030 ULong u = 0xF000000ULL; /* invalid */
1031 switch (e->Iex.Const.con->tag) {
1032 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
1033 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1034 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
1036 if (0 == (u & ~(0xFFFULL << 0)))
1037 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
1038 if (0 == (u & ~(0xFFFULL << 12)))
1039 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
1040 /* else fail, fall through to default case */
1043 /* default case: calculate into a register and return that */
1045 HReg r = iselIntExpr_R ( env, e );
1046 return ARM64RIA_R(r);
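/* Example (a sketch): the constant 0x123 becomes ARM64RIA_I12(0x123, 0),
   and 0x5000 (only bits 23:12 set) becomes ARM64RIA_I12(5, 12);
   something like 0x1001, which fits neither form, is computed into a
   register and returned as ARM64RIA_R. */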
1051 /* --------------------- RIL --------------------- */
1053 /* Select instructions to generate 'e' into a RIL. At this point we
1054 have to deal with the strange bitfield-immediate encoding for logic
1055 instructions. */
1058 // The following four functions
1059 // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
1060 // are copied, with modifications, from
1061 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
1062 // which has the following copyright notice:
1064 Copyright 2013, ARM Limited
1065 All rights reserved.
1067 Redistribution and use in source and binary forms, with or without
1068 modification, are permitted provided that the following conditions are met:
1070 * Redistributions of source code must retain the above copyright notice,
1071 this list of conditions and the following disclaimer.
1072 * Redistributions in binary form must reproduce the above copyright notice,
1073 this list of conditions and the following disclaimer in the documentation
1074 and/or other materials provided with the distribution.
1075 * Neither the name of ARM Limited nor the names of its contributors may be
1076 used to endorse or promote products derived from this software without
1077 specific prior written permission.
1079 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1080 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1081 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1082 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
1083 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1084 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1085 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1086 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1087 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1088 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1091 static Int CountLeadingZeros(ULong value, Int width)
1093 vassert(width == 32 || width == 64);
1094 Int count = 0;
1095 ULong bit_test = 1ULL << (width - 1);
1096 while ((count < width) && ((bit_test & value) == 0)) {
1097 count++;
1098 bit_test >>= 1;
1100 return count;
1103 static Int CountTrailingZeros(ULong value, Int width)
1105 vassert(width == 32 || width == 64);
1106 Int count = 0;
1107 while ((count < width) && (((value >> count) & 1) == 0)) {
1108 count++;
1110 return count;
1113 static Int CountSetBits(ULong value, Int width)
1115 // TODO: Other widths could be added here, as the implementation already
1116 // supports them.
1117 vassert(width == 32 || width == 64);
1119 // Mask out unused bits to ensure that they are not counted.
1120 value &= (0xffffffffffffffffULL >> (64-width));
1122 // Add up the set bits.
1123 // The algorithm works by adding pairs of bit fields together iteratively,
1124 // where the size of each bit field doubles each time.
1125 // An example for an 8-bit value:
1126 // Bits: h g f e d c b a
1127 // \ | \ | \ | \ |
1128 // value = h+g f+e d+c b+a
1129 // \ | \ |
1130 // value = h+g+f+e d+c+b+a
1131 // \ |
1132 // value = h+g+f+e+d+c+b+a
1133 value = ((value >> 1) & 0x5555555555555555ULL)
1134 + (value & 0x5555555555555555ULL);
1135 value = ((value >> 2) & 0x3333333333333333ULL)
1136 + (value & 0x3333333333333333ULL);
1137 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1138 + (value & 0x0f0f0f0f0f0f0f0fULL);
1139 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1140 + (value & 0x00ff00ff00ff00ffULL);
1141 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1142 + (value & 0x0000ffff0000ffffULL);
1143 value = ((value >> 32) & 0x00000000ffffffffULL)
1144 + (value & 0x00000000ffffffffULL);
1146 return value;
1149 static Bool isImmLogical ( /*OUT*/UInt* n,
1150 /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1151 ULong value, UInt width )
1153 // Test if a given value can be encoded in the immediate field of a
1154 // logical instruction.
1156 // If it can be encoded, the function returns true, and values
1157 // pointed to by n, imm_s and imm_r are updated with immediates
1158 // encoded in the format required by the corresponding fields in the
1159 // logical instruction. If it can not be encoded, the function
1160 // returns false, and the values pointed to by n, imm_s and imm_r
1161 // are undefined.
1162 vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1163 vassert(width == 32 || width == 64);
1165 // Logical immediates are encoded using parameters n, imm_s and imm_r using
1166 // the following table:
1168 // N imms immr size S R
1169 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
1170 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
1171 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
1172 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
1173 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
1174 // 0 11110s xxxxxr 2 UInt(s) UInt(r)
1175 // (s bits must not be all set)
1177 // A pattern is constructed of size bits, where the least significant S+1
1178 // bits are set. The pattern is rotated right by R, and repeated across a
1179 // 32 or 64-bit value, depending on destination register width.
1181 // To test if an arbitrary immediate can be encoded using this scheme, an
1182 // iterative algorithm is used.
1184 // TODO: This code does not consider using X/W register overlap to support
1185 // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1186 // are an encodable logical immediate.
1188 // 1. If the value has all set or all clear bits, it can't be encoded.
1189 if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1190 ((width == 32) && (value == 0xffffffff))) {
1191 return False;
1194 UInt lead_zero = CountLeadingZeros(value, width);
1195 UInt lead_one = CountLeadingZeros(~value, width);
1196 UInt trail_zero = CountTrailingZeros(value, width);
1197 UInt trail_one = CountTrailingZeros(~value, width);
1198 UInt set_bits = CountSetBits(value, width);
1200 // The fixed bits in the immediate s field.
1201 // If width == 64 (X reg), start at 0xFFFFFF80.
1202 // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1203 // widths won't be executed.
1204 Int imm_s_fixed = (width == 64) ? -128 : -64;
1205 Int imm_s_mask = 0x3F;
1207 for (;;) {
1208 // 2. If the value is two bits wide, it can be encoded.
1209 if (width == 2) {
1210 *n = 0;
1211 *imm_s = 0x3C;
1212 *imm_r = (value & 3) - 1;
1213 return True;
1216 *n = (width == 64) ? 1 : 0;
1217 *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1218 if ((lead_zero + set_bits) == width) {
1219 *imm_r = 0;
1220 } else {
1221 *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1224 // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1225 // the bit width of the value, it can be encoded.
1226 if (lead_zero + trail_zero + set_bits == width) {
1227 return True;
1230 // 4. If the sum of leading ones, trailing ones and unset bits in the
1231 // value is equal to the bit width of the value, it can be encoded.
1232 if (lead_one + trail_one + (width - set_bits) == width) {
1233 return True;
1236 // 5. If the most-significant half of the bitwise value is equal to the
1237 // least-significant half, return to step 2 using the least-significant
1238 // half of the value.
1239 ULong mask = (1ULL << (width >> 1)) - 1;
1240 if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1241 width >>= 1;
1242 set_bits >>= 1;
1243 imm_s_fixed >>= 1;
1244 continue;
1247 // 6. Otherwise, the value can't be encoded.
1248 return False;
1253 /* Create a RIL for the given immediate, if it is representable, or
1254 return NULL if not. */
1256 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1258 UInt n = 0, imm_s = 0, imm_r = 0;
1259 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1260 if (!ok) return NULL;
1261 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1262 return ARM64RIL_I13(n, imm_r, imm_s);
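/* Example (a sketch): mb_mkARM64RIL_I(0xFF) succeeds with n == 1,
   imm_r == 0 and imm_s == 7 -- the same I13 encoding that the widening
   helpers above construct by hand -- whereas mb_mkARM64RIL_I(0) and
   mb_mkARM64RIL_I(~0ULL) return NULL, since all-zeroes and all-ones
   are not representable as logical immediates. */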
1265 /* So, finally .. */
1267 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1269 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1270 /* sanity checks ... */
1271 switch (ri->tag) {
1272 case ARM64riL_I13:
1273 vassert(ri->ARM64riL.I13.bitN < 2);
1274 vassert(ri->ARM64riL.I13.immR < 64);
1275 vassert(ri->ARM64riL.I13.immS < 64);
1276 return ri;
1277 case ARM64riL_R:
1278 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1279 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1280 return ri;
1281 default:
1282 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1286 /* DO NOT CALL THIS DIRECTLY ! */
1287 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1289 IRType ty = typeOfIRExpr(env->type_env,e);
1290 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
1292 /* special case: immediate */
1293 if (e->tag == Iex_Const) {
1294 ARM64RIL* maybe = NULL;
1295 if (ty == Ity_I64) {
1296 vassert(e->Iex.Const.con->tag == Ico_U64);
1297 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1298 } else if (ty == Ity_I32) {
1299 vassert(ty == Ity_I32);
1300 vassert(e->Iex.Const.con->tag == Ico_U32);
1301 UInt u32 = e->Iex.Const.con->Ico.U32;
1302 ULong u64 = (ULong)u32;
1303 /* First try with 32 leading zeroes. */
1304 maybe = mb_mkARM64RIL_I(u64);
1305 /* If that doesn't work, try with 2 copies, since it doesn't
1306 matter what winds up in the upper 32 bits. */
1307 if (!maybe) {
1308 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1310 } else {
1311 vassert(ty == Ity_I16);
1312 vassert(e->Iex.Const.con->tag == Ico_U16);
1313 // `maybe` is still NULL. Be lame and fall through to the default
1314 // case. Obviously we could do better here.
1316 if (maybe) return maybe;
1317 /* else fail, fall through to default case */
1320 /* default case: calculate into a register and return that */
1322 HReg r = iselIntExpr_R ( env, e );
1323 return ARM64RIL_R(r);
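/* Example (a sketch): the I32 constant 0xF0F0F0F0 is not encodable as
   the 64-bit immediate 0x00000000F0F0F0F0, but since the upper 32 bits
   of the result do not matter here, the second attempt instead encodes
   0xF0F0F0F0F0F0F0F0 -- a repeating 8-bit pattern -- which does have a
   valid 13-bit logical-immediate encoding. */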
1328 /* --------------------- RI6 --------------------- */
1330 /* Select instructions to generate 'e' into a RI6. */
1332 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1334 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1335 /* sanity checks ... */
1336 switch (ri->tag) {
1337 case ARM64ri6_I6:
1338 vassert(ri->ARM64ri6.I6.imm6 < 64);
1339 vassert(ri->ARM64ri6.I6.imm6 > 0);
1340 return ri;
1341 case ARM64ri6_R:
1342 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1343 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1344 return ri;
1345 default:
1346 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1350 /* DO NOT CALL THIS DIRECTLY ! */
1351 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1353 IRType ty = typeOfIRExpr(env->type_env,e);
1354 vassert(ty == Ity_I64 || ty == Ity_I8);
1356 /* special case: immediate */
1357 if (e->tag == Iex_Const) {
1358 switch (e->Iex.Const.con->tag) {
1359 case Ico_U8: {
1360 UInt u = e->Iex.Const.con->Ico.U8;
1361 if (u > 0 && u < 64)
1362 return ARM64RI6_I6(u);
1363 break;
1364 default:
1365 break;
1368 /* else fail, fall through to default case */
1371 /* default case: calculate into a register and return that */
1373 HReg r = iselIntExpr_R ( env, e );
1374 return ARM64RI6_R(r);
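/* Example (a sketch): a shift amount supplied as Const:U8(3) becomes the
   immediate form ARM64RI6_I6(3); an amount of 0, or one held in an IR
   temporary, is computed into a register and returned as ARM64RI6_R. */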
1379 /* ------------------- CondCode ------------------- */
1381 /* Generate code to evaluate a bit-typed expression, returning the
1382 condition code which would correspond when the expression would
1383 notionally have returned 1.
1385 Note that iselCondCode_C and iselCondCode_R are mutually recursive. For
1386 future changes to either of them, take care not to introduce an infinite
1387 loop involving the two of them.
1389 static ARM64CondCode iselCondCode_C ( ISelEnv* env, IRExpr* e )
1391 ARM64CondCode cc = iselCondCode_C_wrk(env,e);
1392 vassert(cc != ARM64cc_NV);
1393 return cc;
1396 static ARM64CondCode iselCondCode_C_wrk ( ISelEnv* env, IRExpr* e )
1398 vassert(e);
1399 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1401 /* var */
1402 if (e->tag == Iex_RdTmp) {
1403 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1404 /* Cmp doesn't modify rTmp; so this is OK. */
1405 ARM64RIL* one = mb_mkARM64RIL_I(1);
1406 vassert(one);
1407 addInstr(env, ARM64Instr_Test(rTmp, one));
1408 return ARM64cc_NE;
1411 /* Constant 1:Bit */
1412 if (e->tag == Iex_Const) {
1413 /* This is a very stupid translation. Hopefully it doesn't occur much,
1414 if ever. */
1415 vassert(e->Iex.Const.con->tag == Ico_U1);
1416 vassert(e->Iex.Const.con->Ico.U1 == True
1417 || e->Iex.Const.con->Ico.U1 == False);
1418 HReg rTmp = newVRegI(env);
1419 addInstr(env, ARM64Instr_Imm64(rTmp, 0));
1420 ARM64RIL* one = mb_mkARM64RIL_I(1);
1421 vassert(one);
1422 addInstr(env, ARM64Instr_Test(rTmp, one));
1423 return e->Iex.Const.con->Ico.U1 ? ARM64cc_EQ : ARM64cc_NE;
1426 /* Not1(e) */
1427 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1428 /* Generate code for the arg, and negate the test condition */
1429 ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
1430 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1431 return ARM64cc_AL;
1432 } else {
1433 return 1 ^ cc;
1437 /* --- patterns rooted at: 64to1 --- */
1439 if (e->tag == Iex_Unop
1440 && e->Iex.Unop.op == Iop_64to1) {
1441 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1442 ARM64RIL* one = mb_mkARM64RIL_I(1);
1443 vassert(one); /* '1' must be representable */
1444 addInstr(env, ARM64Instr_Test(rTmp, one));
1445 return ARM64cc_NE;
1448 /* --- patterns rooted at: CmpNEZ8 --- */
1450 if (e->tag == Iex_Unop
1451 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1452 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1453 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1454 addInstr(env, ARM64Instr_Test(r1, xFF));
1455 return ARM64cc_NE;
1458 /* --- patterns rooted at: CmpNEZ16 --- */
1460 if (e->tag == Iex_Unop
1461 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1462 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1463 ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
1464 addInstr(env, ARM64Instr_Test(r1, xFFFF));
1465 return ARM64cc_NE;
1468 /* --- patterns rooted at: CmpNEZ64 --- */
1470 if (e->tag == Iex_Unop
1471 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1472 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1473 ARM64RIA* zero = ARM64RIA_I12(0,0);
1474 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1475 return ARM64cc_NE;
1478 /* --- patterns rooted at: CmpNEZ32 --- */
1480 if (e->tag == Iex_Unop
1481 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1482 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1483 ARM64RIA* zero = ARM64RIA_I12(0,0);
1484 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1485 return ARM64cc_NE;
1488 /* --- Cmp*64*(x,y) --- */
1489 if (e->tag == Iex_Binop
1490 && (e->Iex.Binop.op == Iop_CmpEQ64
1491 || e->Iex.Binop.op == Iop_CmpNE64
1492 || e->Iex.Binop.op == Iop_CmpLT64S
1493 || e->Iex.Binop.op == Iop_CmpLT64U
1494 || e->Iex.Binop.op == Iop_CmpLE64S
1495 || e->Iex.Binop.op == Iop_CmpLE64U
1496 || e->Iex.Binop.op == Iop_CasCmpEQ64
1497 || e->Iex.Binop.op == Iop_CasCmpNE64)) {
1498 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1499 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1500 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1501 switch (e->Iex.Binop.op) {
1502 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return ARM64cc_EQ;
1503 case Iop_CmpNE64: case Iop_CasCmpNE64: return ARM64cc_NE;
1504 case Iop_CmpLT64S: return ARM64cc_LT;
1505 case Iop_CmpLT64U: return ARM64cc_CC;
1506 case Iop_CmpLE64S: return ARM64cc_LE;
1507 case Iop_CmpLE64U: return ARM64cc_LS;
1508 default: vpanic("iselCondCode_C(arm64): CmpXX64");
1512 /* --- Cmp*32*(x,y) --- */
1513 if (e->tag == Iex_Binop
1514 && (e->Iex.Binop.op == Iop_CmpEQ32
1515 || e->Iex.Binop.op == Iop_CmpNE32
1516 || e->Iex.Binop.op == Iop_CmpLT32S
1517 || e->Iex.Binop.op == Iop_CmpLT32U
1518 || e->Iex.Binop.op == Iop_CmpLE32S
1519 || e->Iex.Binop.op == Iop_CmpLE32U
1520 || e->Iex.Binop.op == Iop_CasCmpEQ32
1521 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1522 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1523 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1524 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1525 switch (e->Iex.Binop.op) {
1526 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return ARM64cc_EQ;
1527 case Iop_CmpNE32: case Iop_CasCmpNE32: return ARM64cc_NE;
1528 case Iop_CmpLT32S: return ARM64cc_LT;
1529 case Iop_CmpLT32U: return ARM64cc_CC;
1530 case Iop_CmpLE32S: return ARM64cc_LE;
1531 case Iop_CmpLE32U: return ARM64cc_LS;
1532 default: vpanic("iselCondCode_C(arm64): CmpXX32");
1536 /* --- Cmp*16*(x,y) --- */
1537 if (e->tag == Iex_Binop
1538 && (e->Iex.Binop.op == Iop_CasCmpEQ16
1539 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1540 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1541 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1542 HReg argL2 = widen_z_16_to_64(env, argL);
1543 HReg argR2 = widen_z_16_to_64(env, argR);
1544 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1545 switch (e->Iex.Binop.op) {
1546 case Iop_CasCmpEQ16: return ARM64cc_EQ;
1547 case Iop_CasCmpNE16: return ARM64cc_NE;
1548 default: vpanic("iselCondCode_C(arm64): CmpXX16");
1552 /* --- Cmp*8*(x,y) --- */
1553 if (e->tag == Iex_Binop
1554 && (e->Iex.Binop.op == Iop_CasCmpEQ8
1555 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1556 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1557 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1558 HReg argL2 = widen_z_8_to_64(env, argL);
1559 HReg argR2 = widen_z_8_to_64(env, argR);
1560 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1561 switch (e->Iex.Binop.op) {
1562 case Iop_CasCmpEQ8: return ARM64cc_EQ;
1563 case Iop_CasCmpNE8: return ARM64cc_NE;
1564 default: vpanic("iselCondCode_C(arm64): CmpXX8");
1568 /* --- And1(x,y), Or1(x,y) --- */
1569 if (e->tag == Iex_Binop
1570 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1571 HReg tmp = iselCondCode_R(env, e);
1572 ARM64RIL* one = mb_mkARM64RIL_I(1);
1573 vassert(one);
1574 addInstr(env, ARM64Instr_Test(tmp, one));
1575 return ARM64cc_NE;
1578 ppIRExpr(e);
1579 vpanic("iselCondCode_C");
1583 /* --------------------- CONDCODE as int reg --------------------- */
1585 /* Generate code to evaluate a bit-typed expression, returning the resulting
1586 value in bit 0 of an integer register. WARNING: all of the other bits in the
1587 register can be arbitrary. Callers must mask them off or otherwise ignore
1588 them, as necessary.
1590 Note that iselCondCode_C and iselCondCode_R are mutually recursive. For
1591 future changes to either of them, take care not to introduce an infinite
1592 loop involving the two of them.
1594 static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e )
1596 /* Uh, there's nothing we can sanity check here, unfortunately. */
1597 return iselCondCode_R_wrk(env,e);
1600 /* DO NOT CALL THIS DIRECTLY ! */
1601 static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e )
1603 vassert(e);
1604 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1606 /* var */
1607 if (e->tag == Iex_RdTmp) {
1608 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1611 /* And1(x,y), Or1(x,y) */
1612 if (e->tag == Iex_Binop
1613 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1614 HReg res = newVRegI(env);
1615 HReg x_as_64 = iselCondCode_R(env, e->Iex.Binop.arg1);
1616 HReg y_as_64 = iselCondCode_R(env, e->Iex.Binop.arg2);
1617 ARM64LogicOp lop
1618 = e->Iex.Binop.op == Iop_And1 ? ARM64lo_AND : ARM64lo_OR;
1619 addInstr(env, ARM64Instr_Logic(res, x_as_64, ARM64RIL_R(y_as_64), lop));
1620 return res;
1623 /* Anything else, we hand off to iselCondCode_C and force the value into a
1624 register. */
1625 HReg res = newVRegI(env);
1626 ARM64CondCode cc = iselCondCode_C(env, e);
1627 addInstr(env, ARM64Instr_Set64(res, cc));
1628 return res;
1630 /* PJF the following two lines are dead code
1631 ppIRExpr(e);
1632 vpanic("iselCondCode_R(arm64)");
1637 /* --------------------- Reg --------------------- */
1639 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1641 HReg r = iselIntExpr_R_wrk(env, e);
1642 /* sanity checks ... */
1643 # if 0
1644 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1645 # endif
1646 vassert(hregClass(r) == HRcInt64);
1647 vassert(hregIsVirtual(r));
1648 return r;
1651 /* DO NOT CALL THIS DIRECTLY ! */
1652 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1654 IRType ty = typeOfIRExpr(env->type_env,e);
1655 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1657 switch (e->tag) {
1659 /* --------- TEMP --------- */
1660 case Iex_RdTmp: {
1661 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1664 /* --------- LOAD --------- */
1665 case Iex_Load: {
1666 HReg dst = newVRegI(env);
1668 if (e->Iex.Load.end != Iend_LE)
1669 goto irreducible;
1671 if (ty == Ity_I64) {
1672 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1673 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1674 return dst;
1676 if (ty == Ity_I32) {
1677 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1678 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1679 return dst;
1681 if (ty == Ity_I16) {
1682 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1683 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1684 return dst;
1686 if (ty == Ity_I8) {
1687 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1688 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1689 return dst;
1691 break;
1694 /* --------- BINARY OP --------- */
1695 case Iex_Binop: {
1697 ARM64LogicOp lop = 0; /* invalid */
1698 ARM64ShiftOp sop = 0; /* invalid */
1700 /* Special-case 0-x into a Neg instruction. Not because it's
1701 particularly useful in itself, but to exercise this instruction
1702 and so check the assembly correctness of the Left32/Left64
1703 implementation. */
1704 switch (e->Iex.Binop.op) {
1705 case Iop_Sub64:
1706 if (isZeroU64(e->Iex.Binop.arg1)) {
1707 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1708 HReg dst = newVRegI(env);
1709 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1710 return dst;
1712 break;
1713 default:
1714 break;
1717 /* AND64/OR64/XOR64/ADD64/SUB64(e1, e2 shifted by imm)
1718 AND64/OR64/XOR64/ADD64(e1 shifted by imm, e2)
1721 switch (e->Iex.Binop.op) {
1722 case Iop_And64: case Iop_Or64: case Iop_Xor64:
1723 case Iop_Add64: case Iop_Sub64: {
1724 ARM64RRSOp mainOp = ARM64rrs_INVALID;
1725 ARM64ShiftOp shiftOp = (ARM64ShiftOp)0; // Invalid
1726 IRExpr* argUnshifted = NULL;
1727 IRExpr* argToBeShifted = NULL;
1728 UChar amt = 0;
1729 if (matchesRegRegShift(&mainOp, &shiftOp, &amt, &argUnshifted,
1730 &argToBeShifted, e)) {
1731 HReg rDst = newVRegI(env);
1732 HReg rUnshifted = iselIntExpr_R(env, argUnshifted);
1733 HReg rToBeShifted = iselIntExpr_R(env, argToBeShifted);
1734 addInstr(env, ARM64Instr_RRS(rDst, rUnshifted, rToBeShifted,
1735 shiftOp, amt, mainOp));
1736 return rDst;
1739 default:
1740 break;
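/* Illustrative note (added, not from the original source): matchesRegRegShift
   is expected to spot trees such as Add64(t1, Shl64(t2, Const:U8(3))), in
   which case the ARM64Instr_RRS above folds the shift into the second
   operand of the arithmetic/logical instruction, giving roughly

      add   x<dst>, x<t1>, x<t2>, lsl #3

   instead of a separate shift followed by an add. */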
1744 /* ADD/SUB(e1, e2) (for any e1, e2) */
1745 switch (e->Iex.Binop.op) {
1746 case Iop_Add64: case Iop_Add32:
1747 case Iop_Sub64: case Iop_Sub32: {
1748 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1749 || e->Iex.Binop.op == Iop_Add32;
1750 HReg dst = newVRegI(env);
1751 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1752 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1753 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1754 return dst;
1756 default:
1757 break;
1760 /* AND/OR/XOR(e1, e2) (for any e1, e2) */
1761 switch (e->Iex.Binop.op) {
1762 case Iop_And64: case Iop_And32:
1763 lop = ARM64lo_AND; goto log_binop;
1764 case Iop_Or64: case Iop_Or32: case Iop_Or16:
1765 lop = ARM64lo_OR; goto log_binop;
1766 case Iop_Xor64: case Iop_Xor32:
1767 lop = ARM64lo_XOR; goto log_binop;
1768 log_binop: {
1769 HReg dst = newVRegI(env);
1770 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1771 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1772 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1773 return dst;
1775 default:
1776 break;
1779 /* SHL/SHR/SAR */
1780 switch (e->Iex.Binop.op) {
1781 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1782 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1783 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1784 sh_binop: {
1785 HReg dst = newVRegI(env);
1786 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1787 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1788 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1789 return dst;
1791 case Iop_Shr32:
1792 case Iop_Sar32: {
1793 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1794 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1795 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1796 HReg dst = zx ? widen_z_32_to_64(env, argL)
1797 : widen_s_32_to_64(env, argL);
1798 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1799 return dst;
1801 default: break;
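/* Note (added, not from the original source): for Iop_Shr32 and Iop_Sar32
   the value is first widened to 64 bits -- zero-extended for Shr32,
   sign-extended for Sar32 -- and then a 64-bit logical shift right is used
   in both cases.  For Sar32 this still yields the correct low 32 bits,
   because bits 32..63 of the widened value are copies of the sign bit; the
   upper half of the result is irrelevant for an Ity_I32 value. */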
1804 /* MUL */
1805 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1806 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1807 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1808 HReg dst = newVRegI(env);
1809 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1810 return dst;
1813 /* MULL */
1814 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1815 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1816 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1817 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1818 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1819 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1820 HReg dst = newVRegI(env);
1821 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1822 return dst;
1825 /* Handle misc other ops. */
1827 if (e->Iex.Binop.op == Iop_Max32U) {
1828 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1829 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1830 HReg dst = newVRegI(env);
1831 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1832 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1833 return dst;
1836 if (e->Iex.Binop.op == Iop_32HLto64) {
1837 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1838 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1839 HReg lo32 = widen_z_32_to_64(env, lo32s);
1840 HReg hi32 = newVRegI(env);
1841 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1842 ARM64sh_SHL));
1843 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1844 ARM64lo_OR));
1845 return hi32;
1848 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32 ||
1849 e->Iex.Binop.op == Iop_CmpF16) {
1850 HReg (*iselExpr)(ISelEnv*, IRExpr*) = NULL;
1851 ARM64Instr* (*VCmp)(HReg, HReg) = NULL;
1852 if (e->Iex.Binop.op == Iop_CmpF64) {
1853 iselExpr = &iselDblExpr;
1854 VCmp = &ARM64Instr_VCmpD;
1856 else if (e->Iex.Binop.op == Iop_CmpF32) {
1857 iselExpr = &iselFltExpr;
1858 VCmp = &ARM64Instr_VCmpS;
1860 else {
1861 iselExpr = &iselF16Expr;
1862 VCmp = &ARM64Instr_VCmpH;
1864 HReg dL = (iselExpr)(env, e->Iex.Binop.arg1);
1865 HReg dR = (iselExpr)(env, e->Iex.Binop.arg2);
1866 HReg dst = newVRegI(env);
1867 HReg imm = newVRegI(env);
1868 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1869 create in dst the IRCmpF64Result-encoded result. */
1870 addInstr(env, (VCmp)(dL, dR));
1871 addInstr(env, ARM64Instr_Imm64(dst, 0));
1872 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1873 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1874 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1875 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1876 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1877 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1878 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1879 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1880 return dst;
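/* Note (added, not from the original source): the CSel cascade above maps
   the FCMP-produced NZCV flags onto the IRCmpF64Result encoding: EQ -> 0x40
   (Ircr_EQ), MI -> 0x01 (Ircr_LT), GT -> 0x00 (Ircr_GT), VS -> 0x45
   (Ircr_UN).  After an FCMP exactly one of those conditions holds, so the
   matching CSel determines the final value of dst. */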
1883 { /* local scope */
1884 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1885 Bool srcIsD = False;
1886 switch (e->Iex.Binop.op) {
1887 case Iop_F64toI64S:
1888 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1889 case Iop_F64toI64U:
1890 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1891 case Iop_F64toI32S:
1892 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1893 case Iop_F64toI32U:
1894 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1895 case Iop_F32toI32S:
1896 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1897 case Iop_F32toI32U:
1898 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1899 case Iop_F32toI64S:
1900 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1901 case Iop_F32toI64U:
1902 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1903 default:
1904 break;
1906 if (cvt_op != ARM64cvt_INVALID) {
1907 /* This is all a bit dodgy, because we can't handle a
1908 non-constant (not-known-at-JIT-time) rounding mode
1909 indication. That's because there's no instruction
1910 AFAICS that does this conversion but rounds according to
1911 FPCR.RM, so we have to bake the rounding mode into the
1912 instruction right now. But that should be OK because
1913 (1) the front end attaches a literal Irrm_ value to the
1914 conversion binop, and (2) iropt will never use CSE to replace
1915 that literal with a temporary. Hence we should always
1916 have a literal Irrm_ value as the first arg. */
1917 IRExpr* arg1 = e->Iex.Binop.arg1;
1918 if (arg1->tag != Iex_Const) goto irreducible;
1919 IRConst* arg1con = arg1->Iex.Const.con;
1920 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1921 UInt irrm = arg1con->Ico.U32;
1922 /* Find the ARM-encoded equivalent for |irrm|. */
1923 UInt armrm = 4; /* impossible */
1924 Bool tiesToAway = False;
1925 switch (irrm) {
1926 case Irrm_NEAREST: armrm = 0; break;
1927 case Irrm_NegINF: armrm = 2; break;
1928 case Irrm_PosINF: armrm = 1; break;
1929 case Irrm_ZERO: armrm = 3; break;
1930 case Irrm_NEAREST_TIE_AWAY_0: armrm = 0; tiesToAway = True; break;
1931 default: goto irreducible;
1933 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1934 (env, e->Iex.Binop.arg2);
1935 HReg dst = newVRegI(env);
1936 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm, tiesToAway));
1937 return dst;
1939 } /* local scope */
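/* Note (added, not from the original source): the armrm values used above
   follow the ARMv8 FPCR.RMode encoding -- 0 = round to nearest (even),
   1 = towards +infinity, 2 = towards -infinity, 3 = towards zero.  Round to
   nearest with ties away from zero has no FPCR encoding, which is
   presumably why it is passed separately as the tiesToAway flag and handled
   by a different instruction form inside ARM64Instr_VCvtF2I. */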
1941 /* All cases involving host-side helper calls. */
1942 void* fn = NULL;
1943 switch (e->Iex.Binop.op) {
1944 case Iop_DivU32:
1945 fn = &h_calc_udiv32_w_arm_semantics; break;
1946 case Iop_DivS32:
1947 fn = &h_calc_sdiv32_w_arm_semantics; break;
1948 case Iop_DivU64:
1949 fn = &h_calc_udiv64_w_arm_semantics; break;
1950 case Iop_DivS64:
1951 fn = &h_calc_sdiv64_w_arm_semantics; break;
1952 default:
1953 break;
1956 if (fn) {
1957 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1958 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1959 HReg res = newVRegI(env);
1960 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1961 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1962 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
1963 2, mk_RetLoc_simple(RLPri_Int) ));
1964 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1965 return res;
1968 break;
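/* Note (added, not from the original source): the integer division cases
   just above marshal the two operands into X0 and X1 (the AAPCS64 argument
   registers), call the helper, and copy the result back out of X0.  The
   helpers are the *_w_arm_semantics routines, presumably so that corner
   cases such as division by zero give the ARM-defined result (zero) rather
   than trapping. */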
1971 /* --------- UNARY OP --------- */
1972 case Iex_Unop: {
1974 switch (e->Iex.Unop.op) {
1975 case Iop_16Uto64: {
1976 /* This probably doesn't occur often enough to be worth
1977 rolling the extension into the load. */
1978 IRExpr* arg = e->Iex.Unop.arg;
1979 HReg src = iselIntExpr_R(env, arg);
1980 HReg dst = widen_z_16_to_64(env, src);
1981 return dst;
1983 case Iop_32Uto64: {
1984 IRExpr* arg = e->Iex.Unop.arg;
1985 if (arg->tag == Iex_Load) {
1986 /* This correctly zero extends because _LdSt32 is
1987 defined to do a zero extending load. */
1988 HReg dst = newVRegI(env);
1989 ARM64AMode* am
1990 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1991 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1992 return dst;
1994 /* else be lame and mask it */
1995 HReg src = iselIntExpr_R(env, arg);
1996 HReg dst = widen_z_32_to_64(env, src);
1997 return dst;
1999 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
2000 case Iop_8Uto64: {
2001 IRExpr* arg = e->Iex.Unop.arg;
2002 if (arg->tag == Iex_Load) {
2003 /* This correctly zero extends because _LdSt8 is
2004 defined to do a zero extending load. */
2005 HReg dst = newVRegI(env);
2006 ARM64AMode* am
2007 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
2008 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2009 return dst;
2011 /* else be lame and mask it */
2012 HReg src = iselIntExpr_R(env, arg);
2013 HReg dst = widen_z_8_to_64(env, src);
2014 return dst;
2016 case Iop_128HIto64: {
2017 HReg rHi, rLo;
2018 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2019 return rHi; /* and abandon rLo */
2021 case Iop_128to64: {
2022 HReg rHi, rLo;
2023 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2024 return rLo; /* and abandon rHi */
2026 case Iop_8Sto32: case Iop_8Sto64: {
2027 IRExpr* arg = e->Iex.Unop.arg;
2028 HReg src = iselIntExpr_R(env, arg);
2029 HReg dst = widen_s_8_to_64(env, src);
2030 return dst;
2032 case Iop_16Sto32: case Iop_16Sto64: {
2033 IRExpr* arg = e->Iex.Unop.arg;
2034 HReg src = iselIntExpr_R(env, arg);
2035 HReg dst = widen_s_16_to_64(env, src);
2036 return dst;
2038 case Iop_32Sto64: {
2039 IRExpr* arg = e->Iex.Unop.arg;
2040 HReg src = iselIntExpr_R(env, arg);
2041 HReg dst = widen_s_32_to_64(env, src);
2042 return dst;
2044 case Iop_Not32:
2045 case Iop_Not64: {
2046 HReg dst = newVRegI(env);
2047 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2048 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
2049 return dst;
2051 case Iop_Clz64: {
2052 HReg dst = newVRegI(env);
2053 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2054 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
2055 return dst;
2057 case Iop_Left32:
2058 case Iop_Left64: {
2059 /* Left64(src) = src | -src. Left32 can use the same
2060 implementation since in that case we don't care what
2061 the upper 32 bits become. */
2062 HReg dst = newVRegI(env);
2063 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2064 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2065 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2066 ARM64lo_OR));
2067 return dst;
2069 case Iop_CmpwNEZ64: {
2070 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
2071 = Left64(src) >>s 63 */
2072 HReg dst = newVRegI(env);
2073 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2074 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2075 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2076 ARM64lo_OR));
2077 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2078 ARM64sh_SAR));
2079 return dst;
2081 case Iop_CmpwNEZ32: {
2082 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
2083 = Left64(src & 0xFFFFFFFF) >>s 63 */
2084 HReg dst = newVRegI(env);
2085 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
2086 HReg src = widen_z_32_to_64(env, pre);
2087 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2088 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2089 ARM64lo_OR));
2090 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2091 ARM64sh_SAR));
2092 return dst;
2094 case Iop_V128to64: case Iop_V128HIto64: {
2095 HReg dst = newVRegI(env);
2096 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2097 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
2098 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
2099 return dst;
2101 case Iop_ReinterpF64asI64: {
2102 HReg dst = newVRegI(env);
2103 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2104 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
2105 return dst;
2107 case Iop_ReinterpF32asI32: {
2108 HReg dst = newVRegI(env);
2109 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2110 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
2111 return dst;
2113 case Iop_1Sto16:
2114 case Iop_1Sto32:
2115 case Iop_1Sto64: {
2116 /* As with the iselStmt case for 'tmp:I1 = expr', we could
2117 do a lot better here if it ever became necessary. (CSDEC?) */
2118 HReg zero = hregARM64_XZR_XSP(); // XZR in this context
2119 HReg one = newVRegI(env);
2120 HReg dst = newVRegI(env);
2121 addInstr(env, ARM64Instr_Imm64(one, 1));
2122 ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
2123 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2124 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2125 ARM64sh_SHL));
2126 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2127 ARM64sh_SAR));
2128 return dst;
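/* Note (added, not from the original source): the sequence above first uses
   CSel to produce 0 or 1, then a left shift by 63 followed by an arithmetic
   right shift by 63 to copy bit 0 into every bit of the register, giving
   all-zeroes or all-ones.  For 1Sto16/1Sto32 only the low 16/32 bits of
   that result are significant. */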
2130 case Iop_NarrowUn16to8x8:
2131 case Iop_NarrowUn32to16x4:
2132 case Iop_NarrowUn64to32x2:
2133 case Iop_QNarrowUn16Sto8Sx8:
2134 case Iop_QNarrowUn32Sto16Sx4:
2135 case Iop_QNarrowUn64Sto32Sx2:
2136 case Iop_QNarrowUn16Uto8Ux8:
2137 case Iop_QNarrowUn32Uto16Ux4:
2138 case Iop_QNarrowUn64Uto32Ux2:
2139 case Iop_QNarrowUn16Sto8Ux8:
2140 case Iop_QNarrowUn32Sto16Ux4:
2141 case Iop_QNarrowUn64Sto32Ux2:
2143 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2144 HReg tmp = newVRegV(env);
2145 HReg dst = newVRegI(env);
2146 UInt dszBlg2 = 3; /* illegal */
2147 ARM64VecNarrowOp op = ARM64vecna_INVALID;
2148 switch (e->Iex.Unop.op) {
2149 case Iop_NarrowUn16to8x8:
2150 dszBlg2 = 0; op = ARM64vecna_XTN; break;
2151 case Iop_NarrowUn32to16x4:
2152 dszBlg2 = 1; op = ARM64vecna_XTN; break;
2153 case Iop_NarrowUn64to32x2:
2154 dszBlg2 = 2; op = ARM64vecna_XTN; break;
2155 case Iop_QNarrowUn16Sto8Sx8:
2156 dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
2157 case Iop_QNarrowUn32Sto16Sx4:
2158 dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
2159 case Iop_QNarrowUn64Sto32Sx2:
2160 dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
2161 case Iop_QNarrowUn16Uto8Ux8:
2162 dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
2163 case Iop_QNarrowUn32Uto16Ux4:
2164 dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
2165 case Iop_QNarrowUn64Uto32Ux2:
2166 dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
2167 case Iop_QNarrowUn16Sto8Ux8:
2168 dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
2169 case Iop_QNarrowUn32Sto16Ux4:
2170 dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
2171 case Iop_QNarrowUn64Sto32Ux2:
2172 dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
2173 default:
2174 vassert(0);
2176 addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
2177 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
2178 return dst;
2180 case Iop_1Uto64: {
2181 /* 1Uto64(tmp). */
2182 HReg dst = newVRegI(env);
2183 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
2184 ARM64RIL* one = mb_mkARM64RIL_I(1);
2185 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
2186 vassert(one);
2187 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
2188 } else {
2189 /* CLONE-01 */
2190 HReg zero = hregARM64_XZR_XSP(); // XZR in this context
2191 HReg one = newVRegI(env);
2192 addInstr(env, ARM64Instr_Imm64(one, 1));
2193 ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
2194 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2196 return dst;
2198 case Iop_64HIto32: {
2199 HReg dst = newVRegI(env);
2200 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2201 addInstr(env, ARM64Instr_Shift(dst, src, ARM64RI6_I6(32),
2202 ARM64sh_SHR));
2203 return dst;
2205 case Iop_64to32:
2206 case Iop_64to16:
2207 case Iop_64to8:
2208 case Iop_32to16:
2209 /* These are no-ops. */
2210 return iselIntExpr_R(env, e->Iex.Unop.arg);
2211 default:
2212 break;
2215 break;
2218 /* --------- GET --------- */
2219 case Iex_Get: {
2220 if (ty == Ity_I64
2221 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
2222 HReg dst = newVRegI(env);
2223 ARM64AMode* am
2224 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2225 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2226 return dst;
2228 if (ty == Ity_I32
2229 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
2230 HReg dst = newVRegI(env);
2231 ARM64AMode* am
2232 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2233 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2234 return dst;
2236 if (ty == Ity_I16
2237 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2238 HReg dst = newVRegI(env);
2239 ARM64AMode* am
2240 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2241 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2242 return dst;
2244 if (ty == Ity_I8
2245 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2246 HReg dst = newVRegI(env);
2247 ARM64AMode* am
2248 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2249 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2250 return dst;
2252 break;
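/* Note (added, not from the original source): the offset limits above --
   e.g. (8<<12)-8 = 32760 for the 64-bit case -- keep the baseblock offset
   within the range of the unsigned, size-scaled 12-bit immediate form of
   LDR/STR, so each Get can be done with a single load relative to the
   baseblock register.  Offsets outside that range, or misaligned ones, are
   not handled here and end up at the "cannot reduce tree" panic at the end
   of this function. */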
2255 /* --------- CCALL --------- */
2256 case Iex_CCall: {
2257 HReg dst = newVRegI(env);
2258 vassert(ty == e->Iex.CCall.retty);
2260 /* be very restrictive for now. Only 64-bit ints allowed for
2261 args, and 64 bits for return type. Don't forget to change
2262 the RetLoc if more types are allowed in future. */
2263 if (e->Iex.CCall.retty != Ity_I64)
2264 goto irreducible;
2266 /* Marshal args, do the call, clear stack. */
2267 UInt addToSp = 0;
2268 RetLoc rloc = mk_RetLoc_INVALID();
2269 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2270 e->Iex.CCall.cee, e->Iex.CCall.retty,
2271 e->Iex.CCall.args );
2272 /* */
2273 if (ok) {
2274 vassert(is_sane_RetLoc(rloc));
2275 vassert(rloc.pri == RLPri_Int);
2276 vassert(addToSp == 0);
2277 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2278 return dst;
2280 goto irreducible;
2283 /* --------- LITERAL --------- */
2284 /* 64-bit literals */
2285 case Iex_Const: {
2286 ULong u = 0;
2287 HReg dst = newVRegI(env);
2288 switch (e->Iex.Const.con->tag) {
2289 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2290 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2291 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2292 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2293 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2295 addInstr(env, ARM64Instr_Imm64(dst, u));
2296 return dst;
2299 /* --------- MULTIPLEX --------- */
2300 case Iex_ITE: {
2301 /* ITE(ccexpr, iftrue, iffalse) */
2302 if (ty == Ity_I64 || ty == Ity_I32) {
2303 ARM64CondCode cc;
2304 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2305 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2306 HReg dst = newVRegI(env);
2307 cc = iselCondCode_C(env, e->Iex.ITE.cond);
2308 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2309 return dst;
2311 break;
2314 default:
2315 break;
2316 } /* switch (e->tag) */
2318 /* We get here if no pattern matched. */
2319 irreducible:
2320 ppIRExpr(e);
2321 vpanic("iselIntExpr_R: cannot reduce tree");
2325 /*---------------------------------------------------------*/
2326 /*--- ISEL: Integer expressions (128 bit) ---*/
2327 /*---------------------------------------------------------*/
2329 /* Compute a 128-bit value into a register pair, which is returned as
2330 the first two parameters. As with iselIntExpr_R, these may be
2331 either real or virtual regs; in any case they must not be changed
2332 by subsequent code emitted by the caller. */
2334 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2335 ISelEnv* env, IRExpr* e )
2337 iselInt128Expr_wrk(rHi, rLo, env, e);
2338 # if 0
2339 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2340 # endif
2341 vassert(hregClass(*rHi) == HRcInt64);
2342 vassert(hregIsVirtual(*rHi));
2343 vassert(hregClass(*rLo) == HRcInt64);
2344 vassert(hregIsVirtual(*rLo));
2347 /* DO NOT CALL THIS DIRECTLY ! */
2348 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2349 ISelEnv* env, IRExpr* e )
2351 vassert(e);
2352 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2354 /* --------- TEMP --------- */
2355 if (e->tag == Iex_RdTmp) {
2356 lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp);
2357 return;
2360 /* --------- CONST --------- */
2361 if (e->tag == Iex_Const) {
2362 IRConst* c = e->Iex.Const.con;
2363 vassert(c->tag == Ico_U128);
2364 if (c->Ico.U128 == 0) {
2365 // The only case we need to handle (so far)
2366 HReg zero = newVRegI(env);
2367 addInstr(env, ARM64Instr_Imm64(zero, 0));
2368 *rHi = *rLo = zero;
2369 return;
2373 /* --------- UNARY ops --------- */
2374 if (e->tag == Iex_Unop) {
2375 switch (e->Iex.Unop.op) {
2376 case Iop_ReinterpV128asI128: {
2377 HReg dstHi = newVRegI(env);
2378 HReg dstLo = newVRegI(env);
2379 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2380 addInstr(env, ARM64Instr_VXfromQ(dstHi, src, 1));
2381 addInstr(env, ARM64Instr_VXfromQ(dstLo, src, 0));
2382 *rHi = dstHi;
2383 *rLo = dstLo;
2384 return;
2386 default:
2387 break;
2391 /* --------- BINARY ops --------- */
2392 if (e->tag == Iex_Binop) {
2393 switch (e->Iex.Binop.op) {
2394 /* 64 x 64 -> 128 multiply */
2395 case Iop_MullU64:
2396 case Iop_MullS64: {
2397 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2398 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2399 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2400 HReg dstLo = newVRegI(env);
2401 HReg dstHi = newVRegI(env);
2402 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2403 ARM64mul_PLAIN));
2404 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2405 syned ? ARM64mul_SX : ARM64mul_ZX));
2406 *rHi = dstHi;
2407 *rLo = dstLo;
2408 return;
2410 /* 64HLto128(e1,e2) */
2411 case Iop_64HLto128:
2412 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2413 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2414 return;
2415 default:
2416 break;
2418 } /* if (e->tag == Iex_Binop) */
2420 ppIRExpr(e);
2421 vpanic("iselInt128Expr(arm64)");
2425 /*---------------------------------------------------------*/
2426 /*--- ISEL: Vector expressions (128 bit) ---*/
2427 /*---------------------------------------------------------*/
2429 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
2431 HReg r = iselV128Expr_wrk( env, e );
2432 vassert(hregClass(r) == HRcVec128);
2433 vassert(hregIsVirtual(r));
2434 return r;
2437 /* DO NOT CALL THIS DIRECTLY */
2438 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
2440 IRType ty = typeOfIRExpr(env->type_env, e);
2441 vassert(e);
2442 vassert(ty == Ity_V128);
2444 if (e->tag == Iex_RdTmp) {
2445 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2448 if (e->tag == Iex_Const) {
2449 /* Only a very limited range of constants is handled. */
2450 vassert(e->Iex.Const.con->tag == Ico_V128);
2451 UShort con = e->Iex.Const.con->Ico.V128;
2452 HReg res = newVRegV(env);
2453 switch (con) {
2454 case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
2455 addInstr(env, ARM64Instr_VImmQ(res, con));
2456 return res;
2457 case 0x00F0:
2458 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2459 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2460 return res;
2461 case 0x0F00:
2462 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2463 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2464 return res;
2465 case 0x0FF0:
2466 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2467 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2468 return res;
2469 case 0x0FFF:
2470 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2471 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2472 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2473 return res;
2474 case 0xF000:
2475 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2476 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2477 return res;
2478 case 0xFF00:
2479 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2480 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2481 return res;
2482 default:
2483 break;
2485 /* Unhandled */
2486 goto v128_expr_bad;
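/* Note (added, not from the original source): the VImmQ immediate is a
   16-bit mask with one bit per byte lane (bit i set means byte i = 0xFF),
   so only a few constants are directly representable.  The remaining cases
   are built by rotating a representable mask with VExtV: for example 0x00F0
   is obtained by materialising 0x000F (bytes 0..3 set) and then EXT-rotating
   the register against itself by 12 bytes, which moves those set bytes up
   into lanes 4..7. */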
2489 if (e->tag == Iex_Load) {
2490 HReg res = newVRegV(env);
2491 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
2492 vassert(ty == Ity_V128);
2493 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
2494 return res;
2497 if (e->tag == Iex_Get) {
2498 UInt offs = (UInt)e->Iex.Get.offset;
2499 if (offs < (1<<12)) {
2500 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
2501 HReg res = newVRegV(env);
2502 vassert(ty == Ity_V128);
2503 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
2504 return res;
2506 goto v128_expr_bad;
2509 if (e->tag == Iex_Unop) {
2511 /* Iop_ZeroHIXXofV128 cases */
2512 UShort imm16 = 0;
2513 switch (e->Iex.Unop.op) {
2514 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
2515 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
2516 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
2517 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
2518 default: break;
2520 if (imm16 != 0) {
2521 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2522 HReg imm = newVRegV(env);
2523 HReg res = newVRegV(env);
2524 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
2525 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
2526 return res;
2529 /* Other cases */
2530 switch (e->Iex.Unop.op) {
2531 case Iop_NotV128:
2532 case Iop_Abs64Fx2: case Iop_Abs32Fx4: case Iop_Abs16Fx8:
2533 case Iop_Neg64Fx2: case Iop_Neg32Fx4: case Iop_Neg16Fx8:
2534 case Iop_Abs64x2: case Iop_Abs32x4:
2535 case Iop_Abs16x8: case Iop_Abs8x16:
2536 case Iop_Cls32x4: case Iop_Cls16x8: case Iop_Cls8x16:
2537 case Iop_Clz32x4: case Iop_Clz16x8: case Iop_Clz8x16:
2538 case Iop_Cnt8x16:
2539 case Iop_Reverse1sIn8_x16:
2540 case Iop_Reverse8sIn16_x8:
2541 case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
2542 case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
2543 case Iop_Reverse32sIn64_x2:
2544 case Iop_RecipEst32Ux4:
2545 case Iop_RSqrtEst32Ux4:
2546 case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
2547 case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
2549 HReg res = newVRegV(env);
2550 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2551 Bool setRM = False;
2552 ARM64VecUnaryOp op = ARM64vecu_INVALID;
2553 switch (e->Iex.Unop.op) {
2554 case Iop_NotV128: op = ARM64vecu_NOT; break;
2555 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
2556 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
2557 case Iop_Abs16Fx8: op = ARM64vecu_FABS16x8; break;
2558 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
2559 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
2560 case Iop_Neg16Fx8: op = ARM64vecu_FNEG16x8; break;
2561 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
2562 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
2563 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
2564 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
2565 case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break;
2566 case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break;
2567 case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break;
2568 case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break;
2569 case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break;
2570 case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break;
2571 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
2572 case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break;
2573 case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break;
2574 case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break;
2575 case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break;
2576 case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break;
2577 case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break;
2578 case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
2579 case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break;
2580 case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break;
2581 case Iop_RecipEst64Fx2: setRM = True;
2582 op = ARM64vecu_FRECPE64x2; break;
2583 case Iop_RecipEst32Fx4: setRM = True;
2584 op = ARM64vecu_FRECPE32x4; break;
2585 case Iop_RSqrtEst64Fx2: setRM = True;
2586 op = ARM64vecu_FRSQRTE64x2; break;
2587 case Iop_RSqrtEst32Fx4: setRM = True;
2588 op = ARM64vecu_FRSQRTE32x4; break;
2589 default: vassert(0);
2591 if (setRM) {
2592 // This is a bit of a kludge. We should do rm properly for
2593 // these recip-est insns, but that would require changing the
2594 // primop's type to take an rmode.
2595 set_FPCR_rounding_mode(env, IRExpr_Const(
2596 IRConst_U32(Irrm_NEAREST)));
2598 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2599 return res;
2601 case Iop_CmpNEZ8x16:
2602 case Iop_CmpNEZ16x8:
2603 case Iop_CmpNEZ32x4:
2604 case Iop_CmpNEZ64x2: {
2605 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2606 HReg zero = newVRegV(env);
2607 HReg res = newVRegV(env);
2608 ARM64VecBinOp cmp = ARM64vecb_INVALID;
2609 switch (e->Iex.Unop.op) {
2610 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
2611 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
2612 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
2613 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
2614 default: vassert(0);
2616 // This is pretty feeble. Better: use CMP against zero
2617 // and avoid the extra instruction and extra register.
2618 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
2619 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
2620 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2621 return res;
2623 case Iop_V256toV128_0:
2624 case Iop_V256toV128_1: {
2625 HReg vHi, vLo;
2626 iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
2627 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
2629 case Iop_64UtoV128: {
2630 HReg res = newVRegV(env);
2631 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2632 addInstr(env, ARM64Instr_VQfromX(res, arg));
2633 return res;
2635 case Iop_Widen8Sto16x8: {
2636 HReg res = newVRegV(env);
2637 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2638 addInstr(env, ARM64Instr_VQfromX(res, arg));
2639 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
2640 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
2641 res, res, 8));
2642 return res;
2644 case Iop_Widen16Sto32x4: {
2645 HReg res = newVRegV(env);
2646 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2647 addInstr(env, ARM64Instr_VQfromX(res, arg));
2648 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
2649 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
2650 res, res, 16));
2651 return res;
2653 case Iop_Widen32Sto64x2: {
2654 HReg res = newVRegV(env);
2655 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2656 addInstr(env, ARM64Instr_VQfromX(res, arg));
2657 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
2658 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
2659 res, res, 32));
2660 return res;
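/* Note (added, not from the original source): the three Widen*Sto* cases
   above all use the same trick -- VQfromX moves the 64-bit source into a
   vector register, ZIP1 against itself duplicates each source element into
   both halves of a double-width lane, and the signed shift right by the
   source element's width then sign-extends each element into its widened
   lane. */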
2662 /* ... */
2663 default:
2664 break;
2665 } /* switch on the unop */
2666 } /* if (e->tag == Iex_Unop) */
2668 if (e->tag == Iex_Binop) {
2669 switch (e->Iex.Binop.op) {
2670 case Iop_Sqrt16Fx8:
2671 case Iop_Sqrt32Fx4:
2672 case Iop_Sqrt64Fx2: {
2673 HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
2674 HReg res = newVRegV(env);
2675 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
2676 ARM64VecUnaryOp op;
2677 switch (e->Iex.Binop.op) {
2678 case Iop_Sqrt16Fx8: op = ARM64vecu_FSQRT16x8; break;
2679 case Iop_Sqrt32Fx4: op = ARM64vecu_FSQRT32x4; break;
2680 case Iop_Sqrt64Fx2: op = ARM64vecu_FSQRT64x2; break;
2681 default: vassert(0);
2683 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2684 return res;
2686 case Iop_64HLtoV128: {
2687 HReg res = newVRegV(env);
2688 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2689 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2690 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
2691 return res;
2693 /* -- Cases where we can generate a simple three-reg instruction. -- */
2694 case Iop_AndV128:
2695 case Iop_OrV128:
2696 case Iop_XorV128:
2697 case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
2698 case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
2699 case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
2700 case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
2701 case Iop_Add64x2: case Iop_Add32x4:
2702 case Iop_Add16x8: case Iop_Add8x16:
2703 case Iop_Sub64x2: case Iop_Sub32x4:
2704 case Iop_Sub16x8: case Iop_Sub8x16:
2705 case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
2706 case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
2707 case Iop_CmpEQ16x8: case Iop_CmpEQ8x16:
2708 case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
2709 case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
2710 case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
2711 case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
2712 case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
2713 case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
2714 case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
2715 case Iop_CmpLT16Fx8: case Iop_CmpLE16Fx8: case Iop_CmpEQ16Fx8:
2716 case Iop_Perm8x16:
2717 case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
2718 case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
2719 case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
2720 case Iop_CatOddLanes16x8: case Iop_CatOddLanes8x16:
2721 case Iop_InterleaveHI32x4:
2722 case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
2723 case Iop_InterleaveLO32x4:
2724 case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
2725 case Iop_PolynomialMul8x16:
2726 case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
2727 case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
2728 case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
2729 case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
2730 case Iop_QSub64Sx2: case Iop_QSub32Sx4:
2731 case Iop_QSub16Sx8: case Iop_QSub8Sx16:
2732 case Iop_QSub64Ux2: case Iop_QSub32Ux4:
2733 case Iop_QSub16Ux8: case Iop_QSub8Ux16:
2734 case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8:
2735 case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
2736 case Iop_Sh8Sx16: case Iop_Sh16Sx8:
2737 case Iop_Sh32Sx4: case Iop_Sh64Sx2:
2738 case Iop_Sh8Ux16: case Iop_Sh16Ux8:
2739 case Iop_Sh32Ux4: case Iop_Sh64Ux2:
2740 case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
2741 case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
2742 case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
2743 case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
2744 case Iop_Max64Fx2: case Iop_Max32Fx4:
2745 case Iop_Min64Fx2: case Iop_Min32Fx4:
2746 case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
2747 case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
2749 HReg res = newVRegV(env);
2750 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2751 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2752 Bool sw = False;
2753 Bool setRM = False;
2754 ARM64VecBinOp op = ARM64vecb_INVALID;
2755 switch (e->Iex.Binop.op) {
2756 case Iop_AndV128: op = ARM64vecb_AND; break;
2757 case Iop_OrV128: op = ARM64vecb_ORR; break;
2758 case Iop_XorV128: op = ARM64vecb_XOR; break;
2759 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
2760 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
2761 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break;
2762 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
2763 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
2764 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
2765 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
2766 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
2767 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
2768 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
2769 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
2770 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
2771 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
2772 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
2773 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
2774 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
2775 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
2776 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
2777 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
2778 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
2779 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
2780 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
2781 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break;
2782 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
2783 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
2784 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
2785 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
2786 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
2787 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
2788 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
2789 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
2790 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
2791 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
2792 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
2793 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
2794 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
2795 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
2796 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
2797 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
2798 case Iop_CmpLE16Fx8: op = ARM64vecb_FCMGE16x8; sw = True; break;
2799 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
2800 case Iop_CmpLT16Fx8: op = ARM64vecb_FCMGT16x8; sw = True; break;
2801 case Iop_CmpEQ16Fx8: op = ARM64vecb_FCMEQ16x8; sw = True; break;
2802 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
2803 case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
2804 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
2805 break;
2806 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
2807 break;
2808 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
2809 break;
2810 case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
2811 break;
2812 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
2813 break;
2814 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True;
2815 break;
2816 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True;
2817 break;
2818 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True;
2819 break;
2820 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
2821 break;
2822 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
2823 break;
2824 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
2825 break;
2826 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
2827 break;
2828 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
2829 break;
2830 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
2831 break;
2832 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
2833 case Iop_QAdd64Sx2: op = ARM64vecb_SQADD64x2; break;
2834 case Iop_QAdd32Sx4: op = ARM64vecb_SQADD32x4; break;
2835 case Iop_QAdd16Sx8: op = ARM64vecb_SQADD16x8; break;
2836 case Iop_QAdd8Sx16: op = ARM64vecb_SQADD8x16; break;
2837 case Iop_QAdd64Ux2: op = ARM64vecb_UQADD64x2; break;
2838 case Iop_QAdd32Ux4: op = ARM64vecb_UQADD32x4; break;
2839 case Iop_QAdd16Ux8: op = ARM64vecb_UQADD16x8; break;
2840 case Iop_QAdd8Ux16: op = ARM64vecb_UQADD8x16; break;
2841 case Iop_QSub64Sx2: op = ARM64vecb_SQSUB64x2; break;
2842 case Iop_QSub32Sx4: op = ARM64vecb_SQSUB32x4; break;
2843 case Iop_QSub16Sx8: op = ARM64vecb_SQSUB16x8; break;
2844 case Iop_QSub8Sx16: op = ARM64vecb_SQSUB8x16; break;
2845 case Iop_QSub64Ux2: op = ARM64vecb_UQSUB64x2; break;
2846 case Iop_QSub32Ux4: op = ARM64vecb_UQSUB32x4; break;
2847 case Iop_QSub16Ux8: op = ARM64vecb_UQSUB16x8; break;
2848 case Iop_QSub8Ux16: op = ARM64vecb_UQSUB8x16; break;
2849 case Iop_QDMulHi32Sx4: op = ARM64vecb_SQDMULH32x4; break;
2850 case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break;
2851 case Iop_QRDMulHi32Sx4: op = ARM64vecb_SQRDMULH32x4; break;
2852 case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break;
2853 case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break;
2854 case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break;
2855 case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break;
2856 case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break;
2857 case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break;
2858 case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break;
2859 case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break;
2860 case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break;
2861 case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break;
2862 case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break;
2863 case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break;
2864 case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break;
2865 case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break;
2866 case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break;
2867 case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break;
2868 case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break;
2869 case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2; break;
2870 case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break;
2871 case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break;
2872 case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break;
2873 case Iop_RecipStep64Fx2: setRM = True;
2874 op = ARM64vecb_FRECPS64x2; break;
2875 case Iop_RecipStep32Fx4: setRM = True;
2876 op = ARM64vecb_FRECPS32x4; break;
2877 case Iop_RSqrtStep64Fx2: setRM = True;
2878 op = ARM64vecb_FRSQRTS64x2; break;
2879 case Iop_RSqrtStep32Fx4: setRM = True;
2880 op = ARM64vecb_FRSQRTS32x4; break;
2881 default: vassert(0);
2883 if (setRM) {
2884 // This is a bit of a kludge. We should do rm properly for
2885 // these recip-step insns, but that would require changing the
2886 // primop's type to take an rmode.
2887 set_FPCR_rounding_mode(env, IRExpr_Const(
2888 IRConst_U32(Irrm_NEAREST)));
2890 if (sw) {
2891 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
2892 } else {
2893 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
2895 return res;
2897 /* -- These only have 2 operand instructions, so we have to first move
2898 the first argument into a new register, for modification. -- */
2899 case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2900 case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2901 case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2902 case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2904 HReg res = newVRegV(env);
2905 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2906 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2907 ARM64VecModifyOp op = ARM64vecmo_INVALID;
2908 switch (e->Iex.Binop.op) {
2909 /* In the following 8 cases, the US - SU switching is intended.
2910 See the comments in libvex_ir.h for details, and also the
2911 ARM64 front end, where these primops are generated. */
2912 case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2913 case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2914 case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2915 case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2916 case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2917 case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2918 case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2919 case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2920 default: vassert(0);
2922 /* The order of the operands is important. Although this is
2923 basically addition, the two operands are extended differently,
2924 making it important to get them into the correct registers in
2925 the instruction. */
2926 addInstr(env, ARM64Instr_VMov(16, res, argR));
2927 addInstr(env, ARM64Instr_VModifyV(op, res, argL));
2928 return res;
2930 /* -- Shifts by an immediate. -- */
2931 case Iop_ShrN64x2: case Iop_ShrN32x4:
2932 case Iop_ShrN16x8: case Iop_ShrN8x16:
2933 case Iop_SarN64x2: case Iop_SarN32x4:
2934 case Iop_SarN16x8: case Iop_SarN8x16:
2935 case Iop_ShlN64x2: case Iop_ShlN32x4:
2936 case Iop_ShlN16x8: case Iop_ShlN8x16:
2937 case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2938 case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2939 case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2940 case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2941 case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2942 case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2944 IRExpr* argL = e->Iex.Binop.arg1;
2945 IRExpr* argR = e->Iex.Binop.arg2;
2946 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2947 UInt amt = argR->Iex.Const.con->Ico.U8;
2948 UInt limLo = 0;
2949 UInt limHi = 0;
2950 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2951 /* Establish the instruction to use. */
2952 switch (e->Iex.Binop.op) {
2953 case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break;
2954 case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break;
2955 case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break;
2956 case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break;
2957 case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break;
2958 case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break;
2959 case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break;
2960 case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break;
2961 case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break;
2962 case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break;
2963 case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break;
2964 case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break;
2965 case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break;
2966 case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break;
2967 case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break;
2968 case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break;
2969 case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break;
2970 case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break;
2971 case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break;
2972 case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break;
2973 case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2974 case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2975 case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2976 case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2977 default: vassert(0);
2979 /* Establish the shift limits, for sanity check purposes only. */
2980 switch (e->Iex.Binop.op) {
2981 case Iop_ShrN64x2: limLo = 1; limHi = 64; break;
2982 case Iop_ShrN32x4: limLo = 1; limHi = 32; break;
2983 case Iop_ShrN16x8: limLo = 1; limHi = 16; break;
2984 case Iop_ShrN8x16: limLo = 1; limHi = 8; break;
2985 case Iop_SarN64x2: limLo = 1; limHi = 64; break;
2986 case Iop_SarN32x4: limLo = 1; limHi = 32; break;
2987 case Iop_SarN16x8: limLo = 1; limHi = 16; break;
2988 case Iop_SarN8x16: limLo = 1; limHi = 8; break;
2989 case Iop_ShlN64x2: limLo = 0; limHi = 63; break;
2990 case Iop_ShlN32x4: limLo = 0; limHi = 31; break;
2991 case Iop_ShlN16x8: limLo = 0; limHi = 15; break;
2992 case Iop_ShlN8x16: limLo = 0; limHi = 7; break;
2993 case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2994 case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2995 case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2996 case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7; break;
2997 case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2998 case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2999 case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
3000 case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7; break;
3001 case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
3002 case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
3003 case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
3004 case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7; break;
3005 default: vassert(0);
3007 /* For left shifts, the allowable amt values are
3008 0 .. lane_bits-1. For right shifts the allowable
3009 values are 1 .. lane_bits. */
3010 if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
3011 HReg src = iselV128Expr(env, argL);
3012 HReg dst = newVRegV(env);
3013 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
3014 return dst;
3016 /* Special case some no-op shifts that the arm64 front end
3017 throws at us. We can't generate any instructions for these,
3018 but we don't need to either. */
3019 switch (e->Iex.Binop.op) {
3020 case Iop_ShrN64x2: case Iop_ShrN32x4:
3021 case Iop_ShrN16x8: case Iop_ShrN8x16:
3022 if (amt == 0) {
3023 return iselV128Expr(env, argL);
3025 break;
3026 default:
3027 break;
3029 /* otherwise unhandled */
3031 /* else fall out; this is unhandled */
3032 break;
3034 /* -- Saturating narrowing by an immediate -- */
3035 /* uu */
3036 case Iop_QandQShrNnarrow16Uto8Ux8:
3037 case Iop_QandQShrNnarrow32Uto16Ux4:
3038 case Iop_QandQShrNnarrow64Uto32Ux2:
3039 /* ss */
3040 case Iop_QandQSarNnarrow16Sto8Sx8:
3041 case Iop_QandQSarNnarrow32Sto16Sx4:
3042 case Iop_QandQSarNnarrow64Sto32Sx2:
3043 /* su */
3044 case Iop_QandQSarNnarrow16Sto8Ux8:
3045 case Iop_QandQSarNnarrow32Sto16Ux4:
3046 case Iop_QandQSarNnarrow64Sto32Ux2:
3047 /* ruu */
3048 case Iop_QandQRShrNnarrow16Uto8Ux8:
3049 case Iop_QandQRShrNnarrow32Uto16Ux4:
3050 case Iop_QandQRShrNnarrow64Uto32Ux2:
3051 /* rss */
3052 case Iop_QandQRSarNnarrow16Sto8Sx8:
3053 case Iop_QandQRSarNnarrow32Sto16Sx4:
3054 case Iop_QandQRSarNnarrow64Sto32Sx2:
3055 /* rsu */
3056 case Iop_QandQRSarNnarrow16Sto8Ux8:
3057 case Iop_QandQRSarNnarrow32Sto16Ux4:
3058 case Iop_QandQRSarNnarrow64Sto32Ux2:
3060 IRExpr* argL = e->Iex.Binop.arg1;
3061 IRExpr* argR = e->Iex.Binop.arg2;
3062 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
3063 UInt amt = argR->Iex.Const.con->Ico.U8;
3064 UInt limit = 0;
3065 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
3066 switch (e->Iex.Binop.op) {
3067 /* uu */
3068 case Iop_QandQShrNnarrow64Uto32Ux2:
3069 op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
3070 case Iop_QandQShrNnarrow32Uto16Ux4:
3071 op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
3072 case Iop_QandQShrNnarrow16Uto8Ux8:
3073 op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
3074 /* ss */
3075 case Iop_QandQSarNnarrow64Sto32Sx2:
3076 op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
3077 case Iop_QandQSarNnarrow32Sto16Sx4:
3078 op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
3079 case Iop_QandQSarNnarrow16Sto8Sx8:
3080 op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
3081 /* su */
3082 case Iop_QandQSarNnarrow64Sto32Ux2:
3083 op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
3084 case Iop_QandQSarNnarrow32Sto16Ux4:
3085 op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
3086 case Iop_QandQSarNnarrow16Sto8Ux8:
3087 op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
3088 /* ruu */
3089 case Iop_QandQRShrNnarrow64Uto32Ux2:
3090 op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
3091 case Iop_QandQRShrNnarrow32Uto16Ux4:
3092 op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
3093 case Iop_QandQRShrNnarrow16Uto8Ux8:
3094 op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
3095 /* rss */
3096 case Iop_QandQRSarNnarrow64Sto32Sx2:
3097 op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
3098 case Iop_QandQRSarNnarrow32Sto16Sx4:
3099 op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
3100 case Iop_QandQRSarNnarrow16Sto8Sx8:
3101 op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
3102 /* rsu */
3103 case Iop_QandQRSarNnarrow64Sto32Ux2:
3104 op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
3105 case Iop_QandQRSarNnarrow32Sto16Ux4:
3106 op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
3107 case Iop_QandQRSarNnarrow16Sto8Ux8:
3108 op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
3109 /**/
3110 default:
3111 vassert(0);
3113 if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
3114 HReg src = iselV128Expr(env, argL);
3115 HReg dst = newVRegV(env);
3116 HReg fpsr = newVRegI(env);
3117 /* Clear FPSR.Q, do the operation, and return both its
3118 result and the new value of FPSR.Q. We can simply
3119 zero out FPSR since all the other bits have no relevance
3120 in VEX generated code. */
3121 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3122 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3123 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
3124 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3125 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3126 ARM64sh_SHR));
3127 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3128 vassert(ril_one);
3129 addInstr(env, ARM64Instr_Logic(fpsr,
3130 fpsr, ril_one, ARM64lo_AND));
3131 /* Now we have: the main (shift) result in the bottom half
3132 of |dst|, and the Q bit at the bottom of |fpsr|.
3133 Combining them with a "InterleaveLO64x2" style operation
3134 produces a 128 bit value, dst[63:0]:fpsr[63:0],
3135 which is what we want. */
3136 HReg scratch = newVRegV(env);
3137 addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
3138 addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
3139 dst, dst, scratch));
3140 return dst;
3143 /* else fall out; this is unhandled */
3144 break;
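/* Note (added, not from the original source): in the saturating-narrowing
   cases above, bit 27 of the FPSR is the cumulative saturation flag QC,
   which is why the code shifts the saved FPSR right by 27 and masks with 1
   before packing the Q bit alongside the narrowed result. */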
3147 // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
3148 // as it is in some ways more general and often leads to better
3149 // code overall.
3150 case Iop_ShlV128:
3151 case Iop_ShrV128: {
3152 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
3153 /* This is tricky. Generate an EXT instruction with zeroes in
3154 the high operand (shift right) or low operand (shift left).
3155 Note that we can only slice in the EXT instruction at a byte
3156 level of granularity, so the shift amount needs careful
3157 checking. */
3158 IRExpr* argL = e->Iex.Binop.arg1;
3159 IRExpr* argR = e->Iex.Binop.arg2;
3160 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
3161 UInt amt = argR->Iex.Const.con->Ico.U8;
3162 Bool amtOK = False;
3163 switch (amt) {
3164 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
3165 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
3166 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
3167 amtOK = True; break;
3169 /* We could also deal with amt==0 by copying the source to
3170 the destination, but there's no need for that so far. */
3171 if (amtOK) {
3172 HReg src = iselV128Expr(env, argL);
3173 HReg srcZ = newVRegV(env);
3174 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
3175 UInt immB = amt / 8;
3176 vassert(immB >= 1 && immB <= 15);
3177 HReg dst = newVRegV(env);
3178 if (isSHR) {
3179 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
3180 immB));
3181 } else {
3182 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
3183 16 - immB));
3185 return dst;
3188 /* else fall out; this is unhandled */
3189 break;
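/* Note (added, not from the original source): as a concrete instance of the
   ShlV128/ShrV128 handling above, ShrV128(v, 0x40) -- a right shift by 64
   bits -- gives immB = 8, so the EXT picks up the high 8 bytes of the
   source followed by 8 bytes of the zero vector: the old high half lands in
   the low half of the result and the high half becomes zero. */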
3192 case Iop_PolynomialMull8x8:
3193 case Iop_Mull32Ux2:
3194 case Iop_Mull16Ux4:
3195 case Iop_Mull8Ux8:
3196 case Iop_Mull32Sx2:
3197 case Iop_Mull16Sx4:
3198 case Iop_Mull8Sx8:
3199 case Iop_QDMull32Sx2:
3200 case Iop_QDMull16Sx4:
3202 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
3203 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3204 HReg vSrcL = newVRegV(env);
3205 HReg vSrcR = newVRegV(env);
3206 HReg dst = newVRegV(env);
3207 ARM64VecBinOp op = ARM64vecb_INVALID;
3208 switch (e->Iex.Binop.op) {
3209 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break;
3210 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break;
3211 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break;
3212 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break;
3213 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break;
3214 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; break;
3215 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break;
3216 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break;
3217 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break;
3218 default: vassert(0);
3220 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
3221 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
3222 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
3223 return dst;
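            /* Sketch of what this produces for, e.g., Iop_Mull32Ux2: the two
               64-bit integer args hold lanes {a1,a0} and {b1,b0}; after being
               copied into vector registers, a UMULL .2D,.2S,.2S style multiply
               widens lane-wise, giving the 128-bit result {a1*b1, a0*b0} as
               two 64-bit lanes.  The signed, polynomial and saturating-doubling
               variants differ only in the vector opcode selected above. */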
3226 /* ... */
3227 default:
3228 break;
3229 } /* switch on the binop */
3230 } /* if (e->tag == Iex_Binop) */
3232 if (e->tag == Iex_Triop) {
3233 IRTriop* triop = e->Iex.Triop.details;
3234 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
3235 switch (triop->op) {
3236 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
3237 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
3238 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
3239 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
3240 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
3241 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
3242 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
3243 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
3244 case Iop_Add16Fx8: vecbop = ARM64vecb_FADD16x8; break;
3245 case Iop_Sub16Fx8: vecbop = ARM64vecb_FSUB16x8; break;
3246 default: break;
3248 if (vecbop != ARM64vecb_INVALID) {
3249 HReg argL = iselV128Expr(env, triop->arg2);
3250 HReg argR = iselV128Expr(env, triop->arg3);
3251 HReg dst = newVRegV(env);
3252 set_FPCR_rounding_mode(env, triop->arg1);
3253 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
3254 return dst;
3257 if (triop->op == Iop_SliceV128) {
3258 /* Note that, compared to ShlV128/ShrV128 just above, the shift
3259 amount here is in bytes, not bits. */
3260 IRExpr* argHi = triop->arg1;
3261 IRExpr* argLo = triop->arg2;
3262 IRExpr* argAmt = triop->arg3;
3263 if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
3264 UInt amt = argAmt->Iex.Const.con->Ico.U8;
3265 Bool amtOK = amt >= 1 && amt <= 15;
3266            /* We could also deal with amt==0 by copying argLo to
3267 the destination, but there's no need for that so far. */
3268 if (amtOK) {
3269 HReg srcHi = iselV128Expr(env, argHi);
3270 HReg srcLo = iselV128Expr(env, argLo);
3271 HReg dst = newVRegV(env);
3272 addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
3273 return dst;
3276 /* else fall out; this is unhandled */
3279 } /* if (e->tag == Iex_Triop) */
3281 if (e->tag == Iex_ITE) {
3282 // This code sequence is pretty feeble. We'd do better to generate BSL
3283 // here.
3284 HReg rX = newVRegI(env);
3286 ARM64CondCode cc = iselCondCode_C(env, e->Iex.ITE.cond);
3287 addInstr(env, ARM64Instr_Set64(rX, cc));
3288 // cond: rX = 1 !cond: rX = 0
3290 // Mask the Set64 result. This is paranoia (should be unnecessary).
3291 ARM64RIL* one = mb_mkARM64RIL_I(1);
3292 vassert(one);
3293 addInstr(env, ARM64Instr_Logic(rX, rX, one, ARM64lo_AND));
3294 // cond: rX = 1 !cond: rX = 0
3296 // Propagate to all bits in the 64 bit word by subtracting 1 from it.
3297 // This also inverts the sense of the value.
3298 addInstr(env, ARM64Instr_Arith(rX, rX, ARM64RIA_I12(1,0),
3299 /*isAdd=*/False));
3300 // cond: rX = 0-(62)-0 !cond: rX = 1-(62)-1
3302 // Duplicate rX into a vector register
3303 HReg vMask = newVRegV(env);
3304 addInstr(env, ARM64Instr_VQfromXX(vMask, rX, rX));
3305 // cond: vMask = 0-(126)-0 !cond: vMask = 1-(126)-1
3307 HReg vIfTrue = iselV128Expr(env, e->Iex.ITE.iftrue);
3308 HReg vIfFalse = iselV128Expr(env, e->Iex.ITE.iffalse);
3310 // Mask out iffalse value as needed
3311 addInstr(env,
3312 ARM64Instr_VBinV(ARM64vecb_AND, vIfFalse, vIfFalse, vMask));
3314 // Invert the mask so we can use it for the iftrue value
3315 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, vMask, vMask));
3316 // cond: vMask = 1-(126)-1 !cond: vMask = 0-(126)-0
3318 // Mask out iftrue value as needed
3319 addInstr(env,
3320 ARM64Instr_VBinV(ARM64vecb_AND, vIfTrue, vIfTrue, vMask));
3322 // Merge the masked iftrue and iffalse results.
3323 HReg res = newVRegV(env);
3324 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ORR, res, vIfTrue, vIfFalse));
3326 return res;
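      /* The BSL improvement mentioned above, as a sketch only -- it assumes
         a BSL vector op were added to this backend, which is not shown here.
         AArch64's BSL computes dst = (dst & opnd1) | (~dst & opnd2), so once
         |vMask| is all-ones or all-zeroes,
            mov  vTmp, vMask
            bsl  vTmp, vIfTrue, vIfFalse
         would perform the selection in two vector instructions instead of
         the AND/NOT/AND/ORR sequence used here. */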
3329 v128_expr_bad:
3330 ppIRExpr(e);
3331 vpanic("iselV128Expr_wrk");
3335 /*---------------------------------------------------------*/
3336 /*--- ISEL: Floating point expressions (64 bit) ---*/
3337 /*---------------------------------------------------------*/
3339 /* Compute a 64-bit floating point value into a register, the identity
3340 of which is returned. As with iselIntExpr_R, the reg may be either
3341 real or virtual; in any case it must not be changed by subsequent
3342 code emitted by the caller. */
3344 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
3346 HReg r = iselDblExpr_wrk( env, e );
3347 # if 0
3348 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3349 # endif
3350 vassert(hregClass(r) == HRcFlt64);
3351 vassert(hregIsVirtual(r));
3352 return r;
3355 /* DO NOT CALL THIS DIRECTLY */
3356 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
3358 IRType ty = typeOfIRExpr(env->type_env,e);
3359 vassert(e);
3360 vassert(ty == Ity_F64);
3362 if (e->tag == Iex_RdTmp) {
3363 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3366 if (e->tag == Iex_Const) {
3367 IRConst* con = e->Iex.Const.con;
3368 if (con->tag == Ico_F64i) {
3369 HReg src = newVRegI(env);
3370 HReg dst = newVRegD(env);
3371 addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
3372 addInstr(env, ARM64Instr_VDfromX(dst, src));
3373 return dst;
3375 if (con->tag == Ico_F64) {
3376 HReg src = newVRegI(env);
3377 HReg dst = newVRegD(env);
3378 union { Double d64; ULong u64; } u;
3379 vassert(sizeof(u) == 8);
3380 u.d64 = con->Ico.F64;
3381 addInstr(env, ARM64Instr_Imm64(src, u.u64));
3382 addInstr(env, ARM64Instr_VDfromX(dst, src));
3383 return dst;
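         /* Illustration of the bit-level transfer above: for the F64 constant
            1.0 the union gives u.u64 == 0x3FF0000000000000, which Imm64
            materialises in an integer register and VDfromX then copies, bit
            for bit, into the destination D register. */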
3387 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3388 vassert(e->Iex.Load.ty == Ity_F64);
3389 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3390 HReg res = newVRegD(env);
3391 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
3392 return res;
3395 if (e->tag == Iex_Get) {
3396 Int offs = e->Iex.Get.offset;
3397 if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
3398 HReg rD = newVRegD(env);
3399 HReg rN = get_baseblock_register();
3400 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
3401 return rD;
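         /* The "offs < 32768 && 0 == (offs & 7)" guard appears to match the
            reachable range of a scaled unsigned 12-bit offset for 8-byte
            accesses: 4095 * 8 = 32760 is the largest encodable offset
            (an assumption about VLdStD's encoding, not stated here). */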
3405 if (e->tag == Iex_Unop) {
3406 switch (e->Iex.Unop.op) {
3407 case Iop_NegF64: {
3408 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3409 HReg dst = newVRegD(env);
3410 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
3411 return dst;
3413 case Iop_AbsF64: {
3414 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3415 HReg dst = newVRegD(env);
3416 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
3417 return dst;
3419 case Iop_F32toF64: {
3420 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3421 HReg dst = newVRegD(env);
3422 addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
3423 return dst;
3425 case Iop_F16toF64: {
3426 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3427 HReg dst = newVRegD(env);
3428 addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
3429 return dst;
3431 case Iop_I32UtoF64:
3432 case Iop_I32StoF64: {
3433            /* Rounding mode is not involved here: a 32-bit integer always
3434               fits exactly in F64's 53-bit significand, so the conversion
3435               can always be done without loss of precision. */
3436 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3437 HReg dst = newVRegD(env);
3438 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
3439 ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
3440 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
3441 return dst;
3443 case Iop_RoundF64toIntA0: {
3444 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3445 HReg dst = newVRegD(env);
3446 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINTA0, dst, src));
3447 return dst;
3449 case Iop_RoundF64toIntE: {
3450 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3451 HReg dst = newVRegD(env);
3452 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINTE, dst, src));
3453 return dst;
3455 default:
3456 break;
3460 if (e->tag == Iex_Binop) {
3461 switch (e->Iex.Binop.op) {
3462 case Iop_RoundF64toInt:
3463 case Iop_SqrtF64:
3464 case Iop_RecpExpF64: {
3465 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3466 HReg dst = newVRegD(env);
3467 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3468 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3469 switch (e->Iex.Binop.op) {
3470 case Iop_RoundF64toInt: op = ARM64fpu_RINT; break;
3471 case Iop_SqrtF64: op = ARM64fpu_SQRT; break;
3472 case Iop_RecpExpF64: op = ARM64fpu_RECPX; break;
3473 default: vassert(0);
3475 addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
3476 return dst;
3478 case Iop_I64StoF64:
3479 case Iop_I64UtoF64: {
3480 ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
3481 ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
3482 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3483 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3484 HReg dstS = newVRegD(env);
3485 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3486 return dstS;
3488 default:
3489 break;
3493 if (e->tag == Iex_Triop) {
3494 IRTriop* triop = e->Iex.Triop.details;
3495 ARM64FpBinOp dblop = ARM64fpb_INVALID;
3496 switch (triop->op) {
3497 case Iop_DivF64: dblop = ARM64fpb_DIV; break;
3498 case Iop_MulF64: dblop = ARM64fpb_MUL; break;
3499 case Iop_SubF64: dblop = ARM64fpb_SUB; break;
3500 case Iop_AddF64: dblop = ARM64fpb_ADD; break;
3501 default: break;
3503 if (dblop != ARM64fpb_INVALID) {
3504 HReg argL = iselDblExpr(env, triop->arg2);
3505 HReg argR = iselDblExpr(env, triop->arg3);
3506 HReg dst = newVRegD(env);
3507 set_FPCR_rounding_mode(env, triop->arg1);
3508 addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
3509 return dst;
3513 if (e->tag == Iex_Qop) {
3514 IRQop* qop = e->Iex.Qop.details;
3515 ARM64FpTriOp triop = ARM64fpt_INVALID;
3516 switch (qop->op) {
3517 case Iop_MAddF64: triop = ARM64fpt_FMADD; break;
3518 case Iop_MSubF64: triop = ARM64fpt_FMSUB; break;
3519 default: break;
3521 if (triop != ARM64fpt_INVALID) {
3522 HReg N = iselDblExpr(env, qop->arg2);
3523 HReg M = iselDblExpr(env, qop->arg3);
3524 HReg A = iselDblExpr(env, qop->arg4);
3525 HReg dst = newVRegD(env);
3526 set_FPCR_rounding_mode(env, qop->arg1);
3527 addInstr(env, ARM64Instr_VTriD(triop, dst, N, M, A));
3528 return dst;
3532 if (e->tag == Iex_ITE) {
3533 /* ITE(ccexpr, iftrue, iffalse) */
3534 ARM64CondCode cc;
3535 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3536 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3537 HReg dst = newVRegD(env);
3538 cc = iselCondCode_C(env, e->Iex.ITE.cond);
3539 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
3540 return dst;
3543 ppIRExpr(e);
3544 vpanic("iselDblExpr_wrk");
3548 /*---------------------------------------------------------*/
3549 /*--- ISEL: Floating point expressions (32 bit) ---*/
3550 /*---------------------------------------------------------*/
3552 /* Compute a 32-bit floating point value into a register, the identity
3553 of which is returned. As with iselIntExpr_R, the reg may be either
3554 real or virtual; in any case it must not be changed by subsequent
3555 code emitted by the caller. Values are generated into HRcFlt64
3556 registers despite the values themselves being Ity_F32s. */
3558 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3560 HReg r = iselFltExpr_wrk( env, e );
3561 # if 0
3562 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3563 # endif
3564 vassert(hregClass(r) == HRcFlt64);
3565 vassert(hregIsVirtual(r));
3566 return r;
3569 /* DO NOT CALL THIS DIRECTLY */
3570 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3572 IRType ty = typeOfIRExpr(env->type_env,e);
3573 vassert(e);
3574 vassert(ty == Ity_F32);
3576 if (e->tag == Iex_RdTmp) {
3577 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3580 if (e->tag == Iex_Const) {
3581      /* This is something of a kludge. Since a 32-bit floating point
3582         zero is just all zeroes, create a 64-bit zero word
3583         and transfer it. This avoids having to create an SfromW
3584         instruction for this specific case. */
3585 IRConst* con = e->Iex.Const.con;
3586 if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
3587 HReg src = newVRegI(env);
3588 HReg dst = newVRegD(env);
3589 addInstr(env, ARM64Instr_Imm64(src, 0));
3590 addInstr(env, ARM64Instr_VDfromX(dst, src));
3591 return dst;
3593 if (con->tag == Ico_F32) {
3594 HReg src = newVRegI(env);
3595 HReg dst = newVRegD(env);
3596 union { Float f32; UInt u32; } u;
3597 vassert(sizeof(u) == 4);
3598 u.f32 = con->Ico.F32;
3599 addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
3600 addInstr(env, ARM64Instr_VDfromX(dst, src));
3601 return dst;
3605 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3606 vassert(e->Iex.Load.ty == Ity_F32);
3607 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3608 HReg res = newVRegD(env);
3609 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
3610 return res;
3613 if (e->tag == Iex_Get) {
3614 Int offs = e->Iex.Get.offset;
3615 if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
3616 HReg rD = newVRegD(env);
3617 HReg rN = get_baseblock_register();
3618 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
3619 return rD;
3623 if (e->tag == Iex_Unop) {
3624 switch (e->Iex.Unop.op) {
3625 case Iop_NegF32: {
3626 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3627 HReg dst = newVRegD(env);
3628 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
3629 return dst;
3631 case Iop_AbsF32: {
3632 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3633 HReg dst = newVRegD(env);
3634 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
3635 return dst;
3637 case Iop_F16toF32: {
3638 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3639 HReg dst = newVRegD(env);
3640 addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
3641 return dst;
3643 case Iop_RoundF32toIntA0: {
3644 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3645 HReg dst = newVRegD(env);
3646 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINTA0, dst, src));
3647 return dst;
3649 case Iop_RoundF32toIntE: {
3650 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3651 HReg dst = newVRegD(env);
3652 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINTE, dst, src));
3653 return dst;
3655 default:
3656 break;
3660 if (e->tag == Iex_Binop) {
3661 switch (e->Iex.Binop.op) {
3662 case Iop_RoundF32toInt:
3663 case Iop_SqrtF32:
3664 case Iop_RecpExpF32: {
3665 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
3666 HReg dst = newVRegD(env);
3667 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3668 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3669 switch (e->Iex.Binop.op) {
3670 case Iop_RoundF32toInt: op = ARM64fpu_RINT; break;
3671 case Iop_SqrtF32: op = ARM64fpu_SQRT; break;
3672 case Iop_RecpExpF32: op = ARM64fpu_RECPX; break;
3673 default: vassert(0);
3675 addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
3676 return dst;
3678 case Iop_F64toF32: {
3679 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3680 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3681 HReg dstS = newVRegD(env);
3682 addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
3683 return dstS;
3685 case Iop_I32UtoF32:
3686 case Iop_I32StoF32:
3687 case Iop_I64UtoF32:
3688 case Iop_I64StoF32: {
3689 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
3690 switch (e->Iex.Binop.op) {
3691 case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
3692 case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
3693 case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
3694 case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
3695 default: vassert(0);
3697 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3698 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3699 HReg dstS = newVRegD(env);
3700 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3701 return dstS;
3703 default:
3704 break;
3708 if (e->tag == Iex_Triop) {
3709 IRTriop* triop = e->Iex.Triop.details;
3710 ARM64FpBinOp sglop = ARM64fpb_INVALID;
3711 switch (triop->op) {
3712 case Iop_DivF32: sglop = ARM64fpb_DIV; break;
3713 case Iop_MulF32: sglop = ARM64fpb_MUL; break;
3714 case Iop_SubF32: sglop = ARM64fpb_SUB; break;
3715 case Iop_AddF32: sglop = ARM64fpb_ADD; break;
3716 default: break;
3718 if (sglop != ARM64fpb_INVALID) {
3719 HReg argL = iselFltExpr(env, triop->arg2);
3720 HReg argR = iselFltExpr(env, triop->arg3);
3721 HReg dst = newVRegD(env);
3722 set_FPCR_rounding_mode(env, triop->arg1);
3723 addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
3724 return dst;
3728 if (e->tag == Iex_ITE) {
3729 /* ITE(ccexpr, iftrue, iffalse) */
3730 ARM64CondCode cc;
3731 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
3732 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
3733 HReg dst = newVRegD(env);
3734 cc = iselCondCode_C(env, e->Iex.ITE.cond);
3735 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
3736 return dst;
3739 if (e->tag == Iex_Qop) {
3740 IRQop* qop = e->Iex.Qop.details;
3741 ARM64FpTriOp triop = ARM64fpt_INVALID;
3742 switch (qop->op) {
3743 case Iop_MAddF32: triop = ARM64fpt_FMADD; break;
3744 case Iop_MSubF32: triop = ARM64fpt_FMSUB; break;
3745 default: break;
3748 if (triop != ARM64fpt_INVALID) {
3749 HReg N = iselFltExpr(env, qop->arg2);
3750 HReg M = iselFltExpr(env, qop->arg3);
3751 HReg A = iselFltExpr(env, qop->arg4);
3752 HReg dst = newVRegD(env);
3753 set_FPCR_rounding_mode(env, qop->arg1);
3754 addInstr(env, ARM64Instr_VTriS(triop, dst, N, M, A));
3755 return dst;
3759 ppIRExpr(e);
3760 vpanic("iselFltExpr_wrk");
3764 /*---------------------------------------------------------*/
3765 /*--- ISEL: Floating point expressions (16 bit) ---*/
3766 /*---------------------------------------------------------*/
3768 /* Compute a 16-bit floating point value into a register, the identity
3769 of which is returned. As with iselIntExpr_R, the reg may be either
3770 real or virtual; in any case it must not be changed by subsequent
3771 code emitted by the caller. Values are generated into HRcFlt64
3772 registers despite the values themselves being Ity_F16s. */
3774 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3776 HReg r = iselF16Expr_wrk( env, e );
3777 # if 0
3778 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3779 # endif
3780 vassert(hregClass(r) == HRcFlt64);
3781 vassert(hregIsVirtual(r));
3782 return r;
3785 /* DO NOT CALL THIS DIRECTLY */
3786 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3788 IRType ty = typeOfIRExpr(env->type_env,e);
3789 vassert(e);
3790 vassert(ty == Ity_F16);
3792 if (e->tag == Iex_RdTmp) {
3793 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3796 if (e->tag == Iex_Get) {
3797 Int offs = e->Iex.Get.offset;
3798 if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3799 HReg rD = newVRegD(env);
3800 HReg rN = get_baseblock_register();
3801 addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3802 return rD;
3806 if (e->tag == Iex_Unop) {
3807 switch (e->Iex.Unop.op) {
3808 case Iop_NegF16: {
3809 HReg srcH = iselF16Expr(env, e->Iex.Unop.arg);
3810 HReg dstH = newVRegD(env);
3811 addInstr(env, ARM64Instr_VUnaryH(ARM64fpu_NEG, dstH, srcH));
3812 return dstH;
3814 case Iop_AbsF16: {
3815 HReg srcH = iselF16Expr(env, e->Iex.Unop.arg);
3816 HReg dstH = newVRegD(env);
3817 addInstr(env, ARM64Instr_VUnaryH(ARM64fpu_ABS, dstH, srcH));
3818 return dstH;
3820 default:
3821 break;
3825 if (e->tag == Iex_Binop) {
3826 switch (e->Iex.Binop.op) {
3827 case Iop_F32toF16: {
3828 HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3829 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3830 HReg dstH = newVRegD(env);
3831 addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3832 return dstH;
3834 case Iop_F64toF16: {
3835 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3836 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3837 HReg dstH = newVRegD(env);
3838 addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3839 return dstH;
3841 case Iop_SqrtF16: {
3842 HReg src = iselF16Expr(env, e->Iex.Binop.arg2);
3843 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3844 HReg dst = newVRegD(env);
3845 addInstr(env, ARM64Instr_VUnaryH(ARM64fpu_SQRT, dst, src));
3846 return dst;
3848 default:
3849 break;
3853 if (e->tag == Iex_Triop) {
3854 IRTriop* triop = e->Iex.Triop.details;
3855 ARM64FpBinOp sglop = ARM64fpb_INVALID;
3856 switch (triop->op) {
3857 case Iop_AddF16: sglop = ARM64fpb_ADD; break;
3858 case Iop_SubF16: sglop = ARM64fpb_SUB; break;
3859 default: break;
3861 if (sglop != ARM64fpb_INVALID) {
3862 HReg argL = iselF16Expr(env, triop->arg2);
3863 HReg argR = iselF16Expr(env, triop->arg3);
3864 HReg dst = newVRegD(env);
3865 set_FPCR_rounding_mode(env, triop->arg1);
3866 addInstr(env, ARM64Instr_VBinH(sglop, dst, argL, argR));
3867 return dst;
3871 ppIRExpr(e);
3872 vpanic("iselF16Expr_wrk");
3876 /*---------------------------------------------------------*/
3877 /*--- ISEL: Vector expressions (256 bit) ---*/
3878 /*---------------------------------------------------------*/
3880 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3881 ISelEnv* env, IRExpr* e )
3883 iselV256Expr_wrk( rHi, rLo, env, e );
3884 vassert(hregClass(*rHi) == HRcVec128);
3885 vassert(hregClass(*rLo) == HRcVec128);
3886 vassert(hregIsVirtual(*rHi));
3887 vassert(hregIsVirtual(*rLo));
3890 /* DO NOT CALL THIS DIRECTLY */
3891 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
3892 ISelEnv* env, IRExpr* e )
3894 vassert(e);
3895 IRType ty = typeOfIRExpr(env->type_env,e);
3896 vassert(ty == Ity_V256);
3898 /* read 256-bit IRTemp */
3899 if (e->tag == Iex_RdTmp) {
3900 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3901 return;
3904 if (e->tag == Iex_Binop) {
3905 switch (e->Iex.Binop.op) {
3906 case Iop_V128HLtoV256: {
3907 *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
3908 *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
3909 return;
3911 case Iop_QandSQsh64x2:
3912 case Iop_QandSQsh32x4:
3913 case Iop_QandSQsh16x8:
3914 case Iop_QandSQsh8x16:
3915 case Iop_QandUQsh64x2:
3916 case Iop_QandUQsh32x4:
3917 case Iop_QandUQsh16x8:
3918 case Iop_QandUQsh8x16:
3919 case Iop_QandSQRsh64x2:
3920 case Iop_QandSQRsh32x4:
3921 case Iop_QandSQRsh16x8:
3922 case Iop_QandSQRsh8x16:
3923 case Iop_QandUQRsh64x2:
3924 case Iop_QandUQRsh32x4:
3925 case Iop_QandUQRsh16x8:
3926 case Iop_QandUQRsh8x16:
3928 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
3929 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
3930 HReg fpsr = newVRegI(env);
3931 HReg resHi = newVRegV(env);
3932 HReg resLo = newVRegV(env);
3933 ARM64VecBinOp op = ARM64vecb_INVALID;
3934 switch (e->Iex.Binop.op) {
3935 case Iop_QandSQsh64x2: op = ARM64vecb_SQSHL64x2; break;
3936 case Iop_QandSQsh32x4: op = ARM64vecb_SQSHL32x4; break;
3937 case Iop_QandSQsh16x8: op = ARM64vecb_SQSHL16x8; break;
3938 case Iop_QandSQsh8x16: op = ARM64vecb_SQSHL8x16; break;
3939 case Iop_QandUQsh64x2: op = ARM64vecb_UQSHL64x2; break;
3940 case Iop_QandUQsh32x4: op = ARM64vecb_UQSHL32x4; break;
3941 case Iop_QandUQsh16x8: op = ARM64vecb_UQSHL16x8; break;
3942 case Iop_QandUQsh8x16: op = ARM64vecb_UQSHL8x16; break;
3943 case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
3944 case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
3945 case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
3946 case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
3947 case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
3948 case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
3949 case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
3950 case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
3951 default: vassert(0);
3953 /* Clear FPSR.Q, do the operation, and return both its result
3954 and the new value of FPSR.Q. We can simply zero out FPSR
3955 since all the other bits have no relevance in VEX generated
3956 code. */
3957 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3958 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3959 addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
3960 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3961 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3962 ARM64sh_SHR));
3963 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3964 vassert(ril_one);
3965 addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
3966 /* Now we have: the main (shift) result in |resLo|, and the
3967 Q bit at the bottom of |fpsr|. */
3968 addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
3969 *rHi = resHi;
3970 *rLo = resLo;
3971 return;
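               /* So, as a sketch of the V256 result convention for these
                  "Qand" ops: *rLo is the 128-bit shift result, and *rHi is a
                  128-bit value whose low 64 bits hold the 0-or-1 saturation
                  (Q) flag read back from FPSR. */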
3974 /* ... */
3975 default:
3976 break;
3977 } /* switch on the binop */
3978 } /* if (e->tag == Iex_Binop) */
3980 ppIRExpr(e);
3981 vpanic("iselV256Expr_wrk");
3985 /*---------------------------------------------------------*/
3986 /*--- ISEL: Statements ---*/
3987 /*---------------------------------------------------------*/
3989 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3991 if (vex_traceflags & VEX_TRACE_VCODE) {
3992 vex_printf("\n-- ");
3993 ppIRStmt(stmt);
3994 vex_printf("\n");
3996 switch (stmt->tag) {
3998 /* --------- STORE --------- */
3999 /* little-endian write to memory */
4000 case Ist_Store: {
4001 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
4002 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
4003 IREndness end = stmt->Ist.Store.end;
4005 if (tya != Ity_I64 || end != Iend_LE)
4006 goto stmt_fail;
4008 if (tyd == Ity_I64) {
4009 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
4010 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
4011 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
4012 return;
4014 if (tyd == Ity_I32) {
4015 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
4016 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
4017 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
4018 return;
4020 if (tyd == Ity_I16) {
4021 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
4022 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
4023 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
4024 return;
4026 if (tyd == Ity_I8) {
4027 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
4028 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
4029 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
4030 return;
4032 if (tyd == Ity_V128) {
4033 HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
4034 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
4035 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
4036 return;
4038 if (tyd == Ity_F64) {
4039 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
4040 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
4041 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
4042 return;
4044 if (tyd == Ity_F32) {
4045 HReg sD = iselFltExpr(env, stmt->Ist.Store.data);
4046 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
4047 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
4048 return;
4050 break;
4053 /* --------- PUT --------- */
4054 /* write guest state, fixed offset */
4055 case Ist_Put: {
4056 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
4057 UInt offs = (UInt)stmt->Ist.Put.offset;
4058 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
4059 HReg rD = INVALID_HREG;
4060 if (isZeroU64(stmt->Ist.Put.data)) {
4061 // In this context, XZR_XSP denotes the zero register.
4062 rD = hregARM64_XZR_XSP();
4063 } else {
4064 rD = iselIntExpr_R(env, stmt->Ist.Put.data);
4066 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
4067 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
4068 return;
4070 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
4071 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
4072 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
4073 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
4074 return;
4076 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
4077 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
4078 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
4079 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
4080 return;
4082 if (tyd == Ity_I8 && offs < (1<<12)) {
4083 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
4084 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
4085 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
4086 return;
4088 if (tyd == Ity_V128 && offs < (1<<12)) {
4089 HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
4090 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
4091 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
4092 return;
4094 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
4095 HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
4096 HReg bbp = get_baseblock_register();
4097 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
4098 return;
4100 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
4101 HReg sD = iselFltExpr(env, stmt->Ist.Put.data);
4102 HReg bbp = get_baseblock_register();
4103 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
4104 return;
4106 if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
4107 HReg hD = iselF16Expr(env, stmt->Ist.Put.data);
4108 HReg bbp = get_baseblock_register();
4109 addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
4110 return;
4113 break;
4116 /* --------- TMP --------- */
4117 /* assign value to temporary */
4118 case Ist_WrTmp: {
4119 IRTemp tmp = stmt->Ist.WrTmp.tmp;
4120 IRType ty = typeOfIRTemp(env->type_env, tmp);
4122 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
4123 /* We could do a lot better here. But for the time being: */
4124 HReg dst = lookupIRTemp(env, tmp);
4125 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
4126 addInstr(env, ARM64Instr_MovI(dst, rD));
4127 return;
4129 if (ty == Ity_I1) {
4130         /* Here, we are generating an I1 value into a 64-bit register.
4131            Make sure the value in the register is only zero or one,
4132            and nothing else. This allows optimisation of the
4133 1Uto64(tmp:I1) case, by making it simply a copy of the
4134 register holding 'tmp'. The point being that the value in
4135 the register holding 'tmp' can only have been created
4136 here. LATER: that seems dangerous; safer to do 'tmp & 1'
4137 in that case. Also, could do this just with a single CINC
4138 insn. */
4139 /* CLONE-01 */
4140 HReg zero = hregARM64_XZR_XSP(); // XZR in this context
4141 HReg one = newVRegI(env);
4142 HReg dst = lookupIRTemp(env, tmp);
4143 addInstr(env, ARM64Instr_Imm64(one, 1));
4144 ARM64CondCode cc = iselCondCode_C(env, stmt->Ist.WrTmp.data);
4145 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
4146 return;
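            /* The single-instruction alternative mentioned above, as a sketch
               only (it assumes an encoder for CSINC/CSET were added, which
               this file does not provide):
                  csinc dst, xzr, xzr, <inverted cc>   // i.e. cset dst, cc
               yields dst = cc ? 1 : 0 directly, with no Imm64 needed. */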
4148 if (ty == Ity_F64) {
4149 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
4150 HReg dst = lookupIRTemp(env, tmp);
4151 addInstr(env, ARM64Instr_VMov(8, dst, src));
4152 return;
4154 if (ty == Ity_F32) {
4155 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4156 HReg dst = lookupIRTemp(env, tmp);
4157 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
4158 return;
4160 if (ty == Ity_F16) {
4161 HReg src = iselF16Expr(env, stmt->Ist.WrTmp.data);
4162 HReg dst = lookupIRTemp(env, tmp);
4163 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
4164 return;
4166 if (ty == Ity_I128) {
4167 HReg rHi, rLo, dstHi, dstLo;
4168 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
4169 lookupIRTempPair( &dstHi, &dstLo, env, tmp);
4170 addInstr(env, ARM64Instr_MovI(dstHi, rHi));
4171 addInstr(env, ARM64Instr_MovI(dstLo, rLo));
4172 return;
4174 if (ty == Ity_V128) {
4175 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
4176 HReg dst = lookupIRTemp(env, tmp);
4177 addInstr(env, ARM64Instr_VMov(16, dst, src));
4178 return;
4180 if (ty == Ity_V256) {
4181 HReg srcHi, srcLo, dstHi, dstLo;
4182 iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
4183 lookupIRTempPair( &dstHi, &dstLo, env, tmp);
4184 addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
4185 addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
4186 return;
4188 break;
4191 /* --------- Call to DIRTY helper --------- */
4192 /* call complex ("dirty") helper function */
4193 case Ist_Dirty: {
4194 IRDirty* d = stmt->Ist.Dirty.details;
4196 /* Figure out the return type, if any. */
4197 IRType retty = Ity_INVALID;
4198 if (d->tmp != IRTemp_INVALID)
4199 retty = typeOfIRTemp(env->type_env, d->tmp);
4201 Bool retty_ok = False;
4202 switch (retty) {
4203 case Ity_INVALID: /* function doesn't return anything */
4204 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4205 case Ity_V128:
4206 retty_ok = True; break;
4207 default:
4208 break;
4210 if (!retty_ok)
4211 break; /* will go to stmt_fail: */
4213 /* Marshal args, do the call, and set the return value to 0x555..555
4214 if this is a conditional call that returns a value and the
4215 call is skipped. */
4216 UInt addToSp = 0;
4217 RetLoc rloc = mk_RetLoc_INVALID();
4218 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4219 vassert(is_sane_RetLoc(rloc));
4221 /* Now figure out what to do with the returned value, if any. */
4222 switch (retty) {
4223 case Ity_INVALID: {
4224 /* No return value. Nothing to do. */
4225 vassert(d->tmp == IRTemp_INVALID);
4226 vassert(rloc.pri == RLPri_None);
4227 vassert(addToSp == 0);
4228 return;
4230 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
4231 vassert(rloc.pri == RLPri_Int);
4232 vassert(addToSp == 0);
4233 /* The returned value is in x0. Park it in the register
4234 associated with tmp. */
4235 HReg dst = lookupIRTemp(env, d->tmp);
4236 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
4237 return;
4239 case Ity_V128: {
4240 /* The returned value is on the stack, and *retloc tells
4241 us where. Fish it off the stack and then move the
4242 stack pointer upwards to clear it, as directed by
4243 doHelperCall. */
4244 vassert(rloc.pri == RLPri_V128SpRel);
4245 vassert(rloc.spOff < 256); // stay sane
4246 vassert(addToSp >= 16); // ditto
4247 vassert(addToSp < 256); // ditto
4248 HReg dst = lookupIRTemp(env, d->tmp);
4249 HReg tmp = newVRegI(env); // the address of the returned value
4250 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
4251 addInstr(env, ARM64Instr_Arith(tmp, tmp,
4252 ARM64RIA_I12((UShort)rloc.spOff, 0),
4253 True/*isAdd*/ ));
4254 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
4255 addInstr(env, ARM64Instr_AddToSP(addToSp));
4256 return;
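               /* Concretely (an illustrative instance only): with
                  rloc.spOff == 16 and addToSp == 32, the sequence computes
                     tmp = SP; tmp = tmp + 16;   // address of the V128 result
                     dst = 128-bit load at [tmp];
                     SP  = SP + 32;              // discard the stack slot(s)
                  leaving the helper's V128 result in |dst|. */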
4258 default:
4259 /*NOTREACHED*/
4260 vassert(0);
4262 break;
4265 /* --------- Load Linked and Store Conditional --------- */
4266 case Ist_LLSC: {
4267 if (stmt->Ist.LLSC.storedata == NULL) {
4268 /* LL */
4269 IRTemp res = stmt->Ist.LLSC.result;
4270 IRType ty = typeOfIRTemp(env->type_env, res);
4271 if (ty == Ity_I128 || ty == Ity_I64 || ty == Ity_I32
4272 || ty == Ity_I16 || ty == Ity_I8) {
4273 Int szB = 0;
4274 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
4275 switch (ty) {
4276 case Ity_I8: szB = 1; break;
4277 case Ity_I16: szB = 2; break;
4278 case Ity_I32: szB = 4; break;
4279 case Ity_I64: szB = 8; break;
4280 case Ity_I128: szB = 16; break;
4281 default: vassert(0);
4283 if (szB == 16) {
4284 HReg r_dstMSword = INVALID_HREG;
4285 HReg r_dstLSword = INVALID_HREG;
4286 lookupIRTempPair(&r_dstMSword, &r_dstLSword, env, res);
4287 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
4288 addInstr(env, ARM64Instr_LdrEXP());
4289 addInstr(env, ARM64Instr_MovI(r_dstLSword, hregARM64_X2()));
4290 addInstr(env, ARM64Instr_MovI(r_dstMSword, hregARM64_X3()));
4291 } else {
4292 vassert(szB != 0);
4293 HReg r_dst = lookupIRTemp(env, res);
4294 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
4295 addInstr(env, ARM64Instr_LdrEX(szB));
4296 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
4298 return;
4300 goto stmt_fail;
4301 } else {
4302 /* SC */
4303 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
4304 if (tyd == Ity_I128 || tyd == Ity_I64 || tyd == Ity_I32
4305 || tyd == Ity_I16 || tyd == Ity_I8) {
4306 Int szB = 0;
4307 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
4308 switch (tyd) {
4309 case Ity_I8: szB = 1; break;
4310 case Ity_I16: szB = 2; break;
4311 case Ity_I32: szB = 4; break;
4312 case Ity_I64: szB = 8; break;
4313 case Ity_I128: szB = 16; break;
4314 default: vassert(0);
4316 if (szB == 16) {
4317 HReg rD_MSword = INVALID_HREG;
4318 HReg rD_LSword = INVALID_HREG;
4319 iselInt128Expr(&rD_MSword,
4320 &rD_LSword, env, stmt->Ist.LLSC.storedata);
4321 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD_LSword));
4322 addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rD_MSword));
4323 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
4324 addInstr(env, ARM64Instr_StrEXP());
4325 } else {
4326 vassert(szB != 0);
4327 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
4328 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
4329 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
4330 addInstr(env, ARM64Instr_StrEX(szB));
4332 } else {
4333 goto stmt_fail;
4335         /* Now x0 is 1 if the store failed, 0 if it succeeded. Change to IR
4336            conventions (0 is fail, 1 is success). Also transfer the
4337            result to r_res. */
4338 IRTemp res = stmt->Ist.LLSC.result;
4339 IRType ty = typeOfIRTemp(env->type_env, res);
4340 HReg r_res = lookupIRTemp(env, res);
4341 ARM64RIL* one = mb_mkARM64RIL_I(1);
4342 vassert(ty == Ity_I1);
4343 vassert(one);
4344 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
4345 ARM64lo_XOR));
4346 /* And be conservative -- mask off all but the lowest bit. */
4347 addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
4348 ARM64lo_AND));
4349 return;
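            /* Truth table for the XOR/AND pair above:
                 x0 = 0 (store succeeded): 0 ^ 1 = 1, 1 & 1 = 1  (IR: success)
                 x0 = 1 (store failed):    1 ^ 1 = 0, 0 & 1 = 0  (IR: fail) */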
4351 break;
4354 /* --------- ACAS --------- */
4355 case Ist_CAS: {
4356 IRCAS* cas = stmt->Ist.CAS.details;
4357 if (cas->oldHi == IRTemp_INVALID && cas->end == Iend_LE) {
4358 /* "normal" singleton CAS */
4359 UChar sz;
4360 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4361 switch (ty) {
4362 case Ity_I64: sz = 8; break;
4363 case Ity_I32: sz = 4; break;
4364 case Ity_I16: sz = 2; break;
4365 case Ity_I8: sz = 1; break;
4366 default: goto unhandled_cas;
4368 HReg rAddr = iselIntExpr_R(env, cas->addr);
4369 HReg rExpd = iselIntExpr_R(env, cas->expdLo);
4370 HReg rData = iselIntExpr_R(env, cas->dataLo);
4371 vassert(cas->expdHi == NULL);
4372 vassert(cas->dataHi == NULL);
4373 addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr));
4374 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd));
4375 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData));
4376 addInstr(env, ARM64Instr_CAS(sz));
4377            /* Now the lowest szB bytes of x1 either equal the lowest
4378               szB bytes of x5, indicating success, or they don't,
4379               indicating failure. */
4380 HReg rResult = hregARM64_X1();
4381 switch (sz) {
4382 case 8: break;
4383 case 4: rResult = widen_z_32_to_64(env, rResult); break;
4384 case 2: rResult = widen_z_16_to_64(env, rResult); break;
4385 case 1: rResult = widen_z_8_to_64(env, rResult); break;
4386 default: vassert(0);
4388 // "old" in this case is interpreted somewhat liberally, per
4389 // the previous comment.
4390 HReg rOld = lookupIRTemp(env, cas->oldLo);
4391 addInstr(env, ARM64Instr_MovI(rOld, rResult));
4392 return;
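            /* Example of the widening above: for a 1-byte CAS whose old
               in-memory value was 0xFF, the low byte of x1 is 0xFF;
               widen_z_8_to_64 zero-extends it, so |rOld| receives
               0x00000000000000FF -- a clean 64-bit value for the IR-level
               oldLo temporary. */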
4394 if (cas->oldHi != IRTemp_INVALID && cas->end == Iend_LE) {
4395 /* Paired register CAS, i.e. CASP */
4396 UChar sz;
4397 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4398 switch (ty) {
4399 case Ity_I64: sz = 8; break;
4400 case Ity_I32: sz = 4; break;
4401 default: goto unhandled_cas;
4403 HReg rAddr = iselIntExpr_R(env, cas->addr);
4405 HReg rExpd0 = iselIntExpr_R(env, cas->expdLo);
4406 vassert(cas->expdHi != NULL);
4407 HReg rExpd1 = iselIntExpr_R(env, cas->expdHi);
4409 HReg rData0 = iselIntExpr_R(env, cas->dataLo);
4410 vassert(cas->dataHi != NULL);
4411 HReg rData1 = iselIntExpr_R(env, cas->dataHi);
4413 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rAddr));
4415 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rExpd0));
4416 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd1));
4418 addInstr(env, ARM64Instr_MovI(hregARM64_X6(), rData0));
4419 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData1));
4421 addInstr(env, ARM64Instr_CASP(sz));
4423 HReg rResult0 = hregARM64_X0();
4424 HReg rResult1 = hregARM64_X1();
4425 switch (sz) {
4426 case 8: break;
4427 case 4: rResult0 = widen_z_32_to_64(env, rResult0);
4428 rResult1 = widen_z_32_to_64(env, rResult1);
4429 break;
4430 default: vassert(0);
4432 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4433 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4434 addInstr(env, ARM64Instr_MovI(rOldLo, rResult0));
4435 addInstr(env, ARM64Instr_MovI(rOldHi, rResult1));
4436 return;
4438 unhandled_cas:
4439 break;
4442 /* --------- MEM FENCE --------- */
4443 case Ist_MBE:
4444 switch (stmt->Ist.MBE.event) {
4445 case Imbe_Fence:
4446 addInstr(env, ARM64Instr_MFence());
4447 return;
4448 case Imbe_CancelReservation:
4449 addInstr(env, ARM64Instr_ClrEX());
4450 return;
4451 default:
4452 break;
4454 break;
4456 /* --------- INSTR MARK --------- */
4457 /* Doesn't generate any executable code ... */
4458 case Ist_IMark:
4459 return;
4461 /* --------- ABI HINT --------- */
4462 /* These have no meaning (denotation in the IR) and so we ignore
4463 them ... if any actually made it this far. */
4464 case Ist_AbiHint:
4465 return;
4467 /* --------- NO-OP --------- */
4468 case Ist_NoOp:
4469 return;
4471 /* --------- EXIT --------- */
4472 case Ist_Exit: {
4473 if (stmt->Ist.Exit.dst->tag != Ico_U64)
4474 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
4476 ARM64CondCode cc
4477 = iselCondCode_C(env, stmt->Ist.Exit.guard);
4478 ARM64AMode* amPC
4479 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
4481 /* Case: boring transfer to known address */
4482 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4483 if (env->chainingAllowed) {
4484 /* .. almost always true .. */
4485 /* Skip the event check at the dst if this is a forwards
4486 edge. */
4487 Bool toFastEP
4488 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
4489 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4490 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
4491 amPC, cc, toFastEP));
4492 } else {
4493 /* .. very occasionally .. */
4494 /* We can't use chaining, so ask for an assisted transfer,
4495 as that's the only alternative that is allowable. */
4496 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4497 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
4499 return;
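            /* Worked example of the toFastEP decision above: if env->max_ga
               is 0x40101B and the exit target is 0x401020, the target lies
               beyond this block, the jump is certainly a forward edge, and
               XDirect may chain to the fast entry point, skipping the
               destination's event check.  A target of 0x401000 (a potential
               back edge) gets toFastEP == False and uses the slow entry
               point. */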
4502 /* Case: assisted transfer to arbitrary address */
4503 switch (stmt->Ist.Exit.jk) {
4504 /* Keep this list in sync with that for iselNext below */
4505 case Ijk_ClientReq:
4506 case Ijk_NoDecode:
4507 case Ijk_NoRedir:
4508 case Ijk_Sys_syscall:
4509 case Ijk_InvalICache:
4510 case Ijk_FlushDCache:
4511 case Ijk_SigTRAP:
4512 case Ijk_SigBUS:
4513 case Ijk_Yield: {
4514 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4515 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
4516 stmt->Ist.Exit.jk));
4517 return;
4519 default:
4520 break;
4523 /* Do we ever expect to see any other kind? */
4524 goto stmt_fail;
4527 default: break;
4529 stmt_fail:
4530 ppIRStmt(stmt);
4531 vpanic("iselStmt");
4535 /*---------------------------------------------------------*/
4536 /*--- ISEL: Basic block terminators (Nexts) ---*/
4537 /*---------------------------------------------------------*/
4539 static void iselNext ( ISelEnv* env,
4540 IRExpr* next, IRJumpKind jk, Int offsIP )
4542 if (vex_traceflags & VEX_TRACE_VCODE) {
4543 vex_printf( "\n-- PUT(%d) = ", offsIP);
4544 ppIRExpr( next );
4545 vex_printf( "; exit-");
4546 ppIRJumpKind(jk);
4547 vex_printf( "\n");
4550 /* Case: boring transfer to known address */
4551 if (next->tag == Iex_Const) {
4552 IRConst* cdst = next->Iex.Const.con;
4553 vassert(cdst->tag == Ico_U64);
4554 if (jk == Ijk_Boring || jk == Ijk_Call) {
4555 /* Boring transfer to known address */
4556 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4557 if (env->chainingAllowed) {
4558 /* .. almost always true .. */
4559 /* Skip the event check at the dst if this is a forwards
4560 edge. */
4561 Bool toFastEP
4562 = ((Addr64)cdst->Ico.U64) > env->max_ga;
4563 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4564 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
4565 amPC, ARM64cc_AL,
4566 toFastEP));
4567 } else {
4568 /* .. very occasionally .. */
4569 /* We can't use chaining, so ask for an assisted transfer,
4570 as that's the only alternative that is allowable. */
4571 HReg r = iselIntExpr_R(env, next);
4572 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4573 Ijk_Boring));
4575 return;
4579 /* Case: call/return (==boring) transfer to any address */
4580 switch (jk) {
4581 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4582 HReg r = iselIntExpr_R(env, next);
4583 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4584 if (env->chainingAllowed) {
4585 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
4586 } else {
4587 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4588 Ijk_Boring));
4590 return;
4592 default:
4593 break;
4596 /* Case: assisted transfer to arbitrary address */
4597 switch (jk) {
4598 /* Keep this list in sync with that for Ist_Exit above */
4599 case Ijk_ClientReq:
4600 case Ijk_NoDecode:
4601 case Ijk_NoRedir:
4602 case Ijk_Sys_syscall:
4603 case Ijk_InvalICache:
4604 case Ijk_FlushDCache:
4605 case Ijk_SigTRAP:
4606 case Ijk_SigBUS:
4607 case Ijk_Yield: {
4608 HReg r = iselIntExpr_R(env, next);
4609 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4610 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
4611 return;
4613 default:
4614 break;
4617 vex_printf( "\n-- PUT(%d) = ", offsIP);
4618 ppIRExpr( next );
4619 vex_printf( "; exit-");
4620 ppIRJumpKind(jk);
4621 vex_printf( "\n");
4622 vassert(0); // are we expecting any other kind?
4626 /*---------------------------------------------------------*/
4627 /*--- Insn selector top-level ---*/
4628 /*---------------------------------------------------------*/
4630 /* Translate an entire SB to arm64 code. */
4632 HInstrArray* iselSB_ARM64 ( const IRSB* bb,
4633 VexArch arch_host,
4634 const VexArchInfo* archinfo_host,
4635 const VexAbiInfo* vbi/*UNUSED*/,
4636 Int offs_Host_EvC_Counter,
4637 Int offs_Host_EvC_FailAddr,
4638 Bool chainingAllowed,
4639 Bool addProfInc,
4640 Addr max_ga )
4642 Int i, j;
4643 HReg hreg, hregHI;
4644 ISelEnv* env;
4645 UInt hwcaps_host = archinfo_host->hwcaps;
4646 ARM64AMode *amCounter, *amFailAddr;
4648 /* sanity ... */
4649 vassert(arch_host == VexArchARM64);
4651 /* Check that the host's endianness is as expected. */
4652 vassert(archinfo_host->endness == VexEndnessLE);
4654 /* guard against unexpected space regressions */
4655 vassert(sizeof(ARM64Instr) <= 32);
4657 /* Make up an initial environment to use. */
4658 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4659 env->vreg_ctr = 0;
4661 /* Set up output code array. */
4662 env->code = newHInstrArray();
4664 /* Copy BB's type env. */
4665 env->type_env = bb->tyenv;
4667 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4668 change as we go along. */
4669 env->n_vregmap = bb->tyenv->types_used;
4670 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4671 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4673 /* and finally ... */
4674 env->chainingAllowed = chainingAllowed;
4675 env->hwcaps = hwcaps_host;
4676 env->previous_rm = NULL;
4677 env->max_ga = max_ga;
4679 /* For each IR temporary, allocate a suitably-kinded virtual
4680 register. */
4681 j = 0;
4682 for (i = 0; i < env->n_vregmap; i++) {
4683 hregHI = hreg = INVALID_HREG;
4684 switch (bb->tyenv->types[i]) {
4685 case Ity_I1:
4686 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
4687 hreg = mkHReg(True, HRcInt64, 0, j++);
4688 break;
4689 case Ity_I128:
4690 hreg = mkHReg(True, HRcInt64, 0, j++);
4691 hregHI = mkHReg(True, HRcInt64, 0, j++);
4692 break;
4693 case Ity_F16: // we'll use HRcFlt64 regs for F16 too
4694 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
4695 case Ity_F64:
4696 hreg = mkHReg(True, HRcFlt64, 0, j++);
4697 break;
4698 case Ity_V128:
4699 hreg = mkHReg(True, HRcVec128, 0, j++);
4700 break;
4701 case Ity_V256:
4702 hreg = mkHReg(True, HRcVec128, 0, j++);
4703 hregHI = mkHReg(True, HRcVec128, 0, j++);
4704 break;
4705 default:
4706 ppIRType(bb->tyenv->types[i]);
4707 vpanic("iselBB(arm64): IRTemp type");
4709 env->vregmap[i] = hreg;
4710 env->vregmapHI[i] = hregHI;
4712 env->vreg_ctr = j;
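   /* Example of the mapping just built: for a tyenv of
         t0:I64  t1:I128  t2:V128  t3:F32
      the loop assigns
         t0 -> vregmap[0] = int vreg 0
         t1 -> vregmap[1] = int vreg 1 (low half),
               vregmapHI[1] = int vreg 2 (high half)
         t2 -> vregmap[2] = vec vreg 3
         t3 -> vregmap[3] = flt vreg 4
      and vreg_ctr ends up as 5. */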
4714 /* The very first instruction must be an event check. */
4715 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
4716 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
4717 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
4719 /* Possibly a block counter increment (for profiling). At this
4720 point we don't know the address of the counter, so just pretend
4721 it is zero. It will have to be patched later, but before this
4722      translation is used, by a call to LibVEX_PatchProfInc. */
4723 if (addProfInc) {
4724 addInstr(env, ARM64Instr_ProfInc());
4727 /* Ok, finally we can iterate over the statements. */
4728 for (i = 0; i < bb->stmts_used; i++)
4729 iselStmt(env, bb->stmts[i]);
4731 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4733 /* record the number of vregs we used. */
4734 env->code->n_vregs = env->vreg_ctr;
4735 return env->code;
4739 /*---------------------------------------------------------------*/
4740 /*--- end host_arm64_isel.c ---*/
4741 /*---------------------------------------------------------------*/