VEX/priv/host_arm64_isel.c

   1
   2 /*---------------------------------------------------------------*/
   3 /*--- begin                                 host_arm64_isel.c ---*/
   4 /*---------------------------------------------------------------*/
   5
   6 /*
   7    This file is part of Valgrind, a dynamic binary instrumentation
   8    framework.
   9
  10    Copyright (C) 2013-2017 OpenWorks
  11       info@open-works.net
  12
  13    This program is free software; you can redistribute it and/or
  14    modify it under the terms of the GNU General Public License as
  15    published by the Free Software Foundation; either version 2 of the
  16    License, or (at your option) any later version.
  17
  18    This program is distributed in the hope that it will be useful, but
  19    WITHOUT ANY WARRANTY; without even the implied warranty of
  20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21    General Public License for more details.
  22
  23    You should have received a copy of the GNU General Public License
  24    along with this program; if not, see <http://www.gnu.org/licenses/>.
  25
  26    The GNU General Public License is contained in the file COPYING.
  27 */
  28
  29 #include "libvex_basictypes.h"
  30 #include "libvex_ir.h"
  31 #include "libvex.h"
  32 #include "ir_match.h"
  33
  34 #include "main_util.h"
  35 #include "main_globals.h"
  36 #include "host_generic_regs.h"
  37 #include "host_generic_simd64.h"  // for 32-bit SIMD helpers
  38 #include "host_arm64_defs.h"
  39
  40
  41 /*---------------------------------------------------------*/
  42 /*--- ISelEnv                                           ---*/
  43 /*---------------------------------------------------------*/
  44
  45 /* This carries around:
  46
  47    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
  48      might encounter.  This is computed before insn selection starts,
  49      and does not change.
  50
  51    - A mapping from IRTemp to HReg.  This tells the insn selector
  52      which virtual register is associated with each IRTemp temporary.
  53      This is computed before insn selection starts, and does not
  54      change.  We expect this mapping to map precisely the same set of
  55      IRTemps as the type mapping does.
  56
  57      |vregmap|   holds the primary register for the IRTemp.
  58      |vregmapHI| is only used for 128-bit integer-typed
  59                  IRTemps.  It holds the identity of a second
  60                  64-bit virtual HReg, which holds the high half
  61                  of the value.
  62
  63    - The code array, that is, the insns selected so far.
  64
  65    - A counter, for generating new virtual registers.
  66
  67    - The host hardware capabilities word.  This is set at the start
  68      and does not change.
  69
  70    - A Bool for indicating whether we may generate chain-me
  71      instructions for control flow transfers, or whether we must use
  72      XAssisted.
  73
  74    - The maximum guest address of any guest insn in this block.
  75      Actually, the address of the highest-addressed byte from any insn
  76      in this block.  Is set at the start and does not change.  This is
  77      used for detecting jumps which are definitely forward-edges from
  78      this block, and therefore can be made (chained) to the fast entry
  79      point of the destination, thereby avoiding the destination's
  80      event check.
  81
  82     - An IRExpr*, which may be NULL, holding the IR expression (an
  83       IRRoundingMode-encoded value) to which the FPU's rounding mode
  84       was most recently set.  Setting to NULL is always safe.  Used to
  85       avoid redundant settings of the FPU's rounding mode, as
  86       described in set_FPCR_rounding_mode below.
  87
  88    Note, this is all (well, mostly) host-independent.
  89 */
  90
  91 typedef
  92    struct {
  93       /* Constant -- are set at the start and do not change. */
  94       IRTypeEnv*   type_env;
  95
  96       HReg*        vregmap;
  97       HReg*        vregmapHI;
  98       Int          n_vregmap;
  99
 100       UInt         hwcaps;
 101
 102       Bool         chainingAllowed;
 103       Addr64       max_ga;
 104
 105       /* These are modified as we go along. */
 106       HInstrArray* code;
 107       Int          vreg_ctr;
 108
 109       IRExpr*      previous_rm;
 110    }
 111    ISelEnv;
 112
 113 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
 114 {
 115    vassert(tmp >= 0);
 116    vassert(tmp < env->n_vregmap);
 117    return env->vregmap[tmp];
 118 }
 119
 120 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
 121                                ISelEnv* env, IRTemp tmp )
 122 {
 123    vassert(tmp >= 0);
 124    vassert(tmp < env->n_vregmap);
 125    vassert(! hregIsInvalid(env->vregmapHI[tmp]));
 126    *vrLO = env->vregmap[tmp];
 127    *vrHI = env->vregmapHI[tmp];
 128 }
 129
 130 static void addInstr ( ISelEnv* env, ARM64Instr* instr )
 131 {
 132    addHInstr(env->code, instr);
 133    if (vex_traceflags & VEX_TRACE_VCODE) {
 134       ppARM64Instr(instr);
 135       vex_printf("\n");
 136    }
 137 }
 138
 139 static HReg newVRegI ( ISelEnv* env )
 140 {
 141    HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
 142    env->vreg_ctr++;
 143    return reg;
 144 }
 145
 146 static HReg newVRegD ( ISelEnv* env )
 147 {
 148    HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
 149    env->vreg_ctr++;
 150    return reg;
 151 }
 152
 153 static HReg newVRegV ( ISelEnv* env )
 154 {
 155    HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
 156    env->vreg_ctr++;
 157    return reg;
 158 }
 159
 160
 161 /*---------------------------------------------------------*/
 162 /*--- ISEL: Forward declarations                        ---*/
 163 /*---------------------------------------------------------*/
 164
 165 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
 166    iselXXX_wrk do the real work, but are not to be called directly.
 167    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
 168    checks that all returned registers are virtual.  You should not
 169    call the _wrk version directly.
 170
 171    Because some forms of ARM64 memory amodes are implicitly scaled by
 172    the access size, iselIntExpr_AMode takes an IRType which tells it
 173    the type of the access for which the amode is to be used.  This
 174    type needs to be correct, else you'll get incorrect code.
 175 */
 176 static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
 177                                            IRExpr* e, IRType dty );
 178 static ARM64AMode* iselIntExpr_AMode     ( ISelEnv* env,
 179                                            IRExpr* e, IRType dty );
 180
 181 static ARM64RIA*   iselIntExpr_RIA_wrk   ( ISelEnv* env, IRExpr* e );
 182 static ARM64RIA*   iselIntExpr_RIA       ( ISelEnv* env, IRExpr* e );
 183
 184 static ARM64RIL*   iselIntExpr_RIL_wrk   ( ISelEnv* env, IRExpr* e );
 185 static ARM64RIL*   iselIntExpr_RIL       ( ISelEnv* env, IRExpr* e );
 186
 187 static ARM64RI6*   iselIntExpr_RI6_wrk   ( ISelEnv* env, IRExpr* e );
 188 static ARM64RI6*   iselIntExpr_RI6       ( ISelEnv* env, IRExpr* e );
 189
 190 static ARM64CondCode iselCondCode_C_wrk  ( ISelEnv* env, IRExpr* e );
 191 static ARM64CondCode iselCondCode_C      ( ISelEnv* env, IRExpr* e );
 192
 193 static HReg        iselCondCode_R_wrk    ( ISelEnv* env, IRExpr* e );
 194 static HReg        iselCondCode_R        ( ISelEnv* env, IRExpr* e );
 195
 196 static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
 197 static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );
 198
 199 static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo,
 200                                            ISelEnv* env, IRExpr* e );
 201 static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo,
 202                                            ISelEnv* env, IRExpr* e );
 203
 204 static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
 205 static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
 206
 207 static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
 208 static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
 209
 210 static HReg        iselF16Expr_wrk        ( ISelEnv* env, IRExpr* e );
 211 static HReg        iselF16Expr            ( ISelEnv* env, IRExpr* e );
 212
 213 static HReg        iselV128Expr_wrk       ( ISelEnv* env, IRExpr* e );
 214 static HReg        iselV128Expr           ( ISelEnv* env, IRExpr* e );
 215
 216 static void        iselV256Expr_wrk       ( /*OUT*/HReg* rHi, HReg* rLo,
 217                                             ISelEnv* env, IRExpr* e );
 218 static void        iselV256Expr           ( /*OUT*/HReg* rHi, HReg* rLo,
 219                                             ISelEnv* env, IRExpr* e );
 220
 221 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
 222
 223
 224 /*---------------------------------------------------------*/
 225 /*--- ISEL: Misc helpers                                ---*/
 226 /*---------------------------------------------------------*/
 227
 228 /* Generate an amode suitable for a 64-bit sized access relative to
 229    the baseblock register (X21).  This generates an RI12 amode, which
 230    means its scaled by the access size, which is why the access size
 231    -- 64 bit -- is stated explicitly here.  Consequently |off| needs
 232    to be divisible by 8. */
 233 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
 234 {
 235    vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
 236    vassert((off & 7) == 0);  /* ditto */
 237    return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
 238 }
 239
 240 /* Ditto, for 32 bit accesses. */
 241 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
 242 {
 243    vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
 244    vassert((off & 3) == 0);  /* ditto */
 245    return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
 246 }
 247
 248 /* Ditto, for 16 bit accesses. */
 249 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
 250 {
 251    vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
 252    vassert((off & 1) == 0);  /* ditto */
 253    return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
 254 }
 255
 256 /* Ditto, for 8 bit accesses. */
 257 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
 258 {
 259    vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
 260    return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
 261 }
 262
 263 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
 264 {
 265    vassert(off < (1<<12));
 266    HReg r = newVRegI(env);
 267    addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
 268                                      ARM64RIA_I12(off,0), True/*isAdd*/));
 269    return r;
 270 }
 271
 272 static HReg get_baseblock_register ( void )
 273 {
 274    return hregARM64_X21();
 275 }
 276
 277 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
 278    a new register, and return the new register. */
 279 static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
 280 {
 281    HReg      dst  = newVRegI(env);
 282    ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
 283    addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
 284    return dst;
 285 }
 286
 287 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
 288    a new register, and return the new register. */
 289 static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
 290 {
 291    HReg      dst = newVRegI(env);
 292    ARM64RI6* n48 = ARM64RI6_I6(48);
 293    addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
 294    addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
 295    return dst;
 296 }
 297
 298 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
 299    a new register, and return the new register. */
 300 static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
 301 {
 302    HReg      dst  = newVRegI(env);
 303    ARM64RIL* mask = ARM64RIL_I13(1, 0, 15); /* encodes 0xFFFF */
 304    addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
 305    return dst;
 306 }
 307
 308 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
 309    a new register, and return the new register. */
 310 static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
 311 {
 312    HReg      dst = newVRegI(env);
 313    ARM64RI6* n32 = ARM64RI6_I6(32);
 314    addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
 315    addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
 316    return dst;
 317 }
 318
 319 /* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in
 320    a new register, and return the new register. */
 321 static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
 322 {
 323    HReg      dst = newVRegI(env);
 324    ARM64RI6* n56 = ARM64RI6_I6(56);
 325    addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
 326    addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
 327    return dst;
 328 }
 329
 330 static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
 331 {
 332    HReg      dst  = newVRegI(env);
 333    ARM64RIL* mask = ARM64RIL_I13(1, 0, 7); /* encodes 0xFF */
 334    addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
 335    return dst;
 336 }
 337
 338 /* Is this IRExpr_Const(IRConst_U64(0)) ? */
 339 static Bool isZeroU64 ( IRExpr* e ) {
 340    if (e->tag != Iex_Const) return False;
 341    IRConst* con = e->Iex.Const.con;
 342    vassert(con->tag == Ico_U64);
 343    return con->Ico.U64 == 0;
 344 }
 345
 346
 347 /*---------------------------------------------------------*/
 348 /*--- ISEL: FP rounding mode helpers                    ---*/
 349 /*---------------------------------------------------------*/
 350
 351 /* Set the FP rounding mode: 'mode' is an I32-typed expression
 352    denoting a value in the range 0 .. 3, indicating a round mode
 353    encoded as per type IRRoundingMode -- the first four values only
 354    (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO).  Set the ARM64
 355    FSCR to have the same rounding.
 356
 357    For speed & simplicity, we're setting the *entire* FPCR here.
 358
 359    Setting the rounding mode is expensive.  So this function tries to
 360    avoid repeatedly setting the rounding mode to the same thing by
 361    first comparing 'mode' to the 'mode' tree supplied in the previous
 362    call to this function, if any.  (The previous value is stored in
 363    env->previous_rm.)  If 'mode' is a single IR temporary 't' and
 364    env->previous_rm is also just 't', then the setting is skipped.
 365
 366    This is safe because of the SSA property of IR: an IR temporary can
 367    only be defined once and so will have the same value regardless of
 368    where it appears in the block.  Cool stuff, SSA.
 369
 370    A safety condition: all attempts to set the RM must be aware of
 371    this mechanism - by being routed through the functions here.
 372
 373    Of course this only helps if blocks where the RM is set more than
 374    once and it is set to the same value each time, *and* that value is
 375    held in the same IR temporary each time.  In order to assure the
 376    latter as much as possible, the IR optimiser takes care to do CSE
 377    on any block with any sign of floating point activity.
 378 */
 379 static
 380 void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
 381 {
 382    vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
 383
 384    /* Do we need to do anything? */
 385    if (env->previous_rm
 386        && env->previous_rm->tag == Iex_RdTmp
 387        && mode->tag == Iex_RdTmp
 388        && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
 389       /* no - setting it to what it was before.  */
 390       vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
 391       return;
 392    }
 393
 394    /* No luck - we better set it, and remember what we set it to. */
 395    env->previous_rm = mode;
 396
 397    /* Only supporting the rounding-mode bits - the rest of FPCR is set
 398       to zero - so we can set the whole register at once (faster). */
 399
 400    /* This isn't simple, because 'mode' carries an IR rounding
 401       encoding, and we need to translate that to an ARM64 FP one:
 402       The IR encoding:
 403          00  to nearest (the default)
 404          10  to +infinity
 405          01  to -infinity
 406          11  to zero
 407       The ARM64 FP encoding:
 408          00  to nearest
 409          01  to +infinity
 410          10  to -infinity
 411          11  to zero
 412       Easy enough to do; just swap the two bits.
 413    */
 414    HReg irrm = iselIntExpr_R(env, mode);
 415    HReg tL   = newVRegI(env);
 416    HReg tR   = newVRegI(env);
 417    HReg t3   = newVRegI(env);
 418    /* tL = irrm << 1;
 419       tR = irrm >> 1;  if we're lucky, these will issue together
 420       tL &= 2;
 421       tR &= 1;         ditto
 422       t3 = tL | tR;
 423       t3 <<= 22;
 424       fmxr fpscr, t3
 425    */
 426    ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
 427    ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
 428    vassert(ril_one && ril_two);
 429    addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
 430    addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
 431    addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
 432    addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
 433    addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
 434    addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
 435    addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
 436 }
 437
 438
 439 /*---------------------------------------------------------*/
 440 /*--- ISEL: Function call helpers                       ---*/
 441 /*---------------------------------------------------------*/
 442
 443 /* Used only in doHelperCall.  See big comment in doHelperCall re
 444    handling of register-parameter args.  This function figures out
 445    whether evaluation of an expression might require use of a fixed
 446    register.  If in doubt return True (safe but suboptimal).
 447 */
 448 static
 449 Bool mightRequireFixedRegs ( IRExpr* e )
 450 {
 451    if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
 452       // These are always "safe" -- either a copy of SP in some
 453       // arbitrary vreg, or a copy of x21, respectively.
 454       return False;
 455    }
 456    /* Else it's a "normal" expression. */
 457    switch (e->tag) {
 458       case Iex_RdTmp: case Iex_Const: case Iex_Get:
 459          return False;
 460       default:
 461          return True;
 462    }
 463 }
 464
 465
 466 /* Do a complete function call.  |guard| is a Ity_Bit expression
 467    indicating whether or not the call happens.  If guard==NULL, the
 468    call is unconditional.  |retloc| is set to indicate where the
 469    return value is after the call.  The caller (of this fn) must
 470    generate code to add |stackAdjustAfterCall| to the stack pointer
 471    after the call is done.  Returns True iff it managed to handle this
 472    combination of arg/return types, else returns False. */
 473
 474 static
 475 Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
 476                     /*OUT*/RetLoc* retloc,
 477                     ISelEnv* env,
 478                     IRExpr* guard,
 479                     IRCallee* cee, IRType retTy, IRExpr** args )
 480 {
 481    ARM64CondCode cc;
 482    HReg          argregs[ARM64_N_ARGREGS];
 483    HReg          tmpregs[ARM64_N_ARGREGS];
 484    Bool          go_fast;
 485    Int           n_args, i, nextArgReg;
 486    Addr64        target;
 487
 488    vassert(ARM64_N_ARGREGS == 8);
 489
 490    /* Set default returns.  We'll update them later if needed. */
 491    *stackAdjustAfterCall = 0;
 492    *retloc               = mk_RetLoc_INVALID();
 493
 494    /* These are used for cross-checking that IR-level constraints on
 495       the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
 496    UInt nVECRETs = 0;
 497    UInt nGSPTRs  = 0;
 498
 499    /* Marshal args for a call and do the call.
 500
 501       This function only deals with a tiny set of possibilities, which
 502       cover all helpers in practice.  The restrictions are that only
 503       arguments in registers are supported, hence only
 504       ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
 505       fact the only supported arg type is I64.
 506
 507       The return type can be I{64,32} or V128.  In the V128 case, it
 508       is expected that |args| will contain the special node
 509       IRExpr_VECRET(), in which case this routine generates code to
 510       allocate space on the stack for the vector return value.  Since
 511       we are not passing any scalars on the stack, it is enough to
 512       preallocate the return space before marshalling any arguments,
 513       in this case.
 514
 515       |args| may also contain IRExpr_GSPTR(), in which case the
 516       value in x21 is passed as the corresponding argument.
 517
 518       Generating code which is both efficient and correct when
 519       parameters are to be passed in registers is difficult, for the
 520       reasons elaborated in detail in comments attached to
 521       doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
 522       of the method described in those comments.
 523
 524       The problem is split into two cases: the fast scheme and the
 525       slow scheme.  In the fast scheme, arguments are computed
 526       directly into the target (real) registers.  This is only safe
 527       when we can be sure that computation of each argument will not
 528       trash any real registers set by computation of any other
 529       argument.
 530
 531       In the slow scheme, all args are first computed into vregs, and
 532       once they are all done, they are moved to the relevant real
 533       regs.  This always gives correct code, but it also gives a bunch
 534       of vreg-to-rreg moves which are usually redundant but are hard
 535       for the register allocator to get rid of.
 536
 537       To decide which scheme to use, all argument expressions are
 538       first examined.  If they are all so simple that it is clear they
 539       will be evaluated without use of any fixed registers, use the
 540       fast scheme, else use the slow scheme.  Note also that only
 541       unconditional calls may use the fast scheme, since having to
 542       compute a condition expression could itself trash real
 543       registers.
 544
 545       Note this requires being able to examine an expression and
 546       determine whether or not evaluation of it might use a fixed
 547       register.  That requires knowledge of how the rest of this insn
 548       selector works.  Currently just the following 3 are regarded as
 549       safe -- hopefully they cover the majority of arguments in
 550       practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
 551    */
 552
 553    /* Note that the cee->regparms field is meaningless on ARM64 hosts
 554       (since there is only one calling convention) and so we always
 555       ignore it. */
 556
 557    n_args = 0;
 558    for (i = 0; args[i]; i++) {
 559       IRExpr* arg = args[i];
 560       if (UNLIKELY(arg->tag == Iex_VECRET)) {
 561          nVECRETs++;
 562       } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
 563          nGSPTRs++;
 564       }
 565       n_args++;
 566    }
 567
 568    /* If this fails, the IR is ill-formed */
 569    vassert(nGSPTRs == 0 || nGSPTRs == 1);
 570
 571    /* If we have a VECRET, allocate space on the stack for the return
 572       value, and record the stack pointer after that. */
 573    HReg r_vecRetAddr = INVALID_HREG;
 574    if (nVECRETs == 1) {
 575       vassert(retTy == Ity_V128 || retTy == Ity_V256);
 576       vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
 577       r_vecRetAddr = newVRegI(env);
 578       addInstr(env, ARM64Instr_AddToSP(-16));
 579       addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
 580    } else {
 581       // If either of these fail, the IR is ill-formed
 582       vassert(retTy != Ity_V128 && retTy != Ity_V256);
 583       vassert(nVECRETs == 0);
 584    }
 585
 586    argregs[0] = hregARM64_X0();
 587    argregs[1] = hregARM64_X1();
 588    argregs[2] = hregARM64_X2();
 589    argregs[3] = hregARM64_X3();
 590    argregs[4] = hregARM64_X4();
 591    argregs[5] = hregARM64_X5();
 592    argregs[6] = hregARM64_X6();
 593    argregs[7] = hregARM64_X7();
 594
 595    tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
 596    tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
 597
 598    /* First decide which scheme (slow or fast) is to be used.  First
 599       assume the fast scheme, and select slow if any contraindications
 600       (wow) appear. */
 601
 602    go_fast = True;
 603
 604    if (guard) {
 605       if (guard->tag == Iex_Const
 606           && guard->Iex.Const.con->tag == Ico_U1
 607           && guard->Iex.Const.con->Ico.U1 == True) {
 608          /* unconditional */
 609       } else {
 610          /* Not manifestly unconditional -- be conservative. */
 611          go_fast = False;
 612       }
 613    }
 614
 615    if (go_fast) {
 616       for (i = 0; i < n_args; i++) {
 617          if (mightRequireFixedRegs(args[i])) {
 618             go_fast = False;
 619             break;
 620          }
 621       }
 622    }
 623
 624    if (go_fast) {
 625       if (retTy == Ity_V128 || retTy == Ity_V256)
 626          go_fast = False;
 627    }
 628
 629    /* At this point the scheme to use has been established.  Generate
 630       code to get the arg values into the argument rregs.  If we run
 631       out of arg regs, give up. */
 632
 633    if (go_fast) {
 634
 635       /* FAST SCHEME */
 636       nextArgReg = 0;
 637
 638       for (i = 0; i < n_args; i++) {
 639          IRExpr* arg = args[i];
 640
 641          IRType  aTy = Ity_INVALID;
 642          if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
 643             aTy = typeOfIRExpr(env->type_env, args[i]);
 644
 645          if (nextArgReg >= ARM64_N_ARGREGS)
 646             return False; /* out of argregs */
 647
 648          if (aTy == Ity_I64) {
 649             addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
 650                                            iselIntExpr_R(env, args[i]) ));
 651             nextArgReg++;
 652          }
 653          else if (arg->tag == Iex_GSPTR) {
 654             vassert(0); //ATC
 655             addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
 656                                            hregARM64_X21() ));
 657             nextArgReg++;
 658          }
 659          else if (arg->tag == Iex_VECRET) {
 660             // because of the go_fast logic above, we can't get here,
 661             // since vector return values makes us use the slow path
 662             // instead.
 663             vassert(0);
 664          }
 665          else
 666             return False; /* unhandled arg type */
 667       }
 668
 669       /* Fast scheme only applies for unconditional calls.  Hence: */
 670       cc = ARM64cc_AL;
 671
 672    } else {
 673
 674       /* SLOW SCHEME; move via temporaries */
 675       nextArgReg = 0;
 676
 677       for (i = 0; i < n_args; i++) {
 678          IRExpr* arg = args[i];
 679
 680          IRType  aTy = Ity_INVALID;
 681          if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
 682             aTy = typeOfIRExpr(env->type_env, args[i]);
 683
 684          if (nextArgReg >= ARM64_N_ARGREGS)
 685             return False; /* out of argregs */
 686
 687          if (aTy == Ity_I64) {
 688             tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
 689             nextArgReg++;
 690          }
 691          else if (arg->tag == Iex_GSPTR) {
 692             vassert(0); //ATC
 693             tmpregs[nextArgReg] = hregARM64_X21();
 694             nextArgReg++;
 695          }
 696          else if (arg->tag == Iex_VECRET) {
 697             vassert(!hregIsInvalid(r_vecRetAddr));
 698             tmpregs[nextArgReg] = r_vecRetAddr;
 699             nextArgReg++;
 700          }
 701          else
 702             return False; /* unhandled arg type */
 703       }
 704
 705       /* Now we can compute the condition.  We can't do it earlier
 706          because the argument computations could trash the condition
 707          codes.  Be a bit clever to handle the common case where the
 708          guard is 1:Bit. */
 709       cc = ARM64cc_AL;
 710       if (guard) {
 711          if (guard->tag == Iex_Const
 712              && guard->Iex.Const.con->tag == Ico_U1
 713              && guard->Iex.Const.con->Ico.U1 == True) {
 714             /* unconditional -- do nothing */
 715          } else {
 716             cc = iselCondCode_C( env, guard );
 717          }
 718       }
 719
 720       /* Move the args to their final destinations. */
 721       for (i = 0; i < nextArgReg; i++) {
 722          vassert(!(hregIsInvalid(tmpregs[i])));
 723          /* None of these insns, including any spill code that might
 724             be generated, may alter the condition codes. */
 725          addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
 726       }
 727
 728    }
 729
 730    /* Should be assured by checks above */
 731    vassert(nextArgReg <= ARM64_N_ARGREGS);
 732
 733    /* Do final checks, set the return values, and generate the call
 734       instruction proper. */
 735    vassert(nGSPTRs == 0 || nGSPTRs == 1);
 736    vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
 737    vassert(*stackAdjustAfterCall == 0);
 738    vassert(is_RetLoc_INVALID(*retloc));
 739    switch (retTy) {
 740       case Ity_INVALID:
 741          /* Function doesn't return a value. */
 742          *retloc = mk_RetLoc_simple(RLPri_None);
 743          break;
 744       case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
 745          *retloc = mk_RetLoc_simple(RLPri_Int);
 746          break;
 747       case Ity_V128:
 748          *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
 749          *stackAdjustAfterCall = 16;
 750          break;
 751       case Ity_V256:
 752          vassert(0); // ATC
 753          *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
 754          *stackAdjustAfterCall = 32;
 755          break;
 756       default:
 757          /* IR can denote other possible return types, but we don't
 758             handle those here. */
 759          vassert(0);
 760    }
 761
 762    /* Finally, generate the call itself.  This needs the *retloc value
 763       set in the switch above, which is why it's at the end. */
 764
 765    /* nextArgReg doles out argument registers.  Since these are
 766       assigned in the order x0 .. x7, its numeric value at this point,
 767       which must be between 0 and 8 inclusive, is going to be equal to
 768       the number of arg regs in use for the call.  Hence bake that
 769       number into the call (we'll need to know it when doing register
 770       allocation, to know what regs the call reads.) */
 771
 772    target = (Addr)cee->addr;
 773    addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
 774
 775    return True; /* success */
 776 }
 777
 778
 779 /*---------------------------------------------------------*/
 780 /*--- ISEL: Integer expressions (64/32 bit)             ---*/
 781 /*---------------------------------------------------------*/
 782
 783 /* Select insns for an integer-typed expression, and add them to the
 784    code list.  Return a reg holding the result.  This reg will be a
 785    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
 786    want to modify it, ask for a new vreg, copy it in there, and modify
 787    the copy.  The register allocator will do its best to map both
 788    vregs to the same real register, so the copies will often disappear
 789    later in the game.
 790
 791    This should handle expressions of 64- and 32-bit type.  All results
 792    are returned in a 64-bit register.  For 32-bit expressions, the
 793    upper 32 bits are arbitrary, so you should mask or sign extend
 794    partial values if necessary.
 795 */
 796
 797 /* ---------------- RRS matching helper ---------------- */
 798
 799 /* This helper matches 64-bit integer expressions of the form
 800       {Add,Sub,And,Or,Xor}(E1, {Shl,Shr,Sar}(E2, immediate))
 801    and
 802       {Add,And,Or,Xor}({Shl,Shr,Sar}(E1, immediate), E2)
 803    which is a useful thing to do because AArch64 can compute those in
 804    a single instruction.
 805  */
 806 static Bool matchesRegRegShift(/*OUT*/ARM64RRSOp* mainOp,
 807                                /*OUT*/ARM64ShiftOp* shiftOp,
 808                                /*OUT*/UChar* amt,
 809                                /*OUT*/IRExpr** argUnshifted,
 810                                /*OUT*/IRExpr** argToBeShifted,
 811                                IRExpr* e)
 812 {
 813    *mainOp         = (ARM64RRSOp)0;
 814    *shiftOp        = (ARM64ShiftOp)0;
 815    *amt            = 0;
 816    *argUnshifted   = NULL;
 817    *argToBeShifted = NULL;
 818    if (e->tag != Iex_Binop) {
 819       return False;
 820    }
 821    const IROp irMainOp = e->Iex.Binop.op;
 822    Bool canSwap = True;
 823    switch (irMainOp) {
 824       case Iop_And64: *mainOp = ARM64rrs_AND; break;
 825       case Iop_Or64:  *mainOp = ARM64rrs_OR;  break;
 826       case Iop_Xor64: *mainOp = ARM64rrs_XOR; break;
 827       case Iop_Add64: *mainOp = ARM64rrs_ADD; break;
 828       case Iop_Sub64: *mainOp = ARM64rrs_SUB; canSwap = False; break;
 829       default: return False;
 830    }
 831    /* The root node is OK.  Now check the right (2nd) arg. */
 832    IRExpr* argL = e->Iex.Binop.arg1;
 833    IRExpr* argR = e->Iex.Binop.arg2;
 834
 835    // This loop runs either one or two iterations.  In the first iteration, we
 836    // check for a shiftable right (second) arg.  If that fails, at the end of
 837    // the first iteration, the args are swapped, if that is valid, and we go
 838    // round again, hence checking for a shiftable left (first) arg.
 839    UInt iterNo = 1;
 840    while (True) {
 841       vassert(iterNo == 1 || iterNo == 2);
 842       if (argR->tag == Iex_Binop) {
 843          const IROp irShiftOp = argR->Iex.Binop.op;
 844          if (irShiftOp == Iop_Shl64
 845              || irShiftOp == Iop_Shr64 || irShiftOp == Iop_Sar64) {
 846             IRExpr* argRL = argR->Iex.Binop.arg1;
 847             const IRExpr* argRR = argR->Iex.Binop.arg2;
 848             if (argRR->tag == Iex_Const) {
 849                const IRConst* argRRconst = argRR->Iex.Const.con;
 850                vassert(argRRconst->tag == Ico_U8); // due to typecheck rules
 851                const UChar amount = argRRconst->Ico.U8;
 852                if (amount >= 1 && amount <= 63) {
 853                   // We got a match \o/
 854                   // *mainOp is already set
 855                   switch (irShiftOp) {
 856                      case Iop_Shl64: *shiftOp = ARM64sh_SHL; break;
 857                      case Iop_Shr64: *shiftOp = ARM64sh_SHR; break;
 858                      case Iop_Sar64: *shiftOp = ARM64sh_SAR; break;
 859                      default: vassert(0); // guarded above
 860                   }
 861                   *amt = amount;
 862                   *argUnshifted = argL;
 863                   *argToBeShifted = argRL;
 864                   return True;
 865                }
 866             }
 867          }
 868       }
 869       // We failed to get a match in the first iteration.  So, provided the
 870       // root node isn't SUB, swap the arguments and make one further
 871       // iteration.  If that doesn't succeed, we must give up.
 872       if (iterNo == 1 && canSwap) {
 873          IRExpr* tmp = argL;
 874          argL = argR;
 875          argR = tmp;
 876          iterNo = 2;
 877          continue;
 878       }
 879       // Give up.
 880       return False;
 881    }
 882    /*NOTREACHED*/
 883  }
 884
 885 /* --------------------- AMode --------------------- */
 886
 887 /* Return an AMode which computes the value of the specified
 888    expression, possibly also adding insns to the code list as a
 889    result.  The expression may only be a 64-bit one.
 890 */
 891
 892 static Bool isValidScale ( UChar scale )
 893 {
 894    switch (scale) {
 895       case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
 896       default: return False;
 897    }
 898 }
 899
 900 static Bool sane_AMode ( ARM64AMode* am )
 901 {
 902    switch (am->tag) {
 903       case ARM64am_RI9:
 904          return
 905             toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
 906                     && (hregIsVirtual(am->ARM64am.RI9.reg)
 907                         /* || sameHReg(am->ARM64am.RI9.reg,
 908                                        hregARM64_X21()) */ )
 909                     && am->ARM64am.RI9.simm9 >= -256
 910                     && am->ARM64am.RI9.simm9 <= 255 );
 911       case ARM64am_RI12:
 912          return
 913             toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
 914                     && (hregIsVirtual(am->ARM64am.RI12.reg)
 915                         /* || sameHReg(am->ARM64am.RI12.reg,
 916                                        hregARM64_X21()) */ )
 917                     && am->ARM64am.RI12.uimm12 < 4096
 918                     && isValidScale(am->ARM64am.RI12.szB) );
 919       case ARM64am_RR:
 920          return
 921             toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
 922                     && hregIsVirtual(am->ARM64am.RR.base)
 923                     && hregClass(am->ARM64am.RR.index) == HRcInt64
 924                     && hregIsVirtual(am->ARM64am.RR.index) );
 925       default:
 926          vpanic("sane_AMode: unknown ARM64 AMode1 tag");
 927    }
 928 }
 929
 930 static
 931 ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
 932 {
 933    ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
 934    vassert(sane_AMode(am));
 935    return am;
 936 }
 937
 938 static
 939 ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
 940 {
 941    IRType ty = typeOfIRExpr(env->type_env,e);
 942    vassert(ty == Ity_I64);
 943
 944    ULong szBbits = 0;
 945    switch (dty) {
 946       case Ity_I64: szBbits = 3; break;
 947       case Ity_I32: szBbits = 2; break;
 948       case Ity_I16: szBbits = 1; break;
 949       case Ity_I8:  szBbits = 0; break;
 950       default: vassert(0);
 951    }
 952
 953    /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
 954       we're going to create an amode suitable for LDU* or STU*
 955       instructions, which use unscaled immediate offsets.  */
 956    if (e->tag == Iex_Binop
 957        && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
 958        && e->Iex.Binop.arg2->tag == Iex_Const
 959        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
 960       Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
 961       if (simm >= -255 && simm <= 255) {
 962          /* Although the gating condition might seem to be
 963                simm >= -256 && simm <= 255
 964             we will need to negate simm in the case where the op is Sub64.
 965             Hence limit the lower value to -255 in order that its negation
 966             is representable. */
 967          HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
 968          if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
 969          return ARM64AMode_RI9(reg, (Int)simm);
 970       }
 971    }
 972
 973    /* Add64(expr, uimm12 * transfer-size) */
 974    if (e->tag == Iex_Binop
 975        && e->Iex.Binop.op == Iop_Add64
 976        && e->Iex.Binop.arg2->tag == Iex_Const
 977        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
 978       ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
 979       ULong szB  = 1 << szBbits;
 980       if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
 981           && (uimm >> szBbits) < 4096) {
 982          HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
 983          return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
 984       }
 985    }
 986
 987    /* Add64(expr1, expr2) */
 988    if (e->tag == Iex_Binop
 989        && e->Iex.Binop.op == Iop_Add64) {
 990       HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
 991       HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
 992       return ARM64AMode_RR(reg1, reg2);
 993    }
 994
 995    /* Doesn't match anything in particular.  Generate it into
 996       a register and use that. */
 997    HReg reg = iselIntExpr_R(env, e);
 998    return ARM64AMode_RI9(reg, 0);
 999 }
1000
1001
1002 /* --------------------- RIA --------------------- */
1003
1004 /* Select instructions to generate 'e' into a RIA. */
1005
1006 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
1007 {
1008    ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
1009    /* sanity checks ... */
1010    switch (ri->tag) {
1011       case ARM64riA_I12:
1012          vassert(ri->ARM64riA.I12.imm12 < 4096);
1013          vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
1014          return ri;
1015       case ARM64riA_R:
1016          vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
1017          vassert(hregIsVirtual(ri->ARM64riA.R.reg));
1018          return ri;
1019       default:
1020          vpanic("iselIntExpr_RIA: unknown arm RIA tag");
1021    }
1022 }
1023
1024 /* DO NOT CALL THIS DIRECTLY ! */
1025 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
1026 {
1027    IRType ty = typeOfIRExpr(env->type_env,e);
1028    vassert(ty == Ity_I64 || ty == Ity_I32);
1029
1030    /* special case: immediate */
1031    if (e->tag == Iex_Const) {
1032       ULong u = 0xF000000ULL; /* invalid */
1033       switch (e->Iex.Const.con->tag) {
1034          case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
1035          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1036          default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
1037       }
1038       if (0 == (u & ~(0xFFFULL << 0)))
1039          return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
1040       if (0 == (u & ~(0xFFFULL << 12)))
1041          return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
1042       /* else fail, fall through to default case */
1043    }
1044
1045    /* default case: calculate into a register and return that */
1046    {
1047       HReg r = iselIntExpr_R ( env, e );
1048       return ARM64RIA_R(r);
1049    }
1050 }
1051
1052
1053 /* --------------------- RIL --------------------- */
1054
1055 /* Select instructions to generate 'e' into a RIL.  At this point we
1056    have to deal with the strange bitfield-immediate encoding for logic
1057    instructions. */
1058
1059
1060 // The following four functions
1061 //    CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
1062 // are copied, with modifications, from
1063 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
1064 // which has the following copyright notice:
1065 /*
1066    Copyright 2013, ARM Limited
1067    All rights reserved.
1068
1069    Redistribution and use in source and binary forms, with or without
1070    modification, are permitted provided that the following conditions are met:
1071
1072    * Redistributions of source code must retain the above copyright notice,
1073      this list of conditions and the following disclaimer.
1074    * Redistributions in binary form must reproduce the above copyright notice,
1075      this list of conditions and the following disclaimer in the documentation
1076      and/or other materials provided with the distribution.
1077    * Neither the name of ARM Limited nor the names of its contributors may be
1078      used to endorse or promote products derived from this software without
1079      specific prior written permission.
1080
1081    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
1082    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1083    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1084    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
1085    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1086    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1087    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1088    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1089    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1090    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1091 */
1092
1093 static Int CountLeadingZeros(ULong value, Int width)
1094 {
1095    vassert(width == 32 || width == 64);
1096    Int count = 0;
1097    ULong bit_test = 1ULL << (width - 1);
1098    while ((count < width) && ((bit_test & value) == 0)) {
1099       count++;
1100       bit_test >>= 1;
1101    }
1102    return count;
1103 }
1104
1105 static Int CountTrailingZeros(ULong value, Int width)
1106 {
1107    vassert(width == 32 || width == 64);
1108    Int count = 0;
1109    while ((count < width) && (((value >> count) & 1) == 0)) {
1110       count++;
1111    }
1112    return count;
1113 }
1114
1115 static Int CountSetBits(ULong value, Int width)
1116 {
1117    // TODO: Other widths could be added here, as the implementation already
1118    // supports them.
1119    vassert(width == 32 || width == 64);
1120
1121    // Mask out unused bits to ensure that they are not counted.
1122    value &= (0xffffffffffffffffULL >> (64-width));
1123
1124    // Add up the set bits.
1125    // The algorithm works by adding pairs of bit fields together iteratively,
1126    // where the size of each bit field doubles each time.
1127    // An example for an 8-bit value:
1128    // Bits: h g f e d c b a
1129    // \ | \ | \ | \ |
1130    // value = h+g f+e d+c b+a
1131    // \ | \ |
1132    // value = h+g+f+e d+c+b+a
1133    // \ |
1134    // value = h+g+f+e+d+c+b+a
1135    value = ((value >>  1) & 0x5555555555555555ULL)
1136                  + (value & 0x5555555555555555ULL);
1137    value = ((value >>  2) & 0x3333333333333333ULL)
1138                  + (value & 0x3333333333333333ULL);
1139    value = ((value >>  4) & 0x0f0f0f0f0f0f0f0fULL)
1140                  + (value & 0x0f0f0f0f0f0f0f0fULL);
1141    value = ((value >>  8) & 0x00ff00ff00ff00ffULL)
1142                  + (value & 0x00ff00ff00ff00ffULL);
1143    value = ((value >> 16) & 0x0000ffff0000ffffULL)
1144                  + (value & 0x0000ffff0000ffffULL);
1145    value = ((value >> 32) & 0x00000000ffffffffULL)
1146                  + (value & 0x00000000ffffffffULL);
1147
1148    return value;
1149 }
1150
1151 static Bool isImmLogical ( /*OUT*/UInt* n,
1152                            /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1153                            ULong value, UInt width )
1154 {
1155   // Test if a given value can be encoded in the immediate field of a
1156   // logical instruction.
1157
1158   // If it can be encoded, the function returns true, and values
1159   // pointed to by n, imm_s and imm_r are updated with immediates
1160   // encoded in the format required by the corresponding fields in the
1161   // logical instruction.  If it can not be encoded, the function
1162   // returns false, and the values pointed to by n, imm_s and imm_r
1163   // are undefined.
1164   vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1165   vassert(width == 32 || width == 64);
1166
1167   // Logical immediates are encoded using parameters n, imm_s and imm_r using
1168   // the following table:
1169   //
1170   // N imms immr size S R
1171   // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
1172   // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
1173   // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
1174   // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
1175   // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
1176   // 0 11110s xxxxxr 2 UInt(s) UInt(r)
1177   // (s bits must not be all set)
1178   //
1179   // A pattern is constructed of size bits, where the least significant S+1
1180   // bits are set. The pattern is rotated right by R, and repeated across a
1181   // 32 or 64-bit value, depending on destination register width.
1182   //
1183   // To test if an arbitrary immediate can be encoded using this scheme, an
1184   // iterative algorithm is used.
1185   //
1186   // TODO: This code does not consider using X/W register overlap to support
1187   // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1188   // are an encodable logical immediate.
1189
1190   // 1. If the value has all set or all clear bits, it can't be encoded.
1191   if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1192       ((width == 32) && (value == 0xffffffff))) {
1193     return False;
1194   }
1195
1196   UInt lead_zero = CountLeadingZeros(value, width);
1197   UInt lead_one = CountLeadingZeros(~value, width);
1198   UInt trail_zero = CountTrailingZeros(value, width);
1199   UInt trail_one = CountTrailingZeros(~value, width);
1200   UInt set_bits = CountSetBits(value, width);
1201
1202   // The fixed bits in the immediate s field.
1203   // If width == 64 (X reg), start at 0xFFFFFF80.
1204   // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1205   // widths won't be executed.
1206   Int imm_s_fixed = (width == 64) ? -128 : -64;
1207   Int imm_s_mask = 0x3F;
1208
1209   for (;;) {
1210     // 2. If the value is two bits wide, it can be encoded.
1211     if (width == 2) {
1212       *n = 0;
1213       *imm_s = 0x3C;
1214       *imm_r = (value & 3) - 1;
1215       return True;
1216     }
1217
1218     *n = (width == 64) ? 1 : 0;
1219     *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1220     if ((lead_zero + set_bits) == width) {
1221       *imm_r = 0;
1222     } else {
1223       *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1224     }
1225
1226     // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1227     // the bit width of the value, it can be encoded.
1228     if (lead_zero + trail_zero + set_bits == width) {
1229       return True;
1230     }
1231
1232     // 4. If the sum of leading ones, trailing ones and unset bits in the
1233     // value is equal to the bit width of the value, it can be encoded.
1234     if (lead_one + trail_one + (width - set_bits) == width) {
1235       return True;
1236     }
1237
1238     // 5. If the most-significant half of the bitwise value is equal to the
1239     // least-significant half, return to step 2 using the least-significant
1240     // half of the value.
1241     ULong mask = (1ULL << (width >> 1)) - 1;
1242     if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1243       width >>= 1;
1244       set_bits >>= 1;
1245       imm_s_fixed >>= 1;
1246       continue;
1247     }
1248
1249     // 6. Otherwise, the value can't be encoded.
1250     return False;
1251   }
1252 }
1253
1254
1255 /* Create a RIL for the given immediate, if it is representable, or
1256    return NULL if not. */
1257
1258 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1259 {
1260    UInt n = 0, imm_s = 0, imm_r = 0;
1261    Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1262    if (!ok) return NULL;
1263    vassert(n < 2 && imm_s < 64 && imm_r < 64);
1264    return ARM64RIL_I13(n, imm_r, imm_s);
1265 }
1266
1267 /* So, finally .. */
1268
1269 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1270 {
1271    ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1272    /* sanity checks ... */
1273    switch (ri->tag) {
1274       case ARM64riL_I13:
1275          vassert(ri->ARM64riL.I13.bitN < 2);
1276          vassert(ri->ARM64riL.I13.immR < 64);
1277          vassert(ri->ARM64riL.I13.immS < 64);
1278          return ri;
1279       case ARM64riL_R:
1280          vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1281          vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1282          return ri;
1283       default:
1284          vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1285    }
1286 }
1287
1288 /* DO NOT CALL THIS DIRECTLY ! */
1289 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1290 {
1291    IRType ty = typeOfIRExpr(env->type_env,e);
1292    vassert(ty == Ity_I64 || ty == Ity_I32);
1293
1294    /* special case: immediate */
1295    if (e->tag == Iex_Const) {
1296       ARM64RIL* maybe = NULL;
1297       if (ty == Ity_I64) {
1298          vassert(e->Iex.Const.con->tag == Ico_U64);
1299          maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1300       } else {
1301          vassert(ty == Ity_I32);
1302          vassert(e->Iex.Const.con->tag == Ico_U32);
1303          UInt  u32 = e->Iex.Const.con->Ico.U32;
1304          ULong u64 = (ULong)u32;
1305          /* First try with 32 leading zeroes. */
1306          maybe = mb_mkARM64RIL_I(u64);
1307          /* If that doesn't work, try with 2 copies, since it doesn't
1308             matter what winds up in the upper 32 bits. */
1309          if (!maybe) {
1310             maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1311          }
1312       }
1313       if (maybe) return maybe;
1314       /* else fail, fall through to default case */
1315    }
1316
1317    /* default case: calculate into a register and return that */
1318    {
1319       HReg r = iselIntExpr_R ( env, e );
1320       return ARM64RIL_R(r);
1321    }
1322 }
1323
1324
1325 /* --------------------- RI6 --------------------- */
1326
1327 /* Select instructions to generate 'e' into a RI6. */
1328
1329 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1330 {
1331    ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1332    /* sanity checks ... */
1333    switch (ri->tag) {
1334       case ARM64ri6_I6:
1335          vassert(ri->ARM64ri6.I6.imm6 < 64);
1336          vassert(ri->ARM64ri6.I6.imm6 > 0);
1337          return ri;
1338       case ARM64ri6_R:
1339          vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1340          vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1341          return ri;
1342       default:
1343          vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1344    }
1345 }
1346
1347 /* DO NOT CALL THIS DIRECTLY ! */
1348 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1349 {
1350    IRType ty = typeOfIRExpr(env->type_env,e);
1351    vassert(ty == Ity_I64 || ty == Ity_I8);
1352
1353    /* special case: immediate */
1354    if (e->tag == Iex_Const) {
1355       switch (e->Iex.Const.con->tag) {
1356          case Ico_U8: {
1357             UInt u = e->Iex.Const.con->Ico.U8;
1358             if (u > 0 && u < 64)
1359               return ARM64RI6_I6(u);
1360             break;
1361          default:
1362             break;
1363          }
1364       }
1365       /* else fail, fall through to default case */
1366    }
1367
1368    /* default case: calculate into a register and return that */
1369    {
1370       HReg r = iselIntExpr_R ( env, e );
1371       return ARM64RI6_R(r);
1372    }
1373 }
1374
1375
1376 /* ------------------- CondCode ------------------- */
1377
1378 /* Generate code to evaluated a bit-typed expression, returning the
1379    condition code which would correspond when the expression would
1380    notionally have returned 1.
1381
1382    Note that iselCondCode_C and iselCondCode_R are mutually recursive.  For
1383    future changes to either of them, take care not to introduce an infinite
1384    loop involving the two of them.
1385 */
1386 static ARM64CondCode iselCondCode_C ( ISelEnv* env, IRExpr* e )
1387 {
1388    ARM64CondCode cc = iselCondCode_C_wrk(env,e);
1389    vassert(cc != ARM64cc_NV);
1390    return cc;
1391 }
1392
1393 static ARM64CondCode iselCondCode_C_wrk ( ISelEnv* env, IRExpr* e )
1394 {
1395    vassert(e);
1396    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1397
1398    /* var */
1399    if (e->tag == Iex_RdTmp) {
1400       HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1401       /* Cmp doesn't modify rTmp; so this is OK. */
1402       ARM64RIL* one = mb_mkARM64RIL_I(1);
1403       vassert(one);
1404       addInstr(env, ARM64Instr_Test(rTmp, one));
1405       return ARM64cc_NE;
1406    }
1407
1408    /* Constant 1:Bit */
1409    if (e->tag == Iex_Const) {
1410       /* This is a very stupid translation.  Hopefully it doesn't occur much,
1411          if ever. */
1412       vassert(e->Iex.Const.con->tag == Ico_U1);
1413       vassert(e->Iex.Const.con->Ico.U1 == True
1414               || e->Iex.Const.con->Ico.U1 == False);
1415       HReg rTmp = newVRegI(env);
1416       addInstr(env, ARM64Instr_Imm64(rTmp, 0));
1417       ARM64RIL* one = mb_mkARM64RIL_I(1);
1418       vassert(one);
1419       addInstr(env, ARM64Instr_Test(rTmp, one));
1420       return e->Iex.Const.con->Ico.U1 ? ARM64cc_EQ : ARM64cc_NE;
1421    }
1422
1423    /* Not1(e) */
1424    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1425       /* Generate code for the arg, and negate the test condition */
1426       ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
1427       if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1428         return ARM64cc_AL;
1429       } else {
1430         return 1 ^ cc;
1431       }
1432    }
1433
1434    /* --- patterns rooted at: 64to1 --- */
1435
1436    if (e->tag == Iex_Unop
1437        && e->Iex.Unop.op == Iop_64to1) {
1438       HReg      rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1439       ARM64RIL* one  = mb_mkARM64RIL_I(1);
1440       vassert(one); /* '1' must be representable */
1441       addInstr(env, ARM64Instr_Test(rTmp, one));
1442       return ARM64cc_NE;
1443    }
1444
1445    /* --- patterns rooted at: CmpNEZ8 --- */
1446
1447    if (e->tag == Iex_Unop
1448        && e->Iex.Unop.op == Iop_CmpNEZ8) {
1449       HReg      r1  = iselIntExpr_R(env, e->Iex.Unop.arg);
1450       ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1451       addInstr(env, ARM64Instr_Test(r1, xFF));
1452       return ARM64cc_NE;
1453    }
1454
1455    /* --- patterns rooted at: CmpNEZ16 --- */
1456
1457    if (e->tag == Iex_Unop
1458        && e->Iex.Unop.op == Iop_CmpNEZ16) {
1459       HReg      r1    = iselIntExpr_R(env, e->Iex.Unop.arg);
1460       ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
1461       addInstr(env, ARM64Instr_Test(r1, xFFFF));
1462       return ARM64cc_NE;
1463    }
1464
1465    /* --- patterns rooted at: CmpNEZ64 --- */
1466
1467    if (e->tag == Iex_Unop
1468        && e->Iex.Unop.op == Iop_CmpNEZ64) {
1469       HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1470       ARM64RIA* zero = ARM64RIA_I12(0,0);
1471       addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1472       return ARM64cc_NE;
1473    }
1474
1475    /* --- patterns rooted at: CmpNEZ32 --- */
1476
1477    if (e->tag == Iex_Unop
1478        && e->Iex.Unop.op == Iop_CmpNEZ32) {
1479       HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1480       ARM64RIA* zero = ARM64RIA_I12(0,0);
1481       addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1482       return ARM64cc_NE;
1483    }
1484
1485    /* --- Cmp*64*(x,y) --- */
1486    if (e->tag == Iex_Binop
1487        && (e->Iex.Binop.op == Iop_CmpEQ64
1488            || e->Iex.Binop.op == Iop_CmpNE64
1489            || e->Iex.Binop.op == Iop_CmpLT64S
1490            || e->Iex.Binop.op == Iop_CmpLT64U
1491            || e->Iex.Binop.op == Iop_CmpLE64S
1492            || e->Iex.Binop.op == Iop_CmpLE64U
1493            || e->Iex.Binop.op == Iop_CasCmpEQ64
1494            || e->Iex.Binop.op == Iop_CasCmpNE64)) {
1495       HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1496       ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1497       addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1498       switch (e->Iex.Binop.op) {
1499          case Iop_CmpEQ64: case Iop_CasCmpEQ64: return ARM64cc_EQ;
1500          case Iop_CmpNE64: case Iop_CasCmpNE64: return ARM64cc_NE;
1501          case Iop_CmpLT64S: return ARM64cc_LT;
1502          case Iop_CmpLT64U: return ARM64cc_CC;
1503          case Iop_CmpLE64S: return ARM64cc_LE;
1504          case Iop_CmpLE64U: return ARM64cc_LS;
1505          default: vpanic("iselCondCode_C(arm64): CmpXX64");
1506       }
1507    }
1508
1509    /* --- Cmp*32*(x,y) --- */
1510    if (e->tag == Iex_Binop
1511        && (e->Iex.Binop.op == Iop_CmpEQ32
1512            || e->Iex.Binop.op == Iop_CmpNE32
1513            || e->Iex.Binop.op == Iop_CmpLT32S
1514            || e->Iex.Binop.op == Iop_CmpLT32U
1515            || e->Iex.Binop.op == Iop_CmpLE32S
1516            || e->Iex.Binop.op == Iop_CmpLE32U
1517            || e->Iex.Binop.op == Iop_CasCmpEQ32
1518            || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1519       HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1520       ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1521       addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1522       switch (e->Iex.Binop.op) {
1523          case Iop_CmpEQ32: case Iop_CasCmpEQ32: return ARM64cc_EQ;
1524          case Iop_CmpNE32: case Iop_CasCmpNE32: return ARM64cc_NE;
1525          case Iop_CmpLT32S: return ARM64cc_LT;
1526          case Iop_CmpLT32U: return ARM64cc_CC;
1527          case Iop_CmpLE32S: return ARM64cc_LE;
1528          case Iop_CmpLE32U: return ARM64cc_LS;
1529          default: vpanic("iselCondCode_C(arm64): CmpXX32");
1530       }
1531    }
1532
1533    /* --- Cmp*16*(x,y) --- */
1534    if (e->tag == Iex_Binop
1535        && (e->Iex.Binop.op == Iop_CasCmpEQ16
1536            || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1537       HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1538       HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
1539       HReg argL2 = widen_z_16_to_64(env, argL);
1540       HReg argR2 = widen_z_16_to_64(env, argR);
1541       addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1542       switch (e->Iex.Binop.op) {
1543          case Iop_CasCmpEQ16: return ARM64cc_EQ;
1544          case Iop_CasCmpNE16: return ARM64cc_NE;
1545          default: vpanic("iselCondCode_C(arm64): CmpXX16");
1546       }
1547    }
1548
1549    /* --- Cmp*8*(x,y) --- */
1550    if (e->tag == Iex_Binop
1551        && (e->Iex.Binop.op == Iop_CasCmpEQ8
1552            || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1553       HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1554       HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
1555       HReg argL2 = widen_z_8_to_64(env, argL);
1556       HReg argR2 = widen_z_8_to_64(env, argR);
1557       addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1558       switch (e->Iex.Binop.op) {
1559          case Iop_CasCmpEQ8: return ARM64cc_EQ;
1560          case Iop_CasCmpNE8: return ARM64cc_NE;
1561          default: vpanic("iselCondCode_C(arm64): CmpXX8");
1562       }
1563    }
1564
1565    /* --- And1(x,y), Or1(x,y) --- */
1566    if (e->tag == Iex_Binop
1567         && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1568       HReg tmp = iselCondCode_R(env, e);
1569       ARM64RIL* one = mb_mkARM64RIL_I(1);
1570       vassert(one);
1571       addInstr(env, ARM64Instr_Test(tmp, one));
1572       return ARM64cc_NE;
1573    }
1574
1575    ppIRExpr(e);
1576    vpanic("iselCondCode_C");
1577 }
1578
1579
1580 /* --------------------- CONDCODE as int reg --------------------- */
1581
1582 /* Generate code to evaluated a bit-typed expression, returning the resulting
1583    value in bit 0 of an integer register.  WARNING: all of the other bits in the
1584    register can be arbitrary.  Callers must mask them off or otherwise ignore
1585    them, as necessary.
1586
1587    Note that iselCondCode_C and iselCondCode_R are mutually recursive.  For
1588    future changes to either of them, take care not to introduce an infinite
1589    loop involving the two of them.
1590 */
1591 static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e )
1592 {
1593    /* Uh, there's nothing we can sanity check here, unfortunately. */
1594    return iselCondCode_R_wrk(env,e);
1595 }
1596
1597 /* DO NOT CALL THIS DIRECTLY ! */
1598 static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e )
1599 {
1600    vassert(e);
1601    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1602
1603    /* var */
1604    if (e->tag == Iex_RdTmp) {
1605       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1606    }
1607
1608    /* And1(x,y), Or1(x,y) */
1609    if (e->tag == Iex_Binop
1610        && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1611       HReg res = newVRegI(env);
1612       HReg x_as_64 = iselCondCode_R(env, e->Iex.Binop.arg1);
1613       HReg y_as_64 = iselCondCode_R(env, e->Iex.Binop.arg2);
1614       ARM64LogicOp lop
1615          = e->Iex.Binop.op == Iop_And1 ? ARM64lo_AND : ARM64lo_OR;
1616       addInstr(env, ARM64Instr_Logic(res, x_as_64, ARM64RIL_R(y_as_64), lop));
1617       return res;
1618    }
1619
1620    /* Anything else, we hand off to iselCondCode_C and force the value into a
1621       register. */
1622    HReg res = newVRegI(env);
1623    ARM64CondCode cc = iselCondCode_C(env, e);
1624    addInstr(env, ARM64Instr_Set64(res, cc));
1625    return res;
1626
1627    ppIRExpr(e);
1628    vpanic("iselCondCode_R(amd64)");
1629 }
1630
1631
1632 /* --------------------- Reg --------------------- */
1633
1634 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1635 {
1636    HReg r = iselIntExpr_R_wrk(env, e);
1637    /* sanity checks ... */
1638 #  if 0
1639    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1640 #  endif
1641    vassert(hregClass(r) == HRcInt64);
1642    vassert(hregIsVirtual(r));
1643    return r;
1644 }
1645
1646 /* DO NOT CALL THIS DIRECTLY ! */
1647 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1648 {
1649    IRType ty = typeOfIRExpr(env->type_env,e);
1650    vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1651
1652    switch (e->tag) {
1653
1654    /* --------- TEMP --------- */
1655    case Iex_RdTmp: {
1656       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1657    }
1658
1659    /* --------- LOAD --------- */
1660    case Iex_Load: {
1661       HReg dst  = newVRegI(env);
1662
1663       if (e->Iex.Load.end != Iend_LE)
1664          goto irreducible;
1665
1666       if (ty == Ity_I64) {
1667          ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1668          addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1669          return dst;
1670       }
1671       if (ty == Ity_I32) {
1672          ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1673          addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1674          return dst;
1675       }
1676       if (ty == Ity_I16) {
1677          ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1678          addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1679          return dst;
1680       }
1681       if (ty == Ity_I8) {
1682          ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1683          addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1684          return dst;
1685       }
1686       break;
1687    }
1688
1689    /* --------- BINARY OP --------- */
1690    case Iex_Binop: {
1691
1692       ARM64LogicOp lop = 0; /* invalid */
1693       ARM64ShiftOp sop = 0; /* invalid */
1694
1695       /* Special-case 0-x into a Neg instruction.  Not because it's
1696          particularly useful but more so as to give value flow using
1697          this instruction, so as to check its assembly correctness for
1698          implementation of Left32/Left64. */
1699       switch (e->Iex.Binop.op) {
1700          case Iop_Sub64:
1701             if (isZeroU64(e->Iex.Binop.arg1)) {
1702                HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1703                HReg dst  = newVRegI(env);
1704                addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1705                return dst;
1706             }
1707             break;
1708          default:
1709             break;
1710       }
1711
1712       /* AND64/OR64/XOR64/ADD64/SUB64(e1, e2 shifted by imm)
1713          AND64/OR64/XOR64/ADD64(e1 shifted by imm, e2)
1714       */
1715       {
1716          switch (e->Iex.Binop.op) {
1717             case Iop_And64: case Iop_Or64: case Iop_Xor64:
1718             case Iop_Add64: case Iop_Sub64:{
1719                ARM64RRSOp mainOp = ARM64rrs_INVALID;
1720                ARM64ShiftOp shiftOp = (ARM64ShiftOp)0; // Invalid
1721                IRExpr* argUnshifted = NULL;
1722                IRExpr* argToBeShifted = NULL;
1723                UChar amt = 0;
1724                if (matchesRegRegShift(&mainOp, &shiftOp, &amt, &argUnshifted,
1725                                       &argToBeShifted, e)) {
1726                   HReg rDst = newVRegI(env);
1727                   HReg rUnshifted = iselIntExpr_R(env, argUnshifted);
1728                   HReg rToBeShifted = iselIntExpr_R(env, argToBeShifted);
1729                   addInstr(env, ARM64Instr_RRS(rDst, rUnshifted, rToBeShifted,
1730                                                shiftOp, amt, mainOp));
1731                   return rDst;
1732                }
1733             }
1734             default:
1735                break;
1736          }
1737       }
1738
1739       /* ADD/SUB(e1, e2) (for any e1, e2) */
1740       switch (e->Iex.Binop.op) {
1741          case Iop_Add64: case Iop_Add32:
1742          case Iop_Sub64: case Iop_Sub32: {
1743             Bool      isAdd = e->Iex.Binop.op == Iop_Add64
1744                               || e->Iex.Binop.op == Iop_Add32;
1745             HReg      dst   = newVRegI(env);
1746             HReg      argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1747             ARM64RIA* argR  = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1748             addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1749             return dst;
1750          }
1751          default:
1752             break;
1753       }
1754
1755       /* AND/OR/XOR(e1, e2) (for any e1, e2) */
1756       switch (e->Iex.Binop.op) {
1757          case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1758          case Iop_Or64:  case Iop_Or32:  lop = ARM64lo_OR;  goto log_binop;
1759          case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1760          log_binop: {
1761             HReg      dst  = newVRegI(env);
1762             HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1763             ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1764             addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1765             return dst;
1766          }
1767          default:
1768             break;
1769       }
1770
1771       /* SHL/SHR/SAR */
1772       switch (e->Iex.Binop.op) {
1773          case Iop_Shr64:                 sop = ARM64sh_SHR; goto sh_binop;
1774          case Iop_Sar64:                 sop = ARM64sh_SAR; goto sh_binop;
1775          case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1776          sh_binop: {
1777             HReg      dst  = newVRegI(env);
1778             HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1779             ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1780             addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1781             return dst;
1782          }
1783          case Iop_Shr32:
1784          case Iop_Sar32: {
1785             Bool      zx   = e->Iex.Binop.op == Iop_Shr32;
1786             HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1787             ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1788             HReg      dst  = zx ? widen_z_32_to_64(env, argL)
1789                                 : widen_s_32_to_64(env, argL);
1790             addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1791             return dst;
1792          }
1793          default: break;
1794       }
1795
1796       /* MUL */
1797       if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1798          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1799          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1800          HReg dst  = newVRegI(env);
1801          addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1802          return dst;
1803       }
1804
1805       /* MULL */
1806       if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1807          Bool isS  = e->Iex.Binop.op == Iop_MullS32;
1808          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1809          HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1810          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1811          HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1812          HReg dst  = newVRegI(env);
1813          addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1814          return dst;
1815       }
1816
1817       /* Handle misc other ops. */
1818
1819       if (e->Iex.Binop.op == Iop_Max32U) {
1820          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1821          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1822          HReg dst  = newVRegI(env);
1823          addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1824          addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1825          return dst;
1826       }
1827
1828       if (e->Iex.Binop.op == Iop_32HLto64) {
1829          HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1830          HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1831          HReg lo32  = widen_z_32_to_64(env, lo32s);
1832          HReg hi32  = newVRegI(env);
1833          addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1834                                         ARM64sh_SHL));
1835          addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1836                                         ARM64lo_OR));
1837          return hi32;
1838       }
1839
1840       if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1841          Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1842          HReg dL  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1843          HReg dR  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1844          HReg dst = newVRegI(env);
1845          HReg imm = newVRegI(env);
1846          /* Do the compare (FCMP), which sets NZCV in PSTATE.  Then
1847             create in dst, the IRCmpF64Result encoded result. */
1848          addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1849          addInstr(env, ARM64Instr_Imm64(dst, 0));
1850          addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1851          addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1852          addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1853          addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1854          addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1855          addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1856          addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1857          addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1858          return dst;
1859       }
1860
1861       { /* local scope */
1862         ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1863         Bool       srcIsD = False;
1864         switch (e->Iex.Binop.op) {
1865            case Iop_F64toI64S:
1866               cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1867            case Iop_F64toI64U:
1868               cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1869            case Iop_F64toI32S:
1870               cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1871            case Iop_F64toI32U:
1872               cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1873            case Iop_F32toI32S:
1874               cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1875            case Iop_F32toI32U:
1876               cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1877            case Iop_F32toI64S:
1878               cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1879            case Iop_F32toI64U:
1880               cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1881            default:
1882               break;
1883         }
1884         if (cvt_op != ARM64cvt_INVALID) {
1885            /* This is all a bit dodgy, because we can't handle a
1886               non-constant (not-known-at-JIT-time) rounding mode
1887               indication.  That's because there's no instruction
1888               AFAICS that does this conversion but rounds according to
1889               FPCR.RM, so we have to bake the rounding mode into the
1890               instruction right now.  But that should be OK because
1891               (1) the front end attaches a literal Irrm_ value to the
1892               conversion binop, and (2) iropt will never float that
1893               off via CSE, into a literal.  Hence we should always
1894               have an Irrm_ value as the first arg. */
1895            IRExpr* arg1 = e->Iex.Binop.arg1;
1896            if (arg1->tag != Iex_Const) goto irreducible;
1897            IRConst* arg1con = arg1->Iex.Const.con;
1898            vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1899            UInt irrm = arg1con->Ico.U32;
1900            /* Find the ARM-encoded equivalent for |irrm|. */
1901            UInt armrm = 4; /* impossible */
1902            switch (irrm) {
1903               case Irrm_NEAREST: armrm = 0; break;
1904               case Irrm_NegINF:  armrm = 2; break;
1905               case Irrm_PosINF:  armrm = 1; break;
1906               case Irrm_ZERO:    armrm = 3; break;
1907               default: goto irreducible;
1908            }
1909            HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1910                          (env, e->Iex.Binop.arg2);
1911            HReg dst = newVRegI(env);
1912            addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1913            return dst;
1914         }
1915       } /* local scope */
1916
1917       /* All cases involving host-side helper calls. */
1918       void* fn = NULL;
1919       switch (e->Iex.Binop.op) {
1920          case Iop_DivU32:
1921             fn = &h_calc_udiv32_w_arm_semantics; break;
1922          case Iop_DivS32:
1923             fn = &h_calc_sdiv32_w_arm_semantics; break;
1924          case Iop_DivU64:
1925             fn = &h_calc_udiv64_w_arm_semantics; break;
1926          case Iop_DivS64:
1927             fn = &h_calc_sdiv64_w_arm_semantics; break;
1928          default:
1929             break;
1930       }
1931
1932       if (fn) {
1933          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1934          HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1935          HReg res  = newVRegI(env);
1936          addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1937          addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1938          addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
1939                                         2, mk_RetLoc_simple(RLPri_Int) ));
1940          addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1941          return res;
1942       }
1943
1944       break;
1945    }
1946
1947    /* --------- UNARY OP --------- */
1948    case Iex_Unop: {
1949
1950       switch (e->Iex.Unop.op) {
1951          case Iop_16Uto64: {
1952             /* This probably doesn't occur often enough to be worth
1953                rolling the extension into the load. */
1954             IRExpr* arg = e->Iex.Unop.arg;
1955             HReg    src = iselIntExpr_R(env, arg);
1956             HReg    dst = widen_z_16_to_64(env, src);
1957             return dst;
1958          }
1959          case Iop_32Uto64: {
1960             IRExpr* arg = e->Iex.Unop.arg;
1961             if (arg->tag == Iex_Load) {
1962                /* This correctly zero extends because _LdSt32 is
1963                   defined to do a zero extending load. */
1964                HReg dst = newVRegI(env);
1965                ARM64AMode* am
1966                   = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1967                addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1968                return dst;
1969             }
1970             /* else be lame and mask it  */
1971             HReg src  = iselIntExpr_R(env, arg);
1972             HReg dst  = widen_z_32_to_64(env, src);
1973             return dst;
1974          }
1975          case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
1976          case Iop_8Uto64: {
1977             IRExpr* arg = e->Iex.Unop.arg;
1978             if (arg->tag == Iex_Load) {
1979                /* This correctly zero extends because _LdSt8 is
1980                   defined to do a zero extending load. */
1981                HReg dst = newVRegI(env);
1982                ARM64AMode* am
1983                   = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
1984                addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
1985                return dst;
1986             }
1987             /* else be lame and mask it  */
1988             HReg src = iselIntExpr_R(env, arg);
1989             HReg dst = widen_z_8_to_64(env, src);
1990             return dst;
1991          }
1992          case Iop_128HIto64: {
1993             HReg rHi, rLo;
1994             iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1995             return rHi; /* and abandon rLo */
1996          }
1997          case Iop_8Sto32: case Iop_8Sto64: {
1998             IRExpr* arg = e->Iex.Unop.arg;
1999             HReg    src = iselIntExpr_R(env, arg);
2000             HReg    dst = widen_s_8_to_64(env, src);
2001             return dst;
2002          }
2003          case Iop_16Sto32: case Iop_16Sto64: {
2004             IRExpr* arg = e->Iex.Unop.arg;
2005             HReg    src = iselIntExpr_R(env, arg);
2006             HReg    dst = widen_s_16_to_64(env, src);
2007             return dst;
2008          }
2009          case Iop_32Sto64: {
2010             IRExpr* arg = e->Iex.Unop.arg;
2011             HReg    src = iselIntExpr_R(env, arg);
2012             HReg    dst = widen_s_32_to_64(env, src);
2013             return dst;
2014          }
2015          case Iop_Not32:
2016          case Iop_Not64: {
2017             HReg dst = newVRegI(env);
2018             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2019             addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
2020             return dst;
2021          }
2022          case Iop_Clz64: {
2023             HReg dst = newVRegI(env);
2024             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2025             addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
2026             return dst;
2027          }
2028          case Iop_Left32:
2029          case Iop_Left64: {
2030             /* Left64(src) = src | -src.  Left32 can use the same
2031                implementation since in that case we don't care what
2032                the upper 32 bits become. */
2033             HReg dst = newVRegI(env);
2034             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2035             addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2036             addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2037                                            ARM64lo_OR));
2038             return dst;
2039          }
2040          case Iop_CmpwNEZ64: {
2041            /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
2042                              = Left64(src) >>s 63 */
2043             HReg dst = newVRegI(env);
2044             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2045             addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2046             addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2047                                            ARM64lo_OR));
2048             addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2049                                            ARM64sh_SAR));
2050             return dst;
2051          }
2052          case Iop_CmpwNEZ32: {
2053             /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
2054                               = Left64(src & 0xFFFFFFFF) >>s 63 */
2055             HReg dst = newVRegI(env);
2056             HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
2057             HReg src = widen_z_32_to_64(env, pre);
2058             addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2059             addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2060                                            ARM64lo_OR));
2061             addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2062                                            ARM64sh_SAR));
2063             return dst;
2064          }
2065          case Iop_V128to64: case Iop_V128HIto64: {
2066             HReg dst    = newVRegI(env);
2067             HReg src    = iselV128Expr(env, e->Iex.Unop.arg);
2068             UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
2069             addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
2070             return dst;
2071          }
2072          case Iop_ReinterpF64asI64: {
2073             HReg dst = newVRegI(env);
2074             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2075             addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
2076             return dst;
2077          }
2078          case Iop_ReinterpF32asI32: {
2079             HReg dst = newVRegI(env);
2080             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2081             addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
2082             return dst;
2083          }
2084          case Iop_1Sto16:
2085          case Iop_1Sto32:
2086          case Iop_1Sto64: {
2087             /* As with the iselStmt case for 'tmp:I1 = expr', we could
2088                do a lot better here if it ever became necessary.  (CSDEC?) */
2089             HReg zero = hregARM64_XZR_XSP(); // XZR in this context
2090             HReg one  = newVRegI(env);
2091             HReg dst  = newVRegI(env);
2092             addInstr(env, ARM64Instr_Imm64(one,  1));
2093             ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
2094             addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2095             addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2096                                            ARM64sh_SHL));
2097             addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2098                                            ARM64sh_SAR));
2099             return dst;
2100          }
2101          case Iop_NarrowUn16to8x8:
2102          case Iop_NarrowUn32to16x4:
2103          case Iop_NarrowUn64to32x2:
2104          case Iop_QNarrowUn16Sto8Sx8:
2105          case Iop_QNarrowUn32Sto16Sx4:
2106          case Iop_QNarrowUn64Sto32Sx2:
2107          case Iop_QNarrowUn16Uto8Ux8:
2108          case Iop_QNarrowUn32Uto16Ux4:
2109          case Iop_QNarrowUn64Uto32Ux2:
2110          case Iop_QNarrowUn16Sto8Ux8:
2111          case Iop_QNarrowUn32Sto16Ux4:
2112          case Iop_QNarrowUn64Sto32Ux2:
2113          {
2114             HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2115             HReg tmp = newVRegV(env);
2116             HReg dst = newVRegI(env);
2117             UInt dszBlg2 = 3; /* illegal */
2118             ARM64VecNarrowOp op = ARM64vecna_INVALID;
2119             switch (e->Iex.Unop.op) {
2120                case Iop_NarrowUn16to8x8:
2121                   dszBlg2 = 0; op = ARM64vecna_XTN; break;
2122                case Iop_NarrowUn32to16x4:
2123                   dszBlg2 = 1; op = ARM64vecna_XTN; break;
2124                case Iop_NarrowUn64to32x2:
2125                   dszBlg2 = 2; op = ARM64vecna_XTN; break;
2126                case Iop_QNarrowUn16Sto8Sx8:
2127                   dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
2128                case Iop_QNarrowUn32Sto16Sx4:
2129                   dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
2130                case Iop_QNarrowUn64Sto32Sx2:
2131                   dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
2132                case Iop_QNarrowUn16Uto8Ux8:
2133                   dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
2134                case Iop_QNarrowUn32Uto16Ux4:
2135                   dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
2136                case Iop_QNarrowUn64Uto32Ux2:
2137                   dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
2138                case Iop_QNarrowUn16Sto8Ux8:
2139                   dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
2140                case Iop_QNarrowUn32Sto16Ux4:
2141                   dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
2142                case Iop_QNarrowUn64Sto32Ux2:
2143                   dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
2144                default:
2145                   vassert(0);
2146             }
2147             addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
2148             addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
2149             return dst;
2150          }
2151          case Iop_1Uto64: {
2152             /* 1Uto64(tmp). */
2153             HReg dst = newVRegI(env);
2154             if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
2155                ARM64RIL* one = mb_mkARM64RIL_I(1);
2156                HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
2157                vassert(one);
2158                addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
2159             } else {
2160                /* CLONE-01 */
2161                HReg zero = hregARM64_XZR_XSP(); // XZR in this context
2162                HReg one  = newVRegI(env);
2163                addInstr(env, ARM64Instr_Imm64(one,  1));
2164                ARM64CondCode cc = iselCondCode_C(env, e->Iex.Unop.arg);
2165                addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2166             }
2167             return dst;
2168          }
2169          case Iop_64to32:
2170          case Iop_64to16:
2171          case Iop_64to8:
2172             /* These are no-ops. */
2173             return iselIntExpr_R(env, e->Iex.Unop.arg);
2174
2175          default:
2176             break;
2177       }
2178
2179       break;
2180    }
2181
2182    /* --------- GET --------- */
2183    case Iex_Get: {
2184       if (ty == Ity_I64
2185           && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
2186          HReg        dst = newVRegI(env);
2187          ARM64AMode* am
2188             = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2189          addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2190          return dst;
2191       }
2192       if (ty == Ity_I32
2193           && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
2194          HReg        dst = newVRegI(env);
2195          ARM64AMode* am
2196             = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2197          addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2198          return dst;
2199       }
2200       if (ty == Ity_I16
2201           && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2202          HReg        dst = newVRegI(env);
2203          ARM64AMode* am
2204             = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2205          addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2206          return dst;
2207       }
2208       if (ty == Ity_I8
2209           /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2210          HReg        dst = newVRegI(env);
2211          ARM64AMode* am
2212             = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2213          addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2214          return dst;
2215       }
2216       break;
2217    }
2218
2219    /* --------- CCALL --------- */
2220    case Iex_CCall: {
2221       HReg    dst = newVRegI(env);
2222       vassert(ty == e->Iex.CCall.retty);
2223
2224       /* be very restrictive for now.  Only 64-bit ints allowed for
2225          args, and 64 bits for return type.  Don't forget to change
2226          the RetLoc if more types are allowed in future. */
2227       if (e->Iex.CCall.retty != Ity_I64)
2228          goto irreducible;
2229
2230       /* Marshal args, do the call, clear stack. */
2231       UInt   addToSp = 0;
2232       RetLoc rloc    = mk_RetLoc_INVALID();
2233       Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2234                                      e->Iex.CCall.cee, e->Iex.CCall.retty,
2235                                      e->Iex.CCall.args );
2236       /* */
2237       if (ok) {
2238          vassert(is_sane_RetLoc(rloc));
2239          vassert(rloc.pri == RLPri_Int);
2240          vassert(addToSp == 0);
2241          addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2242          return dst;
2243       }
2244       goto irreducible;
2245    }
2246
2247    /* --------- LITERAL --------- */
2248    /* 64-bit literals */
2249    case Iex_Const: {
2250       ULong u   = 0;
2251       HReg  dst = newVRegI(env);
2252       switch (e->Iex.Const.con->tag) {
2253          case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2254          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2255          case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2256          case Ico_U8:  u = e->Iex.Const.con->Ico.U8;  break;
2257          default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2258       }
2259       addInstr(env, ARM64Instr_Imm64(dst, u));
2260       return dst;
2261    }
2262
2263    /* --------- MULTIPLEX --------- */
2264    case Iex_ITE: {
2265       /* ITE(ccexpr, iftrue, iffalse) */
2266       if (ty == Ity_I64 || ty == Ity_I32) {
2267          ARM64CondCode cc;
2268          HReg r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2269          HReg r0  = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2270          HReg dst = newVRegI(env);
2271          cc = iselCondCode_C(env, e->Iex.ITE.cond);
2272          addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2273          return dst;
2274       }
2275       break;
2276    }
2277
2278    default:
2279    break;
2280    } /* switch (e->tag) */
2281
2282    /* We get here if no pattern matched. */
2283   irreducible:
2284    ppIRExpr(e);
2285    vpanic("iselIntExpr_R: cannot reduce tree");
2286 }
2287
2288
2289 /*---------------------------------------------------------*/
2290 /*--- ISEL: Integer expressions (128 bit)               ---*/
2291 /*---------------------------------------------------------*/
2292
2293 /* Compute a 128-bit value into a register pair, which is returned as
2294    the first two parameters.  As with iselIntExpr_R, these may be
2295    either real or virtual regs; in any case they must not be changed
2296    by subsequent code emitted by the caller.  */
2297
2298 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2299                              ISelEnv* env, IRExpr* e )
2300 {
2301    iselInt128Expr_wrk(rHi, rLo, env, e);
2302 #  if 0
2303    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2304 #  endif
2305    vassert(hregClass(*rHi) == HRcInt64);
2306    vassert(hregIsVirtual(*rHi));
2307    vassert(hregClass(*rLo) == HRcInt64);
2308    vassert(hregIsVirtual(*rLo));
2309 }
2310
2311 /* DO NOT CALL THIS DIRECTLY ! */
2312 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2313                                  ISelEnv* env, IRExpr* e )
2314 {
2315    vassert(e);
2316    vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2317
2318    /* --------- BINARY ops --------- */
2319    if (e->tag == Iex_Binop) {
2320       switch (e->Iex.Binop.op) {
2321          /* 64 x 64 -> 128 multiply */
2322          case Iop_MullU64:
2323          case Iop_MullS64: {
2324             Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2325             HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
2326             HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
2327             HReg dstLo = newVRegI(env);
2328             HReg dstHi = newVRegI(env);
2329             addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2330                                          ARM64mul_PLAIN));
2331             addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2332                                          syned ? ARM64mul_SX : ARM64mul_ZX));
2333             *rHi = dstHi;
2334             *rLo = dstLo;
2335             return;
2336          }
2337          /* 64HLto128(e1,e2) */
2338          case Iop_64HLto128:
2339             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2340             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2341             return;
2342          default:
2343             break;
2344       }
2345    } /* if (e->tag == Iex_Binop) */
2346
2347    ppIRExpr(e);
2348    vpanic("iselInt128Expr(arm64)");
2349 }
2350
2351
2352 /*---------------------------------------------------------*/
2353 /*--- ISEL: Vector expressions (128 bit)                ---*/
2354 /*---------------------------------------------------------*/
2355
2356 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
2357 {
2358    HReg r = iselV128Expr_wrk( env, e );
2359    vassert(hregClass(r) == HRcVec128);
2360    vassert(hregIsVirtual(r));
2361    return r;
2362 }
2363
2364 /* DO NOT CALL THIS DIRECTLY */
2365 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
2366 {
2367    IRType ty = typeOfIRExpr(env->type_env, e);
2368    vassert(e);
2369    vassert(ty == Ity_V128);
2370
2371    if (e->tag == Iex_RdTmp) {
2372       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2373    }
2374
2375    if (e->tag == Iex_Const) {
2376       /* Only a very limited range of constants is handled. */
2377       vassert(e->Iex.Const.con->tag == Ico_V128);
2378       UShort con = e->Iex.Const.con->Ico.V128;
2379       HReg   res = newVRegV(env);
2380       switch (con) {
2381          case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
2382             addInstr(env, ARM64Instr_VImmQ(res, con));
2383             return res;
2384          case 0x00F0:
2385             addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2386             addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2387             return res;
2388          case 0x0F00:
2389             addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2390             addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2391             return res;
2392          case 0x0FF0:
2393             addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2394             addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2395             return res;
2396          case 0x0FFF:
2397             addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2398             addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2399             addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2400             return res;
2401          case 0xF000:
2402             addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2403             addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2404             return res;
2405          case 0xFF00:
2406             addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2407             addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2408             return res;
2409          default:
2410             break;
2411       }
2412       /* Unhandled */
2413       goto v128_expr_bad;
2414    }
2415
2416    if (e->tag == Iex_Load) {
2417       HReg res = newVRegV(env);
2418       HReg rN  = iselIntExpr_R(env, e->Iex.Load.addr);
2419       vassert(ty == Ity_V128);
2420       addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
2421       return res;
2422    }
2423
2424    if (e->tag == Iex_Get) {
2425       UInt offs = (UInt)e->Iex.Get.offset;
2426       if (offs < (1<<12)) {
2427          HReg addr = mk_baseblock_128bit_access_addr(env, offs);
2428          HReg res  = newVRegV(env);
2429          vassert(ty == Ity_V128);
2430          addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
2431          return res;
2432       }
2433       goto v128_expr_bad;
2434    }
2435
2436    if (e->tag == Iex_Unop) {
2437
2438       /* Iop_ZeroHIXXofV128 cases */
2439       UShort imm16 = 0;
2440       switch (e->Iex.Unop.op) {
2441          case Iop_ZeroHI64ofV128:  imm16 = 0x00FF; break;
2442          case Iop_ZeroHI96ofV128:  imm16 = 0x000F; break;
2443          case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
2444          case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
2445          default: break;
2446       }
2447       if (imm16 != 0) {
2448          HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2449          HReg imm = newVRegV(env);
2450          HReg res = newVRegV(env);
2451          addInstr(env, ARM64Instr_VImmQ(imm, imm16));
2452          addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
2453          return res;
2454       }
2455
2456       /* Other cases */
2457       switch (e->Iex.Unop.op) {
2458          case Iop_NotV128:
2459          case Iop_Abs64Fx2: case Iop_Abs32Fx4:
2460          case Iop_Neg64Fx2: case Iop_Neg32Fx4:
2461          case Iop_Abs64x2:  case Iop_Abs32x4:
2462          case Iop_Abs16x8:  case Iop_Abs8x16:
2463          case Iop_Cls32x4:  case Iop_Cls16x8:  case Iop_Cls8x16:
2464          case Iop_Clz32x4:  case Iop_Clz16x8:  case Iop_Clz8x16:
2465          case Iop_Cnt8x16:
2466          case Iop_Reverse1sIn8_x16:
2467          case Iop_Reverse8sIn16_x8:
2468          case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
2469          case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
2470          case Iop_Reverse32sIn64_x2:
2471          case Iop_RecipEst32Ux4:
2472          case Iop_RSqrtEst32Ux4:
2473          case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
2474          case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
2475          {
2476             HReg res   = newVRegV(env);
2477             HReg arg   = iselV128Expr(env, e->Iex.Unop.arg);
2478             Bool setRM = False;
2479             ARM64VecUnaryOp op = ARM64vecu_INVALID;
2480             switch (e->Iex.Unop.op) {
2481                case Iop_NotV128:           op = ARM64vecu_NOT;         break;
2482                case Iop_Abs64Fx2:          op = ARM64vecu_FABS64x2;    break;
2483                case Iop_Abs32Fx4:          op = ARM64vecu_FABS32x4;    break;
2484                case Iop_Neg64Fx2:          op = ARM64vecu_FNEG64x2;    break;
2485                case Iop_Neg32Fx4:          op = ARM64vecu_FNEG32x4;    break;
2486                case Iop_Abs64x2:           op = ARM64vecu_ABS64x2;     break;
2487                case Iop_Abs32x4:           op = ARM64vecu_ABS32x4;     break;
2488                case Iop_Abs16x8:           op = ARM64vecu_ABS16x8;     break;
2489                case Iop_Abs8x16:           op = ARM64vecu_ABS8x16;     break;
2490                case Iop_Cls32x4:           op = ARM64vecu_CLS32x4;     break;
2491                case Iop_Cls16x8:           op = ARM64vecu_CLS16x8;     break;
2492                case Iop_Cls8x16:           op = ARM64vecu_CLS8x16;     break;
2493                case Iop_Clz32x4:           op = ARM64vecu_CLZ32x4;     break;
2494                case Iop_Clz16x8:           op = ARM64vecu_CLZ16x8;     break;
2495                case Iop_Clz8x16:           op = ARM64vecu_CLZ8x16;     break;
2496                case Iop_Cnt8x16:           op = ARM64vecu_CNT8x16;     break;
2497                case Iop_Reverse1sIn8_x16:  op = ARM64vecu_RBIT;        break;
2498                case Iop_Reverse8sIn16_x8:  op = ARM64vecu_REV1616B;    break;
2499                case Iop_Reverse8sIn32_x4:  op = ARM64vecu_REV3216B;    break;
2500                case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H;     break;
2501                case Iop_Reverse8sIn64_x2:  op = ARM64vecu_REV6416B;    break;
2502                case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H;     break;
2503                case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;     break;
2504                case Iop_RecipEst32Ux4:     op = ARM64vecu_URECPE32x4;  break;
2505                case Iop_RSqrtEst32Ux4:     op = ARM64vecu_URSQRTE32x4; break;
2506                case Iop_RecipEst64Fx2:     setRM = True;
2507                                            op = ARM64vecu_FRECPE64x2;  break;
2508                case Iop_RecipEst32Fx4:     setRM = True;
2509                                            op = ARM64vecu_FRECPE32x4;  break;
2510                case Iop_RSqrtEst64Fx2:     setRM = True;
2511                                            op = ARM64vecu_FRSQRTE64x2; break;
2512                case Iop_RSqrtEst32Fx4:     setRM = True;
2513                                            op = ARM64vecu_FRSQRTE32x4; break;
2514                default: vassert(0);
2515             }
2516             if (setRM) {
2517                // This is a bit of a kludge.  We should do rm properly for
2518                // these recip-est insns, but that would require changing the
2519                // primop's type to take an rmode.
2520                set_FPCR_rounding_mode(env, IRExpr_Const(
2521                                               IRConst_U32(Irrm_NEAREST)));
2522             }
2523             addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2524             return res;
2525          }
2526          case Iop_CmpNEZ8x16:
2527          case Iop_CmpNEZ16x8:
2528          case Iop_CmpNEZ32x4:
2529          case Iop_CmpNEZ64x2: {
2530             HReg arg  = iselV128Expr(env, e->Iex.Unop.arg);
2531             HReg zero = newVRegV(env);
2532             HReg res  = newVRegV(env);
2533             ARM64VecBinOp cmp = ARM64vecb_INVALID;
2534             switch (e->Iex.Unop.op) {
2535                case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
2536                case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
2537                case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
2538                case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
2539                default: vassert(0);
2540             }
2541             // This is pretty feeble.  Better: use CMP against zero
2542             // and avoid the extra instruction and extra register.
2543             addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
2544             addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
2545             addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2546             return res;
2547          }
2548          case Iop_V256toV128_0:
2549          case Iop_V256toV128_1: {
2550             HReg vHi, vLo;
2551             iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
2552             return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
2553          }
2554          case Iop_64UtoV128: {
2555             HReg res = newVRegV(env);
2556             HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2557             addInstr(env, ARM64Instr_VQfromX(res, arg));
2558             return res;
2559          }
2560          case Iop_Widen8Sto16x8: {
2561             HReg res = newVRegV(env);
2562             HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2563             addInstr(env, ARM64Instr_VQfromX(res, arg));
2564             addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
2565             addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
2566                                                 res, res, 8));
2567             return res;
2568          }
2569          case Iop_Widen16Sto32x4: {
2570             HReg res = newVRegV(env);
2571             HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2572             addInstr(env, ARM64Instr_VQfromX(res, arg));
2573             addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
2574             addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
2575                                                 res, res, 16));
2576             return res;
2577          }
2578          case Iop_Widen32Sto64x2: {
2579             HReg res = newVRegV(env);
2580             HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2581             addInstr(env, ARM64Instr_VQfromX(res, arg));
2582             addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
2583             addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
2584                                                 res, res, 32));
2585             return res;
2586          }
2587          /* ... */
2588          default:
2589             break;
2590       } /* switch on the unop */
2591    } /* if (e->tag == Iex_Unop) */
2592
2593    if (e->tag == Iex_Binop) {
2594       switch (e->Iex.Binop.op) {
2595          case Iop_Sqrt32Fx4:
2596          case Iop_Sqrt64Fx2: {
2597             HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
2598             HReg res = newVRegV(env);
2599             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
2600             ARM64VecUnaryOp op
2601                = e->Iex.Binop.op == Iop_Sqrt32Fx4
2602                     ? ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2;
2603             addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2604             return res;
2605          }
2606          case Iop_64HLtoV128: {
2607             HReg res  = newVRegV(env);
2608             HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2609             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2610             addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
2611             return res;
2612          }
2613          /* -- Cases where we can generate a simple three-reg instruction. -- */
2614          case Iop_AndV128:
2615          case Iop_OrV128:
2616          case Iop_XorV128:
2617          case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
2618          case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
2619          case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
2620          case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
2621          case Iop_Add64x2: case Iop_Add32x4:
2622          case Iop_Add16x8: case Iop_Add8x16:
2623          case Iop_Sub64x2: case Iop_Sub32x4:
2624          case Iop_Sub16x8: case Iop_Sub8x16:
2625          case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
2626          case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
2627          case Iop_CmpEQ16x8:  case Iop_CmpEQ8x16:
2628          case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
2629          case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
2630          case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
2631          case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
2632          case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
2633          case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
2634          case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
2635          case Iop_Perm8x16:
2636          case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
2637          case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
2638          case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
2639          case Iop_CatOddLanes16x8:  case Iop_CatOddLanes8x16:
2640          case Iop_InterleaveHI32x4:
2641          case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
2642          case Iop_InterleaveLO32x4:
2643          case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
2644          case Iop_PolynomialMul8x16:
2645          case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
2646          case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
2647          case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
2648          case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
2649          case Iop_QSub64Sx2: case Iop_QSub32Sx4:
2650          case Iop_QSub16Sx8: case Iop_QSub8Sx16:
2651          case Iop_QSub64Ux2: case Iop_QSub32Ux4:
2652          case Iop_QSub16Ux8: case Iop_QSub8Ux16:
2653          case Iop_QDMulHi32Sx4:  case Iop_QDMulHi16Sx8:
2654          case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
2655          case Iop_Sh8Sx16:  case Iop_Sh16Sx8:
2656          case Iop_Sh32Sx4:  case Iop_Sh64Sx2:
2657          case Iop_Sh8Ux16:  case Iop_Sh16Ux8:
2658          case Iop_Sh32Ux4:  case Iop_Sh64Ux2:
2659          case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
2660          case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
2661          case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
2662          case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
2663          case Iop_Max64Fx2: case Iop_Max32Fx4:
2664          case Iop_Min64Fx2: case Iop_Min32Fx4:
2665          case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
2666          case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
2667          {
2668             HReg res   = newVRegV(env);
2669             HReg argL  = iselV128Expr(env, e->Iex.Binop.arg1);
2670             HReg argR  = iselV128Expr(env, e->Iex.Binop.arg2);
2671             Bool sw    = False;
2672             Bool setRM = False;
2673             ARM64VecBinOp op = ARM64vecb_INVALID;
2674             switch (e->Iex.Binop.op) {
2675                case Iop_AndV128:    op = ARM64vecb_AND; break;
2676                case Iop_OrV128:     op = ARM64vecb_ORR; break;
2677                case Iop_XorV128:    op = ARM64vecb_XOR; break;
2678                case Iop_Max32Ux4:   op = ARM64vecb_UMAX32x4; break;
2679                case Iop_Max16Ux8:   op = ARM64vecb_UMAX16x8; break;
2680                case Iop_Max8Ux16:   op = ARM64vecb_UMAX8x16; break;
2681                case Iop_Min32Ux4:   op = ARM64vecb_UMIN32x4; break;
2682                case Iop_Min16Ux8:   op = ARM64vecb_UMIN16x8; break;
2683                case Iop_Min8Ux16:   op = ARM64vecb_UMIN8x16; break;
2684                case Iop_Max32Sx4:   op = ARM64vecb_SMAX32x4; break;
2685                case Iop_Max16Sx8:   op = ARM64vecb_SMAX16x8; break;
2686                case Iop_Max8Sx16:   op = ARM64vecb_SMAX8x16; break;
2687                case Iop_Min32Sx4:   op = ARM64vecb_SMIN32x4; break;
2688                case Iop_Min16Sx8:   op = ARM64vecb_SMIN16x8; break;
2689                case Iop_Min8Sx16:   op = ARM64vecb_SMIN8x16; break;
2690                case Iop_Add64x2:    op = ARM64vecb_ADD64x2; break;
2691                case Iop_Add32x4:    op = ARM64vecb_ADD32x4; break;
2692                case Iop_Add16x8:    op = ARM64vecb_ADD16x8; break;
2693                case Iop_Add8x16:    op = ARM64vecb_ADD8x16; break;
2694                case Iop_Sub64x2:    op = ARM64vecb_SUB64x2; break;
2695                case Iop_Sub32x4:    op = ARM64vecb_SUB32x4; break;
2696                case Iop_Sub16x8:    op = ARM64vecb_SUB16x8; break;
2697                case Iop_Sub8x16:    op = ARM64vecb_SUB8x16; break;
2698                case Iop_Mul32x4:    op = ARM64vecb_MUL32x4; break;
2699                case Iop_Mul16x8:    op = ARM64vecb_MUL16x8; break;
2700                case Iop_Mul8x16:    op = ARM64vecb_MUL8x16; break;
2701                case Iop_CmpEQ64x2:  op = ARM64vecb_CMEQ64x2; break;
2702                case Iop_CmpEQ32x4:  op = ARM64vecb_CMEQ32x4; break;
2703                case Iop_CmpEQ16x8:  op = ARM64vecb_CMEQ16x8; break;
2704                case Iop_CmpEQ8x16:  op = ARM64vecb_CMEQ8x16; break;
2705                case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
2706                case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
2707                case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
2708                case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
2709                case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
2710                case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
2711                case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
2712                case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
2713                case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
2714                case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
2715                case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
2716                case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
2717                case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
2718                case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
2719                case Iop_Perm8x16:   op = ARM64vecb_TBL1; break;
2720                case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
2721                                           break;
2722                case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
2723                                           break;
2724                case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
2725                                           break;
2726                case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
2727                                           break;
2728                case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
2729                                           break;
2730                case Iop_CatOddLanes32x4:  op = ARM64vecb_UZP232x4; sw = True;
2731                                           break;
2732                case Iop_CatOddLanes16x8:  op = ARM64vecb_UZP216x8; sw = True;
2733                                           break;
2734                case Iop_CatOddLanes8x16:  op = ARM64vecb_UZP28x16; sw = True;
2735                                           break;
2736                case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
2737                                           break;
2738                case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
2739                                           break;
2740                case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
2741                                           break;
2742                case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
2743                                           break;
2744                case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
2745                                           break;
2746                case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
2747                                           break;
2748                case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
2749                case Iop_QAdd64Sx2:      op = ARM64vecb_SQADD64x2; break;
2750                case Iop_QAdd32Sx4:      op = ARM64vecb_SQADD32x4; break;
2751                case Iop_QAdd16Sx8:      op = ARM64vecb_SQADD16x8; break;
2752                case Iop_QAdd8Sx16:      op = ARM64vecb_SQADD8x16; break;
2753                case Iop_QAdd64Ux2:      op = ARM64vecb_UQADD64x2; break;
2754                case Iop_QAdd32Ux4:      op = ARM64vecb_UQADD32x4; break;
2755                case Iop_QAdd16Ux8:      op = ARM64vecb_UQADD16x8; break;
2756                case Iop_QAdd8Ux16:      op = ARM64vecb_UQADD8x16; break;
2757                case Iop_QSub64Sx2:      op = ARM64vecb_SQSUB64x2; break;
2758                case Iop_QSub32Sx4:      op = ARM64vecb_SQSUB32x4; break;
2759                case Iop_QSub16Sx8:      op = ARM64vecb_SQSUB16x8; break;
2760                case Iop_QSub8Sx16:      op = ARM64vecb_SQSUB8x16; break;
2761                case Iop_QSub64Ux2:      op = ARM64vecb_UQSUB64x2; break;
2762                case Iop_QSub32Ux4:      op = ARM64vecb_UQSUB32x4; break;
2763                case Iop_QSub16Ux8:      op = ARM64vecb_UQSUB16x8; break;
2764                case Iop_QSub8Ux16:      op = ARM64vecb_UQSUB8x16; break;
2765                case Iop_QDMulHi32Sx4:   op = ARM64vecb_SQDMULH32x4; break;
2766                case Iop_QDMulHi16Sx8:   op = ARM64vecb_SQDMULH16x8; break;
2767                case Iop_QRDMulHi32Sx4:  op = ARM64vecb_SQRDMULH32x4; break;
2768                case Iop_QRDMulHi16Sx8:  op = ARM64vecb_SQRDMULH16x8; break;
2769                case Iop_Sh8Sx16:        op = ARM64vecb_SSHL8x16; break;
2770                case Iop_Sh16Sx8:        op = ARM64vecb_SSHL16x8; break;
2771                case Iop_Sh32Sx4:        op = ARM64vecb_SSHL32x4; break;
2772                case Iop_Sh64Sx2:        op = ARM64vecb_SSHL64x2; break;
2773                case Iop_Sh8Ux16:        op = ARM64vecb_USHL8x16; break;
2774                case Iop_Sh16Ux8:        op = ARM64vecb_USHL16x8; break;
2775                case Iop_Sh32Ux4:        op = ARM64vecb_USHL32x4; break;
2776                case Iop_Sh64Ux2:        op = ARM64vecb_USHL64x2; break;
2777                case Iop_Rsh8Sx16:       op = ARM64vecb_SRSHL8x16; break;
2778                case Iop_Rsh16Sx8:       op = ARM64vecb_SRSHL16x8; break;
2779                case Iop_Rsh32Sx4:       op = ARM64vecb_SRSHL32x4; break;
2780                case Iop_Rsh64Sx2:       op = ARM64vecb_SRSHL64x2; break;
2781                case Iop_Rsh8Ux16:       op = ARM64vecb_URSHL8x16; break;
2782                case Iop_Rsh16Ux8:       op = ARM64vecb_URSHL16x8; break;
2783                case Iop_Rsh32Ux4:       op = ARM64vecb_URSHL32x4; break;
2784                case Iop_Rsh64Ux2:       op = ARM64vecb_URSHL64x2; break;
2785                case Iop_Max64Fx2:       op = ARM64vecb_FMAX64x2; break;
2786                case Iop_Max32Fx4:       op = ARM64vecb_FMAX32x4; break;
2787                case Iop_Min64Fx2:       op = ARM64vecb_FMIN64x2; break;
2788                case Iop_Min32Fx4:       op = ARM64vecb_FMIN32x4; break;
2789                case Iop_RecipStep64Fx2: setRM = True;
2790                                         op = ARM64vecb_FRECPS64x2; break;
2791                case Iop_RecipStep32Fx4: setRM = True;
2792                                         op = ARM64vecb_FRECPS32x4; break;
2793                case Iop_RSqrtStep64Fx2: setRM = True;
2794                                         op = ARM64vecb_FRSQRTS64x2; break;
2795                case Iop_RSqrtStep32Fx4: setRM = True;
2796                                         op = ARM64vecb_FRSQRTS32x4; break;
2797                default: vassert(0);
2798             }
2799             if (setRM) {
2800                // This is a bit of a kludge.  We should do rm properly for
2801                // these recip-step insns, but that would require changing the
2802                // primop's type to take an rmode.
2803                set_FPCR_rounding_mode(env, IRExpr_Const(
2804                                               IRConst_U32(Irrm_NEAREST)));
2805             }
2806             if (sw) {
2807                addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
2808             } else {
2809                addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
2810             }
2811             return res;
2812          }
2813          /* -- These only have 2 operand instructions, so we have to first move
2814             the first argument into a new register, for modification. -- */
2815          case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2816          case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2817          case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2818          case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2819          {
2820             HReg res  = newVRegV(env);
2821             HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2822             HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2823             ARM64VecModifyOp op = ARM64vecmo_INVALID;
2824             switch (e->Iex.Binop.op) {
2825                /* In the following 8 cases, the US - SU switching is intended.
2826                   See comments on the libvex_ir.h for details.  Also in the
2827                   ARM64 front end, where used these primops are generated. */
2828                case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2829                case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2830                case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2831                case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2832                case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2833                case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2834                case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2835                case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2836                default: vassert(0);
2837             }
2838             /* The order of the operands is important.  Although this is
2839                basically addition, the two operands are extended differently,
2840                making it important to get them into the correct registers in
2841                the instruction. */
2842             addInstr(env, ARM64Instr_VMov(16, res, argR));
2843             addInstr(env, ARM64Instr_VModifyV(op, res, argL));
2844             return res;
2845          }
2846          /* -- Shifts by an immediate. -- */
2847          case Iop_ShrN64x2: case Iop_ShrN32x4:
2848          case Iop_ShrN16x8: case Iop_ShrN8x16:
2849          case Iop_SarN64x2: case Iop_SarN32x4:
2850          case Iop_SarN16x8: case Iop_SarN8x16:
2851          case Iop_ShlN64x2: case Iop_ShlN32x4:
2852          case Iop_ShlN16x8: case Iop_ShlN8x16:
2853          case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2854          case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2855          case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2856          case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2857          case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2858          case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2859          {
2860             IRExpr* argL = e->Iex.Binop.arg1;
2861             IRExpr* argR = e->Iex.Binop.arg2;
2862             if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2863                UInt amt   = argR->Iex.Const.con->Ico.U8;
2864                UInt limLo = 0;
2865                UInt limHi = 0;
2866                ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2867                /* Establish the instruction to use. */
2868                switch (e->Iex.Binop.op) {
2869                   case Iop_ShrN64x2:       op = ARM64vecshi_USHR64x2;   break;
2870                   case Iop_ShrN32x4:       op = ARM64vecshi_USHR32x4;   break;
2871                   case Iop_ShrN16x8:       op = ARM64vecshi_USHR16x8;   break;
2872                   case Iop_ShrN8x16:       op = ARM64vecshi_USHR8x16;   break;
2873                   case Iop_SarN64x2:       op = ARM64vecshi_SSHR64x2;   break;
2874                   case Iop_SarN32x4:       op = ARM64vecshi_SSHR32x4;   break;
2875                   case Iop_SarN16x8:       op = ARM64vecshi_SSHR16x8;   break;
2876                   case Iop_SarN8x16:       op = ARM64vecshi_SSHR8x16;   break;
2877                   case Iop_ShlN64x2:       op = ARM64vecshi_SHL64x2;    break;
2878                   case Iop_ShlN32x4:       op = ARM64vecshi_SHL32x4;    break;
2879                   case Iop_ShlN16x8:       op = ARM64vecshi_SHL16x8;    break;
2880                   case Iop_ShlN8x16:       op = ARM64vecshi_SHL8x16;    break;
2881                   case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2;  break;
2882                   case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4;  break;
2883                   case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8;  break;
2884                   case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16;  break;
2885                   case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2;  break;
2886                   case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4;  break;
2887                   case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8;  break;
2888                   case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16;  break;
2889                   case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2890                   case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2891                   case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2892                   case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2893                   default: vassert(0);
2894                }
2895                /* Establish the shift limits, for sanity check purposes only. */
2896                switch (e->Iex.Binop.op) {
2897                   case Iop_ShrN64x2:       limLo = 1; limHi = 64; break;
2898                   case Iop_ShrN32x4:       limLo = 1; limHi = 32; break;
2899                   case Iop_ShrN16x8:       limLo = 1; limHi = 16; break;
2900                   case Iop_ShrN8x16:       limLo = 1; limHi = 8;  break;
2901                   case Iop_SarN64x2:       limLo = 1; limHi = 64; break;
2902                   case Iop_SarN32x4:       limLo = 1; limHi = 32; break;
2903                   case Iop_SarN16x8:       limLo = 1; limHi = 16; break;
2904                   case Iop_SarN8x16:       limLo = 1; limHi = 8;  break;
2905                   case Iop_ShlN64x2:       limLo = 0; limHi = 63; break;
2906                   case Iop_ShlN32x4:       limLo = 0; limHi = 31; break;
2907                   case Iop_ShlN16x8:       limLo = 0; limHi = 15; break;
2908                   case Iop_ShlN8x16:       limLo = 0; limHi = 7;  break;
2909                   case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2910                   case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2911                   case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2912                   case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7;  break;
2913                   case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2914                   case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2915                   case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
2916                   case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7;  break;
2917                   case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
2918                   case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
2919                   case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
2920                   case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7;  break;
2921                   default: vassert(0);
2922                }
2923                /* For left shifts, the allowable amt values are
2924                   0 .. lane_bits-1.  For right shifts the allowable
2925                   values are 1 .. lane_bits. */
2926                if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
2927                   HReg src = iselV128Expr(env, argL);
2928                   HReg dst = newVRegV(env);
2929                   addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2930                   return dst;
2931                }
2932                /* Special case some no-op shifts that the arm64 front end
2933                   throws at us.  We can't generate any instructions for these,
2934                   but we don't need to either. */
2935                switch (e->Iex.Binop.op) {
2936                   case Iop_ShrN64x2: case Iop_ShrN32x4:
2937                   case Iop_ShrN16x8: case Iop_ShrN8x16:
2938                      if (amt == 0) {
2939                         return iselV128Expr(env, argL);
2940                      }
2941                      break;
2942                   default:
2943                      break;
2944                }
2945                /* otherwise unhandled */
2946             }
2947             /* else fall out; this is unhandled */
2948             break;
2949          }
2950          /* -- Saturating narrowing by an immediate -- */
2951          /* uu */
2952          case Iop_QandQShrNnarrow16Uto8Ux8:
2953          case Iop_QandQShrNnarrow32Uto16Ux4:
2954          case Iop_QandQShrNnarrow64Uto32Ux2:
2955          /* ss */
2956          case Iop_QandQSarNnarrow16Sto8Sx8:
2957          case Iop_QandQSarNnarrow32Sto16Sx4:
2958          case Iop_QandQSarNnarrow64Sto32Sx2:
2959          /* su */
2960          case Iop_QandQSarNnarrow16Sto8Ux8:
2961          case Iop_QandQSarNnarrow32Sto16Ux4:
2962          case Iop_QandQSarNnarrow64Sto32Ux2:
2963          /* ruu */
2964          case Iop_QandQRShrNnarrow16Uto8Ux8:
2965          case Iop_QandQRShrNnarrow32Uto16Ux4:
2966          case Iop_QandQRShrNnarrow64Uto32Ux2:
2967          /* rss */
2968          case Iop_QandQRSarNnarrow16Sto8Sx8:
2969          case Iop_QandQRSarNnarrow32Sto16Sx4:
2970          case Iop_QandQRSarNnarrow64Sto32Sx2:
2971          /* rsu */
2972          case Iop_QandQRSarNnarrow16Sto8Ux8:
2973          case Iop_QandQRSarNnarrow32Sto16Ux4:
2974          case Iop_QandQRSarNnarrow64Sto32Ux2:
2975          {
2976             IRExpr* argL = e->Iex.Binop.arg1;
2977             IRExpr* argR = e->Iex.Binop.arg2;
2978             if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2979                UInt amt   = argR->Iex.Const.con->Ico.U8;
2980                UInt limit = 0;
2981                ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2982                switch (e->Iex.Binop.op) {
2983                   /* uu */
2984                   case Iop_QandQShrNnarrow64Uto32Ux2:
2985                      op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
2986                   case Iop_QandQShrNnarrow32Uto16Ux4:
2987                      op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
2988                   case Iop_QandQShrNnarrow16Uto8Ux8:
2989                      op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
2990                   /* ss */
2991                   case Iop_QandQSarNnarrow64Sto32Sx2:
2992                      op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
2993                   case Iop_QandQSarNnarrow32Sto16Sx4:
2994                      op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
2995                   case Iop_QandQSarNnarrow16Sto8Sx8:
2996                      op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
2997                   /* su */
2998                   case Iop_QandQSarNnarrow64Sto32Ux2:
2999                      op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
3000                   case Iop_QandQSarNnarrow32Sto16Ux4:
3001                      op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
3002                   case Iop_QandQSarNnarrow16Sto8Ux8:
3003                      op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
3004                   /* ruu */
3005                   case Iop_QandQRShrNnarrow64Uto32Ux2:
3006                      op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
3007                   case Iop_QandQRShrNnarrow32Uto16Ux4:
3008                      op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
3009                   case Iop_QandQRShrNnarrow16Uto8Ux8:
3010                      op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
3011                   /* rss */
3012                   case Iop_QandQRSarNnarrow64Sto32Sx2:
3013                      op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
3014                   case Iop_QandQRSarNnarrow32Sto16Sx4:
3015                      op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
3016                   case Iop_QandQRSarNnarrow16Sto8Sx8:
3017                      op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
3018                   /* rsu */
3019                   case Iop_QandQRSarNnarrow64Sto32Ux2:
3020                      op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
3021                   case Iop_QandQRSarNnarrow32Sto16Ux4:
3022                      op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
3023                   case Iop_QandQRSarNnarrow16Sto8Ux8:
3024                      op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
3025                   /**/
3026                   default:
3027                      vassert(0);
3028                }
3029                if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
3030                   HReg src  = iselV128Expr(env, argL);
3031                   HReg dst  = newVRegV(env);
3032                   HReg fpsr = newVRegI(env);
3033                   /* Clear FPSR.Q, do the operation, and return both its
3034                      result and the new value of FPSR.Q.  We can simply
3035                      zero out FPSR since all the other bits have no relevance
3036                      in VEX generated code. */
3037                   addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3038                   addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3039                   addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
3040                   addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3041                   addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3042                                                              ARM64sh_SHR));
3043                   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3044                   vassert(ril_one);
3045                   addInstr(env, ARM64Instr_Logic(fpsr,
3046                                                  fpsr, ril_one, ARM64lo_AND));
3047                   /* Now we have: the main (shift) result in the bottom half
3048                      of |dst|, and the Q bit at the bottom of |fpsr|.
3049                      Combining them with a "InterleaveLO64x2" style operation
3050                      produces a 128 bit value, dst[63:0]:fpsr[63:0],
3051                      which is what we want. */
3052                   HReg scratch = newVRegV(env);
3053                   addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
3054                   addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
3055                                                  dst, dst, scratch));
3056                   return dst;
3057                }
3058             }
3059             /* else fall out; this is unhandled */
3060             break;
3061          }
3062
3063          // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
3064          // as it is in some ways more general and often leads to better
3065          // code overall.
3066          case Iop_ShlV128:
3067          case Iop_ShrV128: {
3068             Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
3069             /* This is tricky.  Generate an EXT instruction with zeroes in
3070                the high operand (shift right) or low operand (shift left).
3071                Note that we can only slice in the EXT instruction at a byte
3072                level of granularity, so the shift amount needs careful
3073                checking. */
3074             IRExpr* argL = e->Iex.Binop.arg1;
3075             IRExpr* argR = e->Iex.Binop.arg2;
3076             if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
3077                UInt amt   = argR->Iex.Const.con->Ico.U8;
3078                Bool amtOK = False;
3079                switch (amt) {
3080                   case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
3081                   case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
3082                   case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
3083                      amtOK = True; break;
3084                }
3085                /* We could also deal with amt==0 by copying the source to
3086                   the destination, but there's no need for that so far. */
3087                if (amtOK) {
3088                   HReg src  = iselV128Expr(env, argL);
3089                   HReg srcZ = newVRegV(env);
3090                   addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
3091                   UInt immB = amt / 8;
3092                   vassert(immB >= 1 && immB <= 15);
3093                   HReg dst = newVRegV(env);
3094                   if (isSHR) {
3095                     addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
3096                                                          immB));
3097                   } else {
3098                     addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
3099                                                          16 - immB));
3100                   }
3101                   return dst;
3102                }
3103             }
3104             /* else fall out; this is unhandled */
3105             break;
3106          }
3107
3108          case Iop_PolynomialMull8x8:
3109          case Iop_Mull32Ux2:
3110          case Iop_Mull16Ux4:
3111          case Iop_Mull8Ux8:
3112          case Iop_Mull32Sx2:
3113          case Iop_Mull16Sx4:
3114          case Iop_Mull8Sx8:
3115          case Iop_QDMull32Sx2:
3116          case Iop_QDMull16Sx4:
3117          {
3118             HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
3119             HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3120             HReg vSrcL = newVRegV(env);
3121             HReg vSrcR = newVRegV(env);
3122             HReg dst   = newVRegV(env);
3123             ARM64VecBinOp op = ARM64vecb_INVALID;
3124             switch (e->Iex.Binop.op) {
3125                case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8;    break;
3126                case Iop_Mull32Ux2:         op = ARM64vecb_UMULL2DSS;   break;
3127                case Iop_Mull16Ux4:         op = ARM64vecb_UMULL4SHH;   break;
3128                case Iop_Mull8Ux8:          op = ARM64vecb_UMULL8HBB;   break;
3129                case Iop_Mull32Sx2:         op = ARM64vecb_SMULL2DSS;   break;
3130                case Iop_Mull16Sx4:         op = ARM64vecb_SMULL4SHH;   break;
3131                case Iop_Mull8Sx8:          op = ARM64vecb_SMULL8HBB;   break;
3132                case Iop_QDMull32Sx2:       op = ARM64vecb_SQDMULL2DSS; break;
3133                case Iop_QDMull16Sx4:       op = ARM64vecb_SQDMULL4SHH; break;
3134                default: vassert(0);
3135             }
3136             addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
3137             addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
3138             addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
3139             return dst;
3140          }
3141
3142          /* ... */
3143          default:
3144             break;
3145       } /* switch on the binop */
3146    } /* if (e->tag == Iex_Binop) */
3147
3148    if (e->tag == Iex_Triop) {
3149       IRTriop*      triop  = e->Iex.Triop.details;
3150       ARM64VecBinOp vecbop = ARM64vecb_INVALID;
3151       switch (triop->op) {
3152          case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
3153          case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
3154          case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
3155          case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
3156          case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
3157          case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
3158          case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
3159          case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
3160          default: break;
3161       }
3162       if (vecbop != ARM64vecb_INVALID) {
3163          HReg argL = iselV128Expr(env, triop->arg2);
3164          HReg argR = iselV128Expr(env, triop->arg3);
3165          HReg dst  = newVRegV(env);
3166          set_FPCR_rounding_mode(env, triop->arg1);
3167          addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
3168          return dst;
3169       }
3170
3171       if (triop->op == Iop_SliceV128) {
3172          /* Note that, compared to ShlV128/ShrV128 just above, the shift
3173             amount here is in bytes, not bits. */
3174          IRExpr* argHi  = triop->arg1;
3175          IRExpr* argLo  = triop->arg2;
3176          IRExpr* argAmt = triop->arg3;
3177          if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
3178             UInt amt   = argAmt->Iex.Const.con->Ico.U8;
3179             Bool amtOK = amt >= 1 && amt <= 15;
3180             /* We could also deal with amt==0 by copying argLO to
3181                the destination, but there's no need for that so far. */
3182             if (amtOK) {
3183                HReg srcHi = iselV128Expr(env, argHi);
3184                HReg srcLo = iselV128Expr(env, argLo);
3185                HReg dst = newVRegV(env);
3186               addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
3187                return dst;
3188             }
3189          }
3190          /* else fall out; this is unhandled */
3191       }
3192
3193    } /* if (e->tag == Iex_Triop) */
3194
3195    if (e->tag == Iex_ITE) {
3196       // This code sequence is pretty feeble.  We'd do better to generate BSL
3197       // here.
3198       HReg rX = newVRegI(env);
3199
3200       ARM64CondCode cc = iselCondCode_C(env, e->Iex.ITE.cond);
3201       addInstr(env, ARM64Instr_Set64(rX, cc));
3202       // cond: rX = 1   !cond: rX = 0
3203
3204       // Mask the Set64 result.  This is paranoia (should be unnecessary).
3205       ARM64RIL* one = mb_mkARM64RIL_I(1);
3206       vassert(one);
3207       addInstr(env, ARM64Instr_Logic(rX, rX, one, ARM64lo_AND));
3208       // cond: rX = 1   !cond: rX = 0
3209
3210       // Propagate to all bits in the 64 bit word by subtracting 1 from it.
3211       // This also inverts the sense of the value.
3212       addInstr(env, ARM64Instr_Arith(rX, rX, ARM64RIA_I12(1,0),
3213                                      /*isAdd=*/False));
3214       // cond: rX = 0-(62)-0   !cond: rX = 1-(62)-1
3215
3216       // Duplicate rX into a vector register
3217       HReg vMask = newVRegV(env);
3218       addInstr(env, ARM64Instr_VQfromXX(vMask, rX, rX));
3219       // cond: vMask = 0-(126)-0   !cond: vMask = 1-(126)-1
3220
3221       HReg vIfTrue = iselV128Expr(env, e->Iex.ITE.iftrue);
3222       HReg vIfFalse = iselV128Expr(env, e->Iex.ITE.iffalse);
3223
3224       // Mask out iffalse value as needed
3225       addInstr(env,
3226                ARM64Instr_VBinV(ARM64vecb_AND, vIfFalse, vIfFalse, vMask));
3227
3228       // Invert the mask so we can use it for the iftrue value
3229       addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, vMask, vMask));
3230       // cond: vMask = 1-(126)-1   !cond: vMask = 0-(126)-0
3231
3232       // Mask out iftrue value as needed
3233       addInstr(env,
3234                ARM64Instr_VBinV(ARM64vecb_AND, vIfTrue, vIfTrue, vMask));
3235
3236       // Merge the masked iftrue and iffalse results.
3237       HReg res = newVRegV(env);
3238       addInstr(env, ARM64Instr_VBinV(ARM64vecb_ORR, res, vIfTrue, vIfFalse));
3239
3240       return res;
3241    }
3242
3243   v128_expr_bad:
3244    ppIRExpr(e);
3245    vpanic("iselV128Expr_wrk");
3246 }
3247
3248
3249 /*---------------------------------------------------------*/
3250 /*--- ISEL: Floating point expressions (64 bit)         ---*/
3251 /*---------------------------------------------------------*/
3252
3253 /* Compute a 64-bit floating point value into a register, the identity
3254    of which is returned.  As with iselIntExpr_R, the reg may be either
3255    real or virtual; in any case it must not be changed by subsequent
3256    code emitted by the caller.  */
3257
3258 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
3259 {
3260    HReg r = iselDblExpr_wrk( env, e );
3261 #  if 0
3262    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3263 #  endif
3264    vassert(hregClass(r) == HRcFlt64);
3265    vassert(hregIsVirtual(r));
3266    return r;
3267 }
3268
3269 /* DO NOT CALL THIS DIRECTLY */
3270 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
3271 {
3272    IRType ty = typeOfIRExpr(env->type_env,e);
3273    vassert(e);
3274    vassert(ty == Ity_F64);
3275
3276    if (e->tag == Iex_RdTmp) {
3277       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3278    }
3279
3280    if (e->tag == Iex_Const) {
3281       IRConst* con = e->Iex.Const.con;
3282       if (con->tag == Ico_F64i) {
3283          HReg src = newVRegI(env);
3284          HReg dst = newVRegD(env);
3285          addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
3286          addInstr(env, ARM64Instr_VDfromX(dst, src));
3287          return dst;
3288       }
3289       if (con->tag == Ico_F64) {
3290          HReg src = newVRegI(env);
3291          HReg dst = newVRegD(env);
3292          union { Double d64; ULong u64; } u;
3293          vassert(sizeof(u) == 8);
3294          u.d64 = con->Ico.F64;
3295          addInstr(env, ARM64Instr_Imm64(src, u.u64));
3296          addInstr(env, ARM64Instr_VDfromX(dst, src));
3297          return dst;
3298       }
3299    }
3300
3301    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3302       vassert(e->Iex.Load.ty == Ity_F64);
3303       HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3304       HReg res  = newVRegD(env);
3305       addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
3306       return res;
3307    }
3308
3309    if (e->tag == Iex_Get) {
3310       Int offs = e->Iex.Get.offset;
3311       if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
3312          HReg rD = newVRegD(env);
3313          HReg rN = get_baseblock_register();
3314          addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
3315          return rD;
3316       }
3317    }
3318
3319    if (e->tag == Iex_Unop) {
3320       switch (e->Iex.Unop.op) {
3321          case Iop_NegF64: {
3322             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3323             HReg dst = newVRegD(env);
3324             addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
3325             return dst;
3326          }
3327          case Iop_AbsF64: {
3328             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3329             HReg dst = newVRegD(env);
3330             addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
3331             return dst;
3332          }
3333          case Iop_F32toF64: {
3334             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3335             HReg dst = newVRegD(env);
3336             addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
3337             return dst;
3338          }
3339          case Iop_F16toF64: {
3340             HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3341             HReg dst = newVRegD(env);
3342             addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
3343             return dst;
3344          }
3345          case Iop_I32UtoF64:
3346          case Iop_I32StoF64: {
3347             /* Rounding mode is not involved here, since the
3348                conversion can always be done without loss of
3349                precision. */
3350             HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
3351             HReg dst   = newVRegD(env);
3352             Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
3353             ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
3354             addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
3355             return dst;
3356          }
3357          default:
3358             break;
3359       }
3360    }
3361
3362    if (e->tag == Iex_Binop) {
3363       switch (e->Iex.Binop.op) {
3364          case Iop_RoundF64toInt:
3365          case Iop_SqrtF64:
3366          case Iop_RecpExpF64: {
3367             HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3368             HReg dst = newVRegD(env);
3369             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3370             ARM64FpUnaryOp op = ARM64fpu_INVALID;
3371             switch (e->Iex.Binop.op) {
3372                case Iop_RoundF64toInt: op = ARM64fpu_RINT;  break;
3373                case Iop_SqrtF64:       op = ARM64fpu_SQRT;  break;
3374                case Iop_RecpExpF64:    op = ARM64fpu_RECPX; break;
3375                default: vassert(0);
3376             }
3377             addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
3378             return dst;
3379          }
3380          case Iop_I64StoF64:
3381          case Iop_I64UtoF64: {
3382             ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
3383                                    ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
3384             HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3385             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3386             HReg dstS = newVRegD(env);
3387             addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3388             return dstS;
3389          }
3390          default:
3391             break;
3392       }
3393    }
3394
3395    if (e->tag == Iex_Triop) {
3396       IRTriop*     triop = e->Iex.Triop.details;
3397       ARM64FpBinOp dblop = ARM64fpb_INVALID;
3398       switch (triop->op) {
3399          case Iop_DivF64: dblop = ARM64fpb_DIV; break;
3400          case Iop_MulF64: dblop = ARM64fpb_MUL; break;
3401          case Iop_SubF64: dblop = ARM64fpb_SUB; break;
3402          case Iop_AddF64: dblop = ARM64fpb_ADD; break;
3403          default: break;
3404       }
3405       if (dblop != ARM64fpb_INVALID) {
3406          HReg argL = iselDblExpr(env, triop->arg2);
3407          HReg argR = iselDblExpr(env, triop->arg3);
3408          HReg dst  = newVRegD(env);
3409          set_FPCR_rounding_mode(env, triop->arg1);
3410          addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
3411          return dst;
3412       }
3413    }
3414
3415    if (e->tag == Iex_Qop) {
3416       IRQop*       qop = e->Iex.Qop.details;
3417       ARM64FpTriOp triop = ARM64fpt_INVALID;
3418       switch (qop->op) {
3419          case Iop_MAddF64: triop = ARM64fpt_FMADD; break;
3420          case Iop_MSubF64: triop = ARM64fpt_FMSUB; break;
3421          default: break;
3422       }
3423       if (triop != ARM64fpt_INVALID) {
3424          HReg N = iselDblExpr(env, qop->arg2);
3425          HReg M = iselDblExpr(env, qop->arg3);
3426          HReg A = iselDblExpr(env, qop->arg4);
3427          HReg dst  = newVRegD(env);
3428          set_FPCR_rounding_mode(env, qop->arg1);
3429          addInstr(env, ARM64Instr_VTriD(triop, dst, N, M, A));
3430          return dst;
3431       }
3432    }
3433
3434    if (e->tag == Iex_ITE) {
3435       /* ITE(ccexpr, iftrue, iffalse) */
3436       ARM64CondCode cc;
3437       HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
3438       HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
3439       HReg dst = newVRegD(env);
3440       cc = iselCondCode_C(env, e->Iex.ITE.cond);
3441       addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
3442       return dst;
3443    }
3444
3445    ppIRExpr(e);
3446    vpanic("iselDblExpr_wrk");
3447 }
3448
3449
3450 /*---------------------------------------------------------*/
3451 /*--- ISEL: Floating point expressions (32 bit)         ---*/
3452 /*---------------------------------------------------------*/
3453
3454 /* Compute a 32-bit floating point value into a register, the identity
3455    of which is returned.  As with iselIntExpr_R, the reg may be either
3456    real or virtual; in any case it must not be changed by subsequent
3457    code emitted by the caller.  Values are generated into HRcFlt64
3458    registers despite the values themselves being Ity_F32s. */
3459
3460 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3461 {
3462    HReg r = iselFltExpr_wrk( env, e );
3463 #  if 0
3464    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3465 #  endif
3466    vassert(hregClass(r) == HRcFlt64);
3467    vassert(hregIsVirtual(r));
3468    return r;
3469 }
3470
3471 /* DO NOT CALL THIS DIRECTLY */
3472 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3473 {
3474    IRType ty = typeOfIRExpr(env->type_env,e);
3475    vassert(e);
3476    vassert(ty == Ity_F32);
3477
3478    if (e->tag == Iex_RdTmp) {
3479       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3480    }
3481
3482    if (e->tag == Iex_Const) {
3483       /* This is something of a kludge.  Since a 32 bit floating point
3484          zero is just .. all zeroes, just create a 64 bit zero word
3485          and transfer it.  This avoids having to create a SfromW
3486          instruction for this specific case. */
3487       IRConst* con = e->Iex.Const.con;
3488       if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
3489          HReg src = newVRegI(env);
3490          HReg dst = newVRegD(env);
3491          addInstr(env, ARM64Instr_Imm64(src, 0));
3492          addInstr(env, ARM64Instr_VDfromX(dst, src));
3493          return dst;
3494       }
3495       if (con->tag == Ico_F32) {
3496          HReg src = newVRegI(env);
3497          HReg dst = newVRegD(env);
3498          union { Float f32; UInt u32; } u;
3499          vassert(sizeof(u) == 4);
3500          u.f32 = con->Ico.F32;
3501          addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
3502          addInstr(env, ARM64Instr_VDfromX(dst, src));
3503          return dst;
3504       }
3505    }
3506
3507    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3508       vassert(e->Iex.Load.ty == Ity_F32);
3509       HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3510       HReg res  = newVRegD(env);
3511       addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
3512       return res;
3513    }
3514
3515    if (e->tag == Iex_Get) {
3516       Int offs = e->Iex.Get.offset;
3517       if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
3518          HReg rD = newVRegD(env);
3519          HReg rN = get_baseblock_register();
3520          addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
3521          return rD;
3522       }
3523    }
3524
3525    if (e->tag == Iex_Unop) {
3526       switch (e->Iex.Unop.op) {
3527          case Iop_NegF32: {
3528             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3529             HReg dst = newVRegD(env);
3530             addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
3531             return dst;
3532          }
3533          case Iop_AbsF32: {
3534             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3535             HReg dst = newVRegD(env);
3536             addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
3537             return dst;
3538          }
3539          case Iop_F16toF32: {
3540             HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3541             HReg dst = newVRegD(env);
3542             addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
3543             return dst;
3544          }
3545          default:
3546             break;
3547       }
3548    }
3549
3550    if (e->tag == Iex_Binop) {
3551       switch (e->Iex.Binop.op) {
3552          case Iop_RoundF32toInt:
3553          case Iop_SqrtF32:
3554          case Iop_RecpExpF32: {
3555             HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
3556             HReg dst = newVRegD(env);
3557             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3558             ARM64FpUnaryOp op = ARM64fpu_INVALID;
3559             switch (e->Iex.Binop.op) {
3560                case Iop_RoundF32toInt: op = ARM64fpu_RINT;  break;
3561                case Iop_SqrtF32:       op = ARM64fpu_SQRT;  break;
3562                case Iop_RecpExpF32:    op = ARM64fpu_RECPX; break;
3563                default: vassert(0);
3564             }
3565             addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
3566             return dst;
3567          }
3568          case Iop_F64toF32: {
3569             HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3570             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3571             HReg dstS = newVRegD(env);
3572             addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
3573             return dstS;
3574          }
3575          case Iop_I32UtoF32:
3576          case Iop_I32StoF32:
3577          case Iop_I64UtoF32:
3578          case Iop_I64StoF32: {
3579             ARM64CvtOp cvt_op = ARM64cvt_INVALID;
3580             switch (e->Iex.Binop.op) {
3581                case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
3582                case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
3583                case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
3584                case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
3585                default: vassert(0);
3586             }
3587             HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3588             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3589             HReg dstS = newVRegD(env);
3590             addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3591             return dstS;
3592          }
3593          default:
3594             break;
3595       }
3596    }
3597
3598    if (e->tag == Iex_Triop) {
3599       IRTriop*     triop = e->Iex.Triop.details;
3600       ARM64FpBinOp sglop = ARM64fpb_INVALID;
3601       switch (triop->op) {
3602          case Iop_DivF32: sglop = ARM64fpb_DIV; break;
3603          case Iop_MulF32: sglop = ARM64fpb_MUL; break;
3604          case Iop_SubF32: sglop = ARM64fpb_SUB; break;
3605          case Iop_AddF32: sglop = ARM64fpb_ADD; break;
3606          default: break;
3607       }
3608       if (sglop != ARM64fpb_INVALID) {
3609          HReg argL = iselFltExpr(env, triop->arg2);
3610          HReg argR = iselFltExpr(env, triop->arg3);
3611          HReg dst  = newVRegD(env);
3612          set_FPCR_rounding_mode(env, triop->arg1);
3613          addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
3614          return dst;
3615       }
3616    }
3617
3618    if (e->tag == Iex_ITE) {
3619       /* ITE(ccexpr, iftrue, iffalse) */
3620       ARM64CondCode cc;
3621       HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
3622       HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
3623       HReg dst = newVRegD(env);
3624       cc = iselCondCode_C(env, e->Iex.ITE.cond);
3625       addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
3626       return dst;
3627    }
3628
3629    if (e->tag == Iex_Qop) {
3630       IRQop*       qop = e->Iex.Qop.details;
3631       ARM64FpTriOp triop = ARM64fpt_INVALID;
3632       switch (qop->op) {
3633          case Iop_MAddF32: triop = ARM64fpt_FMADD; break;
3634          case Iop_MSubF32: triop = ARM64fpt_FMSUB; break;
3635       default: break;
3636       }
3637
3638       if (triop != ARM64fpt_INVALID) {
3639          HReg N = iselFltExpr(env, qop->arg2);
3640          HReg M = iselFltExpr(env, qop->arg3);
3641          HReg A = iselFltExpr(env, qop->arg4);
3642          HReg dst  = newVRegD(env);
3643          set_FPCR_rounding_mode(env, qop->arg1);
3644          addInstr(env, ARM64Instr_VTriS(triop, dst, N, M, A));
3645          return dst;
3646       }
3647    }
3648
3649    ppIRExpr(e);
3650    vpanic("iselFltExpr_wrk");
3651 }
3652
3653
3654 /*---------------------------------------------------------*/
3655 /*--- ISEL: Floating point expressions (16 bit)         ---*/
3656 /*---------------------------------------------------------*/
3657
3658 /* Compute a 16-bit floating point value into a register, the identity
3659    of which is returned.  As with iselIntExpr_R, the reg may be either
3660    real or virtual; in any case it must not be changed by subsequent
3661    code emitted by the caller.  Values are generated into HRcFlt64
3662    registers despite the values themselves being Ity_F16s. */
3663
3664 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3665 {
3666    HReg r = iselF16Expr_wrk( env, e );
3667 #  if 0
3668    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3669 #  endif
3670    vassert(hregClass(r) == HRcFlt64);
3671    vassert(hregIsVirtual(r));
3672    return r;
3673 }
3674
3675 /* DO NOT CALL THIS DIRECTLY */
3676 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3677 {
3678    IRType ty = typeOfIRExpr(env->type_env,e);
3679    vassert(e);
3680    vassert(ty == Ity_F16);
3681
3682    if (e->tag == Iex_RdTmp) {
3683       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3684    }
3685
3686    if (e->tag == Iex_Get) {
3687       Int offs = e->Iex.Get.offset;
3688       if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3689          HReg rD = newVRegD(env);
3690          HReg rN = get_baseblock_register();
3691          addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3692          return rD;
3693       }
3694    }
3695
3696    if (e->tag == Iex_Binop) {
3697       switch (e->Iex.Binop.op) {
3698          case Iop_F32toF16: {
3699             HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3700             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3701             HReg dstH = newVRegD(env);
3702             addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3703             return dstH;
3704          }
3705          case Iop_F64toF16: {
3706             HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3707             set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3708             HReg dstH = newVRegD(env);
3709             addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3710             return dstH;
3711          }
3712          default:
3713             break;
3714       }
3715    }
3716
3717    ppIRExpr(e);
3718    vpanic("iselF16Expr_wrk");
3719 }
3720
3721
3722 /*---------------------------------------------------------*/
3723 /*--- ISEL: Vector expressions (256 bit)                ---*/
3724 /*---------------------------------------------------------*/
3725
3726 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3727                            ISelEnv* env, IRExpr* e )
3728 {
3729    iselV256Expr_wrk( rHi, rLo, env, e );
3730    vassert(hregClass(*rHi) == HRcVec128);
3731    vassert(hregClass(*rLo) == HRcVec128);
3732    vassert(hregIsVirtual(*rHi));
3733    vassert(hregIsVirtual(*rLo));
3734 }
3735
3736 /* DO NOT CALL THIS DIRECTLY */
3737 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
3738                                ISelEnv* env, IRExpr* e )
3739 {
3740    vassert(e);
3741    IRType ty = typeOfIRExpr(env->type_env,e);
3742    vassert(ty == Ity_V256);
3743
3744    /* read 256-bit IRTemp */
3745    if (e->tag == Iex_RdTmp) {
3746       lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3747       return;
3748    }
3749
3750    if (e->tag == Iex_Binop) {
3751       switch (e->Iex.Binop.op) {
3752          case Iop_V128HLtoV256: {
3753             *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
3754             *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
3755             return;
3756          }
3757          case Iop_QandSQsh64x2:
3758          case Iop_QandSQsh32x4:
3759          case Iop_QandSQsh16x8:
3760          case Iop_QandSQsh8x16:
3761          case Iop_QandUQsh64x2:
3762          case Iop_QandUQsh32x4:
3763          case Iop_QandUQsh16x8:
3764          case Iop_QandUQsh8x16:
3765          case Iop_QandSQRsh64x2:
3766          case Iop_QandSQRsh32x4:
3767          case Iop_QandSQRsh16x8:
3768          case Iop_QandSQRsh8x16:
3769          case Iop_QandUQRsh64x2:
3770          case Iop_QandUQRsh32x4:
3771          case Iop_QandUQRsh16x8:
3772          case Iop_QandUQRsh8x16:
3773          {
3774             HReg argL  = iselV128Expr(env, e->Iex.Binop.arg1);
3775             HReg argR  = iselV128Expr(env, e->Iex.Binop.arg2);
3776             HReg fpsr  = newVRegI(env);
3777             HReg resHi = newVRegV(env);
3778             HReg resLo = newVRegV(env);
3779             ARM64VecBinOp op = ARM64vecb_INVALID;
3780             switch (e->Iex.Binop.op) {
3781                case Iop_QandSQsh64x2:  op = ARM64vecb_SQSHL64x2;  break;
3782                case Iop_QandSQsh32x4:  op = ARM64vecb_SQSHL32x4;  break;
3783                case Iop_QandSQsh16x8:  op = ARM64vecb_SQSHL16x8;  break;
3784                case Iop_QandSQsh8x16:  op = ARM64vecb_SQSHL8x16;  break;
3785                case Iop_QandUQsh64x2:  op = ARM64vecb_UQSHL64x2;  break;
3786                case Iop_QandUQsh32x4:  op = ARM64vecb_UQSHL32x4;  break;
3787                case Iop_QandUQsh16x8:  op = ARM64vecb_UQSHL16x8;  break;
3788                case Iop_QandUQsh8x16:  op = ARM64vecb_UQSHL8x16;  break;
3789                case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
3790                case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
3791                case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
3792                case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
3793                case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
3794                case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
3795                case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
3796                case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
3797                default: vassert(0);
3798             }
3799             /* Clear FPSR.Q, do the operation, and return both its result
3800                and the new value of FPSR.Q.  We can simply zero out FPSR
3801                since all the other bits have no relevance in VEX generated
3802                code. */
3803             addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3804             addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3805             addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
3806             addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3807             addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3808                                                        ARM64sh_SHR));
3809             ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3810             vassert(ril_one);
3811             addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
3812             /* Now we have: the main (shift) result in |resLo|, and the
3813                Q bit at the bottom of |fpsr|. */
3814             addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
3815             *rHi = resHi;
3816             *rLo = resLo;
3817             return;
3818          }
3819
3820          /* ... */
3821          default:
3822             break;
3823       } /* switch on the binop */
3824    } /* if (e->tag == Iex_Binop) */
3825
3826    ppIRExpr(e);
3827    vpanic("iselV256Expr_wrk");
3828 }
3829
3830
3831 /*---------------------------------------------------------*/
3832 /*--- ISEL: Statements                                  ---*/
3833 /*---------------------------------------------------------*/
3834
3835 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3836 {
3837    if (vex_traceflags & VEX_TRACE_VCODE) {
3838       vex_printf("\n-- ");
3839       ppIRStmt(stmt);
3840       vex_printf("\n");
3841    }
3842    switch (stmt->tag) {
3843
3844    /* --------- STORE --------- */
3845    /* little-endian write to memory */
3846    case Ist_Store: {
3847       IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3848       IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3849       IREndness end  = stmt->Ist.Store.end;
3850
3851       if (tya != Ity_I64 || end != Iend_LE)
3852          goto stmt_fail;
3853
3854       if (tyd == Ity_I64) {
3855          HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3856          ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3857          addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3858          return;
3859       }
3860       if (tyd == Ity_I32) {
3861          HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3862          ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3863          addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3864          return;
3865       }
3866       if (tyd == Ity_I16) {
3867          HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3868          ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3869          addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3870          return;
3871       }
3872       if (tyd == Ity_I8) {
3873          HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3874          ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3875          addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3876          return;
3877       }
3878       if (tyd == Ity_V128) {
3879          HReg qD   = iselV128Expr(env, stmt->Ist.Store.data);
3880          HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3881          addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3882          return;
3883       }
3884       if (tyd == Ity_F64) {
3885          HReg dD   = iselDblExpr(env, stmt->Ist.Store.data);
3886          HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3887          addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
3888          return;
3889       }
3890       if (tyd == Ity_F32) {
3891          HReg sD   = iselFltExpr(env, stmt->Ist.Store.data);
3892          HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3893          addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
3894          return;
3895       }
3896       break;
3897    }
3898
3899    /* --------- PUT --------- */
3900    /* write guest state, fixed offset */
3901    case Ist_Put: {
3902       IRType tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3903       UInt   offs = (UInt)stmt->Ist.Put.offset;
3904       if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
3905          HReg rD = INVALID_HREG;
3906          if (isZeroU64(stmt->Ist.Put.data)) {
3907             // In this context, XZR_XSP denotes the zero register.
3908             rD = hregARM64_XZR_XSP();
3909          } else {
3910             rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3911          }
3912          ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
3913          addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3914          return;
3915       }
3916       if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
3917          HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3918          ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
3919          addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3920          return;
3921       }
3922       if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
3923          HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3924          ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
3925          addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3926          return;
3927       }
3928       if (tyd == Ity_I8 && offs < (1<<12)) {
3929          HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3930          ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
3931          addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3932          return;
3933       }
3934       if (tyd == Ity_V128 && offs < (1<<12)) {
3935          HReg qD   = iselV128Expr(env, stmt->Ist.Put.data);
3936          HReg addr = mk_baseblock_128bit_access_addr(env, offs);
3937          addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3938          return;
3939       }
3940       if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
3941          HReg dD   = iselDblExpr(env, stmt->Ist.Put.data);
3942          HReg bbp  = get_baseblock_register();
3943          addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
3944          return;
3945       }
3946       if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
3947          HReg sD   = iselFltExpr(env, stmt->Ist.Put.data);
3948          HReg bbp  = get_baseblock_register();
3949          addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
3950          return;
3951       }
3952       if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
3953          HReg hD   = iselF16Expr(env, stmt->Ist.Put.data);
3954          HReg bbp  = get_baseblock_register();
3955          addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
3956          return;
3957       }
3958
3959       break;
3960    }
3961
3962    /* --------- TMP --------- */
3963    /* assign value to temporary */
3964    case Ist_WrTmp: {
3965       IRTemp tmp = stmt->Ist.WrTmp.tmp;
3966       IRType ty  = typeOfIRTemp(env->type_env, tmp);
3967
3968       if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3969          /* We could do a lot better here.  But for the time being: */
3970          HReg dst = lookupIRTemp(env, tmp);
3971          HReg rD  = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
3972          addInstr(env, ARM64Instr_MovI(dst, rD));
3973          return;
3974       }
3975       if (ty == Ity_I1) {
3976          /* Here, we are generating a I1 value into a 64 bit register.
3977             Make sure the value in the register is only zero or one,
3978             but no other.  This allows optimisation of the
3979             1Uto64(tmp:I1) case, by making it simply a copy of the
3980             register holding 'tmp'.  The point being that the value in
3981             the register holding 'tmp' can only have been created
3982             here.  LATER: that seems dangerous; safer to do 'tmp & 1'
3983             in that case.  Also, could do this just with a single CINC
3984             insn. */
3985          /* CLONE-01 */
3986          HReg zero = hregARM64_XZR_XSP(); // XZR in this context
3987          HReg one  = newVRegI(env);
3988          HReg dst  = lookupIRTemp(env, tmp);
3989          addInstr(env, ARM64Instr_Imm64(one,  1));
3990          ARM64CondCode cc = iselCondCode_C(env, stmt->Ist.WrTmp.data);
3991          addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
3992          return;
3993       }
3994       if (ty == Ity_F64) {
3995          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3996          HReg dst = lookupIRTemp(env, tmp);
3997          addInstr(env, ARM64Instr_VMov(8, dst, src));
3998          return;
3999       }
4000       if (ty == Ity_F32) {
4001          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4002          HReg dst = lookupIRTemp(env, tmp);
4003          addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
4004          return;
4005       }
4006       if (ty == Ity_F16) {
4007          HReg src = iselF16Expr(env, stmt->Ist.WrTmp.data);
4008          HReg dst = lookupIRTemp(env, tmp);
4009          addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
4010          return;
4011       }
4012       if (ty == Ity_V128) {
4013          HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
4014          HReg dst = lookupIRTemp(env, tmp);
4015          addInstr(env, ARM64Instr_VMov(16, dst, src));
4016          return;
4017       }
4018       if (ty == Ity_V256) {
4019          HReg srcHi, srcLo, dstHi, dstLo;
4020          iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
4021          lookupIRTempPair( &dstHi, &dstLo, env, tmp);
4022          addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
4023          addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
4024          return;
4025       }
4026       break;
4027    }
4028
4029    /* --------- Call to DIRTY helper --------- */
4030    /* call complex ("dirty") helper function */
4031    case Ist_Dirty: {
4032       IRDirty* d = stmt->Ist.Dirty.details;
4033
4034       /* Figure out the return type, if any. */
4035       IRType retty = Ity_INVALID;
4036       if (d->tmp != IRTemp_INVALID)
4037          retty = typeOfIRTemp(env->type_env, d->tmp);
4038
4039       Bool retty_ok = False;
4040       switch (retty) {
4041          case Ity_INVALID: /* function doesn't return anything */
4042          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4043          case Ity_V128:
4044             retty_ok = True; break;
4045          default:
4046             break;
4047       }
4048       if (!retty_ok)
4049          break; /* will go to stmt_fail: */
4050
4051       /* Marshal args, do the call, and set the return value to 0x555..555
4052          if this is a conditional call that returns a value and the
4053          call is skipped. */
4054       UInt   addToSp = 0;
4055       RetLoc rloc    = mk_RetLoc_INVALID();
4056       doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4057       vassert(is_sane_RetLoc(rloc));
4058
4059       /* Now figure out what to do with the returned value, if any. */
4060       switch (retty) {
4061          case Ity_INVALID: {
4062             /* No return value.  Nothing to do. */
4063             vassert(d->tmp == IRTemp_INVALID);
4064             vassert(rloc.pri == RLPri_None);
4065             vassert(addToSp == 0);
4066             return;
4067          }
4068          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
4069             vassert(rloc.pri == RLPri_Int);
4070             vassert(addToSp == 0);
4071             /* The returned value is in x0.  Park it in the register
4072                associated with tmp. */
4073             HReg dst = lookupIRTemp(env, d->tmp);
4074             addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
4075             return;
4076          }
4077          case Ity_V128: {
4078             /* The returned value is on the stack, and *retloc tells
4079                us where.  Fish it off the stack and then move the
4080                stack pointer upwards to clear it, as directed by
4081                doHelperCall. */
4082             vassert(rloc.pri == RLPri_V128SpRel);
4083             vassert(rloc.spOff < 256); // stay sane
4084             vassert(addToSp >= 16); // ditto
4085             vassert(addToSp < 256); // ditto
4086             HReg dst = lookupIRTemp(env, d->tmp);
4087             HReg tmp = newVRegI(env); // the address of the returned value
4088             addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
4089             addInstr(env, ARM64Instr_Arith(tmp, tmp,
4090                                            ARM64RIA_I12((UShort)rloc.spOff, 0),
4091                                            True/*isAdd*/ ));
4092             addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
4093             addInstr(env, ARM64Instr_AddToSP(addToSp));
4094             return;
4095          }
4096          default:
4097             /*NOTREACHED*/
4098             vassert(0);
4099       }
4100       break;
4101    }
4102
4103    /* --------- Load Linked and Store Conditional --------- */
4104    case Ist_LLSC: {
4105       if (stmt->Ist.LLSC.storedata == NULL) {
4106          /* LL */
4107          IRTemp res = stmt->Ist.LLSC.result;
4108          IRType ty  = typeOfIRTemp(env->type_env, res);
4109          if (ty == Ity_I64 || ty == Ity_I32
4110              || ty == Ity_I16 || ty == Ity_I8) {
4111             Int  szB   = 0;
4112             HReg r_dst = lookupIRTemp(env, res);
4113             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
4114             switch (ty) {
4115                case Ity_I8:  szB = 1; break;
4116                case Ity_I16: szB = 2; break;
4117                case Ity_I32: szB = 4; break;
4118                case Ity_I64: szB = 8; break;
4119                default:      vassert(0);
4120             }
4121             addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
4122             addInstr(env, ARM64Instr_LdrEX(szB));
4123             addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
4124             return;
4125          }
4126          goto stmt_fail;
4127       } else {
4128          /* SC */
4129          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
4130          if (tyd == Ity_I64 || tyd == Ity_I32
4131              || tyd == Ity_I16 || tyd == Ity_I8) {
4132             Int  szB = 0;
4133             HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
4134             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
4135             switch (tyd) {
4136                case Ity_I8:  szB = 1; break;
4137                case Ity_I16: szB = 2; break;
4138                case Ity_I32: szB = 4; break;
4139                case Ity_I64: szB = 8; break;
4140                default:      vassert(0);
4141             }
4142             addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
4143             addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
4144             addInstr(env, ARM64Instr_StrEX(szB));
4145          } else {
4146             goto stmt_fail;
4147          }
4148          /* now r0 is 1 if failed, 0 if success.  Change to IR
4149             conventions (0 is fail, 1 is success).  Also transfer
4150             result to r_res. */
4151          IRTemp    res   = stmt->Ist.LLSC.result;
4152          IRType    ty    = typeOfIRTemp(env->type_env, res);
4153          HReg      r_res = lookupIRTemp(env, res);
4154          ARM64RIL* one   = mb_mkARM64RIL_I(1);
4155          vassert(ty == Ity_I1);
4156          vassert(one);
4157          addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
4158                                         ARM64lo_XOR));
4159          /* And be conservative -- mask off all but the lowest bit. */
4160          addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
4161                                         ARM64lo_AND));
4162          return;
4163       }
4164       break;
4165    }
4166
4167    /* --------- ACAS --------- */
4168    case Ist_CAS: {
4169       if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
4170          /* "normal" singleton CAS */
4171          UChar  sz;
4172          IRCAS* cas = stmt->Ist.CAS.details;
4173          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
4174          switch (ty) {
4175             case Ity_I64: sz = 8; break;
4176             case Ity_I32: sz = 4; break;
4177             case Ity_I16: sz = 2; break;
4178             case Ity_I8:  sz = 1; break;
4179             default: goto unhandled_cas;
4180          }
4181          HReg rAddr = iselIntExpr_R(env, cas->addr);
4182          HReg rExpd = iselIntExpr_R(env, cas->expdLo);
4183          HReg rData = iselIntExpr_R(env, cas->dataLo);
4184          vassert(cas->expdHi == NULL);
4185          vassert(cas->dataHi == NULL);
4186          addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr));
4187          addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd));
4188          addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData));
4189          addInstr(env, ARM64Instr_CAS(sz));
4190          /* Now we have the lowest szB bytes of x1 are either equal to
4191             the lowest szB bytes of x5, indicating success, or they
4192             aren't, indicating failure. */
4193          HReg rResult = hregARM64_X1();
4194          switch (sz) {
4195             case 8:  break;
4196             case 4:  rResult = widen_z_32_to_64(env, rResult); break;
4197             case 2:  rResult = widen_z_16_to_64(env, rResult); break;
4198             case 1:  rResult = widen_z_8_to_64(env, rResult); break;
4199             default: vassert(0);
4200          }
4201          // "old" in this case is interpreted somewhat liberally, per
4202          // the previous comment.
4203          HReg rOld = lookupIRTemp(env, cas->oldLo);
4204          addInstr(env, ARM64Instr_MovI(rOld, rResult));
4205          return;
4206       }
4207       else {
4208          /* Paired register CAS, i.e. CASP */
4209          UChar  sz;
4210          IRCAS* cas = stmt->Ist.CAS.details;
4211          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
4212          switch (ty) {
4213             case Ity_I64: sz = 8; break;
4214             case Ity_I32: sz = 4; break;
4215             default: goto unhandled_cas;
4216          }
4217          HReg rAddr = iselIntExpr_R(env, cas->addr);
4218
4219          HReg rExpd0 = iselIntExpr_R(env, cas->expdLo);
4220          vassert(cas->expdHi != NULL);
4221          HReg rExpd1 = iselIntExpr_R(env, cas->expdHi);
4222
4223          HReg rData0 = iselIntExpr_R(env, cas->dataLo);
4224          vassert(cas->dataHi != NULL);
4225          HReg rData1 = iselIntExpr_R(env, cas->dataHi);
4226
4227          addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rAddr));
4228
4229          addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rExpd0));
4230          addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd1));
4231
4232          addInstr(env, ARM64Instr_MovI(hregARM64_X6(), rData0));
4233          addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData1));
4234
4235          addInstr(env, ARM64Instr_CASP(sz));
4236
4237          HReg rResult0 = hregARM64_X0();
4238          HReg rResult1 = hregARM64_X1();
4239          switch (sz) {
4240             case 8:  break;
4241             case 4:  rResult0 = widen_z_32_to_64(env, rResult0);
4242                      rResult1 = widen_z_32_to_64(env, rResult1);
4243                      break;
4244             default: vassert(0);
4245          }
4246          HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4247          HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4248          addInstr(env, ARM64Instr_MovI(rOldLo, rResult0));
4249          addInstr(env, ARM64Instr_MovI(rOldHi, rResult1));
4250          return;
4251       }
4252       unhandled_cas:
4253       break;
4254    }
4255
4256    /* --------- MEM FENCE --------- */
4257    case Ist_MBE:
4258       switch (stmt->Ist.MBE.event) {
4259          case Imbe_Fence:
4260             addInstr(env, ARM64Instr_MFence());
4261             return;
4262          case Imbe_CancelReservation:
4263             addInstr(env, ARM64Instr_ClrEX());
4264             return;
4265          default:
4266             break;
4267       }
4268       break;
4269
4270    /* --------- INSTR MARK --------- */
4271    /* Doesn't generate any executable code ... */
4272    case Ist_IMark:
4273        return;
4274
4275    /* --------- ABI HINT --------- */
4276    /* These have no meaning (denotation in the IR) and so we ignore
4277       them ... if any actually made it this far. */
4278    case Ist_AbiHint:
4279        return;
4280
4281    /* --------- NO-OP --------- */
4282    case Ist_NoOp:
4283        return;
4284
4285    /* --------- EXIT --------- */
4286    case Ist_Exit: {
4287       if (stmt->Ist.Exit.dst->tag != Ico_U64)
4288          vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
4289
4290       ARM64CondCode cc
4291          = iselCondCode_C(env, stmt->Ist.Exit.guard);
4292       ARM64AMode* amPC
4293          = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
4294
4295       /* Case: boring transfer to known address */
4296       if (stmt->Ist.Exit.jk == Ijk_Boring) {
4297          if (env->chainingAllowed) {
4298             /* .. almost always true .. */
4299             /* Skip the event check at the dst if this is a forwards
4300                edge. */
4301             Bool toFastEP
4302                = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
4303             if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4304             addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
4305                                              amPC, cc, toFastEP));
4306          } else {
4307             /* .. very occasionally .. */
4308             /* We can't use chaining, so ask for an assisted transfer,
4309                as that's the only alternative that is allowable. */
4310             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4311             addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
4312          }
4313          return;
4314       }
4315
4316       /* Case: assisted transfer to arbitrary address */
4317       switch (stmt->Ist.Exit.jk) {
4318          /* Keep this list in sync with that for iselNext below */
4319          case Ijk_ClientReq:
4320          case Ijk_NoDecode:
4321          case Ijk_NoRedir:
4322          case Ijk_Sys_syscall:
4323          case Ijk_InvalICache:
4324          case Ijk_FlushDCache:
4325          case Ijk_SigTRAP:
4326          case Ijk_Yield: {
4327             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4328             addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
4329                                                stmt->Ist.Exit.jk));
4330             return;
4331          }
4332          default:
4333             break;
4334       }
4335
4336       /* Do we ever expect to see any other kind? */
4337       goto stmt_fail;
4338    }
4339
4340    default: break;
4341    }
4342   stmt_fail:
4343    ppIRStmt(stmt);
4344    vpanic("iselStmt");
4345 }
4346
4347
4348 /*---------------------------------------------------------*/
4349 /*--- ISEL: Basic block terminators (Nexts)             ---*/
4350 /*---------------------------------------------------------*/
4351
4352 static void iselNext ( ISelEnv* env,
4353                        IRExpr* next, IRJumpKind jk, Int offsIP )
4354 {
4355    if (vex_traceflags & VEX_TRACE_VCODE) {
4356       vex_printf( "\n-- PUT(%d) = ", offsIP);
4357       ppIRExpr( next );
4358       vex_printf( "; exit-");
4359       ppIRJumpKind(jk);
4360       vex_printf( "\n");
4361    }
4362
4363    /* Case: boring transfer to known address */
4364    if (next->tag == Iex_Const) {
4365       IRConst* cdst = next->Iex.Const.con;
4366       vassert(cdst->tag == Ico_U64);
4367       if (jk == Ijk_Boring || jk == Ijk_Call) {
4368          /* Boring transfer to known address */
4369          ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4370          if (env->chainingAllowed) {
4371             /* .. almost always true .. */
4372             /* Skip the event check at the dst if this is a forwards
4373                edge. */
4374             Bool toFastEP
4375                = ((Addr64)cdst->Ico.U64) > env->max_ga;
4376             if (0) vex_printf("%s", toFastEP ? "X" : ".");
4377             addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
4378                                              amPC, ARM64cc_AL,
4379                                              toFastEP));
4380          } else {
4381             /* .. very occasionally .. */
4382             /* We can't use chaining, so ask for an assisted transfer,
4383                as that's the only alternative that is allowable. */
4384             HReg r = iselIntExpr_R(env, next);
4385             addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4386                                                Ijk_Boring));
4387          }
4388          return;
4389       }
4390    }
4391
4392    /* Case: call/return (==boring) transfer to any address */
4393    switch (jk) {
4394       case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4395          HReg        r    = iselIntExpr_R(env, next);
4396          ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4397          if (env->chainingAllowed) {
4398             addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
4399          } else {
4400             addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4401                                                Ijk_Boring));
4402          }
4403          return;
4404       }
4405       default:
4406          break;
4407    }
4408
4409    /* Case: assisted transfer to arbitrary address */
4410    switch (jk) {
4411       /* Keep this list in sync with that for Ist_Exit above */
4412       case Ijk_ClientReq:
4413       case Ijk_NoDecode:
4414       case Ijk_NoRedir:
4415       case Ijk_Sys_syscall:
4416       case Ijk_InvalICache:
4417       case Ijk_FlushDCache:
4418       case Ijk_SigTRAP:
4419       case Ijk_Yield:
4420       {
4421          HReg        r    = iselIntExpr_R(env, next);
4422          ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4423          addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
4424          return;
4425       }
4426       default:
4427          break;
4428    }
4429
4430    vex_printf( "\n-- PUT(%d) = ", offsIP);
4431    ppIRExpr( next );
4432    vex_printf( "; exit-");
4433    ppIRJumpKind(jk);
4434    vex_printf( "\n");
4435    vassert(0); // are we expecting any other kind?
4436 }
4437
4438
4439 /*---------------------------------------------------------*/
4440 /*--- Insn selector top-level                           ---*/
4441 /*---------------------------------------------------------*/
4442
4443 /* Translate an entire SB to arm64 code. */
4444
4445 HInstrArray* iselSB_ARM64 ( const IRSB* bb,
4446                             VexArch      arch_host,
4447                             const VexArchInfo* archinfo_host,
4448                             const VexAbiInfo*  vbi/*UNUSED*/,
4449                             Int offs_Host_EvC_Counter,
4450                             Int offs_Host_EvC_FailAddr,
4451                             Bool chainingAllowed,
4452                             Bool addProfInc,
4453                             Addr max_ga )
4454 {
4455    Int        i, j;
4456    HReg       hreg, hregHI;
4457    ISelEnv*   env;
4458    UInt       hwcaps_host = archinfo_host->hwcaps;
4459    ARM64AMode *amCounter, *amFailAddr;
4460
4461    /* sanity ... */
4462    vassert(arch_host == VexArchARM64);
4463
4464    /* Check that the host's endianness is as expected. */
4465    vassert(archinfo_host->endness == VexEndnessLE);
4466
4467    /* guard against unexpected space regressions */
4468    vassert(sizeof(ARM64Instr) <= 32);
4469
4470    /* Make up an initial environment to use. */
4471    env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4472    env->vreg_ctr = 0;
4473
4474    /* Set up output code array. */
4475    env->code = newHInstrArray();
4476
4477    /* Copy BB's type env. */
4478    env->type_env = bb->tyenv;
4479
4480    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
4481       change as we go along. */
4482    env->n_vregmap = bb->tyenv->types_used;
4483    env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4484    env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4485
4486    /* and finally ... */
4487    env->chainingAllowed = chainingAllowed;
4488    env->hwcaps          = hwcaps_host;
4489    env->previous_rm     = NULL;
4490    env->max_ga          = max_ga;
4491
4492    /* For each IR temporary, allocate a suitably-kinded virtual
4493       register. */
4494    j = 0;
4495    for (i = 0; i < env->n_vregmap; i++) {
4496       hregHI = hreg = INVALID_HREG;
4497       switch (bb->tyenv->types[i]) {
4498          case Ity_I1:
4499          case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
4500             hreg = mkHReg(True, HRcInt64, 0, j++);
4501             break;
4502          case Ity_I128:
4503             hreg   = mkHReg(True, HRcInt64, 0, j++);
4504             hregHI = mkHReg(True, HRcInt64, 0, j++);
4505             break;
4506          case Ity_F16: // we'll use HRcFlt64 regs for F16 too
4507          case Ity_F32: // we'll use HRcFlt64 regs for F32 too
4508          case Ity_F64:
4509             hreg = mkHReg(True, HRcFlt64, 0, j++);
4510             break;
4511          case Ity_V128:
4512             hreg = mkHReg(True, HRcVec128, 0, j++);
4513             break;
4514          case Ity_V256:
4515             hreg   = mkHReg(True, HRcVec128, 0, j++);
4516             hregHI = mkHReg(True, HRcVec128, 0, j++);
4517             break;
4518          default:
4519             ppIRType(bb->tyenv->types[i]);
4520             vpanic("iselBB(arm64): IRTemp type");
4521       }
4522       env->vregmap[i]   = hreg;
4523       env->vregmapHI[i] = hregHI;
4524    }
4525    env->vreg_ctr = j;
4526
4527    /* The very first instruction must be an event check. */
4528    amCounter  = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
4529    amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
4530    addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
4531
4532    /* Possibly a block counter increment (for profiling).  At this
4533       point we don't know the address of the counter, so just pretend
4534       it is zero.  It will have to be patched later, but before this
4535       translation is used, by a call to LibVEX_patchProfCtr. */
4536    if (addProfInc) {
4537       addInstr(env, ARM64Instr_ProfInc());
4538    }
4539
4540    /* Ok, finally we can iterate over the statements. */
4541    for (i = 0; i < bb->stmts_used; i++)
4542       iselStmt(env, bb->stmts[i]);
4543
4544    iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4545
4546    /* record the number of vregs we used. */
4547    env->code->n_vregs = env->vreg_ctr;
4548    return env->code;
4549 }
4550
4551
4552 /*---------------------------------------------------------------*/
4553 /*--- end                                   host_arm64_isel.c ---*/
4554 /*---------------------------------------------------------------*/