2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "libvex_basictypes.h"
30 #include "libvex_ir.h"
31 #include "libvex.h"
32 #include "ir_match.h"
34 #include "main_util.h"
35 #include "main_globals.h"
36 #include "host_generic_regs.h"
37 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
38 #include "host_arm64_defs.h"
41 /*---------------------------------------------------------*/
42 /*--- ISelEnv ---*/
43 /*---------------------------------------------------------*/
45 /* This carries around:
47 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
48 might encounter. This is computed before insn selection starts,
49 and does not change.
51 - A mapping from IRTemp to HReg. This tells the insn selector
52 which virtual register is associated with each IRTemp temporary.
53 This is computed before insn selection starts, and does not
54 change. We expect this mapping to map precisely the same set of
55 IRTemps as the type mapping does.
57 |vregmap| holds the primary register for the IRTemp.
58 |vregmapHI| is only used for 128-bit integer-typed
59 IRTemps. It holds the identity of a second
60 64-bit virtual HReg, which holds the high half
61 of the value.
63 - The code array, that is, the insns selected so far.
65 - A counter, for generating new virtual registers.
67 - The host hardware capabilities word. This is set at the start
68 and does not change.
70 - A Bool for indicating whether we may generate chain-me
71 instructions for control flow transfers, or whether we must use
72 XAssisted.
74 - The maximum guest address of any guest insn in this block.
75 Actually, the address of the highest-addressed byte from any insn
76 in this block. Is set at the start and does not change. This is
77 used for detecting jumps which are definitely forward-edges from
78 this block, and therefore can be made (chained) to the fast entry
79 point of the destination, thereby avoiding the destination's
80 event check.
82 - An IRExpr*, which may be NULL, holding the IR expression (an
83 IRRoundingMode-encoded value) to which the FPU's rounding mode
84 was most recently set. Setting to NULL is always safe. Used to
85 avoid redundant settings of the FPU's rounding mode, as
86 described in set_FPCR_rounding_mode below.
88 Note, this is all (well, mostly) host-independent.
91 typedef
92 struct {
93 /* Constants -- set at the start and do not change. */
94 IRTypeEnv* type_env;
96 HReg* vregmap;
97 HReg* vregmapHI;
98 Int n_vregmap;
100 UInt hwcaps;
102 Bool chainingAllowed;
103 Addr64 max_ga;
105 /* These are modified as we go along. */
106 HInstrArray* code;
107 Int vreg_ctr;
109 IRExpr* previous_rm;
111 ISelEnv;
113 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
115 vassert(tmp >= 0);
116 vassert(tmp < env->n_vregmap);
117 return env->vregmap[tmp];
120 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
121 ISelEnv* env, IRTemp tmp )
123 vassert(tmp >= 0);
124 vassert(tmp < env->n_vregmap);
125 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
126 *vrLO = env->vregmap[tmp];
127 *vrHI = env->vregmapHI[tmp];
130 static void addInstr ( ISelEnv* env, ARM64Instr* instr )
132 addHInstr(env->code, instr);
133 if (vex_traceflags & VEX_TRACE_VCODE) {
134 ppARM64Instr(instr);
135 vex_printf("\n");
139 static HReg newVRegI ( ISelEnv* env )
141 HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
142 env->vreg_ctr++;
143 return reg;
146 static HReg newVRegD ( ISelEnv* env )
148 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
149 env->vreg_ctr++;
150 return reg;
153 static HReg newVRegV ( ISelEnv* env )
155 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
156 env->vreg_ctr++;
157 return reg;
161 /*---------------------------------------------------------*/
162 /*--- ISEL: Forward declarations ---*/
163 /*---------------------------------------------------------*/
165 /* These are organised as iselXXX and iselXXX_wrk pairs. The
166 iselXXX_wrk do the real work, but are not to be called directly.
167 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
168 checks that all returned registers are virtual. You should not
169 call the _wrk version directly.
171 Because some forms of ARM64 memory amodes are implicitly scaled by
172 the access size, iselIntExpr_AMode takes an IRType which tells it
173 the type of the access for which the amode is to be used. This
174 type needs to be correct, else you'll get incorrect code.
176 static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
177 IRExpr* e, IRType dty );
178 static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
179 IRExpr* e, IRType dty );
181 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
182 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
184 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
185 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
187 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
188 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
190 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
191 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
193 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
194 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
196 static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
197 ISelEnv* env, IRExpr* e );
198 static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
199 ISelEnv* env, IRExpr* e );
201 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
202 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
204 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
205 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
207 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
208 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e );
210 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
211 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
213 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
214 ISelEnv* env, IRExpr* e );
215 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
216 ISelEnv* env, IRExpr* e );
218 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
221 /*---------------------------------------------------------*/
222 /*--- ISEL: Misc helpers ---*/
223 /*---------------------------------------------------------*/
225 /* Generate an amode suitable for a 64-bit sized access relative to
226 the baseblock register (X21). This generates an RI12 amode, which
227 means it's scaled by the access size, which is why the access size
228 -- 64 bit -- is stated explicitly here. Consequently |off| needs
229 to be divisible by 8. */
230 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
232 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
233 vassert((off & 7) == 0); /* ditto */
234 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
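/* Illustrative example: a 64-bit access at guest state offset 24 yields
   ARM64AMode_RI12(X21, 3, 8), because the RI12 immediate counts 8-byte
   units; an offset such as 28 would trip the alignment assertion above. */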
237 /* Ditto, for 32 bit accesses. */
238 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
240 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
241 vassert((off & 3) == 0); /* ditto */
242 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
245 /* Ditto, for 16 bit accesses. */
246 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
248 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
249 vassert((off & 1) == 0); /* ditto */
250 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
253 /* Ditto, for 8 bit accesses. */
254 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
256 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
257 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
260 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
262 vassert(off < (1<<12));
263 HReg r = newVRegI(env);
264 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
265 ARM64RIA_I12(off,0), True/*isAdd*/));
266 return r;
269 static HReg get_baseblock_register ( void )
271 return hregARM64_X21();
274 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
275 a new register, and return the new register. */
276 static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
278 HReg dst = newVRegI(env);
279 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
280 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
281 return dst;
284 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
285 a new register, and return the new register. */
286 static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
288 HReg dst = newVRegI(env);
289 ARM64RI6* n48 = ARM64RI6_I6(48);
290 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
291 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
292 return dst;
295 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
296 a new register, and return the new register. */
297 static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
299 HReg dst = newVRegI(env);
300 ARM64RIL* mask = ARM64RIL_I13(1, 0, 15); /* encodes 0xFFFF */
301 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
302 return dst;
305 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
306 a new register, and return the new register. */
307 static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
309 HReg dst = newVRegI(env);
310 ARM64RI6* n32 = ARM64RI6_I6(32);
311 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
312 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
313 return dst;
316 /* Generate code to sign extend an 8 bit value in 'src' to 64 bits, in
317 a new register, and return the new register. */
318 static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
320 HReg dst = newVRegI(env);
321 ARM64RI6* n56 = ARM64RI6_I6(56);
322 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
323 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
324 return dst;
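/* Generate code to zero extend an 8 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */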
327 static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
329 HReg dst = newVRegI(env);
330 ARM64RIL* mask = ARM64RIL_I13(1, 0, 7); /* encodes 0xFF */
331 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
332 return dst;
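/* Illustration of the two widening styles above: for src holding 0x80,
   widen_s_8_to_64 gives 0xFFFFFFFFFFFFFF80 via the SHL-56/SAR-56 pair,
   while widen_z_8_to_64 gives 0x0000000000000080 by ANDing with the
   logical-immediate mask 0xFF. */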
335 /* Is this IRExpr_Const(IRConst_U64(0)) ? */
336 static Bool isZeroU64 ( IRExpr* e ) {
337 if (e->tag != Iex_Const) return False;
338 IRConst* con = e->Iex.Const.con;
339 vassert(con->tag == Ico_U64);
340 return con->Ico.U64 == 0;
344 /*---------------------------------------------------------*/
345 /*--- ISEL: FP rounding mode helpers ---*/
346 /*---------------------------------------------------------*/
348 /* Set the FP rounding mode: 'mode' is an I32-typed expression
349 denoting a value in the range 0 .. 3, indicating a round mode
350 encoded as per type IRRoundingMode -- the first four values only
351 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
352 FPCR to have the same rounding.
354 For speed & simplicity, we're setting the *entire* FPCR here.
356 Setting the rounding mode is expensive. So this function tries to
357 avoid repeatedly setting the rounding mode to the same thing by
358 first comparing 'mode' to the 'mode' tree supplied in the previous
359 call to this function, if any. (The previous value is stored in
360 env->previous_rm.) If 'mode' is a single IR temporary 't' and
361 env->previous_rm is also just 't', then the setting is skipped.
363 This is safe because of the SSA property of IR: an IR temporary can
364 only be defined once and so will have the same value regardless of
365 where it appears in the block. Cool stuff, SSA.
367 A safety condition: all attempts to set the RM must be aware of
368 this mechanism - by being routed through the functions here.
370 Of course this only helps in blocks where the RM is set more than
371 once and it is set to the same value each time, *and* that value is
372 held in the same IR temporary each time. In order to assure the
373 latter as much as possible, the IR optimiser takes care to do CSE
374 on any block with any sign of floating point activity.
376 static
377 void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
379 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
381 /* Do we need to do anything? */
382 if (env->previous_rm
383 && env->previous_rm->tag == Iex_RdTmp
384 && mode->tag == Iex_RdTmp
385 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
386 /* no - setting it to what it was before. */
387 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
388 return;
391 /* No luck - we better set it, and remember what we set it to. */
392 env->previous_rm = mode;
394 /* Only supporting the rounding-mode bits - the rest of FPCR is set
395 to zero - so we can set the whole register at once (faster). */
397 /* This isn't simple, because 'mode' carries an IR rounding
398 encoding, and we need to translate that to an ARM64 FP one:
399 The IR encoding:
400 00 to nearest (the default)
401 10 to +infinity
402 01 to -infinity
403 11 to zero
404 The ARM64 FP encoding:
405 00 to nearest
406 01 to +infinity
407 10 to -infinity
408 11 to zero
409 Easy enough to do; just swap the two bits.
411 HReg irrm = iselIntExpr_R(env, mode);
412 HReg tL = newVRegI(env);
413 HReg tR = newVRegI(env);
414 HReg t3 = newVRegI(env);
415 /* tL = irrm << 1;
416 tR = irrm >> 1; if we're lucky, these will issue together
417 tL &= 2;
418 tR &= 1; ditto
419 t3 = tL | tR;
420 t3 <<= 22;
421 msr fpcr, t3
423 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
424 ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
425 vassert(ril_one && ril_two);
426 addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
427 addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
428 addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
429 addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
430 addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
431 addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
432 addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
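/* Worked example (illustrative): for mode == Irrm_PosINF, irrm holds 2
   (binary 10).  Then tL = (2 << 1) & 2 = 0 and tR = (2 >> 1) & 1 = 1,
   so t3 = 1 (binary 01), the ARM64 encoding of round-towards-+infinity.
   Shifting left by 22 places it in FPCR.RMode, bits [23:22]. */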
436 /*---------------------------------------------------------*/
437 /*--- ISEL: Function call helpers ---*/
438 /*---------------------------------------------------------*/
440 /* Used only in doHelperCall. See big comment in doHelperCall re
441 handling of register-parameter args. This function figures out
442 whether evaluation of an expression might require use of a fixed
443 register. If in doubt return True (safe but suboptimal).
445 static
446 Bool mightRequireFixedRegs ( IRExpr* e )
448 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
449 // These are always "safe" -- either a copy of SP in some
450 // arbitrary vreg, or a copy of x21, respectively.
451 return False;
453 /* Else it's a "normal" expression. */
454 switch (e->tag) {
455 case Iex_RdTmp: case Iex_Const: case Iex_Get:
456 return False;
457 default:
458 return True;
463 /* Do a complete function call. |guard| is a Ity_Bit expression
464 indicating whether or not the call happens. If guard==NULL, the
465 call is unconditional. |retloc| is set to indicate where the
466 return value is after the call. The caller (of this fn) must
467 generate code to add |stackAdjustAfterCall| to the stack pointer
468 after the call is done. Returns True iff it managed to handle this
469 combination of arg/return types, else returns False. */
471 static
472 Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
473 /*OUT*/RetLoc* retloc,
474 ISelEnv* env,
475 IRExpr* guard,
476 IRCallee* cee, IRType retTy, IRExpr** args )
478 ARM64CondCode cc;
479 HReg argregs[ARM64_N_ARGREGS];
480 HReg tmpregs[ARM64_N_ARGREGS];
481 Bool go_fast;
482 Int n_args, i, nextArgReg;
483 Addr64 target;
485 vassert(ARM64_N_ARGREGS == 8);
487 /* Set default returns. We'll update them later if needed. */
488 *stackAdjustAfterCall = 0;
489 *retloc = mk_RetLoc_INVALID();
491 /* These are used for cross-checking that IR-level constraints on
492 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
493 UInt nVECRETs = 0;
494 UInt nGSPTRs = 0;
496 /* Marshal args for a call and do the call.
498 This function only deals with a tiny set of possibilities, which
499 cover all helpers in practice. The restrictions are that only
500 arguments in registers are supported, hence only
501 ARM64_N_ARGREGS x 64 integer bits in total can be passed. In
502 fact the only supported arg type is I64.
504 The return type can be I{64,32} or V128. In the V128 case, it
505 is expected that |args| will contain the special node
506 IRExpr_VECRET(), in which case this routine generates code to
507 allocate space on the stack for the vector return value. Since
508 we are not passing any scalars on the stack, it is enough to
509 preallocate the return space before marshalling any arguments,
510 in this case.
512 |args| may also contain IRExpr_GSPTR(), in which case the
513 value in x21 is passed as the corresponding argument.
515 Generating code which is both efficient and correct when
516 parameters are to be passed in registers is difficult, for the
517 reasons elaborated in detail in comments attached to
518 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
519 of the method described in those comments.
521 The problem is split into two cases: the fast scheme and the
522 slow scheme. In the fast scheme, arguments are computed
523 directly into the target (real) registers. This is only safe
524 when we can be sure that computation of each argument will not
525 trash any real registers set by computation of any other
526 argument.
528 In the slow scheme, all args are first computed into vregs, and
529 once they are all done, they are moved to the relevant real
530 regs. This always gives correct code, but it also gives a bunch
531 of vreg-to-rreg moves which are usually redundant but are hard
532 for the register allocator to get rid of.
534 To decide which scheme to use, all argument expressions are
535 first examined. If they are all so simple that it is clear they
536 will be evaluated without use of any fixed registers, use the
537 fast scheme, else use the slow scheme. Note also that only
538 unconditional calls may use the fast scheme, since having to
539 compute a condition expression could itself trash real
540 registers.
542 Note this requires being able to examine an expression and
543 determine whether or not evaluation of it might use a fixed
544 register. That requires knowledge of how the rest of this insn
545 selector works. Currently just the following 3 are regarded as
546 safe -- hopefully they cover the majority of arguments in
547 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
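      As an illustrative sketch: a call whose arguments are, say, an
      IRTemp, a Get and a Const can take the fast scheme, since all three
      are in the safe set above, whereas a call with an Add64(..) argument
      is forced onto the slow scheme, because mightRequireFixedRegs()
      conservatively returns True for it.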
550 /* Note that the cee->regparms field is meaningless on ARM64 hosts
551 (since there is only one calling convention) and so we always
552 ignore it. */
554 n_args = 0;
555 for (i = 0; args[i]; i++) {
556 IRExpr* arg = args[i];
557 if (UNLIKELY(arg->tag == Iex_VECRET)) {
558 nVECRETs++;
559 } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
560 nGSPTRs++;
562 n_args++;
565 /* If this fails, the IR is ill-formed */
566 vassert(nGSPTRs == 0 || nGSPTRs == 1);
568 /* If we have a VECRET, allocate space on the stack for the return
569 value, and record the stack pointer after that. */
570 HReg r_vecRetAddr = INVALID_HREG;
571 if (nVECRETs == 1) {
572 vassert(retTy == Ity_V128 || retTy == Ity_V256);
573 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
574 r_vecRetAddr = newVRegI(env);
575 addInstr(env, ARM64Instr_AddToSP(-16));
576 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
577 } else {
578 // If either of these fail, the IR is ill-formed
579 vassert(retTy != Ity_V128 && retTy != Ity_V256);
580 vassert(nVECRETs == 0);
583 argregs[0] = hregARM64_X0();
584 argregs[1] = hregARM64_X1();
585 argregs[2] = hregARM64_X2();
586 argregs[3] = hregARM64_X3();
587 argregs[4] = hregARM64_X4();
588 argregs[5] = hregARM64_X5();
589 argregs[6] = hregARM64_X6();
590 argregs[7] = hregARM64_X7();
592 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
593 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
595 /* First decide which scheme (slow or fast) is to be used. First
596 assume the fast scheme, and select slow if any contraindications
597 (wow) appear. */
599 go_fast = True;
601 if (guard) {
602 if (guard->tag == Iex_Const
603 && guard->Iex.Const.con->tag == Ico_U1
604 && guard->Iex.Const.con->Ico.U1 == True) {
605 /* unconditional */
606 } else {
607 /* Not manifestly unconditional -- be conservative. */
608 go_fast = False;
612 if (go_fast) {
613 for (i = 0; i < n_args; i++) {
614 if (mightRequireFixedRegs(args[i])) {
615 go_fast = False;
616 break;
621 if (go_fast) {
622 if (retTy == Ity_V128 || retTy == Ity_V256)
623 go_fast = False;
626 /* At this point the scheme to use has been established. Generate
627 code to get the arg values into the argument rregs. If we run
628 out of arg regs, give up. */
630 if (go_fast) {
632 /* FAST SCHEME */
633 nextArgReg = 0;
635 for (i = 0; i < n_args; i++) {
636 IRExpr* arg = args[i];
638 IRType aTy = Ity_INVALID;
639 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
640 aTy = typeOfIRExpr(env->type_env, args[i]);
642 if (nextArgReg >= ARM64_N_ARGREGS)
643 return False; /* out of argregs */
645 if (aTy == Ity_I64) {
646 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
647 iselIntExpr_R(env, args[i]) ));
648 nextArgReg++;
650 else if (arg->tag == Iex_GSPTR) {
651 vassert(0); //ATC
652 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
653 hregARM64_X21() ));
654 nextArgReg++;
656 else if (arg->tag == Iex_VECRET) {
657 // because of the go_fast logic above, we can't get here,
658 // since vector return values make us use the slow path
659 // instead.
660 vassert(0);
662 else
663 return False; /* unhandled arg type */
666 /* Fast scheme only applies for unconditional calls. Hence: */
667 cc = ARM64cc_AL;
669 } else {
671 /* SLOW SCHEME; move via temporaries */
672 nextArgReg = 0;
674 for (i = 0; i < n_args; i++) {
675 IRExpr* arg = args[i];
677 IRType aTy = Ity_INVALID;
678 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
679 aTy = typeOfIRExpr(env->type_env, args[i]);
681 if (nextArgReg >= ARM64_N_ARGREGS)
682 return False; /* out of argregs */
684 if (aTy == Ity_I64) {
685 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
686 nextArgReg++;
688 else if (arg->tag == Iex_GSPTR) {
689 vassert(0); //ATC
690 tmpregs[nextArgReg] = hregARM64_X21();
691 nextArgReg++;
693 else if (arg->tag == Iex_VECRET) {
694 vassert(!hregIsInvalid(r_vecRetAddr));
695 tmpregs[nextArgReg] = r_vecRetAddr;
696 nextArgReg++;
698 else
699 return False; /* unhandled arg type */
702 /* Now we can compute the condition. We can't do it earlier
703 because the argument computations could trash the condition
704 codes. Be a bit clever to handle the common case where the
705 guard is 1:Bit. */
706 cc = ARM64cc_AL;
707 if (guard) {
708 if (guard->tag == Iex_Const
709 && guard->Iex.Const.con->tag == Ico_U1
710 && guard->Iex.Const.con->Ico.U1 == True) {
711 /* unconditional -- do nothing */
712 } else {
713 cc = iselCondCode( env, guard );
717 /* Move the args to their final destinations. */
718 for (i = 0; i < nextArgReg; i++) {
719 vassert(!(hregIsInvalid(tmpregs[i])));
720 /* None of these insns, including any spill code that might
721 be generated, may alter the condition codes. */
722 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
727 /* Should be assured by checks above */
728 vassert(nextArgReg <= ARM64_N_ARGREGS);
730 /* Do final checks, set the return values, and generate the call
731 instruction proper. */
732 vassert(nGSPTRs == 0 || nGSPTRs == 1);
733 vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
734 vassert(*stackAdjustAfterCall == 0);
735 vassert(is_RetLoc_INVALID(*retloc));
736 switch (retTy) {
737 case Ity_INVALID:
738 /* Function doesn't return a value. */
739 *retloc = mk_RetLoc_simple(RLPri_None);
740 break;
741 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
742 *retloc = mk_RetLoc_simple(RLPri_Int);
743 break;
744 case Ity_V128:
745 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
746 *stackAdjustAfterCall = 16;
747 break;
748 case Ity_V256:
749 vassert(0); // ATC
750 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
751 *stackAdjustAfterCall = 32;
752 break;
753 default:
754 /* IR can denote other possible return types, but we don't
755 handle those here. */
756 vassert(0);
759 /* Finally, generate the call itself. This needs the *retloc value
760 set in the switch above, which is why it's at the end. */
762 /* nextArgReg doles out argument registers. Since these are
763 assigned in the order x0 .. x7, its numeric value at this point,
764 which must be between 0 and 8 inclusive, is going to be equal to
765 the number of arg regs in use for the call. Hence bake that
766 number into the call (we'll need to know it when doing register
767 allocation, to know what regs the call reads.) */
769 target = (Addr)cee->addr;
770 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
772 return True; /* success */
776 /*---------------------------------------------------------*/
777 /*--- ISEL: Integer expressions (64/32 bit) ---*/
778 /*---------------------------------------------------------*/
780 /* Select insns for an integer-typed expression, and add them to the
781 code list. Return a reg holding the result. This reg will be a
782 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
783 want to modify it, ask for a new vreg, copy it in there, and modify
784 the copy. The register allocator will do its best to map both
785 vregs to the same real register, so the copies will often disappear
786 later in the game.
788 This should handle expressions of 64- and 32-bit type. All results
789 are returned in a 64-bit register. For 32-bit expressions, the
790 upper 32 bits are arbitrary, so you should mask or sign extend
791 partial values if necessary.
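      For instance, the Iop_Shr32 case below first zero-widens its argument
      (widen_z_32_to_64) precisely because those upper 32 bits may hold
      junk which a plain 64-bit shift right would otherwise drag into the
      result.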
794 /* --------------------- AMode --------------------- */
796 /* Return an AMode which computes the value of the specified
797 expression, possibly also adding insns to the code list as a
798 result. The expression may only be a 64-bit one.
801 static Bool isValidScale ( UChar scale )
803 switch (scale) {
804 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
805 default: return False;
809 static Bool sane_AMode ( ARM64AMode* am )
811 switch (am->tag) {
812 case ARM64am_RI9:
813 return
814 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
815 && (hregIsVirtual(am->ARM64am.RI9.reg)
816 /* || sameHReg(am->ARM64am.RI9.reg,
817 hregARM64_X21()) */ )
818 && am->ARM64am.RI9.simm9 >= -256
819 && am->ARM64am.RI9.simm9 <= 255 );
820 case ARM64am_RI12:
821 return
822 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
823 && (hregIsVirtual(am->ARM64am.RI12.reg)
824 /* || sameHReg(am->ARM64am.RI12.reg,
825 hregARM64_X21()) */ )
826 && am->ARM64am.RI12.uimm12 < 4096
827 && isValidScale(am->ARM64am.RI12.szB) );
828 case ARM64am_RR:
829 return
830 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
831 && hregIsVirtual(am->ARM64am.RR.base)
832 && hregClass(am->ARM64am.RR.index) == HRcInt64
833 && hregIsVirtual(am->ARM64am.RR.index) );
834 default:
835 vpanic("sane_AMode: unknown ARM64 AMode1 tag");
839 static
840 ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
842 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
843 vassert(sane_AMode(am));
844 return am;
847 static
848 ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
850 IRType ty = typeOfIRExpr(env->type_env,e);
851 vassert(ty == Ity_I64);
853 ULong szBbits = 0;
854 switch (dty) {
855 case Ity_I64: szBbits = 3; break;
856 case Ity_I32: szBbits = 2; break;
857 case Ity_I16: szBbits = 1; break;
858 case Ity_I8: szBbits = 0; break;
859 default: vassert(0);
862 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
863 we're going to create an amode suitable for LDU* or STU*
864 instructions, which use unscaled immediate offsets. */
865 if (e->tag == Iex_Binop
866 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
867 && e->Iex.Binop.arg2->tag == Iex_Const
868 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
869 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
870 if (simm >= -255 && simm <= 255) {
871 /* Although the gating condition might seem to be
872 simm >= -256 && simm <= 255
873 we will need to negate simm in the case where the op is Sub64.
874 Hence limit the lower value to -255 in order that its negation
875 is representable. */
876 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
877 if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
878 return ARM64AMode_RI9(reg, (Int)simm);
882 /* Add64(expr, uimm12 * transfer-size) */
883 if (e->tag == Iex_Binop
884 && e->Iex.Binop.op == Iop_Add64
885 && e->Iex.Binop.arg2->tag == Iex_Const
886 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
887 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
888 ULong szB = 1 << szBbits;
889 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
890 && (uimm >> szBbits) < 4096) {
891 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
892 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
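/* Example (illustrative): for a 64-bit access (dty == Ity_I64),
   Add64(expr, 0x50) matches here and produces ARM64AMode_RI12(reg, 10, 8),
   since 0x50 is 8-aligned and 0x50 >> 3 == 10 fits in the uimm12 field. */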
896 /* Add64(expr1, expr2) */
897 if (e->tag == Iex_Binop
898 && e->Iex.Binop.op == Iop_Add64) {
899 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
900 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
901 return ARM64AMode_RR(reg1, reg2);
904 /* Doesn't match anything in particular. Generate it into
905 a register and use that. */
906 HReg reg = iselIntExpr_R(env, e);
907 return ARM64AMode_RI9(reg, 0);
911 /* --------------------- RIA --------------------- */
913 /* Select instructions to generate 'e' into a RIA. */
915 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
917 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
918 /* sanity checks ... */
919 switch (ri->tag) {
920 case ARM64riA_I12:
921 vassert(ri->ARM64riA.I12.imm12 < 4096);
922 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
923 return ri;
924 case ARM64riA_R:
925 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
926 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
927 return ri;
928 default:
929 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
933 /* DO NOT CALL THIS DIRECTLY ! */
934 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
936 IRType ty = typeOfIRExpr(env->type_env,e);
937 vassert(ty == Ity_I64 || ty == Ity_I32);
939 /* special case: immediate */
940 if (e->tag == Iex_Const) {
941 ULong u = 0xF000000ULL; /* invalid */
942 switch (e->Iex.Const.con->tag) {
943 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
944 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
945 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
947 if (0 == (u & ~(0xFFFULL << 0)))
948 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
949 if (0 == (u & ~(0xFFFULL << 12)))
950 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
951 /* else fail, fall through to default case */
954 /* default case: calculate into a register and return that */
956 HReg r = iselIntExpr_R ( env, e );
957 return ARM64RIA_R(r);
962 /* --------------------- RIL --------------------- */
964 /* Select instructions to generate 'e' into a RIL. At this point we
965 have to deal with the strange bitfield-immediate encoding for logic
966 instructions. */
969 // The following four functions
970 // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
971 // are copied, with modifications, from
972 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
973 // which has the following copyright notice:
975 Copyright 2013, ARM Limited
976 All rights reserved.
978 Redistribution and use in source and binary forms, with or without
979 modification, are permitted provided that the following conditions are met:
981 * Redistributions of source code must retain the above copyright notice,
982 this list of conditions and the following disclaimer.
983 * Redistributions in binary form must reproduce the above copyright notice,
984 this list of conditions and the following disclaimer in the documentation
985 and/or other materials provided with the distribution.
986 * Neither the name of ARM Limited nor the names of its contributors may be
987 used to endorse or promote products derived from this software without
988 specific prior written permission.
990 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
991 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
992 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
993 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
994 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
995 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
996 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
997 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
998 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
999 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1002 static Int CountLeadingZeros(ULong value, Int width)
1004 vassert(width == 32 || width == 64);
1005 Int count = 0;
1006 ULong bit_test = 1ULL << (width - 1);
1007 while ((count < width) && ((bit_test & value) == 0)) {
1008 count++;
1009 bit_test >>= 1;
1011 return count;
1014 static Int CountTrailingZeros(ULong value, Int width)
1016 vassert(width == 32 || width == 64);
1017 Int count = 0;
1018 while ((count < width) && (((value >> count) & 1) == 0)) {
1019 count++;
1021 return count;
1024 static Int CountSetBits(ULong value, Int width)
1026 // TODO: Other widths could be added here, as the implementation already
1027 // supports them.
1028 vassert(width == 32 || width == 64);
1030 // Mask out unused bits to ensure that they are not counted.
1031 value &= (0xffffffffffffffffULL >> (64-width));
1033 // Add up the set bits.
1034 // The algorithm works by adding pairs of bit fields together iteratively,
1035 // where the size of each bit field doubles each time.
1036 // An example for an 8-bit value:
1037 // Bits: h g f e d c b a
1038 // \ | \ | \ | \ |
1039 // value = h+g f+e d+c b+a
1040 // \ | \ |
1041 // value = h+g+f+e d+c+b+a
1042 // \ |
1043 // value = h+g+f+e+d+c+b+a
1044 value = ((value >> 1) & 0x5555555555555555ULL)
1045 + (value & 0x5555555555555555ULL);
1046 value = ((value >> 2) & 0x3333333333333333ULL)
1047 + (value & 0x3333333333333333ULL);
1048 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1049 + (value & 0x0f0f0f0f0f0f0f0fULL);
1050 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1051 + (value & 0x00ff00ff00ff00ffULL);
1052 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1053 + (value & 0x0000ffff0000ffffULL);
1054 value = ((value >> 32) & 0x00000000ffffffffULL)
1055 + (value & 0x00000000ffffffffULL);
1057 return value;
1060 static Bool isImmLogical ( /*OUT*/UInt* n,
1061 /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1062 ULong value, UInt width )
1064 // Test if a given value can be encoded in the immediate field of a
1065 // logical instruction.
1067 // If it can be encoded, the function returns true, and values
1068 // pointed to by n, imm_s and imm_r are updated with immediates
1069 // encoded in the format required by the corresponding fields in the
1070 // logical instruction. If it can not be encoded, the function
1071 // returns false, and the values pointed to by n, imm_s and imm_r
1072 // are undefined.
1073 vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1074 vassert(width == 32 || width == 64);
1076 // Logical immediates are encoded using parameters n, imm_s and imm_r using
1077 // the following table:
1079 // N imms immr size S R
1080 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
1081 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
1082 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
1083 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
1084 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
1085 // 0 11110s xxxxxr 2 UInt(s) UInt(r)
1086 // (s bits must not be all set)
1088 // A pattern is constructed of size bits, where the least significant S+1
1089 // bits are set. The pattern is rotated right by R, and repeated across a
1090 // 32 or 64-bit value, depending on destination register width.
1092 // To test if an arbitrary immediate can be encoded using this scheme, an
1093 // iterative algorithm is used.
1095 // TODO: This code does not consider using X/W register overlap to support
1096 // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1097 // are an encodable logical immediate.
1099 // 1. If the value has all set or all clear bits, it can't be encoded.
1100 if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1101 ((width == 32) && (value == 0xffffffff))) {
1102 return False;
1105 UInt lead_zero = CountLeadingZeros(value, width);
1106 UInt lead_one = CountLeadingZeros(~value, width);
1107 UInt trail_zero = CountTrailingZeros(value, width);
1108 UInt trail_one = CountTrailingZeros(~value, width);
1109 UInt set_bits = CountSetBits(value, width);
1111 // The fixed bits in the immediate s field.
1112 // If width == 64 (X reg), start at 0xFFFFFF80.
1113 // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1114 // widths won't be executed.
1115 Int imm_s_fixed = (width == 64) ? -128 : -64;
1116 Int imm_s_mask = 0x3F;
1118 for (;;) {
1119 // 2. If the value is two bits wide, it can be encoded.
1120 if (width == 2) {
1121 *n = 0;
1122 *imm_s = 0x3C;
1123 *imm_r = (value & 3) - 1;
1124 return True;
1127 *n = (width == 64) ? 1 : 0;
1128 *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1129 if ((lead_zero + set_bits) == width) {
1130 *imm_r = 0;
1131 } else {
1132 *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1135 // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1136 // the bit width of the value, it can be encoded.
1137 if (lead_zero + trail_zero + set_bits == width) {
1138 return True;
1141 // 4. If the sum of leading ones, trailing ones and unset bits in the
1142 // value is equal to the bit width of the value, it can be encoded.
1143 if (lead_one + trail_one + (width - set_bits) == width) {
1144 return True;
1147 // 5. If the most-significant half of the bitwise value is equal to the
1148 // least-significant half, return to step 2 using the least-significant
1149 // half of the value.
1150 ULong mask = (1ULL << (width >> 1)) - 1;
1151 if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1152 width >>= 1;
1153 set_bits >>= 1;
1154 imm_s_fixed >>= 1;
1155 continue;
1158 // 6. Otherwise, the value can't be encoded.
1159 return False;
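// Worked example (illustrative): value == 0x00FF00FF00FF00FF, width == 64.
// The value halves down to a repeating 16-bit element 0x00FF -- a run of
// 8 ones with no rotation -- so the routine returns True with n == 0,
// imm_s == 0x27 (binary 100111, meaning "8 bits set" in a 16-bit element)
// and imm_r == 0.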
1164 /* Create a RIL for the given immediate, if it is representable, or
1165 return NULL if not. */
1167 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1169 UInt n = 0, imm_s = 0, imm_r = 0;
1170 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1171 if (!ok) return NULL;
1172 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1173 return ARM64RIL_I13(n, imm_r, imm_s);
1176 /* So, finally .. */
1178 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1180 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1181 /* sanity checks ... */
1182 switch (ri->tag) {
1183 case ARM64riL_I13:
1184 vassert(ri->ARM64riL.I13.bitN < 2);
1185 vassert(ri->ARM64riL.I13.immR < 64);
1186 vassert(ri->ARM64riL.I13.immS < 64);
1187 return ri;
1188 case ARM64riL_R:
1189 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1190 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1191 return ri;
1192 default:
1193 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1197 /* DO NOT CALL THIS DIRECTLY ! */
1198 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1200 IRType ty = typeOfIRExpr(env->type_env,e);
1201 vassert(ty == Ity_I64 || ty == Ity_I32);
1203 /* special case: immediate */
1204 if (e->tag == Iex_Const) {
1205 ARM64RIL* maybe = NULL;
1206 if (ty == Ity_I64) {
1207 vassert(e->Iex.Const.con->tag == Ico_U64);
1208 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1209 } else {
1210 vassert(ty == Ity_I32);
1211 vassert(e->Iex.Const.con->tag == Ico_U32);
1212 UInt u32 = e->Iex.Const.con->Ico.U32;
1213 ULong u64 = (ULong)u32;
1214 /* First try with 32 leading zeroes. */
1215 maybe = mb_mkARM64RIL_I(u64);
1216 /* If that doesn't work, try with 2 copies, since it doesn't
1217 matter what winds up in the upper 32 bits. */
1218 if (!maybe) {
1219 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1222 if (maybe) return maybe;
1223 /* else fail, fall through to default case */
1226 /* default case: calculate into a register and return that */
1228 HReg r = iselIntExpr_R ( env, e );
1229 return ARM64RIL_R(r);
1234 /* --------------------- RI6 --------------------- */
1236 /* Select instructions to generate 'e' into a RI6. */
1238 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1240 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1241 /* sanity checks ... */
1242 switch (ri->tag) {
1243 case ARM64ri6_I6:
1244 vassert(ri->ARM64ri6.I6.imm6 < 64);
1245 vassert(ri->ARM64ri6.I6.imm6 > 0);
1246 return ri;
1247 case ARM64ri6_R:
1248 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1249 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1250 return ri;
1251 default:
1252 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1256 /* DO NOT CALL THIS DIRECTLY ! */
1257 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1259 IRType ty = typeOfIRExpr(env->type_env,e);
1260 vassert(ty == Ity_I64 || ty == Ity_I8);
1262 /* special case: immediate */
1263 if (e->tag == Iex_Const) {
1264 switch (e->Iex.Const.con->tag) {
1265 case Ico_U8: {
1266 UInt u = e->Iex.Const.con->Ico.U8;
1267 if (u > 0 && u < 64)
1268 return ARM64RI6_I6(u);
1269 break;
1270 default:
1271 break;
1274 /* else fail, fall through to default case */
1277 /* default case: calculate into a register and return that */
1279 HReg r = iselIntExpr_R ( env, e );
1280 return ARM64RI6_R(r);
1285 /* ------------------- CondCode ------------------- */
1287 /* Generate code to evaluate a bit-typed expression, returning the
1288 condition code which corresponds to the expression notionally
1289 having evaluated to 1. */
1291 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1293 ARM64CondCode cc = iselCondCode_wrk(env,e);
1294 vassert(cc != ARM64cc_NV);
1295 return cc;
1298 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1300 vassert(e);
1301 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1303 /* var */
1304 if (e->tag == Iex_RdTmp) {
1305 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1306 /* Test doesn't modify rTmp, so this is OK. */
1307 ARM64RIL* one = mb_mkARM64RIL_I(1);
1308 vassert(one);
1309 addInstr(env, ARM64Instr_Test(rTmp, one));
1310 return ARM64cc_NE;
1313 /* Constant 1:Bit */
1314 if (e->tag == Iex_Const) {
1315 /* This is a very stupid translation. Hopefully it doesn't occur much,
1316 if ever. */
1317 vassert(e->Iex.Const.con->tag == Ico_U1);
1318 vassert(e->Iex.Const.con->Ico.U1 == True
1319 || e->Iex.Const.con->Ico.U1 == False);
1320 HReg rTmp = newVRegI(env);
1321 addInstr(env, ARM64Instr_Imm64(rTmp, 0));
1322 ARM64RIL* one = mb_mkARM64RIL_I(1);
1323 vassert(one);
1324 addInstr(env, ARM64Instr_Test(rTmp, one));
1325 return e->Iex.Const.con->Ico.U1 ? ARM64cc_EQ : ARM64cc_NE;
1328 /* Not1(e) */
1329 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1330 /* Generate code for the arg, and negate the test condition */
1331 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1332 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1333 return ARM64cc_AL;
1334 } else {
1335 return 1 ^ cc;
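/* (ARM64 condition codes are laid out in complementary pairs, e.g.
   EQ (0000) / NE (0001), so flipping the bottom bit negates the
   condition.) */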
1339 /* --- patterns rooted at: 64to1 --- */
1341 if (e->tag == Iex_Unop
1342 && e->Iex.Unop.op == Iop_64to1) {
1343 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1344 ARM64RIL* one = mb_mkARM64RIL_I(1);
1345 vassert(one); /* '1' must be representable */
1346 addInstr(env, ARM64Instr_Test(rTmp, one));
1347 return ARM64cc_NE;
1350 /* --- patterns rooted at: CmpNEZ8 --- */
1352 if (e->tag == Iex_Unop
1353 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1354 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1355 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1356 addInstr(env, ARM64Instr_Test(r1, xFF));
1357 return ARM64cc_NE;
1360 /* --- patterns rooted at: CmpNEZ16 --- */
1362 if (e->tag == Iex_Unop
1363 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1364 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1365 ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
1366 addInstr(env, ARM64Instr_Test(r1, xFFFF));
1367 return ARM64cc_NE;
1370 /* --- patterns rooted at: CmpNEZ64 --- */
1372 if (e->tag == Iex_Unop
1373 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1374 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1375 ARM64RIA* zero = ARM64RIA_I12(0,0);
1376 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1377 return ARM64cc_NE;
1380 /* --- patterns rooted at: CmpNEZ32 --- */
1382 if (e->tag == Iex_Unop
1383 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1384 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1385 ARM64RIA* zero = ARM64RIA_I12(0,0);
1386 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1387 return ARM64cc_NE;
1390 /* --- Cmp*64*(x,y) --- */
1391 if (e->tag == Iex_Binop
1392 && (e->Iex.Binop.op == Iop_CmpEQ64
1393 || e->Iex.Binop.op == Iop_CmpNE64
1394 || e->Iex.Binop.op == Iop_CmpLT64S
1395 || e->Iex.Binop.op == Iop_CmpLT64U
1396 || e->Iex.Binop.op == Iop_CmpLE64S
1397 || e->Iex.Binop.op == Iop_CmpLE64U
1398 || e->Iex.Binop.op == Iop_CasCmpEQ64
1399 || e->Iex.Binop.op == Iop_CasCmpNE64)) {
1400 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1401 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1402 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1403 switch (e->Iex.Binop.op) {
1404 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return ARM64cc_EQ;
1405 case Iop_CmpNE64: case Iop_CasCmpNE64: return ARM64cc_NE;
1406 case Iop_CmpLT64S: return ARM64cc_LT;
1407 case Iop_CmpLT64U: return ARM64cc_CC;
1408 case Iop_CmpLE64S: return ARM64cc_LE;
1409 case Iop_CmpLE64U: return ARM64cc_LS;
1410 default: vpanic("iselCondCode(arm64): CmpXX64");
1414 /* --- Cmp*32*(x,y) --- */
1415 if (e->tag == Iex_Binop
1416 && (e->Iex.Binop.op == Iop_CmpEQ32
1417 || e->Iex.Binop.op == Iop_CmpNE32
1418 || e->Iex.Binop.op == Iop_CmpLT32S
1419 || e->Iex.Binop.op == Iop_CmpLT32U
1420 || e->Iex.Binop.op == Iop_CmpLE32S
1421 || e->Iex.Binop.op == Iop_CmpLE32U
1422 || e->Iex.Binop.op == Iop_CasCmpEQ32
1423 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1424 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1425 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1426 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1427 switch (e->Iex.Binop.op) {
1428 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return ARM64cc_EQ;
1429 case Iop_CmpNE32: case Iop_CasCmpNE32: return ARM64cc_NE;
1430 case Iop_CmpLT32S: return ARM64cc_LT;
1431 case Iop_CmpLT32U: return ARM64cc_CC;
1432 case Iop_CmpLE32S: return ARM64cc_LE;
1433 case Iop_CmpLE32U: return ARM64cc_LS;
1434 default: vpanic("iselCondCode(arm64): CmpXX32");
1438 /* --- Cmp*16*(x,y) --- */
1439 if (e->tag == Iex_Binop
1440 && (e->Iex.Binop.op == Iop_CasCmpEQ16
1441 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1442 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1443 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1444 HReg argL2 = widen_z_16_to_64(env, argL);
1445 HReg argR2 = widen_z_16_to_64(env, argR);
1446 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1447 switch (e->Iex.Binop.op) {
1448 case Iop_CasCmpEQ16: return ARM64cc_EQ;
1449 case Iop_CasCmpNE16: return ARM64cc_NE;
1450 default: vpanic("iselCondCode(arm64): CmpXX16");
1454 /* --- Cmp*8*(x,y) --- */
1455 if (e->tag == Iex_Binop
1456 && (e->Iex.Binop.op == Iop_CasCmpEQ8
1457 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1458 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1459 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1460 HReg argL2 = widen_z_8_to_64(env, argL);
1461 HReg argR2 = widen_z_8_to_64(env, argR);
1462 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
1463 switch (e->Iex.Binop.op) {
1464 case Iop_CasCmpEQ8: return ARM64cc_EQ;
1465 case Iop_CasCmpNE8: return ARM64cc_NE;
1466 default: vpanic("iselCondCode(arm64): CmpXX8");
1470 /* --- And1(x,y), Or1(x,y) --- */
1471 /* FIXME: We could (and probably should) do a lot better here, by using the
1472 iselCondCode_C/_R scheme used in the amd64 insn selector. */
1473 if (e->tag == Iex_Binop
1474 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
1475 HReg x_as_64 = newVRegI(env);
1476 ARM64CondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1);
1477 addInstr(env, ARM64Instr_Set64(x_as_64, cc_x));
1479 HReg y_as_64 = newVRegI(env);
1480 ARM64CondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2);
1481 addInstr(env, ARM64Instr_Set64(y_as_64, cc_y));
1483 HReg tmp = newVRegI(env);
1484 ARM64LogicOp lop
1485 = e->Iex.Binop.op == Iop_And1 ? ARM64lo_AND : ARM64lo_OR;
1486 addInstr(env, ARM64Instr_Logic(tmp, x_as_64, ARM64RIL_R(y_as_64), lop));
1488 ARM64RIL* one = mb_mkARM64RIL_I(1);
1489 vassert(one);
1490 addInstr(env, ARM64Instr_Test(tmp, one));
1492 return ARM64cc_NE;
1495 ppIRExpr(e);
1496 vpanic("iselCondCode");
1500 /* --------------------- Reg --------------------- */
1502 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1504 HReg r = iselIntExpr_R_wrk(env, e);
1505 /* sanity checks ... */
1506 # if 0
1507 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1508 # endif
1509 vassert(hregClass(r) == HRcInt64);
1510 vassert(hregIsVirtual(r));
1511 return r;
1514 /* DO NOT CALL THIS DIRECTLY ! */
1515 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1517 IRType ty = typeOfIRExpr(env->type_env,e);
1518 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1520 switch (e->tag) {
1522 /* --------- TEMP --------- */
1523 case Iex_RdTmp: {
1524 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1527 /* --------- LOAD --------- */
1528 case Iex_Load: {
1529 HReg dst = newVRegI(env);
1531 if (e->Iex.Load.end != Iend_LE)
1532 goto irreducible;
1534 if (ty == Ity_I64) {
1535 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1536 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1537 return dst;
1539 if (ty == Ity_I32) {
1540 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1541 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1542 return dst;
1544 if (ty == Ity_I16) {
1545 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1546 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1547 return dst;
1549 if (ty == Ity_I8) {
1550 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1551 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1552 return dst;
1554 break;
1557 /* --------- BINARY OP --------- */
1558 case Iex_Binop: {
1560 ARM64LogicOp lop = 0; /* invalid */
1561 ARM64ShiftOp sop = 0; /* invalid */
1563 /* Special-case 0-x into a Neg instruction. Not because it's
1564 particularly useful but more so as to give value flow using
1565 this instruction, so as to check its assembly correctness for
1566 implementation of Left32/Left64. */
1567 switch (e->Iex.Binop.op) {
1568 case Iop_Sub64:
1569 if (isZeroU64(e->Iex.Binop.arg1)) {
1570 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1571 HReg dst = newVRegI(env);
1572 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1573 return dst;
1575 break;
1576 default:
1577 break;
1580 /* ADD/SUB */
1581 switch (e->Iex.Binop.op) {
1582 case Iop_Add64: case Iop_Add32:
1583 case Iop_Sub64: case Iop_Sub32: {
1584 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1585 || e->Iex.Binop.op == Iop_Add32;
1586 HReg dst = newVRegI(env);
1587 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1588 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1589 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1590 return dst;
1592 default:
1593 break;
1596 /* AND/OR/XOR */
1597 switch (e->Iex.Binop.op) {
1598 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1599 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
1600 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1601 log_binop: {
1602 HReg dst = newVRegI(env);
1603 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1604 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1605 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1606 return dst;
1608 default:
1609 break;
1612 /* SHL/SHR/SAR */
1613 switch (e->Iex.Binop.op) {
1614 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1615 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1616 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1617 sh_binop: {
1618 HReg dst = newVRegI(env);
1619 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1620 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1621 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1622 return dst;
1624 case Iop_Shr32:
1625 case Iop_Sar32: {
1626 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1627 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1628 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1629 HReg dst = zx ? widen_z_32_to_64(env, argL)
1630 : widen_s_32_to_64(env, argL);
1631 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1632 return dst;
1634 default: break;
1637 /* MUL */
1638 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1639 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1640 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1641 HReg dst = newVRegI(env);
1642 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1643 return dst;
1646 /* MULL */
1647 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1648 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1649 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1650 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1651 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1652 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1653 HReg dst = newVRegI(env);
1654 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1655 return dst;
1658 /* Handle misc other ops. */
1660 if (e->Iex.Binop.op == Iop_Max32U) {
1661 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1662 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1663 HReg dst = newVRegI(env);
1664 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1665 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1666 return dst;
1669 if (e->Iex.Binop.op == Iop_32HLto64) {
1670 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1671 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1672 HReg lo32 = widen_z_32_to_64(env, lo32s);
1673 HReg hi32 = newVRegI(env);
1674 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1675 ARM64sh_SHL));
1676 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1677 ARM64lo_OR));
1678 return hi32;
1681 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1682 Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1683 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1684 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1685 HReg dst = newVRegI(env);
1686 HReg imm = newVRegI(env);
1687 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1688 create in dst, the IRCmpF64Result encoded result. */
1689 addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1690 addInstr(env, ARM64Instr_Imm64(dst, 0));
1691 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1692 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1693 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1694 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1695 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1696 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1697 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1698 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1699 return dst;
1702 { /* local scope */
1703 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1704 Bool srcIsD = False;
1705 switch (e->Iex.Binop.op) {
1706 case Iop_F64toI64S:
1707 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1708 case Iop_F64toI64U:
1709 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1710 case Iop_F64toI32S:
1711 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1712 case Iop_F64toI32U:
1713 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1714 case Iop_F32toI32S:
1715 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1716 case Iop_F32toI32U:
1717 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1718 case Iop_F32toI64S:
1719 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1720 case Iop_F32toI64U:
1721 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1722 default:
1723 break;
1725 if (cvt_op != ARM64cvt_INVALID) {
1726 /* This is all a bit dodgy, because we can't handle a
1727 non-constant (not-known-at-JIT-time) rounding mode
1728 indication. That's because there's no instruction
1729 AFAICS that does this conversion but rounds according to
1730 FPCR.RM, so we have to bake the rounding mode into the
1731 instruction right now. But that should be OK because
1732            (1) the front end attaches a literal Irrm_ value to the
1733            conversion binop, and (2) iropt will never float that
1734            constant off, via CSE, into a temporary.  Hence we should
1735            always see a literal Irrm_ value as the first arg. */
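               /* For example, the front end typically presents
                  F64toI32S(Irrm_ZERO, x); the literal Irrm_ZERO maps to
                  armrm == 3 below, which presumably selects the
                  round-towards-zero (FCVTZS-style) form of the conversion
                  instruction. */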
1736 IRExpr* arg1 = e->Iex.Binop.arg1;
1737 if (arg1->tag != Iex_Const) goto irreducible;
1738 IRConst* arg1con = arg1->Iex.Const.con;
1739 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1740 UInt irrm = arg1con->Ico.U32;
1741 /* Find the ARM-encoded equivalent for |irrm|. */
1742 UInt armrm = 4; /* impossible */
1743 switch (irrm) {
1744 case Irrm_NEAREST: armrm = 0; break;
1745 case Irrm_NegINF: armrm = 2; break;
1746 case Irrm_PosINF: armrm = 1; break;
1747 case Irrm_ZERO: armrm = 3; break;
1748 default: goto irreducible;
1750 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1751 (env, e->Iex.Binop.arg2);
1752 HReg dst = newVRegI(env);
1753 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1754 return dst;
1756 } /* local scope */
1758 /* All cases involving host-side helper calls. */
1759 void* fn = NULL;
1760 switch (e->Iex.Binop.op) {
1761 case Iop_DivU32:
1762 fn = &h_calc_udiv32_w_arm_semantics; break;
1763 case Iop_DivS32:
1764 fn = &h_calc_sdiv32_w_arm_semantics; break;
1765 case Iop_DivU64:
1766 fn = &h_calc_udiv64_w_arm_semantics; break;
1767 case Iop_DivS64:
1768 fn = &h_calc_sdiv64_w_arm_semantics; break;
1769 default:
1770 break;
1773 if (fn) {
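         /* The operands go in X0/X1 and the result comes back in X0, per
            the AAPCS64 integer argument/return convention.  The helper
            names suggest they reproduce ARM division semantics, notably
            that division by zero yields 0 rather than trapping. */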
1774 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1775 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1776 HReg res = newVRegI(env);
1777 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1778 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1779 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
1780 2, mk_RetLoc_simple(RLPri_Int) ));
1781 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1782 return res;
1785 break;
1788 /* --------- UNARY OP --------- */
1789 case Iex_Unop: {
1791 switch (e->Iex.Unop.op) {
1792 case Iop_16Uto64: {
1793 /* This probably doesn't occur often enough to be worth
1794 rolling the extension into the load. */
1795 IRExpr* arg = e->Iex.Unop.arg;
1796 HReg src = iselIntExpr_R(env, arg);
1797 HReg dst = widen_z_16_to_64(env, src);
1798 return dst;
1800 case Iop_32Uto64: {
1801 IRExpr* arg = e->Iex.Unop.arg;
1802 if (arg->tag == Iex_Load) {
1803 /* This correctly zero extends because _LdSt32 is
1804 defined to do a zero extending load. */
1805 HReg dst = newVRegI(env);
1806 ARM64AMode* am
1807 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1808 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1809 return dst;
1811 /* else be lame and mask it */
1812 HReg src = iselIntExpr_R(env, arg);
1813 HReg dst = widen_z_32_to_64(env, src);
1814 return dst;
1816 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
1817 case Iop_8Uto64: {
1818 IRExpr* arg = e->Iex.Unop.arg;
1819 if (arg->tag == Iex_Load) {
1820 /* This correctly zero extends because _LdSt8 is
1821 defined to do a zero extending load. */
1822 HReg dst = newVRegI(env);
1823 ARM64AMode* am
1824 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
1825 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
1826 return dst;
1828 /* else be lame and mask it */
1829 HReg src = iselIntExpr_R(env, arg);
1830 HReg dst = widen_z_8_to_64(env, src);
1831 return dst;
1833 case Iop_128HIto64: {
1834 HReg rHi, rLo;
1835 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1836 return rHi; /* and abandon rLo */
1838 case Iop_8Sto32: case Iop_8Sto64: {
1839 IRExpr* arg = e->Iex.Unop.arg;
1840 HReg src = iselIntExpr_R(env, arg);
1841 HReg dst = widen_s_8_to_64(env, src);
1842 return dst;
1844 case Iop_16Sto32: case Iop_16Sto64: {
1845 IRExpr* arg = e->Iex.Unop.arg;
1846 HReg src = iselIntExpr_R(env, arg);
1847 HReg dst = widen_s_16_to_64(env, src);
1848 return dst;
1850 case Iop_32Sto64: {
1851 IRExpr* arg = e->Iex.Unop.arg;
1852 HReg src = iselIntExpr_R(env, arg);
1853 HReg dst = widen_s_32_to_64(env, src);
1854 return dst;
1856 case Iop_Not32:
1857 case Iop_Not64: {
1858 HReg dst = newVRegI(env);
1859 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1860 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
1861 return dst;
1863 case Iop_Clz64: {
1864 HReg dst = newVRegI(env);
1865 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1866 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
1867 return dst;
1869 case Iop_Left32:
1870 case Iop_Left64: {
1871 /* Left64(src) = src | -src. Left32 can use the same
1872 implementation since in that case we don't care what
1873 the upper 32 bits become. */
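            /* E.g. src = 0x8: NEG gives 0xFFFFFFFFFFFFFFF8 and the OR
               leaves 0xFFFFFFFFFFFFFFF8.  In general bit 63 of src | -src
               is set exactly when src is nonzero, which is what the
               CmpwNEZ64 case below relies on. */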
1874 HReg dst = newVRegI(env);
1875 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1876 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1877 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1878 ARM64lo_OR));
1879 return dst;
1881 case Iop_CmpwNEZ64: {
1882 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
1883 = Left64(src) >>s 63 */
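            /* E.g. src = 5: src | -src = 0xFFFFFFFFFFFFFFFF and the
               arithmetic shift by 63 keeps it all ones; src = 0 stays
               zero. */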
1884 HReg dst = newVRegI(env);
1885 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1886 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1887 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1888 ARM64lo_OR));
1889 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1890 ARM64sh_SAR));
1891 return dst;
1893 case Iop_CmpwNEZ32: {
1894 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
1895 = Left64(src & 0xFFFFFFFF) >>s 63 */
1896 HReg dst = newVRegI(env);
1897 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1898 HReg src = widen_z_32_to_64(env, pre);
1899 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1900 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1901 ARM64lo_OR));
1902 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1903 ARM64sh_SAR));
1904 return dst;
1906 case Iop_V128to64: case Iop_V128HIto64: {
1907 HReg dst = newVRegI(env);
1908 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
1909 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
1910 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
1911 return dst;
1913 case Iop_ReinterpF64asI64: {
1914 HReg dst = newVRegI(env);
1915 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1916 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
1917 return dst;
1919 case Iop_ReinterpF32asI32: {
1920 HReg dst = newVRegI(env);
1921 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1922 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
1923 return dst;
1925 case Iop_1Sto16:
1926 case Iop_1Sto32:
1927 case Iop_1Sto64: {
1928 /* As with the iselStmt case for 'tmp:I1 = expr', we could
1929 do a lot better here if it ever became necessary. */
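            /* The SHL/SAR-by-63 pair below replicates bit 0 of the CSel
               result across all 64 bits, so dst becomes 0 or all ones.
               The same value serves for the 16- and 32-bit variants,
               since by this selector's convention the upper bits of
               narrow values are not relied upon. */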
1930 HReg zero = newVRegI(env);
1931 HReg one = newVRegI(env);
1932 HReg dst = newVRegI(env);
1933 addInstr(env, ARM64Instr_Imm64(zero, 0));
1934 addInstr(env, ARM64Instr_Imm64(one, 1));
1935 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1936 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
1937 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1938 ARM64sh_SHL));
1939 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1940 ARM64sh_SAR));
1941 return dst;
1943 case Iop_NarrowUn16to8x8:
1944 case Iop_NarrowUn32to16x4:
1945 case Iop_NarrowUn64to32x2:
1946 case Iop_QNarrowUn16Sto8Sx8:
1947 case Iop_QNarrowUn32Sto16Sx4:
1948 case Iop_QNarrowUn64Sto32Sx2:
1949 case Iop_QNarrowUn16Uto8Ux8:
1950 case Iop_QNarrowUn32Uto16Ux4:
1951 case Iop_QNarrowUn64Uto32Ux2:
1952 case Iop_QNarrowUn16Sto8Ux8:
1953 case Iop_QNarrowUn32Sto16Ux4:
1954 case Iop_QNarrowUn64Sto32Ux2:
1956 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
1957 HReg tmp = newVRegV(env);
1958 HReg dst = newVRegI(env);
1959 UInt dszBlg2 = 3; /* illegal */
1960 ARM64VecNarrowOp op = ARM64vecna_INVALID;
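            /* Plan: narrow with XTN/SQXTN/UQXTN/SQXTUN, which write the
               narrowed lanes into the low 64 bits of the destination
               vector, then extract lane 0 with VXfromQ to obtain the
               packed 64-bit result these primops produce. */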
1961 switch (e->Iex.Unop.op) {
1962 case Iop_NarrowUn16to8x8:
1963 dszBlg2 = 0; op = ARM64vecna_XTN; break;
1964 case Iop_NarrowUn32to16x4:
1965 dszBlg2 = 1; op = ARM64vecna_XTN; break;
1966 case Iop_NarrowUn64to32x2:
1967 dszBlg2 = 2; op = ARM64vecna_XTN; break;
1968 case Iop_QNarrowUn16Sto8Sx8:
1969 dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
1970 case Iop_QNarrowUn32Sto16Sx4:
1971 dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
1972 case Iop_QNarrowUn64Sto32Sx2:
1973 dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
1974 case Iop_QNarrowUn16Uto8Ux8:
1975 dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
1976 case Iop_QNarrowUn32Uto16Ux4:
1977 dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
1978 case Iop_QNarrowUn64Uto32Ux2:
1979 dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
1980 case Iop_QNarrowUn16Sto8Ux8:
1981 dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
1982 case Iop_QNarrowUn32Sto16Ux4:
1983 dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
1984 case Iop_QNarrowUn64Sto32Ux2:
1985 dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
1986 default:
1987 vassert(0);
1989 addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
1990 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
1991 return dst;
1993 case Iop_1Uto64: {
1994 /* 1Uto64(tmp). */
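            /* For a temporary, the I1 value is assumed to live in bit 0,
               so a single AND with 1 suffices; otherwise the condition is
               recomputed and materialised as 0 or 1 via CSel. */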
1995 HReg dst = newVRegI(env);
1996 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1997 ARM64RIL* one = mb_mkARM64RIL_I(1);
1998 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1999 vassert(one);
2000 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
2001 } else {
2002 /* CLONE-01 */
2003 HReg zero = newVRegI(env);
2004 HReg one = newVRegI(env);
2005 addInstr(env, ARM64Instr_Imm64(zero, 0));
2006 addInstr(env, ARM64Instr_Imm64(one, 1));
2007 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2008 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2010 return dst;
2012 case Iop_64to32:
2013 case Iop_64to16:
2014 case Iop_64to8:
2015 /* These are no-ops. */
2016 return iselIntExpr_R(env, e->Iex.Unop.arg);
2018 default:
2019 break;
2022 break;
2025 /* --------- GET --------- */
2026 case Iex_Get: {
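         /* The offset bounds below correspond to LDR/STR's scaled 12-bit
            unsigned immediate: e.g. (8<<12)-8 == 32760 == 4095 * 8 for
            64-bit accesses, with the 32-, 16- and 8-bit cases scaling the
            same 12-bit field by their access size. */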
2027 if (ty == Ity_I64
2028 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
2029 HReg dst = newVRegI(env);
2030 ARM64AMode* am
2031 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2032 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2033 return dst;
2035 if (ty == Ity_I32
2036 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
2037 HReg dst = newVRegI(env);
2038 ARM64AMode* am
2039 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2040 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2041 return dst;
2043 if (ty == Ity_I16
2044 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2045 HReg dst = newVRegI(env);
2046 ARM64AMode* am
2047 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2048 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2049 return dst;
2051 if (ty == Ity_I8
2052 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2053 HReg dst = newVRegI(env);
2054 ARM64AMode* am
2055 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2056 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2057 return dst;
2059 break;
2062 /* --------- CCALL --------- */
2063 case Iex_CCall: {
2064 HReg dst = newVRegI(env);
2065 vassert(ty == e->Iex.CCall.retty);
2067 /* be very restrictive for now. Only 64-bit ints allowed for
2068 args, and 64 bits for return type. Don't forget to change
2069 the RetLoc if more types are allowed in future. */
2070 if (e->Iex.CCall.retty != Ity_I64)
2071 goto irreducible;
2073 /* Marshal args, do the call, clear stack. */
2074 UInt addToSp = 0;
2075 RetLoc rloc = mk_RetLoc_INVALID();
2076 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2077 e->Iex.CCall.cee, e->Iex.CCall.retty,
2078 e->Iex.CCall.args );
2079 /* */
2080 if (ok) {
2081 vassert(is_sane_RetLoc(rloc));
2082 vassert(rloc.pri == RLPri_Int);
2083 vassert(addToSp == 0);
2084 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2085 return dst;
2087 goto irreducible;
2090 /* --------- LITERAL --------- */
2091 /* 64-bit literals */
2092 case Iex_Const: {
2093 ULong u = 0;
2094 HReg dst = newVRegI(env);
2095 switch (e->Iex.Const.con->tag) {
2096 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2097 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2098 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2099 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2100 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2102 addInstr(env, ARM64Instr_Imm64(dst, u));
2103 return dst;
2106 /* --------- MULTIPLEX --------- */
2107 case Iex_ITE: {
2108 /* ITE(ccexpr, iftrue, iffalse) */
2109 if (ty == Ity_I64 || ty == Ity_I32) {
2110 ARM64CondCode cc;
2111 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2112 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2113 HReg dst = newVRegI(env);
2114 cc = iselCondCode(env, e->Iex.ITE.cond);
2115 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2116 return dst;
2118 break;
2121 default:
2122 break;
2123 } /* switch (e->tag) */
2125 /* We get here if no pattern matched. */
2126 irreducible:
2127 ppIRExpr(e);
2128 vpanic("iselIntExpr_R: cannot reduce tree");
2132 /*---------------------------------------------------------*/
2133 /*--- ISEL: Integer expressions (128 bit) ---*/
2134 /*---------------------------------------------------------*/
2136 /* Compute a 128-bit value into a register pair, which is returned as
2137 the first two parameters. As with iselIntExpr_R, these may be
2138 either real or virtual regs; in any case they must not be changed
2139 by subsequent code emitted by the caller. */
2141 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2142 ISelEnv* env, IRExpr* e )
2144 iselInt128Expr_wrk(rHi, rLo, env, e);
2145 # if 0
2146 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2147 # endif
2148 vassert(hregClass(*rHi) == HRcInt64);
2149 vassert(hregIsVirtual(*rHi));
2150 vassert(hregClass(*rLo) == HRcInt64);
2151 vassert(hregIsVirtual(*rLo));
2154 /* DO NOT CALL THIS DIRECTLY ! */
2155 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2156 ISelEnv* env, IRExpr* e )
2158 vassert(e);
2159 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2161 /* --------- BINARY ops --------- */
2162 if (e->tag == Iex_Binop) {
2163 switch (e->Iex.Binop.op) {
2164 /* 64 x 64 -> 128 multiply */
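         /* The low 64 bits of the product come from a plain MUL; the high
            64 bits presumably come from UMULH/SMULH (the _ZX/_SX forms).
            Together the two halves form the full 128-bit product. */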
2165 case Iop_MullU64:
2166 case Iop_MullS64: {
2167 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2168 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2169 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2170 HReg dstLo = newVRegI(env);
2171 HReg dstHi = newVRegI(env);
2172 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2173 ARM64mul_PLAIN));
2174 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2175 syned ? ARM64mul_SX : ARM64mul_ZX));
2176 *rHi = dstHi;
2177 *rLo = dstLo;
2178 return;
2180 /* 64HLto128(e1,e2) */
2181 case Iop_64HLto128:
2182 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2183 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2184 return;
2185 default:
2186 break;
2188 } /* if (e->tag == Iex_Binop) */
2190 ppIRExpr(e);
2191 vpanic("iselInt128Expr(arm64)");
2195 /*---------------------------------------------------------*/
2196 /*--- ISEL: Vector expressions (128 bit) ---*/
2197 /*---------------------------------------------------------*/
2199 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
2201 HReg r = iselV128Expr_wrk( env, e );
2202 vassert(hregClass(r) == HRcVec128);
2203 vassert(hregIsVirtual(r));
2204 return r;
2207 /* DO NOT CALL THIS DIRECTLY */
2208 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
2210 IRType ty = typeOfIRExpr(env->type_env, e);
2211 vassert(e);
2212 vassert(ty == Ity_V128);
2214 if (e->tag == Iex_RdTmp) {
2215 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2218 if (e->tag == Iex_Const) {
2219 /* Only a very limited range of constants is handled. */
2220 vassert(e->Iex.Const.con->tag == Ico_V128);
2221 UShort con = e->Iex.Const.con->Ico.V128;
2222 HReg res = newVRegV(env);
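      /* VImmQ's imm16 appears to be a per-byte mask: bit i set means byte
         i of the result is 0xFF.  Masks it cannot produce directly are
         synthesised by rotating a producible one with EXT, e.g. for 0x00F0
         start from 0x000F (bytes 0..3 set) and VExtV by 12 bytes, which
         moves those bytes to positions 4..7. */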
2223 switch (con) {
2224 case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
2225 addInstr(env, ARM64Instr_VImmQ(res, con));
2226 return res;
2227 case 0x00F0:
2228 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2229 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2230 return res;
2231 case 0x0F00:
2232 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2233 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2234 return res;
2235 case 0x0FF0:
2236 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2237 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2238 return res;
2239 case 0x0FFF:
2240 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2241 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2242 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2243 return res;
2244 case 0xF000:
2245 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2246 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2247 return res;
2248 case 0xFF00:
2249 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2250 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2251 return res;
2252 default:
2253 break;
2255 /* Unhandled */
2256 goto v128_expr_bad;
2259 if (e->tag == Iex_Load) {
2260 HReg res = newVRegV(env);
2261 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
2262 vassert(ty == Ity_V128);
2263 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
2264 return res;
2267 if (e->tag == Iex_Get) {
2268 UInt offs = (UInt)e->Iex.Get.offset;
2269 if (offs < (1<<12)) {
2270 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
2271 HReg res = newVRegV(env);
2272 vassert(ty == Ity_V128);
2273 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
2274 return res;
2276 goto v128_expr_bad;
2279 if (e->tag == Iex_Unop) {
2281 /* Iop_ZeroHIXXofV128 cases */
2282 UShort imm16 = 0;
2283 switch (e->Iex.Unop.op) {
2284 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
2285 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
2286 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
2287 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
2288 default: break;
2290 if (imm16 != 0) {
2291 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2292 HReg imm = newVRegV(env);
2293 HReg res = newVRegV(env);
2294 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
2295 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
2296 return res;
2299 /* Other cases */
2300 switch (e->Iex.Unop.op) {
2301 case Iop_NotV128:
2302 case Iop_Abs64Fx2: case Iop_Abs32Fx4:
2303 case Iop_Neg64Fx2: case Iop_Neg32Fx4:
2304 case Iop_Abs64x2: case Iop_Abs32x4:
2305 case Iop_Abs16x8: case Iop_Abs8x16:
2306 case Iop_Cls32x4: case Iop_Cls16x8: case Iop_Cls8x16:
2307 case Iop_Clz32x4: case Iop_Clz16x8: case Iop_Clz8x16:
2308 case Iop_Cnt8x16:
2309 case Iop_Reverse1sIn8_x16:
2310 case Iop_Reverse8sIn16_x8:
2311 case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
2312 case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
2313 case Iop_Reverse32sIn64_x2:
2314 case Iop_RecipEst32Ux4:
2315 case Iop_RSqrtEst32Ux4:
2316 case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
2317 case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
2319 HReg res = newVRegV(env);
2320 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2321 Bool setRM = False;
2322 ARM64VecUnaryOp op = ARM64vecu_INVALID;
2323 switch (e->Iex.Unop.op) {
2324 case Iop_NotV128: op = ARM64vecu_NOT; break;
2325 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
2326 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
2327 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
2328 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
2329 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
2330 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
2331 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
2332 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
2333 case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break;
2334 case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break;
2335 case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break;
2336 case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break;
2337 case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break;
2338 case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break;
2339 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
2340 case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break;
2341 case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break;
2342 case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break;
2343 case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break;
2344 case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break;
2345 case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break;
2346 case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
2347 case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break;
2348 case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break;
2349 case Iop_RecipEst64Fx2: setRM = True;
2350 op = ARM64vecu_FRECPE64x2; break;
2351 case Iop_RecipEst32Fx4: setRM = True;
2352 op = ARM64vecu_FRECPE32x4; break;
2353 case Iop_RSqrtEst64Fx2: setRM = True;
2354 op = ARM64vecu_FRSQRTE64x2; break;
2355 case Iop_RSqrtEst32Fx4: setRM = True;
2356 op = ARM64vecu_FRSQRTE32x4; break;
2357 default: vassert(0);
2359 if (setRM) {
2360 // This is a bit of a kludge. We should do rm properly for
2361 // these recip-est insns, but that would require changing the
2362 // primop's type to take an rmode.
2363 set_FPCR_rounding_mode(env, IRExpr_Const(
2364 IRConst_U32(Irrm_NEAREST)));
2366 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2367 return res;
2369 case Iop_CmpNEZ8x16:
2370 case Iop_CmpNEZ16x8:
2371 case Iop_CmpNEZ32x4:
2372 case Iop_CmpNEZ64x2: {
2373 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2374 HReg zero = newVRegV(env);
2375 HReg res = newVRegV(env);
2376 ARM64VecBinOp cmp = ARM64vecb_INVALID;
2377 switch (e->Iex.Unop.op) {
2378 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
2379 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
2380 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
2381 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
2382 default: vassert(0);
2384 // This is pretty feeble. Better: use CMP against zero
2385 // and avoid the extra instruction and extra register.
2386 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
2387 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
2388 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2389 return res;
2391 case Iop_V256toV128_0:
2392 case Iop_V256toV128_1: {
2393 HReg vHi, vLo;
2394 iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
2395 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
2397 case Iop_64UtoV128: {
2398 HReg res = newVRegV(env);
2399 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2400 addInstr(env, ARM64Instr_VQfromX(res, arg));
2401 return res;
2403 case Iop_Widen8Sto16x8: {
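         /* ZIP1 of the value with itself pairs each source byte with a
            copy of itself (a0,a0,a1,a1,...); the arithmetic shift right
            by 8 within each 16-bit lane then leaves the sign-extension of
            the original byte.  The 16->32 and 32->64 cases below use the
            same trick with wider lanes. */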
2404 HReg res = newVRegV(env);
2405 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2406 addInstr(env, ARM64Instr_VQfromX(res, arg));
2407 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
2408 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
2409 res, res, 8));
2410 return res;
2412 case Iop_Widen16Sto32x4: {
2413 HReg res = newVRegV(env);
2414 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2415 addInstr(env, ARM64Instr_VQfromX(res, arg));
2416 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
2417 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
2418 res, res, 16));
2419 return res;
2421 case Iop_Widen32Sto64x2: {
2422 HReg res = newVRegV(env);
2423 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2424 addInstr(env, ARM64Instr_VQfromX(res, arg));
2425 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
2426 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
2427 res, res, 32));
2428 return res;
2430 /* ... */
2431 default:
2432 break;
2433 } /* switch on the unop */
2434 } /* if (e->tag == Iex_Unop) */
2436 if (e->tag == Iex_Binop) {
2437 switch (e->Iex.Binop.op) {
2438 case Iop_Sqrt32Fx4:
2439 case Iop_Sqrt64Fx2: {
2440 HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
2441 HReg res = newVRegV(env);
2442 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
2443 ARM64VecUnaryOp op
2444 = e->Iex.Binop.op == Iop_Sqrt32Fx4
2445 ? ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2;
2446 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2447 return res;
2449 case Iop_64HLtoV128: {
2450 HReg res = newVRegV(env);
2451 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2452 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2453 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
2454 return res;
2456 /* -- Cases where we can generate a simple three-reg instruction. -- */
2457 case Iop_AndV128:
2458 case Iop_OrV128:
2459 case Iop_XorV128:
2460 case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
2461 case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
2462 case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
2463 case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
2464 case Iop_Add64x2: case Iop_Add32x4:
2465 case Iop_Add16x8: case Iop_Add8x16:
2466 case Iop_Sub64x2: case Iop_Sub32x4:
2467 case Iop_Sub16x8: case Iop_Sub8x16:
2468 case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
2469 case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
2470 case Iop_CmpEQ16x8: case Iop_CmpEQ8x16:
2471 case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
2472 case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
2473 case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
2474 case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
2475 case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
2476 case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
2477 case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
2478 case Iop_Perm8x16:
2479 case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
2480 case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
2481 case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
2482 case Iop_CatOddLanes16x8: case Iop_CatOddLanes8x16:
2483 case Iop_InterleaveHI32x4:
2484 case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
2485 case Iop_InterleaveLO32x4:
2486 case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
2487 case Iop_PolynomialMul8x16:
2488 case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
2489 case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
2490 case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
2491 case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
2492 case Iop_QSub64Sx2: case Iop_QSub32Sx4:
2493 case Iop_QSub16Sx8: case Iop_QSub8Sx16:
2494 case Iop_QSub64Ux2: case Iop_QSub32Ux4:
2495 case Iop_QSub16Ux8: case Iop_QSub8Ux16:
2496 case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8:
2497 case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
2498 case Iop_Sh8Sx16: case Iop_Sh16Sx8:
2499 case Iop_Sh32Sx4: case Iop_Sh64Sx2:
2500 case Iop_Sh8Ux16: case Iop_Sh16Ux8:
2501 case Iop_Sh32Ux4: case Iop_Sh64Ux2:
2502 case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
2503 case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
2504 case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
2505 case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
2506 case Iop_Max64Fx2: case Iop_Max32Fx4:
2507 case Iop_Min64Fx2: case Iop_Min32Fx4:
2508 case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
2509 case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
2511 HReg res = newVRegV(env);
2512 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2513 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2514 Bool sw = False;
2515 Bool setRM = False;
2516 ARM64VecBinOp op = ARM64vecb_INVALID;
2517 switch (e->Iex.Binop.op) {
2518 case Iop_AndV128: op = ARM64vecb_AND; break;
2519 case Iop_OrV128: op = ARM64vecb_ORR; break;
2520 case Iop_XorV128: op = ARM64vecb_XOR; break;
2521 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
2522 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
2523 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break;
2524 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
2525 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
2526 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
2527 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
2528 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
2529 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
2530 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
2531 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
2532 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
2533 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
2534 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
2535 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
2536 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
2537 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
2538 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
2539 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
2540 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
2541 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
2542 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
2543 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break;
2544 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
2545 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
2546 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
2547 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
2548 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
2549 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
2550 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
2551 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
2552 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
2553 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
2554 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
2555 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
2556 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
2557 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
2558 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
2559 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
2560 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
2561 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
2562 case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
2563 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
2564 break;
2565 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
2566 break;
2567 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
2568 break;
2569 case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
2570 break;
2571 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
2572 break;
2573 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True;
2574 break;
2575 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True;
2576 break;
2577 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True;
2578 break;
2579 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
2580 break;
2581 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
2582 break;
2583 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
2584 break;
2585 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
2586 break;
2587 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
2588 break;
2589 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
2590 break;
2591 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
2592 case Iop_QAdd64Sx2: op = ARM64vecb_SQADD64x2; break;
2593 case Iop_QAdd32Sx4: op = ARM64vecb_SQADD32x4; break;
2594 case Iop_QAdd16Sx8: op = ARM64vecb_SQADD16x8; break;
2595 case Iop_QAdd8Sx16: op = ARM64vecb_SQADD8x16; break;
2596 case Iop_QAdd64Ux2: op = ARM64vecb_UQADD64x2; break;
2597 case Iop_QAdd32Ux4: op = ARM64vecb_UQADD32x4; break;
2598 case Iop_QAdd16Ux8: op = ARM64vecb_UQADD16x8; break;
2599 case Iop_QAdd8Ux16: op = ARM64vecb_UQADD8x16; break;
2600 case Iop_QSub64Sx2: op = ARM64vecb_SQSUB64x2; break;
2601 case Iop_QSub32Sx4: op = ARM64vecb_SQSUB32x4; break;
2602 case Iop_QSub16Sx8: op = ARM64vecb_SQSUB16x8; break;
2603 case Iop_QSub8Sx16: op = ARM64vecb_SQSUB8x16; break;
2604 case Iop_QSub64Ux2: op = ARM64vecb_UQSUB64x2; break;
2605 case Iop_QSub32Ux4: op = ARM64vecb_UQSUB32x4; break;
2606 case Iop_QSub16Ux8: op = ARM64vecb_UQSUB16x8; break;
2607 case Iop_QSub8Ux16: op = ARM64vecb_UQSUB8x16; break;
2608 case Iop_QDMulHi32Sx4: op = ARM64vecb_SQDMULH32x4; break;
2609 case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break;
2610 case Iop_QRDMulHi32Sx4: op = ARM64vecb_SQRDMULH32x4; break;
2611 case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break;
2612 case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break;
2613 case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break;
2614 case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break;
2615 case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break;
2616 case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break;
2617 case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break;
2618 case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break;
2619 case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break;
2620 case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break;
2621 case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break;
2622 case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break;
2623 case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break;
2624 case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break;
2625 case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break;
2626 case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break;
2627 case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break;
2628 case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2; break;
2629 case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break;
2630 case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break;
2631 case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break;
2632 case Iop_RecipStep64Fx2: setRM = True;
2633 op = ARM64vecb_FRECPS64x2; break;
2634 case Iop_RecipStep32Fx4: setRM = True;
2635 op = ARM64vecb_FRECPS32x4; break;
2636 case Iop_RSqrtStep64Fx2: setRM = True;
2637 op = ARM64vecb_FRSQRTS64x2; break;
2638 case Iop_RSqrtStep32Fx4: setRM = True;
2639 op = ARM64vecb_FRSQRTS32x4; break;
2640 default: vassert(0);
2642 if (setRM) {
2643 // This is a bit of a kludge. We should do rm properly for
2644 // these recip-step insns, but that would require changing the
2645 // primop's type to take an rmode.
2646 set_FPCR_rounding_mode(env, IRExpr_Const(
2647 IRConst_U32(Irrm_NEAREST)));
2649 if (sw) {
2650 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
2651 } else {
2652 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
2654 return res;
2656         /* -- These only have 2-operand, accumulate-into-destination forms, so
2657            we first copy one argument into a fresh register to be modified. -- */
2658 case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2659 case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2660 case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2661 case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2663 HReg res = newVRegV(env);
2664 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2665 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2666 ARM64VecModifyOp op = ARM64vecmo_INVALID;
2667 switch (e->Iex.Binop.op) {
2668 /* In the following 8 cases, the US - SU switching is intended.
2669               See the comments in libvex_ir.h for details, and in the
2670               ARM64 front end, where these primops are generated. */
2671 case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2672 case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2673 case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2674 case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2675 case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2676 case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2677 case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2678 case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2679 default: vassert(0);
2681 /* The order of the operands is important. Although this is
2682 basically addition, the two operands are extended differently,
2683            so they must be placed in the correct registers in
2684            the instruction. */
2685 addInstr(env, ARM64Instr_VMov(16, res, argR));
2686 addInstr(env, ARM64Instr_VModifyV(op, res, argL));
2687 return res;
2689 /* -- Shifts by an immediate. -- */
2690 case Iop_ShrN64x2: case Iop_ShrN32x4:
2691 case Iop_ShrN16x8: case Iop_ShrN8x16:
2692 case Iop_SarN64x2: case Iop_SarN32x4:
2693 case Iop_SarN16x8: case Iop_SarN8x16:
2694 case Iop_ShlN64x2: case Iop_ShlN32x4:
2695 case Iop_ShlN16x8: case Iop_ShlN8x16:
2696 case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2697 case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2698 case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2699 case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2700 case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2701 case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2703 IRExpr* argL = e->Iex.Binop.arg1;
2704 IRExpr* argR = e->Iex.Binop.arg2;
2705 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2706 UInt amt = argR->Iex.Const.con->Ico.U8;
2707 UInt limLo = 0;
2708 UInt limHi = 0;
2709 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2710 /* Establish the instruction to use. */
2711 switch (e->Iex.Binop.op) {
2712 case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break;
2713 case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break;
2714 case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break;
2715 case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break;
2716 case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break;
2717 case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break;
2718 case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break;
2719 case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break;
2720 case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break;
2721 case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break;
2722 case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break;
2723 case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break;
2724 case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break;
2725 case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break;
2726 case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break;
2727 case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break;
2728 case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break;
2729 case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break;
2730 case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break;
2731 case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break;
2732 case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2733 case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2734 case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2735 case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2736 default: vassert(0);
2738 /* Establish the shift limits, for sanity check purposes only. */
2739 switch (e->Iex.Binop.op) {
2740 case Iop_ShrN64x2: limLo = 1; limHi = 64; break;
2741 case Iop_ShrN32x4: limLo = 1; limHi = 32; break;
2742 case Iop_ShrN16x8: limLo = 1; limHi = 16; break;
2743 case Iop_ShrN8x16: limLo = 1; limHi = 8; break;
2744 case Iop_SarN64x2: limLo = 1; limHi = 64; break;
2745 case Iop_SarN32x4: limLo = 1; limHi = 32; break;
2746 case Iop_SarN16x8: limLo = 1; limHi = 16; break;
2747 case Iop_SarN8x16: limLo = 1; limHi = 8; break;
2748 case Iop_ShlN64x2: limLo = 0; limHi = 63; break;
2749 case Iop_ShlN32x4: limLo = 0; limHi = 31; break;
2750 case Iop_ShlN16x8: limLo = 0; limHi = 15; break;
2751 case Iop_ShlN8x16: limLo = 0; limHi = 7; break;
2752 case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2753 case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2754 case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2755 case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7; break;
2756 case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2757 case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2758 case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
2759 case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7; break;
2760 case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
2761 case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
2762 case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
2763 case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7; break;
2764 default: vassert(0);
2766 /* For left shifts, the allowable amt values are
2767 0 .. lane_bits-1. For right shifts the allowable
2768 values are 1 .. lane_bits. */
2769 if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
2770 HReg src = iselV128Expr(env, argL);
2771 HReg dst = newVRegV(env);
2772 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2773 return dst;
2775 /* Special case some no-op shifts that the arm64 front end
2776 throws at us. We can't generate any instructions for these,
2777 but we don't need to either. */
2778 switch (e->Iex.Binop.op) {
2779 case Iop_ShrN64x2: case Iop_ShrN32x4:
2780 case Iop_ShrN16x8: case Iop_ShrN8x16:
2781 if (amt == 0) {
2782 return iselV128Expr(env, argL);
2784 break;
2785 default:
2786 break;
2788 /* otherwise unhandled */
2790 /* else fall out; this is unhandled */
2791 break;
2793 /* -- Saturating narrowing by an immediate -- */
2794 /* uu */
2795 case Iop_QandQShrNnarrow16Uto8Ux8:
2796 case Iop_QandQShrNnarrow32Uto16Ux4:
2797 case Iop_QandQShrNnarrow64Uto32Ux2:
2798 /* ss */
2799 case Iop_QandQSarNnarrow16Sto8Sx8:
2800 case Iop_QandQSarNnarrow32Sto16Sx4:
2801 case Iop_QandQSarNnarrow64Sto32Sx2:
2802 /* su */
2803 case Iop_QandQSarNnarrow16Sto8Ux8:
2804 case Iop_QandQSarNnarrow32Sto16Ux4:
2805 case Iop_QandQSarNnarrow64Sto32Ux2:
2806 /* ruu */
2807 case Iop_QandQRShrNnarrow16Uto8Ux8:
2808 case Iop_QandQRShrNnarrow32Uto16Ux4:
2809 case Iop_QandQRShrNnarrow64Uto32Ux2:
2810 /* rss */
2811 case Iop_QandQRSarNnarrow16Sto8Sx8:
2812 case Iop_QandQRSarNnarrow32Sto16Sx4:
2813 case Iop_QandQRSarNnarrow64Sto32Sx2:
2814 /* rsu */
2815 case Iop_QandQRSarNnarrow16Sto8Ux8:
2816 case Iop_QandQRSarNnarrow32Sto16Ux4:
2817 case Iop_QandQRSarNnarrow64Sto32Ux2:
2819 IRExpr* argL = e->Iex.Binop.arg1;
2820 IRExpr* argR = e->Iex.Binop.arg2;
2821 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2822 UInt amt = argR->Iex.Const.con->Ico.U8;
2823 UInt limit = 0;
2824 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2825 switch (e->Iex.Binop.op) {
2826 /* uu */
2827 case Iop_QandQShrNnarrow64Uto32Ux2:
2828 op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
2829 case Iop_QandQShrNnarrow32Uto16Ux4:
2830 op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
2831 case Iop_QandQShrNnarrow16Uto8Ux8:
2832 op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
2833 /* ss */
2834 case Iop_QandQSarNnarrow64Sto32Sx2:
2835 op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
2836 case Iop_QandQSarNnarrow32Sto16Sx4:
2837 op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
2838 case Iop_QandQSarNnarrow16Sto8Sx8:
2839 op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
2840 /* su */
2841 case Iop_QandQSarNnarrow64Sto32Ux2:
2842 op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
2843 case Iop_QandQSarNnarrow32Sto16Ux4:
2844 op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
2845 case Iop_QandQSarNnarrow16Sto8Ux8:
2846 op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
2847 /* ruu */
2848 case Iop_QandQRShrNnarrow64Uto32Ux2:
2849 op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
2850 case Iop_QandQRShrNnarrow32Uto16Ux4:
2851 op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
2852 case Iop_QandQRShrNnarrow16Uto8Ux8:
2853 op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
2854 /* rss */
2855 case Iop_QandQRSarNnarrow64Sto32Sx2:
2856 op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
2857 case Iop_QandQRSarNnarrow32Sto16Sx4:
2858 op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
2859 case Iop_QandQRSarNnarrow16Sto8Sx8:
2860 op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
2861 /* rsu */
2862 case Iop_QandQRSarNnarrow64Sto32Ux2:
2863 op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
2864 case Iop_QandQRSarNnarrow32Sto16Ux4:
2865 op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
2866 case Iop_QandQRSarNnarrow16Sto8Ux8:
2867 op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
2868 /**/
2869 default:
2870 vassert(0);
2872 if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
2873 HReg src = iselV128Expr(env, argL);
2874 HReg dst = newVRegV(env);
2875 HReg fpsr = newVRegI(env);
2876 /* Clear FPSR.Q, do the operation, and return both its
2877 result and the new value of FPSR.Q. We can simply
2878 zero out FPSR since all the other bits have no relevance
2879 in VEX generated code. */
2880 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
2881 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
2882 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2883 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
2884 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
2885 ARM64sh_SHR));
2886 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
2887 vassert(ril_one);
2888 addInstr(env, ARM64Instr_Logic(fpsr,
2889 fpsr, ril_one, ARM64lo_AND));
2890 /* Now we have: the main (shift) result in the bottom half
2891 of |dst|, and the Q bit at the bottom of |fpsr|.
2892               Combining them with an "InterleaveLO64x2"-style operation
2893 produces a 128 bit value, dst[63:0]:fpsr[63:0],
2894 which is what we want. */
2895 HReg scratch = newVRegV(env);
2896 addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
2897 addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
2898 dst, dst, scratch));
2899 return dst;
2902 /* else fall out; this is unhandled */
2903 break;
2906 // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
2907 // as it is in some ways more general and often leads to better
2908 // code overall.
2909 case Iop_ShlV128:
2910 case Iop_ShrV128: {
2911 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
2912 /* This is tricky. Generate an EXT instruction with zeroes in
2913 the high operand (shift right) or low operand (shift left).
2914 Note that we can only slice in the EXT instruction at a byte
2915 level of granularity, so the shift amount needs careful
2916 checking. */
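         /* For example, ShrV128(x, 0x20) gives immB == 4; assuming
            VExtV(dst, lo, hi, n) computes the 16-byte slice
            (hi:lo) >> (8*n), the EXT with a zero high operand yields
            bytes x[4..15] followed by four zero bytes, i.e. x shifted
            right by 32 bits. */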
2917 IRExpr* argL = e->Iex.Binop.arg1;
2918 IRExpr* argR = e->Iex.Binop.arg2;
2919 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2920 UInt amt = argR->Iex.Const.con->Ico.U8;
2921 Bool amtOK = False;
2922 switch (amt) {
2923 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
2924 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
2925 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
2926 amtOK = True; break;
2928 /* We could also deal with amt==0 by copying the source to
2929 the destination, but there's no need for that so far. */
2930 if (amtOK) {
2931 HReg src = iselV128Expr(env, argL);
2932 HReg srcZ = newVRegV(env);
2933 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
2934 UInt immB = amt / 8;
2935 vassert(immB >= 1 && immB <= 15);
2936 HReg dst = newVRegV(env);
2937 if (isSHR) {
2938 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
2939 immB));
2940 } else {
2941 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
2942 16 - immB));
2944 return dst;
2947 /* else fall out; this is unhandled */
2948 break;
2951 case Iop_PolynomialMull8x8:
2952 case Iop_Mull32Ux2:
2953 case Iop_Mull16Ux4:
2954 case Iop_Mull8Ux8:
2955 case Iop_Mull32Sx2:
2956 case Iop_Mull16Sx4:
2957 case Iop_Mull8Sx8:
2958 case Iop_QDMull32Sx2:
2959 case Iop_QDMull16Sx4:
2961 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2962 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2963 HReg vSrcL = newVRegV(env);
2964 HReg vSrcR = newVRegV(env);
2965 HReg dst = newVRegV(env);
2966 ARM64VecBinOp op = ARM64vecb_INVALID;
2967 switch (e->Iex.Binop.op) {
2968 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break;
2969 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break;
2970 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break;
2971 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break;
2972 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break;
2973 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; break;
2974 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break;
2975 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break;
2976 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break;
2977 default: vassert(0);
2979 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
2980 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
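         /* Both scalars are now duplicated into both halves of a Q
            register; the widening multiply presumably reads only the low
            64 bits (two 32-bit, four 16-bit or eight 8-bit lanes) of each
            source, producing a 128-bit vector of double-width products. */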
2981 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
2982 return dst;
2985 /* ... */
2986 default:
2987 break;
2988 } /* switch on the binop */
2989 } /* if (e->tag == Iex_Binop) */
2991 if (e->tag == Iex_Triop) {
2992 IRTriop* triop = e->Iex.Triop.details;
2993 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
2994 switch (triop->op) {
2995 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
2996 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
2997 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
2998 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
2999 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
3000 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
3001 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
3002 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
3003 default: break;
3005 if (vecbop != ARM64vecb_INVALID) {
3006 HReg argL = iselV128Expr(env, triop->arg2);
3007 HReg argR = iselV128Expr(env, triop->arg3);
3008 HReg dst = newVRegV(env);
3009 set_FPCR_rounding_mode(env, triop->arg1);
3010 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
3011 return dst;
3014 if (triop->op == Iop_SliceV128) {
3015 /* Note that, compared to ShlV128/ShrV128 just above, the shift
3016 amount here is in bytes, not bits. */
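         /* E.g. SliceV128(hi, lo, 3) selects bytes 3..18 of the 32-byte
            concatenation hi:lo, i.e. (hi:lo) >> 24 bits, which a single
            EXT with byte immediate 3 provides directly. */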
3017 IRExpr* argHi = triop->arg1;
3018 IRExpr* argLo = triop->arg2;
3019 IRExpr* argAmt = triop->arg3;
3020 if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
3021 UInt amt = argAmt->Iex.Const.con->Ico.U8;
3022 Bool amtOK = amt >= 1 && amt <= 15;
3023 /* We could also deal with amt==0 by copying argLO to
3024 the destination, but there's no need for that so far. */
3025 if (amtOK) {
3026 HReg srcHi = iselV128Expr(env, argHi);
3027 HReg srcLo = iselV128Expr(env, argLo);
3028 HReg dst = newVRegV(env);
3029 addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
3030 return dst;
3033 /* else fall out; this is unhandled */
3036 } /* if (e->tag == Iex_Triop) */
3038 if (e->tag == Iex_ITE) {
3039 // This code sequence is pretty feeble. We'd do better to generate BSL
3040 // here.
3041 HReg rX = newVRegI(env);
3043 ARM64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3044 addInstr(env, ARM64Instr_Set64(rX, cc));
3045 // cond: rX = 1 !cond: rX = 0
3047 // Mask the Set64 result. This is paranoia (should be unnecessary).
3048 ARM64RIL* one = mb_mkARM64RIL_I(1);
3049 vassert(one);
3050 addInstr(env, ARM64Instr_Logic(rX, rX, one, ARM64lo_AND));
3051 // cond: rX = 1 !cond: rX = 0
3053 // Propagate to all bits in the 64 bit word by subtracting 1 from it.
3054 // This also inverts the sense of the value.
3055 addInstr(env, ARM64Instr_Arith(rX, rX, ARM64RIA_I12(1,0),
3056 /*isAdd=*/False));
3057 // cond: rX = 0-(62)-0 !cond: rX = 1-(62)-1
3059 // Duplicate rX into a vector register
3060 HReg vMask = newVRegV(env);
3061 addInstr(env, ARM64Instr_VQfromXX(vMask, rX, rX));
3062 // cond: vMask = 0-(126)-0 !cond: vMask = 1-(126)-1
3064 HReg vIfTrue = iselV128Expr(env, e->Iex.ITE.iftrue);
3065 HReg vIfFalse = iselV128Expr(env, e->Iex.ITE.iffalse);
3067 // Mask out iffalse value as needed
3068 addInstr(env,
3069 ARM64Instr_VBinV(ARM64vecb_AND, vIfFalse, vIfFalse, vMask));
3071 // Invert the mask so we can use it for the iftrue value
3072 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, vMask, vMask));
3073 // cond: vMask = 1-(126)-1 !cond: vMask = 0-(126)-0
3075 // Mask out iftrue value as needed
3076 addInstr(env,
3077 ARM64Instr_VBinV(ARM64vecb_AND, vIfTrue, vIfTrue, vMask));
3079 // Merge the masked iftrue and iffalse results.
3080 HReg res = newVRegV(env);
3081 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ORR, res, vIfTrue, vIfFalse));
3083 return res;
3086 v128_expr_bad:
3087 ppIRExpr(e);
3088 vpanic("iselV128Expr_wrk");
3092 /*---------------------------------------------------------*/
3093 /*--- ISEL: Floating point expressions (64 bit) ---*/
3094 /*---------------------------------------------------------*/
3096 /* Compute a 64-bit floating point value into a register, the identity
3097 of which is returned. As with iselIntExpr_R, the reg may be either
3098 real or virtual; in any case it must not be changed by subsequent
3099 code emitted by the caller. */
3101 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
3103 HReg r = iselDblExpr_wrk( env, e );
3104 # if 0
3105 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3106 # endif
3107 vassert(hregClass(r) == HRcFlt64);
3108 vassert(hregIsVirtual(r));
3109 return r;
3112 /* DO NOT CALL THIS DIRECTLY */
3113 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
3115 IRType ty = typeOfIRExpr(env->type_env,e);
3116 vassert(e);
3117 vassert(ty == Ity_F64);
3119 if (e->tag == Iex_RdTmp) {
3120 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3123 if (e->tag == Iex_Const) {
3124 IRConst* con = e->Iex.Const.con;
3125 if (con->tag == Ico_F64i) {
3126 HReg src = newVRegI(env);
3127 HReg dst = newVRegD(env);
3128 addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
3129 addInstr(env, ARM64Instr_VDfromX(dst, src));
3130 return dst;
3132 if (con->tag == Ico_F64) {
3133 HReg src = newVRegI(env);
3134 HReg dst = newVRegD(env);
3135 union { Double d64; ULong u64; } u;
3136 vassert(sizeof(u) == 8);
3137 u.d64 = con->Ico.F64;
3138 addInstr(env, ARM64Instr_Imm64(src, u.u64));
3139 addInstr(env, ARM64Instr_VDfromX(dst, src));
3140 return dst;
3144 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3145 vassert(e->Iex.Load.ty == Ity_F64);
3146 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3147 HReg res = newVRegD(env);
3148 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
3149 return res;
3152 if (e->tag == Iex_Get) {
3153 Int offs = e->Iex.Get.offset;
3154 if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
3155 HReg rD = newVRegD(env);
3156 HReg rN = get_baseblock_register();
3157 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
3158 return rD;
3162 if (e->tag == Iex_Unop) {
3163 switch (e->Iex.Unop.op) {
3164 case Iop_NegF64: {
3165 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3166 HReg dst = newVRegD(env);
3167 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
3168 return dst;
3170 case Iop_AbsF64: {
3171 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3172 HReg dst = newVRegD(env);
3173 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
3174 return dst;
3176 case Iop_F32toF64: {
3177 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3178 HReg dst = newVRegD(env);
3179 addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
3180 return dst;
3182 case Iop_F16toF64: {
3183 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3184 HReg dst = newVRegD(env);
3185 addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
3186 return dst;
3188 case Iop_I32UtoF64:
3189 case Iop_I32StoF64: {
3190 /* Rounding mode is not involved here, since the
3191 conversion can always be done without loss of
3192 precision. */
3193 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3194 HReg dst = newVRegD(env);
3195 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
3196 ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
3197 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
3198 return dst;
3200 default:
3201 break;
3205 if (e->tag == Iex_Binop) {
3206 switch (e->Iex.Binop.op) {
3207 case Iop_RoundF64toInt:
3208 case Iop_SqrtF64:
3209 case Iop_RecpExpF64: {
3210 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3211 HReg dst = newVRegD(env);
3212 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3213 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3214 switch (e->Iex.Binop.op) {
3215 case Iop_RoundF64toInt: op = ARM64fpu_RINT; break;
3216 case Iop_SqrtF64: op = ARM64fpu_SQRT; break;
3217 case Iop_RecpExpF64: op = ARM64fpu_RECPX; break;
3218 default: vassert(0);
3220 addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
3221 return dst;
3223 case Iop_I64StoF64:
3224 case Iop_I64UtoF64: {
3225 ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
3226 ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
3227 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3228 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3229 HReg dstS = newVRegD(env);
3230 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3231 return dstS;
3233 default:
3234 break;
3238 if (e->tag == Iex_Triop) {
3239 IRTriop* triop = e->Iex.Triop.details;
3240 ARM64FpBinOp dblop = ARM64fpb_INVALID;
3241 switch (triop->op) {
3242 case Iop_DivF64: dblop = ARM64fpb_DIV; break;
3243 case Iop_MulF64: dblop = ARM64fpb_MUL; break;
3244 case Iop_SubF64: dblop = ARM64fpb_SUB; break;
3245 case Iop_AddF64: dblop = ARM64fpb_ADD; break;
3246 default: break;
3248 if (dblop != ARM64fpb_INVALID) {
3249 HReg argL = iselDblExpr(env, triop->arg2);
3250 HReg argR = iselDblExpr(env, triop->arg3);
3251 HReg dst = newVRegD(env);
3252 set_FPCR_rounding_mode(env, triop->arg1);
3253 addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
3254 return dst;
3258 if (e->tag == Iex_ITE) {
3259 /* ITE(ccexpr, iftrue, iffalse) */
3260 ARM64CondCode cc;
3261 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3262 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3263 HReg dst = newVRegD(env);
3264 cc = iselCondCode(env, e->Iex.ITE.cond);
3265 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
3266 return dst;
3269 ppIRExpr(e);
3270 vpanic("iselDblExpr_wrk");
3274 /*---------------------------------------------------------*/
3275 /*--- ISEL: Floating point expressions (32 bit) ---*/
3276 /*---------------------------------------------------------*/
3278 /* Compute a 32-bit floating point value into a register, the identity
3279 of which is returned. As with iselIntExpr_R, the reg may be either
3280 real or virtual; in any case it must not be changed by subsequent
3281 code emitted by the caller. Values are generated into HRcFlt64
3282 registers despite the values themselves being Ity_F32s. */
3284 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3286 HReg r = iselFltExpr_wrk( env, e );
3287 # if 0
3288 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3289 # endif
3290 vassert(hregClass(r) == HRcFlt64);
3291 vassert(hregIsVirtual(r));
3292 return r;
3295 /* DO NOT CALL THIS DIRECTLY */
3296 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3298 IRType ty = typeOfIRExpr(env->type_env,e);
3299 vassert(e);
3300 vassert(ty == Ity_F32);
3302 if (e->tag == Iex_RdTmp) {
3303 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3306 if (e->tag == Iex_Const) {
3307       /* This is something of a kludge.  Since a 32 bit floating point
3308          zero is just all zeroes, simply create a 64 bit zero word
3309          and transfer it.  This avoids having to create an SfromW
3310          instruction for this specific case. */
3311 IRConst* con = e->Iex.Const.con;
3312 if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
3313 HReg src = newVRegI(env);
3314 HReg dst = newVRegD(env);
3315 addInstr(env, ARM64Instr_Imm64(src, 0));
3316 addInstr(env, ARM64Instr_VDfromX(dst, src));
3317 return dst;
3319 if (con->tag == Ico_F32) {
3320 HReg src = newVRegI(env);
3321 HReg dst = newVRegD(env);
3322 union { Float f32; UInt u32; } u;
3323 vassert(sizeof(u) == 4);
3324 u.f32 = con->Ico.F32;
3325 addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
3326 addInstr(env, ARM64Instr_VDfromX(dst, src));
3327 return dst;
3331 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3332 vassert(e->Iex.Load.ty == Ity_F32);
3333 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3334 HReg res = newVRegD(env);
3335 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
3336 return res;
3339 if (e->tag == Iex_Get) {
3340 Int offs = e->Iex.Get.offset;
3341 if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
3342 HReg rD = newVRegD(env);
3343 HReg rN = get_baseblock_register();
3344 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
3345 return rD;
3349 if (e->tag == Iex_Unop) {
3350 switch (e->Iex.Unop.op) {
3351 case Iop_NegF32: {
3352 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3353 HReg dst = newVRegD(env);
3354 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
3355 return dst;
3357 case Iop_AbsF32: {
3358 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3359 HReg dst = newVRegD(env);
3360 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
3361 return dst;
3363 case Iop_F16toF32: {
3364 HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3365 HReg dst = newVRegD(env);
3366 addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
3367 return dst;
3369 default:
3370 break;
3374 if (e->tag == Iex_Binop) {
3375 switch (e->Iex.Binop.op) {
3376 case Iop_RoundF32toInt:
3377 case Iop_SqrtF32:
3378 case Iop_RecpExpF32: {
3379 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
3380 HReg dst = newVRegD(env);
3381 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3382 ARM64FpUnaryOp op = ARM64fpu_INVALID;
3383 switch (e->Iex.Binop.op) {
3384 case Iop_RoundF32toInt: op = ARM64fpu_RINT; break;
3385 case Iop_SqrtF32: op = ARM64fpu_SQRT; break;
3386 case Iop_RecpExpF32: op = ARM64fpu_RECPX; break;
3387 default: vassert(0);
3389 addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
3390 return dst;
3392 case Iop_F64toF32: {
3393 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3394 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3395 HReg dstS = newVRegD(env);
3396 addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
3397 return dstS;
3399 case Iop_I32UtoF32:
3400 case Iop_I32StoF32:
3401 case Iop_I64UtoF32:
3402 case Iop_I64StoF32: {
3403 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
3404 switch (e->Iex.Binop.op) {
3405 case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
3406 case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
3407 case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
3408 case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
3409 default: vassert(0);
3411 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3412 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3413 HReg dstS = newVRegD(env);
3414 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3415 return dstS;
3417 default:
3418 break;
3422 if (e->tag == Iex_Triop) {
3423 IRTriop* triop = e->Iex.Triop.details;
3424 ARM64FpBinOp sglop = ARM64fpb_INVALID;
3425 switch (triop->op) {
3426 case Iop_DivF32: sglop = ARM64fpb_DIV; break;
3427 case Iop_MulF32: sglop = ARM64fpb_MUL; break;
3428 case Iop_SubF32: sglop = ARM64fpb_SUB; break;
3429 case Iop_AddF32: sglop = ARM64fpb_ADD; break;
3430 default: break;
3432 if (sglop != ARM64fpb_INVALID) {
3433 HReg argL = iselFltExpr(env, triop->arg2);
3434 HReg argR = iselFltExpr(env, triop->arg3);
3435 HReg dst = newVRegD(env);
3436 set_FPCR_rounding_mode(env, triop->arg1);
3437 addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
3438 return dst;
3442 if (e->tag == Iex_ITE) {
3443 /* ITE(ccexpr, iftrue, iffalse) */
3444 ARM64CondCode cc;
3445 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
3446 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
3447 HReg dst = newVRegD(env);
3448 cc = iselCondCode(env, e->Iex.ITE.cond);
3449 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
3450 return dst;
3453 ppIRExpr(e);
3454 vpanic("iselFltExpr_wrk");
3458 /*---------------------------------------------------------*/
3459 /*--- ISEL: Floating point expressions (16 bit) ---*/
3460 /*---------------------------------------------------------*/
3462 /* Compute a 16-bit floating point value into a register, the identity
3463 of which is returned. As with iselIntExpr_R, the reg may be either
3464 real or virtual; in any case it must not be changed by subsequent
3465 code emitted by the caller. Values are generated into HRcFlt64
3466 registers despite the values themselves being Ity_F16s. */
3468 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3470 HReg r = iselF16Expr_wrk( env, e );
3471 # if 0
3472 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3473 # endif
3474 vassert(hregClass(r) == HRcFlt64);
3475 vassert(hregIsVirtual(r));
3476 return r;
3479 /* DO NOT CALL THIS DIRECTLY */
3480 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3482 IRType ty = typeOfIRExpr(env->type_env,e);
3483 vassert(e);
3484 vassert(ty == Ity_F16);
3486 if (e->tag == Iex_RdTmp) {
3487 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3490 if (e->tag == Iex_Get) {
3491 Int offs = e->Iex.Get.offset;
3492 if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3493 HReg rD = newVRegD(env);
3494 HReg rN = get_baseblock_register();
3495 addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3496 return rD;
3500 if (e->tag == Iex_Binop) {
3501 switch (e->Iex.Binop.op) {
3502 case Iop_F32toF16: {
3503 HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3504 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3505 HReg dstH = newVRegD(env);
3506 addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3507 return dstH;
3509 case Iop_F64toF16: {
3510 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3511 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3512 HReg dstH = newVRegD(env);
3513 addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3514 return dstH;
3516 default:
3517 break;
3521 ppIRExpr(e);
3522 vpanic("iselF16Expr_wrk");
3526 /*---------------------------------------------------------*/
3527 /*--- ISEL: Vector expressions (256 bit) ---*/
3528 /*---------------------------------------------------------*/
3530 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3531 ISelEnv* env, IRExpr* e )
3533 iselV256Expr_wrk( rHi, rLo, env, e );
3534 vassert(hregClass(*rHi) == HRcVec128);
3535 vassert(hregClass(*rLo) == HRcVec128);
3536 vassert(hregIsVirtual(*rHi));
3537 vassert(hregIsVirtual(*rLo));
3540 /* DO NOT CALL THIS DIRECTLY */
3541 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
3542 ISelEnv* env, IRExpr* e )
3544 vassert(e);
3545 IRType ty = typeOfIRExpr(env->type_env,e);
3546 vassert(ty == Ity_V256);
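   /* V256 values are carried in a pair of V128 vregs: |*rHi| receives
      the upper 128 bits and |*rLo| the lower 128 bits. */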
3548 /* read 256-bit IRTemp */
3549 if (e->tag == Iex_RdTmp) {
3550 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3551 return;
3554 if (e->tag == Iex_Binop) {
3555 switch (e->Iex.Binop.op) {
3556 case Iop_V128HLtoV256: {
3557 *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
3558 *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
3559 return;
3561 case Iop_QandSQsh64x2:
3562 case Iop_QandSQsh32x4:
3563 case Iop_QandSQsh16x8:
3564 case Iop_QandSQsh8x16:
3565 case Iop_QandUQsh64x2:
3566 case Iop_QandUQsh32x4:
3567 case Iop_QandUQsh16x8:
3568 case Iop_QandUQsh8x16:
3569 case Iop_QandSQRsh64x2:
3570 case Iop_QandSQRsh32x4:
3571 case Iop_QandSQRsh16x8:
3572 case Iop_QandSQRsh8x16:
3573 case Iop_QandUQRsh64x2:
3574 case Iop_QandUQRsh32x4:
3575 case Iop_QandUQRsh16x8:
3576 case Iop_QandUQRsh8x16:
3578 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
3579 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
3580 HReg fpsr = newVRegI(env);
3581 HReg resHi = newVRegV(env);
3582 HReg resLo = newVRegV(env);
3583 ARM64VecBinOp op = ARM64vecb_INVALID;
3584 switch (e->Iex.Binop.op) {
3585 case Iop_QandSQsh64x2: op = ARM64vecb_SQSHL64x2; break;
3586 case Iop_QandSQsh32x4: op = ARM64vecb_SQSHL32x4; break;
3587 case Iop_QandSQsh16x8: op = ARM64vecb_SQSHL16x8; break;
3588 case Iop_QandSQsh8x16: op = ARM64vecb_SQSHL8x16; break;
3589 case Iop_QandUQsh64x2: op = ARM64vecb_UQSHL64x2; break;
3590 case Iop_QandUQsh32x4: op = ARM64vecb_UQSHL32x4; break;
3591 case Iop_QandUQsh16x8: op = ARM64vecb_UQSHL16x8; break;
3592 case Iop_QandUQsh8x16: op = ARM64vecb_UQSHL8x16; break;
3593 case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
3594 case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
3595 case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
3596 case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
3597 case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
3598 case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
3599 case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
3600 case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
3601 default: vassert(0);
3603 /* Clear FPSR.Q, do the operation, and return both its result
3604 and the new value of FPSR.Q. We can simply zero out FPSR
3605 since all the other bits have no relevance in VEX generated
3606 code. */
3607 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3608 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3609 addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
3610 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
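         /* FPSR.QC (cumulative saturation) is bit 27, hence the shift
            right by 27 and the AND with 1 below. */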
3611 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3612 ARM64sh_SHR));
3613 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3614 vassert(ril_one);
3615 addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
3616 /* Now we have: the main (shift) result in |resLo|, and the
3617 Q bit at the bottom of |fpsr|. */
3618 addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
3619 *rHi = resHi;
3620 *rLo = resLo;
3621 return;
3624 /* ... */
3625 default:
3626 break;
3627 } /* switch on the binop */
3628 } /* if (e->tag == Iex_Binop) */
3630 ppIRExpr(e);
3631 vpanic("iselV256Expr_wrk");
3635 /*---------------------------------------------------------*/
3636 /*--- ISEL: Statements ---*/
3637 /*---------------------------------------------------------*/
3639 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3641 if (vex_traceflags & VEX_TRACE_VCODE) {
3642 vex_printf("\n-- ");
3643 ppIRStmt(stmt);
3644 vex_printf("\n");
3646 switch (stmt->tag) {
3648 /* --------- STORE --------- */
3649 /* little-endian write to memory */
3650 case Ist_Store: {
3651 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3652 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3653 IREndness end = stmt->Ist.Store.end;
3655 if (tya != Ity_I64 || end != Iend_LE)
3656 goto stmt_fail;
3658 if (tyd == Ity_I64) {
3659 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3660 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3661 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3662 return;
3664 if (tyd == Ity_I32) {
3665 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3666 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3667 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3668 return;
3670 if (tyd == Ity_I16) {
3671 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3672 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3673 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3674 return;
3676 if (tyd == Ity_I8) {
3677 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3678 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3679 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3680 return;
3682 if (tyd == Ity_V128) {
3683 HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
3684 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3685 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3686 return;
3688 if (tyd == Ity_F64) {
3689 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
3690 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3691 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
3692 return;
3694 if (tyd == Ity_F32) {
3695 HReg sD = iselFltExpr(env, stmt->Ist.Store.data);
3696 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3697 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
3698 return;
3700 break;
3703 /* --------- PUT --------- */
3704 /* write guest state, fixed offset */
3705 case Ist_Put: {
3706 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3707 UInt offs = (UInt)stmt->Ist.Put.offset;
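      /* The offset/alignment checks below keep |offs| within the range
         of the scaled unsigned 12-bit immediate used by the
         corresponding load/store forms. */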
3708 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
3709 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3710 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
3711 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3712 return;
3714 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
3715 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3716 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
3717 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3718 return;
3720 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
3721 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3722 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
3723 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3724 return;
3726 if (tyd == Ity_I8 && offs < (1<<12)) {
3727 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3728 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
3729 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3730 return;
3732 if (tyd == Ity_V128 && offs < (1<<12)) {
3733 HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
3734 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
3735 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3736 return;
3738 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
3739 HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
3740 HReg bbp = get_baseblock_register();
3741 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
3742 return;
3744 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
3745 HReg sD = iselFltExpr(env, stmt->Ist.Put.data);
3746 HReg bbp = get_baseblock_register();
3747 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
3748 return;
3750 if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
3751 HReg hD = iselF16Expr(env, stmt->Ist.Put.data);
3752 HReg bbp = get_baseblock_register();
3753 addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
3754 return;
3757 break;
3760 /* --------- TMP --------- */
3761 /* assign value to temporary */
3762 case Ist_WrTmp: {
3763 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3764 IRType ty = typeOfIRTemp(env->type_env, tmp);
3766 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3767 /* We could do a lot better here. But for the time being: */
3768 HReg dst = lookupIRTemp(env, tmp);
3769 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
3770 addInstr(env, ARM64Instr_MovI(dst, rD));
3771 return;
3773 if (ty == Ity_I1) {
3774          /* Here, we are generating an I1 value into a 64 bit register.
3775             Make sure the value in the register is only zero or one,
3776             and nothing else.  This allows optimisation of the
3777             1Uto64(tmp:I1) case, by making it simply a copy of the
3778             register holding 'tmp'.  The point is that the value in
3779             the register holding 'tmp' can only have been created
3780             here.  LATER: that seems dangerous; safer to do 'tmp & 1'
3781             in that case.  Also, this could be done with a single CINC
3782             insn. */
3783 /* CLONE-01 */
3784 HReg zero = newVRegI(env);
3785 HReg one = newVRegI(env);
3786 HReg dst = lookupIRTemp(env, tmp);
3787 addInstr(env, ARM64Instr_Imm64(zero, 0));
3788 addInstr(env, ARM64Instr_Imm64(one, 1));
3789 ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
3790 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
3791 return;
3793 if (ty == Ity_F64) {
3794 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3795 HReg dst = lookupIRTemp(env, tmp);
3796 addInstr(env, ARM64Instr_VMov(8, dst, src));
3797 return;
3799 if (ty == Ity_F32) {
3800 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3801 HReg dst = lookupIRTemp(env, tmp);
3802 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
3803 return;
3805 if (ty == Ity_F16) {
3806 HReg src = iselF16Expr(env, stmt->Ist.WrTmp.data);
3807 HReg dst = lookupIRTemp(env, tmp);
3808 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
3809 return;
3811 if (ty == Ity_V128) {
3812 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
3813 HReg dst = lookupIRTemp(env, tmp);
3814 addInstr(env, ARM64Instr_VMov(16, dst, src));
3815 return;
3817 if (ty == Ity_V256) {
3818 HReg srcHi, srcLo, dstHi, dstLo;
3819 iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
3820 lookupIRTempPair( &dstHi, &dstLo, env, tmp);
3821 addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
3822 addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
3823 return;
3825 break;
3828 /* --------- Call to DIRTY helper --------- */
3829 /* call complex ("dirty") helper function */
3830 case Ist_Dirty: {
3831 IRDirty* d = stmt->Ist.Dirty.details;
3833 /* Figure out the return type, if any. */
3834 IRType retty = Ity_INVALID;
3835 if (d->tmp != IRTemp_INVALID)
3836 retty = typeOfIRTemp(env->type_env, d->tmp);
3838 Bool retty_ok = False;
3839 switch (retty) {
3840 case Ity_INVALID: /* function doesn't return anything */
3841 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
3842 case Ity_V128:
3843 retty_ok = True; break;
3844 default:
3845 break;
3847 if (!retty_ok)
3848 break; /* will go to stmt_fail: */
3850 /* Marshal args, do the call, and set the return value to 0x555..555
3851 if this is a conditional call that returns a value and the
3852 call is skipped. */
3853 UInt addToSp = 0;
3854 RetLoc rloc = mk_RetLoc_INVALID();
3855 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
3856 vassert(is_sane_RetLoc(rloc));
3858 /* Now figure out what to do with the returned value, if any. */
3859 switch (retty) {
3860 case Ity_INVALID: {
3861 /* No return value. Nothing to do. */
3862 vassert(d->tmp == IRTemp_INVALID);
3863 vassert(rloc.pri == RLPri_None);
3864 vassert(addToSp == 0);
3865 return;
3867 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
3868 vassert(rloc.pri == RLPri_Int);
3869 vassert(addToSp == 0);
3870 /* The returned value is in x0. Park it in the register
3871 associated with tmp. */
3872 HReg dst = lookupIRTemp(env, d->tmp);
3873 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
3874 return;
3876 case Ity_V128: {
3877             /* The returned value is on the stack, and |rloc| tells
3878                us where.  Fish it off the stack and then move the
3879                stack pointer upwards to clear it, as directed by
3880                doHelperCall. */
3881 vassert(rloc.pri == RLPri_V128SpRel);
3882 vassert(rloc.spOff < 256); // stay sane
3883 vassert(addToSp >= 16); // ditto
3884 vassert(addToSp < 256); // ditto
3885 HReg dst = lookupIRTemp(env, d->tmp);
3886 HReg tmp = newVRegI(env); // the address of the returned value
3887 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
3888 addInstr(env, ARM64Instr_Arith(tmp, tmp,
3889 ARM64RIA_I12((UShort)rloc.spOff, 0),
3890 True/*isAdd*/ ));
3891 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
3892 addInstr(env, ARM64Instr_AddToSP(addToSp));
3893 return;
3895 default:
3896 /*NOTREACHED*/
3897 vassert(0);
3899 break;
3902 /* --------- Load Linked and Store Conditional --------- */
3903 case Ist_LLSC: {
3904 if (stmt->Ist.LLSC.storedata == NULL) {
3905 /* LL */
3906 IRTemp res = stmt->Ist.LLSC.result;
3907 IRType ty = typeOfIRTemp(env->type_env, res);
3908 if (ty == Ity_I64 || ty == Ity_I32
3909 || ty == Ity_I16 || ty == Ity_I8) {
3910 Int szB = 0;
3911 HReg r_dst = lookupIRTemp(env, res);
3912 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3913 switch (ty) {
3914 case Ity_I8: szB = 1; break;
3915 case Ity_I16: szB = 2; break;
3916 case Ity_I32: szB = 4; break;
3917 case Ity_I64: szB = 8; break;
3918 default: vassert(0);
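            /* ARM64Instr_LdrEX uses fixed registers: the address is
               passed in x4 and the loaded value comes back in x2, hence
               the register moves below. */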
3920 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
3921 addInstr(env, ARM64Instr_LdrEX(szB));
3922 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
3923 return;
3925 goto stmt_fail;
3926 } else {
3927 /* SC */
3928 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
3929 if (tyd == Ity_I64 || tyd == Ity_I32
3930 || tyd == Ity_I16 || tyd == Ity_I8) {
3931 Int szB = 0;
3932 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
3933 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3934 switch (tyd) {
3935 case Ity_I8: szB = 1; break;
3936 case Ity_I16: szB = 2; break;
3937 case Ity_I32: szB = 4; break;
3938 case Ity_I64: szB = 8; break;
3939 default: vassert(0);
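            /* ARM64Instr_StrEX likewise uses fixed registers: data in
               x2, address in x4, with the success/failure status left
               in x0. */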
3941 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
3942 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
3943 addInstr(env, ARM64Instr_StrEX(szB));
3944 } else {
3945 goto stmt_fail;
3947          /* Now x0 is 1 if the store failed and 0 if it succeeded.
3948             Change to IR conventions (0 is fail, 1 is success), and
3949             transfer the result to r_res. */
3950 IRTemp res = stmt->Ist.LLSC.result;
3951 IRType ty = typeOfIRTemp(env->type_env, res);
3952 HReg r_res = lookupIRTemp(env, res);
3953 ARM64RIL* one = mb_mkARM64RIL_I(1);
3954 vassert(ty == Ity_I1);
3955 vassert(one);
3956 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
3957 ARM64lo_XOR));
3958 /* And be conservative -- mask off all but the lowest bit. */
3959 addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
3960 ARM64lo_AND));
3961 return;
3963 break;
3966 /* --------- ACAS --------- */
3967 case Ist_CAS: {
3968 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3969 /* "normal" singleton CAS */
3970 UChar sz;
3971 IRCAS* cas = stmt->Ist.CAS.details;
3972 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3973 switch (ty) {
3974 case Ity_I64: sz = 8; break;
3975 case Ity_I32: sz = 4; break;
3976 case Ity_I16: sz = 2; break;
3977 case Ity_I8: sz = 1; break;
3978 default: goto unhandled_cas;
3980 HReg rAddr = iselIntExpr_R(env, cas->addr);
3981 HReg rExpd = iselIntExpr_R(env, cas->expdLo);
3982 HReg rData = iselIntExpr_R(env, cas->dataLo);
3983 vassert(cas->expdHi == NULL);
3984 vassert(cas->dataHi == NULL);
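         /* ARM64Instr_CAS uses a fixed register convention: address in
            x3, expected value in x5, new value in x7; the value actually
            observed at the address ends up in x1. */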
3985 addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr));
3986 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd));
3987 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData));
3988 addInstr(env, ARM64Instr_CAS(sz));
3989          /* Now the lowest |sz| bytes of x1 are either equal to the
3990             lowest |sz| bytes of x5, indicating success, or they
3991             aren't, indicating failure. */
3992 HReg rResult = hregARM64_X1();
3993 switch (sz) {
3994 case 8: break;
3995 case 4: rResult = widen_z_32_to_64(env, rResult); break;
3996 case 2: rResult = widen_z_16_to_64(env, rResult); break;
3997 case 1: rResult = widen_z_8_to_64(env, rResult); break;
3998 default: vassert(0);
4000 // "old" in this case is interpreted somewhat liberally, per
4001 // the previous comment.
4002 HReg rOld = lookupIRTemp(env, cas->oldLo);
4003 addInstr(env, ARM64Instr_MovI(rOld, rResult));
4004 return;
4006 else {
4007 /* Paired register CAS, i.e. CASP */
4008 UChar sz;
4009 IRCAS* cas = stmt->Ist.CAS.details;
4010 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4011 switch (ty) {
4012 case Ity_I64: sz = 8; break;
4013 case Ity_I32: sz = 4; break;
4014 default: goto unhandled_cas;
4016 HReg rAddr = iselIntExpr_R(env, cas->addr);
4018 HReg rExpd0 = iselIntExpr_R(env, cas->expdLo);
4019 vassert(cas->expdHi != NULL);
4020 HReg rExpd1 = iselIntExpr_R(env, cas->expdHi);
4022 HReg rData0 = iselIntExpr_R(env, cas->dataLo);
4023 vassert(cas->dataHi != NULL);
4024 HReg rData1 = iselIntExpr_R(env, cas->dataHi);
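         /* ARM64Instr_CASP also uses fixed registers: address in x2,
            expected pair in x4/x5, new pair in x6/x7; the observed old
            pair is returned in x0/x1. */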
4026 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rAddr));
4028 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rExpd0));
4029 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd1));
4031 addInstr(env, ARM64Instr_MovI(hregARM64_X6(), rData0));
4032 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData1));
4034 addInstr(env, ARM64Instr_CASP(sz));
4036 HReg rResult0 = hregARM64_X0();
4037 HReg rResult1 = hregARM64_X1();
4038 switch (sz) {
4039 case 8: break;
4040 case 4: rResult0 = widen_z_32_to_64(env, rResult0);
4041 rResult1 = widen_z_32_to_64(env, rResult1);
4042 break;
4043 default: vassert(0);
4045 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4046 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4047 addInstr(env, ARM64Instr_MovI(rOldLo, rResult0));
4048 addInstr(env, ARM64Instr_MovI(rOldHi, rResult1));
4049 return;
4051 unhandled_cas:
4052 break;
4055 /* --------- MEM FENCE --------- */
4056 case Ist_MBE:
4057 switch (stmt->Ist.MBE.event) {
4058 case Imbe_Fence:
4059 addInstr(env, ARM64Instr_MFence());
4060 return;
4061 case Imbe_CancelReservation:
4062 addInstr(env, ARM64Instr_ClrEX());
4063 return;
4064 default:
4065 break;
4067 break;
4069 /* --------- INSTR MARK --------- */
4070 /* Doesn't generate any executable code ... */
4071 case Ist_IMark:
4072 return;
4074 /* --------- ABI HINT --------- */
4075 /* These have no meaning (denotation in the IR) and so we ignore
4076 them ... if any actually made it this far. */
4077 case Ist_AbiHint:
4078 return;
4080 /* --------- NO-OP --------- */
4081 case Ist_NoOp:
4082 return;
4084 /* --------- EXIT --------- */
4085 case Ist_Exit: {
4086 if (stmt->Ist.Exit.dst->tag != Ico_U64)
4087 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
4089 ARM64CondCode cc
4090 = iselCondCode(env, stmt->Ist.Exit.guard);
4091 ARM64AMode* amPC
4092 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
4094 /* Case: boring transfer to known address */
4095 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4096 if (env->chainingAllowed) {
4097 /* .. almost always true .. */
4098 /* Skip the event check at the dst if this is a forwards
4099 edge. */
4100 Bool toFastEP
4101 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
4102 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4103 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
4104 amPC, cc, toFastEP));
4105 } else {
4106 /* .. very occasionally .. */
4107 /* We can't use chaining, so ask for an assisted transfer,
4108 as that's the only alternative that is allowable. */
4109 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4110 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
4112 return;
4115 /* Case: assisted transfer to arbitrary address */
4116 switch (stmt->Ist.Exit.jk) {
4117 /* Keep this list in sync with that for iselNext below */
4118 case Ijk_ClientReq:
4119 case Ijk_NoDecode:
4120 case Ijk_NoRedir:
4121 case Ijk_Sys_syscall:
4122 case Ijk_InvalICache:
4123 case Ijk_FlushDCache:
4124 case Ijk_SigTRAP:
4125 case Ijk_Yield: {
4126 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4127 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
4128 stmt->Ist.Exit.jk));
4129 return;
4131 default:
4132 break;
4135 /* Do we ever expect to see any other kind? */
4136 goto stmt_fail;
4139 default: break;
4141 stmt_fail:
4142 ppIRStmt(stmt);
4143 vpanic("iselStmt");
4147 /*---------------------------------------------------------*/
4148 /*--- ISEL: Basic block terminators (Nexts) ---*/
4149 /*---------------------------------------------------------*/
4151 static void iselNext ( ISelEnv* env,
4152 IRExpr* next, IRJumpKind jk, Int offsIP )
4154 if (vex_traceflags & VEX_TRACE_VCODE) {
4155 vex_printf( "\n-- PUT(%d) = ", offsIP);
4156 ppIRExpr( next );
4157 vex_printf( "; exit-");
4158 ppIRJumpKind(jk);
4159 vex_printf( "\n");
4162 /* Case: boring transfer to known address */
4163 if (next->tag == Iex_Const) {
4164 IRConst* cdst = next->Iex.Const.con;
4165 vassert(cdst->tag == Ico_U64);
4166 if (jk == Ijk_Boring || jk == Ijk_Call) {
4167 /* Boring transfer to known address */
4168 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4169 if (env->chainingAllowed) {
4170 /* .. almost always true .. */
4171 /* Skip the event check at the dst if this is a forwards
4172 edge. */
4173 Bool toFastEP
4174 = ((Addr64)cdst->Ico.U64) > env->max_ga;
4175 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4176 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
4177 amPC, ARM64cc_AL,
4178 toFastEP));
4179 } else {
4180 /* .. very occasionally .. */
4181 /* We can't use chaining, so ask for an assisted transfer,
4182 as that's the only alternative that is allowable. */
4183 HReg r = iselIntExpr_R(env, next);
4184 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4185 Ijk_Boring));
4187 return;
4191 /* Case: call/return (==boring) transfer to any address */
4192 switch (jk) {
4193 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4194 HReg r = iselIntExpr_R(env, next);
4195 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4196 if (env->chainingAllowed) {
4197 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
4198 } else {
4199 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4200 Ijk_Boring));
4202 return;
4204 default:
4205 break;
4208 /* Case: assisted transfer to arbitrary address */
4209 switch (jk) {
4210 /* Keep this list in sync with that for Ist_Exit above */
4211 case Ijk_ClientReq:
4212 case Ijk_NoDecode:
4213 case Ijk_NoRedir:
4214 case Ijk_Sys_syscall:
4215 case Ijk_InvalICache:
4216 case Ijk_FlushDCache:
4217 case Ijk_SigTRAP:
4218 case Ijk_Yield:
4220 HReg r = iselIntExpr_R(env, next);
4221 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4222 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
4223 return;
4225 default:
4226 break;
4229 vex_printf( "\n-- PUT(%d) = ", offsIP);
4230 ppIRExpr( next );
4231 vex_printf( "; exit-");
4232 ppIRJumpKind(jk);
4233 vex_printf( "\n");
4234 vassert(0); // are we expecting any other kind?
4238 /*---------------------------------------------------------*/
4239 /*--- Insn selector top-level ---*/
4240 /*---------------------------------------------------------*/
4242 /* Translate an entire SB to arm64 code. */
4244 HInstrArray* iselSB_ARM64 ( const IRSB* bb,
4245 VexArch arch_host,
4246 const VexArchInfo* archinfo_host,
4247 const VexAbiInfo* vbi/*UNUSED*/,
4248 Int offs_Host_EvC_Counter,
4249 Int offs_Host_EvC_FailAddr,
4250 Bool chainingAllowed,
4251 Bool addProfInc,
4252 Addr max_ga )
4254 Int i, j;
4255 HReg hreg, hregHI;
4256 ISelEnv* env;
4257 UInt hwcaps_host = archinfo_host->hwcaps;
4258 ARM64AMode *amCounter, *amFailAddr;
4260 /* sanity ... */
4261 vassert(arch_host == VexArchARM64);
4263 /* Check that the host's endianness is as expected. */
4264 vassert(archinfo_host->endness == VexEndnessLE);
4266 /* guard against unexpected space regressions */
4267 vassert(sizeof(ARM64Instr) <= 32);
4269 /* Make up an initial environment to use. */
4270 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4271 env->vreg_ctr = 0;
4273 /* Set up output code array. */
4274 env->code = newHInstrArray();
4276 /* Copy BB's type env. */
4277 env->type_env = bb->tyenv;
4279 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4280 change as we go along. */
4281 env->n_vregmap = bb->tyenv->types_used;
4282 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4283 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4285 /* and finally ... */
4286 env->chainingAllowed = chainingAllowed;
4287 env->hwcaps = hwcaps_host;
4288 env->previous_rm = NULL;
4289 env->max_ga = max_ga;
4291 /* For each IR temporary, allocate a suitably-kinded virtual
4292 register. */
4293 j = 0;
4294 for (i = 0; i < env->n_vregmap; i++) {
4295 hregHI = hreg = INVALID_HREG;
4296 switch (bb->tyenv->types[i]) {
4297 case Ity_I1:
4298 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
4299 hreg = mkHReg(True, HRcInt64, 0, j++);
4300 break;
4301 case Ity_I128:
4302 hreg = mkHReg(True, HRcInt64, 0, j++);
4303 hregHI = mkHReg(True, HRcInt64, 0, j++);
4304 break;
4305 case Ity_F16: // we'll use HRcFlt64 regs for F16 too
4306 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
4307 case Ity_F64:
4308 hreg = mkHReg(True, HRcFlt64, 0, j++);
4309 break;
4310 case Ity_V128:
4311 hreg = mkHReg(True, HRcVec128, 0, j++);
4312 break;
4313 case Ity_V256:
4314 hreg = mkHReg(True, HRcVec128, 0, j++);
4315 hregHI = mkHReg(True, HRcVec128, 0, j++);
4316 break;
4317 default:
4318 ppIRType(bb->tyenv->types[i]);
4319 vpanic("iselBB(arm64): IRTemp type");
4321 env->vregmap[i] = hreg;
4322 env->vregmapHI[i] = hregHI;
4324 env->vreg_ctr = j;
4326 /* The very first instruction must be an event check. */
4327 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
4328 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
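   /* Both amodes are relative to x21, the baseblock (guest state)
      pointer. */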
4329 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
4331    /* Possibly a block counter increment (for profiling).  At this
4332       point we don't know the address of the counter, so just pretend
4333       it is zero.  It will have to be patched later, but before this
4334       translation is used, by a call to LibVEX_PatchProfInc. */
4335 if (addProfInc) {
4336 addInstr(env, ARM64Instr_ProfInc());
4339 /* Ok, finally we can iterate over the statements. */
4340 for (i = 0; i < bb->stmts_used; i++)
4341 iselStmt(env, bb->stmts[i]);
4343 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4345 /* record the number of vregs we used. */
4346 env->code->n_vregs = env->vreg_ctr;
4347 return env->code;
4351 /*---------------------------------------------------------------*/
4352 /*--- end host_arm64_isel.c ---*/
4353 /*---------------------------------------------------------------*/